Merge tag 'devicetree-fixes-for-5.12-2' of git://git.kernel.org/pub/scm/linux/kernel...
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Apr 2021 20:01:48 +0000 (13:01 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 9 Apr 2021 20:01:48 +0000 (13:01 -0700)
Pull devicetree fixes from Rob Herring:

 - Fix fw_devlink failure with ".*,nr-gpios" properties (a sketch of
   the idea follows the change list below)

 - Doc link reference fixes from Mauro

 - Fixes for unaligned FDT handling found on OpenRISC. First, avoid a
   crash by properly handling errors when unflattening an unaligned
   FDT. Second, fix memory allocations for FDTs to ensure proper
   alignment. A sketch of both fixes follows.
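
As a hedged illustration of the alignment item (the helper names and the
local alignment macro are hypothetical, not the actual drivers/of/
patches): libfdt expects the blob on a 64-bit boundary, so a copied FDT
should land in an explicitly aligned buffer, and unflattening should
propagate the error that fdt_get_name() reports instead of dereferencing
a NULL name.

    #include <linux/kernel.h>   /* PTR_ALIGN */
    #include <linux/libfdt.h>   /* fdt_get_name */
    #include <linux/slab.h>     /* kmalloc, kfree, GFP_KERNEL */
    #include <linux/string.h>   /* memcpy */

    #define MY_FDT_ALIGN 8      /* libfdt requires 64-bit alignment */

    /* Copy @fdt into an aligned buffer; *@raw receives the pointer to
     * pass to kfree() later, which may differ from the aligned pointer
     * returned to the caller. */
    static void *copy_fdt_aligned(const void *fdt, size_t size, void **raw)
    {
            void *buf = kmalloc(size + MY_FDT_ALIGN, GFP_KERNEL);
            void *aligned;

            if (!buf)
                    return NULL;
            aligned = PTR_ALIGN(buf, MY_FDT_ALIGN);
            memcpy(aligned, fdt, size);
            *raw = buf;
            return aligned;
    }

    /* On failure fdt_get_name() returns NULL and stores a negative
     * FDT_ERR_* code in *lenp; propagate that instead of crashing. */
    static int check_node_name(const void *blob, int offset,
                               const char **namep)
    {
            int len;

            *namep = fdt_get_name(blob, offset, &len);
            return *namep ? 0 : len;
    }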

* tag 'devicetree-fixes-for-5.12-2' of git://git.kernel.org/pub/scm/linux/kernel/git/robh/linux:
  of: property: fw_devlink: do not link ".*,nr-gpios"
  dt-bindings:iio:adc: update motorola,cpcap-adc.yaml reference
  dt-bindings: fix references for iio-bindings.txt
  dt-bindings: don't use ../dir for doc references
  of: unittest: overlay: ensure proper alignment of copied FDT
  of: properly check for error returned by fdt_get_name()
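
The first entry above stops fw_devlink from parsing "<vendor>,nr-gpios"
as a supplier reference. A hedged sketch of the idea (the helpers below
are hypothetical, not the actual drivers/of/property.c code): any
property ending in "-gpios" is normally assumed to hold GPIO phandles,
but the ",nr-gpios" spelling only carries a count and must be filtered
out before it produces a bogus device link.

    #include <linux/string.h>
    #include <linux/types.h>

    /* Hypothetical helper: does @name end with @suffix? */
    static bool name_ends_with(const char *name, const char *suffix)
    {
            size_t nlen = strlen(name), slen = strlen(suffix);

            return nlen >= slen && !strcmp(name + nlen - slen, suffix);
    }

    /* Only phandle-bearing GPIO properties should create device links. */
    static bool is_gpio_supplier_prop(const char *name)
    {
            if (name_ends_with(name, ",nr-gpios"))
                    return false;   /* a count, not a phandle */
            return !strcmp(name, "gpios") || name_ends_with(name, "-gpios");
    }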

3572 files changed:
.clang-format
.gitignore
.mailmap
CREDITS
Documentation/ABI/stable/sysfs-bus-fsl-mc [new file with mode: 0644]
Documentation/ABI/stable/sysfs-driver-speakup
Documentation/ABI/testing/debugfs-driver-habanalabs
Documentation/ABI/testing/ima_policy
Documentation/ABI/testing/sysfs-bus-coresight-devices-etm4x
Documentation/ABI/testing/sysfs-bus-cxl [new file with mode: 0644]
Documentation/ABI/testing/sysfs-bus-dfl-devices-emif [new file with mode: 0644]
Documentation/ABI/testing/sysfs-bus-dfl-devices-n3000-nios [new file with mode: 0644]
Documentation/ABI/testing/sysfs-bus-pci-devices-pvpanic [new file with mode: 0644]
Documentation/ABI/testing/sysfs-devices-memory
Documentation/ABI/testing/sysfs-devices-xenbus [new file with mode: 0644]
Documentation/ABI/testing/sysfs-driver-habanalabs
Documentation/ABI/testing/sysfs-driver-input-cros-ec-keyb [new file with mode: 0644]
Documentation/ABI/testing/sysfs-firmware-acpi
Documentation/ABI/testing/sysfs-firmware-sfi [deleted file]
Documentation/ABI/testing/sysfs-fs-xfs
Documentation/ABI/testing/sysfs-platform-kim
Documentation/ABI/testing/sysfs-platform_profile
Documentation/PCI/endpoint/function/binding/pci-ntb.rst [new file with mode: 0644]
Documentation/PCI/endpoint/index.rst
Documentation/PCI/endpoint/pci-endpoint-cfs.rst
Documentation/PCI/endpoint/pci-ntb-function.rst [new file with mode: 0644]
Documentation/PCI/endpoint/pci-ntb-howto.rst [new file with mode: 0644]
Documentation/admin-guide/auxdisplay/cfag12864b.rst
Documentation/admin-guide/auxdisplay/ks0108.rst
Documentation/admin-guide/cgroup-v2.rst
Documentation/admin-guide/cifs/authors.rst
Documentation/admin-guide/cifs/changes.rst
Documentation/admin-guide/cifs/introduction.rst
Documentation/admin-guide/cifs/todo.rst
Documentation/admin-guide/cifs/usage.rst
Documentation/admin-guide/kernel-parameters.txt
Documentation/admin-guide/mm/memory-hotplug.rst
Documentation/admin-guide/spkguide.txt
Documentation/admin-guide/sysctl/vm.rst
Documentation/admin-guide/xfs.rst
Documentation/arm64/acpi_object_usage.rst
Documentation/arm64/silicon-errata.rst
Documentation/block/bfq-iosched.rst
Documentation/conf.py
Documentation/core-api/dma-api.rst
Documentation/core-api/mm-api.rst
Documentation/dev-tools/index.rst
Documentation/dev-tools/kasan.rst
Documentation/dev-tools/kfence.rst [new file with mode: 0644]
Documentation/devicetree/bindings/Makefile
Documentation/devicetree/bindings/arm/coresight.txt
Documentation/devicetree/bindings/arm/rockchip.yaml
Documentation/devicetree/bindings/display/connector/dp-connector.yaml
Documentation/devicetree/bindings/display/mediatek/mediatek,dsi.txt
Documentation/devicetree/bindings/display/mediatek/mediatek,hdmi.txt
Documentation/devicetree/bindings/dma/ingenic,dma.yaml
Documentation/devicetree/bindings/dma/intel,ldma.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/dma/owl-dma.yaml
Documentation/devicetree/bindings/dma/renesas,rcar-dmac.yaml
Documentation/devicetree/bindings/dma/sirfsoc-dma.txt [deleted file]
Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt [deleted file]
Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/dma/ste-coh901318.txt [deleted file]
Documentation/devicetree/bindings/dma/zxdma.txt [deleted file]
Documentation/devicetree/bindings/gpio/sifive,gpio.yaml
Documentation/devicetree/bindings/hwlock/ti,omap-hwspinlock.yaml
Documentation/devicetree/bindings/input/goodix,gt7375p.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/input/google,cros-ec-keyb.yaml
Documentation/devicetree/bindings/interconnect/qcom,qcs404.yaml [deleted file]
Documentation/devicetree/bindings/interconnect/qcom,rpm.yaml [moved from Documentation/devicetree/bindings/interconnect/qcom,msm8916.yaml with 77% similarity]
Documentation/devicetree/bindings/interconnect/qcom,rpmh.yaml
Documentation/devicetree/bindings/interrupt-controller/ingenic,intc.yaml
Documentation/devicetree/bindings/interrupt-controller/sifive,plic-1.0.0.yaml
Documentation/devicetree/bindings/leds/leds-lgm.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/mailbox/omap-mailbox.txt
Documentation/devicetree/bindings/mailbox/qcom,apcs-kpss-global.yaml
Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/misc/eeprom-93xx46.txt
Documentation/devicetree/bindings/nvmem/rmem.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pci/brcm,stb-pcie.yaml
Documentation/devicetree/bindings/pci/layerscape-pci.txt
Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pci/qcom,pcie.txt
Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt [deleted file]
Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/mediatek,tphy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt [deleted file]
Documentation/devicetree/bindings/phy/phy-mtk-ufs.txt [deleted file]
Documentation/devicetree/bindings/phy/phy-mtk-xsphy.txt [deleted file]
Documentation/devicetree/bindings/phy/phy-stm32-usbphyc.yaml
Documentation/devicetree/bindings/phy/qcom,qmp-phy.yaml
Documentation/devicetree/bindings/phy/qcom,qusb2-phy.yaml
Documentation/devicetree/bindings/phy/qcom,usb-hs-28nm.yaml
Documentation/devicetree/bindings/phy/qcom,usb-snps-femto-v2.yaml
Documentation/devicetree/bindings/phy/rockchip-emmc-phy.txt
Documentation/devicetree/bindings/phy/ti,phy-j721e-wiz.yaml
Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/pwm/pwm-sifive.yaml
Documentation/devicetree/bindings/pwm/pwm-zx.txt [deleted file]
Documentation/devicetree/bindings/remoteproc/mtk,scp.txt
Documentation/devicetree/bindings/remoteproc/qcom,adsp.txt
Documentation/devicetree/bindings/remoteproc/qcom,wcnss-pil.txt
Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/riscv/canaan.yaml [new file with mode: 0644]
Documentation/devicetree/bindings/riscv/cpus.yaml
Documentation/devicetree/bindings/riscv/sifive-l2-cache.yaml
Documentation/devicetree/bindings/riscv/sifive.yaml
Documentation/devicetree/bindings/serial/sifive-serial.yaml
Documentation/devicetree/bindings/sound/fsl,spdif.yaml
Documentation/devicetree/bindings/timer/sifive,clint.yaml
Documentation/devicetree/bindings/timer/snps,dw-apb-timer.yaml
Documentation/driver-api/cxl/index.rst [new file with mode: 0644]
Documentation/driver-api/cxl/memory-devices.rst [new file with mode: 0644]
Documentation/driver-api/index.rst
Documentation/driver-api/pti_intel_mid.rst [deleted file]
Documentation/features/core/cBPF-JIT/arch-support.txt
Documentation/features/core/eBPF-JIT/arch-support.txt
Documentation/features/core/generic-idle-thread/arch-support.txt
Documentation/features/core/jump-labels/arch-support.txt
Documentation/features/core/tracehook/arch-support.txt
Documentation/features/debug/KASAN/arch-support.txt
Documentation/features/debug/debug-vm-pgtable/arch-support.txt
Documentation/features/debug/gcov-profile-all/arch-support.txt
Documentation/features/debug/kcov/arch-support.txt
Documentation/features/debug/kgdb/arch-support.txt
Documentation/features/debug/kmemleak/arch-support.txt
Documentation/features/debug/kprobes-on-ftrace/arch-support.txt
Documentation/features/debug/kprobes/arch-support.txt
Documentation/features/debug/kretprobes/arch-support.txt
Documentation/features/debug/optprobes/arch-support.txt
Documentation/features/debug/stackprotector/arch-support.txt
Documentation/features/debug/uprobes/arch-support.txt
Documentation/features/debug/user-ret-profiler/arch-support.txt
Documentation/features/io/dma-contiguous/arch-support.txt
Documentation/features/locking/cmpxchg-local/arch-support.txt
Documentation/features/locking/lockdep/arch-support.txt
Documentation/features/locking/queued-rwlocks/arch-support.txt
Documentation/features/locking/queued-spinlocks/arch-support.txt
Documentation/features/perf/kprobes-event/arch-support.txt
Documentation/features/perf/perf-regs/arch-support.txt
Documentation/features/perf/perf-stackdump/arch-support.txt
Documentation/features/sched/membarrier-sync-core/arch-support.txt
Documentation/features/sched/numa-balancing/arch-support.txt
Documentation/features/seccomp/seccomp-filter/arch-support.txt
Documentation/features/time/arch-tick-broadcast/arch-support.txt
Documentation/features/time/clockevents/arch-support.txt
Documentation/features/time/context-tracking/arch-support.txt
Documentation/features/time/irq-time-acct/arch-support.txt
Documentation/features/time/virt-cpuacct/arch-support.txt
Documentation/features/vm/ELF-ASLR/arch-support.txt
Documentation/features/vm/PG_uncached/arch-support.txt
Documentation/features/vm/THP/arch-support.txt
Documentation/features/vm/TLB/arch-support.txt
Documentation/features/vm/huge-vmap/arch-support.txt
Documentation/features/vm/ioremap_prot/arch-support.txt
Documentation/features/vm/pte_special/arch-support.txt
Documentation/filesystems/locking.rst
Documentation/filesystems/porting.rst
Documentation/filesystems/proc.rst
Documentation/filesystems/seq_file.rst
Documentation/filesystems/vfs.rst
Documentation/fpga/dfl.rst
Documentation/gpu/todo.rst
Documentation/hid/amd-sfh-hid.rst
Documentation/hid/hid-alps.rst
Documentation/hid/hid-sensor.rst
Documentation/hid/hid-transport.rst
Documentation/hid/hiddev.rst
Documentation/hid/hidraw.rst
Documentation/hid/intel-ish-hid.rst
Documentation/hid/uhid.rst
Documentation/kbuild/makefiles.rst
Documentation/networking/bonding.rst
Documentation/networking/device_drivers/ethernet/amazon/ena.rst
Documentation/networking/devlink/devlink-dpipe.rst
Documentation/networking/devlink/devlink-port.rst
Documentation/networking/netdev-FAQ.rst
Documentation/networking/xfrm_device.rst
Documentation/powerpc/syscall64-abi.rst
Documentation/process/4.Coding.rst
Documentation/process/stable-kernel-rules.rst
Documentation/process/submit-checklist.rst
Documentation/process/submitting-patches.rst
Documentation/security/keys/core.rst
Documentation/trace/coresight/coresight.rst
Documentation/translations/it_IT/process/4.Coding.rst
Documentation/translations/it_IT/process/submit-checklist.rst
Documentation/translations/zh_CN/process/4.Coding.rst
Documentation/userspace-api/ioctl/ioctl-number.rst
Documentation/virt/acrn/cpuid.rst [new file with mode: 0644]
Documentation/virt/acrn/index.rst [new file with mode: 0644]
Documentation/virt/acrn/introduction.rst [new file with mode: 0644]
Documentation/virt/acrn/io-request.rst [new file with mode: 0644]
Documentation/virt/index.rst
Documentation/virt/kvm/api.rst
Documentation/vm/arch_pgtable_helpers.rst
MAINTAINERS
Makefile
arch/Kconfig
arch/alpha/configs/defconfig
arch/alpha/kernel/process.c
arch/alpha/kernel/syscalls/Makefile
arch/alpha/kernel/syscalls/syscall.tbl
arch/arc/boot/dts/haps_hs.dts
arch/arc/kernel/process.c
arch/arc/kernel/signal.c
arch/arc/kernel/unwind.c
arch/arm/Kconfig
arch/arm/boot/dts/am33xx.dtsi
arch/arm/boot/dts/armada-385-turris-omnia.dts
arch/arm/boot/dts/at91-sam9x60ek.dts
arch/arm/boot/dts/at91-sama5d27_som1.dtsi
arch/arm/boot/dts/bcm2711.dtsi
arch/arm/boot/dts/cros-ec-keyboard.dtsi
arch/arm/boot/dts/imx6qdl-phytec-pfla02.dtsi
arch/arm/boot/dts/imx6ul-14x14-evk.dtsi
arch/arm/boot/dts/imx6ull-myir-mys-6ulx-eval.dts
arch/arm/boot/dts/omap4.dtsi
arch/arm/boot/dts/omap44xx-clocks.dtsi
arch/arm/boot/dts/omap5.dtsi
arch/arm/boot/dts/sam9x60.dtsi
arch/arm/configs/bcm2835_defconfig
arch/arm/configs/mxs_defconfig
arch/arm/kernel/process.c
arch/arm/mach-imx/avic.c
arch/arm/mach-imx/common.h
arch/arm/mach-imx/mach-imx1.c
arch/arm/mach-imx/mach-imx25.c
arch/arm/mach-imx/mach-imx27.c
arch/arm/mach-imx/mach-imx31.c
arch/arm/mach-imx/mach-imx35.c
arch/arm/mach-imx/mm-imx3.c
arch/arm/mach-keystone/keystone.c
arch/arm/mach-omap1/ams-delta-fiq-handler.S
arch/arm/mach-omap2/omap-secure.c
arch/arm/mach-omap2/omap-secure.h
arch/arm/mach-omap2/pmic-cpcap.c
arch/arm/mach-omap2/sr_device.c
arch/arm/mach-pxa/mainstone.c
arch/arm/tools/Makefile
arch/arm/tools/syscall.tbl
arch/arm/xen/p2m.c
arch/arm64/Kconfig
arch/arm64/boot/dts/freescale/fsl-ls1012a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1043a.dtsi
arch/arm64/boot/dts/freescale/fsl-ls1046a.dtsi
arch/arm64/boot/dts/freescale/imx8mm-pinfunc.h
arch/arm64/boot/dts/freescale/imx8mp-phyboard-pollux-rdk.dts
arch/arm64/boot/dts/freescale/imx8mp-phycore-som.dtsi
arch/arm64/boot/dts/freescale/imx8mq-pinfunc.h
arch/arm64/boot/dts/marvell/armada-cp11x.dtsi
arch/arm64/configs/defconfig
arch/arm64/include/asm/cache.h
arch/arm64/include/asm/checksum.h
arch/arm64/include/asm/cpucaps.h
arch/arm64/include/asm/kasan.h
arch/arm64/include/asm/kfence.h [new file with mode: 0644]
arch/arm64/include/asm/kvm_arm.h
arch/arm64/include/asm/kvm_asm.h
arch/arm64/include/asm/kvm_hyp.h
arch/arm64/include/asm/memory.h
arch/arm64/include/asm/mmu_context.h
arch/arm64/include/asm/module.lds.h
arch/arm64/include/asm/mte-def.h
arch/arm64/include/asm/mte-kasan.h
arch/arm64/include/asm/mte.h
arch/arm64/include/asm/numa.h
arch/arm64/include/asm/pgtable-prot.h
arch/arm64/include/asm/pgtable.h
arch/arm64/include/asm/processor.h
arch/arm64/include/asm/sysreg.h
arch/arm64/include/asm/thread_info.h
arch/arm64/include/asm/unistd.h
arch/arm64/include/asm/unistd32.h
arch/arm64/kernel/acpi_numa.c
arch/arm64/kernel/cpu_errata.c
arch/arm64/kernel/cpufeature.c
arch/arm64/kernel/cpuinfo.c
arch/arm64/kernel/crash_dump.c
arch/arm64/kernel/head.S
arch/arm64/kernel/hyp-stub.S
arch/arm64/kernel/idreg-override.c
arch/arm64/kernel/image-vars.h
arch/arm64/kernel/machine_kexec_file.c
arch/arm64/kernel/mte.c
arch/arm64/kernel/perf_event.c
arch/arm64/kernel/probes/uprobes.c
arch/arm64/kernel/process.c
arch/arm64/kernel/ptrace.c
arch/arm64/kernel/stacktrace.c
arch/arm64/kernel/suspend.c
arch/arm64/kernel/vdso/Makefile
arch/arm64/kvm/arm.c
arch/arm64/kvm/debug.c
arch/arm64/kvm/hyp/entry.S
arch/arm64/kvm/hyp/hyp-entry.S
arch/arm64/kvm/hyp/include/hyp/switch.h
arch/arm64/kvm/hyp/nvhe/debug-sr.c
arch/arm64/kvm/hyp/nvhe/host.S
arch/arm64/kvm/hyp/nvhe/hyp-main.c
arch/arm64/kvm/hyp/nvhe/switch.c
arch/arm64/kvm/hyp/nvhe/tlb.c
arch/arm64/kvm/hyp/pgtable.c
arch/arm64/kvm/hyp/vgic-v3-sr.c
arch/arm64/kvm/hyp/vhe/tlb.c
arch/arm64/kvm/mmu.c
arch/arm64/kvm/perf.c
arch/arm64/kvm/pmu-emul.c
arch/arm64/kvm/reset.c
arch/arm64/kvm/vgic/vgic-v3.c
arch/arm64/lib/mte.S
arch/arm64/mm/Makefile
arch/arm64/mm/fault.c
arch/arm64/mm/init.c
arch/arm64/mm/mmu.c
arch/csky/Kconfig
arch/csky/abiv1/inc/abi/cacheflush.h
arch/csky/abiv1/inc/abi/ckmmu.h
arch/csky/abiv1/inc/abi/entry.h
arch/csky/abiv1/inc/abi/page.h
arch/csky/abiv1/inc/abi/pgtable-bits.h
arch/csky/abiv1/inc/abi/reg_ops.h
arch/csky/abiv1/inc/abi/regdef.h
arch/csky/abiv1/inc/abi/string.h
arch/csky/abiv1/inc/abi/switch_context.h
arch/csky/abiv1/inc/abi/vdso.h
arch/csky/abiv2/cacheflush.c
arch/csky/abiv2/inc/abi/ckmmu.h
arch/csky/abiv2/inc/abi/entry.h
arch/csky/abiv2/inc/abi/fpu.h
arch/csky/abiv2/inc/abi/page.h
arch/csky/abiv2/inc/abi/pgtable-bits.h
arch/csky/abiv2/inc/abi/reg_ops.h
arch/csky/abiv2/inc/abi/regdef.h
arch/csky/abiv2/inc/abi/switch_context.h
arch/csky/abiv2/inc/abi/vdso.h
arch/csky/abiv2/sysdep.h
arch/csky/include/asm/addrspace.h
arch/csky/include/asm/atomic.h [deleted file]
arch/csky/include/asm/barrier.h
arch/csky/include/asm/bitops.h
arch/csky/include/asm/bug.h
arch/csky/include/asm/cacheflush.h
arch/csky/include/asm/checksum.h
arch/csky/include/asm/clocksource.h [new file with mode: 0644]
arch/csky/include/asm/cmpxchg.h
arch/csky/include/asm/elf.h
arch/csky/include/asm/fixmap.h
arch/csky/include/asm/ftrace.h
arch/csky/include/asm/futex.h [new file with mode: 0644]
arch/csky/include/asm/highmem.h
arch/csky/include/asm/io.h
arch/csky/include/asm/memory.h
arch/csky/include/asm/mmu.h
arch/csky/include/asm/mmu_context.h
arch/csky/include/asm/page.h
arch/csky/include/asm/perf_event.h
arch/csky/include/asm/pgalloc.h
arch/csky/include/asm/pgtable.h
arch/csky/include/asm/processor.h
arch/csky/include/asm/ptrace.h
arch/csky/include/asm/segment.h
arch/csky/include/asm/shmparam.h
arch/csky/include/asm/spinlock.h
arch/csky/include/asm/spinlock_types.h
arch/csky/include/asm/string.h
arch/csky/include/asm/switch_to.h
arch/csky/include/asm/syscalls.h
arch/csky/include/asm/thread_info.h
arch/csky/include/asm/tlb.h
arch/csky/include/asm/tlbflush.h
arch/csky/include/asm/traps.h
arch/csky/include/asm/uaccess.h
arch/csky/include/asm/unistd.h
arch/csky/include/asm/vdso.h
arch/csky/include/asm/vdso/clocksource.h [new file with mode: 0644]
arch/csky/include/asm/vdso/gettimeofday.h [new file with mode: 0644]
arch/csky/include/asm/vdso/processor.h [new file with mode: 0644]
arch/csky/include/asm/vdso/vsyscall.h [new file with mode: 0644]
arch/csky/include/uapi/asm/byteorder.h
arch/csky/include/uapi/asm/perf_regs.h
arch/csky/include/uapi/asm/ptrace.h
arch/csky/include/uapi/asm/sigcontext.h
arch/csky/include/uapi/asm/unistd.h
arch/csky/kernel/Makefile
arch/csky/kernel/atomic.S
arch/csky/kernel/entry.S
arch/csky/kernel/head.S
arch/csky/kernel/perf_event.c
arch/csky/kernel/probes/ftrace.c
arch/csky/kernel/probes/simulate-insn.c
arch/csky/kernel/process.c
arch/csky/kernel/ptrace.c
arch/csky/kernel/setup.c
arch/csky/kernel/signal.c
arch/csky/kernel/smp.c
arch/csky/kernel/traps.c
arch/csky/kernel/vdso.c
arch/csky/kernel/vdso/.gitignore [moved from arch/x86/platform/sfi/Makefile with 58% similarity]
arch/csky/kernel/vdso/Makefile [new file with mode: 0644]
arch/csky/kernel/vdso/note.S [new file with mode: 0644]
arch/csky/kernel/vdso/rt_sigreturn.S [new file with mode: 0644]
arch/csky/kernel/vdso/so2s.sh [new file with mode: 0755]
arch/csky/kernel/vdso/vdso.S [new file with mode: 0644]
arch/csky/kernel/vdso/vdso.lds.S [new file with mode: 0644]
arch/csky/kernel/vdso/vgettimeofday.c [new file with mode: 0644]
arch/csky/kernel/vmlinux.lds.S
arch/csky/mm/fault.c
arch/csky/mm/init.c
arch/csky/mm/tlb.c
arch/h8300/kernel/process.c
arch/hexagon/configs/comet_defconfig
arch/hexagon/kernel/process.c
arch/ia64/Makefile
arch/ia64/include/asm/irq.h
arch/ia64/include/asm/mca.h
arch/ia64/include/asm/pal.h
arch/ia64/include/asm/pgtable.h
arch/ia64/include/asm/sal.h
arch/ia64/include/asm/syscall.h
arch/ia64/kernel/Makefile
arch/ia64/kernel/asm-offsets.c
arch/ia64/kernel/crash.c
arch/ia64/kernel/efi.c
arch/ia64/kernel/err_inject.c
arch/ia64/kernel/mca.c
arch/ia64/kernel/mca_drv.c
arch/ia64/kernel/nr-irqs.c [deleted file]
arch/ia64/kernel/process.c
arch/ia64/kernel/ptrace.c
arch/ia64/kernel/signal.c
arch/ia64/kernel/syscalls/Makefile
arch/ia64/kernel/syscalls/syscall.tbl
arch/ia64/mm/init.c
arch/m68k/coldfire/clk.c
arch/m68k/include/asm/page_mm.h
arch/m68k/include/asm/page_no.h
arch/m68k/kernel/process.c
arch/m68k/kernel/syscalls/Makefile
arch/m68k/kernel/syscalls/syscall.tbl
arch/microblaze/Kconfig
arch/microblaze/kernel/module.c
arch/microblaze/kernel/process.c
arch/microblaze/kernel/syscalls/Makefile
arch/microblaze/kernel/syscalls/syscall.tbl
arch/microblaze/kernel/vmlinux.lds.S
arch/mips/bmips/setup.c
arch/mips/boot/compressed/decompress.c
arch/mips/configs/nlm_xlp_defconfig
arch/mips/configs/nlm_xlr_defconfig
arch/mips/crypto/Makefile
arch/mips/include/asm/traps.h
arch/mips/kernel/cpu-probe.c
arch/mips/kernel/cpu-r3k-probe.c
arch/mips/kernel/process.c
arch/mips/kernel/r4k-bugs64.c
arch/mips/kernel/setup.c
arch/mips/kernel/syscalls/Makefile
arch/mips/kernel/syscalls/syscall_n32.tbl
arch/mips/kernel/syscalls/syscall_n64.tbl
arch/mips/kernel/syscalls/syscall_o32.tbl
arch/mips/kernel/traps.c
arch/mips/kernel/vmlinux.lds.S
arch/mips/lib/iomap-pci.c
arch/mips/loongson64/Platform
arch/mips/mm/cache.c
arch/mips/mm/pgtable-32.c
arch/mips/mm/pgtable-64.c
arch/mips/sgi-ip32/ip32-irq.c
arch/mips/vdso/Kconfig
arch/nds32/configs/defconfig
arch/nds32/kernel/process.c
arch/nds32/kernel/setup.c
arch/nds32/kernel/time.c
arch/nds32/kernel/traps.c
arch/nios2/kernel/entry.S
arch/nios2/kernel/process.c
arch/nios2/kernel/setup.c
arch/nios2/kernel/sys_nios2.c
arch/openrisc/Kbuild [new file with mode: 0644]
arch/openrisc/Makefile
arch/openrisc/boot/.gitignore [new file with mode: 0644]
arch/openrisc/boot/Makefile [new file with mode: 0644]
arch/openrisc/kernel/process.c
arch/openrisc/kernel/smp.c
arch/parisc/Kconfig
arch/parisc/configs/generic-32bit_defconfig
arch/parisc/configs/generic-64bit_defconfig
arch/parisc/include/asm/cmpxchg.h
arch/parisc/include/asm/hardirq.h
arch/parisc/include/asm/processor.h
arch/parisc/kernel/irq.c
arch/parisc/kernel/process.c
arch/parisc/kernel/ptrace.c
arch/parisc/kernel/syscalls/Makefile
arch/parisc/kernel/syscalls/syscall.tbl
arch/parisc/math-emu/fpu.h
arch/powerpc/Kconfig
arch/powerpc/Makefile
arch/powerpc/configs/ppc6xx_defconfig
arch/powerpc/include/asm/code-patching.h
arch/powerpc/include/asm/cpu_has_feature.h
arch/powerpc/include/asm/dcr-native.h
arch/powerpc/include/asm/interrupt.h
arch/powerpc/include/asm/irq.h
arch/powerpc/include/asm/mmu.h
arch/powerpc/include/asm/ptrace.h
arch/powerpc/include/asm/switch_to.h
arch/powerpc/include/asm/vio.h
arch/powerpc/kernel/exceptions-64s.S
arch/powerpc/kernel/head_book3s_32.S
arch/powerpc/kernel/interrupt.c
arch/powerpc/kernel/irq.c
arch/powerpc/kernel/process.c
arch/powerpc/kernel/syscalls/Makefile
arch/powerpc/kernel/syscalls/syscall.tbl
arch/powerpc/kernel/traps.c
arch/powerpc/kernel/vdso32/gettimeofday.S
arch/powerpc/kvm/book3s_64_mmu_hv.c
arch/powerpc/kvm/book3s_64_mmu_radix.c
arch/powerpc/lib/Makefile
arch/powerpc/lib/sstep.c
arch/powerpc/perf/core-book3s.c
arch/powerpc/platforms/cell/spufs/inode.c
arch/powerpc/platforms/powernv/pci-cxl.c
arch/powerpc/platforms/pseries/lpar.c
arch/powerpc/platforms/pseries/mobility.c
arch/powerpc/platforms/pseries/msi.c
arch/powerpc/platforms/pseries/vio.c
arch/riscv/Kconfig
arch/riscv/Kconfig.socs
arch/riscv/Makefile
arch/riscv/boot/dts/Makefile
arch/riscv/boot/dts/canaan/Makefile [new file with mode: 0644]
arch/riscv/boot/dts/canaan/canaan_kd233.dts [new file with mode: 0644]
arch/riscv/boot/dts/canaan/k210.dtsi [new file with mode: 0644]
arch/riscv/boot/dts/canaan/k210_generic.dts [new file with mode: 0644]
arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts [new file with mode: 0644]
arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts [new file with mode: 0644]
arch/riscv/boot/dts/canaan/sipeed_maix_go.dts [new file with mode: 0644]
arch/riscv/boot/dts/canaan/sipeed_maixduino.dts [new file with mode: 0644]
arch/riscv/boot/dts/kendryte/Makefile [deleted file]
arch/riscv/boot/dts/kendryte/k210.dts [deleted file]
arch/riscv/boot/dts/kendryte/k210.dtsi [deleted file]
arch/riscv/boot/dts/sifive/Makefile
arch/riscv/boot/dts/sifive/fu740-c000.dtsi [new file with mode: 0644]
arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts [new file with mode: 0644]
arch/riscv/configs/defconfig
arch/riscv/configs/nommu_k210_defconfig
arch/riscv/configs/nommu_k210_sdcard_defconfig [new file with mode: 0644]
arch/riscv/configs/rv32_defconfig
arch/riscv/include/asm/asm-prototypes.h
arch/riscv/include/asm/bug.h
arch/riscv/include/asm/csr.h
arch/riscv/include/asm/irq.h
arch/riscv/include/asm/kasan.h
arch/riscv/include/asm/kprobes.h
arch/riscv/include/asm/mmu.h
arch/riscv/include/asm/mmu_context.h
arch/riscv/include/asm/mmzone.h [new file with mode: 0644]
arch/riscv/include/asm/numa.h [new file with mode: 0644]
arch/riscv/include/asm/page.h
arch/riscv/include/asm/pci.h
arch/riscv/include/asm/pgtable.h
arch/riscv/include/asm/probes.h [new file with mode: 0644]
arch/riscv/include/asm/processor.h
arch/riscv/include/asm/ptrace.h
arch/riscv/include/asm/sbi.h
arch/riscv/include/asm/set_memory.h
arch/riscv/include/asm/soc.h
arch/riscv/include/asm/stackprotector.h
arch/riscv/include/asm/stacktrace.h
arch/riscv/include/asm/thread_info.h
arch/riscv/include/asm/timex.h
arch/riscv/include/asm/uaccess.h
arch/riscv/include/asm/uprobes.h [new file with mode: 0644]
arch/riscv/kernel/Makefile
arch/riscv/kernel/asm-offsets.c
arch/riscv/kernel/entry.S
arch/riscv/kernel/ftrace.c
arch/riscv/kernel/head.S
arch/riscv/kernel/image-vars.h
arch/riscv/kernel/mcount-dyn.S
arch/riscv/kernel/patch.c
arch/riscv/kernel/probes/Makefile [new file with mode: 0644]
arch/riscv/kernel/probes/decode-insn.c [new file with mode: 0644]
arch/riscv/kernel/probes/decode-insn.h [new file with mode: 0644]
arch/riscv/kernel/probes/ftrace.c [new file with mode: 0644]
arch/riscv/kernel/probes/kprobes.c [new file with mode: 0644]
arch/riscv/kernel/probes/kprobes_trampoline.S [new file with mode: 0644]
arch/riscv/kernel/probes/simulate-insn.c [new file with mode: 0644]
arch/riscv/kernel/probes/simulate-insn.h [new file with mode: 0644]
arch/riscv/kernel/probes/uprobes.c [new file with mode: 0644]
arch/riscv/kernel/process.c
arch/riscv/kernel/ptrace.c
arch/riscv/kernel/sbi.c
arch/riscv/kernel/setup.c
arch/riscv/kernel/signal.c
arch/riscv/kernel/smpboot.c
arch/riscv/kernel/soc.c
arch/riscv/kernel/stacktrace.c
arch/riscv/kernel/time.c
arch/riscv/kernel/traps.c
arch/riscv/kernel/vdso/Makefile
arch/riscv/lib/Makefile
arch/riscv/lib/error-inject.c [new file with mode: 0644]
arch/riscv/mm/Makefile
arch/riscv/mm/context.c
arch/riscv/mm/fault.c
arch/riscv/mm/init.c
arch/riscv/mm/kasan_init.c
arch/s390/Kconfig
arch/s390/configs/debug_defconfig
arch/s390/configs/defconfig
arch/s390/configs/zfcpdump_defconfig
arch/s390/include/asm/facility.h
arch/s390/include/asm/hardirq.h
arch/s390/include/asm/idle.h
arch/s390/include/asm/irq_work.h [new file with mode: 0644]
arch/s390/include/asm/pci.h
arch/s390/include/asm/pgalloc.h
arch/s390/include/asm/pgtable.h
arch/s390/include/asm/stacktrace.h
arch/s390/include/asm/timex.h
arch/s390/include/asm/vdso/data.h
arch/s390/include/uapi/asm/hwctrset.h [new file with mode: 0644]
arch/s390/kernel/cpcmd.c
arch/s390/kernel/dumpstack.c
arch/s390/kernel/idle.c
arch/s390/kernel/irq.c
arch/s390/kernel/perf_cpum_cf.c
arch/s390/kernel/perf_cpum_cf_diag.c
arch/s390/kernel/process.c
arch/s390/kernel/setup.c
arch/s390/kernel/smp.c
arch/s390/kernel/syscalls/syscall.tbl
arch/s390/kernel/time.c
arch/s390/kernel/topology.c
arch/s390/kernel/vtime.c
arch/s390/kvm/interrupt.c
arch/s390/mm/init.c
arch/s390/mm/pgalloc.c
arch/s390/mm/vmem.c
arch/s390/pci/pci.c
arch/s390/pci/pci_event.c
arch/s390/tools/opcodes.txt
arch/sh/Kconfig
arch/sh/boards/mach-landisk/gio.c
arch/sh/configs/edosk7760_defconfig
arch/sh/configs/sdk7780_defconfig
arch/sh/include/asm/irq.h
arch/sh/kernel/irq.c
arch/sh/kernel/process_32.c
arch/sh/kernel/syscalls/Makefile
arch/sh/kernel/syscalls/syscall.tbl
arch/sparc/Kconfig
arch/sparc/boot/piggyback.c
arch/sparc/configs/sparc64_defconfig
arch/sparc/include/asm/backoff.h
arch/sparc/include/asm/cmpxchg_32.h
arch/sparc/include/asm/elf_64.h
arch/sparc/include/asm/extable.h [moved from arch/sparc/include/asm/extable_64.h with 92% similarity]
arch/sparc/include/asm/irq_64.h
arch/sparc/include/asm/mman.h
arch/sparc/include/asm/pgtsrmmu.h
arch/sparc/include/asm/processor_32.h
arch/sparc/include/asm/signal.h
arch/sparc/include/asm/thread_info_64.h
arch/sparc/include/asm/uaccess.h
arch/sparc/include/asm/uaccess_32.h
arch/sparc/include/asm/uaccess_64.h
arch/sparc/kernel/entry.S
arch/sparc/kernel/head_32.S
arch/sparc/kernel/head_64.S
arch/sparc/kernel/irq_64.c
arch/sparc/kernel/led.c
arch/sparc/kernel/pci.c
arch/sparc/kernel/process_32.c
arch/sparc/kernel/process_64.c
arch/sparc/kernel/rtrap_32.S
arch/sparc/kernel/setup_32.c
arch/sparc/kernel/setup_64.c
arch/sparc/kernel/signal_32.c
arch/sparc/kernel/syscalls/Makefile
arch/sparc/kernel/syscalls/syscall.tbl
arch/sparc/kernel/traps_64.c
arch/sparc/kernel/unaligned_32.c
arch/sparc/kernel/viohs.c
arch/sparc/lib/checksum_32.S
arch/sparc/lib/copy_user.S
arch/sparc/lib/memset.S
arch/sparc/mm/Makefile
arch/sparc/mm/extable.c [deleted file]
arch/sparc/mm/fault_32.c
arch/sparc/mm/init_32.c
arch/sparc/mm/mm_32.h
arch/sparc/mm/srmmu.c
arch/um/include/asm/Kbuild
arch/um/kernel/process.c
arch/x86/Kconfig
arch/x86/Makefile
arch/x86/configs/i386_defconfig
arch/x86/configs/x86_64_defconfig
arch/x86/entry/common.c
arch/x86/entry/entry_64.S
arch/x86/entry/entry_64_compat.S
arch/x86/entry/syscalls/Makefile
arch/x86/entry/syscalls/syscall_32.tbl
arch/x86/entry/syscalls/syscall_64.tbl
arch/x86/entry/vdso/Makefile
arch/x86/events/core.c
arch/x86/events/intel/core.c
arch/x86/events/intel/ds.c
arch/x86/include/asm/acrn.h [new file with mode: 0644]
arch/x86/include/asm/apb_timer.h [deleted file]
arch/x86/include/asm/fixmap.h
arch/x86/include/asm/idtentry.h
arch/x86/include/asm/insn-eval.h
arch/x86/include/asm/insn.h
arch/x86/include/asm/intel-mid.h
arch/x86/include/asm/intel_mid_vrtc.h [deleted file]
arch/x86/include/asm/intel_scu_ipc.h
arch/x86/include/asm/intel_scu_ipc_legacy.h [deleted file]
arch/x86/include/asm/irq.h
arch/x86/include/asm/irq_stack.h
arch/x86/include/asm/kfence.h [new file with mode: 0644]
arch/x86/include/asm/kvm-x86-ops.h
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/orc_types.h
arch/x86/include/asm/platform_sst_audio.h
arch/x86/include/asm/processor.h
arch/x86/include/asm/proto.h
arch/x86/include/asm/ptrace.h
arch/x86/include/asm/smap.h
arch/x86/include/asm/smp.h
arch/x86/include/asm/softirq_stack.h [new file with mode: 0644]
arch/x86/include/asm/thread_info.h
arch/x86/include/asm/unwind_hints.h
arch/x86/kernel/Makefile
arch/x86/kernel/acpi/Makefile
arch/x86/kernel/acpi/boot.c
arch/x86/kernel/acpi/wakeup_64.S
arch/x86/kernel/apb_timer.c [deleted file]
arch/x86/kernel/apic/apic.c
arch/x86/kernel/apic/io_apic.c
arch/x86/kernel/cpu/acrn.c
arch/x86/kernel/cpu/common.c
arch/x86/kernel/dumpstack_64.c
arch/x86/kernel/ftrace_64.S
arch/x86/kernel/irq.c
arch/x86/kernel/irq_32.c
arch/x86/kernel/irq_64.c
arch/x86/kernel/kprobes/ftrace.c
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c
arch/x86/kernel/process.c
arch/x86/kernel/process_64.c
arch/x86/kernel/setup.c
arch/x86/kernel/sev-es.c
arch/x86/kernel/signal.c
arch/x86/kernel/smpboot.c
arch/x86/kernel/traps.c
arch/x86/kernel/unwind_orc.c
arch/x86/kvm/Kconfig
arch/x86/kvm/Makefile
arch/x86/kvm/cpuid.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/hyperv.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/mmu_internal.h
arch/x86/kvm/mmu/paging_tmpl.h
arch/x86/kvm/mmu/tdp_iter.c
arch/x86/kvm/mmu/tdp_iter.h
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/mmu/tdp_mmu.h
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/pmu.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/pmu_intel.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
arch/x86/kvm/x86.h
arch/x86/kvm/xen.c
arch/x86/kvm/xen.h
arch/x86/lib/insn-eval.c
arch/x86/lib/insn.c
arch/x86/lib/retpoline.S
arch/x86/mm/fault.c
arch/x86/mm/mem_encrypt.c
arch/x86/mm/pat/memtype.c
arch/x86/net/bpf_jit_comp.c
arch/x86/pci/intel_mid_pci.c
arch/x86/pci/mmconfig-shared.c
arch/x86/platform/Makefile
arch/x86/platform/intel-mid/Makefile
arch/x86/platform/intel-mid/device_libs/Makefile [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_bma023.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_bt.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_emc1403.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_lis331.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_max7315.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic.h [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c [deleted file]
arch/x86/platform/intel-mid/device_libs/platform_tca6416.c [deleted file]
arch/x86/platform/intel-mid/intel-mid.c
arch/x86/platform/intel-mid/intel_mid_vrtc.c [deleted file]
arch/x86/platform/intel-mid/sfi.c [deleted file]
arch/x86/platform/iris/iris.c
arch/x86/platform/pvh/head.S
arch/x86/platform/sfi/sfi.c [deleted file]
arch/x86/power/Makefile
arch/x86/power/hibernate_asm_64.S
arch/x86/tools/Makefile
arch/x86/tools/insn_sanity.c
arch/x86/tools/relocs.c
arch/x86/xen/Makefile
arch/x86/xen/p2m.c
arch/x86/xen/setup.c
arch/x86/xen/xen-asm.S
arch/x86/xen/xen-head.S
arch/xtensa/kernel/coprocessor.S
arch/xtensa/kernel/process.c
arch/xtensa/kernel/syscalls/Makefile
arch/xtensa/kernel/syscalls/syscall.tbl
arch/xtensa/mm/fault.c
block/bfq-iosched.c
block/bio.c
block/blk-cgroup-rwstat.c
block/blk-core.c
block/blk-crypto-fallback.c
block/blk-lib.c
block/blk-map.c
block/blk-merge.c
block/blk-mq-debugfs.c
block/blk-mq-sched.c
block/blk-mq-sched.h
block/blk-pm.h
block/blk-settings.c
block/blk-sysfs.c
block/blk-zoned.c
block/bounce.c
block/genhd.c
block/ioctl.c
block/kyber-iosched.c
block/mq-deadline.c
block/partitions/core.c
certs/blacklist.c
certs/system_keyring.c
crypto/Kconfig
crypto/asymmetric_keys/asymmetric_type.c
crypto/asymmetric_keys/pkcs7_parser.h
crypto/asymmetric_keys/pkcs7_trust.c
crypto/asymmetric_keys/pkcs7_verify.c
drivers/Kconfig
drivers/Makefile
drivers/accessibility/speakup/serialio.c
drivers/accessibility/speakup/speakup_acntpc.c
drivers/accessibility/speakup/speakup_apollo.c
drivers/accessibility/speakup/speakup_audptr.c
drivers/accessibility/speakup/speakup_decext.c
drivers/accessibility/speakup/speakup_decpc.c
drivers/accessibility/speakup/speakup_dectlk.c
drivers/accessibility/speakup/speakup_dtlk.c
drivers/accessibility/speakup/speakup_keypc.c
drivers/accessibility/speakup/speakup_ltlk.c
drivers/accessibility/speakup/speakup_soft.c
drivers/accessibility/speakup/speakup_spkout.c
drivers/accessibility/speakup/spk_priv.h
drivers/accessibility/speakup/spk_ttyio.c
drivers/accessibility/speakup/spk_types.h
drivers/accessibility/speakup/synth.c
drivers/accessibility/speakup/varhandlers.c
drivers/acpi/Kconfig
drivers/acpi/Makefile
drivers/acpi/acpi_fpdt.c [new file with mode: 0644]
drivers/acpi/acpica/acobject.h
drivers/acpi/acpica/evhandler.c
drivers/acpi/acpica/evregion.c
drivers/acpi/acpica/evxfregn.c
drivers/acpi/acpica/nsaccess.c
drivers/acpi/internal.h
drivers/acpi/pci_root.c
drivers/acpi/platform_profile.c
drivers/acpi/processor_idle.c
drivers/acpi/scan.c
drivers/acpi/tables.c
drivers/acpi/video_detect.c
drivers/android/binderfs.c
drivers/atm/eni.c
drivers/atm/fore200e.c
drivers/atm/idt77105.c
drivers/atm/lanai.c
drivers/atm/uPD98402.c
drivers/auxdisplay/cfag12864b.c
drivers/auxdisplay/cfag12864bfb.c
drivers/auxdisplay/charlcd.c
drivers/auxdisplay/ks0108.c
drivers/base/Kconfig
drivers/base/Makefile
drivers/base/arch_numa.c [moved from arch/arm64/mm/numa.c with 93% similarity]
drivers/base/auxiliary.c
drivers/base/base.h
drivers/base/bus.c
drivers/base/core.c
drivers/base/dd.c
drivers/base/devtmpfs.c
drivers/base/init.c
drivers/base/memory.c
drivers/base/node.c
drivers/base/platform.c
drivers/base/power/domain.c
drivers/base/power/runtime.c
drivers/base/regmap/regmap-sdw-mbq.c
drivers/base/regmap/regmap-sdw.c
drivers/base/swnode.c
drivers/base/test/Makefile
drivers/block/Kconfig
drivers/block/Makefile
drivers/block/drbd/drbd_int.h
drivers/block/floppy.c
drivers/block/loop.c
drivers/block/n64cart.c [new file with mode: 0644]
drivers/block/nbd.c
drivers/block/null_blk/main.c
drivers/block/null_blk/null_blk.h
drivers/block/rsxx/core.c
drivers/block/rsxx/rsxx_priv.h
drivers/block/umem.c
drivers/block/virtio_blk.c
drivers/block/xen-blkback/blkback.c
drivers/block/zram/zram_drv.c
drivers/bluetooth/btrsi.c
drivers/bluetooth/btusb.c
drivers/bus/fsl-mc/Kconfig
drivers/bus/fsl-mc/Makefile
drivers/bus/fsl-mc/dprc-driver.c
drivers/bus/fsl-mc/fsl-mc-bus.c
drivers/bus/fsl-mc/fsl-mc-private.h
drivers/bus/fsl-mc/fsl-mc-uapi.c [new file with mode: 0644]
drivers/bus/fsl-mc/mc-sys.c
drivers/bus/mhi/core/init.c
drivers/bus/mhi/core/main.c
drivers/bus/mhi/pci_generic.c
drivers/bus/mvebu-mbus.c
drivers/bus/omap_l3_noc.c
drivers/bus/ti-sysc.c
drivers/char/agp/Kconfig
drivers/char/applicom.c
drivers/char/hw_random/pseries-rng.c
drivers/char/random.c
drivers/char/toshiba.c
drivers/char/tpm/tpm-chip.c
drivers/char/tpm/tpm_ibmvtpm.c
drivers/char/tpm/tpm_tis_core.c
drivers/clk/Kconfig
drivers/clk/Makefile
drivers/clk/clk-k210.c [new file with mode: 0644]
drivers/clk/clk.c
drivers/clk/qcom/clk-rcg2.c
drivers/clk/qcom/clk-rpmh.c
drivers/clk/qcom/gcc-sc7180.c
drivers/counter/stm32-timer-cnt.c
drivers/cpufreq/Kconfig.x86
drivers/cpufreq/Makefile
drivers/cpufreq/acpi-cpufreq.c
drivers/cpufreq/cpufreq-dt-platdev.c
drivers/cpufreq/cpufreq.c
drivers/cpufreq/freq_table.c
drivers/cpufreq/qcom-cpufreq-hw.c
drivers/cpufreq/sfi-cpufreq.c [deleted file]
drivers/crypto/nx/nx-842-pseries.c
drivers/crypto/nx/nx.c
drivers/cxl/Kconfig [new file with mode: 0644]
drivers/cxl/Makefile [new file with mode: 0644]
drivers/cxl/bus.c [new file with mode: 0644]
drivers/cxl/cxl.h [new file with mode: 0644]
drivers/cxl/mem.c [new file with mode: 0644]
drivers/cxl/pci.h [new file with mode: 0644]
drivers/dax/bus.c
drivers/dax/bus.h
drivers/dax/device.c
drivers/dax/kmem.c
drivers/dax/pmem/compat.c
drivers/dax/super.c
drivers/dma-buf/dma-fence.c
drivers/dma-buf/dma-heap.c
drivers/dma-buf/heaps/cma_heap.c
drivers/dma-buf/heaps/system_heap.c
drivers/dma/Kconfig
drivers/dma/Makefile
drivers/dma/at_hdmac.c
drivers/dma/at_hdmac_regs.h
drivers/dma/coh901318.c [deleted file]
drivers/dma/coh901318.h [deleted file]
drivers/dma/coh901318_lli.c [deleted file]
drivers/dma/dma-jz4780.c
drivers/dma/dw-axi-dmac/dw-axi-dmac-platform.c
drivers/dma/dw-axi-dmac/dw-axi-dmac.h
drivers/dma/fsldma.c
drivers/dma/hsu/pci.c
drivers/dma/idxd/dma.c
drivers/dma/idxd/init.c
drivers/dma/imx-sdma.c
drivers/dma/lgm/Kconfig [new file with mode: 0644]
drivers/dma/lgm/Makefile [new file with mode: 0644]
drivers/dma/lgm/lgm-dma.c [new file with mode: 0644]
drivers/dma/mmp_pdma.c
drivers/dma/owl-dma.c
drivers/dma/qcom/bam_dma.c
drivers/dma/qcom/gpi.c
drivers/dma/sh/rcar-dmac.c
drivers/dma/sirf-dma.c [deleted file]
drivers/dma/ste_dma40.c
drivers/dma/ti/k3-udma.c
drivers/dma/xilinx/xilinx_dma.c
drivers/dma/zx_dma.c [deleted file]
drivers/extcon/extcon.c
drivers/firewire/core-device.c
drivers/firewire/nosy.c
drivers/firmware/efi/efi.c
drivers/firmware/efi/libstub/Makefile
drivers/firmware/efi/libstub/arm64-stub.c
drivers/firmware/efi/libstub/efi-stub.c
drivers/firmware/efi/vars.c
drivers/firmware/google/coreboot_table.c
drivers/firmware/google/coreboot_table.h
drivers/firmware/google/framebuffer-coreboot.c
drivers/firmware/google/memconsole-coreboot.c
drivers/firmware/google/vpd.c
drivers/fpga/Kconfig
drivers/fpga/Makefile
drivers/fpga/dfl-fme-perf.c
drivers/fpga/dfl-n3000-nios.c [new file with mode: 0644]
drivers/fpga/dfl-pci.c
drivers/fpga/dfl.c
drivers/fpga/dfl.h
drivers/fpga/fpga-bridge.c
drivers/gpio/gpio-pca953x.c
drivers/gpio/gpiolib-acpi.c
drivers/gpio/gpiolib-of.c
drivers/gpio/gpiolib-of.h
drivers/gpio/gpiolib.c
drivers/gpu/drm/Kconfig
drivers/gpu/drm/amd/amdgpu/amdgpu.h
drivers/gpu/drm/amd/amdgpu/amdgpu_acpi.c
drivers/gpu/drm/amd/amdgpu/amdgpu_debugfs.c
drivers/gpu/drm/amd/amdgpu/amdgpu_device.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.c
drivers/gpu/drm/amd/amdgpu/amdgpu_display.h
drivers/gpu/drm/amd/amdgpu/amdgpu_drv.c
drivers/gpu/drm/amd/amdgpu/amdgpu_fb.c
drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
drivers/gpu/drm/amd/amdgpu/amdgpu_object.c
drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
drivers/gpu/drm/amd/amdgpu/dce_v10_0.c
drivers/gpu/drm/amd/amdgpu/dce_v11_0.c
drivers/gpu/drm/amd/amdgpu/dce_v6_0.c
drivers/gpu/drm/amd/amdgpu/dce_v8_0.c
drivers/gpu/drm/amd/amdgpu/dce_virtual.c
drivers/gpu/drm/amd/amdgpu/nv.c
drivers/gpu/drm/amd/amdkfd/kfd_dbgdev.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_vi.c
drivers/gpu/drm/amd/amdkfd/kfd_priv.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.h
drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm_helpers.c
drivers/gpu/drm/amd/display/dc/core/dc.c
drivers/gpu/drm/amd/display/dc/core/dc_link.c
drivers/gpu/drm/amd/display/dc/dc.h
drivers/gpu/drm/amd/display/dc/dc_stream.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hubp.h
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_hw_sequencer.c
drivers/gpu/drm/amd/display/dc/dcn10/dcn10_link_encoder.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hubp.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn20/dcn20_link_encoder.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_hubp.c
drivers/gpu/drm/amd/display/dc/dcn21/dcn21_resource.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_cm_common.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hubp.h
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_hwseq.c
drivers/gpu/drm/amd/display/dc/dcn30/dcn30_resource.c
drivers/gpu/drm/amd/display/dc/dcn301/dcn301_resource.c
drivers/gpu/drm/amd/display/dc/inc/hw/hubp.h
drivers/gpu/drm/amd/display/dc/irq/dcn21/irq_service_dcn21.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/smu7_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega10_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega12_hwmgr.c
drivers/gpu/drm/amd/pm/powerplay/hwmgr/vega20_hwmgr.c
drivers/gpu/drm/amd/pm/swsmu/amdgpu_smu.c
drivers/gpu/drm/amd/pm/swsmu/smu11/arcturus_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu11/smu_v11_0.c
drivers/gpu/drm/amd/pm/swsmu/smu11/vangogh_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu12/renoir_ppt.c
drivers/gpu/drm/amd/pm/swsmu/smu_cmn.c
drivers/gpu/drm/drm_crtc_helper_internal.h
drivers/gpu/drm/drm_fb_helper.c
drivers/gpu/drm/drm_file.c
drivers/gpu/drm/drm_gem_shmem_helper.c
drivers/gpu/drm/drm_ioc32.c
drivers/gpu/drm/drm_kms_helper_common.c
drivers/gpu/drm/drm_vblank.c
drivers/gpu/drm/etnaviv/etnaviv_gem.c
drivers/gpu/drm/exynos/exynos5433_drm_decon.c
drivers/gpu/drm/i915/Makefile
drivers/gpu/drm/i915/display/intel_acpi.c
drivers/gpu/drm/i915/display/intel_atomic_plane.c
drivers/gpu/drm/i915/display/intel_crtc.c
drivers/gpu/drm/i915/display/intel_display.c
drivers/gpu/drm/i915/display/intel_display_types.h
drivers/gpu/drm/i915/display/intel_dp.c
drivers/gpu/drm/i915/display/intel_dp_aux.c
drivers/gpu/drm/i915/display/intel_dp_link_training.c
drivers/gpu/drm/i915/display/intel_dp_link_training.h
drivers/gpu/drm/i915/display/intel_vdsc.c
drivers/gpu/drm/i915/gt/intel_engine_cs.c
drivers/gpu/drm/i915/gt/intel_ggtt_fencing.c
drivers/gpu/drm/i915/gvt/cmd_parser.c
drivers/gpu/drm/i915/gvt/execlist.c
drivers/gpu/drm/i915/gvt/scheduler.c
drivers/gpu/drm/i915/i915_cmd_parser.c
drivers/gpu/drm/i915/i915_drv.h
drivers/gpu/drm/i915/i915_perf.c
drivers/gpu/drm/i915/i915_reg.h
drivers/gpu/drm/i915/intel_pm.c
drivers/gpu/drm/i915/intel_runtime_pm.c
drivers/gpu/drm/i915/intel_runtime_pm.h
drivers/gpu/drm/imx/imx-drm-core.c
drivers/gpu/drm/imx/imx-ldb.c
drivers/gpu/drm/meson/meson_drv.c
drivers/gpu/drm/msm/adreno/a5xx.xml.h
drivers/gpu/drm/msm/adreno/a5xx_gpu.c
drivers/gpu/drm/msm/adreno/a5xx_power.c
drivers/gpu/drm/msm/adreno/a6xx_gmu.c
drivers/gpu/drm/msm/adreno/a6xx_gmu.h
drivers/gpu/drm/msm/adreno/a6xx_gpu.c
drivers/gpu/drm/msm/adreno/a6xx_gpu.h
drivers/gpu/drm/msm/adreno/adreno_device.c
drivers/gpu/drm/msm/adreno/adreno_gpu.c
drivers/gpu/drm/msm/adreno/adreno_gpu.h
drivers/gpu/drm/msm/disp/dpu1/dpu_encoder_phys_cmd.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_catalog.h
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_ctl.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_pingpong.h
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_sspp.h
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.c
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_util.h
drivers/gpu/drm/msm/disp/dpu1/dpu_hw_vbif.c
drivers/gpu/drm/msm/disp/dpu1/dpu_kms.c
drivers/gpu/drm/msm/disp/dpu1/dpu_plane.c
drivers/gpu/drm/msm/disp/mdp5/mdp5_cfg.c
drivers/gpu/drm/msm/disp/mdp5/mdp5_crtc.c
drivers/gpu/drm/msm/dp/dp_aux.c
drivers/gpu/drm/msm/dp/dp_catalog.c
drivers/gpu/drm/msm/dp/dp_ctrl.c
drivers/gpu/drm/msm/dp/dp_ctrl.h
drivers/gpu/drm/msm/dp/dp_display.c
drivers/gpu/drm/msm/dp/dp_panel.c
drivers/gpu/drm/msm/dsi/phy/dsi_phy_20nm.c
drivers/gpu/drm/msm/dsi/pll/dsi_pll.c
drivers/gpu/drm/msm/dsi/pll/dsi_pll.h
drivers/gpu/drm/msm/dsi/pll/dsi_pll_10nm.c
drivers/gpu/drm/msm/dsi/pll/dsi_pll_7nm.c
drivers/gpu/drm/msm/msm_atomic.c
drivers/gpu/drm/msm/msm_drv.c
drivers/gpu/drm/msm/msm_fence.c
drivers/gpu/drm/msm/msm_gem.c
drivers/gpu/drm/msm/msm_gem_submit.c
drivers/gpu/drm/nouveau/dispnv50/disp.c
drivers/gpu/drm/nouveau/nouveau_bo.c
drivers/gpu/drm/nouveau/nvkm/engine/fifo/gk104.c
drivers/gpu/drm/omapdrm/dss/dsi.c
drivers/gpu/drm/panel/panel-dsi-cm.c
drivers/gpu/drm/panel/panel-elida-kd35t133.c
drivers/gpu/drm/qxl/qxl_display.c
drivers/gpu/drm/qxl/qxl_drv.c
drivers/gpu/drm/qxl/qxl_release.c
drivers/gpu/drm/radeon/radeon.h
drivers/gpu/drm/radeon/radeon_gem.c
drivers/gpu/drm/radeon/radeon_prime.c
drivers/gpu/drm/radeon/radeon_ttm.c
drivers/gpu/drm/rcar-du/rcar_du_encoder.c
drivers/gpu/drm/rockchip/rockchip_drm_vop.h
drivers/gpu/drm/tegra/dc.c
drivers/gpu/drm/tegra/sor.c
drivers/gpu/drm/tilcdc/Makefile
drivers/gpu/drm/tiny/gm12u320.c
drivers/gpu/drm/ttm/ttm_bo.c
drivers/gpu/drm/ttm/ttm_pool.c
drivers/gpu/drm/udl/udl_drv.c
drivers/gpu/drm/udl/udl_drv.h
drivers/gpu/drm/udl/udl_main.c
drivers/gpu/drm/vc4/vc4_crtc.c
drivers/gpu/drm/vc4/vc4_plane.c
drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
drivers/gpu/drm/xen/xen_drm_front.c
drivers/gpu/drm/xen/xen_drm_front_conn.h
drivers/gpu/host1x/bus.c
drivers/greybus/es2.c
drivers/greybus/greybus_trace.h
drivers/hid/Kconfig
drivers/hid/Makefile
drivers/hid/hid-chicony.c
drivers/hid/hid-core.c
drivers/hid/hid-google-hammer.c
drivers/hid/hid-ids.h
drivers/hid/hid-input.c
drivers/hid/hid-ite.c
drivers/hid/hid-lg-g15.c
drivers/hid/hid-logitech-dj.c
drivers/hid/hid-logitech-hidpp.c
drivers/hid/hid-multitouch.c
drivers/hid/hid-playstation.c [new file with mode: 0644]
drivers/hid/hid-quirks.c
drivers/hid/hid-roccat-arvo.c
drivers/hid/hid-sony.c
drivers/hid/hid-uclogic-core.c
drivers/hid/hid-uclogic-params.c
drivers/hid/i2c-hid/Kconfig
drivers/hid/i2c-hid/Makefile
drivers/hid/i2c-hid/i2c-hid-acpi.c [new file with mode: 0644]
drivers/hid/i2c-hid/i2c-hid-core.c
drivers/hid/i2c-hid/i2c-hid-of-goodix.c [new file with mode: 0644]
drivers/hid/i2c-hid/i2c-hid-of.c [new file with mode: 0644]
drivers/hid/i2c-hid/i2c-hid.h
drivers/hid/intel-ish-hid/ipc/hw-ish.h
drivers/hid/intel-ish-hid/ipc/ipc.c
drivers/hid/intel-ish-hid/ipc/pci-ish.c
drivers/hid/wacom_sys.c
drivers/hid/wacom_wac.c
drivers/hv/hv_balloon.c
drivers/hwspinlock/omap_hwspinlock.c
drivers/hwtracing/coresight/coresight-catu.c
drivers/hwtracing/coresight/coresight-core.c
drivers/hwtracing/coresight/coresight-cti-core.c
drivers/hwtracing/coresight/coresight-cti-platform.c
drivers/hwtracing/coresight/coresight-etb10.c
drivers/hwtracing/coresight/coresight-etm-perf.c
drivers/hwtracing/coresight/coresight-etm3x-core.c
drivers/hwtracing/coresight/coresight-etm4x-core.c
drivers/hwtracing/coresight/coresight-etm4x-sysfs.c
drivers/hwtracing/coresight/coresight-etm4x.h
drivers/hwtracing/coresight/coresight-funnel.c
drivers/hwtracing/coresight/coresight-replicator.c
drivers/hwtracing/coresight/coresight-stm.c
drivers/hwtracing/coresight/coresight-tmc-core.c
drivers/hwtracing/coresight/coresight-tmc-etf.c
drivers/hwtracing/coresight/coresight-tmc-etr.c
drivers/hwtracing/coresight/coresight-tpiu.c
drivers/i2c/busses/i2c-brcmstb.c
drivers/i2c/busses/i2c-designware-core.h
drivers/i2c/busses/i2c-designware-master.c
drivers/i2c/busses/i2c-exynos5.c
drivers/i2c/busses/i2c-qcom-geni.c
drivers/ide/falconide.c
drivers/iio/adc/Kconfig
drivers/iio/adc/ab8500-gpadc.c
drivers/iio/adc/ad7949.c
drivers/iio/adc/qcom-spmi-vadc.c
drivers/iio/gyro/mpu3050-core.c
drivers/iio/humidity/hid-sensor-humidity.c
drivers/iio/imu/adis16400.c
drivers/iio/light/hid-sensor-prox.c
drivers/iio/temperature/hid-sensor-temperature.c
drivers/infiniband/core/addr.c
drivers/infiniband/core/cm.c
drivers/infiniband/core/uverbs_ioctl.c
drivers/infiniband/hw/cxgb4/cm.c
drivers/infiniband/hw/hfi1/affinity.c
drivers/infiniband/hw/hfi1/hfi.h
drivers/infiniband/hw/hfi1/init.c
drivers/infiniband/hw/hfi1/netdev_rx.c
drivers/infiniband/hw/hns/hns_roce_hw_v2.c
drivers/infiniband/hw/mlx5/devx.c
drivers/infiniband/hw/mlx5/odp.c
drivers/infiniband/hw/mlx5/qp.c
drivers/infiniband/hw/qedr/verbs.c
drivers/infiniband/sw/rxe/Kconfig
drivers/infiniband/sw/rxe/rxe_comp.c
drivers/infiniband/sw/rxe/rxe_net.c
drivers/infiniband/sw/rxe/rxe_recv.c
drivers/infiniband/ulp/rtrs/rtrs-clt.c
drivers/input/joydev.c
drivers/input/joystick/Kconfig
drivers/input/joystick/Makefile
drivers/input/joystick/n64joy.c [new file with mode: 0644]
drivers/input/joystick/xpad.c
drivers/input/keyboard/Kconfig
drivers/input/keyboard/applespi.c
drivers/input/keyboard/cros_ec_keyb.c
drivers/input/keyboard/omap4-keypad.c
drivers/input/misc/da7280.c
drivers/input/mouse/alps.c
drivers/input/mouse/synaptics.c
drivers/input/serio/Kconfig
drivers/input/serio/i8042-x86ia64io.h
drivers/input/tablet/aiptek.c
drivers/input/touchscreen/Kconfig
drivers/input/touchscreen/ads7846.c
drivers/input/touchscreen/elants_i2c.c
drivers/input/touchscreen/elo.c
drivers/input/touchscreen/iqs5xx.c
drivers/input/touchscreen/melfas_mip4.c
drivers/input/touchscreen/raydium_i2c_ts.c
drivers/input/touchscreen/st1232.c
drivers/input/touchscreen/stmpe-ts.c
drivers/input/touchscreen/sur40.c
drivers/input/touchscreen/surface3_spi.c
drivers/input/touchscreen/usbtouchscreen.c
drivers/input/touchscreen/zinitix.c
drivers/interconnect/bulk.c
drivers/interconnect/core.c
drivers/interconnect/qcom/Kconfig
drivers/interconnect/qcom/Makefile
drivers/interconnect/qcom/icc-rpm.c [new file with mode: 0644]
drivers/interconnect/qcom/icc-rpm.h [new file with mode: 0644]
drivers/interconnect/qcom/msm8916.c
drivers/interconnect/qcom/msm8939.c [new file with mode: 0644]
drivers/interconnect/qcom/qcs404.c
drivers/interconnect/qcom/sdx55.c [new file with mode: 0644]
drivers/interconnect/qcom/sdx55.h [new file with mode: 0644]
drivers/iommu/amd/init.c
drivers/iommu/amd/io_pgtable.c
drivers/iommu/dma-iommu.c
drivers/iommu/intel/pasid.h
drivers/iommu/tegra-smmu.c
drivers/ipack/ipack.c
drivers/irqchip/Kconfig
drivers/irqchip/irq-ingenic-tcu.c
drivers/irqchip/irq-ingenic.c
drivers/isdn/capi/kcapi.c
drivers/isdn/hardware/mISDN/mISDNipac.c
drivers/leds/Kconfig
drivers/leds/Makefile
drivers/leds/blink/Kconfig [new file with mode: 0644]
drivers/leds/blink/Makefile [new file with mode: 0644]
drivers/leds/blink/leds-lgm-sso.c [new file with mode: 0644]
drivers/leds/led-class.c
drivers/leds/led-core.c
drivers/leds/leds-apu.c
drivers/leds/leds-blinkm.c
drivers/leds/leds-gpio.c
drivers/leds/leds-lm3530.c
drivers/leds/leds-lm3533.c
drivers/leds/leds-lm355x.c
drivers/leds/leds-lm3642.c
drivers/leds/leds-lp50xx.c
drivers/leds/leds-max8997.c
drivers/leds/leds-netxbig.c
drivers/leds/leds-ss4200.c
drivers/leds/leds-wm831x-status.c
drivers/leds/leds.h
drivers/leds/trigger/ledtrig-tty.c
drivers/mailbox/arm_mhuv2.c
drivers/mailbox/omap-mailbox.c
drivers/mailbox/qcom-apcs-ipc-mailbox.c
drivers/mailbox/sprd-mailbox.c
drivers/mailbox/tegra-hsp.c
drivers/md/bcache/super.c
drivers/md/dm-bufio.c
drivers/md/dm-crypt.c
drivers/md/dm-io.c
drivers/md/dm-ioctl.c
drivers/md/dm-log-writes.c
drivers/md/dm-table.c
drivers/md/dm-verity-fec.c
drivers/md/dm-verity-target.c
drivers/md/dm-writecache.c
drivers/md/dm-zoned-target.c
drivers/md/dm.c
drivers/md/raid5-cache.c
drivers/md/raid5-ppl.c
drivers/media/firewire/firedtv-fw.c
drivers/media/pci/cx18/cx18-alsa-main.c
drivers/media/pci/cx18/cx18-driver.c
drivers/media/pci/cx25821/cx25821-alsa.c
drivers/media/pci/cx88/cx88-alsa.c
drivers/media/pci/ivtv/ivtv-alsa-main.c
drivers/media/pci/ivtv/ivtv-driver.c
drivers/media/pci/sta2x11/sta2x11_vip.c
drivers/media/platform/atmel/atmel-isi.c
drivers/media/platform/atmel/atmel-sama5d2-isc.c
drivers/media/platform/marvell-ccic/cafe-driver.c
drivers/media/platform/rockchip/rkisp1/rkisp1-params.c
drivers/media/platform/stm32/stm32-dcmi.c
drivers/media/platform/vsp1/vsp1_drm.c
drivers/media/rc/Makefile
drivers/media/rc/keymaps/Makefile
drivers/media/rc/keymaps/rc-cec.c
drivers/media/rc/rc-main.c
drivers/media/usb/cpia2/cpia2_v4l.c
drivers/media/usb/tm6000/tm6000-alsa.c
drivers/media/usb/tm6000/tm6000-dvb.c
drivers/media/usb/usbtv/usbtv-audio.c
drivers/memory/Kconfig
drivers/memory/Makefile
drivers/memory/dfl-emif.c [new file with mode: 0644]
drivers/mfd/Kconfig
drivers/mfd/Makefile
drivers/mfd/intel_msic.c [deleted file]
drivers/mfd/intel_quark_i2c_gpio.c
drivers/misc/Kconfig
drivers/misc/Makefile
drivers/misc/atmel_tclib.c [deleted file]
drivers/misc/bcm-vk/Kconfig [new file with mode: 0644]
drivers/misc/bcm-vk/Makefile [new file with mode: 0644]
drivers/misc/bcm-vk/bcm_vk.h [new file with mode: 0644]
drivers/misc/bcm-vk/bcm_vk_dev.c [new file with mode: 0644]
drivers/misc/bcm-vk/bcm_vk_msg.c [new file with mode: 0644]
drivers/misc/bcm-vk/bcm_vk_msg.h [new file with mode: 0644]
drivers/misc/bcm-vk/bcm_vk_sg.c [new file with mode: 0644]
drivers/misc/bcm-vk/bcm_vk_sg.h [new file with mode: 0644]
drivers/misc/bcm-vk/bcm_vk_tty.c [new file with mode: 0644]
drivers/misc/cardreader/rts5227.c
drivers/misc/cardreader/rtsx_pcr.c
drivers/misc/cxl/sysfs.c
drivers/misc/eeprom/eeprom_93xx46.c
drivers/misc/fastrpc.c
drivers/misc/habanalabs/common/Makefile
drivers/misc/habanalabs/common/asid.c
drivers/misc/habanalabs/common/command_buffer.c
drivers/misc/habanalabs/common/command_submission.c
drivers/misc/habanalabs/common/context.c
drivers/misc/habanalabs/common/debugfs.c
drivers/misc/habanalabs/common/device.c
drivers/misc/habanalabs/common/firmware_if.c
drivers/misc/habanalabs/common/habanalabs.h
drivers/misc/habanalabs/common/habanalabs_ioctl.c
drivers/misc/habanalabs/common/hw_queue.c
drivers/misc/habanalabs/common/irq.c
drivers/misc/habanalabs/common/memory.c
drivers/misc/habanalabs/common/mmu/Makefile [new file with mode: 0644]
drivers/misc/habanalabs/common/mmu/mmu.c [moved from drivers/misc/habanalabs/common/mmu.c with 75% similarity]
drivers/misc/habanalabs/common/mmu/mmu_v1.c [moved from drivers/misc/habanalabs/common/mmu_v1.c with 99% similarity]
drivers/misc/habanalabs/common/pci/Makefile [new file with mode: 0644]
drivers/misc/habanalabs/common/pci/pci.c [moved from drivers/misc/habanalabs/common/pci.c with 91% similarity]
drivers/misc/habanalabs/gaudi/gaudi.c
drivers/misc/habanalabs/gaudi/gaudiP.h
drivers/misc/habanalabs/gaudi/gaudi_coresight.c
drivers/misc/habanalabs/gaudi/gaudi_security.c
drivers/misc/habanalabs/goya/goya.c
drivers/misc/habanalabs/goya/goyaP.h
drivers/misc/habanalabs/goya/goya_coresight.c
drivers/misc/habanalabs/goya/goya_security.c
drivers/misc/habanalabs/include/common/cpucp_if.h
drivers/misc/habanalabs/include/common/hl_boot_if.h
drivers/misc/habanalabs/include/gaudi/gaudi_async_events.h
drivers/misc/habanalabs/include/gaudi/gaudi_masks.h
drivers/misc/habanalabs/include/gaudi/gaudi_packets.h
drivers/misc/habanalabs/include/goya/asic_reg/goya_masks.h
drivers/misc/ibmvmc.c
drivers/misc/lkdtm/Makefile
drivers/misc/mei/bus.c
drivers/misc/mei/client.c
drivers/misc/mei/client.h
drivers/misc/mei/debugfs.c
drivers/misc/mei/hbm.c
drivers/misc/mei/hbm.h
drivers/misc/mei/hdcp/mei_hdcp.c
drivers/misc/mei/hw-me-regs.h
drivers/misc/mei/hw.h
drivers/misc/mei/init.c
drivers/misc/mei/interrupt.c
drivers/misc/mei/main.c
drivers/misc/mei/mei_dev.h
drivers/misc/mei/pci-me.c
drivers/misc/pci_endpoint_test.c
drivers/misc/pti.c [deleted file]
drivers/misc/pvpanic.c
drivers/misc/sgi-xp/xpnet.c
drivers/misc/vmw_vmci/vmci_queue_pair.c
drivers/misc/vmw_vmci/vmci_queue_pair.h
drivers/mmc/core/bus.c
drivers/mmc/core/mmc.c
drivers/mmc/host/mmci.c
drivers/mmc/host/sdhci.c
drivers/most/core.c
drivers/mtd/maps/sun_uflash.c
drivers/net/Kconfig
drivers/net/arcnet/com20020-pci.c
drivers/net/can/c_can/c_can.c
drivers/net/can/c_can/c_can_pci.c
drivers/net/can/c_can/c_can_platform.c
drivers/net/can/dev/dev.c
drivers/net/can/dev/netlink.c
drivers/net/can/flexcan.c
drivers/net/can/kvaser_pciefd.c
drivers/net/can/m_can/m_can.c
drivers/net/can/m_can/tcan4x5x-core.c
drivers/net/can/peak_canfd/peak_pciefd_main.c
drivers/net/can/sja1000/ems_pci.c
drivers/net/can/sja1000/ems_pcmcia.c
drivers/net/can/sja1000/kvaser_pci.c
drivers/net/can/sja1000/peak_pci.c
drivers/net/can/sja1000/peak_pcmcia.c
drivers/net/can/sja1000/plx_pci.c
drivers/net/can/slcan.c
drivers/net/can/spi/mcp251xfd/mcp251xfd-core.c
drivers/net/can/usb/Kconfig
drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c
drivers/net/can/usb/peak_usb/pcan_usb.c
drivers/net/can/usb/peak_usb/pcan_usb_fd.c
drivers/net/can/usb/peak_usb/pcan_usb_pro.c
drivers/net/can/vcan.c
drivers/net/can/vxcan.c
drivers/net/dsa/b53/b53_common.c
drivers/net/dsa/b53/b53_priv.h
drivers/net/dsa/b53/b53_regs.h
drivers/net/dsa/bcm_sf2.c
drivers/net/dsa/mt7530.c
drivers/net/dsa/sja1105/sja1105_main.c
drivers/net/dsa/sja1105/sja1105_static_config.c
drivers/net/dsa/xrs700x/xrs700x.c
drivers/net/ethernet/atheros/ag71xx.c
drivers/net/ethernet/atheros/alx/main.c
drivers/net/ethernet/broadcom/Kconfig
drivers/net/ethernet/broadcom/bcm4908_enet.c
drivers/net/ethernet/broadcom/bcm63xx_enet.c
drivers/net/ethernet/broadcom/bnxt/bnxt.c
drivers/net/ethernet/cadence/macb_main.c
drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c
drivers/net/ethernet/davicom/dm9000.c
drivers/net/ethernet/faraday/ftgmac100.c
drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
drivers/net/ethernet/freescale/enetc/enetc.c
drivers/net/ethernet/freescale/enetc/enetc.h
drivers/net/ethernet/freescale/enetc/enetc_hw.h
drivers/net/ethernet/freescale/enetc/enetc_pf.c
drivers/net/ethernet/freescale/enetc/enetc_vf.c
drivers/net/ethernet/freescale/fec_ptp.c
drivers/net/ethernet/freescale/gianfar.c
drivers/net/ethernet/hisilicon/hns/hns_enet.c
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
drivers/net/ethernet/ibm/ibmveth.c
drivers/net/ethernet/ibm/ibmvnic.c
drivers/net/ethernet/intel/e1000e/82571.c
drivers/net/ethernet/intel/e1000e/hw.h
drivers/net/ethernet/intel/e1000e/netdev.c
drivers/net/ethernet/intel/i40e/i40e_ethtool.c
drivers/net/ethernet/intel/i40e/i40e_main.c
drivers/net/ethernet/intel/i40e/i40e_txrx.c
drivers/net/ethernet/intel/i40e/i40e_xsk.c
drivers/net/ethernet/intel/iavf/iavf_main.c
drivers/net/ethernet/intel/ice/ice.h
drivers/net/ethernet/intel/ice/ice_base.c
drivers/net/ethernet/intel/ice/ice_dcb_nl.c
drivers/net/ethernet/intel/ice/ice_ethtool.c
drivers/net/ethernet/intel/ice/ice_txrx.c
drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
drivers/net/ethernet/intel/ice/ice_xsk.c
drivers/net/ethernet/intel/igb/e1000_hw.h
drivers/net/ethernet/intel/igb/igb.h
drivers/net/ethernet/intel/igb/igb_main.c
drivers/net/ethernet/intel/igb/igb_ptp.c
drivers/net/ethernet/intel/igc/igc.h
drivers/net/ethernet/intel/igc/igc_ethtool.c
drivers/net/ethernet/intel/igc/igc_main.c
drivers/net/ethernet/intel/igc/igc_ptp.c
drivers/net/ethernet/intel/ixgbe/ixgbe_ipsec.c
drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
drivers/net/ethernet/intel/ixgbevf/ipsec.c
drivers/net/ethernet/marvell/Kconfig
drivers/net/ethernet/marvell/mv643xx_eth.c
drivers/net/ethernet/marvell/mvpp2/mvpp2_main.c
drivers/net/ethernet/marvell/octeontx2/af/cgx.c
drivers/net/ethernet/marvell/octeontx2/af/npc_profile.h
drivers/net/ethernet/marvell/octeontx2/af/rvu.c
drivers/net/ethernet/marvell/octeontx2/af/rvu.h
drivers/net/ethernet/marvell/octeontx2/af/rvu_cgx.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_debugfs.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
drivers/net/ethernet/marvell/octeontx2/af/rvu_npc.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_flows.c
drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
drivers/net/ethernet/marvell/pxa168_eth.c
drivers/net/ethernet/marvell/sky2.c
drivers/net/ethernet/mediatek/mtk_star_emac.c
drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
drivers/net/ethernet/mellanox/mlx4/en_netdev.c
drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
drivers/net/ethernet/mellanox/mlx4/resource_tracker.c
drivers/net/ethernet/mellanox/mlx5/core/en.h
drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c
drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_geneve.c
drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
drivers/net/ethernet/mellanox/mlx5/core/en_main.c
drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
drivers/net/ethernet/mellanox/mlx5/core/main.c
drivers/net/ethernet/mellanox/mlx5/core/sf/dev/dev.c
drivers/net/ethernet/mellanox/mlx5/core/sf/hw_table.c
drivers/net/ethernet/mellanox/mlx5/core/sf/mlx5_ifc_vhca_event.h
drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.c
drivers/net/ethernet/mellanox/mlx5/core/sf/vhca_event.h
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste_v1.c
drivers/net/ethernet/mellanox/mlxsw/reg.h
drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c
drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
drivers/net/ethernet/mellanox/mlxsw/switchx2.c
drivers/net/ethernet/microchip/lan743x_main.c
drivers/net/ethernet/mscc/Kconfig
drivers/net/ethernet/mscc/ocelot_flower.c
drivers/net/ethernet/netronome/nfp/flower/metadata.c
drivers/net/ethernet/netronome/nfp/flower/offload.c
drivers/net/ethernet/netronome/nfp/flower/tunnel_conf.c
drivers/net/ethernet/pensando/ionic/ionic_txrx.c
drivers/net/ethernet/qlogic/qlcnic/qlcnic_minidump.c
drivers/net/ethernet/realtek/r8169_main.c
drivers/net/ethernet/renesas/sh_eth.c
drivers/net/ethernet/socionext/netsec.c
drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c
drivers/net/ethernet/stmicro/stmmac/dwmac-visconti.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_descs.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_dma.c
drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c
drivers/net/ethernet/stmicro/stmmac/dwxgmac2_descs.c
drivers/net/ethernet/stmicro/stmmac/enh_desc.c
drivers/net/ethernet/stmicro/stmmac/hwif.h
drivers/net/ethernet/stmicro/stmmac/norm_desc.c
drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
drivers/net/ethernet/stmicro/stmmac/stmmac_tc.c
drivers/net/ethernet/sun/niu.c
drivers/net/ethernet/tehuti/tehuti.c
drivers/net/ethernet/xilinx/xilinx_axienet_main.c
drivers/net/gtp.c
drivers/net/hamradio/6pack.c
drivers/net/hamradio/scc.c
drivers/net/hyperv/hyperv_net.h
drivers/net/hyperv/netvsc_drv.c
drivers/net/hyperv/rndis_filter.c
drivers/net/ipa/ipa_cmd.c
drivers/net/ipa/ipa_qmi.c
drivers/net/netdevsim/netdev.c
drivers/net/phy/broadcom.c
drivers/net/phy/dp83822.c
drivers/net/phy/dp83tc811.c
drivers/net/phy/icplus.c
drivers/net/phy/micrel.c
drivers/net/phy/phy.c
drivers/net/phy/phy_device.c
drivers/net/phy/phylink.c
drivers/net/usb/cdc-phonet.c
drivers/net/usb/cdc_ncm.c
drivers/net/usb/qmi_wwan.c
drivers/net/usb/r8152.c
drivers/net/usb/usbnet.c
drivers/net/veth.c
drivers/net/virtio_net.c
drivers/net/vxlan.c
drivers/net/wan/fsl_ucc_hdlc.c
drivers/net/wan/hdlc_x25.c
drivers/net/wan/lapbether.c
drivers/net/wireguard/device.c
drivers/net/wireguard/device.h
drivers/net/wireguard/peer.c
drivers/net/wireguard/peer.h
drivers/net/wireguard/queueing.c
drivers/net/wireguard/queueing.h
drivers/net/wireguard/receive.c
drivers/net/wireguard/send.c
drivers/net/wireguard/socket.c
drivers/net/wireless/admtek/adm8211.c
drivers/net/wireless/ath/ath11k/mac.c
drivers/net/wireless/ath/ath11k/qmi.c
drivers/net/wireless/ath/ath5k/base.c
drivers/net/wireless/ath/ath9k/ath9k.h
drivers/net/wireless/ath/ath9k/hw.c
drivers/net/wireless/ath/ath9k/init.c
drivers/net/wireless/ath/ath9k/xmit.c
drivers/net/wireless/atmel/atmel.c
drivers/net/wireless/atmel/atmel_cs.c
drivers/net/wireless/atmel/atmel_pci.c
drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
drivers/net/wireless/broadcom/brcm80211/brcmutil/utils.c
drivers/net/wireless/cisco/airo.c
drivers/net/wireless/cisco/airo_cs.c
drivers/net/wireless/intel/iwlwifi/fw/file.h
drivers/net/wireless/intel/iwlwifi/fw/pnvm.c
drivers/net/wireless/intel/iwlwifi/iwl-op-mode.h
drivers/net/wireless/intel/iwlwifi/mvm/fw.c
drivers/net/wireless/intel/iwlwifi/pcie/drv.c
drivers/net/wireless/intel/iwlwifi/pcie/rx.c
drivers/net/wireless/intersil/hostap/hostap_cs.c
drivers/net/wireless/intersil/hostap/hostap_pci.c
drivers/net/wireless/intersil/hostap/hostap_plx.c
drivers/net/wireless/mediatek/mt76/dma.c
drivers/net/wireless/mediatek/mt76/mt7915/mac.c
drivers/net/wireless/mediatek/mt76/mt7915/testmode.c
drivers/net/wireless/mediatek/mt76/mt7921/mcu.c
drivers/net/wireless/ralink/rt2x00/rt2400pci.c
drivers/net/wireless/ralink/rt2x00/rt2500pci.c
drivers/net/wireless/ralink/rt2x00/rt2500usb.c
drivers/net/wireless/ralink/rt2x00/rt2800pci.c
drivers/net/wireless/ralink/rt2x00/rt2800usb.c
drivers/net/wireless/ralink/rt2x00/rt61pci.c
drivers/net/wireless/ralink/rt2x00/rt73usb.c
drivers/net/wireless/rsi/rsi_91x_main.c
drivers/net/wireless/rsi/rsi_91x_sdio.c
drivers/net/wireless/rsi/rsi_91x_usb.c
drivers/net/xen-netback/netback.c
drivers/nfc/microread/mei.c
drivers/nfc/pn544/mei.c
drivers/ntb/hw/Kconfig
drivers/ntb/hw/Makefile
drivers/ntb/hw/epf/Kconfig [new file with mode: 0644]
drivers/ntb/hw/epf/Makefile [new file with mode: 0644]
drivers/ntb/hw/epf/ntb_hw_epf.c [new file with mode: 0644]
drivers/nvdimm/blk.c
drivers/nvdimm/bus.c
drivers/nvdimm/dimm.c
drivers/nvdimm/pmem.c
drivers/nvdimm/region.c
drivers/nvme/host/core.c
drivers/nvme/host/fabrics.c
drivers/nvme/host/fabrics.h
drivers/nvme/host/fc.c
drivers/nvme/host/hwmon.c
drivers/nvme/host/pci.c
drivers/nvme/host/rdma.c
drivers/nvme/host/tcp.c
drivers/nvme/host/zns.c
drivers/nvme/target/admin-cmd.c
drivers/nvme/target/configfs.c
drivers/nvme/target/core.c
drivers/nvme/target/io-cmd-bdev.c
drivers/nvme/target/loop.c
drivers/nvme/target/nvmet.h
drivers/nvme/target/passthru.c
drivers/nvme/target/rdma.c
drivers/nvme/target/tcp.c
drivers/nvmem/Kconfig
drivers/nvmem/Makefile
drivers/nvmem/core.c
drivers/nvmem/imx-iim.c
drivers/nvmem/qcom-spmi-sdam.c
drivers/nvmem/rmem.c [new file with mode: 0644]
drivers/of/platform.c
drivers/of/property.c
drivers/opp/core.c
drivers/opp/opp.h
drivers/parport/parport_amiga.c
drivers/parport/parport_atari.c
drivers/parport/parport_gsc.c
drivers/parport/parport_mfc3.c
drivers/parport/parport_sunbpp.c
drivers/pci/Makefile
drivers/pci/controller/Kconfig
drivers/pci/controller/Makefile
drivers/pci/controller/cadence/pci-j721e.c
drivers/pci/controller/cadence/pcie-cadence-ep.c
drivers/pci/controller/cadence/pcie-cadence-host.c
drivers/pci/controller/cadence/pcie-cadence.h
drivers/pci/controller/dwc/pci-layerscape-ep.c
drivers/pci/controller/dwc/pci-layerscape.c
drivers/pci/controller/dwc/pcie-al.c
drivers/pci/controller/dwc/pcie-designware-ep.c
drivers/pci/controller/dwc/pcie-designware-host.c
drivers/pci/controller/dwc/pcie-designware.c
drivers/pci/controller/dwc/pcie-designware.h
drivers/pci/controller/dwc/pcie-qcom.c
drivers/pci/controller/pci-host-common.c
drivers/pci/controller/pci-hyperv.c
drivers/pci/controller/pci-xgene-msi.c
drivers/pci/controller/pci-xgene.c
drivers/pci/controller/pcie-altera-msi.c
drivers/pci/controller/pcie-brcmstb.c
drivers/pci/controller/pcie-mediatek.c
drivers/pci/controller/pcie-microchip-host.c [new file with mode: 0644]
drivers/pci/controller/pcie-rcar-host.c
drivers/pci/controller/pcie-rockchip.c
drivers/pci/controller/pcie-tango.c [deleted file]
drivers/pci/controller/pcie-xilinx-cpm.c
drivers/pci/endpoint/functions/Kconfig
drivers/pci/endpoint/functions/Makefile
drivers/pci/endpoint/functions/pci-epf-ntb.c [new file with mode: 0644]
drivers/pci/endpoint/functions/pci-epf-test.c
drivers/pci/endpoint/pci-ep-cfs.c
drivers/pci/endpoint/pci-epc-core.c
drivers/pci/endpoint/pci-epf-core.c
drivers/pci/hotplug/acpiphp.h
drivers/pci/hotplug/rpadlpar_sysfs.c
drivers/pci/hotplug/s390_pci_hpc.c
drivers/pci/pci-bridge-emul.c
drivers/pci/pci.c
drivers/pci/pcie/Kconfig
drivers/pci/pcie/Makefile
drivers/pci/pcie/aer.c
drivers/pci/pcie/bw_notification.c [deleted file]
drivers/pci/pcie/err.c
drivers/pci/pcie/portdrv.h
drivers/pci/pcie/portdrv_pci.c
drivers/pci/search.c
drivers/pci/setup-res.c
drivers/pci/syscall.c
drivers/pci/xen-pcifront.c
drivers/pcmcia/cistpl.c
drivers/perf/arm_dmc620_pmu.c
drivers/phy/Kconfig
drivers/phy/broadcom/Kconfig
drivers/phy/broadcom/phy-brcm-sata.c
drivers/phy/broadcom/phy-brcm-usb.c
drivers/phy/cadence/phy-cadence-torrent.c
drivers/phy/ingenic/phy-ingenic-usb.c
drivers/phy/lantiq/phy-lantiq-rcu-usb2.c
drivers/phy/mediatek/phy-mtk-hdmi.c
drivers/phy/mediatek/phy-mtk-mipi-dsi.c
drivers/phy/motorola/phy-cpcap-usb.c
drivers/phy/qualcomm/phy-qcom-qmp.c
drivers/phy/qualcomm/phy-qcom-qmp.h
drivers/phy/qualcomm/phy-qcom-qusb2.c
drivers/phy/qualcomm/phy-qcom-usb-hs-28nm.c
drivers/phy/rockchip/phy-rockchip-emmc.c
drivers/phy/st/phy-stm32-usbphyc.c
drivers/phy/xilinx/phy-zynqmp.c
drivers/pinctrl/Kconfig
drivers/pinctrl/Makefile
drivers/pinctrl/intel/pinctrl-intel.c
drivers/pinctrl/pinctrl-k210.c [new file with mode: 0644]
drivers/pinctrl/pinctrl-microchip-sgpio.c
drivers/pinctrl/pinctrl-rockchip.c
drivers/pinctrl/qcom/pinctrl-lpass-lpi.c
drivers/pinctrl/qcom/pinctrl-sc7280.c
drivers/pinctrl/qcom/pinctrl-sdx55.c
drivers/platform/goldfish/goldfish_pipe.c
drivers/platform/x86/Kconfig
drivers/platform/x86/dell/dell-wmi-sysman/enum-attributes.c
drivers/platform/x86/dell/dell-wmi-sysman/int-attributes.c
drivers/platform/x86/dell/dell-wmi-sysman/passobj-attributes.c
drivers/platform/x86/dell/dell-wmi-sysman/string-attributes.c
drivers/platform/x86/dell/dell-wmi-sysman/sysman.c
drivers/platform/x86/intel-hid.c
drivers/platform/x86/intel-vbtn.c
drivers/platform/x86/intel_pmc_core.c
drivers/platform/x86/intel_pmt_class.c
drivers/platform/x86/intel_pmt_crashlog.c
drivers/platform/x86/intel_scu_pcidrv.c
drivers/platform/x86/thinkpad_acpi.c
drivers/powercap/Kconfig
drivers/powercap/dtpm.c
drivers/ptp/ptp_qoriq.c
drivers/pwm/Kconfig
drivers/pwm/Makefile
drivers/pwm/pwm-iqs620a.c
drivers/pwm/pwm-lpc18xx-sct.c
drivers/pwm/pwm-rockchip.c
drivers/pwm/pwm-zx.c [deleted file]
drivers/regulator/bd9571mwv-regulator.c
drivers/regulator/mt6315-regulator.c
drivers/regulator/pca9450-regulator.c
drivers/regulator/qcom-rpmh-regulator.c
drivers/regulator/rt4831-regulator.c
drivers/remoteproc/Kconfig
drivers/remoteproc/ingenic_rproc.c
drivers/remoteproc/mtk_common.h
drivers/remoteproc/mtk_scp.c
drivers/remoteproc/pru_rproc.c
drivers/remoteproc/qcom_pil_info.c
drivers/remoteproc/qcom_q6v5_pas.c
drivers/remoteproc/qcom_wcnss.c
drivers/remoteproc/qcom_wcnss_iris.c
drivers/remoteproc/remoteproc_core.c
drivers/remoteproc/stm32_rproc.c
drivers/reset/Kconfig
drivers/reset/Makefile
drivers/reset/reset-k210.c [new file with mode: 0644]
drivers/rpmsg/qcom_glink_ssr.c
drivers/rtc/rtc-m41t80.c
drivers/s390/block/dasd.c
drivers/s390/char/tty3270.c
drivers/s390/char/vmur.c
drivers/s390/char/zcore.c
drivers/s390/cio/device_fsm.c
drivers/s390/cio/vfio_ccw_ops.c
drivers/s390/crypto/vfio_ap_ops.c
drivers/s390/net/qeth_core.h
drivers/s390/net/qeth_core_main.c
drivers/s390/virtio/virtio_ccw.c
drivers/sbus/char/display7seg.c
drivers/scsi/aic7xxx/aic79xx.h
drivers/scsi/aic7xxx/aic7xxx.h
drivers/scsi/bnx2fc/Kconfig
drivers/scsi/bnx2i/bnx2i_iscsi.c
drivers/scsi/hpsa.c
drivers/scsi/hpsa_cmd.h
drivers/scsi/ibmvscsi/ibmvfc.c
drivers/scsi/ibmvscsi/ibmvscsi.c
drivers/scsi/ibmvscsi_tgt/ibmvscsi_tgt.c
drivers/scsi/isci/request.c
drivers/scsi/iscsi_tcp.c
drivers/scsi/libiscsi.c
drivers/scsi/libiscsi_tcp.c
drivers/scsi/lpfc/lpfc_debugfs.c
drivers/scsi/mpt3sas/mpt3sas_base.c
drivers/scsi/mpt3sas/mpt3sas_base.h
drivers/scsi/mpt3sas/mpt3sas_ctl.c
drivers/scsi/mpt3sas/mpt3sas_ctl.h
drivers/scsi/mpt3sas/mpt3sas_scsih.c
drivers/scsi/mpt3sas/mpt3sas_trigger_diag.c
drivers/scsi/myrs.c
drivers/scsi/pcmcia/nsp_cs.c
drivers/scsi/pmcraid.h
drivers/scsi/qedi/qedi_main.c
drivers/scsi/qla2xxx/qla_target.c
drivers/scsi/qla2xxx/qla_target.h
drivers/scsi/qla2xxx/tcm_qla2xxx.c
drivers/scsi/qla4xxx/ql4_os.c
drivers/scsi/scsi_transport_iscsi.c
drivers/scsi/sd.c
drivers/scsi/sd_zbc.c
drivers/scsi/smartpqi/smartpqi_init.c
drivers/scsi/st.c
drivers/scsi/ufs/ufs-mediatek.c
drivers/scsi/ufs/ufs-qcom.c
drivers/scsi/ufs/ufshcd.c
drivers/scsi/ufs/ufshcd.h
drivers/scsi/vmw_pvscsi.c
drivers/scsi/vmw_pvscsi.h
drivers/sfi/Kconfig [deleted file]
drivers/sfi/Makefile [deleted file]
drivers/sfi/sfi_acpi.c [deleted file]
drivers/sfi/sfi_core.c [deleted file]
drivers/sfi/sfi_core.h [deleted file]
drivers/sh/maple/maple.c
drivers/soc/Kconfig
drivers/soc/Makefile
drivers/soc/canaan/Kconfig [new file with mode: 0644]
drivers/soc/canaan/Makefile [new file with mode: 0644]
drivers/soc/canaan/k210-sysctl.c [new file with mode: 0644]
drivers/soc/fsl/qbman/qman.c
drivers/soc/kendryte/Kconfig [deleted file]
drivers/soc/kendryte/Makefile [deleted file]
drivers/soc/kendryte/k210-sysctl.c [deleted file]
drivers/soc/litex/Kconfig
drivers/soc/litex/litex_soc_ctrl.c
drivers/soc/qcom/qcom-geni-se.c
drivers/soc/sifive/sifive_l2_cache.c
drivers/soc/ti/omap_prm.c
drivers/soundwire/bus.c
drivers/soundwire/cadence_master.c
drivers/soundwire/intel.c
drivers/soundwire/intel.h
drivers/soundwire/intel_init.c
drivers/soundwire/slave.c
drivers/soundwire/sysfs_slave.c
drivers/spi/spi-cadence-quadspi.c
drivers/spmi/spmi-pmic-arb.c
drivers/staging/comedi/drivers/addi_apci_1032.c
drivers/staging/comedi/drivers/addi_apci_1500.c
drivers/staging/comedi/drivers/adv_pci1710.c
drivers/staging/comedi/drivers/amplc_pc236_common.c
drivers/staging/comedi/drivers/cb_pcidas.c
drivers/staging/comedi/drivers/cb_pcidas64.c
drivers/staging/comedi/drivers/comedi_parport.c
drivers/staging/comedi/drivers/das6402.c
drivers/staging/comedi/drivers/das800.c
drivers/staging/comedi/drivers/dmm32at.c
drivers/staging/comedi/drivers/me4000.c
drivers/staging/comedi/drivers/ni_6527.c
drivers/staging/comedi/drivers/ni_65xx.c
drivers/staging/comedi/drivers/pcl711.c
drivers/staging/comedi/drivers/pcl726.c
drivers/staging/comedi/drivers/pcl818.c
drivers/staging/comedi/drivers/vmk80xx.c
drivers/staging/ks7010/ks_wlan_net.c
drivers/staging/media/atomisp/include/linux/atomisp_platform.h
drivers/staging/rtl8188eu/core/rtw_ap.c
drivers/staging/rtl8188eu/os_dep/ioctl_linux.c
drivers/staging/rtl8192e/Kconfig
drivers/staging/rtl8192e/rtl8192e/rtl_wx.c
drivers/staging/rtl8192e/rtllib.h
drivers/staging/rtl8192e/rtllib_rx.c
drivers/staging/rtl8192u/r8192U_wx.c
drivers/staging/rtl8712/rtl871x_cmd.c
drivers/staging/rtl8712/rtl871x_ioctl_linux.c
drivers/staging/vme/devices/vme_user.c
drivers/staging/vt6655/rxtx.h
drivers/staging/wfx/bh.c
drivers/staging/wfx/bh.h
drivers/staging/wfx/bus.h
drivers/staging/wfx/bus_sdio.c
drivers/staging/wfx/bus_spi.c
drivers/staging/wfx/data_rx.c
drivers/staging/wfx/data_tx.c
drivers/staging/wfx/data_tx.h
drivers/staging/wfx/debug.c
drivers/staging/wfx/fwio.c
drivers/staging/wfx/hif_api_cmd.h
drivers/staging/wfx/hif_api_general.h
drivers/staging/wfx/hif_tx.c
drivers/staging/wfx/hif_tx_mib.c
drivers/staging/wfx/hwio.c
drivers/staging/wfx/hwio.h
drivers/staging/wfx/key.c
drivers/staging/wfx/key.h
drivers/staging/wfx/main.c
drivers/staging/wfx/main.h
drivers/staging/wfx/queue.c
drivers/staging/wfx/queue.h
drivers/staging/wfx/scan.h
drivers/staging/wfx/sta.c
drivers/staging/wfx/sta.h
drivers/staging/wfx/traces.h
drivers/staging/wfx/wfx.h
drivers/target/sbp/sbp_target.c
drivers/target/target_core_iblock.c
drivers/target/target_core_pr.c
drivers/target/target_core_pscsi.c
drivers/target/target_core_transport.c
drivers/target/target_core_user.c
drivers/tee/optee/core.c
drivers/thermal/thermal_sysfs.c
drivers/thunderbolt/switch.c
drivers/thunderbolt/tb.c
drivers/tty/Makefile
drivers/tty/hvc/hvcs.c
drivers/tty/n_tracerouter.c [deleted file]
drivers/tty/n_tracesink.c [deleted file]
drivers/tty/n_tracesink.h [deleted file]
drivers/tty/pty.c
drivers/tty/serial/icom.c
drivers/tty/serial/jsm/jsm_driver.c
drivers/tty/serial/max310x.c
drivers/tty/serial/qcom_geni_serial.c
drivers/tty/tty_io.c
drivers/uio/uio_pci_generic.c
drivers/usb/cdns3/cdnsp-ring.c
drivers/usb/class/cdc-acm.c
drivers/usb/class/usblp.c
drivers/usb/core/hcd.c
drivers/usb/core/quirks.c
drivers/usb/core/usb.c
drivers/usb/dwc2/hcd.c
drivers/usb/dwc3/dwc3-pci.c
drivers/usb/dwc3/dwc3-qcom.c
drivers/usb/dwc3/gadget.c
drivers/usb/gadget/configfs.c
drivers/usb/gadget/function/f_uac1.c
drivers/usb/gadget/function/f_uac2.c
drivers/usb/gadget/function/u_ether_configfs.h
drivers/usb/gadget/udc/amd5536udc_pci.c
drivers/usb/gadget/udc/aspeed-vhub/hub.c
drivers/usb/gadget/udc/s3c2410_udc.c
drivers/usb/host/xhci-mtk.c
drivers/usb/host/xhci-pci.c
drivers/usb/host/xhci-ring.c
drivers/usb/host/xhci.c
drivers/usb/host/xhci.h
drivers/usb/misc/ldusb.c
drivers/usb/musb/musb_core.c
drivers/usb/renesas_usbhs/pipe.c
drivers/usb/serial/ch341.c
drivers/usb/serial/cp210x.c
drivers/usb/serial/io_edgeport.c
drivers/usb/serial/xr_serial.c
drivers/usb/storage/transport.c
drivers/usb/storage/unusual_devs.h
drivers/usb/typec/tcpm/tcpm.c
drivers/usb/typec/tps6598x.c
drivers/usb/usbip/stub_dev.c
drivers/usb/usbip/vhci_hcd.c
drivers/usb/usbip/vhci_sysfs.c
drivers/usb/usbip/vudc_sysfs.c
drivers/vdpa/Kconfig
drivers/vdpa/ifcvf/ifcvf_main.c
drivers/vdpa/mlx5/core/mlx5_vdpa.h
drivers/vdpa/mlx5/core/mr.c
drivers/vdpa/mlx5/core/resources.c
drivers/vdpa/mlx5/net/mlx5_vnet.c
drivers/vdpa/vdpa.c
drivers/vdpa/vdpa_sim/vdpa_sim.c
drivers/vdpa/vdpa_sim/vdpa_sim.h
drivers/vdpa/vdpa_sim/vdpa_sim_net.c
drivers/vfio/Kconfig
drivers/vfio/pci/Kconfig
drivers/vfio/pci/Makefile
drivers/vfio/pci/vfio_pci.c
drivers/vfio/pci/vfio_pci_igd.c
drivers/vfio/pci/vfio_pci_private.h
drivers/vfio/pci/vfio_pci_zdev.c
drivers/vfio/platform/Kconfig
drivers/vfio/vfio.c
drivers/vfio/vfio_iommu_type1.c
drivers/vhost/scsi.c
drivers/vhost/vdpa.c
drivers/vhost/vhost.c
drivers/video/fbdev/acornfb.c
drivers/video/fbdev/aty/atyfb.h
drivers/video/fbdev/aty/atyfb_base.c
drivers/video/fbdev/core/fbcon.c
drivers/video/fbdev/hyperv_fb.c
drivers/virt/Kconfig
drivers/virt/Makefile
drivers/virt/acrn/Kconfig [new file with mode: 0644]
drivers/virt/acrn/Makefile [new file with mode: 0644]
drivers/virt/acrn/acrn_drv.h [new file with mode: 0644]
drivers/virt/acrn/hsm.c [new file with mode: 0644]
drivers/virt/acrn/hypercall.h [new file with mode: 0644]
drivers/virt/acrn/ioeventfd.c [new file with mode: 0644]
drivers/virt/acrn/ioreq.c [new file with mode: 0644]
drivers/virt/acrn/irqfd.c [new file with mode: 0644]
drivers/virt/acrn/mm.c [new file with mode: 0644]
drivers/virt/acrn/vm.c [new file with mode: 0644]
drivers/virt/vboxguest/vboxguest_utils.c
drivers/virtio/Kconfig
drivers/virtio/Makefile
drivers/virtio/virtio.c
drivers/virtio/virtio_input.c
drivers/virtio/virtio_mem.c
drivers/virtio/virtio_mmio.c
drivers/virtio/virtio_pci_common.h
drivers/virtio/virtio_pci_modern.c
drivers/virtio/virtio_pci_modern_dev.c [new file with mode: 0644]
drivers/virtio/virtio_vdpa.c
drivers/vme/vme.c
drivers/w1/masters/ds2490.c
drivers/w1/slaves/w1_therm.c
drivers/w1/w1.c
drivers/watchdog/cpu5wdt.c
drivers/watchdog/cpwd.c
drivers/watchdog/mei_wdt.c
drivers/watchdog/riowd.c
drivers/xen/Kconfig
drivers/xen/balloon.c
drivers/xen/events/events_2l.c
drivers/xen/events/events_base.c
drivers/xen/events/events_fifo.c
drivers/xen/events/events_internal.h
drivers/xen/evtchn.c
drivers/xen/gntdev.c
drivers/xen/xen-acpi-processor.c
drivers/xen/xen-front-pgdir-shbuf.c
drivers/xen/xenbus/xenbus_probe.c
fs/9p/acl.c
fs/9p/v9fs.h
fs/9p/v9fs_vfs.h
fs/9p/vfs_inode.c
fs/9p/vfs_inode_dotl.c
fs/9p/xattr.c
fs/Kconfig
fs/adfs/adfs.h
fs/adfs/inode.c
fs/affs/affs.h
fs/affs/inode.c
fs/affs/namei.c
fs/afs/dir.c
fs/afs/file.c
fs/afs/fs_operation.c
fs/afs/inode.c
fs/afs/internal.h
fs/afs/mntpt.c
fs/afs/security.c
fs/afs/write.c
fs/afs/xattr.c
fs/attr.c
fs/autofs/root.c
fs/bad_inode.c
fs/bfs/dir.c
fs/binfmt_misc.c
fs/block_dev.c
fs/btrfs/Makefile
fs/btrfs/acl.c
fs/btrfs/block-group.c
fs/btrfs/block-group.h
fs/btrfs/compression.c
fs/btrfs/ctree.c
fs/btrfs/ctree.h
fs/btrfs/delayed-inode.c
fs/btrfs/dev-replace.c
fs/btrfs/disk-io.c
fs/btrfs/extent-tree.c
fs/btrfs/extent_io.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/lzo.c
fs/btrfs/qgroup.c
fs/btrfs/qgroup.h
fs/btrfs/raid56.c
fs/btrfs/reada.c
fs/btrfs/ref-verify.c
fs/btrfs/reflink.c
fs/btrfs/scrub.c
fs/btrfs/send.c
fs/btrfs/super.c
fs/btrfs/tests/btrfs-tests.c
fs/btrfs/tree-checker.c
fs/btrfs/tree-log.c
fs/btrfs/volumes.c
fs/btrfs/xattr.c
fs/btrfs/zlib.c
fs/btrfs/zoned.c
fs/btrfs/zstd.c
fs/buffer.c
fs/cachefiles/bind.c
fs/cachefiles/interface.c
fs/cachefiles/namei.c
fs/cachefiles/rdwr.c
fs/cachefiles/xattr.c
fs/ceph/acl.c
fs/ceph/dir.c
fs/ceph/inode.c
fs/ceph/super.h
fs/ceph/xattr.c
fs/cifs/Kconfig
fs/cifs/Makefile
fs/cifs/cifs_debug.c
fs/cifs/cifs_swn.c
fs/cifs/cifsacl.c
fs/cifs/cifsacl.h
fs/cifs/cifsencrypt.c
fs/cifs/cifsfs.c
fs/cifs/cifsfs.h
fs/cifs/cifsglob.h
fs/cifs/cifspdu.h
fs/cifs/cifsproto.h
fs/cifs/cifssmb.c
fs/cifs/connect.c
fs/cifs/dfs_cache.c
fs/cifs/dir.c
fs/cifs/file.c
fs/cifs/fs_context.c
fs/cifs/fs_context.h
fs/cifs/inode.c
fs/cifs/link.c
fs/cifs/sess.c
fs/cifs/smb2glob.h
fs/cifs/smb2inode.c
fs/cifs/smb2misc.c
fs/cifs/smb2ops.c
fs/cifs/smb2pdu.c
fs/cifs/smb2proto.h
fs/cifs/smb2transport.c
fs/cifs/trace.h
fs/cifs/transport.c
fs/cifs/xattr.c
fs/coda/coda_linux.h
fs/coda/dir.c
fs/coda/inode.c
fs/coda/pioctl.c
fs/configfs/configfs_internal.h
fs/configfs/dir.c
fs/configfs/file.c
fs/configfs/inode.c
fs/configfs/symlink.c
fs/coredump.c
fs/crypto/bio.c
fs/crypto/policy.c
fs/dcache.c
fs/debugfs/inode.c
fs/direct-io.c
fs/ecryptfs/crypto.c
fs/ecryptfs/inode.c
fs/ecryptfs/main.c
fs/ecryptfs/mmap.c
fs/efivarfs/file.c
fs/efivarfs/inode.c
fs/erofs/data.c
fs/erofs/inode.c
fs/erofs/internal.h
fs/erofs/zdata.c
fs/exec.c
fs/exfat/exfat_fs.h
fs/exfat/file.c
fs/exfat/namei.c
fs/ext2/acl.c
fs/ext2/acl.h
fs/ext2/ext2.h
fs/ext2/ialloc.c
fs/ext2/inode.c
fs/ext2/ioctl.c
fs/ext2/namei.c
fs/ext2/xattr_security.c
fs/ext2/xattr_trusted.c
fs/ext2/xattr_user.c
fs/ext4/.kunitconfig [new file with mode: 0644]
fs/ext4/Kconfig
fs/ext4/acl.c
fs/ext4/acl.h
fs/ext4/balloc.c
fs/ext4/ext4.h
fs/ext4/extents.c
fs/ext4/fast_commit.c
fs/ext4/ialloc.c
fs/ext4/inode.c
fs/ext4/ioctl.c
fs/ext4/mballoc.c
fs/ext4/namei.c
fs/ext4/page-io.c
fs/ext4/readpage.c
fs/ext4/super.c
fs/ext4/sysfs.c
fs/ext4/verity.c
fs/ext4/xattr.c
fs/ext4/xattr_hurd.c
fs/ext4/xattr_security.c
fs/ext4/xattr_trusted.c
fs/ext4/xattr_user.c
fs/f2fs/acl.c
fs/f2fs/acl.h
fs/f2fs/checkpoint.c
fs/f2fs/data.c
fs/f2fs/f2fs.h
fs/f2fs/file.c
fs/f2fs/namei.c
fs/f2fs/node.c
fs/f2fs/segment.c
fs/f2fs/segment.h
fs/f2fs/super.c
fs/f2fs/xattr.c
fs/fat/fat.h
fs/fat/file.c
fs/fat/namei_msdos.c
fs/fat/namei_vfat.c
fs/fcntl.c
fs/fhandle.c
fs/file.c
fs/fuse/acl.c
fs/fuse/dev.c
fs/fuse/dir.c
fs/fuse/fuse_i.h
fs/fuse/virtio_fs.c
fs/fuse/xattr.c
fs/gfs2/acl.c
fs/gfs2/acl.h
fs/gfs2/bmap.c
fs/gfs2/file.c
fs/gfs2/glock.c
fs/gfs2/glock.h
fs/gfs2/glops.c
fs/gfs2/incore.h
fs/gfs2/inode.c
fs/gfs2/inode.h
fs/gfs2/lock_dlm.c
fs/gfs2/log.c
fs/gfs2/log.h
fs/gfs2/lops.c
fs/gfs2/lops.h
fs/gfs2/main.c
fs/gfs2/ops_fstype.c
fs/gfs2/recovery.c
fs/gfs2/rgrp.c
fs/gfs2/rgrp.h
fs/gfs2/super.c
fs/gfs2/super.h
fs/gfs2/trace_gfs2.h
fs/gfs2/trans.c
fs/gfs2/trans.h
fs/gfs2/util.c
fs/gfs2/util.h
fs/gfs2/xattr.c
fs/hfs/attr.c
fs/hfs/dir.c
fs/hfs/hfs_fs.h
fs/hfs/inode.c
fs/hfsplus/dir.c
fs/hfsplus/hfsplus_fs.h
fs/hfsplus/inode.c
fs/hfsplus/ioctl.c
fs/hfsplus/xattr.c
fs/hfsplus/xattr_security.c
fs/hfsplus/xattr_trusted.c
fs/hfsplus/xattr_user.c
fs/hostfs/hostfs_kern.c
fs/hpfs/hpfs_fn.h
fs/hpfs/inode.c
fs/hpfs/namei.c
fs/hugetlbfs/inode.c
fs/init.c
fs/inode.c
fs/internal.h
fs/io-wq.c
fs/io-wq.h
fs/io_uring.c
fs/iomap/buffered-io.c
fs/iomap/direct-io.c
fs/iomap/seek.c
fs/iomap/swapfile.c
fs/jffs2/acl.c
fs/jffs2/acl.h
fs/jffs2/dir.c
fs/jffs2/fs.c
fs/jffs2/os-linux.h
fs/jffs2/security.c
fs/jffs2/xattr_trusted.c
fs/jffs2/xattr_user.c
fs/jfs/acl.c
fs/jfs/file.c
fs/jfs/ioctl.c
fs/jfs/jfs_acl.h
fs/jfs/jfs_inode.c
fs/jfs/jfs_inode.h
fs/jfs/namei.c
fs/jfs/super.c
fs/jfs/xattr.c
fs/kernfs/dir.c
fs/kernfs/inode.c
fs/kernfs/kernfs-internal.h
fs/libfs.c
fs/locks.c
fs/minix/bitmap.c
fs/minix/file.c
fs/minix/inode.c
fs/minix/minix.h
fs/minix/namei.c
fs/mount.h
fs/mpage.c
fs/namei.c
fs/namespace.c
fs/nfs/Kconfig
fs/nfs/blocklayout/blocklayout.c
fs/nfs/dir.c
fs/nfs/file.c
fs/nfs/fs_context.c
fs/nfs/fscache.c
fs/nfs/inode.c
fs/nfs/internal.h
fs/nfs/namespace.c
fs/nfs/nfs3_fs.h
fs/nfs/nfs3acl.c
fs/nfs/nfs3xdr.c
fs/nfs/nfs42proc.c
fs/nfs/nfs4client.c
fs/nfs/nfs4proc.c
fs/nfs/nfs4state.c
fs/nfs/pnfs.c
fs/nfs/read.c
fs/nfs/super.c
fs/nfs/unlink.c
fs/nfs/write.c
fs/nfsd/Kconfig
fs/nfsd/export.c
fs/nfsd/filecache.c
fs/nfsd/nfs2acl.c
fs/nfsd/nfs3acl.c
fs/nfsd/nfs4acl.c
fs/nfsd/nfs4callback.c
fs/nfsd/nfs4proc.c
fs/nfsd/nfs4recover.c
fs/nfsd/nfs4state.c
fs/nfsd/nfsfh.c
fs/nfsd/nfsproc.c
fs/nfsd/vfs.c
fs/nilfs2/inode.c
fs/nilfs2/ioctl.c
fs/nilfs2/namei.c
fs/nilfs2/nilfs.h
fs/nilfs2/segbuf.c
fs/notify/fanotify/fanotify_user.c
fs/notify/inotify/inotify_user.c
fs/ntfs/inode.c
fs/ntfs/inode.h
fs/ntfs/layout.h
fs/ocfs2/acl.c
fs/ocfs2/acl.h
fs/ocfs2/cluster/heartbeat.c
fs/ocfs2/dlm/dlmast.c
fs/ocfs2/dlm/dlmcommon.h
fs/ocfs2/dlmfs/dlmfs.c
fs/ocfs2/file.c
fs/ocfs2/file.h
fs/ocfs2/ioctl.c
fs/ocfs2/namei.c
fs/ocfs2/refcounttree.c
fs/ocfs2/super.c
fs/ocfs2/xattr.c
fs/omfs/dir.c
fs/omfs/file.c
fs/omfs/inode.c
fs/open.c
fs/orangefs/acl.c
fs/orangefs/inode.c
fs/orangefs/namei.c
fs/orangefs/orangefs-kernel.h
fs/orangefs/xattr.c
fs/overlayfs/copy_up.c
fs/overlayfs/dir.c
fs/overlayfs/file.c
fs/overlayfs/inode.c
fs/overlayfs/overlayfs.h
fs/overlayfs/super.c
fs/overlayfs/util.c
fs/pipe.c
fs/pnode.h
fs/posix_acl.c
fs/proc/base.c
fs/proc/fd.c
fs/proc/fd.h
fs/proc/generic.c
fs/proc/internal.h
fs/proc/meminfo.c
fs/proc/proc_net.c
fs/proc/proc_sysctl.c
fs/proc/root.c
fs/proc/self.c
fs/proc/task_mmu.c
fs/proc/thread_self.c
fs/proc/vmcore.c
fs/proc_namespace.c
fs/pstore/inode.c
fs/pstore/ram_core.c
fs/ramfs/file-nommu.c
fs/ramfs/inode.c
fs/reiserfs/acl.h
fs/reiserfs/inode.c
fs/reiserfs/ioctl.c
fs/reiserfs/namei.c
fs/reiserfs/reiserfs.h
fs/reiserfs/xattr.c
fs/reiserfs/xattr.h
fs/reiserfs/xattr_acl.c
fs/reiserfs/xattr_security.c
fs/reiserfs/xattr_trusted.c
fs/reiserfs/xattr_user.c
fs/remap_range.c
fs/select.c
fs/squashfs/block.c
fs/squashfs/export.c
fs/squashfs/id.c
fs/squashfs/squashfs_fs.h
fs/squashfs/xattr_id.c
fs/stat.c
fs/sysv/file.c
fs/sysv/ialloc.c
fs/sysv/itree.c
fs/sysv/namei.c
fs/sysv/sysv.h
fs/tracefs/inode.c
fs/ubifs/dir.c
fs/ubifs/file.c
fs/ubifs/ioctl.c
fs/ubifs/ubifs.h
fs/ubifs/xattr.c
fs/udf/file.c
fs/udf/ialloc.c
fs/udf/namei.c
fs/udf/symlink.c
fs/ufs/ialloc.c
fs/ufs/inode.c
fs/ufs/namei.c
fs/ufs/ufs.h
fs/utimes.c
fs/vboxsf/dir.c
fs/vboxsf/utils.c
fs/vboxsf/vfsmod.h
fs/verity/enable.c
fs/xattr.c
fs/xfs/libxfs/xfs_btree.c
fs/xfs/xfs_acl.c
fs/xfs/xfs_acl.h
fs/xfs/xfs_aops.c
fs/xfs/xfs_bio_io.c
fs/xfs/xfs_buf.c
fs/xfs/xfs_extent_busy.c
fs/xfs/xfs_file.c
fs/xfs/xfs_inode.c
fs/xfs/xfs_inode.h
fs/xfs/xfs_ioctl.c
fs/xfs/xfs_ioctl32.c
fs/xfs/xfs_iops.c
fs/xfs/xfs_iops.h
fs/xfs/xfs_itable.c
fs/xfs/xfs_itable.h
fs/xfs/xfs_mount.c
fs/xfs/xfs_qm.c
fs/xfs/xfs_super.c
fs/xfs/xfs_symlink.c
fs/xfs/xfs_symlink.h
fs/xfs/xfs_sysctl.c
fs/xfs/xfs_trans.c
fs/xfs/xfs_trans.h
fs/xfs/xfs_xattr.c
fs/zonefs/super.c
include/acpi/acpi_bus.h
include/asm-generic/Kbuild
include/asm-generic/numa.h [new file with mode: 0644]
include/asm-generic/softirq_stack.h [new file with mode: 0644]
include/asm-generic/vmlinux.lds.h
include/crypto/public_key.h
include/drm/drm_file.h
include/drm/ttm/ttm_bo_api.h
include/dt-bindings/clock/k210-clk.h
include/dt-bindings/input/cros-ec-keyboard.h [new file with mode: 0644]
include/dt-bindings/interconnect/qcom,msm8939.h [new file with mode: 0644]
include/dt-bindings/interconnect/qcom,sdx55.h [new file with mode: 0644]
include/dt-bindings/pinctrl/k210-fpioa.h [new file with mode: 0644]
include/dt-bindings/reset/k210-rst.h [new file with mode: 0644]
include/keys/encrypted-type.h
include/kvm/arm_pmu.h
include/linux/acpi.h
include/linux/amba/bus.h
include/linux/atmdev.h
include/linux/bio.h
include/linux/bitops.h
include/linux/blkdev.h
include/linux/blktrace_api.h
include/linux/bpf.h
include/linux/can/can-ml.h
include/linux/can/skb.h
include/linux/capability.h
include/linux/cfag12864b.h
include/linux/compiler-clang.h
include/linux/compiler-gcc.h
include/linux/compiler-version.h [new file with mode: 0644]
include/linux/compiler_attributes.h
include/linux/coresight-pmu.h
include/linux/coresight.h
include/linux/cpu.h
include/linux/cpuhotplug.h
include/linux/cred.h
include/linux/device-mapper.h
include/linux/device.h
include/linux/device/driver.h
include/linux/dfl.h [new file with mode: 0644]
include/linux/dma-fence.h
include/linux/dma-heap.h
include/linux/dma-map-ops.h
include/linux/dma-mapping.h
include/linux/dma/k3-psil.h
include/linux/dma/mmp-pdma.h [deleted file]
include/linux/dmaengine.h
include/linux/eeprom_93xx46.h
include/linux/efi.h
include/linux/export.h
include/linux/extcon.h
include/linux/firmware/intel/stratix10-svc-client.h
include/linux/firmware/xlnx-zynqmp.h
include/linux/fortify-string.h [new file with mode: 0644]
include/linux/fs.h
include/linux/fsl/mc.h
include/linux/fwnode.h
include/linux/gfp.h
include/linux/gpio/consumer.h
include/linux/hid-sensor-hub.h
include/linux/hid.h
include/linux/highmem-internal.h
include/linux/highmem.h
include/linux/host1x.h
include/linux/huge_mm.h
include/linux/hugetlb.h
include/linux/hugetlb_cgroup.h
include/linux/icmpv6.h
include/linux/if_macvlan.h
include/linux/ima.h
include/linux/init.h
include/linux/initrd.h
include/linux/intel-pti.h [deleted file]
include/linux/interrupt.h
include/linux/io_uring.h
include/linux/ipv6.h
include/linux/irqdomain.h
include/linux/kallsyms.h
include/linux/kasan-checks.h
include/linux/kasan.h
include/linux/kconfig.h
include/linux/kexec.h
include/linux/key.h
include/linux/kfence.h [new file with mode: 0644]
include/linux/kgdb.h
include/linux/khugepaged.h
include/linux/ks0108.h
include/linux/kvm_host.h
include/linux/led-class-flash.h
include/linux/led-class-multicolor.h
include/linux/leds.h
include/linux/litex.h
include/linux/lsm_hook_defs.h
include/linux/lsm_hooks.h
include/linux/mdev.h
include/linux/mei_cl_bus.h
include/linux/memblock.h
include/linux/memcontrol.h
include/linux/memory.h
include/linux/memory_hotplug.h
include/linux/memremap.h
include/linux/mfd/intel_msic.h [deleted file]
include/linux/mhi.h
include/linux/migrate.h
include/linux/mlx5/qp.h
include/linux/mm.h
include/linux/mm_inline.h
include/linux/mm_types.h
include/linux/mmu_notifier.h
include/linux/mmzone.h
include/linux/mod_devicetable.h
include/linux/module.h
include/linux/mount.h
include/linux/mutex.h
include/linux/nd.h
include/linux/net.h
include/linux/netdevice.h
include/linux/netfilter/x_tables.h
include/linux/nfs_fs.h
include/linux/nfs_fs_sb.h
include/linux/objtool.h
include/linux/of_irq.h
include/linux/page-flags.h
include/linux/page_counter.h
include/linux/pagemap.h
include/linux/pagevec.h
include/linux/pci-epc.h
include/linux/pci-epf.h
include/linux/pci.h
include/linux/pci_ids.h
include/linux/perf_event.h
include/linux/pgtable.h
include/linux/platform_data/dma-atmel.h [deleted file]
include/linux/platform_data/dma-coh901318.h [deleted file]
include/linux/platform_data/dma-imx-sdma.h
include/linux/platform_data/i2c-hid.h [deleted file]
include/linux/platform_profile.h
include/linux/posix_acl.h
include/linux/posix_acl_xattr.h
include/linux/property.h
include/linux/ptrace.h
include/linux/qcom-geni-se.h
include/linux/regulator/pca9450.h
include/linux/restart_block.h
include/linux/rmap.h
include/linux/rpmsg/qcom_glink.h
include/linux/sched.h
include/linux/sched/mm.h
include/linux/sched/task.h
include/linux/security.h
include/linux/seqlock.h
include/linux/sfi.h [deleted file]
include/linux/sfi_acpi.h [deleted file]
include/linux/sirfsoc_dma.h [deleted file]
include/linux/skbuff.h
include/linux/slab_def.h
include/linux/slub_def.h
include/linux/soundwire/sdw.h
include/linux/soundwire/sdw_intel.h
include/linux/stackdepot.h
include/linux/stop_machine.h
include/linux/string.h
include/linux/sunrpc/svc_rdma.h
include/linux/swap.h
include/linux/swiotlb.h
include/linux/syscalls.h
include/linux/textsearch.h
include/linux/thread_info.h
include/linux/trace_events.h
include/linux/u64_stats_sync.h
include/linux/usb.h
include/linux/usb/composite.h
include/linux/usb_usual.h
include/linux/usermode_driver.h
include/linux/vdpa.h
include/linux/verification.h
include/linux/vfio.h
include/linux/virtio.h
include/linux/virtio_net.h
include/linux/virtio_pci_modern.h [new file with mode: 0644]
include/linux/vme.h
include/linux/vmstat.h
include/linux/vmw_vmci_defs.h
include/linux/ww_mutex.h
include/linux/xarray.h
include/linux/xattr.h
include/linux/zpool.h
include/linux/zsmalloc.h
include/media/rc-map.h
include/net/dst.h
include/net/icmp.h
include/net/inet_connection_sock.h
include/net/netfilter/nf_tables.h
include/net/nexthop.h
include/net/red.h
include/net/rtnetlink.h
include/net/sock.h
include/scsi/libiscsi.h
include/scsi/scsi_transport_iscsi.h
include/soc/canaan/k210-sysctl.h [new file with mode: 0644]
include/sound/intel-nhlt.h
include/sound/soc-acpi.h
include/target/target_core_backend.h
include/trace/events/bcache.h
include/trace/events/block.h
include/trace/events/error_report.h [new file with mode: 0644]
include/trace/events/kmem.h
include/trace/events/pagemap.h
include/trace/events/rpcrdma.h
include/trace/events/workqueue.h
include/uapi/asm-generic/unistd.h
include/uapi/linux/acrn.h [new file with mode: 0644]
include/uapi/linux/blkpg.h
include/uapi/linux/bpf.h
include/uapi/linux/cxl_mem.h [new file with mode: 0644]
include/uapi/linux/firewire-cdev.h
include/uapi/linux/fsl_mc.h [new file with mode: 0644]
include/uapi/linux/fuse.h
include/uapi/linux/gfs2_ondisk.h
include/uapi/linux/input.h
include/uapi/linux/io_uring.h
include/uapi/linux/kvm.h
include/uapi/linux/l2tp.h
include/uapi/linux/map_to_7segment.h
include/uapi/linux/mempolicy.h
include/uapi/linux/misc/bcm_vk.h [new file with mode: 0644]
include/uapi/linux/mount.h
include/uapi/linux/netfilter/nfnetlink_cthelper.h
include/uapi/linux/psample.h
include/uapi/linux/vdpa.h [new file with mode: 0644]
include/uapi/linux/vfio.h
include/uapi/misc/habanalabs.h
include/xen/grant_table.h
include/xen/xenbus.h
init/Kconfig
init/initramfs.c
init/main.c
init/version.c
ipc/mqueue.c
kernel/audit_fsnotify.c
kernel/auditsc.c
kernel/bpf/bpf_inode_storage.c
kernel/bpf/bpf_struct_ops.c
kernel/bpf/btf.c
kernel/bpf/core.c
kernel/bpf/inode.c
kernel/bpf/preload/bpf_preload_kern.c
kernel/bpf/syscall.c
kernel/bpf/trampoline.c
kernel/bpf/verifier.c
kernel/capability.c
kernel/cgroup/cgroup.c
kernel/debug/debug_core.c
kernel/dma/map_benchmark.c
kernel/dma/mapping.c
kernel/dma/swiotlb.c
kernel/events/core.c
kernel/events/uprobes.c
kernel/fork.c
kernel/futex.c
kernel/gcov/clang.c
kernel/groups.c
kernel/irq/irq_sim.c
kernel/irq/irqdomain.c
kernel/irq/manage.c
kernel/jump_label.c
kernel/kallsyms.c
kernel/kexec_internal.h
kernel/livepatch/core.c
kernel/locking/mutex.c
kernel/locking/rtmutex.c
kernel/locking/rwsem.c
kernel/locking/semaphore.c
kernel/module.c
kernel/module_signature.c
kernel/module_signing.c
kernel/power/energy_model.c
kernel/reboot.c
kernel/sched/core.c
kernel/sched/cpufreq_schedutil.c
kernel/sched/fair.c
kernel/sched/membarrier.c
kernel/signal.c
kernel/softirq.c
kernel/static_call.c
kernel/sys.c
kernel/sysctl.c
kernel/time/alarmtimer.c
kernel/time/hrtimer.c
kernel/time/posix-cpu-timers.c
kernel/trace/Kconfig
kernel/trace/Makefile
kernel/trace/blktrace.c
kernel/trace/error_report-traces.c [new file with mode: 0644]
kernel/trace/ftrace.c
kernel/trace/ring_buffer.c
kernel/trace/trace.c
kernel/trace/trace.h
kernel/trace/trace_events_synth.c
kernel/trace/trace_kprobe.c
kernel/usermode_driver.c
kernel/watch_queue.c
kernel/watchdog.c
kernel/workqueue.c
lib/Kconfig
lib/Kconfig.debug
lib/Kconfig.kasan
lib/Kconfig.kfence [new file with mode: 0644]
lib/Kconfig.ubsan
lib/Makefile
lib/bug.c
lib/cmdline.c
lib/extable.c
lib/genalloc.c
lib/iov_iter.c
lib/logic_pio.c
lib/math/div64.c
lib/stackdepot.c
lib/test_kasan.c
lib/test_kasan_module.c
lib/test_ubsan.c
lib/test_xarray.c
lib/ubsan.c
lib/xarray.c
mm/Makefile
mm/backing-dev.c
mm/cma.c
mm/compaction.c
mm/debug.c
mm/debug_vm_pgtable.c
mm/dmapool.c
mm/early_ioremap.c
mm/filemap.c
mm/gup.c
mm/highmem.c
mm/huge_memory.c
mm/hugetlb.c
mm/hugetlb_cgroup.c
mm/internal.h
mm/kasan/common.c
mm/kasan/generic.c
mm/kasan/hw_tags.c
mm/kasan/kasan.h
mm/kasan/quarantine.c
mm/kasan/report.c
mm/kasan/report_generic.c
mm/kasan/report_hw_tags.c
mm/kasan/report_sw_tags.c
mm/kasan/shadow.c
mm/kasan/sw_tags.c
mm/kfence/Makefile [new file with mode: 0644]
mm/kfence/core.c [new file with mode: 0644]
mm/kfence/kfence.h [new file with mode: 0644]
mm/kfence/kfence_test.c [new file with mode: 0644]
mm/kfence/report.c [new file with mode: 0644]
mm/khugepaged.c
mm/kmemleak.c
mm/list_lru.c
mm/madvise.c
mm/memcontrol.c
mm/memory-failure.c
mm/memory.c
mm/memory_hotplug.c
mm/mempolicy.c
mm/mempool.c
mm/memremap.c
mm/migrate.c
mm/mincore.c
mm/mlock.c
mm/mmap.c
mm/mmu_notifier.c
mm/mprotect.c
mm/mremap.c
mm/oom_kill.c
mm/page-writeback.c
mm/page_alloc.c
mm/page_io.c
mm/page_owner.c
mm/page_reporting.c
mm/percpu.c
mm/pgtable-generic.c
mm/rmap.c
mm/shmem.c
mm/slab.c
mm/slab.h
mm/slab_common.c
mm/slob.c
mm/slub.c
mm/swap.c
mm/swap_slots.c
mm/swap_state.c
mm/swapfile.c
mm/truncate.c
mm/vmscan.c
mm/vmstat.c
mm/workingset.c
mm/z3fold.c
mm/zbud.c
mm/zpool.c
mm/zsmalloc.c
mm/zswap.c
net/9p/client.c
net/batman-adv/main.c
net/bridge/br_switchdev.c
net/can/af_can.c
net/can/isotp.c
net/can/j1939/main.c
net/can/j1939/socket.c
net/can/proc.c
net/core/dev.c
net/core/drop_monitor.c
net/core/dst.c
net/core/filter.c
net/core/flow_dissector.c
net/core/rtnetlink.c
net/core/skbuff.c
net/core/sock.c
net/dccp/ipv6.c
net/dsa/Kconfig
net/dsa/dsa2.c
net/dsa/dsa_priv.h
net/dsa/slave.c
net/dsa/tag_mtk.c
net/dsa/tag_rtl4_a.c
net/ethtool/channels.c
net/hsr/hsr_framereg.c
net/hsr/hsr_framereg.h
net/hsr/hsr_main.h
net/ipv4/af_inet.c
net/ipv4/cipso_ipv4.c
net/ipv4/icmp.c
net/ipv4/inet_connection_sock.c
net/ipv4/inetpeer.c
net/ipv4/ip_tunnel.c
net/ipv4/ip_vti.c
net/ipv4/ipconfig.c
net/ipv4/netfilter/arp_tables.c
net/ipv4/netfilter/ip_tables.c
net/ipv4/nexthop.c
net/ipv4/route.c
net/ipv4/tcp.c
net/ipv4/tcp_minisocks.c
net/ipv4/udp_offload.c
net/ipv6/af_inet6.c
net/ipv6/calipso.c
net/ipv6/icmp.c
net/ipv6/ip6_fib.c
net/ipv6/ip6_gre.c
net/ipv6/ip6_icmp.c
net/ipv6/ip6_input.c
net/ipv6/ip6_tunnel.c
net/ipv6/ip6_vti.c
net/ipv6/netfilter/ip6_tables.c
net/ipv6/route.c
net/ipv6/sit.c
net/ipv6/tcp_ipv6.c
net/l2tp/l2tp_core.c
net/l2tp/l2tp_core.h
net/l2tp/l2tp_netlink.c
net/mac80211/aead_api.c
net/mac80211/aes_gmac.c
net/mac80211/cfg.c
net/mac80211/ibss.c
net/mac80211/main.c
net/mac80211/mlme.c
net/mac80211/rc80211_minstrel_ht.c
net/mac80211/util.c
net/mpls/mpls_gso.c
net/mptcp/options.c
net/mptcp/protocol.c
net/mptcp/protocol.h
net/mptcp/subflow.c
net/netfilter/nf_conntrack_helper.c
net/netfilter/nf_conntrack_netlink.c
net/netfilter/nf_conntrack_proto_gre.c
net/netfilter/nf_conntrack_proto_tcp.c
net/netfilter/nf_flow_table_core.c
net/netfilter/nf_nat_proto.c
net/netfilter/nf_tables_api.c
net/netfilter/x_tables.c
net/netlabel/netlabel_cipso_v4.c
net/openvswitch/conntrack.c
net/openvswitch/conntrack.h
net/openvswitch/flow.c
net/psample/psample.c
net/qrtr/qrtr.c
net/qrtr/tun.c
net/sched/act_ct.c
net/sched/cls_api.c
net/sched/cls_flower.c
net/sched/sch_api.c
net/sched/sch_choke.c
net/sched/sch_gred.c
net/sched/sch_htb.c
net/sched/sch_red.c
net/sched/sch_sfq.c
net/sctp/output.c
net/sctp/outqueue.c
net/sctp/tsnmap.c
net/socket.c
net/sunrpc/auth_gss/svcauth_gss.c
net/sunrpc/rpc_pipe.c
net/sunrpc/sched.c
net/sunrpc/svc.c
net/sunrpc/svc_xprt.c
net/sunrpc/xprtrdma/backchannel.c
net/sunrpc/xprtrdma/frwr_ops.c
net/sunrpc/xprtrdma/rpc_rdma.c
net/sunrpc/xprtrdma/svc_rdma_backchannel.c
net/sunrpc/xprtrdma/svc_rdma_recvfrom.c
net/sunrpc/xprtrdma/xprt_rdma.h
net/sunrpc/xprtsock.c
net/tipc/node.c
net/unix/af_unix.c
net/vmw_vsock/af_vsock.c
net/wireless/nl80211.c
samples/Kconfig
samples/acrn/Makefile [new file with mode: 0644]
samples/acrn/guest.ld [new file with mode: 0644]
samples/acrn/payload.ld [new file with mode: 0644]
samples/acrn/vm-sample.c [new file with mode: 0644]
samples/auxdisplay/cfag12864b-example.c
samples/bpf/xdpsock_user.c
samples/watch_queue/watch_test.c
scripts/Kbuild.include
scripts/Kconfig.include
scripts/Makefile.build
scripts/Makefile.clean
scripts/Makefile.lib
scripts/Makefile.modfinal
scripts/Makefile.modpost
scripts/Makefile.ubsan
scripts/adjust_autoksyms.sh
scripts/cc-version.sh [new file with mode: 0755]
scripts/checkpatch.pl
scripts/clang-tools/gen_compile_commands.py
scripts/clang-version.sh [deleted file]
scripts/dtc/.gitignore
scripts/dtc/Makefile
scripts/dummy-tools/gcc
scripts/gcc-plugins/Makefile
scripts/gcc-plugins/latent_entropy_plugin.c
scripts/gcc-plugins/structleak_plugin.c
scripts/gcc-version.sh [deleted file]
scripts/gdb/linux/Makefile
scripts/gdb/linux/lists.py
scripts/gen_autoksyms.sh
scripts/generate_initcall_order.pl [new file with mode: 0755]
scripts/genksyms/genksyms.c
scripts/genksyms/genksyms.h
scripts/genksyms/lex.l
scripts/kconfig/Makefile
scripts/kconfig/conf.c
scripts/kernel-doc
scripts/ld-version.sh
scripts/link-vmlinux.sh
scripts/lld-version.sh [deleted file]
scripts/mod/Makefile
scripts/mod/devicetable-offsets.c
scripts/mod/file2alias.c
scripts/mod/modpost.c
scripts/mod/modpost.h
scripts/mod/sumversion.c
scripts/module.lds.S
scripts/recordmcount.c
scripts/spdxcheck.py
scripts/spelling.txt
scripts/syscallhdr.sh [new file with mode: 0755]
scripts/syscalltbl.sh [new file with mode: 0755]
scripts/test_dwarf5_support.sh [new file with mode: 0755]
scripts/ver_linux
security/apparmor/apparmorfs.c
security/apparmor/domain.c
security/apparmor/file.c
security/apparmor/lsm.c
security/commoncap.c
security/integrity/evm/evm_crypto.c
security/integrity/evm/evm_main.c
security/integrity/evm/evm_secfs.c
security/integrity/iint.c
security/integrity/ima/ima.h
security/integrity/ima/ima_api.c
security/integrity/ima/ima_appraise.c
security/integrity/ima/ima_asymmetric_keys.c
security/integrity/ima/ima_main.c
security/integrity/ima/ima_mok.c
security/integrity/ima/ima_policy.c
security/integrity/ima/ima_queue_keys.c
security/keys/Kconfig
security/keys/big_key.c
security/keys/key.c
security/keys/keyctl.c
security/keys/keyctl_pkey.c
security/keys/keyring.c
security/keys/process_keys.c
security/security.c
security/selinux/hooks.c
security/selinux/include/security.h
security/selinux/selinuxfs.c
security/selinux/ss/avtab.c
security/selinux/ss/avtab.h
security/selinux/ss/conditional.c
security/selinux/ss/services.c
security/selinux/ss/sidtab.c
security/selinux/ss/sidtab.h
security/smack/smack_lsm.c
security/tomoyo/network.c
sound/drivers/aloop.c
sound/drivers/dummy.c
sound/drivers/mtpav.c
sound/drivers/mts64.c
sound/drivers/pcsp/pcsp.c
sound/drivers/portman2x4.c
sound/drivers/serial-u16550.c
sound/drivers/virmidi.c
sound/firewire/dice/dice-stream.c
sound/hda/Kconfig
sound/hda/Makefile
sound/hda/ext/hdac_ext_controller.c
sound/hda/ext/hdac_ext_stream.c
sound/hda/hdac_regmap.c
sound/hda/intel-dsp-config.c
sound/hda/intel-nhlt.c
sound/hda/intel-sdw-acpi.c [new file with mode: 0644]
sound/isa/ad1816a/ad1816a.c
sound/isa/ad1848/ad1848.c
sound/isa/als100.c
sound/isa/azt2320.c
sound/isa/cmi8330.c
sound/isa/cs423x/cs4231.c
sound/isa/cs423x/cs4236.c
sound/isa/es1688/es1688.c
sound/isa/es18xx.c
sound/isa/gus/gusclassic.c
sound/isa/gus/gusextreme.c
sound/isa/gus/gusmax.c
sound/isa/gus/interwave.c
sound/isa/opl3sa2.c
sound/isa/opti9xx/miro.c
sound/isa/opti9xx/opti92x-ad1848.c
sound/isa/sb/jazz16.c
sound/isa/sb/sb16.c
sound/isa/sb/sb8.c
sound/isa/sc6000.c
sound/isa/wavefront/wavefront.c
sound/mips/sgio2audio.c
sound/mips/snd-n64.c
sound/pci/ad1889.c
sound/pci/ali5451/ali5451.c
sound/pci/als300.c
sound/pci/als4000.c
sound/pci/atiixp.c
sound/pci/atiixp_modem.c
sound/pci/au88x0/au88x0.c
sound/pci/azt3328.c
sound/pci/bt87x.c
sound/pci/ca0106/ca0106_main.c
sound/pci/cmipci.c
sound/pci/cs4281.c
sound/pci/cs46xx/cs46xx.c
sound/pci/cs5535audio/cs5535audio.c
sound/pci/ctxfi/cthw20k2.c
sound/pci/ctxfi/xfi.c
sound/pci/echoaudio/echoaudio.c
sound/pci/emu10k1/emu10k1.c
sound/pci/emu10k1/emu10k1x.c
sound/pci/ens1370.c
sound/pci/es1938.c
sound/pci/es1968.c
sound/pci/fm801.c
sound/pci/hda/hda_bind.c
sound/pci/hda/hda_codec.c
sound/pci/hda/hda_controller.c
sound/pci/hda/hda_generic.c
sound/pci/hda/hda_intel.c
sound/pci/hda/hda_jack.c
sound/pci/hda/patch_ca0132.c
sound/pci/hda/patch_conexant.c
sound/pci/hda/patch_hdmi.c
sound/pci/hda/patch_realtek.c
sound/pci/ice1712/ice1712.c
sound/pci/ice1712/ice1724.c
sound/pci/intel8x0.c
sound/pci/intel8x0m.c
sound/pci/korg1212/korg1212.c
sound/pci/lola/lola.c
sound/pci/lx6464es/lx6464es.c
sound/pci/maestro3.c
sound/pci/mixart/mixart.c
sound/pci/nm256/nm256.c
sound/pci/oxygen/oxygen.c
sound/pci/oxygen/se6x.c
sound/pci/oxygen/virtuoso.c
sound/pci/pcxhr/pcxhr.c
sound/pci/riptide/riptide.c
sound/pci/rme32.c
sound/pci/rme96.c
sound/pci/rme9652/hdsp.c
sound/pci/rme9652/hdspm.c
sound/pci/rme9652/rme9652.c
sound/pci/sis7019.c
sound/pci/sonicvibes.c
sound/pci/trident/trident.c
sound/pci/via82xx.c
sound/pci/via82xx_modem.c
sound/pci/vx222/vx222.c
sound/pci/ymfpci/ymfpci.c
sound/pcmcia/pdaudiocf/pdaudiocf.c
sound/pcmcia/vx/vxpocket.c
sound/ppc/powermac.c
sound/sh/aica.c
sound/sh/sh_dac_audio.c
sound/soc/bcm/cygnus-ssp.c
sound/soc/codecs/Kconfig
sound/soc/codecs/ak4458.c
sound/soc/codecs/ak5558.c
sound/soc/codecs/cs42l42.c
sound/soc/codecs/cs42l42.h
sound/soc/codecs/es8316.c
sound/soc/codecs/lpass-rx-macro.c
sound/soc/codecs/lpass-tx-macro.c
sound/soc/codecs/lpass-va-macro.c
sound/soc/codecs/lpass-wsa-macro.c
sound/soc/codecs/max98373-i2c.c
sound/soc/codecs/max98373-sdw.c
sound/soc/codecs/max98373.c
sound/soc/codecs/rt1015.c
sound/soc/codecs/rt5640.c
sound/soc/codecs/rt5651.c
sound/soc/codecs/rt5659.c
sound/soc/codecs/rt5670.c
sound/soc/codecs/rt5670.h
sound/soc/codecs/rt711.c
sound/soc/codecs/sgtl5000.c
sound/soc/codecs/sirf-audio-codec.h [deleted file]
sound/soc/codecs/wcd934x.c
sound/soc/codecs/wm8960.c
sound/soc/fsl/fsl_esai.c
sound/soc/fsl/fsl_ssi.c
sound/soc/generic/simple-card-utils.c
sound/soc/intel/atom/sst-mfld-platform-pcm.c
sound/soc/intel/boards/bytcr_rt5640.c
sound/soc/mediatek/mt8192/mt8192-dai-tdm.c
sound/soc/mediatek/mt8192/mt8192-reg.h
sound/soc/qcom/lpass-cpu.c
sound/soc/qcom/sdm845.c
sound/soc/soc-core.c
sound/soc/sof/Kconfig
sound/soc/sof/Makefile
sound/soc/sof/core.c
sound/soc/sof/intel/Kconfig
sound/soc/sof/intel/Makefile
sound/soc/sof/intel/apl.c
sound/soc/sof/intel/bdw.c
sound/soc/sof/intel/byt.c
sound/soc/sof/intel/cnl.c
sound/soc/sof/intel/hda-dsp.c
sound/soc/sof/intel/hda.c
sound/soc/sof/intel/hda.h
sound/soc/sof/intel/icl.c
sound/soc/sof/intel/pci-apl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-cnl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-icl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-tgl.c [new file with mode: 0644]
sound/soc/sof/intel/pci-tng.c [new file with mode: 0644]
sound/soc/sof/intel/shim.h
sound/soc/sof/intel/tgl.c
sound/soc/sof/sof-acpi-dev.c
sound/soc/sof/sof-acpi-dev.h [new file with mode: 0644]
sound/soc/sof/sof-pci-dev.c
sound/soc/sof/sof-pci-dev.h [new file with mode: 0644]
sound/soc/sunxi/sun4i-codec.c
sound/sparc/amd7930.c
sound/sparc/cs4231.c
sound/sparc/dbri.c
sound/usb/6fire/chip.c
sound/usb/caiaq/device.c
sound/usb/card.c
sound/usb/clock.c
sound/usb/hiface/chip.c
sound/usb/misc/ua101.c
sound/usb/mixer.c
sound/usb/mixer_maps.c
sound/usb/mixer_quirks.c
sound/usb/pcm.c
sound/usb/quirks.c
sound/usb/usbaudio.h
sound/usb/usx2y/usbusx2y.c
sound/x86/intel_hdmi_audio.c
sound/xen/xen_snd_front.c
tools/arch/s390/include/uapi/asm/ptrace.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/insn.h
tools/arch/x86/include/asm/orc_types.h
tools/arch/x86/include/uapi/asm/kvm.h
tools/arch/x86/include/uapi/asm/vmx.h
tools/arch/x86/lib/insn.c
tools/bpf/resolve_btfids/main.c
tools/build/Makefile
tools/include/linux/coresight-pmu.h
tools/include/linux/export.h
tools/include/linux/objtool.h
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/drm/drm.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/bpf.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/mount.h
tools/include/uapi/linux/openat2.h
tools/kvm/kvm_stat/kvm_stat.service
tools/lib/bpf/Makefile
tools/lib/bpf/btf_dump.c
tools/lib/bpf/libbpf.c
tools/lib/bpf/netlink.c
tools/lib/bpf/xsk.c
tools/lib/perf/evlist.c
tools/lib/perf/include/internal/evlist.h
tools/objtool/.gitignore
tools/objtool/Documentation/stack-validation.txt
tools/objtool/Makefile
tools/objtool/arch/x86/decode.c
tools/objtool/arch/x86/include/arch/cfi_regs.h [moved from tools/objtool/arch/x86/include/cfi_regs.h with 100% similarity]
tools/objtool/arch/x86/include/arch/elf.h [moved from tools/objtool/arch/x86/include/arch_elf.h with 100% similarity]
tools/objtool/arch/x86/include/arch/endianness.h [new file with mode: 0644]
tools/objtool/arch/x86/include/arch/special.h [moved from tools/objtool/arch/x86/include/arch_special.h with 100% similarity]
tools/objtool/arch/x86/special.c
tools/objtool/builtin-check.c
tools/objtool/builtin-orc.c
tools/objtool/check.c
tools/objtool/elf.c
tools/objtool/include/objtool/arch.h [moved from tools/objtool/arch.h with 94% similarity]
tools/objtool/include/objtool/builtin.h [moved from tools/objtool/builtin.h with 88% similarity]
tools/objtool/include/objtool/cfi.h [moved from tools/objtool/cfi.h with 96% similarity]
tools/objtool/include/objtool/check.h [moved from tools/objtool/check.h with 63% similarity]
tools/objtool/include/objtool/elf.h [moved from tools/objtool/elf.h with 100% similarity]
tools/objtool/include/objtool/endianness.h [new file with mode: 0644]
tools/objtool/include/objtool/objtool.h [moved from tools/objtool/objtool.h with 84% similarity]
tools/objtool/include/objtool/special.h [moved from tools/objtool/special.h with 94% similarity]
tools/objtool/include/objtool/warn.h [moved from tools/objtool/warn.h with 98% similarity]
tools/objtool/objtool.c
tools/objtool/orc_dump.c
tools/objtool/orc_gen.c
tools/objtool/special.c
tools/objtool/weak.c
tools/perf/Documentation/perf-evlist.txt
tools/perf/Documentation/perf-ftrace.txt
tools/perf/Documentation/perf-kallsyms.txt
tools/perf/Documentation/perf-trace.txt
tools/perf/Makefile.perf
tools/perf/arch/arm/util/cs-etm.c
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
tools/perf/arch/s390/entry/syscalls/syscall.tbl
tools/perf/arch/x86/Makefile
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
tools/perf/arch/x86/include/arch-tests.h
tools/perf/arch/x86/tests/Build
tools/perf/arch/x86/tests/arch-tests.c
tools/perf/arch/x86/tests/insn-x86.c
tools/perf/arch/x86/tests/sample-parsing.c [new file with mode: 0644]
tools/perf/arch/x86/util/archinsn.c
tools/perf/bench/numa.c
tools/perf/bench/sched-messaging.c
tools/perf/bench/sched-pipe.c
tools/perf/bench/syscall.c
tools/perf/builtin-daemon.c
tools/perf/builtin-diff.c
tools/perf/builtin-trace.c
tools/perf/perf-archive.sh
tools/perf/tests/attr.c
tools/perf/tests/bpf.c
tools/perf/tests/code-reading.c
tools/perf/tests/cpumap.c
tools/perf/tests/keep-tracking.c
tools/perf/tests/mmap-basic.c
tools/perf/tests/perf-time-to-tsc.c
tools/perf/tests/sample-parsing.c
tools/perf/tests/shell/daemon.sh
tools/perf/tests/sw-clock.c
tools/perf/tests/switch-tracking.c
tools/perf/tests/task-exit.c
tools/perf/tests/thread-map.c
tools/perf/util/auxtrace.c
tools/perf/util/bpf-event.c
tools/perf/util/evlist.c
tools/perf/util/evsel.c
tools/perf/util/evsel.h
tools/perf/util/header.c
tools/perf/util/map.c
tools/perf/util/parse-events.c
tools/perf/util/parse-events.y
tools/perf/util/pmu.c
tools/perf/util/pmu.h
tools/perf/util/python-ext-sources
tools/perf/util/sort.c
tools/perf/util/stat-display.c
tools/perf/util/stat.c
tools/perf/util/synthetic-events.c
tools/perf/util/trace-event-read.c
tools/perf/util/vdso.c
tools/testing/kunit/configs/broken_on_uml.config
tools/testing/kunit/kunit_config.py
tools/testing/radix-tree/idr-test.c
tools/testing/radix-tree/linux/compiler_types.h [deleted file]
tools/testing/radix-tree/multiorder.c
tools/testing/radix-tree/xarray.c
tools/testing/selftests/Makefile
tools/testing/selftests/arm64/fp/sve-ptrace.c
tools/testing/selftests/arm64/fp/sve-test.S
tools/testing/selftests/bpf/prog_tests/check_mtu.c
tools/testing/selftests/bpf/prog_tests/fexit_sleep.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/btf_dump_test_case_syntax.c
tools/testing/selftests/bpf/progs/fexit_sleep.c [new file with mode: 0644]
tools/testing/selftests/bpf/progs/netif_receive_skb.c
tools/testing/selftests/bpf/progs/test_check_mtu.c
tools/testing/selftests/bpf/progs/test_global_func11.c
tools/testing/selftests/bpf/progs/test_tunnel_kern.c
tools/testing/selftests/bpf/verifier/array_access.c
tools/testing/selftests/bpf/verifier/atomic_and.c
tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c
tools/testing/selftests/bpf/verifier/atomic_or.c
tools/testing/selftests/bpf/verifier/bounds_deduction.c
tools/testing/selftests/bpf/verifier/map_ptr.c
tools/testing/selftests/bpf/verifier/unpriv.c
tools/testing/selftests/bpf/verifier/value_ptr_arith.c
tools/testing/selftests/dma/dma_map_benchmark.c
tools/testing/selftests/gpio/.gitignore
tools/testing/selftests/kvm/.gitignore
tools/testing/selftests/kvm/Makefile
tools/testing/selftests/kvm/hardware_disable_test.c [new file with mode: 0644]
tools/testing/selftests/kvm/include/kvm_util.h
tools/testing/selftests/kvm/lib/kvm_util.c
tools/testing/selftests/kvm/lib/kvm_util_internal.h
tools/testing/selftests/kvm/lib/x86_64/processor.c
tools/testing/selftests/kvm/x86_64/get_msr_index_features.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/hyperv_clock.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c [new file with mode: 0644]
tools/testing/selftests/kvm/x86_64/xen_shinfo_test.c
tools/testing/selftests/mount_setattr/.gitignore [new file with mode: 0644]
tools/testing/selftests/mount_setattr/Makefile [new file with mode: 0644]
tools/testing/selftests/mount_setattr/config [new file with mode: 0644]
tools/testing/selftests/mount_setattr/mount_setattr_test.c [new file with mode: 0644]
tools/testing/selftests/net/fib_nexthops.sh
tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
tools/testing/selftests/net/ipsec.c
tools/testing/selftests/net/mptcp/mptcp_join.sh
tools/testing/selftests/net/reuseaddr_ports_exhausted.c
tools/testing/selftests/netfilter/Makefile
tools/testing/selftests/netfilter/nf_nat_edemux.sh [new file with mode: 0755]
tools/testing/selftests/vm/Makefile
tools/testing/selftests/wireguard/netns.sh
tools/tracing/latency/latency-collector.c
usr/Kconfig
virt/kvm/kvm_main.c

index 01a341c..c24b147 100644 (file)
@@ -109,6 +109,7 @@ ForEachMacros:
   - 'css_for_each_child'
   - 'css_for_each_descendant_post'
   - 'css_for_each_descendant_pre'
+  - 'cxl_for_each_cmd'
   - 'device_for_each_child_node'
   - 'dma_fence_chain_for_each'
   - 'do_for_each_ftrace_op'
index bb65fa2..3af6627 100644 (file)
@@ -42,6 +42,7 @@
 *.so.dbg
 *.su
 *.symtypes
+*.symversions
 *.tab.[ch]
 *.tar
 *.xz
index 87a8bbd..541635d 100644 (file)
--- a/.mailmap
+++ b/.mailmap
@@ -36,6 +36,7 @@ Andrew Morton <akpm@linux-foundation.org>
 Andrew Murray <amurray@thegoodpenguin.co.uk> <amurray@embedded-bits.co.uk>
 Andrew Murray <amurray@thegoodpenguin.co.uk> <andrew.murray@arm.com>
 Andrew Vasquez <andrew.vasquez@qlogic.com>
+Andrey Konovalov <andreyknvl@gmail.com> <andreyknvl@google.com>
 Andrey Ryabinin <ryabinin.a.a@gmail.com> <a.ryabinin@samsung.com>
 Andrey Ryabinin <ryabinin.a.a@gmail.com> <aryabinin@virtuozzo.com>
 Andy Adamson <andros@citi.umich.edu>
@@ -65,6 +66,8 @@ Changbin Du <changbin.du@intel.com> <changbin.du@gmail.com>
 Changbin Du <changbin.du@intel.com> <changbin.du@intel.com>
 Chao Yu <chao@kernel.org> <chao2.yu@samsung.com>
 Chao Yu <chao@kernel.org> <yuchao0@huawei.com>
+Chris Chiu <chris.chiu@canonical.com> <chiu@endlessm.com>
+Chris Chiu <chris.chiu@canonical.com> <chiu@endlessos.org>
 Christophe Ricard <christophe.ricard@gmail.com>
 Christoph Hellwig <hch@lst.de>
 Corey Minyard <minyard@acm.org>
@@ -237,6 +240,7 @@ Maxime Ripard <mripard@kernel.org> <maxime.ripard@free-electrons.com>
 Mayuresh Janorkar <mayur@ti.com>
 Michael Buesch <m@bues.ch>
 Michel Dänzer <michel@tungstengraphics.com>
+Miguel Ojeda <ojeda@kernel.org> <miguel.ojeda.sandonis@gmail.com>
 Mike Rapoport <rppt@kernel.org> <mike@compulab.co.il>
 Mike Rapoport <rppt@kernel.org> <mike.rapoport@gmail.com>
 Mike Rapoport <rppt@kernel.org> <rppt@linux.ibm.com>
diff --git a/CREDITS b/CREDITS
index 9add7e6..cef83b9 100644 (file)
--- a/CREDITS
+++ b/CREDITS
@@ -1244,10 +1244,10 @@ S: 80050-430 - Curitiba - Paraná
 S: Brazil
 
 N: Oded Gabbay
-E: oded.gabbay@gmail.com
-D: HabanaLabs and AMD KFD maintainer
-S: 12 Shraga Raphaeli
-S: Petah-Tikva, 4906418
+E: ogabbay@kernel.org
+D: HabanaLabs maintainer
+S: 29 Duchifat St.
+S: Ra'anana 4372029
 S: Israel
 
 N: Kumar Gala
@@ -2841,14 +2841,11 @@ S: Subiaco, 6008
 S: Perth, Western Australia
 S: Australia
 
-N: Miguel Ojeda Sandonis
-E: miguel.ojeda.sandonis@gmail.com
-W: http://miguelojeda.es
-W: http://jair.lab.fi.uva.es/~migojed/
+N: Miguel Ojeda
+E: ojeda@kernel.org
+W: https://ojeda.dev
 D: Author of the ks0108, cfag12864b and cfag12864bfb auxiliary display drivers.
 D: Maintainer of the auxiliary display drivers tree (drivers/auxdisplay/*)
-S: C/ Mieses 20, 9-B
-S: Valladolid 47009
 S: Spain
 
 N: Peter Oruba
diff --git a/Documentation/ABI/stable/sysfs-bus-fsl-mc b/Documentation/ABI/stable/sysfs-bus-fsl-mc
new file mode 100644 (file)
index 0000000..58f06c7
--- /dev/null
@@ -0,0 +1,19 @@
+What:          /sys/bus/fsl-mc/rescan
+Date:          January 2021
+KernelVersion: 5.12
+Contact:       Ioana Ciornei <ioana.ciornei@nxp.com>
+Description:   Writing a non-zero value to this attribute will
+               force a rescan of the fsl-mc bus in the system and
+               synchronize the objects under the fsl-mc bus with the
+               Management Complex firmware.
+Users:         Userspace drivers and management tools
+
+What:          /sys/bus/fsl-mc/autorescan
+Date:          January 2021
+KernelVersion: 5.12
+Contact:       Ioana Ciornei <ioana.ciornei@nxp.com>
+Description:   Writing a zero value to this attribute will
+               disable the DPRC IRQs on which automatic rescan
+               of the fsl-mc bus is performed. A non-zero value
+               will enable the DPRC IRQs.
+Users:         Userspace drivers and management tools
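
A minimal usage sketch for the two new attributes (the written values follow
the semantics described above):

  # echo 1 > /sys/bus/fsl-mc/rescan       # force a one-shot bus rescan
  # echo 0 > /sys/bus/fsl-mc/autorescan   # disable IRQ-driven automatic rescan
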
index 792f58b..dc2a6ba 100644 (file)
@@ -273,7 +273,7 @@ Description:        In `/sys/accessibility/speakup` is a directory corresponding to
                Below is a description of values and  parameters for soft
                synthesizer, which is currently the most commonly used.
 
-What:          /sys/accessibility/speakup/soft/caps_start
+What:          /sys/accessibility/speakup/<synth-name>/caps_start
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   This is the string that is sent to the synthesizer to cause it
@@ -281,7 +281,7 @@ Description:        This is the string that is sent to the synthesizer to cause it
                and most others, this causes the pitch of the voice to rise
                above the currently set pitch.
 
-What:          /sys/accessibility/speakup/soft/caps_stop
+What:          /sys/accessibility/speakup/<synth-name>/caps_stop
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   This is the string sent to the synthesizer to cause it to stop
@@ -290,12 +290,12 @@ Description:      This is the string sent to the synthesizer to cause it to stop
                down to the
                currently set pitch.
 
-What:          /sys/accessibility/speakup/soft/delay_time
+What:          /sys/accessibility/speakup/<synth-name>/delay_time
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   TODO:
 
-What:          /sys/accessibility/speakup/soft/direct
+What:          /sys/accessibility/speakup/<synth-name>/direct
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Controls if punctuation is spoken by speakup, or by the
@@ -306,36 +306,43 @@ Description:      Controls if punctuation is spoken by speakup, or by the
                than". Zero lets speakup speak the punctuation. One lets the
                synthesizer itself speak punctuation.
 
-What:          /sys/accessibility/speakup/soft/freq
+What:          /sys/accessibility/speakup/<synth-name>/freq
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the frequency of the speech synthesizer. Range is
                0-9.
 
-What:          /sys/accessibility/speakup/soft/full_time
+What:          /sys/accessibility/speakup/<synth-name>/flush_time
+KernelVersion: 5.12
+Contact:       speakup@linux-speakup.org
+Description:   Gets or sets the timeout to wait for the synthesizer flush to
+               complete. This can be used when the cable is faulty and flush
+               notifications are getting lost.
+
+What:          /sys/accessibility/speakup/<synth-name>/full_time
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   TODO:
 
-What:          /sys/accessibility/speakup/soft/jiffy_delta
+What:          /sys/accessibility/speakup/<synth-name>/jiffy_delta
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   This controls how many jiffies the kernel gives to the
                synthesizer. Setting this too high can make a system unstable,
                or even crash it.
 
-What:          /sys/accessibility/speakup/soft/pitch
+What:          /sys/accessibility/speakup/<synth-name>/pitch
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the pitch of the synthesizer. The range is 0-9.
 
-What:          /sys/accessibility/speakup/soft/inflection
+What:          /sys/accessibility/speakup/<synth-name>/inflection
 KernelVersion: 5.8
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the inflection of the synthesizer, i.e. the pitch
                range. The range is 0-9.
 
-What:          /sys/accessibility/speakup/soft/punct
+What:          /sys/accessibility/speakup/<synth-name>/punct
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the amount of punctuation spoken by the
@@ -343,13 +350,13 @@ Description:      Gets or sets the amount of punctuation spoken by the
                TODO: How is this related to speakup's punc_level, or
                reading_punc.
 
-What:          /sys/accessibility/speakup/soft/rate
+What:          /sys/accessibility/speakup/<synth-name>/rate
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the rate of the synthesizer. Range is from zero
                slowest, to nine fastest.
 
-What:          /sys/accessibility/speakup/soft/tone
+What:          /sys/accessibility/speakup/<synth-name>/tone
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the tone of the speech synthesizer. The range for
@@ -357,12 +364,12 @@ Description:      Gets or sets the tone of the speech synthesizer. The range for
                difference if using espeak and the espeakup connector.
                TODO: does espeakup support different tonalities?
 
-What:          /sys/accessibility/speakup/soft/trigger_time
+What:          /sys/accessibility/speakup/<synth-name>/trigger_time
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   TODO:
 
-What:          /sys/accessibility/speakup/soft/voice
+What:          /sys/accessibility/speakup/<synth-name>/voice
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the voice used by the synthesizer if the
@@ -371,7 +378,7 @@ Description:        Gets or sets the voice used by the synthesizer if the
                voices, this parameter will not set the voice when the espeakup
                connector is used  between speakup and espeak.
 
-What:          /sys/accessibility/speakup/soft/vol
+What:          /sys/accessibility/speakup/<synth-name>/vol
 KernelVersion: 2.6
 Contact:       speakup@linux-speakup.org
 Description:   Gets or sets the volume of the speech synthesizer. Range is 0-9,
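
As a usage sketch with the soft synthesizer (the parameter values here are
illustrative, not from the patch):

  # cat /sys/accessibility/speakup/soft/rate              # current rate, 0-9
  # echo 7 > /sys/accessibility/speakup/soft/rate         # speak faster
  # echo 200 > /sys/accessibility/speakup/soft/flush_time # raise the flush timeout
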
index c5d678d..d447a61 100644 (file)
@@ -1,7 +1,7 @@
 What:           /sys/kernel/debug/habanalabs/hl<n>/addr
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets the device address to be used for read or write through
                 PCI bar, or the device VA of a host mapped memory to be read or
                 written directly from the host. The latter option is allowed
@@ -11,7 +11,7 @@ Description:    Sets the device address to be used for read or write through
 What:           /sys/kernel/debug/habanalabs/hl<n>/clk_gate
 Date:           May 2020
 KernelVersion:  5.8
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allow the root user to disable/enable in runtime the clock
                 gating mechanism in Gaudi. Due to how Gaudi is built, the
                 clock gating needs to be disabled in order to access the
@@ -34,28 +34,28 @@ Description:    Allow the root user to disable/enable in runtime the clock
 What:           /sys/kernel/debug/habanalabs/hl<n>/command_buffers
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently allocated
                 command buffers
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/command_submission
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the currently active
                 command submissions
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/command_submission_jobs
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays a list with detailed information about each JOB (CB) of
                 each active command submission
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/data32
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the root user to read or write directly through the
                 device's PCI bar. Writing to this file generates a write
                 transaction while reading from the file generates a read
@@ -70,7 +70,7 @@ Description:    Allows the root user to read or write directly through the
 What:           /sys/kernel/debug/habanalabs/hl<n>/data64
 Date:           Jan 2020
 KernelVersion:  5.6
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the root user to read or write 64 bit data directly
                 through the device's PCI bar. Writing to this file generates a
                 write transaction while reading from the file generates a read
@@ -85,7 +85,7 @@ Description:    Allows the root user to read or write 64 bit data directly
 What:           /sys/kernel/debug/habanalabs/hl<n>/device
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Enables the root user to set the device to a specific state.
                 Valid values are "disable", "enable", "suspend", "resume".
                 User can read this property to see the valid values
@@ -93,28 +93,28 @@ Description:    Enables the root user to set the device to specific state.
 What:           /sys/kernel/debug/habanalabs/hl<n>/engines
 Date:           Jul 2019
 KernelVersion:  5.3
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the status registers values of the device engines and
                 their derived idle status
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_addr
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets I2C device address for I2C transaction that is generated
                 by the device's CPU
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_bus
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets I2C bus address for I2C transaction that is generated by
                 the device's CPU
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_data
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Triggers an I2C transaction that is generated by the device's
                 CPU. Writing to this file generates a write transaction while
                reading from the file generates a read transaction
@@ -122,32 +122,32 @@ Description:    Triggers an I2C transaction that is generated by the device's
 What:           /sys/kernel/debug/habanalabs/hl<n>/i2c_reg
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets I2C register id for I2C transaction that is generated by
                 the device's CPU
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/led0
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets the state of the first S/W led on the device
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/led1
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets the state of the second S/W led on the device
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/led2
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets the state of the third S/W led on the device
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/mmu
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the hop values and physical address for a given ASID
                 and virtual address. The user should write the ASID and VA into
                 the file and then read the file to get the result.
@@ -157,14 +157,14 @@ Description:    Displays the hop values and physical address for a given ASID
 What:           /sys/kernel/debug/habanalabs/hl<n>/set_power_state
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets the PCI power state. Valid values are "1" for D0 and "2"
                 for D3Hot
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/userptr
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about the current user
                 pointers (user virtual addresses) that are pinned and mapped
                 to DMA addresses
@@ -172,13 +172,21 @@ Description:    Displays a list with information about the currently user
 What:           /sys/kernel/debug/habanalabs/hl<n>/vm
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays a list with information about all the active virtual
                 address mappings per ASID
 
 What:           /sys/kernel/debug/habanalabs/hl<n>/stop_on_err
 Date:           Mar 2020
 KernelVersion:  5.6
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Sets the stop-on_error option for the device engines. Value of
                 "0" is for disable, otherwise enable.
+
+What:           /sys/kernel/debug/habanalabs/hl<n>/dump_security_violations
+Date:           Jan 2021
+KernelVersion:  5.12
+Contact:        ogabbay@kernel.org
+Description:    Dumps all security violations to dmesg. This will also ack
+                all security violations, meaning those violations will not be
+                dumped the next time the user calls this API
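
A sketch of the read/write flow these debugfs files describe (device index,
address, and value are hypothetical):

  # echo 0x1001000 > /sys/kernel/debug/habanalabs/hl0/addr  # select device address
  # cat /sys/kernel/debug/habanalabs/hl0/data32             # 32-bit read transaction
  # echo 0xcafe > /sys/kernel/debug/habanalabs/hl0/data32   # 32-bit write transaction
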
index bc8e1cb..070779e 100644 (file)
@@ -29,7 +29,7 @@ Description:
                        option: [[appraise_type=]] [template=] [permit_directio]
                                [appraise_flag=] [keyrings=]
                  base:
-                       func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK]MODULE_CHECK]
+                       func:= [BPRM_CHECK][MMAP_CHECK][CREDS_CHECK][FILE_CHECK][MODULE_CHECK]
                                [FIRMWARE_CHECK]
                                [KEXEC_KERNEL_CHECK] [KEXEC_INITRAMFS_CHECK]
                                [KEXEC_CMDLINE] [KEY_CHECK] [CRITICAL_DATA]
index 881f0cd..8e53a32 100644 (file)
@@ -371,6 +371,14 @@ Contact:   Mathieu Poirier <mathieu.poirier@linaro.org>
 Description:   (Read) Print the content of the Device ID Register
                (0xFC8).  The value is taken directly from the HW.
 
+What:          /sys/bus/coresight/devices/etm<N>/mgmt/trcdevarch
+Date:          January 2021
+KernelVersion: 5.12
+Contact:       Mathieu Poirier <mathieu.poirier@linaro.org>
+Description:   (Read) Print the content of the Device Architecture Register
+               (offset 0xFBC).  The value is read directly from the HW.
+
 What:          /sys/bus/coresight/devices/etm<N>/mgmt/trcdevtype
 Date:          April 2015
 KernelVersion: 4.01
diff --git a/Documentation/ABI/testing/sysfs-bus-cxl b/Documentation/ABI/testing/sysfs-bus-cxl
new file mode 100644 (file)
index 0000000..2fe7490
--- /dev/null
@@ -0,0 +1,26 @@
+What:          /sys/bus/cxl/devices/memX/firmware_version
+Date:          December, 2020
+KernelVersion: v5.12
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) "FW Revision" string as reported by the Identify
+               Memory Device Output Payload in the CXL-2.0
+               specification.
+
+What:          /sys/bus/cxl/devices/memX/ram/size
+Date:          December, 2020
+KernelVersion: v5.12
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) "Volatile Only Capacity" as bytes. Represents the
+               identically named field in the Identify Memory Device Output
+               Payload in the CXL-2.0 specification.
+
+What:          /sys/bus/cxl/devices/memX/pmem/size
+Date:          December, 2020
+KernelVersion: v5.12
+Contact:       linux-cxl@vger.kernel.org
+Description:
+               (RO) "Persistent Only Capacity" as bytes. Represents the
+               identically named field in the Identify Memory Device Output
+               Payload in the CXL-2.0 specification.
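
Assuming a first device enumerated as mem0, the new read-only attributes can
be queried as:

  # cat /sys/bus/cxl/devices/mem0/firmware_version
  # cat /sys/bus/cxl/devices/mem0/ram/size
  # cat /sys/bus/cxl/devices/mem0/pmem/size
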
diff --git a/Documentation/ABI/testing/sysfs-bus-dfl-devices-emif b/Documentation/ABI/testing/sysfs-bus-dfl-devices-emif
new file mode 100644 (file)
index 0000000..817d141
--- /dev/null
@@ -0,0 +1,25 @@
+What:          /sys/bus/dfl/devices/dfl_dev.X/infX_cal_fail
+Date:          Oct 2020
+KernelVersion: 5.12
+Contact:       Xu Yilun <yilun.xu@intel.com>
+Description:   Read-only. It indicates if the calibration failed on this
+               memory interface. "1" for calibration failure, "0" for OK.
+               Format: %u
+
+What:          /sys/bus/dfl/devices/dfl_dev.X/infX_init_done
+Date:          Oct 2020
+KernelVersion: 5.12
+Contact:       Xu Yilun <yilun.xu@intel.com>
+Description:   Read-only. It indicates if the initialization completed on
+               this memory interface. "1" for initialization complete, "0"
+               for not yet.
+               Format: %u
+
+What:          /sys/bus/dfl/devices/dfl_dev.X/infX_clear
+Date:          Oct 2020
+KernelVersion: 5.12
+Contact:       Xu Yilun <yilun.xu@intel.com>
+Description:   Write-only. Writing "1" to this file will zero out all memory
+               data in this memory interface. Writing of other values is
+               invalid.
+               Format: %u
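
A usage sketch, with hypothetical device and interface indices:

  # cat /sys/bus/dfl/devices/dfl_dev.0/inf0_cal_fail    # "1" on calibration failure
  # cat /sys/bus/dfl/devices/dfl_dev.0/inf0_init_done   # "1" once initialization is done
  # echo 1 > /sys/bus/dfl/devices/dfl_dev.0/inf0_clear  # zero out the memory data
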
diff --git a/Documentation/ABI/testing/sysfs-bus-dfl-devices-n3000-nios b/Documentation/ABI/testing/sysfs-bus-dfl-devices-n3000-nios
new file mode 100644 (file)
index 0000000..5335d74
--- /dev/null
@@ -0,0 +1,47 @@
+What:          /sys/bus/dfl/devices/dfl_dev.X/fec_mode
+Date:          Oct 2020
+KernelVersion: 5.12
+Contact:       Xu Yilun <yilun.xu@intel.com>
+Description:   Read-only. Returns the FEC mode of the 25G links of the
+               ethernet retimers configured by the Nios firmware. "rs" for
+               Reed Solomon FEC, "kr" for Fire Code FEC, "no" for no FEC.
+               "not supported" if the FEC mode setting is not supported,
+               which happens when the Nios firmware major version is < 3
+               or no link is configured to 25G.
+               Format: string
+
+What:          /sys/bus/dfl/devices/dfl_dev.X/retimer_A_mode
+Date:          Oct 2020
+KernelVersion: 5.12
+Contact:       Xu Yilun <yilun.xu@intel.com>
+Description:   Read-only. Returns the enumeration value of the working mode of
+               the retimer A configured by the Nios firmware. The value is
+               read out from shared registers filled by the Nios firmware.
+               Currently the possible values are:
+
+               - "0": Reset
+               - "1": 4x10G
+               - "2": 4x25G
+               - "3": 2x25G
+               - "4": 2x25G+2x10G
+               - "5": 1x25G
+
+               If the Nios firmware is updated in the future to support more
+               retimer modes, more enumeration values are expected.
+               Format: 0x%x
+
+What:          /sys/bus/dfl/devices/dfl_dev.X/retimer_B_mode
+Date:          Oct 2020
+KernelVersion: 5.12
+Contact:       Xu Yilun <yilun.xu@intel.com>
+Description:   Read-only. Returns the enumeration value of the working mode of
+               the retimer B configured by the Nios firmware. The value format
+               is the same as retimer_A_mode.
+
+What:          /sys/bus/dfl/devices/dfl_dev.X/nios_fw_version
+Date:          Oct 2020
+KernelVersion: 5.12
+Contact:       Xu Yilun <yilun.xu@intel.com>
+Description:   Read-only. Returns the version of the Nios firmware in the
+               FPGA. Its format is "major.minor.patch".
+               Format: %x.%x.%x
diff --git a/Documentation/ABI/testing/sysfs-bus-pci-devices-pvpanic b/Documentation/ABI/testing/sysfs-bus-pci-devices-pvpanic
new file mode 100644 (file)
index 0000000..1936f73
--- /dev/null
@@ -0,0 +1,24 @@
+What:          /sys/devices/pci0000:00/*/QEMU0001:00/capability
+Date:          Jan 2021
+Contact:       zhenwei pi <pizhenwei@bytedance.com>
+Description:
+               Read-only attribute. Capabilities of the pvpanic device which
+               are supported by QEMU.
+
+               Format: %x.
+
+               For the detailed bit definitions, refer to the <Bit Definition>
+               section of the pvpanic device specification:
+               https://git.qemu.org/?p=qemu.git;a=blob_plain;f=docs/specs/pvpanic.txt
+
+What:          /sys/devices/pci0000:00/*/QEMU0001:00/events
+Date:          Jan 2021
+Contact:       zhenwei pi <pizhenwei@bytedance.com>
+Description:
+               RW attribute. Set/get which features are in use. This attribute
+               is used to enable/disable feature(s) of the pvpanic device.
+               Note that this value should be a subset of capability.
+
+               Format: %x.
+
+               Also refer to pvpanic device specification.
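
A usage sketch (the PCI slot in the path is hypothetical; the document's
wildcard covers it):

  # cat /sys/devices/pci0000:00/0000:00:04.0/QEMU0001:00/capability   # supported features
  # echo 1 > /sys/devices/pci0000:00/0000:00:04.0/QEMU0001:00/events  # enable a subset
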
index 246a45b..d8b0f80 100644 (file)
@@ -13,21 +13,22 @@ What:               /sys/devices/system/memory/memoryX/removable
 Date:          June 2008
 Contact:       Badari Pulavarty <pbadari@us.ibm.com>
 Description:
-               The file /sys/devices/system/memory/memoryX/removable
-               indicates whether this memory block is removable or not.
-               This is useful for a user-level agent to determine
-               identify removable sections of the memory before attempting
-               potentially expensive hot-remove memory operation
+               The file /sys/devices/system/memory/memoryX/removable is a
+               legacy interface used to indicate whether a memory block is
+               likely to be offlineable or not.  Newer kernel versions return
+               "1" if and only if the kernel supports memory offlining.
 Users:         hotplug memory remove tools
                http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
+               lsmem/chmem part of util-linux
 
 What:          /sys/devices/system/memory/memoryX/phys_device
 Date:          September 2008
 Contact:       Badari Pulavarty <pbadari@us.ibm.com>
 Description:
                The file /sys/devices/system/memory/memoryX/phys_device
-               is read-only and is designed to show the name of physical
-               memory device.  Implementation is currently incomplete.
+               is read-only; it is a legacy interface only ever used on s390x
+               to expose the covered storage increment.
+Users:         Legacy s390-tools lsmem/chmem
 
 What:          /sys/devices/system/memory/memoryX/phys_index
 Date:          September 2008
@@ -43,23 +44,25 @@ Date:               September 2008
 Contact:       Badari Pulavarty <pbadari@us.ibm.com>
 Description:
                The file /sys/devices/system/memory/memoryX/state
-               is read-write.  When read, its contents show the
-               online/offline state of the memory section.  When written,
-               root can toggle the the online/offline state of a removable
-               memory section (see removable file description above)
-               using the following commands::
+               is read-write.  When read, it returns the online/offline
+               state of the memory block.  When written, root can toggle
+               the online/offline state of a memory block using the following
+               commands::
 
                  # echo online > /sys/devices/system/memory/memoryX/state
                  # echo offline > /sys/devices/system/memory/memoryX/state
 
-               For example, if /sys/devices/system/memory/memory22/removable
-               contains a value of 1 and
-               /sys/devices/system/memory/memory22/state contains the
-               string "online" the following command can be executed by
-               by root to offline that section::
-
-                 # echo offline > /sys/devices/system/memory/memory22/state
-
+               On newer kernel versions, advanced states can be specified
+               when onlining to select a target zone: "online_movable"
+               selects the movable zone.  "online_kernel" selects the
+               applicable kernel zone (DMA, DMA32, or Normal).  However,
+               after successfully setting one of the advanced states,
+               reading the file will return "online"; the zone information
+               can be obtained via "valid_zones" instead.
+
+               While onlining is unlikely to fail, there are no guarantees
+               that offlining will succeed.  Offlining is more likely to
+               succeed if "valid_zones" indicates "Movable".
 Users:         hotplug memory remove tools
                http://www.ibm.com/developerworks/wikis/display/LinuxP/powerpc-utils
 
@@ -69,8 +72,19 @@ Date:           July 2014
 Contact:       Zhang Zhen <zhenzhang.zhang@huawei.com>
 Description:
                The file /sys/devices/system/memory/memoryX/valid_zones is
-               read-only and is designed to show which zone this memory
-               block can be onlined to.
+               read-only.
+
+               For online memory blocks, it returns in which zone memory
+               provided by a memory block is managed.  If multiple zones
+               apply (not applicable for hotplugged memory), "None" is returned
+               and the memory block cannot be offlined.
+
+               For offline memory blocks, it returns by which zone memory
+               provided by a memory block can be managed when onlining.
+               The first returned zone ("default") will be used when setting
+               the state of an offline memory block to "online".  Only one of
+               the kernel zones (DMA, DMA32, Normal) is applicable for a single
+               memory block.
 
 What:          /sys/devices/system/memoryX/nodeY
 Date:          October 2009
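
Tying state and valid_zones together (the block number and the output shown
are hypothetical):

  # cat /sys/devices/system/memory/memory8/valid_zones
  Movable Normal
  # echo online_movable > /sys/devices/system/memory/memory8/state
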
diff --git a/Documentation/ABI/testing/sysfs-devices-xenbus b/Documentation/ABI/testing/sysfs-devices-xenbus
new file mode 100644 (file)
index 0000000..fd796cb
--- /dev/null
@@ -0,0 +1,41 @@
+What:          /sys/devices/*/xenbus/event_channels
+Date:          February 2021
+Contact:       Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+               Number of Xen event channels associated with a kernel based
+               paravirtualized device frontend or backend.
+
+What:          /sys/devices/*/xenbus/events
+Date:          February 2021
+Contact:       Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+               Total number of Xen events received for a Xen pv device
+               frontend or backend.
+
+What:          /sys/devices/*/xenbus/jiffies_eoi_delayed
+Date:          February 2021
+Contact:       Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+               Total time, in jiffies, by which the EOI of an interrupt for
+               a Xen pv device has been delayed in order to avoid stalls due
+               to event storms. A rising value is an early sign that the
+               other end of the pv device has gone rogue.
+
+What:          /sys/devices/*/xenbus/spurious_events
+Date:          February 2021
+Contact:       Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+               Number of events received for a Xen pv device which did not
+               require any action. Too many spurious events in a row will
+               trigger delayed EOI processing.
+
+What:          /sys/devices/*/xenbus/spurious_threshold
+Date:          February 2021
+Contact:       Xen Developers mailing list <xen-devel@lists.xenproject.org>
+Description:
+               Controls the tolerated number of subsequent spurious events
+               before delayed EOI processing is triggered for a Xen pv
+               device. Default is 1. This can be modified in case the other
+               end of the pv device issues spurious events on a regular
+               basis and is known not to be acting maliciously. Raising
+               the value in such cases can improve pv device performance.
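
A usage sketch for a backend known to be noisy but benign (the device path is
a placeholder for a concrete pv device directory):

  # cat /sys/devices/<pv-device>/xenbus/spurious_events
  # echo 64 > /sys/devices/<pv-device>/xenbus/spurious_threshold
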
index 169ae4b..1f127f7 100644 (file)
@@ -1,7 +1,7 @@
 What:           /sys/class/habanalabs/hl<n>/armcp_kernel_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the Linux kernel running on the device's CPU.
                 Will be DEPRECATED in Linux kernel version 5.10, and be
                 replaced with cpucp_kernel_ver
@@ -9,7 +9,7 @@ Description:    Version of the Linux kernel running on the device's CPU.
 What:           /sys/class/habanalabs/hl<n>/armcp_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the application running on the device's CPU
                 Will be DEPRECATED in Linux kernel version 5.10, and be
                 replaced with cpucp_ver
@@ -17,7 +17,7 @@ Description:    Version of the application running on the device's CPU
 What:           /sys/class/habanalabs/hl<n>/clk_max_freq_mhz
 Date:           Jun 2019
 KernelVersion:  not yet upstreamed
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum clock frequency, in MHz.
                The device clock might be set to a lower value than the maximum.
                 The user should read the clk_cur_freq_mhz to see the actual
@@ -27,52 +27,52 @@ Description:    Allows the user to set the maximum clock frequency, in MHz.
 What:           /sys/class/habanalabs/hl<n>/clk_cur_freq_mhz
 Date:           Jun 2019
 KernelVersion:  not yet upstreamed
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the current frequency, in MHz, of the device clock.
                 This property is valid only for the Gaudi ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/cpld_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the Device's CPLD F/W
 
 What:           /sys/class/habanalabs/hl<n>/cpucp_kernel_ver
 Date:           Oct 2020
 KernelVersion:  5.10
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the Linux kernel running on the device's CPU
 
 What:           /sys/class/habanalabs/hl<n>/cpucp_ver
 Date:           Oct 2020
 KernelVersion:  5.10
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the application running on the device's CPU
 
 What:           /sys/class/habanalabs/hl<n>/device_type
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the code name of the device according to its type.
                 The supported values are: "GOYA"
 
 What:           /sys/class/habanalabs/hl<n>/eeprom
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    A binary file attribute that contains the contents of the
                 on-board EEPROM
 
 What:           /sys/class/habanalabs/hl<n>/fuse_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the device's version from the eFuse
 
 What:           /sys/class/habanalabs/hl<n>/hard_reset
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Interface to trigger a hard-reset operation for the device.
                 Hard-reset will reset ALL internal components of the device
                 except for the PCI interface and the internal PLLs
@@ -80,14 +80,14 @@ Description:    Interface to trigger a hard-reset operation for the device.
 What:           /sys/class/habanalabs/hl<n>/hard_reset_cnt
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays how many times the device has undergone a hard-reset
                 operation since the driver was loaded
 
 What:           /sys/class/habanalabs/hl<n>/high_pll
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum clock frequency for MME, TPC
                 and IC when the power management profile is set to "automatic".
                 This property is valid only for the Goya ASIC family
@@ -95,7 +95,7 @@ Description:    Allows the user to set the maximum clock frequency for MME, TPC
 What:           /sys/class/habanalabs/hl<n>/ic_clk
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum clock frequency, in Hz, of
                 the Interconnect fabric. Writes to this parameter affect the
                 device only when the power management profile is set to "manual"
@@ -107,27 +107,27 @@ Description:    Allows the user to set the maximum clock frequency, in Hz, of
 What:           /sys/class/habanalabs/hl<n>/ic_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the Interconnect
                 fabric. This property is valid only for the Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/infineon_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the Device's power supply F/W code
 
 What:           /sys/class/habanalabs/hl<n>/max_power
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum power consumption of the
                 device in milliwatts.
 
 What:           /sys/class/habanalabs/hl<n>/mme_clk
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum clock frequency, in Hz, of
                 the MME compute engine. Writes to this parameter affect the
                 device only when the power management profile is set to "manual"
@@ -139,21 +139,21 @@ Description:    Allows the user to set the maximum clock frequency, in Hz, of
 What:           /sys/class/habanalabs/hl<n>/mme_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the MME compute
                 engine. This property is valid only for the Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/pci_addr
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the PCI address of the device. This is needed so
                 that the user can open a device based on its PCI address
 
 What:           /sys/class/habanalabs/hl<n>/pm_mng_profile
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Power management profile. Values are "auto", "manual". In "auto"
                 mode, the driver will set the maximum clock frequency to a high
                 value when a user-space process opens the device's file (unless
@@ -167,13 +167,13 @@ Description:    Power management profile. Values are "auto", "manual". In "auto"
 What:           /sys/class/habanalabs/hl<n>/preboot_btl_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the device's preboot F/W code
 
 What:           /sys/class/habanalabs/hl<n>/soft_reset
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Interface to trigger a soft-reset operation for the device.
                 Soft-reset will reset only the compute and DMA engines of the
                 device
@@ -181,26 +181,26 @@ Description:    Interface to trigger a soft-reset operation for the device.
 What:           /sys/class/habanalabs/hl<n>/soft_reset_cnt
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays how many times the device has undergone a soft-reset
                 operation since the driver was loaded
 
 What:           /sys/class/habanalabs/hl<n>/status
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Status of the card: "Operational", "Malfunction", "In reset".
 
 What:           /sys/class/habanalabs/hl<n>/thermal_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the Device's thermal daemon
 
 What:           /sys/class/habanalabs/hl<n>/tpc_clk
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Allows the user to set the maximum clock frequency, in Hz, of
                 the TPC compute engines. Writes to this parameter affect the
                 device only when the power management profile is set to "manual"
@@ -212,12 +212,12 @@ Description:    Allows the user to set the maximum clock frequency, in Hz, of
 What:           /sys/class/habanalabs/hl<n>/tpc_clk_curr
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Displays the current clock frequency, in Hz, of the TPC compute
                 engines. This property is valid only for the Goya ASIC family
 
 What:           /sys/class/habanalabs/hl<n>/uboot_ver
 Date:           Jan 2019
 KernelVersion:  5.1
-Contact:        oded.gabbay@gmail.com
+Contact:        ogabbay@kernel.org
 Description:    Version of the u-boot running on the device's CPU
\ No newline at end of file
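
A sketch of the manual power-management flow described above (the device
index and the values are hypothetical):

  # echo manual > /sys/class/habanalabs/hl0/pm_mng_profile
  # echo 100000000 > /sys/class/habanalabs/hl0/tpc_clk   # max TPC clock, in Hz
  # cat /sys/class/habanalabs/hl0/tpc_clk_curr           # actual current clock
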
diff --git a/Documentation/ABI/testing/sysfs-driver-input-cros-ec-keyb b/Documentation/ABI/testing/sysfs-driver-input-cros-ec-keyb
new file mode 100644 (file)
index 0000000..c7afc23
--- /dev/null
@@ -0,0 +1,6 @@
+What:          /sys/class/input/input(x)/device/function_row_physmap
+Date:          January 2021
+Contact:       Philip Chen <philipchen@chromium.org>
+Description:   A space-separated list of scancodes for the top row keys,
+               ordered by the physical positions of the keys, from left
+               to right.
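
Assuming the keyboard is registered as input3, the map can be read as:

  # cat /sys/class/input/input3/device/function_row_physmap
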
index b16d30a..819939d 100644 (file)
@@ -1,3 +1,46 @@
+What:          /sys/firmware/acpi/fpdt/
+Date:          Jan 2021
+Contact:       Zhang Rui <rui.zhang@intel.com>
+Description:
+               The ACPI Firmware Performance Data Table (FPDT) provides
+               firmware performance data for system boot, S3 suspend and
+               S3 resume. This sysfs entry contains the performance data
+               retrieved from the FPDT.
+
+               boot:
+                       firmware_start_ns: Timer value logged at the beginning
+                               of firmware image execution. In nanoseconds.
+                       bootloader_load_ns: Timer value logged just prior to
+                               loading the OS boot loader into memory.
+                               In nanoseconds.
+                       bootloader_launch_ns: Timer value logged just prior to
+                               launching the currently loaded OS boot loader
+                               image. In nanoseconds.
+                       exitbootservice_start_ns: Timer value logged at the
+                               point when the OS loader calls the
+                               ExitBootServices function for UEFI compatible
+                               firmware. In nanoseconds.
+                       exitbootservice_end_ns: Timer value logged at the point
+                               just prior to the OS loader gaining control
+                               back from the ExitBootServices function for
+                               UEFI compatible firmware. In nanoseconds.
+               suspend:
+                       suspend_start_ns: Timer value recorded at the previous
+                               OS write to SLP_TYP upon entry to S3. In
+                               nanoseconds.
+                       suspend_end_ns: Timer value recorded at the previous
+                               firmware write to SLP_TYP used to trigger
+                               hardware entry to S3. In nanoseconds.
+               resume:
+                       resume_count: A count of the number of S3 resume cycles
+                               since the last full boot sequence.
+                       resume_avg_ns: Average timer value of all resume cycles
+                               logged since the last full boot sequence,
+                               including the most recent resume. In nanoseconds.
+                       resume_prev_ns: Timer recorded at the end of the previous
+                               platform runtime firmware S3 resume, just prior to
+                               handoff to the OS waking vector. In nanoseconds.
+
 What:          /sys/firmware/acpi/bgrt/
 Date:          January 2012
 Contact:       Matthew Garrett <mjg@redhat.com>
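
A sketch for estimating time spent in firmware during boot, assuming the
boot/ subdirectory layout listed above:

  # cd /sys/firmware/acpi/fpdt/boot
  # echo $(( $(cat exitbootservice_end_ns) - $(cat firmware_start_ns) ))  # ns in firmware
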
diff --git a/Documentation/ABI/testing/sysfs-firmware-sfi b/Documentation/ABI/testing/sysfs-firmware-sfi
deleted file mode 100644 (file)
index 5210e0f..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-What:          /sys/firmware/sfi/tables/
-Date:          May 2010
-Contact:       Len Brown <lenb@kernel.org>
-Description:
-               SFI defines a number of small static memory tables
-               so the kernel can get platform information from firmware.
-
-               The tables are defined in the latest SFI specification:
-               http://simplefirmware.org/documentation
-
-               While the tables are used by the kernel, user-space
-               can observe them this way::
-
-                 # cd /sys/firmware/sfi/tables
-                 # cat $TABLENAME > $TABLENAME.bin
index ea0cc8c..f704925 100644 (file)
@@ -33,7 +33,7 @@ Contact:      xfs@oss.sgi.com
 Description:
                The current state of the log write grant head. It
                represents the total log reservation of all currently
-               oustanding transactions, including regrants due to
+               outstanding transactions, including regrants due to
                rolling transactions. The grant head is exported in
                "cycle:bytes" format.
 Users:         xfstests
index a7f81de..6a52d6d 100644 (file)
@@ -7,7 +7,7 @@ Description:
                is connected, for example "/dev/ttyS0".
 
                The device name flows down to the architecture-specific board
-               initialization file from the SFI/ATAGS bootloader
+               initialization file from the ATAGS bootloader
                firmware. The name exposed is read by the user-space
                daemon, which opens the device when install is requested.
 
index 9d6b89b..dae9c89 100644 (file)
@@ -5,13 +5,17 @@ Description:  This file contains a space-separated list of profiles supported for
 
                Drivers must use the following standard profile-names:
 
-               ============    ============================================
-               low-power       Low power consumption
-               cool            Cooler operation
-               quiet           Quieter operation
-               balanced        Balance between low power consumption and performance
-               performance     High performance operation
-               ============    ============================================
+               ====================    ========================================
+               low-power               Low power consumption
+               cool                    Cooler operation
+               quiet                   Quieter operation
+               balanced                Balance between low power consumption
+                                       and performance
+               balanced-performance    Balance between performance and low
+                                       power consumption with a slight bias
+                                       towards performance
+               performance             High performance operation
+               ====================    ========================================
 
                Userspace may expect drivers to offer more than one of these
                standard profile names.
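
A usage sketch, assuming the ACPI platform-profile sysfs paths this file
documents:

  # cat /sys/firmware/acpi/platform_profile_choices
  # echo balanced-performance > /sys/firmware/acpi/platform_profile
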
diff --git a/Documentation/PCI/endpoint/function/binding/pci-ntb.rst b/Documentation/PCI/endpoint/function/binding/pci-ntb.rst
new file mode 100644 (file)
index 0000000..40253d3
--- /dev/null
@@ -0,0 +1,38 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+PCI NTB Endpoint Function
+==========================
+
+1) Create a subdirectory under the pci_epf_ntb directory in configfs.
+
+Standard EPF Configurable Fields:
+
+================   ===========================================================
+vendorid          should be 0x104c
+deviceid          should be 0xb00d for TI's J721E SoC
+revid             don't care
+progif_code       don't care
+subclass_code     should be 0x00
+baseclass_code    should be 0x5
+cache_line_size           don't care
+subsys_vendor_id   don't care
+subsys_id         don't care
+interrupt_pin     don't care
+msi_interrupts    don't care
+msix_interrupts           don't care
+================   ===========================================================
+
+2) Create a subdirectory under the directory created in step 1.
+
+NTB EPF specific configurable fields:
+
+================   ===========================================================
+db_count           Number of doorbells; default = 4
+mw1                Size of memory window 1
+mw2                Size of memory window 2
+mw3                Size of memory window 3
+mw4                Size of memory window 4
+num_mws            Number of memory windows; max = 4
+spad_count         Number of scratchpad registers; default = 64
+================   ===========================================================
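
A configfs sketch of steps 1 and 2 (the function name "func1" and the NTB
subdirectory name are hypothetical):

  # cd /sys/kernel/config/pci_ep
  # mkdir functions/pci_epf_ntb/func1                  # step 1: standard EPF fields
  # echo 0x104c > functions/pci_epf_ntb/func1/vendorid
  # echo 0xb00d > functions/pci_epf_ntb/func1/deviceid
  # mkdir functions/pci_epf_ntb/func1/pci_epf_ntb.0    # step 2: NTB-specific fields
  # echo 4 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/db_count
  # echo 64 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/spad_count
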
index 4ca7439..38ea1f6 100644 (file)
@@ -11,5 +11,8 @@ PCI Endpoint Framework
    pci-endpoint-cfs
    pci-test-function
    pci-test-howto
+   pci-ntb-function
+   pci-ntb-howto
 
    function/binding/pci-test
+   function/binding/pci-ntb
index 1bbd81e..696f8ee 100644 (file)
@@ -68,6 +68,16 @@ created)
                                ... subsys_vendor_id
                                ... subsys_id
                                ... interrupt_pin
+                                ... primary/
+                                       ... <Symlink EPC Device1>/
+                                ... secondary/
+                                       ... <Symlink EPC Device2>/
+
+If an EPF device has to be associated with two EPCs (as in the case of a
+non-transparent bridge), the symlink of the endpoint controller connected to
+the primary interface should be added in the 'primary' directory, and the
+symlink of the endpoint controller connected to the secondary interface
+should be added in the 'secondary' directory.
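
A sketch of the primary/secondary association (the controller names are
hypothetical):

  # ln -s controllers/2900000.pcie-ep functions/pci_epf_ntb/func1/primary
  # ln -s controllers/2910000.pcie-ep functions/pci_epf_ntb/func1/secondary
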
 
 EPC Device
 ==========
diff --git a/Documentation/PCI/endpoint/pci-ntb-function.rst b/Documentation/PCI/endpoint/pci-ntb-function.rst
new file mode 100644 (file)
index 0000000..3b9d836
--- /dev/null
@@ -0,0 +1,348 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+PCI NTB Function
+=================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+PCI Non-Transparent Bridges (NTB) allow two host systems to communicate
+with each other by exposing each host as a device to the other host.
+NTBs typically support the ability to generate interrupts on the remote
+machine, expose memory ranges as BARs, and perform DMA.  They also support
+scratchpads, which are areas of memory within the NTB that are accessible
+from both machines.
+
+PCI NTB Function allows two different systems (or hosts) to communicate
+with each other by configuring the endpoint instances in such a way that
+transactions from one system are routed to the other system.
+
+In the diagram below, the PCI NTB function configures the SoC with multiple
+PCI Endpoint (EP) instances in such a way that transactions from one EP
+controller are routed to the other EP controller. Once the PCI NTB function
+has configured the SoC with multiple EP instances, HOST1 and HOST2 can
+communicate with each other using the SoC as a bridge.
+
+.. code-block:: text
+
+    +-------------+                                   +-------------+
+    |             |                                   |             |
+    |    HOST1    |                                   |    HOST2    |
+    |             |                                   |             |
+    +------^------+                                   +------^------+
+           |                                                 |
+           |                                                 |
+ +---------|-------------------------------------------------|---------+
+ |  +------v------+                                   +------v------+  |
+ |  |             |                                   |             |  |
+ |  |     EP      |                                   |     EP      |  |
+ |  | CONTROLLER1 |                                   | CONTROLLER2 |  |
+ |  |             <----------------------------------->             |  |
+ |  |             |                                   |             |  |
+ |  |             |                                   |             |  |
+ |  |             |  SoC With Multiple EP Instances   |             |  |
+ |  |             |  (Configured using NTB Function)  |             |  |
+ |  +-------------+                                   +-------------+  |
+ +---------------------------------------------------------------------+
+
+Constructs used for Implementing NTB
+====================================
+
+       1) Config Region
+       2) Self Scratchpad Registers
+       3) Peer Scratchpad Registers
+       4) Doorbell (DB) Registers
+       5) Memory Window (MW)
+
+
+Config Region:
+--------------
+
+The Config Region is a construct specific to NTB implemented using the NTB
+Endpoint Function Driver. The host-side and endpoint-side NTB function drivers
+exchange information with each other using this region. The Config Region has
+Control/Status Registers for configuring the Endpoint Controller. The host can
+write into this region to configure the outbound Address Translation Unit
+(ATU) and to indicate the link status. The endpoint can indicate the status of
+commands issued by the host in this region. The endpoint can also indicate the
+scratchpad offset and the number of memory windows to the host using this
+region.
+
+The format of the Config Region is given below. All fields are 32 bits wide.
+
+.. code-block:: text
+
+       +------------------------+
+       |         COMMAND        |
+       +------------------------+
+       |         ARGUMENT       |
+       +------------------------+
+       |         STATUS         |
+       +------------------------+
+       |         TOPOLOGY       |
+       +------------------------+
+       |    ADDRESS (LOWER 32)  |
+       +------------------------+
+       |    ADDRESS (UPPER 32)  |
+       +------------------------+
+       |           SIZE         |
+       +------------------------+
+       |   NO OF MEMORY WINDOW  |
+       +------------------------+
+       |  MEMORY WINDOW1 OFFSET |
+       +------------------------+
+       |       SPAD OFFSET      |
+       +------------------------+
+       |        SPAD COUNT      |
+       +------------------------+
+       |      DB ENTRY SIZE     |
+       +------------------------+
+       |         DB DATA        |
+       +------------------------+
+       |            :           |
+       +------------------------+
+       |            :           |
+       +------------------------+
+       |         DB DATA        |
+       +------------------------+
+
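+For illustration, a C-level view of this layout could look like the sketch
+below (the struct and field names are illustrative, not the driver's actual
+definitions; DB DATA repeats once per doorbell):
+
+.. code-block:: c
+
+    #include <linux/types.h>
+
+    #define MAX_DB 32
+
+    struct ntb_epf_config_region {
+            u32 command;
+            u32 argument;
+            u32 status;
+            u32 topology;
+            u32 addr_lower;      /* ADDRESS (LOWER 32) */
+            u32 addr_upper;      /* ADDRESS (UPPER 32) */
+            u32 size;
+            u32 num_mws;         /* NO OF MEMORY WINDOW */
+            u32 mw1_offset;      /* MEMORY WINDOW1 OFFSET */
+            u32 spad_offset;     /* SPAD OFFSET */
+            u32 spad_count;      /* SPAD COUNT */
+            u32 db_entry_size;   /* DB ENTRY SIZE */
+            u32 db_data[MAX_DB]; /* DB DATA, one per doorbell */
+    };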
+
+  COMMAND:
+
+       NTB function supports three commands:
+
+         CMD_CONFIGURE_DOORBELL (0x1): Command to configure doorbell. Before
+       invoking this command, the host should allocate and initialize
+       MSI/MSI-X vectors (i.e., initialize the MSI/MSI-X Capability in the
+       Endpoint). The endpoint on receiving this command will configure
+       the outbound ATU such that transactions to Doorbell BAR will be routed
+       to the MSI/MSI-X address programmed by the host. The ARGUMENT
+       register should be populated with the number of DBs to configure (in
+       the lower 16 bits) and whether MSI or MSI-X should be configured
+       (bit 16).
+
+         CMD_CONFIGURE_MW (0x2): Command to configure memory window (MW). The
+       host invokes this command after allocating a buffer that can be
+       accessed by the remote host. The allocated address should be programmed
+       in the ADDRESS register (64 bit), the size should be programmed in
+       the SIZE register and the memory window index should be programmed
+       in the ARGUMENT register. The endpoint on receiving this command
+       will configure the outbound ATU such that transactions to MW BAR
+       are routed to the address provided by the host.
+
+         CMD_LINK_UP (0x3): Command to indicate an NTB application is
+       bound to the EP device on the host side. Once the endpoint
+       receives this command from both the hosts, the endpoint will
+       raise a LINK_UP event to both the hosts to indicate that the host
+       NTB applications can start communicating with each other.
+
+  ARGUMENT:
+
+       The value of this register is based on the command issued in the
+       COMMAND register. See the COMMAND section for more information.
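+
+       As a sketch, a host issuing CMD_CONFIGURE_DOORBELL might encode the
+       ARGUMENT register as below (the register offsets and the helper name
+       are illustrative, derived from the layout above):
+
+       .. code-block:: c
+
+           #include <linux/bits.h>
+           #include <linux/io.h>
+           #include <linux/types.h>
+
+           #define CMD_CONFIGURE_DOORBELL  0x1
+           #define COMMAND_OFFSET          0x0
+           #define ARGUMENT_OFFSET         0x4
+
+           static void issue_configure_doorbell(void __iomem *cfg,
+                                                u16 db_count, bool use_msix)
+           {
+                   /* bits 0-15: number of DBs, bit 16: use MSI-X */
+                   writel(db_count | (use_msix ? BIT(16) : 0),
+                          cfg + ARGUMENT_OFFSET);
+                   writel(CMD_CONFIGURE_DOORBELL, cfg + COMMAND_OFFSET);
+           }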
+
+  TOPOLOGY:
+
+       Set to NTB_TOPO_B2B_USD for Primary interface
+       Set to NTB_TOPO_B2B_DSD for Secondary interface
+
+  ADDRESS/SIZE:
+
+       Address and Size to be used while configuring the memory window.
+       See "CMD_CONFIGURE_MW" for more info.
+
+  MEMORY WINDOW1 OFFSET:
+
+       Memory Window 1 and Doorbell registers are packed together in the
+       same BAR. The initial portion of the region holds the doorbell
+       registers and the latter portion is for memory window 1. This
+       register specifies the offset of memory window 1.
+
+  NO OF MEMORY WINDOW:
+
+       Specifies the number of memory windows supported by the NTB device.
+
+  SPAD OFFSET:
+
+       The self scratchpad region and config region are packed together in
+       the same BAR. The initial portion of the region holds the config
+       region and the latter portion is for the self scratchpad. This
+       register specifies the offset of the self scratchpad registers.
+
+  SPAD COUNT:
+
+       Specifies the number of scratchpad registers supported by the NTB
+       device.
+
+  DB ENTRY SIZE:
+
+       Used to determine the offset within the DB BAR that should be written
+       in order to raise a doorbell. EPF NTB can use either MSI or MSI-X to
+       ring a doorbell (MSI-X support will be added later). MSI uses the same
+       address for all the interrupts while MSI-X can provide different
+       addresses for different interrupts. The MSI/MSI-X address is provided
+       by the host and the address it gives is based on the MSI/MSI-X
+       implementation supported by the host. For instance, an ARM platform
+       using GIC ITS will have the same MSI-X address for all the interrupts.
+       In order to support all the combinations and use the same mechanism
+       for both MSI and MSI-X, EPF NTB allocates a separate region in the
+       Outbound Address Space for each of the interrupts. This region will
+       be mapped to the MSI/MSI-X address provided by the host. If a host
+       provides the same address for all the interrupts, all the regions
+       will be translated to the same address. If a host provides different
+       addresses, the regions will be translated to different addresses. This
+       will ensure there is no difference while raising the doorbell.
+
+  DB DATA:
+
+       EPF NTB supports 32 interrupts, so there are 32 DB DATA registers.
+       This holds the MSI/MSI-X data that has to be written to the MSI
+       address to raise a doorbell interrupt. This will be populated by
+       EPF NTB while invoking CMD_CONFIGURE_DOORBELL.
+
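+  Putting DB ENTRY SIZE and DB DATA together, a host would ring doorbell N
+  roughly as in this sketch (db_bar is the mapped Doorbell BAR; the helper
+  name is illustrative):
+
+  .. code-block:: c
+
+      #include <linux/io.h>
+      #include <linux/types.h>
+
+      static void ring_doorbell(void __iomem *db_bar, u32 db_entry_size,
+                                u32 db_num, u32 db_data)
+      {
+              /* each doorbell owns a db_entry_size-sized slot in the BAR */
+              writel(db_data, db_bar + db_entry_size * db_num);
+      }
+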
+Scratchpad Registers:
+---------------------
+
+  Each host has its own register space allocated in the memory of the NTB
+  endpoint controller. The registers are readable and writable from both
+  sides of the bridge. They are used by applications built over NTB to pass
+  control and status information between both sides of a device.
+
+  Scratchpad registers have two parts:
+       1) Self Scratchpad: the host's own register space
+       2) Peer Scratchpad: the remote host's register space
+
+Doorbell Registers:
+-------------------
+
+  Doorbell Registers are used by the hosts to interrupt each other.
+
+Memory Window:
+--------------
+
+  Actual transfer of data between the two hosts will happen using the
+  memory window.
+
+Modeling Constructs:
+====================
+
+There are five or more distinct regions (config, self scratchpad, peer
+scratchpad, doorbell, and one or more memory windows) to be modeled to achieve
+NTB functionality. At least one memory window is required, while more than
+one is permitted. All these regions should be mapped to BARs for the hosts
+to access them.
+
+If one 32-bit BAR is allocated for each of these regions, the scheme would
+look like this:
+
+======  ===============
+BAR NO  CONSTRUCTS USED
+======  ===============
+BAR0    Config Region
+BAR1    Self Scratchpad
+BAR2    Peer Scratchpad
+BAR3    Doorbell
+BAR4    Memory Window 1
+BAR5    Memory Window 2
+======  ===============
+
+However, if we allocate a separate BAR for each region, there would not be
+enough BARs for all the regions on a platform that supports only 64-bit BARs.
+
+In order to be supported on most platforms, the regions should be packed and
+mapped to BARs in a way that provides NTB functionality and also makes sure
+that a host doesn't access any region it is not supposed to.
+
+The following scheme is used in EPF NTB Function:
+
+======  ===============================
+BAR NO  CONSTRUCTS USED
+======  ===============================
+BAR0    Config Region + Self Scratchpad
+BAR1    Peer Scratchpad
+BAR2    Doorbell + Memory Window 1
+BAR3    Memory Window 2
+BAR4    Memory Window 3
+BAR5    Memory Window 4
+======  ===============================
+
+With this scheme, three BARs should be sufficient for basic NTB functionality.
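+
+As a host-side sketch, the self scratchpad packed into BAR0 can be located
+through the SPAD OFFSET field of the config region (the register offset and
+names here are illustrative, derived from the config region layout above):
+
+.. code-block:: c
+
+    #include <linux/pci.h>
+
+    #define SPAD_OFFSET_REG 0x24    /* illustrative: 10th 32-bit field */
+
+    static void __iomem *map_self_spad(struct pci_dev *pdev)
+    {
+            void __iomem *bar0 = pci_iomap(pdev, 0, 0);
+
+            if (!bar0)
+                    return NULL;
+            /* the config region sits at the start of BAR0; the self
+             * scratchpad follows at the advertised offset */
+            return bar0 + readl(bar0 + SPAD_OFFSET_REG);
+    }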
+
+Modeling Config/Scratchpad Region:
+----------------------------------
+
+.. code-block:: text
+
+ +-----------------+------->+------------------+        +-----------------+
+ |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ +-----------------+----+   +------------------+<-------+-----------------+
+ |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ +-----------------+    +-->+------------------+<-------+-----------------+
+ |       BAR2      |            Local Memory            |       BAR2      |
+ +-----------------+                                    +-----------------+
+ |       BAR3      |                                    |       BAR3      |
+ +-----------------+                                    +-----------------+
+ |       BAR4      |                                    |       BAR4      |
+ +-----------------+                                    +-----------------+
+ |       BAR5      |                                    |       BAR5      |
+ +-----------------+                                    +-----------------+
+   EP CONTROLLER 1                                        EP CONTROLLER 2
+
+The above diagram shows the Config region + Scratchpad region for HOST1
+(connected to EP controller 1) allocated in local memory. HOST1 can access the
+config region and scratchpad region (self scratchpad) using BAR0 of EP
+controller 1. The peer host (HOST2, connected to EP controller 2) can also
+access this scratchpad region (peer scratchpad) using BAR1 of EP controller 2.
+This diagram shows the case where the Config region and Scratchpad regions are
+allocated for HOST1; the same is applicable for HOST2.
+
+Modeling Doorbell/Memory Window 1:
+----------------------------------
+
+.. code-block:: text
+
+ +-----------------+    +----->+----------------+-----------+-----------------+
+ |       BAR0      |    |      |   Doorbell 1   +-----------> MSI-X ADDRESS 1 |
+ +-----------------+    |      +----------------+           +-----------------+
+ |       BAR1      |    |      |   Doorbell 2   +---------+ |                 |
+ +-----------------+----+      +----------------+         | |                 |
+ |       BAR2      |           |   Doorbell 3   +-------+ | +-----------------+
+ +-----------------+----+      +----------------+       | +-> MSI-X ADDRESS 2 |
+ |       BAR3      |    |      |   Doorbell 4   +-----+ |   +-----------------+
+ +-----------------+    |      |----------------+     | |   |                 |
+ |       BAR4      |    |      |                |     | |   +-----------------+
+ +-----------------+    |      |      MW1       +---+ | +-->+ MSI-X ADDRESS 3||
+ |       BAR5      |    |      |                |   | |     +-----------------+
+ +-----------------+    +----->-----------------+   | |     |                 |
+   EP CONTROLLER 1             |                |   | |     +-----------------+
+                               |                |   | +---->+ MSI-X ADDRESS 4 |
+                               +----------------+   |       +-----------------+
+                                EP CONTROLLER 2     |       |                 |
+                                  (OB SPACE)        |       |                 |
+                                                    +------->      MW1        |
+                                                            |                 |
+                                                            |                 |
+                                                            +-----------------+
+                                                            |                 |
+                                                            |                 |
+                                                            |                 |
+                                                            |                 |
+                                                            |                 |
+                                                            +-----------------+
+                                                             PCI Address Space
+                                                             (Managed by HOST2)
+
+The above diagram shows how the doorbell and memory window 1 are mapped so
+that HOST1 can raise a doorbell interrupt on HOST2 and access buffers exposed
+by HOST2 using memory window 1 (MW1). Here the doorbell and memory window 1
+regions are allocated in EP controller 2's outbound (OB) address space.
+Allocating and configuring BARs for the doorbell and memory window 1 is done
+during the initialization phase of the NTB endpoint function driver. Mapping
+from EP controller 2 OB space to PCI address space is done when HOST2 sends
+CMD_CONFIGURE_MW/CMD_CONFIGURE_DOORBELL.
+
+Modeling Optional Memory Windows:
+---------------------------------
+
+This is modeled the same way as MW1, but each of the additional memory windows
+is mapped to a separate BAR.
diff --git a/Documentation/PCI/endpoint/pci-ntb-howto.rst b/Documentation/PCI/endpoint/pci-ntb-howto.rst
new file mode 100644 (file)
index 0000000..1884bf2
--- /dev/null
@@ -0,0 +1,161 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================================================
+PCI Non-Transparent Bridge (NTB) Endpoint Function (EPF) User Guide
+===================================================================
+
+:Author: Kishon Vijay Abraham I <kishon@ti.com>
+
+This document is a guide to help users use the pci-epf-ntb function driver
+and the ntb_hw_epf host driver for NTB functionality. The steps to be
+followed on the host side and the EP side are given below. For the hardware
+configuration and internals of NTB using configurable endpoints, see
+Documentation/PCI/endpoint/pci-ntb-function.rst
+
+Endpoint Device
+===============
+
+Endpoint Controller Devices
+---------------------------
+
+For implementing NTB functionality at least two endpoint controller devices
+are required.
+
+To find the list of endpoint controller devices in the system::
+
+       # ls /sys/class/pci_epc/
+       2900000.pcie-ep  2910000.pcie-ep
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+       # ls /sys/kernel/config/pci_ep/controllers
+       2900000.pcie-ep  2910000.pcie-ep
+
+
+Endpoint Function Drivers
+-------------------------
+
+To find the list of endpoint function drivers in the system::
+
+       # ls /sys/bus/pci-epf/drivers
+       pci_epf_ntb
+
+If PCI_ENDPOINT_CONFIGFS is enabled::
+
+       # ls /sys/kernel/config/pci_ep/functions
+       pci_epf_ntb
+
+
+Creating pci-epf-ntb Device
+----------------------------
+
+A PCI endpoint function device can be created using configfs. To create a
+pci-epf-ntb device, the following commands can be used::
+
+       # mount -t configfs none /sys/kernel/config
+       # cd /sys/kernel/config/pci_ep/
+       # mkdir functions/pci_epf_ntb/func1
+
+The "mkdir func1" above creates the pci-epf-ntb function device that will
+be probed by pci_epf_ntb driver.
+
+The PCI endpoint framework populates the directory with the following
+configurable fields::
+
+       # ls functions/pci_epf_ntb/func1
+       baseclass_code    deviceid          msi_interrupts    pci-epf-ntb.0
+       progif_code       secondary         subsys_id         vendorid
+       cache_line_size   interrupt_pin     msix_interrupts   primary
+       revid             subclass_code     subsys_vendor_id
+
+The PCI endpoint function driver populates these entries with default values
+when the device is bound to the driver. The pci-epf-ntb driver populates
+vendorid with 0xffff and interrupt_pin with 0x0001::
+
+       # cat functions/pci_epf_ntb/func1/vendorid
+       0xffff
+       # cat functions/pci_epf_ntb/func1/interrupt_pin
+       0x0001
+
+
+Configuring pci-epf-ntb Device
+-------------------------------
+
+The user can configure the pci-epf-ntb device using its configfs entry. In order
+to change the vendorid and the deviceid, the following
+commands can be used::
+
+       # echo 0x104c > functions/pci_epf_ntb/func1/vendorid
+       # echo 0xb00d > functions/pci_epf_ntb/func1/deviceid
+
+In order to configure NTB specific attributes, a new sub-directory under func1
+should be created::
+
+       # mkdir functions/pci_epf_ntb/func1/pci_epf_ntb.0/
+
+The NTB function driver will populate this directory with various attributes
+that can be configured by the user::
+
+       # ls functions/pci_epf_ntb/func1/pci_epf_ntb.0/
+       db_count    mw1         mw2         mw3         mw4         num_mws
+       spad_count
+
+A sample configuration for NTB function is given below::
+
+       # echo 4 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/db_count
+       # echo 128 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/spad_count
+       # echo 2 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/num_mws
+       # echo 0x100000 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/mw1
+       # echo 0x100000 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/mw2
+
+Binding pci-epf-ntb Device to EP Controller
+--------------------------------------------
+
+The NTB function device should be attached to the two PCI endpoint controllers
+connected to the two hosts. Use the 'primary' and 'secondary' entries
+inside the NTB function device to attach one PCI endpoint controller to the
+primary interface and the other PCI endpoint controller to the secondary
+interface::
+
+       # ln -s controllers/2900000.pcie-ep/ functions/pci_epf_ntb/func1/primary
+       # ln -s controllers/2910000.pcie-ep/ functions/pci_epf_ntb/func1/secondary
+
+Once the above step is completed, both the PCI endpoint controllers are ready to
+establish a link with the host.
+
+
+Start the Link
+--------------
+
+In order for the endpoint device to establish a link with the host, the _start_
+field should be populated with '1'. For NTB, both PCI endpoint controllers
+should establish a link with their respective hosts::
+
+       # echo 1 > controllers/2900000.pcie-ep/start
+       # echo 1 > controllers/2910000.pcie-ep/start
+
+
+RootComplex Device
+==================
+
+lspci Output
+------------
+
+Note that the devices listed here correspond to the values populated in the
+"Configuring pci-epf-ntb Device" section above::
+
+       # lspci
+       0000:00:00.0 PCI bridge: Texas Instruments Device b00d
+       0000:01:00.0 RAM memory: Texas Instruments Device b00d
+
+
+Using ntb_hw_epf Device
+-----------------------
+
+The host side software follows the standard NTB software architecture in Linux.
+All the existing client side NTB utilities, like NTB Transport Client, NTB
+Netdev, NTB Ping Pong Test Client and NTB Tool Test Client, can be used with
+the NTB function device.
+
+For more information on NTB, see
+:doc:`Non-Transparent Bridge <../../driver-api/ntb>`
index 18c2865..da385d8 100644 (file)
@@ -3,7 +3,7 @@ cfag12864b LCD Driver Documentation
 ===================================
 
 :License:              GPLv2
-:Author & Maintainer:  Miguel Ojeda Sandonis
+:Author & Maintainer:  Miguel Ojeda <ojeda@kernel.org>
 :Date:                 2006-10-27
 
 
index c0b7faf..a7d3fe5 100644 (file)
@@ -3,7 +3,7 @@ ks0108 LCD Controller Driver Documentation
 ==========================================
 
 :License:              GPLv2
-:Author & Maintainer:  Miguel Ojeda Sandonis
+:Author & Maintainer:  Miguel Ojeda <ojeda@kernel.org>
 :Date:                 2006-10-27
 
 
index c513eaf..64c62b9 100644 (file)
@@ -1299,6 +1299,10 @@ PAGE_SIZE multiple when read back.
                Amount of cached filesystem data that was modified and
                is currently being written back to disk
 
+         swapcached
+               Amount of swap cached in memory. The swapcache is accounted
+               against both memory and swap usage.
+
          anon_thp
                Amount of memory used in anonymous mappings backed by
                transparent hugepages
@@ -2094,7 +2098,7 @@ If the program returns 0, the attempt fails with -EPERM, otherwise
 it succeeds.
 
 An example of BPF_CGROUP_DEVICE program may be found in the kernel
-source tree in the tools/testing/selftests/bpf/dev_cgroup.c file.
+source tree in the tools/testing/selftests/bpf/progs/dev_cgroup.c file.
 
 
 RDMA
index b02d6dd..5c1d2f0 100644 (file)
@@ -5,10 +5,10 @@ Authors
 Original Author
 ---------------
 
-Steve French (sfrench@samba.org)
+Steve French (smfrench@gmail.com, sfrench@samba.org)
 
 The author wishes to express his appreciation and thanks to:
-Andrew Tridgell (Samba team) for his early suggestions about smb/cifs VFS
+Andrew Tridgell (Samba team) for his early suggestions about SMB/CIFS VFS
 improvements. Thanks to IBM for allowing me time and test resources to pursue
 this project, to Jim McDonough from IBM (and the Samba Team) for his help, to
 the IBM Linux JFS team for explaining many esoteric Linux filesystem features.
@@ -51,7 +51,7 @@ Patch Contributors
 - Ronnie Sahlberg (for SMB3 xattr work, bug fixes, and lots of great work on compounding)
 - Shirish Pargaonkar (for many ACL patches over the years)
 - Sachin Prabhu (many bug fixes, including for reconnect, copy offload and security)
-- Paulo Alcantara
+- Paulo Alcantara (for some excellent work in DFS, and in booting from SMB3)
 - Long Li (some great work on RDMA, SMB Direct)
 
 
index 71f2ecb..3147bba 100644 (file)
@@ -3,6 +3,7 @@ Changes
 =======
 
 See https://wiki.samba.org/index.php/LinuxCIFSKernel for summary
-information (that may be easier to read than parsing the output of
-"git log fs/cifs") about fixes/improvements to CIFS/SMB2/SMB3 support (changes
+information about fixes/improvements to CIFS/SMB2/SMB3 support (changes
 to cifs.ko module) by kernel version (and cifs internal module version).
+This may be easier to read than parsing the output of "git log fs/cifs"
+by release.
index cc2851d..53ea629 100644 (file)
@@ -7,19 +7,19 @@ Introduction
   protocol which was the successor to the Server Message Block
   (SMB) protocol, the native file sharing mechanism for most early
   PC operating systems. New and improved versions of CIFS are now
-  called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1)
-  is strongly preferred over using older dialects like CIFS due to
-  security reasons. All modern dialects, including the most recent,
-  SMB3.1.1 are supported by the CIFS VFS module. The SMB3 protocol
-  is implemented and supported by all major file servers
-  such as all modern versions of Windows (including Windows 2016
-  Server), as well as by Samba (which provides excellent
-  CIFS/SMB2/SMB3 server support and tools for Linux and many other
-  operating systems).  Apple systems also support SMB3 well, as
-  do most Network Attached Storage vendors, so this network
-  filesystem client can mount to a wide variety of systems.
-  It also supports mounting to the cloud (for example
-  Microsoft Azure), including the necessary security features.
+  called SMB2 and SMB3. Use of SMB3 (and later, including SMB3.1.1,
+  the most current dialect) is strongly preferred over using older
+  dialects like CIFS due to security reasons. All modern dialects,
+  including the most recent, SMB3.1.1, are supported by the CIFS VFS
+  module. The SMB3 protocol is implemented and supported by all major
+  file servers such as Windows (including Windows 2019 Server), as
+  well as by Samba (which provides excellent CIFS/SMB2/SMB3 server
+  support and tools for Linux and many other operating systems).
+  Apple systems also support SMB3 well, as do most Network Attached
+  Storage vendors, so this network filesystem client can mount to a
+  wide variety of systems. It also supports mounting to the cloud
+  (for example Microsoft Azure), including the necessary security
+  features.
 
   The intent of this module is to provide the most advanced network
   file system function for SMB3 compliant servers, including advanced
@@ -27,8 +27,8 @@ Introduction
   POSIX compliance, secure per-user session establishment, encryption,
   high performance safe distributed caching (leases/oplocks), optional packet
   signing, large files, Unicode support and other internationalization
-  improvements. Since both Samba server and this filesystem client support
-  the CIFS Unix extensions (and in the future SMB3 POSIX extensions),
+  improvements. Since both Samba server and this filesystem client support the
+  CIFS Unix extensions, and the Linux client also supports SMB3 POSIX extensions,
   the combination can provide a reasonable alternative to other network and
   cluster file systems for fileserving in some Linux to Linux environments,
   not just in Linux to Windows (or Linux to Mac) environments.
index 25f1157..2646ed2 100644 (file)
@@ -13,24 +13,26 @@ is a partial list of the known problems and missing features:
 
 a) SMB3 (and SMB3.1.1) missing optional features:
 
-   - multichannel (started), integration with RDMA
-   - directory leases (improved metadata caching), started (root dir only)
+   - multichannel (partially integrated), integration of multichannel with RDMA
+   - directory leases (improved metadata caching). Currently only implemented for root dir
    - T10 copy offload ie "ODX" (copy chunk, and "Duplicate Extents" ioctl
      currently the only two server side copy mechanisms supported)
 
 b) improved sparse file support (fiemap and SEEK_HOLE are implemented
-   but additional features would be supportable by the protocol).
+   but additional features would be supportable by the protocol such
+   as FALLOC_FL_COLLAPSE_RANGE and FALLOC_FL_INSERT_RANGE)
 
 c) Directory entry caching relies on a 1 second timer, rather than
   using Directory Leases; currently only the root file handle is cached longer
+   by leveraging Directory Leases
 
-d) quota support (needs minor kernel change since quota calls
-   to make it to network filesystems or deviceless filesystems)
+d) quota support (needs minor kernel change since quota calls otherwise
+   won't make it to network filesystems or deviceless filesystems).
 
 e) Additional use cases can be optimized to use "compounding" (e.g.
    open/query/close and open/setinfo/close) to reduce the number of
    roundtrips to the server and improve performance. Various cases
-   (stat, statfs, create, unlink, mkdir) already have been improved by
+   (stat, statfs, create, unlink, mkdir, xattrs) already have been improved by
    using compounding but more can be done. In addition we could
    significantly reduce redundant opens by using deferred close (with
    handle caching leases) and better using reference counters on file
@@ -60,7 +62,9 @@ k) Add tools to take advantage of more smb3 specific ioctls and features
    metadata attributes easier from tools (e.g. extending what was done
    in smb-info tool).
 
-l) encrypted file support
+l) encrypted file support (currently the attribute showing the file is
+   encrypted on the server is reported, but changing the attribute is not
+   supported).
 
 m) improved stats gathering tools (perhaps integration with nfsometer?)
    to extend and make easier to use what is currently in /proc/fs/cifs/Stats
@@ -69,14 +73,13 @@ n) Add support for claims based ACLs ("DAC")
 
 o) mount helper GUI (to simplify the various configuration options on mount)
 
-p) Add support for witness protocol (perhaps ioctl to cifs.ko from user space
-   tool listening on witness protocol RPC) to allow for notification of share
-   move, server failover, and server adapter changes.  And also improve other
-   failover scenarios, e.g. when client knows multiple DFS entries point to
-   different servers, and the server we are connected to has gone down.
+p) Expand support for witness protocol to allow for notification of share
+   move and server network adapter changes. Currently only notification of
+   server move via the witness protocol is supported by the Linux client.
 
 q) Allow mount.cifs to be more verbose in reporting errors with dialect
-   or unsupported feature errors.
+   or unsupported feature errors. This would now be easier due to the
+   implementation of the new mount API.
 
 r) updating cifs documentation, and user guide.
 
@@ -87,11 +90,10 @@ t) split cifs and smb3 support into separate modules so legacy (and less
    secure) CIFS dialect can be disabled in environments that don't need it
    and simplify the code.
 
-v) POSIX Extensions for SMB3.1.1 (started, create and mkdir support added
-   so far).
+v) Additional testing of POSIX Extensions for SMB3.1.1
 
 w) Add support for additional strong encryption types, and additional spnego
-   authentication mechanisms (see MS-SMB2)
+   authentication mechanisms (see MS-SMB2).  GCM-256 is now partially implemented.
 
 x) Finish support for SMB3.1.1 compression
 
index b6d9f02..13783dc 100644 (file)
@@ -83,7 +83,7 @@ and encrypted shares and stronger signing and authentication algorithms.
 There are additional mount options that may be helpful for SMB3 to get
 improved POSIX behavior (NB: can use vers=3.0 to force only SMB3, never 2.1):
 
-     ``mfsymlinks`` and ``cifsacl`` and ``idsfromsid``
+   ``mfsymlinks`` and either ``cifsacl`` or ``modefromsid`` (usually with ``idsfromsid``)
 
 Allowing User Mounts
 ====================
index 8b5f717..0454572 100644 (file)
                                to enforce probe and suspend/resume ordering.
                        rpm --  Like "on", but also use to order runtime PM.
 
+       fw_devlink.strict=<bool>
+                       [KNL] Treat all inferred dependencies as mandatory
+                       dependencies. This only applies for fw_devlink=on|rpm.
+                       Format: <bool>
+
        gamecon.map[2|3]=
                        [HW,JOY] Multisystem joystick and NES/SNES/PSX pad
                        support via parallel port (up to 5 devices per port)
                        In such case C2/C3 won't be used again.
                        idle=nomwait: Disable mwait for CPU C-states
 
+       idxd.sva=       [HW]
+                       Format: <bool>
+                       Allow force disabling of Shared Virtual Memory (SVA)
+                       support for the idxd driver. By default it is set to
+                       true (1).
+
        ieee754=        [MIPS] Select IEEE Std 754 conformance mode
                        Format: { strict | legacy | 2008 | relaxed }
                        Default: strict
                        last alloc / free. For more information see
                        Documentation/vm/slub.rst.
 
-       slub_memcg_sysfs=       [MM, SLUB]
-                       Determines whether to enable sysfs directories for
-                       memory cgroup sub-caches. 1 to enable, 0 to disable.
-                       The default is determined by CONFIG_SLUB_MEMCG_SYSFS_ON.
-                       Enabling this can lead to a very high number of debug
-                       directories and files being created under
-                       /sys/kernel/slub.
-
        slub_max_order= [MM, SLUB]
                        Determines the maximum allowed order for slabs.
                        A high setting may cause OOMs due to memory
                        growing up) the main stack are reserved for no other
                        mapping. Default value is 256 pages.
 
+       stack_depot_disable= [KNL]
+                       Setting this to true through the kernel command line
+                       will disable the stack depot, thereby saving the static
+                       memory consumed by the stack hash table. By default this
+                       is set to false.
+
        stacktrace      [FTRACE]
                        Enabled the stack tracer on boot up.
 
                        default x2apic cluster mode on platforms
                        supporting x2apic.
 
-       x86_intel_mid_timer= [X86-32,APBT]
-                       Choose timer option for x86 Intel MID platform.
-                       Two valid options are apbt timer only and lapic timer
-                       plus one apbt timer for broadcast timer.
-                       x86_intel_mid_timer=apbt_only | lapic_and_apbt
-
        xen_512gb_limit         [KNL,X86-64,XEN]
                        Restricts the kernel running paravirtualized under Xen
                        to use only up to 512 GB of RAM. The reason to do so is
index 5c4432c..5307f90 100644 (file)
@@ -160,16 +160,16 @@ Under each memory block, you can see 5 files:
 
                     "online_movable", "online", "offline" command
                     which will be performed on all sections in the block.
-``phys_device``     read-only: designed to show the name of physical memory
-                    device.  This is not well implemented now.
-``removable``       read-only: contains an integer value indicating
-                    whether the memory block is removable or not
-                    removable.  A value of 1 indicates that the memory
-                    block is removable and a value of 0 indicates that
-                    it is not removable. A memory block is removable only if
-                    every section in the block is removable.
-``valid_zones``     read-only: designed to show which zones this memory block
-                   can be onlined to.
+``phys_device``     read-only: legacy interface only ever used on s390x to
+                    expose the covered storage increment.
+``removable``       read-only: legacy interface that indicated whether a memory
+                    block was likely to be offlineable or not.  Newer kernel
+                    versions return "1" if and only if the kernel supports
+                    memory offlining.
+``valid_zones``     read-only: designed to show by which zone memory provided by
+                    a memory block is managed, and to show by which zone memory
+                    provided by an offline memory block could be managed when
+                    onlining.
 
                    The first column shows its default zone.
 
index 5ff6a0f..977ab3f 100644 (file)
@@ -1033,7 +1033,9 @@ speakup + keypad 3, you would hear:
 The speakup key is depressed, so the name of the key state is speakup.
 This part of the message comes from the states collection.
 
-14.2.  Loading Your Own Messages
+14.2.  Changing Language
+
+14.2.1. Loading Your Own Messages
 
 The files under the i18n subdirectory all follow the same format.
 They consist of lines, with one message per line.
@@ -1066,8 +1068,50 @@ echo '1 azul' > /speakup/i18n/colors
 The next time that Speakup says message 1 from the colors group, it will
 say "azul", rather than "blue."
 
+14.2.2. Choosing a Language
+
 In the future, translations into various languages will be made available,
-and most users will just load the files necessary for their language.
+and most users will just load the files necessary for their language. So far,
+only French is available beyond the native Canadian English.
+
+French is only available after you are logged in.
+
+Canadian English is the default language. To switch to another language,
+download the Speakup source and untar it in your home directory. The
+following command should let you do this:
+
+tar xvjf speakup-<version>.tar.bz2
+
+where <version> is the version number of the application.
+
+Next, change to the newly created directory, then into the tools/ directory,
+and run the script speakup_setlocale. You are asked which language you want to
+use. Type the code associated with your language (e.g. fr for French), then
+press Enter. The needed files are copied into the i18n directory.
+
+Note: speakupconf must be installed on your system so that settings are saved.
+Otherwise, you will get an error: your language will be loaded, but you will
+have to run the script again every time Speakup restarts.
+See section 16.1. for information about speakupconf.
+
+You will have to repeat these steps for any change of locale, i.e. if you wish
+to change Speakup's language or charset (iso-8859-15 or UTF-8).
+
+If you wish to store the settings, note that at your next login, you will need
+to do:
+
+speakupconf load
+
+Alternatively, you can add the above line to your
+~/.bashrc or ~/.bash_profile file.
+
+If your system administrator ran the script himself, all users will be able
+to switch from English to the language chosen by root and directly run
+speakupconf load (or add this to the ~/.bashrc or
+~/.bash_profile file). If there are several languages to handle, the
+administrator (or each user) will have to run the first steps up to speakupconf
+save, choosing the appropriate language, in each user's home directory. Every
+user will then be able to run speakupconf load, and Speakup will load their
+own settings.
 
 14.3.  No Support for Non-Western-European Languages
 
index e35a3f2..586cd4b 100644 (file)
@@ -983,11 +983,11 @@ that benefit from having their data cached, zone_reclaim_mode should be
 left disabled as the caching effect is likely to be more important than
 data locality.
 
-zone_reclaim may be enabled if it's known that the workload is partitioned
-such that each partition fits within a NUMA node and that accessing remote
-memory would cause a measurable performance reduction.  The page allocator
-will then reclaim easily reusable pages (those page cache pages that are
-currently not used) before allocating off node pages.
+Consider enabling one or more zone_reclaim mode bits if it's known that the
+workload is partitioned such that each partition fits within a NUMA node
+and that accessing remote memory would cause a measurable performance
+reduction.  The page allocator will take additional actions before
+allocating off node pages.
 
 Allowing zone reclaim to write out pages stops processes that are
 writing large amounts of data from dirtying pages on other nodes. Zone
index 6178153..5422407 100644 (file)
@@ -284,6 +284,9 @@ The following sysctls are available for the XFS filesystem:
        removes unused preallocation from clean inodes and releases
        the unused space back to the free pool.
 
+  fs.xfs.speculative_cow_prealloc_lifetime
+       This is an alias for speculative_prealloc_lifetime.
+
   fs.xfs.error_level           (Min: 0  Default: 3  Max: 11)
        A volume knob for error reporting when internal errors occur.
        This will generate detailed messages & backtraces for filesystem
@@ -356,12 +359,13 @@ The following sysctls are available for the XFS filesystem:
 Deprecated Sysctls
 ==================
 
-===========================     ================
-  Name                         Removal Schedule
-===========================     ================
-fs.xfs.irix_sgid_inherit        September 2025
-fs.xfs.irix_symlink_mode        September 2025
-===========================     ================
+===========================================     ================
+  Name                                          Removal Schedule
+===========================================     ================
+fs.xfs.irix_sgid_inherit                        September 2025
+fs.xfs.irix_symlink_mode                        September 2025
+fs.xfs.speculative_cow_prealloc_lifetime        September 2025
+===========================================     ================
 
 
 Removed Sysctls
index 377e9d2..0609da7 100644 (file)
@@ -17,12 +17,12 @@ For ACPI on arm64, tables also fall into the following categories:
 
        -  Recommended: BERT, EINJ, ERST, HEST, PCCT, SSDT
 
-       -  Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IORT,
-          MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT, STAO,
-         TCPA, TPM2, UEFI, XENV
+       -  Optional: BGRT, CPEP, CSRT, DBG2, DRTM, ECDT, FACS, FPDT, IBFT,
+          IORT, MCHI, MPST, MSCT, NFIT, PMTT, RASF, SBST, SLIT, SPMI, SRAT,
+          STAO, TCPA, TPM2, UEFI, XENV
 
-       -  Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IBFT, IVRS, LPIT,
-          MSDM, OEMx, PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
+       -  Not supported: BOOT, DBGP, DMAR, ETDT, HPET, IVRS, LPIT, MSDM, OEMx,
+          PSDT, RSDT, SLIC, WAET, WDAT, WDRT, WPBT
 
 ====== ========================================================================
 Table  Usage for ARMv8 Linux
index 7195102..d410a47 100644 (file)
@@ -130,6 +130,9 @@ stable kernels.
 | Marvell        | ARM-MMU-500     | #582743         | N/A                         |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
+| NVIDIA         | Carmel Core     | N/A             | NVIDIA_CARMEL_CNP_ERRATUM   |
++----------------+-----------------+-----------------+-----------------------------+
++----------------+-----------------+-----------------+-----------------------------+
 | Freescale/NXP  | LS2080A/LS1043A | A-008585        | FSL_ERRATUM_A008585         |
 +----------------+-----------------+-----------------+-----------------------------+
 +----------------+-----------------+-----------------+-----------------------------+
index 19d4d15..66c5a4e 100644 (file)
@@ -430,13 +430,13 @@ fifo_expire_async
 -----------------
 
 This parameter is used to set the timeout of asynchronous requests. Default
-value of this is 248ms.
+value of this is 250ms.
 
 fifo_expire_sync
 ----------------
 
 This parameter is used to set the timeout of synchronous requests. Default
-value of this is 124ms. In case to favor synchronous requests over asynchronous
+value of this is 125ms. To favor synchronous requests over asynchronous
+ones, this value should be decreased relative to fifo_expire_async.
 
 low_latency
index 5bd45d5..fd65168 100644 (file)
@@ -49,8 +49,7 @@ extensions = ['kerneldoc', 'rstFlatTable', 'kernel_include',
 if major >= 3:
     sys.stderr.write('''WARNING: The kernel documentation build process
         support for Sphinx v3.0 and above is brand new. Be prepared for
-        possible issues in the generated output.
-        ''')
+        possible issues in the generated output.\n''')
     if (major > 3) or (minor > 0 or patch >= 2):
         # Sphinx c function parser is more pedantic with regards to type
         # checking. Due to that, having macros at c:function cause problems.
index 75cb757..e6d23f1 100644 (file)
@@ -528,16 +528,14 @@ an I/O device, you should not be using this part of the API.
 
 ::
 
-       void *
-       dma_alloc_noncoherent(struct device *dev, size_t size,
-                       dma_addr_t *dma_handle, enum dma_data_direction dir,
-                       gfp_t gfp)
+       struct page *
+       dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle,
+                       enum dma_data_direction dir, gfp_t gfp)
 
-This routine allocates a region of <size> bytes of consistent memory.  It
-returns a pointer to the allocated region (in the processor's virtual address
-space) or NULL if the allocation failed.  The returned memory may or may not
-be in the kernel direct mapping.  Drivers must not call virt_to_page on
-the returned memory region.
+This routine allocates a region of <size> bytes of non-coherent memory.  It
+returns a pointer to the first struct page for the region, or NULL if the
+allocation failed. The resulting struct page can be used for everything a
+struct page is suitable for.
 
 It also returns a <dma_handle> which may be cast to an unsigned integer the
 same width as the bus and given to the device as the DMA address base of
@@ -558,51 +556,33 @@ reused.
 ::
 
        void
-       dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
+       dma_free_pages(struct device *dev, size_t size, struct page *page,
                        dma_addr_t dma_handle, enum dma_data_direction dir)
 
-Free a region of memory previously allocated using dma_alloc_noncoherent().
-dev, size and dma_handle and dir must all be the same as those passed into
-dma_alloc_noncoherent().  cpu_addr must be the virtual address returned by
-dma_alloc_noncoherent().
+Free a region of memory previously allocated using dma_alloc_pages().
+dev, size, dma_handle and dir must all be the same as those passed into
+dma_alloc_pages().  page must be the pointer returned by dma_alloc_pages().
 
 ::
 
-       struct page *
-       dma_alloc_pages(struct device *dev, size_t size, dma_addr_t *dma_handle,
-                       enum dma_data_direction dir, gfp_t gfp)
-
-This routine allocates a region of <size> bytes of non-coherent memory.  It
-returns a pointer to first struct page for the region, or NULL if the
-allocation failed. The resulting struct page can be used for everything a
-struct page is suitable for.
-
-It also returns a <dma_handle> which may be cast to an unsigned integer the
-same width as the bus and given to the device as the DMA address base of
-the region.
-
-The dir parameter specified if data is read and/or written by the device,
-see dma_map_single() for details.
-
-The gfp parameter allows the caller to specify the ``GFP_`` flags (see
-kmalloc()) for the allocation, but rejects flags used to specify a memory
-zone such as GFP_DMA or GFP_HIGHMEM.
+       void *
+       dma_alloc_noncoherent(struct device *dev, size_t size,
+                       dma_addr_t *dma_handle, enum dma_data_direction dir,
+                       gfp_t gfp)
 
-Before giving the memory to the device, dma_sync_single_for_device() needs
-to be called, and before reading memory written by the device,
-dma_sync_single_for_cpu(), just like for streaming DMA mappings that are
-reused.
+This routine is a convenient wrapper around dma_alloc_pages that returns the
+kernel virtual address for the allocated memory instead of the page structure.
 
 ::
 
        void
-       dma_free_pages(struct device *dev, size_t size, struct page *page,
+       dma_free_noncoherent(struct device *dev, size_t size, void *cpu_addr,
                        dma_addr_t dma_handle, enum dma_data_direction dir)
 
-Free a region of memory previously allocated using dma_alloc_pages().
-dev, size and dma_handle and dir must all be the same as those passed into
-dma_alloc_noncoherent().  page must be the pointer returned by
-dma_alloc_pages().
+Free a region of memory previously allocated using dma_alloc_noncoherent().
+dev, size, dma_handle and dir must all be the same as those passed into
+dma_alloc_noncoherent().  cpu_addr must be the virtual address returned by
+dma_alloc_noncoherent().
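+
+As a usage sketch (in a hypothetical driver; error handling and device
+programming elided), for a device that writes into a 4 KiB buffer::
+
+        void *buf;
+        dma_addr_t dma;
+
+        buf = dma_alloc_noncoherent(dev, SZ_4K, &dma, DMA_FROM_DEVICE,
+                                    GFP_KERNEL);
+        if (!buf)
+                return -ENOMEM;
+
+        /* hand dma to the device and let it fill the buffer ... */
+
+        dma_sync_single_for_cpu(dev, dma, SZ_4K, DMA_FROM_DEVICE);
+        /* ... the CPU may now read buf ... */
+
+        dma_free_noncoherent(dev, SZ_4K, buf, dma, DMA_FROM_DEVICE);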
 
 ::
 
index 2adffb3..201b542 100644 (file)
@@ -19,11 +19,8 @@ User Space Memory Access
 Memory Allocation Controls
 ==========================
 
-Functions which need to allocate memory often use GFP flags to express
-how that memory should be allocated. The GFP acronym stands for "get
-free pages", the underlying memory allocation function. Not every GFP
-flag is allowed to every function which may allocate memory. Most
-users will want to use a plain ``GFP_KERNEL``.
+.. kernel-doc:: include/linux/gfp.h
+   :internal:
 
 .. kernel-doc:: include/linux/gfp.h
    :doc: Page mobility and placement hints
index f7809c7..1b1cf4f 100644 (file)
@@ -22,6 +22,7 @@ whole; patches welcome!
    ubsan
    kmemleak
    kcsan
+   kfence
    gdb-kernel-debugging
    kgdb
    kselftest
index a248ac3..ddf4239 100644 (file)
@@ -147,16 +147,15 @@ negative values to distinguish between different kinds of inaccessible memory
 like redzones or freed memory (see mm/kasan/kasan.h).
 
 In the report above the arrows point to the shadow byte 03, which means that
-the accessed address is partially accessible.
-
-For tag-based KASAN this last report section shows the memory tags around the
-accessed address (see `Implementation details`_ section).
+the accessed address is partially accessible. For tag-based KASAN modes this
+last report section shows the memory tags around the accessed address
+(see the `Implementation details`_ section).
 
 Boot parameters
 ~~~~~~~~~~~~~~~
 
-Hardware tag-based KASAN mode (see the section about different mode below) is
-intended for use in production as a security mitigation. Therefore it supports
+Hardware tag-based KASAN mode (see the section about various modes below) is
+intended for use in production as a security mitigation. Therefore, it supports
boot parameters that allow disabling KASAN completely or otherwise controlling
particular KASAN features.
 
@@ -166,7 +165,8 @@ particular KASAN features.
   traces collection (default: ``on``).
 
 - ``kasan.fault=report`` or ``=panic`` controls whether to only print a KASAN
-  report or also panic the kernel (default: ``report``).
+  report or also panic the kernel (default: ``report``). Note that tag
+  checking gets disabled after the first reported bug.
 
 For developers
 ~~~~~~~~~~~~~~
@@ -289,6 +289,16 @@ reserved to tag freed memory regions.
 Hardware tag-based KASAN currently only supports tagging of
 kmem_cache_alloc/kmalloc and page_alloc memory.
 
+If the hardware doesn't support MTE (pre ARMv8.5), hardware tag-based KASAN
+won't be enabled. In this case, all boot parameters are ignored.
+
+Note that enabling CONFIG_KASAN_HW_TAGS always results in in-kernel TBI being
+enabled, even when kasan.mode=off is provided or when the hardware doesn't
+support MTE (but supports TBI).
+
+Hardware tag-based KASAN only reports the first found bug; after that, MTE tag
+checking gets disabled.
+
 What memory accesses are sanitised by KASAN?
 --------------------------------------------
 
@@ -352,17 +362,17 @@ unmapped. This will require changes in arch-specific code.
 This allows ``VMAP_STACK`` support on x86, and can simplify support of
 architectures that do not have a fixed module region.
 
-CONFIG_KASAN_KUNIT_TEST & CONFIG_TEST_KASAN_MODULE
---------------------------------------------------
+CONFIG_KASAN_KUNIT_TEST and CONFIG_KASAN_MODULE_TEST
+----------------------------------------------------
 
-KASAN tests consist on two parts:
+KASAN tests consist of two parts:
 
 1. Tests that are integrated with the KUnit Test Framework. Enabled with
 ``CONFIG_KASAN_KUNIT_TEST``. These tests can be run and partially verified
 automatically in a few different ways, see the instructions below.
 
 2. Tests that are currently incompatible with KUnit. Enabled with
-``CONFIG_TEST_KASAN_MODULE`` and can only be run as a module. These tests can
+``CONFIG_KASAN_MODULE_TEST`` and can only be run as a module. These tests can
 only be verified manually, by loading the kernel module and inspecting the
 kernel log for KASAN reports.
 
diff --git a/Documentation/dev-tools/kfence.rst b/Documentation/dev-tools/kfence.rst
new file mode 100644 (file)
index 0000000..fdf04e7
--- /dev/null
@@ -0,0 +1,298 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. Copyright (C) 2020, Google LLC.
+
+Kernel Electric-Fence (KFENCE)
+==============================
+
+Kernel Electric-Fence (KFENCE) is a low-overhead sampling-based memory safety
+error detector. KFENCE detects heap out-of-bounds access, use-after-free, and
+invalid-free errors.
+
+KFENCE is designed to be enabled in production kernels, and has near zero
+performance overhead. Compared to KASAN, KFENCE trades performance for
+precision. The main motivation behind KFENCE's design is that with enough
+total uptime KFENCE will detect bugs in code paths not typically exercised by
+non-production test workloads. One way to quickly achieve a large enough total
+uptime is when the tool is deployed across a large fleet of machines.
+
+Usage
+-----
+
+To enable KFENCE, configure the kernel with::
+
+    CONFIG_KFENCE=y
+
+To build a kernel with KFENCE support, but disabled by default (to enable, set
+``kfence.sample_interval`` to a non-zero value), configure the kernel with::
+
+    CONFIG_KFENCE=y
+    CONFIG_KFENCE_SAMPLE_INTERVAL=0
+
+KFENCE provides several other configuration options to customize behaviour (see
+the respective help text in ``lib/Kconfig.kfence`` for more info).
+
+Tuning performance
+~~~~~~~~~~~~~~~~~~
+
+The most important parameter is KFENCE's sample interval, which can be set via
+the kernel boot parameter ``kfence.sample_interval`` in milliseconds. The
+sample interval determines the frequency with which heap allocations will be
+guarded by KFENCE. The default is configurable via the Kconfig option
+``CONFIG_KFENCE_SAMPLE_INTERVAL``. Setting ``kfence.sample_interval=0``
+disables KFENCE.
+
+The KFENCE memory pool is of fixed size, and if the pool is exhausted, no
+further KFENCE allocations occur. With ``CONFIG_KFENCE_NUM_OBJECTS`` (default
+255), the number of available guarded objects can be controlled. Each object
+requires 2 pages, one for the object itself and the other one used as a guard
+page; object pages are interleaved with guard pages, and every object page is
+therefore surrounded by two guard pages.
+
+The total memory dedicated to the KFENCE memory pool can be computed as::
+
+    ( #objects + 1 ) * 2 * PAGE_SIZE
+
+Using the default config, and assuming a page size of 4 KiB, results in
+dedicating 2 MiB to the KFENCE memory pool.
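+
+Expressed as a C sketch mirroring the formula above (the macro name is
+illustrative, not the kernel's definition)::
+
+    /* (#objects + 1) * 2 * PAGE_SIZE; with the defaults:
+     * (255 + 1) * 2 * 4096 bytes = 2 MiB */
+    #define KFENCE_POOL_SIZE(num_objects) \
+            (((num_objects) + 1) * 2 * PAGE_SIZE)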
+
+Note: On architectures that support huge pages, KFENCE will ensure that the
+pool is using pages of size ``PAGE_SIZE``. This will result in additional page
+tables being allocated.
+
+Error reports
+~~~~~~~~~~~~~
+
+A typical out-of-bounds access looks like this::
+
+    ==================================================================
+    BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read+0xa3/0x22b
+
+    Out-of-bounds read at 0xffffffffb672efff (1B left of kfence-#17):
+     test_out_of_bounds_read+0xa3/0x22b
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    kfence-#17 [0xffffffffb672f000-0xffffffffb672f01f, size=32, cache=kmalloc-32] allocated by task 507:
+     test_alloc+0xf3/0x25b
+     test_out_of_bounds_read+0x98/0x22b
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    CPU: 4 PID: 107 Comm: kunit_try_catch Not tainted 5.8.0-rc6+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    ==================================================================
+
+The header of the report provides a short summary of the function involved in
+the access. It is followed by more detailed information about the access and
+its origin. Note that real kernel addresses are only shown when using the
+kernel command line option ``no_hash_pointers``.
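+
+For example, to see real addresses in reports, boot the kernel with::
+
+    no_hash_pointers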
+
+Use-after-free accesses are reported as::
+
+    ==================================================================
+    BUG: KFENCE: use-after-free read in test_use_after_free_read+0xb3/0x143
+
+    Use-after-free read at 0xffffffffb673dfe0 (in kfence-#24):
+     test_use_after_free_read+0xb3/0x143
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    kfence-#24 [0xffffffffb673dfe0-0xffffffffb673dfff, size=32, cache=kmalloc-32] allocated by task 507:
+     test_alloc+0xf3/0x25b
+     test_use_after_free_read+0x76/0x143
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    freed by task 507:
+     test_use_after_free_read+0xa8/0x143
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    CPU: 4 PID: 109 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    ==================================================================
+
+KFENCE also reports on invalid frees, such as double-frees::
+
+    ==================================================================
+    BUG: KFENCE: invalid free in test_double_free+0xdc/0x171
+
+    Invalid free of 0xffffffffb6741000:
+     test_double_free+0xdc/0x171
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    kfence-#26 [0xffffffffb6741000-0xffffffffb674101f, size=32, cache=kmalloc-32] allocated by task 507:
+     test_alloc+0xf3/0x25b
+     test_double_free+0x76/0x171
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    freed by task 507:
+     test_double_free+0xa8/0x171
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    CPU: 4 PID: 111 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    ==================================================================
+
+KFENCE also uses pattern-based redzones on the other side of an object's guard
+page to detect out-of-bounds writes on the unprotected side of the object.
+These are reported on frees::
+
+    ==================================================================
+    BUG: KFENCE: memory corruption in test_kmalloc_aligned_oob_write+0xef/0x184
+
+    Corrupted memory at 0xffffffffb6797ff9 [ 0xac . . . . . . ] (in kfence-#69):
+     test_kmalloc_aligned_oob_write+0xef/0x184
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    kfence-#69 [0xffffffffb6797fb0-0xffffffffb6797ff8, size=73, cache=kmalloc-96] allocated by task 507:
+     test_alloc+0xf3/0x25b
+     test_kmalloc_aligned_oob_write+0x57/0x184
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    CPU: 4 PID: 120 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    ==================================================================
+
+For such errors, the address where the corruption occurred as well as the
+invalidly written bytes (offset from the address) are shown; in this
+representation, '.' denotes an untouched byte. In the example above, ``0xac``
+is the value written to the invalid address at offset 0, and the remaining '.'
+characters denote that no following bytes have been touched. Note that real
+values are only shown if the kernel was booted with ``no_hash_pointers``;
+otherwise, to avoid information disclosure, '!' is used instead to denote
+invalidly written bytes.
+
+And finally, KFENCE may also report on invalid accesses to any protected page
+where it was not possible to determine an associated object, e.g. if adjacent
+object pages had not yet been allocated::
+
+    ==================================================================
+    BUG: KFENCE: invalid read in test_invalid_access+0x26/0xe0
+
+    Invalid read at 0xffffffffb670b00a:
+     test_invalid_access+0x26/0xe0
+     kunit_try_run_case+0x51/0x85
+     kunit_generic_run_threadfn_adapter+0x16/0x30
+     kthread+0x137/0x160
+     ret_from_fork+0x22/0x30
+
+    CPU: 4 PID: 124 Comm: kunit_try_catch Tainted: G        W         5.8.0-rc6+ #7
+    Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.13.0-1 04/01/2014
+    ==================================================================
+
+DebugFS interface
+~~~~~~~~~~~~~~~~~
+
+Some debugging information is exposed via debugfs:
+
+* The file ``/sys/kernel/debug/kfence/stats`` provides runtime statistics.
+
+* The file ``/sys/kernel/debug/kfence/objects`` provides a list of objects
+  allocated via KFENCE, including those already freed but protected.
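+
+For example, the current statistics can be read with::
+
+    cat /sys/kernel/debug/kfence/stats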
+
+Implementation Details
+----------------------
+
+Guarded allocations are set up based on the sample interval. After expiration
+of the sample interval, the next allocation through the main allocator (SLAB or
+SLUB) returns a guarded allocation from the KFENCE object pool (allocation
+sizes up to PAGE_SIZE are supported). At this point, the timer is reset, and
+the next allocation is set up after the expiration of the interval. To "gate" a
+KFENCE allocation through the main allocator's fast-path without overhead,
+KFENCE relies on static branches via the static keys infrastructure. The static
+branch is toggled to redirect the allocation to KFENCE.
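+
+As a simplified sketch of this gate (the actual implementation lives in
+``include/linux/kfence.h``), the allocator-facing hook reduces to a static
+branch check::
+
+    /* Sketch: hand out a KFENCE object only while the static branch is on;
+     * returning NULL lets the caller fall back to the normal allocation path.
+     */
+    static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size,
+                                              gfp_t flags)
+    {
+            if (static_branch_unlikely(&kfence_allocation_key))
+                    return __kfence_alloc(s, size, flags);
+            return NULL;
+    }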
+
+KFENCE objects each reside on a dedicated page, at either the left or right
+page boundary, selected at random. The pages to the left and right of the
+object page are "guard pages", whose attributes are changed to a protected
+state, causing page faults on any attempted access. Such page faults are then
+intercepted by KFENCE, which handles the fault gracefully by reporting an
+out-of-bounds access and marking the page as accessible, so that the faulting
+code can (wrongly) continue executing (set ``panic_on_warn`` to panic instead).
+
+To detect out-of-bounds writes to memory within the object's page itself,
+KFENCE also uses pattern-based redzones. For each object page, a redzone is set
+up for all non-object memory. For typical alignments, the redzone is only
+required on the unguarded side of an object. Because KFENCE must honor the
+cache's requested alignment, special alignments may result in unprotected gaps
+on either side of an object, all of which are redzoned.
+
+The following figure illustrates the page layout::
+
+    ---+-----------+-----------+-----------+-----------+-----------+---
+       | xxxxxxxxx | O :       | xxxxxxxxx |       : O | xxxxxxxxx |
+       | xxxxxxxxx | B :       | xxxxxxxxx |       : B | xxxxxxxxx |
+       | x GUARD x | J : RED-  | x GUARD x | RED-  : J | x GUARD x |
+       | xxxxxxxxx | E :  ZONE | xxxxxxxxx |  ZONE : E | xxxxxxxxx |
+       | xxxxxxxxx | C :       | xxxxxxxxx |       : C | xxxxxxxxx |
+       | xxxxxxxxx | T :       | xxxxxxxxx |       : T | xxxxxxxxx |
+    ---+-----------+-----------+-----------+-----------+-----------+---
+
+Upon deallocation of a KFENCE object, the object's page is again protected and
+the object is marked as freed. Any further access to the object causes a fault
+and KFENCE reports a use-after-free access. Freed objects are inserted at the
+tail of KFENCE's freelist, so that the least recently freed objects are reused
+first, which increases the chances of detecting use-after-frees of recently
+freed objects.
+
+Interface
+---------
+
+The following describes the functions used by allocators, as well as by page
+handling code, to set up and deal with KFENCE allocations.
+
+.. kernel-doc:: include/linux/kfence.h
+   :functions: is_kfence_address
+               kfence_shutdown_cache
+               kfence_alloc kfence_free __kfence_free
+               kfence_ksize kfence_object_start
+               kfence_handle_page_fault
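+
+As a hypothetical illustration (the helper below is not part of the API), a
+caller might combine these as follows::
+
+    /* Hypothetical helper: returns 0 if addr is not KFENCE-managed. */
+    static size_t my_kfence_size(const void *addr)
+    {
+            if (!is_kfence_address(addr))
+                    return 0;
+            return kfence_ksize(addr);
+    }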
+
+Related Tools
+-------------
+
+In userspace, a similar approach is taken by `GWP-ASan
+<http://llvm.org/docs/GwpAsan.html>`_. GWP-ASan also relies on guard pages and
+a sampling strategy to detect memory unsafety bugs at scale. KFENCE's design is
+directly influenced by GWP-ASan, and can be seen as its kernel sibling. Another
+similar but non-sampling approach, which also inspired the name "KFENCE", can be
+found in the userspace `Electric Fence Malloc Debugger
+<https://linux.die.net/man/3/efence>`_.
+
+In the kernel, several tools exist to debug memory access errors, and in
+particular KASAN can detect all bug classes that KFENCE can detect. While KASAN
+is more precise, relying on compiler instrumentation, its precision comes at a
+performance cost.
+
+It is worth highlighting that KASAN and KFENCE are complementary, with
+different target environments. For instance, KASAN is the better debugging aid
+where test cases or reproducers exist: given KFENCE's lower chance of detecting
+a given error, debugging with it requires more effort. Deployments at scale
+that cannot afford to enable KASAN, however, would benefit from using KFENCE to
+discover bugs due to code paths not exercised by test cases or fuzzers.
index f5530c9..780e561 100644 (file)
@@ -78,10 +78,10 @@ $(obj)/processed-schema.json: $(DT_SCHEMA_FILES) check_dtschema_version FORCE
 
 endif
 
-extra-$(CHECK_DT_BINDING) += processed-schema-examples.json
-extra-$(CHECK_DTBS) += processed-schema.json
-extra-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
-extra-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
+always-$(CHECK_DT_BINDING) += processed-schema-examples.json
+always-$(CHECK_DTBS)       += processed-schema.json
+always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dts, $(DT_SCHEMA_FILES))
+always-$(CHECK_DT_BINDING) += $(patsubst $(src)/%.yaml,%.example.dt.yaml, $(DT_SCHEMA_FILES))
 
 # Hack: avoid 'Argument list too long' error for 'make clean'. Remove most of
 # build artifacts here before they are processed by scripts/Makefile.clean
index d711676..7f9c1ca 100644 (file)
@@ -34,9 +34,12 @@ its hardware characteristcs.
                                        Program Flow Trace Macrocell:
                        "arm,coresight-etm3x", "arm,primecell";
 
-               - Embedded Trace Macrocell (version 4.x):
+               - Embedded Trace Macrocell (version 4.x), with memory mapped access.
                        "arm,coresight-etm4x", "arm,primecell";
 
+               - Embedded Trace Macrocell (version 4.x), with system register access only.
+                       "arm,coresight-etm4x-sysreg";
+
                - Coresight programmable Replicator :
                        "arm,coresight-dynamic-replicator", "arm,primecell";
 
index 8a2dd9f..c3036f9 100644 (file)
@@ -132,6 +132,7 @@ properties:
           - enum:
               - friendlyarm,nanopc-t4
               - friendlyarm,nanopi-m4
+              - friendlyarm,nanopi-m4b
               - friendlyarm,nanopi-neo4
           - const: rockchip,rk3399
 
index 1c17d60..22792a7 100644 (file)
@@ -26,7 +26,6 @@ properties:
 
   dp-pwr-supply:
     description: Power supply for the DP_PWR pin
-    maxItems: 1
 
   port:
     $ref: /schemas/graph.yaml#/properties/port
index f06f24d..8238a86 100644 (file)
@@ -22,23 +22,7 @@ Required properties:
 MIPI TX Configuration Module
 ============================
 
-The MIPI TX configuration module controls the MIPI D-PHY.
-
-Required properties:
-- compatible: "mediatek,<chip>-mipi-tx"
-- the supported chips are mt2701, 7623, mt8173 and mt8183.
-- reg: Physical base address and length of the controller's registers
-- clocks: PLL reference clock
-- clock-output-names: name of the output clock line to the DSI encoder
-- #clock-cells: must be <0>;
-- #phy-cells: must be <0>.
-
-Optional properties:
-- drive-strength-microamp: adjust driving current, should be 3000 ~ 6000. And
-                                                  the step is 200.
-- nvmem-cells: A phandle to the calibration data provided by a nvmem device. If
-               unspecified default values shall be used.
-- nvmem-cell-names: Should be "calibration-data"
+See phy/mediatek,dsi-phy.yaml
 
 Example:
 
index 6b1c586..b284ca5 100644 (file)
@@ -53,23 +53,7 @@ Required properties:
 
 HDMI PHY
 ========
-
-The HDMI PHY serializes the HDMI encoder's three channel 10-bit parallel
-output and drives the HDMI pads.
-
-Required properties:
-- compatible: "mediatek,<chip>-hdmi-phy"
-- the supported chips are mt2701, mt7623 and mt8173
-- reg: Physical base address and length of the module's registers
-- clocks: PLL reference clock
-- clock-names: must contain "pll_ref"
-- clock-output-names: must be "hdmitx_dig_cts" on mt8173
-- #phy-cells: must be <0>
-- #clock-cells: must be <0>
-
-Optional properties:
-- mediatek,ibias: TX DRV bias current for <1.65Gbps, defaults to 0xa
-- mediatek,ibias_up: TX DRV bias current for >1.65Gbps, defaults to 0x1c
+See phy/mediatek,hdmi-phy.yaml
 
 Example:
 
index 6a20437..ac4d594 100644 (file)
@@ -17,6 +17,8 @@ properties:
     enum:
       - ingenic,jz4740-dma
       - ingenic,jz4725b-dma
+      - ingenic,jz4760-dma
+      - ingenic,jz4760b-dma
       - ingenic,jz4770-dma
       - ingenic,jz4780-dma
       - ingenic,x1000-dma
diff --git a/Documentation/devicetree/bindings/dma/intel,ldma.yaml b/Documentation/devicetree/bindings/dma/intel,ldma.yaml
new file mode 100644 (file)
index 0000000..a5c4be7
--- /dev/null
@@ -0,0 +1,116 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/intel,ldma.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Lightning Mountain centralized DMA controllers.
+
+maintainers:
+  - chuanhua.lei@intel.com
+  - mallikarjunax.reddy@intel.com
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - intel,lgm-cdma
+      - intel,lgm-dma2tx
+      - intel,lgm-dma1rx
+      - intel,lgm-dma1tx
+      - intel,lgm-dma0tx
+      - intel,lgm-dma3
+      - intel,lgm-toe-dma30
+      - intel,lgm-toe-dma31
+
+  reg:
+    maxItems: 1
+
+  "#dma-cells":
+    const: 3
+    description:
+      The first cell is the peripheral's DMA request line.
+      The second cell is the peripheral's (port) number corresponding to the channel.
+      The third cell is the burst length of the channel.
+
+  dma-channels:
+    minimum: 1
+    maximum: 16
+
+  dma-channel-mask:
+    maxItems: 1
+
+  clocks:
+    maxItems: 1
+
+  resets:
+    maxItems: 1
+
+  reset-names:
+    items:
+      - const: ctrl
+
+  interrupts:
+    maxItems: 1
+
+  intel,dma-poll-cnt:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description:
+      DMA descriptor polling counter, used to control the polling mechanism
+      for descriptor fetching for all channels.
+
+  intel,dma-byte-en:
+    type: boolean
+    description:
+      DMA byte enable is only valid for DMA writes (RX).
+      Byte enable (1) means DMA writes will be based on the number of dwords
+      instead of the whole burst.
+
+  intel,dma-drb:
+    type: boolean
+    description:
+      Enable DMA descriptor read-back to ensure data and descriptor synchronization.
+
+  intel,dma-dburst-wr:
+    type: boolean
+    description:
+      Enable RX dynamic burst write. When enabled, the DMA performs RX dynamic
+      bursts; when disabled, RX DMA still supports a programmable fixed burst
+      size of 2, 4, 8, or 16. This only applies to RX DMA and memcopy DMA.
+
+required:
+  - compatible
+  - reg
+
+additionalProperties: false
+
+examples:
+  - |
+    dma0: dma-controller@e0e00000 {
+      compatible = "intel,lgm-cdma";
+      reg = <0xe0e00000 0x1000>;
+      #dma-cells = <3>;
+      dma-channels = <16>;
+      dma-channel-mask = <0xFFFF>;
+      interrupt-parent = <&ioapic1>;
+      interrupts = <82 1>;
+      resets = <&rcu0 0x30 0>;
+      reset-names = "ctrl";
+      clocks = <&cgu0 80>;
+      intel,dma-poll-cnt = <4>;
+      intel,dma-byte-en;
+      intel,dma-drb;
+    };
+  - |
+    dma3: dma-controller@ec800000 {
+      compatible = "intel,lgm-dma3";
+      reg = <0xec800000 0x1000>;
+      clocks = <&cgu0 71>;
+      resets = <&rcu0 0x10 9>;
+      #dma-cells = <3>;
+      intel,dma-poll-cnt = <16>;
+      intel,dma-byte-en;
+      intel,dma-dburst-wr;
+    };
index 256d62a..93b4847 100644 (file)
@@ -8,8 +8,8 @@ title: Actions Semi Owl SoCs DMA controller
 
 description: |
   The OWL DMA is a general-purpose direct memory access controller capable of
-  supporting 10 and 12 independent DMA channels for S700 and S900 SoCs
-  respectively.
+  supporting 10 independent DMA channels for the Actions Semi S700 SoC and 12
+  independent DMA channels for the S500 and S900 SoC variants.
 
 maintainers:
   - Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
@@ -20,8 +20,9 @@ allOf:
 properties:
   compatible:
     enum:
-      - actions,s900-dma
+      - actions,s500-dma
       - actions,s700-dma
+      - actions,s900-dma
 
   reg:
     maxItems: 1
index c07eb6f..7f2a54b 100644 (file)
@@ -14,34 +14,37 @@ allOf:
 
 properties:
   compatible:
-    items:
-      - enum:
-          - renesas,dmac-r8a7742  # RZ/G1H
-          - renesas,dmac-r8a7743  # RZ/G1M
-          - renesas,dmac-r8a7744  # RZ/G1N
-          - renesas,dmac-r8a7745  # RZ/G1E
-          - renesas,dmac-r8a77470 # RZ/G1C
-          - renesas,dmac-r8a774a1 # RZ/G2M
-          - renesas,dmac-r8a774b1 # RZ/G2N
-          - renesas,dmac-r8a774c0 # RZ/G2E
-          - renesas,dmac-r8a774e1 # RZ/G2H
-          - renesas,dmac-r8a7790  # R-Car H2
-          - renesas,dmac-r8a7791  # R-Car M2-W
-          - renesas,dmac-r8a7792  # R-Car V2H
-          - renesas,dmac-r8a7793  # R-Car M2-N
-          - renesas,dmac-r8a7794  # R-Car E2
-          - renesas,dmac-r8a7795  # R-Car H3
-          - renesas,dmac-r8a7796  # R-Car M3-W
-          - renesas,dmac-r8a77961 # R-Car M3-W+
-          - renesas,dmac-r8a77965 # R-Car M3-N
-          - renesas,dmac-r8a77970 # R-Car V3M
-          - renesas,dmac-r8a77980 # R-Car V3H
-          - renesas,dmac-r8a77990 # R-Car E3
-          - renesas,dmac-r8a77995 # R-Car D3
-      - const: renesas,rcar-dmac
-
-  reg:
-    maxItems: 1
+    oneOf:
+      - items:
+          - enum:
+              - renesas,dmac-r8a7742  # RZ/G1H
+              - renesas,dmac-r8a7743  # RZ/G1M
+              - renesas,dmac-r8a7744  # RZ/G1N
+              - renesas,dmac-r8a7745  # RZ/G1E
+              - renesas,dmac-r8a77470 # RZ/G1C
+              - renesas,dmac-r8a774a1 # RZ/G2M
+              - renesas,dmac-r8a774b1 # RZ/G2N
+              - renesas,dmac-r8a774c0 # RZ/G2E
+              - renesas,dmac-r8a774e1 # RZ/G2H
+              - renesas,dmac-r8a7790  # R-Car H2
+              - renesas,dmac-r8a7791  # R-Car M2-W
+              - renesas,dmac-r8a7792  # R-Car V2H
+              - renesas,dmac-r8a7793  # R-Car M2-N
+              - renesas,dmac-r8a7794  # R-Car E2
+              - renesas,dmac-r8a7795  # R-Car H3
+              - renesas,dmac-r8a7796  # R-Car M3-W
+              - renesas,dmac-r8a77961 # R-Car M3-W+
+              - renesas,dmac-r8a77965 # R-Car M3-N
+              - renesas,dmac-r8a77970 # R-Car V3M
+              - renesas,dmac-r8a77980 # R-Car V3H
+              - renesas,dmac-r8a77990 # R-Car E3
+              - renesas,dmac-r8a77995 # R-Car D3
+          - const: renesas,rcar-dmac
+
+      - items:
+          - const: renesas,dmac-r8a779a0 # R-Car V3U
+
+  reg: true
 
   interrupts:
     minItems: 9
@@ -110,6 +113,23 @@ required:
   - power-domains
   - resets
 
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - renesas,dmac-r8a779a0
+then:
+  properties:
+    reg:
+      items:
+        - description: Base register block
+        - description: Channel register block
+else:
+  properties:
+    reg:
+      maxItems: 1
+
 additionalProperties: false
 
 examples:
diff --git a/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt b/Documentation/devicetree/bindings/dma/sirfsoc-dma.txt
deleted file mode 100644 (file)
index ccd52d6..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-* CSR SiRFSoC DMA controller
-
-See dma.txt first
-
-Required properties:
-- compatible: Should be "sirf,prima2-dmac", "sirf,atlas7-dmac" or
-  "sirf,atlas7-dmac-v2"
-- reg: Should contain DMA registers location and length.
-- interrupts: Should contain one interrupt shared by all channel
-- #dma-cells: must be <1>. used to represent the number of integer
-    cells in the dmas property of client device.
-- clocks: clock required
-
-Example:
-
-Controller:
-dmac0: dma-controller@b00b0000 {
-       compatible = "sirf,prima2-dmac";
-       reg = <0xb00b0000 0x10000>;
-       interrupts = <12>;
-       clocks = <&clks 24>;
-       #dma-cells = <1>;
-};
-
-
-Client:
-Fill the specific dma request line in dmas. In the below example, spi0 read
-channel request line is 9 of the 2nd dma controller, while write channel uses
-4 of the 2nd dma controller; spi1 read channel request line is 12 of the 1st
-dma controller, while write channel uses 13 of the 1st dma controller:
-
-spi0: spi@b00d0000 {
-       compatible = "sirf,prima2-spi";
-       dmas = <&dmac1 9>,
-               <&dmac1 4>;
-       dma-names = "rx", "tx";
-};
-
-spi1: spi@b0170000 {
-       compatible = "sirf,prima2-spi";
-       dmas = <&dmac0 12>,
-               <&dmac0 13>;
-       dma-names = "rx", "tx";
-};
diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.txt
deleted file mode 100644 (file)
index dbe1604..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-Synopsys DesignWare AXI DMA Controller
-
-Required properties:
-- compatible: "snps,axi-dma-1.01a"
-- reg: Address range of the DMAC registers. This should include
-  all of the per-channel registers.
-- interrupt: Should contain the DMAC interrupt number.
-- dma-channels: Number of channels supported by hardware.
-- snps,dma-masters: Number of AXI masters supported by the hardware.
-- snps,data-width: Maximum AXI data width supported by hardware.
-  (0 - 8bits, 1 - 16bits, 2 - 32bits, ..., 6 - 512bits)
-- snps,priority: Priority of channel. Array size is equal to the number of
-  dma-channels. Priority value must be programmed within [0:dma-channels-1]
-  range. (0 - minimum priority)
-- snps,block-size: Maximum block size supported by the controller channel.
-  Array size is equal to the number of dma-channels.
-
-Optional properties:
-- snps,axi-max-burst-len: Restrict master AXI burst length by value specified
-  in this property. If this property is missing the maximum AXI burst length
-  supported by DMAC is used. [1:256]
-
-Example:
-
-dmac: dma-controller@80000 {
-       compatible = "snps,axi-dma-1.01a";
-       reg = <0x80000 0x400>;
-       clocks = <&core_clk>, <&cfgr_clk>;
-       clock-names = "core-clk", "cfgr-clk";
-       interrupt-parent = <&intc>;
-       interrupts = <27>;
-
-       dma-channels = <4>;
-       snps,dma-masters = <2>;
-       snps,data-width = <3>;
-       snps,block-size = <4096 4096 4096 4096>;
-       snps,priority = <0 1 2 3>;
-       snps,axi-max-burst-len = <16>;
-};
diff --git a/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml b/Documentation/devicetree/bindings/dma/snps,dw-axi-dmac.yaml
new file mode 100644 (file)
index 0000000..79e2414
--- /dev/null
@@ -0,0 +1,126 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/dma/snps,dw-axi-dmac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Synopsys DesignWare AXI DMA Controller
+
+maintainers:
+  - Eugeniy Paltsev <Eugeniy.Paltsev@synopsys.com>
+  - Jee Heng Sia <jee.heng.sia@intel.com>
+
+description:
+  Synopsys DesignWare AXI DMA Controller DT Binding
+
+allOf:
+  - $ref: "dma-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - snps,axi-dma-1.01a
+      - intel,kmb-axi-dma
+
+  reg:
+    minItems: 1
+    items:
+      - description: Address range of the DMAC registers
+      - description: Address range of the DMAC APB registers
+
+  reg-names:
+    items:
+      - const: axidma_ctrl_regs
+      - const: axidma_apb_regs
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Bus Clock
+      - description: Module Clock
+
+  clock-names:
+    items:
+      - const: core-clk
+      - const: cfgr-clk
+
+  '#dma-cells':
+    const: 1
+
+  dma-channels:
+    minimum: 1
+    maximum: 8
+
+  snps,dma-masters:
+    description: |
+      Number of AXI masters supported by the hardware.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [1, 2]
+
+  snps,data-width:
+    description: |
+      AXI data width supported by hardware.
+      (0 - 8bits, 1 - 16bits, 2 - 32bits, ..., 6 - 512bits)
+    $ref: /schemas/types.yaml#/definitions/uint32
+    enum: [0, 1, 2, 3, 4, 5, 6]
+
+  snps,priority:
+    description: |
+      Channel priority specifier associated with the DMA channels.
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    maxItems: 8
+
+  snps,block-size:
+    description: |
+      Channel block size specifier associated with the DMA channels.
+    $ref: /schemas/types.yaml#/definitions/uint32-array
+    minItems: 1
+    maxItems: 8
+
+  snps,axi-max-burst-len:
+    description: |
+      Restrict master AXI burst length by value specified in this property.
+      If this property is missing the maximum AXI burst length supported by
+      DMAC is used.
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 1
+    maximum: 256
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - interrupts
+  - '#dma-cells'
+  - dma-channels
+  - snps,dma-masters
+  - snps,data-width
+  - snps,priority
+  - snps,block-size
+
+additionalProperties: false
+
+examples:
+  - |
+     #include <dt-bindings/interrupt-controller/arm-gic.h>
+     #include <dt-bindings/interrupt-controller/irq.h>
+     /* example with snps,dw-axi-dmac */
+     dmac: dma-controller@80000 {
+         compatible = "snps,axi-dma-1.01a";
+         reg = <0x80000 0x400>;
+         clocks = <&core_clk>, <&cfgr_clk>;
+         clock-names = "core-clk", "cfgr-clk";
+         interrupt-parent = <&intc>;
+         interrupts = <27>;
+         #dma-cells = <1>;
+         dma-channels = <4>;
+         snps,dma-masters = <2>;
+         snps,data-width = <3>;
+         snps,block-size = <4096 4096 4096 4096>;
+         snps,priority = <0 1 2 3>;
+         snps,axi-max-burst-len = <16>;
+     };
diff --git a/Documentation/devicetree/bindings/dma/ste-coh901318.txt b/Documentation/devicetree/bindings/dma/ste-coh901318.txt
deleted file mode 100644 (file)
index 091ad05..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-ST-Ericsson COH 901 318 DMA Controller
-
-This is a DMA controller which has begun as a fork of the
-ARM PL08x PrimeCell VHDL code.
-
-Required properties:
-- compatible: should be "stericsson,coh901318"
-- reg: register locations and length
-- interrupts: the single DMA IRQ
-- #dma-cells: must be set to <1>, as the channels on the
-  COH 901 318 are simple and identified by a single number
-- dma-channels: the number of DMA channels handled
-
-Example:
-
-dmac: dma-controller@c00020000 {
-       compatible = "stericsson,coh901318";
-       reg = <0xc0020000 0x1000>;
-       interrupt-parent = <&vica>;
-       interrupts = <2>;
-       #dma-cells = <1>;
-       dma-channels = <40>;
-};
-
-Consumers example:
-
-uart0: serial@c0013000 {
-       compatible = "...";
-       (...)
-       dmas = <&dmac 17 &dmac 18>;
-       dma-names = "tx", "rx";
-};
diff --git a/Documentation/devicetree/bindings/dma/zxdma.txt b/Documentation/devicetree/bindings/dma/zxdma.txt
deleted file mode 100644 (file)
index 0ab80f6..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-* ZTE ZX296702 DMA controller
-
-Required properties:
-- compatible: Should be "zte,zx296702-dma"
-- reg: Should contain DMA registers location and length.
-- interrupts: Should contain one interrupt shared by all channel
-- #dma-cells: see dma.txt, should be 1, para number
-- dma-channels: physical channels supported
-- dma-requests: virtual channels supported, each virtual channel
-               have specific request line
-- clocks: clock required
-
-Example:
-
-Controller:
-       dma: dma-controller@09c00000{
-               compatible = "zte,zx296702-dma";
-               reg = <0x09c00000 0x1000>;
-               clocks = <&topclk ZX296702_DMA_ACLK>;
-               interrupts = <GIC_SPI 66 IRQ_TYPE_LEVEL_HIGH>;
-               #dma-cells = <1>;
-               dma-channels = <24>;
-               dma-requests = <24>;
-       };
-
-Client:
-Use specific request line passing from dmax
-For example, spdif0 tx channel request line is 4
-       spdif0: spdif0@b004000 {
-               #sound-dai-cells = <0>;
-               compatible = "zte,zx296702-spdif";
-               reg = <0x0b004000 0x1000>;
-               clocks = <&lsp0clk ZX296702_SPDIF0_DIV>;
-               clock-names = "tx";
-               interrupts = <GIC_SPI 21 IRQ_TYPE_LEVEL_HIGH>;
-               dmas = <&dma 4>;
-               dma-names = "tx";
-       }
index a0efd8d..c2902aa 100644 (file)
@@ -13,7 +13,10 @@ maintainers:
 properties:
   compatible:
     items:
-      - const: sifive,fu540-c000-gpio
+      - enum:
+          - sifive,fu540-c000-gpio
+          - sifive,fu740-c000-gpio
+          - canaan,k210-gpiohs
       - const: sifive,gpio0
 
   reg:
@@ -21,9 +24,9 @@ properties:
 
   interrupts:
     description:
-      interrupt mapping one per GPIO. Maximum 16 GPIOs.
+      Interrupt mapping, one per GPIO. Maximum 32 GPIOs.
     minItems: 1
-    maxItems: 16
+    maxItems: 32
 
   interrupt-controller: true
 
@@ -36,6 +39,14 @@ properties:
   "#gpio-cells":
     const: 2
 
+  ngpios:
+    description:
+      The number of GPIOs available on the controller implementation.
+      It is 16 for the SiFive SoCs and 32 for the Canaan K210.
+    minimum: 1
+    maximum: 32
+    default: 16
+
   gpio-controller: true
 
 required:
@@ -44,10 +55,20 @@ required:
   - interrupts
   - interrupt-controller
   - "#interrupt-cells"
-  - clocks
   - "#gpio-cells"
   - gpio-controller
 
+if:
+  properties:
+    compatible:
+      contains:
+        enum:
+          - sifive,fu540-c000-gpio
+          - sifive,fu740-c000-gpio
+then:
+  required:
+    - clocks
+
 additionalProperties: false
 
 examples:
index ac35491..ae1b37d 100644 (file)
@@ -13,6 +13,7 @@ properties:
   compatible:
     enum:
       - ti,omap4-hwspinlock  # for OMAP44xx, OMAP54xx, AM33xx, AM43xx, DRA7xx SoCs
+      - ti,am64-hwspinlock   # for K3 AM64x SoCs
       - ti,am654-hwspinlock  # for K3 AM65x, J721E and J7200 SoCs
 
   reg:
diff --git a/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml b/Documentation/devicetree/bindings/input/goodix,gt7375p.yaml
new file mode 100644 (file)
index 0000000..fe1c501
--- /dev/null
@@ -0,0 +1,65 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/input/goodix,gt7375p.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Goodix GT7375P touchscreen
+
+maintainers:
+  - Douglas Anderson <dianders@chromium.org>
+
+description:
+  Supports the Goodix GT7375P touchscreen.
+  This touchscreen uses the i2c-hid protocol but requires some non-standard
+  power sequencing.
+
+properties:
+  compatible:
+    items:
+      - const: goodix,gt7375p
+
+  reg:
+    enum:
+      - 0x5d
+      - 0x14
+
+  interrupts:
+    maxItems: 1
+
+  reset-gpios:
+    true
+
+  vdd-supply:
+    description: The 3.3V supply to the touchscreen.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - reset-gpios
+  - vdd-supply
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/qcom,rpmh.h>
+    #include <dt-bindings/gpio/gpio.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+
+    i2c {
+      #address-cells = <1>;
+      #size-cells = <0>;
+
+      ap_ts: touchscreen@5d {
+        compatible = "goodix,gt7375p";
+        reg = <0x5d>;
+
+        interrupt-parent = <&tlmm>;
+        interrupts = <9 IRQ_TYPE_LEVEL_LOW>;
+
+        reset-gpios = <&tlmm 8 GPIO_ACTIVE_LOW>;
+        vdd-supply = <&pp3300_ts>;
+      };
+    };
index 8e50c14..5377b23 100644 (file)
@@ -31,6 +31,17 @@ properties:
       if the EC does not have its own logic or hardware for this.
     type: boolean
 
+  function-row-physmap:
+    minItems: 1
+    maxItems: 15
+    description: |
+      An ordered u32 array describing the rows/columns (in the scan matrix)
+      of top row keys from physical left (KEY_F1) to right. Each entry
+      encodes the row/column as:
+      (((row) & 0xFF) << 24) | (((column) & 0xFF) << 16)
+      where the lower 16 bits are reserved. This property is specified only
+      when the keyboard has a custom design for the top row keys.
+
 required:
   - compatible
 
@@ -38,11 +49,24 @@ unevaluatedProperties: false
 
 examples:
   - |
+    #include <dt-bindings/input/input.h>
     cros-ec-keyb {
         compatible = "google,cros-ec-keyb";
         keypad,num-rows = <8>;
         keypad,num-columns = <13>;
         google,needs-ghost-filter;
+        function-row-physmap = <
+            MATRIX_KEY(0x00, 0x02, 0)   /* T1 */
+            MATRIX_KEY(0x03, 0x02, 0)   /* T2 */
+            MATRIX_KEY(0x02, 0x02, 0)   /* T3 */
+            MATRIX_KEY(0x01, 0x02, 0)   /* T4 */
+            MATRIX_KEY(0x03, 0x04, 0)   /* T5 */
+            MATRIX_KEY(0x02, 0x04, 0)   /* T6 */
+            MATRIX_KEY(0x01, 0x04, 0)   /* T7 */
+            MATRIX_KEY(0x02, 0x09, 0)   /* T8 */
+            MATRIX_KEY(0x01, 0x09, 0)   /* T9 */
+            MATRIX_KEY(0x00, 0x04, 0)   /* T10 */
+        >;
         /*
          * Keymap entries take the form of 0xRRCCKKKK where
          * RR=Row CC=Column KKKK=Key Code
diff --git a/Documentation/devicetree/bindings/interconnect/qcom,qcs404.yaml b/Documentation/devicetree/bindings/interconnect/qcom,qcs404.yaml
deleted file mode 100644 (file)
index 3fbb878..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/interconnect/qcom,qcs404.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: Qualcomm QCS404 Network-On-Chip interconnect
-
-maintainers:
-  - Georgi Djakov <georgi.djakov@linaro.org>
-
-description: |
-  The Qualcomm QCS404 interconnect providers support adjusting the
-  bandwidth requirements between the various NoC fabrics.
-
-properties:
-  reg:
-    maxItems: 1
-
-  compatible:
-    enum:
-      - qcom,qcs404-bimc
-      - qcom,qcs404-pcnoc
-      - qcom,qcs404-snoc
-
-  '#interconnect-cells':
-    const: 1
-
-  clock-names:
-    items:
-      - const: bus
-      - const: bus_a
-
-  clocks:
-    items:
-      - description: Bus Clock
-      - description: Bus A Clock
-
-required:
-  - compatible
-  - reg
-  - '#interconnect-cells'
-  - clock-names
-  - clocks
-
-additionalProperties: false
-
-examples:
-  - |
-      #include <dt-bindings/clock/qcom,rpmcc.h>
-
-      bimc: interconnect@400000 {
-              reg = <0x00400000 0x80000>;
-              compatible = "qcom,qcs404-bimc";
-              #interconnect-cells = <1>;
-              clock-names = "bus", "bus_a";
-              clocks = <&rpmcc RPM_SMD_BIMC_CLK>,
-                       <&rpmcc RPM_SMD_BIMC_A_CLK>;
-      };
-
-      pnoc: interconnect@500000 {
-             reg = <0x00500000 0x15080>;
-             compatible = "qcom,qcs404-pcnoc";
-             #interconnect-cells = <1>;
-             clock-names = "bus", "bus_a";
-             clocks = <&rpmcc RPM_SMD_PNOC_CLK>,
-                      <&rpmcc RPM_SMD_PNOC_A_CLK>;
-      };
-
-      snoc: interconnect@580000 {
-            reg = <0x00580000 0x23080>;
-            compatible = "qcom,qcs404-snoc";
-            #interconnect-cells = <1>;
-            clock-names = "bus", "bus_a";
-            clocks = <&rpmcc RPM_SMD_SNOC_CLK>,
-                     <&rpmcc RPM_SMD_SNOC_A_CLK>;
-      };
@@ -1,27 +1,35 @@
 # SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/interconnect/qcom,msm8916.yaml#
+$id: http://devicetree.org/schemas/interconnect/qcom,rpm.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
-title: Qualcomm MSM8916 Network-On-Chip interconnect
+title: Qualcomm RPM Network-On-Chip Interconnect
 
 maintainers:
   - Georgi Djakov <georgi.djakov@linaro.org>
 
 description: |
-  The Qualcomm MSM8916 interconnect providers support adjusting the
-  bandwidth requirements between the various NoC fabrics.
+  RPM interconnect providers support system bandwidth requirements through
+  the RPM processor. The provider is able to communicate with the RPM through
+  the RPM shared memory device.
 
 properties:
+  reg:
+    maxItems: 1
+
   compatible:
     enum:
       - qcom,msm8916-bimc
       - qcom,msm8916-pcnoc
       - qcom,msm8916-snoc
-
-  reg:
-    maxItems: 1
+      - qcom,msm8939-bimc
+      - qcom,msm8939-pcnoc
+      - qcom,msm8939-snoc
+      - qcom,msm8939-snoc-mm
+      - qcom,qcs404-bimc
+      - qcom,qcs404-pcnoc
+      - qcom,qcs404-snoc
 
   '#interconnect-cells':
     const: 1
index ebcf465..799e73c 100644 (file)
@@ -45,6 +45,10 @@ properties:
       - qcom,sdm845-mem-noc
       - qcom,sdm845-mmss-noc
       - qcom,sdm845-system-noc
+      - qcom,sdx55-ipa-virt
+      - qcom,sdx55-mc-virt
+      - qcom,sdx55-mem-noc
+      - qcom,sdx55-system-noc
       - qcom,sm8150-aggre1-noc
       - qcom,sm8150-aggre2-noc
       - qcom,sm8150-camnoc-noc
index 0a046be..0358a77 100644 (file)
@@ -23,6 +23,7 @@ properties:
           - enum:
               - ingenic,jz4775-intc
               - ingenic,jz4770-intc
+              - ingenic,jz4760b-intc
           - const: ingenic,jz4760-intc
       - items:
           - const: ingenic,x1000-intc
index b9a61c9..08d5a57 100644 (file)
@@ -8,10 +8,11 @@ $schema: http://devicetree.org/meta-schemas/core.yaml#
 title: SiFive Platform-Level Interrupt Controller (PLIC)
 
 description:
-  SiFive SOCs include an implementation of the Platform-Level Interrupt Controller
-  (PLIC) high-level specification in the RISC-V Privileged Architecture
-  specification. The PLIC connects all external interrupts in the system to all
-  hart contexts in the system, via the external interrupt source in each hart.
+  SiFive SoCs and other RISC-V SoCs include an implementation of the
+  Platform-Level Interrupt Controller (PLIC) high-level specification in
+  the RISC-V Privileged Architecture specification. The PLIC connects all
+  external interrupts in the system to all hart contexts in the system, via
+  the external interrupt source in each hart.
 
   A hart context is a privilege mode in a hardware execution thread. For example,
   in an 4 core system with 2-way SMT, you have 8 harts and probably at least two
@@ -42,7 +43,9 @@ maintainers:
 properties:
   compatible:
     items:
-      - const: sifive,fu540-c000-plic
+      - enum:
+          - sifive,fu540-c000-plic
+          - canaan,k210-plic
       - const: sifive,plic-1.0.0
 
   reg:
diff --git a/Documentation/devicetree/bindings/leds/leds-lgm.yaml b/Documentation/devicetree/bindings/leds/leds-lgm.yaml
new file mode 100644 (file)
index 0000000..32bbf14
--- /dev/null
@@ -0,0 +1,113 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/leds/leds-lgm.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Intel Lightning Mountain (LGM) SoC LED Serial Shift Output (SSO) Controller driver
+
+maintainers:
+  - Zhu, Yi Xin <Yixin.zhu@intel.com>
+  - Amireddy Mallikarjuna reddy <mallikarjunax.reddy@intel.com>
+
+properties:
+  compatible:
+    const: intel,lgm-ssoled
+
+  gpio-controller: true
+
+  '#gpio-cells':
+    const: 2
+
+  ngpios:
+    minimum: 0
+    maximum: 32
+    description:
+      Number of GPIOs this controller provides.
+
+  intel,sso-update-rate-hz:
+    description:
+      Blink frequency for SOUTs in Hz.
+
+  led-controller:
+    type: object
+    description:
+      This node must contain a sub-node for each LED.
+
+    additionalProperties: false
+
+    patternProperties:
+      "^led@[0-23]$":
+        type: object
+
+        properties:
+          reg:
+            description: Index of the LED.
+            minimum: 0
+            maximum: 2
+
+          intel,sso-hw-trigger:
+            type: boolean
+            description: Indicates that the LED is driven/controlled by hardware.
+
+          intel,sso-hw-blink:
+            type: boolean
+            description: Enables hardware-driven LED blinking.
+
+          intel,sso-blink-rate-hz:
+            description: LED HW blink frequency.
+
+          retain-state-suspended:
+            type: boolean
+            description: Retain the state of the LED during suspend.
+
+          retain-state-shutdown:
+            type: boolean
+            description: Retain the state of the LED on shutdown.
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - "#gpio-cells"
+  - gpio-controller
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/intel,lgm-clk.h>
+    #include <dt-bindings/leds/common.h>
+
+    ssogpio: ssogpio@e0d40000 {
+      compatible = "intel,sso-led";
+      reg = <0xE0D40000 0x2E4>;
+      gpio-controller;
+      #gpio-cells = <2>;
+      ngpios = <32>;
+      pinctrl-names = "default";
+      pinctrl-0 = <&pinctrl_ledc>;
+      clocks = <&cgu0 LGM_GCLK_LEDC0>, <&afeclk>;
+      clock-names = "sso", "fpid";
+      intel,sso-update-rate-hz = <250000>;
+
+      led-controller {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        led@0 {
+          reg = <0>;
+          function = "gphy";
+          color = <LED_COLOR_ID_GREEN>;
+          led-gpio = <&ssogpio 0 0>;
+        };
+
+        led@23 {
+          reg = <23>;
+          function = LED_FUNCTION_POWER;
+          color = <LED_COLOR_ID_GREEN>;
+          led-gpio = <&ssogpio 23 0>;
+        };
+      };
+    };
index 5fe80c1..12371f5 100644 (file)
@@ -28,6 +28,9 @@ SoCs has each of these instances form a cluster and combine multiple clusters
 into a single IP block present within the Main NavSS. The interrupt lines from
 all these clusters are multiplexed and routed to different processor subsystems
 over a limited number of common interrupt output lines of an Interrupt Router.
+The AM64x SoCs also use a single IP block comprising multiple clusters,
+but the number of clusters is smaller, and the interrupt output lines are
+connected directly to various processors.
 
 Mailbox Device Node:
 ====================
@@ -42,6 +45,7 @@ Required properties:
                            "ti,omap4-mailbox" for OMAP44xx, OMAP54xx, AM33xx,
                                                   AM43xx and DRA7xx SoCs
                            "ti,am654-mailbox" for K3 AM65x and J721E SoCs
+                           "ti,am64-mailbox" for K3 AM64x SoCs
 - reg:                 Contains the mailbox register address range (base
                        address and length)
 - interrupts:          Contains the interrupt information for the mailbox
index ffd09b6..5dc1173 100644 (file)
@@ -24,6 +24,7 @@ properties:
       - qcom,msm8998-apcs-hmss-global
       - qcom,qcs404-apcs-apps-global
       - qcom,sc7180-apss-shared
+      - qcom,sc8180x-apss-shared
       - qcom,sdm660-apcs-hmss-global
       - qcom,sdm845-apss-shared
       - qcom,sm8150-apss-shared
@@ -33,9 +34,11 @@ properties:
 
   clocks:
     description: phandles to the parent clocks of the clock driver
+    minItems: 2
     items:
       - description: primary pll parent of the clock driver
       - description: auxiliary parent
+      - description: reference clock
 
   '#mbox-cells':
     const: 1
@@ -44,9 +47,11 @@ properties:
     const: 0
 
   clock-names:
+    minItems: 2
     items:
       - const: pll
       - const: aux
+      - const: ref
 
 required:
   - compatible
@@ -55,6 +60,35 @@ required:
 
 additionalProperties: false
 
+allOf:
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,ipq6018-apcs-apps-global
+            - qcom,ipq8074-apcs-apps-global
+            - qcom,msm8916-apcs-kpss-global
+            - qcom,msm8994-apcs-kpss-global
+            - qcom,msm8996-apcs-hmss-global
+            - qcom,msm8998-apcs-hmss-global
+            - qcom,qcs404-apcs-apps-global
+            - qcom,sc7180-apss-shared
+            - qcom,sdm660-apcs-hmss-global
+            - qcom,sdm845-apss-shared
+            - qcom,sm8150-apss-shared
+    then:
+      properties:
+        clocks:
+          maxItems: 2
+  - if:
+      properties:
+        compatible:
+          enum:
+            - qcom,sdx55-apcs-gcc
+    then:
+      properties:
+        clocks:
+          maxItems: 3
 examples:
 
   # Example apcs with msm8996
diff --git a/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml b/Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
new file mode 100644 (file)
index 0000000..c24ad45
--- /dev/null
@@ -0,0 +1,109 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/mfd/canaan,k210-sysctl.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 System Controller Device Tree Bindings
+
+maintainers:
+  - Damien Le Moal <damien.lemoal@wdc.com>
+
+description:
+  Canaan Inc. Kendryte K210 SoC system controller which provides a
+  register map for controlling the clocks, reset signals and pin power
+  domains of the SoC.
+
+properties:
+  compatible:
+    items:
+      - const: canaan,k210-sysctl
+      - const: syscon
+      - const: simple-mfd
+
+  clocks:
+    maxItems: 1
+    description:
+      System controller Advanced Power Bus (APB) interface clock source.
+
+  clock-names:
+    items:
+      - const: pclk
+
+  reg:
+    maxItems: 1
+
+  clock-controller:
+    # Child node
+    type: object
+    $ref: "../clock/canaan,k210-clk.yaml"
+    description:
+      Clock controller for the SoC clocks. This child node definition
+      should follow the bindings specified in
+      Documentation/devicetree/bindings/clock/canaan,k210-clk.yaml.
+
+  reset-controller:
+    # Child node
+    type: object
+    $ref: "../reset/canaan,k210-rst.yaml"
+    description:
+      Reset controller for the SoC. This child node definition
+      should follow the bindings specified in
+      Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml.
+
+  syscon-reboot:
+    # Child node
+    type: object
+    $ref: "../power/reset/syscon-reboot.yaml"
+    description:
+      Reboot method for the SoC. This child node definition
+      should follow the bindings specified in
+      Documentation/devicetree/bindings/power/reset/syscon-reboot.yaml.
+
+required:
+  - compatible
+  - clocks
+  - reg
+  - clock-controller
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/k210-clk.h>
+    #include <dt-bindings/reset/k210-rst.h>
+
+    clocks {
+      in0: oscillator {
+        compatible = "fixed-clock";
+        #clock-cells = <0>;
+        clock-frequency = <26000000>;
+      };
+    };
+
+    sysctl: syscon@50440000 {
+      compatible = "canaan,k210-sysctl",
+                   "syscon", "simple-mfd";
+      reg = <0x50440000 0x100>;
+      clocks = <&sysclk K210_CLK_APB1>;
+      clock-names = "pclk";
+
+      sysclk: clock-controller {
+        #clock-cells = <1>;
+        compatible = "canaan,k210-clk";
+        clocks = <&in0>;
+      };
+
+      sysrst: reset-controller {
+        compatible = "canaan,k210-rst";
+        #reset-cells = <1>;
+      };
+
+      reboot: syscon-reboot {
+        compatible = "syscon-reboot";
+        regmap = <&sysctl>;
+        offset = <48>;
+        mask = <1>;
+        value = <1>;
+      };
+    };
index a8ebb46..7b636b7 100644 (file)
@@ -4,6 +4,7 @@ Required properties:
 - compatible : shall be one of:
     "atmel,at93c46d"
     "eeprom-93xx46"
+    "microchip,93lc46b"
 - data-size : number of data bits per word (either 8 or 16)
 
 Optional properties:
diff --git a/Documentation/devicetree/bindings/nvmem/rmem.yaml b/Documentation/devicetree/bindings/nvmem/rmem.yaml
new file mode 100644 (file)
index 0000000..1d85a0a
--- /dev/null
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/nvmem/rmem.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Reserved Memory Based nvmem Device
+
+maintainers:
+  - Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
+
+allOf:
+  - $ref: "nvmem.yaml#"
+
+properties:
+  compatible:
+    items:
+      - enum:
+          - raspberrypi,bootloader-config
+      - const: nvmem-rmem
+
+  no-map:
+    $ref: /schemas/types.yaml#/definitions/flag
+    description:
+      Avoid creating a virtual mapping of the region as part of the OS'
+      standard mapping of system memory.
+
+required:
+  - compatible
+  - no-map
+
+unevaluatedProperties: false
+
+examples:
+  - |
+        reserved-memory {
+                #address-cells = <1>;
+                #size-cells = <1>;
+
+                blconfig: nvram@10000000 {
+                        compatible = "raspberrypi,bootloader-config", "nvmem-rmem";
+                        #address-cells = <1>;
+                        #size-cells = <1>;
+                        reg = <0x10000000 0x1000>;
+                        no-map;
+                };
+        };
+
+...
index 807694b..f90557f 100644 (file)
@@ -14,6 +14,7 @@ properties:
     items:
       - enum:
           - brcm,bcm2711-pcie # The Raspberry Pi 4
+          - brcm,bcm4908-pcie
           - brcm,bcm7211-pcie # Broadcom STB version of RPi4
           - brcm,bcm7278-pcie # Broadcom 7278 Arm
           - brcm,bcm7216-pcie # Broadcom 7216 Arm
@@ -63,15 +64,6 @@ properties:
 
   aspm-no-l0s: true
 
-  resets:
-    description: for "brcm,bcm7216-pcie", must be a valid reset
-      phandle pointing to the RESCAL reset controller provider node.
-    $ref: "/schemas/types.yaml#/definitions/phandle"
-
-  reset-names:
-    items:
-      - const: rescal
-
   brcm,scb-sizes:
     description: u64 giving the 64bit PCIe memory
       viewport size of a memory controller.  There may be up to
@@ -102,8 +94,35 @@ allOf:
       properties:
         compatible:
           contains:
+            const: brcm,bcm4908-pcie
+    then:
+      properties:
+        resets:
+          items:
+            - description: reset controller handling the PERST# signal
+
+        reset-names:
+          items:
+            - const: perst
+
+      required:
+        - resets
+        - reset-names
+  - if:
+      properties:
+        compatible:
+          contains:
             const: brcm,bcm7216-pcie
     then:
+      properties:
+        resets:
+          items:
+            - description: phandle pointing to the RESCAL reset controller
+
+        reset-names:
+          items:
+            - const: rescal
+
       required:
         - resets
         - reset-names
index daa99f7..6d898dd 100644 (file)
@@ -26,6 +26,7 @@ Required properties:
        "fsl,ls1046a-pcie-ep", "fsl,ls-pcie-ep"
        "fsl,ls1088a-pcie-ep", "fsl,ls-pcie-ep"
        "fsl,ls2088a-pcie-ep", "fsl,ls-pcie-ep"
+       "fsl,lx2160ar2-pcie-ep", "fsl,ls-pcie-ep"
 - reg: base addresses and lengths of the PCIe controller register blocks.
 - interrupts: A list of interrupt outputs of the controller. Must contain an
   entry for each entry in the interrupt-names property.
diff --git a/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml b/Documentation/devicetree/bindings/pci/microchip,pcie-host.yaml
new file mode 100644 (file)
index 0000000..04251d7
--- /dev/null
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pci/microchip,pcie-host.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Microchip PCIe Root Port Bridge Controller Device Tree Bindings
+
+maintainers:
+  - Daire McNamara <daire.mcnamara@microchip.com>
+
+allOf:
+  - $ref: /schemas/pci/pci-bus.yaml#
+
+properties:
+  compatible:
+    const: microchip,pcie-host-1.0 # PolarFire
+
+  reg:
+    maxItems: 2
+
+  reg-names:
+    items:
+      - const: cfg
+      - const: apb
+
+  interrupts:
+    minItems: 1
+    maxItems: 2
+    items:
+      - description: PCIe host controller
+      - description: builtin MSI controller
+
+  interrupt-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: pcie
+      - const: msi
+
+  ranges:
+    maxItems: 1
+
+  msi-controller:
+    description: Identifies the node as an MSI controller.
+
+  msi-parent:
+    description: MSI controller the device is capable of using.
+
+required:
+  - reg
+  - reg-names
+  - "#interrupt-cells"
+  - interrupts
+  - interrupt-map-mask
+  - interrupt-map
+  - msi-controller
+
+unevaluatedProperties: false
+
+examples:
+  - |
+    soc {
+            #address-cells = <2>;
+            #size-cells = <2>;
+            pcie0: pcie@2030000000 {
+                    compatible = "microchip,pcie-host-1.0";
+                    reg = <0x0 0x70000000 0x0 0x08000000>,
+                          <0x0 0x43000000 0x0 0x00010000>;
+                    reg-names = "cfg", "apb";
+                    device_type = "pci";
+                    #address-cells = <3>;
+                    #size-cells = <2>;
+                    #interrupt-cells = <1>;
+                    interrupts = <119>;
+                    interrupt-map-mask = <0x0 0x0 0x0 0x7>;
+                    interrupt-map = <0 0 0 1 &pcie_intc0 0>,
+                                    <0 0 0 2 &pcie_intc0 1>,
+                                    <0 0 0 3 &pcie_intc0 2>,
+                                    <0 0 0 4 &pcie_intc0 3>;
+                    interrupt-parent = <&plic0>;
+                    msi-parent = <&pcie0>;
+                    msi-controller;
+                    bus-range = <0x00 0x7f>;
+                    ranges = <0x03000000 0x0 0x78000000 0x0 0x78000000 0x0 0x04000000>;
+                    pcie_intc0: interrupt-controller {
+                        #address-cells = <0>;
+                        #interrupt-cells = <1>;
+                        interrupt-controller;
+                    };
+            };
+    };
index 3b55310..0da458a 100644 (file)
                        - "master_bus"  AXI Master clock
                        - "slave_bus"   AXI Slave clock
 
--clock-names:
-       Usage: required for sdm845 and sm8250
+- clock-names:
+       Usage: required for sdm845
+       Value type: <stringlist>
+       Definition: Should contain the following entries
+                       - "aux"         Auxiliary clock
+                       - "cfg"         Configuration clock
+                       - "bus_master"  Master AXI clock
+                       - "bus_slave"   Slave AXI clock
+                       - "slave_q2a"   Slave Q2A clock
+                       - "tbu"         PCIe TBU clock
+                       - "pipe"        PIPE clock
+
+- clock-names:
+       Usage: required for sm8250
        Value type: <stringlist>
        Definition: Should contain the following entries
                        - "aux"         Auxiliary clock
                        - "bus_slave"   Slave AXI clock
                        - "slave_q2a"   Slave Q2A clock
                        - "tbu"         PCIe TBU clock
+                       - "ddrss_sf_tbu" PCIe SF TBU clock
                        - "pipe"        PIPE clock
 
 - resets:
diff --git a/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt b/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.txt
deleted file mode 100644 (file)
index 698aacb..0000000
+++ /dev/null
@@ -1,86 +0,0 @@
-Broadcom STB USB PHY
-
-Required properties:
-- compatible: should be one of
-       "brcm,brcmstb-usb-phy"
-       "brcm,bcm7216-usb-phy"
-       "brcm,bcm7211-usb-phy"
-
-- reg and reg-names properties requirements are specific to the
-  compatible string.
-  "brcm,brcmstb-usb-phy":
-    - reg: 1 or 2 offset and length pairs. One for the base CTRL registers
-           and an optional pair for systems with USB 3.x support
-    - reg-names: not specified
-  "brcm,bcm7216-usb-phy":
-    - reg: 3 offset and length pairs for CTRL, XHCI_EC and XHCI_GBL
-           registers
-    - reg-names: "ctrl", "xhci_ec", "xhci_gbl"
-  "brcm,bcm7211-usb-phy":
-    - reg: 5 offset and length pairs for CTRL, XHCI_EC, XHCI_GBL,
-           USB_PHY and USB_MDIO registers and an optional pair
-          for the BDC registers
-    - reg-names: "ctrl", "xhci_ec", "xhci_gbl", "usb_phy", "usb_mdio", "bdc_ec"
-
-- #phy-cells: Shall be 1 as it expects one argument for setting
-             the type of the PHY. Possible values are:
-             - PHY_TYPE_USB2 for USB1.1/2.0 PHY
-             - PHY_TYPE_USB3 for USB3.x PHY
-
-Optional Properties:
-- clocks : clock phandles.
-- clock-names: String, clock name.
-- interrupts: wakeup interrupt
-- interrupt-names: "wakeup"
-- brcm,ipp: Boolean, Invert Port Power.
-  Possible values are: 0 (Don't invert), 1 (Invert)
-- brcm,ioc: Boolean, Invert Over Current detection.
-  Possible values are: 0 (Don't invert), 1 (Invert)
-- dr_mode: String, PHY Device mode.
-  Possible values are: "host", "peripheral ", "drd" or "typec-pd"
-  If this property is not defined, the phy will default to "host" mode.
-- brcm,syscon-piarbctl: phandle to syscon for handling config registers
-NOTE: one or both of the following two properties must be set
-- brcm,has-xhci: Boolean indicating the phy has an XHCI phy.
-- brcm,has-eohci: Boolean indicating the phy has an EHCI/OHCI phy.
-
-
-Example:
-
-usbphy_0: usb-phy@f0470200 {
-       reg = <0xf0470200 0xb8>,
-               <0xf0471940 0x6c0>;
-       compatible = "brcm,brcmstb-usb-phy";
-       #phy-cells = <1>;
-       dr_mode = "host"
-       brcm,ioc = <1>;
-       brcm,ipp = <1>;
-       brcm,has-xhci;
-       brcm,has-eohci;
-       clocks = <&usb20>, <&usb30>;
-       clock-names = "sw_usb", "sw_usb3";
-};
-
-usb-phy@29f0200 {
-       reg = <0x29f0200 0x200>,
-               <0x29c0880 0x30>,
-               <0x29cc100 0x534>,
-               <0x2808000 0x24>,
-               <0x2980080 0x8>;
-       reg-names = "ctrl",
-               "xhci_ec",
-               "xhci_gbl",
-               "usb_phy",
-               "usb_mdio";
-       brcm,ioc = <0x0>;
-       brcm,ipp = <0x0>;
-       compatible = "brcm,bcm7211-usb-phy";
-       interrupts = <0x30>;
-       interrupt-parent = <&vpu_intr1_nosec_intc>;
-       interrupt-names = "wake";
-       #phy-cells = <0x1>;
-       brcm,has-xhci;
-       syscon-piarbctl = <&syscon_piarbctl>;
-       clocks = <&scmi_clk 256>;
-       clock-names = "sw_usb";
-};
diff --git a/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml b/Documentation/devicetree/bindings/phy/brcm,brcmstb-usb-phy.yaml
new file mode 100644 (file)
index 0000000..0497368
--- /dev/null
@@ -0,0 +1,196 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/brcm,brcmstb-usb-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Broadcom STB USB PHY
+
+description: Broadcom's PHY that handles EHCI/OHCI and/or XHCI
+
+maintainers:
+  - Al Cooper <alcooperx@gmail.com>
+  - Rafał Miłecki <rafal@milecki.pl>
+
+properties:
+  compatible:
+    enum:
+      - brcm,bcm4908-usb-phy
+      - brcm,bcm7211-usb-phy
+      - brcm,bcm7216-usb-phy
+      - brcm,brcmstb-usb-phy
+
+  reg:
+    minItems: 1
+    maxItems: 6
+    items:
+      - description: the base CTRL register
+      - description: XHCI EC register
+      - description: XHCI GBL register
+      - description: USB PHY register
+      - description: USB MDIO register
+      - description: BDC register
+
+  reg-names:
+    minItems: 1
+    maxItems: 6
+    items:
+      - const: ctrl
+      - const: xhci_ec
+      - const: xhci_gbl
+      - const: usb_phy
+      - const: usb_mdio
+      - const: bdc_ec
+
+  clocks:
+    minItems: 1
+    maxItems: 2
+
+  clock-names:
+    minItems: 1
+    maxItems: 2
+    items:
+      - const: sw_usb
+      - const: sw_usb3
+
+  interrupts:
+    description: wakeup interrupt
+
+  interrupt-names:
+    const: wake
+
+  brcm,ipp:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Invert Port Power
+    minimum: 0
+    maximum: 1
+
+  brcm,ioc:
+    $ref: /schemas/types.yaml#/definitions/uint32
+    description: Invert Over Current detection
+    minimum: 0
+    maximum: 1
+
+  dr_mode:
+    description: PHY Device mode. If this property is not defined, the PHY will
+      default to "host" mode.
+    enum:
+      - host
+      - peripheral
+      - drd
+      - typec-pd
+
+  brcm,syscon-piarbctl:
+    description: phandle to syscon for handling config registers
+    $ref: /schemas/types.yaml#/definitions/phandle
+
+  brcm,has-xhci:
+    description: Indicates the PHY has an XHCI PHY.
+    type: boolean
+
+  brcm,has-eohci:
+    description: Indicates the PHY has an EHCI/OHCI PHY.
+    type: boolean
+
+  "#phy-cells":
+    description: |
+      Cell allows setting the type of the PHY. Possible values are:
+      - PHY_TYPE_USB2 for USB1.1/2.0 PHY
+      - PHY_TYPE_USB3 for USB3.x PHY
+    const: 1
+
+required:
+  - reg
+  - "#phy-cells"
+
+anyOf:
+  - required:
+      - brcm,has-xhci
+  - required:
+      - brcm,has-eohci
+
+allOf:
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - brcm,bcm4908-usb-phy
+              - brcm,brcmstb-usb-phy
+    then:
+      properties:
+        reg:
+          minItems: 1
+          maxItems: 2
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: brcm,bcm7211-usb-phy
+    then:
+      properties:
+        reg:
+          minItems: 5
+          maxItems: 6
+        reg-names:
+          minItems: 5
+          maxItems: 6
+  - if:
+      properties:
+        compatible:
+          contains:
+            const: brcm,bcm7216-usb-phy
+    then:
+      properties:
+        reg:
+          minItems: 3
+          maxItems: 3
+        reg-names:
+          minItems: 3
+          maxItems: 3
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/phy/phy.h>
+
+    usb-phy@f0470200 {
+        compatible = "brcm,brcmstb-usb-phy";
+        reg = <0xf0470200 0xb8>,
+              <0xf0471940 0x6c0>;
+        #phy-cells = <1>;
+        dr_mode = "host";
+        brcm,ioc = <1>;
+        brcm,ipp = <1>;
+        brcm,has-xhci;
+        brcm,has-eohci;
+        clocks = <&usb20>, <&usb30>;
+        clock-names = "sw_usb", "sw_usb3";
+    };
+  - |
+    #include <dt-bindings/phy/phy.h>
+
+    usb-phy@29f0200 {
+        compatible = "brcm,bcm7211-usb-phy";
+        reg = <0x29f0200 0x200>,
+              <0x29c0880 0x30>,
+              <0x29cc100 0x534>,
+              <0x2808000 0x24>,
+              <0x2980080 0x8>;
+        reg-names = "ctrl",
+            "xhci_ec",
+            "xhci_gbl",
+            "usb_phy",
+            "usb_mdio";
+        brcm,ioc = <0x0>;
+        brcm,ipp = <0x0>;
+        interrupts = <0x30>;
+        interrupt-parent = <&vpu_intr1_nosec_intc>;
+        interrupt-names = "wake";
+        #phy-cells = <0x1>;
+        brcm,has-xhci;
+        brcm,syscon-piarbctl = <&syscon_piarbctl>;
+        clocks = <&scmi_clk 256>;
+        clock-names = "sw_usb";
+    };
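
Since "#phy-cells" is 1, consumers select which PHY they want through the
cell argument. A minimal sketch of the consumer side (the XHCI controller
node, its address and the usbphy label are illustrative assumptions, not
part of this binding):

    #include <dt-bindings/phy/phy.h>

    usbphy: usb-phy@f0470200 {
        /* ... as in the first example above ... */
    };

    usb@f0b00000 {
        compatible = "generic-xhci";    /* assumed consumer */
        reg = <0xf0b00000 0x1000>;
        /* select the USB 3.x PHY among the types this node provides */
        phys = <&usbphy PHY_TYPE_USB3>;
        phy-names = "usb";
    };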
diff --git a/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,dsi-phy.yaml
new file mode 100644 (file)
index 0000000..71d4ace
--- /dev/null
@@ -0,0 +1,85 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2020 MediaTek
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/mediatek,dsi-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek MIPI Display Serial Interface (DSI) PHY binding
+
+maintainers:
+  - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+  - Philipp Zabel <p.zabel@pengutronix.de>
+  - Chunfeng Yun <chunfeng.yun@mediatek.com>
+
+description: The MIPI DSI PHY supports up to 4-lane output.
+
+properties:
+  $nodename:
+    pattern: "^dsi-phy@[0-9a-f]+$"
+
+  compatible:
+    enum:
+      - mediatek,mt2701-mipi-tx
+      - mediatek,mt7623-mipi-tx
+      - mediatek,mt8173-mipi-tx
+      - mediatek,mt8183-mipi-tx
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: PLL reference clock
+
+  clock-output-names:
+    maxItems: 1
+
+  "#phy-cells":
+    const: 0
+
+  "#clock-cells":
+    const: 0
+
+  nvmem-cells:
+    maxItems: 1
+    description: A phandle to the calibration data provided by an nvmem
+      device; if unspecified, default values shall be used.
+
+  nvmem-cell-names:
+    items:
+      - const: calibration-data
+
+  drive-strength-microamp:
+    description: Adjust the driving current
+    multipleOf: 200
+    minimum: 2000
+    maximum: 6000
+    default: 4600
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-output-names
+  - "#phy-cells"
+  - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt8173-clk.h>
+    dsi-phy@10215000 {
+        compatible = "mediatek,mt8173-mipi-tx";
+        reg = <0x10215000 0x1000>;
+        clocks = <&clk26m>;
+        clock-output-names = "mipi_tx0_pll";
+        drive-strength-microamp = <4000>;
+        nvmem-cells = <&mipi_tx_calibration>;
+        nvmem-cell-names = "calibration-data";
+        #clock-cells = <0>;
+        #phy-cells = <0>;
+    };
+
+...
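
The node is both a PHY provider ("#phy-cells" = 0) and a clock provider
("#clock-cells" = 0), so a DSI controller consumes it on both counts. A
sketch of that consumer side (the dsi node, its address and the mipi_tx0
label are assumptions for illustration):

    dsi@1401b000 {
        /* ... */
        phys = <&mipi_tx0>;
        phy-names = "dphy";
        clocks = <&mipi_tx0>;    /* the PLL the PHY exports as a clock */
        /* ... */
    };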
diff --git a/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,hdmi-phy.yaml
new file mode 100644 (file)
index 0000000..4752517
--- /dev/null
@@ -0,0 +1,92 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2020 MediaTek
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/mediatek,hdmi-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek High Definition Multimedia Interface (HDMI) PHY binding
+
+maintainers:
+  - Chun-Kuang Hu <chunkuang.hu@kernel.org>
+  - Philipp Zabel <p.zabel@pengutronix.de>
+  - Chunfeng Yun <chunfeng.yun@mediatek.com>
+
+description: |
+  The HDMI PHY serializes the HDMI encoder's three channel 10-bit parallel
+  output and drives the HDMI pads.
+
+properties:
+  $nodename:
+    pattern: "^hdmi-phy@[0-9a-f]+$"
+
+  compatible:
+    enum:
+      - mediatek,mt2701-hdmi-phy
+      - mediatek,mt7623-hdmi-phy
+      - mediatek,mt8173-hdmi-phy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: PLL reference clock
+
+  clock-names:
+    items:
+      - const: pll_ref
+
+  clock-output-names:
+    items:
+      - const: hdmitx_dig_cts
+
+  "#phy-cells":
+    const: 0
+
+  "#clock-cells":
+    const: 0
+
+  mediatek,ibias:
+    description:
+      TX DRV bias current for < 1.65Gbps
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 63
+    default: 0xa
+
+  mediatek,ibias_up:
+    description:
+      TX DRV bias current for >= 1.65Gbps
+    $ref: /schemas/types.yaml#/definitions/uint32
+    minimum: 0
+    maximum: 63
+    default: 0x1c
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - clock-names
+  - clock-output-names
+  - "#phy-cells"
+  - "#clock-cells"
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt8173-clk.h>
+    hdmi_phy: hdmi-phy@10209100 {
+        compatible = "mediatek,mt8173-hdmi-phy";
+        reg = <0x10209100 0x24>;
+        clocks = <&apmixedsys CLK_APMIXED_HDMI_REF>;
+        clock-names = "pll_ref";
+        clock-output-names = "hdmitx_dig_cts";
+        mediatek,ibias = <0xa>;
+        mediatek,ibias_up = <0x1c>;
+        #clock-cells = <0>;
+        #phy-cells = <0>;
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,tphy.yaml
new file mode 100644 (file)
index 0000000..602e6ff
--- /dev/null
@@ -0,0 +1,260 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2020 MediaTek
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/mediatek,tphy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek T-PHY Controller Device Tree Bindings
+
+maintainers:
+  - Chunfeng Yun <chunfeng.yun@mediatek.com>
+
+description: |
+  The T-PHY controller supports physical layer functionality for a number of
+  controllers on MediaTek SoCs, including USB 2.0, USB 3.0, PCIe and SATA.
+
+  Layout differences of banks between T-PHY V1 (mt8173/mt2701) and
+  T-PHY V2 (mt2712) when working in USB mode:
+  -----------------------------------
+  Version 1:
+  port        offset    bank
+  shared      0x0000    SPLLC
+              0x0100    FMREG
+  u2 port0    0x0800    U2PHY_COM
+  u3 port0    0x0900    U3PHYD
+              0x0a00    U3PHYD_BANK2
+              0x0b00    U3PHYA
+              0x0c00    U3PHYA_DA
+  u2 port1    0x1000    U2PHY_COM
+  u3 port1    0x1100    U3PHYD
+              0x1200    U3PHYD_BANK2
+              0x1300    U3PHYA
+              0x1400    U3PHYA_DA
+  u2 port2    0x1800    U2PHY_COM
+              ...
+
+  Version 2:
+  port        offset    bank
+  u2 port0    0x0000    MISC
+              0x0100    FMREG
+              0x0300    U2PHY_COM
+  u3 port0    0x0700    SPLLC
+              0x0800    CHIP
+              0x0900    U3PHYD
+              0x0a00    U3PHYD_BANK2
+              0x0b00    U3PHYA
+              0x0c00    U3PHYA_DA
+  u2 port1    0x1000    MISC
+              0x1100    FMREG
+              0x1300    U2PHY_COM
+  u3 port1    0x1700    SPLLC
+              0x1800    CHIP
+              0x1900    U3PHYD
+              0x1a00    U3PHYD_BANK2
+              0x1b00    U3PHYA
+              0x1c00    U3PHYA_DA
+  u2 port2    0x2000    MISC
+              ...
+
+  On V2, the SPLLC bank shared by u3 ports and the FMREG bank shared by
+  u2 ports on V1 are moved back into each port, and new banks are added:
+  MISC for u2 ports and CHIP for u3 ports.
+
+properties:
+  $nodename:
+    pattern: "^t-phy@[0-9a-f]+$"
+
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - mediatek,mt2701-tphy
+              - mediatek,mt7623-tphy
+              - mediatek,mt7622-tphy
+              - mediatek,mt8516-tphy
+          - const: mediatek,generic-tphy-v1
+      - items:
+          - enum:
+              - mediatek,mt2712-tphy
+              - mediatek,mt7629-tphy
+              - mediatek,mt8183-tphy
+          - const: mediatek,generic-tphy-v2
+      - const: mediatek,mt2701-u3phy
+        deprecated: true
+      - const: mediatek,mt2712-u3phy
+        deprecated: true
+      - const: mediatek,mt8173-u3phy
+
+  reg:
+    description:
+      Registers shared by multiple ports, excluding each port's private
+      registers. Needed for T-PHY V1, such as mt2701 and mt8173, but not
+      for T-PHY V2, such as mt2712.
+    maxItems: 1
+
+  "#address-cells":
+    enum: [1, 2]
+
+  "#size-cells":
+    enum: [1, 2]
+
+  # Used with non-empty value if optional 'reg' is not provided.
+  # The format of the value is an arbitrary number of triplets of
+  # (child-bus-address, parent-bus-address, length).
+  ranges: true
+
+  mediatek,src-ref-clk-mhz:
+    description:
+      Frequency of the reference clock used for slew rate calibration
+    default: 26
+
+  mediatek,src-coef:
+    description:
+      Coefficient for slew rate calibration; depends on the SoC process
+    $ref: /schemas/types.yaml#/definitions/uint32
+    default: 28
+
+# Required child node:
+patternProperties:
+  "^usb-phy@[0-9a-f]+$":
+    type: object
+    description:
+      A sub-node is required for each port the controller provides.
+      Address range information including the usual 'reg' property
+      is used inside these nodes to describe the controller's topology.
+
+    properties:
+      reg:
+        maxItems: 1
+
+      clocks:
+        minItems: 1
+        maxItems: 2
+        items:
+          - description: Reference clock (48 MHz for HS, 24~27 MHz for SS/P)
+          - description: Reference clock of the analog PHY
+        description:
+          Use both clocks if the clocks of the analog and digital PHYs
+          are separated; otherwise, use only the "ref" clock if one is
+          needed at all.
+
+      clock-names:
+        minItems: 1
+        maxItems: 2
+        items:
+          - const: ref
+          - const: da_ref
+
+      "#phy-cells":
+        const: 1
+        description: |
+          The single cell selects the PHY type; one of:
+          - PHY_TYPE_USB2
+          - PHY_TYPE_USB3
+          - PHY_TYPE_PCIE
+          - PHY_TYPE_SATA
+
+      # The following optional vendor properties are only for debug or HQA test
+      mediatek,eye-src:
+        description:
+          The value of slew rate calibration (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 7
+
+      mediatek,eye-vrt:
+        description:
+          The selection of VRT reference voltage (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 7
+
+      mediatek,eye-term:
+        description:
+          The selection of HS_TX TERM reference voltage (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 7
+
+      mediatek,intr:
+        description:
+          The selection of internal resistor (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 31
+
+      mediatek,discth:
+        description:
+          The selection of disconnect threshold (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 15
+
+      mediatek,bc12:
+        description:
+          Enable BC1.2 charger detection if the hardware supports it
+        type: boolean
+
+    required:
+      - reg
+      - "#phy-cells"
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - "#address-cells"
+  - "#size-cells"
+  - ranges
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt8173-clk.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/phy/phy.h>
+    usb@11271000 {
+        compatible = "mediatek,mt8173-mtu3", "mediatek,mtu3";
+        reg = <0x11271000 0x3000>, <0x11280700 0x0100>;
+        reg-names = "mac", "ippc";
+        phys = <&u2port0 PHY_TYPE_USB2>,
+               <&u3port0 PHY_TYPE_USB3>,
+               <&u2port1 PHY_TYPE_USB2>;
+        interrupts = <GIC_SPI 115 IRQ_TYPE_LEVEL_LOW>;
+        clocks = <&topckgen CLK_TOP_USB30_SEL>;
+        clock-names = "sys_ck";
+    };
+
+    t-phy@11290000 {
+        compatible = "mediatek,mt8173-u3phy";
+        reg = <0x11290000 0x800>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        u2port0: usb-phy@11290800 {
+            reg = <0x11290800 0x100>;
+            clocks = <&apmixedsys CLK_APMIXED_REF2USB_TX>, <&clk48m>;
+            clock-names = "ref", "da_ref";
+            #phy-cells = <1>;
+        };
+
+        u3port0: usb-phy@11290900 {
+            reg = <0x11290900 0x700>;
+            clocks = <&clk26m>;
+            clock-names = "ref";
+            #phy-cells = <1>;
+        };
+
+        u2port1: usb-phy@11291000 {
+            reg = <0x11291000 0x100>;
+            #phy-cells = <1>;
+        };
+    };
+
+...
diff --git a/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml b/Documentation/devicetree/bindings/phy/mediatek,ufs-phy.yaml
new file mode 100644 (file)
index 0000000..3a9be82
--- /dev/null
@@ -0,0 +1,64 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2020 MediaTek
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/mediatek,ufs-phy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek Universal Flash Storage (UFS) M-PHY binding
+
+maintainers:
+  - Stanley Chu <stanley.chu@mediatek.com>
+  - Chunfeng Yun <chunfeng.yun@mediatek.com>
+
+description: |
+  UFS M-PHY nodes describe the on-chip UFS M-PHY hardware macros.
+  Each UFS M-PHY instance should have its own node.
+  To bind UFS M-PHY with UFS host controller, the controller node should
+  contain a phandle reference to UFS M-PHY node.
+
+properties:
+  $nodename:
+    pattern: "^ufs-phy@[0-9a-f]+$"
+
+  compatible:
+    const: mediatek,mt8183-ufsphy
+
+  reg:
+    maxItems: 1
+
+  clocks:
+    items:
+      - description: Unipro core control clock.
+      - description: M-PHY core control clock.
+
+  clock-names:
+    items:
+      - const: unipro
+      - const: mp
+
+  "#phy-cells":
+    const: 0
+
+required:
+  - compatible
+  - reg
+  - "#phy-cells"
+  - clocks
+  - clock-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/clock/mt8183-clk.h>
+    ufsphy: ufs-phy@11fa0000 {
+        compatible = "mediatek,mt8183-ufsphy";
+        reg = <0x11fa0000 0xc000>;
+        clocks = <&infracfg CLK_INFRA_UNIPRO_SCK>,
+                 <&infracfg CLK_INFRA_UFS_MP_SAP_BCLK>;
+        clock-names = "unipro", "mp";
+        #phy-cells = <0>;
+    };
+
+...
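
To bind the M-PHY as described above, the UFS host controller node carries
the phandle; a sketch of that side (the controller compatible, address and
size are assumptions, not part of this binding):

    ufshci@11270000 {
        compatible = "mediatek,mt8183-ufshci";    /* assumed controller */
        reg = <0x11270000 0x2300>;
        phys = <&ufsphy>;
    };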
diff --git a/Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml b/Documentation/devicetree/bindings/phy/mediatek,xsphy.yaml
new file mode 100644 (file)
index 0000000..598fd2b
--- /dev/null
@@ -0,0 +1,199 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+# Copyright (c) 2020 MediaTek
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/phy/mediatek,xsphy.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek XS-PHY Controller Device Tree Bindings
+
+maintainers:
+  - Chunfeng Yun <chunfeng.yun@mediatek.com>
+
+description: |
+  The XS-PHY controller supports physical layer functionality for the
+  USB3.1 GEN2 controllers on MediaTek SoCs.
+
+  Banks layout of xsphy
+  ----------------------------------
+  port        offset    bank
+  u2 port0    0x0000    MISC
+              0x0100    FMREG
+              0x0300    U2PHY_COM
+  u2 port1    0x1000    MISC
+              0x1100    FMREG
+              0x1300    U2PHY_COM
+  u2 port2    0x2000    MISC
+              ...
+  u31 common  0x3000    DIG_GLB
+              0x3100    PHYA_GLB
+  u31 port0   0x3400    DIG_LN_TOP
+              0x3500    DIG_LN_TX0
+              0x3600    DIG_LN_RX0
+              0x3700    DIG_LN_DAIF
+              0x3800    PHYA_LN
+  u31 port1   0x3a00    DIG_LN_TOP
+              0x3b00    DIG_LN_TX0
+              0x3c00    DIG_LN_RX0
+              0x3d00    DIG_LN_DAIF
+              0x3e00    PHYA_LN
+              ...
+  DIG_GLB & PHYA_GLB are shared by U31 ports.
+
+properties:
+  $nodename:
+    pattern: "^xs-phy@[0-9a-f]+$"
+
+  compatible:
+    items:
+      - enum:
+          - mediatek,mt3611-xsphy
+          - mediatek,mt3612-xsphy
+      - const: mediatek,xsphy
+
+  reg:
+    description:
+      Registers shared by multiple U3 ports, excluding each port's private
+      registers. If only U2 ports are provided, this property should not
+      be used.
+    maxItems: 1
+
+  "#address-cells":
+    enum: [1, 2]
+
+  "#size-cells":
+    enum: [1, 2]
+
+  ranges: true
+
+  mediatek,src-ref-clk-mhz:
+    description:
+      Frequency of the reference clock used for slew rate calibration
+    default: 26
+
+  mediatek,src-coef:
+    description:
+      Coefficient for slew rate calibration; depends on the SoC process
+    $ref: /schemas/types.yaml#/definitions/uint32
+    default: 17
+
+# Required child node:
+patternProperties:
+  "^usb-phy@[0-9a-f]+$":
+    type: object
+    description:
+      A sub-node is required for each port the controller provides.
+      Address range information including the usual 'reg' property
+      is used inside these nodes to describe the controller's topology.
+
+    properties:
+      reg:
+        maxItems: 1
+
+      clocks:
+        items:
+          - description: Reference clock (48 MHz for HS, 24~27 MHz for SS/P)
+
+      clock-names:
+        items:
+          - const: ref
+
+      "#phy-cells":
+        const: 1
+        description: |
+          The single cell selects the PHY type; one of:
+          - PHY_TYPE_USB2
+          - PHY_TYPE_USB3
+
+      # The following optional vendor properties are only for debug or HQA test
+      mediatek,eye-src:
+        description:
+          The value of slew rate calibration (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 7
+
+      mediatek,eye-vrt:
+        description:
+          The selection of VRT reference voltage (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 7
+
+      mediatek,eye-term:
+        description:
+          The selection of HS_TX TERM reference voltage (U2 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 7
+
+      mediatek,efuse-intr:
+        description:
+          The selection of Internal Resistor (U2/U3 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 63
+
+      mediatek,efuse-tx-imp:
+        description:
+          The selection of TX Impedance (U3 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 31
+
+      mediatek,efuse-rx-imp:
+        description:
+          The selection of RX Impedance (U3 phy)
+        $ref: /schemas/types.yaml#/definitions/uint32
+        minimum: 1
+        maximum: 31
+
+    required:
+      - reg
+      - clocks
+      - clock-names
+      - "#phy-cells"
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - "#address-cells"
+  - "#size-cells"
+  - ranges
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/phy/phy.h>
+
+    u3phy: xs-phy@11c40000 {
+        compatible = "mediatek,mt3611-xsphy", "mediatek,xsphy";
+        reg = <0x11c43000 0x0200>;
+        mediatek,src-ref-clk-mhz = <26>;
+        mediatek,src-coef = <17>;
+        #address-cells = <1>;
+        #size-cells = <1>;
+        ranges;
+
+        u2port0: usb-phy@11c40000 {
+            reg = <0x11c40000 0x0400>;
+            clocks = <&clk48m>;
+            clock-names = "ref";
+            mediatek,eye-src = <4>;
+            #phy-cells = <1>;
+        };
+
+        u3port0: usb-phy@11c43000 {
+            reg = <0x11c43400 0x0500>;
+            clocks = <&clk26m>;
+            clock-names = "ref";
+            mediatek,efuse-intr = <28>;
+            #phy-cells = <1>;
+        };
+    };
+
+...
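
As with the T-PHY, consumers pick the PHY type via the cell argument; a
sketch of a controller using both example ports above (the controller node
itself is an assumption):

    usb@11c00000 {
        /* ... */
        phys = <&u2port0 PHY_TYPE_USB2>, <&u3port0 PHY_TYPE_USB3>;
        /* ... */
    };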
diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-tphy.txt
deleted file mode 100644 (file)
index dd75b67..0000000
+++ /dev/null
@@ -1,162 +0,0 @@
-MediaTek T-PHY binding
---------------------------
-
-T-phy controller supports physical layer functionality for a number of
-controllers on MediaTek SoCs, such as, USB2.0, USB3.0, PCIe, and SATA.
-
-Required properties (controller (parent) node):
- - compatible  : should be one of
-                 "mediatek,generic-tphy-v1"
-                 "mediatek,generic-tphy-v2"
-                 "mediatek,mt2701-u3phy" (deprecated)
-                 "mediatek,mt2712-u3phy" (deprecated)
-                 "mediatek,mt8173-u3phy";
-                 make use of "mediatek,generic-tphy-v1" on mt2701 instead and
-                 "mediatek,generic-tphy-v2" on mt2712 instead.
-
-- #address-cells:      the number of cells used to represent physical
-               base addresses.
-- #size-cells: the number of cells used to represent the size of an address.
-- ranges:      the address mapping relationship to the parent, defined with
-               - empty value: if optional 'reg' is used.
-               - non-empty value: if optional 'reg' is not used. should set
-                       the child's base address to 0, the physical address
-                       within parent's address space, and the length of
-                       the address map.
-
-Required nodes : a sub-node is required for each port the controller
-                 provides. Address range information including the usual
-                 'reg' property is used inside these nodes to describe
-                 the controller's topology.
-
-Optional properties (controller (parent) node):
- - reg         : offset and length of register shared by multiple ports,
-                 exclude port's private register. It is needed on mt2701
-                 and mt8173, but not on mt2712.
- - mediatek,src-ref-clk-mhz    : frequency of reference clock for slew rate
-                 calibrate
- - mediatek,src-coef   : coefficient for slew rate calibrate, depends on
-                 SoC process
-
-Required properties (port (child) node):
-- reg          : address and length of the register set for the port.
-- #phy-cells   : should be 1 (See second example)
-                 cell after port phandle is phy type from:
-                       - PHY_TYPE_USB2
-                       - PHY_TYPE_USB3
-                       - PHY_TYPE_PCIE
-                       - PHY_TYPE_SATA
-
-Optional properties (PHY_TYPE_USB2 port (child) node):
-- clocks       : a list of phandle + clock-specifier pairs, one for each
-                 entry in clock-names
-- clock-names  : may contain
-                 "ref": 48M reference clock for HighSpeed (digital) phy; and 26M
-                       reference clock for SuperSpeed (digital) phy, sometimes is
-                       24M, 25M or 27M, depended on platform.
-                 "da_ref": the reference clock of analog phy, used if the clocks
-                       of analog and digital phys are separated, otherwise uses
-                       "ref" clock only if needed.
-
-- mediatek,eye-src     : u32, the value of slew rate calibrate
-- mediatek,eye-vrt     : u32, the selection of VRT reference voltage
-- mediatek,eye-term    : u32, the selection of HS_TX TERM reference voltage
-- mediatek,bc12        : bool, enable BC12 of u2phy if support it
-- mediatek,discth      : u32, the selection of disconnect threshold
-- mediatek,intr        : u32, the selection of internal R (resistance)
-
-Example:
-
-u3phy: usb-phy@11290000 {
-       compatible = "mediatek,mt8173-u3phy";
-       reg = <0 0x11290000 0 0x800>;
-       #address-cells = <2>;
-       #size-cells = <2>;
-       ranges;
-
-       u2port0: usb-phy@11290800 {
-               reg = <0 0x11290800 0 0x100>;
-               clocks = <&apmixedsys CLK_APMIXED_REF2USB_TX>;
-               clock-names = "ref";
-               #phy-cells = <1>;
-       };
-
-       u3port0: usb-phy@11290900 {
-               reg = <0 0x11290800 0 0x700>;
-               clocks = <&clk26m>;
-               clock-names = "ref";
-               #phy-cells = <1>;
-       };
-
-       u2port1: usb-phy@11291000 {
-               reg = <0 0x11291000 0 0x100>;
-               clocks = <&apmixedsys CLK_APMIXED_REF2USB_TX>;
-               clock-names = "ref";
-               #phy-cells = <1>;
-       };
-};
-
-Specifying phy control of devices
----------------------------------
-
-Device nodes should specify the configuration required in their "phys"
-property, containing a phandle to the phy port node and a device type;
-phy-names for each port are optional.
-
-Example:
-
-#include <dt-bindings/phy/phy.h>
-
-usb30: usb@11270000 {
-       ...
-       phys = <&u2port0 PHY_TYPE_USB2>, <&u3port0 PHY_TYPE_USB3>;
-       phy-names = "usb2-0", "usb3-0";
-       ...
-};
-
-
-Layout differences of banks between mt8173/mt2701 and mt2712
--------------------------------------------------------------
-mt8173 and mt2701:
-port        offset    bank
-shared      0x0000    SPLLC
-            0x0100    FMREG
-u2 port0    0x0800    U2PHY_COM
-u3 port0    0x0900    U3PHYD
-            0x0a00    U3PHYD_BANK2
-            0x0b00    U3PHYA
-            0x0c00    U3PHYA_DA
-u2 port1    0x1000    U2PHY_COM
-u3 port1    0x1100    U3PHYD
-            0x1200    U3PHYD_BANK2
-            0x1300    U3PHYA
-            0x1400    U3PHYA_DA
-u2 port2    0x1800    U2PHY_COM
-            ...
-
-mt2712:
-port        offset    bank
-u2 port0    0x0000    MISC
-            0x0100    FMREG
-            0x0300    U2PHY_COM
-u3 port0    0x0700    SPLLC
-            0x0800    CHIP
-            0x0900    U3PHYD
-            0x0a00    U3PHYD_BANK2
-            0x0b00    U3PHYA
-            0x0c00    U3PHYA_DA
-u2 port1    0x1000    MISC
-            0x1100    FMREG
-            0x1300    U2PHY_COM
-u3 port1    0x1700    SPLLC
-            0x1800    CHIP
-            0x1900    U3PHYD
-            0x1a00    U3PHYD_BANK2
-            0x1b00    U3PHYA
-            0x1c00    U3PHYA_DA
-u2 port2    0x2000    MISC
-            ...
-
-    SPLLC shared by u3 ports and FMREG shared by u2 ports on
-mt8173/mt2701 are put back into each port; a new bank MISC for
-u2 ports and CHIP for u3 ports are added on mt2712.
diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-ufs.txt b/Documentation/devicetree/bindings/phy/phy-mtk-ufs.txt
deleted file mode 100644 (file)
index 5789029..0000000
+++ /dev/null
@@ -1,38 +0,0 @@
-MediaTek Universal Flash Storage (UFS) M-PHY binding
---------------------------------------------------------
-
-UFS M-PHY nodes are defined to describe on-chip UFS M-PHY hardware macro.
-Each UFS M-PHY node should have its own node.
-
-To bind UFS M-PHY with UFS host controller, the controller node should
-contain a phandle reference to UFS M-PHY node.
-
-Required properties for UFS M-PHY nodes:
-- compatible         : Compatible list, contains the following controller:
-                       "mediatek,mt8183-ufsphy" for ufs phy
-                       persent on MT81xx chipsets.
-- reg                : Address and length of the UFS M-PHY register set.
-- #phy-cells         : This property shall be set to 0.
-- clocks             : List of phandle and clock specifier pairs.
-- clock-names        : List of clock input name strings sorted in the same
-                       order as the clocks property. Following clocks are
-                       mandatory.
-                       "unipro": Unipro core control clock.
-                       "mp": M-PHY core control clock.
-
-Example:
-
-       ufsphy: phy@11fa0000 {
-               compatible = "mediatek,mt8183-ufsphy";
-               reg = <0 0x11fa0000 0 0xc000>;
-               #phy-cells = <0>;
-
-               clocks = <&infracfg_ao INFRACFG_AO_UNIPRO_SCK_CG>,
-                        <&infracfg_ao INFRACFG_AO_UFS_MP_SAP_BCLK_CG>;
-               clock-names = "unipro", "mp";
-       };
-
-       ufshci@11270000 {
-               ...
-               phys = <&ufsphy>;
-       };
diff --git a/Documentation/devicetree/bindings/phy/phy-mtk-xsphy.txt b/Documentation/devicetree/bindings/phy/phy-mtk-xsphy.txt
deleted file mode 100644 (file)
index e7caefa..0000000
+++ /dev/null
@@ -1,109 +0,0 @@
-MediaTek XS-PHY binding
---------------------------
-
-The XS-PHY controller supports physical layer functionality for USB3.1
-GEN2 controller on MediaTek SoCs.
-
-Required properties (controller (parent) node):
- - compatible  : should be "mediatek,<soc-model>-xsphy", "mediatek,xsphy",
-                 soc-model is the name of SoC, such as mt3611 etc;
-                 when using "mediatek,xsphy" compatible string, you need SoC specific
-                 ones in addition, one of:
-                 - "mediatek,mt3611-xsphy"
-
- - #address-cells, #size-cells : should use the same values as the root node
- - ranges: must be present
-
-Optional properties (controller (parent) node):
- - reg         : offset and length of register shared by multiple U3 ports,
-                 exclude port's private register, if only U2 ports provided,
-                 shouldn't use the property.
- - mediatek,src-ref-clk-mhz    : u32, frequency of reference clock for slew rate
-                 calibrate
- - mediatek,src-coef   : u32, coefficient for slew rate calibrate, depends on
-                 SoC process
-
-Required nodes : a sub-node is required for each port the controller
-                 provides. Address range information including the usual
-                 'reg' property is used inside these nodes to describe
-                 the controller's topology.
-
-Required properties (port (child) node):
-- reg          : address and length of the register set for the port.
-- clocks       : a list of phandle + clock-specifier pairs, one for each
-                 entry in clock-names
-- clock-names  : must contain
-                 "ref": 48M reference clock for HighSpeed analog phy; and 26M
-                       reference clock for SuperSpeedPlus analog phy, sometimes is
-                       24M, 25M or 27M, depended on platform.
-- #phy-cells   : should be 1
-                 cell after port phandle is phy type from:
-                       - PHY_TYPE_USB2
-                       - PHY_TYPE_USB3
-
-The following optional properties are only for debug or HQA test
-Optional properties (PHY_TYPE_USB2 port (child) node):
-- mediatek,eye-src     : u32, the value of slew rate calibrate
-- mediatek,eye-vrt     : u32, the selection of VRT reference voltage
-- mediatek,eye-term    : u32, the selection of HS_TX TERM reference voltage
-- mediatek,efuse-intr  : u32, the selection of Internal Resistor
-
-Optional properties (PHY_TYPE_USB3 port (child) node):
-- mediatek,efuse-intr  : u32, the selection of Internal Resistor
-- mediatek,efuse-tx-imp        : u32, the selection of TX Impedance
-- mediatek,efuse-rx-imp        : u32, the selection of RX Impedance
-
-Banks layout of xsphy
--------------------------------------------------------------
-port        offset    bank
-u2 port0    0x0000    MISC
-            0x0100    FMREG
-            0x0300    U2PHY_COM
-u2 port1    0x1000    MISC
-            0x1100    FMREG
-            0x1300    U2PHY_COM
-u2 port2    0x2000    MISC
-            ...
-u31 common  0x3000    DIG_GLB
-            0x3100    PHYA_GLB
-u31 port0   0x3400    DIG_LN_TOP
-            0x3500    DIG_LN_TX0
-            0x3600    DIG_LN_RX0
-            0x3700    DIG_LN_DAIF
-            0x3800    PHYA_LN
-u31 port1   0x3a00    DIG_LN_TOP
-            0x3b00    DIG_LN_TX0
-            0x3c00    DIG_LN_RX0
-            0x3d00    DIG_LN_DAIF
-            0x3e00    PHYA_LN
-            ...
-
-DIG_GLB & PHYA_GLB are shared by U31 ports.
-
-Example:
-
-u3phy: usb-phy@11c40000 {
-       compatible = "mediatek,mt3611-xsphy", "mediatek,xsphy";
-       reg = <0 0x11c43000 0 0x0200>;
-       mediatek,src-ref-clk-mhz = <26>;
-       mediatek,src-coef = <17>;
-       #address-cells = <2>;
-       #size-cells = <2>;
-       ranges;
-
-       u2port0: usb-phy@11c40000 {
-               reg = <0 0x11c40000 0 0x0400>;
-               clocks = <&clk48m>;
-               clock-names = "ref";
-               mediatek,eye-src = <4>;
-               #phy-cells = <1>;
-       };
-
-       u3port0: usb-phy@11c43000 {
-               reg = <0 0x11c43400 0 0x0500>;
-               clocks = <&clk26m>;
-               clock-names = "ref";
-               mediatek,efuse-intr = <28>;
-               #phy-cells = <1>;
-       };
-};
index 0ba6197..46df678 100644 (file)
@@ -45,6 +45,12 @@ properties:
   "#size-cells":
     const: 0
 
+  vdda1v1-supply:
+    description: regulator providing 1V1 power supply to the PLL block
+
+  vdda1v8-supply:
+    description: regulator providing 1V8 power supply to the PLL block
+
 #Required child nodes:
 
 patternProperties:
@@ -61,12 +67,6 @@ patternProperties:
       phy-supply:
         description: regulator providing 3V3 power supply to the PHY.
 
-      vdda1v1-supply:
-        description: regulator providing 1V1 power supply to the PLL block
-
-      vdda1v8-supply:
-        description: regulator providing 1V8 power supply to the PLL block
-
       "#phy-cells":
         enum: [ 0x0, 0x1 ]
 
@@ -90,8 +90,6 @@ patternProperties:
     required:
       - reg
       - phy-supply
-      - vdda1v1-supply
-      - vdda1v8-supply
       - "#phy-cells"
 
     additionalProperties: false
@@ -102,6 +100,8 @@ required:
   - clocks
   - "#address-cells"
   - "#size-cells"
+  - vdda1v1-supply
+  - vdda1v8-supply
   - usb-phy@0
   - usb-phy@1
 
@@ -116,22 +116,20 @@ examples:
         reg = <0x5a006000 0x1000>;
         clocks = <&rcc USBPHY_K>;
         resets = <&rcc USBPHY_R>;
+        vdda1v1-supply = <&reg11>;
+        vdda1v8-supply = <&reg18>;
         #address-cells = <1>;
         #size-cells = <0>;
 
         usbphyc_port0: usb-phy@0 {
             reg = <0>;
             phy-supply = <&vdd_usb>;
-            vdda1v1-supply = <&reg11>;
-            vdda1v8-supply = <&reg18>;
             #phy-cells = <0>;
         };
 
         usbphyc_port1: usb-phy@1 {
             reg = <1>;
             phy-supply = <&vdd_usb>;
-            vdda1v1-supply = <&reg11>;
-            vdda1v8-supply = <&reg18>;
             #phy-cells = <1>;
         };
     };
index ec05db3..626447f 100644 (file)
@@ -25,19 +25,32 @@ properties:
       - qcom,msm8998-qmp-pcie-phy
       - qcom,msm8998-qmp-ufs-phy
       - qcom,msm8998-qmp-usb3-phy
+      - qcom,sc8180x-qmp-ufs-phy
+      - qcom,sc8180x-qmp-usb3-phy
       - qcom,sdm845-qhp-pcie-phy
       - qcom,sdm845-qmp-pcie-phy
       - qcom,sdm845-qmp-ufs-phy
       - qcom,sdm845-qmp-usb3-uni-phy
       - qcom,sm8150-qmp-ufs-phy
+      - qcom,sm8150-qmp-usb3-phy
+      - qcom,sm8150-qmp-usb3-uni-phy
       - qcom,sm8250-qmp-ufs-phy
       - qcom,sm8250-qmp-gen3x1-pcie-phy
       - qcom,sm8250-qmp-gen3x2-pcie-phy
       - qcom,sm8250-qmp-modem-pcie-phy
+      - qcom,sm8250-qmp-usb3-phy
+      - qcom,sm8250-qmp-usb3-uni-phy
+      - qcom,sm8350-qmp-ufs-phy
+      - qcom,sm8350-qmp-usb3-phy
+      - qcom,sm8350-qmp-usb3-uni-phy
+      - qcom,sdx55-qmp-usb3-uni-phy
 
   reg:
+    minItems: 1
+    maxItems: 2
     items:
       - description: Address and length of PHY's common serdes block.
+      - description: Address and length of PHY's DP_COM control block.
 
   "#clock-cells":
     enum: [ 1, 2 ]
@@ -136,6 +149,32 @@ allOf:
         compatible:
           contains:
             enum:
+              - qcom,sdx55-qmp-usb3-uni-phy
+    then:
+      properties:
+        clocks:
+          items:
+            - description: Phy aux clock.
+            - description: Phy config clock.
+            - description: 19.2 MHz ref clk.
+        clock-names:
+          items:
+            - const: aux
+            - const: cfg_ahb
+            - const: ref
+        resets:
+          items:
+            - description: reset of phy block.
+            - description: phy common block reset.
+        reset-names:
+          items:
+            - const: phy
+            - const: common
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
               - qcom,msm8996-qmp-pcie-phy
     then:
       properties:
@@ -285,6 +324,64 @@ allOf:
         reset-names:
           items:
             - const: phy
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,sm8150-qmp-usb3-phy
+              - qcom,sm8150-qmp-usb3-uni-phy
+              - qcom,sm8250-qmp-usb3-uni-phy
+              - qcom,sm8350-qmp-usb3-uni-phy
+    then:
+      properties:
+        clocks:
+          items:
+            - description: Phy aux clock.
+            - description: 19.2 MHz ref clk source.
+            - description: 19.2 MHz ref clk.
+            - description: Phy common block aux clock.
+        clock-names:
+          items:
+            - const: aux
+            - const: ref_clk_src
+            - const: ref
+            - const: com_aux
+        resets:
+          items:
+            - description: reset of phy block.
+            - description: phy common block reset.
+        reset-names:
+          items:
+            - const: phy
+            - const: common
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - qcom,sm8250-qmp-usb3-phy
+              - qcom,sm8350-qmp-usb3-phy
+    then:
+      properties:
+        clocks:
+          items:
+            - description: Phy aux clock.
+            - description: 19.2 MHz ref clk.
+            - description: Phy common block aux clock.
+        clock-names:
+          items:
+            - const: aux
+            - const: ref_clk_src
+            - const: com_aux
+        resets:
+          items:
+            - description: reset of phy block.
+            - description: phy common block reset.
+        reset-names:
+          items:
+            - const: phy
+            - const: common
 
 examples:
   - |
index d457fb6..9f9cf07 100644 (file)
@@ -21,6 +21,8 @@ properties:
               - qcom,ipq8074-qusb2-phy
               - qcom,msm8996-qusb2-phy
               - qcom,msm8998-qusb2-phy
+              - qcom,sdm660-qusb2-phy
+              - qcom,ipq6018-qusb2-phy
       - items:
           - enum:
               - qcom,sc7180-qusb2-phy
index ca6a083..abcc437 100644 (file)
@@ -16,6 +16,7 @@ properties:
   compatible:
     enum:
       - qcom,usb-hs-28nm-femtophy
+      - qcom,usb-hs-28nm-mdm9607
 
   reg:
     maxItems: 1
index 4949a28..ee77c64 100644 (file)
@@ -17,6 +17,8 @@ properties:
     enum:
       - qcom,usb-snps-hs-7nm-phy
       - qcom,sm8150-usb-hs-phy
+      - qcom,sm8250-usb-hs-phy
+      - qcom,sm8350-usb-hs-phy
       - qcom,usb-snps-femto-v2-phy
 
   reg:
index 00aa2d3..57d28c0 100644 (file)
@@ -16,11 +16,11 @@ Optional properties:
  - drive-impedance-ohm: Specifies the drive impedance in Ohm.
                         Possible values are 33, 40, 50, 66 and 100.
                         If not set, the default value of 50 will be applied.
- - enable-strobe-pulldown: Enable internal pull-down for the strobe line.
-                           If not set, pull-down is not used.
- - output-tapdelay-select: Specifies the phyctrl_otapdlysec register.
-                           If not set, the register defaults to 0x4.
-                           Maximum value 0xf.
+ - rockchip,enable-strobe-pulldown: Enable internal pull-down for the strobe
+                                    line.  If not set, pull-down is not used.
+ - rockchip,output-tapdelay-select: Specifies the phyctrl_otapdlysec register.
+                                    If not set, the register defaults to 0x4.
+                                    Maximum value 0xf.
 
 Example:
 
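A sketch with the renamed properties (the node, address and clock phandle
are illustrative; the impedance and tap-delay values simply restate the
documented defaults):

	emmcphy: phy@f780 {
		compatible = "rockchip,rk3399-emmc-phy";
		reg = <0xf780 0x20>;
		clocks = <&sdhci>;
		clock-names = "emmcclk";
		drive-impedance-ohm = <50>;
		rockchip,enable-strobe-pulldown;
		rockchip,output-tapdelay-select = <0x4>;
		#phy-cells = <0>;
	};
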
index c33e9bc..bbbd855 100644 (file)
@@ -151,7 +151,7 @@ patternProperties:
       WIZ node should have '1' subnode for the SERDES. It could be either
       Sierra SERDES or Torrent SERDES. Sierra SERDES should follow the
       bindings specified in
-      Documentation/devicetree/bindings/phy/phy-cadence-sierra.txt
+      Documentation/devicetree/bindings/phy/phy-cadence-sierra.yaml
       Torrent SERDES should follow the bindings specified in
       Documentation/devicetree/bindings/phy/phy-cadence-torrent.yaml
 
diff --git a/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml b/Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
new file mode 100644 (file)
index 0000000..46fbc73
--- /dev/null
@@ -0,0 +1,171 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/pinctrl/canaan,k210-fpioa.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 FPIOA Device Tree Bindings
+
+maintainers:
+  - Damien Le Moal <damien.lemoal@wdc.com>
+
+description:
+  The Canaan Kendryte K210 SoC Fully Programmable IO Array (FPIOA)
+  controller allows assigning any of 256 possible functions to any of
+  48 IO pins of the SoC. Pin function configuration is performed on
+  a per-pin basis.
+
+properties:
+  compatible:
+    const: canaan,k210-fpioa
+
+  reg:
+    maxItems: 1
+    description:
+      Address and length of the register set for the FPIOA controller.
+
+  clocks:
+    items:
+      - description: Controller reference clock source
+      - description: APB interface clock source
+
+  clock-names:
+    items:
+      - const: ref
+      - const: pclk
+
+  resets:
+    maxItems: 1
+
+  canaan,k210-sysctl-power:
+    $ref: /schemas/types.yaml#/definitions/phandle-array
+    description: |
+      phandle of the K210 system controller node and offset of its
+      power domain control register.
+
+patternProperties:
+  '-pinmux$':
+    type: object
+    $ref: /schemas/pinctrl/pinmux-node.yaml
+    description:
+      FPIOA client devices use sub-nodes to define the desired pin
+      configuration. Client device sub-nodes use the pinmux property
+      below.
+
+    properties:
+      pinmux:
+        description:
+          List of IO pin alternate functions. The value for each IO
+          pin is a combination of an IO pin number (0 to 47) with the
+          desired function for the IO pin. Functions are defined as
+          macros in include/dt-bindings/pinctrl/k210-fpioa.h.
+          The K210_FPIOA(IO pin, function) macro is provided to
+          facilitate the combination of IO pin numbers and functions.
+
+    required:
+      - pinmux
+
+    additionalProperties: false
+
+  '-pins$':
+    type: object
+    $ref: /schemas/pinctrl/pincfg-node.yaml
+    description:
+      FPIOA client devices use sub-nodes to define the desired
+      configuration of pins. Client device sub-nodes use the
+      properties below.
+
+    properties:
+      pins:
+        description:
+          List of IO pins affected by the properties specified in this
+          subnode. IO pins are identified using the pin names "IO_xx".
+          Pin configuration nodes can also define the power domain to
+          be used for the SoC pin groups A0 (IO pins 0-5),
+          A1 (IO pins 6-11), A2 (IO pins 12-17), B3 (IO pins 18-23),
+          B4 (IO pins 24-29), B5 (IO pins 30-35), C6 (IO pins 36-41)
+          and C7 (IO pins 42-47) using the
+          power-source property.
+        items:
+          anyOf:
+            - pattern: "^(IO_[0-9]+|A[0-2]|B[3-5]|C[6-7])$"
+            - enum: [ IO_0, IO_1, IO_2, IO_3, IO_4, IO_5, IO_6, IO_7,
+                      IO_8, IO_9, IO_10, IO_11, IO_12, IO_13, IO_14,
+                      IO_15, IO_16, IO_17, IO_18, IO_19, IO_20, IO_21,
+                      IO_22, IO_23, IO_24, IO_25, IO_26, IO_27, IO_28,
+                      IO_29, IO_30, IO_31, IO_32, IO_33, IO_34, IO_35,
+                      IO_36, IO_37, IO_38, IO_39, IO_40, IO_41, IO_42,
+                      IO_43, IO_44, IO_45, IO_46, IO_47,
+                      A0, A1, A2, B3, B4, B5, C6, C7 ]
+      bias-disable: true
+
+      bias-pull-down: true
+
+      bias-pull-up: true
+
+      drive-strength: true
+
+      drive-strength-microamp: true
+
+      input-enable: true
+
+      input-disable: true
+
+      input-schmitt-enable: true
+
+      input-schmitt-disable: true
+
+      input-polarity-invert:
+        description:
+          Enable or disable pin input polarity inversion.
+
+      output-enable: true
+
+      output-disable: true
+
+      output-high: true
+
+      output-low: true
+
+      output-polarity-invert:
+        description:
+          Enable or disable pin output polarity inversion.
+
+      slew-rate: true
+
+      power-source: true
+
+    additionalProperties: false
+
+required:
+  - compatible
+  - reg
+  - clocks
+  - canaan,k210-sysctl-power
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/pinctrl/k210-fpioa.h>
+    #include <dt-bindings/clock/k210-clk.h>
+    #include <dt-bindings/reset/k210-rst.h>
+
+    fpioa: pinmux@502B0000 {
+      compatible = "canaan,k210-fpioa";
+      reg = <0x502B0000 0x100>;
+      clocks = <&sysclk K210_CLK_FPIOA>,
+               <&sysclk K210_CLK_APB0>;
+      clock-names = "ref", "pclk";
+      resets = <&sysrst K210_RST_FPIOA>;
+      canaan,k210-sysctl-power = <&sysctl 108>;
+      pinctrl-0 = <&jtag_pinctrl>;
+      pinctrl-names = "default";
+
+      jtag_pinctrl: jtag-pinmux {
+        pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+                 <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+                 <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+                 <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+      };
+    };
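
The '-pins' side of the schema is not exercised by the example above; a
minimal sketch of a pin configuration sub-node (the node name, the pin
selection and the power-source value are assumptions; the value selects
the supply of the pins' power domain group):

      uarths_pins: uarths-pins {
        pins = "IO_4", "IO_5";
        bias-pull-up;
        /* assumed value: selects the supply for group A0 (IO pins 0-5) */
        power-source = <0>;
      };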
index 5ac2527..84e6691 100644 (file)
@@ -25,12 +25,15 @@ description:
 properties:
   compatible:
     items:
-      - const: sifive,fu540-c000-pwm
+      - enum:
+          - sifive,fu540-c000-pwm
+          - sifive,fu740-c000-pwm
       - const: sifive,pwm0
     description:
       Should be "sifive,<chip>-pwm" and "sifive,pwm<version>". Supported
-      compatible strings are "sifive,fu540-c000-pwm" for the SiFive PWM v0
-      as integrated onto the SiFive FU540 chip, and "sifive,pwm0" for the
+      compatible strings are "sifive,fu540-c000-pwm" and
+      "sifive,fu740-c000-pwm" for the SiFive PWM v0 as integrated onto the
+      SiFive FU540 and FU740 chips, respectively, and "sifive,pwm0" for the
       SiFive PWM v0 IP block with no chip integration tweaks.
       Please refer to sifive-blocks-ip-versioning.txt for details.
 
diff --git a/Documentation/devicetree/bindings/pwm/pwm-zx.txt b/Documentation/devicetree/bindings/pwm/pwm-zx.txt
deleted file mode 100644 (file)
index 3c8fe7a..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-ZTE ZX PWM controller
-
-Required properties:
- - compatible: Should be "zte,zx296718-pwm".
- - reg: Physical base address and length of the controller's registers.
- - clocks : The phandle and specifier referencing the controller's clocks.
- - clock-names: "pclk" for PCLK, "wclk" for WCLK to the PWM controller.  The
-   PCLK is for register access, while WCLK is the reference clock for
-   calculating period and duty cycles.
- - #pwm-cells: Should be 3. See pwm.yaml in this directory for a description of
-   the cells format.
-
-Example:
-
-       pwm: pwm@1439000 {
-               compatible = "zte,zx296718-pwm";
-               reg = <0x1439000 0x1000>;
-               clocks = <&lsp1crm LSP1_PWM_PCLK>,
-                        <&lsp1crm LSP1_PWM_WCLK>;
-               clock-names = "pclk", "wclk";
-               #pwm-cells = <3>;
-       };
index 3ba668b..3f5f787 100644 (file)
@@ -6,10 +6,10 @@ Mediatek SoCs.
 
 Required properties:
 - compatible           Should be "mediatek,mt8183-scp"
-- reg                  Should contain the address ranges for the two memory
-                       regions, SRAM and CFG.
-- reg-names            Contains the corresponding names for the two memory
-                       regions. These should be named "sram" & "cfg".
+- reg                  Should contain the address ranges for memory regions:
+                       SRAM, CFG, and L1TCM.
+- reg-names            Contains the corresponding names for the memory regions:
+                       "sram", "cfg", and "l1tcm".
 - clocks               Clock for co-processor (See: ../clock/clock-bindings.txt)
 - clock-names          Contains the corresponding name for the clock. This
                        should be named "main".
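
With the third region added, a node would look like this sketch (addresses,
sizes and the clock specifier are assumptions; only the names come from the
binding):

	scp: scp@10500000 {
		compatible = "mediatek,mt8183-scp";
		reg = <0x10500000 0x80000>,     /* assumed SRAM range */
		      <0x10700000 0x8000>,      /* assumed CFG range */
		      <0x10720000 0xe0000>;     /* assumed L1TCM range */
		reg-names = "sram", "cfg", "l1tcm";
		clocks = <&infracfg CLK_INFRA_SCPSYS>;  /* assumed clock */
		clock-names = "main";
	};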
index 5473702..1c330a8 100644 (file)
@@ -25,6 +25,10 @@ on the Qualcomm ADSP Hexagon core.
                    "qcom,sm8250-adsp-pas"
                    "qcom,sm8250-cdsp-pas"
                    "qcom,sm8250-slpi-pas"
+                   "qcom,sm8350-adsp-pas"
+                   "qcom,sm8350-cdsp-pas"
+                   "qcom,sm8350-slpi-pas"
+                   "qcom,sm8350-mpss-pas"
 
 - interrupts-extended:
        Usage: required
@@ -51,10 +55,14 @@ on the Qualcomm ADSP Hexagon core.
        qcom,sm8250-adsp-pas:
        qcom,sm8250-cdsp-pas:
        qcom,sm8250-slpi-pas:
+       qcom,sm8350-adsp-pas:
+       qcom,sm8350-cdsp-pas:
+       qcom,sm8350-slpi-pas:
                    must be "wdog", "fatal", "ready", "handover", "stop-ack"
        qcom,qcs404-wcss-pas:
        qcom,sc7180-mpss-pas:
        qcom,sm8150-mpss-pas:
+       qcom,sm8350-mpss-pas:
                    must be "wdog", "fatal", "ready", "handover", "stop-ack",
                    "shutdown-ack"
 
@@ -114,13 +122,17 @@ on the Qualcomm ADSP Hexagon core.
        qcom,sm8150-adsp-pas:
        qcom,sm8150-cdsp-pas:
        qcom,sm8250-cdsp-pas:
+       qcom,sm8350-cdsp-pas:
                    must be "cx", "load_state"
        qcom,sc7180-mpss-pas:
        qcom,sm8150-mpss-pas:
+       qcom,sm8350-mpss-pas:
                    must be "cx", "load_state", "mss"
        qcom,sm8250-adsp-pas:
+       qcom,sm8350-adsp-pas:
        qcom,sm8150-slpi-pas:
        qcom,sm8250-slpi-pas:
+       qcom,sm8350-slpi-pas:
                    must be "lcx", "lmx", "load_state"
 
 - memory-region:
index cc0b7fc..da09c0d 100644 (file)
@@ -80,6 +80,7 @@ and its resource dependencies. It is described by the following properties:
        Definition: must be one of:
                    "qcom,wcn3620",
                    "qcom,wcn3660",
+                   "qcom,wcn3660b",
                    "qcom,wcn3680"
 
 - clocks:
diff --git a/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml b/Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
new file mode 100644 (file)
index 0000000..53e4ede
--- /dev/null
@@ -0,0 +1,40 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/reset/canaan,k210-rst.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan Kendryte K210 Reset Controller Device Tree Bindings
+
+maintainers:
+  - Damien Le Moal <damien.lemoal@wdc.com>
+
+description: |
+  Canaan Kendryte K210 reset controller driver which supports the SoC
+  system controller supplied reset registers for the various peripherals
+  of the SoC. The K210 reset controller node must be defined as a child
+  node of the K210 system controller node.
+
+  See also:
+  - dt-bindings/reset/k210-rst.h
+
+properties:
+  compatible:
+    const: canaan,k210-rst
+
+  '#reset-cells':
+    const: 1
+
+required:
+  - '#reset-cells'
+  - compatible
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/reset/k210-rst.h>
+    sysrst: reset-controller {
+      compatible = "canaan,k210-rst";
+      #reset-cells = <1>;
+    };
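For orientation, a hedged sketch (not part of the binding) of how a peripheral
driver might consume one of these reset lines through the kernel reset API;
the probe function is hypothetical:

.. code-block:: c

  #include <linux/platform_device.h>
  #include <linux/reset.h>

  static int example_probe(struct platform_device *pdev)
  {
          struct reset_control *rst;

          /* pulls the line referenced by the node's "resets" property */
          rst = devm_reset_control_get_exclusive(&pdev->dev, NULL);
          if (IS_ERR(rst))
                  return PTR_ERR(rst);

          return reset_control_reset(rst);  /* assert, then deassert */
  }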
diff --git a/Documentation/devicetree/bindings/riscv/canaan.yaml b/Documentation/devicetree/bindings/riscv/canaan.yaml
new file mode 100644 (file)
index 0000000..f8f3f28
--- /dev/null
@@ -0,0 +1,47 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/riscv/canaan.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Canaan SoC-based boards
+
+maintainers:
+  - Damien Le Moal <damien.lemoal@wdc.com>
+
+description:
+  Canaan Kendryte K210 SoC-based boards
+
+properties:
+  $nodename:
+    const: '/'
+  compatible:
+    oneOf:
+      - items:
+          - const: sipeed,maix-bit
+          - const: sipeed,maix-bitm
+          - const: canaan,kendryte-k210
+
+      - items:
+          - const: sipeed,maix-go
+          - const: canaan,kendryte-k210
+
+      - items:
+          - const: sipeed,maix-dock-m1
+          - const: sipeed,maix-dock-m1w
+          - const: canaan,kendryte-k210
+
+      - items:
+          - const: sipeed,maixduino
+          - const: canaan,kendryte-k210
+
+      - items:
+          - const: canaan,kendryte-kd233
+          - const: canaan,kendryte-k210
+
+      - items:
+          - const: canaan,kendryte-k210
+
+additionalProperties: true
+
+...
index c6925e0..e534f6a 100644 (file)
@@ -28,11 +28,18 @@ properties:
       - items:
           - enum:
               - sifive,rocket0
+              - sifive,bullet0
               - sifive,e5
+              - sifive,e7
               - sifive,e51
+              - sifive,e71
               - sifive,u54-mc
+              - sifive,u74-mc
               - sifive,u54
+              - sifive,u74
               - sifive,u5
+              - sifive,u7
+              - canaan,k210
           - const: riscv
       - const: riscv    # Simulator only
     description:
@@ -50,6 +57,7 @@ properties:
       - riscv,sv32
       - riscv,sv39
       - riscv,sv48
+      - riscv,none
 
   riscv,isa:
     description:
index 2ece863..23b2276 100644 (file)
@@ -27,6 +27,7 @@ select:
       items:
         - enum:
             - sifive,fu540-c000-ccache
+            - sifive,fu740-c000-ccache
 
   required:
     - compatible
@@ -34,7 +35,9 @@ select:
 properties:
   compatible:
     items:
-      - const: sifive,fu540-c000-ccache
+      - enum:
+          - sifive,fu540-c000-ccache
+          - sifive,fu740-c000-ccache
       - const: cache
 
   cache-block-size:
@@ -52,10 +55,13 @@ properties:
   cache-unified: true
 
   interrupts:
-    description: |
-      Must contain entries for DirError, DataError and DataFail signals.
     minItems: 3
-    maxItems: 3
+    maxItems: 4
+    items:
+      - description: DirError interrupt
+      - description: DataError interrupt
+      - description: DataFail interrupt
+      - description: DirFail interrupt
 
   reg:
     maxItems: 1
@@ -68,6 +74,26 @@ properties:
       The reference to the reserved-memory for the L2 Loosely Integrated Memory region.
       The reserved memory node should be defined as per the bindings in reserved-memory.txt.
 
+if:
+  properties:
+    compatible:
+      contains:
+        const: sifive,fu540-c000-ccache
+
+then:
+  properties:
+    interrupts:
+      description: |
+        Must contain entries for DirError, DataError and DataFail signals.
+      maxItems: 3
+
+else:
+  properties:
+    interrupts:
+      description: |
+        Must contain entries for DirError, DataError, DataFail, DirFail signals.
+      minItems: 4
+
 additionalProperties: false
 
 required:
index 3a8647d..ee0a239 100644 (file)
@@ -17,11 +17,18 @@ properties:
   $nodename:
     const: '/'
   compatible:
-    items:
-      - enum:
-          - sifive,hifive-unleashed-a00
-      - const: sifive,fu540-c000
-      - const: sifive,fu540
+    oneOf:
+      - items:
+          - enum:
+              - sifive,hifive-unleashed-a00
+          - const: sifive,fu540-c000
+          - const: sifive,fu540
+
+      - items:
+          - enum:
+              - sifive,hifive-unmatched-a00
+          - const: sifive,fu740-c000
+          - const: sifive,fu740
 
 additionalProperties: true
 
index 3ac5c7f..5fa94da 100644 (file)
@@ -20,6 +20,7 @@ properties:
       - enum:
           - sifive,fu540-c000-uart
           - sifive,fu740-c000-uart
+          - canaan,k210-uarths
       - const: sifive,uart0
 
     description:
index 50449b6..4454aca 100644 (file)
@@ -21,6 +21,10 @@ properties:
       - fsl,vf610-spdif
       - fsl,imx6sx-spdif
       - fsl,imx8qm-spdif
+      - fsl,imx8qxp-spdif
+      - fsl,imx8mq-spdif
+      - fsl,imx8mm-spdif
+      - fsl,imx8mn-spdif
 
   reg:
     maxItems: 1
index 2a0e9cd..a35952f 100644 (file)
@@ -23,15 +23,19 @@ description:
 properties:
   compatible:
     items:
-      - const: sifive,fu540-c000-clint
+      - enum:
+          - sifive,fu540-c000-clint
+          - canaan,k210-clint
       - const: sifive,clint0
 
     description:
-      Should be "sifive,<chip>-clint" and "sifive,clint<version>".
+      Should be "<vendor>,<chip>-clint" and "sifive,clint<version>".
       Supported compatible strings are -
       "sifive,fu540-c000-clint" for the SiFive CLINT v0 as integrated
-      onto the SiFive FU540 chip, and "sifive,clint0" for the SiFive
-      CLINT v0 IP block with no chip integration tweaks.
+      onto the SiFive FU540 chip, "canaan,k210-clint" for the SiFive
+      CLINT v0 as integrated onto the Canaan Kendryte K210 chip, and
+      "sifive,clint0" for the SiFive CLINT v0 IP block with no chip
+      integration tweaks.
      Please refer to sifive-blocks-ip-versioning.txt for details.
 
   reg:
index d65faf2..d33c920 100644 (file)
@@ -24,6 +24,9 @@ properties:
   interrupts:
     maxItems: 1
 
+  resets:
+    maxItems: 1
+
   clocks:
     minItems: 1
     items:
diff --git a/Documentation/driver-api/cxl/index.rst b/Documentation/driver-api/cxl/index.rst
new file mode 100644 (file)
index 0000000..036e495
--- /dev/null
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Compute Express Link
+====================
+
+.. toctree::
+   :maxdepth: 1
+
+   memory-devices
+
+.. only:: subproject and html
diff --git a/Documentation/driver-api/cxl/memory-devices.rst b/Documentation/driver-api/cxl/memory-devices.rst
new file mode 100644 (file)
index 0000000..1bad466
--- /dev/null
@@ -0,0 +1,46 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===================================
+Compute Express Link Memory Devices
+===================================
+
+A Compute Express Link Memory Device is a CXL component that implements the
+CXL.mem protocol. It contains some amount of volatile memory, persistent memory,
+or both. It is enumerated as a PCI device for configuration and passing
+messages over an MMIO mailbox. Its contribution to the System Physical
+Address space is handled via HDM (Host Managed Device Memory) decoders
+that optionally define a device's contribution to an interleaved address
+range across multiple devices underneath a host-bridge or interleaved
+across host-bridges.
+
+Driver Infrastructure
+=====================
+
+This section covers the driver infrastructure for a CXL memory device.
+
+CXL Memory Device
+-----------------
+
+.. kernel-doc:: drivers/cxl/mem.c
+   :doc: cxl mem
+
+.. kernel-doc:: drivers/cxl/mem.c
+   :internal:
+
+CXL Bus
+-------
+.. kernel-doc:: drivers/cxl/bus.c
+   :doc: cxl bus
+
+External Interfaces
+===================
+
+CXL IOCTL Interface
+-------------------
+
+.. kernel-doc:: include/uapi/linux/cxl_mem.h
+   :doc: UAPI
+
+.. kernel-doc:: include/uapi/linux/cxl_mem.h
+   :internal:
index 9d9af54..b0ab367 100644 (file)
@@ -35,6 +35,7 @@ available subsections can be seen below.
    usb/index
    firewire
    pci/index
+   cxl/index
    spi
    i2c
    ipmb
@@ -93,7 +94,6 @@ available subsections can be seen below.
    pps
    ptp
    phy/index
-   pti_intel_mid
    pwm
    pldmfw/index
    rfkill
diff --git a/Documentation/driver-api/pti_intel_mid.rst b/Documentation/driver-api/pti_intel_mid.rst
deleted file mode 100644 (file)
index bacc2a4..0000000
+++ /dev/null
@@ -1,108 +0,0 @@
-.. SPDX-License-Identifier: GPL-2.0
-
-=============
-Intel MID PTI
-=============
-
-The Intel MID PTI project is HW implemented in Intel Atom
-system-on-a-chip designs based on the Parallel Trace
-Interface for MIPI P1149.7 cJTAG standard.  The kernel solution
-for this platform involves the following files::
-
-       ./include/linux/pti.h
-       ./drivers/.../n_tracesink.h
-       ./drivers/.../n_tracerouter.c
-       ./drivers/.../n_tracesink.c
-       ./drivers/.../pti.c
-
-pti.c is the driver that enables various debugging features
-popular on platforms from certain mobile manufacturers.
-n_tracerouter.c and n_tracesink.c allow extra system information to
-be collected and routed to the pti driver, such as trace
-debugging data from a modem.  Although n_tracerouter
-and n_tracesink are a part of the complete PTI solution,
-these two line disciplines can work separately from
-pti.c and route any data stream from one /dev/tty node
-to another /dev/tty node via kernel-space.  This provides
-a stable, reliable connection that will not break unless
-the user-space application shuts down (plus avoids
-kernel->user->kernel context switch overheads of routing
-data).
-
-An example debugging usage for this driver system:
-
-  * Hook /dev/ttyPTI0 to syslogd.  Opening this port will also start
-    a console device to further capture debugging messages to PTI.
-  * Hook /dev/ttyPTI1 to modem debugging data to write to PTI HW.
-    This is where n_tracerouter and n_tracesink are used.
-  * Hook /dev/pti to a user-level debugging application for writing
-    to PTI HW.
-  * `Use mipi_` Kernel Driver API in other device drivers for
-    debugging to PTI by first requesting a PTI write address via
-    mipi_request_masterchannel(1).
-
-Below is example pseudo-code on how a 'privileged' application
-can hook up n_tracerouter and n_tracesink to any tty on
-a system.  'Privileged' means the application has enough
-privileges to successfully manipulate the ldisc drivers
-but is not just blindly executing as 'root'. Keep in mind
-the use of ioctl(,TIOCSETD,) is not specific to the n_tracerouter
-and n_tracesink line discpline drivers but is a generic
-operation for a program to use a line discpline driver
-on a tty port other than the default n_tty:
-
-.. code-block:: c
-
-  /////////// To hook up n_tracerouter and n_tracesink /////////
-
-  // Note that n_tracerouter depends on n_tracesink.
-  #include <errno.h>
-  #define ONE_TTY "/dev/ttyOne"
-  #define TWO_TTY "/dev/ttyTwo"
-
-  // needed global to hand onto ldisc connection
-  static int g_fd_source = -1;
-  static int g_fd_sink  = -1;
-
-  // these two vars used to grab LDISC values from loaded ldisc drivers
-  // in OS.  Look at /proc/tty/ldiscs to get the right numbers from
-  // the ldiscs loaded in the system.
-  int source_ldisc_num, sink_ldisc_num = -1;
-  int retval;
-
-  g_fd_source = open(ONE_TTY, O_RDWR); // must be R/W
-  g_fd_sink   = open(TWO_TTY, O_RDWR); // must be R/W
-
-  if (g_fd_source <= 0) || (g_fd_sink <= 0) {
-     // doubt you'll want to use these exact error lines of code
-     printf("Error on open(). errno: %d\n",errno);
-     return errno;
-  }
-
-  retval = ioctl(g_fd_sink, TIOCSETD, &sink_ldisc_num);
-  if (retval < 0) {
-     printf("Error on ioctl().  errno: %d\n", errno);
-     return errno;
-  }
-
-  retval = ioctl(g_fd_source, TIOCSETD, &source_ldisc_num);
-  if (retval < 0) {
-     printf("Error on ioctl().  errno: %d\n", errno);
-     return errno;
-  }
-
-  /////////// To disconnect n_tracerouter and n_tracesink ////////
-
-  // First make sure data through the ldiscs has stopped.
-
-  // Second, disconnect ldiscs.  This provides a
-  // little cleaner shutdown on tty stack.
-  sink_ldisc_num = 0;
-  source_ldisc_num = 0;
-  ioctl(g_fd_uart, TIOCSETD, &sink_ldisc_num);
-  ioctl(g_fd_gadget, TIOCSETD, &source_ldisc_num);
-
-  // Three, program closes connection, and cleanup:
-  close(g_fd_uart);
-  close(g_fd_gadget);
-  g_fd_uart = g_fd_gadget = NULL;
index 3999356..e59b521 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 79409bf..dcbd867 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 9ea60e4..4efcba7 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
index 894d969..0c801d1 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index cd3510e..af34308 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: |  ok  |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
index b2288dc..c244ac7 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 1c49723..7aff505 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 7563a49..b39c1a5 100644 (file)
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: |  ok  |
-    |        mips: | TODO |
+    |        mips: |  ok  |
     |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
index ab0ee1c..7e44013 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index bc45bac..2cb0576 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: |  ok  |
     |     hexagon: |  ok  |
index 2db7680..e9ac415 100644 (file)
@@ -10,8 +10,7 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
-    |        csky: | TODO |
+    |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
index 6225cfe..96156e8 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: | TODO |
index 371f0ac..ee95ed6 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
index 38e9525..612cb97 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
index 7f4a20e..d6ff141 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 3329559..ad4de22 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 43cac6e..8bd5548 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: | TODO |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: | TODO |
     |       sparc: |  ok  |
index d636ed0..2a3fe81 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index dfc93d0..bece895 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 1815c7f..52bdda0 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 940b0bd..a8cd163 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
index 4dd5e55..8c85949 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index b16d4f7..5f4e1b3 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 04c17c2..78f3fe0 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: |  ok  |
@@ -23,7 +22,7 @@
     |    openrisc: | TODO |
     |      parisc: |  ok  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ok  |
     |       sparc: |  ok  |
index e7450fb..5bf3b18 100644 (file)
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: | TODO |
-    |        mips: | TODO |
+    |        mips: |  ok  |
     |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
index 98e79d1..d88659b 100644 (file)
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
     |        ia64: | TODO |
     |        m68k: | TODO |
     |  microblaze: | TODO |
-    |        mips: | TODO |
+    |        mips: |  ok  |
     |       nds32: | TODO |
     |       nios2: | TODO |
     |    openrisc: | TODO |
index 47e6903..883d33b 100644 (file)
@@ -33,7 +33,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 964457a..9affb7c 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ..  |
     |         arm: |  ..  |
     |       arm64: |  ok  |
-    |         c6x: |  ..  |
     |        csky: |  ..  |
     |       h8300: |  ..  |
     |     hexagon: |  ..  |
@@ -23,7 +22,7 @@
     |    openrisc: |  ..  |
     |      parisc: |  ..  |
     |     powerpc: |  ok  |
-    |       riscv: | TODO |
+    |       riscv: |  ok  |
     |        s390: |  ok  |
     |          sh: |  ..  |
     |       sparc: | TODO |
index eb3d740..26eec58 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 4d11cbb..8639fe8 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 6863a3f..9a81cb0 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: |  ok  |
     |        csky: |  ok  |
     |       h8300: |  ok  |
     |     hexagon: |  ok  |
index 52aea27..4ed116c 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 6fc03de..bc30c15 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index e51f3af..050de43 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: |  ok  |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index eccda07..99cb6d7 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index c74e3f8..6cde384 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 1c0b95f..e8238cb 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: |  ..  |
     |        csky: |  ..  |
     |       h8300: |  ..  |
     |     hexagon: |  ..  |
index 30f75a7..48a5ca5 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: |  ..  |
     |        csky: | TODO |
     |       h8300: |  ..  |
     |     hexagon: | TODO |
index c5ff3a4..439fd90 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: | TODO |
     |         arm: | TODO |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index b5fb37c..9a0c878 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: | TODO |
     |       arm64: | TODO |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index 13d0e1e..40b969f 100644 (file)
@@ -10,7 +10,6 @@
     |         arc: |  ok  |
     |         arm: |  ok  |
     |       arm64: |  ok  |
-    |         c6x: | TODO |
     |        csky: | TODO |
     |       h8300: | TODO |
     |     hexagon: | TODO |
index c0f2c75..b7dcc86 100644 (file)
@@ -126,9 +126,10 @@ prototypes::
        int (*get)(const struct xattr_handler *handler, struct dentry *dentry,
                   struct inode *inode, const char *name, void *buffer,
                   size_t size);
-       int (*set)(const struct xattr_handler *handler, struct dentry *dentry,
-                  struct inode *inode, const char *name, const void *buffer,
-                  size_t size, int flags);
+       int (*set)(const struct xattr_handler *handler,
+                   struct user_namespace *mnt_userns,
+                   struct dentry *dentry, struct inode *inode, const char *name,
+                   const void *buffer, size_t size, int flags);
 
 locking rules:
        all may block
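A hedged sketch of a handler matching the updated ->set() prototype above;
the function name and body are illustrative only:

.. code-block:: c

  #include <linux/xattr.h>

  static int example_xattr_set(const struct xattr_handler *handler,
                               struct user_namespace *mnt_userns,
                               struct dentry *dentry, struct inode *inode,
                               const char *name, const void *buffer,
                               size_t size, int flags)
  {
          /* idmap i_uid/i_gid through mnt_userns before permission checks */
          return -EOPNOTSUPP;
  }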
index 1f8cf8e..0302035 100644 (file)
@@ -717,6 +717,8 @@ be removed.  Switch while you still can; the old one won't stay.
 **mandatory**
 
 ->setxattr() and xattr_handler.set() get dentry and inode passed separately.
+The xattr_handler.set() gets passed the user namespace of the mount the inode
+is seen from so filesystems can idmap the i_uid and i_gid accordingly.
 dentry might be yet to be attached to inode, so do _not_ use its ->d_inode
 in the instances.  Rationale: !@#!@# security_d_instantiate() needs to be
 called before we attach dentry to inode and !@#!@##!@$!$#!@#$!@$!@$ smack
@@ -881,3 +883,10 @@ For bvec based itererators bio_iov_iter_get_pages() now doesn't copy bvecs but
 uses the one provided. Anyone issuing kiocb-I/O should ensure that the bvec and
 page references stay until I/O has completed, i.e. until ->ki_complete() has
been called or returned with a non -EIOCBQUEUED code.
+
+---
+
+**mandatory**
+
+mnt_want_write_file() can now only be paired with mnt_drop_write_file(),
+whereas previously it could be paired with mnt_drop_write() as well.
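A minimal sketch of the now-required pairing; example_modify() is
hypothetical:

.. code-block:: c

  #include <linux/fs.h>
  #include <linux/mount.h>

  static int example_modify(struct file *file)
  {
          int err = mnt_want_write_file(file);

          if (err)
                  return err;
          /* ... modify the filesystem ... */
          mnt_drop_write_file(file);  /* must match mnt_want_write_file() */
          return 0;
  }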
index 9abdba1..48fbfc3 100644 (file)
@@ -691,6 +691,10 @@ files are there, and which are missing.
                 number of processes currently runnable (running or on ready queue);
                 total number of processes in system;
                 last pid created.
+                All fields are separated by one space except "number of
+                processes currently runnable" and "total number of processes
+                in system", which are separated by a slash ('/'); a parsing
+                sketch follows this table. Example:
+                0.61 0.61 0.55 3/828 22084
  locks        Kernel locks
  meminfo      Memory info
  misc         Miscellaneous
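A hedged user-space sketch parsing the /proc/loadavg layout described in the
table above:

.. code-block:: c

  #include <stdio.h>

  int main(void)
  {
          double load1, load5, load15;
          int runnable, total, last_pid;
          FILE *f = fopen("/proc/loadavg", "r");

          if (f && fscanf(f, "%lf %lf %lf %d/%d %d", &load1, &load5,
                          &load15, &runnable, &total, &last_pid) == 6)
                  printf("%d runnable of %d processes\n", runnable, total);
          if (f)
                  fclose(f);
          return 0;
  }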
index 5685648..a672608 100644 (file)
@@ -217,6 +217,12 @@ between the calls to start() and stop(), so holding a lock during that time
 is a reasonable thing to do. The seq_file code will also avoid taking any
 other locks while the iterator is active.
 
+The iterator value returned by start() or next() is guaranteed to be
+passed to a subsequent next() or stop() call.  This allows resources
+such as locks that were taken to be reliably released.  There is *no*
+guarantee that the iterator will be passed to show(), though in practice
+it often will be.
+
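A minimal sketch, not from the kernel tree, of why this guarantee matters:
a lock taken in start() is always released in stop().  example_lock and
example_lookup() are hypothetical:

.. code-block:: c

  #include <linux/mutex.h>
  #include <linux/seq_file.h>

  static DEFINE_MUTEX(example_lock);

  static void *example_lookup(loff_t pos);  /* hypothetical item lookup */

  static void *example_start(struct seq_file *m, loff_t *pos)
  {
          mutex_lock(&example_lock);         /* held for the whole walk */
          return example_lookup(*pos);       /* NULL terminates iteration */
  }

  static void *example_next(struct seq_file *m, void *v, loff_t *pos)
  {
          ++*pos;
          return example_lookup(*pos);
  }

  static void example_stop(struct seq_file *m, void *v)
  {
          mutex_unlock(&example_lock);       /* guaranteed to be reached */
  }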
 
 Formatted output
 ================
index a4d64b1..2049bbf 100644 (file)
@@ -418,28 +418,29 @@ As of kernel 2.6.22, the following members are defined:
 .. code-block:: c
 
        struct inode_operations {
-               int (*create) (struct inode *,struct dentry *, umode_t, bool);
+               int (*create) (struct user_namespace *, struct inode *,struct dentry *, umode_t, bool);
                struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
                int (*link) (struct dentry *,struct inode *,struct dentry *);
                int (*unlink) (struct inode *,struct dentry *);
-               int (*symlink) (struct inode *,struct dentry *,const char *);
-               int (*mkdir) (struct inode *,struct dentry *,umode_t);
+               int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,const char *);
+               int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,umode_t);
                int (*rmdir) (struct inode *,struct dentry *);
-               int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
-               int (*rename) (struct inode *, struct dentry *,
+               int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,umode_t,dev_t);
+               int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
                               struct inode *, struct dentry *, unsigned int);
                int (*readlink) (struct dentry *, char __user *,int);
                const char *(*get_link) (struct dentry *, struct inode *,
                                         struct delayed_call *);
-               int (*permission) (struct inode *, int);
+               int (*permission) (struct user_namespace *, struct inode *, int);
                int (*get_acl)(struct inode *, int);
-               int (*setattr) (struct dentry *, struct iattr *);
-               int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
+               int (*setattr) (struct user_namespace *, struct dentry *, struct iattr *);
+               int (*getattr) (struct user_namespace *, const struct path *, struct kstat *, u32, unsigned int);
                ssize_t (*listxattr) (struct dentry *, char *, size_t);
                void (*update_time)(struct inode *, struct timespec *, int);
                int (*atomic_open)(struct inode *, struct dentry *, struct file *,
                                   unsigned open_flag, umode_t create_mode);
-               int (*tmpfile) (struct inode *, struct dentry *, umode_t);
+               int (*tmpfile) (struct user_namespace *, struct inode *, struct dentry *, umode_t);
+               int (*set_acl)(struct user_namespace *, struct inode *, struct posix_acl *, int);
        };
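As a hedged illustration of the new struct user_namespace argument (not taken
from any real filesystem), a ->permission() implementation can simply forward
it to the generic helper:

.. code-block:: c

  #include <linux/fs.h>

  static int example_permission(struct user_namespace *mnt_userns,
                                struct inode *inode, int mask)
  {
          return generic_permission(mnt_userns, inode, mask);
  }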
 
 Again, all methods are called without any locks being held, unless
index 0404fe6..c41ac76 100644 (file)
@@ -501,6 +501,34 @@ Developer only needs to provide a sub feature driver with matched feature id.
 FME Partial Reconfiguration Sub Feature driver (see drivers/fpga/dfl-fme-pr.c)
 could be a reference.
 
+Location of DFLs on a PCI Device
+================================
+The original method for finding a DFL on a PCI device assumed that the
+first DFL starts at offset 0 of BAR 0.  If the first node of the DFL is an FME,
+then further DFLs in the port(s) are specified in FME header registers.
+Alternatively, a PCIe vendor specific capability structure can be used to
+specify the location of all the DFLs on the device, providing flexibility
+for the type of starting node in the DFL.  Intel has reserved the
+VSEC ID of 0x43 for this purpose.  The vendor specific data begins with a
+4 byte vendor specific register holding the number of DFLs, followed by one
+4 byte Offset/BIR vendor specific register per DFL. Bits 2:0 of an Offset/BIR
+register indicate the BAR, and bits 31:3 form the 8 byte aligned offset, so
+bits 2:0 of the offset are implicitly zero.
+::
+
+        +----------------------------+
+        |31     Number of DFLs      0|
+        +----------------------------+
+        |31     Offset     3|2 BIR  0|
+        +----------------------------+
+                      . . .
+        +----------------------------+
+        |31     Offset     3|2 BIR  0|
+        +----------------------------+
+
+Being able to specify more than one DFL per BAR has been considered, but it
+was determined that the use case did not provide value.  Specifying a single DFL
+per BAR simplifies the implementation and allows for extra error checking.
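A hedged C sketch of decoding one Offset/BIR register per the layout above;
the names are invented for this example, not taken from the driver:

.. code-block:: c

  #include <stdint.h>

  #define DFL_BIR_MASK  0x7u                    /* bits 2:0: BAR index */

  struct dfl_location {
          uint8_t  bar;                         /* PCI BAR holding the DFL */
          uint32_t offset;                      /* 8 byte aligned offset */
  };

  static struct dfl_location dfl_decode(uint32_t offset_bir)
  {
          struct dfl_location loc;

          loc.bar    = offset_bir & DFL_BIR_MASK;
          loc.offset = offset_bir & ~DFL_BIR_MASK; /* bits 2:0 read as zero */
          return loc;
  }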
 
 Open discussion
 ===============
index 40ccac6..22ce801 100644 (file)
@@ -613,6 +613,27 @@ Some of these date from the very introduction of KMS in 2008 ...
 
 Level: Intermediate
 
+Remove automatic page mapping from dma-buf importing
+----------------------------------------------------
+
+When importing dma-bufs, the dma-buf and PRIME frameworks automatically map
+imported pages into the importer's DMA area. drm_gem_prime_fd_to_handle() and
+drm_gem_prime_handle_to_fd() require that importers call dma_buf_attach()
+even if they never do actual device DMA, but only CPU access through
+dma_buf_vmap(). This is a problem for USB devices, which do not support DMA
+operations.
+
+To fix the issue, automatic page mappings should be removed from the
+buffer-sharing code. Fixing this is a bit more involved, since the import/export
+cache is also tied to &drm_gem_object.import_attach. Meanwhile we paper over
+this problem for USB devices by fishing out the USB host controller device, as
+long as that supports DMA. Otherwise importing can still needlessly fail.
+
+Contact: Thomas Zimmermann <tzimmermann@suse.de>, Daniel Vetter
+
+Level: Advanced
+
+
 Better Testing
 ==============
 
index 1f2fe29..19ae94c 100644 (file)
@@ -3,13 +3,13 @@
 
 AMD Sensor Fusion Hub
 =====================
-AMD Sensor Fusion Hub (SFH) is part of an SOC starting from Ryzen based platforms.
+AMD Sensor Fusion Hub (SFH) has been part of the SoC since Ryzen-based platforms.
 The solution is working well on several OEM products. AMD SFH uses HID over PCIe bus.
 In terms of architecture it resembles ISH, however the major difference is all
 the HID reports are generated as part of the kernel driver.
 
-1. Block Diagram
-================
+Block Diagram
+-------------
 
 ::
 
@@ -45,20 +45,20 @@ the HID reports are generated as part of the kernel driver.
 AMD HID Transport Layer
 -----------------------
 AMD SFH transport is also implemented as a bus. Each client application executing in the AMD MP2 is
-registered as a device on this bus. Here: MP2 which is an ARM core connected to x86 for processing
+registered as a device on this bus. Here, MP2 is an ARM core connected to x86 for processing
sensor data. The layer which binds each device (AMD SFH HID driver) identifies the device type and
-registers with the hid core. Transport layer attach a constant "struct hid_ll_driver" object with
+registers with the HID core. Transport layer attaches a constant "struct hid_ll_driver" object with
 each device. Once a device is registered with HID core, the callbacks provided via this struct are
 used by HID core to communicate with the device. AMD HID Transport layer implements the synchronous calls.
 
 AMD HID Client Layer
 --------------------
-This layer is responsible to implement HID request and descriptors. As firmware is OS agnostic, HID
+This layer is responsible for implementing HID requests and descriptors. As firmware is OS agnostic, HID
 client layer fills the HID request structure and descriptors. HID client layer is complex as it is
-interface between MP2 PCIe layer and HID. HID client layer initialized the MP2 PCIe layer and holds
-the instance of MP2 layer. It identifies the number of sensors connected using MP2-PCIe layer. Base
-on that allocates the DRAM address for each and every sensor and pass it to MP2-PCIe driver.On
-enumeration of each the sensor, client layer fills the HID Descriptor structure and HID input repor
+the interface between the MP2 PCIe layer and HID. The HID client layer initializes the MP2 PCIe
+layer and holds the instance of the MP2 layer. It identifies the number of connected sensors using
+the MP2-PCIe layer. Based on that, it allocates a DRAM address for each sensor and passes it to the
+MP2-PCIe driver. On enumeration of each sensor, the client layer fills the HID Descriptor structure and HID input report
 structure. HID Feature report structure is optional. The report descriptor structure varies from
 sensor to sensor.
 
@@ -72,7 +72,7 @@ The communication between X86 and MP2 is split into three parts.
 2. Data transfer via DRAM.
 3. Supported sensor info via P2C registers.
 
-Commands are sent to MP2 using C2P Mailbox registers. Writing into C2P Message registers generate
+Commands are sent to MP2 using C2P Mailbox registers. Writing into C2P Message registers generates
 interrupt to MP2. The client layer allocates the physical memory and the same is sent to MP2 via
 the PCI layer. MP2 firmware writes the command output to the access DRAM memory which the client
 layer has allocated. Firmware always writes minimum of 32 bytes into DRAM. So as a protocol driver
index e2f4c4c..767c96b 100644 (file)
@@ -64,7 +64,7 @@ Case2 ReportID_3      TP      Absolute
 
 Command Read/Write
 ------------------
-To read/write to RAM, need to send a commands to the device.
+To read or write RAM, a command must be sent to the device.
 
 The command format is as below.
 
@@ -80,7 +80,7 @@ Byte6 Value Byte
 Byte7  Checksum
 =====  ======================
 
-Command Byte is read=0xD1/write=0xD2 .
+Command Byte is read=0xD1/write=0xD2.
 
 Address is read/write RAM address.
 
index 758972e..c1c9b8d 100644 (file)
@@ -48,12 +48,12 @@ for different sensors. For example an accelerometer can send X,Y and Z data, whe
 an ambient light sensor can send illumination data.
 So the implementation has two parts:
 
-- Core hid driver
+- Core HID driver
 - Individual sensor processing part (sensor drivers)
 
 Core driver
 -----------
-The core driver registers (hid-sensor-hub) registers as a HID driver. It parses
+The core driver (hid-sensor-hub) registers as a HID driver. It parses
 report descriptors and identifies all the sensors present. It adds an MFD device
 with name HID-SENSOR-xxxx (where xxxx is usage id from the specification).
 
@@ -95,14 +95,14 @@ Registration functions::
                        u32 usage_id,
                        struct hid_sensor_hub_callbacks *usage_callback):
 
-Registers callbacks for an usage id. The callback functions are not allowed
+Registers callbacks for a usage id. The callback functions are not allowed
 to sleep::
 
 
   int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev,
                        u32 usage_id):
 
-Removes callbacks for an usage id.
+Removes callbacks for a usage id.
 
 
 Parsing function::
@@ -166,7 +166,7 @@ This allows some differentiating use cases, where vendor can provide application
Some common use cases are debugging other sensors or providing events like
 keyboard attached/detached or lid open/close.
 
-To allow application to utilize these sensors, here they are exported uses sysfs
+To allow applications to utilize these sensors, they are exported using sysfs
 attribute groups, attributes and misc device interface.
 
 An example of this representation on sysfs::
@@ -207,9 +207,9 @@ An example of this representation on sysfs::
   │   │   │   ├── input-1-200202-units
   │   │   │   ├── input-1-200202-value
 
-Here there is a custom sensors with four fields, two feature and two inputs.
+Here there is a custom sensor with four fields: two features and two inputs.
 Each field is represented by a set of attributes. All fields except the "value"
-are read only. The value field is a RW field.
+are read only. The value field is a read-write field.
 
 Example::
 
@@ -237,6 +237,6 @@ These reports are pushed using misc device interface in a FIFO order::
        │   │   │   ├── 10:53 -> ../HID-SENSOR-2000e1.6.auto
        │   ├──  HID-SENSOR-2000e1.6.auto
 
-Each reports can be of variable length preceded by a header. This header
-consist of a 32 bit usage id, 64 bit time stamp and 32 bit length field of raw
+Each report can be of variable length preceded by a header. This header
+consists of a 32-bit usage id, 64-bit time stamp and 32-bit length field of raw
 data.
index 0fe526f..6f1692d 100644 (file)
@@ -12,8 +12,8 @@ Bluetooth, I2C and user-space I/O drivers.
 
 The HID subsystem is designed as a bus. Any I/O subsystem may provide HID
 devices and register them with the HID bus. HID core then loads generic device
-drivers on top of it. The transport drivers are responsible of raw data
-transport and device setup/management. HID core is responsible of
+drivers on top of it. The transport drivers are responsible for raw data
+transport and device setup/management. HID core is responsible for
 report-parsing, report interpretation and the user-space API. Device specifics
 and quirks are handled by all layers depending on the quirk.
 
@@ -67,7 +67,7 @@ Transport drivers attach a constant "struct hid_ll_driver" object with each
 device. Once a device is registered with HID core, the callbacks provided via
 this struct are used by HID core to communicate with the device.
 
-Transport drivers are responsible of detecting device failures and unplugging.
+Transport drivers are responsible for detecting device failures and unplugging.
 HID core will operate a device as long as it is registered regardless of any
 device failures. Once transport drivers detect unplug or failure events, they
 must unregister the device from HID core and HID core will stop using the
@@ -101,7 +101,7 @@ properties in common.
    channel. Any unrequested incoming or outgoing data report must be sent on
    this channel and is never acknowledged by the remote side. Devices usually
    send their input events on this channel. Outgoing events are normally
-   not send via intr, except if high throughput is required.
+   not sent via intr, except if high throughput is required.
  - Control Channel (ctrl): The ctrl channel is used for synchronous requests and
    device management. Unrequested data input events must not be sent on this
    channel and are normally ignored. Instead, devices only send management
@@ -161,7 +161,7 @@ allowed on the intr channel and are the only means of data there.
    payload may be blocked by the underlying transport driver if the
    specification does not allow them.
  - SET_REPORT: A SET_REPORT request has a report ID plus data as payload. It is
-   sent from host to device and a device must update it's current report state
+   sent from host to device and a device must update its current report state
    according to the given data. Any of the 3 report types can be used. However,
    INPUT reports as payload might be blocked by the underlying transport driver
    if the specification does not allow them.
@@ -294,7 +294,7 @@ The available HID callbacks are:
       void (*request) (struct hid_device *hdev, struct hid_report *report,
                       int reqtype)
 
-   Send an HID request on the ctrl channel. "report" contains the report that
+   Send a HID request on the ctrl channel. "report" contains the report that
    should be sent and "reqtype" the request type. Request-type can be
    HID_REQ_SET_REPORT or HID_REQ_GET_REPORT.
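   Illustrative only: a device driver asks HID core to issue this callback
   through the hid_hw_request() helper; example_refresh() is hypothetical:

.. code-block:: c

  #include <linux/hid.h>

  static void example_refresh(struct hid_device *hdev,
                              struct hid_report *report)
  {
          hid_hw_request(hdev, report, HID_REQ_GET_REPORT);
  }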
 
index 9b28a97..caebc62 100644 (file)
@@ -27,7 +27,7 @@ the following::
                           --> hiddev.c ----> POWER / MONITOR CONTROL
 
 In addition, other subsystems (apart from USB) can potentially feed
-events into the input subsystem, but these have no effect on the hid
+events into the input subsystem, but these have no effect on the HID
 device interface.
 
 Using the HID Device Interface
@@ -73,7 +73,7 @@ The hiddev API uses a read() interface, and a set of ioctl() calls.
 HID devices exchange data with the host computer using data
 bundles called "reports".  Each report is divided into "fields",
 each of which can have one or more "usages".  In the hid-core,
-each one of these usages has a single signed 32 bit value.
+each one of these usages has a single signed 32-bit value.
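A hedged user-space sketch (device path and error handling simplified) of
consuming these usage values through the read() interface described next:

.. code-block:: c

  #include <fcntl.h>
  #include <stdio.h>
  #include <unistd.h>
  #include <linux/hiddev.h>

  int main(void)
  {
          struct hiddev_event ev;   /* one usage code plus its value */
          int fd = open("/dev/usb/hiddev0", O_RDONLY);

          while (fd >= 0 && read(fd, &ev, sizeof(ev)) == sizeof(ev))
                  printf("usage 0x%x value %d\n", ev.hid, ev.value);
          return 0;
  }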
 
 read():
 -------
@@ -113,7 +113,7 @@ HIDIOCAPPLICATION
   - (none)
 
 This ioctl call returns the HID application usage associated with the
-hid device. The third argument to ioctl() specifies which application
+HID device. The third argument to ioctl() specifies which application
 index to get. This is useful when the device has more than one
 application collection. If the index is invalid (greater or equal to
 the number of application collections this device has) the ioctl
@@ -181,7 +181,7 @@ looked up by type (input, output or feature) and id, so these fields
 must be filled in by the user. The ID can be absolute -- the actual
 report id as reported by the device -- or relative --
 HID_REPORT_ID_FIRST for the first report, and (HID_REPORT_ID_NEXT |
-report_id) for the next report after report_id. Without a-priori
+report_id) for the next report after report_id. Without a priori
 information about report ids, the right way to use this ioctl is to
 use the relative IDs above to enumerate the valid IDs. The ioctl
 returns non-zero when there is no more next ID. The real report ID is
@@ -200,7 +200,7 @@ HIDIOCGUCODE
   - struct hiddev_usage_ref (read/write)
 
 Returns the usage_code in a hiddev_usage_ref structure, given that
-given its report type, report id, field index, and index within the
+its report type, report id, field index, and index within the
 field have already been filled into the structure.
 
 HIDIOCGUSAGE
index f41c1f0..b717ee5 100644 (file)
@@ -21,7 +21,7 @@ Hidraw is the only alternative, short of writing a custom kernel driver, for
 these non-conformant devices.
 
 A benefit of hidraw is that its use by userspace applications is independent
-of the underlying hardware type.  Currently, Hidraw is implemented for USB
+of the underlying hardware type.  Currently, hidraw is implemented for USB
 and Bluetooth.  In the future, as new hardware bus types are developed which
 use the HID specification, hidraw will be expanded to add support for these
 new bus types.
@@ -31,9 +31,10 @@ create hidraw device nodes.  Udev will typically create the device nodes
 directly under /dev (eg: /dev/hidraw0).  As this location is distribution-
 and udev rule-dependent, applications should use libudev to locate hidraw
 devices attached to the system.  There is a tutorial on libudev with a
-working example at:
+working example at::
 
        http://www.signal11.us/oss/udev/
+       https://web.archive.org/web/2019*/www.signal11.us
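A hedged sketch (link with -ludev) of locating hidraw nodes via libudev, as
recommended above:

.. code-block:: c

  #include <libudev.h>
  #include <stdio.h>

  int main(void)
  {
          struct udev *udev = udev_new();
          struct udev_enumerate *en = udev_enumerate_new(udev);
          struct udev_list_entry *entry;

          udev_enumerate_add_match_subsystem(en, "hidraw");
          udev_enumerate_scan_devices(en);
          udev_list_entry_foreach(entry, udev_enumerate_get_list_entry(en)) {
                  struct udev_device *dev = udev_device_new_from_syspath(
                                  udev, udev_list_entry_get_name(entry));
                  printf("%s\n", udev_device_get_devnode(dev));
                  udev_device_unref(dev);
          }
          udev_enumerate_unref(en);
          udev_unref(udev);
          return 0;
  }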
 
 The HIDRAW API
 ---------------
index d4785cf..f6ce44f 100644 (file)
@@ -4,19 +4,19 @@ Intel Integrated Sensor Hub (ISH)
 
 A sensor hub enables the ability to offload sensor polling and algorithm
 processing to a dedicated low power co-processor. This allows the core
-processor to go into low power modes more often, resulting in the increased
+processor to go into low power modes more often, resulting in increased
 battery life.
 
-There are many vendors providing external sensor hubs confirming to HID
-Sensor usage tables, and used in several tablets, 2 in 1 convertible laptops
-and embedded products. Linux had this support since Linux 3.9.
+There are many vendors providing external sensor hubs conforming to HID
+Sensor usage tables. These may be found in tablets, 2-in-1 convertible laptops
+and embedded products. Linux has had this support since Linux 3.9.
 
 Intel® introduced integrated sensor hubs as a part of the SoC starting from
 Cherry Trail and now supported on multiple generations of CPU packages. There
 are many commercial devices already shipped with Integrated Sensor Hubs (ISH).
-These ISH also comply to HID sensor specification, but the  difference is the
+These ISH also comply with the HID sensor specification, but the difference is the
 transport protocol used for communication. The current external sensor hubs
-mainly use HID over i2C or USB. But ISH doesn't use either i2c or USB.
+mainly use HID over I2C or USB. But ISH doesn't use either I2C or USB.
 
 1. Overview
 ===========
@@ -35,7 +35,7 @@ for a very high speed communication::
        -----------------               ----------------------
              PCI                                PCI
        -----------------               ----------------------
-        |Host controller|      -->     |    ISH processor   |
+       |Host controller|       -->     |    ISH processor   |
        -----------------               ----------------------
             USB Link
        -----------------               ----------------------
@@ -50,13 +50,13 @@ applications implemented in the firmware.
 The ISH allows multiple sensor management applications executing in the
firmware. Like USB endpoints, the messaging can be to/from a client. As part of
the enumeration process, these clients are identified. These clients can be simple
-HID sensor applications, sensor calibration application or senor firmware
-update application.
+HID sensor applications, sensor calibration applications or sensor firmware
+update applications.
 
The implementation model is similar: like the USB bus, ISH transport is also
 implemented as a bus. Each client application executing in the ISH processor
 is registered as a device on this bus. The driver, which binds each device
-(ISH HID driver) identifies the device type and registers with the hid core.
+(ISH HID driver) identifies the device type and registers with the HID core.
 
 2. ISH Implementation: Block Diagram
 ====================================
@@ -104,7 +104,7 @@ is registered as a device on this bus. The driver, which binds each device
 
 The ISH is exposed as "Non-VGA unclassified PCI device" to the host. The PCI
 product and vendor IDs are changed from different generations of processors. So
-the source code which enumerate drivers needs to update from generation to
+the source code which enumerates drivers needs to be updated from generation to
 generation.
 
 3.2 Inter Processor Communication (IPC) driver
@@ -112,41 +112,42 @@ generation.
 
 Location: drivers/hid/intel-ish-hid/ipc
 
-The IPC message used memory mapped I/O. The registers are defined in
+The IPC message uses memory mapped I/O. The registers are defined in
 hw-ish-regs.h.
 
 3.2.1 IPC/FW message types
 ^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-There are two types of messages, one for management of link and other messages
-are to and from transport layers.
+There are two types of messages, one for management of link and another for
+messages to and from transport layers.
 
 TX and RX of Transport messages
 ...............................
 
-A set of memory mapped register offers support of multi byte messages TX and
-RX (E.g.IPC_REG_ISH2HOST_MSG, IPC_REG_HOST2ISH_MSG). The IPC layer maintains
-internal queues to sequence messages and send them in order to the FW.
+A set of memory mapped registers offers support of multi-byte messages TX and
+RX (e.g. IPC_REG_ISH2HOST_MSG, IPC_REG_HOST2ISH_MSG). The IPC layer maintains
+internal queues to sequence messages and send them in order to the firmware.
Optionally the caller can register a handler to get notification of completion.
-A door bell mechanism is used in messaging to trigger processing in host and
+A doorbell mechanism is used in messaging to trigger processing in host and
 client firmware side. When ISH interrupt handler is called, the ISH2HOST
 doorbell register is used by host drivers to determine that the interrupt
 is for ISH.
 
 Each side has 32 32-bit message registers and a 32-bit doorbell. Doorbell
-register has the following format:
-Bits 0..6: fragment length (7 bits are used)
-Bits 10..13: encapsulated protocol
-Bits 16..19: management command (for IPC management protocol)
-Bit 31: doorbell trigger (signal H/W interrupt to the other side)
-Other bits are reserved, should be 0.
+register has the following format::
+
+  Bits 0..6: fragment length (7 bits are used)
+  Bits 10..13: encapsulated protocol
+  Bits 16..19: management command (for IPC management protocol)
+  Bit 31: doorbell trigger (signal H/W interrupt to the other side)
+  Other bits are reserved, should be 0.
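A hedged helper sketch for this layout; the macro names are invented for the
example, not the driver's own:

.. code-block:: c

  #include <stdint.h>

  #define IPC_DB_FRAGMENT_LEN(db)  ((db) & 0x7f)         /* bits 0..6   */
  #define IPC_DB_PROTOCOL(db)      (((db) >> 10) & 0xf)  /* bits 10..13 */
  #define IPC_DB_MGMT_CMD(db)      (((db) >> 16) & 0xf)  /* bits 16..19 */
  #define IPC_DB_TRIGGER           (UINT32_C(1) << 31)   /* bit 31      */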
 
 3.2.2 Transport layer interface
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-To abstract HW level IPC communication, a set of callbacks are registered.
+To abstract HW level IPC communication, a set of callbacks is registered.
 The transport layer uses them to send and receive messages.
-Refer to  struct ishtp_hw_ops for callbacks.
+Refer to struct ishtp_hw_ops for callbacks.
 
 3.3 ISH Transport layer
 -----------------------
@@ -158,7 +159,7 @@ Location: drivers/hid/intel-ish-hid/ishtp/
 
 The transport layer is a bi-directional protocol, which defines:
 - Set of commands to start, stop, connect, disconnect and flow control
-(ishtp/hbm.h) for details
+(see ishtp/hbm.h for details)
 - A flow control mechanism to avoid buffer overflows
 
 This protocol resembles bus messages described in the following document:
@@ -168,14 +169,14 @@ specifications/dcmi-hi-1-0-spec.pdf "Chapter 7: Bus Message Layer"
 3.3.2 Connection and Flow Control Mechanism
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
 
-Each FW client and a protocol is identified by an UUID. In order to communicate
+Each FW client and a protocol is identified by a UUID. In order to communicate
 to a FW client, a connection must be established using connect request and
 response bus messages. If successful, a pair (host_client_id and fw_client_id)
 will identify the connection.
 
 Once connection is established, peers send each other flow control bus messages
 independently. Every peer may send a message only if it has received a
-flow-control credit before. Once it sent a message, it may not send another one
+flow-control credit before. Once it has sent a message, it may not send another one
 before receiving the next flow control credit.
 Either side can send disconnect request bus message to end communication. Also
 the link will be dropped if major FW reset occurs.
@@ -209,7 +210,7 @@ and DMA_XFER_ACK act as ownership indicators.
 At initial state all outgoing memory belongs to the sender (TX to host, RX to
 FW), DMA_XFER transfers ownership on the region that contains ISHTP message to
 the receiving side, DMA_XFER_ACK returns ownership to the sender. A sender
-needs not wait for previous DMA_XFER to be ack'ed, and may send another message
+need not wait for previous DMA_XFER to be ack'ed, and may send another message
 as long as remaining continuous memory in its ownership is enough.
 In principle, multiple DMA_XFER and DMA_XFER_ACK messages may be sent at once
 (up to IPC MTU), thus allowing for interrupt throttling.
@@ -219,8 +220,8 @@ fragments and via IPC otherwise.
 3.3.4 Ring Buffers
 ^^^^^^^^^^^^^^^^^^
 
-When a client initiate a connection, a ring or RX and TX buffers are allocated.
-The size of ring can be specified by the client. HID client set 16 and 32 for
+When a client initiates a connection, a ring of RX and TX buffers is allocated.
+The size of the ring can be specified by the client. The HID client sets 16 and 32 for
 TX and RX buffers respectively. On send request from client, the data to be
 sent is copied to one of the send ring buffers and scheduled to be sent using
 bus message protocol. These buffers are required because the FW may have not
@@ -230,10 +231,10 @@ to send. Same thing holds true on receive side and flow control is required.
 3.3.5 Host Enumeration
 ^^^^^^^^^^^^^^^^^^^^^^
 
-The host enumeration bus command allow discovery of clients present in the FW.
+The host enumeration bus command allows discovery of clients present in the FW.
 There can be multiple sensor clients and clients for calibration function.
 
-To ease in implantation and allow independent driver handle each client
+To ease implementation and allow independent drivers to handle each client,
 this transport layer takes advantage of Linux Bus driver model. Each
 client is registered as a device on the transport bus (ishtp bus).
 
@@ -270,7 +271,7 @@ The ISHTP client driver is responsible for:
 The functionality in these drivers is the same as an external sensor hub.
 Refer to
 Documentation/hid/hid-sensor.rst for HID sensors and
-Documentation/ABI/testing/sysfs-bus-iio for IIO ABIs to user space
+Documentation/ABI/testing/sysfs-bus-iio for IIO ABIs to user space.
 
 3.6 End to End HID transport Sequence Diagram
 ---------------------------------------------
@@ -341,9 +342,10 @@ Documentation/ABI/testing/sysfs-bus-iio for IIO ABIs to user space
 3.7 ISH Debugging
 -----------------
 
-To debug ISH, event tracing mechanism is used. To enable debug logs
-echo 1 > /sys/kernel/debug/tracing/events/intel_ish/enable
-cat sys/kernel/debug/tracing/trace
+To debug ISH, the event tracing mechanism is used. To enable debug logs::
+
+  echo 1 > /sys/kernel/debug/tracing/events/intel_ish/enable
+  cat /sys/kernel/debug/tracing/trace
 
 3.8 ISH IIO sysfs Example on Lenovo ThinkPad Yoga 260
 -----------------------------------------------------
index b18cb96..2243a6b 100644 (file)
@@ -3,7 +3,7 @@ UHID - User-space I/O driver support for HID subsystem
 ======================================================
 
 UHID allows user-space to implement HID transport drivers. Please see
-hid-transport.txt for an introduction into HID transport drivers. This document
+hid-transport.rst for an introduction to HID transport drivers. This document
 relies heavily on the definitions declared there.
 
 With UHID, a user-space transport driver can create kernel hid-devices for each
@@ -15,7 +15,7 @@ There is an example user-space application in ./samples/uhid/uhid-example.c
 The UHID API
 ------------
 
-UHID is accessed through a character misc-device. The minor-number is allocated
+UHID is accessed through a character misc-device. The minor number is allocated
 dynamically so you need to rely on udev (or similar) to create the device node.
 This is /dev/uhid by default.
 
@@ -45,23 +45,23 @@ The "type" field defines the payload. For each type, there is a
 payload-structure available in the union "u" (except for empty payloads). This
 payload contains management and/or device data.
 
-The first thing you should do is sending an UHID_CREATE2 event. This will
-register the device. UHID will respond with an UHID_START event. You can now
+The first thing you should do is send a UHID_CREATE2 event. This will
+register the device. UHID will respond with a UHID_START event. You can now
 start sending data to and reading data from UHID. However, unless UHID sends the
 UHID_OPEN event, the internally attached HID Device Driver has no user attached.
 That is, you might put your device to sleep unless you receive the UHID_OPEN
 event. If you receive the UHID_OPEN event, you should start I/O. If the last
-user closes the HID device, you will receive an UHID_CLOSE event. This may be
-followed by an UHID_OPEN event again and so on. There is no need to perform
+user closes the HID device, you will receive a UHID_CLOSE event. This may be
+followed by a UHID_OPEN event again and so on. There is no need to perform
 reference-counting in user-space. That is, you will never receive multiple
-UHID_OPEN events without an UHID_CLOSE event. The HID subsystem performs
+UHID_OPEN events without a UHID_CLOSE event. The HID subsystem performs
 ref-counting for you.
 You may decide to ignore UHID_OPEN/UHID_CLOSE, though. I/O is allowed even
 though the device may have no users.
 
 If you want to send data on the interrupt channel to the HID subsystem, you send
-an HID_INPUT2 event with your raw data payload. If the kernel wants to send data
-on the interrupt channel to the device, you will read an UHID_OUTPUT event.
+a UHID_INPUT2 event with your raw data payload. If the kernel wants to send data
+on the interrupt channel to the device, you will read a UHID_OUTPUT event.
 Data requests on the control channel are currently limited to GET_REPORT and
 SET_REPORT (no other data reports on the control channel are defined so far).
 Those requests are always synchronous. That means, the kernel sends
@@ -71,7 +71,7 @@ the response via UHID_GET_REPORT_REPLY and UHID_SET_REPORT_REPLY to the kernel.
 The kernel blocks internal driver-execution during such round-trips (times out
 after a hard-coded period).
 
-If your device disconnects, you should send an UHID_DESTROY event. This will
+If your device disconnects, you should send a UHID_DESTROY event. This will
 unregister the device. You can now send UHID_CREATE2 again to register a new
 device.
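+
+Putting this together, a minimal registration sketch (loosely modeled on
+./samples/uhid/uhid-example.c; the device name, vendor/product IDs and report
+descriptor below are placeholders)::
+
+  #include <fcntl.h>
+  #include <string.h>
+  #include <unistd.h>
+  #include <linux/uhid.h>
+
+  /* Placeholder descriptor; a real transport driver uses the device's own. */
+  static const unsigned char rdesc[] = {
+          0x05, 0x01, 0x09, 0x02, 0xa1, 0x01, 0xc0,
+  };
+
+  int main(void)
+  {
+          struct uhid_event ev;
+          int fd = open("/dev/uhid", O_RDWR | O_CLOEXEC);
+
+          if (fd < 0)
+                  return 1;
+
+          memset(&ev, 0, sizeof(ev));
+          ev.type = UHID_CREATE2;
+          strcpy((char *)ev.u.create2.name, "example-uhid-device");
+          memcpy(ev.u.create2.rd_data, rdesc, sizeof(rdesc));
+          ev.u.create2.rd_size = sizeof(rdesc);
+          ev.u.create2.bus = BUS_USB;
+          ev.u.create2.vendor = 0x15d9;   /* placeholder ids */
+          ev.u.create2.product = 0x0a37;
+
+          if (write(fd, &ev, sizeof(ev)) != sizeof(ev))
+                  return 1;
+
+          /* ...loop on read(fd, &ev, sizeof(ev)) to process UHID events... */
+          return 0;
+  }
+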
 If you close() the fd, the device is automatically unregistered and destroyed
@@ -125,7 +125,7 @@ UHID_START:
   This is sent when the HID device is started. Consider this as an answer to
   UHID_CREATE2. This is always the first event that is sent. Note that this
   event might not be available immediately after write(UHID_CREATE2) returns.
-  Device drivers might required delayed setups.
+  Device drivers might require delayed setups.
   This event contains a payload of type uhid_start_req. The "dev_flags" field
   describes special behaviors of a device. The following flags are defined:
 
@@ -149,7 +149,7 @@ UHID_STOP:
   reloaded/changed the device driver loaded on your HID device (or some other
   maintenance actions happened).
 
-  You can usually ignored any UHID_STOP events safely.
+  You can usually ignore any UHID_STOP events safely.
 
 UHID_OPEN:
   This is sent when the HID device is opened. That is, the data that the HID
@@ -166,17 +166,17 @@ UHID_OUTPUT:
   This is sent if the HID device driver wants to send raw data to the I/O
   device on the interrupt channel. You should read the payload and forward it to
   the device. The payload is of type "struct uhid_output_req".
-  This may be received even though you haven't received UHID_OPEN, yet.
+  This may be received even though you haven't received UHID_OPEN yet.
 
 UHID_GET_REPORT:
   This event is sent if the kernel driver wants to perform a GET_REPORT request
-  on the control channeld as described in the HID specs. The report-type and
+  on the control channel as described in the HID specs. The report-type and
   report-number are available in the payload.
   The kernel serializes GET_REPORT requests so there will never be two in
   parallel. However, if you fail to respond with a UHID_GET_REPORT_REPLY, the
   request might silently time out.
-  Once you read a GET_REPORT request, you shall forward it to the hid device and
-  remember the "id" field in the payload. Once your hid device responds to the
+  Once you read a GET_REPORT request, you shall forward it to the HID device and
+  remember the "id" field in the payload. Once your HID device responds to the
   GET_REPORT (or if it fails), you must send a UHID_GET_REPORT_REPLY to the
   kernel with the exact same "id" as in the request. If the request already
   timed out, the kernel will ignore the response silently. The "id" field is
@@ -184,7 +184,7 @@ UHID_GET_REPORT:
 
 UHID_SET_REPORT:
   This is the SET_REPORT equivalent of UHID_GET_REPORT. On receipt, you shall
-  send a SET_REPORT request to your hid device. Once it replies, you must tell
+  send a SET_REPORT request to your HID device. Once it replies, you must tell
   the kernel about it via UHID_SET_REPORT_REPLY.
   The same restrictions as for UHID_GET_REPORT apply.
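+
+For instance, a reply to UHID_GET_REPORT might be assembled as below
+(UHID_SET_REPORT is handled symmetrically; fetch_report_from_device() is a
+placeholder for your transport-specific code)::
+
+  #include <string.h>
+  #include <unistd.h>
+  #include <linux/uhid.h>
+
+  /* Placeholder: fill "buf" with the report and return its size. */
+  extern __u16 fetch_report_from_device(__u8 rnum, __u8 rtype, __u8 *buf);
+
+  static void handle_get_report(int fd, const struct uhid_get_report_req *req)
+  {
+          struct uhid_event ev;
+
+          memset(&ev, 0, sizeof(ev));
+          ev.type = UHID_GET_REPORT_REPLY;
+          ev.u.get_report_reply.id = req->id;   /* must echo the request id */
+          ev.u.get_report_reply.err = 0;
+          ev.u.get_report_reply.size =
+                  fetch_report_from_device(req->rnum, req->rtype,
+                                           ev.u.get_report_reply.data);
+          write(fd, &ev, sizeof(ev));
+  }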
 
index de669c2..db3af0b 100644 (file)
@@ -12,7 +12,7 @@ This document describes the Linux kernel Makefiles.
           --- 3.1 Goal definitions
           --- 3.2 Built-in object goals - obj-y
           --- 3.3 Loadable module goals - obj-m
-          --- 3.4 Objects which export symbols
+          --- 3.4 <deleted>
           --- 3.5 Library file goals - lib-y
           --- 3.6 Descending down in directories
           --- 3.7 Non-builtin vmlinux targets - extra-y
@@ -247,12 +247,6 @@ more details, with real examples.
        kbuild will build an ext2.o file for you out of the individual
        parts and then link this into built-in.a, as you would expect.
 
-3.4 Objects which export symbols
---------------------------------
-
-       No special notation is required in the makefiles for
-       modules exporting symbols.
-
 3.5 Library file goals - lib-y
 ------------------------------
 
index 5f690f0..62f2aab 100644 (file)
@@ -1988,7 +1988,7 @@ netif_carrier.
 If use_carrier is 0, then the MII monitor will first query the
 device's (via ioctl) MII registers and check the link state.  If that
 request fails (not just that it returns carrier down), then the MII
-monitor will make an ethtool ETHOOL_GLINK request to attempt to obtain
+monitor will make an ethtool ETHTOOL_GLINK request to attempt to obtain
 the same information.  If both methods fail (i.e., the driver either
 does not support or had some error in processing both the MII register
 and ethtool requests), then the MII monitor will assume the link is
index 3561a8a..f8c6469 100644 (file)
@@ -267,7 +267,7 @@ DATA PATH
 Tx
 --
 
-end_start_xmit() is called by the stack. This function does the following:
+ena_start_xmit() is called by the stack. This function does the following:
 
 - Maps data buffers (skb->data and frags).
 - Populates ena_buf for the push buffer (if the driver and device are
index 468fe10..af37f25 100644 (file)
@@ -52,7 +52,7 @@ purposes as a standard complementary tool. The system's view from
 ``devlink-dpipe`` should change according to the changes done by the
 standard configuration tools.
 
-For example, it’s quiet common to  implement Access Control Lists (ACL)
+For example, it’s quite common to implement Access Control Lists (ACL)
 using Ternary Content Addressable Memory (TCAM). The TCAM memory can be
 divided into TCAM regions. Complex TC filters can have multiple rules with
 different priorities and different lookup keys. On the other hand hardware
index e99b415..ab790e7 100644 (file)
@@ -151,7 +151,7 @@ representor netdevice.
 -------------
 A subfunction devlink port is created but it is not active yet. That means the
 entities are created on devlink side, the e-switch port representor is created,
-but the subfunction device itself it not created. A user might use e-switch port
+but the subfunction device itself is not created. A user might use e-switch port
 representor to do settings, putting it into bridge, adding TC rules, etc. A user
 might as well configure the hardware address (such as MAC address) of the
 subfunction while subfunction is inactive.
@@ -173,7 +173,7 @@ Terms and Definitions
    * - Term
      - Definitions
    * - ``PCI device``
-     - A physical PCI device having one or more PCI bus consists of one or
+     - A physical PCI device having one or more PCI buses consists of one or
        more PCI controllers.
    * - ``PCI controller``
      -  A controller consists of potentially multiple physical functions,
index a64c01b..91b2cf7 100644 (file)
@@ -142,73 +142,13 @@ Please send incremental versions on top of what has been merged in order to fix
 the patches the way they would look like if your latest patch series was to be
 merged.
 
-How can I tell what patches are queued up for backporting to the various stable releases?
------------------------------------------------------------------------------------------
-Normally Greg Kroah-Hartman collects stable commits himself, but for
-networking, Dave collects up patches he deems critical for the
-networking subsystem, and then hands them off to Greg.
-
-There is a patchworks queue that you can see here:
-
-  https://patchwork.kernel.org/bundle/netdev/stable/?state=*
-
-It contains the patches which Dave has selected, but not yet handed off
-to Greg.  If Greg already has the patch, then it will be here:
-
-  https://git.kernel.org/pub/scm/linux/kernel/git/stable/stable-queue.git
-
-A quick way to find whether the patch is in this stable-queue is to
-simply clone the repo, and then git grep the mainline commit ID, e.g.
-::
-
-  stable-queue$ git grep -l 284041ef21fdf2e
-  releases/3.0.84/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-  releases/3.4.51/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-  releases/3.9.8/ipv6-fix-possible-crashes-in-ip6_cork_release.patch
-  stable/stable-queue$
-
-I see a network patch and I think it should be backported to stable. Should I request it via stable@vger.kernel.org like the references in the kernel's Documentation/process/stable-kernel-rules.rst file say?
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-No, not for networking.  Check the stable queues as per above first
-to see if it is already queued.  If not, then send a mail to netdev,
-listing the upstream commit ID and why you think it should be a stable
-candidate.
-
-Before you jump to go do the above, do note that the normal stable rules
-in :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
-still apply.  So you need to explicitly indicate why it is a critical
-fix and exactly what users are impacted.  In addition, you need to
-convince yourself that you *really* think it has been overlooked,
-vs. having been considered and rejected.
-
-Generally speaking, the longer it has had a chance to "soak" in
-mainline, the better the odds that it is an OK candidate for stable.  So
-scrambling to request a commit be added the day after it appears should
-be avoided.
-
-I have created a network patch and I think it should be backported to stable. Should I add a Cc: stable@vger.kernel.org like the references in the kernel's Documentation/ directory say?
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-No.  See above answer.  In short, if you think it really belongs in
-stable, then ensure you write a decent commit log that describes who
-gets impacted by the bug fix and how it manifests itself, and when the
-bug was introduced.  If you do that properly, then the commit will get
-handled appropriately and most likely get put in the patchworks stable
-queue if it really warrants it.
-
-If you think there is some valid information relating to it being in
-stable that does *not* belong in the commit log, then use the three dash
-marker line as described in
-:ref:`Documentation/process/submitting-patches.rst <the_canonical_patch_format>`
-to temporarily embed that information into the patch that you send.
-
-Are all networking bug fixes backported to all stable releases?
+Are there special rules regarding stable submissions on netdev?
 ---------------------------------------------------------------
-Due to capacity, Dave could only take care of the backports for the
-last two stable releases. For earlier stable releases, each stable
-branch maintainer is supposed to take care of them. If you find any
-patch is missing from an earlier stable branch, please notify
-stable@vger.kernel.org with either a commit ID or a formal patch
-backported, and CC Dave and other relevant networking developers.
+While it used to be the case that netdev submissions were not supposed
+to carry explicit ``CC: stable@vger.kernel.org`` tags, that is no longer
+the case today. Please follow the standard stable rules in
+:ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`,
+and make sure you include appropriate Fixes tags!
 
 Is the comment style convention different for the networking content?
 ---------------------------------------------------------------------
index da1073a..01391df 100644 (file)
@@ -50,7 +50,7 @@ Callbacks to implement
 
 The NIC driver offering ipsec offload will need to implement these
 callbacks to make the offload available to the network stack's
-XFRM subsytem.  Additionally, the feature bits NETIF_F_HW_ESP and
+XFRM subsystem.  Additionally, the feature bits NETIF_F_HW_ESP and
 NETIF_F_HW_ESP_TX_CSUM will signal the availability of the offload.
 
 
index cf9b285..dabee37 100644 (file)
@@ -46,25 +46,38 @@ stack frame LR and CR save fields are not used.
 
 Register preservation rules
 ---------------------------
-Register preservation rules match the ELF ABI calling sequence with the
-following differences:
-
-+------------------------------------------------------------------------+
-|        For the sc instruction, differences with the ELF ABI           |
-+--------------+--------------+------------------------------------------+
-| r0           | Volatile     | (System call number.)                   |
-| rr3          | Volatile     | (Parameter 1, and return value.)        |
-| rr4-r8       | Volatile     | (Parameters 2-6.)                       |
-| rcr0         | Volatile     | (cr0.SO is the return error condition.)         |
-| rcr1, cr5-7  | Nonvolatile  |                                                 |
-| rlr          | Nonvolatile  |                                                 |
-+--------------+--------------+------------------------------------------+
-|      For the scv 0 instruction, differences with the ELF ABI          |
-+--------------+--------------+------------------------------------------+
-| r0           | Volatile     | (System call number.)                   |
-| r3           | Volatile     | (Parameter 1, and return value.)        |
-| r4-r8        | Volatile     | (Parameters 2-6.)                       |
-+--------------+--------------+------------------------------------------+
+Register preservation rules match the ELF ABI calling sequence with some
+differences.
+
+For the sc instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register     | Preservation Rules | Purpose                                 |
++==============+====================+=========================================+
+| r0           | Volatile           | (System call number.)                   |
++--------------+--------------------+-----------------------------------------+
+| r3           | Volatile           | (Parameter 1, and return value.)        |
++--------------+--------------------+-----------------------------------------+
+| r4-r8        | Volatile           | (Parameters 2-6.)                       |
++--------------+--------------------+-----------------------------------------+
+| cr0          | Volatile           | (cr0.SO is the return error condition.) |
++--------------+--------------------+-----------------------------------------+
+| cr1, cr5-7   | Nonvolatile        |                                         |
++--------------+--------------------+-----------------------------------------+
+| lr           | Nonvolatile        |                                         |
++--------------+--------------------+-----------------------------------------+
+
+For the scv 0 instruction, the differences from the ELF ABI are as follows:
+
++--------------+--------------------+-----------------------------------------+
+| Register     | Preservation Rules | Purpose                                 |
++==============+====================+=========================================+
+| r0           | Volatile           | (System call number.)                   |
++--------------+--------------------+-----------------------------------------+
+| r3           | Volatile           | (Parameter 1, and return value.)        |
++--------------+--------------------+-----------------------------------------+
+| r4-r8        | Volatile           | (Parameters 2-6.)                       |
++--------------+--------------------+-----------------------------------------+
 
 All floating point and vector data registers as well as control and status
 registers are nonvolatile.
index 0825dc4..1f0d81f 100644 (file)
@@ -242,7 +242,7 @@ and try to avoid "fixes" which make the warning go away without addressing
 its cause.
 
 Note that not all compiler warnings are enabled by default.  Build the
-kernel with "make EXTRA_CFLAGS=-W" to get the full set.
+kernel with "make KCFLAGS=-W" to get the full set.
 
 The kernel provides several configuration options which turn on debugging
 features; most of these are found in the "kernel hacking" submenu.  Several
index 3973556..003c865 100644 (file)
@@ -35,12 +35,6 @@ Rules on what kind of patches are accepted, and which ones are not, into the
 Procedure for submitting patches to the -stable tree
 ----------------------------------------------------
 
- - If the patch covers files in net/ or drivers/net please follow netdev stable
-   submission guidelines as described in
-   :ref:`Documentation/networking/netdev-FAQ.rst <netdev-FAQ>`
-   after first checking the stable networking queue at
-   https://patchwork.kernel.org/bundle/netdev/stable/?state=*
-   to ensure the requested patch is not already queued up.
  - Security patches should not be handled (solely) by the -stable review
    process but should follow the procedures in
    :ref:`Documentation/admin-guide/security-bugs.rst <securitybugs>`.
index f709bea..b1bc2d3 100644 (file)
@@ -96,7 +96,7 @@ and elsewhere regarding submitting Linux kernel patches.
     injection might be appropriate.
 
 20) Newly-added code has been compiled with ``gcc -W`` (use
-    ``make EXTRA_CFLAGS=-W``).  This will generate lots of noise, but is good
+    ``make KCFLAGS=-W``).  This will generate lots of noise, but is good
     for finding bugs like "warning: comparison between signed and unsigned".
 
 21) Tested after it has been merged into the -mm patchset to make sure
index 8c991c8..91de63b 100644 (file)
@@ -250,11 +250,6 @@ should also read
 :ref:`Documentation/process/stable-kernel-rules.rst <stable_kernel_rules>`
 in addition to this file.
 
-Note, however, that some subsystem maintainers want to come to their own
-conclusions on which patches should go to the stable trees.  The networking
-maintainer, in particular, would rather not see individual developers
-adding lines like the above to their patches.
-
 If changes affect userland-kernel interfaces, please send the MAN-PAGES
 maintainer (as listed in the MAINTAINERS file) a man-pages patch, or at
 least a notification of the change, so that some information makes its way
index aa00816..b3ed5c5 100644 (file)
@@ -1040,8 +1040,8 @@ The keyctl syscall functions are:
 
      "key" is the ID of the key to be watched.
 
-     "queue_fd" is a file descriptor referring to an open "/dev/watch_queue"
-     which manages the buffer into which notifications will be delivered.
+     "queue_fd" is a file descriptor referring to an open pipe which
+     manages the buffer into which notifications will be delivered.
 
      "filter" is either NULL to remove a watch or a filter specification to
      indicate what events are required from the key.
index 0b73acb..169749e 100644 (file)
@@ -512,6 +512,38 @@ The --itrace option controls the type and frequency of synthesized events
 Note that only 64-bit programs are currently supported - further work is
 required to support instruction decode of 32-bit Arm programs.
 
+2.2) Tracing PID
+
+The kernel can be built to write the PID value into the PE ContextID registers.
+For a kernel running at EL1, the PID is stored in CONTEXTIDR_EL1.  A PE may
+implement the Arm Virtualization Host Extensions (VHE), with which the kernel
+can run at EL2 as a virtualisation host; in this case, the PID value is stored
+in CONTEXTIDR_EL2.
+
+perf provides PMU formats that program the ETM to insert these values into the
+trace data; the PMU formats are defined as below:
+
+  "contextid1": Available on both EL1 kernel and EL2 kernel.  When the
+                kernel is running at EL1, "contextid1" enables the PID
+                tracing; when the kernel is running at EL2, this enables
+                tracing the PID of guest applications.
+
+  "contextid2": Only usable when the kernel is running at EL2.  When
+                selected, enables PID tracing on EL2 kernel.
+
+  "contextid":  Will be an alias for the option that enables PID
+                tracing.  I.e,
+                contextid == contextid1, on EL1 kernel.
+                contextid == contextid2, on EL2 kernel.
+
+perf will always enable PID tracing at the relevant EL, this is accomplished by
+automatically enable the "contextid" config - but for EL2 it is possible to make
+specific adjustments using configs "contextid1" and "contextid2", E.g. if a user
+wants to trace PIDs for both host and guest, the two configs "contextid1" and
+"contextid2" can be set at the same time:
+
+  perf record -e cs_etm/contextid1,contextid2/u -- vm
+
 
 Generating coverage files for Feedback Directed Optimization: AutoFDO
 ---------------------------------------------------------------------
index a5e36aa..8012fe9 100644 (file)
@@ -256,7 +256,7 @@ e cercate di evitare le "riparazioni" che fan sparire l'avvertimento senza
 però averne trovato la causa.
 
 Tenete a mente che non tutti gli avvertimenti sono disabilitati di default.
-Costruite il kernel con "make EXTRA_CFLAGS=-W" per ottenerli tutti.
+Costruite il kernel con "make KCFLAGS=-W" per ottenerli tutti.
 
 Il kernel fornisce differenti opzioni che abilitano funzionalità di debugging;
 molti di queste sono trovano all'interno del sotto menu "kernel hacking".
index 3e57550..614fc17 100644 (file)
@@ -104,7 +104,7 @@ sottomissione delle patch, in particolare
     l'iniezione di fallimenti specifici per il sottosistema.
 
 22) Il nuovo codice è stato compilato con ``gcc -W`` (usate
-    ``make EXTRA_CFLAGS=-W``).  Questo genererà molti avvisi, ma è ottimo
+    ``make KCFLAGS=-W``).  Questo genererà molti avvisi, ma è ottimo
     per scovare bachi come  "warning: comparison between signed and unsigned".
 
 23) La patch è stata verificata dopo essere stata inclusa nella serie di patch
index 959a06b..66cd8ee 100644 (file)
@@ -165,7 +165,7 @@ Linus对这个问题给出了最佳答案:
 通常,这些警告都指向真正的问题。提交以供审阅的代码通常不会产生任何编译器警告。
 在消除警告时,注意了解真正的原因,并尽量避免“修复”,使警告消失而不解决其原因。
 
-请注意,并非所有编译器警告都默认启用。使用“make EXTRA_CFLAGS=-W”构建内核以
+请注意,并非所有编译器警告都默认启用。使用“make KCFLAGS=-W”构建内核以
 获得完整集合。
 
 内核提供了几个配置选项,可以打开调试功能;大多数配置选项位于“kernel hacking”
index d02ba2f..599bd44 100644 (file)
@@ -179,6 +179,7 @@ Code  Seq#    Include File                                           Comments
 'R'   00-1F  linux/random.h                                          conflict!
 'R'   01     linux/rfkill.h                                          conflict!
 'R'   C0-DF  net/bluetooth/rfcomm.h
+'R'   E0     uapi/linux/fsl_mc.h
 'S'   all    linux/cdrom.h                                           conflict!
 'S'   80-81  scsi/scsi_ioctl.h                                       conflict!
 'S'   82-FF  scsi/scsi.h                                             conflict!
@@ -318,6 +319,7 @@ Code  Seq#    Include File                                           Comments
 0xA0  all    linux/sdp/sdp.h                                         Industrial Device Project
                                                                      <mailto:kenji@bitgate.com>
 0xA1  0      linux/vtpm_proxy.h                                      TPM Emulator Proxy Driver
+0xA2  all    uapi/linux/acrn.h                                       ACRN hypervisor
 0xA3  80-8F                                                          Port ACL  in development:
                                                                      <mailto:tlewis@mindspring.com>
 0xA3  90-9F  linux/dtlk.h
@@ -353,6 +355,7 @@ Code  Seq#    Include File                                           Comments
                                                                      <mailto:michael.klein@puffin.lb.shuttle.de>
 0xCC  00-0F  drivers/misc/ibmvmc.h                                   pseries VMC driver
 0xCD  01     linux/reiserfs_fs.h
+0xCE  01-02  uapi/linux/cxl_mem.h                                    Compute Express Link Memory Devices
 0xCF  02     fs/cifs/ioctl.c
 0xDB  00-0F  drivers/char/mwave/mwavepub.h
 0xDD  00-3F                                                          ZFCP device driver see drivers/s390/scsi/
diff --git a/Documentation/virt/acrn/cpuid.rst b/Documentation/virt/acrn/cpuid.rst
new file mode 100644 (file)
index 0000000..65fa4b9
--- /dev/null
@@ -0,0 +1,46 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+ACRN CPUID bits
+===============
+
+A guest VM running on an ACRN hypervisor can check some of its features using
+CPUID.
+
+ACRN cpuid functions are:
+
+function: 0x40000000
+
+returns::
+
+   eax = 0x40000010
+   ebx = 0x4e524341
+   ecx = 0x4e524341
+   edx = 0x4e524341
+
+Note that the values in ebx, ecx and edx correspond to the string
+"ACRNACRNACRN". The value in eax corresponds to the maximum cpuid function
+present in this leaf, and will be updated if more functions are added in the
+future.
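+
+For example, a guest can read the signature leaf with the compiler's cpuid
+helper (a user-space sketch for GCC/Clang on x86)::
+
+   #include <cpuid.h>
+   #include <stdio.h>
+   #include <string.h>
+
+   int main(void)
+   {
+           unsigned int eax, ebx, ecx, edx;
+           char sig[13];
+
+           __cpuid(0x40000000, eax, ebx, ecx, edx);
+           memcpy(sig + 0, &ebx, 4);
+           memcpy(sig + 4, &ecx, 4);
+           memcpy(sig + 8, &edx, 4);
+           sig[12] = '\0';
+
+           printf("signature: %s, max leaf: 0x%x\n", sig, eax);
+           return strcmp(sig, "ACRNACRNACRN") != 0;
+   }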
+
+function: define ACRN_CPUID_FEATURES (0x40000001)
+
+returns::
+
+          ebx, ecx, edx
+          eax = an OR'ed group of (1 << flag)
+
+where ``flag`` is defined as below:
+
+================================= =========== ================================
+flag                              value       meaning
+================================= =========== ================================
+ACRN_FEATURE_PRIVILEGED_VM        0           guest VM is a privileged VM
+================================= =========== ================================
+
+function: 0x40000010
+
+returns::
+
+          ebx, ecx, edx
+          eax = (Virtual) TSC frequency in kHz.
diff --git a/Documentation/virt/acrn/index.rst b/Documentation/virt/acrn/index.rst
new file mode 100644 (file)
index 0000000..b5f793e
--- /dev/null
@@ -0,0 +1,12 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+ACRN Hypervisor
+===============
+
+.. toctree::
+   :maxdepth: 1
+
+   introduction
+   io-request
+   cpuid
diff --git a/Documentation/virt/acrn/introduction.rst b/Documentation/virt/acrn/introduction.rst
new file mode 100644 (file)
index 0000000..f8d081b
--- /dev/null
@@ -0,0 +1,43 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+ACRN Hypervisor Introduction
+============================
+
+The ACRN Hypervisor is a Type 1 hypervisor, running directly on bare-metal
+hardware. It has a privileged management VM, called Service VM, to manage User
+VMs and do I/O emulation.
+
+ACRN userspace is an application running in the Service VM that emulates
+devices for a User VM based on command line configurations. ACRN Hypervisor
+Service Module (HSM) is a kernel module in the Service VM which provides
+hypervisor services to the ACRN userspace.
+
+The figure below shows the architecture.
+
+::
+
+                Service VM                    User VM
+      +----------------------------+  |  +------------------+
+      |        +--------------+    |  |  |                  |
+      |        |ACRN userspace|    |  |  |                  |
+      |        +--------------+    |  |  |                  |
+      |-----------------ioctl------|  |  |                  |   ...
+      |kernel space   +----------+ |  |  |                  |
+      |               |   HSM    | |  |  | Drivers          |
+      |               +----------+ |  |  |                  |
+      +--------------------|-------+  |  +------------------+
+  +---------------------hypercall----------------------------------------+
+  |                         ACRN Hypervisor                              |
+  +----------------------------------------------------------------------+
+  |                          Hardware                                    |
+  +----------------------------------------------------------------------+
+
+ACRN userspace allocates memory for the User VM, configures and initializes the
+devices used by the User VM, loads the virtual bootloader, initializes the
+virtual CPU state and handles I/O request accesses from the User VM. It uses
+ioctls to communicate with the HSM. HSM implements hypervisor services by
+interacting with the ACRN Hypervisor via hypercalls. HSM exports a char device
+interface (/dev/acrn_hsm) to userspace.
+
+The ACRN hypervisor is open for contribution from anyone. The source repo is
+available at https://github.com/projectacrn/acrn-hypervisor.
diff --git a/Documentation/virt/acrn/io-request.rst b/Documentation/virt/acrn/io-request.rst
new file mode 100644 (file)
index 0000000..6cc3ea0
--- /dev/null
@@ -0,0 +1,97 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+I/O request handling
+====================
+
+An I/O request of a User VM, which is constructed by the hypervisor, is
+distributed by the ACRN Hypervisor Service Module to an I/O client
+corresponding to the address range of the I/O request. Details of I/O request
+handling are described in the following sections.
+
+1. I/O request
+--------------
+
+For each User VM, there is a shared 4-KByte memory region used for I/O request
+communication between the hypervisor and the Service VM. An I/O request is a
+256-byte buffer structure ('struct acrn_io_request') that is filled by
+an I/O handler of the hypervisor when a trapped I/O access happens in a User
+VM. ACRN userspace in the Service VM first allocates a 4-KByte page and passes
+the GPA (Guest Physical Address) of the buffer to the hypervisor. The buffer is
+used as an array of 16 I/O request slots with each I/O request slot being 256
+bytes. This array is indexed by vCPU ID.
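+
+A sketch of the slot arithmetic (illustrative only; see the UAPI header
+uapi/linux/acrn.h for the real definitions)::
+
+   #include <stddef.h>
+   #include <linux/acrn.h>   /* struct acrn_io_request (assumed UAPI header) */
+
+   #define IO_REQUEST_SLOTS  16   /* 4096-byte page / 256-byte requests */
+
+   /* Each vCPU owns exactly one 256-byte slot in the shared page. */
+   static struct acrn_io_request *
+   slot_for_vcpu(struct acrn_io_request *page_base, unsigned int vcpu_id)
+   {
+           return vcpu_id < IO_REQUEST_SLOTS ? &page_base[vcpu_id] : NULL;
+   }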
+
+2. I/O clients
+--------------
+
+An I/O client is responsible for handling User VM I/O requests whose accessed
+GPA falls in a certain range. Multiple I/O clients can be associated with each
+User VM. There is a special client associated with each User VM, called the
+default client, that handles all I/O requests that do not fit into the range of
+any other clients. The ACRN userspace acts as the default client for each User
+VM.
+
+The illustration below shows the relationship between the I/O request shared
+buffer, I/O requests and I/O clients.
+
+::
+
+     +------------------------------------------------------+
+     |                                       Service VM     |
+     |+--------------------------------------------------+  |
+     ||      +----------------------------------------+  |  |
+     ||      | shared page            ACRN userspace  |  |  |
+     ||      |    +-----------------+  +------------+ |  |  |
+     ||   +----+->| acrn_io_request |<-+  default   | |  |  |
+     ||   |  | |  +-----------------+  | I/O client | |  |  |
+     ||   |  | |  |       ...       |  +------------+ |  |  |
+     ||   |  | |  +-----------------+                 |  |  |
+     ||   |  +-|--------------------------------------+  |  |
+     ||---|----|-----------------------------------------|  |
+     ||   |    |                             kernel      |  |
+     ||   |    |            +----------------------+     |  |
+     ||   |    |            | +-------------+  HSM |     |  |
+     ||   |    +--------------+             |      |     |  |
+     ||   |                 | | I/O clients |      |     |  |
+     ||   |                 | |             |      |     |  |
+     ||   |                 | +-------------+      |     |  |
+     ||   |                 +----------------------+     |  |
+     |+---|----------------------------------------------+  |
+     +----|-------------------------------------------------+
+          |
+     +----|-------------------------------------------------+
+     |  +-+-----------+                                     |
+     |  | I/O handler |              ACRN Hypervisor        |
+     |  +-------------+                                     |
+     +------------------------------------------------------+
+
+3. I/O request state transition
+-------------------------------
+
+The state transitions of an ACRN I/O request are as follows.
+
+::
+
+   FREE -> PENDING -> PROCESSING -> COMPLETE -> FREE -> ...
+
+- FREE: this I/O request slot is empty
+- PENDING: a valid I/O request is pending in this slot
+- PROCESSING: the I/O request is being processed
+- COMPLETE: the I/O request has been processed
+
+An I/O request in COMPLETE or FREE state is owned by the hypervisor. HSM and
+ACRN userspace are in charge of processing the others.
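+
+Modelled as a sketch (the enumerator names are invented for illustration, not
+the UAPI constants)::
+
+   #include <stdbool.h>
+
+   enum io_request_state {
+           IOREQ_FREE,        /* slot is empty, owned by the hypervisor  */
+           IOREQ_PENDING,     /* valid request waiting to be dispatched  */
+           IOREQ_PROCESSING,  /* being handled by HSM or an I/O client   */
+           IOREQ_COMPLETE,    /* done, ownership back to the hypervisor  */
+   };
+
+   /* Only COMPLETE and FREE slots belong to the hypervisor. */
+   static bool owned_by_hypervisor(enum io_request_state s)
+   {
+           return s == IOREQ_COMPLETE || s == IOREQ_FREE;
+   }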
+
+4. Processing flow of I/O requests
+----------------------------------
+
+a. The I/O handler of the hypervisor will fill an I/O request with PENDING
+   state when a trapped I/O access happens in a User VM.
+b. The hypervisor makes an upcall, which is a notification interrupt, to
+   the Service VM.
+c. The upcall handler schedules a worker to dispatch I/O requests.
+d. The worker looks for PENDING I/O requests, assigns them to different
+   registered clients based on the address of the I/O accesses, updates
+   their state to PROCESSING, and notifies the corresponding clients to
+   handle them.
+e. The notified client handles the assigned I/O requests.
+f. The HSM updates I/O request states to COMPLETE and notifies the hypervisor
+   of the completion via hypercalls.
index 350f5c8..edea7fe 100644 (file)
@@ -12,6 +12,7 @@ Linux Virtualization Support
    paravirt_ops
    guest-halt-polling
    ne_overview
+   acrn/index
 
 .. only:: html and subproject
 
index 45fd862..307f2fc 100644 (file)
@@ -182,6 +182,9 @@ is dependent on the CPU capability and the kernel configuration. The limit can
 be retrieved using KVM_CAP_ARM_VM_IPA_SIZE of the KVM_CHECK_EXTENSION
 ioctl() at run-time.
 
+Creation of the VM will fail if the requested IPA size (whether it is
+implicit or explicit) is unsupported on the host.
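+
+For example (a sketch with error handling trimmed; kvm_fd is an open /dev/kvm
+descriptor and ipa_bits the desired IPA size), user space can query the limit
+before creating the VM::
+
+  #include <sys/ioctl.h>
+  #include <linux/kvm.h>
+
+  int max_ipa = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_ARM_VM_IPA_SIZE);
+  int vm_fd = -1;
+
+  if (max_ipa > 0 && ipa_bits <= max_ipa)
+          vm_fd = ioctl(kvm_fd, KVM_CREATE_VM, KVM_VM_TYPE_ARM_IPA_SIZE(ipa_bits));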
+
 Please note that configuring the IPA size does not affect the capability
 exposed by the guest CPUs in ID_AA64MMFR0_EL1[PARange]. It only affects
 size of the address translated by the stage2 level (guest physical to
@@ -1492,7 +1495,8 @@ Fails if any VCPU has already been created.
 
 Define which vcpu is the Bootstrap Processor (BSP).  Values are the same
 as the vcpu id in KVM_CREATE_VCPU.  If this ioctl is not called, the default
-is vcpu 0.
+is vcpu 0. This ioctl has to be called before vcpu creation,
+otherwise it will return an EBUSY error.
 
 
 4.42 KVM_GET_XSAVE
@@ -3856,49 +3860,20 @@ base 2 of the page size in the bottom 6 bits.
          -EFAULT if struct kvm_reinject_control cannot be read,
         -EINVAL if the supplied shift or flags are invalid,
         -ENOMEM if unable to allocate the new HPT,
-        -ENOSPC if there was a hash collision
-
-::
-
-  struct kvm_ppc_rmmu_info {
-       struct kvm_ppc_radix_geom {
-               __u8    page_shift;
-               __u8    level_bits[4];
-               __u8    pad[3];
-       }       geometries[8];
-       __u32   ap_encodings[8];
-  };
-
-The geometries[] field gives up to 8 supported geometries for the
-radix page table, in terms of the log base 2 of the smallest page
-size, and the number of bits indexed at each level of the tree, from
-the PTE level up to the PGD level in that order.  Any unused entries
-will have 0 in the page_shift field.
-
-The ap_encodings gives the supported page sizes and their AP field
-encodings, encoded with the AP value in the top 3 bits and the log
-base 2 of the page size in the bottom 6 bits.
-
-4.102 KVM_PPC_RESIZE_HPT_PREPARE
---------------------------------
-
-:Capability: KVM_CAP_SPAPR_RESIZE_HPT
-:Architectures: powerpc
-:Type: vm ioctl
-:Parameters: struct kvm_ppc_resize_hpt (in)
-:Returns: 0 on successful completion,
-        >0 if a new HPT is being prepared, the value is an estimated
-         number of milliseconds until preparation is complete,
-         -EFAULT if struct kvm_reinject_control cannot be read,
-        -EINVAL if the supplied shift or flags are invalid,when moving existing
-         HPT entries to the new HPT,
-        -EIO on other error conditions
 
 Used to implement the PAPR extension for runtime resizing of a guest's
 Hashed Page Table (HPT).  Specifically this starts, stops or monitors
 the preparation of a new potential HPT for the guest, essentially
 implementing the H_RESIZE_HPT_PREPARE hypercall.
 
+::
+
+  struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+  };
+
 If called with shift > 0 when there is no pending HPT for the guest,
 this begins preparation of a new pending HPT of size 2^(shift) bytes.
 It then returns a positive integer with the estimated number of
@@ -3926,14 +3901,6 @@ Normally this will be called repeatedly with the same parameters until
 it returns <= 0.  The first call will initiate preparation, subsequent
 ones will monitor preparation until it completes or fails.
 
-::
-
-  struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-  };
-
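+
+A sketch of that polling pattern (vm_fd is an open VM file descriptor and
+new_shift the desired HPT order; error handling is trimmed)::
+
+  #include <sys/ioctl.h>
+  #include <unistd.h>
+  #include <linux/kvm.h>
+
+  struct kvm_ppc_resize_hpt rhpt = { .flags = 0, .shift = new_shift };
+  int ret;
+
+  do {
+          ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_PREPARE, &rhpt);
+          if (ret > 0)
+                  usleep(ret * 1000);  /* estimated milliseconds remaining */
+  } while (ret > 0);
+
+  if (ret == 0)
+          ret = ioctl(vm_fd, KVM_PPC_RESIZE_HPT_COMMIT, &rhpt);
+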
 4.103 KVM_PPC_RESIZE_HPT_COMMIT
 -------------------------------
 
@@ -3956,6 +3923,14 @@ Hashed Page Table (HPT).  Specifically this requests that the guest be
 transferred to working with the new HPT, essentially implementing the
 H_RESIZE_HPT_COMMIT hypercall.
 
+::
+
+  struct kvm_ppc_resize_hpt {
+       __u64 flags;
+       __u32 shift;
+       __u32 pad;
+  };
+
 This should only be called after KVM_PPC_RESIZE_HPT_PREPARE has
 returned 0 with the same parameters.  In other cases
 KVM_PPC_RESIZE_HPT_COMMIT will return an error (usually -ENXIO or
@@ -3971,14 +3946,6 @@ HPT and the previous HPT will be discarded.
 
 On failure, the guest will still be operating on its previous HPT.
 
-::
-
-  struct kvm_ppc_resize_hpt {
-       __u64 flags;
-       __u32 shift;
-       __u32 pad;
-  };
-
 4.104 KVM_X86_GET_MCE_CAP_SUPPORTED
 -----------------------------------
 
@@ -4519,6 +4486,7 @@ KVM_GET_SUPPORTED_CPUID ioctl because some of them intersect with KVM feature
 leaves (0x40000000, 0x40000001).
 
 Currently, the following list of CPUID leaves are returned:
+
  - HYPERV_CPUID_VENDOR_AND_MAX_FUNCTIONS
  - HYPERV_CPUID_INTERFACE
  - HYPERV_CPUID_VERSION
@@ -4543,6 +4511,7 @@ userspace should not expect to get any particular value there.
 Note, vcpu version of KVM_GET_SUPPORTED_HV_CPUID is currently deprecated. Unlike
 system ioctl which exposes all supported feature bits unconditionally, vcpu
 version has the following quirks:
+
 - HYPERV_CPUID_NESTED_FEATURES leaf and HV_X64_ENLIGHTENED_VMCS_RECOMMENDED
   feature bit are only exposed when Enlightened VMCS was previously enabled
   on the corresponding vCPU (KVM_CAP_HYPERV_ENLIGHTENED_VMCS).
@@ -4838,8 +4807,10 @@ If an MSR access is not permitted through the filtering, it generates a
 allows user space to deflect and potentially handle various MSR accesses
 into user space.
 
-If a vCPU is in running state while this ioctl is invoked, the vCPU may
-experience inconsistent filtering behavior on MSR accesses.
+Note, invoking this ioctl while a vCPU is running is inherently racy.  However,
+KVM does guarantee that vCPUs will see either the previous filter or the new
+filter, e.g. MSRs with identical settings in both the old and new filter will
+have deterministic behavior.
 
 4.127 KVM_XEN_HVM_SET_ATTR
 --------------------------
@@ -4913,6 +4884,14 @@ see KVM_XEN_HVM_SET_ATTR above.
        union {
                __u64 gpa;
                __u64 pad[4];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
        } u;
   };
 
@@ -4925,6 +4904,31 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
   Sets the guest physical address of an additional pvclock structure
   for a given vCPU. This is typically used for guest vsyscall support.
 
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR
+  Sets the guest physical address of the vcpu_runstate_info for a given
+  vCPU. This is how a Xen guest tracks CPU state such as steal time.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT
+  Sets the runstate (RUNSTATE_running/_runnable/_blocked/_offline) of
+  the given vCPU from the .u.runstate.state member of the structure.
+  KVM automatically accounts running and runnable time but blocked
+  and offline states are only entered explicitly.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA
+  Sets all fields of the vCPU runstate data from the .u.runstate member
+  of the structure, including the current runstate. The state_entry_time
+  must equal the sum of the other four times.
+
+KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST
+  This *adds* the contents of the .u.runstate members of the structure
+  to the corresponding members of the given vCPU's runstate data, thus
+  permitting atomic adjustments to the runstate times. The adjustment
+  to the state_entry_time must equal the sum of the adjustments to the
+  other four times. The state field must be set to -1, or to a valid
+  runstate value (RUNSTATE_running, RUNSTATE_runnable, RUNSTATE_blocked
+  or RUNSTATE_offline) to set the current accounted state as of the
+  adjusted state_entry_time.
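+
+For instance, setting the full runstate data while honouring that invariant
+might look like the sketch below (vcpu_fd and the time values are assumed;
+RUNSTATE_running comes from the Xen interface headers)::
+
+  struct kvm_xen_vcpu_attr attr = {
+          .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+  };
+
+  attr.u.runstate.state         = RUNSTATE_running;
+  attr.u.runstate.time_running  = t_run;
+  attr.u.runstate.time_runnable = t_runnable;
+  attr.u.runstate.time_blocked  = t_blocked;
+  attr.u.runstate.time_offline  = t_offline;
+  /* state_entry_time must equal the sum of the other four times */
+  attr.u.runstate.state_entry_time =
+          t_run + t_runnable + t_blocked + t_offline;
+
+  ioctl(vcpu_fd, KVM_XEN_VCPU_SET_ATTR, &attr);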
+
 4.130 KVM_XEN_VCPU_GET_ATTR
 ---------------------------
 
@@ -4937,6 +4941,9 @@ KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO
 Allows Xen vCPU attributes to be read. For the structure and types,
 see KVM_XEN_VCPU_SET_ATTR above.
 
+The KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST type may not be used
+with the KVM_XEN_VCPU_GET_ATTR ioctl.
+
 5. The kvm_run structure
 ========================
 
@@ -4998,7 +5005,8 @@ local APIC is not used.
        __u16 flags;
 
 More architecture-specific flags detailing state of the VCPU that may
-affect the device's behavior. Current defined flags:
+affect the device's behavior. Currently defined flags::
+
   /* x86, set if the VCPU is in system management mode */
   #define KVM_RUN_X86_SMM     (1 << 0)
   /* x86, set if bus lock detected in VM */
@@ -6215,7 +6223,7 @@ the bus lock vm exit can be preempted by a higher priority VM exit, the exit
 notifications to userspace can be KVM_EXIT_BUS_LOCK or other reasons.
 KVM_RUN_BUS_LOCK flag is used to distinguish between them.
 
-7.22 KVM_CAP_PPC_DAWR1
+7.23 KVM_CAP_PPC_DAWR1
 ----------------------
 
 :Architectures: ppc
@@ -6700,6 +6708,7 @@ PVHVM guests. Valid flags are::
   #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR     (1 << 0)
   #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL   (1 << 1)
   #define KVM_XEN_HVM_CONFIG_SHARED_INFO       (1 << 2)
+  #define KVM_XEN_HVM_CONFIG_RUNSTATE          (1 << 3)
 
 The KVM_XEN_HVM_CONFIG_HYPERCALL_MSR flag indicates that the KVM_XEN_HVM_CONFIG
 ioctl is available, for the guest to set its hypercall page.
@@ -6714,3 +6723,7 @@ KVM_XEN_HVM_SET_ATTR, KVM_XEN_HVM_GET_ATTR, KVM_XEN_VCPU_SET_ATTR and
 KVM_XEN_VCPU_GET_ATTR ioctls, as well as the delivery of exception vectors
 for event channel upcalls when the evtchn_upcall_pending field of a vcpu's
 vcpu_info is set.
+
+The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
+features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
+supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
index f3591ee..552567d 100644 (file)
@@ -50,7 +50,7 @@ PTE Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | pte_mkwrite               | Creates a writable PTE                           |
 +---------------------------+--------------------------------------------------+
-| pte_mkwrprotect           | Creates a write protected PTE                    |
+| pte_wrprotect             | Creates a write protected PTE                    |
 +---------------------------+--------------------------------------------------+
 | pte_mkspecial             | Creates a special PTE                            |
 +---------------------------+--------------------------------------------------+
@@ -120,7 +120,7 @@ PMD Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | pmd_mkwrite               | Creates a writable PMD                           |
 +---------------------------+--------------------------------------------------+
-| pmd_mkwrprotect           | Creates a write protected PMD                    |
+| pmd_wrprotect             | Creates a write protected PMD                    |
 +---------------------------+--------------------------------------------------+
 | pmd_mkspecial             | Creates a special PMD                            |
 +---------------------------+--------------------------------------------------+
@@ -186,7 +186,7 @@ PUD Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | pud_mkwrite               | Creates a writable PUD                           |
 +---------------------------+--------------------------------------------------+
-| pud_mkwrprotect           | Creates a write protected PUD                    |
+| pud_wrprotect             | Creates a write protected PUD                    |
 +---------------------------+--------------------------------------------------+
 | pud_mkdevmap              | Creates a ZONE_DEVICE mapped PUD                 |
 +---------------------------+--------------------------------------------------+
@@ -224,7 +224,7 @@ HugeTLB Page Table Helpers
 +---------------------------+--------------------------------------------------+
 | huge_pte_mkwrite          | Creates a writable HugeTLB                       |
 +---------------------------+--------------------------------------------------+
-| huge_pte_mkwrprotect      | Creates a write protected HugeTLB                |
+| huge_pte_wrprotect        | Creates a write protected HugeTLB                |
 +---------------------------+--------------------------------------------------+
 | huge_ptep_get_and_clear   | Clears a HugeTLB                                 |
 +---------------------------+--------------------------------------------------+
index a9efff1..d5dfb71 100644 (file)
@@ -261,6 +261,8 @@ ABI/API
 L:     linux-api@vger.kernel.org
 F:     include/linux/syscalls.h
 F:     kernel/sys_ni.c
+X:     include/uapi/
+X:     arch/*/include/uapi/
 
 ABIT UGURU 1,2 HARDWARE MONITOR DRIVER
 M:     Hans de Goede <hdegoede@redhat.com>
@@ -436,6 +438,15 @@ S: Orphan
 F:     drivers/platform/x86/wmi.c
 F:     include/uapi/linux/wmi.h
 
+ACRN HYPERVISOR SERVICE MODULE
+M:     Shuo Liu <shuo.a.liu@intel.com>
+L:     acrn-dev@lists.projectacrn.org (subscribers-only)
+S:     Supported
+W:     https://projectacrn.org
+F:     Documentation/virt/acrn/
+F:     drivers/virt/acrn/
+F:     include/uapi/linux/acrn.h
+
 AD1889 ALSA SOUND DRIVER
 L:     linux-parisc@vger.kernel.org
 S:     Maintained
@@ -1017,7 +1028,7 @@ F:        Documentation/devicetree/bindings/mux/adi,adgs1408.txt
 F:     drivers/mux/adgs1408.c
 
 ANALOG DEVICES INC ADIN DRIVER
-M:     Alexandru Ardelean <alexaundru.ardelean@analog.com>
+M:     Michael Hennerich <michael.hennerich@analog.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 W:     http://ez.analog.com/community/linux-device-drivers
@@ -1025,7 +1036,7 @@ F:        Documentation/devicetree/bindings/net/adi,adin.yaml
 F:     drivers/net/phy/adin.c
 
 ANALOG DEVICES INC ADIS DRIVER LIBRARY
-M:     Alexandru Ardelean <alexandru.ardelean@analog.com>
+M:     Nuno Sa <nuno.sa@analog.com>
 L:     linux-iio@vger.kernel.org
 S:     Supported
 F:     drivers/iio/imu/adis.c
@@ -1170,7 +1181,7 @@ M:        Joel Fernandes <joel@joelfernandes.org>
 M:     Christian Brauner <christian@brauner.io>
 M:     Hridya Valsaraju <hridya@google.com>
 M:     Suren Baghdasaryan <surenb@google.com>
-L:     devel@driverdev.osuosl.org
+L:     linux-kernel@vger.kernel.org
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
 F:     drivers/android/
@@ -2478,7 +2489,7 @@ N:        sc27xx
 N:     sc2731
 
 ARM/STI ARCHITECTURE
-M:     Patrice Chotard <patrice.chotard@st.com>
+M:     Patrice Chotard <patrice.chotard@foss.st.com>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
 W:     http://www.stlinux.com
@@ -2511,7 +2522,7 @@ F:        include/linux/remoteproc/st_slim_rproc.h
 
 ARM/STM32 ARCHITECTURE
 M:     Maxime Coquelin <mcoquelin.stm32@gmail.com>
-M:     Alexandre Torgue <alexandre.torgue@st.com>
+M:     Alexandre Torgue <alexandre.torgue@foss.st.com>
 L:     linux-stm32@st-md-mailman.stormreply.com (moderated for non-subscribers)
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 S:     Maintained
@@ -2569,7 +2580,7 @@ L:        linux-kernel@vger.kernel.org
 S:     Maintained
 F:     drivers/clk/keystone/
 
-ARM/TEXAS INSTRUMENT KEYSTONE ClOCKSOURCE
+ARM/TEXAS INSTRUMENT KEYSTONE CLOCKSOURCE
 M:     Santosh Shilimkar <ssantosh@kernel.org>
 L:     linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
 L:     linux-kernel@vger.kernel.org
@@ -2828,9 +2839,7 @@ S:        Odd fixes
 W:     http://sourceforge.net/projects/xscaleiop
 F:     Documentation/crypto/async-tx-api.rst
 F:     crypto/async_tx/
-F:     drivers/dma/
 F:     include/linux/async_tx.h
-F:     include/linux/dmaengine.h
 
 AT24 EEPROM DRIVER
 M:     Bartosz Golaszewski <bgolaszewski@baylibre.com>
@@ -2975,7 +2984,7 @@ F:        include/uapi/linux/audit.h
 F:     kernel/audit*
 
 AUXILIARY DISPLAY DRIVERS
-M:     Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M:     Miguel Ojeda <ojeda@kernel.org>
 S:     Maintained
 F:     drivers/auxdisplay/
 F:     include/linux/cfag12864b.h
@@ -3106,7 +3115,7 @@ C:        irc://irc.oftc.net/bcache
 F:     drivers/md/bcache/
 
 BDISP ST MEDIA DRIVER
-M:     Fabien Dessenne <fabien.dessenne@st.com>
+M:     Fabien Dessenne <fabien.dessenne@foss.st.com>
 L:     linux-media@vger.kernel.org
 S:     Supported
 W:     https://linuxtv.org
@@ -3666,7 +3675,7 @@ M:        bcm-kernel-feedback-list@broadcom.com
 L:     linux-pm@vger.kernel.org
 S:     Maintained
 T:     git git://github.com/broadcom/stblinux.git
-F:     drivers/soc/bcm/bcm-pmb.c
+F:     drivers/soc/bcm/bcm63xx/bcm-pmb.c
 F:     include/dt-bindings/soc/bcm-pmb.h
 
 BROADCOM SPECIFIC AMBA DRIVER (BCMA)
@@ -3733,6 +3742,13 @@ L:       netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/broadcom/tg3.*
 
+BROADCOM VK DRIVER
+M:     Scott Branden <scott.branden@broadcom.com>
+L:     bcm-kernel-feedback-list@broadcom.com
+S:     Supported
+F:     drivers/misc/bcm-vk/
+F:     include/uapi/linux/misc/bcm_vk.h
+
 BROCADE BFA FC SCSI DRIVER
 M:     Anil Gurumurthy <anil.gurumurthy@qlogic.com>
 M:     Sudarsana Kalluru <sudarsana.kalluru@qlogic.com>
@@ -3839,6 +3855,29 @@ W:       https://github.com/Cascoda/ca8210-linux.git
 F:     Documentation/devicetree/bindings/net/ieee802154/ca8210.txt
 F:     drivers/net/ieee802154/ca8210.c
 
+CANAAN/KENDRYTE K210 SOC FPIOA DRIVER
+M:     Damien Le Moal <damien.lemoal@wdc.com>
+L:     linux-riscv@lists.infradead.org
+L:     linux-gpio@vger.kernel.org (pinctrl driver)
+F:     Documentation/devicetree/bindings/pinctrl/canaan,k210-fpioa.yaml
+F:     drivers/pinctrl/pinctrl-k210.c
+
+CANAAN/KENDRYTE K210 SOC RESET CONTROLLER DRIVER
+M:     Damien Le Moal <damien.lemoal@wdc.com>
+L:     linux-kernel@vger.kernel.org
+L:     linux-riscv@lists.infradead.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/reset/canaan,k210-rst.yaml
+F:     drivers/reset/reset-k210.c
+
+CANAAN/KENDRYTE K210 SOC SYSTEM CONTROLLER DRIVER
+M:     Damien Le Moal <damien.lemoal@wdc.com>
+L:     linux-riscv@lists.infradead.org
+S:     Maintained
+F:     Documentation/devicetree/bindings/mfd/canaan,k210-sysctl.yaml
+F:     drivers/soc/canaan/
+F:     include/soc/canaan/
+
 CACHEFILES: FS-CACHE BACKEND FOR CACHING ON MOUNTED FILESYSTEMS
 M:     David Howells <dhowells@redhat.com>
 L:     linux-cachefs@redhat.com (moderated for non-subscribers)
@@ -4114,13 +4153,13 @@ F:      scripts/extract-cert.c
 F:     scripts/sign-file.c
 
 CFAG12864B LCD DRIVER
-M:     Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M:     Miguel Ojeda <ojeda@kernel.org>
 S:     Maintained
 F:     drivers/auxdisplay/cfag12864b.c
 F:     include/linux/cfag12864b.h
 
 CFAG12864BFB LCD FRAMEBUFFER DRIVER
-M:     Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M:     Miguel Ojeda <ojeda@kernel.org>
 S:     Maintained
 F:     drivers/auxdisplay/cfag12864bfb.c
 F:     include/linux/cfag12864b.h
@@ -4290,7 +4329,7 @@ S:        Supported
 F:     drivers/infiniband/hw/usnic/
 
 CLANG-FORMAT FILE
-M:     Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+M:     Miguel Ojeda <ojeda@kernel.org>
 S:     Maintained
 F:     .clang-format
 
@@ -4305,8 +4344,6 @@ C:        irc://chat.freenode.net/clangbuiltlinux
 F:     Documentation/kbuild/llvm.rst
 F:     include/linux/compiler-clang.h
 F:     scripts/clang-tools/
-F:     scripts/clang-version.sh
-F:     scripts/lld-version.sh
 K:     \b(?i:clang|llvm)\b
 
 CLEANCACHE API
@@ -4432,10 +4469,21 @@ S:      Maintained
 F:     drivers/platform/x86/compal-laptop.c
 
 COMPILER ATTRIBUTES
-M:     Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
+M:     Miguel Ojeda <ojeda@kernel.org>
 S:     Maintained
 F:     include/linux/compiler_attributes.h
 
+COMPUTE EXPRESS LINK (CXL)
+M:     Alison Schofield <alison.schofield@intel.com>
+M:     Vishal Verma <vishal.l.verma@intel.com>
+M:     Ira Weiny <ira.weiny@intel.com>
+M:     Ben Widawsky <ben.widawsky@intel.com>
+M:     Dan Williams <dan.j.williams@intel.com>
+L:     linux-cxl@vger.kernel.org
+S:     Maintained
+F:     drivers/cxl/
+F:     include/uapi/linux/cxl_mem.h
+
 CONEXANT ACCESSRUNNER USB DRIVER
 L:     accessrunner-general@lists.sourceforge.net
 S:     Orphan
@@ -5032,7 +5080,7 @@ S:        Maintained
 F:     drivers/platform/x86/dell/dell-wmi.c
 
 DELTA ST MEDIA DRIVER
-M:     Hugues Fruchet <hugues.fruchet@st.com>
+M:     Hugues Fruchet <hugues.fruchet@foss.st.com>
 L:     linux-media@vger.kernel.org
 S:     Supported
 W:     https://linuxtv.org
@@ -5271,6 +5319,7 @@ T:        git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/dmaengine.git
 F:     Documentation/devicetree/bindings/dma/
 F:     Documentation/driver-api/dmaengine/
 F:     drivers/dma/
+F:     include/linux/dma/
 F:     include/linux/dmaengine.h
 F:     include/linux/of_dma.h
 
@@ -5786,7 +5835,7 @@ M:        David Airlie <airlied@linux.ie>
 M:     Daniel Vetter <daniel@ffwll.ch>
 L:     dri-devel@lists.freedesktop.org
 S:     Maintained
-B:     https://bugs.freedesktop.org/
+B:     https://gitlab.freedesktop.org/drm
 C:     irc://chat.freenode.net/dri-devel
 T:     git git://anongit.freedesktop.org/drm/drm
 F:     Documentation/devicetree/bindings/display/
@@ -5957,7 +6006,6 @@ F:        drivers/gpu/drm/rockchip/
 
 DRM DRIVERS FOR STI
 M:     Benjamin Gaignard <benjamin.gaignard@linaro.org>
-M:     Vincent Abriou <vincent.abriou@st.com>
 L:     dri-devel@lists.freedesktop.org
 S:     Maintained
 T:     git git://anongit.freedesktop.org/drm/drm-misc
@@ -5965,10 +6013,9 @@ F:       Documentation/devicetree/bindings/display/st,stih4xx.txt
 F:     drivers/gpu/drm/sti
 
 DRM DRIVERS FOR STM
-M:     Yannick Fertre <yannick.fertre@st.com>
-M:     Philippe Cornu <philippe.cornu@st.com>
+M:     Yannick Fertre <yannick.fertre@foss.st.com>
+M:     Philippe Cornu <philippe.cornu@foss.st.com>
 M:     Benjamin Gaignard <benjamin.gaignard@linaro.org>
-M:     Vincent Abriou <vincent.abriou@st.com>
 L:     dri-devel@lists.freedesktop.org
 S:     Maintained
 T:     git git://anongit.freedesktop.org/drm/drm-misc
@@ -6940,9 +6987,10 @@ M:       Wu Hao <hao.wu@intel.com>
 R:     Tom Rix <trix@redhat.com>
 L:     linux-fpga@vger.kernel.org
 S:     Maintained
-F:     Documentation/ABI/testing/sysfs-bus-dfl
+F:     Documentation/ABI/testing/sysfs-bus-dfl*
 F:     Documentation/fpga/dfl.rst
 F:     drivers/fpga/dfl*
+F:     include/linux/dfl.h
 F:     include/uapi/linux/fpga-dfl.h
 
 FPGA MANAGER FRAMEWORK
@@ -7426,8 +7474,9 @@ F:        include/uapi/asm-generic/
 GENERIC PHY FRAMEWORK
 M:     Kishon Vijay Abraham I <kishon@ti.com>
 M:     Vinod Koul <vkoul@kernel.org>
-L:     linux-kernel@vger.kernel.org
+L:     linux-phy@lists.infradead.org
 S:     Supported
+Q:     https://patchwork.kernel.org/project/linux-phy/list/
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git
 F:     Documentation/devicetree/bindings/phy/
 F:     drivers/phy/
@@ -7908,6 +7957,12 @@ F:       drivers/hid/
 F:     include/linux/hid*
 F:     include/uapi/linux/hid*
 
+HID PLAYSTATION DRIVER
+M:     Roderick Colenbrander <roderick.colenbrander@sony.com>
+L:     linux-input@vger.kernel.org
+S:     Supported
+F:     drivers/hid/hid-playstation.c
+
 HID SENSOR HUB DRIVERS
 M:     Jiri Kosina <jikos@kernel.org>
 M:     Jonathan Cameron <jic23@kernel.org>
@@ -8060,7 +8115,6 @@ F:        drivers/crypto/hisilicon/sec2/sec_main.c
 
 HISILICON STAGING DRIVERS FOR HIKEY 960/970
 M:     Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
-L:     devel@driverdev.osuosl.org
 S:     Maintained
 F:     drivers/staging/hikey9xx/
 
@@ -8175,7 +8229,7 @@ F:        include/linux/hugetlb.h
 F:     mm/hugetlb.c
 
 HVA ST MEDIA DRIVER
-M:     Jean-Christophe Trotin <jean-christophe.trotin@st.com>
+M:     Jean-Christophe Trotin <jean-christophe.trotin@foss.st.com>
 L:     linux-media@vger.kernel.org
 S:     Supported
 W:     https://linuxtv.org
@@ -8465,6 +8519,7 @@ IBM Power SRIOV Virtual NIC Device Driver
 M:     Dany Madden <drt@linux.ibm.com>
 M:     Lijun Pan <ljp@linux.ibm.com>
 M:     Sukadev Bhattiprolu <sukadev@linux.ibm.com>
+R:     Thomas Falcon <tlfalcon@linux.ibm.com>
 L:     netdev@vger.kernel.org
 S:     Supported
 F:     drivers/net/ethernet/ibm/ibmvnic.*
@@ -9089,9 +9144,7 @@ F:        drivers/gpio/gpio-*cove.c
 INTEL PMIC MULTIFUNCTION DEVICE DRIVERS
 M:     Andy Shevchenko <andy@kernel.org>
 S:     Maintained
-F:     drivers/mfd/intel_msic.c
 F:     drivers/mfd/intel_soc_pmic*
-F:     include/linux/mfd/intel_msic.h
 F:     include/linux/mfd/intel_soc_pmic*
 
 INTEL PMT DRIVER
@@ -9838,6 +9891,18 @@ F:       include/linux/keyctl.h
 F:     include/uapi/linux/keyctl.h
 F:     security/keys/
 
+KFENCE
+M:     Alexander Potapenko <glider@google.com>
+M:     Marco Elver <elver@google.com>
+R:     Dmitry Vyukov <dvyukov@google.com>
+L:     kasan-dev@googlegroups.com
+S:     Maintained
+F:     Documentation/dev-tools/kfence.rst
+F:     arch/*/include/asm/kfence.h
+F:     include/linux/kfence.h
+F:     lib/Kconfig.kfence
+F:     mm/kfence/
+
 KFIFO
 M:     Stefani Seibold <stefani@seibold.net>
 S:     Maintained
@@ -9898,7 +9963,7 @@ F:        include/linux/kprobes.h
 F:     kernel/kprobes.c
 
 KS0108 LCD CONTROLLER DRIVER
-M:     Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>
+M:     Miguel Ojeda <ojeda@kernel.org>
 S:     Maintained
 F:     Documentation/admin-guide/auxdisplay/ks0108.rst
 F:     drivers/auxdisplay/ks0108.c
@@ -9964,7 +10029,6 @@ F:       scripts/leaking_addresses.pl
 
 LED SUBSYSTEM
 M:     Pavel Machek <pavel@ucw.cz>
-R:     Dan Murphy <dmurphy@ti.com>
 L:     linux-leds@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
@@ -10650,7 +10714,8 @@ F:      drivers/net/ethernet/marvell/mvpp2/
 
 MARVELL MWIFIEX WIRELESS DRIVER
 M:     Amitkumar Karwar <amitkarwar@gmail.com>
-M:     Ganapathi Bhat <ganapathi.bhat@nxp.com>
+M:     Ganapathi Bhat <ganapathi017@gmail.com>
+M:     Sharvari Harisangam <sharvari.harisangam@nxp.com>
 M:     Xinming Hu <huxinming820@gmail.com>
 L:     linux-wireless@vger.kernel.org
 S:     Maintained
@@ -10839,7 +10904,6 @@ T:      git git://linuxtv.org/media_tree.git
 F:     drivers/media/radio/radio-maxiradio*
 
 MCAN MMIO DEVICE DRIVER
-M:     Dan Murphy <dmurphy@ti.com>
 M:     Pankaj Sharma <pankj.sharma@samsung.com>
 L:     linux-can@vger.kernel.org
 S:     Maintained
@@ -11100,7 +11164,7 @@ T:      git git://linuxtv.org/media_tree.git
 F:     drivers/media/dvb-frontends/stv6111*
 
 MEDIA DRIVERS FOR STM32 - DCMI
-M:     Hugues Fruchet <hugues.fruchet@st.com>
+M:     Hugues Fruchet <hugues.fruchet@foss.st.com>
 L:     linux-media@vger.kernel.org
 S:     Supported
 T:     git git://linuxtv.org/media_tree.git
@@ -11608,7 +11672,6 @@ F:      drivers/dma/at_hdmac.c
 F:     drivers/dma/at_hdmac_regs.h
 F:     drivers/dma/at_xdmac.c
 F:     include/dt-bindings/dma/at91.h
-F:     include/linux/platform_data/dma-atmel.h
 
 MICROCHIP AT91 SERIAL DRIVER
 M:     Richard Genoud <richard.genoud@gmail.com>
@@ -12472,7 +12535,7 @@ NETWORKING [MPTCP]
 M:     Mat Martineau <mathew.j.martineau@linux.intel.com>
 M:     Matthieu Baerts <matthieu.baerts@tessares.net>
 L:     netdev@vger.kernel.org
-L:     mptcp@lists.01.org
+L:     mptcp@lists.linux.dev
 S:     Maintained
 W:     https://github.com/multipath-tcp/mptcp_net-next/wiki
 B:     https://github.com/multipath-tcp/mptcp_net-next/issues
@@ -13860,6 +13923,13 @@ S:     Supported
 F:     Documentation/devicetree/bindings/pci/mediatek*
 F:     drivers/pci/controller/*mediatek*
 
+PCIE DRIVER FOR MICROCHIP
+M:     Daire McNamara <daire.mcnamara@microchip.com>
+L:     linux-pci@vger.kernel.org
+S:     Supported
+F:     Documentation/devicetree/bindings/pci/microchip*
+F:     drivers/pci/controller/*microchip*
+
 PCIE DRIVER FOR QUALCOMM MSM
 M:     Stanimir Varbanov <svarbanov@mm-sol.com>
 L:     linux-pci@vger.kernel.org
@@ -14636,15 +14706,11 @@ F:    drivers/net/ethernet/qlogic/qlcnic/
 QLOGIC QLGE 10Gb ETHERNET DRIVER
 M:     Manish Chopra <manishc@marvell.com>
 M:     GR-Linux-NIC-Dev@marvell.com
-L:     netdev@vger.kernel.org
-S:     Supported
-F:     drivers/staging/qlge/
-
-QLOGIC QLGE 10Gb ETHERNET DRIVER
 M:     Coiby Xu <coiby.xu@gmail.com>
 L:     netdev@vger.kernel.org
-S:     Maintained
+S:     Supported
 F:     Documentation/networking/device_drivers/qlogic/qlge.rst
+F:     drivers/staging/qlge/
 
 QM1D1B0004 MEDIA DRIVER
 M:     Akihiro Tsukada <tskd08@gmail.com>
@@ -14671,9 +14737,11 @@ M:     Stuart Yoder <stuyoder@gmail.com>
 M:     Laurentiu Tudor <laurentiu.tudor@nxp.com>
 L:     linux-kernel@vger.kernel.org
 S:     Maintained
+F:     Documentation/ABI/stable/sysfs-bus-fsl-mc
 F:     Documentation/devicetree/bindings/misc/fsl,qoriq-mc.txt
 F:     Documentation/networking/device_drivers/ethernet/freescale/dpaa2/overview.rst
 F:     drivers/bus/fsl-mc/
+F:     include/uapi/linux/fsl_mc.h
 
 QT1010 MEDIA DRIVER
 M:     Antti Palosaari <crope@iki.fi>
@@ -15131,6 +15199,7 @@ F:      fs/reiserfs/
 REMOTE PROCESSOR (REMOTEPROC) SUBSYSTEM
 M:     Ohad Ben-Cohen <ohad@wizery.com>
 M:     Bjorn Andersson <bjorn.andersson@linaro.org>
+M:     Mathieu Poirier <mathieu.poirier@linaro.org>
 L:     linux-remoteproc@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rproc-next
@@ -15144,6 +15213,7 @@ F:      include/linux/remoteproc/
 REMOTE PROCESSOR MESSAGING (RPMSG) SUBSYSTEM
 M:     Ohad Ben-Cohen <ohad@wizery.com>
 M:     Bjorn Andersson <bjorn.andersson@linaro.org>
+M:     Mathieu Poirier <mathieu.poirier@linaro.org>
 L:     linux-remoteproc@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/andersson/remoteproc.git rpmsg-next
@@ -15560,8 +15630,8 @@ F:      Documentation/s390/pci.rst
 
 S390 VFIO AP DRIVER
 M:     Tony Krowiak <akrowiak@linux.ibm.com>
-M:     Pierre Morel <pmorel@linux.ibm.com>
 M:     Halil Pasic <pasic@linux.ibm.com>
+M:     Jason Herne <jjherne@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 S:     Supported
 W:     http://www.ibm.com/developerworks/linux/linux390/
@@ -15573,6 +15643,7 @@ F:      drivers/s390/crypto/vfio_ap_private.h
 S390 VFIO-CCW DRIVER
 M:     Cornelia Huck <cohuck@redhat.com>
 M:     Eric Farman <farman@linux.ibm.com>
+M:     Matthew Rosato <mjrosato@linux.ibm.com>
 R:     Halil Pasic <pasic@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 L:     kvm@vger.kernel.org
@@ -15583,6 +15654,7 @@ F:      include/uapi/linux/vfio_ccw.h
 
 S390 VFIO-PCI DRIVER
 M:     Matthew Rosato <mjrosato@linux.ibm.com>
+M:     Eric Farman <farman@linux.ibm.com>
 L:     linux-s390@vger.kernel.org
 L:     kvm@vger.kernel.org
 S:     Supported
@@ -16288,13 +16360,6 @@ S:     Maintained
 F:     Documentation/devicetree/bindings/i3c/silvaco,i3c-master.yaml
 F:     drivers/i3c/master/svc-i3c-master.c
 
-SIMPLE FIRMWARE INTERFACE (SFI)
-S:     Obsolete
-W:     http://simplefirmware.org/
-F:     arch/x86/platform/sfi/
-F:     drivers/sfi/
-F:     include/linux/sfi*.h
-
 SIMPLEFB FB DRIVER
 M:     Hans de Goede <hdegoede@redhat.com>
 L:     linux-fbdev@vger.kernel.org
@@ -16716,6 +16781,7 @@ R:      Pierre-Louis Bossart <pierre-louis.bossart@linux.intel.com>
 R:     Sanyog Kale <sanyog.r.kale@intel.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Supported
+T:     git git://git.kernel.org/pub/scm/linux/kernel/git/vkoul/soundwire.git
 F:     Documentation/driver-api/soundwire/
 F:     drivers/soundwire/
 F:     include/linux/soundwire/
@@ -16818,8 +16884,10 @@ F:     tools/spi/
 
 SPIDERNET NETWORK DRIVER for CELL
 M:     Ishizaki Kou <kou.ishizaki@toshiba.co.jp>
+M:     Geoff Levand <geoff@infradead.org>
 L:     netdev@vger.kernel.org
-S:     Supported
+L:     linuxppc-dev@lists.ozlabs.org
+S:     Maintained
 F:     Documentation/networking/device_drivers/ethernet/toshiba/spider_net.rst
 F:     drivers/net/ethernet/toshiba/spider_net*
 
@@ -16873,7 +16941,8 @@ F:      Documentation/devicetree/bindings/media/i2c/st,st-mipid02.txt
 F:     drivers/media/i2c/st-mipid02.c
 
 ST STM32 I2C/SMBUS DRIVER
-M:     Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+M:     Pierre-Yves MORDRET <pierre-yves.mordret@foss.st.com>
+M:     Alain Volmat <alain.volmat@foss.st.com>
 L:     linux-i2c@vger.kernel.org
 S:     Maintained
 F:     drivers/i2c/busses/i2c-stm32*
@@ -16971,7 +17040,7 @@ F:      drivers/staging/vt665?/
 
 STAGING SUBSYSTEM
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:     devel@driverdev.osuosl.org
+L:     linux-staging@lists.linux.dev
 S:     Supported
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
 F:     drivers/staging/
@@ -16998,7 +17067,7 @@ F:      kernel/jump_label.c
 F:     kernel/static_call.c
 
 STI AUDIO (ASoC) DRIVERS
-M:     Arnaud Pouliquen <arnaud.pouliquen@st.com>
+M:     Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
@@ -17018,15 +17087,15 @@ T:    git git://linuxtv.org/media_tree.git
 F:     drivers/media/usb/stk1160/
 
 STM32 AUDIO (ASoC) DRIVERS
-M:     Olivier Moysan <olivier.moysan@st.com>
-M:     Arnaud Pouliquen <arnaud.pouliquen@st.com>
+M:     Olivier Moysan <olivier.moysan@foss.st.com>
+M:     Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
 L:     alsa-devel@alsa-project.org (moderated for non-subscribers)
 S:     Maintained
 F:     Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml
 F:     sound/soc/stm/
 
 STM32 TIMER/LPTIMER DRIVERS
-M:     Fabrice Gasnier <fabrice.gasnier@st.com>
+M:     Fabrice Gasnier <fabrice.gasnier@foss.st.com>
 S:     Maintained
 F:     Documentation/ABI/testing/*timer-stm32
 F:     Documentation/devicetree/bindings/*/*stm32-*timer*
@@ -17036,7 +17105,7 @@ F:      include/linux/*/stm32-*tim*
 
 STMMAC ETHERNET DRIVER
 M:     Giuseppe Cavallaro <peppe.cavallaro@st.com>
-M:     Alexandre Torgue <alexandre.torgue@st.com>
+M:     Alexandre Torgue <alexandre.torgue@foss.st.com>
 M:     Jose Abreu <joabreu@synopsys.com>
 L:     netdev@vger.kernel.org
 S:     Supported
@@ -17778,7 +17847,6 @@ S:      Maintained
 F:     drivers/thermal/ti-soc-thermal/
 
 TI BQ27XXX POWER SUPPLY DRIVER
-R:     Dan Murphy <dmurphy@ti.com>
 F:     drivers/power/supply/bq27xxx_battery.c
 F:     drivers/power/supply/bq27xxx_battery_i2c.c
 F:     include/linux/power/bq27xxx_battery.h
@@ -17913,7 +17981,6 @@ S:      Odd Fixes
 F:     sound/soc/codecs/tas571x*
 
 TI TCAN4X5X DEVICE DRIVER
-M:     Dan Murphy <dmurphy@ti.com>
 L:     linux-can@vger.kernel.org
 S:     Maintained
 F:     Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@ -19066,7 +19133,7 @@ VME SUBSYSTEM
 M:     Martyn Welch <martyn@welchs.me.uk>
 M:     Manohar Vanga <manohar.vanga@gmail.com>
 M:     Greg Kroah-Hartman <gregkh@linuxfoundation.org>
-L:     devel@driverdev.osuosl.org
+L:     linux-kernel@vger.kernel.org
 S:     Maintained
 T:     git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
 F:     Documentation/driver-api/vme.rst
@@ -19097,7 +19164,7 @@ S:      Maintained
 F:     drivers/infiniband/hw/vmw_pvrdma/
 
 VMware PVSCSI driver
-M:     Jim Gill <jgill@vmware.com>
+M:     Vishal Bhakta <vbhakta@vmware.com>
 M:     VMware PV-Drivers <pv-drivers@vmware.com>
 L:     linux-scsi@vger.kernel.org
 S:     Maintained
index ac02514..cc77fd4 100644 (file)
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,9 @@
 # SPDX-License-Identifier: GPL-2.0
 VERSION = 5
-PATCHLEVEL = 11
+PATCHLEVEL = 12
 SUBLEVEL = 0
-EXTRAVERSION =
-NAME = 💕 Valentine's Day Edition 💕
+EXTRAVERSION = -rc6
+NAME = Frozen Wasteland
 
 # *DOCUMENTATION*
 # To see a list of typical targets execute "make help"
@@ -96,10 +96,41 @@ endif
 
 ifneq ($(findstring s,$(filter-out --%,$(MAKEFLAGS))),)
   quiet=silent_
+  KBUILD_VERBOSE = 0
 endif
 
 export quiet Q KBUILD_VERBOSE
 
+# Call a source code checker (by default, "sparse") as part of the
+# C compilation.
+#
+# Use 'make C=1' to enable checking of only re-compiled files.
+# Use 'make C=2' to enable checking of *all* source files, regardless
+# of whether they are re-compiled or not.
+#
+# See the file "Documentation/dev-tools/sparse.rst" for more details,
+# including where to get the "sparse" utility.
+
+ifeq ("$(origin C)", "command line")
+  KBUILD_CHECKSRC = $(C)
+endif
+ifndef KBUILD_CHECKSRC
+  KBUILD_CHECKSRC = 0
+endif
+
+export KBUILD_CHECKSRC
+
+# Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the
+# directory of external module to build. Setting M= takes precedence.
+ifeq ("$(origin M)", "command line")
+  KBUILD_EXTMOD := $(M)
+endif
+
+$(if $(word 2, $(KBUILD_EXTMOD)), \
+       $(error building multiple external modules is not supported))
+
+export KBUILD_EXTMOD
+
 # Kbuild will save output files in the current working directory.
 # This does not need to match to the root of the kernel source tree.
 #
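As a hedged illustration of what the relocated C= hooks above drive (example code is hypothetical, not from this tree): running 'make C=1' invokes sparse, whose address-space annotations flag a direct dereference of a __user pointer, while the copy_from_user() path checks clean.

#include <linux/uaccess.h>

/* Hypothetical example: with 'make C=1', sparse warns on the
 * commented-out direct dereference of a __user pointer
 * ("dereference of noderef expression"); copying via
 * copy_from_user() passes without warnings. */
static int read_user_flag(const int __user *uptr, int *out)
{
	/* *out = *uptr; -- sparse would warn here */
	if (copy_from_user(out, uptr, sizeof(*out)))
		return -EFAULT;
	return 0;
}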
@@ -145,7 +176,8 @@ else
 need-sub-make := 1
 endif
 
-abs_srctree := $(realpath $(dir $(lastword $(MAKEFILE_LIST))))
+this-makefile := $(lastword $(MAKEFILE_LIST))
+abs_srctree := $(realpath $(dir $(this-makefile)))
 
 ifneq ($(words $(subst :, ,$(abs_srctree))), 1)
 $(error source directory cannot contain spaces or colons)
@@ -160,8 +192,6 @@ MAKEFLAGS += --include-dir=$(abs_srctree)
 need-sub-make := 1
 endif
 
-this-makefile := $(lastword $(MAKEFILE_LIST))
-
 ifneq ($(filter 3.%,$(MAKE_VERSION)),)
 # 'MAKEFLAGS += -rR' does not immediately become effective for GNU Make 3.x
 # We need to invoke sub-make to avoid implicit rules in the top Makefile.
@@ -195,36 +225,6 @@ ifeq ($(need-sub-make),)
 # so that IDEs/editors are able to understand relative filenames.
 MAKEFLAGS += --no-print-directory
 
-# Call a source code checker (by default, "sparse") as part of the
-# C compilation.
-#
-# Use 'make C=1' to enable checking of only re-compiled files.
-# Use 'make C=2' to enable checking of *all* source files, regardless
-# of whether they are re-compiled or not.
-#
-# See the file "Documentation/dev-tools/sparse.rst" for more details,
-# including where to get the "sparse" utility.
-
-ifeq ("$(origin C)", "command line")
-  KBUILD_CHECKSRC = $(C)
-endif
-ifndef KBUILD_CHECKSRC
-  KBUILD_CHECKSRC = 0
-endif
-
-# Use make M=dir or set the environment variable KBUILD_EXTMOD to specify the
-# directory of external module to build. Setting M= takes precedence.
-ifeq ("$(origin M)", "command line")
-  KBUILD_EXTMOD := $(M)
-endif
-
-$(if $(word 2, $(KBUILD_EXTMOD)), \
-       $(error building multiple external modules is not supported))
-
-export KBUILD_CHECKSRC KBUILD_EXTMOD
-
-extmod-prefix = $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)
-
 ifeq ($(abs_srctree),$(abs_objtree))
         # building in the source tree
         srctree := .
@@ -257,7 +257,6 @@ export building_out_of_srctree srctree objtree VPATH
 # of make so .config is not included in this case either (for *config).
 
 version_h := include/generated/uapi/linux/version.h
-old_version_h := include/linux/version.h
 
 clean-targets := %clean mrproper cleandocs
 no-dot-config-targets := $(clean-targets) \
@@ -265,7 +264,8 @@ no-dot-config-targets := $(clean-targets) \
                         $(version_h) headers headers_% archheaders archscripts \
                         %asm-generic kernelversion %src-pkg dt_binding_check \
                         outputmakefile
-no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease
+no-sync-config-targets := $(no-dot-config-targets) %install kernelrelease \
+                         image_name
 single-targets := %.a %.i %.ko %.lds %.ll %.lst %.mod %.o %.s %.symtypes %/
 
 config-build   :=
@@ -479,6 +479,7 @@ USERINCLUDE    := \
                -I$(objtree)/arch/$(SRCARCH)/include/generated/uapi \
                -I$(srctree)/include/uapi \
                -I$(objtree)/include/generated/uapi \
+                -include $(srctree)/include/linux/compiler-version.h \
                 -include $(srctree)/include/linux/kconfig.h
 
 # Use LINUXINCLUDE when you must reference the include/ directory.
@@ -558,7 +559,13 @@ ifdef building_out_of_srctree
        { echo "# this is build directory, ignore it"; echo "*"; } > .gitignore
 endif
 
-ifneq ($(shell $(CC) --version 2>&1 | head -n 1 | grep clang),)
+# The expansion should be delayed until arch/$(SRCARCH)/Makefile is included.
+# Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile.
+# CC_VERSION_TEXT is referenced from Kconfig (so it needs export),
+# and from include/config/auto.conf.cmd to detect the compiler upgrade.
+CC_VERSION_TEXT = $(shell $(CC) --version 2>/dev/null | head -n 1 | sed 's/\#//g')
+
+ifneq ($(findstring clang,$(CC_VERSION_TEXT)),)
 ifneq ($(CROSS_COMPILE),)
 CLANG_FLAGS    += --target=$(notdir $(CROSS_COMPILE:%-=%))
 GCC_TOOLCHAIN_DIR := $(dir $(shell which $(CROSS_COMPILE)elfedit))
@@ -577,12 +584,6 @@ KBUILD_AFLAGS      += $(CLANG_FLAGS)
 export CLANG_FLAGS
 endif
 
-# The expansion should be delayed until arch/$(SRCARCH)/Makefile is included.
-# Some architectures define CROSS_COMPILE in arch/$(SRCARCH)/Makefile.
-# CC_VERSION_TEXT is referenced from Kconfig (so it needs export),
-# and from include/config/auto.conf.cmd to detect the compiler upgrade.
-CC_VERSION_TEXT = $(shell $(CC) --version 2>/dev/null | head -n 1)
-
 ifdef config-build
 # ===========================================================================
 # *config targets only - make sure prerequisites are updated, and descend
@@ -832,8 +833,10 @@ ifneq ($(LLVM_IAS),1)
 KBUILD_AFLAGS  += -Wa,-gdwarf-2
 endif
 
-ifdef CONFIG_DEBUG_INFO_DWARF4
-DEBUG_CFLAGS   += -gdwarf-4
+ifndef CONFIG_DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
+dwarf-version-$(CONFIG_DEBUG_INFO_DWARF4) := 4
+dwarf-version-$(CONFIG_DEBUG_INFO_DWARF5) := 5
+DEBUG_CFLAGS   += -gdwarf-$(dwarf-version-y)
 endif
 
 ifdef CONFIG_DEBUG_INFO_REDUCED
@@ -853,12 +856,8 @@ KBUILD_CFLAGS += $(DEBUG_CFLAGS)
 export DEBUG_CFLAGS
 
 ifdef CONFIG_FUNCTION_TRACER
-ifdef CONFIG_FTRACE_MCOUNT_RECORD
-  # gcc 5 supports generating the mcount tables directly
-  ifeq ($(call cc-option-yn,-mrecord-mcount),y)
-    CC_FLAGS_FTRACE    += -mrecord-mcount
-    export CC_USING_RECORD_MCOUNT := 1
-  endif
+ifdef CONFIG_FTRACE_MCOUNT_USE_CC
+  CC_FLAGS_FTRACE      += -mrecord-mcount
   ifdef CONFIG_HAVE_NOP_MCOUNT
     ifeq ($(call cc-option-yn, -mnop-mcount),y)
       CC_FLAGS_FTRACE  += -mnop-mcount
@@ -866,6 +865,15 @@ ifdef CONFIG_FTRACE_MCOUNT_RECORD
     endif
   endif
 endif
+ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
+  CC_FLAGS_USING       += -DCC_USING_NOP_MCOUNT
+endif
+ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
+  ifdef CONFIG_HAVE_C_RECORDMCOUNT
+    BUILD_C_RECORDMCOUNT := y
+    export BUILD_C_RECORDMCOUNT
+  endif
+endif
 ifdef CONFIG_HAVE_FENTRY
   ifeq ($(call cc-option-yn, -mfentry),y)
     CC_FLAGS_FTRACE    += -mfentry
@@ -875,12 +883,6 @@ endif
 export CC_FLAGS_FTRACE
 KBUILD_CFLAGS  += $(CC_FLAGS_FTRACE) $(CC_FLAGS_USING)
 KBUILD_AFLAGS  += $(CC_FLAGS_USING)
-ifdef CONFIG_DYNAMIC_FTRACE
-       ifdef CONFIG_HAVE_C_RECORDMCOUNT
-               BUILD_C_RECORDMCOUNT := y
-               export BUILD_C_RECORDMCOUNT
-       endif
-endif
 endif
 
 # We trigger additional mismatches with less inlining
@@ -899,6 +901,25 @@ KBUILD_CFLAGS      += $(CC_FLAGS_SCS)
 export CC_FLAGS_SCS
 endif
 
+ifdef CONFIG_LTO_CLANG
+ifdef CONFIG_LTO_CLANG_THIN
+CC_FLAGS_LTO   := -flto=thin -fsplit-lto-unit
+KBUILD_LDFLAGS += --thinlto-cache-dir=$(extmod-prefix).thinlto-cache
+else
+CC_FLAGS_LTO   := -flto
+endif
+CC_FLAGS_LTO   += -fvisibility=hidden
+
+# Limit inlining across translation units to reduce binary size
+KBUILD_LDFLAGS += -mllvm -import-instr-limit=5
+endif
+
+ifdef CONFIG_LTO
+KBUILD_CFLAGS  += -fno-lto $(CC_FLAGS_LTO)
+KBUILD_AFLAGS  += -fno-lto
+export CC_FLAGS_LTO
+endif
+
 ifdef CONFIG_DEBUG_FORCE_FUNCTION_ALIGN_32B
 KBUILD_CFLAGS += -falign-functions=32
 endif
@@ -1054,7 +1075,7 @@ ifdef CONFIG_MODULE_COMPRESS
     mod_compress_cmd = $(KGZIP) -n -f
   endif # CONFIG_MODULE_COMPRESS_GZIP
   ifdef CONFIG_MODULE_COMPRESS_XZ
-    mod_compress_cmd = $(XZ) -f
+    mod_compress_cmd = $(XZ) --lzma2=dict=2MiB -f
   endif # CONFIG_MODULE_COMPRESS_XZ
 endif # CONFIG_MODULE_COMPRESS
 export mod_compress_cmd
@@ -1105,6 +1126,7 @@ endif # CONFIG_BPF
 
 PHONY += prepare0
 
+extmod-prefix = $(if $(KBUILD_EXTMOD),$(KBUILD_EXTMOD)/)
 export MODORDER := $(extmod-prefix)modules.order
 export MODULES_NSDEPS := $(extmod-prefix)modules.nsdeps
 
@@ -1229,6 +1251,10 @@ uapi-asm-generic:
 PHONY += prepare-objtool prepare-resolve_btfids
 prepare-objtool: $(objtool_target)
 ifeq ($(SKIP_STACK_VALIDATION),1)
+ifdef CONFIG_FTRACE_MCOUNT_USE_OBJTOOL
+       @echo "error: Cannot generate __mcount_loc for CONFIG_DYNAMIC_FTRACE=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
+       @false
+endif
 ifdef CONFIG_UNWINDER_ORC
        @echo "error: Cannot generate ORC metadata for CONFIG_UNWINDER_ORC=y, please install libelf-dev, libelf-devel or elfutils-libelf-devel" >&2
        @false
@@ -1258,14 +1284,24 @@ define filechk_utsrelease.h
 endef
 
 define filechk_version.h
-       echo \#define LINUX_VERSION_CODE $(shell                         \
-       expr $(VERSION) \* 65536 + 0$(PATCHLEVEL) \* 256 + 0$(SUBLEVEL)); \
-       echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) + (c))'
+       if [ $(SUBLEVEL) -gt 255 ]; then                                 \
+               echo \#define LINUX_VERSION_CODE $(shell                 \
+               expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + 255); \
+       else                                                             \
+               echo \#define LINUX_VERSION_CODE $(shell                 \
+               expr $(VERSION) \* 65536 + $(PATCHLEVEL) \* 256 + $(SUBLEVEL)); \
+       fi;                                                              \
+       echo '#define KERNEL_VERSION(a,b,c) (((a) << 16) + ((b) << 8) +  \
+       ((c) > 255 ? 255 : (c)))';                                       \
+       echo \#define LINUX_VERSION_MAJOR $(VERSION);                    \
+       echo \#define LINUX_VERSION_PATCHLEVEL $(PATCHLEVEL);            \
+       echo \#define LINUX_VERSION_SUBLEVEL $(SUBLEVEL)
 endef
 
+$(version_h): PATCHLEVEL := $(if $(PATCHLEVEL), $(PATCHLEVEL), 0)
+$(version_h): SUBLEVEL := $(if $(SUBLEVEL), $(SUBLEVEL), 0)
 $(version_h): FORCE
        $(call filechk,version.h)
-       $(Q)rm -f $(old_version_h)
 
 include/generated/utsrelease.h: include/config/kernel.release FORCE
        $(call filechk,utsrelease.h)
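The new clamp keeps LINUX_VERSION_CODE monotonic for stable trees whose SUBLEVEL outgrows its 8-bit field. A minimal C sketch of how the generated macro behaves after this change (the version numbers are illustrative):

#include <stdio.h>

/* Mirrors the KERNEL_VERSION() definition emitted into
 * include/generated/uapi/linux/version.h by the rule above. */
#define KERNEL_VERSION(a, b, c) (((a) << 16) + ((b) << 8) + ((c) > 255 ? 255 : (c)))

int main(void)
{
	printf("%#x\n", KERNEL_VERSION(5, 12, 0));  /* 0x50c00 */
	printf("%#x\n", KERNEL_VERSION(4, 9, 300)); /* clamps to 4.9.255 -> 0x409ff */
	return 0;
}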
@@ -1479,7 +1515,7 @@ endif # CONFIG_MODULES
 # Directories & files removed with 'make clean'
 CLEAN_FILES += include/ksym vmlinux.symvers \
               modules.builtin modules.builtin.modinfo modules.nsdeps \
-              compile_commands.json
+              compile_commands.json .thinlto-cache
 
 # Directories & files removed with 'make mrproper'
 MRPROPER_FILES += include/config include/generated          \
@@ -1739,7 +1775,7 @@ PHONY += compile_commands.json
 
 clean-dirs := $(KBUILD_EXTMOD)
 clean: rm-files := $(KBUILD_EXTMOD)/Module.symvers $(KBUILD_EXTMOD)/modules.nsdeps \
-       $(KBUILD_EXTMOD)/compile_commands.json
+       $(KBUILD_EXTMOD)/compile_commands.json $(KBUILD_EXTMOD)/.thinlto-cache
 
 PHONY += help
 help:
@@ -1836,7 +1872,8 @@ clean: $(clean-dirs)
                -o -name '.tmp_*.o.*' \
                -o -name '*.c.[012]*.*' \
                -o -name '*.ll' \
-               -o -name '*.gcno' \) -type f -print | xargs rm -f
+               -o -name '*.gcno' \
+               -o -name '*.*.symversions' \) -type f -print | xargs rm -f
 
 # Generate tags for editors
 # ---------------------------------------------------------------------------
index 9fc4eb8..ecfd352 100644 (file)
@@ -603,6 +603,95 @@ config SHADOW_CALL_STACK
          reading and writing arbitrary memory may be able to locate them
          and hijack control flow by modifying the stacks.
 
+config LTO
+       bool
+       help
+         Selected if the kernel will be built using the compiler's LTO feature.
+
+config LTO_CLANG
+       bool
+       select LTO
+       help
+         Selected if the kernel will be built using Clang's LTO feature.
+
+config ARCH_SUPPORTS_LTO_CLANG
+       bool
+       help
+         An architecture should select this option if it supports:
+         - compiling with Clang,
+         - compiling inline assembly with Clang's integrated assembler,
+         - and linking with LLD.
+
+config ARCH_SUPPORTS_LTO_CLANG_THIN
+       bool
+       help
+         An architecture should select this option if it can support Clang's
+         ThinLTO mode.
+
+config HAS_LTO_CLANG
+       def_bool y
+       # Clang >= 11: https://github.com/ClangBuiltLinux/linux/issues/510
+       depends on CC_IS_CLANG && CLANG_VERSION >= 110000 && LD_IS_LLD
+       depends on $(success,test $(LLVM_IAS) -eq 1)
+       depends on $(success,$(NM) --help | head -n 1 | grep -qi llvm)
+       depends on $(success,$(AR) --help | head -n 1 | grep -qi llvm)
+       depends on ARCH_SUPPORTS_LTO_CLANG
+       depends on !FTRACE_MCOUNT_USE_RECORDMCOUNT
+       depends on !KASAN || KASAN_HW_TAGS
+       depends on !GCOV_KERNEL
+       help
+         The compiler and Kconfig options support building with Clang's
+         LTO.
+
+choice
+       prompt "Link Time Optimization (LTO)"
+       default LTO_NONE
+       help
+         This option enables Link Time Optimization (LTO), which allows the
+         compiler to optimize binaries globally.
+
+         If unsure, select LTO_NONE. Note that LTO is very resource-intensive,
+         so it's disabled by default.
+
+config LTO_NONE
+       bool "None"
+       help
+         Build the kernel normally, without Link Time Optimization (LTO).
+
+config LTO_CLANG_FULL
+       bool "Clang Full LTO (EXPERIMENTAL)"
+       depends on HAS_LTO_CLANG
+       depends on !COMPILE_TEST
+       select LTO_CLANG
+       help
+         This option enables Clang's full Link Time Optimization (LTO), which
+         allows the compiler to optimize the kernel globally. If you enable
+         this option, the compiler generates LLVM bitcode instead of ELF
+         object files, and the actual compilation from bitcode happens at
+         the LTO link step, which may take several minutes depending on the
+         kernel configuration. More information can be found in LLVM's
+         documentation:
+
+           https://llvm.org/docs/LinkTimeOptimization.html
+
+         During link time, this option can use a large amount of RAM, and
+         may take much longer than the ThinLTO option.
+
+config LTO_CLANG_THIN
+       bool "Clang ThinLTO (EXPERIMENTAL)"
+       depends on HAS_LTO_CLANG && ARCH_SUPPORTS_LTO_CLANG_THIN
+       select LTO_CLANG
+       help
+         This option enables Clang's ThinLTO, which allows for parallel
+         optimization and faster incremental compiles compared to the
+         CONFIG_LTO_CLANG_FULL option. More information can be found
+         in Clang's documentation:
+
+           https://clang.llvm.org/docs/ThinLTO.html
+
+         If unsure, say Y.
+endchoice
+
 config HAVE_ARCH_WITHIN_STACK_FRAMES
        bool
        help
@@ -731,6 +820,12 @@ config HAVE_IRQ_EXIT_ON_IRQ_STACK
          This spares a stack switch and improves cache usage on softirq
          processing.
 
+config HAVE_SOFTIRQ_ON_OWN_STACK
+       bool
+       help
+         Architecture provides a function to run __do_softirq() on a
+         separate stack.
+
 config PGTABLE_LEVELS
        int
        default 2
index 6293675..724c407 100644 (file)
@@ -1,4 +1,3 @@
-CONFIG_EXPERIMENTAL=y
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_LOG_BUF_SHIFT=14
index 6c71554..5112ab9 100644 (file)
@@ -249,7 +249,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        childti->pcb.ksp = (unsigned long) childstack;
        childti->pcb.flags = 1; /* set FEN, clear everything else */
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* kernel thread */
                memset(childstack, 0,
                        sizeof(struct switch_stack) + sizeof(struct pt_regs));
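The same predicate change repeats across the per-arch copy_thread() hunks below (arc, arm): io_uring worker threads begin life in kernel context, so they must take the kernel-thread path rather than inherit user registers. A standalone sketch of the test, with flag values copied from include/linux/sched.h as of this merge and a helper name invented purely for illustration:

#define PF_IO_WORKER 0x00000010 /* task is an io_uring worker thread */
#define PF_KTHREAD   0x00200000 /* kernel thread */

/* Hypothetical helper: true for any task created without a
 * userspace register state worth copying. */
static inline int task_starts_in_kernel(unsigned int flags)
{
	return (flags & (PF_KTHREAD | PF_IO_WORKER)) != 0;
}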
index 659faef..285aaba 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -21,18 +21,19 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_abi_$(basetarget))'                \
                   '$(systbl_offset_$(basetarget))'
 
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-$(kapi)/syscall_table.h: $(syscall) $(systbl)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h
 kapisyshdr-y           += syscall_table.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index a661706..02f0244 100644 (file)
 549    common  faccessat2                      sys_faccessat2
 550    common  process_madvise                 sys_process_madvise
 551    common  epoll_pwait2                    sys_epoll_pwait2
+552    common  mount_setattr                   sys_mount_setattr
index 60d578e..76ad527 100644 (file)
@@ -16,7 +16,7 @@
        memory {
                device_type = "memory";
                /* CONFIG_LINUX_RAM_BASE needs to match low mem start */
-               reg = <0x0 0x80000000 0x0 0x20000000    /* 512 MB low mem */
+               reg = <0x0 0x80000000 0x0 0x40000000    /* 1 GB low mem */
                       0x1 0x00000000 0x0 0x40000000>;  /* 1 GB highmem */
        };
 
index 37f724a..d838d0d 100644 (file)
@@ -191,7 +191,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        childksp[0] = 0;                        /* fp */
        childksp[1] = (unsigned long)ret_from_fork; /* blink */
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(c_regs, 0, sizeof(struct pt_regs));
 
                c_callee->r13 = kthread_arg;
index a78d8f7..fdbe06c 100644 (file)
@@ -96,7 +96,7 @@ stash_usr_regs(struct rt_sigframe __user *sf, struct pt_regs *regs,
                             sizeof(sf->uc.uc_mcontext.regs.scratch));
        err |= __copy_to_user(&sf->uc.uc_sigmask, set, sizeof(sigset_t));
 
-       return err;
+       return err ? -EFAULT : 0;
 }
 
 static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
@@ -110,7 +110,7 @@ static int restore_usr_regs(struct pt_regs *regs, struct rt_sigframe __user *sf)
                                &(sf->uc.uc_mcontext.regs.scratch),
                                sizeof(sf->uc.uc_mcontext.regs.scratch));
        if (err)
-               return err;
+               return -EFAULT;
 
        set_current_blocked(&set);
        regs->bta       = uregs.scratch.bta;
index 74ad425..47bab67 100644 (file)
@@ -187,25 +187,26 @@ static void init_unwind_table(struct unwind_table *table, const char *name,
                              const void *table_start, unsigned long table_size,
                              const u8 *header_start, unsigned long header_size)
 {
-       const u8 *ptr = header_start + 4;
-       const u8 *end = header_start + header_size;
-
        table->core.pc = (unsigned long)core_start;
        table->core.range = core_size;
        table->init.pc = (unsigned long)init_start;
        table->init.range = init_size;
        table->address = table_start;
        table->size = table_size;
-
-       /* See if the linker provided table looks valid. */
-       if (header_size <= 4
-           || header_start[0] != 1
-           || (void *)read_pointer(&ptr, end, header_start[1]) != table_start
-           || header_start[2] == DW_EH_PE_omit
-           || read_pointer(&ptr, end, header_start[2]) <= 0
-           || header_start[3] == DW_EH_PE_omit)
-               header_start = NULL;
-
+       /* Avoid pointer arithmetic on a NULL header pointer. */
+       if (header_start != NULL) {
+               const u8 *ptr = header_start + 4;
+               const u8 *end = header_start + header_size;
+               /* See if the linker provided table looks valid. */
+               if (header_size <= 4
+               || header_start[0] != 1
+               || (void *)read_pointer(&ptr, end, header_start[1])
+                               != table_start
+               || header_start[2] == DW_EH_PE_omit
+               || read_pointer(&ptr, end, header_start[2]) <= 0
+               || header_start[3] == DW_EH_PE_omit)
+                       header_start = NULL;
+       }
        table->hdrsz = header_size;
        smp_wmb();
        table->header = header_start;
index 853aab5..5da96f5 100644 (file)
@@ -348,6 +348,7 @@ config ARCH_EP93XX
        select ARM_AMBA
        imply ARM_PATCH_PHYS_VIRT
        select ARM_VIC
+       select GENERIC_IRQ_MULTI_HANDLER
        select AUTO_ZRELADDR
        select CLKDEV_LOOKUP
        select CLKSRC_MMIO
index 5b213a1..5e33d0e 100644 (file)
@@ -40,6 +40,9 @@
                ethernet1 = &cpsw_emac1;
                spi0 = &spi0;
                spi1 = &spi1;
+               mmc0 = &mmc1;
+               mmc1 = &mmc2;
+               mmc2 = &mmc3;
        };
 
        cpus {
index 646a064..5bd6a66 100644 (file)
@@ -32,7 +32,8 @@
                ranges = <MBUS_ID(0xf0, 0x01) 0 0xf1000000 0x100000
                          MBUS_ID(0x01, 0x1d) 0 0xfff00000 0x100000
                          MBUS_ID(0x09, 0x19) 0 0xf1100000 0x10000
-                         MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000>;
+                         MBUS_ID(0x09, 0x15) 0 0xf1110000 0x10000
+                         MBUS_ID(0x0c, 0x04) 0 0xf1200000 0x100000>;
 
                internal-regs {
 
        phy1: ethernet-phy@1 {
                compatible = "ethernet-phy-ieee802.3-c22";
                reg = <1>;
+               marvell,reg-init = <3 18 0 0x4985>;
 
                /* irq is connected to &pcawan pin 7 */
        };
index 73b6b1f..775ceb3 100644 (file)
 };
 
 &pinctrl {
-       atmel,mux-mask = <
-                        /*     A       B       C       */
-                        0xFFFFFE7F 0xC0E0397F 0xEF00019D       /* pioA */
-                        0x03FFFFFF 0x02FC7E68 0x00780000       /* pioB */
-                        0xffffffff 0xF83FFFFF 0xB800F3FC       /* pioC */
-                        0x003FFFFF 0x003F8000 0x00000000       /* pioD */
-                        >;
-
        adc {
                pinctrl_adc_default: adc_default {
                        atmel,pins = <AT91_PIOB 15 AT91_PERIPH_A AT91_PINCTRL_NONE>;
index 1b11638..e3251f3 100644 (file)
@@ -84,8 +84,8 @@
                                pinctrl-0 = <&pinctrl_macb0_default>;
                                phy-mode = "rmii";
 
-                               ethernet-phy@0 {
-                                       reg = <0x0>;
+                               ethernet-phy@7 {
+                                       reg = <0x7>;
                                        interrupt-parent = <&pioA>;
                                        interrupts = <PIN_PD31 IRQ_TYPE_LEVEL_LOW>;
                                        pinctrl-names = "default";
index 462b1df..720beec 100644 (file)
                        #reset-cells = <1>;
                };
 
-               bsc_intr: interrupt-controller@7ef00040 {
-                       compatible = "brcm,bcm2711-l2-intc", "brcm,l2-intc";
-                       reg = <0x7ef00040 0x30>;
-                       interrupts = <GIC_SPI 117 IRQ_TYPE_LEVEL_HIGH>;
-                       interrupt-controller;
-                       #interrupt-cells = <1>;
-               };
-
                aon_intr: interrupt-controller@7ef00100 {
                        compatible = "brcm,bcm2711-l2-intc", "brcm,l2-intc";
                        reg = <0x7ef00100 0x30>;
                        reg = <0x7ef04500 0x100>, <0x7ef00b00 0x300>;
                        reg-names = "bsc", "auto-i2c";
                        clock-frequency = <97500>;
-                       interrupt-parent = <&bsc_intr>;
-                       interrupts = <0>;
                        status = "disabled";
                };
 
                        reg = <0x7ef09500 0x100>, <0x7ef05b00 0x300>;
                        reg-names = "bsc", "auto-i2c";
                        clock-frequency = <97500>;
-                       interrupt-parent = <&bsc_intr>;
-                       interrupts = <1>;
                        status = "disabled";
                };
        };
index 165c5bc..55c4744 100644 (file)
 */
 
 #include <dt-bindings/input/input.h>
+#include <dt-bindings/input/cros-ec-keyboard.h>
 
 &cros_ec {
-       keyboard-controller {
+       keyboard_controller: keyboard-controller {
                compatible = "google,cros-ec-keyb";
                keypad,num-rows = <8>;
                keypad,num-columns = <13>;
                google,needs-ghost-filter;
 
                linux,keymap = <
-                       MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)
-                       MATRIX_KEY(0x00, 0x02, KEY_F1)
-                       MATRIX_KEY(0x00, 0x03, KEY_B)
-                       MATRIX_KEY(0x00, 0x04, KEY_F10)
-                       MATRIX_KEY(0x00, 0x05, KEY_RO)
-                       MATRIX_KEY(0x00, 0x06, KEY_N)
-                       MATRIX_KEY(0x00, 0x08, KEY_EQUAL)
-                       MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)
-
-                       MATRIX_KEY(0x01, 0x01, KEY_ESC)
-                       MATRIX_KEY(0x01, 0x02, KEY_F4)
-                       MATRIX_KEY(0x01, 0x03, KEY_G)
-                       MATRIX_KEY(0x01, 0x04, KEY_F7)
-                       MATRIX_KEY(0x01, 0x06, KEY_H)
-                       MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)
-                       MATRIX_KEY(0x01, 0x09, KEY_F9)
-                       MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)
-                       MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)
-
-                       MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)
-                       MATRIX_KEY(0x02, 0x01, KEY_TAB)
-                       MATRIX_KEY(0x02, 0x02, KEY_F3)
-                       MATRIX_KEY(0x02, 0x03, KEY_T)
-                       MATRIX_KEY(0x02, 0x04, KEY_F6)
-                       MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)
-                       MATRIX_KEY(0x02, 0x06, KEY_Y)
-                       MATRIX_KEY(0x02, 0x07, KEY_102ND)
-                       MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)
-                       MATRIX_KEY(0x02, 0x09, KEY_F8)
-                       MATRIX_KEY(0x02, 0x0a, KEY_YEN)
-
-                       MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)
-                       MATRIX_KEY(0x03, 0x01, KEY_GRAVE)
-                       MATRIX_KEY(0x03, 0x02, KEY_F2)
-                       MATRIX_KEY(0x03, 0x03, KEY_5)
-                       MATRIX_KEY(0x03, 0x04, KEY_F5)
-                       MATRIX_KEY(0x03, 0x06, KEY_6)
-                       MATRIX_KEY(0x03, 0x08, KEY_MINUS)
-                       MATRIX_KEY(0x03, 0x09, KEY_F13)
-                       MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)
-                       MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)
-
-                       MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)
-                       MATRIX_KEY(0x04, 0x01, KEY_A)
-                       MATRIX_KEY(0x04, 0x02, KEY_D)
-                       MATRIX_KEY(0x04, 0x03, KEY_F)
-                       MATRIX_KEY(0x04, 0x04, KEY_S)
-                       MATRIX_KEY(0x04, 0x05, KEY_K)
-                       MATRIX_KEY(0x04, 0x06, KEY_J)
-                       MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)
-                       MATRIX_KEY(0x04, 0x09, KEY_L)
-                       MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)
-                       MATRIX_KEY(0x04, 0x0b, KEY_ENTER)
-
-                       MATRIX_KEY(0x05, 0x01, KEY_Z)
-                       MATRIX_KEY(0x05, 0x02, KEY_C)
-                       MATRIX_KEY(0x05, 0x03, KEY_V)
-                       MATRIX_KEY(0x05, 0x04, KEY_X)
-                       MATRIX_KEY(0x05, 0x05, KEY_COMMA)
-                       MATRIX_KEY(0x05, 0x06, KEY_M)
-                       MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)
-                       MATRIX_KEY(0x05, 0x08, KEY_SLASH)
-                       MATRIX_KEY(0x05, 0x09, KEY_DOT)
-                       MATRIX_KEY(0x05, 0x0b, KEY_SPACE)
-
-                       MATRIX_KEY(0x06, 0x01, KEY_1)
-                       MATRIX_KEY(0x06, 0x02, KEY_3)
-                       MATRIX_KEY(0x06, 0x03, KEY_4)
-                       MATRIX_KEY(0x06, 0x04, KEY_2)
-                       MATRIX_KEY(0x06, 0x05, KEY_8)
-                       MATRIX_KEY(0x06, 0x06, KEY_7)
-                       MATRIX_KEY(0x06, 0x08, KEY_0)
-                       MATRIX_KEY(0x06, 0x09, KEY_9)
-                       MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT)
-                       MATRIX_KEY(0x06, 0x0b, KEY_DOWN)
-                       MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)
-
-                       MATRIX_KEY(0x07, 0x01, KEY_Q)
-                       MATRIX_KEY(0x07, 0x02, KEY_E)
-                       MATRIX_KEY(0x07, 0x03, KEY_R)
-                       MATRIX_KEY(0x07, 0x04, KEY_W)
-                       MATRIX_KEY(0x07, 0x05, KEY_I)
-                       MATRIX_KEY(0x07, 0x06, KEY_U)
-                       MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT)
-                       MATRIX_KEY(0x07, 0x08, KEY_P)
-                       MATRIX_KEY(0x07, 0x09, KEY_O)
-                       MATRIX_KEY(0x07, 0x0b, KEY_UP)
-                       MATRIX_KEY(0x07, 0x0c, KEY_LEFT)
+                       CROS_STD_TOP_ROW_KEYMAP
+                       CROS_STD_MAIN_KEYMAP
                >;
        };
 };
index 7a1e531..f28a96f 100644 (file)
        pinctrl-0 = <&pinctrl_usdhc2>;
        cd-gpios = <&gpio1 4 GPIO_ACTIVE_LOW>;
        wp-gpios = <&gpio1 2 GPIO_ACTIVE_HIGH>;
+       vmmc-supply = <&vdd_sd1_reg>;
        status = "disabled";
 };
 
                     &pinctrl_usdhc3_cdwp>;
        cd-gpios = <&gpio1 27 GPIO_ACTIVE_LOW>;
        wp-gpios = <&gpio1 29 GPIO_ACTIVE_HIGH>;
+       vmmc-supply = <&vdd_sd0_reg>;
        status = "disabled";
 };
index c593597..5a1e10d 100644 (file)
                        micrel,led-mode = <1>;
                        clocks = <&clks IMX6UL_CLK_ENET_REF>;
                        clock-names = "rmii-ref";
-                       reset-gpios = <&gpio_spi 1 GPIO_ACTIVE_LOW>;
-                       reset-assert-us = <10000>;
-                       reset-deassert-us = <100>;
 
                };
 
                        micrel,led-mode = <1>;
                        clocks = <&clks IMX6UL_CLK_ENET2_REF>;
                        clock-names = "rmii-ref";
-                       reset-gpios = <&gpio_spi 2 GPIO_ACTIVE_LOW>;
-                       reset-assert-us = <10000>;
-                       reset-deassert-us = <100>;
                };
        };
 };
        status = "okay";
 };
 
+&gpio_spi {
+       eth0-phy-hog {
+               gpio-hog;
+               gpios = <1 GPIO_ACTIVE_HIGH>;
+               output-high;
+               line-name = "eth0-phy";
+       };
+
+       eth1-phy-hog {
+               gpio-hog;
+               gpios = <2 GPIO_ACTIVE_HIGH>;
+               output-high;
+               line-name = "eth1-phy";
+       };
+};
+
 &i2c1 {
        clock-frequency = <100000>;
        pinctrl-names = "default";
index ecbb2cc..79cc457 100644 (file)
@@ -14,5 +14,6 @@
 };
 
 &gpmi {
+       fsl,use-minimum-ecc;
        status = "okay";
 };
index 72e4f64..4a9f949 100644 (file)
                i2c1 = &i2c2;
                i2c2 = &i2c3;
                i2c3 = &i2c4;
+               mmc0 = &mmc1;
+               mmc1 = &mmc2;
+               mmc2 = &mmc3;
+               mmc3 = &mmc4;
+               mmc4 = &mmc5;
                serial0 = &uart1;
                serial1 = &uart2;
                serial2 = &uart3;
index 5328685..1f1c04d 100644 (file)
                ti,max-div = <2>;
        };
 
-       sha2md5_fck: sha2md5_fck@15c8 {
-               #clock-cells = <0>;
-               compatible = "ti,gate-clock";
-               clocks = <&l3_div_ck>;
-               ti,bit-shift = <1>;
-               reg = <0x15c8>;
-       };
-
        usb_phy_cm_clk32k: usb_phy_cm_clk32k@640 {
                #clock-cells = <0>;
                compatible = "ti,gate-clock";
index e025b7c..ee821d0 100644 (file)
                i2c2 = &i2c3;
                i2c3 = &i2c4;
                i2c4 = &i2c5;
+               mmc0 = &mmc1;
+               mmc1 = &mmc2;
+               mmc2 = &mmc3;
+               mmc3 = &mmc4;
+               mmc4 = &mmc5;
                serial0 = &uart1;
                serial1 = &uart2;
                serial2 = &uart3;
index 84066c1..ec45ced 100644 (file)
                                compatible = "microchip,sam9x60-pinctrl", "atmel,at91sam9x5-pinctrl", "atmel,at91rm9200-pinctrl", "simple-bus";
                                ranges = <0xfffff400 0xfffff400 0x800>;
 
+                               /* mux-mask corresponding to sam9x60 SoC in TFBGA228L package */
+                               atmel,mux-mask = <
+                                                /*     A       B       C       */
+                                                0xffffffff 0xffe03fff 0xef00019d       /* pioA */
+                                                0x03ffffff 0x02fc7e7f 0x00780000       /* pioB */
+                                                0xffffffff 0xffffffff 0xf83fffff       /* pioC */
+                                                0x003fffff 0x003f8000 0x00000000       /* pioD */
+                                                >;
+
                                pioA: gpio@fffff400 {
                                        compatible = "microchip,sam9x60-gpio", "atmel,at91sam9x5-gpio", "atmel,at91rm9200-gpio";
                                        reg = <0xfffff400 0x200>;
index 1ef2bc4..383c632 100644 (file)
@@ -176,7 +176,6 @@ CONFIG_BOOT_PRINTK_DELAY=y
 CONFIG_DYNAMIC_DEBUG=y
 CONFIG_DEBUG_INFO=y
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_LOCKUP_DETECTOR=y
 CONFIG_SCHED_TRACER=y
index a9c6f32..ca32446 100644 (file)
@@ -164,7 +164,6 @@ CONFIG_FONTS=y
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 CONFIG_FRAME_WARN=2048
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_SOFTLOCKUP_DETECTOR=y
index ee3aee6..5199a2b 100644 (file)
@@ -243,7 +243,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
        thread->cpu_domain = get_domain();
 #endif
 
-       if (likely(!(p->flags & PF_KTHREAD))) {
+       if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
                *childregs = *current_pt_regs();
                childregs->ARM_r0 = 0;
                if (stack_start)
index 322caa2..21bce40 100644 (file)
@@ -7,6 +7,7 @@
 #include <linux/module.h>
 #include <linux/irq.h>
 #include <linux/irqdomain.h>
+#include <linux/irqchip.h>
 #include <linux/io.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
@@ -162,7 +163,7 @@ static void __exception_irq_entry avic_handle_irq(struct pt_regs *regs)
  * interrupts. It registers the interrupt enable and disable functions
  * to the kernel for each interrupt source.
  */
-void __init mxc_init_irq(void __iomem *irqbase)
+static void __init mxc_init_irq(void __iomem *irqbase)
 {
        struct device_node *np;
        int irq_base;
@@ -220,3 +221,16 @@ void __init mxc_init_irq(void __iomem *irqbase)
 
        printk(KERN_INFO "MXC IRQ initialized\n");
 }
+
+static int __init imx_avic_init(struct device_node *node,
+                              struct device_node *parent)
+{
+       void __iomem *avic_base;
+
+       avic_base = of_iomap(node, 0);
+       BUG_ON(!avic_base);
+       mxc_init_irq(avic_base);
+       return 0;
+}
+
+IRQCHIP_DECLARE(imx_avic, "fsl,avic", imx_avic_init);
index 2b004cc..474dedb 100644 (file)
@@ -22,7 +22,6 @@ void mx35_map_io(void);
 void imx21_init_early(void);
 void imx31_init_early(void);
 void imx35_init_early(void);
-void mxc_init_irq(void __iomem *);
 void mx31_init_irq(void);
 void mx35_init_irq(void);
 void mxc_set_cpu_type(unsigned int type);
index 32df3b8..8eca92d 100644 (file)
@@ -17,16 +17,6 @@ static void __init imx1_init_early(void)
        mxc_set_cpu_type(MXC_CPU_MX1);
 }
 
-static void __init imx1_init_irq(void)
-{
-       void __iomem *avic_addr;
-
-       avic_addr = ioremap(MX1_AVIC_ADDR, SZ_4K);
-       WARN_ON(!avic_addr);
-
-       mxc_init_irq(avic_addr);
-}
-
 static const char * const imx1_dt_board_compat[] __initconst = {
        "fsl,imx1",
        NULL
@@ -34,7 +24,6 @@ static const char * const imx1_dt_board_compat[] __initconst = {
 
 DT_MACHINE_START(IMX1_DT, "Freescale i.MX1 (Device Tree Support)")
        .init_early     = imx1_init_early,
-       .init_irq       = imx1_init_irq,
        .dt_compat      = imx1_dt_board_compat,
        .restart        = mxc_restart,
 MACHINE_END
index 95de48a..51927bd 100644 (file)
@@ -22,17 +22,6 @@ static void __init imx25_dt_init(void)
        imx_aips_allow_unprivileged_access("fsl,imx25-aips");
 }
 
-static void __init mx25_init_irq(void)
-{
-       struct device_node *np;
-       void __iomem *avic_base;
-
-       np = of_find_compatible_node(NULL, NULL, "fsl,avic");
-       avic_base = of_iomap(np, 0);
-       BUG_ON(!avic_base);
-       mxc_init_irq(avic_base);
-}
-
 static const char * const imx25_dt_board_compat[] __initconst = {
        "fsl,imx25",
        NULL
@@ -42,6 +31,5 @@ DT_MACHINE_START(IMX25_DT, "Freescale i.MX25 (Device Tree Support)")
        .init_early     = imx25_init_early,
        .init_machine   = imx25_dt_init,
        .init_late      = imx25_pm_init,
-       .init_irq       = mx25_init_irq,
        .dt_compat      = imx25_dt_board_compat,
 MACHINE_END
index 262422a..e325c94 100644 (file)
@@ -56,17 +56,6 @@ static void __init imx27_init_early(void)
        mxc_set_cpu_type(MXC_CPU_MX27);
 }
 
-static void __init mx27_init_irq(void)
-{
-       void __iomem *avic_base;
-       struct device_node *np;
-
-       np = of_find_compatible_node(NULL, NULL, "fsl,avic");
-       avic_base = of_iomap(np, 0);
-       BUG_ON(!avic_base);
-       mxc_init_irq(avic_base);
-}
-
 static const char * const imx27_dt_board_compat[] __initconst = {
        "fsl,imx27",
        NULL
@@ -75,7 +64,6 @@ static const char * const imx27_dt_board_compat[] __initconst = {
 DT_MACHINE_START(IMX27_DT, "Freescale i.MX27 (Device Tree Support)")
        .map_io         = mx27_map_io,
        .init_early     = imx27_init_early,
-       .init_irq       = mx27_init_irq,
        .init_late      = imx27_pm_init,
        .dt_compat      = imx27_dt_board_compat,
 MACHINE_END
index dc69dfe..e9a1092 100644 (file)
@@ -14,6 +14,5 @@ static const char * const imx31_dt_board_compat[] __initconst = {
 DT_MACHINE_START(IMX31_DT, "Freescale i.MX31 (Device Tree Support)")
        .map_io         = mx31_map_io,
        .init_early     = imx31_init_early,
-       .init_irq       = mx31_init_irq,
        .dt_compat      = imx31_dt_board_compat,
 MACHINE_END
index ec5c306..0fc0821 100644 (file)
@@ -27,6 +27,5 @@ DT_MACHINE_START(IMX35_DT, "Freescale i.MX35 (Device Tree Support)")
        .l2c_aux_mask   = ~0,
        .map_io         = mx35_map_io,
        .init_early     = imx35_init_early,
-       .init_irq       = mx35_init_irq,
        .dt_compat      = imx35_dt_board_compat,
 MACHINE_END
index 5056438..28db972 100644 (file)
@@ -109,18 +109,6 @@ void __init imx31_init_early(void)
        mx3_ccm_base = of_iomap(np, 0);
        BUG_ON(!mx3_ccm_base);
 }
-
-void __init mx31_init_irq(void)
-{
-       void __iomem *avic_base;
-       struct device_node *np;
-
-       np = of_find_compatible_node(NULL, NULL, "fsl,imx31-avic");
-       avic_base = of_iomap(np, 0);
-       BUG_ON(!avic_base);
-
-       mxc_init_irq(avic_base);
-}
 #endif /* ifdef CONFIG_SOC_IMX31 */
 
 #ifdef CONFIG_SOC_IMX35
@@ -158,16 +146,4 @@ void __init imx35_init_early(void)
        mx3_ccm_base = of_iomap(np, 0);
        BUG_ON(!mx3_ccm_base);
 }
-
-void __init mx35_init_irq(void)
-{
-       void __iomem *avic_base;
-       struct device_node *np;
-
-       np = of_find_compatible_node(NULL, NULL, "fsl,imx35-avic");
-       avic_base = of_iomap(np, 0);
-       BUG_ON(!avic_base);
-
-       mxc_init_irq(avic_base);
-}
 #endif /* ifdef CONFIG_SOC_IMX35 */
index cd711bf..2c647bd 100644 (file)
@@ -65,7 +65,7 @@ static void __init keystone_init(void)
 static long long __init keystone_pv_fixup(void)
 {
        long long offset;
-       phys_addr_t mem_start, mem_end;
+       u64 mem_start, mem_end;
 
        mem_start = memblock_start_of_DRAM();
        mem_end = memblock_end_of_DRAM();
@@ -78,7 +78,7 @@ static long long __init keystone_pv_fixup(void)
        if (mem_start < KEYSTONE_HIGH_PHYS_START ||
            mem_end   > KEYSTONE_HIGH_PHYS_END) {
                pr_crit("Invalid address space for memory (%08llx-%08llx)\n",
-                       (u64)mem_start, (u64)mem_end);
+                       mem_start, mem_end);
                return 0;
        }
 
index 14a6c3e..f745a65 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/platform_data/gpio-omap.h>
 
 #include <asm/assembler.h>
+#include <asm/irq.h>
 
 #include "ams-delta-fiq.h"
 #include "board-ams-delta.h"
index f70d561..0659ab4 100644 (file)
@@ -9,6 +9,7 @@
  */
 
 #include <linux/arm-smccc.h>
+#include <linux/cpu_pm.h>
 #include <linux/kernel.h>
 #include <linux/init.h>
 #include <linux/io.h>
@@ -20,6 +21,7 @@
 
 #include "common.h"
 #include "omap-secure.h"
+#include "soc.h"
 
 static phys_addr_t omap_secure_memblock_base;
 
@@ -213,3 +215,40 @@ void __init omap_secure_init(void)
 {
        omap_optee_init_check();
 }
+
+/*
+ * Dummy dispatcher call after core OSWR and MPU off. Once CPU1 has been
+ * woken up again and the MMU re-enabled, this updates the ROM return
+ * address. Otherwise the ROM code would attempt to use the earlier physical
+ * return address that was set with the MMU off when waking up CPU1. Only
+ * used on secure devices.
+ */
+static int cpu_notifier(struct notifier_block *nb, unsigned long cmd, void *v)
+{
+       switch (cmd) {
+       case CPU_CLUSTER_PM_EXIT:
+               omap_secure_dispatcher(OMAP4_PPA_SERVICE_0,
+                                      FLAG_START_CRITICAL,
+                                      0, 0, 0, 0, 0);
+               break;
+       default:
+               break;
+       }
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block secure_notifier_block = {
+       .notifier_call = cpu_notifier,
+};
+
+static int __init secure_pm_init(void)
+{
+       if (omap_type() == OMAP2_DEVICE_TYPE_GP || !soc_is_omap44xx())
+               return 0;
+
+       cpu_pm_register_notifier(&secure_notifier_block);
+
+       return 0;
+}
+omap_arch_initcall(secure_pm_init);
index 4aaa957..172069f 100644 (file)
@@ -50,6 +50,7 @@
 #define OMAP5_DRA7_MON_SET_ACR_INDEX   0x107
 
 /* Secure PPA(Primary Protected Application) APIs */
+#define OMAP4_PPA_SERVICE_0            0x21
 #define OMAP4_PPA_L2_POR_INDEX         0x23
 #define OMAP4_PPA_CPU_ACTRL_SMP_INDEX  0x25
 
index 09076ad..668dc84 100644 (file)
@@ -246,10 +246,10 @@ int __init omap4_cpcap_init(void)
        omap_voltage_register_pmic(voltdm, &omap443x_max8952_mpu);
 
        if (of_machine_is_compatible("motorola,droid-bionic")) {
-               voltdm = voltdm_lookup("mpu");
+               voltdm = voltdm_lookup("core");
                omap_voltage_register_pmic(voltdm, &omap_cpcap_core);
 
-               voltdm = voltdm_lookup("mpu");
+               voltdm = voltdm_lookup("iva");
                omap_voltage_register_pmic(voltdm, &omap_cpcap_iva);
        } else {
                voltdm = voltdm_lookup("core");
index 62df666..17b66f0 100644 (file)
@@ -88,34 +88,26 @@ static void __init sr_set_nvalues(struct omap_volt_data *volt_data,
 
 extern struct omap_sr_data omap_sr_pdata[];
 
-static int __init sr_dev_init(struct omap_hwmod *oh, void *user)
+static int __init sr_init_by_name(const char *name, const char *voltdm)
 {
        struct omap_sr_data *sr_data = NULL;
        struct omap_volt_data *volt_data;
-       struct omap_smartreflex_dev_attr *sr_dev_attr;
        static int i;
 
-       if (!strncmp(oh->name, "smartreflex_mpu_iva", 20) ||
-           !strncmp(oh->name, "smartreflex_mpu", 16))
+       if (!strncmp(name, "smartreflex_mpu_iva", 20) ||
+           !strncmp(name, "smartreflex_mpu", 16))
                sr_data = &omap_sr_pdata[OMAP_SR_MPU];
-       else if (!strncmp(oh->name, "smartreflex_core", 17))
+       else if (!strncmp(name, "smartreflex_core", 17))
                sr_data = &omap_sr_pdata[OMAP_SR_CORE];
-       else if (!strncmp(oh->name, "smartreflex_iva", 16))
+       else if (!strncmp(name, "smartreflex_iva", 16))
                sr_data = &omap_sr_pdata[OMAP_SR_IVA];
 
        if (!sr_data) {
-               pr_err("%s: Unknown instance %s\n", __func__, oh->name);
+               pr_err("%s: Unknown instance %s\n", __func__, name);
                return -EINVAL;
        }
 
-       sr_dev_attr = (struct omap_smartreflex_dev_attr *)oh->dev_attr;
-       if (!sr_dev_attr || !sr_dev_attr->sensor_voltdm_name) {
-               pr_err("%s: No voltage domain specified for %s. Cannot initialize\n",
-                      __func__, oh->name);
-               goto exit;
-       }
-
-       sr_data->name = oh->name;
+       sr_data->name = name;
        if (cpu_is_omap343x())
                sr_data->ip_type = 1;
        else
@@ -136,10 +128,10 @@ static int __init sr_dev_init(struct omap_hwmod *oh, void *user)
                }
        }
 
-       sr_data->voltdm = voltdm_lookup(sr_dev_attr->sensor_voltdm_name);
+       sr_data->voltdm = voltdm_lookup(voltdm);
        if (!sr_data->voltdm) {
                pr_err("%s: Unable to get voltage domain pointer for VDD %s\n",
-                       __func__, sr_dev_attr->sensor_voltdm_name);
+                       __func__, voltdm);
                goto exit;
        }
 
@@ -160,6 +152,20 @@ exit:
        return 0;
 }
 
+static int __init sr_dev_init(struct omap_hwmod *oh, void *user)
+{
+       struct omap_smartreflex_dev_attr *sr_dev_attr;
+
+       sr_dev_attr = (struct omap_smartreflex_dev_attr *)oh->dev_attr;
+       if (!sr_dev_attr || !sr_dev_attr->sensor_voltdm_name) {
+               pr_err("%s: No voltage domain specified for %s. Cannot initialize\n",
+                      __func__, oh->name);
+               return 0;
+       }
+
+       return sr_init_by_name(oh->name, sr_dev_attr->sensor_voltdm_name);
+}
+
 /*
  * API to be called from board files to enable smartreflex
  * autocompensation at init.
@@ -169,7 +175,42 @@ void __init omap_enable_smartreflex_on_init(void)
        sr_enable_on_init = true;
 }
 
+static const char * const omap4_sr_instances[] = {
+       "mpu",
+       "iva",
+       "core",
+};
+
+static const char * const dra7_sr_instances[] = {
+       "mpu",
+       "core",
+};
+
 int __init omap_devinit_smartreflex(void)
 {
+       const char * const *sr_inst;
+       int i, nr_sr = 0;
+
+       if (soc_is_omap44xx()) {
+               sr_inst = omap4_sr_instances;
+               nr_sr = ARRAY_SIZE(omap4_sr_instances);
+
+       } else if (soc_is_dra7xx()) {
+               sr_inst = dra7_sr_instances;
+               nr_sr = ARRAY_SIZE(dra7_sr_instances);
+       }
+
+       if (nr_sr) {
+               const char *name, *voltdm;
+
+               for (i = 0; i < nr_sr; i++) {
+                       name = kasprintf(GFP_KERNEL, "smartreflex_%s", sr_inst[i]);
+                       voltdm = sr_inst[i];
+                       sr_init_by_name(name, voltdm);
+               }
+
+               return 0;
+       }
+
        return omap_hwmod_for_each_by_class("smartreflex", sr_dev_init, NULL);
 }
index d1010ec..d237bd0 100644 (file)
@@ -502,16 +502,20 @@ static inline void mainstone_init_keypad(void) {}
 #endif
 
 static int mst_pcmcia0_irqs[11] = {
-       [0 ... 10] = -1,
+       [0 ... 4] = -1,
        [5] = MAINSTONE_S0_CD_IRQ,
+       [6 ... 7] = -1,
        [8] = MAINSTONE_S0_STSCHG_IRQ,
+       [9] = -1,
        [10] = MAINSTONE_S0_IRQ,
 };
 
 static int mst_pcmcia1_irqs[11] = {
-       [0 ... 10] = -1,
+       [0 ... 4] = -1,
        [5] = MAINSTONE_S1_CD_IRQ,
+       [6 ... 7] = -1,
        [8] = MAINSTONE_S1_STSCHG_IRQ,
+       [9] = -1,
        [10] = MAINSTONE_S1_IRQ,
 };
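The old and new initializers build identical arrays: C allows a later designated initializer such as [5] = ... to override an element already covered by the GNU range initializer [0 ... 10] = -1, but GCC's -Woverride-init warns about exactly that double write. Splitting the ranges so that no element is initialized twice keeps the values and silences the warning; a minimal illustration (not from the patch):

        int a[4] = { [0 ... 3] = -1, [2] = 7 };            /* same result, warns  */
        int b[4] = { [0 ... 1] = -1, [2] = 7, [3] = -1 };  /* same result, silent */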
 
index 27d8beb..3654f97 100644 (file)
@@ -11,7 +11,7 @@ uapi := $(gen)/uapi/asm
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 sysnr := $(srctree)/$(src)/syscallnr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 
 gen-y := $(gen)/calls-oabi.S
 gen-y += $(gen)/calls-eabi.S
index 20e1170..dcc1191 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index fd6e3aa..84a1cea 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <xen/xen.h>
 #include <xen/interface/memory.h>
+#include <xen/grant_table.h>
 #include <xen/page.h>
 #include <xen/swiotlb-xen.h>
 
@@ -93,17 +94,43 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
        int i;
 
        for (i = 0; i < count; i++) {
+               struct gnttab_unmap_grant_ref unmap;
+               int rc;
+
                if (map_ops[i].status)
                        continue;
-               if (unlikely(!set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
-                                   map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT))) {
-                       return -ENOMEM;
-               }
+               if (likely(set_phys_to_machine(map_ops[i].host_addr >> XEN_PAGE_SHIFT,
+                                   map_ops[i].dev_bus_addr >> XEN_PAGE_SHIFT)))
+                       continue;
+
+               /*
+                * Signal an error for this slot. This in turn requires
+                * immediate unmapping.
+                */
+               map_ops[i].status = GNTST_general_error;
+               unmap.host_addr = map_ops[i].host_addr;
+               unmap.handle = map_ops[i].handle;
+               map_ops[i].handle = INVALID_GRANT_HANDLE;
+               if (map_ops[i].flags & GNTMAP_device_map)
+                       unmap.dev_bus_addr = map_ops[i].dev_bus_addr;
+               else
+                       unmap.dev_bus_addr = 0;
+
+               /*
+                * Pre-populate the status field, to be recognizable in
+                * the log message below.
+                */
+               unmap.status = 1;
+
+               rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                              &unmap, 1);
+               if (rc || unmap.status != GNTST_okay)
+                       pr_err_once("gnttab unmap failed: rc=%d st=%d\n",
+                                   rc, unmap.status);
        }
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
 
 int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
                              struct gnttab_unmap_grant_ref *kunmap_ops,
@@ -118,7 +145,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 
        return 0;
 }
-EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
 
 bool __set_phys_to_machine_multi(unsigned long pfn,
                unsigned long mfn, unsigned long nr_pages)
index 1b8cecd..e4e1b65 100644 (file)
@@ -73,6 +73,8 @@ config ARM64
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC
        select ARCH_SUPPORTS_MEMORY_FAILURE
        select ARCH_SUPPORTS_SHADOW_CALL_STACK if CC_HAVE_SHADOW_CALL_STACK
+       select ARCH_SUPPORTS_LTO_CLANG if CPU_LITTLE_ENDIAN
+       select ARCH_SUPPORTS_LTO_CLANG_THIN
        select ARCH_SUPPORTS_ATOMIC_RMW
        select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
        select ARCH_SUPPORTS_NUMA_BALANCING
@@ -138,6 +140,7 @@ config ARM64
        select HAVE_ARCH_KASAN if !(ARM64_16K_PAGES && ARM64_VA_BITS_48)
        select HAVE_ARCH_KASAN_SW_TAGS if HAVE_ARCH_KASAN
        select HAVE_ARCH_KASAN_HW_TAGS if (HAVE_ARCH_KASAN && ARM64_MTE)
+       select HAVE_ARCH_KFENCE
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_MMAP_RND_BITS
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS if COMPAT
@@ -162,6 +165,8 @@ config ARM64
        select HAVE_DYNAMIC_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS \
                if $(cc-option,-fpatchable-function-entry=2)
+       select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY \
+               if DYNAMIC_FTRACE_WITH_REGS
        select HAVE_EFFICIENT_UNALIGNED_ACCESS
        select HAVE_FAST_GUP
        select HAVE_FTRACE_MCOUNT_RECORD
@@ -805,6 +810,16 @@ config QCOM_FALKOR_ERRATUM_E1041
 
          If unsure, say Y.
 
+config NVIDIA_CARMEL_CNP_ERRATUM
+       bool "NVIDIA Carmel CNP: CNP on Carmel semantically different than ARM cores"
+       default y
+       help
+         If CNP is enabled on Carmel cores, non-shareable TLBIs on a core will not
+         invalidate shared TLB entries installed by a different core, as they would
+         on standard ARM cores.
+
+         If unsure, say Y.
+
 config SOCIONEXT_SYNQUACER_PREITS
        bool "Socionext Synquacer: Workaround for GICv3 pre-ITS"
        default y
@@ -994,6 +1009,7 @@ config HOTPLUG_CPU
 # Common NUMA Features
 config NUMA
        bool "NUMA Memory Allocation and Scheduler Support"
+       select GENERIC_ARCH_NUMA
        select ACPI_NUMA if ACPI
        select OF_NUMA
        help
@@ -1049,8 +1065,6 @@ config HW_PERF_EVENTS
 config SYS_SUPPORTS_HUGETLBFS
        def_bool y
 
-config ARCH_WANT_HUGE_PMD_SHARE
-
 config ARCH_HAS_CACHE_LINE_SIZE
        def_bool y
 
@@ -1151,8 +1165,8 @@ config XEN
 
 config FORCE_MAX_ZONEORDER
        int
-       default "14" if (ARM64_64K_PAGES && TRANSPARENT_HUGEPAGE)
-       default "12" if (ARM64_16K_PAGES && TRANSPARENT_HUGEPAGE)
+       default "14" if ARM64_64K_PAGES
+       default "12" if ARM64_16K_PAGES
        default "11"
        help
          The kernel memory allocator divides physically contiguous memory
@@ -1481,7 +1495,7 @@ config ARM64_PTR_AUTH
        depends on (CC_HAS_SIGN_RETURN_ADDRESS || CC_HAS_BRANCH_PROT_PAC_RET) && AS_HAS_PAC
        # Modern compilers insert a .note.gnu.property section note for PAC
        # which is only understood by binutils starting with version 2.33.1.
-       depends on LD_IS_LLD || LD_VERSION >= 233010000 || (CC_IS_GCC && GCC_VERSION < 90100)
+       depends on LD_IS_LLD || LD_VERSION >= 23301 || (CC_IS_GCC && GCC_VERSION < 90100)
        depends on !CC_IS_CLANG || AS_HAS_CFI_NEGATE_RA_STATE
        depends on (!FUNCTION_GRAPH_TRACER || DYNAMIC_FTRACE_WITH_REGS)
        help
@@ -1849,12 +1863,6 @@ config CMDLINE_FROM_BOOTLOADER
          the boot loader doesn't provide any, the default kernel command
          string provided in CMDLINE will be used.
 
-config CMDLINE_EXTEND
-       bool "Extend bootloader kernel arguments"
-       help
-         The command-line arguments provided by the boot loader will be
-         appended to the default kernel command string.
-
 config CMDLINE_FORCE
        bool "Always use the default kernel command string"
        help
index 7de6b37..9058cfa 100644 (file)
                        ranges = <0x0 0x00 0x1700000 0x100000>;
                        reg = <0x00 0x1700000 0x0 0x100000>;
                        interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       dma-coherent;
 
                        sec_jr0: jr@10000 {
                                compatible = "fsl,sec-v5.4-job-ring",
index 5a8a1dc..28c51e5 100644 (file)
                        ranges = <0x0 0x00 0x1700000 0x100000>;
                        reg = <0x00 0x1700000 0x0 0x100000>;
                        interrupts = <0 75 0x4>;
+                       dma-coherent;
 
                        sec_jr0: jr@10000 {
                                compatible = "fsl,sec-v5.4-job-ring",
index 1d6dfd1..3945830 100644 (file)
                        ranges = <0x0 0x00 0x1700000 0x100000>;
                        reg = <0x00 0x1700000 0x0 0x100000>;
                        interrupts = <GIC_SPI 75 IRQ_TYPE_LEVEL_HIGH>;
+                       dma-coherent;
 
                        sec_jr0: jr@10000 {
                                compatible = "fsl,sec-v5.4-job-ring",
index 5ccc4cc..a003e6a 100644 (file)
 #define MX8MM_IOMUXC_SD1_CMD_USDHC1_CMD                                     0x0A4 0x30C 0x000 0x0 0x0
 #define MX8MM_IOMUXC_SD1_CMD_GPIO2_IO1                                      0x0A4 0x30C 0x000 0x5 0x0
 #define MX8MM_IOMUXC_SD1_DATA0_USDHC1_DATA0                                 0x0A8 0x310 0x000 0x0 0x0
-#define MX8MM_IOMUXC_SD1_DATA0_GPIO2_IO2                                    0x0A8 0x31  0x000 0x5 0x0
+#define MX8MM_IOMUXC_SD1_DATA0_GPIO2_IO2                                    0x0A8 0x310 0x000 0x5 0x0
 #define MX8MM_IOMUXC_SD1_DATA1_USDHC1_DATA1                                 0x0AC 0x314 0x000 0x0 0x0
 #define MX8MM_IOMUXC_SD1_DATA1_GPIO2_IO3                                    0x0AC 0x314 0x000 0x5 0x0
 #define MX8MM_IOMUXC_SD1_DATA2_USDHC1_DATA2                                 0x0B0 0x318 0x000 0x0 0x0
index 0e1a6d9..122c95d 100644 (file)
@@ -35,7 +35,7 @@
 
 &i2c2 {
        clock-frequency = <400000>;
-       pinctrl-names = "default";
+       pinctrl-names = "default", "gpio";
        pinctrl-0 = <&pinctrl_i2c2>;
        pinctrl-1 = <&pinctrl_i2c2_gpio>;
        sda-gpios = <&gpio5 17 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
index 44a8c23..f3965ec 100644 (file)
@@ -67,7 +67,7 @@
 
 &i2c1 {
        clock-frequency = <400000>;
-       pinctrl-names = "default";
+       pinctrl-names = "default", "gpio";
        pinctrl-0 = <&pinctrl_i2c1>;
        pinctrl-1 = <&pinctrl_i2c1_gpio>;
        sda-gpios = <&gpio5 15 (GPIO_ACTIVE_HIGH | GPIO_OPEN_DRAIN)>;
index b94b020..68e8fa1 100644 (file)
 #define MX8MQ_IOMUXC_SD1_CMD_USDHC1_CMD                                     0x0A4 0x30C 0x000 0x0 0x0
 #define MX8MQ_IOMUXC_SD1_CMD_GPIO2_IO1                                      0x0A4 0x30C 0x000 0x5 0x0
 #define MX8MQ_IOMUXC_SD1_DATA0_USDHC1_DATA0                                 0x0A8 0x310 0x000 0x0 0x0
-#define MX8MQ_IOMUXC_SD1_DATA0_GPIO2_IO2                                    0x0A8 0x31  0x000 0x5 0x0
+#define MX8MQ_IOMUXC_SD1_DATA0_GPIO2_IO2                                    0x0A8 0x310 0x000 0x5 0x0
 #define MX8MQ_IOMUXC_SD1_DATA1_USDHC1_DATA1                                 0x0AC 0x314 0x000 0x0 0x0
 #define MX8MQ_IOMUXC_SD1_DATA1_GPIO2_IO3                                    0x0AC 0x314 0x000 0x5 0x0
 #define MX8MQ_IOMUXC_SD1_DATA2_USDHC1_DATA2                                 0x0B0 0x318 0x000 0x0 0x0
index 64179a3..c6f5df2 100644 (file)
                };
 
                CP11X_LABEL(sata0): sata@540000 {
-                       compatible = "marvell,armada-8k-ahci";
+                       compatible = "marvell,armada-8k-ahci",
+                       "generic-ahci";
                        reg = <0x540000 0x30000>;
                        dma-coherent;
+                       interrupts = <107 IRQ_TYPE_LEVEL_HIGH>;
                        clocks = <&CP11X_LABEL(clk) 1 15>,
                                 <&CP11X_LABEL(clk) 1 16>;
                        #address-cells = <1>;
                        status = "disabled";
 
                        sata-port@0 {
-                               interrupts = <109 IRQ_TYPE_LEVEL_HIGH>;
                                reg = <0>;
                        };
 
                        sata-port@1 {
-                               interrupts = <107 IRQ_TYPE_LEVEL_HIGH>;
                                reg = <1>;
                        };
                };
index 43c4280..d612f63 100644 (file)
@@ -770,7 +770,8 @@ CONFIG_SND_SOC_LPASS_VA_MACRO=m
 CONFIG_SND_SIMPLE_CARD=m
 CONFIG_SND_AUDIO_GRAPH_CARD=m
 CONFIG_HID_MULTITOUCH=m
-CONFIG_I2C_HID=m
+CONFIG_I2C_HID_ACPI=m
+CONFIG_I2C_HID_OF=m
 CONFIG_USB_CONN_GPIO=m
 CONFIG_USB=y
 CONFIG_USB_OTG=y
index 77cbbe3..a074459 100644 (file)
@@ -6,7 +6,6 @@
 #define __ASM_CACHE_H
 
 #include <asm/cputype.h>
-#include <asm/mte-kasan.h>
 
 #define CTR_L1IP_SHIFT         14
 #define CTR_L1IP_MASK          3
index 93a161b..dc52b73 100644 (file)
@@ -37,7 +37,7 @@ static inline __sum16 ip_fast_csum(const void *iph, unsigned int ihl)
        } while (--n > 0);
 
        sum += ((sum >> 32) | (sum << 32));
-       return csum_fold((__force u32)(sum >> 32));
+       return csum_fold((__force __wsum)(sum >> 32));
 }
 #define ip_fast_csum ip_fast_csum
 
index b77d997..c40f249 100644 (file)
@@ -66,7 +66,8 @@
 #define ARM64_WORKAROUND_1508412               58
 #define ARM64_HAS_LDAPR                                59
 #define ARM64_KVM_PROTECTED_MODE               60
+#define ARM64_WORKAROUND_NVIDIA_CARMEL_CNP     61
 
-#define ARM64_NCAPS                            61
+#define ARM64_NCAPS                            62
 
 #endif /* __ASM_CPUCAPS_H */
index 0aaf904..12d5f47 100644 (file)
@@ -6,6 +6,7 @@
 
 #include <linux/linkage.h>
 #include <asm/memory.h>
+#include <asm/mte-kasan.h>
 #include <asm/pgtable-types.h>
 
 #define arch_kasan_set_tag(addr, tag)  __tag_set(addr, tag)
diff --git a/arch/arm64/include/asm/kfence.h b/arch/arm64/include/asm/kfence.h
new file mode 100644 (file)
index 0000000..d061176
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * arm64 KFENCE support.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef __ASM_KFENCE_H
+#define __ASM_KFENCE_H
+
+#include <asm/cacheflush.h>
+
+static inline bool arch_kfence_init_pool(void) { return true; }
+
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+       set_memory_valid(addr, 1, !protect);
+
+       return true;
+}
+
+#endif /* __ASM_KFENCE_H */
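kfence_protect_page() is essentially the entire arm64 KFENCE arch hook: set_memory_valid(addr, 1, !protect) sets or clears the valid bit on that one page's kernel mapping, so any access to a protected page faults and the KFENCE core can report it. From a hypothetical caller's point of view (not the actual kfence core code):

        kfence_protect_page(addr, true);   /* mapping invalid: stray accesses fault */
        kfence_protect_page(addr, false);  /* mapping valid again: accesses succeed */

arch_kfence_init_pool() returning true unconditionally assumes the pool's linear-map range is already mapped at page granularity, which is what makes per-page valid-bit flipping possible in the first place.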
index 4e90c2d..94d4025 100644 (file)
 #define CPTR_EL2_DEFAULT       CPTR_EL2_RES1
 
 /* Hyp Debug Configuration Register bits */
+#define MDCR_EL2_TTRF          (1 << 19)
 #define MDCR_EL2_TPMS          (1 << 14)
 #define MDCR_EL2_E2PB_MASK     (UL(0x3))
 #define MDCR_EL2_E2PB_SHIFT    (UL(12))
index 22d933e..a7ab84f 100644 (file)
 #define __KVM_HOST_SMCCC_FUNC___kvm_flush_vm_context           2
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid_ipa         3
 #define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_vmid             4
-#define __KVM_HOST_SMCCC_FUNC___kvm_tlb_flush_local_vmid       5
+#define __KVM_HOST_SMCCC_FUNC___kvm_flush_cpu_context          5
 #define __KVM_HOST_SMCCC_FUNC___kvm_timer_set_cntvoff          6
 #define __KVM_HOST_SMCCC_FUNC___kvm_enable_ssbs                        7
-#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_ich_vtr_el2                8
+#define __KVM_HOST_SMCCC_FUNC___vgic_v3_get_gic_config         8
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_read_vmcr              9
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_write_vmcr             10
 #define __KVM_HOST_SMCCC_FUNC___vgic_v3_init_lrs               11
@@ -183,16 +183,16 @@ DECLARE_KVM_HYP_SYM(__bp_harden_hyp_vecs);
 #define __bp_harden_hyp_vecs   CHOOSE_HYP_SYM(__bp_harden_hyp_vecs)
 
 extern void __kvm_flush_vm_context(void);
+extern void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu);
 extern void __kvm_tlb_flush_vmid_ipa(struct kvm_s2_mmu *mmu, phys_addr_t ipa,
                                     int level);
 extern void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu);
-extern void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu);
 
 extern void __kvm_timer_set_cntvoff(u64 cntvoff);
 
 extern int __kvm_vcpu_run(struct kvm_vcpu *vcpu);
 
-extern u64 __vgic_v3_get_ich_vtr_el2(void);
+extern u64 __vgic_v3_get_gic_config(void);
 extern u64 __vgic_v3_read_vmcr(void);
 extern void __vgic_v3_write_vmcr(u32 vmcr);
 extern void __vgic_v3_init_lrs(void);
index c045082..32ae676 100644 (file)
@@ -83,6 +83,11 @@ void sysreg_restore_guest_state_vhe(struct kvm_cpu_context *ctxt);
 void __debug_switch_to_guest(struct kvm_vcpu *vcpu);
 void __debug_switch_to_host(struct kvm_vcpu *vcpu);
 
+#ifdef __KVM_NVHE_HYPERVISOR__
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu);
+#endif
+
 void __fpsimd_save_state(struct user_fpsimd_state *fp_regs);
 void __fpsimd_restore_state(struct user_fpsimd_state *fp_regs);
 
@@ -97,7 +102,8 @@ bool kvm_host_psci_handler(struct kvm_cpu_context *host_ctxt);
 
 void __noreturn hyp_panic(void);
 #ifdef __KVM_NVHE_HYPERVISOR__
-void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
+void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+                              u64 elr, u64 par);
 #endif
 
 #endif /* __ARM64_KVM_HYP_H__ */
index bc09af2..0aabc3b 100644 (file)
@@ -244,6 +244,7 @@ static inline const void *__tag_set(const void *addr, u8 tag)
 
 #ifdef CONFIG_KASAN_HW_TAGS
 #define arch_enable_tagging()                  mte_enable_kernel()
+#define arch_set_tagging_report_once(state)    mte_set_report_once(state)
 #define arch_init_tags(max_tag)                        mte_init_tags(max_tag)
 #define arch_get_random_tag()                  mte_get_random_tag()
 #define arch_get_mem_tag(addr)                 mte_get_mem_tag(addr)
@@ -327,6 +328,11 @@ static inline void *phys_to_virt(phys_addr_t x)
 #define ARCH_PFN_OFFSET                ((unsigned long)PHYS_PFN_OFFSET)
 
 #if !defined(CONFIG_SPARSEMEM_VMEMMAP) || defined(CONFIG_DEBUG_VIRTUAL)
+#define page_to_virt(x)        ({                                              \
+       __typeof__(x) __page = x;                                       \
+       void *__addr = __va(page_to_phys(__page));                      \
+       (void *)__tag_set((const void *)__addr, page_kasan_tag(__page));\
+})
 #define virt_to_page(x)                pfn_to_page(virt_to_pfn(x))
 #else
 #define page_to_virt(x)        ({                                              \
index 70ce8c1..bd02e99 100644 (file)
@@ -63,23 +63,6 @@ static inline void cpu_switch_mm(pgd_t *pgd, struct mm_struct *mm)
 extern u64 idmap_t0sz;
 extern u64 idmap_ptrs_per_pgd;
 
-static inline bool __cpu_uses_extended_idmap(void)
-{
-       if (IS_ENABLED(CONFIG_ARM64_VA_BITS_52))
-               return false;
-
-       return unlikely(idmap_t0sz != TCR_T0SZ(VA_BITS));
-}
-
-/*
- * True if the extended ID map requires an extra level of translation table
- * to be configured.
- */
-static inline bool __cpu_uses_extended_idmap_level(void)
-{
-       return ARM64_HW_PGTABLE_LEVELS(64 - idmap_t0sz) > CONFIG_PGTABLE_LEVELS;
-}
-
 /*
  * Ensure TCR.T0SZ is set to the provided value.
  */
index 691f15a..8100456 100644 (file)
@@ -1,7 +1,7 @@
 #ifdef CONFIG_ARM64_MODULE_PLTS
 SECTIONS {
-       .plt (NOLOAD) : { BYTE(0) }
-       .init.plt (NOLOAD) : { BYTE(0) }
-       .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
+       .plt (NOLOAD) : { BYTE(0) }
+       .init.plt (NOLOAD) : { BYTE(0) }
+       .text.ftrace_trampoline (NOLOAD) : { BYTE(0) }
 }
 #endif
index 2d73a16..cf241b0 100644 (file)
@@ -11,4 +11,6 @@
 #define MTE_TAG_SIZE           4
 #define MTE_TAG_MASK           GENMASK((MTE_TAG_SHIFT + (MTE_TAG_SIZE - 1)), MTE_TAG_SHIFT)
 
+#define __MTE_PREAMBLE         ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
+
 #endif /* __ASM_MTE_DEF_H  */
index 26349a4..7ab500e 100644 (file)
 
 #include <linux/types.h>
 
+#ifdef CONFIG_ARM64_MTE
+
 /*
- * The functions below are meant to be used only for the
- * KASAN_HW_TAGS interface defined in asm/memory.h.
+ * These functions are meant to be used only from the KASAN runtime through
+ * the arch_*() interface defined in asm/memory.h.
+ * These functions don't include system_supports_mte() checks,
+ * as KASAN only calls them when MTE is supported and enabled.
  */
-#ifdef CONFIG_ARM64_MTE
 
 static inline u8 mte_get_ptr_tag(void *ptr)
 {
@@ -25,13 +28,61 @@ static inline u8 mte_get_ptr_tag(void *ptr)
        return tag;
 }
 
-u8 mte_get_mem_tag(void *addr);
-u8 mte_get_random_tag(void);
-void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag);
+/* Get allocation tag for the address. */
+static inline u8 mte_get_mem_tag(void *addr)
+{
+       asm(__MTE_PREAMBLE "ldg %0, [%0]"
+               : "+r" (addr));
+
+       return mte_get_ptr_tag(addr);
+}
+
+/* Generate a random tag. */
+static inline u8 mte_get_random_tag(void)
+{
+       void *addr;
+
+       asm(__MTE_PREAMBLE "irg %0, %0"
+               : "=r" (addr));
+
+       return mte_get_ptr_tag(addr);
+}
+
+/*
+ * Assign allocation tags for a region of memory based on the pointer tag.
+ * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned, and
+ * the size must be non-zero and MTE_GRANULE_SIZE aligned.
+ */
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
+{
+       u64 curr, end;
+
+       if (!size)
+               return;
+
+       curr = (u64)__tag_set(addr, tag);
+       end = curr + size;
+
+       do {
+               /*
+                * 'asm volatile' is required to prevent the compiler from
+                * moving the statement outside of the loop.
+                */
+               asm volatile(__MTE_PREAMBLE "stg %0, [%0]"
+                            :
+                            : "r" (curr)
+                            : "memory");
+
+               curr += MTE_GRANULE_SIZE;
+       } while (curr != end);
+}
 
 void mte_enable_kernel(void);
 void mte_init_tags(u64 max_tag);
 
+void mte_set_report_once(bool state);
+bool mte_report_once(void);
+
 #else /* CONFIG_ARM64_MTE */
 
 static inline u8 mte_get_ptr_tag(void *ptr)
@@ -43,13 +94,14 @@ static inline u8 mte_get_mem_tag(void *addr)
 {
        return 0xFF;
 }
+
 static inline u8 mte_get_random_tag(void)
 {
        return 0xFF;
 }
-static inline void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
+
+static inline void mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
 {
-       return addr;
 }
 
 static inline void mte_enable_kernel(void)
@@ -60,6 +112,15 @@ static inline void mte_init_tags(u64 max_tag)
 {
 }
 
+static inline void mte_set_report_once(bool state)
+{
+}
+
+static inline bool mte_report_once(void)
+{
+       return false;
+}
+
 #endif /* CONFIG_ARM64_MTE */
 
 #endif /* __ASSEMBLY__ */
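With these helpers inline, the KASAN_HW_TAGS hot paths execute the MTE instructions directly: irg generates a random tag, stg stores the allocation tag for one 16-byte granule (hence the MTE_GRANULE_SIZE-stepped loop), and ldg reads a granule's tag back. A usage sketch, assuming MTE is enabled and buf is MTE_GRANULE_SIZE-aligned (illustrative only, not taken from the patch):

        u8 tag = mte_get_random_tag();          /* irg: pick a random tag     */
        mte_set_mem_tag_range(buf, 32, tag);    /* stg loop: tag two granules */
        WARN_ON(mte_get_mem_tag(buf) != tag);   /* ldg: read the tag back     */

As the reworked comment notes, the system_supports_mte() checks are gone on purpose: KASAN only calls these helpers once MTE is known to be supported and enabled.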
index d02aff9..9b557a4 100644 (file)
@@ -8,8 +8,6 @@
 #include <asm/compiler.h>
 #include <asm/mte-def.h>
 
-#define __MTE_PREAMBLE         ARM64_ASM_PREAMBLE ".arch_extension memtag\n"
-
 #ifndef __ASSEMBLY__
 
 #include <linux/bitfield.h>
index dd87039..8c8cf42 100644 (file)
@@ -3,52 +3,6 @@
 #define __ASM_NUMA_H
 
 #include <asm/topology.h>
-
-#ifdef CONFIG_NUMA
-
-#define NR_NODE_MEMBLKS                (MAX_NUMNODES * 2)
-
-int __node_distance(int from, int to);
-#define node_distance(a, b) __node_distance(a, b)
-
-extern nodemask_t numa_nodes_parsed __initdata;
-
-extern bool numa_off;
-
-/* Mappings between node number and cpus on that node. */
-extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
-void numa_clear_node(unsigned int cpu);
-
-#ifdef CONFIG_DEBUG_PER_CPU_MAPS
-const struct cpumask *cpumask_of_node(int node);
-#else
-/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
-static inline const struct cpumask *cpumask_of_node(int node)
-{
-       if (node == NUMA_NO_NODE)
-               return cpu_all_mask;
-
-       return node_to_cpumask_map[node];
-}
-#endif
-
-void __init arm64_numa_init(void);
-int __init numa_add_memblk(int nodeid, u64 start, u64 end);
-void __init numa_set_distance(int from, int to, int distance);
-void __init numa_free_distance(void);
-void __init early_map_cpu_to_node(unsigned int cpu, int nid);
-void numa_store_cpu_info(unsigned int cpu);
-void numa_add_cpu(unsigned int cpu);
-void numa_remove_cpu(unsigned int cpu);
-
-#else  /* CONFIG_NUMA */
-
-static inline void numa_store_cpu_info(unsigned int cpu) { }
-static inline void numa_add_cpu(unsigned int cpu) { }
-static inline void numa_remove_cpu(unsigned int cpu) { }
-static inline void arm64_numa_init(void) { }
-static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
-
-#endif /* CONFIG_NUMA */
+#include <asm-generic/numa.h>
 
 #endif /* __ASM_NUMA_H */
index 046be78..9a65fb5 100644 (file)
@@ -66,7 +66,6 @@ extern bool arm64_use_ng_mappings;
 #define _PAGE_DEFAULT          (_PROT_DEFAULT | PTE_ATTRINDX(MT_NORMAL))
 
 #define PAGE_KERNEL            __pgprot(PROT_NORMAL)
-#define PAGE_KERNEL_TAGGED     __pgprot(PROT_NORMAL_TAGGED)
 #define PAGE_KERNEL_RO         __pgprot((PROT_NORMAL & ~PTE_WRITE) | PTE_RDONLY)
 #define PAGE_KERNEL_ROX                __pgprot((PROT_NORMAL & ~(PTE_WRITE | PTE_PXN)) | PTE_RDONLY)
 #define PAGE_KERNEL_EXEC       __pgprot(PROT_NORMAL & ~PTE_PXN)
index e17b96d..4702779 100644 (file)
@@ -486,6 +486,9 @@ static inline pmd_t pmd_mkdevmap(pmd_t pmd)
        __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_NC) | PTE_PXN | PTE_UXN)
 #define pgprot_device(prot) \
        __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_DEVICE_nGnRE) | PTE_PXN | PTE_UXN)
+#define pgprot_tagged(prot) \
+       __pgprot_modify(prot, PTE_ATTRINDX_MASK, PTE_ATTRINDX(MT_NORMAL_TAGGED))
+#define pgprot_mhp     pgprot_tagged
 /*
  * DMA allocations for non-coherent devices use what the Arm architecture calls
  * "Normal non-cacheable" memory, which permits speculation, unaligned accesses
index ca2cd75..efc10e9 100644 (file)
@@ -251,6 +251,8 @@ unsigned long get_wchan(struct task_struct *p);
 extern struct task_struct *cpu_switch_to(struct task_struct *prev,
                                         struct task_struct *next);
 
+asmlinkage void arm64_preempt_schedule_irq(void);
+
 #define task_pt_regs(p) \
        ((struct pt_regs *)(THREAD_SIZE + task_stack_page(p)) - 1)
 
index f9fbbb4..d4a5fca 100644 (file)
 #define SYS_GCR_EL1                    sys_reg(3, 0, 1, 0, 6)
 
 #define SYS_ZCR_EL1                    sys_reg(3, 0, 1, 2, 0)
+#define SYS_TRFCR_EL1                  sys_reg(3, 0, 1, 2, 1)
 
 #define SYS_TTBR0_EL1                  sys_reg(3, 0, 2, 0, 0)
 #define SYS_TTBR1_EL1                  sys_reg(3, 0, 2, 0, 1)
 
 #define SYS_SCTLR_EL2                  sys_reg(3, 4, 1, 0, 0)
 #define SYS_ZCR_EL2                    sys_reg(3, 4, 1, 2, 0)
+#define SYS_TRFCR_EL2                  sys_reg(3, 4, 1, 2, 1)
 #define SYS_DACR32_EL2                 sys_reg(3, 4, 3, 0, 0)
 #define SYS_SPSR_EL2                   sys_reg(3, 4, 4, 0, 0)
 #define SYS_ELR_EL2                    sys_reg(3, 4, 4, 0, 1)
 #define ID_AA64MMFR0_PARANGE_48                0x5
 #define ID_AA64MMFR0_PARANGE_52                0x6
 
+#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT 0x0
+#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE    0x1
+#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN     0x2
+#define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX     0x7
+
 #ifdef CONFIG_ARM64_PA_BITS_52
 #define ID_AA64MMFR0_PARANGE_MAX       ID_AA64MMFR0_PARANGE_52
 #else
 #define ID_AA64MMFR2_CNP_SHIFT         0
 
 /* id_aa64dfr0 */
+#define ID_AA64DFR0_TRACE_FILT_SHIFT   40
 #define ID_AA64DFR0_DOUBLELOCK_SHIFT   36
 #define ID_AA64DFR0_PMSVER_SHIFT       32
 #define ID_AA64DFR0_CTX_CMPS_SHIFT     28
 #define ID_PFR1_PROGMOD_SHIFT          0
 
 #if defined(CONFIG_ARM64_4K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT       ID_AA64MMFR0_TGRAN4_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED   ID_AA64MMFR0_TGRAN4_SUPPORTED
+#define ID_AA64MMFR0_TGRAN_SHIFT               ID_AA64MMFR0_TGRAN4_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN       ID_AA64MMFR0_TGRAN4_SUPPORTED
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX       0x7
 #elif defined(CONFIG_ARM64_16K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT       ID_AA64MMFR0_TGRAN16_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED   ID_AA64MMFR0_TGRAN16_SUPPORTED
+#define ID_AA64MMFR0_TGRAN_SHIFT               ID_AA64MMFR0_TGRAN16_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN       ID_AA64MMFR0_TGRAN16_SUPPORTED
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX       0xF
 #elif defined(CONFIG_ARM64_64K_PAGES)
-#define ID_AA64MMFR0_TGRAN_SHIFT       ID_AA64MMFR0_TGRAN64_SHIFT
-#define ID_AA64MMFR0_TGRAN_SUPPORTED   ID_AA64MMFR0_TGRAN64_SUPPORTED
+#define ID_AA64MMFR0_TGRAN_SHIFT               ID_AA64MMFR0_TGRAN64_SHIFT
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN       ID_AA64MMFR0_TGRAN64_SUPPORTED
+#define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX       0x7
 #endif
 
 #define MVFR2_FPMISC_SHIFT             4
 /* Safe value for MPIDR_EL1: Bit31:RES1, Bit30:U:0, Bit24:MT:0 */
 #define SYS_MPIDR_SAFE_VAL     (BIT(31))
 
+#define TRFCR_ELx_TS_SHIFT             5
+#define TRFCR_ELx_TS_VIRTUAL           ((0x1UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_ELx_TS_GUEST_PHYSICAL    ((0x2UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_ELx_TS_PHYSICAL          ((0x3UL) << TRFCR_ELx_TS_SHIFT)
+#define TRFCR_EL2_CX                   BIT(3)
+#define TRFCR_ELx_ExTRE                        BIT(1)
+#define TRFCR_ELx_E0TRE                        BIT(0)
+
 #ifdef __ASSEMBLY__
 
        .irp    num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
index 9f4e3b2..6623c99 100644 (file)
@@ -55,6 +55,8 @@ void arch_setup_new_exec(void);
 #define arch_setup_new_exec     arch_setup_new_exec
 
 void arch_release_task_struct(struct task_struct *tsk);
+int arch_dup_task_struct(struct task_struct *dst,
+                               struct task_struct *src);
 
 #endif
 
index 86a9d7b..949788f 100644 (file)
@@ -38,7 +38,7 @@
 #define __ARM_NR_compat_set_tls                (__ARM_NR_COMPAT_BASE + 5)
 #define __ARM_NR_COMPAT_END            (__ARM_NR_COMPAT_BASE + 0x800)
 
-#define __NR_compat_syscalls           442
+#define __NR_compat_syscalls           443
 #endif
 
 #define __ARCH_WANT_SYS_CLONE
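The count bump pairs with the table entry added in the unistd32.h hunk below; __NR_compat_syscalls must stay one past the highest wired number or the new entry would sit outside the compat table and never be reachable. The two-part pattern for wiring a compat syscall is:

        #define __NR_compat_syscalls           443     /* highest number + 1 */

        #define __NR_mount_setattr 442
        __SYSCALL(__NR_mount_setattr, sys_mount_setattr)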
index cccfbbe..3d874f6 100644 (file)
@@ -891,6 +891,8 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
 #define __NR_epoll_pwait2 441
 __SYSCALL(__NR_epoll_pwait2, compat_sys_epoll_pwait2)
+#define __NR_mount_setattr 442
+__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
 
 /*
  * Please add new compat syscalls above this comment and update
index 7ff8000..fdfecf0 100644 (file)
@@ -118,15 +118,3 @@ void __init acpi_numa_gicc_affinity_init(struct acpi_srat_gicc_affinity *pa)
        node_set(node, numa_nodes_parsed);
 }
 
-int __init arm64_acpi_numa_init(void)
-{
-       int ret;
-
-       ret = acpi_numa_init();
-       if (ret) {
-               pr_info("Failed to initialise from firmware\n");
-               return ret;
-       }
-
-       return srat_disabled() ? -EINVAL : 0;
-}
index 506a1cd..e2c20c0 100644 (file)
@@ -526,6 +526,14 @@ const struct arm64_cpu_capabilities arm64_errata[] = {
                                  1, 0),
        },
 #endif
+#ifdef CONFIG_NVIDIA_CARMEL_CNP_ERRATUM
+       {
+               /* NVIDIA Carmel */
+               .desc = "NVIDIA Carmel CNP erratum",
+               .capability = ARM64_WORKAROUND_NVIDIA_CARMEL_CNP,
+               ERRATA_MIDR_ALL_VERSIONS(MIDR_NVIDIA_CARMEL),
+       },
+#endif
        {
        }
 };
index 0660307..e5281e1 100644 (file)
@@ -383,7 +383,6 @@ static const struct arm64_ftr_bits ftr_id_aa64dfr0[] = {
         * of support.
         */
        S_ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_EXACT, ID_AA64DFR0_PMUVER_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_TRACEVER_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_STRICT, FTR_EXACT, ID_AA64DFR0_DEBUGVER_SHIFT, 4, 0x6),
        ARM64_FTR_END,
 };
@@ -1321,7 +1320,10 @@ has_useable_cnp(const struct arm64_cpu_capabilities *entry, int scope)
         * may share TLB entries with a CPU stuck in the crashed
         * kernel.
         */
-        if (is_kdump_kernel())
+       if (is_kdump_kernel())
+               return false;
+
+       if (cpus_have_const_cap(ARM64_WORKAROUND_NVIDIA_CARMEL_CNP))
                return false;
 
        return has_cpuid_feature(entry, scope);
index 77605ae..51fcf99 100644 (file)
@@ -353,7 +353,7 @@ static void __cpuinfo_store_cpu(struct cpuinfo_arm64 *info)
         * with the CLIDR_EL1 fields to avoid triggering false warnings
         * when there is a mismatch across the CPUs. Keep track of the
         * effective value of the CTR_EL0 in our internal records for
-        * acurate sanity check and feature enablement.
+        * accurate sanity check and feature enablement.
         */
        info->reg_ctr = read_cpuid_effective_cachetype();
        info->reg_dczid = read_cpuid(DCZID_EL0);
index e6e2842..58303a9 100644 (file)
@@ -64,5 +64,7 @@ ssize_t copy_oldmem_page(unsigned long pfn, char *buf,
 ssize_t elfcorehdr_read(char *buf, size_t count, u64 *ppos)
 {
        memcpy(buf, phys_to_virt((phys_addr_t)*ppos), count);
+       *ppos += count;
+
        return count;
 }
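Without the *ppos update, every call copies from the same physical offset, so a reader pulling the crashed kernel's ELF core header in chunks (as the vmcore setup code does) keeps getting the first chunk. The contract the fix restores, as a generic sketch with hypothetical names:

        ssize_t chunked_read(char *dst, const char *src, size_t count, u64 *ppos)
        {
                memcpy(dst, src + *ppos, count);
                *ppos += count;         /* advance so the next call continues */
                return count;
        }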
index 1e30b55..840bda1 100644 (file)
@@ -319,7 +319,7 @@ SYM_FUNC_START_LOCAL(__create_page_tables)
         */
        adrp    x5, __idmap_text_end
        clz     x5, x5
-       cmp     x5, TCR_T0SZ(VA_BITS)   // default T0SZ small enough?
+       cmp     x5, TCR_T0SZ(VA_BITS_MIN) // default T0SZ small enough?
        b.ge    1f                      // .. then skip VA range extension
 
        adr_l   x6, idmap_t0sz
@@ -655,8 +655,10 @@ SYM_FUNC_END(__secondary_too_slow)
 SYM_FUNC_START(__enable_mmu)
        mrs     x2, ID_AA64MMFR0_EL1
        ubfx    x2, x2, #ID_AA64MMFR0_TGRAN_SHIFT, 4
-       cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED
-       b.ne    __no_granule_support
+       cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MIN
+       b.lt    __no_granule_support
+       cmp     x2, #ID_AA64MMFR0_TGRAN_SUPPORTED_MAX
+       b.gt    __no_granule_support
        update_early_cpu_boot_status 0, x2, x3
        adrp    x2, idmap_pg_dir
        phys_to_ttbr x1, x1
@@ -837,6 +839,7 @@ SYM_FUNC_START_LOCAL(__primary_switch)
 
        tlbi    vmalle1                         // Remove any stale TLB entries
        dsb     nsh
+       isb
 
        set_sctlr_el1   x19                     // re-enable the MMU
 
index 678cd2c..5eccbd6 100644 (file)
@@ -75,9 +75,6 @@ SYM_CODE_END(el1_sync)
 
 // nVHE? No way! Give me the real thing!
 SYM_CODE_START_LOCAL(mutate_to_vhe)
-       // Be prepared to fail
-       mov_q   x0, HVC_STUB_ERR
-
        // Sanity check: MMU *must* be off
        mrs     x1, sctlr_el2
        tbnz    x1, #0, 1f
@@ -96,8 +93,11 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
        cmp     x1, xzr
        and     x2, x2, x1
        csinv   x2, x2, xzr, ne
-       cbz     x2, 1f
+       cbnz    x2, 2f
 
+1:     mov_q   x0, HVC_STUB_ERR
+       eret
+2:
        // Engage the VHE magic!
        mov_q   x0, HCR_HOST_VHE_FLAGS
        msr     hcr_el2, x0
@@ -131,9 +131,28 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
        msr     mair_el1, x0
        isb
 
+       // Hack the exception return to stay at EL2
+       mrs     x0, spsr_el1
+       and     x0, x0, #~PSR_MODE_MASK
+       mov     x1, #PSR_MODE_EL2h
+       orr     x0, x0, x1
+       msr     spsr_el1, x0
+
+       b       enter_vhe
+SYM_CODE_END(mutate_to_vhe)
+
+       // At the point where we reach enter_vhe(), we run with
+       // the MMU off (which is enforced by mutate_to_vhe()).
+       // We thus need to be in the idmap, or everything will
+       // explode when enabling the MMU.
+
+       .pushsection    .idmap.text, "ax"
+
+SYM_CODE_START_LOCAL(enter_vhe)
        // Invalidate TLBs before enabling the MMU
        tlbi    vmalle1
        dsb     nsh
+       isb
 
        // Enable the EL2 S1 MMU, as set up from EL1
        mrs_s   x0, SYS_SCTLR_EL12
@@ -143,17 +162,12 @@ SYM_CODE_START_LOCAL(mutate_to_vhe)
        mov_q   x0, INIT_SCTLR_EL1_MMU_OFF
        msr_s   SYS_SCTLR_EL12, x0
 
-       // Hack the exception return to stay at EL2
-       mrs     x0, spsr_el1
-       and     x0, x0, #~PSR_MODE_MASK
-       mov     x1, #PSR_MODE_EL2h
-       orr     x0, x0, x1
-       msr     spsr_el1, x0
-
        mov     x0, xzr
 
-1:     eret
-SYM_CODE_END(mutate_to_vhe)
+       eret
+SYM_CODE_END(enter_vhe)
+
+       .popsection
 
 .macro invalid_vector  label
 SYM_CODE_START_LOCAL(\label)
index dffb166..83f1c4b 100644 (file)
@@ -163,33 +163,36 @@ static __init void __parse_cmdline(const char *cmdline, bool parse_aliases)
        } while (1);
 }
 
-static __init void parse_cmdline(void)
+static __init const u8 *get_bootargs_cmdline(void)
 {
-       if (!IS_ENABLED(CONFIG_CMDLINE_FORCE)) {
-               const u8 *prop;
-               void *fdt;
-               int node;
+       const u8 *prop;
+       void *fdt;
+       int node;
 
-               fdt = get_early_fdt_ptr();
-               if (!fdt)
-                       goto out;
+       fdt = get_early_fdt_ptr();
+       if (!fdt)
+               return NULL;
 
-               node = fdt_path_offset(fdt, "/chosen");
-               if (node < 0)
-                       goto out;
+       node = fdt_path_offset(fdt, "/chosen");
+       if (node < 0)
+               return NULL;
 
-               prop = fdt_getprop(fdt, node, "bootargs", NULL);
-               if (!prop)
-                       goto out;
+       prop = fdt_getprop(fdt, node, "bootargs", NULL);
+       if (!prop)
+               return NULL;
 
-               __parse_cmdline(prop, true);
+       return strlen(prop) ? prop : NULL;
+}
 
-               if (!IS_ENABLED(CONFIG_CMDLINE_EXTEND))
-                       return;
-       }
+static __init void parse_cmdline(void)
+{
+       const u8 *prop = get_bootargs_cmdline();
 
-out:
-       __parse_cmdline(CONFIG_CMDLINE, true);
+       if (IS_ENABLED(CONFIG_CMDLINE_FORCE) || !prop)
+               __parse_cmdline(CONFIG_CMDLINE, true);
+
+       if (!IS_ENABLED(CONFIG_CMDLINE_FORCE) && prop)
+               __parse_cmdline(prop, true);
 }
 
 /* Keep checkers quiet */
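The strlen() check is the key change: an empty /chosen bootargs property now counts as absent, so the override parser falls back to CONFIG_CMDLINE in the same cases the core kernel's command-line handling does. The behaviour of the two if-statements, as a sketch:

        /* CMDLINE_FORCE=y                          -> parse CONFIG_CMDLINE          */
        /* CMDLINE_FORCE=n, bootargs non-empty      -> parse the DT /chosen bootargs */
        /* CMDLINE_FORCE=n, bootargs empty/missing  -> parse CONFIG_CMDLINE          */

Exactly one of the two __parse_cmdline() calls runs on any given boot.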
index 23f1a55..5aa9ed1 100644 (file)
@@ -101,6 +101,9 @@ KVM_NVHE_ALIAS(__stop___kvm_ex_table);
 /* Array containing bases of nVHE per-CPU memory regions. */
 KVM_NVHE_ALIAS(kvm_arm_hyp_percpu_base);
 
+/* PMU available static key */
+KVM_NVHE_ALIAS(kvm_arm_pmu_available);
+
 #endif /* CONFIG_KVM */
 
 #endif /* __ARM64_KERNEL_IMAGE_VARS_H */
index 03210f6..0cde47a 100644 (file)
@@ -182,8 +182,10 @@ static int create_dtb(struct kimage *image,
 
                /* duplicate a device tree blob */
                ret = fdt_open_into(initial_boot_params, buf, buf_size);
-               if (ret)
+               if (ret) {
+                       vfree(buf);
                        return -EINVAL;
+               }
 
                ret = setup_dtb(image, initrd_load_addr, initrd_len,
                                cmdline, buf);
index 80b62fe..b3c70a6 100644 (file)
 #include <asm/barrier.h>
 #include <asm/cpufeature.h>
 #include <asm/mte.h>
-#include <asm/mte-kasan.h>
 #include <asm/ptrace.h>
 #include <asm/sysreg.h>
 
 u64 gcr_kernel_excl __ro_after_init;
 
+static bool report_fault_once = true;
+
 static void mte_sync_page_tags(struct page *page, pte_t *ptep, bool check_swap)
 {
        pte_t old_pte = READ_ONCE(*ptep);
@@ -86,51 +87,6 @@ int memcmp_pages(struct page *page1, struct page *page2)
        return ret;
 }
 
-u8 mte_get_mem_tag(void *addr)
-{
-       if (!system_supports_mte())
-               return 0xFF;
-
-       asm(__MTE_PREAMBLE "ldg %0, [%0]"
-           : "+r" (addr));
-
-       return mte_get_ptr_tag(addr);
-}
-
-u8 mte_get_random_tag(void)
-{
-       void *addr;
-
-       if (!system_supports_mte())
-               return 0xFF;
-
-       asm(__MTE_PREAMBLE "irg %0, %0"
-           : "+r" (addr));
-
-       return mte_get_ptr_tag(addr);
-}
-
-void *mte_set_mem_tag_range(void *addr, size_t size, u8 tag)
-{
-       void *ptr = addr;
-
-       if ((!system_supports_mte()) || (size == 0))
-               return addr;
-
-       /* Make sure that size is MTE granule aligned. */
-       WARN_ON(size & (MTE_GRANULE_SIZE - 1));
-
-       /* Make sure that the address is MTE granule aligned. */
-       WARN_ON((u64)addr & (MTE_GRANULE_SIZE - 1));
-
-       tag = 0xF0 | tag;
-       ptr = (void *)__tag_set(ptr, tag);
-
-       mte_assign_mem_tag_range(ptr, size);
-
-       return ptr;
-}
-
 void mte_init_tags(u64 max_tag)
 {
        static bool gcr_kernel_excl_initialized;
@@ -158,6 +114,16 @@ void mte_enable_kernel(void)
        isb();
 }
 
+void mte_set_report_once(bool state)
+{
+       WRITE_ONCE(report_fault_once, state);
+}
+
+bool mte_report_once(void)
+{
+       return READ_ONCE(report_fault_once);
+}
+
 static void update_sctlr_el1_tcf0(u64 tcf0)
 {
        /* ISB required for the kernel uaccess routines */
index 7d2318f..4658fcf 100644 (file)
@@ -460,7 +460,7 @@ static inline int armv8pmu_counter_has_overflowed(u32 pmnc, int idx)
        return pmnc & BIT(ARMV8_IDX_TO_COUNTER(idx));
 }
 
-static inline u32 armv8pmu_read_evcntr(int idx)
+static inline u64 armv8pmu_read_evcntr(int idx)
 {
        u32 counter = ARMV8_IDX_TO_COUNTER(idx);
 
index a412d8e..2c24763 100644 (file)
@@ -38,7 +38,7 @@ int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
 
        /* TODO: Currently we do not support AARCH32 instruction probing */
        if (mm->context.flags & MMCF_AARCH32)
-               return -ENOTSUPP;
+               return -EOPNOTSUPP;
        else if (!IS_ALIGNED(addr, AARCH64_INSN_SIZE))
                return -EINVAL;
 
index 4cc1ccc..6e60aa3 100644 (file)
@@ -57,6 +57,8 @@
 #include <asm/processor.h>
 #include <asm/pointer_auth.h>
 #include <asm/stacktrace.h>
+#include <asm/switch_to.h>
+#include <asm/system_misc.h>
 
 #if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
@@ -398,7 +400,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
        ptrauth_thread_init_kernel(p);
 
-       if (likely(!(p->flags & PF_KTHREAD))) {
+       if (likely(!(p->flags & (PF_KTHREAD | PF_IO_WORKER)))) {
                *childregs = *current_pt_regs();
                childregs->regs[0] = 0;
 
index 3d5c8af..170f42f 100644 (file)
@@ -1797,7 +1797,7 @@ int syscall_trace_enter(struct pt_regs *regs)
 
        if (flags & (_TIF_SYSCALL_EMU | _TIF_SYSCALL_TRACE)) {
                tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER);
-               if (!in_syscall(regs) || (flags & _TIF_SYSCALL_EMU))
+               if (flags & _TIF_SYSCALL_EMU)
                        return NO_SYSCALL;
        }
 
index 0fb4212..d55bdfb 100644 (file)
@@ -46,7 +46,7 @@ int notrace unwind_frame(struct task_struct *tsk, struct stackframe *frame)
 
        /* Terminal record; nothing to unwind */
        if (!fp)
-               return -EINVAL;
+               return -ENOENT;
 
        if (fp & 0xf)
                return -EINVAL;
@@ -194,8 +194,9 @@ void show_stack(struct task_struct *tsk, unsigned long *sp, const char *loglvl)
 
 #ifdef CONFIG_STACKTRACE
 
-void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
-                    struct task_struct *task, struct pt_regs *regs)
+noinline void arch_stack_walk(stack_trace_consume_fn consume_entry,
+                             void *cookie, struct task_struct *task,
+                             struct pt_regs *regs)
 {
        struct stackframe frame;
 
@@ -203,8 +204,8 @@ void arch_stack_walk(stack_trace_consume_fn consume_entry, void *cookie,
                start_backtrace(&frame, regs->regs[29], regs->pc);
        else if (task == current)
                start_backtrace(&frame,
-                               (unsigned long)__builtin_frame_address(0),
-                               (unsigned long)arch_stack_walk);
+                               (unsigned long)__builtin_frame_address(1),
+                               (unsigned long)__builtin_return_address(0));
        else
                start_backtrace(&frame, thread_saved_fp(task),
                                thread_saved_pc(task));
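Two details work together here: marking arch_stack_walk() noinline makes __builtin_frame_address(1) well-defined (the function needs a real frame for a caller frame to exist), and starting from frame 1 with __builtin_return_address(0) as the PC keeps the walker's own frame out of the reported trace. Conceptually:

        __builtin_frame_address(0)  /* arch_stack_walk's own frame: the old start */
        __builtin_frame_address(1)  /* the caller's frame: the new start, valid
                                       only because the function is noinline      */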
index a67b37a..d756489 100644 (file)
@@ -119,7 +119,7 @@ int cpu_suspend(unsigned long arg, int (*fn)(unsigned long))
                if (!ret)
                        ret = -EOPNOTSUPP;
        } else {
-               __cpu_suspend_exit();
+               RCU_NONIDLE(__cpu_suspend_exit());
        }
 
        unpause_graph_tracing();
index 76c0255..945e6bb 100644 (file)
@@ -29,7 +29,8 @@ ldflags-y := -shared -nostdlib -soname=linux-vdso.so.1 --hash-style=sysv      \
 ccflags-y := -fno-common -fno-builtin -fno-stack-protector -ffixed-x18
 ccflags-y += -DDISABLE_BRANCH_PROFILING -DBUILD_VDSO
 
-CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS)
+CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os $(CC_FLAGS_SCS) $(GCC_PLUGINS_CFLAGS) \
+                               $(CC_FLAGS_LTO)
 KASAN_SANITIZE                 := n
 UBSAN_SANITIZE                 := n
 OBJECT_FILES_NON_STANDARD      := y
index fc4c95d..7f06ba7 100644 (file)
@@ -385,11 +385,16 @@ void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu)
        last_ran = this_cpu_ptr(mmu->last_vcpu_ran);
 
        /*
+        * We guarantee that both TLBs and I-cache are private to each
+        * vcpu. If we detect that a vcpu from the same VM has
+        * previously run on the same physical CPU, call into the
+        * hypervisor code to nuke the relevant contexts.
+        *
         * We might get preempted before the vCPU actually runs, but
         * over-invalidation doesn't affect correctness.
         */
        if (*last_ran != vcpu->vcpu_id) {
-               kvm_call_hyp(__kvm_tlb_flush_local_vmid, mmu);
+               kvm_call_hyp(__kvm_flush_cpu_context, mmu);
                *last_ran = vcpu->vcpu_id;
        }
 
index 7a7e425..dbc8905 100644 (file)
@@ -89,6 +89,7 @@ void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu)
  *  - Debug ROM Address (MDCR_EL2_TDRA)
  *  - OS related registers (MDCR_EL2_TDOSA)
  *  - Statistical profiler (MDCR_EL2_TPMS/MDCR_EL2_E2PB)
+ *  - Self-hosted Trace Filter controls (MDCR_EL2_TTRF)
  *
  * Additionally, KVM only traps guest accesses to the debug registers if
  * the guest is not actively using them (see the KVM_ARM64_DEBUG_DIRTY
@@ -112,6 +113,7 @@ void kvm_arm_setup_debug(struct kvm_vcpu *vcpu)
        vcpu->arch.mdcr_el2 = __this_cpu_read(mdcr_el2) & MDCR_EL2_HPMN_MASK;
        vcpu->arch.mdcr_el2 |= (MDCR_EL2_TPM |
                                MDCR_EL2_TPMS |
+                               MDCR_EL2_TTRF |
                                MDCR_EL2_TPMCR |
                                MDCR_EL2_TDRA |
                                MDCR_EL2_TDOSA);
index b0afad7..e831d3d 100644 (file)
@@ -85,8 +85,10 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
 
        // If the hyp context is loaded, go straight to hyp_panic
        get_loaded_vcpu x0, x1
-       cbz     x0, hyp_panic
+       cbnz    x0, 1f
+       b       hyp_panic
 
+1:
        // The hyp context is saved so make sure it is restored to allow
        // hyp_panic to run at hyp and, subsequently, panic to run in the host.
        // This makes use of __guest_exit to avoid duplication but sets the
@@ -94,7 +96,7 @@ SYM_INNER_LABEL(__guest_exit_panic, SYM_L_GLOBAL)
        // current state is saved to the guest context but it will only be
        // accurate if the guest had been completely restored.
        adr_this_cpu x0, kvm_hyp_ctxt, x1
-       adr     x1, hyp_panic
+       adr_l   x1, hyp_panic
        str     x1, [x0, #CPU_XREG_OFFSET(30)]
 
        get_vcpu_ptr    x1, x0
@@ -146,7 +148,7 @@ SYM_INNER_LABEL(__guest_exit, SYM_L_GLOBAL)
        // Now restore the hyp regs
        restore_callee_saved_regs x2
 
-       set_loaded_vcpu xzr, x1, x2
+       set_loaded_vcpu xzr, x2, x3
 
 alternative_if ARM64_HAS_RAS_EXTN
        // If we have the RAS extensions we can consume a pending error
index d179056..5f49df4 100644 (file)
@@ -119,7 +119,7 @@ el2_error:
 
 .macro invalid_vector  label, target = __guest_exit_panic
        .align  2
-SYM_CODE_START(\label)
+SYM_CODE_START_LOCAL(\label)
        b \target
 SYM_CODE_END(\label)
 .endm
index 54f4860..6c1f51f 100644 (file)
@@ -90,15 +90,18 @@ static inline void __activate_traps_common(struct kvm_vcpu *vcpu)
         * counter, which could make a PMXEVCNTR_EL0 access UNDEF at
         * EL1 instead of being trapped to EL2.
         */
-       write_sysreg(0, pmselr_el0);
-       write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+       if (kvm_arm_support_pmu_v3()) {
+               write_sysreg(0, pmselr_el0);
+               write_sysreg(ARMV8_PMU_USERENR_MASK, pmuserenr_el0);
+       }
        write_sysreg(vcpu->arch.mdcr_el2, mdcr_el2);
 }
 
 static inline void __deactivate_traps_common(void)
 {
        write_sysreg(0, hstr_el2);
-       write_sysreg(0, pmuserenr_el0);
+       if (kvm_arm_support_pmu_v3())
+               write_sysreg(0, pmuserenr_el0);
 }
 
 static inline void ___activate_traps(struct kvm_vcpu *vcpu)
index 91a711a..f401724 100644 (file)
@@ -58,16 +58,24 @@ static void __debug_restore_spe(u64 pmscr_el1)
        write_sysreg_s(pmscr_el1, SYS_PMSCR_EL1);
 }
 
-void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+void __debug_save_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
        /* Disable and flush SPE data generation */
        __debug_save_spe(&vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_guest(struct kvm_vcpu *vcpu)
+{
        __debug_switch_to_guest_common(vcpu);
 }
 
-void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+void __debug_restore_host_buffers_nvhe(struct kvm_vcpu *vcpu)
 {
        __debug_restore_spe(vcpu->arch.host_debug_state.pmscr_el1);
+}
+
+void __debug_switch_to_host(struct kvm_vcpu *vcpu)
+{
        __debug_switch_to_host_common(vcpu);
 }
 
index 6585a7c..5d94584 100644 (file)
@@ -71,7 +71,8 @@ SYM_FUNC_START(__host_enter)
 SYM_FUNC_END(__host_enter)
 
 /*
- * void __noreturn __hyp_do_panic(bool restore_host, u64 spsr, u64 elr, u64 par);
+ * void __noreturn __hyp_do_panic(struct kvm_cpu_context *host_ctxt, u64 spsr,
+ *                               u64 elr, u64 par);
  */
 SYM_FUNC_START(__hyp_do_panic)
        /* Prepare and exit to the host's panic function. */
@@ -82,9 +83,11 @@ SYM_FUNC_START(__hyp_do_panic)
        hyp_kimg_va lr, x6
        msr     elr_el2, lr
 
-       /* Set the panic format string. Use the, now free, LR as scratch. */
-       ldr     lr, =__hyp_panic_string
-       hyp_kimg_va lr, x6
+       mov     x29, x0
+
+       /* Load the panic format string into x0. */
+       ldr     x0, =__hyp_panic_string
+       hyp_kimg_va x0, x6
 
        /* Load the format arguments into x1-7. */
        mov     x6, x3
@@ -94,9 +97,7 @@ SYM_FUNC_START(__hyp_do_panic)
        mrs     x5, hpfar_el2
 
        /* Enter the host, conditionally restoring the host context. */
-       cmp     x0, xzr
-       mov     x0, lr
-       b.eq    __host_enter_without_restoring
+       cbz     x29, __host_enter_without_restoring
        b       __host_enter_for_panic
 SYM_FUNC_END(__hyp_do_panic)
 
index f012f86..9363282 100644 (file)
@@ -46,11 +46,11 @@ static void handle___kvm_tlb_flush_vmid(struct kvm_cpu_context *host_ctxt)
        __kvm_tlb_flush_vmid(kern_hyp_va(mmu));
 }
 
-static void handle___kvm_tlb_flush_local_vmid(struct kvm_cpu_context *host_ctxt)
+static void handle___kvm_flush_cpu_context(struct kvm_cpu_context *host_ctxt)
 {
        DECLARE_REG(struct kvm_s2_mmu *, mmu, host_ctxt, 1);
 
-       __kvm_tlb_flush_local_vmid(kern_hyp_va(mmu));
+       __kvm_flush_cpu_context(kern_hyp_va(mmu));
 }
 
 static void handle___kvm_timer_set_cntvoff(struct kvm_cpu_context *host_ctxt)
@@ -67,9 +67,9 @@ static void handle___kvm_enable_ssbs(struct kvm_cpu_context *host_ctxt)
        write_sysreg_el2(tmp, SYS_SCTLR);
 }
 
-static void handle___vgic_v3_get_ich_vtr_el2(struct kvm_cpu_context *host_ctxt)
+static void handle___vgic_v3_get_gic_config(struct kvm_cpu_context *host_ctxt)
 {
-       cpu_reg(host_ctxt, 1) = __vgic_v3_get_ich_vtr_el2();
+       cpu_reg(host_ctxt, 1) = __vgic_v3_get_gic_config();
 }
 
 static void handle___vgic_v3_read_vmcr(struct kvm_cpu_context *host_ctxt)
@@ -115,10 +115,10 @@ static const hcall_t host_hcall[] = {
        HANDLE_FUNC(__kvm_flush_vm_context),
        HANDLE_FUNC(__kvm_tlb_flush_vmid_ipa),
        HANDLE_FUNC(__kvm_tlb_flush_vmid),
-       HANDLE_FUNC(__kvm_tlb_flush_local_vmid),
+       HANDLE_FUNC(__kvm_flush_cpu_context),
        HANDLE_FUNC(__kvm_timer_set_cntvoff),
        HANDLE_FUNC(__kvm_enable_ssbs),
-       HANDLE_FUNC(__vgic_v3_get_ich_vtr_el2),
+       HANDLE_FUNC(__vgic_v3_get_gic_config),
        HANDLE_FUNC(__vgic_v3_read_vmcr),
        HANDLE_FUNC(__vgic_v3_write_vmcr),
        HANDLE_FUNC(__vgic_v3_init_lrs),
index f3d0e9e..68ab6b4 100644 (file)
@@ -192,6 +192,14 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
        pmu_switch_needed = __pmu_switch_to_guest(host_ctxt);
 
        __sysreg_save_state_nvhe(host_ctxt);
+       /*
+        * We must flush and disable the SPE buffer for nVHE, as
+        * the translation regime(EL1&0) is going to be loaded with
+        * that of the guest. And we must do this before we change the
+        * translation regime to EL2 (via MDCR_EL2_E2PB == 0) and
+        * before we load guest Stage1.
+        */
+       __debug_save_host_buffers_nvhe(vcpu);
 
        __adjust_pc(vcpu);
 
@@ -234,11 +242,12 @@ int __kvm_vcpu_run(struct kvm_vcpu *vcpu)
        if (vcpu->arch.flags & KVM_ARM64_FP_ENABLED)
                __fpsimd_save_fpexc32(vcpu);
 
+       __debug_switch_to_host(vcpu);
        /*
         * This must come after restoring the host sysregs, since a non-VHE
         * system may enable SPE here and make use of the TTBRs.
         */
-       __debug_switch_to_host(vcpu);
+       __debug_restore_host_buffers_nvhe(vcpu);
 
        if (pmu_switch_needed)
                __pmu_switch_to_host(host_ctxt);
@@ -257,7 +266,6 @@ void __noreturn hyp_panic(void)
        u64 spsr = read_sysreg_el2(SYS_SPSR);
        u64 elr = read_sysreg_el2(SYS_ELR);
        u64 par = read_sysreg_par();
-       bool restore_host = true;
        struct kvm_cpu_context *host_ctxt;
        struct kvm_vcpu *vcpu;
 
@@ -271,7 +279,7 @@ void __noreturn hyp_panic(void)
                __sysreg_restore_state_nvhe(host_ctxt);
        }
 
-       __hyp_do_panic(restore_host, spsr, elr, par);
+       __hyp_do_panic(host_ctxt, spsr, elr, par);
        unreachable();
 }
 
index fbde89a..229b067 100644 (file)
@@ -123,7 +123,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_host(&cxt);
 }
 
-void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
 
@@ -131,6 +131,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_guest(mmu, &cxt);
 
        __tlbi(vmalle1);
+       asm volatile("ic iallu");
        dsb(nsh);
        isb();
 
index 4d177ce..926fc07 100644 (file)
@@ -223,6 +223,7 @@ static inline int __kvm_pgtable_visit(struct kvm_pgtable_walk_data *data,
                goto out;
 
        if (!table) {
+               data->addr = ALIGN_DOWN(data->addr, kvm_granule_size(level));
                data->addr += kvm_granule_size(level);
                goto out;
        }
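
The one-line fix above matters when a walk starts part-way into a block mapping: the cursor must advance to the start of the next block, not to granule-size past an unaligned address. A standalone sketch of the arithmetic, with a local ALIGN_DOWN and illustrative addresses:

#include <stdint.h>
#include <stdio.h>

#define ALIGN_DOWN(x, a)        ((x) & ~((uint64_t)(a) - 1))

int main(void)
{
        uint64_t addr = 0x201000;       /* walk entered 4KiB into a block */
        uint64_t granule = 0x200000;    /* 2MiB granule at this level */

        /* Unfixed: 0x201000 + 0x200000 = 0x401000 overshoots the next
         * block. Fixed: align down first, so 0x200000 + 0x200000 =
         * 0x400000, exactly the next block boundary. */
        addr = ALIGN_DOWN(addr, granule) + granule;
        printf("next block at %#jx\n", (uintmax_t)addr);
        return 0;
}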
index 80406f4..39f8f7f 100644 (file)
@@ -405,9 +405,54 @@ void __vgic_v3_init_lrs(void)
                __gic_v3_set_lr(0, i);
 }
 
-u64 __vgic_v3_get_ich_vtr_el2(void)
+/*
+ * Return the GIC CPU configuration:
+ * - [31:0]  ICH_VTR_EL2
+ * - [62:32] RES0
+ * - [63]    MMIO (GICv2) capable
+ */
+u64 __vgic_v3_get_gic_config(void)
 {
-       return read_gicreg(ICH_VTR_EL2);
+       u64 val, sre = read_gicreg(ICC_SRE_EL1);
+       unsigned long flags = 0;
+
+       /*
+        * To check whether we have an MMIO-based (GICv2 compatible)
+        * CPU interface, we need to disable the system register
+        * view. To do that safely, we have to prevent any interrupt
+        * from firing (which would be deadly).
+        *
+        * Note that this only makes sense on VHE, as interrupts are
+        * already masked for nVHE as part of the exception entry to
+        * EL2.
+        */
+       if (has_vhe())
+               flags = local_daif_save();
+
+       /*
+        * Table 11-2 "Permitted ICC_SRE_ELx.SRE settings" indicates
+        * that to be able to set ICC_SRE_EL1.SRE to 0, all the
+        * interrupt overrides must be set. You've got to love this.
+        */
+       sysreg_clear_set(hcr_el2, 0, HCR_AMO | HCR_FMO | HCR_IMO);
+       isb();
+       write_gicreg(0, ICC_SRE_EL1);
+       isb();
+
+       val = read_gicreg(ICC_SRE_EL1);
+
+       write_gicreg(sre, ICC_SRE_EL1);
+       isb();
+       sysreg_clear_set(hcr_el2, HCR_AMO | HCR_FMO | HCR_IMO, 0);
+       isb();
+
+       if (has_vhe())
+               local_daif_restore(flags);
+
+       val  = (val & ICC_SRE_EL1_SRE) ? 0 : (1ULL << 63);
+       val |= read_gicreg(ICH_VTR_EL2);
+
+       return val;
 }
 
 u64 __vgic_v3_read_vmcr(void)
index fd78959..66f1734 100644 (file)
@@ -127,7 +127,7 @@ void __kvm_tlb_flush_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_host(&cxt);
 }
 
-void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
+void __kvm_flush_cpu_context(struct kvm_s2_mmu *mmu)
 {
        struct tlb_inv_context cxt;
 
@@ -135,6 +135,7 @@ void __kvm_tlb_flush_local_vmid(struct kvm_s2_mmu *mmu)
        __tlb_switch_to_guest(mmu, &cxt);
 
        __tlbi(vmalle1);
+       asm volatile("ic iallu");
        dsb(nsh);
        isb();
 
index 77cb2d2..8711894 100644 (file)
@@ -1312,8 +1312,7 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
         * Prevent userspace from creating a memory region outside of the IPA
         * space addressable by the KVM guest IPA space.
         */
-       if (memslot->base_gfn + memslot->npages >=
-           (kvm_phys_size(kvm) >> PAGE_SHIFT))
+       if ((memslot->base_gfn + memslot->npages) > (kvm_phys_size(kvm) >> PAGE_SHIFT))
                return -EFAULT;
 
        mmap_read_lock(current->mm);
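
The rewritten bound check is an off-by-one fix: a slot whose last page ends exactly at the IPA limit is legal and must be accepted; only slots reaching past the limit are rejected. A small sketch with illustrative numbers (a 40-bit IPA space and 4KiB pages):

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Mirror of the fixed check; 1 << (40 - 12) guest frames fit in a
 * 40-bit IPA space with 4KiB pages (illustrative values). */
static bool memslot_fits(uint64_t base_gfn, uint64_t npages)
{
        const uint64_t ipa_pages = UINT64_C(1) << (40 - 12);

        return !(base_gfn + npages > ipa_pages);
}

int main(void)
{
        const uint64_t limit = UINT64_C(1) << 28;

        assert(memslot_fits(limit - 16, 16));   /* old '>=' wrongly rejected this */
        assert(!memslot_fits(limit - 16, 17));  /* one page past the limit */
        return 0;
}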
index d45b8b9..7391643 100644 (file)
@@ -11,6 +11,8 @@
 
 #include <asm/kvm_emulate.h>
 
+DEFINE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
+
 static int kvm_is_in_guest(void)
 {
         return kvm_get_running_vcpu() != NULL;
@@ -48,6 +50,14 @@ static struct perf_guest_info_callbacks kvm_guest_cbs = {
 
 int kvm_perf_init(void)
 {
+       /*
+        * Check if HW_PERF_EVENTS is supported by checking the number of
+        * hardware performance counters: a non-zero count implies that a
+        * physical PMU is present and CONFIG_PERF_EVENT is selected.
+        */
+       if (IS_ENABLED(CONFIG_ARM_PMU) && perf_num_counters() > 0)
+               static_branch_enable(&kvm_arm_pmu_available);
+
        return perf_register_guest_info_callbacks(&kvm_guest_cbs);
 }
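
kvm_perf_init() now latches a static key instead of relying on the perf_num_counters() probe, and kvm_arm_support_pmu_v3() (whose pmu.c body is deleted below) presumably becomes a static-branch test in a header. A sketch of that fast-path side, not the verbatim header hunk:

/* Fast-path counterpart of the static key enabled above; the actual
 * header change is not part of the hunks shown here. */
DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);

static __always_inline bool kvm_arm_support_pmu_v3(void)
{
        return static_branch_likely(&kvm_arm_pmu_available);
}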
 
index e9ec08b..e32c6e1 100644 (file)
@@ -823,16 +823,6 @@ u64 kvm_pmu_get_pmceid(struct kvm_vcpu *vcpu, bool pmceid1)
        return val & mask;
 }
 
-bool kvm_arm_support_pmu_v3(void)
-{
-       /*
-        * Check if HW_PERF_EVENTS are supported by checking the number of
-        * hardware performance counters. This could ensure the presence of
-        * a physical PMU and CONFIG_PERF_EVENT is selected.
-        */
-       return (perf_num_counters() > 0);
-}
-
 int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu)
 {
        if (!kvm_vcpu_has_pmu(vcpu))
index 47f3f03..bd354cd 100644 (file)
@@ -311,23 +311,24 @@ int kvm_set_ipa_limit(void)
        }
 
        switch (cpuid_feature_extract_unsigned_field(mmfr0, tgran_2)) {
-       default:
-       case 1:
+       case ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE:
                kvm_err("PAGE_SIZE not supported at Stage-2, giving up\n");
                return -EINVAL;
-       case 0:
+       case ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT:
                kvm_debug("PAGE_SIZE supported at Stage-2 (default)\n");
                break;
-       case 2:
+       case ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN ... ID_AA64MMFR0_TGRAN_2_SUPPORTED_MAX:
                kvm_debug("PAGE_SIZE supported at Stage-2 (advertised)\n");
                break;
+       default:
+               kvm_err("Unsupported value for TGRAN_2, giving up\n");
+               return -EINVAL;
        }
 
        kvm_ipa_limit = id_aa64mmfr0_parange_to_phys_shift(parange);
-       WARN(kvm_ipa_limit < KVM_PHYS_SHIFT,
-            "KVM IPA Size Limit (%d bits) is smaller than default size\n",
-            kvm_ipa_limit);
-       kvm_info("IPA Size Limit: %d bits\n", kvm_ipa_limit);
+       kvm_info("IPA Size Limit: %d bits%s\n", kvm_ipa_limit,
+                ((kvm_ipa_limit < KVM_PHYS_SHIFT) ?
+                 " (Reduced IPA size, limited VM/VMM compatibility)" : ""));
 
        return 0;
 }
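
The rewritten switch names the TGRAN_2 encodings and uses GCC's case-range extension so every value in the MIN..MAX window takes the "advertised" branch. In isolation the construct looks like this (constants are placeholders, not the real ID register fields):

#include <stdio.h>

enum tgran2 {
        TGRAN2_NONE     = 1,    /* placeholder encodings */
        TGRAN2_DEFAULT  = 0,
        TGRAN2_MIN      = 2,
        TGRAN2_MAX      = 7,
};

static const char *tgran2_status(unsigned int field)
{
        switch (field) {
        case TGRAN2_NONE:
                return "PAGE_SIZE not supported at Stage-2";
        case TGRAN2_DEFAULT:
                return "supported (default)";
        case TGRAN2_MIN ... TGRAN2_MAX: /* GCC/Clang case-range extension */
                return "supported (advertised)";
        default:
                return "unsupported value";
        }
}

int main(void)
{
        printf("%s\n", tgran2_status(2));
        return 0;
}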
@@ -356,6 +357,11 @@ int kvm_arm_setup_stage2(struct kvm *kvm, unsigned long type)
                        return -EINVAL;
        } else {
                phys_shift = KVM_PHYS_SHIFT;
+               if (phys_shift > kvm_ipa_limit) {
+                       pr_warn_once("%s using unsupported default IPA limit, upgrade your VMM\n",
+                                    current->comm);
+                       return -EINVAL;
+               }
        }
 
        mmfr0 = read_sanitised_ftr_reg(SYS_ID_AA64MMFR0_EL1);
index 52915b3..6f53092 100644 (file)
@@ -574,9 +574,13 @@ early_param("kvm-arm.vgic_v4_enable", early_gicv4_enable);
  */
 int vgic_v3_probe(const struct gic_kvm_info *info)
 {
-       u32 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_ich_vtr_el2);
+       u64 ich_vtr_el2 = kvm_call_hyp_ret(__vgic_v3_get_gic_config);
+       bool has_v2;
        int ret;
 
+       has_v2 = ich_vtr_el2 >> 63;
+       ich_vtr_el2 = (u32)ich_vtr_el2;
+
        /*
         * The ListRegs field is 5 bits, but there is an architectural
         * maximum of 16 list registers. Just ignore bit 4...
@@ -594,13 +598,15 @@ int vgic_v3_probe(const struct gic_kvm_info *info)
                         gicv4_enable ? "en" : "dis");
        }
 
+       kvm_vgic_global_state.vcpu_base = 0;
+
        if (!info->vcpu.start) {
                kvm_info("GICv3: no GICV resource entry\n");
-               kvm_vgic_global_state.vcpu_base = 0;
+       } else if (!has_v2) {
+               pr_warn(FW_BUG "CPU interface incapable of MMIO access\n");
        } else if (!PAGE_ALIGNED(info->vcpu.start)) {
                pr_warn("GICV physical address 0x%llx not page aligned\n",
                        (unsigned long long)info->vcpu.start);
-               kvm_vgic_global_state.vcpu_base = 0;
        } else {
                kvm_vgic_global_state.vcpu_base = info->vcpu.start;
                kvm_vgic_global_state.can_emulate_gicv2 = true;
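
Per the layout comment above, the new hypercall packs two facts into one u64: the raw ICH_VTR_EL2 value in bits [31:0] and GICv2 (MMIO) capability in bit 63, which is exactly how vgic_v3_probe() splits it back apart. A sketch of the unpacking, with illustrative helper names:

#include <linux/types.h>

/* Helpers mirroring the unpacking done in vgic_v3_probe() above;
 * the names are illustrative, not from the patch. */
static inline bool gic_config_has_v2(u64 cfg)
{
        return cfg >> 63;       /* bit 63: MMIO (GICv2) capable */
}

static inline u32 gic_config_ich_vtr(u64 cfg)
{
        return (u32)cfg;        /* bits [31:0]: ICH_VTR_EL2 */
}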
index 9e1a12e..351537c 100644 (file)
@@ -149,19 +149,3 @@ SYM_FUNC_START(mte_restore_page_tags)
 
        ret
 SYM_FUNC_END(mte_restore_page_tags)
-
-/*
- * Assign allocation tags for a region of memory based on the pointer tag
- *   x0 - source pointer
- *   x1 - size
- *
- * Note: The address must be non-NULL and MTE_GRANULE_SIZE aligned and
- * size must be non-zero and MTE_GRANULE_SIZE aligned.
- */
-SYM_FUNC_START(mte_assign_mem_tag_range)
-1:     stg     x0, [x0]
-       add     x0, x0, #MTE_GRANULE_SIZE
-       subs    x1, x1, #MTE_GRANULE_SIZE
-       b.gt    1b
-       ret
-SYM_FUNC_END(mte_assign_mem_tag_range)
index 77222d9..f188c90 100644 (file)
@@ -7,7 +7,6 @@ obj-$(CONFIG_HUGETLB_PAGE)      += hugetlbpage.o
 obj-$(CONFIG_PTDUMP_CORE)      += ptdump.o
 obj-$(CONFIG_PTDUMP_DEBUGFS)   += ptdump_debugfs.o
 obj-$(CONFIG_TRANS_TABLE)      += trans_pgd.o
-obj-$(CONFIG_NUMA)             += numa.o
 obj-$(CONFIG_DEBUG_VIRTUAL)    += physaddr.o
 obj-$(CONFIG_ARM64_MTE)                += mteswap.o
 KASAN_SANITIZE_physaddr.o      += n
index 2e339f0..f37d4e3 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/acpi.h>
 #include <linux/bitfield.h>
 #include <linux/extable.h>
+#include <linux/kfence.h>
 #include <linux/signal.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
@@ -302,12 +303,24 @@ static void die_kernel_fault(const char *msg, unsigned long addr,
 static void report_tag_fault(unsigned long addr, unsigned int esr,
                             struct pt_regs *regs)
 {
-       bool is_write  = ((esr & ESR_ELx_WNR) >> ESR_ELx_WNR_SHIFT) != 0;
+       static bool reported;
+       bool is_write;
+
+       if (READ_ONCE(reported))
+               return;
+
+       /*
+        * This is used for KASAN tests and assumes that no MTE faults
+        * happened before running the tests.
+        */
+       if (mte_report_once())
+               WRITE_ONCE(reported, true);
 
        /*
         * SAS bits aren't set for all faults reported in EL1, so we can't
         * find out access size.
         */
+       is_write = !!(esr & ESR_ELx_WNR);
        kasan_report(addr, 0, is_write, regs->pc);
 }
 #else
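
The rework above moves the once-only latch into report_tag_fault() and gates it on the KASAN-test hook. The underlying pattern, reduced to a standalone sketch (the kernel uses READ_ONCE()/WRITE_ONCE() where plain accesses appear here, and the mte_report_once() gate is omitted):

#include <stdbool.h>
#include <stdio.h>

/* Once-only reporting: latch a flag on the first event and ignore the
 * rest. Like the kernel version, this is a best-effort latch, not an
 * atomic test-and-set. */
static bool reported;

static void report_once(unsigned long addr)
{
        if (reported)
                return;
        reported = true;
        printf("first tag fault at %#lx\n", addr);
}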
@@ -319,12 +332,8 @@ static inline void report_tag_fault(unsigned long addr, unsigned int esr,
 static void do_tag_recovery(unsigned long addr, unsigned int esr,
                           struct pt_regs *regs)
 {
-       static bool reported;
 
-       if (!READ_ONCE(reported)) {
-               report_tag_fault(addr, esr, regs);
-               WRITE_ONCE(reported, true);
-       }
+       report_tag_fault(addr, esr, regs);
 
        /*
         * Disable MTE Tag Checking on the local CPU for the current EL.
@@ -381,6 +390,9 @@ static void __do_kernel_fault(unsigned long addr, unsigned int esr,
        } else if (addr < PAGE_SIZE) {
                msg = "NULL pointer dereference";
        } else {
+               if (kfence_handle_page_fault(addr, esr & ESR_ELx_WNR, regs))
+                       return;
+
                msg = "paging request";
        }
 
index 709d98f..3685e12 100644 (file)
@@ -219,17 +219,40 @@ static void __init zone_sizes_init(unsigned long min, unsigned long max)
 
 int pfn_valid(unsigned long pfn)
 {
-       phys_addr_t addr = pfn << PAGE_SHIFT;
+       phys_addr_t addr = PFN_PHYS(pfn);
 
-       if ((addr >> PAGE_SHIFT) != pfn)
+       /*
+        * Ensure the upper PAGE_SHIFT bits are clear in the
+        * pfn. Else it might lead to false positives when
+        * some of the upper bits are set, but the lower bits
+        * match a valid pfn.
+        */
+       if (PHYS_PFN(addr) != pfn)
                return 0;
 
 #ifdef CONFIG_SPARSEMEM
+{
+       struct mem_section *ms;
+
        if (pfn_to_section_nr(pfn) >= NR_MEM_SECTIONS)
                return 0;
 
-       if (!valid_section(__pfn_to_section(pfn)))
+       ms = __pfn_to_section(pfn);
+       if (!valid_section(ms))
                return 0;
+
+       /*
+        * ZONE_DEVICE memory has no memblock entries, so the
+        * memblock_is_map_memory() check always fails for ZONE_DEVICE
+        * based addresses. Normal hotplugged memory, likewise, never
+        * has the MEMBLOCK_NOMAP flag set in its memblock entries.
+        * Hence skip the memblock search for all non-early memory
+        * sections, which covers all hotplugged memory, both normal
+        * and ZONE_DEVICE based.
+        */
+       if (!early_section(ms))
+               return pfn_section_valid(ms, pfn);
+}
 #endif
        return memblock_is_map_memory(addr);
 }
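
The PHYS_PFN(PFN_PHYS(pfn)) round-trip above rejects any pfn whose upper PAGE_SHIFT bits would be shifted out of phys_addr_t, which is what used to cause false positives. A standalone sketch:

#include <assert.h>
#include <stdint.h>

#define PAGE_SHIFT      12
#define PFN_PHYS(pfn)   ((uint64_t)(pfn) << PAGE_SHIFT)
#define PHYS_PFN(addr)  ((uint64_t)(addr) >> PAGE_SHIFT)

int main(void)
{
        /* A pfn with a bit at or above position 64 - PAGE_SHIFT loses it
         * in the shift, so the round-trip no longer matches. */
        uint64_t bogus = (UINT64_C(1) << 52) | 0x100;

        assert(PHYS_PFN(PFN_PHYS(0x100)) == 0x100);     /* survives */
        assert(PHYS_PFN(PFN_PHYS(bogus)) != bogus);     /* rejected */
        return 0;
}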
@@ -416,10 +439,10 @@ void __init bootmem_init(void)
        max_pfn = max_low_pfn = max;
        min_low_pfn = min;
 
-       arm64_numa_init();
+       arch_numa_init();
 
        /*
-        * must be done after arm64_numa_init() which calls numa_init() to
+        * must be done after arch_numa_init() which calls numa_init() to
         * initialize node_online_map that gets used in hugetlb_cma_reserve()
         * while allocating required CMA size across online nodes.
         */
index 25af183..5d9550f 100644 (file)
@@ -40,7 +40,7 @@
 #define NO_BLOCK_MAPPINGS      BIT(0)
 #define NO_CONT_MAPPINGS       BIT(1)
 
-u64 idmap_t0sz = TCR_T0SZ(VA_BITS);
+u64 idmap_t0sz = TCR_T0SZ(VA_BITS_MIN);
 u64 idmap_ptrs_per_pgd = PTRS_PER_PGD;
 
 u64 __section(".mmuoff.data.write") vabits_actual;
@@ -512,7 +512,8 @@ static void __init map_mem(pgd_t *pgdp)
                 * if MTE is present. Otherwise, it has the same attributes as
                 * PAGE_KERNEL.
                 */
-               __map_memblock(pgdp, start, end, PAGE_KERNEL_TAGGED, flags);
+               __map_memblock(pgdp, start, end, pgprot_tagged(PAGE_KERNEL),
+                              flags);
        }
 
        /*
@@ -1155,7 +1156,7 @@ void vmemmap_free(unsigned long start, unsigned long end,
 }
 #endif /* CONFIG_SPARSEMEM_VMEMMAP */
 
-static inline pud_t * fixmap_pud(unsigned long addr)
+static inline pud_t *fixmap_pud(unsigned long addr)
 {
        pgd_t *pgdp = pgd_offset_k(addr);
        p4d_t *p4dp = p4d_offset(pgdp, addr);
@@ -1166,7 +1167,7 @@ static inline pud_t * fixmap_pud(unsigned long addr)
        return pud_offset_kimg(p4dp, addr);
 }
 
-static inline pmd_t * fixmap_pmd(unsigned long addr)
+static inline pmd_t *fixmap_pmd(unsigned long addr)
 {
        pud_t *pudp = fixmap_pud(addr);
        pud_t pud = READ_ONCE(*pudp);
@@ -1176,7 +1177,7 @@ static inline pmd_t * fixmap_pmd(unsigned long addr)
        return pmd_offset_kimg(pudp, addr);
 }
 
-static inline pte_t * fixmap_pte(unsigned long addr)
+static inline pte_t *fixmap_pte(unsigned long addr)
 {
        return &bm_pte[pte_index(addr)];
 }
@@ -1444,16 +1445,36 @@ static void __remove_pgd_mapping(pgd_t *pgdir, unsigned long start, u64 size)
        free_empty_tables(start, end, PAGE_OFFSET, PAGE_END);
 }
 
-static bool inside_linear_region(u64 start, u64 size)
+struct range arch_get_mappable_range(void)
 {
+       struct range mhp_range;
+       u64 start_linear_pa = __pa(_PAGE_OFFSET(vabits_actual));
+       u64 end_linear_pa = __pa(PAGE_END - 1);
+
+       if (IS_ENABLED(CONFIG_RANDOMIZE_BASE)) {
+               /*
+                * Check for a wrap: with a randomized linear mapping, the
+                * start physical address may actually be bigger than the
+                * end physical address. In this case set start to zero,
+                * because the [0, end_linear_pa] range must still cover
+                * all addressable physical addresses.
+                */
+               if (start_linear_pa > end_linear_pa)
+                       start_linear_pa = 0;
+       }
+
+       WARN_ON(start_linear_pa > end_linear_pa);
+
        /*
         * Linear mapping region is the range [PAGE_OFFSET..(PAGE_END - 1)]
         * accommodating both its ends but excluding PAGE_END. Max physical
         * range which can be mapped inside this linear mapping range, must
         * also be derived from its end points.
         */
-       return start >= __pa(_PAGE_OFFSET(vabits_actual)) &&
-              (start + size - 1) <= __pa(PAGE_END - 1);
+       mhp_range.start = start_linear_pa;
+       mhp_range.end =  end_linear_pa;
+
+       return mhp_range;
 }
 
 int arch_add_memory(int nid, u64 start, u64 size,
@@ -1461,12 +1482,14 @@ int arch_add_memory(int nid, u64 start, u64 size,
 {
        int ret, flags = 0;
 
-       if (!inside_linear_region(start, size)) {
-               pr_err("[%llx %llx] is outside linear mapping region\n", start, start + size);
-               return -EINVAL;
-       }
+       VM_BUG_ON(!mhp_range_allowed(start, size, true));
 
-       if (rodata_full || debug_pagealloc_enabled())
+       /*
+        * KFENCE requires linear map to be mapped at page granularity, so that
+        * it is possible to protect/unprotect single pages in the KFENCE pool.
+        */
+       if (rodata_full || debug_pagealloc_enabled() ||
+           IS_ENABLED(CONFIG_KFENCE))
                flags = NO_BLOCK_MAPPINGS | NO_CONT_MAPPINGS;
 
        __create_pgd_mapping(swapper_pg_dir, start, __phys_to_virt(start),
index 89dd2fc..34e9122 100644 (file)
@@ -7,7 +7,7 @@ config CSKY
        select ARCH_HAS_SYNC_DMA_FOR_CPU
        select ARCH_HAS_SYNC_DMA_FOR_DEVICE
        select ARCH_USE_BUILTIN_BSWAP
-       select ARCH_USE_QUEUED_RWLOCKS if NR_CPUS>2
+       select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_WANT_FRAME_POINTERS if !CPU_CK610
        select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
        select COMMON_CLK
@@ -35,6 +35,9 @@ config CSKY
        select GENERIC_IRQ_MULTI_HANDLER
        select GENERIC_SCHED_CLOCK
        select GENERIC_SMP_IDLE_THREAD
+       select GENERIC_TIME_VSYSCALL
+       select GENERIC_VDSO_32
+       select GENERIC_GETTIMEOFDAY
        select GX6605S_TIMER if CPU_CK610
        select HAVE_ARCH_TRACEHOOK
        select HAVE_ARCH_AUDITSYSCALL
@@ -43,11 +46,14 @@ config CSKY
        select HAVE_CONTEXT_TRACKING
        select HAVE_VIRT_CPU_ACCOUNTING_GEN
        select HAVE_DEBUG_BUGVERBOSE
+       select HAVE_DEBUG_KMEMLEAK
        select HAVE_DYNAMIC_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
+       select HAVE_GENERIC_VDSO
        select HAVE_FUNCTION_TRACER
        select HAVE_FUNCTION_GRAPH_TRACER
        select HAVE_FUNCTION_ERROR_INJECTION
+       select HAVE_FUTEX_CMPXCHG if FUTEX && SMP
        select HAVE_FTRACE_MCOUNT_RECORD
        select HAVE_KERNEL_GZIP
        select HAVE_KERNEL_LZO
@@ -192,6 +198,22 @@ config CPU_CK860
 endchoice
 
 choice
+       prompt "PAGE OFFSET"
+       default PAGE_OFFSET_80000000
+
+config PAGE_OFFSET_80000000
+       bool "PAGE OFFSET 2G (user:kernel = 2:2)"
+
+config PAGE_OFFSET_A0000000
+       bool "PAGE OFFSET 2.5G (user:kernel = 2.5:1.5)"
+endchoice
+
+config PAGE_OFFSET
+       hex
+       default 0x80000000 if PAGE_OFFSET_80000000
+       default 0xa0000000 if PAGE_OFFSET_A0000000
+choice
+
        prompt "C-SKY PMU type"
        depends on PERF_EVENTS
        depends on CPU_CK807 || CPU_CK810 || CPU_CK860
index d3e0420..6cab7af 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ABI_CSKY_CACHEFLUSH_H
 #define __ABI_CSKY_CACHEFLUSH_H
index ba8eb58..416b30c 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_CKMMUV1_H
 #define __ASM_CSKY_CKMMUV1_H
@@ -89,13 +88,14 @@ static inline void tlb_invalid_indexed(void)
        cpwcr("cpcr8", 0x02000000);
 }
 
-static inline void setup_pgd(unsigned long pgd, bool kernel)
+static inline void setup_pgd(pgd_t *pgd, int asid)
 {
-       cpwcr("cpcr29", pgd | BIT(0));
+       cpwcr("cpcr29", __pa(pgd) | BIT(0));
+       write_mmu_entryhi(asid);
 }
 
-static inline unsigned long get_pgd(void)
+static inline pgd_t *get_pgd(void)
 {
-       return cprcr("cpcr29") & ~BIT(0);
+       return __va(cprcr("cpcr29") & ~BIT(0));
 }
 #endif /* __ASM_CSKY_CKMMUV1_H */
index 13c23e2..b6a2109 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_ENTRY_H
 #define __ASM_CSKY_ENTRY_H
index c864519..2d21599 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #include <asm/shmparam.h>
 
index d605445..752c8b3 100644 (file)
@@ -1,37 +1,49 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_PGTABLE_BITS_H
 #define __ASM_CSKY_PGTABLE_BITS_H
 
 /* implemented in software */
-#define _PAGE_ACCESSED         (1<<3)
-#define PAGE_ACCESSED_BIT      (3)
-
+#define _PAGE_PRESENT          (1<<0)
 #define _PAGE_READ             (1<<1)
 #define _PAGE_WRITE            (1<<2)
-#define _PAGE_PRESENT          (1<<0)
-
+#define _PAGE_ACCESSED         (1<<3)
 #define _PAGE_MODIFIED         (1<<4)
-#define PAGE_MODIFIED_BIT      (4)
 
 /* implemented in hardware */
 #define _PAGE_GLOBAL           (1<<6)
-
 #define _PAGE_VALID            (1<<7)
-#define PAGE_VALID_BIT         (7)
-
 #define _PAGE_DIRTY            (1<<8)
-#define PAGE_DIRTY_BIT         (8)
 
 #define _PAGE_CACHE            (3<<9)
 #define _PAGE_UNCACHE          (2<<9)
 #define _PAGE_SO               _PAGE_UNCACHE
-
 #define _CACHE_MASK            (7<<9)
 
-#define _CACHE_CACHED          (_PAGE_VALID | _PAGE_CACHE)
-#define _CACHE_UNCACHED                (_PAGE_VALID | _PAGE_UNCACHE)
+#define _CACHE_CACHED          _PAGE_CACHE
+#define _CACHE_UNCACHED                _PAGE_UNCACHE
+
+#define _PAGE_PROT_NONE                _PAGE_READ
+
+/*
+ * Encode and decode a swap entry
+ *
+ * Format of swap PTE:
+ *     bit          0:    _PAGE_PRESENT (zero)
+ *     bit          1:    _PAGE_READ (zero)
+ *     bit      2 - 5:    swap type[0 - 3]
+ *     bit          6:    _PAGE_GLOBAL (zero)
+ *     bit          7:    _PAGE_VALID (zero)
+ *     bit          8:    swap type[4]
+ *     bit     9 - 31:    swap offset
+ */
+#define __swp_type(x)                  ((((x).val >> 2) & 0xf) | \
+                                       (((x).val >> 4) & 0x10))
+#define __swp_offset(x)                        ((x).val >> 9)
+#define __swp_entry(type, offset)      ((swp_entry_t) { \
+                                       ((type & 0xf) << 2) | \
+                                       ((type & 0x10) << 4) | \
+                                       ((offset) << 9)})
 
 #define HAVE_ARCH_UNMAPPED_AREA
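
The new abiv1 swap layout splits the five type bits across PTE bits [5:2] and bit 8 so that the hardware/software flag bits stay zero in a swap PTE. A worked round-trip under these macros, repeated here so the sketch is standalone:

#include <assert.h>

typedef struct { unsigned long val; } swp_entry_t;

#define __swp_type(x)           ((((x).val >> 2) & 0xf) | \
                                (((x).val >> 4) & 0x10))
#define __swp_offset(x)         ((x).val >> 9)
#define __swp_entry(type, offset)       ((swp_entry_t) { \
                                        ((type & 0xf) << 2) | \
                                        ((type & 0x10) << 4) | \
                                        ((offset) << 9)})

int main(void)
{
        swp_entry_t e = __swp_entry(0x13, 5);   /* type 0x13, offset 5 */

        assert(e.val == 0xb0c);         /* 0xc | 0x100 | 0xa00 */
        assert(__swp_type(e) == 0x13);
        assert(__swp_offset(e) == 5);
        return 0;
}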
 
index a153bd3..abd01a2 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ABI_REG_OPS_H
 #define __ABI_REG_OPS_H
index 104707f..7b386fd 100644 (file)
@@ -1,10 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_REGDEF_H
 #define __ASM_CSKY_REGDEF_H
 
+#ifdef __ASSEMBLY__
 #define syscallid      r1
+#else
+#define syscallid      "r1"
+#endif
+
 #define regs_syscallid(regs) regs->regs[9]
 #define regs_fp(regs) regs->regs[2]
 
index 0cd4338..9d95594 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ABI_CSKY_STRING_H
 #define __ABI_CSKY_STRING_H
index 17c8268..ec73fd7 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ABI_CSKY_PTRACE_H
 #define __ABI_CSKY_PTRACE_H
index 14352f5..9e6d0a2 100644 (file)
@@ -1,17 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
-#include <linux/uaccess.h>
+#ifndef __ABI_CSKY_VDSO_H
+#define __ABI_CSKY_VDSO_H
 
-static inline int setup_vdso_page(unsigned short *ptr)
-{
-       int err = 0;
+/* movi r1, 127; addi r1, (139 - 127) */
+#define SET_SYSCALL_ID .long 0x20b167f1
 
-       /* movi r1, 127 */
-       err |= __put_user(0x67f1, ptr + 0);
-       /* addi r1, (139 - 127) */
-       err |= __put_user(0x20b1, ptr + 1);
-       /* trap 0 */
-       err |= __put_user(0x0008, ptr + 2);
-
-       return err;
-}
+#endif /* __ABI_CSKY_VDSO_H */
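
The .long constant above packs, in little-endian halfword order, the same two 16-bit instructions that the removed __put_user() sequence wrote one at a time. The arithmetic, as a standalone sketch:

#include <assert.h>
#include <stdint.h>

int main(void)
{
        uint16_t movi = 0x67f1; /* movi r1, 127 (was written at ptr + 0) */
        uint16_t addi = 0x20b1; /* addi r1, (139 - 127) (was at ptr + 1) */

        /* On little-endian csky the halfword at the lower address
         * occupies the low 16 bits of the 32-bit word. */
        uint32_t word = ((uint32_t)addi << 16) | movi;

        assert(word == 0x20b167f1);     /* the SET_SYSCALL_ID constant */
        return 0;
}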
index 790f1eb..39c5139 100644 (file)
@@ -12,6 +12,9 @@ void update_mmu_cache(struct vm_area_struct *vma, unsigned long address,
        unsigned long addr;
        struct page *page;
 
+       if (!pfn_valid(pte_pfn(*pte)))
+               return;
+
        page = pfn_to_page(pte_pfn(*pte));
        if (page == ZERO_PAGE(0))
                return;
index 73ded7c..64215f2 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_CKMMUV2_H
 #define __ASM_CSKY_CKMMUV2_H
@@ -78,8 +77,13 @@ static inline void tlb_read(void)
 static inline void tlb_invalid_all(void)
 {
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.alls\n":::"memory");
        sync_is();
+       asm volatile(
+               "tlbi.alls      \n"
+               "sync.i         \n"
+               :
+               :
+               : "memory");
 #else
        mtcr("cr<8, 15>", 0x04000000);
 #endif
@@ -88,8 +92,13 @@ static inline void tlb_invalid_all(void)
 static inline void local_tlb_invalid_all(void)
 {
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.all\n":::"memory");
        sync_is();
+       asm volatile(
+               "tlbi.all       \n"
+               "sync.i         \n"
+               :
+               :
+               : "memory");
 #else
        tlb_invalid_all();
 #endif
@@ -100,16 +109,31 @@ static inline void tlb_invalid_indexed(void)
        mtcr("cr<8, 15>", 0x02000000);
 }
 
-static inline void setup_pgd(unsigned long pgd, bool kernel)
+#define NOP32 ".long 0x4820c400\n"
+
+static inline void setup_pgd(pgd_t *pgd, int asid)
 {
-       if (kernel)
-               mtcr("cr<28, 15>", pgd | BIT(0));
-       else
-               mtcr("cr<29, 15>", pgd | BIT(0));
+#ifdef CONFIG_CPU_HAS_TLBI
+       sync_is();
+#else
+       mb();
+#endif
+       asm volatile(
+#ifdef CONFIG_CPU_HAS_TLBI
+               "mtcr %1, cr<28, 15>    \n"
+#endif
+               "mtcr %1, cr<29, 15>    \n"
+               "mtcr %0, cr< 4, 15>    \n"
+               ".rept 64               \n"
+               NOP32
+               ".endr                  \n"
+               :
+               :"r"(asid), "r"(__pa(pgd) | BIT(0))
+               :"memory");
 }
 
-static inline unsigned long get_pgd(void)
+static inline pgd_t *get_pgd(void)
 {
-       return mfcr("cr<29, 15>") & ~BIT(0);
+       return __va(mfcr("cr<29, 15>") & ~BIT(0));
 }
 #endif /* __ASM_CSKY_CKMMUV2_H */
index bedcc6f..cca63e6 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_ENTRY_H
 #define __ASM_CSKY_ENTRY_H
@@ -26,6 +25,9 @@
        stw     tls, (sp, 0)
        stw     lr, (sp, 4)
 
+       RD_MEH  lr
+       WR_MEH  lr
+
        mfcr    lr, epc
        movi    tls, \epc_inc
        add     lr, tls
        mtcr    \rx, cr<8, 15>
 .endm
 
+#ifdef CONFIG_PAGE_OFFSET_80000000
+#define MSA_SET cr<30, 15>
+#define MSA_CLR cr<31, 15>
+#endif
+
+#ifdef CONFIG_PAGE_OFFSET_A0000000
+#define MSA_SET cr<31, 15>
+#define MSA_CLR cr<30, 15>
+#endif
+
 .macro SETUP_MMU
        /* Init psr and enable ee */
        lrw     r6, DEFAULT_PSR_VALUE
         * 31 - 29 | 28 - 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0
         *   BA     Reserved  SH  WA  B   SO SEC  C   D   V
         */
-       mfcr    r6, cr<30, 15> /* Get MSA0 */
+       mfcr    r6, MSA_SET /* Get MSA */
 2:
        lsri    r6, 29
        lsli    r6, 29
        addi    r6, 0x1ce
-       mtcr    r6, cr<30, 15> /* Set MSA0 */
+       mtcr    r6, MSA_SET /* Set MSA */
 
        movi    r6, 0
-       mtcr    r6, cr<31, 15> /* Clr MSA1 */
+       mtcr    r6, MSA_CLR /* Clr MSA */
 
        /* enable MMU */
        mfcr    r6, cr18
index 09e2700..aabb793 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_FPU_H
 #define __ASM_CSKY_FPU_H
index 0a70cb5..cf005f1 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 static inline void clear_user_page(void *addr, unsigned long vaddr,
                                   struct page *page)
index 137f793..7e7f389 100644 (file)
@@ -1,37 +1,48 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_PGTABLE_BITS_H
 #define __ASM_CSKY_PGTABLE_BITS_H
 
 /* implemented in software */
 #define _PAGE_ACCESSED         (1<<7)
-#define PAGE_ACCESSED_BIT      (7)
-
 #define _PAGE_READ             (1<<8)
 #define _PAGE_WRITE            (1<<9)
 #define _PAGE_PRESENT          (1<<10)
-
 #define _PAGE_MODIFIED         (1<<11)
-#define PAGE_MODIFIED_BIT      (11)
 
 /* implemented in hardware */
 #define _PAGE_GLOBAL           (1<<0)
-
 #define _PAGE_VALID            (1<<1)
-#define PAGE_VALID_BIT         (1)
-
 #define _PAGE_DIRTY            (1<<2)
-#define PAGE_DIRTY_BIT         (2)
 
 #define _PAGE_SO               (1<<5)
 #define _PAGE_BUF              (1<<6)
-
 #define _PAGE_CACHE            (1<<3)
-
 #define _CACHE_MASK            _PAGE_CACHE
 
-#define _CACHE_CACHED          (_PAGE_VALID | _PAGE_CACHE | _PAGE_BUF)
-#define _CACHE_UNCACHED                (_PAGE_VALID)
+#define _CACHE_CACHED          (_PAGE_CACHE | _PAGE_BUF)
+#define _CACHE_UNCACHED                (0)
+
+#define _PAGE_PROT_NONE                _PAGE_WRITE
+
+/*
+ * Encode and decode a swap entry
+ *
+ * Format of swap PTE:
+ *     bit          0:    _PAGE_GLOBAL (zero)
+ *     bit          1:    _PAGE_VALID (zero)
+ *     bit      2 - 6:    swap type
+ *     bit      7 - 8:    swap offset[0 - 1]
+ *     bit          9:    _PAGE_WRITE (zero)
+ *     bit         10:    _PAGE_PRESENT (zero)
+ *     bit    11 - 31:    swap offset[2 - 22]
+ */
+#define __swp_type(x)                  (((x).val >> 2) & 0x1f)
+#define __swp_offset(x)                        ((((x).val >> 7) & 0x3) | \
+                                       (((x).val >> 9) & 0x7ffffc))
+#define __swp_entry(type, offset)      ((swp_entry_t) { \
+                                       ((type & 0x1f) << 2) | \
+                                       ((offset & 0x3) << 7) | \
+                                       ((offset & 0x7ffffc) << 9)})
 
 #endif /* __ASM_CSKY_PGTABLE_BITS_H */
index ae82c3f..49ba18a 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ABI_REG_OPS_H
 #define __ABI_REG_OPS_H
index d7328bb..0933add 100644 (file)
@@ -1,10 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_REGDEF_H
 #define __ASM_CSKY_REGDEF_H
 
+#ifdef __ASSEMBLY__
 #define syscallid      r7
+#else
+#define syscallid      "r7"
+#endif
+
 #define regs_syscallid(regs) regs->regs[3]
 #define regs_fp(regs) regs->regs[4]
 
index 73a8124..5dd5c3f 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ABI_CSKY_PTRACE_H
 #define __ABI_CSKY_PTRACE_H
index b60d4a0..40fd10d 100644 (file)
@@ -3,21 +3,7 @@
 #ifndef __ABI_CSKY_VDSO_H
 #define __ABI_CSKY_VDSO_H
 
-#include <linux/uaccess.h>
+/* movi r7, 173 */
+#define SET_SYSCALL_ID .long 0x008bea07
 
-static inline int setup_vdso_page(unsigned short *ptr)
-{
-       int err = 0;
-
-       /* movi r7, 173 */
-       err |= __put_user(0xea07, ptr);
-       err |= __put_user(0x008b,      ptr+1);
-
-       /* trap 0 */
-       err |= __put_user(0xc000,   ptr+2);
-       err |= __put_user(0x2020,   ptr+3);
-
-       return err;
-}
-
-#endif /* __ABI_CSKY_STRING_H */
+#endif /* __ABI_CSKY_VDSO_H */
index bbbedfd..61abe92 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __SYSDEP_H
 #define __SYSDEP_H
index d1c2ede..6fc05d4 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_ADDRSPACE_H
 #define __ASM_CSKY_ADDRSPACE_H
diff --git a/arch/csky/include/asm/atomic.h b/arch/csky/include/asm/atomic.h
deleted file mode 100644 (file)
index e369d73..0000000
+++ /dev/null
@@ -1,212 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-
-#ifndef __ASM_CSKY_ATOMIC_H
-#define __ASM_CSKY_ATOMIC_H
-
-#include <linux/version.h>
-#include <asm/cmpxchg.h>
-#include <asm/barrier.h>
-
-#ifdef CONFIG_CPU_HAS_LDSTEX
-
-#define __atomic_add_unless __atomic_add_unless
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
-       unsigned long tmp, ret;
-
-       smp_mb();
-
-       asm volatile (
-       "1:     ldex.w          %0, (%3) \n"
-       "       mov             %1, %0   \n"
-       "       cmpne           %0, %4   \n"
-       "       bf              2f       \n"
-       "       add             %0, %2   \n"
-       "       stex.w          %0, (%3) \n"
-       "       bez             %0, 1b   \n"
-       "2:                              \n"
-               : "=&r" (tmp), "=&r" (ret)
-               : "r" (a), "r"(&v->counter), "r"(u)
-               : "memory");
-
-       if (ret != u)
-               smp_mb();
-
-       return ret;
-}
-
-#define ATOMIC_OP(op, c_op)                                            \
-static inline void atomic_##op(int i, atomic_t *v)                     \
-{                                                                      \
-       unsigned long tmp;                                              \
-                                                                       \
-       asm volatile (                                                  \
-       "1:     ldex.w          %0, (%2) \n"                            \
-       "       " #op "         %0, %1   \n"                            \
-       "       stex.w          %0, (%2) \n"                            \
-       "       bez             %0, 1b   \n"                            \
-               : "=&r" (tmp)                                           \
-               : "r" (i), "r"(&v->counter)                             \
-               : "memory");                                            \
-}
-
-#define ATOMIC_OP_RETURN(op, c_op)                                     \
-static inline int atomic_##op##_return(int i, atomic_t *v)             \
-{                                                                      \
-       unsigned long tmp, ret;                                         \
-                                                                       \
-       smp_mb();                                                       \
-       asm volatile (                                                  \
-       "1:     ldex.w          %0, (%3) \n"                            \
-       "       " #op "         %0, %2   \n"                            \
-       "       mov             %1, %0   \n"                            \
-       "       stex.w          %0, (%3) \n"                            \
-       "       bez             %0, 1b   \n"                            \
-               : "=&r" (tmp), "=&r" (ret)                              \
-               : "r" (i), "r"(&v->counter)                             \
-               : "memory");                                            \
-       smp_mb();                                                       \
-                                                                       \
-       return ret;                                                     \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op)                                      \
-static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
-{                                                                      \
-       unsigned long tmp, ret;                                         \
-                                                                       \
-       smp_mb();                                                       \
-       asm volatile (                                                  \
-       "1:     ldex.w          %0, (%3) \n"                            \
-       "       mov             %1, %0   \n"                            \
-       "       " #op "         %0, %2   \n"                            \
-       "       stex.w          %0, (%3) \n"                            \
-       "       bez             %0, 1b   \n"                            \
-               : "=&r" (tmp), "=&r" (ret)                              \
-               : "r" (i), "r"(&v->counter)                             \
-               : "memory");                                            \
-       smp_mb();                                                       \
-                                                                       \
-       return ret;                                                     \
-}
-
-#else /* CONFIG_CPU_HAS_LDSTEX */
-
-#include <linux/irqflags.h>
-
-#define __atomic_add_unless __atomic_add_unless
-static inline int __atomic_add_unless(atomic_t *v, int a, int u)
-{
-       unsigned long tmp, ret, flags;
-
-       raw_local_irq_save(flags);
-
-       asm volatile (
-       "       ldw             %0, (%3) \n"
-       "       mov             %1, %0   \n"
-       "       cmpne           %0, %4   \n"
-       "       bf              2f       \n"
-       "       add             %0, %2   \n"
-       "       stw             %0, (%3) \n"
-       "2:                              \n"
-               : "=&r" (tmp), "=&r" (ret)
-               : "r" (a), "r"(&v->counter), "r"(u)
-               : "memory");
-
-       raw_local_irq_restore(flags);
-
-       return ret;
-}
-
-#define ATOMIC_OP(op, c_op)                                            \
-static inline void atomic_##op(int i, atomic_t *v)                     \
-{                                                                      \
-       unsigned long tmp, flags;                                       \
-                                                                       \
-       raw_local_irq_save(flags);                                      \
-                                                                       \
-       asm volatile (                                                  \
-       "       ldw             %0, (%2) \n"                            \
-       "       " #op "         %0, %1   \n"                            \
-       "       stw             %0, (%2) \n"                            \
-               : "=&r" (tmp)                                           \
-               : "r" (i), "r"(&v->counter)                             \
-               : "memory");                                            \
-                                                                       \
-       raw_local_irq_restore(flags);                                   \
-}
-
-#define ATOMIC_OP_RETURN(op, c_op)                                     \
-static inline int atomic_##op##_return(int i, atomic_t *v)             \
-{                                                                      \
-       unsigned long tmp, ret, flags;                                  \
-                                                                       \
-       raw_local_irq_save(flags);                                      \
-                                                                       \
-       asm volatile (                                                  \
-       "       ldw             %0, (%3) \n"                            \
-       "       " #op "         %0, %2   \n"                            \
-       "       stw             %0, (%3) \n"                            \
-       "       mov             %1, %0   \n"                            \
-               : "=&r" (tmp), "=&r" (ret)                              \
-               : "r" (i), "r"(&v->counter)                             \
-               : "memory");                                            \
-                                                                       \
-       raw_local_irq_restore(flags);                                   \
-                                                                       \
-       return ret;                                                     \
-}
-
-#define ATOMIC_FETCH_OP(op, c_op)                                      \
-static inline int atomic_fetch_##op(int i, atomic_t *v)                        \
-{                                                                      \
-       unsigned long tmp, ret, flags;                                  \
-                                                                       \
-       raw_local_irq_save(flags);                                      \
-                                                                       \
-       asm volatile (                                                  \
-       "       ldw             %0, (%3) \n"                            \
-       "       mov             %1, %0   \n"                            \
-       "       " #op "         %0, %2   \n"                            \
-       "       stw             %0, (%3) \n"                            \
-               : "=&r" (tmp), "=&r" (ret)                              \
-               : "r" (i), "r"(&v->counter)                             \
-               : "memory");                                            \
-                                                                       \
-       raw_local_irq_restore(flags);                                   \
-                                                                       \
-       return ret;                                                     \
-}
-
-#endif /* CONFIG_CPU_HAS_LDSTEX */
-
-#define atomic_add_return atomic_add_return
-ATOMIC_OP_RETURN(add, +)
-#define atomic_sub_return atomic_sub_return
-ATOMIC_OP_RETURN(sub, -)
-
-#define atomic_fetch_add atomic_fetch_add
-ATOMIC_FETCH_OP(add, +)
-#define atomic_fetch_sub atomic_fetch_sub
-ATOMIC_FETCH_OP(sub, -)
-#define atomic_fetch_and atomic_fetch_and
-ATOMIC_FETCH_OP(and, &)
-#define atomic_fetch_or atomic_fetch_or
-ATOMIC_FETCH_OP(or, |)
-#define atomic_fetch_xor atomic_fetch_xor
-ATOMIC_FETCH_OP(xor, ^)
-
-#define atomic_and atomic_and
-ATOMIC_OP(and, &)
-#define atomic_or atomic_or
-ATOMIC_OP(or, |)
-#define atomic_xor atomic_xor
-ATOMIC_OP(xor, ^)
-
-#undef ATOMIC_FETCH_OP
-#undef ATOMIC_OP_RETURN
-#undef ATOMIC_OP
-
-#include <asm-generic/atomic.h>
-
-#endif /* __ASM_CSKY_ATOMIC_H */
index a430e7f..84fc600 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_BARRIER_H
 #define __ASM_CSKY_BARRIER_H
@@ -8,6 +7,61 @@
 
 #define nop()  asm volatile ("nop\n":::"memory")
 
+#ifdef CONFIG_SMP
+
+/*
+ * bar.brwarws: ordering barrier for all load/store instructions
+ *              before/after
+ *
+ * |31|30 26|25 21|20 16|15  10|9   5|4           0|
+ *  1  10000 00000 00000 100001        00001 0 bw br aw ar
+ *
+ * b: before
+ * a: after
+ * r: read
+ * w: write
+ *
+ * Here are all combinations:
+ *
+ * bar.brw
+ * bar.br
+ * bar.bw
+ * bar.arw
+ * bar.ar
+ * bar.aw
+ * bar.brwarw
+ * bar.brarw
+ * bar.bwarw
+ * bar.brwar
+ * bar.brwaw
+ * bar.brar
+ * bar.bwaw
+ */
+#define __bar_brw()    asm volatile (".long 0x842cc000\n":::"memory")
+#define __bar_br()     asm volatile (".long 0x8424c000\n":::"memory")
+#define __bar_bw()     asm volatile (".long 0x8428c000\n":::"memory")
+#define __bar_arw()    asm volatile (".long 0x8423c000\n":::"memory")
+#define __bar_ar()     asm volatile (".long 0x8421c000\n":::"memory")
+#define __bar_aw()     asm volatile (".long 0x8422c000\n":::"memory")
+#define __bar_brwarw() asm volatile (".long 0x842fc000\n":::"memory")
+#define __bar_brarw()  asm volatile (".long 0x8427c000\n":::"memory")
+#define __bar_bwarw()  asm volatile (".long 0x842bc000\n":::"memory")
+#define __bar_brwar()  asm volatile (".long 0x842dc000\n":::"memory")
+#define __bar_brwaw()  asm volatile (".long 0x842ec000\n":::"memory")
+#define __bar_brar()   asm volatile (".long 0x8425c000\n":::"memory")
+#define __bar_bwaw()   asm volatile (".long 0x842ac000\n":::"memory")
+
+#define __smp_mb()     __bar_brwarw()
+#define __smp_rmb()    __bar_brar()
+#define __smp_wmb()    __bar_bwaw()
+
+#define ACQUIRE_FENCE          ".long 0x8427c000\n"
+#define __smp_acquire_fence()  __bar_brarw()
+#define __smp_release_fence()  __bar_brwaw()
+
+#endif /* CONFIG_SMP */
+
 /*
  * sync:        completion barrier, all sync.xx instructions
  *              guarantee the last response received by bus transaction
  * sync.s:      inherit from sync, but also shareable to other cores
  * sync.i:      inherit from sync, but also flush cpu pipeline
  * sync.is:     the same with sync.i + sync.s
- *
- * bar.brwarw:  ordering barrier for all load/store instructions before it
- * bar.brwarws: ordering barrier for all load/store instructions before it
- *                                             and shareable to other cores
- * bar.brar:    ordering barrier for all load       instructions before it
- * bar.brars:   ordering barrier for all load       instructions before it
- *                                             and shareable to other cores
- * bar.bwaw:    ordering barrier for all store      instructions before it
- * bar.bwaws:   ordering barrier for all store      instructions before it
- *                                             and shareable to other cores
  */
+#define mb()           asm volatile ("sync\n":::"memory")
 
 #ifdef CONFIG_CPU_HAS_CACHEV2
-#define mb()           asm volatile ("sync.s\n":::"memory")
-
-#ifdef CONFIG_SMP
-#define __smp_mb()     asm volatile ("bar.brwarws\n":::"memory")
-#define __smp_rmb()    asm volatile ("bar.brars\n":::"memory")
-#define __smp_wmb()    asm volatile ("bar.bwaws\n":::"memory")
-#endif /* CONFIG_SMP */
-
-#define sync_is()      asm volatile ("sync.is\n":::"memory")
-
-#else /* !CONFIG_CPU_HAS_CACHEV2 */
-#define mb()           asm volatile ("sync\n":::"memory")
+/*
+ * Using three sync.is to prevent speculative PTW
+ */
+#define sync_is()      asm volatile ("sync.is\nsync.is\nsync.is\n":::"memory")
 #endif
 
 #include <asm-generic/barrier.h>
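
Comparing the __bar_*() constants defined earlier in this hunk, they all share the base opcode 0x8420c000 with the ar/aw/br/bw flags in bits 16-19; that decomposition is an observation from the constants themselves, not something stated by the patch. A sketch that rebuilds a few encodings from it:

#include <assert.h>

#define BAR_BASE        0x8420c000u     /* inferred common opcode base */
#define BAR_AR          (1u << 16)      /* after-read */
#define BAR_AW          (1u << 17)      /* after-write */
#define BAR_BR          (1u << 18)      /* before-read */
#define BAR_BW          (1u << 19)      /* before-write */

int main(void)
{
        assert((BAR_BASE | BAR_BW | BAR_BR | BAR_AW | BAR_AR)
               == 0x842fc000u);                         /* bar.brwarw */
        assert((BAR_BASE | BAR_BR | BAR_AR) == 0x8425c000u);    /* bar.brar */
        assert((BAR_BASE | BAR_BW | BAR_AW) == 0x842ac000u);    /* bar.bwaw */
        return 0;
}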
index 43b9838..9181878 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_BITOPS_H
 #define __ASM_CSKY_BITOPS_H
index 33ebd16..03f1a5f 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_BUG_H
 #define __ASM_CSKY_BUG_H
@@ -21,6 +20,8 @@ do {                                  \
 struct pt_regs;
 
 void die(struct pt_regs *regs, const char *str);
+void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
+
 void show_regs(struct pt_regs *regs);
 void show_code(struct pt_regs *regs);
 
index f0b8f25..d0f9eaf 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_CACHEFLUSH_H
 #define __ASM_CSKY_CACHEFLUSH_H
index 7685824..aa12ef4 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_CHECKSUM_H
 #define __ASM_CSKY_CHECKSUM_H
diff --git a/arch/csky/include/asm/clocksource.h b/arch/csky/include/asm/clocksource.h
new file mode 100644 (file)
index 0000000..54da0e4
--- /dev/null
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_CLOCKSOURCE_H
+#define __ASM_VDSO_CSKY_CLOCKSOURCE_H
+
+#include <asm/vdso/clocksource.h>
+
+#endif
index 8922453..dabc8e4 100644 (file)
@@ -3,12 +3,12 @@
 #ifndef __ASM_CSKY_CMPXCHG_H
 #define __ASM_CSKY_CMPXCHG_H
 
-#ifdef CONFIG_CPU_HAS_LDSTEX
+#ifdef CONFIG_SMP
 #include <asm/barrier.h>
 
 extern void __bad_xchg(void);
 
-#define __xchg(new, ptr, size)                                 \
+#define __xchg_relaxed(new, ptr, size)                         \
 ({                                                             \
        __typeof__(ptr) __ptr = (ptr);                          \
        __typeof__(new) __new = (new);                          \
@@ -16,7 +16,6 @@ extern void __bad_xchg(void);
        unsigned long tmp;                                      \
        switch (size) {                                         \
        case 4:                                                 \
-               smp_mb();                                       \
                asm volatile (                                  \
                "1:     ldex.w          %0, (%3) \n"            \
                "       mov             %1, %2   \n"            \
@@ -25,7 +24,6 @@ extern void __bad_xchg(void);
                        : "=&r" (__ret), "=&r" (tmp)            \
                        : "r" (__new), "r"(__ptr)               \
                        :);                                     \
-               smp_mb();                                       \
                break;                                          \
        default:                                                \
                __bad_xchg();                                   \
@@ -33,9 +31,10 @@ extern void __bad_xchg(void);
        __ret;                                                  \
 })
 
-#define xchg(ptr, x)   (__xchg((x), (ptr), sizeof(*(ptr))))
+#define xchg_relaxed(ptr, x) \
+               (__xchg_relaxed((x), (ptr), sizeof(*(ptr))))
 
-#define __cmpxchg(ptr, old, new, size)                         \
+#define __cmpxchg_relaxed(ptr, old, new, size)                 \
 ({                                                             \
        __typeof__(ptr) __ptr = (ptr);                          \
        __typeof__(new) __new = (new);                          \
@@ -44,7 +43,6 @@ extern void __bad_xchg(void);
        __typeof__(*(ptr)) __ret;                               \
        switch (size) {                                         \
        case 4:                                                 \
-               smp_mb();                                       \
                asm volatile (                                  \
                "1:     ldex.w          %0, (%3) \n"            \
                "       cmpne           %0, %4   \n"            \
@@ -56,7 +54,6 @@ extern void __bad_xchg(void);
                        : "=&r" (__ret), "=&r" (__tmp)          \
                        : "r" (__new), "r"(__ptr), "r"(__old)   \
                        :);                                     \
-               smp_mb();                                       \
                break;                                          \
        default:                                                \
                __bad_xchg();                                   \
@@ -64,8 +61,18 @@ extern void __bad_xchg(void);
        __ret;                                                  \
 })
 
-#define cmpxchg(ptr, o, n) \
-       (__cmpxchg((ptr), (o), (n), sizeof(*(ptr))))
+#define cmpxchg_relaxed(ptr, o, n) \
+       (__cmpxchg_relaxed((ptr), (o), (n), sizeof(*(ptr))))
+
+#define cmpxchg(ptr, o, n)                                     \
+({                                                             \
+       __typeof__(*(ptr)) __ret;                               \
+       __smp_release_fence();                                  \
+       __ret = cmpxchg_relaxed(ptr, o, n);                     \
+       __smp_acquire_fence();                                  \
+       __ret;                                                  \
+})
+
 #else
 #include <asm-generic/cmpxchg.h>
 #endif
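
[Editor's note] The reworked cmpxchg() builds its full ordering from cmpxchg_relaxed() bracketed by release/acquire fences. A caller-side sketch, not part of the patch, assuming only the standard cmpxchg() contract:

    /* Lock-free increment: retries until the compare-and-swap wins */
    static inline void lockfree_inc(int *counter)
    {
            int old;

            do {
                    old = READ_ONCE(*counter);
            } while (cmpxchg(counter, old, old + 1) != old);
    }
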
index eb2cc5a..48b83e2 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_ELF_H
 #define __ASM_CSKY_ELF_H
index 4b589cc..49a77cb 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_FIXMAP_H
 #define __ASM_CSKY_FIXMAP_H
index fae72b0..9b86341 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_FTRACE_H
 #define __ASM_CSKY_FTRACE_H
diff --git a/arch/csky/include/asm/futex.h b/arch/csky/include/asm/futex.h
new file mode 100644 (file)
index 0000000..6cfd312
--- /dev/null
@@ -0,0 +1,121 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_CSKY_FUTEX_H
+#define __ASM_CSKY_FUTEX_H
+
+#ifndef CONFIG_SMP
+#include <asm-generic/futex.h>
+#else
+#include <linux/atomic.h>
+#include <linux/futex.h>
+#include <linux/uaccess.h>
+#include <linux/errno.h>
+
+#define __futex_atomic_op(insn, ret, oldval, uaddr, oparg)             \
+{                                                                      \
+       u32 tmp;                                                        \
+                                                                       \
+       __atomic_pre_full_fence();                                      \
+                                                                       \
+       __asm__ __volatile__ (                                          \
+       "1:     ldex.w  %[ov], %[u]                     \n"             \
+       "       "insn"                                  \n"             \
+       "2:     stex.w  %[t], %[u]                      \n"             \
+       "       bez     %[t], 1b                        \n"             \
+       "       br      4f                              \n"             \
+       "3:     mov     %[r], %[e]                      \n"             \
+       "4:                                             \n"             \
+       "       .section __ex_table,\"a\"               \n"             \
+       "       .balign 4                               \n"             \
+       "       .long   1b, 3b                          \n"             \
+       "       .long   2b, 3b                          \n"             \
+       "       .previous                               \n"             \
+       : [r] "+r" (ret), [ov] "=&r" (oldval),                          \
+         [u] "+m" (*uaddr), [t] "=&r" (tmp)                            \
+       : [op] "Jr" (oparg), [e] "jr" (-EFAULT)                         \
+       : "memory");                                                    \
+                                                                       \
+       __atomic_post_full_fence();                                     \
+}
+
+static inline int
+arch_futex_atomic_op_inuser(int op, int oparg, int *oval, u32 __user *uaddr)
+{
+       int oldval = 0, ret = 0;
+
+       if (!access_ok(uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       switch (op) {
+       case FUTEX_OP_SET:
+               __futex_atomic_op("mov %[t], %[ov]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_ADD:
+               __futex_atomic_op("add %[t], %[ov], %[op]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_OR:
+               __futex_atomic_op("or %[t], %[ov], %[op]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       case FUTEX_OP_ANDN:
+               __futex_atomic_op("and %[t], %[ov], %[op]",
+                                 ret, oldval, uaddr, ~oparg);
+               break;
+       case FUTEX_OP_XOR:
+               __futex_atomic_op("xor %[t], %[ov], %[op]",
+                                 ret, oldval, uaddr, oparg);
+               break;
+       default:
+               ret = -ENOSYS;
+       }
+
+       if (!ret)
+               *oval = oldval;
+
+       return ret;
+}
+
+
+
+static inline int
+futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr,
+                             u32 oldval, u32 newval)
+{
+       int ret = 0;
+       u32 val, tmp;
+
+       if (!access_ok(uaddr, sizeof(u32)))
+               return -EFAULT;
+
+       __atomic_pre_full_fence();
+
+       __asm__ __volatile__ (
+       "1:     ldex.w  %[v], %[u]                      \n"
+       "       cmpne   %[v], %[ov]                     \n"
+       "       bt      4f                              \n"
+       "       mov     %[t], %[nv]                     \n"
+       "2:     stex.w  %[t], %[u]                      \n"
+       "       bez     %[t], 1b                        \n"
+       "       br      4f                              \n"
+       "3:     mov     %[r], %[e]                      \n"
+       "4:                                             \n"
+       "       .section __ex_table,\"a\"               \n"
+       "       .balign 4                               \n"
+       "       .long   1b, 3b                          \n"
+       "       .long   2b, 3b                          \n"
+       "       .previous                               \n"
+       : [r] "+r" (ret), [v] "=&r" (val), [u] "+m" (*uaddr),
+         [t] "=&r" (tmp)
+       : [ov] "Jr" (oldval), [nv] "Jr" (newval), [e] "Jr" (-EFAULT)
+       : "memory");
+
+       __atomic_post_full_fence();
+
+       *uval = val;
+       return ret;
+}
+
+#endif /* CONFIG_SMP */
+#endif /* __ASM_CSKY_FUTEX_H */
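
[Editor's note] __futex_atomic_op() wraps each operation in an ldex.w/stex.w retry loop with full fences on both sides. Conceptually, the FUTEX_OP_ADD case expands to the sketch below; load_exclusive()/store_exclusive() are hypothetical stand-ins for the ldex.w/stex.w instructions, not real kernel helpers:

    smp_mb();                               /* __atomic_pre_full_fence()  */
    do {
            oldval = load_exclusive(uaddr); /* 1: ldex.w %[ov], %[u]      */
            tmp = oldval + oparg;           /*    add %[t], %[ov], %[op]  */
    } while (!store_exclusive(uaddr, tmp)); /* 2: stex.w; bez %[t], 1b    */
    smp_mb();                               /* __atomic_post_full_fence() */
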
index 1f4ed3f..1ed810e 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_HIGHMEM_H
 #define __ASM_CSKY_HIGHMEM_H
index e909587..f826540 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_IO_H
 #define __ASM_CSKY_IO_H
index a65c675..d121798 100644 (file)
@@ -10,7 +10,7 @@
 
 #define FIXADDR_TOP    _AC(0xffffc000, UL)
 #define PKMAP_BASE     _AC(0xff800000, UL)
-#define VMALLOC_START  _AC(0xc0008000, UL)
+#define VMALLOC_START  (PAGE_OFFSET + LOWMEM_LIMIT + (PAGE_SIZE * 8))
 #define VMALLOC_END    (PKMAP_BASE - (PAGE_SIZE * 2))
 
 #ifdef CONFIG_HAVE_TCM
index 26fbb1d..d783219 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_MMU_H
 #define __ASM_CSKY_MMU_H
index b227d29..95d99b3 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_MMU_CONTEXT_H
 #define __ASM_CSKY_MMU_CONTEXT_H
 #include <linux/sched.h>
 #include <abi/ckmmu.h>
 
-#define TLBMISS_HANDLER_SETUP_PGD(pgd) \
-       setup_pgd(__pa(pgd), false)
-
-#define TLBMISS_HANDLER_SETUP_PGD_KERNEL(pgd) \
-       setup_pgd(__pa(pgd), true)
-
 #define ASID_MASK              ((1 << CONFIG_CPU_ASID_BITS) - 1)
 #define cpu_asid(mm)           (atomic64_read(&mm->context.asid) & ASID_MASK)
 
@@ -36,8 +29,7 @@ switch_mm(struct mm_struct *prev, struct mm_struct *next,
        if (prev != next)
                check_and_switch_context(next, cpu);
 
-       TLBMISS_HANDLER_SETUP_PGD(next->pgd);
-       write_mmu_entryhi(next->context.asid.counter);
+       setup_pgd(next->pgd, next->context.asid.counter);
 
        flush_icache_deferred(next);
 }
index 9b98bf3..3b91fc3 100644 (file)
@@ -24,7 +24,7 @@
  * address region. We use them mapping kernel 1GB direct-map address area and
  * for more than 1GB of memory we use highmem.
  */
-#define PAGE_OFFSET    0x80000000
+#define PAGE_OFFSET    CONFIG_PAGE_OFFSET
 #define SSEG_SIZE      0x20000000
 #define LOWMEM_LIMIT   (SSEG_SIZE * 2)
 
index 572093e..249905d 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_PERF_EVENT_H
 #define __ASM_CSKY_PERF_EVENT_H
index d58d814..cd211aa 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_PGALLOC_H
 #define __ASM_CSKY_PGALLOC_H
@@ -71,7 +70,7 @@ do {                                                  \
 } while (0)
 
 extern void pagetable_init(void);
-extern void pre_mmu_init(void);
+extern void mmu_init(unsigned long min_pfn, unsigned long max_pfn);
 extern void pre_trap_init(void);
 
 #endif /* __ASM_CSKY_PGALLOC_H */
index 2002cb7..0d60367 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_PGTABLE_H
 #define __ASM_CSKY_PGTABLE_H
@@ -14,7 +13,7 @@
 #define PGDIR_SIZE             (1UL << PGDIR_SHIFT)
 #define PGDIR_MASK             (~(PGDIR_SIZE-1))
 
-#define USER_PTRS_PER_PGD      (0x80000000UL/PGDIR_SIZE)
+#define USER_PTRS_PER_PGD      (PAGE_OFFSET/PGDIR_SIZE)
 #define FIRST_USER_ADDRESS     0UL
 
 /*
 
 #define pmd_page(pmd)  (pfn_to_page(pmd_phys(pmd) >> PAGE_SHIFT))
 #define pte_clear(mm, addr, ptep)      set_pte((ptep), \
-       (((unsigned int) addr & PAGE_OFFSET) ? __pte(_PAGE_GLOBAL) : __pte(0)))
+       (((unsigned int) addr >= PAGE_OFFSET) ? __pte(_PAGE_GLOBAL) : __pte(0)))
 #define pte_none(pte)          (!(pte_val(pte) & ~_PAGE_GLOBAL))
 #define pte_present(pte)       (pte_val(pte) & _PAGE_PRESENT)
 #define pte_pfn(x)     ((unsigned long)((x).pte_low >> PAGE_SHIFT))
 #define pfn_pte(pfn, prot) __pte(((unsigned long long)(pfn) << PAGE_SHIFT) \
                                | pgprot_val(prot))
 
-#define __READABLE     (_PAGE_READ | _PAGE_VALID | _PAGE_ACCESSED)
-#define __WRITEABLE    (_PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED)
-
-#define _PAGE_CHG_MASK (PAGE_MASK | _PAGE_ACCESSED | _PAGE_MODIFIED | \
-                        _CACHE_MASK)
-
-#define __swp_type(x)                  (((x).val >> 4) & 0xff)
-#define __swp_offset(x)                        ((x).val >> 12)
-#define __swp_entry(type, offset)      ((swp_entry_t) {((type) << 4) | \
-                                       ((offset) << 12) })
 #define __pte_to_swp_entry(pte)                ((swp_entry_t) { pte_val(pte) })
 #define __swp_entry_to_pte(x)          ((pte_t) { (x).val })
 
                                        pgprot_val(pgprot))
 
 /*
- * CSKY can't do page protection for execute, and considers that the same like
- * read. Also, write permissions imply read permissions. This is the closest
- * we can get by reasonable means..
+ * C-SKY only has VALID and DIRTY bits in hardware, so we use these two
+ * bits to emulate PRESENT, READ, WRITE, EXEC, MODIFIED and ACCESSED.
  */
-#define PAGE_NONE      __pgprot(_PAGE_PRESENT | _CACHE_CACHED)
-#define PAGE_SHARED    __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define _PAGE_BASE     (_PAGE_PRESENT | _PAGE_ACCESSED)
+
+#define PAGE_NONE      __pgprot(_PAGE_PROT_NONE)
+#define PAGE_READ      __pgprot(_PAGE_BASE | _PAGE_READ | \
                                _CACHE_CACHED)
-#define PAGE_COPY      __pgprot(_PAGE_PRESENT | _PAGE_READ | _CACHE_CACHED)
-#define PAGE_READONLY  __pgprot(_PAGE_PRESENT | _PAGE_READ | _CACHE_CACHED)
-#define PAGE_KERNEL    __pgprot(_PAGE_PRESENT | __READABLE | __WRITEABLE | \
-                               _PAGE_GLOBAL | _CACHE_CACHED)
-#define PAGE_USERIO    __pgprot(_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+#define PAGE_WRITE     __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_WRITE | \
                                _CACHE_CACHED)
+#define PAGE_SHARED PAGE_WRITE
+
+#define PAGE_KERNEL    __pgprot(_PAGE_BASE | _PAGE_READ | _PAGE_VALID | \
+                               _PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED | \
+                               _PAGE_GLOBAL | \
+                               _CACHE_CACHED)
+
+#define _PAGE_IOREMAP          (_PAGE_BASE | _PAGE_READ | _PAGE_VALID | \
+                               _PAGE_WRITE | _PAGE_DIRTY | _PAGE_MODIFIED | \
+                               _PAGE_GLOBAL | \
+                               _CACHE_UNCACHED | _PAGE_SO)
+
+#define _PAGE_CHG_MASK (~(unsigned long) \
+                               (_PAGE_PRESENT | _PAGE_READ | _PAGE_WRITE | \
+                               _CACHE_MASK | _PAGE_GLOBAL))
 
-#define _PAGE_IOREMAP \
-       (_PAGE_PRESENT | __READABLE | __WRITEABLE | _PAGE_GLOBAL | \
-        _CACHE_UNCACHED | _PAGE_SO)
+#define MAX_SWAPFILES_CHECK() \
+               BUILD_BUG_ON(MAX_SWAPFILES_SHIFT != 5)
 
 #define __P000 PAGE_NONE
-#define __P001 PAGE_READONLY
-#define __P010 PAGE_COPY
-#define __P011 PAGE_COPY
-#define __P100 PAGE_READONLY
-#define __P101 PAGE_READONLY
-#define __P110 PAGE_COPY
-#define __P111 PAGE_COPY
+#define __P001 PAGE_READ
+#define __P010 PAGE_READ
+#define __P011 PAGE_READ
+#define __P100 PAGE_READ
+#define __P101 PAGE_READ
+#define __P110 PAGE_READ
+#define __P111 PAGE_READ
 
 #define __S000 PAGE_NONE
-#define __S001 PAGE_READONLY
-#define __S010 PAGE_SHARED
-#define __S011 PAGE_SHARED
-#define __S100 PAGE_READONLY
-#define __S101 PAGE_READONLY
-#define __S110 PAGE_SHARED
-#define __S111 PAGE_SHARED
+#define __S001 PAGE_READ
+#define __S010 PAGE_WRITE
+#define __S011 PAGE_WRITE
+#define __S100 PAGE_READ
+#define __S101 PAGE_READ
+#define __S110 PAGE_WRITE
+#define __S111 PAGE_WRITE
 
 extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)];
 #define ZERO_PAGE(vaddr)       (virt_to_page(empty_zero_page))
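
[Editor's note] The rewritten __P/__S tables are indexed by the mmap x/w/r permission bits: every private (__P) writable combination now degrades to PAGE_READ so the first store faults for copy-on-write, while shared (__S) writable mappings get PAGE_WRITE. A simplified sketch of the generic lookup these tables feed (close to, but not literally, mm's vm_get_page_prot()):

    /* Sketch: vma protection lookup driven by the __P/__S tables */
    pgprot_t prot = protection_map[vm_flags &
                    (VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)];
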
index 4800f65..9e93302 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_PROCESSOR_H
 #define __ASM_CSKY_PROCESSOR_H
@@ -28,7 +27,7 @@ extern struct cpuinfo_csky cpu_data[];
  * for a 64 bit kernel expandable to 8192EB, of which the current CSKY
  * implementations will "only" be able to use 1TB ...
  */
-#define TASK_SIZE       0x7fff8000UL
+#define TASK_SIZE      (PAGE_OFFSET - (PAGE_SIZE * 8))
 
 #ifdef __KERNEL__
 #define STACK_TOP       TASK_SIZE
index 91ceb1b..4202aab 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_PTRACE_H
 #define __ASM_CSKY_PTRACE_H
index 79ede9b..589e832 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_SEGMENT_H
 #define __ASM_CSKY_SEGMENT_H
@@ -10,7 +9,7 @@ typedef struct {
 
 #define KERNEL_DS              ((mm_segment_t) { 0xFFFFFFFF })
 
-#define USER_DS                        ((mm_segment_t) { 0x80000000UL })
+#define USER_DS                        ((mm_segment_t) { PAGE_OFFSET })
 #define get_fs()               (current_thread_info()->addr_limit)
 #define set_fs(x)              (current_thread_info()->addr_limit = (x))
 #define uaccess_kernel()       (get_fs().seg == KERNEL_DS.seg)
index efafe4c..2fe6cea 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_SHMPARAM_H
 #define __ASM_CSKY_SHMPARAM_H
index 7cf3f2b..69f5aa2 100644 (file)
@@ -6,8 +6,6 @@
 #include <linux/spinlock_types.h>
 #include <asm/barrier.h>
 
-#ifdef CONFIG_QUEUED_RWLOCKS
-
 /*
  * Ticket-based spin-locking.
  */
@@ -88,169 +86,4 @@ static inline int arch_spin_is_contended(arch_spinlock_t *lock)
 
 #include <asm/qrwlock.h>
 
-/* See include/linux/spinlock.h */
-#define smp_mb__after_spinlock()       smp_mb()
-
-#else /* CONFIG_QUEUED_RWLOCKS */
-
-/*
- * Test-and-set spin-locking.
- */
-static inline void arch_spin_lock(arch_spinlock_t *lock)
-{
-       u32 *p = &lock->lock;
-       u32 tmp;
-
-       asm volatile (
-               "1:     ldex.w          %0, (%1) \n"
-               "       bnez            %0, 1b   \n"
-               "       movi            %0, 1    \n"
-               "       stex.w          %0, (%1) \n"
-               "       bez             %0, 1b   \n"
-               : "=&r" (tmp)
-               : "r"(p)
-               : "cc");
-       smp_mb();
-}
-
-static inline void arch_spin_unlock(arch_spinlock_t *lock)
-{
-       smp_mb();
-       WRITE_ONCE(lock->lock, 0);
-}
-
-static inline int arch_spin_trylock(arch_spinlock_t *lock)
-{
-       u32 *p = &lock->lock;
-       u32 tmp;
-
-       asm volatile (
-               "1:     ldex.w          %0, (%1) \n"
-               "       bnez            %0, 2f   \n"
-               "       movi            %0, 1    \n"
-               "       stex.w          %0, (%1) \n"
-               "       bez             %0, 1b   \n"
-               "       movi            %0, 0    \n"
-               "2:                              \n"
-               : "=&r" (tmp)
-               : "r"(p)
-               : "cc");
-
-       if (!tmp)
-               smp_mb();
-
-       return !tmp;
-}
-
-#define arch_spin_is_locked(x) (READ_ONCE((x)->lock) != 0)
-
-/*
- * read lock/unlock/trylock
- */
-static inline void arch_read_lock(arch_rwlock_t *lock)
-{
-       u32 *p = &lock->lock;
-       u32 tmp;
-
-       asm volatile (
-               "1:     ldex.w          %0, (%1) \n"
-               "       blz             %0, 1b   \n"
-               "       addi            %0, 1    \n"
-               "       stex.w          %0, (%1) \n"
-               "       bez             %0, 1b   \n"
-               : "=&r" (tmp)
-               : "r"(p)
-               : "cc");
-       smp_mb();
-}
-
-static inline void arch_read_unlock(arch_rwlock_t *lock)
-{
-       u32 *p = &lock->lock;
-       u32 tmp;
-
-       smp_mb();
-       asm volatile (
-               "1:     ldex.w          %0, (%1) \n"
-               "       subi            %0, 1    \n"
-               "       stex.w          %0, (%1) \n"
-               "       bez             %0, 1b   \n"
-               : "=&r" (tmp)
-               : "r"(p)
-               : "cc");
-}
-
-static inline int arch_read_trylock(arch_rwlock_t *lock)
-{
-       u32 *p = &lock->lock;
-       u32 tmp;
-
-       asm volatile (
-               "1:     ldex.w          %0, (%1) \n"
-               "       blz             %0, 2f   \n"
-               "       addi            %0, 1    \n"
-               "       stex.w          %0, (%1) \n"
-               "       bez             %0, 1b   \n"
-               "       movi            %0, 0    \n"
-               "2:                              \n"
-               : "=&r" (tmp)
-               : "r"(p)
-               : "cc");
-
-       if (!tmp)
-               smp_mb();
-
-       return !tmp;
-}
-
-/*
- * write lock/unlock/trylock
- */
-static inline void arch_write_lock(arch_rwlock_t *lock)
-{
-       u32 *p = &lock->lock;
-       u32 tmp;
-
-       asm volatile (
-               "1:     ldex.w          %0, (%1) \n"
-               "       bnez            %0, 1b   \n"
-               "       subi            %0, 1    \n"
-               "       stex.w          %0, (%1) \n"
-               "       bez             %0, 1b   \n"
-               : "=&r" (tmp)
-               : "r"(p)
-               : "cc");
-       smp_mb();
-}
-
-static inline void arch_write_unlock(arch_rwlock_t *lock)
-{
-       smp_mb();
-       WRITE_ONCE(lock->lock, 0);
-}
-
-static inline int arch_write_trylock(arch_rwlock_t *lock)
-{
-       u32 *p = &lock->lock;
-       u32 tmp;
-
-       asm volatile (
-               "1:     ldex.w          %0, (%1) \n"
-               "       bnez            %0, 2f   \n"
-               "       subi            %0, 1    \n"
-               "       stex.w          %0, (%1) \n"
-               "       bez             %0, 1b   \n"
-               "       movi            %0, 0    \n"
-               "2:                              \n"
-               : "=&r" (tmp)
-               : "r"(p)
-               : "cc");
-
-       if (!tmp)
-               smp_mb();
-
-       return !tmp;
-}
-
-#endif /* CONFIG_QUEUED_RWLOCKS */
 #endif /* __ASM_CSKY_SPINLOCK_H */
index 88b8243..8ff0f6f 100644 (file)
@@ -22,16 +22,6 @@ typedef struct {
 
 #define __ARCH_SPIN_LOCK_UNLOCKED      { { 0 } }
 
-#ifdef CONFIG_QUEUED_RWLOCKS
 #include <asm-generic/qrwlock_types.h>
 
-#else /* CONFIG_NR_CPUS > 2 */
-
-typedef struct {
-       u32 lock;
-} arch_rwlock_t;
-
-#define __ARCH_RW_LOCK_UNLOCKED                { 0 }
-
-#endif /* CONFIG_QUEUED_RWLOCKS */
 #endif /* __ASM_CSKY_SPINLOCK_TYPES_H */
index 73142de..a0d81e9 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef _CSKY_STRING_MM_H_
 #define _CSKY_STRING_MM_H_
index 35a39e8..731e466 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_SWITCH_TO_H
 #define __ASM_CSKY_SWITCH_TO_H
index 5d48e5e..ea9ce61 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_SYSCALLS_H
 #define __ASM_CSKY_SYSCALLS_H
index 21456a3..8c349a8 100644 (file)
@@ -1,12 +1,10 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef _ASM_CSKY_THREAD_INFO_H
 #define _ASM_CSKY_THREAD_INFO_H
 
 #ifndef __ASSEMBLY__
 
-#include <linux/version.h>
 #include <asm/types.h>
 #include <asm/page.h>
 #include <asm/processor.h>
index fdff9b8..3498e65 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_TLB_H
 #define __ASM_CSKY_TLB_H
index 6845b06..407160b 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_TLBFLUSH_H
 #define __ASM_TLBFLUSH_H
index 1c08180..421a419 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_TRAPS_H
 #define __ASM_CSKY_TRAPS_H
index 1633ffe..3dec272 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_UACCESS_H
 #define __ASM_CSKY_UACCESS_H
index da7a182..9cf97de 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #include <uapi/asm/unistd.h>
 
index d963d69..eb5142f 100644 (file)
@@ -3,10 +3,25 @@
 #ifndef __ASM_CSKY_VDSO_H
 #define __ASM_CSKY_VDSO_H
 
-#include <abi/vdso.h>
+#include <linux/types.h>
 
-struct csky_vdso {
-       unsigned short rt_signal_retcode[4];
+#ifndef GENERIC_TIME_VSYSCALL
+struct vdso_data {
 };
+#endif
+
+/*
+ * The VDSO symbols are mapped into Linux so we can just use regular symbol
+ * addressing to get their offsets in userspace.  The symbols are mapped at an
+ * offset of 0, but since the linker must support setting weak undefined
+ * symbols to the absolute address 0 it also happens to support other low
+ * addresses even when the code model suggests those low addresses would not
+ * otherwise be available.
+ */
+#define VDSO_SYMBOL(base, name)                                                        \
+({                                                                             \
+       extern const char __vdso_##name[];                                      \
+       (void __user *)((unsigned long)(base) + __vdso_##name);                 \
+})
 
 #endif /* __ASM_CSKY_VDSO_H */
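
[Editor's note] Usage of VDSO_SYMBOL() is a single expression; the signal path later in this same series resolves the sigreturn trampoline with it:

    regs->lr = (unsigned long)VDSO_SYMBOL(
            current->mm->context.vdso, rt_sigreturn);
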
diff --git a/arch/csky/include/asm/vdso/clocksource.h b/arch/csky/include/asm/vdso/clocksource.h
new file mode 100644 (file)
index 0000000..dfca7b4
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_CLOCKSOURCE_H
+#define __ASM_VDSO_CSKY_CLOCKSOURCE_H
+
+#define VDSO_ARCH_CLOCKMODES   \
+       VDSO_CLOCKMODE_ARCHTIMER
+
+#endif /* __ASM_VDSO_CSKY_CLOCKSOURCE_H */
diff --git a/arch/csky/include/asm/vdso/gettimeofday.h b/arch/csky/include/asm/vdso/gettimeofday.h
new file mode 100644 (file)
index 0000000..6c4f144
--- /dev/null
@@ -0,0 +1,114 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_GETTIMEOFDAY_H
+#define __ASM_VDSO_CSKY_GETTIMEOFDAY_H
+
+#ifndef __ASSEMBLY__
+
+#include <asm/barrier.h>
+#include <asm/unistd.h>
+#include <abi/regdef.h>
+#include <uapi/linux/time.h>
+
+#define VDSO_HAS_CLOCK_GETRES  1
+
+static __always_inline
+int gettimeofday_fallback(struct __kernel_old_timeval *_tv,
+                         struct timezone *_tz)
+{
+       register struct __kernel_old_timeval *tv asm("a0") = _tv;
+       register struct timezone *tz asm("a1") = _tz;
+       register long ret asm("a0");
+       register long nr asm(syscallid) = __NR_gettimeofday;
+
+       asm volatile ("trap 0\n"
+                     : "=r" (ret)
+                     : "r"(tv), "r"(tz), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+static __always_inline
+long clock_gettime_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+       register clockid_t clkid asm("a0") = _clkid;
+       register struct __kernel_timespec *ts asm("a1") = _ts;
+       register long ret asm("a0");
+       register long nr asm(syscallid) = __NR_clock_gettime64;
+
+       asm volatile ("trap 0\n"
+                     : "=r" (ret)
+                     : "r"(clkid), "r"(ts), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+static __always_inline
+long clock_gettime32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+       register clockid_t clkid asm("a0") = _clkid;
+       register struct old_timespec32 *ts asm("a1") = _ts;
+       register long ret asm("a0");
+       register long nr asm(syscallid) = __NR_clock_gettime;
+
+       asm volatile ("trap 0\n"
+                     : "=r" (ret)
+                     : "r"(clkid), "r"(ts), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+static __always_inline
+int clock_getres_fallback(clockid_t _clkid, struct __kernel_timespec *_ts)
+{
+       register clockid_t clkid asm("a0") = _clkid;
+       register struct __kernel_timespec *ts asm("a1") = _ts;
+       register long ret asm("a0");
+       register long nr asm(syscallid) = __NR_clock_getres_time64;
+
+       asm volatile ("trap 0\n"
+                     : "=r" (ret)
+                     : "r"(clkid), "r"(ts), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+static __always_inline
+int clock_getres32_fallback(clockid_t _clkid, struct old_timespec32 *_ts)
+{
+       register clockid_t clkid asm("a0") = _clkid;
+       register struct old_timespec32 *ts asm("a1") = _ts;
+       register long ret asm("a0");
+       register long nr asm(syscallid) = __NR_clock_getres;
+
+       asm volatile ("trap 0\n"
+                     : "=r" (ret)
+                     : "r"(clkid), "r"(ts), "r"(nr)
+                     : "memory");
+
+       return ret;
+}
+
+uint64_t csky_pmu_read_cc(void);
+static __always_inline u64 __arch_get_hw_counter(s32 clock_mode,
+                                                const struct vdso_data *vd)
+{
+#ifdef CONFIG_CSKY_PMU_V1
+       return csky_pmu_read_cc();
+#else
+       return 0;
+#endif
+}
+
+static __always_inline const struct vdso_data *__arch_get_vdso_data(void)
+{
+       return _vdso_data;
+}
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_CSKY_GETTIMEOFDAY_H */
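
[Editor's note] These fallbacks follow the C-SKY trap 0 syscall convention (arguments in a0/a1, number in the syscallid register). The generic lib/vdso code only reaches them when the clocksource is not vDSO-capable; a simplified sketch of that flow, not the literal lib/vdso source:

    /* Sketch: how the generic vDSO library uses the hooks defined above */
    if (vd->clock_mode == VDSO_CLOCKMODE_NONE)
            return clock_gettime_fallback(clock, ts);   /* trap 0 path */
    cycles = __arch_get_hw_counter(vd->clock_mode, vd); /* csky_pmu_read_cc() */
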
diff --git a/arch/csky/include/asm/vdso/processor.h b/arch/csky/include/asm/vdso/processor.h
new file mode 100644 (file)
index 0000000..39a6b56
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef __ASM_VDSO_CSKY_PROCESSOR_H
+#define __ASM_VDSO_CSKY_PROCESSOR_H
+
+#ifndef __ASSEMBLY__
+
+#define cpu_relax()    barrier()
+
+#endif /* __ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_CSKY_PROCESSOR_H */
diff --git a/arch/csky/include/asm/vdso/vsyscall.h b/arch/csky/include/asm/vdso/vsyscall.h
new file mode 100644 (file)
index 0000000..c276211
--- /dev/null
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ASM_VDSO_CSKY_VSYSCALL_H
+#define __ASM_VDSO_CSKY_VSYSCALL_H
+
+#ifndef __ASSEMBLY__
+
+#include <vdso/datapage.h>
+
+extern struct vdso_data *vdso_data;
+
+static __always_inline struct vdso_data *__csky_get_k_vdso_data(void)
+{
+       return vdso_data;
+}
+#define __arch_get_k_vdso_data __csky_get_k_vdso_data
+
+#include <asm-generic/vdso/vsyscall.h>
+
+#endif /* !__ASSEMBLY__ */
+
+#endif /* __ASM_VDSO_CSKY_VSYSCALL_H */
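
[Editor's note] The generic timekeeping update path (kernel/time/vsyscall.c) grabs the kernel-side pointer through this hook before refreshing the data page; roughly (simplified, not the literal generic source):

    /* Sketch: generic vsyscall update locating the csky data page */
    struct vdso_data *vdata = __arch_get_k_vdso_data();
    /* ...generic code then fills vdata[] from the timekeeper... */
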
index d150cd6..1aedd51 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_BYTEORDER_H
 #define __ASM_CSKY_BYTEORDER_H
index 49d4e14..d0a8ac6 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2019 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef _ASM_CSKY_PERF_REGS_H
 #define _ASM_CSKY_PERF_REGS_H
index 66b2268..3be9c14 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef _CSKY_PTRACE_H
 #define _CSKY_PTRACE_H
index 670c020..859afb6 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #ifndef __ASM_CSKY_SIGCONTEXT_H
 #define __ASM_CSKY_SIGCONTEXT_H
index ba40189..7ff6a24 100644 (file)
@@ -1,5 +1,4 @@
 /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
-// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
 #define __ARCH_WANT_STAT64
 #define __ARCH_WANT_NEW_STAT
index 37f37c0..6c0f360 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 extra-y := head.o vmlinux.lds
 
-obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o
+obj-y += entry.o atomic.o signal.o traps.o irq.o time.o vdso.o vdso/
 obj-y += power.o syscall.o syscall_table.o setup.o
 obj-y += process.o cpu-probe.o ptrace.o stacktrace.o
 obj-y += probes/
index 3821ef9..e73e548 100644 (file)
  */
 ENTRY(csky_cmpxchg)
        USPTOKSP
+
+       RD_MEH  a3
+       WR_MEH  a3
+
        mfcr    a3, epc
        addi    a3, TRAP0_SIZE
 
@@ -36,11 +40,11 @@ ENTRY(csky_cmpxchg)
 2:
        sync.is
 #else
-1:
+GLOBAL(csky_cmpxchg_ldw)
        ldw     a3, (a2)
        cmpne   a0, a3
        bt16    3f
-2:
+GLOBAL(csky_cmpxchg_stw)
        stw     a1, (a2)
 3:
 #endif
@@ -55,19 +59,3 @@ ENTRY(csky_cmpxchg)
        KSPTOUSP
        rte
 END(csky_cmpxchg)
-
-#ifndef CONFIG_CPU_HAS_LDSTEX
-/*
- * Called from tlbmodified exception
- */
-ENTRY(csky_cmpxchg_fixup)
-       mfcr    a0, epc
-       lrw     a1, 2b
-       cmpne   a1, a0
-       bt      1f
-       subi    a1, (2b - 1b)
-       stw     a1, (sp, LSAVE_PC)
-1:
-       rts
-END(csky_cmpxchg_fixup)
-#endif
index 5a5cabd..c1bd7a6 100644 (file)
 #include <asm/page.h>
 #include <asm/thread_info.h>
 
-#define PTE_INDX_MSK    0xffc
-#define PTE_INDX_SHIFT  10
-#define _PGDIR_SHIFT    22
-
 .macro zero_fp
 #ifdef CONFIG_STACKTRACE
        movi    r8, 0
 #endif
 .endm
 
-.macro tlbop_begin name, val0, val1, val2
-ENTRY(csky_\name)
-       mtcr    a3, ss2
-       mtcr    r6, ss3
-       mtcr    a2, ss4
-
-       RD_PGDR r6
-       RD_MEH  a3
-#ifdef CONFIG_CPU_HAS_TLBI
-       tlbi.vaas a3
-       sync.is
-
-       btsti   a3, 31
-       bf      1f
-       RD_PGDR_K r6
-1:
-#else
-       bgeni   a2, 31
-       WR_MCIR a2
-       bgeni   a2, 25
-       WR_MCIR a2
-#endif
-       bclri   r6, 0
-       lrw     a2, va_pa_offset
-       ld.w    a2, (a2, 0)
-       subu    r6, a2
-       bseti   r6, 31
-
-       mov     a2, a3
-       lsri    a2, _PGDIR_SHIFT
-       lsli    a2, 2
-       addu    r6, a2
-       ldw     r6, (r6)
-
-       lrw     a2, va_pa_offset
-       ld.w    a2, (a2, 0)
-       subu    r6, a2
-       bseti   r6, 31
-
-       lsri    a3, PTE_INDX_SHIFT
-       lrw     a2, PTE_INDX_MSK
-       and     a3, a2
-       addu    r6, a3
-       ldw     a3, (r6)
-
-       movi    a2, (_PAGE_PRESENT | \val0)
-       and     a3, a2
-       cmpne   a3, a2
-       bt      \name
-
-       /* First read/write the page, just update the flags */
-       ldw     a3, (r6)
-       bgeni   a2, PAGE_VALID_BIT
-       bseti   a2, PAGE_ACCESSED_BIT
-       bseti   a2, \val1
-       bseti   a2, \val2
-       or      a3, a2
-       stw     a3, (r6)
-
-       /* Some cpu tlb-hardrefill bypass the cache */
-#ifdef CONFIG_CPU_NEED_TLBSYNC
-       movi    a2, 0x22
-       bseti   a2, 6
-       mtcr    r6, cr22
-       mtcr    a2, cr17
-       sync
-#endif
-
-       mfcr    a3, ss2
-       mfcr    r6, ss3
-       mfcr    a2, ss4
-       rte
-\name:
-       mfcr    a3, ss2
-       mfcr    r6, ss3
-       mfcr    a2, ss4
+.text
+ENTRY(csky_pagefault)
        SAVE_ALL 0
-.endm
-.macro tlbop_end is_write
        zero_fp
        context_tracking
-       RD_MEH  a2
-       psrset  ee, ie
+       psrset  ee
        mov     a0, sp
-       movi    a1, \is_write
        jbsr    do_page_fault
        jmpi    ret_from_exception
-.endm
-
-.text
-
-tlbop_begin tlbinvalidl, _PAGE_READ, PAGE_VALID_BIT, PAGE_ACCESSED_BIT
-tlbop_end 0
-
-tlbop_begin tlbinvalids, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT
-tlbop_end 1
-
-tlbop_begin tlbmodified, _PAGE_WRITE, PAGE_DIRTY_BIT, PAGE_MODIFIED_BIT
-#ifndef CONFIG_CPU_HAS_LDSTEX
-jbsr csky_cmpxchg_fixup
-#endif
-tlbop_end 1
 
 ENTRY(csky_systemcall)
        SAVE_ALL TRAP0_SIZE
@@ -314,6 +217,9 @@ ENTRY(csky_trap)
 ENTRY(csky_get_tls)
        USPTOKSP
 
+       RD_MEH  a0
+       WR_MEH  a0
+
        /* increase epc for continue */
        mfcr    a0, epc
        addi    a0, TRAP0_SIZE
index 17ed9d2..7e3e4f1 100644 (file)
@@ -21,10 +21,16 @@ END(_start)
 ENTRY(_start_smp_secondary)
        SETUP_MMU
 
-       /* copy msa1 from CPU0 */
-       lrw     r6, secondary_msa1
+#ifdef CONFIG_PAGE_OFFSET_80000000
+       lrw     r6, secondary_msa1
        ld.w    r6, (r6, 0)
        mtcr    r6, cr<31, 15>
+#endif
+
+       lrw     r6, secondary_pgd
+       ld.w    r6, (r6, 0)
+       mtcr    r6, cr<28, 15>
+       mtcr    r6, cr<29, 15>
 
        /* set stack point */
        lrw     r6, secondary_stack
index 1a29f11..e5f1842 100644 (file)
@@ -87,7 +87,7 @@ static int csky_pmu_irq;
 })
 
 /* cycle counter */
-static uint64_t csky_pmu_read_cc(void)
+uint64_t csky_pmu_read_cc(void)
 {
        uint32_t lo, hi, tmp;
        uint64_t result;
@@ -1319,7 +1319,7 @@ int csky_pmu_device_probe(struct platform_device *pdev,
                pr_notice("[perf] PMU request irq fail!\n");
        }
 
-       ret = cpuhp_setup_state(CPUHP_AP_PERF_ONLINE, "AP_PERF_ONLINE",
+       ret = cpuhp_setup_state(CPUHP_AP_PERF_CSKY_ONLINE, "AP_PERF_ONLINE",
                                csky_pmu_starting_cpu,
                                csky_pmu_dying_cpu);
        if (ret) {
index ae2b1c7..ef2bb9b 100644 (file)
@@ -9,7 +9,7 @@ int arch_check_ftrace_location(struct kprobe *p)
        return 0;
 }
 
-/* Ftrace callback handler for kprobes -- called under preepmt disabed */
 +/* Ftrace callback handler for kprobes -- called under preempt disabled */
 void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
                           struct ftrace_ops *ops, struct ftrace_regs *fregs)
 {
index 4e464fe..d6e8d09 100644 (file)
@@ -274,9 +274,9 @@ void __kprobes
 simulate_bnezad32(u32 opcode, long addr, struct pt_regs *regs)
 {
        unsigned long tmp = opcode & 0x1f;
-       unsigned long val;
+       long val;
 
-       csky_insn_reg_get_val(regs, tmp, &val);
+       csky_insn_reg_get_val(regs, tmp, (unsigned long *)&val);
 
        val -= 1;
 
@@ -286,7 +286,7 @@ simulate_bnezad32(u32 opcode, long addr, struct pt_regs *regs)
        } else
                instruction_pointer_set(regs, addr + 4);
 
-       csky_insn_reg_set_val(regs, tmp, val);
+       csky_insn_reg_set_val(regs, tmp, (unsigned long)val);
 }
 
 void __kprobes
@@ -297,13 +297,11 @@ simulate_bhsz32(u32 opcode, long addr, struct pt_regs *regs)
 
        csky_insn_reg_get_val(regs, tmp, &val);
 
-       if (val >= 0) {
+       if ((long) val >= 0) {
                instruction_pointer_set(regs,
                        addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
        } else
                instruction_pointer_set(regs, addr + 4);
-
-       csky_insn_reg_set_val(regs, tmp, val);
 }
 
 void __kprobes
@@ -314,13 +312,11 @@ simulate_bhz32(u32 opcode, long addr, struct pt_regs *regs)
 
        csky_insn_reg_get_val(regs, tmp, &val);
 
-       if (val > 0) {
+       if ((long) val > 0) {
                instruction_pointer_set(regs,
                        addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
        } else
                instruction_pointer_set(regs, addr + 4);
-
-       csky_insn_reg_set_val(regs, tmp, val);
 }
 
 void __kprobes
@@ -331,13 +327,11 @@ simulate_blsz32(u32 opcode, long addr, struct pt_regs *regs)
 
        csky_insn_reg_get_val(regs, tmp, &val);
 
-       if (val <= 0) {
+       if ((long) val <= 0) {
                instruction_pointer_set(regs,
                        addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
        } else
                instruction_pointer_set(regs, addr + 4);
-
-       csky_insn_reg_set_val(regs, tmp, val);
 }
 
 void __kprobes
@@ -348,13 +342,11 @@ simulate_blz32(u32 opcode, long addr, struct pt_regs *regs)
 
        csky_insn_reg_get_val(regs, tmp, &val);
 
-       if (val < 0) {
+       if ((long) val < 0) {
                instruction_pointer_set(regs,
                        addr + sign_extend32((opcode & 0xffff0000) >> 15, 15));
        } else
                instruction_pointer_set(regs, addr + 4);
-
-       csky_insn_reg_set_val(regs, tmp, val);
 }
 
 void __kprobes
index 69af6bc..3d0ca22 100644 (file)
@@ -49,7 +49,7 @@ int copy_thread(unsigned long clone_flags,
        /* setup thread.sp for switch_to !!! */
        p->thread.sp = (unsigned long)childstack;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(childregs, 0, sizeof(struct pt_regs));
                childstack->r15 = (unsigned long) ret_from_kernel_thread;
                childstack->r10 = kthread_arg;
index d822144..0105ac8 100644 (file)
@@ -22,6 +22,7 @@
 #include <asm/asm-offsets.h>
 
 #include <abi/regdef.h>
+#include <abi/ckmmu.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
@@ -83,7 +84,7 @@ static int gpr_get(struct task_struct *target,
        /* Abiv1 regs->tls is fake and we need sync here. */
        regs->tls = task_thread_info(target)->tp_value;
 
-       return membuf_write(&to, regs, sizeof(regs));
+       return membuf_write(&to, regs, sizeof(*regs));
 }
 
 static int gpr_set(struct task_struct *target,
@@ -343,6 +344,124 @@ asmlinkage void syscall_trace_exit(struct pt_regs *regs)
                trace_sys_exit(regs, syscall_get_return_value(current, regs));
 }
 
+#ifdef CONFIG_CPU_CK860
+static void show_iutlb(void)
+{
+       int entry, i;
+       unsigned long flags;
+       unsigned long oldpid;
+       unsigned long entryhi[16], entrylo0[16], entrylo1[16];
+
+       oldpid = read_mmu_entryhi();
+
+       entry = 0x8000;
+
+       local_irq_save(flags);
+
+       for (i = 0; i < 16; i++) {
+               write_mmu_index(entry);
+               tlb_read();
+               entryhi[i]  = read_mmu_entryhi();
+               entrylo0[i] = read_mmu_entrylo0();
+               entrylo1[i] = read_mmu_entrylo1();
+
+               entry++;
+       }
+
+       local_irq_restore(flags);
+
+       write_mmu_entryhi(oldpid);
+
+       printk("\n\n\n");
+       for (i = 0; i < 16; i++)
+               printk("iutlb[%d]:      entryhi - 0x%lx;        entrylo0 - 0x%lx;"
+                      "        entrylo1 - 0x%lx\n",
+                        i, entryhi[i], entrylo0[i], entrylo1[i]);
+       printk("\n\n\n");
+}
+
+static void show_dutlb(void)
+{
+       int entry, i;
+       unsigned long flags;
+       unsigned long oldpid;
+       unsigned long entryhi[16], entrylo0[16], entrylo1[16];
+
+       oldpid = read_mmu_entryhi();
+
+       entry = 0x4000;
+
+       local_irq_save(flags);
+
+       for (i = 0; i < 16; i++) {
+               write_mmu_index(entry);
+               tlb_read();
+               entryhi[i]  = read_mmu_entryhi();
+               entrylo0[i] = read_mmu_entrylo0();
+               entrylo1[i] = read_mmu_entrylo1();
+
+               entry++;
+       }
+
+       local_irq_restore(flags);
+
+       write_mmu_entryhi(oldpid);
+
+       printk("\n\n\n");
+       for (i = 0; i < 16; i++)
+               printk("dutlb[%d]:      entryhi - 0x%lx;        entrylo0 - 0x%lx;"
+                      "        entrylo1 - 0x%lx\n",
+                        i, entryhi[i], entrylo0[i], entrylo1[i]);
+       printk("\n\n\n");
+}
+
+static unsigned long entryhi[1024], entrylo0[1024], entrylo1[1024];
+static void show_jtlb(void)
+{
+       int entry;
+       unsigned long flags;
+       unsigned long oldpid;
+
+       oldpid = read_mmu_entryhi();
+
+       entry = 0;
+
+       local_irq_save(flags);
+       while (entry < 1024) {
+               write_mmu_index(entry);
+               tlb_read();
+               entryhi[entry]  = read_mmu_entryhi();
+               entrylo0[entry] = read_mmu_entrylo0();
+               entrylo1[entry] = read_mmu_entrylo1();
+
+               entry++;
+       }
+       local_irq_restore(flags);
+
+       write_mmu_entryhi(oldpid);
+
+       printk("\n\n\n");
+
+       for (entry = 0; entry < 1024; entry++)
+               printk("jtlb[%x]:       entryhi - 0x%lx;        entrylo0 - 0x%lx;"
+                      "        entrylo1 - 0x%lx\n",
+                        entry, entryhi[entry], entrylo0[entry], entrylo1[entry]);
+       printk("\n\n\n");
+}
+
+static void show_tlb(void)
+{
+       show_iutlb();
+       show_dutlb();
+       show_jtlb();
+}
+#else
+static void show_tlb(void)
+{
+       return;
+}
+#endif
+
 void show_regs(struct pt_regs *fp)
 {
        pr_info("\nCURRENT PROCESS:\n\n");
@@ -363,9 +482,10 @@ void show_regs(struct pt_regs *fp)
 
        pr_info("PC: 0x%08lx (%pS)\n", (long)fp->pc, (void *)fp->pc);
        pr_info("LR: 0x%08lx (%pS)\n", (long)fp->lr, (void *)fp->lr);
-       pr_info("SP: 0x%08lx\n", (long)fp);
-       pr_info("orig_a0: 0x%08lx\n", fp->orig_a0);
+       pr_info("SP: 0x%08lx\n", (long)fp->usp);
        pr_info("PSR: 0x%08lx\n", (long)fp->sr);
+       pr_info("orig_a0: 0x%08lx\n", fp->orig_a0);
+       pr_info("PT_REGS: 0x%08lx\n", (long)fp);
 
        pr_info(" a0: 0x%08lx   a1: 0x%08lx   a2: 0x%08lx   a3: 0x%08lx\n",
                fp->a0, fp->a1, fp->a2, fp->a3);
@@ -395,5 +515,7 @@ void show_regs(struct pt_regs *fp)
                fp->regs[8], fp->regs[9]);
 #endif
 
+       show_tlb();
+
        return;
 }
index e4cab16..e93bc6f 100644 (file)
@@ -45,13 +45,17 @@ static void __init csky_memblock_init(void)
 
        if (size >= lowmem_size) {
                max_low_pfn = min_low_pfn + lowmem_size;
+#ifdef CONFIG_PAGE_OFFSET_80000000
                write_mmu_msa1(read_mmu_msa0() + SSEG_SIZE);
+#endif
        } else if (size > sseg_size) {
                max_low_pfn = min_low_pfn + sseg_size;
        }
 
        max_zone_pfn[ZONE_NORMAL] = max_low_pfn;
 
+       mmu_init(min_low_pfn, max_low_pfn);
+
 #ifdef CONFIG_HIGHMEM
        max_zone_pfn[ZONE_HIGHMEM] = max_pfn;
 
@@ -101,16 +105,26 @@ void __init setup_arch(char **cmdline_p)
 unsigned long va_pa_offset;
 EXPORT_SYMBOL(va_pa_offset);
 
+static inline unsigned long read_mmu_msa(void)
+{
+#ifdef CONFIG_PAGE_OFFSET_80000000
+       return read_mmu_msa0();
+#endif
+
+#ifdef CONFIG_PAGE_OFFSET_A0000000
+       return read_mmu_msa1();
+#endif
+}
+
 asmlinkage __visible void __init csky_start(unsigned int unused,
                                            void *dtb_start)
 {
        /* Clean up bss section */
        memset(__bss_start, 0, __bss_stop - __bss_start);
 
-       va_pa_offset = read_mmu_msa0() & ~(SSEG_SIZE - 1);
+       va_pa_offset = read_mmu_msa() & ~(SSEG_SIZE - 1);
 
        pre_trap_init();
-       pre_mmu_init();
 
        if (dtb_start == NULL)
                early_init_dt_scan(__dtb_start);
index 37ea64e..312f046 100644 (file)
@@ -134,7 +134,6 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
 {
        struct rt_sigframe *frame;
        int err = 0;
-       struct csky_vdso *vdso = current->mm->context.vdso;
 
        frame = get_sigframe(ksig, regs, sizeof(*frame));
        if (!access_ok(frame, sizeof(*frame)))
@@ -152,7 +151,8 @@ setup_rt_frame(struct ksignal *ksig, sigset_t *set, struct pt_regs *regs)
                return -EFAULT;
 
        /* Set up to return from userspace. */
-       regs->lr = (unsigned long)(vdso->rt_signal_retcode);
+       regs->lr = (unsigned long)VDSO_SYMBOL(
+               current->mm->context.vdso, rt_sigreturn);
 
        /*
         * Set up registers for signal handler.
index 041d0de..0f9f5ee 100644 (file)
@@ -203,8 +203,8 @@ volatile unsigned int secondary_hint;
 volatile unsigned int secondary_hint2;
 volatile unsigned int secondary_ccr;
 volatile unsigned int secondary_stack;
-
-unsigned long secondary_msa1;
+volatile unsigned int secondary_msa1;
+volatile unsigned int secondary_pgd;
 
 int __cpu_up(unsigned int cpu, struct task_struct *tidle)
 {
@@ -216,6 +216,7 @@ int __cpu_up(unsigned int cpu, struct task_struct *tidle)
        secondary_hint2 = mfcr("cr<21, 1>");
        secondary_ccr  = mfcr("cr18");
        secondary_msa1 = read_mmu_msa1();
+       secondary_pgd = mfcr("cr<29, 15>");
 
        /*
         * Because other CPUs are in reset status, we must flush data
@@ -262,8 +263,6 @@ void csky_start_secondary(void)
 
        flush_tlb_all();
        write_mmu_pagemask(0);
-       TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir);
-       TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir);
 
 #ifdef CONFIG_CPU_HAS_FPU
        init_fpu();
index 959a917..e5fbf86 100644 (file)
@@ -39,9 +39,7 @@ asmlinkage void csky_cmpxchg(void);
 asmlinkage void csky_get_tls(void);
 asmlinkage void csky_irq(void);
 
-asmlinkage void csky_tlbinvalidl(void);
-asmlinkage void csky_tlbinvalids(void);
-asmlinkage void csky_tlbmodified(void);
+asmlinkage void csky_pagefault(void);
 
 /* Defined in head.S */
 asmlinkage void _start_smp_secondary(void);
@@ -66,9 +64,9 @@ void __init trap_init(void)
        VEC_INIT(VEC_TRAP3, csky_get_tls);
 
        /* setup MMU TLB exception */
-       VEC_INIT(VEC_TLBINVALIDL, csky_tlbinvalidl);
-       VEC_INIT(VEC_TLBINVALIDS, csky_tlbinvalids);
-       VEC_INIT(VEC_TLBMODIFIED, csky_tlbmodified);
+       VEC_INIT(VEC_TLBINVALIDL, csky_pagefault);
+       VEC_INIT(VEC_TLBINVALIDS, csky_pagefault);
+       VEC_INIT(VEC_TLBMODIFIED, csky_pagefault);
 
 #ifdef CONFIG_CPU_HAS_FPU
        init_fpu();
index abc3dbc..16c20d6 100644 (file)
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
-#include <linux/kernel.h>
-#include <linux/err.h>
-#include <linux/sched.h>
-#include <linux/mm.h>
-#include <linux/init.h>
 #include <linux/binfmts.h>
 #include <linux/elf.h>
-#include <linux/vmalloc.h>
-#include <linux/unistd.h>
-#include <linux/uaccess.h>
+#include <linux/err.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
 
+#include <asm/page.h>
+#ifdef GENERIC_TIME_VSYSCALL
+#include <vdso/datapage.h>
+#else
 #include <asm/vdso.h>
-#include <asm/cacheflush.h>
+#endif
 
-static struct page *vdso_page;
+extern char vdso_start[], vdso_end[];
 
-static int __init init_vdso(void)
-{
-       struct csky_vdso *vdso;
-       int err = 0;
-
-       vdso_page = alloc_page(GFP_KERNEL);
-       if (!vdso_page)
-               panic("Cannot allocate vdso");
+static unsigned int vdso_pages;
+static struct page **vdso_pagelist;
 
-       vdso = vmap(&vdso_page, 1, 0, PAGE_KERNEL);
-       if (!vdso)
-               panic("Cannot map vdso");
+/*
+ * The vDSO data page.
+ */
+static union {
+       struct vdso_data        data;
+       u8                      page[PAGE_SIZE];
+} vdso_data_store __page_aligned_data;
+struct vdso_data *vdso_data = &vdso_data_store.data;
 
-       clear_page(vdso);
-
-       err = setup_vdso_page(vdso->rt_signal_retcode);
-       if (err)
-               panic("Cannot set signal return code, err: %x.", err);
+static int __init vdso_init(void)
+{
+       unsigned int i;
+
+       vdso_pages = (vdso_end - vdso_start) >> PAGE_SHIFT;
+       vdso_pagelist =
+               kcalloc(vdso_pages + 1, sizeof(struct page *), GFP_KERNEL);
+       if (unlikely(vdso_pagelist == NULL)) {
+               pr_err("vdso: pagelist allocation failed\n");
+               return -ENOMEM;
+       }
 
-       dcache_wb_range((unsigned long)vdso, (unsigned long)vdso + 16);
+       for (i = 0; i < vdso_pages; i++) {
+               struct page *pg;
 
-       vunmap(vdso);
+               pg = virt_to_page(vdso_start + (i << PAGE_SHIFT));
+               vdso_pagelist[i] = pg;
+       }
+       vdso_pagelist[i] = virt_to_page(vdso_data);
 
        return 0;
 }
-subsys_initcall(init_vdso);
+arch_initcall(vdso_init);
 
-int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp)
+int arch_setup_additional_pages(struct linux_binprm *bprm,
+       int uses_interp)
 {
-       int ret;
-       unsigned long addr;
        struct mm_struct *mm = current->mm;
+       unsigned long vdso_base, vdso_len;
+       int ret;
 
-       mmap_write_lock(mm);
+       vdso_len = (vdso_pages + 1) << PAGE_SHIFT;
 
-       addr = get_unmapped_area(NULL, STACK_TOP, PAGE_SIZE, 0, 0);
-       if (IS_ERR_VALUE(addr)) {
-               ret = addr;
-               goto up_fail;
+       mmap_write_lock(mm);
+       vdso_base = get_unmapped_area(NULL, 0, vdso_len, 0, 0);
+       if (IS_ERR_VALUE(vdso_base)) {
+               ret = vdso_base;
+               goto end;
        }
 
-       ret = install_special_mapping(
-                       mm,
-                       addr,
-                       PAGE_SIZE,
-                       VM_READ|VM_EXEC|VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC,
-                       &vdso_page);
-       if (ret)
-               goto up_fail;
+       /*
+        * Put vDSO base into mm struct. We need to do this before calling
+        * install_special_mapping or the perf counter mmap tracking code
+        * will fail to recognise it as a vDSO (since arch_vma_name fails).
+        */
+       mm->context.vdso = (void *)vdso_base;
+
+       ret =
+          install_special_mapping(mm, vdso_base, vdso_pages << PAGE_SHIFT,
+               (VM_READ | VM_EXEC | VM_MAYREAD | VM_MAYWRITE | VM_MAYEXEC),
+               vdso_pagelist);
+
+       if (unlikely(ret)) {
+               mm->context.vdso = NULL;
+               goto end;
+       }
 
-       mm->context.vdso = (void *)addr;
+       vdso_base += (vdso_pages << PAGE_SHIFT);
+       ret = install_special_mapping(mm, vdso_base, PAGE_SIZE,
+               (VM_READ | VM_MAYREAD), &vdso_pagelist[vdso_pages]);
 
-up_fail:
+       if (unlikely(ret))
+               mm->context.vdso = NULL;
+end:
        mmap_write_unlock(mm);
        return ret;
 }
 
 const char *arch_vma_name(struct vm_area_struct *vma)
 {
-       if (vma->vm_mm == NULL)
-               return NULL;
-
-       if (vma->vm_start == (long)vma->vm_mm->context.vdso)
+       if (vma->vm_mm && (vma->vm_start == (long)vma->vm_mm->context.vdso))
                return "[vdso]";
-       else
-               return NULL;
+       if (vma->vm_mm && (vma->vm_start ==
+                          (long)vma->vm_mm->context.vdso + PAGE_SIZE))
+               return "[vdso_data]";
+       return NULL;
 }
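
[Editor's note] The vDSO text lands at context.vdso with the data page one page above it, so arch_vma_name() can tell the two apart; in /proc/<pid>/maps this shows up roughly as follows (addresses hypothetical):

    03ff7000-03ff8000 r-xp 00000000 00:00 0          [vdso]
    03ff8000-03ff9000 r--p 00000000 00:00 0          [vdso_data]
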
similarity index 58%
rename from arch/x86/platform/sfi/Makefile
rename to arch/csky/kernel/vdso/.gitignore
index 4eba24c..3a19def 100644 (file)
@@ -1,2 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_SFI)              += sfi.o
+vdso.lds
+*.tmp
+vdso-syms.S
diff --git a/arch/csky/kernel/vdso/Makefile b/arch/csky/kernel/vdso/Makefile
new file mode 100644 (file)
index 0000000..0b6909f
--- /dev/null
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+# Absolute relocation type $(ARCH_REL_TYPE_ABS) needs to be defined before
+# the inclusion of the generic Makefile.
+ARCH_REL_TYPE_ABS := R_CKCORE_ADDR32|R_CKCORE_JUMP_SLOT
+include $(srctree)/lib/vdso/Makefile
+
+# Symbols present in the vdso
+vdso-syms  += rt_sigreturn
+vdso-syms  += vgettimeofday
+
+# Files to link into the vdso
+obj-vdso = $(patsubst %, %.o, $(vdso-syms)) note.o
+
+ifneq ($(c-gettimeofday-y),)
+       CFLAGS_vgettimeofday.o += -include $(c-gettimeofday-y)
+endif
+
+ccflags-y := -fno-stack-protector -DBUILD_VDSO32
+
+# Build rules
+targets := $(obj-vdso) vdso.so vdso.so.dbg vdso.lds vdso-dummy.o
+obj-vdso := $(addprefix $(obj)/, $(obj-vdso))
+
+obj-y += vdso.o vdso-syms.o
+CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
+
+# Disable gcov profiling for VDSO code
+GCOV_PROFILE := n
+KCOV_INSTRUMENT := n
+
+# Force dependency
+$(obj)/vdso.o: $(obj)/vdso.so
+
+SYSCFLAGS_vdso.so.dbg = $(c_flags)
+$(obj)/vdso.so.dbg: $(src)/vdso.lds $(obj-vdso) FORCE
+       $(call if_changed,vdsold)
+SYSCFLAGS_vdso.so.dbg = -shared -s -Wl,-soname=linux-vdso.so.1 \
+       -Wl,--build-id=sha1 -Wl,--hash-style=both
+
+$(obj)/vdso-syms.S: $(obj)/vdso.so FORCE
+       $(call if_changed,so2s)
+
+# strip rule for the .so file
+$(obj)/%.so: OBJCOPYFLAGS := -S
+$(obj)/%.so: $(obj)/%.so.dbg FORCE
+       $(call if_changed,objcopy)
+
+# actual build commands
+# The DSO images are built using a special linker script
+# Make sure only to export the intended __vdso_xxx symbol offsets.
+quiet_cmd_vdsold = VDSOLD  $@
+      cmd_vdsold = $(CC) $(KBUILD_CFLAGS) $(call cc-option, -no-pie) -nostdlib -nostartfiles $(SYSCFLAGS_$(@F)) \
+                           -Wl,-T,$(filter-out FORCE,$^) -o $@.tmp && \
+                   $(CROSS_COMPILE)objcopy \
+                           $(patsubst %, -G __vdso_%, $(vdso-syms)) $@.tmp $@ && \
+                   rm $@.tmp
+
+# Extracts symbol offsets from the VDSO, converting them into an assembly file
+# that contains the same symbols at the same offsets.
+quiet_cmd_so2s = SO2S    $@
+      cmd_so2s = $(NM) -D $< | $(srctree)/$(src)/so2s.sh > $@
+
+# install commands for the unstripped file
+quiet_cmd_vdso_install = INSTALL $@
+      cmd_vdso_install = cp $(obj)/$@.dbg $(MODLIB)/vdso/$@
+
+vdso.so: $(obj)/vdso.so.dbg
+       @mkdir -p $(MODLIB)/vdso
+       $(call cmd,vdso_install)
+
+vdso_install: vdso.so
diff --git a/arch/csky/kernel/vdso/note.S b/arch/csky/kernel/vdso/note.S
new file mode 100644 (file)
index 0000000..2a956c9
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * This supplies .note.* sections to go into the PT_NOTE inside the vDSO text.
+ * Here we can supply some information useful to userland.
+ */
+
+#include <linux/elfnote.h>
+#include <linux/version.h>
+
+ELFNOTE_START(Linux, 0, "a")
+       .long LINUX_VERSION_CODE
+ELFNOTE_END
diff --git a/arch/csky/kernel/vdso/rt_sigreturn.S b/arch/csky/kernel/vdso/rt_sigreturn.S
new file mode 100644 (file)
index 0000000..0a6bd12
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/linkage.h>
+#include <asm/unistd.h>
+#include <abi/vdso.h>
+
+       .text
+ENTRY(__vdso_rt_sigreturn)
+       .cfi_startproc
+       .cfi_signal_frame
+       SET_SYSCALL_ID
+       trap    0
+       .cfi_endproc
+ENDPROC(__vdso_rt_sigreturn)
diff --git a/arch/csky/kernel/vdso/so2s.sh b/arch/csky/kernel/vdso/so2s.sh
new file mode 100755 (executable)
index 0000000..69da3d5
--- /dev/null
@@ -0,0 +1,5 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0+
+
+sed 's!\([0-9a-f]*\) T \([a-z0-9_]*\)\(@@LINUX_5.10\)*!.global \2\n.set \2,0x\1!' \
+| grep '^\.'
diff --git a/arch/csky/kernel/vdso/vdso.S b/arch/csky/kernel/vdso/vdso.S
new file mode 100644 (file)
index 0000000..5162ca0
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <linux/init.h>
+#include <linux/linkage.h>
+#include <asm/page.h>
+
+       __PAGE_ALIGNED_DATA
+
+       .globl vdso_start, vdso_end
+       .balign PAGE_SIZE
+vdso_start:
+       .incbin "arch/csky/kernel/vdso/vdso.so"
+       .balign PAGE_SIZE
+vdso_end:
+
+       .previous
diff --git a/arch/csky/kernel/vdso/vdso.lds.S b/arch/csky/kernel/vdso/vdso.lds.S
new file mode 100644 (file)
index 0000000..590a6c7
--- /dev/null
@@ -0,0 +1,58 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#include <asm/page.h>
+
+OUTPUT_ARCH(csky)
+
+SECTIONS
+{
+       PROVIDE(_vdso_data = . + PAGE_SIZE);
+       . = SIZEOF_HEADERS;
+
+       .hash           : { *(.hash) }                  :text
+       .gnu.hash       : { *(.gnu.hash) }
+       .dynsym         : { *(.dynsym) }
+       .dynstr         : { *(.dynstr) }
+       .gnu.version    : { *(.gnu.version) }
+       .gnu.version_d  : { *(.gnu.version_d) }
+       .gnu.version_r  : { *(.gnu.version_r) }
+
+       .note           : { *(.note.*) }                :text   :note
+       .dynamic        : { *(.dynamic) }               :text   :dynamic
+
+       .eh_frame_hdr   : { *(.eh_frame_hdr) }          :text   :eh_frame_hdr
+       .eh_frame       : { KEEP (*(.eh_frame)) }       :text
+
+       .rodata         : { *(.rodata .rodata.* .gnu.linkonce.r.*) }
+
+       . = 0x800;
+       .text           : { *(.text .text.*) }          :text
+
+       .data           : {
+               *(.got.plt) *(.got)
+               *(.data .data.* .gnu.linkonce.d.*)
+               *(.dynbss)
+               *(.bss .bss.* .gnu.linkonce.b.*)
+       }
+}
+
+PHDRS
+{
+       text            PT_LOAD         FLAGS(5) FILEHDR PHDRS; /* PF_R|PF_X */
+       dynamic         PT_DYNAMIC      FLAGS(4);               /* PF_R */
+       note            PT_NOTE         FLAGS(4);               /* PF_R */
+       eh_frame_hdr    PT_GNU_EH_FRAME;
+}
+
+VERSION
+{
+       LINUX_5.10 {
+       global:
+               __vdso_rt_sigreturn;
+               __vdso_clock_gettime;
+               __vdso_clock_gettime64;
+               __vdso_gettimeofday;
+               __vdso_clock_getres;
+       local: *;
+       };
+}
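
The script defines _vdso_data one page past the DSO base (PROVIDE(_vdso_data = . + PAGE_SIZE)); the kernel maps the data page there, which is what the "[vdso_data]" naming above reflects. A hedged sketch of how code inside the DSO could reach it; the accessor name is illustrative, struct vdso_data is the generic vDSO datapage type:

    #include <vdso/datapage.h>

    extern struct vdso_data _vdso_data[];   /* PROVIDEd by vdso.lds.S */

    /* resolved PC-relatively, so it works wherever the DSO is mapped */
    static inline const struct vdso_data *get_vdso_data_sketch(void)
    {
            return _vdso_data;
    }
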
diff --git a/arch/csky/kernel/vdso/vgettimeofday.c b/arch/csky/kernel/vdso/vgettimeofday.c
new file mode 100644 (file)
index 0000000..da49183
--- /dev/null
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/time.h>
+#include <linux/types.h>
+
+int __vdso_clock_gettime(clockid_t clock,
+                        struct old_timespec32 *ts)
+{
+       return __cvdso_clock_gettime32(clock, ts);
+}
+
+int __vdso_clock_gettime64(clockid_t clock,
+                          struct __kernel_timespec *ts)
+{
+       return __cvdso_clock_gettime(clock, ts);
+}
+
+int __vdso_gettimeofday(struct __kernel_old_timeval *tv,
+                       struct timezone *tz)
+{
+       return __cvdso_gettimeofday(tv, tz);
+}
+
+int __vdso_clock_getres(clockid_t clock_id,
+                       struct old_timespec32 *res)
+{
+       return __cvdso_clock_getres_time32(clock_id, res);
+}
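
These are thin wrappers over the generic vDSO implementations pulled in through $(c-gettimeofday-y) in the Makefile above. Nothing changes at the user API level; a sketch of the effect, assuming the C library binds clock_gettime() to __vdso_clock_gettime():

    #include <stdio.h>
    #include <time.h>

    int main(void)
    {
            struct timespec ts;

            /* with the vDSO in place, this can be answered without a trap */
            if (clock_gettime(CLOCK_MONOTONIC, &ts) == 0)
                    printf("%lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
            return 0;
    }
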
index f03033e..e8b1a4a 100644 (file)
@@ -33,6 +33,7 @@ SECTIONS
 
        .text : AT(ADDR(.text) - LOAD_OFFSET) {
                _text = .;
+               VBR_BASE
                IRQENTRY_TEXT
                SOFTIRQENTRY_TEXT
                TEXT_TEXT
@@ -104,7 +105,6 @@ SECTIONS
 
        EXCEPTION_TABLE(L1_CACHE_BYTES)
        BSS_SECTION(L1_CACHE_BYTES, PAGE_SIZE, L1_CACHE_BYTES)
-       VBR_BASE
        _end = . ;
 
        STABS_DEBUG
index 081b178..1482de5 100644 (file)
@@ -1,29 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
 
-#include <linux/signal.h>
-#include <linux/module.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/ptrace.h>
-#include <linux/mman.h>
-#include <linux/mm.h>
-#include <linux/smp.h>
-#include <linux/version.h>
-#include <linux/vt_kern.h>
 #include <linux/extable.h>
-#include <linux/uaccess.h>
-#include <linux/perf_event.h>
 #include <linux/kprobes.h>
-
-#include <asm/hardirq.h>
-#include <asm/mmu_context.h>
-#include <asm/traps.h>
-#include <asm/page.h>
+#include <linux/mmu_context.h>
+#include <linux/perf_event.h>
 
 int fixup_exception(struct pt_regs *regs)
 {
@@ -39,180 +20,287 @@ int fixup_exception(struct pt_regs *regs)
        return 0;
 }
 
-/*
- * This routine handles page faults. It determines the address,
- * and the problem, and then passes it off to one of the appropriate
- * routines.
- */
-asmlinkage void do_page_fault(struct pt_regs *regs, unsigned long write,
-                             unsigned long mmu_meh)
+static inline bool is_write(struct pt_regs *regs)
 {
-       struct vm_area_struct *vma = NULL;
-       struct task_struct *tsk = current;
-       struct mm_struct *mm = tsk->mm;
-       int si_code;
-       int fault;
-       unsigned long address = mmu_meh & PAGE_MASK;
+       switch (trap_no(regs)) {
+       case VEC_TLBINVALIDS:
+               return true;
+       case VEC_TLBMODIFIED:
+               return true;
+       }
 
-       if (kprobe_page_fault(regs, tsk->thread.trap_no))
+       return false;
+}
+
+#ifdef CONFIG_CPU_HAS_LDSTEX
+static inline void csky_cmpxchg_fixup(struct pt_regs *regs)
+{
+       return;
+}
+#else
+extern unsigned long csky_cmpxchg_ldw;
+extern unsigned long csky_cmpxchg_stw;
+static inline void csky_cmpxchg_fixup(struct pt_regs *regs)
+{
+       if (trap_no(regs) != VEC_TLBMODIFIED)
                return;
 
-       si_code = SEGV_MAPERR;
+       if (instruction_pointer(regs) == csky_cmpxchg_stw)
+               instruction_pointer_set(regs, csky_cmpxchg_ldw);
+       return;
+}
+#endif
+
+static inline void no_context(struct pt_regs *regs, unsigned long addr)
+{
+       current->thread.trap_no = trap_no(regs);
+
+       /* Are we prepared to handle this kernel fault? */
+       if (fixup_exception(regs))
+               return;
 
-#ifndef CONFIG_CPU_HAS_TLBI
        /*
-        * We fault-in kernel-space virtual memory on-demand. The
-        * 'reference' page table is init_mm.pgd.
-        *
-        * NOTE! We MUST NOT take any locks for this case. We may
-        * be in an interrupt or a critical region, and should
-        * only copy the information from the master page table,
-        * nothing more.
+        * Oops. The kernel tried to access some bad page. We'll have to
+        * terminate things with extreme prejudice.
         */
-       if (unlikely(address >= VMALLOC_START) &&
-           unlikely(address <= VMALLOC_END)) {
-               /*
-                * Synchronize this task's top level page-table
-                * with the 'reference' page table.
-                *
-                * Do _not_ use "tsk" here. We might be inside
-                * an interrupt in the middle of a task switch..
-                */
-               int offset = pgd_index(address);
-               pgd_t *pgd, *pgd_k;
-               pud_t *pud, *pud_k;
-               pmd_t *pmd, *pmd_k;
-               pte_t *pte_k;
+       bust_spinlocks(1);
+       pr_alert("Unable to handle kernel paging request at virtual "
+                "addr 0x%08lx, pc: 0x%08lx\n", addr, regs->pc);
+       die(regs, "Oops");
+       do_exit(SIGKILL);
+}
 
-               unsigned long pgd_base;
+static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
+{
+       current->thread.trap_no = trap_no(regs);
 
-               pgd_base = (unsigned long)__va(get_pgd());
-               pgd = (pgd_t *)pgd_base + offset;
-               pgd_k = init_mm.pgd + offset;
+       if (fault & VM_FAULT_OOM) {
+               /*
+                * We ran out of memory, call the OOM killer, and return to userspace
+                * (which will retry the fault, or kill us if we got oom-killed).
+                */
+               if (!user_mode(regs)) {
+                       no_context(regs, addr);
+                       return;
+               }
+               pagefault_out_of_memory();
+               return;
+       } else if (fault & VM_FAULT_SIGBUS) {
+               /* Kernel mode? Handle exceptions or die */
+               if (!user_mode(regs)) {
+                       no_context(regs, addr);
+                       return;
+               }
+               do_trap(regs, SIGBUS, BUS_ADRERR, addr);
+               return;
+       }
+       BUG();
+}
 
-               if (!pgd_present(*pgd_k))
-                       goto no_context;
-               set_pgd(pgd, *pgd_k);
+static inline void bad_area(struct pt_regs *regs, struct mm_struct *mm, int code, unsigned long addr)
+{
+       /*
+        * Something tried to access memory that isn't in our memory map.
+        * Fix it, but check if it's kernel or user first.
+        */
+       mmap_read_unlock(mm);
+       /* User mode accesses just cause a SIGSEGV */
+       if (user_mode(regs)) {
+               do_trap(regs, SIGSEGV, code, addr);
+               return;
+       }
 
-               pud = (pud_t *)pgd;
-               pud_k = (pud_t *)pgd_k;
-               if (!pud_present(*pud_k))
-                       goto no_context;
+       no_context(regs, addr);
+}
 
-               pmd = pmd_offset(pud, address);
-               pmd_k = pmd_offset(pud_k, address);
-               if (!pmd_present(*pmd_k))
-                       goto no_context;
-               set_pmd(pmd, *pmd_k);
+static inline void vmalloc_fault(struct pt_regs *regs, int code, unsigned long addr)
+{
+       pgd_t *pgd, *pgd_k;
+       pud_t *pud, *pud_k;
+       pmd_t *pmd, *pmd_k;
+       pte_t *pte_k;
+       int offset;
 
-               pte_k = pte_offset_kernel(pmd_k, address);
-               if (!pte_present(*pte_k))
-                       goto no_context;
+       /* User mode accesses just cause a SIGSEGV */
+       if (user_mode(regs)) {
+               do_trap(regs, SIGSEGV, code, addr);
                return;
        }
-#endif
 
-       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, address);
        /*
-        * If we're in an interrupt or have no user
-        * context, we must not take the fault..
+        * Synchronize this task's top level page-table
+        * with the 'reference' page table.
+        *
+        * Do _not_ use "tsk" here. We might be inside
+        * an interrupt in the middle of a task switch..
         */
-       if (in_atomic() || !mm)
-               goto bad_area_nosemaphore;
+       offset = pgd_index(addr);
 
-       mmap_read_lock(mm);
-       vma = find_vma(mm, address);
-       if (!vma)
-               goto bad_area;
-       if (vma->vm_start <= address)
-               goto good_area;
-       if (!(vma->vm_flags & VM_GROWSDOWN))
-               goto bad_area;
-       if (expand_stack(vma, address))
-               goto bad_area;
-       /*
-        * Ok, we have a good vm_area for this memory access, so
-        * we can handle it..
-        */
-good_area:
-       si_code = SEGV_ACCERR;
+       pgd = get_pgd() + offset;
+       pgd_k = init_mm.pgd + offset;
 
-       if (write) {
+       if (!pgd_present(*pgd_k)) {
+               no_context(regs, addr);
+               return;
+       }
+       set_pgd(pgd, *pgd_k);
+
+       pud = (pud_t *)pgd;
+       pud_k = (pud_t *)pgd_k;
+       if (!pud_present(*pud_k)) {
+               no_context(regs, addr);
+               return;
+       }
+
+       pmd = pmd_offset(pud, addr);
+       pmd_k = pmd_offset(pud_k, addr);
+       if (!pmd_present(*pmd_k)) {
+               no_context(regs, addr);
+               return;
+       }
+       set_pmd(pmd, *pmd_k);
+
+       pte_k = pte_offset_kernel(pmd_k, addr);
+       if (!pte_present(*pte_k)) {
+               no_context(regs, addr);
+               return;
+       }
+
+       flush_tlb_one(addr);
+}
+
+static inline bool access_error(struct pt_regs *regs, struct vm_area_struct *vma)
+{
+       if (is_write(regs)) {
                if (!(vma->vm_flags & VM_WRITE))
-                       goto bad_area;
+                       return true;
        } else {
                if (unlikely(!vma_is_accessible(vma)))
-                       goto bad_area;
+                       return true;
        }
+       return false;
+}
+
+/*
+ * This routine handles page faults.  It determines the address and the
+ * problem, and then passes it off to one of the appropriate routines.
+ */
+asmlinkage void do_page_fault(struct pt_regs *regs)
+{
+       struct task_struct *tsk;
+       struct vm_area_struct *vma;
+       struct mm_struct *mm;
+       unsigned long addr = read_mmu_entryhi() & PAGE_MASK;
+       unsigned int flags = FAULT_FLAG_DEFAULT;
+       int code = SEGV_MAPERR;
+       vm_fault_t fault;
+
+       tsk = current;
+       mm = tsk->mm;
+
+       csky_cmpxchg_fixup(regs);
+
+       if (kprobe_page_fault(regs, tsk->thread.trap_no))
+               return;
 
        /*
-        * If for any reason at all we couldn't handle the fault,
-        * make sure we exit gracefully rather than endlessly redo
-        * the fault.
+        * Fault-in kernel-space virtual memory on-demand.
+        * The 'reference' page table is init_mm.pgd.
+        *
+        * NOTE! We MUST NOT take any locks for this case. We may
+        * be in an interrupt or a critical region, and should
+        * only copy the information from the master page table,
+        * nothing more.
         */
-       fault = handle_mm_fault(vma, address, write ? FAULT_FLAG_WRITE : 0,
-                               regs);
-       if (unlikely(fault & VM_FAULT_ERROR)) {
-               if (fault & VM_FAULT_OOM)
-                       goto out_of_memory;
-               else if (fault & VM_FAULT_SIGBUS)
-                       goto do_sigbus;
-               else if (fault & VM_FAULT_SIGSEGV)
-                       goto bad_area;
-               BUG();
+       if (unlikely((addr >= VMALLOC_START) && (addr <= VMALLOC_END))) {
+               vmalloc_fault(regs, code, addr);
+               return;
        }
-       mmap_read_unlock(mm);
-       return;
+
+       /* Enable interrupts if they were enabled in the parent context. */
+       if (likely(regs->sr & BIT(6)))
+               local_irq_enable();
 
        /*
-        * Something tried to access memory that isn't in our memory map..
-        * Fix it, but check if it's kernel or user first..
+        * If we're in an interrupt, have no user context, or are running
+        * in an atomic region, then we must not take the fault.
         */
-bad_area:
-       mmap_read_unlock(mm);
-
-bad_area_nosemaphore:
-       /* User mode accesses just cause a SIGSEGV */
-       if (user_mode(regs)) {
-               tsk->thread.trap_no = trap_no(regs);
-               force_sig_fault(SIGSEGV, si_code, (void __user *)address);
+       if (unlikely(faulthandler_disabled() || !mm)) {
+               no_context(regs, addr);
                return;
        }
 
-no_context:
-       tsk->thread.trap_no = trap_no(regs);
+       if (user_mode(regs))
+               flags |= FAULT_FLAG_USER;
 
-       /* Are we prepared to handle this kernel fault? */
-       if (fixup_exception(regs))
+       perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
+
+       if (is_write(regs))
+               flags |= FAULT_FLAG_WRITE;
+retry:
+       mmap_read_lock(mm);
+       vma = find_vma(mm, addr);
+       if (unlikely(!vma)) {
+               bad_area(regs, mm, code, addr);
+               return;
+       }
+       if (likely(vma->vm_start <= addr))
+               goto good_area;
+       if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+               bad_area(regs, mm, code, addr);
                return;
+       }
+       if (unlikely(expand_stack(vma, addr))) {
+               bad_area(regs, mm, code, addr);
+               return;
+       }
 
        /*
-        * Oops. The kernel tried to access some bad page. We'll have to
-        * terminate things with extreme prejudice.
+        * Ok, we have a good vm_area for this memory access, so
+        * we can handle it.
         */
-       bust_spinlocks(1);
-       pr_alert("Unable to handle kernel paging request at virtual "
-                "address 0x%08lx, pc: 0x%08lx\n", address, regs->pc);
-       die(regs, "Oops");
+good_area:
+       code = SEGV_ACCERR;
+
+       if (unlikely(access_error(regs, vma))) {
+               bad_area(regs, mm, code, addr);
+               return;
+       }
 
-out_of_memory:
-       tsk->thread.trap_no = trap_no(regs);
+       /*
+        * If for any reason at all we could not handle the fault,
+        * make sure we exit gracefully rather than endlessly redo
+        * the fault.
+        */
+       fault = handle_mm_fault(vma, addr, flags, regs);
 
        /*
-        * We ran out of memory, call the OOM killer, and return the userspace
-        * (which will retry the fault, or kill us if we got oom-killed).
+        * If we need to retry but a fatal signal is pending, handle the
+        * signal first. We do not need to release the mmap_lock because it
+        * would already be released in __lock_page_or_retry in mm/filemap.c.
         */
-       pagefault_out_of_memory();
-       return;
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       no_context(regs, addr);
+               return;
+       }
 
-do_sigbus:
-       tsk->thread.trap_no = trap_no(regs);
+       if (unlikely((fault & VM_FAULT_RETRY) && (flags & FAULT_FLAG_ALLOW_RETRY))) {
+               flags |= FAULT_FLAG_TRIED;
 
-       mmap_read_unlock(mm);
+               /*
+                * No need to mmap_read_unlock(mm) as we would
+                * have already released it in __lock_page_or_retry
+                * in mm/filemap.c.
+                */
+               goto retry;
+       }
 
-       /* Kernel mode? Handle exceptions or die */
-       if (!user_mode(regs))
-               goto no_context;
+       mmap_read_unlock(mm);
 
-       force_sig_fault(SIGBUS, BUS_ADRERR, (void __user *)address);
+       if (unlikely(fault & VM_FAULT_ERROR)) {
+               mm_fault_error(regs, addr, fault);
+               return;
+       }
+       return;
 }
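
The rewrite routes user-mode failures through do_trap() with SEGV_MAPERR (no mapping at the address) or SEGV_ACCERR (mapping present, permission denied). A small user-space demonstration of the two codes, portable to any architecture:

    #include <signal.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/mman.h>

    static void handler(int sig, siginfo_t *si, void *uc)
    {
            (void)sig; (void)uc;
            /* printf is not async-signal-safe; acceptable in a demo */
            printf("SIGSEGV at %p, si_code=%s\n", si->si_addr,
                   si->si_code == SEGV_ACCERR ? "SEGV_ACCERR" : "SEGV_MAPERR");
            exit(0);
    }

    int main(void)
    {
            struct sigaction sa = { .sa_sigaction = handler,
                                    .sa_flags = SA_SIGINFO };
            char *p = mmap(NULL, 4096, PROT_READ,
                           MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

            if (p == MAP_FAILED)
                    return 1;
            sigaction(SIGSEGV, &sa, NULL);
            p[0] = 1;       /* write to a read-only page -> SEGV_ACCERR */
            return 0;
    }
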
index af62712..894050a 100644 (file)
 #include <asm/mmu_context.h>
 #include <asm/sections.h>
 #include <asm/tlb.h>
+#include <asm/cacheflush.h>
+
+#define PTRS_KERN_TABLE \
+               ((PTRS_PER_PGD - USER_PTRS_PER_PGD) * PTRS_PER_PTE)
 
 pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pte_t invalid_pte_table[PTRS_PER_PTE] __page_aligned_bss;
+pte_t kernel_pte_tables[PTRS_KERN_TABLE] __page_aligned_bss;
+
 EXPORT_SYMBOL(invalid_pte_table);
 unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
                                                __page_aligned_bss;
@@ -80,9 +86,9 @@ void __init mem_init(void)
 #ifdef CONFIG_HIGHMEM
        unsigned long tmp;
 
-       max_mapnr = highend_pfn;
+       set_max_mapnr(highend_pfn - ARCH_PFN_OFFSET);
 #else
-       max_mapnr = max_low_pfn;
+       set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
 #endif
        high_memory = (void *) __va(max_low_pfn << PAGE_SHIFT);
 
@@ -104,24 +110,9 @@ void __init mem_init(void)
        mem_init_print_info(NULL);
 }
 
-extern char __init_begin[], __init_end[];
-
 void free_initmem(void)
 {
-       unsigned long addr;
-
-       addr = (unsigned long) &__init_begin;
-
-       while (addr < (unsigned long) &__init_end) {
-               ClearPageReserved(virt_to_page(addr));
-               init_page_count(virt_to_page(addr));
-               free_page(addr);
-               totalram_pages_inc();
-               addr += PAGE_SIZE;
-       }
-
-       pr_info("Freeing unused kernel memory: %dk freed\n",
-       ((unsigned int)&__init_end - (unsigned int)&__init_begin) >> 10);
+       free_initmem_default(-1);
 }
 
 void pgd_init(unsigned long *p)
@@ -130,20 +121,35 @@ void pgd_init(unsigned long *p)
 
        for (i = 0; i < PTRS_PER_PGD; i++)
                p[i] = __pa(invalid_pte_table);
+
+       flush_tlb_all();
+       local_icache_inv_all(NULL);
 }
 
-void __init pre_mmu_init(void)
+void __init mmu_init(unsigned long min_pfn, unsigned long max_pfn)
 {
-       /*
-        * Setup page-table and enable TLB-hardrefill
-        */
+       int i;
+
+       for (i = 0; i < USER_PTRS_PER_PGD; i++)
+               swapper_pg_dir[i].pgd = __pa(invalid_pte_table);
+
+       for (i = USER_PTRS_PER_PGD; i < PTRS_PER_PGD; i++)
+               swapper_pg_dir[i].pgd =
+                       __pa(kernel_pte_tables + (PTRS_PER_PTE * (i - USER_PTRS_PER_PGD)));
+
+       for (i = 0; i < PTRS_KERN_TABLE; i++)
+               set_pte(&kernel_pte_tables[i], __pte(_PAGE_GLOBAL));
+
+       for (i = min_pfn; i < max_pfn; i++)
+               set_pte(&kernel_pte_tables[i - PFN_DOWN(va_pa_offset)], pfn_pte(i, PAGE_KERNEL));
+
        flush_tlb_all();
-       pgd_init((unsigned long *)swapper_pg_dir);
-       TLBMISS_HANDLER_SETUP_PGD(swapper_pg_dir);
-       TLBMISS_HANDLER_SETUP_PGD_KERNEL(swapper_pg_dir);
+       local_icache_inv_all(NULL);
 
        /* Setup page mask to 4k */
        write_mmu_pagemask(0);
+
+       setup_pgd(swapper_pg_dir, 0);
 }
 
 void __init fixrange_init(unsigned long start, unsigned long end,
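
Two points in this hunk: free_initmem() now defers to the generic helper, and mmu_init() pre-populates kernel_pte_tables so the kernel half of the address space never needs a per-fault PGD sync. Roughly what free_initmem_default(-1) boils down to (simplified from the generic mm code, from memory; the -1 means the freed pages are not poisoned):

    extern char __init_begin[], __init_end[];

    static void free_initmem_sketch(void)
    {
            unsigned long addr = (unsigned long)__init_begin;

            /* hand every init page back to the buddy allocator */
            for (; addr < (unsigned long)__init_end; addr += PAGE_SIZE)
                    free_reserved_page(virt_to_page(addr));
    }
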
index ed15123..9234c5e 100644 (file)
@@ -24,7 +24,13 @@ void flush_tlb_all(void)
 void flush_tlb_mm(struct mm_struct *mm)
 {
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.asids %0"::"r"(cpu_asid(mm)));
+       sync_is();
+       asm volatile(
+               "tlbi.asids %0  \n"
+               "sync.i         \n"
+               :
+               : "r" (cpu_asid(mm))
+               : "memory");
 #else
        tlb_invalid_all();
 #endif
@@ -53,11 +59,17 @@ void flush_tlb_range(struct vm_area_struct *vma, unsigned long start,
        end   &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
+       sync_is();
        while (start < end) {
-               asm volatile("tlbi.vas %0"::"r"(start | newpid));
+               asm volatile(
+                       "tlbi.vas %0    \n"
+                       :
+                       : "r" (start | newpid)
+                       : "memory");
+
                start += 2*PAGE_SIZE;
        }
-       sync_is();
+       asm volatile("sync.i\n");
 #else
        {
        unsigned long flags, oldpid;
@@ -87,11 +99,17 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end)
        end   &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
+       sync_is();
        while (start < end) {
-               asm volatile("tlbi.vaas %0"::"r"(start));
+               asm volatile(
+                       "tlbi.vaas %0   \n"
+                       :
+                       : "r" (start)
+                       : "memory");
+
                start += 2*PAGE_SIZE;
        }
-       sync_is();
+       asm volatile("sync.i\n");
 #else
        {
        unsigned long flags, oldpid;
@@ -121,8 +139,13 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long addr)
        addr &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.vas %0"::"r"(addr | newpid));
        sync_is();
+       asm volatile(
+               "tlbi.vas %0    \n"
+               "sync.i         \n"
+               :
+               : "r" (addr | newpid)
+               : "memory");
 #else
        {
        int oldpid, idx;
@@ -147,8 +170,13 @@ void flush_tlb_one(unsigned long addr)
        addr &= TLB_ENTRY_SIZE_MASK;
 
 #ifdef CONFIG_CPU_HAS_TLBI
-       asm volatile("tlbi.vaas %0"::"r"(addr));
        sync_is();
+       asm volatile(
+               "tlbi.vaas %0   \n"
+               "sync.i         \n"
+               :
+               : "r" (addr)
+               : "memory");
 #else
        {
        int oldpid, idx;
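
The same reordering is applied to every TLBI path: publish the page-table stores with sync.is before issuing the broadcast invalidates, then wait for completion with sync.i afterwards, once per batch in the range loops rather than once per entry. A condensed sketch of the resulting shape (csky-specific, assumes CONFIG_CPU_HAS_TLBI and the kernel tlb.c context):

    static void flush_range_sketch(unsigned long start, unsigned long end,
                                   unsigned long newpid)
    {
            sync_is();                      /* make PTE updates visible */
            while (start < end) {
                    asm volatile("tlbi.vas %0" : : "r"(start | newpid)
                                 : "memory");
                    start += 2 * PAGE_SIZE; /* one entry covers a page pair */
            }
            asm volatile("sync.i" ::: "memory");    /* wait for completion */
    }
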
index bc1364d..46b1342 100644 (file)
@@ -112,7 +112,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
 
        childregs = (struct pt_regs *) (THREAD_SIZE + task_stack_page(p)) - 1;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(childregs, 0, sizeof(struct pt_regs));
                childregs->retpc = (unsigned long) ret_from_kernel_thread;
                childregs->er4 = topstk; /* arg */
index e324f65..f19ae2a 100644 (file)
@@ -1,7 +1,6 @@
 CONFIG_SMP=y
 CONFIG_DEFAULT_MMAP_MIN_ADDR=0
 CONFIG_HZ_100=y
-CONFIG_EXPERIMENTAL=y
 CONFIG_CROSS_COMPILE="hexagon-"
 CONFIG_LOCALVERSION="-smp"
 # CONFIG_LOCALVERSION_AUTO is not set
index 6a980cb..c61165c 100644 (file)
@@ -73,7 +73,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
                                                    sizeof(*ss));
        ss->lr = (unsigned long)ret_from_fork;
        p->thread.switch_sp = ss;
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(childregs, 0, sizeof(struct pt_regs));
                /* r24 <- fn, r25 <- arg */
                ss->r24 = usp;
index f3328a2..467b7e7 100644 (file)
@@ -14,7 +14,6 @@
 KBUILD_DEFCONFIG := generic_defconfig
 
 NM := $(CROSS_COMPILE)nm -B
-READELF := $(CROSS_COMPILE)readelf
 
 CHECKFLAGS     += -D__ia64=1 -D__ia64__=1 -D_LP64 -D__LP64__
 
@@ -85,9 +84,3 @@ define archhelp
   echo '  install      - Install compressed kernel image'
   echo '* unwcheck     - Check vmlinux for invalid unwind info'
 endef
-
-archprepare: make_nr_irqs_h
-PHONY += make_nr_irqs_h
-
-make_nr_irqs_h:
-       $(Q)$(MAKE) $(build)=arch/ia64/kernel include/generated/nr-irqs.h
index 5acf52e..0eccf33 100644 (file)
@@ -14,7 +14,9 @@
 
 #include <linux/types.h>
 #include <linux/cpumask.h>
-#include <generated/nr-irqs.h>
+#include <asm/native/irq.h>
+
+#define NR_IRQS                IA64_NATIVE_NR_IRQS
 
 static __inline__ int
 irq_canonicalize (int irq)
index 726df17..0580524 100644 (file)
 
 #if !defined(__ASSEMBLY__)
 
-#include <linux/interrupt.h>
+#include <linux/percpu.h>
+#include <linux/threads.h>
 #include <linux/types.h>
-
-#include <asm/param.h>
-#include <asm/sal.h>
-#include <asm/processor.h>
-#include <asm/mca_asm.h>
+#include <asm/ptrace.h>
 
 #define IA64_MCA_RENDEZ_TIMEOUT                (20 * 1000)     /* value in milliseconds - 20 seconds */
 
@@ -83,7 +80,7 @@ struct ia64_sal_os_state {
        /* common */
        unsigned long           sal_ra;                 /* Return address in SAL, physical */
        unsigned long           sal_gp;                 /* GP of the SAL - physical */
-       pal_min_state_area_t    *pal_min_state;         /* from R17.  physical in asm, virtual in C */
+       struct pal_min_state_area *pal_min_state;       /* from R17.  physical in asm, virtual in C */
        /* Previous values of IA64_KR(CURRENT) and IA64_KR(CURRENT_STACK).
         * Note: if the MCA/INIT recovery code wants to resume to a new context
         * then it must change these values to reflect the new kernel stack.
index f9d2b3b..b1d8795 100644 (file)
@@ -750,7 +750,7 @@ typedef union pal_mc_error_info_u {
  * for PAL.
  */
 
-typedef struct pal_min_state_area_s {
+struct pal_min_state_area {
        u64     pmsa_nat_bits;          /* nat bits for saved GRs  */
        u64     pmsa_gr[15];            /* GR1  - GR15             */
        u64     pmsa_bank0_gr[16];      /* GR16 - GR31             */
@@ -766,7 +766,7 @@ typedef struct pal_min_state_area_s {
        u64     pmsa_xfs;               /* previous ifs            */
        u64     pmsa_br1;               /* branch register 1       */
        u64     pmsa_reserved[70];      /* pal_min_state_area should total to 1KB */
-} pal_min_state_area_t;
+};
 
 
 struct ia64_pal_retval {
index 779b697..9b4efe8 100644 (file)
@@ -517,12 +517,6 @@ extern struct page *zero_page_memmap_ptr;
        __changed;                                                      \
 })
 #endif
-
-#  ifdef CONFIG_VIRTUAL_MEM_MAP
-  /* arch mem_map init routine is needed due to holes in a virtual mem_map */
-    extern void memmap_init (unsigned long size, int nid, unsigned long zone,
-                            unsigned long start_pfn);
-#  endif /* CONFIG_VIRTUAL_MEM_MAP */
 # endif /* !__ASSEMBLY__ */
 
 /*
index 08f5b6a..78f4f7b 100644 (file)
@@ -385,7 +385,7 @@ typedef struct sal_processor_static_info {
                    fr              : 1,
                    reserved        : 58;
        } valid;
-       pal_min_state_area_t min_state_area;
+       struct pal_min_state_area min_state_area;
        u64 br[8];
        u64 cr[128];
        u64 ar[128];
index 6c6f16e..0d23c00 100644 (file)
@@ -32,7 +32,7 @@ static inline void syscall_rollback(struct task_struct *task,
 static inline long syscall_get_error(struct task_struct *task,
                                     struct pt_regs *regs)
 {
-       return regs->r10 == -1 ? regs->r8:0;
+       return regs->r10 == -1 ? -regs->r8:0;
 }
 
 static inline long syscall_get_return_value(struct task_struct *task,
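
ia64 reports syscall failure by setting r10 to -1 with a positive errno left in r8, while the generic syscall_get_error() contract expects a Linux-style negative errno, hence the added negation. The convention as a stand-alone sketch (names illustrative):

    /* r10 == -1 flags failure; r8 then holds a positive errno */
    static long ia64_syscall_error_sketch(long r8, long r10)
    {
            return r10 == -1 ? -r8 : 0;     /* e.g. r8 = 14 -> -14, -EFAULT */
    }
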
index c89bd5f..7871781 100644 (file)
@@ -47,8 +47,3 @@ CFLAGS_traps.o  += -mfixed-range=f2-f5,f16-f31
 
 # The gate DSO image is built using a special linker script.
 include $(src)/Makefile.gate
-
-include/generated/nr-irqs.h: arch/$(SRCARCH)/kernel/nr-irqs.s FORCE
-       $(call filechk,offsets,__ASM_NR_IRQS_H__)
-
-targets += nr-irqs.s
index fb0deb8..be3b90f 100644 (file)
@@ -245,23 +245,23 @@ void foo(void)
        BLANK();
 
        DEFINE(IA64_PMSA_GR_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_gr));
+              offsetof(struct pal_min_state_area, pmsa_gr));
        DEFINE(IA64_PMSA_BANK1_GR_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_bank1_gr));
+              offsetof(struct pal_min_state_area, pmsa_bank1_gr));
        DEFINE(IA64_PMSA_PR_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_pr));
+              offsetof(struct pal_min_state_area, pmsa_pr));
        DEFINE(IA64_PMSA_BR0_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_br0));
+              offsetof(struct pal_min_state_area, pmsa_br0));
        DEFINE(IA64_PMSA_RSC_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_rsc));
+              offsetof(struct pal_min_state_area, pmsa_rsc));
        DEFINE(IA64_PMSA_IIP_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_iip));
+              offsetof(struct pal_min_state_area, pmsa_iip));
        DEFINE(IA64_PMSA_IPSR_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_ipsr));
+              offsetof(struct pal_min_state_area, pmsa_ipsr));
        DEFINE(IA64_PMSA_IFS_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_ifs));
+              offsetof(struct pal_min_state_area, pmsa_ifs));
        DEFINE(IA64_PMSA_XIP_OFFSET,
-              offsetof (struct pal_min_state_area_s, pmsa_xip));
+              offsetof(struct pal_min_state_area, pmsa_xip));
        BLANK();
 
        /* used by fsys_gettimeofday in arch/ia64/kernel/fsys.S */
index 4f47741..76730f3 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/memblock.h>
 #include <linux/kexec.h>
 #include <linux/elfcore.h>
+#include <linux/reboot.h>
 #include <linux/sysctl.h>
 #include <linux/init.h>
 #include <linux/kdebug.h>
index dd7fd75..c5fe21d 100644 (file)
@@ -40,6 +40,7 @@
 #include <asm/meminit.h>
 #include <asm/processor.h>
 #include <asm/mca.h>
+#include <asm/sal.h>
 #include <asm/setup.h>
 #include <asm/tlbflush.h>
 
index 8b5b8e6..dd5bfed 100644 (file)
@@ -59,7 +59,7 @@ show_##name(struct device *dev, struct device_attribute *attr,        \
                char *buf)                                              \
 {                                                                      \
        u32 cpu=dev->id;                                                \
-       return sprintf(buf, "%lx\n", name[cpu]);                        \
+       return sprintf(buf, "%llx\n", name[cpu]);                       \
 }
 
 #define store(name)                                                    \
@@ -86,9 +86,9 @@ store_call_start(struct device *dev, struct device_attribute *attr,
 
 #ifdef ERR_INJ_DEBUG
        printk(KERN_DEBUG "pal_mc_err_inject for cpu%d:\n", cpu);
-       printk(KERN_DEBUG "err_type_info=%lx,\n", err_type_info[cpu]);
-       printk(KERN_DEBUG "err_struct_info=%lx,\n", err_struct_info[cpu]);
-       printk(KERN_DEBUG "err_data_buffer=%lx, %lx, %lx.\n",
+       printk(KERN_DEBUG "err_type_info=%llx,\n", err_type_info[cpu]);
+       printk(KERN_DEBUG "err_struct_info=%llx,\n", err_struct_info[cpu]);
+       printk(KERN_DEBUG "err_data_buffer=%llx, %llx, %llx.\n",
                          err_data_buffer[cpu].data1,
                          err_data_buffer[cpu].data2,
                          err_data_buffer[cpu].data3);
@@ -117,8 +117,8 @@ store_call_start(struct device *dev, struct device_attribute *attr,
 
 #ifdef ERR_INJ_DEBUG
        printk(KERN_DEBUG "Returns: status=%d,\n", (int)status[cpu]);
-       printk(KERN_DEBUG "capabilities=%lx,\n", capabilities[cpu]);
-       printk(KERN_DEBUG "resources=%lx\n", resources[cpu]);
+       printk(KERN_DEBUG "capabilities=%llx,\n", capabilities[cpu]);
+       printk(KERN_DEBUG "resources=%llx\n", resources[cpu]);
 #endif
        return size;
 }
@@ -131,7 +131,7 @@ show_virtual_to_phys(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        unsigned int cpu=dev->id;
-       return sprintf(buf, "%lx\n", phys_addr[cpu]);
+       return sprintf(buf, "%llx\n", phys_addr[cpu]);
 }
 
 static ssize_t
@@ -145,7 +145,7 @@ store_virtual_to_phys(struct device *dev, struct device_attribute *attr,
        ret = get_user_pages_fast(virt_addr, 1, FOLL_WRITE, NULL);
        if (ret<=0) {
 #ifdef ERR_INJ_DEBUG
-               printk("Virtual address %lx is not existing.\n",virt_addr);
+               printk("Virtual address %llx is not existing.\n", virt_addr);
 #endif
                return -EINVAL;
        }
@@ -163,7 +163,7 @@ show_err_data_buffer(struct device *dev,
 {
        unsigned int cpu=dev->id;
 
-       return sprintf(buf, "%lx, %lx, %lx\n",
+       return sprintf(buf, "%llx, %llx, %llx\n",
                        err_data_buffer[cpu].data1,
                        err_data_buffer[cpu].data2,
                        err_data_buffer[cpu].data3);
@@ -178,13 +178,13 @@ store_err_data_buffer(struct device *dev,
        int ret;
 
 #ifdef ERR_INJ_DEBUG
-       printk("write err_data_buffer=[%lx,%lx,%lx] on cpu%d\n",
+       printk("write err_data_buffer=[%llx,%llx,%llx] on cpu%d\n",
                 err_data_buffer[cpu].data1,
                 err_data_buffer[cpu].data2,
                 err_data_buffer[cpu].data3,
                 cpu);
 #endif
-       ret=sscanf(buf, "%lx, %lx, %lx",
+       ret = sscanf(buf, "%llx, %llx, %llx",
                        &err_data_buffer[cpu].data1,
                        &err_data_buffer[cpu].data2,
                        &err_data_buffer[cpu].data3);
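
All of these fields are u64, which the kernel defines as unsigned long long on every architecture (the int-ll64.h convention), so the matching conversion specifier is %llx, not %lx. A stand-alone illustration:

    #include <stdio.h>

    typedef unsigned long long u64;         /* kernel int-ll64.h convention */

    int main(void)
    {
            u64 v = 0xdeadbeefULL;

            printf("%llx\n", v);            /* matches u64 on every arch */
            return 0;
    }
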
index 0fea266..adf6521 100644 (file)
@@ -97,6 +97,7 @@
 #include <asm/ptrace.h>
 #include <asm/sal.h>
 #include <asm/mca.h>
+#include <asm/mca_asm.h>
 #include <asm/kexec.h>
 
 #include <asm/irq.h>
@@ -895,7 +896,7 @@ static void
 finish_pt_regs(struct pt_regs *regs, struct ia64_sal_os_state *sos,
                unsigned long *nat)
 {
-       const pal_min_state_area_t *ms = sos->pal_min_state;
+       const struct pal_min_state_area *ms = sos->pal_min_state;
        const u64 *bank;
 
        /* If ipsr.ic then use pmsa_{iip,ipsr,ifs}, else use
@@ -971,7 +972,7 @@ ia64_mca_modify_original_stack(struct pt_regs *regs,
        char *p;
        ia64_va va;
        extern char ia64_leave_kernel[];        /* Need asm address, not function descriptor */
-       const pal_min_state_area_t *ms = sos->pal_min_state;
+       const struct pal_min_state_area *ms = sos->pal_min_state;
        struct task_struct *previous_current;
        struct pt_regs *old_regs;
        struct switch_stack *old_sw;
@@ -1823,7 +1824,7 @@ ia64_mca_cpu_init(void *cpu_data)
                        data = mca_bootmem();
                        first_time = 0;
                } else
-                       data = (void *)__get_free_pages(GFP_KERNEL,
+                       data = (void *)__get_free_pages(GFP_ATOMIC,
                                                        get_order(sz));
                if (!data)
                        panic("Could not allocate MCA memory for cpu %d\n",
index 4d0ab32..36a69b4 100644 (file)
@@ -496,7 +496,7 @@ recover_from_read_error(slidx_table_t *slidx,
                        struct ia64_sal_os_state *sos)
 {
        u64 target_identifier;
-       pal_min_state_area_t *pmsa;
+       struct pal_min_state_area *pmsa;
        struct ia64_psr *psr1, *psr2;
        ia64_fptr_t *mca_hdlr_bh = (ia64_fptr_t*)mca_handler_bhhook;
 
diff --git a/arch/ia64/kernel/nr-irqs.c b/arch/ia64/kernel/nr-irqs.c
deleted file mode 100644 (file)
index f2633b2..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * calculate
- * NR_IRQS = max(IA64_NATIVE_NR_IRQS, XEN_NR_IRQS, FOO_NR_IRQS...)
- * depending on config.
- * This must be calculated before processing asm-offset.c.
- */
-
-#define ASM_OFFSETS_C 1
-
-#include <linux/kbuild.h>
-#include <linux/threads.h>
-#include <asm/native/irq.h>
-
-void foo(void)
-{
-       union paravirt_nr_irqs_max {
-               char ia64_native_nr_irqs[IA64_NATIVE_NR_IRQS];
-       };
-
-       DEFINE(NR_IRQS, sizeof (union paravirt_nr_irqs_max));
-}
index 4ebbfa0..7e1a152 100644 (file)
@@ -338,7 +338,7 @@ copy_thread(unsigned long clone_flags, unsigned long user_stack_base,
 
        ia64_drop_fpu(p);       /* don't pick up stale state from a CPU's fph */
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                if (unlikely(!user_stack_base)) {
                        /* fork_idle() called us */
                        return 0;
index c3490ee..e14f565 100644 (file)
@@ -2013,27 +2013,39 @@ static void syscall_get_set_args_cb(struct unw_frame_info *info, void *data)
 {
        struct syscall_get_set_args *args = data;
        struct pt_regs *pt = args->regs;
-       unsigned long *krbs, cfm, ndirty;
+       unsigned long *krbs, cfm, ndirty, nlocals, nouts;
        int i, count;
 
        if (unw_unwind_to_user(info) < 0)
                return;
 
+       /*
+        * We get here via a few paths:
+        * - break instruction: cfm is shared with the caller;
+        *   syscall args are in the out regs, locals are non-empty.
+        * - epc instruction: cfm is set by br.call;
+        *   locals don't exist.
+        *
+        * In both cases the arguments are reachable in cfm.sof - cfm.sol.
+        * CFM: [ ... | sor: 17..14 | sol: 13..7 | sof: 6..0 ]
+        */
        cfm = pt->cr_ifs;
+       nlocals = (cfm >> 7) & 0x7f; /* aka sol */
+       nouts = (cfm & 0x7f) - nlocals; /* aka sof - sol */
        krbs = (unsigned long *)info->task + IA64_RBS_OFFSET/8;
        ndirty = ia64_rse_num_regs(krbs, krbs + (pt->loadrs >> 19));
 
        count = 0;
        if (in_syscall(pt))
-               count = min_t(int, args->n, cfm & 0x7f);
+               count = min_t(int, args->n, nouts);
 
+       /* Iterate over outs. */
        for (i = 0; i < count; i++) {
+               int j = ndirty + nlocals + i + args->i;
                if (args->rw)
-                       *ia64_rse_skip_regs(krbs, ndirty + i + args->i) =
-                               args->args[i];
+                       *ia64_rse_skip_regs(krbs, j) = args->args[i];
                else
-                       args->args[i] = *ia64_rse_skip_regs(krbs,
-                               ndirty + i + args->i);
+                       args->args[i] = *ia64_rse_skip_regs(krbs, j);
        }
 
        if (!args->rw) {
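
The fix accounts for the local registers that sit between the dirty partition and the outgoing args on the register backing store. The CFM field decoding the new code relies on, as a hedged sketch:

    /* CFM: [ ... | sor: 17..14 | sol: 13..7 | sof: 6..0 ] */
    static inline unsigned long cfm_sof(unsigned long cfm)
    {
            return cfm & 0x7f;              /* size of frame */
    }

    static inline unsigned long cfm_sol(unsigned long cfm)
    {
            return (cfm >> 7) & 0x7f;       /* size of locals */
    }

    /* the outgoing syscall args occupy the top sof - sol registers */
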
index e67b22f..c1b2997 100644 (file)
@@ -341,7 +341,8 @@ ia64_do_signal (struct sigscratch *scr, long in_syscall)
         * need to push through a forced SIGSEGV.
         */
        while (1) {
-               get_signal(&ksig);
+               if (!get_signal(&ksig))
+                       break;
 
                /*
                 * get_signal() may have run a debugger (via notify_parent())
index 813a58c..bf4bda0 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -22,19 +22,20 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_offset_$(basetarget))'
 
 syshdr_offset_unistd_64 := __NR_Linux
-$(uapi)/unistd_64.h: $(syscall) $(syshdr)
+$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 systbl_offset_syscall_table := 1024
-$(kapi)/syscall_table.h: $(syscall) $(systbl)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_64.h
 kapisyshdr-y           += syscall_table.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index bfc00f2..d892311 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index b19f47a..16d0d7d 100644 (file)
@@ -536,18 +536,20 @@ virtual_memmap_init(u64 start, u64 end, void *arg)
                    / sizeof(struct page));
 
        if (map_start < map_end)
-               memmap_init_zone((unsigned long)(map_end - map_start),
+               memmap_init_range((unsigned long)(map_end - map_start),
                                 args->nid, args->zone, page_to_pfn(map_start), page_to_pfn(map_end),
                                 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
        return 0;
 }
 
-void __meminit
-memmap_init (unsigned long size, int nid, unsigned long zone,
-            unsigned long start_pfn)
+void __meminit memmap_init_zone(struct zone *zone)
 {
+       int nid = zone_to_nid(zone), zone_id = zone_idx(zone);
+       unsigned long start_pfn = zone->zone_start_pfn;
+       unsigned long size = zone->spanned_pages;
+
        if (!vmem_map) {
-               memmap_init_zone(size, nid, zone, start_pfn, start_pfn + size,
+               memmap_init_range(size, nid, zone_id, start_pfn, start_pfn + size,
                                 MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
        } else {
                struct page *start;
@@ -557,7 +559,7 @@ memmap_init (unsigned long size, int nid, unsigned long zone,
                args.start = start;
                args.end = start + size;
                args.nid = nid;
-               args.zone = zone;
+               args.zone = zone_id;
 
                efi_memmap_walk(virtual_memmap_init, &args);
        }
index 7bc666e..076a9ca 100644 (file)
@@ -90,6 +90,10 @@ EXPORT_SYMBOL(clk_get);
 int clk_enable(struct clk *clk)
 {
        unsigned long flags;
+
+       if (!clk)
+               return -EINVAL;
+
        spin_lock_irqsave(&clk_lock, flags);
        if ((clk->enabled++ == 0) && clk->clk_ops)
                clk->clk_ops->enable(clk);
index 7f5912a..9e8f0cc 100644 (file)
@@ -171,7 +171,7 @@ static inline __attribute_const__ int __virt_to_node_shift(void)
 #include <asm-generic/memory_model.h>
 #endif
 
-#define virt_addr_valid(kaddr) ((void *)(kaddr) >= (void *)PAGE_OFFSET && (void *)(kaddr) < high_memory)
+#define virt_addr_valid(kaddr) ((unsigned long)(kaddr) >= PAGE_OFFSET && (unsigned long)(kaddr) < (unsigned long)high_memory)
 #define pfn_valid(pfn)         virt_addr_valid(pfn_to_virt(pfn))
 
 #endif /* __ASSEMBLY__ */
index 6bbe520..8d0f862 100644 (file)
@@ -30,8 +30,8 @@ extern unsigned long memory_end;
 #define page_to_pfn(page)      virt_to_pfn(page_to_virt(page))
 #define pfn_valid(pfn)         ((pfn) < max_mapnr)
 
-#define        virt_addr_valid(kaddr)  (((void *)(kaddr) >= (void *)PAGE_OFFSET) && \
-                               ((void *)(kaddr) < (void *)memory_end))
+#define        virt_addr_valid(kaddr)  (((unsigned long)(kaddr) >= PAGE_OFFSET) && \
+                               ((unsigned long)(kaddr) < memory_end))
 
 #endif /* __ASSEMBLY__ */
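
Both m68k variants had virt_addr_valid() comparing casted pointers; relational comparison of pointers into different objects is undefined in C, so the macros now compare plain unsigned long values. The same shape as a stand-alone helper (sketch):

    /* well-defined range check: compare integers, not unrelated pointers */
    #define addr_in_range(kaddr, lo, hi)                            \
            ((unsigned long)(kaddr) >= (unsigned long)(lo) &&       \
             (unsigned long)(kaddr) <  (unsigned long)(hi))
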
 
index 08359a6..da83cc8 100644 (file)
@@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
         */
        p->thread.fs = get_fs().seg;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* kernel thread */
                memset(frame, 0, sizeof(struct fork_frame));
                frame->regs.sr = PS_S;
index 659faef..285aaba 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -21,18 +21,19 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_abi_$(basetarget))'                \
                   '$(systbl_offset_$(basetarget))'
 
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-$(kapi)/syscall_table.h: $(syscall) $(systbl)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h
 kapisyshdr-y           += syscall_table.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index 7fe4e45..72bde67 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index 25a5a3f..0660f47 100644 (file)
@@ -38,7 +38,6 @@ config MICROBLAZE
        select OF_EARLY_FLATTREE
        select PCI_DOMAINS_GENERIC if PCI
        select PCI_SYSCALL if PCI
-       select TRACING_SUPPORT
        select VIRT_TO_BUS
        select CPU_NO_EFFICIENT_FFS
        select MMU_GATHER_NO_RANGE
index 9f12e3c..e5db3a5 100644 (file)
@@ -24,9 +24,6 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
        Elf32_Sym *sym;
        unsigned long int *location;
        unsigned long int value;
-#if __GNUC__ < 4
-       unsigned long int old_value;
-#endif
 
        pr_debug("Applying add relocation section %u to %u\n",
                relsec, sechdrs[relsec].sh_info);
@@ -49,40 +46,17 @@ int apply_relocate_add(Elf32_Shdr *sechdrs, const char *strtab,
                 */
 
                case R_MICROBLAZE_32:
-#if __GNUC__ < 4
-                       old_value = *location;
-                       *location = value + old_value;
-
-                       pr_debug("R_MICROBLAZE_32 (%08lx->%08lx)\n",
-                               old_value, value);
-#else
                        *location = value;
-#endif
                        break;
 
                case R_MICROBLAZE_64:
-#if __GNUC__ < 4
-                       /* Split relocs only required/used pre gcc4.1.1 */
-                       old_value = ((location[0] & 0x0000FFFF) << 16) |
-                                       (location[1] & 0x0000FFFF);
-                       value += old_value;
-#endif
                        location[0] = (location[0] & 0xFFFF0000) |
                                        (value >> 16);
                        location[1] = (location[1] & 0xFFFF0000) |
                                        (value & 0xFFFF);
-#if __GNUC__ < 4
-                       pr_debug("R_MICROBLAZE_64 (%08lx->%08lx)\n",
-                               old_value, value);
-#endif
                        break;
 
                case R_MICROBLAZE_64_PCREL:
-#if __GNUC__ < 4
-                       old_value = (location[0] & 0xFFFF) << 16 |
-                               (location[1] & 0xFFFF);
-                       value -= old_value;
-#endif
                        value -= (unsigned long int)(location) + 4;
                        location[0] = (location[0] & 0xFFFF0000) |
                                        (value >> 16);
index 657c2be..62aa237 100644 (file)
@@ -59,7 +59,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
        struct pt_regs *childregs = task_pt_regs(p);
        struct thread_info *ti = task_thread_info(p);
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* if we're creating a new kernel thread then just zeroing all
                 * the registers. That's OK for a brand new thread.*/
                memset(childregs, 0, sizeof(struct pt_regs));
index 659faef..285aaba 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -21,18 +21,19 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_abi_$(basetarget))'                \
                   '$(systbl_offset_$(basetarget))'
 
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-$(kapi)/syscall_table.h: $(syscall) $(systbl)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h
 kapisyshdr-y           += syscall_table.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index a522adf..d603a5e 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index df07b3d..fb31747 100644 (file)
@@ -45,7 +45,7 @@ SECTIONS {
                _etext = . ;
        }
 
-       . = ALIGN (4) ;
+       . = ALIGN (8) ;
        __fdt_blob : AT(ADDR(__fdt_blob) - LOAD_OFFSET) {
                _fdt_start = . ;                /* place for fdt blob */
                *(__fdt_blob) ;                 /* Any link-placed DTB */
index 95f8f10..31bcfa4 100644 (file)
@@ -196,4 +196,4 @@ static int __init plat_dev_init(void)
        return 0;
 }
 
-device_initcall(plat_dev_init);
+arch_initcall(plat_dev_init);
index e3946b0..3d70d15 100644 (file)
@@ -14,6 +14,7 @@
 
 #include <asm/addrspace.h>
 #include <asm/unaligned.h>
+#include <asm-generic/vmlinux.lds.h>
 
 /*
  * These two variables specify the free mem region
@@ -120,6 +121,13 @@ void decompress_kernel(unsigned long boot_heap_start)
                /* last four bytes is always image size in little endian */
                image_size = get_unaligned_le32((void *)&__image_end - 4);
 
+               /* The device tree's address must be properly aligned */
+               image_size = ALIGN(image_size, STRUCT_ALIGNMENT);
+
+               puts("Copy device tree to address  ");
+               puthex(VMLINUX_LOAD_ADDRESS_ULL + image_size);
+               puts("\n");
+
                /* copy dtb to where the booted kernel will expect it */
                memcpy((void *)VMLINUX_LOAD_ADDRESS_ULL + image_size,
                       __appended_dtb, dtb_size);
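
The appended DTB is copied to the first byte after the decompressed image, so that destination must satisfy the FDT's 8-byte alignment requirement or libfdt can reject the blob; rounding image_size up to STRUCT_ALIGNMENT guarantees this. The arithmetic, as a sketch:

    /* round the copy destination up to 'align' (a power of two) */
    static unsigned long dtb_dest(unsigned long load_addr,
                                  unsigned long image_size,
                                  unsigned long align)
    {
            return load_addr + ((image_size + align - 1) & ~(align - 1));
    }
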
index 72a211d..32c2906 100644 (file)
@@ -549,7 +549,6 @@ CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 # CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_FRAME_WARN=1024
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_SCHEDSTATS=y
index 4ecb157..bf9b924 100644 (file)
@@ -500,7 +500,6 @@ CONFIG_CRC7=m
 CONFIG_PRINTK_TIME=y
 CONFIG_DEBUG_INFO=y
 # CONFIG_ENABLE_MUST_CHECK is not set
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_DETECT_HUNG_TASK=y
 CONFIG_SCHEDSTATS=y
index 8e1deaf..5e4105c 100644 (file)
@@ -12,8 +12,8 @@ AFLAGS_chacha-core.o += -O2 # needed to fill branch delay slots
 obj-$(CONFIG_CRYPTO_POLY1305_MIPS) += poly1305-mips.o
 poly1305-mips-y := poly1305-core.o poly1305-glue.o
 
-perlasm-flavour-$(CONFIG_CPU_MIPS32) := o32
-perlasm-flavour-$(CONFIG_CPU_MIPS64) := 64
+perlasm-flavour-$(CONFIG_32BIT) := o32
+perlasm-flavour-$(CONFIG_64BIT) := 64
 
 quiet_cmd_perlasm = PERLASM $@
       cmd_perlasm = $(PERL) $(<) $(perlasm-flavour-y) $(@)
index 6aa8f12..b710e76 100644 (file)
@@ -24,8 +24,11 @@ extern void (*board_ebase_setup)(void);
 extern void (*board_cache_error_setup)(void);
 
 extern int register_nmi_notifier(struct notifier_block *nb);
+extern void reserve_exception_space(phys_addr_t addr, unsigned long size);
 extern char except_vec_nmi[];
 
+#define VECTORSPACING 0x100    /* for EI/VI mode */
+
 #define nmi_notifier(fn, pri)                                          \
 ({                                                                     \
        static struct notifier_block fn##_nb = {                        \
index 9a89637..b718920 100644 (file)
@@ -26,6 +26,7 @@
 #include <asm/elf.h>
 #include <asm/pgtable-bits.h>
 #include <asm/spram.h>
+#include <asm/traps.h>
 #include <linux/uaccess.h>
 
 #include "fpu-probe.h"
@@ -1628,6 +1629,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
                c->cputype = CPU_BMIPS3300;
                __cpu_name[cpu] = "Broadcom BMIPS3300";
                set_elf_platform(cpu, "bmips3300");
+               reserve_exception_space(0x400, VECTORSPACING * 64);
                break;
        case PRID_IMP_BMIPS43XX: {
                int rev = c->processor_id & PRID_REV_MASK;
@@ -1638,6 +1640,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
                        __cpu_name[cpu] = "Broadcom BMIPS4380";
                        set_elf_platform(cpu, "bmips4380");
                        c->options |= MIPS_CPU_RIXI;
+                       reserve_exception_space(0x400, VECTORSPACING * 64);
                } else {
                        c->cputype = CPU_BMIPS4350;
                        __cpu_name[cpu] = "Broadcom BMIPS4350";
@@ -1654,6 +1657,7 @@ static inline void cpu_probe_broadcom(struct cpuinfo_mips *c, unsigned int cpu)
                        __cpu_name[cpu] = "Broadcom BMIPS5000";
                set_elf_platform(cpu, "bmips5000");
                c->options |= MIPS_CPU_ULRI | MIPS_CPU_RIXI;
+               reserve_exception_space(0x1000, VECTORSPACING * 64);
                break;
        }
 }
@@ -2133,6 +2137,8 @@ void cpu_probe(void)
        if (cpu == 0)
                __ua_limit = ~((1ull << cpu_vmbits) - 1);
 #endif
+
+       reserve_exception_space(0, 0x1000);
 }
 
 void cpu_report(void)
index abdbbe8..af65477 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/fpu.h>
 #include <asm/mipsregs.h>
 #include <asm/elf.h>
+#include <asm/traps.h>
 
 #include "fpu-probe.h"
 
@@ -158,6 +159,8 @@ void cpu_probe(void)
                cpu_set_fpu_opts(c);
        else
                cpu_set_nofpu_opts(c);
+
+       reserve_exception_space(0, 0x400);
 }
 
 void cpu_report(void)
index af4c862..7efa0d1 100644 (file)
@@ -120,7 +120,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        /*  Put the stack after the struct pt_regs.  */
        childksp = (unsigned long) childregs;
        p->thread.cp0_status = (read_c0_status() & ~(ST0_CU2|ST0_CU1)) | ST0_KERNEL_CUMASK;
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* kernel thread */
                unsigned long status = p->thread.cp0_status;
                memset(childregs, 0, sizeof(struct pt_regs));
index 1ff19f1..35729c9 100644 (file)
@@ -18,7 +18,7 @@
 static char bug64hit[] __initdata =
        "reliable operation impossible!\n%s";
 static char nowar[] __initdata =
-       "Please report to <linux-mips@linux-mips.org>.";
+       "Please report to <linux-mips@vger.kernel.org>.";
 static char r4kwar[] __initdata =
        "Enable CPU_R4000_WORKAROUNDS to rectify.";
 static char daddiwar[] __initdata =
index 279be01..23a1403 100644 (file)
@@ -43,7 +43,7 @@
 #include <asm/prom.h>
 
 #ifdef CONFIG_MIPS_ELF_APPENDED_DTB
-const char __section(".appended_dtb") __appended_dtb[0x100000];
+char __section(".appended_dtb") __appended_dtb[0x100000];
 #endif /* CONFIG_MIPS_ELF_APPENDED_DTB */
 
 struct cpuinfo_mips cpu_data[NR_CPUS] __read_mostly;
index a1ce8b7..51f8b80 100644 (file)
@@ -5,9 +5,9 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscalln32 := $(srctree)/$(src)/syscall_n32.tbl
-syscalln64 := $(srctree)/$(src)/syscall_n64.tbl
-syscallo32 := $(srctree)/$(src)/syscall_o32.tbl
+syscalln32 := $(src)/syscall_n32.tbl
+syscalln64 := $(src)/syscall_n64.tbl
+syscallo32 := $(src)/syscall_o32.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 sysnr := $(srctree)/$(src)/syscallnr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
@@ -31,50 +31,50 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_offset_$(basetarget))'
 
 syshdr_offset_unistd_n32 := __NR_Linux
-$(uapi)/unistd_n32.h: $(syscalln32) $(syshdr)
+$(uapi)/unistd_n32.h: $(syscalln32) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_offset_unistd_n64 := __NR_Linux
-$(uapi)/unistd_n64.h: $(syscalln64) $(syshdr)
+$(uapi)/unistd_n64.h: $(syscalln64) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_offset_unistd_o32 := __NR_Linux
-$(uapi)/unistd_o32.h: $(syscallo32) $(syshdr)
+$(uapi)/unistd_o32.h: $(syscallo32) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 sysnr_pfx_unistd_nr_n32 := N32
 sysnr_offset_unistd_nr_n32 := 6000
-$(kapi)/unistd_nr_n32.h: $(syscalln32) $(sysnr)
+$(kapi)/unistd_nr_n32.h: $(syscalln32) $(sysnr) FORCE
        $(call if_changed,sysnr)
 
 sysnr_pfx_unistd_nr_n64 := 64
 sysnr_offset_unistd_nr_n64 := 5000
-$(kapi)/unistd_nr_n64.h: $(syscalln64) $(sysnr)
+$(kapi)/unistd_nr_n64.h: $(syscalln64) $(sysnr) FORCE
        $(call if_changed,sysnr)
 
 sysnr_pfx_unistd_nr_o32 := O32
 sysnr_offset_unistd_nr_o32 := 4000
-$(kapi)/unistd_nr_o32.h: $(syscallo32) $(sysnr)
+$(kapi)/unistd_nr_o32.h: $(syscallo32) $(sysnr) FORCE
        $(call if_changed,sysnr)
 
 systbl_abi_syscall_table_32_o32 := 32_o32
 systbl_offset_syscall_table_32_o32 := 4000
-$(kapi)/syscall_table_32_o32.h: $(syscallo32) $(systbl)
+$(kapi)/syscall_table_32_o32.h: $(syscallo32) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abi_syscall_table_64_n32 := 64_n32
 systbl_offset_syscall_table_64_n32 := 6000
-$(kapi)/syscall_table_64_n32.h: $(syscalln32) $(systbl)
+$(kapi)/syscall_table_64_n32.h: $(syscalln32) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abi_syscall_table_64_n64 := 64_n64
 systbl_offset_syscall_table_64_n64 := 5000
-$(kapi)/syscall_table_64_n64.h: $(syscalln64) $(systbl)
+$(kapi)/syscall_table_64_n64.h: $(syscalln64) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abi_syscall_table_64_o32 := 64_o32
 systbl_offset_syscall_table_64_o32 := 4000
-$(kapi)/syscall_table_64_o32.h: $(syscallo32) $(systbl)
+$(kapi)/syscall_table_64_o32.h: $(syscallo32) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_n32.h                 \
@@ -88,9 +88,10 @@ kapisyshdr-y         += syscall_table_32_o32.h       \
                           unistd_nr_n64.h              \
                           unistd_nr_o32.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index 0f03ad2..8fd8c17 100644 (file)
 439    n32     faccessat2                      sys_faccessat2
 440    n32     process_madvise                 sys_process_madvise
 441    n32     epoll_pwait2                    compat_sys_epoll_pwait2
+442    n32     mount_setattr                   sys_mount_setattr
index 9164969..169f214 100644 (file)
 439    n64     faccessat2                      sys_faccessat2
 440    n64     process_madvise                 sys_process_madvise
 441    n64     epoll_pwait2                    sys_epoll_pwait2
+442    n64     mount_setattr                   sys_mount_setattr
index 4bad0c4..090d29c 100644 (file)
 439    o32     faccessat2                      sys_faccessat2
 440    o32     process_madvise                 sys_process_madvise
 441    o32     epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
+442    o32     mount_setattr                   sys_mount_setattr
index e035295..808b8b6 100644 (file)
@@ -2009,13 +2009,16 @@ void __noreturn nmi_exception_handler(struct pt_regs *regs)
        nmi_exit();
 }
 
-#define VECTORSPACING 0x100    /* for EI/VI mode */
-
 unsigned long ebase;
 EXPORT_SYMBOL_GPL(ebase);
 unsigned long exception_handlers[32];
 unsigned long vi_handlers[64];
 
+void reserve_exception_space(phys_addr_t addr, unsigned long size)
+{
+       memblock_reserve(addr, size);
+}
+
 void __init *set_except_vector(int n, void *addr)
 {
        unsigned long handler = (unsigned long) addr;
@@ -2367,10 +2370,7 @@ void __init trap_init(void)
 
        if (!cpu_has_mips_r2_r6) {
                ebase = CAC_BASE;
-               ebase_pa = virt_to_phys((void *)ebase);
                vec_size = 0x400;
-
-               memblock_reserve(ebase_pa, vec_size);
        } else {
                if (cpu_has_veic || cpu_has_vint)
                        vec_size = 0x200 + VECTORSPACING*64;
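
The hunks above move the memblock_reserve() of the boot exception vectors out
of trap_init() and into a helper called from the cpu_probe() paths, so each
CPU family reserves exactly the vector window it uses before memblock can hand
that memory out as ordinary RAM. A condensed kernel-context sketch (names
taken from the hunks; not a standalone program):

    void reserve_exception_space(phys_addr_t addr, unsigned long size)
    {
            /* keep the vector window away from the page allocator */
            memblock_reserve(addr, size);
    }

    static void bmips_probe_sketch(void)
    {
            /* mirrors cpu_probe_broadcom(): vectors at 0x400,
             * 64 slots of VECTORSPACING (0x100) bytes each */
            reserve_exception_space(0x400, VECTORSPACING * 64);
    }
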
index c1c345b..1f98947 100644 (file)
@@ -145,6 +145,7 @@ SECTIONS
        }
 
 #ifdef CONFIG_MIPS_ELF_APPENDED_DTB
+       STRUCT_ALIGN();
        .appended_dtb : AT(ADDR(.appended_dtb) - LOAD_OFFSET) {
                *(.appended_dtb)
                KEEP(*(.appended_dtb))
@@ -172,6 +173,11 @@ SECTIONS
 #endif
 
 #ifdef CONFIG_MIPS_RAW_APPENDED_DTB
+       .fill : {
+               FILL(0);
+               BYTE(0);
+               STRUCT_ALIGN();
+       }
        __appended_dtb = .;
        /* leave space for appended DTB */
        . += 0x100000;
index 210f5a9..a9cb288 100644 (file)
@@ -32,7 +32,7 @@ void __iomem *__pci_ioport_map(struct pci_dev *dev,
                sprintf(name, "%04x:%02x", pci_domain_nr(bus), bus->number);
                printk(KERN_WARNING "io_map_base of root PCI bus %s unset.  "
                       "Trying to continue but you better\nfix this issue or "
-                      "report it to linux-mips@linux-mips.org or your "
+                      "report it to linux-mips@vger.kernel.org or your "
                       "vendor.\n", name);
 #ifdef CONFIG_PCI_DOMAINS
                panic("To avoid data corruption io_map_base MUST be set with "
index e2354e1..3e660d6 100644 (file)
@@ -13,7 +13,7 @@ cflags-$(CONFIG_CPU_LOONGSON64)       += -Wa,--trap
 # can't easily be used safely within the kbuild framework.
 #
 ifeq ($(call cc-ifversion, -ge, 0409, y), y)
-  ifeq ($(call ld-ifversion, -ge, 225000000, y), y)
+  ifeq ($(call ld-ifversion, -ge, 22500, y), y)
     cflags-$(CONFIG_CPU_LOONGSON64)  += \
       $(call cc-option,-march=loongson3a -U_MIPS_ISA -D_MIPS_ISA=_MIPS_ISA_MIPS64)
   else
index 1754498..7719d63 100644 (file)
@@ -157,29 +157,31 @@ unsigned long _page_cachable_default;
 EXPORT_SYMBOL(_page_cachable_default);
 
 #define PM(p)  __pgprot(_page_cachable_default | (p))
+#define PVA(p) PM(_PAGE_VALID | _PAGE_ACCESSED | (p))
 
 static inline void setup_protection_map(void)
 {
        protection_map[0]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-       protection_map[1]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
-       protection_map[2]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-       protection_map[3]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
-       protection_map[4]  = PM(_PAGE_PRESENT);
-       protection_map[5]  = PM(_PAGE_PRESENT);
-       protection_map[6]  = PM(_PAGE_PRESENT);
-       protection_map[7]  = PM(_PAGE_PRESENT);
+       protection_map[1]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
+       protection_map[2]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
+       protection_map[3]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
+       protection_map[4]  = PVA(_PAGE_PRESENT);
+       protection_map[5]  = PVA(_PAGE_PRESENT);
+       protection_map[6]  = PVA(_PAGE_PRESENT);
+       protection_map[7]  = PVA(_PAGE_PRESENT);
 
        protection_map[8]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_NO_READ);
-       protection_map[9]  = PM(_PAGE_PRESENT | _PAGE_NO_EXEC);
-       protection_map[10] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
+       protection_map[9]  = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC);
+       protection_map[10] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE |
                                _PAGE_NO_READ);
-       protection_map[11] = PM(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
-       protection_map[12] = PM(_PAGE_PRESENT);
-       protection_map[13] = PM(_PAGE_PRESENT);
-       protection_map[14] = PM(_PAGE_PRESENT | _PAGE_WRITE);
-       protection_map[15] = PM(_PAGE_PRESENT | _PAGE_WRITE);
+       protection_map[11] = PVA(_PAGE_PRESENT | _PAGE_NO_EXEC | _PAGE_WRITE);
+       protection_map[12] = PVA(_PAGE_PRESENT);
+       protection_map[13] = PVA(_PAGE_PRESENT);
+       protection_map[14] = PVA(_PAGE_PRESENT | _PAGE_WRITE);
+       protection_map[15] = PVA(_PAGE_PRESENT | _PAGE_WRITE);
 }
 
+#undef PVA
 #undef PM
 
 void cpu_cache_init(void)
index bd4b065..61891af 100644 (file)
@@ -45,7 +45,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                pmd_t *pmdp, pmd_t pmd)
 {
        *pmdp = pmd;
-       flush_tlb_all();
 }
 #endif /* defined(CONFIG_TRANSPARENT_HUGEPAGE) */
 
index 183ff9f..7536f78 100644 (file)
@@ -100,7 +100,6 @@ void set_pmd_at(struct mm_struct *mm, unsigned long addr,
                pmd_t *pmdp, pmd_t pmd)
 {
        *pmdp = pmd;
-       flush_tlb_all();
 }
 
 void __init pagetable_init(void)
index 1bbd5bf..e21ea1d 100644 (file)
@@ -343,7 +343,7 @@ static void ip32_unknown_interrupt(void)
        printk("Register dump:\n");
        show_regs(get_irq_regs());
 
-       printk("Please mail this report to linux-mips@linux-mips.org\n");
+       printk("Please mail this report to linux-mips@vger.kernel.org\n");
        printk("Spinning...");
        while(1) ;
 }
index 7aec721..a665f61 100644 (file)
@@ -12,7 +12,7 @@
 # the lack of relocations. As such, we disable the VDSO for microMIPS builds.
 
 config MIPS_LD_CAN_LINK_VDSO
-       def_bool LD_VERSION >= 225000000 || LD_IS_LLD
+       def_bool LD_VERSION >= 22500 || LD_IS_LLD
 
 config MIPS_DISABLE_VDSO
        def_bool CPU_MICROMIPS || (!CPU_MIPSR6 && !MIPS_LD_CAN_LINK_VDSO)
index 40313a6..f9a89cf 100644 (file)
@@ -1,4 +1,3 @@
-CONFIG_CROSS_COMPILE="nds32le-linux-"
 CONFIG_SYSVIPC=y
 CONFIG_POSIX_MQUEUE=y
 CONFIG_HIGH_RES_TIMERS=y
index e01ad5d..c1327e5 100644 (file)
@@ -156,7 +156,7 @@ int copy_thread(unsigned long clone_flags, unsigned long stack_start,
 
        memset(&p->thread.cpu_context, 0, sizeof(struct cpu_context));
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(childregs, 0, sizeof(struct pt_regs));
                /* kernel thread fn */
                p->thread.cpu_context.r6 = stack_start;
index c356e48..af82e99 100644 (file)
@@ -52,7 +52,7 @@ EXPORT_SYMBOL(elf_hwcap);
 
 /*
  * The following string table, must sync with HWCAP_xx bitmask,
- * which is defined in <asm/procinfo.h>
+ * which is defined above
  */
 static const char *hwcap_str[] = {
        "mfusr_pc",
index ac9d78c..574a3d0 100644 (file)
@@ -2,7 +2,7 @@
 // Copyright (C) 2005-2017 Andes Technology Corporation
 
 #include <linux/clocksource.h>
-#include <linux/clk-provider.h>
+#include <linux/of_clk.h>
 
 void __init time_init(void)
 {
index 6a9772b..ee0d9ae 100644 (file)
@@ -25,17 +25,8 @@ extern void show_pte(struct mm_struct *mm, unsigned long addr);
 void dump_mem(const char *lvl, unsigned long bottom, unsigned long top)
 {
        unsigned long first;
-       mm_segment_t fs;
        int i;
 
-       /*
-        * We need to switch to kernel mode so that we can use __get_user
-        * to safely read from kernel space.  Note that we now dump the
-        * code first, just in case the backtrace kills us.
-        */
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-
        pr_emerg("%s(0x%08lx to 0x%08lx)\n", lvl, bottom, top);
 
        for (first = bottom & ~31; first < top; first += 32) {
@@ -48,7 +39,9 @@ void dump_mem(const char *lvl, unsigned long bottom, unsigned long top)
                for (p = first, i = 0; i < 8 && p < top; i++, p += 4) {
                        if (p >= bottom && p < top) {
                                unsigned long val;
-                               if (__get_user(val, (unsigned long *)p) == 0)
+
+                               if (get_kernel_nofault(val,
+                                               (unsigned long *)p) == 0)
                                        sprintf(str + i * 9, " %08lx", val);
                                else
                                        sprintf(str + i * 9, " ????????");
@@ -56,46 +49,10 @@ void dump_mem(const char *lvl, unsigned long bottom, unsigned long top)
                }
                pr_emerg("%s%04lx:%s\n", lvl, first & 0xffff, str);
        }
-
-       set_fs(fs);
 }
 
 EXPORT_SYMBOL(dump_mem);
 
-static void dump_instr(struct pt_regs *regs)
-{
-       unsigned long addr = instruction_pointer(regs);
-       mm_segment_t fs;
-       char str[sizeof("00000000 ") * 5 + 2 + 1], *p = str;
-       int i;
-
-       return;
-       /*
-        * We need to switch to kernel mode so that we can use __get_user
-        * to safely read from kernel space.  Note that we now dump the
-        * code first, just in case the backtrace kills us.
-        */
-       fs = get_fs();
-       set_fs(KERNEL_DS);
-
-       pr_emerg("Code: ");
-       for (i = -4; i < 1; i++) {
-               unsigned int val, bad;
-
-               bad = __get_user(val, &((u32 *) addr)[i]);
-
-               if (!bad) {
-                       p += sprintf(p, i == 0 ? "(%08x) " : "%08x ", val);
-               } else {
-                       p += sprintf(p, "bad PC value");
-                       break;
-               }
-       }
-       pr_emerg("Code: %s\n", str);
-
-       set_fs(fs);
-}
-
 #define LOOP_TIMES (100)
 static void __dump(struct task_struct *tsk, unsigned long *base_reg,
                   const char *loglvl)
@@ -179,7 +136,6 @@ void die(const char *str, struct pt_regs *regs, int err)
 
        if (!user_mode(regs) || in_interrupt()) {
                dump_mem("Stack: ", regs->sp, (regs->sp + PAGE_SIZE) & PAGE_MASK);
-               dump_instr(regs);
                dump_stack();
        }
 
index da84424..0794cd7 100644 (file)
@@ -389,7 +389,10 @@ ENTRY(ret_from_interrupt)
  */
 ENTRY(sys_clone)
        SAVE_SWITCH_STACK
+       subi    sp, sp, 4 /* make space for tls pointer */
+       stw     r8, 0(sp) /* pass tls pointer (r8) via stack (5th argument) */
        call    nios2_clone
+       addi    sp, sp, 4
        RESTORE_SWITCH_STACK
        ret
 
index 50b4eb1..c5f916c 100644 (file)
@@ -109,7 +109,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
        struct switch_stack *childstack =
                ((struct switch_stack *)childregs) - 1;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(childstack, 0,
                        sizeof(struct switch_stack) + sizeof(struct pt_regs));
 
index 3c6e3c8..d2f2195 100644 (file)
@@ -32,8 +32,6 @@ EXPORT_SYMBOL(memory_start);
 unsigned long memory_end;
 EXPORT_SYMBOL(memory_end);
 
-unsigned long memory_size;
-
 static struct pt_regs fake_regs = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                                        0, 0, 0, 0, 0, 0,
                                        0};
@@ -141,16 +139,22 @@ asmlinkage void __init nios2_boot_init(unsigned r4, unsigned r5, unsigned r6,
        parse_early_param();
 }
 
+static void __init find_limits(unsigned long *min, unsigned long *max_low,
+                              unsigned long *max_high)
+{
+       *max_low = PFN_DOWN(memblock_get_current_limit());
+       *min = PFN_UP(memblock_start_of_DRAM());
+       *max_high = PFN_DOWN(memblock_end_of_DRAM());
+}
+
 void __init setup_arch(char **cmdline_p)
 {
        int dram_start;
 
        console_verbose();
 
-       dram_start = memblock_start_of_DRAM();
-       memory_size = memblock_phys_mem_size();
-       memory_start = PAGE_ALIGN((unsigned long)__pa(_end));
-       memory_end = (unsigned long) CONFIG_NIOS2_MEM_BASE + memory_size;
+       memory_start = memblock_start_of_DRAM();
+       memory_end = memblock_end_of_DRAM();
 
        init_mm.start_code = (unsigned long) _stext;
        init_mm.end_code = (unsigned long) _etext;
@@ -161,11 +165,10 @@ void __init setup_arch(char **cmdline_p)
        /* Keep a copy of command line */
        *cmdline_p = boot_command_line;
 
-       min_low_pfn = PFN_UP(memory_start);
-       max_low_pfn = PFN_DOWN(memory_end);
+       find_limits(&min_low_pfn, &max_low_pfn, &max_pfn);
        max_mapnr = max_low_pfn;
 
-       memblock_reserve(dram_start, memory_start - dram_start);
+       memblock_reserve(__pa_symbol(_stext), _end - _stext);
 #ifdef CONFIG_BLK_DEV_INITRD
        if (initrd_start) {
                memblock_reserve(virt_to_phys((void *)initrd_start),
index cd390ec..b1ca856 100644 (file)
@@ -22,6 +22,7 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len,
                                unsigned int op)
 {
        struct vm_area_struct *vma;
+       struct mm_struct *mm = current->mm;
 
        if (len == 0)
                return 0;
@@ -34,16 +35,22 @@ asmlinkage int sys_cacheflush(unsigned long addr, unsigned long len,
        if (addr + len < addr)
                return -EFAULT;
 
+       if (mmap_read_lock_killable(mm))
+               return -EINTR;
+
        /*
         * Verify that the specified address region actually belongs
         * to this process.
         */
-       vma = find_vma(current->mm, addr);
-       if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end)
+       vma = find_vma(mm, addr);
+       if (vma == NULL || addr < vma->vm_start || addr + len > vma->vm_end) {
+               mmap_read_unlock(mm);
                return -EFAULT;
+       }
 
        flush_cache_range(vma, addr, addr + len);
 
+       mmap_read_unlock(mm);
        return 0;
 }
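
The hunk above closes a lookup-without-lock race: a VMA returned by find_vma()
is only stable while mmap_lock is held for read, so both the lookup and the
flush now sit inside a killable read-lock section. A user-space analogue of
the pattern, with a pthread rwlock standing in for mmap_lock (illustration
only; the function name and body are assumptions):

    #include <errno.h>
    #include <pthread.h>

    static pthread_rwlock_t map_lock = PTHREAD_RWLOCK_INITIALIZER;

    int lookup_and_flush(unsigned long addr, unsigned long len)
    {
            (void)addr; (void)len;

            if (pthread_rwlock_rdlock(&map_lock))
                    return -EINTR;  /* kernel side: mmap_read_lock_killable() */

            /*
             * The find_vma()-style lookup and range check go here; the
             * mapping can be torn down the instant the lock is dropped,
             * so nothing found under the lock may be used after it.
             */

            pthread_rwlock_unlock(&map_lock);
            return 0;
    }
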
 
diff --git a/arch/openrisc/Kbuild b/arch/openrisc/Kbuild
new file mode 100644 (file)
index 0000000..4234b4c
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-y += lib/ kernel/ mm/
+obj-y += boot/dts/
index bf10141..410e7ab 100644 (file)
@@ -24,6 +24,10 @@ LIBGCC               := $(shell $(CC) $(KBUILD_CFLAGS) -print-libgcc-file-name)
 
 KBUILD_CFLAGS  += -pipe -ffixed-r10 -D__linux__
 
+all: vmlinux.bin
+
+boot := arch/$(ARCH)/boot
+
 ifeq ($(CONFIG_OPENRISC_HAVE_INST_MUL),y)
        KBUILD_CFLAGS += $(call cc-option,-mhard-mul)
 else
@@ -38,14 +42,13 @@ endif
 
 head-y                 := arch/openrisc/kernel/head.o
 
-core-y         += arch/openrisc/lib/ \
-                  arch/openrisc/kernel/ \
-                  arch/openrisc/mm/
+core-y         += arch/openrisc/
 libs-y         += $(LIBGCC)
 
-ifneq '$(CONFIG_OPENRISC_BUILTIN_DTB)' '""'
-BUILTIN_DTB := y
-else
-BUILTIN_DTB := n
-endif
-core-$(BUILTIN_DTB) += arch/openrisc/boot/dts/
+PHONY += vmlinux.bin
+
+vmlinux.bin: vmlinux
+       $(Q)$(MAKE) $(build)=$(boot) $(boot)/$@
+
+archclean:
+       $(Q)$(MAKE) $(clean)=$(boot)
diff --git a/arch/openrisc/boot/.gitignore b/arch/openrisc/boot/.gitignore
new file mode 100644 (file)
index 0000000..007d6fe
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+vmlinux.bin
diff --git a/arch/openrisc/boot/Makefile b/arch/openrisc/boot/Makefile
new file mode 100644 (file)
index 0000000..5b28538
--- /dev/null
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for bootable kernel images
+#
+
+targets += vmlinux.bin
+
+OBJCOPYFLAGS_vmlinux.bin := -O binary
+$(obj)/vmlinux.bin: vmlinux FORCE
+       $(call if_changed,objcopy)
index 3c98728..eb62429 100644 (file)
@@ -34,6 +34,7 @@
 #include <linux/init_task.h>
 #include <linux/mqueue.h>
 #include <linux/fs.h>
+#include <linux/reboot.h>
 
 #include <linux/uaccess.h>
 #include <asm/io.h>
  */
 struct thread_info *current_thread_info_set[NR_CPUS] = { &init_thread_info, };
 
-void machine_restart(void)
+void machine_restart(char *cmd)
 {
-       printk(KERN_INFO "*** MACHINE RESTART ***\n");
-       __asm__("l.nop 1");
+       do_kernel_restart(cmd);
+
+       /* Give the platform a 1s grace period to complete the restart */
+       mdelay(1000);
+
+       /* Whoops - the platform was unable to reboot. Tell the user! */
+       pr_emerg("Reboot failed -- System halted\n");
+       while (1);
 }
 
 /*
@@ -167,7 +174,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
        sp -= sizeof(struct pt_regs);
        kregs = (struct pt_regs *)sp;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(kregs, 0, sizeof(struct pt_regs));
                kregs->gpr[20] = usp; /* fn, kernel thread */
                kregs->gpr[22] = arg;
index 29c82ef..48e1092 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/sched.h>
 #include <linux/sched/mm.h>
 #include <linux/irq.h>
+#include <linux/of.h>
 #include <asm/cpuinfo.h>
 #include <asm/mmu_context.h>
 #include <asm/tlbflush.h>
@@ -60,22 +61,32 @@ void __init smp_prepare_boot_cpu(void)
 
 void __init smp_init_cpus(void)
 {
-       int i;
+       struct device_node *cpu;
+       u32 cpu_id;
 
-       for (i = 0; i < NR_CPUS; i++)
-               set_cpu_possible(i, true);
+       for_each_of_cpu_node(cpu) {
+               if (of_property_read_u32(cpu, "reg", &cpu_id)) {
+                       pr_warn("%s missing reg property\n", cpu->full_name);
+                       continue;
+               }
+
+               if (cpu_id < NR_CPUS)
+                       set_cpu_possible(cpu_id, true);
+       }
 }
 
 void __init smp_prepare_cpus(unsigned int max_cpus)
 {
-       int i;
+       unsigned int cpu;
 
        /*
         * Initialise the present map, which describes the set of CPUs
         * actually populated at the present time.
         */
-       for (i = 0; i < max_cpus; i++)
-               set_cpu_present(i, true);
+       for_each_possible_cpu(cpu) {
+               if (cpu < max_cpus)
+                       set_cpu_present(cpu, true);
+       }
 }
 
 void __init smp_cpus_done(unsigned int max_cpus)
index ecef9af..afc3b8d 100644 (file)
@@ -61,8 +61,10 @@ config PARISC
        select HAVE_KRETPROBES
        select HAVE_DYNAMIC_FTRACE if $(cc-option,-fpatchable-function-entry=1,1)
        select HAVE_FTRACE_MCOUNT_RECORD if HAVE_DYNAMIC_FTRACE
+       select FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY if DYNAMIC_FTRACE
        select HAVE_KPROBES_ON_FTRACE
        select HAVE_DYNAMIC_FTRACE_WITH_REGS
+       select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
        select SET_FS
 
        help
@@ -201,9 +203,12 @@ config PREFETCH
        def_bool y
        depends on PA8X00 || PA7200
 
+config PARISC_HUGE_KERNEL
+       def_bool y if !MODULES || UBSAN || FTRACE || COMPILE_TEST
+
 config MLONGCALLS
-       def_bool y if !MODULES || UBSAN || FTRACE
-       bool "Enable the -mlong-calls compiler option for big kernels" if MODULES && !UBSAN && !FTRACE
+       def_bool y if PARISC_HUGE_KERNEL
+       bool "Enable the -mlong-calls compiler option for big kernels" if !PARISC_HUGE_KERNEL
        depends on PA8X00
        help
          If you configure the kernel to include many drivers built-in instead
index 3cbcfad..7611d48 100644 (file)
@@ -22,7 +22,6 @@ CONFIG_PCI_LBA=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-CONFIG_UNUSED_SYMBOLS=y
 # CONFIG_BLK_DEV_BSG is not set
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 CONFIG_BINFMT_MISC=m
index 8f81fcb..53054b8 100644 (file)
@@ -31,7 +31,6 @@ CONFIG_MODULE_FORCE_LOAD=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BINFMT_MISC=m
 # CONFIG_COMPACTION is not set
index cf5ee9b..84ee232 100644 (file)
@@ -72,7 +72,7 @@ __cmpxchg(volatile void *ptr, unsigned long old, unsigned long new_, int size)
 #endif
        case 4: return __cmpxchg_u32((unsigned int *)ptr,
                                     (unsigned int)old, (unsigned int)new_);
-       case 1: return __cmpxchg_u8((u8 *)ptr, (u8)old, (u8)new_);
+       case 1: return __cmpxchg_u8((u8 *)ptr, old & 0xff, new_ & 0xff);
        }
        __cmpxchg_called_with_bad_pointer();
        return old;
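
Context for the one-liner above: with old and new_ being unsigned long,
(u8)old and old & 0xff select the same low byte; the masked form keeps the
expression in the wider type and avoids the compiler's cast-truncation
warning. Standalone illustration:

    #include <stdio.h>

    int main(void)
    {
            unsigned long old = 0x1234;

            unsigned char cast   = (unsigned char)old;      /* 0x34 */
            unsigned long masked = old & 0xff;              /* 0x34, still unsigned long */

            printf("%x %lx\n", cast, masked);
            return 0;
    }
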
index fad29aa..1e4fbd0 100644 (file)
 #include <linux/threads.h>
 #include <linux/irq.h>
 
-#ifdef CONFIG_IRQSTACKS
-#define __ARCH_HAS_DO_SOFTIRQ
-#endif
-
 typedef struct {
        unsigned int __softirq_pending;
        unsigned int kernel_stack_usage;
index 11ece0d..b5fbcd2 100644 (file)
@@ -272,7 +272,6 @@ on downward growing arches, it looks like this:
        regs->gr[23] = 0;                               \
 } while(0)
 
-struct task_struct;
 struct mm_struct;
 
 /* Free all resources held by a thread. */
index 1dfb439..0d46b19 100644 (file)
@@ -17,6 +17,7 @@
 #include <linux/types.h>
 #include <asm/io.h>
 
+#include <asm/softirq_stack.h>
 #include <asm/smp.h>
 #include <asm/ldcw.h>
 
index fda1c1a..b144fbe 100644 (file)
@@ -200,7 +200,7 @@ copy_thread(unsigned long clone_flags, unsigned long usp,
        extern void * const ret_from_kernel_thread;
        extern void * const child_return;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* kernel thread */
                memset(cregs, 0, sizeof(struct pt_regs));
                if (!usp) /* idle thread */
index 2127974..65de6c4 100644 (file)
@@ -567,8 +567,6 @@ static const struct user_regset_view user_parisc_native_view = {
 };
 
 #ifdef CONFIG_64BIT
-#include <linux/compat.h>
-
 static int gpr32_get(struct task_struct *target,
                     const struct user_regset *regset,
                     struct membuf to)
index c22a21c..283f644 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -22,24 +22,24 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_offset_$(basetarget))'
 
 syshdr_abis_unistd_32 := common,32
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_abis_unistd_64 := common,64
-$(uapi)/unistd_64.h: $(syscall) $(syshdr)
+$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 systbl_abis_syscall_table_32 := common,32
-$(kapi)/syscall_table_32.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abis_syscall_table_64 := common,64
-$(kapi)/syscall_table_64.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abis_syscall_table_c32 := common,32
 systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h unistd_64.h
@@ -47,9 +47,10 @@ kapisyshdr-y         += syscall_table_32.h           \
                           syscall_table_64.h           \
                           syscall_table_c32.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index 6bcc319..271a925 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index 853c19c..dec951d 100644 (file)
@@ -5,34 +5,10 @@
  * Floating-point emulation code
  *  Copyright (C) 2001 Hewlett-Packard (Paul Bame) <bame@debian.org>
  */
-/*
- * BEGIN_DESC
- * 
- *  File: 
- *      @(#)   pa/fp/fpu.h             $Revision: 1.1 $
- * 
- *  Purpose:
- *      <<please update with a synopis of the functionality provided by this file>>
- * 
- * 
- * END_DESC  
-*/
-
-#ifdef __NO_PA_HDRS
-    PA header file -- do not include this header file for non-PA builds.
-#endif
-
 
 #ifndef _MACHINE_FPU_INCLUDED /* allows multiple inclusion */
 #define _MACHINE_FPU_INCLUDED
 
-#if 0
-#ifndef _SYS_STDSYMS_INCLUDED
-#    include <sys/stdsyms.h>
-#endif   /* _SYS_STDSYMS_INCLUDED  */
-#include  <machine/pdc/pdc_rqsts.h>
-#endif
-
 #define PA83_FPU_FLAG    0x00000001
 #define PA89_FPU_FLAG    0x00000002
 #define PA2_0_FPU_FLAG   0x00000010
 #define COPR_FP        0x00000080      /* Floating point -- Coprocessor 0 */
 #define SFU_MPY_DIVIDE 0x00008000      /* Multiply/Divide __ SFU 0 */
 
-
 #define EM_FPU_TYPE_OFFSET 272
 
 /* version of EMULATION software for COPR,0,0 instruction */
 #define EMULATION_VERSION 4
 
 /*
- * The only was to differeniate between TIMEX and ROLEX (or PCX-S and PCX-T)
- * is thorough the potential type field from the PDC_MODEL call.  The 
- * following flags are used at assist this differeniation.
+ * The only way to differentiate between TIMEX and ROLEX (or PCX-S and PCX-T)
+ * is through the potential type field from the PDC_MODEL call.
+ * The following flags are used to assist this differentiation.
  */
 
 #define ROLEX_POTENTIAL_KEY_FLAGS      PDC_MODEL_CPU_KEY_WORD_TO_IO
 #define TIMEX_POTENTIAL_KEY_FLAGS      (PDC_MODEL_CPU_KEY_QUAD_STORE | \
                                         PDC_MODEL_CPU_KEY_RECIP_SQRT)
 
-
 #endif /* ! _MACHINE_FPU_INCLUDED */
index 2ffb229..386ae12 100644 (file)
@@ -235,6 +235,7 @@ config PPC
        select MMU_GATHER_PAGE_SIZE
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RELIABLE_STACKTRACE         if PPC_BOOK3S_64 && CPU_LITTLE_ENDIAN
+       select HAVE_SOFTIRQ_ON_OWN_STACK
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_VIRT_CPU_ACCOUNTING
        select HAVE_IRQ_TIME_ACCOUNTING
index b959fda..5f8544c 100644 (file)
@@ -65,7 +65,7 @@ UTS_MACHINE := $(subst $(space),,$(machine-y))
 ifdef CONFIG_PPC32
 KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o
 else
-ifeq ($(call ld-ifversion, -ge, 225000000, y),y)
+ifeq ($(call ld-ifversion, -ge, 22500, y),y)
 # Have the linker provide sfpr if possible.
 # There is a corresponding test in arch/powerpc/lib/Makefile
 KBUILD_LDFLAGS_MODULE += --save-restore-funcs
index 10c055e..6677ac0 100644 (file)
@@ -1071,7 +1071,6 @@ CONFIG_NLS_ISO8859_15=m
 CONFIG_NLS_KOI8_R=m
 CONFIG_NLS_KOI8_U=m
 CONFIG_DEBUG_INFO=y
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_HEADERS_INSTALL=y
 CONFIG_MAGIC_SYSRQ=y
 CONFIG_DEBUG_KERNEL=y
index eacc910..f1d029b 100644 (file)
@@ -73,9 +73,10 @@ void __patch_exception(int exc, unsigned long addr);
 #endif
 
 #define OP_RT_RA_MASK  0xffff0000UL
-#define LIS_R2         0x3c020000UL
-#define ADDIS_R2_R12   0x3c4c0000UL
-#define ADDI_R2_R2     0x38420000UL
+#define LIS_R2         (PPC_INST_ADDIS | __PPC_RT(R2))
+#define ADDIS_R2_R12   (PPC_INST_ADDIS | __PPC_RT(R2) | __PPC_RA(R12))
+#define ADDI_R2_R2     (PPC_INST_ADDI  | __PPC_RT(R2) | __PPC_RA(R2))
+
 
 static inline unsigned long ppc_function_entry(void *func)
 {
index 7897d16..727d4b3 100644 (file)
@@ -7,7 +7,7 @@
 #include <linux/bug.h>
 #include <asm/cputable.h>
 
-static inline bool early_cpu_has_feature(unsigned long feature)
+static __always_inline bool early_cpu_has_feature(unsigned long feature)
 {
        return !!((CPU_FTRS_ALWAYS & feature) ||
                  (CPU_FTRS_POSSIBLE & cur_cpu_spec->cpu_features & feature));
@@ -46,7 +46,7 @@ static __always_inline bool cpu_has_feature(unsigned long feature)
        return static_branch_likely(&cpu_feature_keys[i]);
 }
 #else
-static inline bool cpu_has_feature(unsigned long feature)
+static __always_inline bool cpu_has_feature(unsigned long feature)
 {
        return early_cpu_has_feature(feature);
 }
index 7141cce..a920599 100644 (file)
@@ -53,8 +53,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val)
 #define mfdcr(rn)                                              \
        ({unsigned int rval;                                    \
        if (__builtin_constant_p(rn) && rn < 1024)              \
-               asm volatile("mfdcr %0," __stringify(rn)        \
-                             : "=r" (rval));                   \
+               asm volatile("mfdcr %0, %1" : "=r" (rval)       \
+                             : "n" (rn));                      \
        else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR)))  \
                rval = mfdcrx(rn);                              \
        else                                                    \
@@ -64,8 +64,8 @@ static inline void mtdcrx(unsigned int reg, unsigned int val)
 #define mtdcr(rn, v)                                           \
 do {                                                           \
        if (__builtin_constant_p(rn) && rn < 1024)              \
-               asm volatile("mtdcr " __stringify(rn) ",%0"     \
-                             : : "r" (v));                     \
+               asm volatile("mtdcr %0, %1"                     \
+                             : : "n" (rn), "r" (v));           \
        else if (likely(cpu_has_feature(CPU_FTR_INDEXED_DCR)))  \
                mtdcrx(rn, v);                                  \
        else                                                    \
index aedfba2..e8d09a8 100644 (file)
@@ -410,7 +410,6 @@ DECLARE_INTERRUPT_HANDLER(altivec_assist_exception);
 DECLARE_INTERRUPT_HANDLER(CacheLockingException);
 DECLARE_INTERRUPT_HANDLER(SPEFloatingPointException);
 DECLARE_INTERRUPT_HANDLER(SPEFloatingPointRoundException);
-DECLARE_INTERRUPT_HANDLER(unrecoverable_exception);
 DECLARE_INTERRUPT_HANDLER(WatchdogException);
 DECLARE_INTERRUPT_HANDLER(kernel_bad_stack);
 
@@ -437,6 +436,8 @@ DECLARE_INTERRUPT_HANDLER_NMI(hmi_exception_realmode);
 
 DECLARE_INTERRUPT_HANDLER_ASYNC(TAUException);
 
+void unrecoverable_exception(struct pt_regs *regs);
+
 void replay_system_reset(void);
 void replay_soft_interrupts(void);
 
index 4f983ca..f3f264e 100644 (file)
@@ -37,8 +37,6 @@ extern int distribute_irqs;
 
 struct pt_regs;
 
-#define __ARCH_HAS_DO_SOFTIRQ
-
 #if defined(CONFIG_BOOKE) || defined(CONFIG_40x)
 /*
  * Per-cpu stacks for handling critical, debug and machine check
index 80b27f5..607168b 100644 (file)
@@ -228,7 +228,7 @@ enum {
 #define MMU_FTRS_ALWAYS                0
 #endif
 
-static inline bool early_mmu_has_feature(unsigned long feature)
+static __always_inline bool early_mmu_has_feature(unsigned long feature)
 {
        if (MMU_FTRS_ALWAYS & feature)
                return true;
@@ -286,7 +286,7 @@ static inline void mmu_feature_keys_init(void)
 
 }
 
-static inline bool mmu_has_feature(unsigned long feature)
+static __always_inline bool mmu_has_feature(unsigned long feature)
 {
        return early_mmu_has_feature(feature);
 }
index 975ba26..1499e92 100644 (file)
@@ -195,7 +195,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 #define TRAP_FLAGS_MASK                0x11
 #define TRAP(regs)             ((regs)->trap & ~TRAP_FLAGS_MASK)
 #define FULL_REGS(regs)                (((regs)->trap & 1) == 0)
-#define SET_FULL_REGS(regs)    ((regs)->trap |= 1)
+#define SET_FULL_REGS(regs)    ((regs)->trap &= ~1)
 #endif
 #define CHECK_FULL_REGS(regs)  BUG_ON(!FULL_REGS(regs))
 #define NV_REG_POISON          0xdeadbeefdeadbeefUL
@@ -210,7 +210,7 @@ static inline void regs_set_return_value(struct pt_regs *regs, unsigned long rc)
 #define TRAP_FLAGS_MASK                0x1F
 #define TRAP(regs)             ((regs)->trap & ~TRAP_FLAGS_MASK)
 #define FULL_REGS(regs)                (((regs)->trap & 1) == 0)
-#define SET_FULL_REGS(regs)    ((regs)->trap |= 1)
+#define SET_FULL_REGS(regs)    ((regs)->trap &= ~1)
 #define IS_CRITICAL_EXC(regs)  (((regs)->trap & 2) != 0)
 #define IS_MCHECK_EXC(regs)    (((regs)->trap & 4) != 0)
 #define IS_DEBUG_EXC(regs)     (((regs)->trap & 8) != 0)
index fdab934..9d1fbd8 100644 (file)
@@ -71,6 +71,16 @@ static inline void disable_kernel_vsx(void)
 {
        msr_check_and_clear(MSR_FP|MSR_VEC|MSR_VSX);
 }
+#else
+static inline void enable_kernel_vsx(void)
+{
+       BUILD_BUG();
+}
+
+static inline void disable_kernel_vsx(void)
+{
+       BUILD_BUG();
+}
 #endif
 
 #ifdef CONFIG_SPE
index 0cf5274..721c0d6 100644 (file)
@@ -113,7 +113,7 @@ struct vio_driver {
        const char *name;
        const struct vio_device_id *id_table;
        int (*probe)(struct vio_dev *dev, const struct vio_device_id *id);
-       int (*remove)(struct vio_dev *dev);
+       void (*remove)(struct vio_dev *dev);
        /* A driver must have a get_desired_dma() function to
         * be loaded in a CMO environment if it uses DMA.
         */
index 60d3051..8082b69 100644 (file)
@@ -466,7 +466,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real)
 
        ld      r10,PACAKMSR(r13)       /* get MSR value for kernel */
        /* MSR[RI] is clear iff using SRR regs */
-       .if IHSRR == EXC_HV_OR_STD
+       .if IHSRR_IF_HVMODE
        BEGIN_FTR_SECTION
        xori    r10,r10,MSR_RI
        END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
index 727fdab..565e84e 100644 (file)
@@ -457,11 +457,12 @@ InstructionTLBMiss:
        cmplw   0,r1,r3
 #endif
        mfspr   r2, SPRN_SDR1
-       li      r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
+       li      r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC | _PAGE_USER
        rlwinm  r2, r2, 28, 0xfffff000
 #ifdef CONFIG_MODULES
        bgt-    112f
        lis     r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, use */
+       li      r1,_PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_EXEC
        addi    r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l        /* kernel page table */
 #endif
 112:   rlwimi  r2,r3,12,20,29          /* insert top 10 bits of address */
@@ -520,10 +521,11 @@ DataLoadTLBMiss:
        lis     r1, TASK_SIZE@h         /* check if kernel address */
        cmplw   0,r1,r3
        mfspr   r2, SPRN_SDR1
-       li      r1, _PAGE_PRESENT | _PAGE_ACCESSED
+       li      r1, _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
        rlwinm  r2, r2, 28, 0xfffff000
        bgt-    112f
        lis     r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, use */
+       li      r1, _PAGE_PRESENT | _PAGE_ACCESSED
        addi    r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l        /* kernel page table */
 112:   rlwimi  r2,r3,12,20,29          /* insert top 10 bits of address */
        lwz     r2,0(r2)                /* get pmd entry */
@@ -597,10 +599,11 @@ DataStoreTLBMiss:
        lis     r1, TASK_SIZE@h         /* check if kernel address */
        cmplw   0,r1,r3
        mfspr   r2, SPRN_SDR1
-       li      r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
+       li      r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED | _PAGE_USER
        rlwinm  r2, r2, 28, 0xfffff000
        bgt-    112f
        lis     r2, (swapper_pg_dir - PAGE_OFFSET)@ha   /* if kernel address, use */
+       li      r1, _PAGE_RW | _PAGE_DIRTY | _PAGE_PRESENT | _PAGE_ACCESSED
        addi    r2, r2, (swapper_pg_dir - PAGE_OFFSET)@l        /* kernel page table */
 112:   rlwimi  r2,r3,12,20,29          /* insert top 10 bits of address */
        lwz     r2,0(r2)                /* get pmd entry */
index 398cd86..c475a22 100644 (file)
@@ -149,7 +149,7 @@ notrace long system_call_exception(long r3, long r4, long r5,
  * enabled when the interrupt handler returns (indicating a process-context /
  * synchronous interrupt) then irqs_enabled should be true.
  */
-static notrace inline bool __prep_irq_for_enabled_exit(bool clear_ri)
+static notrace __always_inline bool __prep_irq_for_enabled_exit(bool clear_ri)
 {
        /* This must be done with RI=1 because tracing may touch vmaps */
        trace_hardirqs_on();
@@ -436,7 +436,6 @@ again:
        return ret;
 }
 
-void unrecoverable_exception(struct pt_regs *regs);
 void preempt_schedule_irq(void);
 
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs, unsigned long msr)
index 086b0a7..d71fd10 100644 (file)
@@ -66,6 +66,7 @@
 #include <asm/livepatch.h>
 #include <asm/asm-prototypes.h>
 #include <asm/hw_irq.h>
+#include <asm/softirq_stack.h>
 
 #ifdef CONFIG_PPC64
 #include <asm/paca.h>
index 924d023..3231c2d 100644 (file)
@@ -1670,7 +1670,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp,
        /* Copy registers */
        sp -= sizeof(struct pt_regs);
        childregs = (struct pt_regs *) sp;
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* kernel thread */
                memset(childregs, 0, sizeof(struct pt_regs));
                childregs->gpr[1] = sp + sizeof(struct pt_regs);
index 27b4895..9e3be29 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -22,31 +22,31 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_offset_$(basetarget))'
 
 syshdr_abis_unistd_32 := common,nospu,32
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_abis_unistd_64 := common,nospu,64
-$(uapi)/unistd_64.h: $(syscall) $(syshdr)
+$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 systbl_abis_syscall_table_32 := common,nospu,32
 systbl_abi_syscall_table_32 := 32
-$(kapi)/syscall_table_32.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abis_syscall_table_64 := common,nospu,64
 systbl_abi_syscall_table_64 := 64
-$(kapi)/syscall_table_64.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abis_syscall_table_c32 := common,nospu,32
 systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abis_syscall_table_spu := common,spu
 systbl_abi_syscall_table_spu := spu
-$(kapi)/syscall_table_spu.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_spu.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h unistd_64.h
@@ -55,9 +55,10 @@ kapisyshdr-y         += syscall_table_32.h           \
                           syscall_table_c32.h          \
                           syscall_table_spu.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index 96b2157..0b2480c 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index 1583fd1..a44a30b 100644 (file)
@@ -2170,7 +2170,7 @@ DEFINE_INTERRUPT_HANDLER(SPEFloatingPointRoundException)
  * in the MSR is 0.  This indicates that SRR0/1 are live, and that
  * we therefore lost state by taking this exception.
  */
-DEFINE_INTERRUPT_HANDLER(unrecoverable_exception)
+void unrecoverable_exception(struct pt_regs *regs)
 {
        pr_emerg("Unrecoverable exception %lx at %lx (msr=%lx)\n",
                 regs->trap, regs->nip, regs->msr);
index a6e29f8..d21d081 100644 (file)
@@ -65,3 +65,14 @@ V_FUNCTION_END(__kernel_clock_getres)
 V_FUNCTION_BEGIN(__kernel_time)
        cvdso_call_time __c_kernel_time
 V_FUNCTION_END(__kernel_time)
+
+/* Routines for restoring integer registers, called by the compiler.  */
+/* Called with r11 pointing to the stack header word of the caller of the */
+/* function, just beyond the end of the integer restore area.  */
+_GLOBAL(_restgpr_31_x)
+_GLOBAL(_rest32gpr_31_x)
+       lwz     r0,4(r11)
+       lwz     r31,-4(r11)
+       mtlr    r0
+       mr      r1,r11
+       blr
index c77f2d4..bb67735 100644 (file)
@@ -591,7 +591,7 @@ int kvmppc_book3s_hv_page_fault(struct kvm_vcpu *vcpu,
        } else {
                /* Call KVM generic code to do the slow-path check */
                pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-                                          writing, &write_ok);
+                                          writing, &write_ok, NULL);
                if (is_error_noslot_pfn(pfn))
                        return -EFAULT;
                page = NULL;
index bb35490..e603de7 100644 (file)
@@ -822,7 +822,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu,
 
                /* Call KVM generic code to do the slow-path check */
                pfn = __gfn_to_pfn_memslot(memslot, gfn, false, NULL,
-                                          writing, upgrade_p);
+                                          writing, upgrade_p, NULL);
                if (is_error_noslot_pfn(pfn))
                        return -EFAULT;
                page = NULL;
index 69a91b5..d4efc18 100644 (file)
@@ -31,7 +31,7 @@ obj-$(CONFIG_FUNCTION_ERROR_INJECTION)        += error-inject.o
 # 64-bit linker creates .sfpr on demand for final link (vmlinux),
 # so it is only needed for modules, and only for older linkers which
 # do not support --save-restore-funcs
-ifeq ($(call ld-ifversion, -lt, 225000000, y),y)
+ifeq ($(call ld-ifversion, -lt, 22500, y),y)
 extra-$(CONFIG_PPC64)  += crtsavres.o
 endif
 
index bb5c20d..c6aebc1 100644 (file)
@@ -904,7 +904,7 @@ static nokprobe_inline int do_vsx_load(struct instruction_op *op,
        if (!address_ok(regs, ea, size) || copy_mem_in(mem, ea, size, regs))
                return -EFAULT;
 
-       nr_vsx_regs = size / sizeof(__vector128);
+       nr_vsx_regs = max(1ul, size / sizeof(__vector128));
        emulate_vsx_load(op, buf, mem, cross_endian);
        preempt_disable();
        if (reg < 32) {
@@ -951,7 +951,7 @@ static nokprobe_inline int do_vsx_store(struct instruction_op *op,
        if (!address_ok(regs, ea, size))
                return -EFAULT;
 
-       nr_vsx_regs = size / sizeof(__vector128);
+       nr_vsx_regs = max(1ul, size / sizeof(__vector128));
        preempt_disable();
        if (reg < 32) {
                /* FP regs + extensions */
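
Context for the two clamps above: size / sizeof(__vector128) is integer
division, so any access narrower than one 16-byte VSX register used to compute
zero registers and the register copy loops did nothing. max(1ul, ...) restores
the single-register case. Standalone illustration (the 8-byte size is a
hypothetical scalar access):

    #include <stdio.h>

    int main(void)
    {
            unsigned long size  = 8;                /* e.g. an 8-byte scalar access */
            unsigned long nr    = size / 16;        /* truncates to 0: no registers */
            unsigned long fixed = nr ? nr : 1;      /* the max(1ul, ...) clamp */

            printf("%lu -> %lu\n", nr, fixed);      /* prints "0 -> 1" */
            return 0;
    }
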
index 6817331..766f064 100644 (file)
@@ -222,7 +222,7 @@ static inline void perf_get_data_addr(struct perf_event *event, struct pt_regs *
        if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid)
                *addrp = mfspr(SPRN_SDAR);
 
-       if (is_kernel_addr(mfspr(SPRN_SDAR)) && perf_allow_kernel(&event->attr) != 0)
+       if (is_kernel_addr(mfspr(SPRN_SDAR)) && event->attr.exclude_kernel)
                *addrp = 0;
 }
 
@@ -507,7 +507,7 @@ static void power_pmu_bhrb_read(struct perf_event *event, struct cpu_hw_events *
                         * addresses, hence include a check before filtering code
                         */
                        if (!(ppmu->flags & PPMU_ARCH_31) &&
-                               is_kernel_addr(addr) && perf_allow_kernel(&event->attr) != 0)
+                           is_kernel_addr(addr) && event->attr.exclude_kernel)
                                continue;
 
                        /* Branches are read most recent first (ie. mfbhrb 0 is
index 2539056..b83a367 100644 (file)
@@ -91,14 +91,15 @@ out:
 }
 
 static int
-spufs_setattr(struct dentry *dentry, struct iattr *attr)
+spufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+             struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
 
        if ((attr->ia_valid & ATTR_SIZE) &&
            (attr->ia_size != inode->i_size))
                return -EINVAL;
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
index 8c739c9..5317286 100644 (file)
@@ -150,25 +150,3 @@ int pnv_cxl_ioda_msi_setup(struct pci_dev *dev, unsigned int hwirq,
        return 0;
 }
 EXPORT_SYMBOL(pnv_cxl_ioda_msi_setup);
-
-#if IS_MODULE(CONFIG_CXL)
-static inline int get_cxl_module(void)
-{
-       struct module *cxl_module;
-
-       mutex_lock(&module_mutex);
-
-       cxl_module = find_module("cxl");
-       if (cxl_module)
-               __module_get(cxl_module);
-
-       mutex_unlock(&module_mutex);
-
-       if (!cxl_module)
-               return -ENODEV;
-
-       return 0;
-}
-#else
-static inline int get_cxl_module(void) { return 0; }
-#endif
index 764170f..3805519 100644 (file)
@@ -887,7 +887,8 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot,
 
        want_v = hpte_encode_avpn(vpn, psize, ssize);
 
-       flags = (newpp & 7) | H_AVPN;
+       flags = (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_KEY_LO)) | H_AVPN;
+       flags |= (newpp & HPTE_R_KEY_HI) >> 48;
        if (mmu_has_feature(MMU_FTR_KERNEL_RO))
                /* Move pp0 into bit 8 (IBM 55) */
                flags |= (newpp & HPTE_R_PP0) >> 55;
index ea4d6a6..e83e089 100644 (file)
@@ -452,12 +452,28 @@ static int do_suspend(void)
        return ret;
 }
 
+/**
+ * struct pseries_suspend_info - State shared between CPUs for join/suspend.
+ * @counter: Threads are to increment this upon resuming from suspend
+ *           or if an error is received from H_JOIN. The thread which performs
+ *           the first increment (i.e. sets it to 1) is responsible for
+ *           waking the other threads.
+ * @done: False if join/suspend is in progress. True if the operation is
+ *        complete (successful or not).
+ */
+struct pseries_suspend_info {
+       atomic_t counter;
+       bool done;
+};
+
 static int do_join(void *arg)
 {
-       atomic_t *counter = arg;
+       struct pseries_suspend_info *info = arg;
+       atomic_t *counter = &info->counter;
        long hvrc;
        int ret;
 
+retry:
        /* Must ensure MSR.EE off for H_JOIN. */
        hard_irq_disable();
        hvrc = plpar_hcall_norets(H_JOIN);
@@ -473,8 +489,20 @@ static int do_join(void *arg)
        case H_SUCCESS:
                /*
                 * The suspend is complete and this cpu has received a
-                * prod.
+                * prod, or we've received a stray prod from unrelated
+                * code (e.g. paravirt spinlocks) and we need to join
+                * again.
+                *
+                * This barrier orders the return from H_JOIN above vs
+                * the load of info->done. It pairs with the barrier
+                * in the wakeup/prod path below.
                 */
+               smp_mb();
+               if (READ_ONCE(info->done) == false) {
+                       pr_info_ratelimited("premature return from H_JOIN on CPU %i, retrying",
+                                           smp_processor_id());
+                       goto retry;
+               }
                ret = 0;
                break;
        case H_BAD_MODE:
@@ -488,6 +516,13 @@ static int do_join(void *arg)
 
        if (atomic_inc_return(counter) == 1) {
                pr_info("CPU %u waking all threads\n", smp_processor_id());
+               WRITE_ONCE(info->done, true);
+               /*
+                * This barrier orders the store to info->done vs subsequent
+                * H_PRODs to wake the other CPUs. It pairs with the barrier
+                * in the H_SUCCESS case above.
+                */
+               smp_mb();
                prod_others();
        }
        /*
@@ -535,11 +570,16 @@ static int pseries_suspend(u64 handle)
        int ret;
 
        while (true) {
-               atomic_t counter = ATOMIC_INIT(0);
+               struct pseries_suspend_info info;
                unsigned long vasi_state;
                int vasi_err;
 
-               ret = stop_machine(do_join, &counter, cpu_online_mask);
+               info = (struct pseries_suspend_info) {
+                       .counter = ATOMIC_INIT(0),
+                       .done = false,
+               };
+
+               ret = stop_machine(do_join, &info, cpu_online_mask);
                if (ret == 0)
                        break;
                /*
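
The two smp_mb() calls in the hunks above form a store/load pairing: the
waking CPU publishes info->done = true before issuing any H_PROD, and a CPU
returning from H_JOIN orders that return before its load of info->done, so
reading false reliably identifies a stray prod and triggers a re-join. A
user-space analogue with C11 fences standing in for smp_mb() (illustration
only, not the kernel code):

    #include <stdatomic.h>
    #include <stdbool.h>

    _Atomic bool done;

    void waker(void)
    {
            atomic_store_explicit(&done, true, memory_order_relaxed);
            atomic_thread_fence(memory_order_seq_cst);      /* smp_mb() */
            /* the H_PROD analogue is issued after the fence */
    }

    bool woken(void)
    {
            /* ...just returned from the H_JOIN analogue... */
            atomic_thread_fence(memory_order_seq_cst);      /* smp_mb() */
            /* false here means a stray wakeup; the caller re-joins */
            return atomic_load_explicit(&done, memory_order_relaxed);
    }
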
index b3ac245..6373003 100644 (file)
@@ -4,6 +4,7 @@
  * Copyright 2006-2007 Michael Ellerman, IBM Corp.
  */
 
+#include <linux/crash_dump.h>
 #include <linux/device.h>
 #include <linux/irq.h>
 #include <linux/msi.h>
@@ -458,8 +459,28 @@ again:
                        return hwirq;
                }
 
-               virq = irq_create_mapping_affinity(NULL, hwirq,
-                                                  entry->affinity);
+               /*
+                * Depending on the number of online CPUs in the original
+                * kernel, it is likely for CPU #0 to be offline in a kdump
+                * kernel. The associated IRQs in the affinity mappings
+                * provided by irq_create_affinity_masks() are thus not
+                * started by irq_startup(), as designed for managed IRQs.
+                * This can be a problem with multi-queue block devices driven
+                * by blk-mq: such a non-started IRQ is very likely paired
+                * with the single queue enforced by blk-mq during kdump (see
+                * blk_mq_alloc_tag_set()). This causes the device to remain
+                * silent and likely hangs the guest at some point.
+                *
+                * We don't really care for fine-grained affinity when doing
+                * kdump actually : simply ignore the pre-computed affinity
+                * masks in this case and let the default mask with all CPUs
+                * be used when creating the IRQ mappings.
+                */
+               if (is_kdump_kernel())
+                       virq = irq_create_mapping(NULL, hwirq);
+               else
+                       virq = irq_create_mapping_affinity(NULL, hwirq,
+                                                          entry->affinity);
 
                if (!virq) {
                        pr_debug("rtas_msi: Failed mapping hwirq %d\n", hwirq);
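The fallback above is a single branch on the boot mode, but the reasoning matters: a managed-affinity mask computed for CPUs that are offline in the kdump kernel produces an IRQ that is never started. A toy model of the decision follows; the stub functions merely stand in for the kernel's irq_* API, and only the branch mirrors the code above.

/* Toy model of the kdump fallback: stub functions stand in for the
 * kernel's irq_* API; only the branch mirrors the code above. */
#include <stdbool.h>
#include <stdio.h>

static bool in_kdump;	/* stand-in for is_kdump_kernel() */

static int create_mapping_default(int hwirq)
{
	printf("hwirq %d -> default mask (all CPUs)\n", hwirq);
	return 100 + hwirq;	/* pretend virq */
}

static int create_mapping_affinity(int hwirq, unsigned long mask)
{
	printf("hwirq %d -> pre-computed mask 0x%lx\n", hwirq, mask);
	return 100 + hwirq;
}

static int map_msi(int hwirq, unsigned long affinity_mask)
{
	/* A managed mask aimed at CPUs that are offline in the kdump
	 * kernel yields an IRQ that is never started, so ignore it. */
	if (in_kdump)
		return create_mapping_default(hwirq);
	return create_mapping_affinity(hwirq, affinity_mask);
}

int main(void)
{
	in_kdump = true;
	return map_msi(5, 0x1UL) > 0 ? 0 : 1;
}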
index b2797cf..9cb4fc8 100644 (file)
@@ -1261,7 +1261,6 @@ static int vio_bus_remove(struct device *dev)
        struct vio_dev *viodev = to_vio_dev(dev);
        struct vio_driver *viodrv = to_vio_driver(dev->driver);
        struct device *devptr;
-       int ret = 1;
 
        /*
         * Hold a reference to the device after the remove function is called
@@ -1270,13 +1269,13 @@ static int vio_bus_remove(struct device *dev)
        devptr = get_device(dev);
 
        if (viodrv->remove)
-               ret = viodrv->remove(viodev);
+               viodrv->remove(viodev);
 
-       if (!ret && firmware_has_feature(FW_FEATURE_CMO))
+       if (firmware_has_feature(FW_FEATURE_CMO))
                vio_cmo_bus_remove(viodev);
 
        put_device(devptr);
-       return ret;
+       return 0;
 }
 
 /**
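The return value dropped in this hunk was never consumed: the driver core proceeds with the unbind regardless of what a bus remove callback reports, so vio_bus_remove() now always returns 0 (a companion change making the vio_driver remove callback itself return void is presumably elsewhere in this series and is not shown here). A self-contained sketch of the pattern, with hypothetical toy types:

/* Sketch of the "bus remove always succeeds" pattern with toy types:
 * the bus-level remove ignores driver status because the unbind
 * proceeds either way. Nothing here is the real driver-core API. */
#include <stdio.h>

struct toy_dev {
	const char *name;
};

struct toy_driver {
	/* Returning void documents that the core cannot act on failure. */
	void (*remove)(struct toy_dev *dev);
};

static void toy_remove(struct toy_dev *dev)
{
	printf("releasing resources of %s\n", dev->name);
}

static int toy_bus_remove(struct toy_dev *dev, struct toy_driver *drv)
{
	if (drv->remove)
		drv->remove(dev);
	return 0;	/* the caller ignores this value anyway */
}

int main(void)
{
	struct toy_dev d = { .name = "dev0" };
	struct toy_driver drv = { .remove = toy_remove };

	return toy_bus_remove(&d, &drv);
}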
index e0a34eb..0d0cf67 100644 (file)
@@ -57,6 +57,7 @@ config RISCV
        select HAVE_ARCH_JUMP_LABEL
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN if MMU && 64BIT
+       select HAVE_ARCH_KASAN_VMALLOC if MMU && 64BIT
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_KGDB_QXFER_PKT
        select HAVE_ARCH_MMAP_RND_BITS if MMU
@@ -67,14 +68,19 @@ config RISCV
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS if MMU
        select HAVE_EBPF_JIT if MMU
+       select HAVE_FUNCTION_ERROR_INJECTION
        select HAVE_FUTEX_CMPXCHG if FUTEX
        select HAVE_GCC_PLUGINS
        select HAVE_GENERIC_VDSO if MMU && 64BIT
        select HAVE_IRQ_TIME_ACCOUNTING
+       select HAVE_KPROBES
+       select HAVE_KPROBES_ON_FTRACE
+       select HAVE_KRETPROBES
        select HAVE_PCI
        select HAVE_PERF_EVENTS
        select HAVE_PERF_REGS
        select HAVE_PERF_USER_STACK_DUMP
+       select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select IRQ_DOMAIN
@@ -87,7 +93,6 @@ config RISCV
        select PCI_MSI if PCI
        select RISCV_INTC
        select RISCV_TIMER if RISCV_SBI
-       select SPARSEMEM_STATIC if 32BIT
        select SPARSE_IRQ
        select SYSCTL_EXCEPTION_TRACE
        select THREAD_INFO_IN_TASK
@@ -143,12 +148,13 @@ config PAGE_OFFSET
        default 0xffffffe000000000 if 64BIT && MAXPHYSMEM_128GB
 
 config ARCH_FLATMEM_ENABLE
-       def_bool y
+       def_bool !NUMA
 
 config ARCH_SPARSEMEM_ENABLE
        def_bool y
        depends on MMU
-       select SPARSEMEM_VMEMMAP_ENABLE
+       select SPARSEMEM_STATIC if 32BIT && SPARSEMEM
+       select SPARSEMEM_VMEMMAP_ENABLE if 64BIT
 
 config ARCH_SELECT_MEMORY_MODEL
        def_bool ARCH_SPARSEMEM_ENABLE
@@ -156,6 +162,9 @@ config ARCH_SELECT_MEMORY_MODEL
 config ARCH_WANT_GENERAL_HUGETLB
        def_bool y
 
+config ARCH_SUPPORTS_UPROBES
+       def_bool y
+
 config SYS_SUPPORTS_HUGETLBFS
        depends on MMU
        def_bool y
@@ -302,6 +311,36 @@ config TUNE_GENERIC
 
 endchoice
 
+# Common NUMA Features
+config NUMA
+       bool "NUMA Memory Allocation and Scheduler Support"
+       depends on SMP && MMU
+       select GENERIC_ARCH_NUMA
+       select OF_NUMA
+       select ARCH_SUPPORTS_NUMA_BALANCING
+       help
+         Enable NUMA (Non-Uniform Memory Access) support.
+
+         The kernel will try to allocate memory used by a CPU on that
+         CPU's local memory, adding NUMA awareness to the kernel.
+
+config NODES_SHIFT
+       int "Maximum NUMA Nodes (as a power of 2)"
+       range 1 10
+       default "2"
+       depends on NEED_MULTIPLE_NODES
+       help
+         Specify the maximum number of NUMA Nodes available on the target
+         system.  Increases memory reserved to accommodate various tables.
+
+config USE_PERCPU_NUMA_NODE_ID
+       def_bool y
+       depends on NUMA
+
+config NEED_PER_CPU_EMBED_FIRST_CHUNK
+       def_bool y
+       depends on NUMA
+
 config RISCV_ISA_C
        bool "Emit compressed instructions when building Linux"
        default y
@@ -416,11 +455,17 @@ config EFI
          allow the kernel to be booted as an EFI application. This
          is only useful on systems that have UEFI firmware.
 
+config CC_HAVE_STACKPROTECTOR_TLS
+       def_bool $(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=tp -mstack-protector-guard-offset=0)
+
+config STACKPROTECTOR_PER_TASK
+       def_bool y
+       depends on STACKPROTECTOR && CC_HAVE_STACKPROTECTOR_TLS
+
 endmenu
 
 config BUILTIN_DTB
        def_bool n
-       depends on RISCV_M_MODE
        depends on OF
 
 menu "Power management options"
index 3284d5c..e1b2690 100644 (file)
@@ -22,30 +22,43 @@ config SOC_VIRT
        help
          This enables support for QEMU Virt Machine.
 
-config SOC_KENDRYTE
-       bool "Kendryte K210 SoC"
+config SOC_CANAAN
+       bool "Canaan Kendryte K210 SoC"
        depends on !MMU
        select CLINT_TIMER if RISCV_M_MODE
        select SERIAL_SIFIVE if TTY
        select SERIAL_SIFIVE_CONSOLE if TTY
        select SIFIVE_PLIC
+       select ARCH_HAS_RESET_CONTROLLER
+       select PINCTRL
+       select COMMON_CLK
+       select COMMON_CLK_K210
        help
-         This enables support for Kendryte K210 SoC platform hardware.
+         This enables support for Canaan Kendryte K210 SoC platform hardware.
 
-config SOC_KENDRYTE_K210_DTB
-       def_bool y
-       depends on SOC_KENDRYTE_K210_DTB_BUILTIN
+if SOC_CANAAN
 
-config SOC_KENDRYTE_K210_DTB_BUILTIN
-       bool "Builtin device tree for the Kendryte K210"
-       depends on SOC_KENDRYTE
+config SOC_CANAAN_K210_DTB_BUILTIN
+       bool "Builtin device tree for the Canaan Kendryte K210"
+       depends on SOC_CANAAN
        default y
        select OF
        select BUILTIN_DTB
-       select SOC_KENDRYTE_K210_DTB
        help
-         Builds a device tree for the Kendryte K210 into the Linux image.
+         Build a device tree for the Canaan Kendryte K210 into the Linux
+         image.
          This option should be selected if no bootloader is being used.
          If unsure, say Y.
 
+config SOC_CANAAN_K210_DTB_SOURCE
+       string "Source file for the Canaan Kendryte K210 builtin DTB"
+       depends on SOC_CANAAN
+       depends on SOC_CANAAN_K210_DTB_BUILTIN
+       default "k210_generic"
+       help
+         Base name (without suffix, relative to arch/riscv/boot/dts/canaan)
+         for the DTS file that will be used to produce the DTB linked into the
+         kernel.
+
+endif
+
 endmenu
index 8c29e55..1368d94 100644 (file)
@@ -12,6 +12,8 @@ OBJCOPYFLAGS    := -O binary
 LDFLAGS_vmlinux :=
 ifeq ($(CONFIG_DYNAMIC_FTRACE),y)
        LDFLAGS_vmlinux := --no-relax
+       KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY
+       CC_FLAGS_FTRACE := -fpatchable-function-entry=8
 endif
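The two added flags work together: -fpatchable-function-entry=8 makes the compiler reserve NOP padding at every function entry for ftrace to patch at runtime, while CC_USING_PATCHABLE_FUNCTION_ENTRY tells the kernel headers that this scheme, rather than -pg/mcount, is in use. The effect is easy to inspect outside the kernel; a minimal demo with a hypothetical file name:

/* demo.c - compile with:  gcc -O2 -fpatchable-function-entry=8 -S demo.c
 * The generated assembly for f() carries eight NOPs at its entry,
 * which a tracer can later rewrite into a call instruction. */
int f(int x)
{
	return x + 1;
}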
 
 ifeq ($(CONFIG_64BIT)$(CONFIG_CMODEL_MEDLOW),yy)
@@ -65,6 +67,16 @@ KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-relax)
 # architectures.  It's faster to have GCC emit only aligned accesses.
 KBUILD_CFLAGS += $(call cc-option,-mstrict-align)
 
+ifeq ($(CONFIG_STACKPROTECTOR_PER_TASK),y)
+prepare: stack_protector_prepare
+stack_protector_prepare: prepare0
+       $(eval KBUILD_CFLAGS += -mstack-protector-guard=tls               \
+                               -mstack-protector-guard-reg=tp            \
+                               -mstack-protector-guard-offset=$(shell    \
+                       awk '{if ($$2 == "TSK_STACK_CANARY") print $$3;}' \
+                                       include/generated/asm-offsets.h))
+endif
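With these options the compiler loads the stack guard from a fixed offset off the tp register (which holds the current task pointer) instead of from a shared global, so every task gets its own canary; the awk invocation extracts the TSK_STACK_CANARY offset from the generated asm-offsets.h. The idea can be sketched in plain userspace C, with a thread-local guard playing the role of the tp-relative slot; all names below are illustrative.

/* Userspace sketch of a per-task canary: each thread keeps its own
 * guard value, analogous to loading the canary from tp + offset
 * instead of from a shared global. All names here are illustrative. */
#include <pthread.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

static __thread uintptr_t stack_canary;	/* per-thread guard slot */

static void check_canary(uintptr_t saved)
{
	/* Epilogue check: compare the copy made at function entry with
	 * the thread's current guard value. */
	if (saved != stack_canary) {
		fprintf(stderr, "stack smashing detected\n");
		abort();
	}
}

static void *worker(void *arg)
{
	/* Each thread/task gets a distinct guard value. */
	stack_canary = (uintptr_t)&arg ^ (uintptr_t)0x5ca1ab1e;
	uintptr_t saved = stack_canary;	/* prologue: stash guard */
	/* ... function body would run here ... */
	check_canary(saved);
	return NULL;
}

int main(void)
{
	pthread_t t[2];

	for (int i = 0; i < 2; i++)
		pthread_create(&t[i], NULL, worker, NULL);
	for (int i = 0; i < 2; i++)
		pthread_join(t[i], NULL);
	return 0;
}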
+
 # arch specific predefines for sparse
 CHECKFLAGS += -D__riscv -D__riscv_xlen=$(BITS)
 
@@ -83,7 +95,7 @@ PHONY += vdso_install
 vdso_install:
        $(Q)$(MAKE) $(build)=arch/riscv/kernel/vdso $@
 
-ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_KENDRYTE),yy)
+ifeq ($(CONFIG_RISCV_M_MODE)$(CONFIG_SOC_CANAAN),yy)
 KBUILD_IMAGE := $(boot)/loader.bin
 else
 KBUILD_IMAGE := $(boot)/Image.gz
index ca1f8cb..7ffd502 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
 subdir-y += sifive
-subdir-y += kendryte
+subdir-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += canaan
 
 obj-$(CONFIG_BUILTIN_DTB) := $(addsuffix /, $(subdir-y))
diff --git a/arch/riscv/boot/dts/canaan/Makefile b/arch/riscv/boot/dts/canaan/Makefile
new file mode 100644 (file)
index 0000000..9ee7156
--- /dev/null
@@ -0,0 +1,5 @@
+# SPDX-License-Identifier: GPL-2.0
+ifneq ($(CONFIG_SOC_CANAAN_K210_DTB_SOURCE),"")
+dtb-y += $(strip $(shell echo $(CONFIG_SOC_CANAAN_K210_DTB_SOURCE))).dtb
+obj-$(CONFIG_SOC_CANAAN_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
+endif
diff --git a/arch/riscv/boot/dts/canaan/canaan_kd233.dts b/arch/riscv/boot/dts/canaan/canaan_kd233.dts
new file mode 100644 (file)
index 0000000..039b92a
--- /dev/null
@@ -0,0 +1,152 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+       model = "Kendryte KD233";
+       compatible = "canaan,kendryte-kd233", "canaan,kendryte-k210";
+
+       chosen {
+               bootargs = "earlycon console=ttySIF0";
+               stdout-path = "serial0:115200n8";
+       };
+
+       gpio-leds {
+               compatible = "gpio-leds";
+
+               led0 {
+                       gpios = <&gpio0 8 GPIO_ACTIVE_LOW>;
+               };
+
+               led1 {
+                       gpios = <&gpio0 9 GPIO_ACTIVE_LOW>;
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               key0 {
+                       label = "KEY0";
+                       linux,code = <BTN_0>;
+                       gpios = <&gpio0 10 GPIO_ACTIVE_LOW>;
+               };
+       };
+};
+
+&fpioa {
+       pinctrl-0 = <&jtag_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+
+       jtag_pinctrl: jtag-pinmux {
+               pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+                        <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+                        <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+                        <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+       };
+
+       uarths_pinctrl: uarths-pinmux {
+               pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+                        <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+       };
+
+       spi0_pinctrl: spi0-pinmux {
+               pinmux = <K210_FPIOA(6, K210_PCF_GPIOHS20)>,  /* cs */
+                        <K210_FPIOA(7, K210_PCF_SPI0_SCLK)>, /* wr */
+                        <K210_FPIOA(8, K210_PCF_GPIOHS21)>;  /* dc */
+       };
+
+       dvp_pinctrl: dvp-pinmux {
+               pinmux = <K210_FPIOA(9, K210_PCF_SCCB_SCLK)>,
+                        <K210_FPIOA(10, K210_PCF_SCCB_SDA)>,
+                        <K210_FPIOA(11, K210_PCF_DVP_RST)>,
+                        <K210_FPIOA(12, K210_PCF_DVP_VSYNC)>,
+                        <K210_FPIOA(13, K210_PCF_DVP_PWDN)>,
+                        <K210_FPIOA(14, K210_PCF_DVP_XCLK)>,
+                        <K210_FPIOA(15, K210_PCF_DVP_PCLK)>,
+                        <K210_FPIOA(17, K210_PCF_DVP_HSYNC)>;
+       };
+
+       gpiohs_pinctrl: gpiohs-pinmux {
+               pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+                        <K210_FPIOA(20, K210_PCF_GPIOHS4)>, /* Rot. dip sw line 8 */
+                        <K210_FPIOA(21, K210_PCF_GPIOHS5)>, /* Rot. dip sw line 4 */
+                        <K210_FPIOA(22, K210_PCF_GPIOHS6)>, /* Rot. dip sw line 2 */
+                        <K210_FPIOA(23, K210_PCF_GPIOHS7)>, /* Rot. dip sw line 1 */
+                        <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+                        <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+                        <K210_FPIOA(26, K210_PCF_GPIOHS10)>;
+       };
+
+       spi1_pinctrl: spi1-pinmux {
+               pinmux = <K210_FPIOA(29, K210_PCF_SPI1_SCLK)>,
+                        <K210_FPIOA(30, K210_PCF_SPI1_D0)>,
+                        <K210_FPIOA(31, K210_PCF_SPI1_D1)>,
+                        <K210_FPIOA(32, K210_PCF_GPIOHS16)>; /* cs */
+       };
+
+       i2s0_pinctrl: i2s0-pinmux {
+               pinmux = <K210_FPIOA(33, K210_PCF_I2S0_IN_D0)>,
+                        <K210_FPIOA(34, K210_PCF_I2S0_WS)>,
+                        <K210_FPIOA(35, K210_PCF_I2S0_SCLK)>;
+       };
+};
+
+&uarths0 {
+       pinctrl-0 = <&uarths_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio0 {
+       pinctrl-0 = <&gpiohs_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&i2s0 {
+       #sound-dai-cells = <1>;
+       pinctrl-0 = <&i2s0_pinctrl>;
+       pinctrl-names = "default";
+};
+
+&spi0 {
+       pinctrl-0 = <&spi0_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+       panel@0 {
+               compatible = "ilitek,ili9341";
+               reg = <0>;
+               dc-gpios = <&gpio0 21 GPIO_ACTIVE_HIGH>;
+               spi-max-frequency = <15000000>;
+               status = "disabled";
+       };
+};
+
+&spi1 {
+       pinctrl-0 = <&spi1_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 16 GPIO_ACTIVE_LOW>;
+       status = "okay";
+
+       slot@0 {
+               compatible = "mmc-spi-slot";
+               reg = <0>;
+               voltage-ranges = <3300 3300>;
+               spi-max-frequency = <25000000>;
+               broken-cd;
+       };
+};
diff --git a/arch/riscv/boot/dts/canaan/k210.dtsi b/arch/riscv/boot/dts/canaan/k210.dtsi
new file mode 100644 (file)
index 0000000..5e8ca81
--- /dev/null
@@ -0,0 +1,459 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+#include <dt-bindings/clock/k210-clk.h>
+#include <dt-bindings/pinctrl/k210-fpioa.h>
+#include <dt-bindings/reset/k210-rst.h>
+
+/ {
+       /*
+        * Although the K210 is a 64-bit CPU, the address bus is only 32 bits
+        * wide, and the upper half of all addresses is ignored.
+        */
+       #address-cells = <1>;
+       #size-cells = <1>;
+       compatible = "canaan,kendryte-k210";
+
+       aliases {
+               serial0 = &uarths0;
+               serial1 = &uart1;
+               serial2 = &uart2;
+               serial3 = &uart3;
+       };
+
+       /*
+        * The K210 has an sv39 MMU following the privileged specification v1.9.
+        * Since this is a non-ratified draft specification, the kernel does not
+        * support it, and K210 support is enabled only for the !MMU case.
+        * Be consistent with this by setting the CPUs' MMU type to "none".
+        */
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               timebase-frequency = <7800000>;
+               cpu0: cpu@0 {
+                       device_type = "cpu";
+                       compatible = "canaan,k210", "riscv";
+                       reg = <0>;
+                       riscv,isa = "rv64imafdc";
+                       mmu-type = "riscv,none";
+                       i-cache-block-size = <64>;
+                       i-cache-size = <0x8000>;
+                       d-cache-block-size = <64>;
+                       d-cache-size = <0x8000>;
+                       cpu0_intc: interrupt-controller {
+                               #interrupt-cells = <1>;
+                               interrupt-controller;
+                               compatible = "riscv,cpu-intc";
+                       };
+               };
+               cpu1: cpu@1 {
+                       device_type = "cpu";
+                       compatible = "canaan,k210", "riscv";
+                       reg = <1>;
+                       riscv,isa = "rv64imafdc";
+                       mmu-type = "riscv,none";
+                       i-cache-block-size = <64>;
+                       i-cache-size = <0x8000>;
+                       d-cache-block-size = <64>;
+                       d-cache-size = <0x8000>;
+                       cpu1_intc: interrupt-controller {
+                               #interrupt-cells = <1>;
+                               interrupt-controller;
+                               compatible = "riscv,cpu-intc";
+                       };
+               };
+       };
+
+       sram: memory@80000000 {
+               device_type = "memory";
+               compatible = "canaan,k210-sram";
+               reg = <0x80000000 0x400000>,
+                     <0x80400000 0x200000>,
+                     <0x80600000 0x200000>;
+               reg-names = "sram0", "sram1", "aisram";
+               clocks = <&sysclk K210_CLK_SRAM0>,
+                        <&sysclk K210_CLK_SRAM1>,
+                        <&sysclk K210_CLK_AI>;
+               clock-names = "sram0", "sram1", "aisram";
+       };
+
+       clocks {
+               in0: oscillator {
+                       compatible = "fixed-clock";
+                       #clock-cells = <0>;
+                       clock-frequency = <26000000>;
+               };
+       };
+
+       soc {
+               #address-cells = <1>;
+               #size-cells = <1>;
+               compatible = "simple-bus";
+               ranges;
+               interrupt-parent = <&plic0>;
+
+               rom0: nvmem@1000 {
+                       reg = <0x1000 0x1000>;
+                       read-only;
+               };
+
+               clint0: timer@2000000 {
+                       compatible = "canaan,k210-clint", "sifive,clint0";
+                       reg = <0x2000000 0xC000>;
+                       interrupts-extended = <&cpu0_intc 3 &cpu0_intc 7
+                                             &cpu1_intc 3 &cpu1_intc 7>;
+               };
+
+               plic0: interrupt-controller@c000000 {
+                       #interrupt-cells = <1>;
+                       #address-cells = <0>;
+                       compatible = "canaan,k210-plic", "sifive,plic-1.0.0";
+                       reg = <0xC000000 0x4000000>;
+                       interrupt-controller;
+                       interrupts-extended = <&cpu0_intc 11 &cpu1_intc 11>;
+                       riscv,ndev = <65>;
+               };
+
+               uarths0: serial@38000000 {
+                       compatible = "canaan,k210-uarths", "sifive,uart0";
+                       reg = <0x38000000 0x1000>;
+                       interrupts = <33>;
+                       clocks = <&sysclk K210_CLK_CPU>;
+               };
+
+               gpio0: gpio-controller@38001000 {
+                       #interrupt-cells = <2>;
+                       #gpio-cells = <2>;
+                       compatible = "canaan,k210-gpiohs", "sifive,gpio0";
+                       reg = <0x38001000 0x1000>;
+                       interrupt-controller;
+                       interrupts = <34 35 36 37 38 39 40 41
+                                     42 43 44 45 46 47 48 49
+                                     50 51 52 53 54 55 56 57
+                                     58 59 60 61 62 63 64 65>;
+                       gpio-controller;
+                       ngpios = <32>;
+               };
+
+               dmac0: dma-controller@50000000 {
+                       compatible = "snps,axi-dma-1.01a";
+                       reg = <0x50000000 0x1000>;
+                       interrupts = <27 28 29 30 31 32>;
+                       #dma-cells = <1>;
+                       clocks = <&sysclk K210_CLK_DMA>, <&sysclk K210_CLK_DMA>;
+                       clock-names = "core-clk", "cfgr-clk";
+                       resets = <&sysrst K210_RST_DMA>;
+                       dma-channels = <6>;
+                       snps,dma-masters = <2>;
+                       snps,priority = <0 1 2 3 4 5>;
+                       snps,data-width = <5>;
+                       snps,block-size = <0x200000 0x200000 0x200000
+                                          0x200000 0x200000 0x200000>;
+                       snps,axi-max-burst-len = <256>;
+               };
+
+               apb0: bus@50200000 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "simple-pm-bus";
+                       ranges;
+                       clocks = <&sysclk K210_CLK_APB0>;
+
+                       gpio1: gpio@50200000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "snps,dw-apb-gpio";
+                               reg = <0x50200000 0x80>;
+                               clocks = <&sysclk K210_CLK_APB0>,
+                                        <&sysclk K210_CLK_GPIO>;
+                               clock-names = "bus", "db";
+                               resets = <&sysrst K210_RST_GPIO>;
+
+                               gpio1_0: gpio-port@0 {
+                                       #gpio-cells = <2>;
+                                       #interrupt-cells = <2>;
+                                       compatible = "snps,dw-apb-gpio-port";
+                                       reg = <0>;
+                                       interrupt-controller;
+                                       interrupts = <23>;
+                                       gpio-controller;
+                                       ngpios = <8>;
+                               };
+                       };
+
+                       uart1: serial@50210000 {
+                               compatible = "snps,dw-apb-uart";
+                               reg = <0x50210000 0x100>;
+                               interrupts = <11>;
+                               clocks = <&sysclk K210_CLK_UART1>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "baudclk", "apb_pclk";
+                               resets = <&sysrst K210_RST_UART1>;
+                               reg-io-width = <4>;
+                               reg-shift = <2>;
+                               dcd-override;
+                               dsr-override;
+                               cts-override;
+                               ri-override;
+                       };
+
+                       uart2: serial@50220000 {
+                               compatible = "snps,dw-apb-uart";
+                               reg = <0x50220000 0x100>;
+                               interrupts = <12>;
+                               clocks = <&sysclk K210_CLK_UART2>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "baudclk", "apb_pclk";
+                               resets = <&sysrst K210_RST_UART2>;
+                               reg-io-width = <4>;
+                               reg-shift = <2>;
+                               dcd-override;
+                               dsr-override;
+                               cts-override;
+                               ri-override;
+                       };
+
+                       uart3: serial@50230000 {
+                               compatible = "snps,dw-apb-uart";
+                               reg = <0x50230000 0x100>;
+                               interrupts = <13>;
+                               clocks = <&sysclk K210_CLK_UART3>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "baudclk", "apb_pclk";
+                               resets = <&sysrst K210_RST_UART3>;
+                               reg-io-width = <4>;
+                               reg-shift = <2>;
+                               dcd-override;
+                               dsr-override;
+                               cts-override;
+                               ri-override;
+                       };
+
+                       spi2: spi@50240000 {
+                               compatible = "canaan,k210-spi";
+                               spi-slave;
+                               reg = <0x50240000 0x100>;
+                               #address-cells = <0>;
+                               #size-cells = <0>;
+                               interrupts = <3>;
+                               clocks = <&sysclk K210_CLK_SPI2>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "ssi_clk", "pclk";
+                               resets = <&sysrst K210_RST_SPI2>;
+                               spi-max-frequency = <25000000>;
+                       };
+
+                       i2s0: i2s@50250000 {
+                               compatible = "snps,designware-i2s";
+                               reg = <0x50250000 0x200>;
+                               interrupts = <5>;
+                               clocks = <&sysclk K210_CLK_I2S0>;
+                               clock-names = "i2sclk";
+                               resets = <&sysrst K210_RST_I2S0>;
+                       };
+
+                       i2s1: i2s@50260000 {
+                               compatible = "snps,designware-i2s";
+                               reg = <0x50260000 0x200>;
+                               interrupts = <6>;
+                               clocks = <&sysclk K210_CLK_I2S1>;
+                               clock-names = "i2sclk";
+                               resets = <&sysrst K210_RST_I2S1>;
+                       };
+
+                       i2s2: i2s@50270000 {
+                               compatible = "snps,designware-i2s";
+                               reg = <0x50270000 0x200>;
+                               interrupts = <7>;
+                               clocks = <&sysclk K210_CLK_I2S2>;
+                               clock-names = "i2sclk";
+                               resets = <&sysrst K210_RST_I2S2>;
+                       };
+
+                       i2c0: i2c@50280000 {
+                               compatible = "snps,designware-i2c";
+                               reg = <0x50280000 0x100>;
+                               interrupts = <8>;
+                               clocks = <&sysclk K210_CLK_I2C0>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "ref", "pclk";
+                               resets = <&sysrst K210_RST_I2C0>;
+                       };
+
+                       i2c1: i2c@50290000 {
+                               compatible = "snps,designware-i2c";
+                               reg = <0x50290000 0x100>;
+                               interrupts = <9>;
+                               clocks = <&sysclk K210_CLK_I2C1>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "ref", "pclk";
+                               resets = <&sysrst K210_RST_I2C1>;
+                       };
+
+                       i2c2: i2c@502a0000 {
+                               compatible = "snps,designware-i2c";
+                               reg = <0x502A0000 0x100>;
+                               interrupts = <10>;
+                               clocks = <&sysclk K210_CLK_I2C2>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "ref", "pclk";
+                               resets = <&sysrst K210_RST_I2C2>;
+                       };
+
+                       fpioa: pinmux@502b0000 {
+                               compatible = "canaan,k210-fpioa";
+                               reg = <0x502B0000 0x100>;
+                               clocks = <&sysclk K210_CLK_FPIOA>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "ref", "pclk";
+                               resets = <&sysrst K210_RST_FPIOA>;
+                               canaan,k210-sysctl-power = <&sysctl 108>;
+                       };
+
+                       timer0: timer@502d0000 {
+                               compatible = "snps,dw-apb-timer";
+                               reg = <0x502D0000 0x100>;
+                               interrupts = <14 15>;
+                               clocks = <&sysclk K210_CLK_TIMER0>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "timer", "pclk";
+                               resets = <&sysrst K210_RST_TIMER0>;
+                       };
+
+                       timer1: timer@502e0000 {
+                               compatible = "snps,dw-apb-timer";
+                               reg = <0x502E0000 0x100>;
+                               interrupts = <16 17>;
+                               clocks = <&sysclk K210_CLK_TIMER1>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "timer", "pclk";
+                               resets = <&sysrst K210_RST_TIMER1>;
+                       };
+
+                       timer2: timer@502f0000 {
+                               compatible = "snps,dw-apb-timer";
+                               reg = <0x502F0000 0x100>;
+                               interrupts = <18 19>;
+                               clocks = <&sysclk K210_CLK_TIMER2>,
+                                        <&sysclk K210_CLK_APB0>;
+                               clock-names = "timer", "pclk";
+                               resets = <&sysrst K210_RST_TIMER2>;
+                       };
+               };
+
+               apb1: bus@50400000 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "simple-pm-bus";
+                       ranges;
+                       clocks = <&sysclk K210_CLK_APB1>;
+
+                       wdt0: watchdog@50400000 {
+                               compatible = "snps,dw-wdt";
+                               reg = <0x50400000 0x100>;
+                               interrupts = <21>;
+                               clocks = <&sysclk K210_CLK_WDT0>,
+                                        <&sysclk K210_CLK_APB1>;
+                               clock-names = "tclk", "pclk";
+                               resets = <&sysrst K210_RST_WDT0>;
+                       };
+
+                       wdt1: watchdog@50410000 {
+                               compatible = "snps,dw-wdt";
+                               reg = <0x50410000 0x100>;
+                               interrupts = <22>;
+                               clocks = <&sysclk K210_CLK_WDT1>,
+                                        <&sysclk K210_CLK_APB1>;
+                               clock-names = "tclk", "pclk";
+                               resets = <&sysrst K210_RST_WDT1>;
+                       };
+
+                       sysctl: syscon@50440000 {
+                               compatible = "canaan,k210-sysctl",
+                                            "syscon", "simple-mfd";
+                               reg = <0x50440000 0x100>;
+                               clocks = <&sysclk K210_CLK_APB1>;
+                               clock-names = "pclk";
+
+                               sysclk: clock-controller {
+                                       #clock-cells = <1>;
+                                       compatible = "canaan,k210-clk";
+                                       clocks = <&in0>;
+                               };
+
+                               sysrst: reset-controller {
+                                       compatible = "canaan,k210-rst";
+                                       #reset-cells = <1>;
+                               };
+
+                               reboot: syscon-reboot {
+                                       compatible = "syscon-reboot";
+                                       regmap = <&sysctl>;
+                                       offset = <48>;
+                                       mask = <1>;
+                                       value = <1>;
+                               };
+                       };
+               };
+
+               apb2: bus@52000000 {
+                       #address-cells = <1>;
+                       #size-cells = <1>;
+                       compatible = "simple-pm-bus";
+                       ranges;
+                       clocks = <&sysclk K210_CLK_APB2>;
+
+                       spi0: spi@52000000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "canaan,k210-spi";
+                               reg = <0x52000000 0x100>;
+                               interrupts = <1>;
+                               clocks = <&sysclk K210_CLK_SPI0>,
+                                        <&sysclk K210_CLK_APB2>;
+                               clock-names = "ssi_clk", "pclk";
+                               resets = <&sysrst K210_RST_SPI0>;
+                               reset-names = "spi";
+                               spi-max-frequency = <25000000>;
+                               num-cs = <4>;
+                               reg-io-width = <4>;
+                       };
+
+                       spi1: spi@53000000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "canaan,k210-spi";
+                               reg = <0x53000000 0x100>;
+                               interrupts = <2>;
+                               clocks = <&sysclk K210_CLK_SPI1>,
+                                        <&sysclk K210_CLK_APB2>;
+                               clock-names = "ssi_clk", "pclk";
+                               resets = <&sysrst K210_RST_SPI1>;
+                               reset-names = "spi";
+                               spi-max-frequency = <25000000>;
+                               num-cs = <4>;
+                               reg-io-width = <4>;
+                       };
+
+                       spi3: spi@54000000 {
+                               #address-cells = <1>;
+                               #size-cells = <0>;
+                               compatible = "snps,dwc-ssi-1.01a";
+                               reg = <0x54000000 0x200>;
+                               interrupts = <4>;
+                               clocks = <&sysclk K210_CLK_SPI3>,
+                                        <&sysclk K210_CLK_APB2>;
+                               clock-names = "ssi_clk", "pclk";
+                               resets = <&sysrst K210_RST_SPI3>;
+                               reset-names = "spi";
+                               /* Could possibly go up to 200 MHz */
+                               spi-max-frequency = <100000000>;
+                               num-cs = <4>;
+                               reg-io-width = <4>;
+                       };
+               };
+       };
+};
diff --git a/arch/riscv/boot/dts/canaan/k210_generic.dts b/arch/riscv/boot/dts/canaan/k210_generic.dts
new file mode 100644 (file)
index 0000000..396c8ca
--- /dev/null
@@ -0,0 +1,46 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+       model = "Kendryte K210 generic";
+       compatible = "canaan,kendryte-k210";
+
+       chosen {
+               bootargs = "earlycon console=ttySIF0";
+               stdout-path = "serial0:115200n8";
+       };
+};
+
+&fpioa {
+       pinctrl-0 = <&jtag_pins>;
+       pinctrl-names = "default";
+       status = "okay";
+
+       jtag_pins: jtag-pinmux {
+               pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+                        <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+                        <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+                        <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+       };
+
+       uarths_pins: uarths-pinmux {
+               pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+                        <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+       };
+};
+
+&uarths0 {
+       pinctrl-0 = <&uarths_pins>;
+       pinctrl-names = "default";
+       status = "okay";
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_bit.dts
new file mode 100644 (file)
index 0000000..0bcaf35
--- /dev/null
@@ -0,0 +1,209 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+       model = "SiPeed MAIX BiT";
+       compatible = "sipeed,maix-bit", "sipeed,maix-bitm",
+                    "canaan,kendryte-k210";
+
+       chosen {
+               bootargs = "earlycon console=ttySIF0";
+               stdout-path = "serial0:115200n8";
+       };
+
+       gpio-leds {
+               compatible = "gpio-leds";
+
+               led0 {
+                       color = <LED_COLOR_ID_GREEN>;
+                       label = "green";
+                       gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+               };
+
+               led1 {
+                       color = <LED_COLOR_ID_RED>;
+                       label = "red";
+                       gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+               };
+
+               led2 {
+                       color = <LED_COLOR_ID_BLUE>;
+                       label = "blue";
+                       gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               boot {
+                       label = "BOOT";
+                       linux,code = <BTN_0>;
+                       gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+               };
+       };
+};
+
+&fpioa {
+       pinctrl-names = "default";
+       pinctrl-0 = <&jtag_pinctrl>;
+       status = "okay";
+
+       jtag_pinctrl: jtag-pinmux {
+               pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+                        <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+                        <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+                        <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+       };
+
+       uarths_pinctrl: uarths-pinmux {
+               pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+                        <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+       };
+
+       gpio_pinctrl: gpio-pinmux {
+               pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+                        <K210_FPIOA(9, K210_PCF_GPIO1)>,
+                        <K210_FPIOA(10, K210_PCF_GPIO2)>,
+                        <K210_FPIOA(11, K210_PCF_GPIO3)>,
+                        <K210_FPIOA(12, K210_PCF_GPIO4)>,
+                        <K210_FPIOA(13, K210_PCF_GPIO5)>,
+                        <K210_FPIOA(14, K210_PCF_GPIO6)>,
+                        <K210_FPIOA(15, K210_PCF_GPIO7)>;
+       };
+
+       gpiohs_pinctrl: gpiohs-pinmux {
+               pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+                        <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+                        <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+                        <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+                        <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+                        <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+                        <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+                        <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+                        <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+                        <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+                        <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+       };
+
+       i2s0_pinctrl: i2s0-pinmux {
+               pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+                        <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+                        <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+       };
+
+       dvp_pinctrl: dvp-pinmux {
+               pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+                        <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+                        <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+                        <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+                        <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+                        <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+                        <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+                        <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+       };
+
+       spi0_pinctrl: spi0-pinmux {
+               pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>,  /* cs */
+                        <K210_FPIOA(37, K210_PCF_GPIOHS21)>,  /* rst */
+                        <K210_FPIOA(38, K210_PCF_GPIOHS22)>,  /* dc */
+                        <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+       };
+
+       spi1_pinctrl: spi1-pinmux {
+               pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+                        <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+                        <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+                        <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+       };
+
+       i2c1_pinctrl: i2c1-pinmux {
+               pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>,
+                        <K210_FPIOA(31, K210_PCF_I2C1_SDA)>;
+       };
+};
+
+&uarths0 {
+       pinctrl-0 = <&uarths_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio0 {
+       pinctrl-0 = <&gpiohs_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio1 {
+       pinctrl-0 = <&gpio_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&i2s0 {
+       #sound-dai-cells = <1>;
+       pinctrl-0 = <&i2s0_pinctrl>;
+       pinctrl-names = "default";
+};
+
+&i2c1 {
+       pinctrl-0 = <&i2c1_pinctrl>;
+       pinctrl-names = "default";
+       clock-frequency = <400000>;
+       status = "okay";
+};
+
+&spi0 {
+       pinctrl-0 = <&spi0_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+       panel@0 {
+               compatible = "sitronix,st7789v";
+               reg = <0>;
+               reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+               dc-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+               spi-max-frequency = <15000000>;
+               spi-cs-high;
+               status = "disabled";
+       };
+};
+
+&spi1 {
+       pinctrl-0 = <&spi1_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+       status = "okay";
+
+       slot@0 {
+               compatible = "mmc-spi-slot";
+               reg = <0>;
+               voltage-ranges = <3300 3300>;
+               spi-max-frequency = <25000000>;
+               broken-cd;
+       };
+};
+
+&spi3 {
+       spi-flash@0 {
+               compatible = "jedec,spi-nor";
+               reg = <0>;
+               spi-max-frequency = <50000000>;
+               m25p,fast-read;
+               broken-flash-reset;
+       };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_dock.dts
new file mode 100644 (file)
index 0000000..ac8a03f
--- /dev/null
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+       model = "SiPeed MAIX Dock";
+       compatible = "sipeed,maix-dock-m1", "sipeed,maix-dock-m1w",
+                    "canaan,kendryte-k210";
+
+       chosen {
+               bootargs = "earlycon console=ttySIF0";
+               stdout-path = "serial0:115200n8";
+       };
+
+       gpio-leds {
+               compatible = "gpio-leds";
+
+               /*
+                * Note: the board wiring diagram documents green on
+                * gpio #4, red on gpio #5 and blue on gpio #6. However,
+                * the board is actually wired differently, as defined here.
+                */
+               led0 {
+                       color = <LED_COLOR_ID_BLUE>;
+                       label = "blue";
+                       gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+               };
+
+               led1 {
+                       color = <LED_COLOR_ID_GREEN>;
+                       label = "green";
+                       gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+               };
+
+               led2 {
+                       color = <LED_COLOR_ID_RED>;
+                       label = "red";
+                       gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               boot {
+                       label = "BOOT";
+                       linux,code = <BTN_0>;
+                       gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+               };
+       };
+};
+
+&fpioa {
+       pinctrl-0 = <&jtag_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+
+       jtag_pinctrl: jtag-pinmux {
+               pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+                        <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+                        <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+                        <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+       };
+
+       uarths_pinctrl: uarths-pinmux {
+               pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+                        <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+       };
+
+       gpio_pinctrl: gpio-pinmux {
+               pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+                        <K210_FPIOA(11, K210_PCF_GPIO3)>,
+                        <K210_FPIOA(12, K210_PCF_GPIO4)>,
+                        <K210_FPIOA(13, K210_PCF_GPIO5)>,
+                        <K210_FPIOA(14, K210_PCF_GPIO6)>,
+                        <K210_FPIOA(15, K210_PCF_GPIO7)>;
+       };
+
+       gpiohs_pinctrl: gpiohs-pinmux {
+               pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+                        <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+                        <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+                        <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+                        <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+                        <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+                        <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+                        <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+                        <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+                        <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+                        <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+       };
+
+       i2s0_pinctrl: i2s0-pinmux {
+               pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+                        <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+                        <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+       };
+
+       dvp_pinctrl: dvp-pinmux {
+               pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+                        <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+                        <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+                        <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+                        <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+                        <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+                        <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+                        <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+       };
+
+       spi0_pinctrl: spi0-pinmux {
+               pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>,  /* cs */
+                        <K210_FPIOA(37, K210_PCF_GPIOHS21)>,  /* rst */
+                        <K210_FPIOA(38, K210_PCF_GPIOHS22)>,  /* dc */
+                        <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+       };
+
+       spi1_pinctrl: spi1-pinmux {
+               pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+                        <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+                        <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+                        <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+       };
+
+       i2c1_pinctrl: i2c1-pinmux {
+               pinmux = <K210_FPIOA(9, K210_PCF_I2C1_SCLK)>,
+                        <K210_FPIOA(10, K210_PCF_I2C1_SDA)>;
+       };
+};
+
+&uarths0 {
+       pinctrl-0 = <&uarths_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio0 {
+       pinctrl-0 = <&gpiohs_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio1 {
+       pinctrl-0 = <&gpio_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&i2s0 {
+       #sound-dai-cells = <1>;
+       pinctrl-0 = <&i2s0_pinctrl>;
+       pinctrl-names = "default";
+};
+
+&i2c1 {
+       pinctrl-0 = <&i2c1_pinctrl>;
+       pinctrl-names = "default";
+       clock-frequency = <400000>;
+       status = "okay";
+};
+
+&spi0 {
+       pinctrl-0 = <&spi0_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+       panel@0 {
+               compatible = "sitronix,st7789v";
+               reg = <0>;
+               reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+               dc-gpios = <&gpio0 22 0>;
+               spi-max-frequency = <15000000>;
+               status = "disabled";
+       };
+};
+
+&spi1 {
+       pinctrl-0 = <&spi1_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+       status = "okay";
+
+       slot@0 {
+               compatible = "mmc-spi-slot";
+               reg = <0>;
+               voltage-ranges = <3300 3300>;
+               spi-max-frequency = <25000000>;
+               broken-cd;
+       };
+};
+
+&spi3 {
+       spi-flash@0 {
+               compatible = "jedec,spi-nor";
+               reg = <0>;
+               spi-max-frequency = <50000000>;
+               m25p,fast-read;
+               broken-flash-reset;
+       };
+};
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts b/arch/riscv/boot/dts/canaan/sipeed_maix_go.dts
new file mode 100644 (file)
index 0000000..6239981
--- /dev/null
@@ -0,0 +1,219 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+#include <dt-bindings/leds/common.h>
+
+/ {
+       model = "SiPeed MAIX GO";
+       compatible = "sipeed,maix-go", "canaan,kendryte-k210";
+
+       chosen {
+               bootargs = "earlycon console=ttySIF0";
+               stdout-path = "serial0:115200n8";
+       };
+
+       gpio-leds {
+               compatible = "gpio-leds";
+
+               led0 {
+                       color = <LED_COLOR_ID_GREEN>;
+                       label = "green";
+                       gpios = <&gpio1_0 4 GPIO_ACTIVE_LOW>;
+               };
+
+               led1 {
+                       color = <LED_COLOR_ID_RED>;
+                       label = "red";
+                       gpios = <&gpio1_0 5 GPIO_ACTIVE_LOW>;
+               };
+
+               led2 {
+                       color = <LED_COLOR_ID_BLUE>;
+                       label = "blue";
+                       gpios = <&gpio1_0 6 GPIO_ACTIVE_LOW>;
+               };
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               up {
+                       label = "UP";
+                       linux,code = <BTN_1>;
+                       gpios = <&gpio1_0 7 GPIO_ACTIVE_LOW>;
+               };
+
+               press {
+                       label = "PRESS";
+                       linux,code = <BTN_0>;
+                       gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+               };
+
+               down {
+                       label = "DOWN";
+                       linux,code = <BTN_2>;
+                       gpios = <&gpio0 1 GPIO_ACTIVE_LOW>;
+               };
+       };
+};
+
+&fpioa {
+       pinctrl-0 = <&jtag_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+
+       jtag_pinctrl: jtag-pinmux {
+               pinmux = <K210_FPIOA(0, K210_PCF_JTAG_TCLK)>,
+                        <K210_FPIOA(1, K210_PCF_JTAG_TDI)>,
+                        <K210_FPIOA(2, K210_PCF_JTAG_TMS)>,
+                        <K210_FPIOA(3, K210_PCF_JTAG_TDO)>;
+       };
+
+       uarths_pinctrl: uarths-pinmux {
+               pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>,
+                        <K210_FPIOA(5, K210_PCF_UARTHS_TX)>;
+       };
+
+       gpio_pinctrl: gpio-pinmux {
+               pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+                        <K210_FPIOA(9, K210_PCF_GPIO1)>,
+                        <K210_FPIOA(10, K210_PCF_GPIO2)>,
+                        <K210_FPIOA(11, K210_PCF_GPIO3)>,
+                        <K210_FPIOA(12, K210_PCF_GPIO4)>,
+                        <K210_FPIOA(13, K210_PCF_GPIO5)>,
+                        <K210_FPIOA(14, K210_PCF_GPIO6)>,
+                        <K210_FPIOA(15, K210_PCF_GPIO7)>;
+       };
+
+       gpiohs_pinctrl: gpiohs-pinmux {
+               pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,
+                        <K210_FPIOA(17, K210_PCF_GPIOHS1)>,
+                        <K210_FPIOA(21, K210_PCF_GPIOHS5)>,
+                        <K210_FPIOA(22, K210_PCF_GPIOHS6)>,
+                        <K210_FPIOA(23, K210_PCF_GPIOHS7)>,
+                        <K210_FPIOA(24, K210_PCF_GPIOHS8)>,
+                        <K210_FPIOA(25, K210_PCF_GPIOHS9)>,
+                        <K210_FPIOA(32, K210_PCF_GPIOHS16)>,
+                        <K210_FPIOA(33, K210_PCF_GPIOHS17)>,
+                        <K210_FPIOA(34, K210_PCF_GPIOHS18)>,
+                        <K210_FPIOA(35, K210_PCF_GPIOHS19)>;
+       };
+
+       i2s0_pinctrl: i2s0-pinmux {
+               pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+                        <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+                        <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+       };
+
+       dvp_pinctrl: dvp-pinmux {
+               pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+                        <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+                        <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+                        <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+                        <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+                        <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+                        <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+                        <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+       };
+
+       spi0_pinctrl: spi0-pinmux {
+               pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>,  /* cs */
+                        <K210_FPIOA(37, K210_PCF_GPIOHS21)>,  /* rst */
+                        <K210_FPIOA(38, K210_PCF_GPIOHS22)>,  /* dc */
+                        <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+       };
+
+       spi1_pinctrl: spi1-pinmux {
+               pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+                        <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+                        <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+                        <K210_FPIOA(29, K210_PCF_GPIOHS13)>; /* cs */
+       };
+
+       i2c1_pinctrl: i2c1-pinmux {
+               pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>,
+                        <K210_FPIOA(31, K210_PCF_I2C1_SDA)>;
+       };
+};
+
+&uarths0 {
+       pinctrl-0 = <&uarths_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio0 {
+       pinctrl-0 = <&gpiohs_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio1 {
+       pinctrl-0 = <&gpio_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&i2s0 {
+       #sound-dai-cells = <1>;
+       pinctrl-0 = <&i2s0_pinctrl>;
+       pinctrl-names = "default";
+};
+
+&i2c1 {
+       pinctrl-0 = <&i2c1_pinctrl>;
+       pinctrl-names = "default";
+       clock-frequency = <400000>;
+       status = "okay";
+};
+
+&spi0 {
+       pinctrl-0 = <&spi0_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+       panel@0 {
+               compatible = "sitronix,st7789v";
+               reg = <0>;
+               reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+               dc-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+               spi-max-frequency = <15000000>;
+               status = "disabled";
+       };
+};
+
+&spi1 {
+       pinctrl-0 = <&spi1_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 13 GPIO_ACTIVE_LOW>;
+       status = "okay";
+
+       slot@0 {
+               compatible = "mmc-spi-slot";
+               reg = <0>;
+               voltage-ranges = <3300 3300>;
+               spi-max-frequency = <25000000>;
+               broken-cd;
+       };
+};
+
+&spi3 {
+       spi-flash@0 {
+               compatible = "jedec,spi-nor";
+               reg = <0>;
+               spi-max-frequency = <50000000>;
+               m25p,fast-read;
+               broken-flash-reset;
+       };
+};
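
The pinmux cells in the board files above are built with the K210_FPIOA() helper
from <dt-bindings/pinctrl/k210-fpioa.h>, which packs a pin number and a function
selector into one cell. A rough sketch of the idea; the exact bit layout here is
an assumption for illustration, not taken from the real header:

    /* Hypothetical packing of a (pin, function) pair into one pinmux cell. */
    #define K210_FPIOA(pin, func)   (((pin) << 16) | (func))

The pinctrl driver then unpacks the two fields from each cell to program the
matching FPIOA slot.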
diff --git a/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts b/arch/riscv/boot/dts/canaan/sipeed_maixduino.dts
new file mode 100644 (file)
index 0000000..cf605ba
--- /dev/null
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (C) 2020 Western Digital Corporation or its affiliates.
+ */
+
+/dts-v1/;
+
+#include "k210.dtsi"
+
+#include <dt-bindings/gpio/gpio.h>
+#include <dt-bindings/input/input.h>
+
+/ {
+       model = "SiPeed MAIXDUINO";
+       compatible = "sipeed,maixduino", "canaan,kendryte-k210";
+
+       chosen {
+               bootargs = "earlycon console=ttySIF0";
+               stdout-path = "serial0:115200n8";
+       };
+
+       gpio-keys {
+               compatible = "gpio-keys";
+
+               boot {
+                       label = "BOOT";
+                       linux,code = <BTN_0>;
+                       gpios = <&gpio0 0 GPIO_ACTIVE_LOW>;
+               };
+       };
+
+       vcc_3v3: regulator-3v3 {
+               compatible = "regulator-fixed";
+               regulator-name = "3v3";
+               regulator-min-microvolt = <3300000>;
+               regulator-max-microvolt = <3300000>;
+       };
+};
+
+&fpioa {
+       status = "okay";
+
+       uarths_pinctrl: uarths-pinmux {
+               pinmux = <K210_FPIOA(4, K210_PCF_UARTHS_RX)>, /* Header "0" */
+                        <K210_FPIOA(5, K210_PCF_UARTHS_TX)>; /* Header "1" */
+       };
+
+       gpio_pinctrl: gpio-pinmux {
+               pinmux = <K210_FPIOA(8, K210_PCF_GPIO0)>,
+                        <K210_FPIOA(9, K210_PCF_GPIO1)>;
+       };
+
+       gpiohs_pinctrl: gpiohs-pinmux {
+               pinmux = <K210_FPIOA(16, K210_PCF_GPIOHS0)>,  /* BOOT */
+                        <K210_FPIOA(21, K210_PCF_GPIOHS2)>,  /* Header "2" */
+                        <K210_FPIOA(22, K210_PCF_GPIOHS3)>,  /* Header "3" */
+                        <K210_FPIOA(23, K210_PCF_GPIOHS4)>,  /* Header "4" */
+                        <K210_FPIOA(24, K210_PCF_GPIOHS5)>,  /* Header "5" */
+                        <K210_FPIOA(32, K210_PCF_GPIOHS6)>,  /* Header "6" */
+                        <K210_FPIOA(15, K210_PCF_GPIOHS7)>,  /* Header "7" */
+                        <K210_FPIOA(14, K210_PCF_GPIOHS8)>,  /* Header "8" */
+                        <K210_FPIOA(13, K210_PCF_GPIOHS9)>,  /* Header "9" */
+                        <K210_FPIOA(12, K210_PCF_GPIOHS10)>, /* Header "10" */
+                        <K210_FPIOA(11, K210_PCF_GPIOHS11)>, /* Header "11" */
+                        <K210_FPIOA(10, K210_PCF_GPIOHS12)>, /* Header "12" */
+                        <K210_FPIOA(3,  K210_PCF_GPIOHS13)>; /* Header "13" */
+       };
+
+       i2s0_pinctrl: i2s0-pinmux {
+               pinmux = <K210_FPIOA(18, K210_PCF_I2S0_SCLK)>,
+                        <K210_FPIOA(19, K210_PCF_I2S0_WS)>,
+                        <K210_FPIOA(20, K210_PCF_I2S0_IN_D0)>;
+       };
+
+       spi1_pinctrl: spi1-pinmux {
+               pinmux = <K210_FPIOA(26, K210_PCF_SPI1_D1)>,
+                        <K210_FPIOA(27, K210_PCF_SPI1_SCLK)>,
+                        <K210_FPIOA(28, K210_PCF_SPI1_D0)>,
+                        <K210_FPIOA(29, K210_PCF_GPIO2)>; /* cs */
+       };
+
+       i2c1_pinctrl: i2c1-pinmux {
+               pinmux = <K210_FPIOA(30, K210_PCF_I2C1_SCLK)>, /* Header "scl" */
+                        <K210_FPIOA(31, K210_PCF_I2C1_SDA)>;  /* Header "sda" */
+       };
+
+       i2s1_pinctrl: i2s1-pinmux {
+               pinmux = <K210_FPIOA(33, K210_PCF_I2S1_WS)>,
+                        <K210_FPIOA(34, K210_PCF_I2S1_IN_D0)>,
+                        <K210_FPIOA(35, K210_PCF_I2S1_SCLK)>;
+       };
+
+       spi0_pinctrl: spi0-pinmux {
+               pinmux = <K210_FPIOA(36, K210_PCF_GPIOHS20)>,  /* cs */
+                        <K210_FPIOA(37, K210_PCF_GPIOHS21)>,  /* rst */
+                        <K210_FPIOA(38, K210_PCF_GPIOHS22)>,  /* dc */
+                        <K210_FPIOA(39, K210_PCF_SPI0_SCLK)>; /* wr */
+       };
+
+       dvp_pinctrl: dvp-pinmux {
+               pinmux = <K210_FPIOA(40, K210_PCF_SCCB_SDA)>,
+                        <K210_FPIOA(41, K210_PCF_SCCB_SCLK)>,
+                        <K210_FPIOA(42, K210_PCF_DVP_RST)>,
+                        <K210_FPIOA(43, K210_PCF_DVP_VSYNC)>,
+                        <K210_FPIOA(44, K210_PCF_DVP_PWDN)>,
+                        <K210_FPIOA(45, K210_PCF_DVP_HSYNC)>,
+                        <K210_FPIOA(46, K210_PCF_DVP_XCLK)>,
+                        <K210_FPIOA(47, K210_PCF_DVP_PCLK)>;
+       };
+};
+
+&uarths0 {
+       pinctrl-0 = <&uarths_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio0 {
+       pinctrl-0 = <&gpiohs_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&gpio1 {
+       pinctrl-0 = <&gpio_pinctrl>;
+       pinctrl-names = "default";
+       status = "okay";
+};
+
+&i2s0 {
+       #sound-dai-cells = <1>;
+       pinctrl-0 = <&i2s0_pinctrl>;
+       pinctrl-names = "default";
+};
+
+&i2c1 {
+       pinctrl-0 = <&i2c1_pinctrl>;
+       pinctrl-names = "default";
+       clock-frequency = <400000>;
+       status = "okay";
+};
+
+&spi0 {
+       pinctrl-0 = <&spi0_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio0 20 GPIO_ACTIVE_HIGH>;
+
+       panel@0 {
+               compatible = "sitronix,st7789v";
+               reg = <0>;
+               reset-gpios = <&gpio0 21 GPIO_ACTIVE_LOW>;
+               dc-gpios = <&gpio0 22 GPIO_ACTIVE_HIGH>;
+               spi-max-frequency = <15000000>;
+               power-supply = <&vcc_3v3>;
+       };
+};
+
+&spi1 {
+       pinctrl-0 = <&spi1_pinctrl>;
+       pinctrl-names = "default";
+       num-cs = <1>;
+       cs-gpios = <&gpio1_0 2 GPIO_ACTIVE_LOW>;
+       status = "okay";
+
+       slot@0 {
+               compatible = "mmc-spi-slot";
+               reg = <0>;
+               voltage-ranges = <3300 3300>;
+               spi-max-frequency = <25000000>;
+               broken-cd;
+       };
+};
+
+&spi3 {
+       spi-flash@0 {
+               compatible = "jedec,spi-nor";
+               reg = <0>;
+               spi-max-frequency = <50000000>;
+               m25p,fast-read;
+               broken-flash-reset;
+       };
+};
diff --git a/arch/riscv/boot/dts/kendryte/Makefile b/arch/riscv/boot/dts/kendryte/Makefile
deleted file mode 100644 (file)
index 1a88e61..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-dtb-$(CONFIG_SOC_KENDRYTE_K210_DTB) += k210.dtb
-
-obj-$(CONFIG_SOC_KENDRYTE_K210_DTB_BUILTIN) += $(addsuffix .o, $(dtb-y))
diff --git a/arch/riscv/boot/dts/kendryte/k210.dts b/arch/riscv/boot/dts/kendryte/k210.dts
deleted file mode 100644 (file)
index 0d1f28f..0000000
+++ /dev/null
@@ -1,23 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
- */
-
-/dts-v1/;
-
-#include "k210.dtsi"
-
-/ {
-       model = "Kendryte K210 generic";
-       compatible = "kendryte,k210";
-
-       chosen {
-               bootargs = "earlycon console=ttySIF0";
-               stdout-path = "serial0";
-       };
-};
-
-&uarths0 {
-       status = "okay";
-};
-
diff --git a/arch/riscv/boot/dts/kendryte/k210.dtsi b/arch/riscv/boot/dts/kendryte/k210.dtsi
deleted file mode 100644 (file)
index d2d0ff6..0000000
+++ /dev/null
@@ -1,125 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Copyright (C) 2019 Sean Anderson <seanga2@gmail.com>
- * Copyright (C) 2020 Western Digital Corporation or its affiliates.
- */
-#include <dt-bindings/clock/k210-clk.h>
-
-/ {
-       /*
-        * Although the K210 is a 64-bit CPU, the address bus is only 32-bits
-        * wide, and the upper half of all addresses is ignored.
-        */
-       #address-cells = <1>;
-       #size-cells = <1>;
-       compatible = "kendryte,k210";
-
-       aliases {
-               serial0 = &uarths0;
-       };
-
-       /*
-        * The K210 has an Sv39 MMU following the privilege specification v1.9.
-        * Since this is a non-ratified draft specification, the kernel does not
-        * support it, and K210 support is enabled only for the !MMU case.
-        * Be consistent with this by setting the CPUs' MMU type to "none".
-        */
-       cpus {
-               #address-cells = <1>;
-               #size-cells = <0>;
-               timebase-frequency = <7800000>;
-               cpu0: cpu@0 {
-                       device_type = "cpu";
-                       reg = <0>;
-                       compatible = "kendryte,k210", "sifive,rocket0", "riscv";
-                       riscv,isa = "rv64imafdc";
-                       mmu-type = "none";
-                       i-cache-size = <0x8000>;
-                       i-cache-block-size = <64>;
-                       d-cache-size = <0x8000>;
-                       d-cache-block-size = <64>;
-                       clocks = <&sysctl K210_CLK_CPU>;
-                       clock-frequency = <390000000>;
-                       cpu0_intc: interrupt-controller {
-                               #interrupt-cells = <1>;
-                               interrupt-controller;
-                               compatible = "riscv,cpu-intc";
-                       };
-               };
-               cpu1: cpu@1 {
-                       device_type = "cpu";
-                       reg = <1>;
-                       compatible = "kendryte,k210", "sifive,rocket0", "riscv";
-                       riscv,isa = "rv64imafdc";
-                       mmu-type = "none";
-                       i-cache-size = <0x8000>;
-                       i-cache-block-size = <64>;
-                       d-cache-size = <0x8000>;
-                       d-cache-block-size = <64>;
-                       clocks = <&sysctl K210_CLK_CPU>;
-                       clock-frequency = <390000000>;
-                       cpu1_intc: interrupt-controller {
-                               #interrupt-cells = <1>;
-                               interrupt-controller;
-                               compatible = "riscv,cpu-intc";
-                       };
-               };
-       };
-
-       sram: memory@80000000 {
-               device_type = "memory";
-               reg = <0x80000000 0x400000>,
-                     <0x80400000 0x200000>,
-                     <0x80600000 0x200000>;
-               reg-names = "sram0", "sram1", "aisram";
-       };
-
-       clocks {
-               in0: oscillator {
-                       compatible = "fixed-clock";
-                       #clock-cells = <0>;
-                       clock-frequency = <26000000>;
-               };
-       };
-
-       soc {
-               #address-cells = <1>;
-               #size-cells = <1>;
-               compatible = "kendryte,k210-soc", "simple-bus";
-               ranges;
-               interrupt-parent = <&plic0>;
-
-               sysctl: sysctl@50440000 {
-                       compatible = "kendryte,k210-sysctl", "simple-mfd";
-                       reg = <0x50440000 0x1000>;
-                       #clock-cells = <1>;
-               };
-
-               clint0: clint@2000000 {
-                       #interrupt-cells = <1>;
-                       compatible = "riscv,clint0";
-                       reg = <0x2000000 0xC000>;
-                       interrupts-extended =  <&cpu0_intc 3 &cpu0_intc 7
-                                               &cpu1_intc 3 &cpu1_intc 7>;
-                       clocks = <&sysctl K210_CLK_ACLK>;
-               };
-
-               plic0: interrupt-controller@c000000 {
-                       #interrupt-cells = <1>;
-                       interrupt-controller;
-                       compatible = "kendryte,k210-plic0", "riscv,plic0";
-                       reg = <0xC000000 0x4000000>;
-                       interrupts-extended = <&cpu0_intc 11>, <&cpu0_intc 0xffffffff>,
-                                             <&cpu1_intc 11>, <&cpu1_intc 0xffffffff>;
-                       riscv,ndev = <65>;
-                       riscv,max-priority = <7>;
-               };
-
-               uarths0: serial@38000000 {
-                       compatible = "kendryte,k210-uarths", "sifive,uart0";
-                       reg = <0x38000000 0x1000>;
-                       interrupts = <33>;
-                       clocks = <&sysctl K210_CLK_CPU>;
-               };
-       };
-};
index 6d6189e..74c47fe 100644 (file)
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0
-dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb
+dtb-$(CONFIG_SOC_SIFIVE) += hifive-unleashed-a00.dtb \
+                           hifive-unmatched-a00.dtb
diff --git a/arch/riscv/boot/dts/sifive/fu740-c000.dtsi b/arch/riscv/boot/dts/sifive/fu740-c000.dtsi
new file mode 100644 (file)
index 0000000..eeb4f8c
--- /dev/null
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 SiFive, Inc */
+
+/dts-v1/;
+
+#include <dt-bindings/clock/sifive-fu740-prci.h>
+
+/ {
+       #address-cells = <2>;
+       #size-cells = <2>;
+       compatible = "sifive,fu740-c000", "sifive,fu740";
+
+       aliases {
+               serial0 = &uart0;
+               serial1 = &uart1;
+               ethernet0 = &eth0;
+       };
+
+       chosen {
+       };
+
+       cpus {
+               #address-cells = <1>;
+               #size-cells = <0>;
+               cpu0: cpu@0 {
+                       compatible = "sifive,bullet0", "riscv";
+                       device_type = "cpu";
+                       i-cache-block-size = <64>;
+                       i-cache-sets = <128>;
+                       i-cache-size = <16384>;
+                       next-level-cache = <&ccache>;
+                       reg = <0x0>;
+                       riscv,isa = "rv64imac";
+                       status = "disabled";
+                       cpu0_intc: interrupt-controller {
+                               #interrupt-cells = <1>;
+                               compatible = "riscv,cpu-intc";
+                               interrupt-controller;
+                       };
+               };
+               cpu1: cpu@1 {
+                       compatible = "sifive,bullet0", "riscv";
+                       d-cache-block-size = <64>;
+                       d-cache-sets = <64>;
+                       d-cache-size = <32768>;
+                       d-tlb-sets = <1>;
+                       d-tlb-size = <40>;
+                       device_type = "cpu";
+                       i-cache-block-size = <64>;
+                       i-cache-sets = <128>;
+                       i-cache-size = <32768>;
+                       i-tlb-sets = <1>;
+                       i-tlb-size = <40>;
+                       mmu-type = "riscv,sv39";
+                       next-level-cache = <&ccache>;
+                       reg = <0x1>;
+                       riscv,isa = "rv64imafdc";
+                       tlb-split;
+                       cpu1_intc: interrupt-controller {
+                               #interrupt-cells = <1>;
+                               compatible = "riscv,cpu-intc";
+                               interrupt-controller;
+                       };
+               };
+               cpu2: cpu@2 {
+                       compatible = "sifive,bullet0", "riscv";
+                       d-cache-block-size = <64>;
+                       d-cache-sets = <64>;
+                       d-cache-size = <32768>;
+                       d-tlb-sets = <1>;
+                       d-tlb-size = <40>;
+                       device_type = "cpu";
+                       i-cache-block-size = <64>;
+                       i-cache-sets = <128>;
+                       i-cache-size = <32768>;
+                       i-tlb-sets = <1>;
+                       i-tlb-size = <40>;
+                       mmu-type = "riscv,sv39";
+                       next-level-cache = <&ccache>;
+                       reg = <0x2>;
+                       riscv,isa = "rv64imafdc";
+                       tlb-split;
+                       cpu2_intc: interrupt-controller {
+                               #interrupt-cells = <1>;
+                               compatible = "riscv,cpu-intc";
+                               interrupt-controller;
+                       };
+               };
+               cpu3: cpu@3 {
+                       compatible = "sifive,bullet0", "riscv";
+                       d-cache-block-size = <64>;
+                       d-cache-sets = <64>;
+                       d-cache-size = <32768>;
+                       d-tlb-sets = <1>;
+                       d-tlb-size = <40>;
+                       device_type = "cpu";
+                       i-cache-block-size = <64>;
+                       i-cache-sets = <128>;
+                       i-cache-size = <32768>;
+                       i-tlb-sets = <1>;
+                       i-tlb-size = <40>;
+                       mmu-type = "riscv,sv39";
+                       next-level-cache = <&ccache>;
+                       reg = <0x3>;
+                       riscv,isa = "rv64imafdc";
+                       tlb-split;
+                       cpu3_intc: interrupt-controller {
+                               #interrupt-cells = <1>;
+                               compatible = "riscv,cpu-intc";
+                               interrupt-controller;
+                       };
+               };
+               cpu4: cpu@4 {
+                       compatible = "sifive,bullet0", "riscv";
+                       d-cache-block-size = <64>;
+                       d-cache-sets = <64>;
+                       d-cache-size = <32768>;
+                       d-tlb-sets = <1>;
+                       d-tlb-size = <40>;
+                       device_type = "cpu";
+                       i-cache-block-size = <64>;
+                       i-cache-sets = <128>;
+                       i-cache-size = <32768>;
+                       i-tlb-sets = <1>;
+                       i-tlb-size = <40>;
+                       mmu-type = "riscv,sv39";
+                       next-level-cache = <&ccache>;
+                       reg = <0x4>;
+                       riscv,isa = "rv64imafdc";
+                       tlb-split;
+                       cpu4_intc: interrupt-controller {
+                               #interrupt-cells = <1>;
+                               compatible = "riscv,cpu-intc";
+                               interrupt-controller;
+                       };
+               };
+       };
+       soc {
+               #address-cells = <2>;
+               #size-cells = <2>;
+               compatible = "simple-bus";
+               ranges;
+               plic0: interrupt-controller@c000000 {
+                       #interrupt-cells = <1>;
+                       #address-cells = <0>;
+                       compatible = "sifive,fu540-c000-plic", "sifive,plic-1.0.0";
+                       reg = <0x0 0xc000000 0x0 0x4000000>;
+                       riscv,ndev = <69>;
+                       interrupt-controller;
+                       interrupts-extended = <
+                               &cpu0_intc 0xffffffff
+                               &cpu1_intc 0xffffffff &cpu1_intc 9
+                               &cpu2_intc 0xffffffff &cpu2_intc 9
+                               &cpu3_intc 0xffffffff &cpu3_intc 9
+                               &cpu4_intc 0xffffffff &cpu4_intc 9>;
+               };
+               prci: clock-controller@10000000 {
+                       compatible = "sifive,fu740-c000-prci";
+                       reg = <0x0 0x10000000 0x0 0x1000>;
+                       clocks = <&hfclk>, <&rtcclk>;
+                       #clock-cells = <1>;
+               };
+               uart0: serial@10010000 {
+                       compatible = "sifive,fu740-c000-uart", "sifive,uart0";
+                       reg = <0x0 0x10010000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <39>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       status = "disabled";
+               };
+               uart1: serial@10011000 {
+                       compatible = "sifive,fu740-c000-uart", "sifive,uart0";
+                       reg = <0x0 0x10011000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <40>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       status = "disabled";
+               };
+               i2c0: i2c@10030000 {
+                       compatible = "sifive,fu740-c000-i2c", "sifive,i2c0";
+                       reg = <0x0 0x10030000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <52>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       reg-shift = <2>;
+                       reg-io-width = <1>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+               i2c1: i2c@10031000 {
+                       compatible = "sifive,fu740-c000-i2c", "sifive,i2c0";
+                       reg = <0x0 0x10031000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <53>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       reg-shift = <2>;
+                       reg-io-width = <1>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+               qspi0: spi@10040000 {
+                       compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+                       reg = <0x0 0x10040000 0x0 0x1000>,
+                             <0x0 0x20000000 0x0 0x10000000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <41>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+               qspi1: spi@10041000 {
+                       compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+                       reg = <0x0 0x10041000 0x0 0x1000>,
+                             <0x0 0x30000000 0x0 0x10000000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <42>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+               spi0: spi@10050000 {
+                       compatible = "sifive,fu740-c000-spi", "sifive,spi0";
+                       reg = <0x0 0x10050000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <43>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+               eth0: ethernet@10090000 {
+                       compatible = "sifive,fu540-c000-gem";
+                       interrupt-parent = <&plic0>;
+                       interrupts = <55>;
+                       reg = <0x0 0x10090000 0x0 0x2000>,
+                             <0x0 0x100a0000 0x0 0x1000>;
+                       local-mac-address = [00 00 00 00 00 00];
+                       clock-names = "pclk", "hclk";
+                       clocks = <&prci PRCI_CLK_GEMGXLPLL>,
+                                <&prci PRCI_CLK_GEMGXLPLL>;
+                       #address-cells = <1>;
+                       #size-cells = <0>;
+                       status = "disabled";
+               };
+               pwm0: pwm@10020000 {
+                       compatible = "sifive,fu740-c000-pwm", "sifive,pwm0";
+                       reg = <0x0 0x10020000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <44>, <45>, <46>, <47>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       #pwm-cells = <3>;
+                       status = "disabled";
+               };
+               pwm1: pwm@10021000 {
+                       compatible = "sifive,fu740-c000-pwm", "sifive,pwm0";
+                       reg = <0x0 0x10021000 0x0 0x1000>;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <48>, <49>, <50>, <51>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       #pwm-cells = <3>;
+                       status = "disabled";
+               };
+               ccache: cache-controller@2010000 {
+                       compatible = "sifive,fu740-c000-ccache", "cache";
+                       cache-block-size = <64>;
+                       cache-level = <2>;
+                       cache-sets = <2048>;
+                       cache-size = <2097152>;
+                       cache-unified;
+                       interrupt-parent = <&plic0>;
+                       interrupts = <19 20 21 22>;
+                       reg = <0x0 0x2010000 0x0 0x1000>;
+               };
+               gpio: gpio@10060000 {
+                       compatible = "sifive,fu740-c000-gpio", "sifive,gpio0";
+                       interrupt-parent = <&plic0>;
+                       interrupts = <23>, <24>, <25>, <26>, <27>, <28>, <29>,
+                                    <30>, <31>, <32>, <33>, <34>, <35>, <36>,
+                                    <37>, <38>;
+                       reg = <0x0 0x10060000 0x0 0x1000>;
+                       gpio-controller;
+                       #gpio-cells = <2>;
+                       interrupt-controller;
+                       #interrupt-cells = <2>;
+                       clocks = <&prci PRCI_CLK_PCLK>;
+                       status = "disabled";
+               };
+       };
+};
diff --git a/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts b/arch/riscv/boot/dts/sifive/hifive-unmatched-a00.dts
new file mode 100644 (file)
index 0000000..b1c3c59
--- /dev/null
@@ -0,0 +1,253 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Copyright (c) 2020 SiFive, Inc */
+
+#include "fu740-c000.dtsi"
+#include <dt-bindings/interrupt-controller/irq.h>
+
+/* Clock frequency (in Hz) of the PCB crystal for rtcclk */
+#define RTCCLK_FREQ            1000000
+
+/ {
+       #address-cells = <2>;
+       #size-cells = <2>;
+       model = "SiFive HiFive Unmatched A00";
+       compatible = "sifive,hifive-unmatched-a00", "sifive,fu740-c000",
+                    "sifive,fu740";
+
+       chosen {
+               stdout-path = "serial0";
+       };
+
+       cpus {
+               timebase-frequency = <RTCCLK_FREQ>;
+       };
+
+       memory@80000000 {
+               device_type = "memory";
+               reg = <0x0 0x80000000 0x2 0x00000000>;
+       };
+
+       soc {
+       };
+
+       hfclk: hfclk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <26000000>;
+               clock-output-names = "hfclk";
+       };
+
+       rtcclk: rtcclk {
+               #clock-cells = <0>;
+               compatible = "fixed-clock";
+               clock-frequency = <RTCCLK_FREQ>;
+               clock-output-names = "rtcclk";
+       };
+};
+
+&uart0 {
+       status = "okay";
+};
+
+&uart1 {
+       status = "okay";
+};
+
+&i2c0 {
+       status = "okay";
+
+       temperature-sensor@4c {
+               compatible = "ti,tmp451";
+               reg = <0x4c>;
+               interrupt-parent = <&gpio>;
+               interrupts = <6 IRQ_TYPE_LEVEL_LOW>;
+       };
+
+       pmic@58 {
+               compatible = "dlg,da9063";
+               reg = <0x58>;
+               interrupt-parent = <&gpio>;
+               interrupts = <1 IRQ_TYPE_LEVEL_LOW>;
+               interrupt-controller;
+
+               regulators {
+                       vdd_bcore1: bcore1 {
+                               regulator-min-microvolt = <900000>;
+                               regulator-max-microvolt = <900000>;
+                               regulator-min-microamp = <5000000>;
+                               regulator-max-microamp = <5000000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_bcore2: bcore2 {
+                               regulator-min-microvolt = <900000>;
+                               regulator-max-microvolt = <900000>;
+                               regulator-min-microamp = <5000000>;
+                               regulator-max-microamp = <5000000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_bpro: bpro {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <2500000>;
+                               regulator-max-microamp = <2500000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_bperi: bperi {
+                               regulator-min-microvolt = <1050000>;
+                               regulator-max-microvolt = <1050000>;
+                               regulator-min-microamp = <1500000>;
+                               regulator-max-microamp = <1500000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_bmem: bmem {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <1200000>;
+                               regulator-min-microamp = <3000000>;
+                               regulator-max-microamp = <3000000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_bio: bio {
+                               regulator-min-microvolt = <1200000>;
+                               regulator-max-microvolt = <1200000>;
+                               regulator-min-microamp = <3000000>;
+                               regulator-max-microamp = <3000000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo1: ldo1 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <100000>;
+                               regulator-max-microamp = <100000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo2: ldo2 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <200000>;
+                               regulator-max-microamp = <200000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo3: ldo3 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <200000>;
+                               regulator-max-microamp = <200000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo4: ldo4 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <200000>;
+                               regulator-max-microamp = <200000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo5: ldo5 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <100000>;
+                               regulator-max-microamp = <100000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo6: ldo6 {
+                               regulator-min-microvolt = <3300000>;
+                               regulator-max-microvolt = <3300000>;
+                               regulator-min-microamp = <200000>;
+                               regulator-max-microamp = <200000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo7: ldo7 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <200000>;
+                               regulator-max-microamp = <200000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo8: ldo8 {
+                               regulator-min-microvolt = <1800000>;
+                               regulator-max-microvolt = <1800000>;
+                               regulator-min-microamp = <200000>;
+                               regulator-max-microamp = <200000>;
+                               regulator-always-on;
+                       };
+
+                       vdd_ldo9: ldo9 {
+                               regulator-min-microvolt = <1050000>;
+                               regulator-max-microvolt = <1050000>;
+                               regulator-min-microamp = <200000>;
+                               regulator-max-microamp = <200000>;
+                       };
+
+                       vdd_ldo10: ldo10 {
+                               regulator-min-microvolt = <1000000>;
+                               regulator-max-microvolt = <1000000>;
+                               regulator-min-microamp = <300000>;
+                               regulator-max-microamp = <300000>;
+                       };
+
+                       vdd_ldo11: ldo11 {
+                               regulator-min-microvolt = <2500000>;
+                               regulator-max-microvolt = <2500000>;
+                               regulator-min-microamp = <300000>;
+                               regulator-max-microamp = <300000>;
+                               regulator-always-on;
+                       };
+               };
+       };
+};
+
+&qspi0 {
+       status = "okay";
+       flash@0 {
+               compatible = "issi,is25wp256", "jedec,spi-nor";
+               reg = <0>;
+               spi-max-frequency = <50000000>;
+               m25p,fast-read;
+               spi-tx-bus-width = <4>;
+               spi-rx-bus-width = <4>;
+       };
+};
+
+&spi0 {
+       status = "okay";
+       mmc@0 {
+               compatible = "mmc-spi-slot";
+               reg = <0>;
+               spi-max-frequency = <20000000>;
+               voltage-ranges = <3300 3300>;
+               disable-wp;
+       };
+};
+
+&eth0 {
+       status = "okay";
+       phy-mode = "gmii";
+       phy-handle = <&phy0>;
+       phy0: ethernet-phy@0 {
+               reg = <0>;
+       };
+};
+
+&pwm0 {
+       status = "okay";
+};
+
+&pwm1 {
+       status = "okay";
+};
+
+&gpio {
+       status = "okay";
+};
index 8c3d1e4..6c0625a 100644 (file)
@@ -17,6 +17,7 @@ CONFIG_BPF_SYSCALL=y
 CONFIG_SOC_SIFIVE=y
 CONFIG_SOC_VIRT=y
 CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
 CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
index cd1df62..b16a2a1 100644 (file)
@@ -1,17 +1,19 @@
 # CONFIG_CPU_ISOLATION is not set
-CONFIG_LOG_BUF_SHIFT=15
+CONFIG_LOG_BUF_SHIFT=13
 CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
 CONFIG_BLK_DEV_INITRD=y
-CONFIG_INITRAMFS_FORCE=y
+# CONFIG_RD_GZIP is not set
 # CONFIG_RD_BZIP2 is not set
 # CONFIG_RD_LZMA is not set
 # CONFIG_RD_XZ is not set
 # CONFIG_RD_LZO is not set
 # CONFIG_RD_LZ4 is not set
+# CONFIG_RD_ZSTD is not set
 CONFIG_CC_OPTIMIZE_FOR_SIZE=y
 # CONFIG_SYSFS_SYSCALL is not set
 # CONFIG_FHANDLE is not set
 # CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
 # CONFIG_EPOLL is not set
 # CONFIG_SIGNALFD is not set
 # CONFIG_TIMERFD is not set
@@ -25,15 +27,17 @@ CONFIG_EMBEDDED=y
 # CONFIG_VM_EVENT_COUNTERS is not set
 # CONFIG_COMPAT_BRK is not set
 CONFIG_SLOB=y
-# CONFIG_SLAB_MERGE_DEFAULT is not set
 # CONFIG_MMU is not set
-CONFIG_SOC_KENDRYTE=y
+CONFIG_SOC_CANAAN=y
+CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
 CONFIG_MAXPHYSMEM_2GB=y
 CONFIG_SMP=y
 CONFIG_NR_CPUS=2
 CONFIG_CMDLINE="earlycon console=ttySIF0"
 CONFIG_CMDLINE_FORCE=y
-CONFIG_JUMP_LABEL=y
+# CONFIG_SECCOMP is not set
+# CONFIG_STACKPROTECTOR is not set
+# CONFIG_GCC_PLUGINS is not set
 # CONFIG_BLOCK is not set
 CONFIG_BINFMT_FLAT=y
 # CONFIG_COREDUMP is not set
@@ -41,23 +45,47 @@ CONFIG_DEVTMPFS=y
 CONFIG_DEVTMPFS_MOUNT=y
 # CONFIG_FW_LOADER is not set
 # CONFIG_ALLOW_DEV_COREDUMP is not set
-# CONFIG_INPUT_KEYBOARD is not set
-# CONFIG_INPUT_MOUSE is not set
+# CONFIG_INPUT is not set
 # CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_UNIX98_PTYS is not set
 # CONFIG_LEGACY_PTYS is not set
 # CONFIG_LDISC_AUTOLOAD is not set
 # CONFIG_HW_RANDOM is not set
 # CONFIG_DEVMEM is not set
+CONFIG_I2C=y
+# CONFIG_I2C_COMPAT is not set
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_SPI=y
+# CONFIG_SPI_MEM is not set
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
+# CONFIG_GPIO_CDEV_V1 is not set
+CONFIG_GPIO_DWAPB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
 # CONFIG_HWMON is not set
-# CONFIG_VGA_CONSOLE is not set
-# CONFIG_HID is not set
 # CONFIG_USB_SUPPORT is not set
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_USER=y
 # CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
+# CONFIG_FILE_LOCKING is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY_USER is not set
 # CONFIG_MISC_FILESYSTEMS is not set
 CONFIG_LSM="[]"
 CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+# CONFIG_FRAME_POINTER is not set
 # CONFIG_DEBUG_MISC is not set
 CONFIG_PANIC_ON_OOPS=y
 # CONFIG_SCHED_DEBUG is not set
diff --git a/arch/riscv/configs/nommu_k210_sdcard_defconfig b/arch/riscv/configs/nommu_k210_sdcard_defconfig
new file mode 100644 (file)
index 0000000..61f887f
--- /dev/null
@@ -0,0 +1,92 @@
+# CONFIG_CPU_ISOLATION is not set
+CONFIG_LOG_BUF_SHIFT=13
+CONFIG_PRINTK_SAFE_LOG_BUF_SHIFT=12
+CONFIG_CC_OPTIMIZE_FOR_SIZE=y
+# CONFIG_SYSFS_SYSCALL is not set
+# CONFIG_FHANDLE is not set
+# CONFIG_BASE_FULL is not set
+# CONFIG_FUTEX is not set
+# CONFIG_EPOLL is not set
+# CONFIG_SIGNALFD is not set
+# CONFIG_TIMERFD is not set
+# CONFIG_EVENTFD is not set
+# CONFIG_AIO is not set
+# CONFIG_IO_URING is not set
+# CONFIG_ADVISE_SYSCALLS is not set
+# CONFIG_MEMBARRIER is not set
+# CONFIG_KALLSYMS is not set
+CONFIG_EMBEDDED=y
+# CONFIG_VM_EVENT_COUNTERS is not set
+# CONFIG_COMPAT_BRK is not set
+CONFIG_SLOB=y
+# CONFIG_MMU is not set
+CONFIG_SOC_CANAAN=y
+CONFIG_SOC_CANAAN_K210_DTB_SOURCE="k210_generic"
+CONFIG_MAXPHYSMEM_2GB=y
+CONFIG_SMP=y
+CONFIG_NR_CPUS=2
+CONFIG_CMDLINE="earlycon console=ttySIF0 rootdelay=2 root=/dev/mmcblk0p1 ro"
+CONFIG_CMDLINE_FORCE=y
+# CONFIG_SECCOMP is not set
+# CONFIG_STACKPROTECTOR is not set
+# CONFIG_GCC_PLUGINS is not set
+# CONFIG_BLK_DEV_BSG is not set
+# CONFIG_MQ_IOSCHED_DEADLINE is not set
+# CONFIG_MQ_IOSCHED_KYBER is not set
+CONFIG_BINFMT_FLAT=y
+# CONFIG_COREDUMP is not set
+CONFIG_DEVTMPFS=y
+CONFIG_DEVTMPFS_MOUNT=y
+# CONFIG_FW_LOADER is not set
+# CONFIG_ALLOW_DEV_COREDUMP is not set
+# CONFIG_BLK_DEV is not set
+# CONFIG_INPUT is not set
+# CONFIG_SERIO is not set
+# CONFIG_VT is not set
+# CONFIG_LEGACY_PTYS is not set
+# CONFIG_LDISC_AUTOLOAD is not set
+# CONFIG_HW_RANDOM is not set
+# CONFIG_DEVMEM is not set
+CONFIG_I2C=y
+CONFIG_I2C_CHARDEV=y
+# CONFIG_I2C_HELPER_AUTO is not set
+CONFIG_I2C_DESIGNWARE_PLATFORM=y
+CONFIG_SPI=y
+# CONFIG_SPI_MEM is not set
+CONFIG_SPI_DESIGNWARE=y
+CONFIG_SPI_DW_MMIO=y
+# CONFIG_GPIO_CDEV_V1 is not set
+CONFIG_GPIO_DWAPB=y
+CONFIG_GPIO_SIFIVE=y
+CONFIG_POWER_RESET=y
+CONFIG_POWER_RESET_SYSCON=y
+# CONFIG_HWMON is not set
+# CONFIG_USB_SUPPORT is not set
+CONFIG_MMC=y
+# CONFIG_PWRSEQ_EMMC is not set
+# CONFIG_PWRSEQ_SIMPLE is not set
+CONFIG_MMC_SPI=y
+CONFIG_NEW_LEDS=y
+CONFIG_LEDS_CLASS=y
+CONFIG_LEDS_GPIO=y
+CONFIG_LEDS_USER=y
+# CONFIG_VIRTIO_MENU is not set
+# CONFIG_VHOST_MENU is not set
+# CONFIG_SURFACE_PLATFORMS is not set
+CONFIG_EXT2_FS=y
+# CONFIG_FILE_LOCKING is not set
+# CONFIG_DNOTIFY is not set
+# CONFIG_INOTIFY_USER is not set
+# CONFIG_MISC_FILESYSTEMS is not set
+CONFIG_LSM="[]"
+CONFIG_PRINTK_TIME=y
+# CONFIG_SYMBOLIC_ERRNAME is not set
+# CONFIG_DEBUG_BUGVERBOSE is not set
+# CONFIG_SECTION_MISMATCH_WARN_ONLY is not set
+# CONFIG_FRAME_POINTER is not set
+# CONFIG_DEBUG_MISC is not set
+CONFIG_PANIC_ON_OOPS=y
+# CONFIG_SCHED_DEBUG is not set
+# CONFIG_RCU_TRACE is not set
+# CONFIG_FTRACE is not set
+# CONFIG_RUNTIME_TESTING_MENU is not set
index 2c2cda6..8dd02b8 100644 (file)
@@ -18,6 +18,7 @@ CONFIG_SOC_SIFIVE=y
 CONFIG_SOC_VIRT=y
 CONFIG_ARCH_RV32I=y
 CONFIG_SMP=y
+CONFIG_HOTPLUG_CPU=y
 CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
index 27e005f..2a652b0 100644 (file)
@@ -9,4 +9,20 @@ long long __lshrti3(long long a, int b);
 long long __ashrti3(long long a, int b);
 long long __ashlti3(long long a, int b);
 
+
+#define DECLARE_DO_ERROR_INFO(name)    asmlinkage void name(struct pt_regs *regs)
+
+DECLARE_DO_ERROR_INFO(do_trap_unknown);
+DECLARE_DO_ERROR_INFO(do_trap_insn_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_insn_fault);
+DECLARE_DO_ERROR_INFO(do_trap_insn_illegal);
+DECLARE_DO_ERROR_INFO(do_trap_load_fault);
+DECLARE_DO_ERROR_INFO(do_trap_load_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_store_misaligned);
+DECLARE_DO_ERROR_INFO(do_trap_store_fault);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_u);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_s);
+DECLARE_DO_ERROR_INFO(do_trap_ecall_m);
+DECLARE_DO_ERROR_INFO(do_trap_break);
+
 #endif /* _ASM_RISCV_PROTOTYPES_H */
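
DECLARE_DO_ERROR_INFO() is only shorthand for the asmlinkage prototypes the trap
entry code expects; expanded by hand, the first declaration above reads:

    asmlinkage void do_trap_unknown(struct pt_regs *regs);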
index d6f1ec0..d3804a2 100644 (file)
@@ -85,6 +85,7 @@ do {                                                          \
 struct pt_regs;
 struct task_struct;
 
+void __show_regs(struct pt_regs *regs);
 void die(struct pt_regs *regs, const char *str);
 void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr);
 
index cec462e..caadfc1 100644 (file)
 #define SATP_PPN       _AC(0x003FFFFF, UL)
 #define SATP_MODE_32   _AC(0x80000000, UL)
 #define SATP_MODE      SATP_MODE_32
+#define SATP_ASID_BITS 9
+#define SATP_ASID_SHIFT        22
+#define SATP_ASID_MASK _AC(0x1FF, UL)
 #else
 #define SATP_PPN       _AC(0x00000FFFFFFFFFFF, UL)
 #define SATP_MODE_39   _AC(0x8000000000000000, UL)
 #define SATP_MODE      SATP_MODE_39
+#define SATP_ASID_BITS 16
+#define SATP_ASID_SHIFT        44
+#define SATP_ASID_MASK _AC(0xFFFF, UL)
 #endif
 
 /* Exception cause high bit - is an interrupt if set */
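
With these masks in place the current address-space ID can be read straight out
of the satp CSR. A minimal sketch, using the kernel's own csr_read() helper;
everything else follows from the macros above:

    static inline unsigned long current_asid(void)
    {
            /* satp = MODE | ASID | PPN; isolate the ASID field. */
            return (csr_read(CSR_SATP) >> SATP_ASID_SHIFT) & SATP_ASID_MASK;
    }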
index 9807ad1..e4c4355 100644 (file)
@@ -12,4 +12,6 @@
 
 #include <asm-generic/irq.h>
 
+extern void __init init_IRQ(void);
+
 #endif /* _ASM_RISCV_IRQ_H */
index b04028c..a2b3d9c 100644 (file)
@@ -8,12 +8,28 @@
 
 #ifdef CONFIG_KASAN
 
+/*
+ * The following comment was copied from arm64:
+ * KASAN_SHADOW_START: beginning of the kernel virtual addresses.
+ * KASAN_SHADOW_END: KASAN_SHADOW_START + 1/N of kernel virtual addresses,
+ * where N = (1 << KASAN_SHADOW_SCALE_SHIFT).
+ *
+ * KASAN_SHADOW_OFFSET:
+ * This value is used to map an address to the corresponding shadow
+ * address by the following formula:
+ *     shadow_addr = (address >> KASAN_SHADOW_SCALE_SHIFT) + KASAN_SHADOW_OFFSET
+ *
+ * The (1 << (64 - KASAN_SHADOW_SCALE_SHIFT)) shadow addresses that lie in the
+ * range [KASAN_SHADOW_OFFSET, KASAN_SHADOW_END) cover all 64 bits of virtual
+ * addresses. So KASAN_SHADOW_OFFSET should satisfy the following equation:
+ *      KASAN_SHADOW_OFFSET = KASAN_SHADOW_END -
+ *                              (1ULL << (64 - KASAN_SHADOW_SCALE_SHIFT))
+ */
 #define KASAN_SHADOW_SCALE_SHIFT       3
 
-#define KASAN_SHADOW_SIZE      (UL(1) << (38 - KASAN_SHADOW_SCALE_SHIFT))
-#define KASAN_SHADOW_START     KERN_VIRT_START /* 2^64 - 2^38 */
+#define KASAN_SHADOW_SIZE      (UL(1) << ((CONFIG_VA_BITS - 1) - KASAN_SHADOW_SCALE_SHIFT))
+#define KASAN_SHADOW_START     KERN_VIRT_START
 #define KASAN_SHADOW_END       (KASAN_SHADOW_START + KASAN_SHADOW_SIZE)
-
 #define KASAN_SHADOW_OFFSET    (KASAN_SHADOW_END - (1ULL << \
                                        (64 - KASAN_SHADOW_SCALE_SHIFT)))
 
index 56a98ea..4647d38 100644 (file)
 
 #include <asm-generic/kprobes.h>
 
+#ifdef CONFIG_KPROBES
+#include <linux/types.h>
+#include <linux/ptrace.h>
+#include <linux/percpu.h>
+
+#define __ARCH_WANT_KPROBES_INSN_SLOT
+#define MAX_INSN_SIZE                  2
+
+#define flush_insn_slot(p)             do { } while (0)
+#define kretprobe_blacklist_size       0
+
+#include <asm/probes.h>
+
+struct prev_kprobe {
+       struct kprobe *kp;
+       unsigned int status;
+};
+
+/* Single step context for kprobe */
+struct kprobe_step_ctx {
+       unsigned long ss_pending;
+       unsigned long match_addr;
+};
+
+/* per-cpu kprobe control block */
+struct kprobe_ctlblk {
+       unsigned int kprobe_status;
+       unsigned long saved_status;
+       struct prev_kprobe prev_kprobe;
+       struct kprobe_step_ctx ss_ctx;
+};
+
+void arch_remove_kprobe(struct kprobe *p);
+int kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr);
+bool kprobe_breakpoint_handler(struct pt_regs *regs);
+bool kprobe_single_step_handler(struct pt_regs *regs);
+void kretprobe_trampoline(void);
+void __kprobes *trampoline_probe_handler(struct pt_regs *regs);
+
+#endif /* CONFIG_KPROBES */
 #endif /* _ASM_RISCV_KPROBES_H */
index dabcf2c..0099dc1 100644 (file)
@@ -12,6 +12,8 @@
 typedef struct {
 #ifndef CONFIG_MMU
        unsigned long   end_brk;
+#else
+       atomic_long_t id;
 #endif
        void *vdso;
 #ifdef CONFIG_SMP
index 250defa..b065941 100644 (file)
@@ -23,6 +23,16 @@ static inline void activate_mm(struct mm_struct *prev,
        switch_mm(prev, next, NULL);
 }
 
+#define init_new_context init_new_context
+static inline int init_new_context(struct task_struct *tsk,
+                       struct mm_struct *mm)
+{
+#ifdef CONFIG_MMU
+       atomic_long_set(&mm->context.id, 0);
+#endif
+       return 0;
+}
+
 #include <asm-generic/mmu_context.h>
 
 #endif /* _ASM_RISCV_MMU_CONTEXT_H */
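
A fresh mm therefore starts with context.id == 0. An arm64-style ASID allocator
can treat that as "no ASID assigned yet" and allocate lazily on the first
switch; a sketch under that assumption (new_asid() is a hypothetical helper,
not part of this diff):

    static void check_context(struct mm_struct *mm)
    {
            /* id == 0 means no ASID yet, as initialised above. */
            if (atomic_long_read(&mm->context.id) == 0)
                    atomic_long_set(&mm->context.id, new_asid()); /* hypothetical */
    }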
diff --git a/arch/riscv/include/asm/mmzone.h b/arch/riscv/include/asm/mmzone.h
new file mode 100644 (file)
index 0000000..fa17e01
--- /dev/null
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_MMZONE_H
+#define __ASM_MMZONE_H
+
+#ifdef CONFIG_NUMA
+
+#include <asm/numa.h>
+
+extern struct pglist_data *node_data[];
+#define NODE_DATA(nid)         (node_data[(nid)])
+
+#endif /* CONFIG_NUMA */
+#endif /* __ASM_MMZONE_H */
diff --git a/arch/riscv/include/asm/numa.h b/arch/riscv/include/asm/numa.h
new file mode 100644 (file)
index 0000000..8c8cf42
--- /dev/null
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_NUMA_H
+#define __ASM_NUMA_H
+
+#include <asm/topology.h>
+#include <asm-generic/numa.h>
+
+#endif /* __ASM_NUMA_H */
index 64a675c..adc9d26 100644 (file)
@@ -97,9 +97,6 @@ extern unsigned long pfn_base;
 #define ARCH_PFN_OFFSET                (PAGE_OFFSET >> PAGE_SHIFT)
 #endif /* CONFIG_MMU */
 
-extern unsigned long max_low_pfn;
-extern unsigned long min_low_pfn;
-
 #define __pa_to_va_nodebug(x)  ((void *)((unsigned long) (x) + va_pa_offset))
 #define __va_to_pa_nodebug(x)  ((unsigned long)(x) - va_pa_offset)
 
index 1c473a1..658e112 100644 (file)
@@ -32,6 +32,20 @@ static inline int pci_proc_domain(struct pci_bus *bus)
        /* always show the domain in /proc */
        return 1;
 }
+
+#ifdef CONFIG_NUMA
+
+static inline int pcibus_to_node(struct pci_bus *bus)
+{
+       return dev_to_node(&bus->dev);
+}
+#ifndef cpumask_of_pcibus
+#define cpumask_of_pcibus(bus) (pcibus_to_node(bus) == -1 ?            \
+                                cpu_all_mask :                         \
+                                cpumask_of_node(pcibus_to_node(bus)))
+#endif
+#endif /* CONFIG_NUMA */
+
 #endif  /* CONFIG_PCI */
 
 #endif  /* _ASM_RISCV_PCI_H */
index 251e1db..ebf817c 100644 (file)
@@ -186,6 +186,11 @@ static inline unsigned long pmd_page_vaddr(pmd_t pmd)
        return (unsigned long)pfn_to_virt(pmd_val(pmd) >> _PAGE_PFN_SHIFT);
 }
 
+static inline pte_t pmd_pte(pmd_t pmd)
+{
+       return __pte(pmd_val(pmd));
+}
+
 /* Yields the page frame number (PFN) of a page table entry */
 static inline unsigned long pte_pfn(pte_t pte)
 {
@@ -289,6 +294,21 @@ static inline pte_t pte_mkhuge(pte_t pte)
        return pte;
 }
 
+#ifdef CONFIG_NUMA_BALANCING
+/*
+ * See the comment in include/asm-generic/pgtable.h
+ */
+static inline int pte_protnone(pte_t pte)
+{
+       return (pte_val(pte) & (_PAGE_PRESENT | _PAGE_PROT_NONE)) == _PAGE_PROT_NONE;
+}
+
+static inline int pmd_protnone(pmd_t pmd)
+{
+       return pte_protnone(pmd_pte(pmd));
+}
+#endif
+
 /* Modify page protection bits */
 static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 {
@@ -468,6 +488,7 @@ extern void *dtb_early_va;
 extern uintptr_t dtb_early_pa;
 void setup_bootmem(void);
 void paging_init(void);
+void misc_mem_init(void);
 
 #define FIRST_USER_ADDRESS  0
 
diff --git a/arch/riscv/include/asm/probes.h b/arch/riscv/include/asm/probes.h
new file mode 100644 (file)
index 0000000..a787e6d
--- /dev/null
@@ -0,0 +1,24 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _ASM_RISCV_PROBES_H
+#define _ASM_RISCV_PROBES_H
+
+typedef u32 probe_opcode_t;
+typedef bool (probes_handler_t) (u32 opcode, unsigned long addr, struct pt_regs *);
+
+/* architecture specific copy of original instruction */
+struct arch_probe_insn {
+       probe_opcode_t *insn;
+       probes_handler_t *handler;
+       /* restore address after simulation */
+       unsigned long restore;
+};
+
+#ifdef CONFIG_KPROBES
+typedef u32 kprobe_opcode_t;
+struct arch_specific_insn {
+       struct arch_probe_insn api;
+};
+#endif
+
+#endif /* _ASM_RISCV_PROBES_H */
index bdddcd5..021ed64 100644 (file)
@@ -34,6 +34,7 @@ struct thread_struct {
        unsigned long sp;       /* Kernel mode stack */
        unsigned long s[12];    /* s[0]: frame pointer */
        struct __riscv_d_ext_state fstate;
+       unsigned long bad_cause;
 };
 
 #define INIT_THREAD {                                  \
@@ -70,6 +71,7 @@ int riscv_of_processor_hartid(struct device_node *node);
 int riscv_of_parent_hartid(struct device_node *node);
 
 extern void riscv_fill_hwcap(void);
+extern int arch_dup_task_struct(struct task_struct *dst, struct task_struct *src);
 
 #endif /* __ASSEMBLY__ */
 
index ee49f80..09ad4e9 100644 (file)
@@ -8,6 +8,7 @@
 
 #include <uapi/asm/ptrace.h>
 #include <asm/csr.h>
+#include <linux/compiler.h>
 
 #ifndef __ASSEMBLY__
 
@@ -60,6 +61,7 @@ struct pt_regs {
 
 #define user_mode(regs) (((regs)->status & SR_PP) == 0)
 
+#define MAX_REG_OFFSET offsetof(struct pt_regs, orig_a0)
 
 /* Helpers for working with the instruction pointer */
 static inline unsigned long instruction_pointer(struct pt_regs *regs)
@@ -85,6 +87,12 @@ static inline void user_stack_pointer_set(struct pt_regs *regs,
        regs->sp =  val;
 }
 
+/* Valid only for Kernel mode traps. */
+static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
+{
+       return regs->sp;
+}
+
 /* Helpers for working with the frame pointer */
 static inline unsigned long frame_pointer(struct pt_regs *regs)
 {
@@ -101,6 +109,38 @@ static inline unsigned long regs_return_value(struct pt_regs *regs)
        return regs->a0;
 }
 
+static inline void regs_set_return_value(struct pt_regs *regs,
+                                        unsigned long val)
+{
+       regs->a0 = val;
+}
+
+extern int regs_query_register_offset(const char *name);
+extern unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs,
+                                              unsigned int n);
+
+void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
+                          unsigned long frame_pointer);
+int do_syscall_trace_enter(struct pt_regs *regs);
+void do_syscall_trace_exit(struct pt_regs *regs);
+
+/**
+ * regs_get_register() - get register value from its offset
+ * @regs:      pt_regs from which the register value is read
+ * @offset:    offset of the register.
+ *
+ * regs_get_register() returns the value of the register at @offset from @regs.
+ * The @offset is the offset of the register in struct pt_regs.
+ * If @offset is larger than MAX_REG_OFFSET, this returns 0.
+ */
+static inline unsigned long regs_get_register(struct pt_regs *regs,
+                                             unsigned int offset)
+{
+       if (unlikely(offset > MAX_REG_OFFSET))
+               return 0;
+
+       return *(unsigned long *)((unsigned long)regs + offset);
+}
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_RISCV_PTRACE_H */
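
Together the two helpers give kprobes-style code a name-based way to read a
trapped register; a minimal sketch (the ABI name "a0" and the fallback value
are illustrative):

    static unsigned long read_reg_by_name(struct pt_regs *regs, const char *name)
    {
            int offset = regs_query_register_offset(name);  /* e.g. "a0" */

            if (offset < 0)
                    return 0;       /* unknown register name */
            return regs_get_register(regs, offset);
    }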
index 653edb2..d702741 100644 (file)
@@ -51,10 +51,10 @@ enum sbi_ext_rfence_fid {
        SBI_EXT_RFENCE_REMOTE_FENCE_I = 0,
        SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
        SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
-       SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
        SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA_VMID,
-       SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
+       SBI_EXT_RFENCE_REMOTE_HFENCE_GVMA,
        SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA_ASID,
+       SBI_EXT_RFENCE_REMOTE_HFENCE_VVMA,
 };
 
 enum sbi_ext_hsm_fid {
@@ -89,7 +89,7 @@ struct sbiret {
        long value;
 };
 
-int sbi_init(void);
+void sbi_init(void);
 struct sbiret sbi_ecall(int ext, int fid, unsigned long arg0,
                        unsigned long arg1, unsigned long arg2,
                        unsigned long arg3, unsigned long arg4,
@@ -100,13 +100,13 @@ int sbi_console_getchar(void);
 void sbi_set_timer(uint64_t stime_value);
 void sbi_shutdown(void);
 void sbi_clear_ipi(void);
-void sbi_send_ipi(const unsigned long *hart_mask);
-void sbi_remote_fence_i(const unsigned long *hart_mask);
-void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_send_ipi(const unsigned long *hart_mask);
+int sbi_remote_fence_i(const unsigned long *hart_mask);
+int sbi_remote_sfence_vma(const unsigned long *hart_mask,
                           unsigned long start,
                           unsigned long size);
 
-void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
                                unsigned long start,
                                unsigned long size,
                                unsigned long asid);
@@ -147,11 +147,7 @@ static inline unsigned long sbi_minor_version(void)
 
 int sbi_err_map_linux_errno(int err);
 #else /* CONFIG_RISCV_SBI */
-/* stubs for code that is only reachable under IS_ENABLED(CONFIG_RISCV_SBI): */
-void sbi_set_timer(uint64_t stime_value);
-void sbi_clear_ipi(void);
-void sbi_send_ipi(const unsigned long *hart_mask);
-void sbi_remote_fence_i(const unsigned long *hart_mask);
-void sbi_init(void);
+static inline int sbi_remote_fence_i(const unsigned long *hart_mask) { return -1; }
+static inline void sbi_init(void) {}
 #endif /* CONFIG_RISCV_SBI */
 #endif /* _ASM_RISCV_SBI_H */
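Changing these prototypes from void to int lets callers see SBI failures; a minimal sketch of the new calling pattern (the wrapper and message are hypothetical):

    /* Hypothetical caller: propagate a failed remote fence instead of
     * silently ignoring it, matching the new int return type.
     */
    static void remote_icache_flush(const unsigned long *hart_mask)
    {
            if (sbi_remote_fence_i(hart_mask) < 0)
                    pr_warn("SBI remote fence.i failed\n");
    }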
index 8b80c80..6887b3d 100644 (file)
@@ -22,7 +22,7 @@ static inline int set_memory_ro(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_rw(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_x(unsigned long addr, int numpages) { return 0; }
 static inline int set_memory_nx(unsigned long addr, int numpages) { return 0; }
-static inline void protect_kernel_text_data(void) {};
+static inline void protect_kernel_text_data(void) {}
 static inline int set_memory_rw_nx(unsigned long addr, int numpages) { return 0; }
 #endif
 
index 6c8363b..f494066 100644 (file)
@@ -21,42 +21,4 @@ void soc_early_init(void);
 extern unsigned long __soc_early_init_table_start;
 extern unsigned long __soc_early_init_table_end;
 
-/*
- * Allows Linux to provide a device tree, which is necessary for SOCs that
- * don't provide a useful one on their own.
- */
-struct soc_builtin_dtb {
-       unsigned long vendor_id;
-       unsigned long arch_id;
-       unsigned long imp_id;
-       void *(*dtb_func)(void);
-};
-
-/*
- * The argument name must specify a valid DTS file name without the dts
- * extension.
- */
-#define SOC_BUILTIN_DTB_DECLARE(name, vendor, arch, impl)              \
-       extern void *__dtb_##name##_begin;                              \
-                                                                       \
-       static __init __used                                            \
-       void *__soc_builtin_dtb_f__##name(void)                         \
-       {                                                               \
-               return (void *)&__dtb_##name##_begin;                   \
-       }                                                               \
-                                                                       \
-       static const struct soc_builtin_dtb __soc_builtin_dtb__##name   \
-               __used __section("__soc_builtin_dtb_table") =           \
-       {                                                               \
-               .vendor_id = vendor,                                    \
-               .arch_id   = arch,                                      \
-               .imp_id    = impl,                                      \
-               .dtb_func  = __soc_builtin_dtb_f__##name,               \
-       }
-
-extern unsigned long __soc_builtin_dtb_table_start;
-extern unsigned long __soc_builtin_dtb_table_end;
-
-void *soc_lookup_builtin_dtb(void);
-
 #endif
index 5962f88..09093af 100644 (file)
@@ -24,6 +24,7 @@ static __always_inline void boot_init_stack_canary(void)
        canary &= CANARY_MASK;
 
        current->stack_canary = canary;
-       __stack_chk_guard = current->stack_canary;
+       if (!IS_ENABLED(CONFIG_STACKPROTECTOR_PER_TASK))
+               __stack_chk_guard = current->stack_canary;
 }
 #endif /* _ASM_RISCV_STACKPROTECTOR_H */
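With CONFIG_STACKPROTECTOR_PER_TASK the canary is fetched from the current task rather than from the global guard; a sketch of what the generated check reduces to (assuming tp holds the task_struct pointer, as it does on RISC-V):

    /* Sketch: per-task canary lookup; the compiler emits the equivalent of
     * a load at TSK_STACK_CANARY(tp) instead of referencing __stack_chk_guard.
     */
    static inline unsigned long current_stack_canary(void)
    {
            return current->stack_canary;
    }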
index 470a65c..3450c19 100644 (file)
@@ -13,5 +13,7 @@ struct stackframe {
 
 extern void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
                                    bool (*fn)(void *, unsigned long), void *arg);
+extern void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
+                          const char *loglvl);
 
 #endif /* _ASM_RISCV_STACKTRACE_H */
index 97bf5a1..0e549a3 100644 (file)
@@ -75,6 +75,7 @@ struct thread_info {
 #define TIF_SYSCALL_AUDIT      7       /* syscall auditing */
 #define TIF_SECCOMP            8       /* syscall secure computing */
 #define TIF_NOTIFY_SIGNAL      9       /* signal notifications exist */
+#define TIF_UPROBE             10      /* uprobe breakpoint or singlestep */
 
 #define _TIF_SYSCALL_TRACE     (1 << TIF_SYSCALL_TRACE)
 #define _TIF_NOTIFY_RESUME     (1 << TIF_NOTIFY_RESUME)
@@ -84,10 +85,11 @@ struct thread_info {
 #define _TIF_SYSCALL_AUDIT     (1 << TIF_SYSCALL_AUDIT)
 #define _TIF_SECCOMP           (1 << TIF_SECCOMP)
 #define _TIF_NOTIFY_SIGNAL     (1 << TIF_NOTIFY_SIGNAL)
+#define _TIF_UPROBE            (1 << TIF_UPROBE)
 
 #define _TIF_WORK_MASK \
        (_TIF_NOTIFY_RESUME | _TIF_SIGPENDING | _TIF_NEED_RESCHED | \
-        _TIF_NOTIFY_SIGNAL)
+        _TIF_NOTIFY_SIGNAL | _TIF_UPROBE)
 
 #define _TIF_SYSCALL_WORK \
        (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_AUDIT | \
index 81de51e..507cae2 100644 (file)
@@ -88,4 +88,6 @@ static inline int read_current_timer(unsigned long *timer_val)
        return 0;
 }
 
+extern void time_init(void);
+
 #endif /* _ASM_RISCV_TIMEX_H */
index 824b2c9..f944062 100644 (file)
@@ -306,7 +306,9 @@ do {                                                                \
  * data types like structures or arrays.
  *
  * @ptr must have pointer-to-simple-variable type, and @x must be assignable
- * to the result of dereferencing @ptr.
+ * to the result of dereferencing @ptr. The value of @x is copied into a
+ * local variable first so that it cannot be evaluated inside the block that
+ * enables user-space access (which would bypass user-space protection if
+ * evaluating @x involved, say, a function call).
  *
  * Caller must check the pointer with access_ok() before calling this
  * function.
@@ -316,12 +318,13 @@ do {                                                              \
 #define __put_user(x, ptr)                                     \
 ({                                                             \
        __typeof__(*(ptr)) __user *__gu_ptr = (ptr);            \
+       __typeof__(*__gu_ptr) __val = (x);                      \
        long __pu_err = 0;                                      \
                                                                \
        __chk_user_ptr(__gu_ptr);                               \
                                                                \
        __enable_user_access();                                 \
-       __put_user_nocheck(x, __gu_ptr, __pu_err);              \
+       __put_user_nocheck(__val, __gu_ptr, __pu_err);          \
        __disable_user_access();                                \
                                                                \
        __pu_err;                                               \
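The hazard this closes, sketched (the value-producing call below is hypothetical): without the local copy, @x could be evaluated inside the user-access window.

    /* Before the fix, compute_value() could have been evaluated between
     * __enable_user_access() and __disable_user_access(), i.e. while
     * user-space protection was lifted; with __val it runs first.
     */
    err = __put_user(compute_value(), uptr);    /* compute_value(): hypothetical */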
diff --git a/arch/riscv/include/asm/uprobes.h b/arch/riscv/include/asm/uprobes.h
new file mode 100644 (file)
index 0000000..f2183e0
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifndef _ASM_RISCV_UPROBES_H
+#define _ASM_RISCV_UPROBES_H
+
+#include <asm/probes.h>
+#include <asm/patch.h>
+#include <asm/bug.h>
+
+#define MAX_UINSN_BYTES                8
+
+#ifdef CONFIG_RISCV_ISA_C
+#define UPROBE_SWBP_INSN       __BUG_INSN_16
+#define UPROBE_SWBP_INSN_SIZE  2
+#else
+#define UPROBE_SWBP_INSN       __BUG_INSN_32
+#define UPROBE_SWBP_INSN_SIZE  4
+#endif
+#define UPROBE_XOL_SLOT_BYTES  MAX_UINSN_BYTES
+
+typedef u32 uprobe_opcode_t;
+
+struct arch_uprobe_task {
+       unsigned long   saved_cause;
+};
+
+struct arch_uprobe {
+       union {
+               u8 insn[MAX_UINSN_BYTES];
+               u8 ixol[MAX_UINSN_BYTES];
+       };
+       struct arch_probe_insn api;
+       unsigned long insn_size;
+       bool simulate;
+};
+
+bool uprobe_breakpoint_handler(struct pt_regs *regs);
+bool uprobe_single_step_handler(struct pt_regs *regs);
+
+#endif /* _ASM_RISCV_UPROBES_H */
index f6caf4d..647a47f 100644 (file)
@@ -4,9 +4,11 @@
 #
 
 ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_ftrace.o = -pg
-CFLAGS_REMOVE_patch.o  = -pg
+CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_patch.o  = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_sbi.o    = $(CC_FLAGS_FTRACE)
 endif
+CFLAGS_syscall_table.o += $(call cc-option,-Wno-override-init,)
 
 extra-y += head.o
 extra-y += vmlinux.lds
@@ -29,6 +31,7 @@ obj-y += riscv_ksyms.o
 obj-y  += stacktrace.o
 obj-y  += cacheinfo.o
 obj-y  += patch.o
+obj-y  += probes/
 obj-$(CONFIG_MMU) += vdso.o vdso/
 
 obj-$(CONFIG_RISCV_M_MODE)     += traps_misaligned.o
index b79ffa3..9ef3334 100644 (file)
@@ -68,6 +68,9 @@ void asm_offsets(void)
        OFFSET(TASK_THREAD_F30, task_struct, thread.fstate.f[30]);
        OFFSET(TASK_THREAD_F31, task_struct, thread.fstate.f[31]);
        OFFSET(TASK_THREAD_FCSR, task_struct, thread.fstate.fcsr);
+#ifdef CONFIG_STACKPROTECTOR
+       OFFSET(TSK_STACK_CANARY, task_struct, stack_canary);
+#endif
 
        DEFINE(PT_SIZE, sizeof(struct pt_regs));
        OFFSET(PT_EPC, pt_regs, epc);
index 744f320..76274a4 100644 (file)
@@ -447,6 +447,7 @@ ENDPROC(__switch_to)
 #endif
 
        .section ".rodata"
+       .align LGREG
        /* Exception vector table */
 ENTRY(excp_vect_table)
        RISCV_PTR do_trap_insn_misaligned
index 765b624..7f1e520 100644 (file)
@@ -72,29 +72,56 @@ static int __ftrace_modify_call(unsigned long hook_pos, unsigned long target,
        return 0;
 }
 
+/*
+ * Put 4 instructions (16 bytes) at the front of the function, within the
+ * patchable function entry nops' area:
+ *
+ * 0: REG_S  ra, -SZREG(sp)
+ * 1: auipc  ra, 0x?
+ * 2: jalr   -?(ra)
+ * 3: REG_L  ra, -SZREG(sp)
+ *
+ * So the opcodes are:
+ * 0: 0xfe113c23 (sd)/0xfe112e23 (sw)
+ * 1: 0x???????? -> auipc
+ * 2: 0x???????? -> jalr
+ * 3: 0xff813083 (ld)/0xffc12083 (lw)
+ */
+#if __riscv_xlen == 64
+#define INSN0  0xfe113c23
+#define INSN3  0xff813083
+#elif __riscv_xlen == 32
+#define INSN0  0xfe112e23
+#define INSN3  0xffc12083
+#endif
+
+#define FUNC_ENTRY_SIZE        16
+#define FUNC_ENTRY_JMP 4
+
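For reference, the immediate split done by to_auipc_insn()/to_jalr_insn() (from asm/ftrace.h) compensates for jalr sign-extending its 12-bit immediate; a sketch of the upper-immediate arithmetic:

    /* Sketch: if bit 11 of the pc-relative offset is set, the sign-extended
     * jalr immediate subtracts 0x1000, so auipc's upper part must add it back.
     */
    static unsigned int auipc_upper(unsigned int offset)
    {
            return (offset & 0x800) ? (offset & 0xfffff000) + 0x1000
                                    : (offset & 0xfffff000);
    }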
 int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr)
 {
-       int ret = ftrace_check_current_call(rec->ip, NULL);
+       unsigned int call[4] = {INSN0, 0, 0, INSN3};
+       unsigned long target = addr;
+       unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
 
-       if (ret)
-               return ret;
+       call[1] = to_auipc_insn((unsigned int)(target - caller));
+       call[2] = to_jalr_insn((unsigned int)(target - caller));
 
-       return __ftrace_modify_call(rec->ip, addr, true);
+       if (patch_text_nosync((void *)rec->ip, call, FUNC_ENTRY_SIZE))
+               return -EPERM;
+
+       return 0;
 }
 
 int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec,
                    unsigned long addr)
 {
-       unsigned int call[2];
-       int ret;
+       unsigned int nops[4] = {NOP4, NOP4, NOP4, NOP4};
 
-       make_call(rec->ip, addr, call);
-       ret = ftrace_check_current_call(rec->ip, call);
-
-       if (ret)
-               return ret;
+       if (patch_text_nosync((void *)rec->ip, nops, FUNC_ENTRY_SIZE))
+               return -EPERM;
 
-       return __ftrace_modify_call(rec->ip, addr, false);
+       return 0;
 }
 
 
@@ -139,15 +166,16 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr,
                       unsigned long addr)
 {
        unsigned int call[2];
+       unsigned long caller = rec->ip + FUNC_ENTRY_JMP;
        int ret;
 
-       make_call(rec->ip, old_addr, call);
-       ret = ftrace_check_current_call(rec->ip, call);
+       make_call(caller, old_addr, call);
+       ret = ftrace_check_current_call(caller, call);
 
        if (ret)
                return ret;
 
-       return __ftrace_modify_call(rec->ip, addr, true);
+       return __ftrace_modify_call(caller, addr, true);
 }
 #endif
 
@@ -176,53 +204,30 @@ void prepare_ftrace_return(unsigned long *parent, unsigned long self_addr,
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 extern void ftrace_graph_call(void);
+extern void ftrace_graph_regs_call(void);
 int ftrace_enable_ftrace_graph_caller(void)
 {
-       unsigned int call[2];
-       static int init_graph = 1;
        int ret;
 
-       make_call(&ftrace_graph_call, &ftrace_stub, call);
-
-       /*
-        * When enabling graph tracer for the first time, ftrace_graph_call
-        * should contains a call to ftrace_stub.  Once it has been disabled,
-        * the 8-bytes at the position becomes NOPs.
-        */
-       if (init_graph) {
-               ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
-                                               call);
-               init_graph = 0;
-       } else {
-               ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
-                                               NULL);
-       }
-
+       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+                                   (unsigned long)&prepare_ftrace_return, true);
        if (ret)
                return ret;
 
-       return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
                                    (unsigned long)&prepare_ftrace_return, true);
 }
 
 int ftrace_disable_ftrace_graph_caller(void)
 {
-       unsigned int call[2];
        int ret;
 
-       make_call(&ftrace_graph_call, &prepare_ftrace_return, call);
-
-       /*
-        * This is to make sure that ftrace_enable_ftrace_graph_caller
-        * did the right thing.
-        */
-       ret = ftrace_check_current_call((unsigned long)&ftrace_graph_call,
-                                       call);
-
+       ret = __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+                                   (unsigned long)&prepare_ftrace_return, false);
        if (ret)
                return ret;
 
-       return __ftrace_modify_call((unsigned long)&ftrace_graph_call,
+       return __ftrace_modify_call((unsigned long)&ftrace_graph_regs_call,
                                    (unsigned long)&prepare_ftrace_return, false);
 }
 #endif /* CONFIG_DYNAMIC_FTRACE */
index 16e9941..f5a9bad 100644 (file)
@@ -260,7 +260,11 @@ clear_bss_done:
 
        /* Initialize page tables and relocate to virtual addresses */
        la sp, init_thread_union + THREAD_SIZE
+#ifdef CONFIG_BUILTIN_DTB
+       la a0, __dtb_start
+#else
        mv a0, s1
+#endif /* CONFIG_BUILTIN_DTB */
        call setup_vm
 #ifdef CONFIG_MMU
        la a0, early_pg_dir
index 8c212ef..71a76a6 100644 (file)
@@ -3,7 +3,7 @@
  * Copyright (C) 2020 Western Digital Corporation or its affiliates.
  * Linker script variables to be set after section resolution, as
  * ld.lld does not like variables assigned before SECTIONS is processed.
- * Based on arch/arm64/kerne/image-vars.h
+ * Based on arch/arm64/kernel/image-vars.h
  */
 #ifndef __RISCV_KERNEL_IMAGE_VARS_H
 #define __RISCV_KERNEL_IMAGE_VARS_H
index 35a6ed7..d171eca 100644 (file)
 
        .text
 
-       .macro SAVE_ABI_STATE
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       addi    sp, sp, -48
-       sd      s0, 32(sp)
-       sd      ra, 40(sp)
-       addi    s0, sp, 48
-       sd      t0, 24(sp)
-       sd      t1, 16(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-       sd      t2, 8(sp)
-#endif
-#else
-       addi    sp, sp, -16
-       sd      s0, 0(sp)
-       sd      ra, 8(sp)
-       addi    s0, sp, 16
-#endif
+#define FENTRY_RA_OFFSET       12
+#define ABI_SIZE_ON_STACK      72
+#define ABI_A0                 0
+#define ABI_A1                 8
+#define ABI_A2                 16
+#define ABI_A3                 24
+#define ABI_A4                 32
+#define ABI_A5                 40
+#define ABI_A6                 48
+#define ABI_A7                 56
+#define ABI_RA                 64
+
+       .macro SAVE_ABI
+       addi    sp, sp, -SZREG
+       addi    sp, sp, -ABI_SIZE_ON_STACK
+
+       REG_S   a0, ABI_A0(sp)
+       REG_S   a1, ABI_A1(sp)
+       REG_S   a2, ABI_A2(sp)
+       REG_S   a3, ABI_A3(sp)
+       REG_S   a4, ABI_A4(sp)
+       REG_S   a5, ABI_A5(sp)
+       REG_S   a6, ABI_A6(sp)
+       REG_S   a7, ABI_A7(sp)
+       REG_S   ra, ABI_RA(sp)
        .endm
 
-       .macro RESTORE_ABI_STATE
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       ld      s0, 32(sp)
-       ld      ra, 40(sp)
-       addi    sp, sp, 48
-#else
-       ld      ra, 8(sp)
-       ld      s0, 0(sp)
-       addi    sp, sp, 16
-#endif
+       .macro RESTORE_ABI
+       REG_L   a0, ABI_A0(sp)
+       REG_L   a1, ABI_A1(sp)
+       REG_L   a2, ABI_A2(sp)
+       REG_L   a3, ABI_A3(sp)
+       REG_L   a4, ABI_A4(sp)
+       REG_L   a5, ABI_A5(sp)
+       REG_L   a6, ABI_A6(sp)
+       REG_L   a7, ABI_A7(sp)
+       REG_L   ra, ABI_RA(sp)
+
+       addi    sp, sp, ABI_SIZE_ON_STACK
+       addi    sp, sp, SZREG
        .endm
 
-       .macro RESTORE_GRAPH_ARGS
-       ld      a0, 24(sp)
-       ld      a1, 16(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-       ld      a2, 8(sp)
-#endif
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
+       .macro SAVE_ALL
+       addi    sp, sp, -SZREG
+       addi    sp, sp, -PT_SIZE_ON_STACK
+
+       REG_S x1,  PT_EPC(sp)
+       addi    sp, sp, PT_SIZE_ON_STACK
+       REG_L x1,  (sp)
+       addi    sp, sp, -PT_SIZE_ON_STACK
+       REG_S x1,  PT_RA(sp)
+       REG_L x1,  PT_EPC(sp)
+
+       REG_S x2,  PT_SP(sp)
+       REG_S x3,  PT_GP(sp)
+       REG_S x4,  PT_TP(sp)
+       REG_S x5,  PT_T0(sp)
+       REG_S x6,  PT_T1(sp)
+       REG_S x7,  PT_T2(sp)
+       REG_S x8,  PT_S0(sp)
+       REG_S x9,  PT_S1(sp)
+       REG_S x10, PT_A0(sp)
+       REG_S x11, PT_A1(sp)
+       REG_S x12, PT_A2(sp)
+       REG_S x13, PT_A3(sp)
+       REG_S x14, PT_A4(sp)
+       REG_S x15, PT_A5(sp)
+       REG_S x16, PT_A6(sp)
+       REG_S x17, PT_A7(sp)
+       REG_S x18, PT_S2(sp)
+       REG_S x19, PT_S3(sp)
+       REG_S x20, PT_S4(sp)
+       REG_S x21, PT_S5(sp)
+       REG_S x22, PT_S6(sp)
+       REG_S x23, PT_S7(sp)
+       REG_S x24, PT_S8(sp)
+       REG_S x25, PT_S9(sp)
+       REG_S x26, PT_S10(sp)
+       REG_S x27, PT_S11(sp)
+       REG_S x28, PT_T3(sp)
+       REG_S x29, PT_T4(sp)
+       REG_S x30, PT_T5(sp)
+       REG_S x31, PT_T6(sp)
        .endm
 
-ENTRY(ftrace_graph_caller)
-       addi    sp, sp, -16
-       sd      s0, 0(sp)
-       sd      ra, 8(sp)
-       addi    s0, sp, 16
-ftrace_graph_call:
-       .global ftrace_graph_call
-       /*
-        * Calling ftrace_enable/disable_ftrace_graph_caller would overwrite the
-        * call below.  Check ftrace_modify_all_code for details.
-        */
-       call    ftrace_stub
-       ld      ra, 8(sp)
-       ld      s0, 0(sp)
-       addi    sp, sp, 16
-       ret
-ENDPROC(ftrace_graph_caller)
+       .macro RESTORE_ALL
+       REG_L x1,  PT_RA(sp)
+       addi    sp, sp, PT_SIZE_ON_STACK
+       REG_S x1,  (sp)
+       addi    sp, sp, -PT_SIZE_ON_STACK
+       REG_L x1,  PT_EPC(sp)
+       REG_L x2,  PT_SP(sp)
+       REG_L x3,  PT_GP(sp)
+       REG_L x4,  PT_TP(sp)
+       REG_L x5,  PT_T0(sp)
+       REG_L x6,  PT_T1(sp)
+       REG_L x7,  PT_T2(sp)
+       REG_L x8,  PT_S0(sp)
+       REG_L x9,  PT_S1(sp)
+       REG_L x10, PT_A0(sp)
+       REG_L x11, PT_A1(sp)
+       REG_L x12, PT_A2(sp)
+       REG_L x13, PT_A3(sp)
+       REG_L x14, PT_A4(sp)
+       REG_L x15, PT_A5(sp)
+       REG_L x16, PT_A6(sp)
+       REG_L x17, PT_A7(sp)
+       REG_L x18, PT_S2(sp)
+       REG_L x19, PT_S3(sp)
+       REG_L x20, PT_S4(sp)
+       REG_L x21, PT_S5(sp)
+       REG_L x22, PT_S6(sp)
+       REG_L x23, PT_S7(sp)
+       REG_L x24, PT_S8(sp)
+       REG_L x25, PT_S9(sp)
+       REG_L x26, PT_S10(sp)
+       REG_L x27, PT_S11(sp)
+       REG_L x28, PT_T3(sp)
+       REG_L x29, PT_T4(sp)
+       REG_L x30, PT_T5(sp)
+       REG_L x31, PT_T6(sp)
+
+       addi    sp, sp, PT_SIZE_ON_STACK
+       addi    sp, sp, SZREG
+       .endm
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_REGS */
 
 ENTRY(ftrace_caller)
-       /*
-        * a0: the address in the caller when calling ftrace_caller
-        * a1: the caller's return address
-        * a2: the address of global variable function_trace_op
-        */
-       ld      a1, -8(s0)
-       addi    a0, ra, -MCOUNT_INSN_SIZE
-       la      t5, function_trace_op
-       ld      a2, 0(t5)
+       SAVE_ABI
 
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       /*
-        * the graph tracer (specifically, prepare_ftrace_return) needs these
-        * arguments but for now the function tracer occupies the regs, so we
-        * save them in temporary regs to recover later.
-        */
-       addi    t0, s0, -8
-       mv      t1, a0
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-       ld      t2, -16(s0)
-#endif
-#endif
+       addi    a0, ra, -FENTRY_RA_OFFSET
+       la      a1, function_trace_op
+       REG_L   a2, 0(a1)
+       REG_L   a1, ABI_SIZE_ON_STACK(sp)
+       mv      a3, sp
 
-       SAVE_ABI_STATE
 ftrace_call:
        .global ftrace_call
-       /*
-        * For the dynamic ftrace to work, here we should reserve at least
-        * 8 bytes for a functional auipc-jalr pair.  The following call
-        * serves this purpose.
-        *
-        * Calling ftrace_update_ftrace_func would overwrite the nops below.
-        * Check ftrace_modify_all_code for details.
-        */
        call    ftrace_stub
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       RESTORE_GRAPH_ARGS
-       call    ftrace_graph_caller
+       addi    a0, sp, ABI_SIZE_ON_STACK
+       REG_L   a1, ABI_RA(sp)
+       addi    a1, a1, -FENTRY_RA_OFFSET
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+       mv      a2, s0
 #endif
-
-       RESTORE_ABI_STATE
+ftrace_graph_call:
+       .global ftrace_graph_call
+       call    ftrace_stub
+#endif
+       RESTORE_ABI
        ret
 ENDPROC(ftrace_caller)
 
 #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS
-       .macro SAVE_ALL
-       addi    sp, sp, -(PT_SIZE_ON_STACK+16)
-       sd      s0, (PT_SIZE_ON_STACK)(sp)
-       sd      ra, (PT_SIZE_ON_STACK+8)(sp)
-       addi    s0, sp, (PT_SIZE_ON_STACK+16)
-
-       sd x1,  PT_RA(sp)
-       sd x2,  PT_SP(sp)
-       sd x3,  PT_GP(sp)
-       sd x4,  PT_TP(sp)
-       sd x5,  PT_T0(sp)
-       sd x6,  PT_T1(sp)
-       sd x7,  PT_T2(sp)
-       sd x8,  PT_S0(sp)
-       sd x9,  PT_S1(sp)
-       sd x10, PT_A0(sp)
-       sd x11, PT_A1(sp)
-       sd x12, PT_A2(sp)
-       sd x13, PT_A3(sp)
-       sd x14, PT_A4(sp)
-       sd x15, PT_A5(sp)
-       sd x16, PT_A6(sp)
-       sd x17, PT_A7(sp)
-       sd x18, PT_S2(sp)
-       sd x19, PT_S3(sp)
-       sd x20, PT_S4(sp)
-       sd x21, PT_S5(sp)
-       sd x22, PT_S6(sp)
-       sd x23, PT_S7(sp)
-       sd x24, PT_S8(sp)
-       sd x25, PT_S9(sp)
-       sd x26, PT_S10(sp)
-       sd x27, PT_S11(sp)
-       sd x28, PT_T3(sp)
-       sd x29, PT_T4(sp)
-       sd x30, PT_T5(sp)
-       sd x31, PT_T6(sp)
-       .endm
-
-       .macro RESTORE_ALL
-       ld x1,  PT_RA(sp)
-       ld x2,  PT_SP(sp)
-       ld x3,  PT_GP(sp)
-       ld x4,  PT_TP(sp)
-       ld x5,  PT_T0(sp)
-       ld x6,  PT_T1(sp)
-       ld x7,  PT_T2(sp)
-       ld x8,  PT_S0(sp)
-       ld x9,  PT_S1(sp)
-       ld x10, PT_A0(sp)
-       ld x11, PT_A1(sp)
-       ld x12, PT_A2(sp)
-       ld x13, PT_A3(sp)
-       ld x14, PT_A4(sp)
-       ld x15, PT_A5(sp)
-       ld x16, PT_A6(sp)
-       ld x17, PT_A7(sp)
-       ld x18, PT_S2(sp)
-       ld x19, PT_S3(sp)
-       ld x20, PT_S4(sp)
-       ld x21, PT_S5(sp)
-       ld x22, PT_S6(sp)
-       ld x23, PT_S7(sp)
-       ld x24, PT_S8(sp)
-       ld x25, PT_S9(sp)
-       ld x26, PT_S10(sp)
-       ld x27, PT_S11(sp)
-       ld x28, PT_T3(sp)
-       ld x29, PT_T4(sp)
-       ld x30, PT_T5(sp)
-       ld x31, PT_T6(sp)
-
-       ld      s0, (PT_SIZE_ON_STACK)(sp)
-       ld      ra, (PT_SIZE_ON_STACK+8)(sp)
-       addi    sp, sp, (PT_SIZE_ON_STACK+16)
-       .endm
-
-       .macro RESTORE_GRAPH_REG_ARGS
-       ld      a0, PT_T0(sp)
-       ld      a1, PT_T1(sp)
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-       ld      a2, PT_T2(sp)
-#endif
-       .endm
-
-/*
- * Most of the contents are the same as ftrace_caller.
- */
 ENTRY(ftrace_regs_caller)
-       /*
-        * a3: the address of all registers in the stack
-        */
-       ld      a1, -8(s0)
-       addi    a0, ra, -MCOUNT_INSN_SIZE
-       la      t5, function_trace_op
-       ld      a2, 0(t5)
-       addi    a3, sp, -(PT_SIZE_ON_STACK+16)
-
-#ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       addi    t0, s0, -8
-       mv      t1, a0
-#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
-       ld      t2, -16(s0)
-#endif
-#endif
        SAVE_ALL
 
+       addi    a0, ra, -FENTRY_RA_OFFSET
+       la      a1, function_trace_op
+       REG_L   a2, 0(a1)
+       REG_L   a1, PT_SIZE_ON_STACK(sp)
+       mv      a3, sp
+
 ftrace_regs_call:
        .global ftrace_regs_call
        call    ftrace_stub
 
 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
-       RESTORE_GRAPH_REG_ARGS
-       call    ftrace_graph_caller
+       addi    a0, sp, PT_RA
+       REG_L   a1, PT_EPC(sp)
+       addi    a1, a1, -FENTRY_RA_OFFSET
+#ifdef HAVE_FUNCTION_GRAPH_FP_TEST
+       mv      a2, s0
+#endif
+ftrace_graph_regs_call:
+       .global ftrace_graph_regs_call
+       call    ftrace_stub
 #endif
 
        RESTORE_ALL
index 3fe7a52..0b55287 100644 (file)
@@ -20,7 +20,12 @@ struct patch_insn {
 };
 
 #ifdef CONFIG_MMU
-static void *patch_map(void *addr, int fixmap)
+/*
+ * fix_to_virt(idx) needs a compile-time-constant index (not a value that
+ * arrives in a register at run time), otherwise its BUILD_BUG_ON fires with
+ * "idx >= __end_of_fixed_addresses". Hence '__always_inline' and the
+ * 'const unsigned int fixmap' parameter here.
+ */
+static __always_inline void *patch_map(void *addr, const unsigned int fixmap)
 {
        uintptr_t uintaddr = (uintptr_t) addr;
        struct page *page;
@@ -37,7 +42,6 @@ static void *patch_map(void *addr, int fixmap)
        return (void *)set_fixmap_offset(fixmap, page_to_phys(page) +
                                         (uintaddr & ~PAGE_MASK));
 }
-NOKPROBE_SYMBOL(patch_map);
 
 static void patch_unmap(int fixmap)
 {
diff --git a/arch/riscv/kernel/probes/Makefile b/arch/riscv/kernel/probes/Makefile
new file mode 100644 (file)
index 0000000..7f0840d
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_KPROBES)          += kprobes.o decode-insn.o simulate-insn.o
+obj-$(CONFIG_KPROBES)          += kprobes_trampoline.o
+obj-$(CONFIG_KPROBES_ON_FTRACE)        += ftrace.o
+obj-$(CONFIG_UPROBES)          += uprobes.o decode-insn.o simulate-insn.o
+CFLAGS_REMOVE_simulate-insn.o = $(CC_FLAGS_FTRACE)
diff --git a/arch/riscv/kernel/probes/decode-insn.c b/arch/riscv/kernel/probes/decode-insn.c
new file mode 100644 (file)
index 0000000..0ed043a
--- /dev/null
@@ -0,0 +1,48 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+#include <linux/module.h>
+#include <linux/kallsyms.h>
+#include <asm/sections.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+/* Return:
+ *   INSN_REJECTED     If the instruction is not allowed to be probed,
+ *   INSN_GOOD_NO_SLOT If the instruction is supported but must be simulated,
+ *   INSN_GOOD         If the instruction can be single-stepped from a slot.
+ */
+enum probe_insn __kprobes
+riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *api)
+{
+       probe_opcode_t insn = *addr;
+
+       /*
+        * Reject instructions list:
+        */
+       RISCV_INSN_REJECTED(system,             insn);
+       RISCV_INSN_REJECTED(fence,              insn);
+
+       /*
+        * Simulate instructions list:
+        * TODO: the REJECTED ones below need to be implemented
+        */
+#ifdef CONFIG_RISCV_ISA_C
+       RISCV_INSN_REJECTED(c_j,                insn);
+       RISCV_INSN_REJECTED(c_jr,               insn);
+       RISCV_INSN_REJECTED(c_jal,              insn);
+       RISCV_INSN_REJECTED(c_jalr,             insn);
+       RISCV_INSN_REJECTED(c_beqz,             insn);
+       RISCV_INSN_REJECTED(c_bnez,             insn);
+       RISCV_INSN_REJECTED(c_ebreak,           insn);
+#endif
+
+       RISCV_INSN_REJECTED(auipc,              insn);
+       RISCV_INSN_REJECTED(branch,             insn);
+
+       RISCV_INSN_SET_SIMULATE(jal,            insn);
+       RISCV_INSN_SET_SIMULATE(jalr,           insn);
+
+       return INSN_GOOD;
+}
diff --git a/arch/riscv/kernel/probes/decode-insn.h b/arch/riscv/kernel/probes/decode-insn.h
new file mode 100644 (file)
index 0000000..42269a7
--- /dev/null
@@ -0,0 +1,18 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _RISCV_KERNEL_KPROBES_DECODE_INSN_H
+#define _RISCV_KERNEL_KPROBES_DECODE_INSN_H
+
+#include <asm/sections.h>
+#include <asm/kprobes.h>
+
+enum probe_insn {
+       INSN_REJECTED,
+       INSN_GOOD_NO_SLOT,
+       INSN_GOOD,
+};
+
+enum probe_insn __kprobes
+riscv_probe_decode_insn(probe_opcode_t *addr, struct arch_probe_insn *asi);
+
+#endif /* _RISCV_KERNEL_KPROBES_DECODE_INSN_H */
diff --git a/arch/riscv/kernel/probes/ftrace.c b/arch/riscv/kernel/probes/ftrace.c
new file mode 100644 (file)
index 0000000..17ca5e9
--- /dev/null
@@ -0,0 +1,55 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/kprobes.h>
+
+/* Ftrace callback handler for kprobes -- called with preemption disabled */
+void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
+                          struct ftrace_ops *ops, struct ftrace_regs *fregs)
+{
+       struct kprobe *p;
+       struct pt_regs *regs;
+       struct kprobe_ctlblk *kcb;
+
+       p = get_kprobe((kprobe_opcode_t *)ip);
+       if (unlikely(!p) || kprobe_disabled(p))
+               return;
+
+       regs = ftrace_get_regs(fregs);
+       kcb = get_kprobe_ctlblk();
+       if (kprobe_running()) {
+               kprobes_inc_nmissed_count(p);
+       } else {
+               unsigned long orig_ip = instruction_pointer(regs);
+
+               instruction_pointer_set(regs, ip);
+
+               __this_cpu_write(current_kprobe, p);
+               kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+               if (!p->pre_handler || !p->pre_handler(p, regs)) {
+                       /*
+                        * Emulate singlestep (and also recover regs->pc)
+                        * as if there is a nop
+                        */
+                       instruction_pointer_set(regs,
+                               (unsigned long)p->addr + MCOUNT_INSN_SIZE);
+                       if (unlikely(p->post_handler)) {
+                               kcb->kprobe_status = KPROBE_HIT_SSDONE;
+                               p->post_handler(p, regs, 0);
+                       }
+                       instruction_pointer_set(regs, orig_ip);
+               }
+
+               /*
+                * If pre_handler returns !0, it changes regs->pc. We have to
+                * skip emulating post_handler.
+                */
+               __this_cpu_write(current_kprobe, NULL);
+       }
+}
+NOKPROBE_SYMBOL(kprobe_ftrace_handler);
+
+int arch_prepare_kprobe_ftrace(struct kprobe *p)
+{
+       p->ainsn.api.insn = NULL;
+       return 0;
+}
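Usage is unchanged from regular kprobes; a hypothetical registration showing when this ftrace path is taken (symbol chosen arbitrarily):

    static struct kprobe kp = {
            .symbol_name = "kernel_clone",  /* arbitrary ftrace-able function */
    };

    static int __init kp_init(void)
    {
            /* probing the patchable entry means the kprobe is serviced via
             * kprobe_ftrace_handler() instead of an ebreak trap */
            return register_kprobe(&kp);
    }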
diff --git a/arch/riscv/kernel/probes/kprobes.c b/arch/riscv/kernel/probes/kprobes.c
new file mode 100644 (file)
index 0000000..7e2c78e
--- /dev/null
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/kprobes.h>
+#include <linux/extable.h>
+#include <linux/slab.h>
+#include <linux/stop_machine.h>
+#include <asm/ptrace.h>
+#include <linux/uaccess.h>
+#include <asm/sections.h>
+#include <asm/cacheflush.h>
+#include <asm/bug.h>
+#include <asm/patch.h>
+
+#include "decode-insn.h"
+
+DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL;
+DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk);
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *, struct pt_regs *);
+
+static void __kprobes arch_prepare_ss_slot(struct kprobe *p)
+{
+       unsigned long offset = GET_INSN_LENGTH(p->opcode);
+
+       p->ainsn.api.restore = (unsigned long)p->addr + offset;
+
+       patch_text(p->ainsn.api.insn, p->opcode);
+       patch_text((void *)((unsigned long)(p->ainsn.api.insn) + offset),
+                  __BUG_INSN_32);
+}
+
+static void __kprobes arch_prepare_simulate(struct kprobe *p)
+{
+       p->ainsn.api.restore = 0;
+}
+
+static void __kprobes arch_simulate_insn(struct kprobe *p, struct pt_regs *regs)
+{
+       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+       if (p->ainsn.api.handler)
+               p->ainsn.api.handler((u32)p->opcode,
+                                       (unsigned long)p->addr, regs);
+
+       post_kprobe_handler(kcb, regs);
+}
+
+int __kprobes arch_prepare_kprobe(struct kprobe *p)
+{
+       unsigned long probe_addr = (unsigned long)p->addr;
+
+       if (probe_addr & 0x1) {
+               pr_warn("Address not aligned.\n");
+
+               return -EINVAL;
+       }
+
+       /* copy instruction */
+       p->opcode = *p->addr;
+
+       /* decode instruction */
+       switch (riscv_probe_decode_insn(p->addr, &p->ainsn.api)) {
+       case INSN_REJECTED:     /* insn not supported */
+               return -EINVAL;
+
+       case INSN_GOOD_NO_SLOT: /* insn need simulation */
+               p->ainsn.api.insn = NULL;
+               break;
+
+       case INSN_GOOD: /* instruction uses slot */
+               p->ainsn.api.insn = get_insn_slot();
+               if (!p->ainsn.api.insn)
+                       return -ENOMEM;
+               break;
+       }
+
+       /* prepare the instruction */
+       if (p->ainsn.api.insn)
+               arch_prepare_ss_slot(p);
+       else
+               arch_prepare_simulate(p);
+
+       return 0;
+}
+
+/* install breakpoint in text */
+void __kprobes arch_arm_kprobe(struct kprobe *p)
+{
+       if ((p->opcode & __INSN_LENGTH_MASK) == __INSN_LENGTH_32)
+               patch_text(p->addr, __BUG_INSN_32);
+       else
+               patch_text(p->addr, __BUG_INSN_16);
+}
+
+/* remove breakpoint from text */
+void __kprobes arch_disarm_kprobe(struct kprobe *p)
+{
+       patch_text(p->addr, p->opcode);
+}
+
+void __kprobes arch_remove_kprobe(struct kprobe *p)
+{
+}
+
+static void __kprobes save_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+       kcb->prev_kprobe.kp = kprobe_running();
+       kcb->prev_kprobe.status = kcb->kprobe_status;
+}
+
+static void __kprobes restore_previous_kprobe(struct kprobe_ctlblk *kcb)
+{
+       __this_cpu_write(current_kprobe, kcb->prev_kprobe.kp);
+       kcb->kprobe_status = kcb->prev_kprobe.status;
+}
+
+static void __kprobes set_current_kprobe(struct kprobe *p)
+{
+       __this_cpu_write(current_kprobe, p);
+}
+
+/*
+ * Interrupts need to be disabled before single-step mode is set, and not
+ * reenabled until after single-step mode ends.
+ * Without disabling interrupts on the local CPU, an interrupt could arrive
+ * between the exception return and the start of the out-of-line single-step,
+ * which would result in wrongly single-stepping into the interrupt handler.
+ */
+static void __kprobes kprobes_save_local_irqflag(struct kprobe_ctlblk *kcb,
+                                               struct pt_regs *regs)
+{
+       kcb->saved_status = regs->status;
+       regs->status &= ~SR_SPIE;
+}
+
+static void __kprobes kprobes_restore_local_irqflag(struct kprobe_ctlblk *kcb,
+                                               struct pt_regs *regs)
+{
+       regs->status = kcb->saved_status;
+}
+
+static void __kprobes
+set_ss_context(struct kprobe_ctlblk *kcb, unsigned long addr, struct kprobe *p)
+{
+       unsigned long offset = GET_INSN_LENGTH(p->opcode);
+
+       kcb->ss_ctx.ss_pending = true;
+       kcb->ss_ctx.match_addr = addr + offset;
+}
+
+static void __kprobes clear_ss_context(struct kprobe_ctlblk *kcb)
+{
+       kcb->ss_ctx.ss_pending = false;
+       kcb->ss_ctx.match_addr = 0;
+}
+
+static void __kprobes setup_singlestep(struct kprobe *p,
+                                      struct pt_regs *regs,
+                                      struct kprobe_ctlblk *kcb, int reenter)
+{
+       unsigned long slot;
+
+       if (reenter) {
+               save_previous_kprobe(kcb);
+               set_current_kprobe(p);
+               kcb->kprobe_status = KPROBE_REENTER;
+       } else {
+               kcb->kprobe_status = KPROBE_HIT_SS;
+       }
+
+       if (p->ainsn.api.insn) {
+               /* prepare for single stepping */
+               slot = (unsigned long)p->ainsn.api.insn;
+
+               set_ss_context(kcb, slot, p);   /* mark pending ss */
+
+               /* IRQs and single stepping do not mix well. */
+               kprobes_save_local_irqflag(kcb, regs);
+
+               instruction_pointer_set(regs, slot);
+       } else {
+               /* insn simulation */
+               arch_simulate_insn(p, regs);
+       }
+}
+
+static int __kprobes reenter_kprobe(struct kprobe *p,
+                                   struct pt_regs *regs,
+                                   struct kprobe_ctlblk *kcb)
+{
+       switch (kcb->kprobe_status) {
+       case KPROBE_HIT_SSDONE:
+       case KPROBE_HIT_ACTIVE:
+               kprobes_inc_nmissed_count(p);
+               setup_singlestep(p, regs, kcb, 1);
+               break;
+       case KPROBE_HIT_SS:
+       case KPROBE_REENTER:
+               pr_warn("Unrecoverable kprobe detected.\n");
+               dump_kprobe(p);
+               BUG();
+               break;
+       default:
+               WARN_ON(1);
+               return 0;
+       }
+
+       return 1;
+}
+
+static void __kprobes
+post_kprobe_handler(struct kprobe_ctlblk *kcb, struct pt_regs *regs)
+{
+       struct kprobe *cur = kprobe_running();
+
+       if (!cur)
+               return;
+
+       /* restore the saved return address if the insn was non-branching */
+       if (cur->ainsn.api.restore != 0)
+               regs->epc = cur->ainsn.api.restore;
+
+       /* restore back original saved kprobe variables and continue */
+       if (kcb->kprobe_status == KPROBE_REENTER) {
+               restore_previous_kprobe(kcb);
+               return;
+       }
+
+       /* call post handler */
+       kcb->kprobe_status = KPROBE_HIT_SSDONE;
+       if (cur->post_handler) {
+               /*
+                * The post_handler can itself hit a breakpoint and
+                * single-step again, i.e. kprobe handling may recurse.
+                */
+               cur->post_handler(cur, regs, 0);
+       }
+
+       reset_current_kprobe();
+}
+
+int __kprobes kprobe_fault_handler(struct pt_regs *regs, unsigned int trapnr)
+{
+       struct kprobe *cur = kprobe_running();
+       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+       switch (kcb->kprobe_status) {
+       case KPROBE_HIT_SS:
+       case KPROBE_REENTER:
+               /*
+                * We are here because the instruction being single
+                * stepped caused a page fault. Reset the current kprobe,
+                * point the ip back at the probe address, and let the page
+                * fault handler continue as a normal page fault.
+                */
+               regs->epc = (unsigned long) cur->addr;
+               BUG_ON(!instruction_pointer(regs));
+
+               if (kcb->kprobe_status == KPROBE_REENTER)
+                       restore_previous_kprobe(kcb);
+               else
+                       reset_current_kprobe();
+
+               break;
+       case KPROBE_HIT_ACTIVE:
+       case KPROBE_HIT_SSDONE:
+               /*
+                * We increment the nmissed count for accounting; the
+                * npre/npostfault counts could also be used to account for
+                * these specific fault cases.
+                */
+               kprobes_inc_nmissed_count(cur);
+
+               /*
+                * We come here because an instruction in the pre/post
+                * handler caused the page fault. This can happen if the
+                * handler accesses user space via copy_from_user(),
+                * get_user(), etc. Let the
+                * user-specified handler try to fix it first.
+                */
+               if (cur->fault_handler && cur->fault_handler(cur, regs, trapnr))
+                       return 1;
+
+               /*
+                * In case the user-specified fault handler returned
+                * zero, try to fix up.
+                */
+               if (fixup_exception(regs))
+                       return 1;
+       }
+       return 0;
+}
+
+bool __kprobes
+kprobe_breakpoint_handler(struct pt_regs *regs)
+{
+       struct kprobe *p, *cur_kprobe;
+       struct kprobe_ctlblk *kcb;
+       unsigned long addr = instruction_pointer(regs);
+
+       kcb = get_kprobe_ctlblk();
+       cur_kprobe = kprobe_running();
+
+       p = get_kprobe((kprobe_opcode_t *) addr);
+
+       if (p) {
+               if (cur_kprobe) {
+                       if (reenter_kprobe(p, regs, kcb))
+                               return true;
+               } else {
+                       /* Probe hit */
+                       set_current_kprobe(p);
+                       kcb->kprobe_status = KPROBE_HIT_ACTIVE;
+
+                       /*
+                        * If we have no pre-handler or it returned 0, we
+                        * continue with normal processing.  If we have a
+                        * pre-handler and it returned non-zero, it has
+                        * modified the execution path and no single-stepping
+                        * is needed; just reset the current kprobe and exit.
+                        *
+                        * The pre_handler may itself hit a breakpoint and
+                        * single-step before returning.
+                        */
+                       if (!p->pre_handler || !p->pre_handler(p, regs))
+                               setup_singlestep(p, regs, kcb, 0);
+                       else
+                               reset_current_kprobe();
+               }
+               return true;
+       }
+
+       /*
+        * The breakpoint instruction was removed right
+        * after we hit it.  Another cpu has removed
+        * either a probepoint or a debugger breakpoint
+        * at this address.  In either case, no further
+        * handling of this interrupt is appropriate.
+        * Return back to original instruction, and continue.
+        */
+       return false;
+}
+
+bool __kprobes
+kprobe_single_step_handler(struct pt_regs *regs)
+{
+       struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
+
+       if ((kcb->ss_ctx.ss_pending)
+           && (kcb->ss_ctx.match_addr == instruction_pointer(regs))) {
+               clear_ss_context(kcb);  /* clear pending ss */
+
+               kprobes_restore_local_irqflag(kcb, regs);
+
+               post_kprobe_handler(kcb, regs);
+               return true;
+       }
+       return false;
+}
+
+/*
+ * Provide a blacklist of symbols identifying ranges which cannot be kprobed.
+ * This blacklist is exposed to userspace via debugfs (kprobes/blacklist).
+ */
+int __init arch_populate_kprobe_blacklist(void)
+{
+       int ret;
+
+       ret = kprobe_add_area_blacklist((unsigned long)__irqentry_text_start,
+                                       (unsigned long)__irqentry_text_end);
+       return ret;
+}
+
+void __kprobes __used *trampoline_probe_handler(struct pt_regs *regs)
+{
+       return (void *)kretprobe_trampoline_handler(regs, &kretprobe_trampoline, NULL);
+}
+
+void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
+                                     struct pt_regs *regs)
+{
+       ri->ret_addr = (kprobe_opcode_t *)regs->ra;
+       ri->fp = NULL;
+       regs->ra = (unsigned long) &kretprobe_trampoline;
+}
+
+int __kprobes arch_trampoline_kprobe(struct kprobe *p)
+{
+       return 0;
+}
+
+int __init arch_init_kprobes(void)
+{
+       return 0;
+}
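A hypothetical kretprobe, to show where the ra hijack above ends up (target symbol arbitrary): arch_prepare_kretprobe() rewrites ra so the probed function returns into kretprobe_trampoline, which hands control to the handler.

    static int ret_handler(struct kretprobe_instance *ri, struct pt_regs *regs)
    {
            pr_info("returned %ld\n", regs_return_value(regs));
            return 0;
    }

    static struct kretprobe my_kretprobe = {
            .handler        = ret_handler,
            .kp.symbol_name = "kernel_clone",       /* arbitrary target */
    };
    /* registered with register_kretprobe(&my_kretprobe) */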
diff --git a/arch/riscv/kernel/probes/kprobes_trampoline.S b/arch/riscv/kernel/probes/kprobes_trampoline.S
new file mode 100644 (file)
index 0000000..6e85d02
--- /dev/null
@@ -0,0 +1,93 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Author: Patrick Stählin <me@packi.ch>
+ */
+#include <linux/linkage.h>
+
+#include <asm/asm.h>
+#include <asm/asm-offsets.h>
+
+       .text
+       .altmacro
+
+       .macro save_all_base_regs
+       REG_S x1,  PT_RA(sp)
+       REG_S x3,  PT_GP(sp)
+       REG_S x4,  PT_TP(sp)
+       REG_S x5,  PT_T0(sp)
+       REG_S x6,  PT_T1(sp)
+       REG_S x7,  PT_T2(sp)
+       REG_S x8,  PT_S0(sp)
+       REG_S x9,  PT_S1(sp)
+       REG_S x10, PT_A0(sp)
+       REG_S x11, PT_A1(sp)
+       REG_S x12, PT_A2(sp)
+       REG_S x13, PT_A3(sp)
+       REG_S x14, PT_A4(sp)
+       REG_S x15, PT_A5(sp)
+       REG_S x16, PT_A6(sp)
+       REG_S x17, PT_A7(sp)
+       REG_S x18, PT_S2(sp)
+       REG_S x19, PT_S3(sp)
+       REG_S x20, PT_S4(sp)
+       REG_S x21, PT_S5(sp)
+       REG_S x22, PT_S6(sp)
+       REG_S x23, PT_S7(sp)
+       REG_S x24, PT_S8(sp)
+       REG_S x25, PT_S9(sp)
+       REG_S x26, PT_S10(sp)
+       REG_S x27, PT_S11(sp)
+       REG_S x28, PT_T3(sp)
+       REG_S x29, PT_T4(sp)
+       REG_S x30, PT_T5(sp)
+       REG_S x31, PT_T6(sp)
+       .endm
+
+       .macro restore_all_base_regs
+       REG_L x3,  PT_GP(sp)
+       REG_L x4,  PT_TP(sp)
+       REG_L x5,  PT_T0(sp)
+       REG_L x6,  PT_T1(sp)
+       REG_L x7,  PT_T2(sp)
+       REG_L x8,  PT_S0(sp)
+       REG_L x9,  PT_S1(sp)
+       REG_L x10, PT_A0(sp)
+       REG_L x11, PT_A1(sp)
+       REG_L x12, PT_A2(sp)
+       REG_L x13, PT_A3(sp)
+       REG_L x14, PT_A4(sp)
+       REG_L x15, PT_A5(sp)
+       REG_L x16, PT_A6(sp)
+       REG_L x17, PT_A7(sp)
+       REG_L x18, PT_S2(sp)
+       REG_L x19, PT_S3(sp)
+       REG_L x20, PT_S4(sp)
+       REG_L x21, PT_S5(sp)
+       REG_L x22, PT_S6(sp)
+       REG_L x23, PT_S7(sp)
+       REG_L x24, PT_S8(sp)
+       REG_L x25, PT_S9(sp)
+       REG_L x26, PT_S10(sp)
+       REG_L x27, PT_S11(sp)
+       REG_L x28, PT_T3(sp)
+       REG_L x29, PT_T4(sp)
+       REG_L x30, PT_T5(sp)
+       REG_L x31, PT_T6(sp)
+       .endm
+
+ENTRY(kretprobe_trampoline)
+       addi sp, sp, -(PT_SIZE_ON_STACK)
+       save_all_base_regs
+
+       move a0, sp /* pt_regs */
+
+       call trampoline_probe_handler
+
+       /* use the result as the return-address */
+       move ra, a0
+
+       restore_all_base_regs
+       addi sp, sp, PT_SIZE_ON_STACK
+
+       ret
+ENDPROC(kretprobe_trampoline)
diff --git a/arch/riscv/kernel/probes/simulate-insn.c b/arch/riscv/kernel/probes/simulate-insn.c
new file mode 100644 (file)
index 0000000..2519ce2
--- /dev/null
@@ -0,0 +1,85 @@
+// SPDX-License-Identifier: GPL-2.0+
+
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/kprobes.h>
+
+#include "decode-insn.h"
+#include "simulate-insn.h"
+
+static inline bool rv_insn_reg_get_val(struct pt_regs *regs, u32 index,
+                                      unsigned long *ptr)
+{
+       if (index == 0)
+               *ptr = 0;
+       else if (index <= 31)
+               *ptr = *((unsigned long *)regs + index);
+       else
+               return false;
+
+       return true;
+}
+
+static inline bool rv_insn_reg_set_val(struct pt_regs *regs, u32 index,
+                                      unsigned long val)
+{
+       if (index == 0)
+               return false;
+       else if (index <= 31)
+               *((unsigned long *)regs + index) = val;
+       else
+               return false;
+
+       return true;
+}
+
+bool __kprobes simulate_jal(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+       /*
+        *     31    30       21    20     19        12 11 7 6      0
+        * imm [20] | imm[10:1] | imm[11] | imm[19:12] | rd | opcode
+        *     1         10          1           8       5    JAL/J
+        */
+       bool ret;
+       u32 imm;
+       u32 index = (opcode >> 7) & 0x1f;
+
+       ret = rv_insn_reg_set_val(regs, index, addr + 4);
+       if (!ret)
+               return ret;
+
+       imm  = ((opcode >> 21) & 0x3ff) << 1;
+       imm |= ((opcode >> 20) & 0x1)   << 11;
+       imm |= ((opcode >> 12) & 0xff)  << 12;
+       imm |= ((opcode >> 31) & 0x1)   << 20;
+
+       instruction_pointer_set(regs, addr + sign_extend32((imm), 20));
+
+       return ret;
+}
+
+bool __kprobes simulate_jalr(u32 opcode, unsigned long addr, struct pt_regs *regs)
+{
+       /*
+        * 31          20 19 15 14 12 11 7 6      0
+        *  offset[11:0] | rs1 | 010 | rd | opcode
+        *      12         5      3    5    JALR/JR
+        */
+       bool ret;
+       unsigned long base_addr;
+       u32 imm = (opcode >> 20) & 0xfff;
+       u32 rd_index = (opcode >> 7) & 0x1f;
+       u32 rs1_index = (opcode >> 15) & 0x1f;
+
+       ret = rv_insn_reg_set_val(regs, rd_index, addr + 4);
+       if (!ret)
+               return ret;
+
+       ret = rv_insn_reg_get_val(regs, rs1_index, &base_addr);
+       if (!ret)
+               return ret;
+
+       instruction_pointer_set(regs, (base_addr + sign_extend32((imm), 11)) & ~1);
+
+       return ret;
+}
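A worked decode of the JAL layout above, using a concrete opcode:

    /* 0x008000ef encodes "jal ra, 8":
     *   rd  = (0x008000ef >> 7) & 0x1f          = 1  (ra)
     *   imm = ((0x008000ef >> 21) & 0x3ff) << 1 = 8  (all other imm fields 0)
     * so simulate_jal() sets ra = addr + 4 and pc = addr + 8 -- the same
     * effect as actually executing the instruction.
     */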
diff --git a/arch/riscv/kernel/probes/simulate-insn.h b/arch/riscv/kernel/probes/simulate-insn.h
new file mode 100644 (file)
index 0000000..cb6ff7d
--- /dev/null
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+
+#ifndef _RISCV_KERNEL_PROBES_SIMULATE_INSN_H
+#define _RISCV_KERNEL_PROBES_SIMULATE_INSN_H
+
+#define __RISCV_INSN_FUNCS(name, mask, val)                            \
+static __always_inline bool riscv_insn_is_##name(probe_opcode_t code)  \
+{                                                                      \
+       BUILD_BUG_ON(~(mask) & (val));                                  \
+       return (code & (mask)) == (val);                                \
+}                                                                      \
+bool simulate_##name(u32 opcode, unsigned long addr,                   \
+                    struct pt_regs *regs)
+
+#define RISCV_INSN_REJECTED(name, code)                                        \
+       do {                                                            \
+               if (riscv_insn_is_##name(code)) {                       \
+                       return INSN_REJECTED;                           \
+               }                                                       \
+       } while (0)
+
+__RISCV_INSN_FUNCS(system,     0x7f, 0x73);
+__RISCV_INSN_FUNCS(fence,      0x7f, 0x0f);
+
+#define RISCV_INSN_SET_SIMULATE(name, code)                            \
+       do {                                                            \
+               if (riscv_insn_is_##name(code)) {                       \
+                       api->handler = simulate_##name;                 \
+                       return INSN_GOOD_NO_SLOT;                       \
+               }                                                       \
+       } while (0)
+
+__RISCV_INSN_FUNCS(c_j,                0xe003, 0xa001);
+__RISCV_INSN_FUNCS(c_jr,       0xf007, 0x8002);
+__RISCV_INSN_FUNCS(c_jal,      0xe003, 0x2001);
+__RISCV_INSN_FUNCS(c_jalr,     0xf007, 0x9002);
+__RISCV_INSN_FUNCS(c_beqz,     0xe003, 0xc001);
+__RISCV_INSN_FUNCS(c_bnez,     0xe003, 0xe001);
+__RISCV_INSN_FUNCS(c_ebreak,   0xffff, 0x9002);
+
+__RISCV_INSN_FUNCS(auipc,      0x7f, 0x17);
+__RISCV_INSN_FUNCS(branch,     0x7f, 0x63);
+
+__RISCV_INSN_FUNCS(jal,                0x7f, 0x6f);
+__RISCV_INSN_FUNCS(jalr,       0x707f, 0x67);
+
+#endif /* _RISCV_KERNEL_PROBES_SIMULATE_INSN_H */
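A worked check of the mask/val pairs, with the same opcode used earlier:

    /* For "jal ra, 8" (0x008000ef):
     *   0x008000ef & 0x7f   == 0x6f          -> riscv_insn_is_jal()  is true
     *   0x008000ef & 0x707f == 0x6f != 0x67  -> riscv_insn_is_jalr() is false
     * BUILD_BUG_ON(~(mask) & (val)) rejects a val with bits outside its
     * mask, which could never match any opcode.
     */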
diff --git a/arch/riscv/kernel/probes/uprobes.c b/arch/riscv/kernel/probes/uprobes.c
new file mode 100644 (file)
index 0000000..7a057b5
--- /dev/null
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/highmem.h>
+#include <linux/ptrace.h>
+#include <linux/uprobes.h>
+
+#include "decode-insn.h"
+
+#define UPROBE_TRAP_NR UINT_MAX
+
+bool is_swbp_insn(uprobe_opcode_t *insn)
+{
+#ifdef CONFIG_RISCV_ISA_C
+       return (*insn & 0xffff) == UPROBE_SWBP_INSN;
+#else
+       return *insn == UPROBE_SWBP_INSN;
+#endif
+}
+
+unsigned long uprobe_get_swbp_addr(struct pt_regs *regs)
+{
+       return instruction_pointer(regs);
+}
+
+int arch_uprobe_analyze_insn(struct arch_uprobe *auprobe, struct mm_struct *mm,
+                            unsigned long addr)
+{
+       probe_opcode_t opcode;
+
+       opcode = *(probe_opcode_t *)(&auprobe->insn[0]);
+
+       auprobe->insn_size = GET_INSN_LENGTH(opcode);
+
+       switch (riscv_probe_decode_insn(&opcode, &auprobe->api)) {
+       case INSN_REJECTED:
+               return -EINVAL;
+
+       case INSN_GOOD_NO_SLOT:
+               auprobe->simulate = true;
+               break;
+
+       case INSN_GOOD:
+               auprobe->simulate = false;
+               break;
+
+       default:
+               return -EINVAL;
+       }
+
+       return 0;
+}
+
+int arch_uprobe_pre_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       struct uprobe_task *utask = current->utask;
+
+       utask->autask.saved_cause = current->thread.bad_cause;
+       current->thread.bad_cause = UPROBE_TRAP_NR;
+
+       instruction_pointer_set(regs, utask->xol_vaddr);
+
+       regs->status &= ~SR_SPIE;
+
+       return 0;
+}
+
+int arch_uprobe_post_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       struct uprobe_task *utask = current->utask;
+
+       WARN_ON_ONCE(current->thread.bad_cause != UPROBE_TRAP_NR);
+
+       instruction_pointer_set(regs, utask->vaddr + auprobe->insn_size);
+
+       regs->status |= SR_SPIE;
+
+       return 0;
+}
+
+bool arch_uprobe_xol_was_trapped(struct task_struct *t)
+{
+       if (t->thread.bad_cause != UPROBE_TRAP_NR)
+               return true;
+
+       return false;
+}
+
+bool arch_uprobe_skip_sstep(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       probe_opcode_t insn;
+       unsigned long addr;
+
+       if (!auprobe->simulate)
+               return false;
+
+       insn = *(probe_opcode_t *)(&auprobe->insn[0]);
+       addr = instruction_pointer(regs);
+
+       if (auprobe->api.handler)
+               auprobe->api.handler(insn, addr, regs);
+
+       return true;
+}
+
+void arch_uprobe_abort_xol(struct arch_uprobe *auprobe, struct pt_regs *regs)
+{
+       struct uprobe_task *utask = current->utask;
+
+       /*
+        * Task has received a fatal signal, so reset back to the probed
+        * address.
+        */
+       instruction_pointer_set(regs, utask->vaddr);
+
+       regs->status &= ~SR_SPIE;
+}
+
+bool arch_uretprobe_is_alive(struct return_instance *ret, enum rp_check ctx,
+               struct pt_regs *regs)
+{
+       if (ctx == RP_CHECK_CHAIN_CALL)
+               return regs->sp <= ret->stack;
+       else
+               return regs->sp < ret->stack;
+}
+
+unsigned long
+arch_uretprobe_hijack_return_addr(unsigned long trampoline_vaddr,
+                                 struct pt_regs *regs)
+{
+       unsigned long ra;
+
+       ra = regs->ra;
+
+       regs->ra = trampoline_vaddr;
+
+       return ra;
+}
+
+int arch_uprobe_exception_notify(struct notifier_block *self,
+                                unsigned long val, void *data)
+{
+       return NOTIFY_DONE;
+}
+
+bool uprobe_breakpoint_handler(struct pt_regs *regs)
+{
+       if (uprobe_pre_sstep_notifier(regs))
+               return true;
+
+       return false;
+}
+
+bool uprobe_single_step_handler(struct pt_regs *regs)
+{
+       if (uprobe_post_sstep_notifier(regs))
+               return true;
+
+       return false;
+}
+
+void arch_uprobe_copy_ixol(struct page *page, unsigned long vaddr,
+                          void *src, unsigned long len)
+{
+       /* Initialize the slot */
+       void *kaddr = kmap_atomic(page);
+       void *dst = kaddr + (vaddr & ~PAGE_MASK);
+
+       memcpy(dst, src, len);
+
+       /* Append an ebreak after the copied opcode to simulate a single step */
+       if (vaddr) {
+               dst += GET_INSN_LENGTH(*(probe_opcode_t *)src);
+               *(uprobe_opcode_t *)dst = __BUG_INSN_32;
+       }
+
+       kunmap_atomic(kaddr);
+
+       /*
+        * We probably need flush_icache_user_page() but it needs vma.
+        * This should work on most architectures by default. If an
+        * architecture needs to do something different it can define
+        * its own version of the function.
+        */
+       flush_dcache_page(page);
+}
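The resulting XOL slot layout, sketched (sizes follow GET_INSN_LENGTH and __BUG_INSN_32):

    /*
     *   +-------------------------+------------------+
     *   | copied insn (2/4 bytes) | ebreak (4 bytes) |
     *   +-------------------------+------------------+
     *
     * The task single-steps the copied instruction out of line; the trailing
     * ebreak then traps back into the kernel, where
     * uprobe_single_step_handler() finishes the step.
     */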
index dd5f985..f9cd57c 100644 (file)
@@ -10,6 +10,7 @@
 #include <linux/cpu.h>
 #include <linux/kernel.h>
 #include <linux/sched.h>
+#include <linux/sched/debug.h>
 #include <linux/sched/task_stack.h>
 #include <linux/tick.h>
 #include <linux/ptrace.h>
 #include <asm/unistd.h>
 #include <asm/processor.h>
 #include <asm/csr.h>
+#include <asm/stacktrace.h>
 #include <asm/string.h>
 #include <asm/switch_to.h>
 #include <asm/thread_info.h>
 
 register unsigned long gp_in_global __asm__("gp");
 
-#ifdef CONFIG_STACKPROTECTOR
+#if defined(CONFIG_STACKPROTECTOR) && !defined(CONFIG_STACKPROTECTOR_PER_TASK)
 #include <linux/stackprotector.h>
 unsigned long __stack_chk_guard __read_mostly;
 EXPORT_SYMBOL(__stack_chk_guard);
@@ -39,11 +41,16 @@ void arch_cpu_idle(void)
        raw_local_irq_enable();
 }
 
-void show_regs(struct pt_regs *regs)
+void __show_regs(struct pt_regs *regs)
 {
        show_regs_print_info(KERN_DEFAULT);
 
-       pr_cont("epc: " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
+       if (!user_mode(regs)) {
+               pr_cont("epc : %pS\n", (void *)regs->epc);
+               pr_cont(" ra : %pS\n", (void *)regs->ra);
+       }
+
+       pr_cont("epc : " REG_FMT " ra : " REG_FMT " sp : " REG_FMT "\n",
                regs->epc, regs->ra, regs->sp);
        pr_cont(" gp : " REG_FMT " tp : " REG_FMT " t0 : " REG_FMT "\n",
                regs->gp, regs->tp, regs->t0);
@@ -69,6 +76,12 @@ void show_regs(struct pt_regs *regs)
        pr_cont("status: " REG_FMT " badaddr: " REG_FMT " cause: " REG_FMT "\n",
                regs->status, regs->badaddr, regs->cause);
 }
+void show_regs(struct pt_regs *regs)
+{
+       __show_regs(regs);
+       if (!user_mode(regs))
+               dump_backtrace(regs, NULL, KERN_DEFAULT);
+}
 
 void start_thread(struct pt_regs *regs, unsigned long pc,
        unsigned long sp)
@@ -112,7 +125,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
        struct pt_regs *childregs = task_pt_regs(p);
 
        /* p->thread holds context to be restored by __switch_to() */
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* Kernel thread */
                memset(childregs, 0, sizeof(struct pt_regs));
                childregs->gp = gp_in_global;
index 2d6395f..1a85305 100644 (file)
@@ -114,6 +114,105 @@ const struct user_regset_view *task_user_regset_view(struct task_struct *task)
        return &riscv_user_native_view;
 }
 
+struct pt_regs_offset {
+       const char *name;
+       int offset;
+};
+
+#define REG_OFFSET_NAME(r) {.name = #r, .offset = offsetof(struct pt_regs, r)}
+#define REG_OFFSET_END {.name = NULL, .offset = 0}
+
+static const struct pt_regs_offset regoffset_table[] = {
+       REG_OFFSET_NAME(epc),
+       REG_OFFSET_NAME(ra),
+       REG_OFFSET_NAME(sp),
+       REG_OFFSET_NAME(gp),
+       REG_OFFSET_NAME(tp),
+       REG_OFFSET_NAME(t0),
+       REG_OFFSET_NAME(t1),
+       REG_OFFSET_NAME(t2),
+       REG_OFFSET_NAME(s0),
+       REG_OFFSET_NAME(s1),
+       REG_OFFSET_NAME(a0),
+       REG_OFFSET_NAME(a1),
+       REG_OFFSET_NAME(a2),
+       REG_OFFSET_NAME(a3),
+       REG_OFFSET_NAME(a4),
+       REG_OFFSET_NAME(a5),
+       REG_OFFSET_NAME(a6),
+       REG_OFFSET_NAME(a7),
+       REG_OFFSET_NAME(s2),
+       REG_OFFSET_NAME(s3),
+       REG_OFFSET_NAME(s4),
+       REG_OFFSET_NAME(s5),
+       REG_OFFSET_NAME(s6),
+       REG_OFFSET_NAME(s7),
+       REG_OFFSET_NAME(s8),
+       REG_OFFSET_NAME(s9),
+       REG_OFFSET_NAME(s10),
+       REG_OFFSET_NAME(s11),
+       REG_OFFSET_NAME(t3),
+       REG_OFFSET_NAME(t4),
+       REG_OFFSET_NAME(t5),
+       REG_OFFSET_NAME(t6),
+       REG_OFFSET_NAME(status),
+       REG_OFFSET_NAME(badaddr),
+       REG_OFFSET_NAME(cause),
+       REG_OFFSET_NAME(orig_a0),
+       REG_OFFSET_END,
+};
+
+/**
+ * regs_query_register_offset() - query register offset from its name
+ * @name:      the name of a register
+ *
+ * regs_query_register_offset() returns the offset of a register in struct
+ * pt_regs from its name. If the name is invalid, this returns -EINVAL.
+ */
+int regs_query_register_offset(const char *name)
+{
+       const struct pt_regs_offset *roff;
+
+       for (roff = regoffset_table; roff->name != NULL; roff++)
+               if (!strcmp(roff->name, name))
+                       return roff->offset;
+       return -EINVAL;
+}
+
+/**
+ * regs_within_kernel_stack() - check the address in the stack
+ * @regs:      pt_regs which contains kernel stack pointer.
+ * @addr:      address which is checked.
+ *
+ * regs_within_kernel_stack() checks whether @addr is within the kernel stack page(s).
+ * If @addr is within the kernel stack, it returns true. If not, returns false.
+ */
+static bool regs_within_kernel_stack(struct pt_regs *regs, unsigned long addr)
+{
+       return (addr & ~(THREAD_SIZE - 1))  ==
+               (kernel_stack_pointer(regs) & ~(THREAD_SIZE - 1));
+}
+
+/**
+ * regs_get_kernel_stack_nth() - get Nth entry of the stack
+ * @regs:      pt_regs which contains kernel stack pointer.
+ * @n:         stack entry number.
+ *
+ * regs_get_kernel_stack_nth() returns the @n-th entry of the kernel stack
+ * specified by @regs. If the @n-th entry is NOT in the kernel stack,
+ * this returns 0.
+ */
+unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, unsigned int n)
+{
+       unsigned long *addr = (unsigned long *)kernel_stack_pointer(regs);
+
+       addr += n;
+       if (regs_within_kernel_stack(regs, (unsigned long)addr))
+               return *addr;
+       else
+               return 0;
+}
+
 void ptrace_disable(struct task_struct *child)
 {
        clear_tsk_thread_flag(child, TIF_SYSCALL_TRACE);
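
/*
 * Illustrative sketch, not part of the patch: the helpers added above back
 * HAVE_REGS_AND_STACK_ACCESS_API, which kprobe event arguments use to fetch
 * registers by name and words off the kernel stack. The caller below is
 * hypothetical.
 */
static unsigned long fetch_args_sketch(struct pt_regs *regs)
{
	int off = regs_query_register_offset("a0");	/* -EINVAL if unknown */
	unsigned long a0 = 0;

	if (off >= 0)
		a0 = *(unsigned long *)((void *)regs + off);

	/* 3rd word on the kernel stack, or 0 if out of range */
	return a0 + regs_get_kernel_stack_nth(regs, 3);
}
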
index 226ccce..d3bf756 100644 (file)
@@ -116,7 +116,7 @@ void sbi_clear_ipi(void)
 EXPORT_SYMBOL(sbi_clear_ipi);
 
 /**
- * sbi_set_timer_v01() - Program the timer for next timer event.
+ * __sbi_set_timer_v01() - Program the timer for next timer event.
  * @stime_value: The value after which next timer event should fire.
  *
  * Return: None
@@ -351,7 +351,7 @@ static int __sbi_rfence_v02(int fid, const unsigned long *hart_mask,
  * sbi_set_timer() - Program the timer for next timer event.
  * @stime_value: The value after which next timer event should fire.
  *
- * Return: None
+ * Return: None.
  */
 void sbi_set_timer(uint64_t stime_value)
 {
@@ -362,11 +362,11 @@ void sbi_set_timer(uint64_t stime_value)
  * sbi_send_ipi() - Send an IPI to any hart.
  * @hart_mask: A cpu mask containing all the target harts.
  *
- * Return: None
+ * Return: 0 on success, an appropriate Linux error code otherwise.
  */
-void sbi_send_ipi(const unsigned long *hart_mask)
+int sbi_send_ipi(const unsigned long *hart_mask)
 {
-       __sbi_send_ipi(hart_mask);
+       return __sbi_send_ipi(hart_mask);
 }
 EXPORT_SYMBOL(sbi_send_ipi);
 
@@ -374,12 +374,12 @@ EXPORT_SYMBOL(sbi_send_ipi);
  * sbi_remote_fence_i() - Execute FENCE.I instruction on given remote harts.
  * @hart_mask: A cpu mask containing all the target harts.
  *
- * Return: None
+ * Return: 0 on success, an appropriate Linux error code otherwise.
  */
-void sbi_remote_fence_i(const unsigned long *hart_mask)
+int sbi_remote_fence_i(const unsigned long *hart_mask)
 {
-       __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
-                    hart_mask, 0, 0, 0, 0);
+       return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_FENCE_I,
+                           hart_mask, 0, 0, 0, 0);
 }
 EXPORT_SYMBOL(sbi_remote_fence_i);
 
@@ -390,14 +390,14 @@ EXPORT_SYMBOL(sbi_remote_fence_i);
  * @start: Start of the virtual address
  * @size: Total size of the virtual address range.
  *
- * Return: None
+ * Return: 0 on success, an appropriate Linux error code otherwise.
  */
-void sbi_remote_sfence_vma(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma(const unsigned long *hart_mask,
                           unsigned long start,
                           unsigned long size)
 {
-       __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
-                    hart_mask, start, size, 0, 0);
+       return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA,
+                           hart_mask, start, size, 0, 0);
 }
 EXPORT_SYMBOL(sbi_remote_sfence_vma);
 
@@ -410,15 +410,15 @@ EXPORT_SYMBOL(sbi_remote_sfence_vma);
  * @size: Total size of the virtual address range.
  * @asid: The value of address space identifier (ASID).
  *
- * Return: None
+ * Return: 0 on success, an appropriate Linux error code otherwise.
  */
-void sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
+int sbi_remote_sfence_vma_asid(const unsigned long *hart_mask,
                                unsigned long start,
                                unsigned long size,
                                unsigned long asid)
 {
-       __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
-                    hart_mask, start, size, asid, 0);
+       return __sbi_rfence(SBI_EXT_RFENCE_REMOTE_SFENCE_VMA_ASID,
+                           hart_mask, start, size, asid, 0);
 }
 EXPORT_SYMBOL(sbi_remote_sfence_vma_asid);
 
@@ -560,7 +560,7 @@ static struct riscv_ipi_ops sbi_ipi_ops = {
        .ipi_inject = sbi_send_cpumask_ipi
 };
 
-int __init sbi_init(void)
+void __init sbi_init(void)
 {
        int ret;
 
@@ -600,6 +600,4 @@ int __init sbi_init(void)
        }
 
        riscv_set_ipi_ops(&sbi_ipi_ops);
-
-       return 0;
 }
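
/*
 * Illustrative sketch, not part of the patch: with the int return types
 * introduced above, callers can finally observe SBI failures instead of
 * assuming success. The caller below is hypothetical.
 */
static int remote_fence_i_sketch(const unsigned long *hart_mask)
{
	int ret = sbi_remote_fence_i(hart_mask);

	if (ret)
		pr_warn("remote fence.i failed: %d\n", ret);
	return ret;
}
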
index c7c0655..f8f1533 100644 (file)
@@ -147,7 +147,8 @@ static void __init init_resources(void)
        bss_res.end = __pa_symbol(__bss_stop) - 1;
        bss_res.flags = IORESOURCE_SYSTEM_RAM | IORESOURCE_BUSY;
 
-       mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt) * sizeof(*mem_res);
+       /* + 1 as memblock_alloc() might increase memblock.reserved.cnt */
+       mem_res_sz = (memblock.memory.cnt + memblock.reserved.cnt + 1) * sizeof(*mem_res);
        mem_res = memblock_alloc(mem_res_sz, SMP_CACHE_BYTES);
        if (!mem_res)
                panic("%s: Failed to allocate %zu bytes\n", __func__, mem_res_sz);
@@ -216,8 +217,15 @@ static void __init init_resources(void)
 static void __init parse_dtb(void)
 {
        /* Early scan of device tree from init memory */
-       if (early_init_dt_scan(dtb_early_va))
+       if (early_init_dt_scan(dtb_early_va)) {
+               const char *name = of_flat_dt_get_machine_name();
+
+               if (name) {
+                       pr_info("Machine model: %s\n", name);
+                       dump_stack_set_arch_desc("%s (DT)", name);
+               }
                return;
+       }
 
        pr_err("No DTB passed to the kernel\n");
 #ifdef CONFIG_CMDLINE_FORCE
@@ -252,9 +260,9 @@ void __init setup_arch(char **cmdline_p)
        else
                pr_err("No DTB found in kernel mappings\n");
 #endif
+       misc_mem_init();
 
-       if (IS_ENABLED(CONFIG_RISCV_SBI))
-               sbi_init();
+       sbi_init();
 
        if (IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
                protect_kernel_text_data();
@@ -275,13 +283,19 @@ void __init setup_arch(char **cmdline_p)
 
 static int __init topology_init(void)
 {
-       int i;
+       int i, ret;
+
+       for_each_online_node(i)
+               register_one_node(i);
 
        for_each_possible_cpu(i) {
                struct cpu *cpu = &per_cpu(cpu_devices, i);
 
                cpu->hotpluggable = cpu_has_hotplug(i);
-               register_cpu(cpu, i);
+               ret = register_cpu(cpu, i);
+               if (unlikely(ret))
+                       pr_warn("Warning: %s: register_cpu %d failed (%d)\n",
+                              __func__, i, ret);
        }
 
        return 0;
index 469aef8..65942b3 100644 (file)
@@ -309,6 +309,9 @@ static void do_signal(struct pt_regs *regs)
 asmlinkage __visible void do_notify_resume(struct pt_regs *regs,
                                           unsigned long thread_info_flags)
 {
+       if (thread_info_flags & _TIF_UPROBE)
+               uprobe_notify_resume(regs);
+
        /* Handle pending signal delivery */
        if (thread_info_flags & (_TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL))
                do_signal(regs);
index 96167d5..5e276c2 100644 (file)
@@ -27,6 +27,7 @@
 #include <asm/cpu_ops.h>
 #include <asm/irq.h>
 #include <asm/mmu_context.h>
+#include <asm/numa.h>
 #include <asm/tlbflush.h>
 #include <asm/sections.h>
 #include <asm/sbi.h>
@@ -45,13 +46,18 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
 {
        int cpuid;
        int ret;
+       unsigned int curr_cpuid;
+
+       curr_cpuid = smp_processor_id();
+       numa_store_cpu_info(curr_cpuid);
+       numa_add_cpu(curr_cpuid);
 
        /* This covers non-smp usecase mandated by "nosmp" option */
        if (max_cpus == 0)
                return;
 
        for_each_possible_cpu(cpuid) {
-               if (cpuid == smp_processor_id())
+               if (cpuid == curr_cpuid)
                        continue;
                if (cpu_ops[cpuid]->cpu_prepare) {
                        ret = cpu_ops[cpuid]->cpu_prepare(cpuid);
@@ -59,6 +65,7 @@ void __init smp_prepare_cpus(unsigned int max_cpus)
                                continue;
                }
                set_cpu_present(cpuid, true);
+               numa_store_cpu_info(cpuid);
        }
 }
 
@@ -79,6 +86,7 @@ void __init setup_smp(void)
                if (hart == cpuid_to_hartid_map(0)) {
                        BUG_ON(found_boot_cpu);
                        found_boot_cpu = 1;
+                       early_map_cpu_to_node(0, of_node_to_nid(dn));
                        continue;
                }
                if (cpuid >= NR_CPUS) {
@@ -88,6 +96,7 @@ void __init setup_smp(void)
                }
 
                cpuid_to_hartid_map(cpuid) = hart;
+               early_map_cpu_to_node(cpuid, of_node_to_nid(dn));
                cpuid++;
        }
 
@@ -153,6 +162,7 @@ asmlinkage __visible void smp_callin(void)
        current->active_mm = mm;
 
        notify_cpu_starting(curr_cpuid);
+       numa_add_cpu(curr_cpuid);
        update_siblings_masks(curr_cpuid);
        set_cpu_online(curr_cpuid, 1);
 
index c7b0a73..a051617 100644 (file)
@@ -26,30 +26,3 @@ void __init soc_early_init(void)
                }
        }
 }
-
-static bool soc_builtin_dtb_match(unsigned long vendor_id,
-                               unsigned long arch_id, unsigned long imp_id,
-                               const struct soc_builtin_dtb *entry)
-{
-       return entry->vendor_id == vendor_id &&
-              entry->arch_id == arch_id &&
-              entry->imp_id == imp_id;
-}
-
-void * __init soc_lookup_builtin_dtb(void)
-{
-       unsigned long vendor_id, arch_id, imp_id;
-       const struct soc_builtin_dtb *s;
-
-       __asm__ ("csrr %0, mvendorid" : "=r"(vendor_id));
-       __asm__ ("csrr %0, marchid" : "=r"(arch_id));
-       __asm__ ("csrr %0, mimpid" : "=r"(imp_id));
-
-       for (s = (void *)&__soc_builtin_dtb_table_start;
-            (void *)s < (void *)&__soc_builtin_dtb_table_end; s++) {
-               if (soc_builtin_dtb_match(vendor_id, arch_id, imp_id, s))
-                       return s->dtb_func();
-       }
-
-       return NULL;
-}
index df5d2da..2b3e0cb 100644 (file)
@@ -14,7 +14,7 @@
 
 #include <asm/stacktrace.h>
 
-register const unsigned long sp_in_global __asm__("sp");
+register unsigned long sp_in_global __asm__("sp");
 
 #ifdef CONFIG_FRAME_POINTER
 
@@ -53,9 +53,15 @@ void notrace walk_stackframe(struct task_struct *task, struct pt_regs *regs,
                /* Unwind stack frame */
                frame = (struct stackframe *)fp - 1;
                sp = fp;
-               fp = frame->fp;
-               pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
-                                          (unsigned long *)(fp - 8));
+               if (regs && (regs->epc == pc) && (frame->fp & 0x7)) {
+                       fp = frame->ra;
+                       pc = regs->ra;
+               } else {
+                       fp = frame->fp;
+                       pc = ftrace_graph_ret_addr(current, NULL, frame->ra,
+                                                  (unsigned long *)(fp - 8));
+               }
+
        }
 }
 
@@ -100,10 +106,16 @@ static bool print_trace_address(void *arg, unsigned long pc)
        return true;
 }
 
+void dump_backtrace(struct pt_regs *regs, struct task_struct *task,
+                   const char *loglvl)
+{
+       pr_cont("%sCall Trace:\n", loglvl);
+       walk_stackframe(task, regs, print_trace_address, (void *)loglvl);
+}
+
 void show_stack(struct task_struct *task, unsigned long *sp, const char *loglvl)
 {
-       pr_cont("Call Trace:\n");
-       walk_stackframe(task, NULL, print_trace_address, (void *)loglvl);
+       dump_backtrace(NULL, task, loglvl);
 }
 
 static bool save_wchan(void *arg, unsigned long pc)
index 8a5cf99..1b43226 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <asm/sbi.h>
 #include <asm/processor.h>
+#include <asm/timex.h>
 
 unsigned long riscv_timebase;
 EXPORT_SYMBOL_GPL(riscv_timebase);
index ad14f44..0879b5d 100644 (file)
 #include <linux/signal.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
+#include <linux/kprobes.h>
 #include <linux/mm.h>
 #include <linux/module.h>
 #include <linux/irq.h>
 
+#include <asm/asm-prototypes.h>
+#include <asm/bug.h>
 #include <asm/processor.h>
 #include <asm/ptrace.h>
 #include <asm/csr.h>
@@ -66,7 +69,7 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
                        tsk->comm, task_pid_nr(tsk), signo, code, addr);
                print_vma_addr(KERN_CONT " in ", instruction_pointer(regs));
                pr_cont("\n");
-               show_regs(regs);
+               __show_regs(regs);
        }
 
        force_sig_fault(signo, code, (void __user *)addr);
@@ -75,6 +78,8 @@ void do_trap(struct pt_regs *regs, int signo, int code, unsigned long addr)
 static void do_trap_error(struct pt_regs *regs, int signo, int code,
        unsigned long addr, const char *str)
 {
+       current->thread.bad_cause = regs->cause;
+
        if (user_mode(regs)) {
                do_trap(regs, signo, code, addr);
        } else {
@@ -145,6 +150,22 @@ static inline unsigned long get_break_insn_length(unsigned long pc)
 
 asmlinkage __visible void do_trap_break(struct pt_regs *regs)
 {
+#ifdef CONFIG_KPROBES
+       if (kprobe_single_step_handler(regs))
+               return;
+
+       if (kprobe_breakpoint_handler(regs))
+               return;
+#endif
+#ifdef CONFIG_UPROBES
+       if (uprobe_single_step_handler(regs))
+               return;
+
+       if (uprobe_breakpoint_handler(regs))
+               return;
+#endif
+       current->thread.bad_cause = regs->cause;
+
        if (user_mode(regs))
                force_sig_fault(SIGTRAP, TRAP_BRKPT, (void __user *)regs->epc);
 #ifdef CONFIG_KGDB
index 0cfd6da..71a315e 100644 (file)
@@ -32,9 +32,10 @@ CPPFLAGS_vdso.lds += -P -C -U$(ARCH)
 # Disable -pg to prevent insert call site
 CFLAGS_REMOVE_vgettimeofday.o = $(CC_FLAGS_FTRACE) -Os
 
-# Disable gcov profiling for VDSO code
+# Disable profiling and instrumentation for VDSO code
 GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
+KASAN_SANITIZE := n
 
 # Force dependency
 $(obj)/vdso.o: $(obj)/vdso.so
index ac6171e..25d5c96 100644 (file)
@@ -5,3 +5,5 @@ lib-y                   += memset.o
 lib-y                  += memmove.o
 lib-$(CONFIG_MMU)      += uaccess.o
 lib-$(CONFIG_64BIT)    += tishift.o
+
+obj-$(CONFIG_FUNCTION_ERROR_INJECTION) += error-inject.o
diff --git a/arch/riscv/lib/error-inject.c b/arch/riscv/lib/error-inject.c
new file mode 100644 (file)
index 0000000..d667ade
--- /dev/null
@@ -0,0 +1,10 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/error-injection.h>
+#include <linux/kprobes.h>
+
+void override_function_with_return(struct pt_regs *regs)
+{
+       instruction_pointer_set(regs, regs->ra);
+}
+NOKPROBE_SYMBOL(override_function_with_return);
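
/*
 * Illustrative sketch, not part of the patch: override_function_with_return()
 * is the arch hook behind CONFIG_FUNCTION_ERROR_INJECTION. Redirecting the
 * instruction pointer to regs->ra makes the probed function appear to return
 * immediately; the injected return value is placed in the return register by
 * the consumer (e.g. bpf_override_return() or the fail_function interface).
 * A function opts in with ALLOW_ERROR_INJECTION(); the function below is
 * hypothetical.
 */
#include <linux/error-injection.h>

static int example_hw_init(void)
{
	return 0;	/* real work elided */
}
ALLOW_ERROR_INJECTION(example_hw_init, ERRNO);
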
index c0185e5..7ebaef1 100644 (file)
@@ -2,7 +2,8 @@
 
 CFLAGS_init.o := -mcmodel=medany
 ifdef CONFIG_FTRACE
-CFLAGS_REMOVE_init.o = -pg
+CFLAGS_REMOVE_init.o = $(CC_FLAGS_FTRACE)
+CFLAGS_REMOVE_cacheflush.o = $(CC_FLAGS_FTRACE)
 endif
 
 KCOV_INSTRUMENT_init.o := n
index 613ec81..68aa312 100644 (file)
 /*
  * Copyright (C) 2012 Regents of the University of California
  * Copyright (C) 2017 SiFive
+ * Copyright (C) 2021 Western Digital Corporation or its affiliates.
  */
 
+#include <linux/bitops.h>
+#include <linux/cpumask.h>
 #include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/static_key.h>
 #include <asm/tlbflush.h>
 #include <asm/cacheflush.h>
 #include <asm/mmu_context.h>
 
+#ifdef CONFIG_MMU
+
+static DEFINE_STATIC_KEY_FALSE(use_asid_allocator);
+
+static unsigned long asid_bits;
+static unsigned long num_asids;
+static unsigned long asid_mask;
+
+static atomic_long_t current_version;
+
+static DEFINE_RAW_SPINLOCK(context_lock);
+static cpumask_t context_tlb_flush_pending;
+static unsigned long *context_asid_map;
+
+static DEFINE_PER_CPU(atomic_long_t, active_context);
+static DEFINE_PER_CPU(unsigned long, reserved_context);
+
+static bool check_update_reserved_context(unsigned long cntx,
+                                         unsigned long newcntx)
+{
+       int cpu;
+       bool hit = false;
+
+       /*
+        * Iterate over the set of reserved CONTEXTs looking for a match.
+        * If we find one, then we can update our mm to use the new CONTEXT
+        * (i.e. the same CONTEXT in the current_version) but we can't
+        * exit the loop early, since we need to ensure that all copies
+        * of the old CONTEXT are updated to reflect the mm. Failure to do
+        * so could result in us missing the reserved CONTEXT in a future
+        * version.
+        */
+       for_each_possible_cpu(cpu) {
+               if (per_cpu(reserved_context, cpu) == cntx) {
+                       hit = true;
+                       per_cpu(reserved_context, cpu) = newcntx;
+               }
+       }
+
+       return hit;
+}
+
+static void __flush_context(void)
+{
+       int i;
+       unsigned long cntx;
+
+       /* Must be called with context_lock held */
+       lockdep_assert_held(&context_lock);
+
+       /* Update the list of reserved ASIDs and the ASID bitmap. */
+       bitmap_clear(context_asid_map, 0, num_asids);
+
+       /* Mark already active ASIDs as used */
+       for_each_possible_cpu(i) {
+               cntx = atomic_long_xchg_relaxed(&per_cpu(active_context, i), 0);
+               /*
+                * If this CPU has already been through a rollover, but
+                * hasn't run another task in the meantime, we must preserve
+                * its reserved CONTEXT, as this is the only trace we have of
+                * the process it is still running.
+                */
+               if (cntx == 0)
+                       cntx = per_cpu(reserved_context, i);
+
+               __set_bit(cntx & asid_mask, context_asid_map);
+               per_cpu(reserved_context, i) = cntx;
+       }
+
+       /* Mark ASID #0 as used because it is used at boot-time */
+       __set_bit(0, context_asid_map);
+
+       /* Queue a TLB invalidation for each CPU on next context-switch */
+       cpumask_setall(&context_tlb_flush_pending);
+}
+
+static unsigned long __new_context(struct mm_struct *mm)
+{
+       static u32 cur_idx = 1;
+       unsigned long cntx = atomic_long_read(&mm->context.id);
+       unsigned long asid, ver = atomic_long_read(&current_version);
+
+       /* Must be called with context_lock held */
+       lockdep_assert_held(&context_lock);
+
+       if (cntx != 0) {
+               unsigned long newcntx = ver | (cntx & asid_mask);
+
+               /*
+                * If our current CONTEXT was active during a rollover, we
+                * can continue to use it and this was just a false alarm.
+                */
+               if (check_update_reserved_context(cntx, newcntx))
+                       return newcntx;
+
+               /*
+                * We had a valid CONTEXT in a previous life, so try to
+                * re-use it if possible.
+                */
+               if (!__test_and_set_bit(cntx & asid_mask, context_asid_map))
+                       return newcntx;
+       }
+
+       /*
+        * Allocate a free ASID. If we can't find one then increment
+        * current_version and flush all ASIDs.
+        */
+       asid = find_next_zero_bit(context_asid_map, num_asids, cur_idx);
+       if (asid != num_asids)
+               goto set_asid;
+
+       /* We're out of ASIDs, so increment current_version */
+       ver = atomic_long_add_return_relaxed(num_asids, &current_version);
+
+       /* Flush everything */
+       __flush_context();
+
+       /* We have more ASIDs than CPUs, so this will always succeed */
+       asid = find_next_zero_bit(context_asid_map, num_asids, 1);
+
+set_asid:
+       __set_bit(asid, context_asid_map);
+       cur_idx = asid;
+       return asid | ver;
+}
+
+static void set_mm_asid(struct mm_struct *mm, unsigned int cpu)
+{
+       unsigned long flags;
+       bool need_flush_tlb = false;
+       unsigned long cntx, old_active_cntx;
+
+       cntx = atomic_long_read(&mm->context.id);
+
+       /*
+        * If our active_context is non-zero and the context matches the
+        * current_version, then we update the active_context entry with a
+        * relaxed cmpxchg.
+        *
+        * Following is how we handle racing with a concurrent rollover:
+        *
+        * - We get a zero back from the cmpxchg and end up waiting on the
+        *   lock. Taking the lock synchronises with the rollover and so
+        *   we are forced to see the updated version.
+        *
+        * - We get a valid context back from the cmpxchg, in which case we
+        *   continue using the old ASID: __flush_context() has marked the
+        *   ASID of active_context as used, so a new context will be
+        *   allocated at the next context switch.
+        */
+       old_active_cntx = atomic_long_read(&per_cpu(active_context, cpu));
+       if (old_active_cntx &&
+           ((cntx & ~asid_mask) == atomic_long_read(&current_version)) &&
+           atomic_long_cmpxchg_relaxed(&per_cpu(active_context, cpu),
+                                       old_active_cntx, cntx))
+               goto switch_mm_fast;
+
+       raw_spin_lock_irqsave(&context_lock, flags);
+
+       /* Check that our ASID belongs to the current_version. */
+       cntx = atomic_long_read(&mm->context.id);
+       if ((cntx & ~asid_mask) != atomic_long_read(&current_version)) {
+               cntx = __new_context(mm);
+               atomic_long_set(&mm->context.id, cntx);
+       }
+
+       if (cpumask_test_and_clear_cpu(cpu, &context_tlb_flush_pending))
+               need_flush_tlb = true;
+
+       atomic_long_set(&per_cpu(active_context, cpu), cntx);
+
+       raw_spin_unlock_irqrestore(&context_lock, flags);
+
+switch_mm_fast:
+       csr_write(CSR_SATP, virt_to_pfn(mm->pgd) |
+                 ((cntx & asid_mask) << SATP_ASID_SHIFT) |
+                 SATP_MODE);
+
+       if (need_flush_tlb)
+               local_flush_tlb_all();
+}
+
+static void set_mm_noasid(struct mm_struct *mm)
+{
+       /* Switch the page table and blindly nuke the entire local TLB */
+       csr_write(CSR_SATP, virt_to_pfn(mm->pgd) | SATP_MODE);
+       local_flush_tlb_all();
+}
+
+static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+{
+       if (static_branch_unlikely(&use_asid_allocator))
+               set_mm_asid(mm, cpu);
+       else
+               set_mm_noasid(mm);
+}
+
+static int asids_init(void)
+{
+       unsigned long old;
+
+       /* Figure out the number of ASID bits in HW */
+       old = csr_read(CSR_SATP);
+       asid_bits = old | (SATP_ASID_MASK << SATP_ASID_SHIFT);
+       csr_write(CSR_SATP, asid_bits);
+       asid_bits = (csr_read(CSR_SATP) >> SATP_ASID_SHIFT)  & SATP_ASID_MASK;
+       asid_bits = fls_long(asid_bits);
+       csr_write(CSR_SATP, old);
+
+       /*
+        * In the process of determining the number of ASID bits (above)
+        * we polluted the TLB of the current HART, so flush the TLB to
+        * remove the unwanted TLB entries.
+        */
+       local_flush_tlb_all();
+
+       /* Pre-compute ASID details */
+       num_asids = 1 << asid_bits;
+       asid_mask = num_asids - 1;
+
+       /*
+        * Use the ASID allocator only if the number of HW ASIDs is
+        * at least twice the number of possible CPUs.
+        */
+       if (num_asids > (2 * num_possible_cpus())) {
+               atomic_long_set(&current_version, num_asids);
+
+               context_asid_map = kcalloc(BITS_TO_LONGS(num_asids),
+                                  sizeof(*context_asid_map), GFP_KERNEL);
+               if (!context_asid_map)
+                       panic("Failed to allocate bitmap for %lu ASIDs\n",
+                             num_asids);
+
+               __set_bit(0, context_asid_map);
+
+               static_branch_enable(&use_asid_allocator);
+
+               pr_info("ASID allocator using %lu bits (%lu entries)\n",
+                       asid_bits, num_asids);
+       } else {
+               pr_info("ASID allocator disabled\n");
+       }
+
+       return 0;
+}
+early_initcall(asids_init);
+#else
+static inline void set_mm(struct mm_struct *mm, unsigned int cpu)
+{
+       /* Nothing to do here when there is no MMU */
+}
+#endif
+
 /*
  * When necessary, performs a deferred icache flush for the given MM context,
  * on the local CPU.  RISC-V has no direct mechanism for instruction cache
@@ -58,10 +318,7 @@ void switch_mm(struct mm_struct *prev, struct mm_struct *next,
        cpumask_clear_cpu(cpu, mm_cpumask(prev));
        cpumask_set_cpu(cpu, mm_cpumask(next));
 
-#ifdef CONFIG_MMU
-       csr_write(CSR_SATP, virt_to_pfn(next->pgd) | SATP_MODE);
-       local_flush_tlb_all();
-#endif
+       set_mm(next, cpu);
 
        flush_icache_deferred(next);
 }
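
/*
 * Illustrative sketch, not part of the patch: mm->context.id packs a
 * generation ("version") in the bits above asid_mask and the hardware
 * ASID in the low bits; rollover advances the version in num_asids
 * steps. The helper below is hypothetical and mirrors the SATP value
 * written by set_mm_asid() above.
 */
static unsigned long satp_value_sketch(struct mm_struct *mm)
{
	unsigned long cntx = atomic_long_read(&mm->context.id);

	return virt_to_pfn(mm->pgd) |
	       ((cntx & asid_mask) << SATP_ASID_SHIFT) |
	       SATP_MODE;
}
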
index 3c8b9e4..8f17519 100644 (file)
 #include <linux/perf_event.h>
 #include <linux/signal.h>
 #include <linux/uaccess.h>
+#include <linux/kprobes.h>
 
 #include <asm/ptrace.h>
 #include <asm/tlbflush.h>
 
 #include "../kernel/head.h"
 
+static void die_kernel_fault(const char *msg, unsigned long addr,
+               struct pt_regs *regs)
+{
+       bust_spinlocks(1);
+
+       pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n", msg,
+               addr);
+
+       bust_spinlocks(0);
+       die(regs, "Oops");
+       do_exit(SIGKILL);
+}
+
 static inline void no_context(struct pt_regs *regs, unsigned long addr)
 {
+       const char *msg;
+
        /* Are we prepared to handle this kernel fault? */
        if (fixup_exception(regs))
                return;
@@ -29,12 +45,8 @@ static inline void no_context(struct pt_regs *regs, unsigned long addr)
         * Oops. The kernel tried to access some bad page. We'll have to
         * terminate things with extreme prejudice.
         */
-       bust_spinlocks(1);
-       pr_alert("Unable to handle kernel %s at virtual address " REG_FMT "\n",
-               (addr < PAGE_SIZE) ? "NULL pointer dereference" :
-               "paging request", addr);
-       die(regs, "Oops");
-       do_exit(SIGKILL);
+       msg = (addr < PAGE_SIZE) ? "NULL pointer dereference" : "paging request";
+       die_kernel_fault(msg, addr, regs);
 }
 
 static inline void mm_fault_error(struct pt_regs *regs, unsigned long addr, vm_fault_t fault)
@@ -202,6 +214,9 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
        tsk = current;
        mm = tsk->mm;
 
+       if (kprobe_page_fault(regs, cause))
+               return;
+
        /*
         * Fault-in kernel-space virtual memory on-demand.
         * The 'reference' page table is init_mm.pgd.
@@ -225,6 +240,7 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
         * in an atomic region, then we must not take the fault.
         */
        if (unlikely(faulthandler_disabled() || !mm)) {
+               tsk->thread.bad_cause = cause;
                no_context(regs, addr);
                return;
        }
@@ -232,6 +248,11 @@ asmlinkage void do_page_fault(struct pt_regs *regs)
        if (user_mode(regs))
                flags |= FAULT_FLAG_USER;
 
+       if (!user_mode(regs) && addr < TASK_SIZE &&
+                       unlikely(!(regs->status & SR_SUM)))
+               die_kernel_fault("access to user memory without uaccess routines",
+                               addr, regs);
+
        perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS, 1, regs, addr);
 
        if (cause == EXC_STORE_PAGE_FAULT)
@@ -242,16 +263,19 @@ retry:
        mmap_read_lock(mm);
        vma = find_vma(mm, addr);
        if (unlikely(!vma)) {
+               tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
        if (likely(vma->vm_start <= addr))
                goto good_area;
        if (unlikely(!(vma->vm_flags & VM_GROWSDOWN))) {
+               tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
        if (unlikely(expand_stack(vma, addr))) {
+               tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
@@ -264,6 +288,7 @@ good_area:
        code = SEGV_ACCERR;
 
        if (unlikely(access_error(cause, vma))) {
+               tsk->thread.bad_cause = cause;
                bad_area(regs, mm, code, addr);
                return;
        }
@@ -297,6 +322,7 @@ good_area:
        mmap_read_unlock(mm);
 
        if (unlikely(fault & VM_FAULT_ERROR)) {
+               tsk->thread.bad_cause = cause;
                mm_fault_error(regs, addr, fault);
                return;
        }
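
/*
 * Illustrative sketch, not part of the patch: the SR_SUM check added above
 * catches kernel code that dereferences user pointers directly. The uaccess
 * routines toggle SR_SUM around the access; anything else now dies loudly
 * in die_kernel_fault(). The function below is hypothetical.
 */
static int read_user_word_sketch(const int __user *uptr)
{
	int val;

	/* WRONG: *uptr from kernel mode leaves SR_SUM clear and oopses. */

	if (get_user(val, uptr))	/* right: sets SR_SUM internally */
		return -EFAULT;
	return val;
}
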
index f9f9568..067583a 100644 (file)
@@ -21,6 +21,7 @@
 #include <asm/soc.h>
 #include <asm/io.h>
 #include <asm/ptdump.h>
+#include <asm/numa.h>
 
 #include "../kernel/head.h"
 
@@ -105,85 +106,19 @@ void __init mem_init(void)
        print_vm_layout();
 }
 
-#ifdef CONFIG_BLK_DEV_INITRD
-static void __init setup_initrd(void)
-{
-       phys_addr_t start;
-       unsigned long size;
-
-       /* Ignore the virtul address computed during device tree parsing */
-       initrd_start = initrd_end = 0;
-
-       if (!phys_initrd_size)
-               return;
-       /*
-        * Round the memory region to page boundaries as per free_initrd_mem()
-        * This allows us to detect whether the pages overlapping the initrd
-        * are in use, but more importantly, reserves the entire set of pages
-        * as we don't want these pages allocated for other purposes.
-        */
-       start = round_down(phys_initrd_start, PAGE_SIZE);
-       size = phys_initrd_size + (phys_initrd_start - start);
-       size = round_up(size, PAGE_SIZE);
-
-       if (!memblock_is_region_memory(start, size)) {
-               pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
-                      (u64)start, size);
-               goto disable;
-       }
-
-       if (memblock_is_region_reserved(start, size)) {
-               pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
-                      (u64)start, size);
-               goto disable;
-       }
-
-       memblock_reserve(start, size);
-       /* Now convert initrd to virtual addresses */
-       initrd_start = (unsigned long)__va(phys_initrd_start);
-       initrd_end = initrd_start + phys_initrd_size;
-       initrd_below_start_ok = 1;
-
-       pr_info("Initial ramdisk at: 0x%p (%lu bytes)\n",
-               (void *)(initrd_start), size);
-       return;
-disable:
-       pr_cont(" - disabling initrd\n");
-       initrd_start = 0;
-       initrd_end = 0;
-}
-#endif /* CONFIG_BLK_DEV_INITRD */
-
 void __init setup_bootmem(void)
 {
-       phys_addr_t mem_start = 0;
-       phys_addr_t start, dram_end, end = 0;
        phys_addr_t vmlinux_end = __pa_symbol(&_end);
        phys_addr_t vmlinux_start = __pa_symbol(&_start);
+       phys_addr_t dram_end = memblock_end_of_DRAM();
        phys_addr_t max_mapped_addr = __pa(~(ulong)0);
-       u64 i;
 
-       /* Find the memory region containing the kernel */
-       for_each_mem_range(i, &start, &end) {
-               phys_addr_t size = end - start;
-               if (!mem_start)
-                       mem_start = start;
-               if (start <= vmlinux_start && vmlinux_end <= end)
-                       BUG_ON(size == 0);
-       }
-
-       /*
-        * The maximal physical memory size is -PAGE_OFFSET.
-        * Make sure that any memory beyond mem_start + (-PAGE_OFFSET) is removed
-        * as it is unusable by kernel.
-        */
+       /* The maximal physical memory size is -PAGE_OFFSET. */
        memblock_enforce_memory_limit(-PAGE_OFFSET);
 
        /* Reserve from the start of the kernel to the end of the kernel */
        memblock_reserve(vmlinux_start, vmlinux_end - vmlinux_start);
 
-       dram_end = memblock_end_of_DRAM();
-
        /*
         * memblock allocator is not aware of the fact that last 4K bytes of
         * the addressable memory can not be mapped because of IS_ERR_VALUE
@@ -198,20 +133,19 @@ void __init setup_bootmem(void)
        dma32_phys_limit = min(4UL * SZ_1G, (unsigned long)PFN_PHYS(max_low_pfn));
        set_max_mapnr(max_low_pfn - ARCH_PFN_OFFSET);
 
-#ifdef CONFIG_BLK_DEV_INITRD
-       setup_initrd();
-#endif /* CONFIG_BLK_DEV_INITRD */
-
+       reserve_initrd_mem();
        /*
-        * Avoid using early_init_fdt_reserve_self() since __pa() does
+        * If DTB is built in, no need to reserve its memblock.
+        * Otherwise, do reserve it but avoid using
+        * early_init_fdt_reserve_self() since __pa() does
         * not work for DTB pointers that are fixmap addresses
         */
-       memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
+       if (!IS_ENABLED(CONFIG_BUILTIN_DTB))
+               memblock_reserve(dtb_early_pa, fdt_totalsize(dtb_early_va));
 
        early_init_fdt_scan_reserved_mem();
        dma_contiguous_reserve(dma32_phys_limit);
        memblock_allow_resize();
-       memblock_dump_all();
 }
 
 #ifdef CONFIG_MMU
@@ -226,8 +160,6 @@ pgd_t swapper_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pgd_t trampoline_pg_dir[PTRS_PER_PGD] __page_aligned_bss;
 pte_t fixmap_pte[PTRS_PER_PTE] __page_aligned_bss;
 
-#define MAX_EARLY_MAPPING_SIZE SZ_128M
-
 pgd_t early_pg_dir[PTRS_PER_PGD] __initdata __aligned(PAGE_SIZE);
 
 void __set_fixmap(enum fixed_addresses idx, phys_addr_t phys, pgprot_t prot)
@@ -302,13 +234,7 @@ static void __init create_pte_mapping(pte_t *ptep,
 
 pmd_t trampoline_pmd[PTRS_PER_PMD] __page_aligned_bss;
 pmd_t fixmap_pmd[PTRS_PER_PMD] __page_aligned_bss;
-
-#if MAX_EARLY_MAPPING_SIZE < PGDIR_SIZE
-#define NUM_EARLY_PMDS         1UL
-#else
-#define NUM_EARLY_PMDS         (1UL + MAX_EARLY_MAPPING_SIZE / PGDIR_SIZE)
-#endif
-pmd_t early_pmd[PTRS_PER_PMD * NUM_EARLY_PMDS] __initdata __aligned(PAGE_SIZE);
+pmd_t early_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 pmd_t early_dtb_pmd[PTRS_PER_PMD] __initdata __aligned(PAGE_SIZE);
 
 static pmd_t *__init get_pmd_virt_early(phys_addr_t pa)
@@ -330,11 +256,9 @@ static pmd_t *get_pmd_virt_late(phys_addr_t pa)
 
 static phys_addr_t __init alloc_pmd_early(uintptr_t va)
 {
-       uintptr_t pmd_num;
+       BUG_ON((va - PAGE_OFFSET) >> PGDIR_SHIFT);
 
-       pmd_num = (va - PAGE_OFFSET) >> PGDIR_SHIFT;
-       BUG_ON(pmd_num >= NUM_EARLY_PMDS);
-       return (uintptr_t)&early_pmd[pmd_num * PTRS_PER_PMD];
+       return (uintptr_t)early_pmd;
 }
 
 static phys_addr_t __init alloc_pmd_fixmap(uintptr_t va)
@@ -452,7 +376,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        uintptr_t va, pa, end_va;
        uintptr_t load_pa = (uintptr_t)(&_start);
        uintptr_t load_sz = (uintptr_t)(&_end) - load_pa;
-       uintptr_t map_size = best_map_size(load_pa, MAX_EARLY_MAPPING_SIZE);
+       uintptr_t map_size;
 #ifndef __PAGETABLE_PMD_FOLDED
        pmd_t fix_bmap_spmd, fix_bmap_epmd;
 #endif
@@ -464,12 +388,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
         * Enforce boot alignment requirements of RV32 and
         * RV64 by only allowing PMD or PGD mappings.
         */
-       BUG_ON(map_size == PAGE_SIZE);
+       map_size = PMD_SIZE;
 
        /* Sanity check alignment and size */
        BUG_ON((PAGE_OFFSET % PGDIR_SIZE) != 0);
        BUG_ON((load_pa % map_size) != 0);
-       BUG_ON(load_sz > MAX_EARLY_MAPPING_SIZE);
 
        pt_ops.alloc_pte = alloc_pte_early;
        pt_ops.get_pte_virt = get_pte_virt_early;
@@ -511,6 +434,7 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        /* Setup early PMD for DTB */
        create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
                           (uintptr_t)early_dtb_pmd, PGDIR_SIZE, PAGE_TABLE);
+#ifndef CONFIG_BUILTIN_DTB
        /* Create two consecutive PMD mappings for FDT early scan */
        pa = dtb_pa & ~(PMD_SIZE - 1);
        create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA,
@@ -518,7 +442,11 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        create_pmd_mapping(early_dtb_pmd, DTB_EARLY_BASE_VA + PMD_SIZE,
                           pa + PMD_SIZE, PMD_SIZE, PAGE_KERNEL);
        dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PMD_SIZE - 1));
+#else /* CONFIG_BUILTIN_DTB */
+       dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_BUILTIN_DTB */
 #else
+#ifndef CONFIG_BUILTIN_DTB
        /* Create two consecutive PGD mappings for FDT early scan */
        pa = dtb_pa & ~(PGDIR_SIZE - 1);
        create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA,
@@ -526,6 +454,9 @@ asmlinkage void __init setup_vm(uintptr_t dtb_pa)
        create_pgd_mapping(early_pg_dir, DTB_EARLY_BASE_VA + PGDIR_SIZE,
                           pa + PGDIR_SIZE, PGDIR_SIZE, PAGE_KERNEL);
        dtb_early_va = (void *)DTB_EARLY_BASE_VA + (dtb_pa & (PGDIR_SIZE - 1));
+#else /* CONFIG_BUILTIN_DTB */
+       dtb_early_va = __va(dtb_pa);
+#endif /* CONFIG_BUILTIN_DTB */
 #endif
        dtb_early_pa = dtb_pa;
 
@@ -616,15 +547,7 @@ static void __init setup_vm_final(void)
 #else
 asmlinkage void __init setup_vm(uintptr_t dtb_pa)
 {
-#ifdef CONFIG_BUILTIN_DTB
-       dtb_early_va = soc_lookup_builtin_dtb();
-       if (!dtb_early_va) {
-               /* Fallback to first available DTS */
-               dtb_early_va = (void *) __dtb_start;
-       }
-#else
        dtb_early_va = (void *)dtb_pa;
-#endif
        dtb_early_pa = dtb_pa;
 }
 
@@ -665,9 +588,15 @@ void mark_rodata_ro(void)
 void __init paging_init(void)
 {
        setup_vm_final();
-       sparse_init();
        setup_zero_page();
+}
+
+void __init misc_mem_init(void)
+{
+       arch_numa_init();
+       sparse_init();
        zone_sizes_init();
+       memblock_dump_all();
 }
 
 #ifdef CONFIG_SPARSEMEM_VMEMMAP
index a8a2ffd..937d13c 100644 (file)
@@ -9,6 +9,19 @@
 #include <linux/pgtable.h>
 #include <asm/tlbflush.h>
 #include <asm/fixmap.h>
+#include <asm/pgalloc.h>
+
+static __init void *early_alloc(size_t size, int node)
+{
+       void *ptr = memblock_alloc_try_nid(size, size,
+               __pa(MAX_DMA_ADDRESS), MEMBLOCK_ALLOC_ACCESSIBLE, node);
+
+       if (!ptr)
+               panic("%pS: Failed to allocate %zu bytes align=%zx nid=%d from=%llx\n",
+                       __func__, size, size, node, (u64)__pa(MAX_DMA_ADDRESS));
+
+       return ptr;
+}
 
 extern pgd_t early_pg_dir[PTRS_PER_PGD];
 asmlinkage void __init kasan_early_init(void)
@@ -47,40 +60,135 @@ asmlinkage void __init kasan_early_init(void)
        local_flush_tlb_all();
 }
 
-static void __init populate(void *start, void *end)
+static void kasan_populate_pte(pmd_t *pmd, unsigned long vaddr, unsigned long end)
+{
+       phys_addr_t phys_addr;
+       pte_t *ptep, *base_pte;
+
+       if (pmd_none(*pmd))
+               base_pte = memblock_alloc(PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
+       else
+               base_pte = (pte_t *)pmd_page_vaddr(*pmd);
+
+       ptep = base_pte + pte_index(vaddr);
+
+       do {
+               if (pte_none(*ptep)) {
+                       phys_addr = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
+                       set_pte(ptep, pfn_pte(PFN_DOWN(phys_addr), PAGE_KERNEL));
+               }
+       } while (ptep++, vaddr += PAGE_SIZE, vaddr != end);
+
+       set_pmd(pmd, pfn_pmd(PFN_DOWN(__pa(base_pte)), PAGE_TABLE));
+}
+
+static void kasan_populate_pmd(pgd_t *pgd, unsigned long vaddr, unsigned long end)
+{
+       phys_addr_t phys_addr;
+       pmd_t *pmdp, *base_pmd;
+       unsigned long next;
+
+       base_pmd = (pmd_t *)pgd_page_vaddr(*pgd);
+       if (base_pmd == lm_alias(kasan_early_shadow_pmd))
+               base_pmd = memblock_alloc(PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
+
+       pmdp = base_pmd + pmd_index(vaddr);
+
+       do {
+               next = pmd_addr_end(vaddr, end);
+
+               if (pmd_none(*pmdp) && IS_ALIGNED(vaddr, PMD_SIZE) && (next - vaddr) >= PMD_SIZE) {
+                       phys_addr = memblock_phys_alloc(PMD_SIZE, PMD_SIZE);
+                       if (phys_addr) {
+                               set_pmd(pmdp, pfn_pmd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+                               continue;
+                       }
+               }
+
+               kasan_populate_pte(pmdp, vaddr, next);
+       } while (pmdp++, vaddr = next, vaddr != end);
+
+       /*
+        * Wait for the whole PGD to be populated before setting the PGD in
+        * the page table; otherwise, if we set the PGD before populating
+        * it entirely, memblock could allocate a page at a physical address
+        * where KASAN is not populated yet and then we'd get a page fault.
+        */
+       set_pgd(pgd, pfn_pgd(PFN_DOWN(__pa(base_pmd)), PAGE_TABLE));
+}
+
+static void kasan_populate_pgd(unsigned long vaddr, unsigned long end)
+{
+       phys_addr_t phys_addr;
+       pgd_t *pgdp = pgd_offset_k(vaddr);
+       unsigned long next;
+
+       do {
+               next = pgd_addr_end(vaddr, end);
+
+               /*
+                * pgdp can't be none since kasan_early_init() initialized the
+                * whole KASAN shadow region with kasan_early_shadow_pmd: if this
+                * is still the case, we can try to allocate a hugepage as a
+                * replacement.
+                */
+               if (pgd_page_vaddr(*pgdp) == (unsigned long)lm_alias(kasan_early_shadow_pmd) &&
+                   IS_ALIGNED(vaddr, PGDIR_SIZE) && (next - vaddr) >= PGDIR_SIZE) {
+                       phys_addr = memblock_phys_alloc(PGDIR_SIZE, PGDIR_SIZE);
+                       if (phys_addr) {
+                               set_pgd(pgdp, pfn_pgd(PFN_DOWN(phys_addr), PAGE_KERNEL));
+                               continue;
+                       }
+               }
+
+               kasan_populate_pmd(pgdp, vaddr, next);
+       } while (pgdp++, vaddr = next, vaddr != end);
+}
+
+static void __init kasan_populate(void *start, void *end)
 {
-       unsigned long i, offset;
        unsigned long vaddr = (unsigned long)start & PAGE_MASK;
        unsigned long vend = PAGE_ALIGN((unsigned long)end);
-       unsigned long n_pages = (vend - vaddr) / PAGE_SIZE;
-       unsigned long n_ptes =
-           ((n_pages + PTRS_PER_PTE) & -PTRS_PER_PTE) / PTRS_PER_PTE;
-       unsigned long n_pmds =
-           ((n_ptes + PTRS_PER_PMD) & -PTRS_PER_PMD) / PTRS_PER_PMD;
-
-       pte_t *pte =
-           memblock_alloc(n_ptes * PTRS_PER_PTE * sizeof(pte_t), PAGE_SIZE);
-       pmd_t *pmd =
-           memblock_alloc(n_pmds * PTRS_PER_PMD * sizeof(pmd_t), PAGE_SIZE);
-       pgd_t *pgd = pgd_offset_k(vaddr);
-
-       for (i = 0; i < n_pages; i++) {
-               phys_addr_t phys = memblock_phys_alloc(PAGE_SIZE, PAGE_SIZE);
-               set_pte(&pte[i], pfn_pte(PHYS_PFN(phys), PAGE_KERNEL));
-       }
 
-       for (i = 0, offset = 0; i < n_ptes; i++, offset += PTRS_PER_PTE)
-               set_pmd(&pmd[i],
-                       pfn_pmd(PFN_DOWN(__pa(&pte[offset])),
-                               __pgprot(_PAGE_TABLE)));
+       kasan_populate_pgd(vaddr, vend);
 
-       for (i = 0, offset = 0; i < n_pmds; i++, offset += PTRS_PER_PMD)
-               set_pgd(&pgd[i],
-                       pfn_pgd(PFN_DOWN(__pa(&pmd[offset])),
-                               __pgprot(_PAGE_TABLE)));
+       local_flush_tlb_all();
+       memset(start, KASAN_SHADOW_INIT, end - start);
+}
+
+static void __init kasan_shallow_populate(void *start, void *end)
+{
+       unsigned long vaddr = (unsigned long)start & PAGE_MASK;
+       unsigned long vend = PAGE_ALIGN((unsigned long)end);
+       unsigned long pfn;
+       int index;
+       void *p;
+       pud_t *pud_dir, *pud_k;
+       pgd_t *pgd_dir, *pgd_k;
+       p4d_t *p4d_dir, *p4d_k;
+
+       while (vaddr < vend) {
+               index = pgd_index(vaddr);
+               pfn = csr_read(CSR_SATP) & SATP_PPN;
+               pgd_dir = (pgd_t *)pfn_to_virt(pfn) + index;
+               pgd_k = init_mm.pgd + index;
+               pgd_dir = pgd_offset_k(vaddr);
+               set_pgd(pgd_dir, *pgd_k);
+
+               p4d_dir = p4d_offset(pgd_dir, vaddr);
+               p4d_k  = p4d_offset(pgd_k, vaddr);
+
+               vaddr = (vaddr + PUD_SIZE) & PUD_MASK;
+               pud_dir = pud_offset(p4d_dir, vaddr);
+               pud_k = pud_offset(p4d_k, vaddr);
+
+               if (pud_present(*pud_dir)) {
+                       p = early_alloc(PAGE_SIZE, NUMA_NO_NODE);
+                       pud_populate(&init_mm, pud_dir, p);
+               }
+               vaddr += PAGE_SIZE;
+       }
 
        local_flush_tlb_all();
-       memset(start, 0, end - start);
 }
 
 void __init kasan_init(void)
@@ -90,7 +198,15 @@ void __init kasan_init(void)
 
        kasan_populate_early_shadow((void *)KASAN_SHADOW_START,
                                    (void *)kasan_mem_to_shadow((void *)
-                                                               VMALLOC_END));
+                                                               VMEMMAP_END));
+       if (IS_ENABLED(CONFIG_KASAN_VMALLOC))
+               kasan_shallow_populate(
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
+       else
+               kasan_populate_early_shadow(
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_START),
+                       (void *)kasan_mem_to_shadow((void *)VMALLOC_END));
 
        for_each_mem_range(i, &_start, &_end) {
                void *start = (void *)__va(_start);
@@ -99,8 +215,8 @@ void __init kasan_init(void)
                if (start >= end)
                        break;
 
-               populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
-       };
+               kasan_populate(kasan_mem_to_shadow(start), kasan_mem_to_shadow(end));
+       }
 
        for (i = 0; i < PTRS_PER_PTE; i++)
                set_pte(&kasan_early_shadow_pte[i],
@@ -108,6 +224,6 @@ void __init kasan_init(void)
                               __pgprot(_PAGE_PRESENT | _PAGE_READ |
                                        _PAGE_ACCESSED)));
 
-       memset(kasan_early_shadow_page, 0, PAGE_SIZE);
+       memset(kasan_early_shadow_page, KASAN_SHADOW_INIT, PAGE_SIZE);
        init_task.kasan_depth = 0;
 }
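
/*
 * Illustrative sketch, not part of the patch: the populate routines above
 * all target the generic KASAN shadow layout, one shadow byte per eight
 * bytes of address space. This mirrors kasan_mem_to_shadow() from
 * <linux/kasan.h>; KASAN_SHADOW_SCALE_SHIFT is 3 for generic KASAN.
 */
static void *shadow_addr_sketch(const void *addr)
{
	return (void *)(((unsigned long)addr >> KASAN_SHADOW_SCALE_SHIFT)
			+ KASAN_SHADOW_OFFSET);
}
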
index e8f7216..c1ff874 100644 (file)
@@ -184,6 +184,7 @@ config S390
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RELIABLE_STACKTRACE
        select HAVE_RSEQ
+       select HAVE_SOFTIRQ_ON_OWN_STACK
        select HAVE_SYSCALL_TRACEPOINTS
        select HAVE_VIRT_CPU_ACCOUNTING
        select HAVE_VIRT_CPU_ACCOUNTING_IDLE
index 6896b42..dc0b690 100644 (file)
@@ -71,7 +71,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_MODULE_SIG_SHA256=y
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_BLK_DEV_INTEGRITY=y
 CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_BLK_WBT=y
@@ -276,9 +275,9 @@ CONFIG_IP_VS_DH=m
 CONFIG_IP_VS_SH=m
 CONFIG_IP_VS_SED=m
 CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_TWOS=m
 CONFIG_IP_VS_FTP=m
 CONFIG_IP_VS_PE_SIP=m
-CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_IP_NF_IPTABLES=m
@@ -299,7 +298,6 @@ CONFIG_IP_NF_SECURITY=m
 CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_TABLES_IPV6=y
 CONFIG_NFT_FIB_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -482,7 +480,6 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_AURORA is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_CADENCE is not set
@@ -582,7 +579,6 @@ CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
-# CONFIG_SURFACE_PLATFORMS is not set
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -636,6 +632,7 @@ CONFIG_NTFS_RW=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -715,12 +712,8 @@ CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
 CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
@@ -732,7 +725,6 @@ CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
@@ -797,12 +789,9 @@ CONFIG_DEBUG_OBJECTS_RCU_HEAD=y
 CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER=y
 CONFIG_SLUB_DEBUG_ON=y
 CONFIG_SLUB_STATS=y
-CONFIG_DEBUG_KMEMLEAK=y
-CONFIG_DEBUG_KMEMLEAK_DEFAULT_OFF=y
 CONFIG_DEBUG_STACK_USAGE=y
 CONFIG_DEBUG_VM=y
 CONFIG_DEBUG_VM_VMACACHE=y
-CONFIG_DEBUG_VM_RB=y
 CONFIG_DEBUG_VM_PGFLAGS=y
 CONFIG_DEBUG_MEMORY_INIT=y
 CONFIG_MEMORY_NOTIFIER_ERROR_INJECT=m
@@ -839,6 +828,7 @@ CONFIG_BPF_KPROBE_OVERRIDE=y
 CONFIG_HIST_TRIGGERS=y
 CONFIG_FTRACE_STARTUP_TEST=y
 # CONFIG_EVENT_TRACE_STARTUP_TEST is not set
+CONFIG_DEBUG_ENTRY=y
 CONFIG_NOTIFIER_ERROR_INJECTION=m
 CONFIG_NETDEV_NOTIFIER_ERROR_INJECT=m
 CONFIG_FAULT_INJECTION=y
@@ -862,4 +852,3 @@ CONFIG_PERCPU_TEST=m
 CONFIG_ATOMIC64_SELFTEST=y
 CONFIG_TEST_BITOPS=m
 CONFIG_TEST_BPF=m
-CONFIG_DEBUG_ENTRY=y
index ef4df9d..320379d 100644 (file)
@@ -66,7 +66,6 @@ CONFIG_MODULE_FORCE_UNLOAD=y
 CONFIG_MODVERSIONS=y
 CONFIG_MODULE_SRCVERSION_ALL=y
 CONFIG_MODULE_SIG_SHA256=y
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_BLK_DEV_THROTTLING=y
 CONFIG_BLK_WBT=y
 CONFIG_BLK_CGROUP_IOLATENCY=y
@@ -267,9 +266,9 @@ CONFIG_IP_VS_DH=m
 CONFIG_IP_VS_SH=m
 CONFIG_IP_VS_SED=m
 CONFIG_IP_VS_NQ=m
+CONFIG_IP_VS_TWOS=m
 CONFIG_IP_VS_FTP=m
 CONFIG_IP_VS_PE_SIP=m
-CONFIG_NF_TABLES_IPV4=y
 CONFIG_NFT_FIB_IPV4=m
 CONFIG_NF_TABLES_ARP=y
 CONFIG_IP_NF_IPTABLES=m
@@ -290,7 +289,6 @@ CONFIG_IP_NF_SECURITY=m
 CONFIG_IP_NF_ARPTABLES=m
 CONFIG_IP_NF_ARPFILTER=m
 CONFIG_IP_NF_ARP_MANGLE=m
-CONFIG_NF_TABLES_IPV6=y
 CONFIG_NFT_FIB_IPV6=m
 CONFIG_IP6_NF_IPTABLES=m
 CONFIG_IP6_NF_MATCH_AH=m
@@ -474,7 +472,6 @@ CONFIG_NLMON=m
 # CONFIG_NET_VENDOR_AQUANTIA is not set
 # CONFIG_NET_VENDOR_ARC is not set
 # CONFIG_NET_VENDOR_ATHEROS is not set
-# CONFIG_NET_VENDOR_AURORA is not set
 # CONFIG_NET_VENDOR_BROADCOM is not set
 # CONFIG_NET_VENDOR_BROCADE is not set
 # CONFIG_NET_VENDOR_CADENCE is not set
@@ -574,7 +571,6 @@ CONFIG_VIRTIO_BALLOON=m
 CONFIG_VIRTIO_INPUT=y
 CONFIG_VHOST_NET=m
 CONFIG_VHOST_VSOCK=m
-# CONFIG_SURFACE_PLATFORMS is not set
 CONFIG_S390_CCW_IOMMU=y
 CONFIG_S390_AP_IOMMU=y
 CONFIG_EXT4_FS=y
@@ -624,6 +620,7 @@ CONFIG_NTFS_RW=y
 CONFIG_PROC_KCORE=y
 CONFIG_TMPFS=y
 CONFIG_TMPFS_POSIX_ACL=y
+CONFIG_TMPFS_INODE64=y
 CONFIG_HUGETLBFS=y
 CONFIG_CONFIGFS_FS=m
 CONFIG_ECRYPT_FS=m
@@ -704,12 +701,8 @@ CONFIG_CRYPTO_VMAC=m
 CONFIG_CRYPTO_CRC32=m
 CONFIG_CRYPTO_BLAKE2S=m
 CONFIG_CRYPTO_MICHAEL_MIC=m
-CONFIG_CRYPTO_RMD128=m
 CONFIG_CRYPTO_RMD160=m
-CONFIG_CRYPTO_RMD256=m
-CONFIG_CRYPTO_RMD320=m
 CONFIG_CRYPTO_SHA3=m
-CONFIG_CRYPTO_TGR192=m
 CONFIG_CRYPTO_WP512=m
 CONFIG_CRYPTO_AES_TI=m
 CONFIG_CRYPTO_ANUBIS=m
@@ -721,7 +714,6 @@ CONFIG_CRYPTO_CAST6=m
 CONFIG_CRYPTO_DES=m
 CONFIG_CRYPTO_FCRYPT=m
 CONFIG_CRYPTO_KHAZAD=m
-CONFIG_CRYPTO_SALSA20=m
 CONFIG_CRYPTO_SEED=m
 CONFIG_CRYPTO_SERPENT=m
 CONFIG_CRYPTO_SM4=m
index acf982a..76123a4 100644 (file)
@@ -26,7 +26,6 @@ CONFIG_CRASH_DUMP=y
 # CONFIG_SECCOMP is not set
 # CONFIG_GCC_PLUGINS is not set
 CONFIG_PARTITION_ADVANCED=y
-CONFIG_IBM_PARTITION=y
 # CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is not set
 # CONFIG_COMPACTION is not set
 # CONFIG_MIGRATION is not set
@@ -61,11 +60,9 @@ CONFIG_RAW_DRIVER=y
 # CONFIG_HID is not set
 # CONFIG_VIRTIO_MENU is not set
 # CONFIG_VHOST_MENU is not set
-# CONFIG_SURFACE_PLATFORMS is not set
 # CONFIG_IOMMU_SUPPORT is not set
 # CONFIG_DNOTIFY is not set
 # CONFIG_INOTIFY_USER is not set
-CONFIG_CONFIGFS_FS=y
 # CONFIG_MISC_FILESYSTEMS is not set
 # CONFIG_NETWORK_FILESYSTEMS is not set
 CONFIG_LSM="yama,loadpin,safesetid,integrity"
index 68c476b..91b5d71 100644 (file)
@@ -44,7 +44,7 @@ static inline int __test_facility(unsigned long nr, void *facilities)
 }
 
 /*
- * The test_facility function uses the bit odering where the MSB is bit 0.
+ * The test_facility function uses the bit ordering where the MSB is bit 0.
  * That makes it easier to query facility bits with the bit number as
  * documented in the Principles of Operation.
  */
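For reference, the MSB-first numbering described above maps facility bit nr to byte nr >> 3 and in-byte mask 0x80 >> (nr & 7). A small self-contained sketch of such a lookup (illustrative only, not part of the patch; test_bit_msb0 is a made-up name):

#include <stdio.h>

/* MSB-0 lookup: bit 0 is the most significant bit of byte 0, matching
 * the facility-bit numbering in the Principles of Operation. */
static int test_bit_msb0(unsigned int nr, const unsigned char *bytes)
{
	return (bytes[nr >> 3] & (0x80 >> (nr & 7))) != 0;
}

int main(void)
{
	unsigned char facilities[2] = { 0x80, 0x01 };	/* bits 0 and 15 set */

	printf("bit 0:  %d\n", test_bit_msb0(0, facilities));	/* prints 1 */
	printf("bit 7:  %d\n", test_bit_msb0(7, facilities));	/* prints 0 */
	printf("bit 15: %d\n", test_bit_msb0(15, facilities));	/* prints 1 */
	return 0;
}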
index dfbc3c6..58668ff 100644 (file)
@@ -18,7 +18,6 @@
 #define or_softirq_pending(x)  (S390_lowcore.softirq_pending |= (x))
 
 #define __ARCH_IRQ_STAT
-#define __ARCH_HAS_DO_SOFTIRQ
 #define __ARCH_IRQ_EXIT_IRQS_DISABLED
 
 static inline void ack_bad_irq(unsigned int irq)
index b04f6a7..5cea629 100644 (file)
 
 struct s390_idle_data {
        seqcount_t seqcount;
-       unsigned long long idle_count;
-       unsigned long long idle_time;
-       unsigned long long clock_idle_enter;
-       unsigned long long clock_idle_exit;
-       unsigned long long timer_idle_enter;
-       unsigned long long timer_idle_exit;
+       unsigned long idle_count;
+       unsigned long idle_time;
+       unsigned long clock_idle_enter;
+       unsigned long clock_idle_exit;
+       unsigned long timer_idle_enter;
+       unsigned long timer_idle_exit;
        unsigned long mt_cycles_enter[8];
 };
 
diff --git a/arch/s390/include/asm/irq_work.h b/arch/s390/include/asm/irq_work.h
new file mode 100644 (file)
index 0000000..6037837
--- /dev/null
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_S390_IRQ_WORK_H
+#define _ASM_S390_IRQ_WORK_H
+
+static inline bool arch_irq_work_has_interrupt(void)
+{
+       return true;
+}
+
+void arch_irq_work_raise(void);
+
+#endif /* _ASM_S390_IRQ_WORK_H */
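This new header is what allows the generic irq_work code to use real interrupt delivery on s390; the matching arch_irq_work_raise() implementation is added in smp.c further down. A minimal kernel-side sketch of the consumer pattern, using the long-standing irq_work API (illustrative, not part of the patch):

#include <linux/irq_work.h>

static void my_deferred_fn(struct irq_work *work)
{
	/* Runs in IRQ context shortly after being raised. */
}

static DEFINE_IRQ_WORK(my_work, my_deferred_fn);

/* Callable from contexts that cannot sleep or take normal locks: */
static void kick(void)
{
	irq_work_queue(&my_work);	/* ends up in arch_irq_work_raise() */
}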
index 053fe8b..a75d94a 100644 (file)
@@ -202,7 +202,7 @@ extern unsigned int s390_pci_no_rid;
 ----------------------------------------------------------------------------- */
 /* Base stuff */
 int zpci_create_device(u32 fid, u32 fh, enum zpci_state state);
-void zpci_remove_device(struct zpci_dev *zdev);
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error);
 int zpci_enable_device(struct zpci_dev *);
 int zpci_disable_device(struct zpci_dev *);
 int zpci_register_ioat(struct zpci_dev *, u8, u64, u64, u64);
index d1297d6..6b187cd 100644 (file)
@@ -135,7 +135,7 @@ static inline void pmd_populate(struct mm_struct *mm,
 #define pmd_populate_kernel(mm, pmd, pte) pmd_populate(mm, pmd, pte)
 
 #define pmd_pgtable(pmd) \
-       (pgtable_t)(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE)
+       ((pgtable_t)__va(pmd_val(pmd) & -sizeof(pte_t)*PTRS_PER_PTE))
 
 /*
  * page table entry allocation/free routines.
index 794746a..29c7ecd 100644 (file)
@@ -1219,8 +1219,8 @@ static inline pte_t mk_pte(struct page *page, pgprot_t pgprot)
 #define pud_index(address) (((address) >> PUD_SHIFT) & (PTRS_PER_PUD-1))
 #define pmd_index(address) (((address) >> PMD_SHIFT) & (PTRS_PER_PMD-1))
 
-#define p4d_deref(pud) (p4d_val(pud) & _REGION_ENTRY_ORIGIN)
-#define pgd_deref(pgd) (pgd_val(pgd) & _REGION_ENTRY_ORIGIN)
+#define p4d_deref(pud) ((unsigned long)__va(p4d_val(pud) & _REGION_ENTRY_ORIGIN))
+#define pgd_deref(pgd) ((unsigned long)__va(pgd_val(pgd) & _REGION_ENTRY_ORIGIN))
 
 static inline unsigned long pmd_deref(pmd_t pmd)
 {
@@ -1229,12 +1229,12 @@ static inline unsigned long pmd_deref(pmd_t pmd)
        origin_mask = _SEGMENT_ENTRY_ORIGIN;
        if (pmd_large(pmd))
                origin_mask = _SEGMENT_ENTRY_ORIGIN_LARGE;
-       return pmd_val(pmd) & origin_mask;
+       return (unsigned long)__va(pmd_val(pmd) & origin_mask);
 }
 
 static inline unsigned long pmd_pfn(pmd_t pmd)
 {
-       return pmd_deref(pmd) >> PAGE_SHIFT;
+       return __pa(pmd_deref(pmd)) >> PAGE_SHIFT;
 }
 
 static inline unsigned long pud_deref(pud_t pud)
@@ -1244,12 +1244,12 @@ static inline unsigned long pud_deref(pud_t pud)
        origin_mask = _REGION_ENTRY_ORIGIN;
        if (pud_large(pud))
                origin_mask = _REGION3_ENTRY_ORIGIN_LARGE;
-       return pud_val(pud) & origin_mask;
+       return (unsigned long)__va(pud_val(pud) & origin_mask);
 }
 
 static inline unsigned long pud_pfn(pud_t pud)
 {
-       return pud_deref(pud) >> PAGE_SHIFT;
+       return __pa(pud_deref(pud)) >> PAGE_SHIFT;
 }
 
 /*
@@ -1329,7 +1329,7 @@ static inline bool gup_fast_permitted(unsigned long start, unsigned long end)
 }
 #define gup_fast_permitted gup_fast_permitted
 
-#define pfn_pte(pfn,pgprot) mk_pte_phys(__pa((pfn) << PAGE_SHIFT),(pgprot))
+#define pfn_pte(pfn, pgprot)   mk_pte_phys(((pfn) << PAGE_SHIFT), (pgprot))
 #define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
 #define pte_page(x) pfn_to_page(pte_pfn(x))
 
@@ -1636,7 +1636,7 @@ static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
 }
 #define pmdp_collapse_flush pmdp_collapse_flush
 
-#define pfn_pmd(pfn, pgprot)   mk_pmd_phys(__pa((pfn) << PAGE_SHIFT), (pgprot))
+#define pfn_pmd(pfn, pgprot)   mk_pmd_phys(((pfn) << PAGE_SHIFT), (pgprot))
 #define mk_pmd(page, pgprot)   pfn_pmd(page_to_pfn(page), (pgprot))
 
 static inline int pmd_trans_huge(pmd_t pmd)
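Taken together, these hunks settle a convention: the *_deref() helpers now return virtual addresses, while table entries and the pfn helpers stay in physical terms. Schematically (a sketch of the invariant; on s390 __va()/__pa() are simple offset conversions):

	phys = entry & origin_mask            /* what the table entry stores */
	virt = __va(phys)                     /* what *_deref() now returns  */
	pfn  = __pa(virt) >> PAGE_SHIFT       /* pmd_pfn() / pud_pfn()       */
	pfn_pte(pfn, prot) == mk_pte_phys(pfn << PAGE_SHIFT, prot)
	                                      /* already physical, no __pa() */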
index ee056f4..2b54316 100644 (file)
@@ -12,6 +12,7 @@ enum stack_type {
        STACK_TYPE_IRQ,
        STACK_TYPE_NODAT,
        STACK_TYPE_RESTART,
+       STACK_TYPE_MCCK,
 };
 
 struct stack_info {
index c4e23e9..f6326c6 100644 (file)
@@ -98,10 +98,10 @@ extern unsigned char ptff_function_mask[16];
 
 /* Query TOD offset result */
 struct ptff_qto {
-       unsigned long long physical_clock;
-       unsigned long long tod_offset;
-       unsigned long long logical_tod_offset;
-       unsigned long long tod_epoch_difference;
+       unsigned long physical_clock;
+       unsigned long tod_offset;
+       unsigned long logical_tod_offset;
+       unsigned long tod_epoch_difference;
 } __packed;
 
 static inline int ptff_query(unsigned int nr)
@@ -151,9 +151,9 @@ struct ptff_qui {
        rc;                                                             \
 })
 
-static inline unsigned long long local_tick_disable(void)
+static inline unsigned long local_tick_disable(void)
 {
-       unsigned long long old;
+       unsigned long old;
 
        old = S390_lowcore.clock_comparator;
        S390_lowcore.clock_comparator = clock_comparator_max;
@@ -161,7 +161,7 @@ static inline unsigned long long local_tick_disable(void)
        return old;
 }
 
-static inline void local_tick_enable(unsigned long long comp)
+static inline void local_tick_enable(unsigned long comp)
 {
        S390_lowcore.clock_comparator = comp;
        set_clock_comparator(S390_lowcore.clock_comparator);
@@ -169,9 +169,9 @@ static inline void local_tick_enable(unsigned long long comp)
 
 #define CLOCK_TICK_RATE                1193180 /* Underlying HZ */
 
-typedef unsigned long long cycles_t;
+typedef unsigned long cycles_t;
 
-static inline unsigned long long get_tod_clock(void)
+static inline unsigned long get_tod_clock(void)
 {
        union tod_clock clk;
 
@@ -179,10 +179,10 @@ static inline unsigned long long get_tod_clock(void)
        return clk.tod;
 }
 
-static inline unsigned long long get_tod_clock_fast(void)
+static inline unsigned long get_tod_clock_fast(void)
 {
 #ifdef CONFIG_HAVE_MARCH_Z9_109_FEATURES
-       unsigned long long clk;
+       unsigned long clk;
 
        asm volatile("stckf %0" : "=Q" (clk) : : "cc");
        return clk;
@@ -208,9 +208,9 @@ extern union tod_clock tod_clock_base;
  * Therefore preemption must be disabled, otherwise the returned
  * value is not guaranteed to be monotonic.
  */
-static inline unsigned long long get_tod_clock_monotonic(void)
+static inline unsigned long get_tod_clock_monotonic(void)
 {
-       unsigned long long tod;
+       unsigned long tod;
 
        preempt_disable_notrace();
        tod = get_tod_clock() - tod_clock_base.tod;
@@ -237,7 +237,7 @@ static inline unsigned long long get_tod_clock_monotonic(void)
  * -> ns = (th * 125) + ((tl * 125) >> 9);
  *
  */
-static inline unsigned long long tod_to_ns(unsigned long long todval)
+static inline unsigned long tod_to_ns(unsigned long todval)
 {
        return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
 }
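Since the TOD clock advances 4096 units per microsecond, one unit is 125/512 ns; the split multiply-and-shift above applies that factor without 64-bit overflow. A quick self-contained check of the arithmetic (illustrative only):

#include <assert.h>

static unsigned long tod_to_ns(unsigned long todval)
{
	return ((todval >> 9) * 125) + (((todval & 0x1ff) * 125) >> 9);
}

int main(void)
{
	assert(tod_to_ns(4096) == 1000);		 /* 1 us -> 1000 ns */
	assert(tod_to_ns(4096000000UL) == 1000000000UL); /* 1 s  -> 1e9 ns  */
	return 0;
}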
@@ -249,10 +249,10 @@ static inline unsigned long long tod_to_ns(unsigned long long todval)
  *
  * Returns: true if a is later than b
  */
-static inline int tod_after(unsigned long long a, unsigned long long b)
+static inline int tod_after(unsigned long a, unsigned long b)
 {
        if (MACHINE_HAS_SCC)
-               return (long long) a > (long long) b;
+               return (long) a > (long) b;
        return a > b;
 }
 
@@ -263,10 +263,10 @@ static inline int tod_after(unsigned long long a, unsigned long long b)
  *
  * Returns: true if a is later than b
  */
-static inline int tod_after_eq(unsigned long long a, unsigned long long b)
+static inline int tod_after_eq(unsigned long a, unsigned long b)
 {
        if (MACHINE_HAS_SCC)
-               return (long long) a >= (long long) b;
+               return (long) a >= (long) b;
        return a >= b;
 }
 
index 7b3cdb4..73ee891 100644 (file)
@@ -6,7 +6,7 @@
 #include <vdso/datapage.h>
 
 struct arch_vdso_data {
-       __u64 tod_steering_delta;
+       __s64 tod_steering_delta;
        __u64 tod_steering_end;
 };
 
diff --git a/arch/s390/include/uapi/asm/hwctrset.h b/arch/s390/include/uapi/asm/hwctrset.h
new file mode 100644 (file)
index 0000000..3d8284b
--- /dev/null
@@ -0,0 +1,51 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright IBM Corp. 2021
+ * Interface implementation for communication with the CPU Measurement
+ * counter facility device driver.
+ *
+ * Author(s): Thomas Richter <tmricht@linux.ibm.com>
+ *
+ * Definitions for the ioctl() commands used to communicate with the CPU
+ * Measurement counter facility device driver.
+ */
+
+#ifndef _PERF_CPUM_CF_DIAG_H
+#define _PERF_CPUM_CF_DIAG_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define S390_HWCTR_DEVICE              "hwctr"
+#define S390_HWCTR_START_VERSION       1
+
+struct s390_ctrset_start {             /* Set CPUs to operate on */
+       __u64 version;                  /* Version of interface */
+       __u64 data_bytes;               /* # of bytes required */
+       __u64 cpumask_len;              /* Length of CPU mask in bytes */
+       __u64 *cpumask;                 /* Pointer to CPU mask */
+       __u64 counter_sets;             /* Bit mask of counter sets to get */
+};
+
+struct s390_ctrset_setdata {           /* Counter set data */
+       __u32 set;                      /* Counter set number */
+       __u32 no_cnts;                  /* # of counters stored in cv[] */
+       __u64 cv[0];                    /* Counter values (variable length) */
+};
+
+struct s390_ctrset_cpudata {           /* Counter set data per CPU */
+       __u32 cpu_nr;                   /* CPU number */
+       __u32 no_sets;                  /* # of counters sets in data[] */
+       struct s390_ctrset_setdata data[0];
+};
+
+struct s390_ctrset_read {              /* Structure to get all ctr sets */
+       __u64 no_cpus;                  /* Total # of CPUs data taken from */
+       struct s390_ctrset_cpudata data[0];
+};
+
+#define S390_HWCTR_MAGIC       'C'     /* Random magic # for ioctls */
+#define        S390_HWCTR_START        _IOWR(S390_HWCTR_MAGIC, 1, struct s390_ctrset_start)
+#define        S390_HWCTR_STOP         _IO(S390_HWCTR_MAGIC, 2)
+#define        S390_HWCTR_READ         _IOWR(S390_HWCTR_MAGIC, 3, struct s390_ctrset_read)
+#endif
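A rough userspace sketch of the START/READ/STOP cycle this header defines (error handling omitted; the counter-set bit value and the single-CPU mask are illustrative assumptions, and the header is assumed to be installed as <asm/hwctrset.h>):

#include <fcntl.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <asm/hwctrset.h>

int main(void)
{
	struct s390_ctrset_start start;
	__u64 cpumask = 1;			/* CPU 0 only */
	void *buf;
	int fd;

	fd = open("/dev/" S390_HWCTR_DEVICE, O_RDWR);	/* needs CAP_SYS_ADMIN */
	memset(&start, 0, sizeof(start));
	start.version = S390_HWCTR_START_VERSION;
	start.counter_sets = 2;			/* e.g. the basic counter set */
	start.cpumask_len = sizeof(cpumask);
	start.cpumask = &cpumask;
	ioctl(fd, S390_HWCTR_START, &start);	/* kernel fills start.data_bytes */

	buf = calloc(1, start.data_bytes);	/* receives struct s390_ctrset_read */
	ioctl(fd, S390_HWCTR_READ, buf);
	ioctl(fd, S390_HWCTR_STOP);
	free(buf);
	close(fd);
	return 0;
}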
index af013b4..2da0273 100644 (file)
@@ -37,10 +37,12 @@ static int diag8_noresponse(int cmdlen)
 
 static int diag8_response(int cmdlen, char *response, int *rlen)
 {
+       unsigned long _cmdlen = cmdlen | 0x40000000L;
+       unsigned long _rlen = *rlen;
        register unsigned long reg2 asm ("2") = (addr_t) cpcmd_buf;
        register unsigned long reg3 asm ("3") = (addr_t) response;
-       register unsigned long reg4 asm ("4") = cmdlen | 0x40000000L;
-       register unsigned long reg5 asm ("5") = *rlen;
+       register unsigned long reg4 asm ("4") = _cmdlen;
+       register unsigned long reg5 asm ("5") = _rlen;
 
        asm volatile(
                "       diag    %2,%0,0x8\n"
index 0dc4b25..db1bc00 100644 (file)
@@ -79,6 +79,15 @@ static bool in_nodat_stack(unsigned long sp, struct stack_info *info)
        return in_stack(sp, info, STACK_TYPE_NODAT, top - THREAD_SIZE, top);
 }
 
+static bool in_mcck_stack(unsigned long sp, struct stack_info *info)
+{
+       unsigned long frame_size, top;
+
+       frame_size = STACK_FRAME_OVERHEAD + sizeof(struct pt_regs);
+       top = S390_lowcore.mcck_stack + frame_size;
+       return in_stack(sp, info, STACK_TYPE_MCCK, top - THREAD_SIZE, top);
+}
+
 static bool in_restart_stack(unsigned long sp, struct stack_info *info)
 {
        unsigned long frame_size, top;
@@ -108,7 +117,8 @@ int get_stack_info(unsigned long sp, struct task_struct *task,
        /* Check per-cpu stacks */
        if (!in_irq_stack(sp, info) &&
            !in_nodat_stack(sp, info) &&
-           !in_restart_stack(sp, info))
+           !in_restart_stack(sp, info) &&
+           !in_mcck_stack(sp, info))
                goto unknown;
 
 recursion_check:
index 812073e..4bf1ee2 100644 (file)
@@ -47,7 +47,7 @@ void account_idle_time_irq(void)
 void arch_cpu_idle(void)
 {
        struct s390_idle_data *idle = this_cpu_ptr(&s390_idle);
-       unsigned long long idle_time;
+       unsigned long idle_time;
        unsigned long psw_mask;
 
        /* Wait for external, I/O or machine check interrupt. */
@@ -73,7 +73,7 @@ static ssize_t show_idle_count(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
        struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
-       unsigned long long idle_count;
+       unsigned long idle_count;
        unsigned int seq;
 
        do {
@@ -82,14 +82,14 @@ static ssize_t show_idle_count(struct device *dev,
                if (READ_ONCE(idle->clock_idle_enter))
                        idle_count++;
        } while (read_seqcount_retry(&idle->seqcount, seq));
-       return sprintf(buf, "%llu\n", idle_count);
+       return sprintf(buf, "%lu\n", idle_count);
 }
 DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL);
 
 static ssize_t show_idle_time(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
-       unsigned long long now, idle_time, idle_enter, idle_exit, in_idle;
+       unsigned long now, idle_time, idle_enter, idle_exit, in_idle;
        struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id);
        unsigned int seq;
 
@@ -109,14 +109,14 @@ static ssize_t show_idle_time(struct device *dev,
                }
        }
        idle_time += in_idle;
-       return sprintf(buf, "%llu\n", idle_time >> 12);
+       return sprintf(buf, "%lu\n", idle_time >> 12);
 }
 DEVICE_ATTR(idle_time_us, 0444, show_idle_time, NULL);
 
 u64 arch_cpu_idle_time(int cpu)
 {
        struct s390_idle_data *idle = &per_cpu(s390_idle, cpu);
-       unsigned long long now, idle_enter, idle_exit, in_idle;
+       unsigned long now, idle_enter, idle_exit, in_idle;
        unsigned int seq;
 
        do {
index c6d40bc..714269e 100644 (file)
@@ -28,6 +28,7 @@
 #include <asm/irq.h>
 #include <asm/hw_irq.h>
 #include <asm/stacktrace.h>
+#include <asm/softirq_stack.h>
 #include "entry.h"
 
 DEFINE_PER_CPU_SHARED_ALIGNED(struct irq_stat, irq_stat);
@@ -173,7 +174,7 @@ void noinstr do_ext_irq(struct pt_regs *regs)
 
        memcpy(&regs->int_code, &S390_lowcore.ext_cpu_addr, 4);
        regs->int_parm = S390_lowcore.ext_params;
-       regs->int_parm_long = *(unsigned long *)S390_lowcore.ext_params2;
+       regs->int_parm_long = S390_lowcore.ext_params2;
 
        from_idle = !user_mode(regs) && regs->psw.addr == (unsigned long)psw_idle_exit;
        if (from_idle)
index 0eb1d1c..b3beef6 100644 (file)
@@ -269,7 +269,7 @@ static int __hw_perf_event_init(struct perf_event *event, unsigned int type)
        case CPUMF_CTR_SET_MAX:
                /* The counter could not be associated to a counter set */
                return -EINVAL;
-       };
+       }
 
        /* Initialize for using the CPU-measurement counter facility */
        if (!atomic_inc_not_zero(&num_events)) {
index b5c86fb..2e3e7ed 100644 (file)
@@ -2,7 +2,7 @@
 /*
  * Performance event support for s390x - CPU-measurement Counter Sets
  *
- *  Copyright IBM Corp. 2019
+ *  Copyright IBM Corp. 2019, 2021
  *  Author(s): Hendrik Brueckner <brueckner@linux.ibm.com>
 *            Thomas Richter <tmricht@linux.ibm.com>
  */
@@ -17,6 +17,8 @@
 #include <linux/export.h>
 #include <linux/slab.h>
 #include <linux/processor.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
 
 #include <asm/ctl_reg.h>
 #include <asm/irq.h>
 #include <asm/timex.h>
 #include <asm/debug.h>
 
-#define        CF_DIAG_CTRSET_DEF              0xfeef  /* Counter set header mark */
+#include <asm/hwctrset.h>
 
+#define        CF_DIAG_CTRSET_DEF              0xfeef  /* Counter set header mark */
+                                               /* interval in seconds */
 static unsigned int cf_diag_cpu_speed;
 static debug_info_t *cf_diag_dbg;
 
-struct cf_diag_csd {           /* Counter set data per CPU */
+struct cf_diag_csd {                   /* Counter set data per CPU */
        size_t used;                    /* Bytes used in data/start */
        unsigned char start[PAGE_SIZE]; /* Counter set at event start */
        unsigned char data[PAGE_SIZE];  /* Counter set at event delete */
+       unsigned int sets;              /* # of counter sets saved in data */
 };
 static DEFINE_PER_CPU(struct cf_diag_csd, cf_diag_csd);
 
@@ -178,18 +183,35 @@ static void cf_diag_disable(struct pmu *pmu)
 
 /* Number of perf events counting hardware events */
 static atomic_t cf_diag_events = ATOMIC_INIT(0);
+/* Used to avoid races in calling reserve/release_cpumf_hardware */
+static DEFINE_MUTEX(cf_diag_reserve_mutex);
 
 /* Release the PMU if event is the last perf event */
 static void cf_diag_perf_event_destroy(struct perf_event *event)
 {
        debug_sprintf_event(cf_diag_dbg, 5,
                            "%s event %p cpu %d cf_diag_events %d\n",
-                           __func__, event, event->cpu,
+                           __func__, event, smp_processor_id(),
                            atomic_read(&cf_diag_events));
        if (atomic_dec_return(&cf_diag_events) == 0)
                __kernel_cpumcf_end();
 }
 
+static int get_authctrsets(void)
+{
+       struct cpu_cf_events *cpuhw;
+       unsigned long auth = 0;
+       enum cpumf_ctr_set i;
+
+       cpuhw = &get_cpu_var(cpu_cf_events);
+       for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+               if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
+                       auth |= cpumf_ctr_ctl[i];
+       }
+       put_cpu_var(cpu_cf_events);
+       return auth;
+}
+
 /* Setup the event. Test for authorized counter sets and only include counter
  * sets which are authorized at the time of the setup. Including unauthorized
  * counter sets results in a specification exception (and panic).
@@ -197,15 +219,12 @@ static void cf_diag_perf_event_destroy(struct perf_event *event)
 static int __hw_perf_event_init(struct perf_event *event)
 {
        struct perf_event_attr *attr = &event->attr;
-       struct cpu_cf_events *cpuhw;
-       enum cpumf_ctr_set i;
        int err = 0;
 
        debug_sprintf_event(cf_diag_dbg, 5, "%s event %p cpu %d\n", __func__,
                            event, event->cpu);
 
        event->hw.config = attr->config;
-       event->hw.config_base = 0;
 
        /* Add all authorized counter sets to config_base. The
         * hardware init function is either called per-cpu or just once
@@ -215,11 +234,7 @@ static int __hw_perf_event_init(struct perf_event *event)
         * Checking the authorization on any CPU is fine as the hardware
         * applies the same authorization settings to all CPUs.
         */
-       cpuhw = &get_cpu_var(cpu_cf_events);
-       for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i)
-               if (cpuhw->info.auth_ctl & cpumf_ctr_ctl[i])
-                       event->hw.config_base |= cpumf_ctr_ctl[i];
-       put_cpu_var(cpu_cf_events);
+       event->hw.config_base = get_authctrsets();
 
        /* No authorized counter sets, nothing to count/sample */
        if (!event->hw.config_base) {
@@ -237,6 +252,25 @@ out:
        return err;
 }
 
+/* Reserve the CPU-measurement counter facility. Returns a non-negative
+ * value on success and a negative error code if it is already in use.
+ */
+static int cf_diag_perf_event_inuse(void)
+{
+       int err = 0;
+
+       if (!atomic_inc_not_zero(&cf_diag_events)) {
+               mutex_lock(&cf_diag_reserve_mutex);
+               if (atomic_read(&cf_diag_events) == 0 &&
+                   __kernel_cpumcf_begin())
+                       err = -EBUSY;
+               else
+                       err = atomic_inc_return(&cf_diag_events);
+               mutex_unlock(&cf_diag_reserve_mutex);
+       }
+       return err;
+}
+
 static int cf_diag_event_init(struct perf_event *event)
 {
        struct perf_event_attr *attr = &event->attr;
@@ -264,13 +298,9 @@ static int cf_diag_event_init(struct perf_event *event)
        }
 
        /* Initialize for using the CPU-measurement counter facility */
-       if (atomic_inc_return(&cf_diag_events) == 1) {
-               if (__kernel_cpumcf_begin()) {
-                       atomic_dec(&cf_diag_events);
-                       err = -EBUSY;
-                       goto out;
-               }
-       }
+       err = cf_diag_perf_event_inuse();
+       if (err < 0)
+               goto out;
        event->destroy = cf_diag_perf_event_destroy;
 
        err = __hw_perf_event_init(event);
@@ -599,6 +629,8 @@ static void cf_diag_del(struct perf_event *event, int flags)
        cpuhw->flags &= ~PMU_F_IN_USE;
 }
 
+/* Default counter set events and format attribute groups */
+
 CPUMF_EVENT_ATTR(CF_DIAG, CF_DIAG, PERF_EVENT_CPUM_CF_DIAG);
 
 static struct attribute *cf_diag_events_attr[] = {
@@ -663,6 +695,441 @@ static void cf_diag_get_cpu_speed(void)
        }
 }
 
+/* Code to create device and file I/O operations */
+static atomic_t ctrset_opencnt = ATOMIC_INIT(0);       /* Excl. access */
+
+static int cf_diag_open(struct inode *inode, struct file *file)
+{
+       int err = 0;
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EPERM;
+       if (atomic_xchg(&ctrset_opencnt, 1))
+               return -EBUSY;
+
+       /* Avoid concurrent access with perf_event_open() system call */
+       mutex_lock(&cf_diag_reserve_mutex);
+       if (atomic_read(&cf_diag_events) || __kernel_cpumcf_begin())
+               err = -EBUSY;
+       mutex_unlock(&cf_diag_reserve_mutex);
+       if (err) {
+               atomic_set(&ctrset_opencnt, 0);
+               return err;
+       }
+       file->private_data = NULL;
+       debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
+       /* nonseekable_open() never fails */
+       return nonseekable_open(inode, file);
+}
+
+/* Variables for ioctl() interface support */
+static DEFINE_MUTEX(cf_diag_ctrset_mutex);
+static struct cf_diag_ctrset {
+       unsigned long ctrset;           /* Bit mask of counter set to read */
+       cpumask_t mask;                 /* CPU mask to read from */
+} cf_diag_ctrset;
+
+static void cf_diag_ctrset_clear(void)
+{
+       cpumask_clear(&cf_diag_ctrset.mask);
+       cf_diag_ctrset.ctrset = 0;
+}
+
+static void cf_diag_release_cpu(void *p)
+{
+       struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+
+       debug_sprintf_event(cf_diag_dbg, 3, "%s cpu %d\n", __func__,
+                           smp_processor_id());
+       lcctl(0);               /* Reset counter sets */
+       cpuhw->state = 0;       /* Save state in CPU hardware state */
+}
+
+/* The release function is also called when the application terminates without
+ * issuing a proper ioctl(..., S390_HWCTR_STOP, ...) command.
+ * Since only one application is allowed to open the device, simply stop all
+ * CPU counter sets.
+ */
+static int cf_diag_release(struct inode *inode, struct file *file)
+{
+       on_each_cpu(cf_diag_release_cpu, NULL, 1);
+       cf_diag_ctrset_clear();
+       atomic_set(&ctrset_opencnt, 0);
+       __kernel_cpumcf_end();
+       debug_sprintf_event(cf_diag_dbg, 2, "%s\n", __func__);
+       return 0;
+}
+
+struct cf_diag_call_on_cpu_parm {      /* Parm struct for smp_call_on_cpu */
+       unsigned int sets;              /* Counter set bit mask */
+       atomic_t cpus_ack;              /* # CPUs successfully executed func */
+};
+
+static int cf_diag_all_copy(unsigned long arg, cpumask_t *mask)
+{
+       struct s390_ctrset_read __user *ctrset_read;
+       unsigned int cpu, cpus, rc;
+       void __user *uptr;
+
+       ctrset_read = (struct s390_ctrset_read __user *)arg;
+       uptr = ctrset_read->data;
+       for_each_cpu(cpu, mask) {
+               struct cf_diag_csd *csd = per_cpu_ptr(&cf_diag_csd, cpu);
+               struct s390_ctrset_cpudata __user *ctrset_cpudata;
+
+               ctrset_cpudata = uptr;
+               debug_sprintf_event(cf_diag_dbg, 5, "%s cpu %d used %zd\n",
+                                   __func__, cpu, csd->used);
+               rc  = put_user(cpu, &ctrset_cpudata->cpu_nr);
+               rc |= put_user(csd->sets, &ctrset_cpudata->no_sets);
+               rc |= copy_to_user(ctrset_cpudata->data, csd->data, csd->used);
+               if (rc)
+                       return -EFAULT;
+               uptr += sizeof(struct s390_ctrset_cpudata) + csd->used;
+               cond_resched();
+       }
+       cpus = cpumask_weight(mask);
+       if (put_user(cpus, &ctrset_read->no_cpus))
+               return -EFAULT;
+       debug_sprintf_event(cf_diag_dbg, 5, "%s copied %ld\n",
+                           __func__, uptr - (void __user *)ctrset_read->data);
+       return 0;
+}
+
+static size_t cf_diag_cpuset_read(struct s390_ctrset_setdata *p, int ctrset,
+                                 int ctrset_size, size_t room)
+{
+       size_t need = 0;
+       int rc = -1;
+
+       need = sizeof(*p) + sizeof(u64) * ctrset_size;
+       debug_sprintf_event(cf_diag_dbg, 5,
+                           "%s room %zd need %zd set %#x set_size %d\n",
+                           __func__, room, need, ctrset, ctrset_size);
+       if (need <= room) {
+               p->set = cpumf_ctr_ctl[ctrset];
+               p->no_cnts = ctrset_size;
+               rc = ctr_stcctm(ctrset, ctrset_size, (u64 *)p->cv);
+               if (rc == 3)            /* Nothing stored */
+                       need = 0;
+       }
+       debug_sprintf_event(cf_diag_dbg, 5, "%s need %zd rc %d\n", __func__,
+                           need, rc);
+       return need;
+}
+
+/* Read all counter sets. Since the perf_event_open() system call with
+ * event cpum_cf_diag/.../ is blocked when this interface is active, reuse
+ * the perf_event_open() data buffer to store the counter sets.
+ */
+static void cf_diag_cpu_read(void *parm)
+{
+       struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+       struct cf_diag_csd *csd = this_cpu_ptr(&cf_diag_csd);
+       struct cf_diag_call_on_cpu_parm *p = parm;
+       int set, set_size;
+       size_t space;
+
+       debug_sprintf_event(cf_diag_dbg, 5,
+                           "%s new %#x flags %#x state %#llx\n",
+                           __func__, p->sets, cpuhw->flags,
+                           cpuhw->state);
+       /* No data saved yet */
+       csd->used = 0;
+       csd->sets = 0;
+       memset(csd->data, 0, sizeof(csd->data));
+
+       /* Scan the counter sets */
+       for (set = CPUMF_CTR_SET_BASIC; set < CPUMF_CTR_SET_MAX; ++set) {
+               struct s390_ctrset_setdata *sp = (void *)csd->data + csd->used;
+
+               if (!(p->sets & cpumf_ctr_ctl[set]))
+                       continue;       /* Counter set not in list */
+               set_size = cf_diag_ctrset_size(set, &cpuhw->info);
+               space = sizeof(csd->data) - csd->used;
+               space = cf_diag_cpuset_read(sp, set, set_size, space);
+               if (space) {
+                       csd->used += space;
+                       csd->sets += 1;
+               }
+               debug_sprintf_event(cf_diag_dbg, 5, "%s sp %px space %zd\n",
+                                   __func__, sp, space);
+       }
+       debug_sprintf_event(cf_diag_dbg, 5, "%s sets %d used %zd\n", __func__,
+                           csd->sets, csd->used);
+}
+
+static int cf_diag_all_read(unsigned long arg)
+{
+       struct cf_diag_call_on_cpu_parm p;
+       cpumask_var_t mask;
+       int rc;
+
+       debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+       if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+               return -ENOMEM;
+
+       p.sets = cf_diag_ctrset.ctrset;
+       cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+       on_each_cpu_mask(mask, cf_diag_cpu_read, &p, 1);
+       rc = cf_diag_all_copy(arg, mask);
+       free_cpumask_var(mask);
+       debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d\n", __func__, rc);
+       return rc;
+}
+
+/* Stop all counter sets via ioctl interface */
+static void cf_diag_ioctl_off(void *parm)
+{
+       struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+       struct cf_diag_call_on_cpu_parm *p = parm;
+       int rc;
+
+       debug_sprintf_event(cf_diag_dbg, 5,
+                           "%s new %#x flags %#x state %#llx\n",
+                           __func__, p->sets, cpuhw->flags,
+                           cpuhw->state);
+
+       ctr_set_multiple_disable(&cpuhw->state, p->sets);
+       ctr_set_multiple_stop(&cpuhw->state, p->sets);
+       rc = lcctl(cpuhw->state);               /* Stop counter sets */
+       if (!cpuhw->state)
+               cpuhw->flags &= ~PMU_F_IN_USE;
+       debug_sprintf_event(cf_diag_dbg, 5,
+                           "%s rc %d flags %#x state %#llx\n", __func__,
+                            rc, cpuhw->flags, cpuhw->state);
+}
+
+/* Start counter sets on particular CPU */
+static void cf_diag_ioctl_on(void *parm)
+{
+       struct cpu_cf_events *cpuhw = this_cpu_ptr(&cpu_cf_events);
+       struct cf_diag_call_on_cpu_parm *p = parm;
+       int rc;
+
+       debug_sprintf_event(cf_diag_dbg, 5,
+                           "%s new %#x flags %#x state %#llx\n",
+                           __func__, p->sets, cpuhw->flags,
+                           cpuhw->state);
+
+       if (!(cpuhw->flags & PMU_F_IN_USE))
+               cpuhw->state = 0;
+       cpuhw->flags |= PMU_F_IN_USE;
+       rc = lcctl(cpuhw->state);               /* Reset unused counter sets */
+       ctr_set_multiple_enable(&cpuhw->state, p->sets);
+       ctr_set_multiple_start(&cpuhw->state, p->sets);
+       rc |= lcctl(cpuhw->state);              /* Start counter sets */
+       if (!rc)
+               atomic_inc(&p->cpus_ack);
+       debug_sprintf_event(cf_diag_dbg, 5, "%s rc %d state %#llx\n",
+                           __func__, rc, cpuhw->state);
+}
+
+static int cf_diag_all_stop(void)
+{
+       struct cf_diag_call_on_cpu_parm p = {
+               .sets = cf_diag_ctrset.ctrset,
+       };
+       cpumask_var_t mask;
+
+       if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+               return -ENOMEM;
+       cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+       on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
+       free_cpumask_var(mask);
+       return 0;
+}
+
+static int cf_diag_all_start(void)
+{
+       struct cf_diag_call_on_cpu_parm p = {
+               .sets = cf_diag_ctrset.ctrset,
+               .cpus_ack = ATOMIC_INIT(0),
+       };
+       cpumask_var_t mask;
+       int rc = 0;
+
+       if (!alloc_cpumask_var(&mask, GFP_KERNEL))
+               return -ENOMEM;
+       cpumask_and(mask, &cf_diag_ctrset.mask, cpu_online_mask);
+       on_each_cpu_mask(mask, cf_diag_ioctl_on, &p, 1);
+       if (atomic_read(&p.cpus_ack) != cpumask_weight(mask)) {
+               on_each_cpu_mask(mask, cf_diag_ioctl_off, &p, 1);
+               rc = -EIO;
+       }
+       free_cpumask_var(mask);
+       return rc;
+}
+
+/* Return the maximum required space for all possible CPUs in case one
+ * CPU is onlined during the START, READ, STOP cycle.
+ * Any one CPU suffices to determine the counter set sizes; all CPUs have
+ * the same counter sets.
+ */
+static size_t cf_diag_needspace(unsigned int sets)
+{
+       struct cpu_cf_events *cpuhw = get_cpu_ptr(&cpu_cf_events);
+       size_t bytes = 0;
+       int i;
+
+       for (i = CPUMF_CTR_SET_BASIC; i < CPUMF_CTR_SET_MAX; ++i) {
+               if (!(sets & cpumf_ctr_ctl[i]))
+                       continue;
+               bytes += cf_diag_ctrset_size(i, &cpuhw->info) * sizeof(u64) +
+                        sizeof(((struct s390_ctrset_setdata *)0)->set) +
+                        sizeof(((struct s390_ctrset_setdata *)0)->no_cnts);
+       }
+       bytes = sizeof(((struct s390_ctrset_read *)0)->no_cpus) + nr_cpu_ids *
+               (bytes + sizeof(((struct s390_ctrset_cpudata *)0)->cpu_nr) +
+                    sizeof(((struct s390_ctrset_cpudata *)0)->no_sets));
+       debug_sprintf_event(cf_diag_dbg, 5, "%s bytes %ld\n", __func__,
+                           bytes);
+       put_cpu_ptr(&cpu_cf_events);
+       return bytes;
+}
+
+static long cf_diag_ioctl_read(unsigned long arg)
+{
+       struct s390_ctrset_read read;
+       int ret = 0;
+
+       debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+       if (copy_from_user(&read, (char __user *)arg, sizeof(read)))
+               return -EFAULT;
+       ret = cf_diag_all_read(arg);
+       debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
+       return ret;
+}
+
+static long cf_diag_ioctl_stop(void)
+{
+       int ret;
+
+       debug_sprintf_event(cf_diag_dbg, 5, "%s\n", __func__);
+       ret = cf_diag_all_stop();
+       cf_diag_ctrset_clear();
+       debug_sprintf_event(cf_diag_dbg, 5, "%s ret %d\n", __func__, ret);
+       return ret;
+}
+
+static long cf_diag_ioctl_start(unsigned long arg)
+{
+       struct s390_ctrset_start __user *ustart;
+       struct s390_ctrset_start start;
+       void __user *umask;
+       unsigned int len;
+       int ret = 0;
+       size_t need;
+
+       if (cf_diag_ctrset.ctrset)
+               return -EBUSY;
+       ustart = (struct s390_ctrset_start __user *)arg;
+       if (copy_from_user(&start, ustart, sizeof(start)))
+               return -EFAULT;
+       if (start.version != S390_HWCTR_START_VERSION)
+               return -EINVAL;
+       if (start.counter_sets & ~(cpumf_ctr_ctl[CPUMF_CTR_SET_BASIC] |
+                                  cpumf_ctr_ctl[CPUMF_CTR_SET_USER] |
+                                  cpumf_ctr_ctl[CPUMF_CTR_SET_CRYPTO] |
+                                  cpumf_ctr_ctl[CPUMF_CTR_SET_EXT] |
+                                  cpumf_ctr_ctl[CPUMF_CTR_SET_MT_DIAG]))
+               return -EINVAL;         /* Invalid counter set */
+       if (!start.counter_sets)
+               return -EINVAL;         /* No counter set at all? */
+       cpumask_clear(&cf_diag_ctrset.mask);
+       len = min_t(u64, start.cpumask_len, cpumask_size());
+       umask = (void __user *)start.cpumask;
+       if (copy_from_user(&cf_diag_ctrset.mask, umask, len))
+               return -EFAULT;
+       if (cpumask_empty(&cf_diag_ctrset.mask))
+               return -EINVAL;
+       need = cf_diag_needspace(start.counter_sets);
+       if (put_user(need, &ustart->data_bytes))
+               ret = -EFAULT;
+       if (ret)
+               goto out;
+       cf_diag_ctrset.ctrset = start.counter_sets;
+       ret = cf_diag_all_start();
+out:
+       if (ret)
+               cf_diag_ctrset_clear();
+       debug_sprintf_event(cf_diag_dbg, 2, "%s sets %#lx need %ld ret %d\n",
+                           __func__, cf_diag_ctrset.ctrset, need, ret);
+       return ret;
+}
+
+static long cf_diag_ioctl(struct file *file, unsigned int cmd,
+                         unsigned long arg)
+{
+       int ret;
+
+       debug_sprintf_event(cf_diag_dbg, 2, "%s cmd %#x arg %lx\n", __func__,
+                           cmd, arg);
+       get_online_cpus();
+       mutex_lock(&cf_diag_ctrset_mutex);
+       switch (cmd) {
+       case S390_HWCTR_START:
+               ret = cf_diag_ioctl_start(arg);
+               break;
+       case S390_HWCTR_STOP:
+               ret = cf_diag_ioctl_stop();
+               break;
+       case S390_HWCTR_READ:
+               ret = cf_diag_ioctl_read(arg);
+               break;
+       default:
+               ret = -ENOTTY;
+               break;
+       }
+       mutex_unlock(&cf_diag_ctrset_mutex);
+       put_online_cpus();
+       debug_sprintf_event(cf_diag_dbg, 2, "%s ret %d\n", __func__, ret);
+       return ret;
+}
+
+static const struct file_operations cf_diag_fops = {
+       .owner = THIS_MODULE,
+       .open = cf_diag_open,
+       .release = cf_diag_release,
+       .unlocked_ioctl = cf_diag_ioctl,
+       .compat_ioctl = cf_diag_ioctl,
+       .llseek = no_llseek
+};
+
+static struct miscdevice cf_diag_dev = {
+       .name   = S390_HWCTR_DEVICE,
+       .minor  = MISC_DYNAMIC_MINOR,
+       .fops   = &cf_diag_fops,
+};
+
+static int cf_diag_online_cpu(unsigned int cpu)
+{
+       struct cf_diag_call_on_cpu_parm p;
+
+       mutex_lock(&cf_diag_ctrset_mutex);
+       if (!cf_diag_ctrset.ctrset)
+               goto out;
+       p.sets = cf_diag_ctrset.ctrset;
+       cf_diag_ioctl_on(&p);
+out:
+       mutex_unlock(&cf_diag_ctrset_mutex);
+       return 0;
+}
+
+static int cf_diag_offline_cpu(unsigned int cpu)
+{
+       struct cf_diag_call_on_cpu_parm p;
+
+       mutex_lock(&cf_diag_ctrset_mutex);
+       if (!cf_diag_ctrset.ctrset)
+               goto out;
+       p.sets = cf_diag_ctrset.ctrset;
+       cf_diag_ioctl_off(&p);
+out:
+       mutex_unlock(&cf_diag_ctrset_mutex);
+       return 0;
+}
+
 /* Initialize the counter set PMU to generate complete counter set data as
  * event raw data. This relies on the CPU Measurement Counter Facility device
  * already being loaded and initialized.
@@ -685,21 +1152,43 @@ static int __init cf_diag_init(void)
                return -ENOMEM;
        }
 
+       rc = misc_register(&cf_diag_dev);
+       if (rc) {
+               pr_err("Registration of /dev/" S390_HWCTR_DEVICE
+                      " failed rc=%d\n", rc);
+               goto out;
+       }
+
        /* Setup s390dbf facility */
        cf_diag_dbg = debug_register(KMSG_COMPONENT, 2, 1, 128);
        if (!cf_diag_dbg) {
                pr_err("Registration of s390dbf(cpum_cf_diag) failed\n");
-               return -ENOMEM;
+               rc = -ENOMEM;
+               goto out_dbf;
        }
        debug_register_view(cf_diag_dbg, &debug_sprintf_view);
 
        rc = perf_pmu_register(&cf_diag, "cpum_cf_diag", -1);
        if (rc) {
-               debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
-               debug_unregister(cf_diag_dbg);
                pr_err("Registration of PMU(cpum_cf_diag) failed with rc=%i\n",
                       rc);
+               goto out_perf;
        }
+       rc = cpuhp_setup_state_nocalls(CPUHP_AP_PERF_S390_CFD_ONLINE,
+                                      "perf/s390/cfd:online",
+                                      cf_diag_online_cpu, cf_diag_offline_cpu);
+       if (!rc)
+               goto out;
+
+       pr_err("Registration of CPUHP_AP_PERF_S390_CFD_ONLINE failed rc=%i\n",
+              rc);
+       perf_pmu_unregister(&cf_diag);
+out_perf:
+       debug_unregister_view(cf_diag_dbg, &debug_sprintf_view);
+       debug_unregister(cf_diag_dbg);
+out_dbf:
+       misc_deregister(&cf_diag_dev);
+out:
        return rc;
 }
-arch_initcall(cf_diag_init);
+device_initcall(cf_diag_init);
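The reworked init path above follows the usual kernel unwind idiom: each failure jumps to a label that tears down only what has already been set up, in reverse order. A generic sketch of the shape (not the driver's exact labels):

int init(void)
{
	int rc;

	rc = setup_a();
	if (rc)
		goto out;
	rc = setup_b();
	if (rc)
		goto out_a;
	rc = setup_c();
	if (rc)
		goto out_b;
	return 0;
out_b:
	teardown_b();
out_a:
	teardown_a();
out:
	return rc;
}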
index 367bd00..e20bed1 100644 (file)
@@ -130,7 +130,7 @@ int copy_thread(unsigned long clone_flags, unsigned long new_stackp,
        frame->sf.gprs[9] = (unsigned long)frame;
 
        /* Store access registers to kernel stack of new process. */
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                /* kernel thread */
                memset(&frame->childregs, 0, sizeof(struct pt_regs));
                frame->childregs.psw.mask = PSW_KERNEL_BITS | PSW_MASK_DAT |
index 60da976..72134f9 100644 (file)
@@ -354,7 +354,7 @@ static int __init stack_realloc(void)
        if (!new)
                panic("Couldn't allocate machine check stack");
        WRITE_ONCE(S390_lowcore.mcck_stack, new + STACK_INIT_OFFSET);
-       memblock_free(old, THREAD_SIZE);
+       memblock_free_late(old, THREAD_SIZE);
        return 0;
 }
 early_initcall(stack_realloc);
index e299892..58c8afa 100644 (file)
@@ -30,6 +30,7 @@
 #include <linux/delay.h>
 #include <linux/interrupt.h>
 #include <linux/irqflags.h>
+#include <linux/irq_work.h>
 #include <linux/cpu.h>
 #include <linux/slab.h>
 #include <linux/sched/hotplug.h>
@@ -62,6 +63,7 @@ enum {
        ec_call_function_single,
        ec_stop_cpu,
        ec_mcck_pending,
+       ec_irq_work,
 };
 
 enum {
@@ -434,10 +436,12 @@ void notrace smp_yield_cpu(int cpu)
  */
 void notrace smp_emergency_stop(void)
 {
-       cpumask_t cpumask;
+       static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED;
+       static cpumask_t cpumask;
        u64 end;
        int cpu;
 
+       arch_spin_lock(&lock);
        cpumask_copy(&cpumask, cpu_online_mask);
        cpumask_clear_cpu(smp_processor_id(), &cpumask);
 
@@ -458,6 +462,7 @@ void notrace smp_emergency_stop(void)
                        break;
                cpu_relax();
        }
+       arch_spin_unlock(&lock);
 }
 NOKPROBE_SYMBOL(smp_emergency_stop);
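Moving the cpumask off the stack here is a size-versus-serialization trade: a cpumask_t occupies NR_CPUS/8 bytes (64 bytes at NR_CPUS=512, more with larger configs), which is presumably unwelcome on this panic-time path, and the new arch_spinlock_t keeps concurrent callers from clobbering the now-shared static mask.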
 
@@ -505,6 +510,8 @@ static void smp_handle_ext_call(void)
                generic_smp_call_function_single_interrupt();
        if (test_bit(ec_mcck_pending, &bits))
                __s390_handle_mcck();
+       if (test_bit(ec_irq_work, &bits))
+               irq_work_run();
 }
 
 static void do_ext_call_interrupt(struct ext_code ext_code,
@@ -537,6 +544,13 @@ void smp_send_reschedule(int cpu)
        pcpu_ec_call(pcpu_devices + cpu, ec_schedule);
 }
 
+#ifdef CONFIG_IRQ_WORK
+void arch_irq_work_raise(void)
+{
+       pcpu_ec_call(pcpu_devices + smp_processor_id(), ec_irq_work);
+}
+#endif
+
 /*
  * parameter area for the set/clear control bit callbacks
  */
@@ -775,11 +789,13 @@ static int smp_add_core(struct sclp_core_entry *core, cpumask_t *avail,
 static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
 {
        struct sclp_core_entry *core;
-       cpumask_t avail;
+       static cpumask_t avail;
        bool configured;
        u16 core_id;
        int nr, i;
 
+       get_online_cpus();
+       mutex_lock(&smp_cpu_state_mutex);
        nr = 0;
        cpumask_xor(&avail, cpu_possible_mask, cpu_present_mask);
        /*
@@ -800,6 +816,8 @@ static int __smp_rescan_cpus(struct sclp_core_info *info, bool early)
                configured = i < info->configured;
                nr += smp_add_core(&info->core[i], &avail, configured, early);
        }
+       mutex_unlock(&smp_cpu_state_mutex);
+       put_online_cpus();
        return nr;
 }
 
@@ -847,9 +865,7 @@ void __init smp_detect_cpus(void)
        pr_info("%d configured CPUs, %d standby CPUs\n", c_cpus, s_cpus);
 
        /* Add CPUs present at boot */
-       get_online_cpus();
        __smp_rescan_cpus(info, true);
-       put_online_cpus();
        memblock_free_early((unsigned long)info, sizeof(*info));
 }
 
@@ -1178,11 +1194,7 @@ int __ref smp_rescan_cpus(void)
        if (!info)
                return -ENOMEM;
        smp_get_core_info(info, 0);
-       get_online_cpus();
-       mutex_lock(&smp_cpu_state_mutex);
        nr = __smp_rescan_cpus(info, false);
-       mutex_unlock(&smp_cpu_state_mutex);
-       put_online_cpus();
        kfree(info);
        if (nr)
                topology_schedule_update();
index d443423..3abef21 100644 (file)
 439  common    faccessat2              sys_faccessat2                  sys_faccessat2
 440  common    process_madvise         sys_process_madvise             sys_process_madvise
 441  common    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
+442  common    mount_setattr           sys_mount_setattr               sys_mount_setattr
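For reference, a rough sketch of calling the newly wired-up syscall from userspace before a libc wrapper exists (assumes the 5.12 uapi definitions of struct mount_attr and MOUNT_ATTR_RDONLY; the path and flags are illustrative):

#define _GNU_SOURCE
#include <fcntl.h>		/* AT_FDCWD */
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/mount.h>	/* struct mount_attr, MOUNT_ATTR_RDONLY */

int main(void)
{
	struct mount_attr attr = { .attr_set = MOUNT_ATTR_RDONLY };

	/* 442 is the number added to the table above. */
	return syscall(442, AT_FDCWD, "/mnt", 0, &attr, sizeof(attr));
}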
index 06bcfa6..326cb8f 100644 (file)
@@ -68,10 +68,10 @@ EXPORT_SYMBOL(s390_epoch_delta_notifier);
 
 unsigned char ptff_function_mask[16];
 
-static unsigned long long lpar_offset;
-static unsigned long long initial_leap_seconds;
-static unsigned long long tod_steering_end;
-static long long tod_steering_delta;
+static unsigned long lpar_offset;
+static unsigned long initial_leap_seconds;
+static unsigned long tod_steering_end;
+static long tod_steering_delta;
 
 /*
  * Get time offsets with PTFF
@@ -80,10 +80,12 @@ void __init time_early_init(void)
 {
        struct ptff_qto qto;
        struct ptff_qui qui;
+       int cs;
 
        /* Initialize TOD steering parameters */
        tod_steering_end = tod_clock_base.tod;
-       vdso_data->arch_data.tod_steering_end = tod_steering_end;
+       for (cs = 0; cs < CS_BASES; cs++)
+               vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
 
        if (!test_facility(28))
                return;
@@ -96,7 +98,7 @@ void __init time_early_init(void)
 
        /* get initial leap seconds */
        if (ptff_query(PTFF_QUI) && ptff(&qui, sizeof(qui), PTFF_QUI) == 0)
-               initial_leap_seconds = (unsigned long long)
+               initial_leap_seconds = (unsigned long)
                        ((long) qui.old_leap * 4096000000L);
 }
 
@@ -222,7 +224,7 @@ void __init read_persistent_wall_and_boot_offset(struct timespec64 *wall_time,
 
 static u64 read_tod_clock(struct clocksource *cs)
 {
-       unsigned long long now, adj;
+       unsigned long now, adj;
 
        preempt_disable(); /* protect from changes to steering parameters */
        now = get_tod_clock();
@@ -362,10 +364,11 @@ static inline int check_sync_clock(void)
  * Apply clock delta to the global data structures.
  * This is called once on the CPU that performed the clock sync.
  */
-static void clock_sync_global(unsigned long long delta)
+static void clock_sync_global(unsigned long delta)
 {
        unsigned long now, adj;
        struct ptff_qto qto;
+       int cs;
 
        /* Fixup the monotonic sched clock. */
        tod_clock_base.eitod += delta;
@@ -378,10 +381,13 @@ static void clock_sync_global(unsigned long long delta)
                        -(adj >> 15) : (adj >> 15);
        tod_steering_delta += delta;
        if ((abs(tod_steering_delta) >> 48) != 0)
-               panic("TOD clock sync offset %lli is too large to drift\n",
+               panic("TOD clock sync offset %li is too large to drift\n",
                      tod_steering_delta);
        tod_steering_end = now + (abs(tod_steering_delta) << 15);
-       vdso_data->arch_data.tod_steering_end = tod_steering_end;
+       for (cs = 0; cs < CS_BASES; cs++) {
+               vdso_data[cs].arch_data.tod_steering_end = tod_steering_end;
+               vdso_data[cs].arch_data.tod_steering_delta = tod_steering_delta;
+       }
 
        /* Update LPAR offset. */
        if (ptff_query(PTFF_QTO) && ptff(&qto, sizeof(qto), PTFF_QTO) == 0)
@@ -394,7 +400,7 @@ static void clock_sync_global(unsigned long long delta)
  * Apply clock delta to the per-CPU data structures of this CPU.
  * This is called for each online CPU after the call to clock_sync_global.
  */
-static void clock_sync_local(unsigned long long delta)
+static void clock_sync_local(unsigned long delta)
 {
        /* Add the delta to the clock comparator. */
        if (S390_lowcore.clock_comparator != clock_comparator_max) {
@@ -418,7 +424,7 @@ static void __init time_init_wq(void)
 struct clock_sync_data {
        atomic_t cpus;
        int in_sync;
-       unsigned long long clock_delta;
+       unsigned long clock_delta;
 };
 
 /*
@@ -538,7 +544,7 @@ static int stpinfo_valid(void)
 static int stp_sync_clock(void *data)
 {
        struct clock_sync_data *sync = data;
-       unsigned long long clock_delta, flags;
+       u64 clock_delta, flags;
        static int first;
        int rc;
 
@@ -720,8 +726,8 @@ static ssize_t ctn_id_show(struct device *dev,
 
        mutex_lock(&stp_mutex);
        if (stpinfo_valid())
-               ret = sprintf(buf, "%016llx\n",
-                             *(unsigned long long *) stp_info.ctnid);
+               ret = sprintf(buf, "%016lx\n",
+                             *(unsigned long *) stp_info.ctnid);
        mutex_unlock(&stp_mutex);
        return ret;
 }
@@ -794,7 +800,7 @@ static ssize_t leap_seconds_scheduled_show(struct device *dev,
        if (!stzi.lsoib.p)
                return sprintf(buf, "0,0\n");
 
-       return sprintf(buf, "%llu,%d\n",
+       return sprintf(buf, "%lu,%d\n",
                       tod_to_ns(stzi.lsoib.nlsout - TOD_UNIX_EPOCH) / NSEC_PER_SEC,
                       stzi.lsoib.nlso - stzi.lsoib.also);
 }
index ca47141..bfcc327 100644 (file)
@@ -62,22 +62,20 @@ static struct mask_info drawer_info;
 struct cpu_topology_s390 cpu_topology[NR_CPUS];
 EXPORT_SYMBOL_GPL(cpu_topology);
 
-static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
+static void cpu_group_map(cpumask_t *dst, struct mask_info *info, unsigned int cpu)
 {
-       cpumask_t mask;
+       static cpumask_t mask;
 
        cpumask_copy(&mask, cpumask_of(cpu));
        switch (topology_mode) {
        case TOPOLOGY_MODE_HW:
                while (info) {
                        if (cpumask_test_cpu(cpu, &info->mask)) {
-                               mask = info->mask;
+                               cpumask_copy(&mask, &info->mask);
                                break;
                        }
                        info = info->next;
                }
-               if (cpumask_empty(&mask))
-                       cpumask_copy(&mask, cpumask_of(cpu));
                break;
        case TOPOLOGY_MODE_PACKAGE:
                cpumask_copy(&mask, cpu_present_mask);
@@ -89,23 +87,24 @@ static cpumask_t cpu_group_map(struct mask_info *info, unsigned int cpu)
                break;
        }
        cpumask_and(&mask, &mask, cpu_online_mask);
-       return mask;
+       cpumask_copy(dst, &mask);
 }
 
-static cpumask_t cpu_thread_map(unsigned int cpu)
+static void cpu_thread_map(cpumask_t *dst, unsigned int cpu)
 {
-       cpumask_t mask;
+       static cpumask_t mask;
        int i;
 
        cpumask_copy(&mask, cpumask_of(cpu));
        if (topology_mode != TOPOLOGY_MODE_HW)
-               return mask;
+               goto out;
        cpu -= cpu % (smp_cpu_mtid + 1);
        for (i = 0; i <= smp_cpu_mtid; i++)
                if (cpu_present(cpu + i))
                        cpumask_set_cpu(cpu + i, &mask);
        cpumask_and(&mask, &mask, cpu_online_mask);
-       return mask;
+out:
+       cpumask_copy(dst, &mask);
 }
 
 #define TOPOLOGY_CORE_BITS     64
@@ -250,10 +249,10 @@ void update_cpu_masks(void)
 
        for_each_possible_cpu(cpu) {
                topo = &cpu_topology[cpu];
-               topo->thread_mask = cpu_thread_map(cpu);
-               topo->core_mask = cpu_group_map(&socket_info, cpu);
-               topo->book_mask = cpu_group_map(&book_info, cpu);
-               topo->drawer_mask = cpu_group_map(&drawer_info, cpu);
+               cpu_thread_map(&topo->thread_mask, cpu);
+               cpu_group_map(&topo->core_mask, &socket_info, cpu);
+               cpu_group_map(&topo->book_mask, &book_info, cpu);
+               cpu_group_map(&topo->drawer_mask, &drawer_info, cpu);
                topo->booted_cores = 0;
                if (topology_mode != TOPOLOGY_MODE_HW) {
                        id = topology_mode == TOPOLOGY_MODE_PACKAGE ? 0 : cpu;
index 73c7afc..f216a1b 100644 (file)
@@ -214,7 +214,7 @@ void vtime_flush(struct task_struct *tsk)
        avg_steal = S390_lowcore.avg_steal_timer / 2;
        if ((s64) steal > 0) {
                S390_lowcore.steal_timer = 0;
-               account_steal_time(steal);
+               account_steal_time(cputime_to_nsecs(steal));
                avg_steal += steal;
        }
        S390_lowcore.avg_steal_timer = avg_steal;
index e3183bd..d548d60 100644 (file)
@@ -1287,7 +1287,7 @@ static u64 __calculate_sltime(struct kvm_vcpu *vcpu)
                        /* already expired? */
                        if (cputm >> 63)
                                return 0;
-                       return min(sltime, tod_to_ns(cputm));
+                       return min_t(u64, sltime, tod_to_ns(cputm));
                }
        } else if (cpu_timer_interrupts_enabled(vcpu)) {
                sltime = kvm_s390_get_cpu_timer(vcpu);
index 73a1630..0e76b21 100644 (file)
@@ -297,6 +297,7 @@ int arch_add_memory(int nid, u64 start, u64 size,
        if (WARN_ON_ONCE(params->pgprot.pgprot != PAGE_KERNEL.pgprot))
                return -EINVAL;
 
+       VM_BUG_ON(!mhp_range_allowed(start, size, true));
        rc = vmem_add_mapping(start, size);
        if (rc)
                return rc;
index 4e87c81..781965f 100644 (file)
@@ -58,7 +58,7 @@ unsigned long *crst_table_alloc(struct mm_struct *mm)
        if (!page)
                return NULL;
        arch_set_page_dat(page, 2);
-       return (unsigned long *) page_to_phys(page);
+       return (unsigned long *) page_to_virt(page);
 }
 
 void crst_table_free(struct mm_struct *mm, unsigned long *table)
@@ -161,7 +161,7 @@ struct page *page_table_alloc_pgste(struct mm_struct *mm)
 
        page = alloc_page(GFP_KERNEL);
        if (page) {
-               table = (u64 *)page_to_phys(page);
+               table = (u64 *)page_to_virt(page);
                memset64(table, _PAGE_INVALID, PTRS_PER_PTE);
                memset64(table + PTRS_PER_PTE, 0, PTRS_PER_PTE);
        }
@@ -194,7 +194,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
                        mask = atomic_read(&page->_refcount) >> 24;
                        mask = (mask | (mask >> 4)) & 3;
                        if (mask != 3) {
-                               table = (unsigned long *) page_to_phys(page);
+                               table = (unsigned long *) page_to_virt(page);
                                bit = mask & 1;         /* =1 -> second 2K */
                                if (bit)
                                        table += PTRS_PER_PTE;
@@ -217,7 +217,7 @@ unsigned long *page_table_alloc(struct mm_struct *mm)
        }
        arch_set_page_dat(page, 0);
        /* Initialize page table */
-       table = (unsigned long *) page_to_phys(page);
+       table = (unsigned long *) page_to_virt(page);
        if (mm_alloc_pgste(mm)) {
                /* Return 4K page table with PGSTEs */
                atomic_xor_bits(&page->_refcount, 3 << 24);
@@ -239,10 +239,10 @@ void page_table_free(struct mm_struct *mm, unsigned long *table)
        struct page *page;
        unsigned int bit, mask;
 
-       page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+       page = virt_to_page(table);
        if (!mm_alloc_pgste(mm)) {
                /* Free 2K page table fragment of a 4K page */
-               bit = (__pa(table) & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
+               bit = ((unsigned long) table & ~PAGE_MASK)/(PTRS_PER_PTE*sizeof(pte_t));
                spin_lock_bh(&mm->context.lock);
                mask = atomic_xor_bits(&page->_refcount, 1U << (bit + 24));
                mask >>= 24;
@@ -269,14 +269,14 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
        unsigned int bit, mask;
 
        mm = tlb->mm;
-       page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+       page = virt_to_page(table);
        if (mm_alloc_pgste(mm)) {
                gmap_unlink(mm, table, vmaddr);
-               table = (unsigned long *) (__pa(table) | 3);
+               table = (unsigned long *) ((unsigned long)table | 3);
                tlb_remove_table(tlb, table);
                return;
        }
-       bit = (__pa(table) & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
+       bit = ((unsigned long) table & ~PAGE_MASK) / (PTRS_PER_PTE*sizeof(pte_t));
        spin_lock_bh(&mm->context.lock);
        mask = atomic_xor_bits(&page->_refcount, 0x11U << (bit + 24));
        mask >>= 24;
@@ -285,7 +285,7 @@ void page_table_free_rcu(struct mmu_gather *tlb, unsigned long *table,
        else
                list_del(&page->lru);
        spin_unlock_bh(&mm->context.lock);
-       table = (unsigned long *) (__pa(table) | (1U << bit));
+       table = (unsigned long *) ((unsigned long) table | (1U << bit));
        tlb_remove_table(tlb, table);
 }
 
@@ -293,7 +293,7 @@ void __tlb_remove_table(void *_table)
 {
        unsigned int mask = (unsigned long) _table & 3;
        void *table = (void *)((unsigned long) _table ^ mask);
-       struct page *page = pfn_to_page(__pa(table) >> PAGE_SHIFT);
+       struct page *page = virt_to_page(table);
 
        switch (mask) {
        case 0:         /* pmd, pud, or p4d */
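
The hunks above uniformly replace physical-address arithmetic (page_to_phys, __pa plus pfn_to_page) with the virtual-address helpers (page_to_virt, virt_to_page). The substitution is safe because this memory sits in the kernel's linear mapping, where translation is a constant offset, so page offsets and the low tag bits used for 2K table fragments survive it. A minimal user-space model of that equivalence, assuming a page-aligned linear-map base (PAGE_OFFSET, mem_map and the helpers below are illustrative stand-ins, not the s390 definitions):

    #include <assert.h>
    #include <stdint.h>

    #define PAGE_SHIFT  12
    #define PAGE_SIZE   (1UL << PAGE_SHIFT)
    #define PAGE_OFFSET 0x40000000UL                /* illustrative linear-map base */

    struct page { int flags; };
    static struct page mem_map[1024];               /* one struct page per page frame */

    static uintptr_t __pa(const void *va) { return (uintptr_t)va - PAGE_OFFSET; }
    static void *__va(uintptr_t pa)       { return (void *)(pa + PAGE_OFFSET); }
    static struct page *pfn_to_page(uintptr_t pfn) { return &mem_map[pfn]; }
    static struct page *virt_to_page(const void *va)
    {
            return pfn_to_page(__pa(va) >> PAGE_SHIFT);
    }

    int main(void)
    {
            /* a 2K page-table fragment in the second half of frame 5 */
            void *table = __va(5 * PAGE_SIZE + 2048);

            /* the old and new expressions in the hunks agree */
            assert(pfn_to_page(__pa(table) >> PAGE_SHIFT) == virt_to_page(table));

            /* the fragment bit computed from the offset-in-page is unchanged */
            assert((__pa(table) & (PAGE_SIZE - 1)) ==
                   ((uintptr_t)table & (PAGE_SIZE - 1)));
            return 0;
    }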
index 01f3a5f..96897fa 100644 (file)
@@ -4,6 +4,7 @@
  *    Author(s): Heiko Carstens <heiko.carstens@de.ibm.com>
  */
 
+#include <linux/memory_hotplug.h>
 #include <linux/memblock.h>
 #include <linux/pfn.h>
 #include <linux/mm.h>
@@ -26,14 +27,14 @@ static void __ref *vmem_alloc_pages(unsigned int order)
 
        if (slab_is_available())
                return (void *)__get_free_pages(GFP_KERNEL, order);
-       return (void *) memblock_phys_alloc(size, size);
+       return memblock_alloc(size, size);
 }
 
 static void vmem_free_pages(unsigned long addr, int order)
 {
        /* We don't expect boot memory to be removed ever. */
        if (!slab_is_available() ||
-           WARN_ON_ONCE(PageReserved(phys_to_page(addr))))
+           WARN_ON_ONCE(PageReserved(virt_to_page(addr))))
                return;
        free_pages(addr, order);
 }
@@ -56,7 +57,7 @@ pte_t __ref *vmem_pte_alloc(void)
        if (slab_is_available())
                pte = (pte_t *) page_table_alloc(&init_mm);
        else
-               pte = (pte_t *) memblock_phys_alloc(size, size);
+               pte = (pte_t *) memblock_alloc(size, size);
        if (!pte)
                return NULL;
        memset64((u64 *)pte, _PAGE_INVALID, PTRS_PER_PTE);
@@ -84,7 +85,7 @@ static void vmemmap_flush_unused_sub_pmd(void)
 {
        if (!unused_sub_pmd_start)
                return;
-       memset(__va(unused_sub_pmd_start), PAGE_UNUSED,
+       memset((void *)unused_sub_pmd_start, PAGE_UNUSED,
               ALIGN(unused_sub_pmd_start, PMD_SIZE) - unused_sub_pmd_start);
        unused_sub_pmd_start = 0;
 }
@@ -97,7 +98,7 @@ static void vmemmap_mark_sub_pmd_used(unsigned long start, unsigned long end)
         * getting removed (just in case the memmap never gets initialized,
         * e.g., because the memory block never gets onlined).
         */
-       memset(__va(start), 0, sizeof(struct page));
+       memset((void *)start, 0, sizeof(struct page));
 }
 
 static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
@@ -118,7 +119,7 @@ static void vmemmap_use_sub_pmd(unsigned long start, unsigned long end)
 
 static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 {
-       void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+       unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
 
        vmemmap_flush_unused_sub_pmd();
 
@@ -127,7 +128,7 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 
        /* Mark the unused parts of the new memmap page PAGE_UNUSED. */
        if (!IS_ALIGNED(start, PMD_SIZE))
-               memset(page, PAGE_UNUSED, start - __pa(page));
+               memset((void *)page, PAGE_UNUSED, start - page);
        /*
         * We want to avoid memset(PAGE_UNUSED) when populating the vmemmap of
         * consecutive sections. Remember for the last added PMD the last
@@ -140,11 +141,11 @@ static void vmemmap_use_new_sub_pmd(unsigned long start, unsigned long end)
 /* Returns true if the PMD is completely unused and can be freed. */
 static bool vmemmap_unuse_sub_pmd(unsigned long start, unsigned long end)
 {
-       void *page = __va(ALIGN_DOWN(start, PMD_SIZE));
+       unsigned long page = ALIGN_DOWN(start, PMD_SIZE);
 
        vmemmap_flush_unused_sub_pmd();
-       memset(__va(start), PAGE_UNUSED, end - start);
-       return !memchr_inv(page, PAGE_UNUSED, PMD_SIZE);
+       memset((void *)start, PAGE_UNUSED, end - start);
+       return !memchr_inv((void *)page, PAGE_UNUSED, PMD_SIZE);
 }
 
 /* __ref: we'll only call vmemmap_alloc_block() via vmemmap_populate() */
@@ -165,7 +166,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
                        if (pte_none(*pte))
                                continue;
                        if (!direct)
-                               vmem_free_pages(pfn_to_phys(pte_pfn(*pte)), 0);
+                               vmem_free_pages((unsigned long) pfn_to_virt(pte_pfn(*pte)), 0);
                        pte_clear(&init_mm, addr, pte);
                } else if (pte_none(*pte)) {
                        if (!direct) {
@@ -175,7 +176,7 @@ static int __ref modify_pte_table(pmd_t *pmd, unsigned long addr,
                                        goto out;
                                pte_val(*pte) = __pa(new_page) | prot;
                        } else {
-                               pte_val(*pte) = addr | prot;
+                               pte_val(*pte) = __pa(addr) | prot;
                        }
                } else {
                        continue;
@@ -200,7 +201,7 @@ static void try_free_pte_table(pmd_t *pmd, unsigned long start)
                if (!pte_none(*pte))
                        return;
        }
-       vmem_pte_free(__va(pmd_deref(*pmd)));
+       vmem_pte_free((unsigned long *) pmd_deref(*pmd));
        pmd_clear(pmd);
 }
 
@@ -241,7 +242,7 @@ static int __ref modify_pmd_table(pud_t *pud, unsigned long addr,
                            IS_ALIGNED(next, PMD_SIZE) &&
                            MACHINE_HAS_EDAT1 && addr && direct &&
                            !debug_pagealloc_enabled()) {
-                               pmd_val(*pmd) = addr | prot;
+                               pmd_val(*pmd) = __pa(addr) | prot;
                                pages++;
                                continue;
                        } else if (!direct && MACHINE_HAS_EDAT1) {
@@ -337,7 +338,7 @@ static int modify_pud_table(p4d_t *p4d, unsigned long addr, unsigned long end,
                            IS_ALIGNED(next, PUD_SIZE) &&
                            MACHINE_HAS_EDAT2 && addr && direct &&
                            !debug_pagealloc_enabled()) {
-                               pud_val(*pud) = addr | prot;
+                               pud_val(*pud) = __pa(addr) | prot;
                                pages++;
                                continue;
                        }
@@ -532,11 +533,22 @@ void vmem_remove_mapping(unsigned long start, unsigned long size)
        mutex_unlock(&vmem_mutex);
 }
 
+struct range arch_get_mappable_range(void)
+{
+       struct range mhp_range;
+
+       mhp_range.start = 0;
+       mhp_range.end = VMEM_MAX_PHYS - 1;
+       return mhp_range;
+}
+
 int vmem_add_mapping(unsigned long start, unsigned long size)
 {
+       struct range range = arch_get_mappable_range();
        int ret;
 
-       if (start + size > VMEM_MAX_PHYS ||
+       if (start < range.start ||
+           start + size > range.end + 1 ||
            start + size < start)
                return -ERANGE;
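
arch_get_mappable_range() hands the generic memory-hotplug core the window this architecture can map, and vmem_add_mapping() now validates against that same window instead of open-coding the VMEM_MAX_PHYS comparison. A sketch of the check with a stand-in VMEM_MAX_PHYS value; the third comparison is the guard against start + size wrapping around:

    #include <stdbool.h>
    #include <stdint.h>

    struct range { uint64_t start, end; };          /* end is inclusive */

    #define VMEM_MAX_PHYS (1ULL << 42)              /* stand-in value */

    static struct range arch_get_mappable_range(void)
    {
            return (struct range){ .start = 0, .end = VMEM_MAX_PHYS - 1 };
    }

    /* mirrors the bounds test in vmem_add_mapping() above */
    static bool mapping_ok(uint64_t start, uint64_t size)
    {
            struct range r = arch_get_mappable_range();

            if (start < r.start ||
                start + size > r.end + 1 ||
                start + size < start)               /* unsigned wrap-around */
                    return false;
            return true;
    }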
 
index 600881d..9106407 100644 (file)
@@ -682,16 +682,36 @@ int zpci_disable_device(struct zpci_dev *zdev)
 }
 EXPORT_SYMBOL_GPL(zpci_disable_device);
 
-void zpci_remove_device(struct zpci_dev *zdev)
+/* zpci_remove_device - Removes the given zdev from the PCI core
+ * @zdev: the zdev to be removed from the PCI core
+ * @set_error: if true the device's error state is set to permanent failure
+ *
+ * Sets a zPCI device to a configured but offline state; the zPCI
+ * device is still accessible through its hotplug slot and the zPCI
+ * API but is removed from the common code PCI bus, making it
+ * no longer available to drivers.
+ */
+void zpci_remove_device(struct zpci_dev *zdev, bool set_error)
 {
        struct zpci_bus *zbus = zdev->zbus;
        struct pci_dev *pdev;
 
+       if (!zdev->zbus->bus)
+               return;
+
        pdev = pci_get_slot(zbus->bus, zdev->devfn);
        if (pdev) {
-               if (pdev->is_virtfn)
-                       return zpci_iov_remove_virtfn(pdev, zdev->vfn);
+               if (set_error)
+                       pdev->error_state = pci_channel_io_perm_failure;
+               if (pdev->is_virtfn) {
+                       zpci_iov_remove_virtfn(pdev, zdev->vfn);
+                       /* balance pci_get_slot */
+                       pci_dev_put(pdev);
+                       return;
+               }
                pci_stop_and_remove_bus_device_locked(pdev);
+               /* balance pci_get_slot */
+               pci_dev_put(pdev);
        }
 }
 
@@ -765,7 +785,7 @@ void zpci_release_device(struct kref *kref)
        struct zpci_dev *zdev = container_of(kref, struct zpci_dev, kref);
 
        if (zdev->zbus->bus)
-               zpci_remove_device(zdev);
+               zpci_remove_device(zdev, false);
 
        switch (zdev->state) {
        case ZPCI_FN_STATE_ONLINE:
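
Besides folding the error-state handling into zpci_remove_device(), the hunk plugs a reference leak: pci_get_slot() returns its struct pci_dev with an elevated refcount, so every exit path must now pair it with pci_dev_put(). The discipline in miniature, with hypothetical dev_get()/dev_put() helpers standing in for the PCI core:

    #include <stddef.h>

    struct dev { int refcnt; int is_virtfn; };

    static struct dev *dev_get(struct dev *d) { if (d) d->refcnt++; return d; }
    static void dev_put(struct dev *d)        { if (d) d->refcnt--; }

    static void remove_virtfn(struct dev *d)  { (void)d; /* ... */ }
    static void remove_normal(struct dev *d)  { (void)d; /* ... */ }

    static void remove_device(struct dev *slot)
    {
            struct dev *d = dev_get(slot);      /* like pci_get_slot() */

            if (!d)
                    return;
            if (d->is_virtfn) {
                    remove_virtfn(d);
                    dev_put(d);                 /* balance the lookup... */
                    return;
            }
            remove_normal(d);
            dev_put(d);                         /* ...on every path */
    }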
index b4162da..ac0c65c 100644 (file)
@@ -76,13 +76,10 @@ void zpci_event_error(void *data)
 static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
 {
        struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
-       struct pci_dev *pdev = NULL;
        enum zpci_state state;
+       struct pci_dev *pdev;
        int ret;
 
-       if (zdev && zdev->zbus->bus)
-               pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
-
        zpci_err("avail CCDF:\n");
        zpci_err_hex(ccdf, sizeof(*ccdf));
 
@@ -124,8 +121,7 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
        case 0x0303: /* Deconfiguration requested */
                if (!zdev)
                        break;
-               if (pdev)
-                       zpci_remove_device(zdev);
+               zpci_remove_device(zdev, false);
 
                ret = zpci_disable_device(zdev);
                if (ret)
@@ -140,12 +136,10 @@ static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
        case 0x0304: /* Configured -> Standby|Reserved */
                if (!zdev)
                        break;
-               if (pdev) {
-                       /* Give the driver a hint that the function is
-                        * already unusable. */
-                       pdev->error_state = pci_channel_io_perm_failure;
-                       zpci_remove_device(zdev);
-               }
+               /* Give the driver a hint that the function is
+                * already unusable.
+                */
+               zpci_remove_device(zdev, true);
 
                zdev->fh = ccdf->fh;
                zpci_disable_device(zdev);
index 46d8ed9..0e207c4 100644 (file)
@@ -597,7 +597,7 @@ b9b3        cu42    RRE_RR
 b9bd   trtre   RRF_U0RR
 b9be   srstu   RRE_RR
 b9bf   trte    RRF_U0RR
-b9c0   selhhhr RRF_RURR
+b9c0   selfhr  RRF_RURR
 b9c8   ahhhr   RRF_R0RR2
 b9c9   shhhr   RRF_R0RR2
 b9ca   alhhhr  RRF_R0RR2
index 7ac847c..e798e55 100644 (file)
@@ -54,6 +54,7 @@ config SUPERH
        select HAVE_PERF_EVENTS
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_UID16
+       select HAVE_SOFTIRQ_ON_OWN_STACK if IRQSTACKS
        select HAVE_STACKPROTECTOR
        select HAVE_SYSCALL_TRACEPOINTS
        select IRQ_FORCED_THREADING
index 1c0da99..ff2200f 100644 (file)
@@ -27,11 +27,10 @@ static int openCnt;
 
 static int gio_open(struct inode *inode, struct file *filp)
 {
-       int minor;
+       int minor = iminor(inode);
        int ret = -ENOENT;
 
        preempt_disable();
-       minor = MINOR(inode->i_rdev);
        if (minor < DEVCOUNT) {
                if (openCnt > 0) {
                        ret = -EALREADY;
@@ -46,9 +45,8 @@ static int gio_open(struct inode *inode, struct file *filp)
 
 static int gio_close(struct inode *inode, struct file *filp)
 {
-       int minor;
+       int minor = iminor(inode);
 
-       minor = MINOR(inode->i_rdev);
        if (minor < DEVCOUNT) {
                openCnt--;
        }
index 02ba622..d77f54e 100644 (file)
@@ -102,7 +102,6 @@ CONFIG_NLS_UTF8=y
 CONFIG_PRINTK_TIME=y
 # CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DEBUG_SHIRQ=y
 CONFIG_DETECT_HUNG_TASK=y
index d00376e..6c719ab 100644 (file)
@@ -128,7 +128,6 @@ CONFIG_NLS_ISO8859_15=y
 CONFIG_NLS_UTF8=y
 # CONFIG_ENABLE_MUST_CHECK is not set
 CONFIG_MAGIC_SYSRQ=y
-CONFIG_UNUSED_SYMBOLS=y
 CONFIG_DEBUG_KERNEL=y
 CONFIG_DETECT_HUNG_TASK=y
 # CONFIG_SCHED_DEBUG is not set
index 6d44c32..839551c 100644 (file)
@@ -51,7 +51,6 @@ asmlinkage int do_IRQ(unsigned int irq, struct pt_regs *regs);
 #ifdef CONFIG_IRQSTACKS
 extern void irq_ctx_init(int cpu);
 extern void irq_ctx_exit(int cpu);
-# define __ARCH_HAS_DO_SOFTIRQ
 #else
 # define irq_ctx_init(cpu) do { } while (0)
 # define irq_ctx_exit(cpu) do { } while (0)
index ab5f790..ef0f082 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/uaccess.h>
 #include <asm/thread_info.h>
 #include <cpu/mmu_context.h>
+#include <asm/softirq_stack.h>
 
 atomic_t irq_err_count;
 
index 80a5d1c..1aa508e 100644 (file)
@@ -114,7 +114,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp, unsigned long arg,
 
        childregs = task_pt_regs(p);
        p->thread.sp = (unsigned long) childregs;
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(childregs, 0, sizeof(struct pt_regs));
                p->thread.pc = (unsigned long) ret_from_kernel_thread;
                childregs->regs[4] = arg;
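
io_uring's PF_IO_WORKER threads never return to user mode through this frame, so copy_thread() must give them the kernel-thread setup; ORing the two flag bits makes either one select that branch. (The same test is applied to the sparc copy_thread() implementations further below.) The bit test in isolation, with illustrative flag values rather than the kernel's:

    #include <stdbool.h>

    #define PF_KTHREAD   0x1    /* illustrative values, not the kernel's */
    #define PF_IO_WORKER 0x2

    static bool takes_kernel_thread_path(unsigned int flags)
    {
            /* true if either bit is set */
            return (flags & (PF_KTHREAD | PF_IO_WORKER)) != 0;
    }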
index 659faef..285aaba 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -21,18 +21,19 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_abi_$(basetarget))'                \
                   '$(systbl_offset_$(basetarget))'
 
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-$(kapi)/syscall_table.h: $(syscall) $(systbl)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h
 kapisyshdr-y           += syscall_table.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index 9df40ac..d08eeba 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index 2c1cee9..164a525 100644 (file)
@@ -96,6 +96,7 @@ config SPARC64
        select ARCH_HAS_PTE_SPECIAL
        select PCI_DOMAINS if PCI
        select ARCH_HAS_GIGANTIC_PAGE
+       select HAVE_SOFTIRQ_ON_OWN_STACK
 
 config ARCH_PROC_KCORE_TEXT
        def_bool y
@@ -175,7 +176,7 @@ config SMP
          Management" code will be disabled if you say Y here.
 
          See also <file:Documentation/admin-guide/lockup-watchdogs.rst> and the SMP-HOWTO
-         available at <http://www.tldp.org/docs.html#howto>.
+         available at <https://www.tldp.org/docs.html#howto>.
 
          If you don't know what to do here, say N.
 
index a7a38fb..6d74064 100644 (file)
@@ -154,6 +154,10 @@ static off_t get_hdrs_offset(int kernelfd, const char *filename)
                offset -= LOOKBACK;
                /* skip a.out header */
                offset += AOUT_TEXT_OFFSET;
+               if (offset < 0) {
+                       errno = EINVAL;
+                       die("Calculated a negative offset, probably elftoaout generated an invalid image. Did you use a recent elftoaout?");
+               }
                if (lseek(kernelfd, offset, SEEK_SET) < 0)
                        die("lseek");
                if (read(kernelfd, buffer, BUFSIZE) != BUFSIZE)
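
The added guard rejects a.out images whose header fix-ups drive the computed file offset negative before it can reach lseek(). A sketch of the same defensive pattern (with errno given the conventional positive value):

    #include <errno.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <sys/types.h>

    static void die(const char *msg)
    {
            perror(msg);
            exit(1);
    }

    /* refuse offsets that went negative during header adjustment */
    static off_t checked_offset(off_t offset, off_t lookback, off_t text_off)
    {
            offset -= lookback;
            offset += text_off;
            if (offset < 0) {
                    errno = EINVAL;
                    die("negative offset, invalid input image?");
            }
            return offset;
    }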
index d91eb6a..12a4fb0 100644 (file)
@@ -65,9 +65,8 @@ CONFIG_CDROM_PKTCDVD=m
 CONFIG_CDROM_PKTCDVD_WCACHE=y
 CONFIG_ATA_OVER_ETH=m
 CONFIG_SUNVDC=m
-CONFIG_IDE=y
-CONFIG_BLK_DEV_IDECD=y
-CONFIG_BLK_DEV_ALI15X3=y
+CONFIG_ATA=y
+CONFIG_PATA_ALI=y
 CONFIG_RAID_ATTRS=m
 CONFIG_SCSI=y
 CONFIG_BLK_DEV_SD=y
@@ -94,7 +93,7 @@ CONFIG_NETDEVICES=y
 CONFIG_NET_ETHERNET=y
 CONFIG_MII=m
 CONFIG_SUNLANCE=m
-CONFIG_HAPPYMEAL=m
+CONFIG_HAPPYMEAL=y
 CONFIG_SUNGEM=m
 CONFIG_SUNVNET=m
 CONFIG_LDMVSW=m
@@ -235,3 +234,7 @@ CONFIG_CRYPTO_TWOFISH=m
 CONFIG_CRC16=m
 CONFIG_LIBCRC32C=m
 CONFIG_VCC=m
+CONFIG_PATA_CMD64X=y
+CONFIG_IP_PNP=y
+CONFIG_IP_PNP_DHCP=y
+CONFIG_DEVTMPFS=y
index 8625946..597a229 100644 (file)
@@ -18,7 +18,7 @@
  *
  * When we spin, we try to use an operation that will cause the
  * current cpu strand to block, and therefore make the core fully
- * available to any other other runnable strands.  There are two
+ * available to any other runnable strands.  There are two
  * options, based upon cpu capabilities.
  *
  * On all cpus prior to SPARC-T4 we do three dummy reads of the
index c73b5a3..a53d744 100644 (file)
@@ -25,7 +25,7 @@ static inline unsigned long __xchg(unsigned long x, __volatile__ void * ptr, int
        return x;
 }
 
-#define xchg(ptr,x) ((__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr))))
+#define xchg(ptr,x) ({(__typeof__(*(ptr)))__xchg((unsigned long)(x),(ptr),sizeof(*(ptr)));})
 
 /* Emulate cmpxchg() the same way we emulate atomics,
  * by hashing the object address and indexing into an array
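
The xchg() change wraps the cast in a GCC statement expression, ({ ... }). The practical effect is that callers may discard the result without tripping -Wunused-value, which a bare cast expression used as a statement would. A two-macro illustration (hypothetical macros, not the kernel's):

    #define AS_EXPR(v)  ((long)(v))
    #define AS_STMT(v)  ({ (long)(v); })

    void demo(void)
    {
            AS_STMT(42);        /* statement expression: unused result, no warning */
            (void)AS_EXPR(42);  /* a bare AS_EXPR(42); would warn under -Wunused-value */
    }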
index 7e078bc..8fb09ee 100644 (file)
@@ -8,7 +8,6 @@
 
 #include <asm/ptrace.h>
 #include <asm/processor.h>
-#include <asm/extable_64.h>
 #include <asm/spitfire.h>
 #include <asm/adi.h>
 
similarity index 92%
rename from arch/sparc/include/asm/extable_64.h
rename to arch/sparc/include/asm/extable.h
index 5a01719..554a9dc 100644 (file)
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0 */
-#ifndef __ASM_EXTABLE64_H
-#define __ASM_EXTABLE64_H
+#ifndef __ASM_EXTABLE_H
+#define __ASM_EXTABLE_H
 /*
  * The exception table consists of pairs of addresses: the first is the
  * address of an instruction that is allowed to fault, and the second is
index 4d748e9..154df2c 100644 (file)
@@ -93,7 +93,6 @@ void arch_trigger_cpumask_backtrace(const struct cpumask *mask,
 
 extern void *hardirq_stack[NR_CPUS];
 extern void *softirq_stack[NR_CPUS];
-#define __ARCH_HAS_DO_SOFTIRQ
 
 #define NO_IRQ         0xffffffff
 
index f94532f..274217e 100644 (file)
@@ -57,35 +57,39 @@ static inline int sparc_validate_prot(unsigned long prot, unsigned long addr)
 {
        if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_ADI))
                return 0;
-       if (prot & PROT_ADI) {
-               if (!adi_capable())
-                       return 0;
+       return 1;
+}
 
-               if (addr) {
-                       struct vm_area_struct *vma;
+#define arch_validate_flags(vm_flags) arch_validate_flags(vm_flags)
+/* arch_validate_flags() - Ensure combination of flags is valid for a
+ *     VMA.
+ */
+static inline bool arch_validate_flags(unsigned long vm_flags)
+{
+       /* If ADI is being enabled on this VMA, check for ADI
+        * capability on the platform and ensure VMA is suitable
+        * for ADI
+        */
+       if (vm_flags & VM_SPARC_ADI) {
+               if (!adi_capable())
+                       return false;
 
-                       vma = find_vma(current->mm, addr);
-                       if (vma) {
-                               /* ADI can not be enabled on PFN
-                                * mapped pages
-                                */
-                               if (vma->vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
-                                       return 0;
+               /* ADI can not be enabled on PFN mapped pages */
+               if (vm_flags & (VM_PFNMAP | VM_MIXEDMAP))
+                       return false;
 
-                               /* Mergeable pages can become unmergeable
-                                * if ADI is enabled on them even if they
-                                * have identical data on them. This can be
-                                * because ADI enabled pages with identical
-                                * data may still not have identical ADI
-                                * tags on them. Disallow ADI on mergeable
-                                * pages.
-                                */
-                               if (vma->vm_flags & VM_MERGEABLE)
-                                       return 0;
-                       }
-               }
+               /* Mergeable pages can become unmergeable
+                * if ADI is enabled on them even if they
+                * have identical data on them. This can be
+                * because ADI enabled pages with identical
+                * data may still not have identical ADI
+                * tags on them. Disallow ADI on mergeable
+                * pages.
+                */
+               if (vm_flags & VM_MERGEABLE)
+                       return false;
        }
-       return 1;
+       return true;
 }
 #endif /* CONFIG_SPARC64 */
 
index 7708d01..6067925 100644 (file)
@@ -113,7 +113,7 @@ extern unsigned long last_valid_pfn;
 extern void *srmmu_nocache_pool;
 #define __nocache_pa(VADDR) (((unsigned long)VADDR) - SRMMU_NOCACHE_VADDR + __pa((unsigned long)srmmu_nocache_pool))
 #define __nocache_va(PADDR) (__va((unsigned long)PADDR) - (unsigned long)srmmu_nocache_pool + SRMMU_NOCACHE_VADDR)
-#define __nocache_fix(VADDR) __va(__nocache_pa(VADDR))
+#define __nocache_fix(VADDR) ((__typeof__(VADDR))__va(__nocache_pa(VADDR)))
 
 /* Accessing the MMU control register. */
 unsigned int srmmu_get_mmureg(void);
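
The __typeof__(VADDR) cast makes __nocache_fix() type-preserving: it returns the pointer type it was given rather than a bare pointer from __va(), so call sites can drop their per-use casts. The pattern in isolation (translate() and FIXUP() are hypothetical stand-ins):

    /* a translation step that would otherwise decay to void * */
    static void *translate(void *p) { return p; }

    #define FIXUP(p) ((__typeof__(p))translate(p))

    void demo(void)
    {
            int *ip = 0;
            int *same = FIXUP(ip);  /* type preserved: no cast at the call site */
            (void)same;
    }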
index 3c4bc21..b6242f7 100644 (file)
@@ -50,16 +50,12 @@ struct thread_struct {
        unsigned long   fsr;
        unsigned long   fpqdepth;
        struct fpq      fpqueue[16];
-       unsigned long flags;
        mm_segment_t current_ds;
 };
 
-#define SPARC_FLAG_KTHREAD      0x1    /* task is a kernel thread */
-#define SPARC_FLAG_UNALIGNED    0x2    /* is allowed to do unaligned accesses */
-
 #define INIT_THREAD  { \
-       .flags = SPARC_FLAG_KTHREAD, \
        .current_ds = KERNEL_DS, \
+       .kregs = (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \
 }
 
 /* Do necessary setup to start up a newly executed thread. */
index 827b73a..28f8108 100644 (file)
@@ -9,18 +9,6 @@
 #include <uapi/asm/signal.h>
 
 #ifndef __ASSEMBLY__
-/*
- * DJHR
- * SA_STATIC_ALLOC is used for the sparc32 system to indicate that this
- * interrupt handler's irq structure should be statically allocated
- * by the request_irq routine.
- * The alternative is that arch/sparc/kernel/irq.c has carnal knowledge
- * of interrupt usage and that sucks. Also without a flag like this
- * it may be possible for the free_irq routine to attempt to free
- * statically allocated data.. which is NOT GOOD.
- *
- */
-#define SA_STATIC_ALLOC         0x8000
 
 #define __ARCH_HAS_KA_RESTORER
 #define __ARCH_HAS_SA_RESTORER
index 42cd4cd..8047a9c 100644 (file)
@@ -118,6 +118,7 @@ struct thread_info {
        .task           =       &tsk,                   \
        .current_ds     =       ASI_P,                  \
        .preempt_count  =       INIT_PREEMPT_COUNT,     \
+       .kregs          =       (struct pt_regs *)(init_stack+THREAD_SIZE)-1 \
 }
 
 /* how to get the thread information struct from C */
index dd85bc2..3900942 100644 (file)
@@ -1,6 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef ___ASM_SPARC_UACCESS_H
 #define ___ASM_SPARC_UACCESS_H
+
+#include <asm/extable.h>
+
 #if defined(__sparc__) && defined(__arch64__)
 #include <asm/uaccess_64.h>
 #else
index 0a2d3eb..4a12346 100644 (file)
@@ -13,9 +13,6 @@
 
 #include <asm/processor.h>
 
-#define ARCH_HAS_SORT_EXTABLE
-#define ARCH_HAS_SEARCH_EXTABLE
-
 /* Sparc is not segmented, however we need to be able to fool access_ok()
  * when doing system calls from kernel mode legitimately.
  *
 #define __access_ok(addr, size) (__user_ok((addr) & get_fs().seg, (size)))
 #define access_ok(addr, size) __access_ok((unsigned long)(addr), size)
 
-/*
- * The exception table consists of pairs of addresses: the first is the
- * address of an instruction that is allowed to fault, and the second is
- * the address at which the program should continue.  No registers are
- * modified, so it is entirely up to the continuation code to figure out
- * what to do.
- *
- * All the routines below use bits of fixup code that are out of line
- * with the main instruction path.  This means when everything is well,
- * we don't even have to jump over them.  Further, they do not intrude
- * on our cache or tlb entries.
- *
- * There is a special way how to put a range of potentially faulting
- * insns (like twenty ldd/std's with now intervening other instructions)
- * You specify address of first in insn and 0 in fixup and in the next
- * exception_table_entry you specify last potentially faulting insn + 1
- * and in fixup the routine which should handle the fault.
- * That fixup code will get
- * (faulting_insn_address - first_insn_in_the_range_address)/4
- * in %g2 (ie. index of the faulting instruction in the range).
- */
-
-struct exception_table_entry
-{
-        unsigned long insn, fixup;
-};
-
-/* Returns 0 if exception not found and fixup otherwise.  */
-unsigned long search_extables_range(unsigned long addr, unsigned long *g2);
-
 /* Uh, these should become the main single-value transfer routines..
  * They automatically use the right size if we just have the right
  * pointer type..
@@ -252,12 +219,7 @@ static inline unsigned long __clear_user(void __user *addr, unsigned long size)
        unsigned long ret;
 
        __asm__ __volatile__ (
-               ".section __ex_table,#alloc\n\t"
-               ".align 4\n\t"
-               ".word 1f,3\n\t"
-               ".previous\n\t"
                "mov %2, %%o1\n"
-               "1:\n\t"
                "call __bzero\n\t"
                " mov %1, %%o0\n\t"
                "mov %%o0, %0\n"
index 698cf69..30eb4c6 100644 (file)
@@ -10,7 +10,6 @@
 #include <linux/string.h>
 #include <asm/asi.h>
 #include <asm/spitfire.h>
-#include <asm/extable_64.h>
 
 #include <asm/processor.h>
 
index d589402..a269ad2 100644 (file)
@@ -994,7 +994,7 @@ do_syscall:
        andcc   %l5, _TIF_SYSCALL_TRACE, %g0
        mov     %i4, %o4
        bne     linux_syscall_trace
-        mov    %i0, %l5
+        mov    %i0, %l6
 2:
        call    %l7
         mov    %i5, %o5
@@ -1003,16 +1003,15 @@ do_syscall:
        st      %o0, [%sp + STACKFRAME_SZ + PT_I0]
 
 ret_sys_call:
-       ld      [%curptr + TI_FLAGS], %l6
+       ld      [%curptr + TI_FLAGS], %l5
        cmp     %o0, -ERESTART_RESTARTBLOCK
        ld      [%sp + STACKFRAME_SZ + PT_PSR], %g3
        set     PSR_C, %g2
        bgeu    1f
-        andcc  %l6, _TIF_SYSCALL_TRACE, %g0
+        andcc  %l5, _TIF_SYSCALL_TRACE, %g0
 
        /* System call success, clear Carry condition code. */
        andn    %g3, %g2, %g3
-       clr     %l6
        st      %g3, [%sp + STACKFRAME_SZ + PT_PSR]     
        bne     linux_syscall_trace2
         ld     [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
@@ -1027,7 +1026,6 @@ ret_sys_call:
        sub     %g0, %o0, %o0
        or      %g3, %g2, %g3
        st      %o0, [%sp + STACKFRAME_SZ + PT_I0]
-       mov     1, %l6
        st      %g3, [%sp + STACKFRAME_SZ + PT_PSR]
        bne     linux_syscall_trace2
         ld     [%sp + STACKFRAME_SZ + PT_NPC], %l1 /* pc = npc */
index be30c8d..6044b82 100644 (file)
@@ -515,7 +515,7 @@ continue_boot:
 
                /* I want a kernel stack NOW! */
                set     init_thread_union, %g1
-               set     (THREAD_SIZE - STACKFRAME_SZ), %g2
+               set     (THREAD_SIZE - STACKFRAME_SZ - TRACEREG_SZ), %g2
                add     %g1, %g2, %sp
                mov     0, %fp                  /* And for good luck */
 
index c5ff247..72a5bdc 100644 (file)
@@ -706,7 +706,7 @@ tlb_fixup_done:
        wr      %g0, ASI_P, %asi
        mov     1, %g1
        sllx    %g1, THREAD_SHIFT, %g1
-       sub     %g1, (STACKFRAME_SZ + STACK_BIAS), %g1
+       sub     %g1, (STACKFRAME_SZ + STACK_BIAS + TRACEREG_SZ), %g1
        add     %g6, %g1, %sp
 
        /* Set per-cpu pointer initially to zero, this makes
index 3ec9f14..c8848bb 100644 (file)
@@ -42,6 +42,7 @@
 #include <asm/head.h>
 #include <asm/hypervisor.h>
 #include <asm/cacheflush.h>
+#include <asm/softirq_stack.h>
 
 #include "entry.h"
 #include "cpumap.h"
index bd48575..3a66e62 100644 (file)
@@ -50,6 +50,7 @@ static void led_blink(struct timer_list *unused)
        add_timer(&led_blink_timer);
 }
 
+#ifdef CONFIG_PROC_FS
 static int led_proc_show(struct seq_file *m, void *v)
 {
        if (get_auxio() & AUXIO_LED)
@@ -111,6 +112,7 @@ static const struct proc_ops led_proc_ops = {
        .proc_release   = single_release,
        .proc_write     = led_proc_write,
 };
+#endif
 
 static struct proc_dir_entry *led;
 
index 5d45b6d..9c2b720 100644 (file)
@@ -552,9 +552,8 @@ static void pci_of_scan_bus(struct pci_pbm_info *pbm,
                pci_info(bus, "scan_bus[%pOF] bus no %d\n",
                         node, bus->number);
 
-       child = NULL;
        prev_devfn = -1;
-       while ((child = of_get_next_child(node, child)) != NULL) {
+       for_each_child_of_node(node, child) {
                if (ofpci_verbose)
                        pci_info(bus, "  * %pOF\n", child);
                reg = of_get_property(child, "reg", &reglen);
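
for_each_child_of_node() packages the manual of_get_next_child() cursor loop, including the reference hand-off between iterations, behind a single macro. The general shape of such an iterator macro, shown over a hypothetical singly linked node type:

    struct node { struct node *next; int data; };

    static struct node *first_child(struct node *parent) { return parent->next; }
    static struct node *next_child(struct node *n)       { return n->next; }

    #define for_each_child(parent, child)                        \
            for ((child) = first_child(parent); (child) != 0;    \
                 (child) = next_child(child))

    static int sum_children(struct node *parent)
    {
            struct node *child;
            int sum = 0;

            for_each_child(parent, child)
                    sum += child->data;
            return sum;
    }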
index a023637..3b97949 100644 (file)
@@ -183,7 +183,7 @@ void exit_thread(struct task_struct *tsk)
 #ifndef CONFIG_SMP
        if (last_task_used_math == tsk) {
 #else
-       if (test_ti_thread_flag(task_thread_info(tsk), TIF_USEDFPU)) {
+       if (test_tsk_thread_flag(tsk, TIF_USEDFPU)) {
 #endif
                /* Keep process from leaving FPU in a bogon state. */
                put_psr(get_psr() | PSR_EF);
@@ -216,16 +216,6 @@ void flush_thread(void)
                clear_thread_flag(TIF_USEDFPU);
 #endif
        }
-
-       /* This task is no longer a kernel thread. */
-       if (current->thread.flags & SPARC_FLAG_KTHREAD) {
-               current->thread.flags &= ~SPARC_FLAG_KTHREAD;
-
-               /* We must fixup kregs as well. */
-               /* XXX This was not fixed for ti for a while, worked. Unused? */
-               current->thread.kregs = (struct pt_regs *)
-                   (task_stack_page(current) + (THREAD_SIZE - TRACEREG_SZ));
-       }
 }
 
 static inline struct sparc_stackf __user *
@@ -309,11 +299,10 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
        ti->ksp = (unsigned long) new_stack;
        p->thread.kregs = childregs;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                extern int nwindows;
                unsigned long psr;
                memset(new_stack, 0, STACKFRAME_SZ + TRACEREG_SZ);
-               p->thread.flags |= SPARC_FLAG_KTHREAD;
                p->thread.current_ds = KERNEL_DS;
                ti->kpc = (((unsigned long) ret_from_kernel_thread) - 0x8);
                childregs->u_regs[UREG_G1] = sp; /* function */
@@ -325,7 +314,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
        }
        memcpy(new_stack, (char *)regs - STACKFRAME_SZ, STACKFRAME_SZ + TRACEREG_SZ);
        childregs->u_regs[UREG_FP] = sp;
-       p->thread.flags &= ~SPARC_FLAG_KTHREAD;
        p->thread.current_ds = USER_DS;
        ti->kpc = (((unsigned long) ret_from_fork) - 0x8);
        ti->kpsr = current->thread.fork_kpsr | PSR_PIL;
index 6f8c782..7afd0a8 100644 (file)
@@ -597,7 +597,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
                                       sizeof(struct sparc_stackf));
        t->fpsaved[0] = 0;
 
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(child_trap_frame, 0, child_stack_sz);
                __thread_flag_byte_ptr(t)[TI_FLAG_BYTE_CWP] = 
                        (current_pt_regs()->tstate + 1) & TSTATE_CWP;
index dca8ed8..8931fe2 100644 (file)
@@ -75,7 +75,7 @@ signal_p:
         ld     [%sp + STACKFRAME_SZ + PT_PSR], %t_psr
 
        mov     %g2, %o2
-       mov     %l5, %o1
+       mov     %l6, %o1
        call    do_notify_resume
         add    %sp, STACKFRAME_SZ, %o0 ! pt_regs ptr
 
index eea43a1..c8e0dd9 100644 (file)
@@ -266,7 +266,6 @@ static __init void leon_patch(void)
 }
 
 struct tt_entry *sparc_ttable;
-static struct pt_regs fake_swapper_regs;
 
 /* Called from head_32.S - before we have setup anything
  * in the kernel. Be very careful with what you do here.
@@ -363,8 +362,6 @@ void __init setup_arch(char **cmdline_p)
                (*(linux_dbvec->teach_debugger))();
        }
 
-       init_task.thread.kregs = &fake_swapper_regs;
-
        /* Run-time patch instructions to match the cpu model */
        per_cpu_patch();
 
index d872441..48abee4 100644 (file)
@@ -165,8 +165,6 @@ extern int root_mountflags;
 
 char reboot_command[COMMAND_LINE_SIZE];
 
-static struct pt_regs fake_swapper_regs = { { 0, }, 0, 0, 0, 0 };
-
 static void __init per_cpu_patch(void)
 {
        struct cpuid_patch_entry *p;
@@ -661,8 +659,6 @@ void __init setup_arch(char **cmdline_p)
        rd_image_start = ram_flags & RAMDISK_IMAGE_START_MASK;
 #endif
 
-       task_thread_info(&init_task)->kregs = &fake_swapper_regs;
-
 #ifdef CONFIG_IP_PNP
        if (!ic_set_manually) {
                phandle chosen = prom_finddevice("/chosen");
index 11cf228..02f3ad5 100644 (file)
@@ -400,8 +400,8 @@ static int setup_rt_frame(struct ksignal *ksig, struct pt_regs *regs,
        else {
                regs->u_regs[UREG_I7] = (unsigned long)(&(sf->insns[0]) - 2);
 
-               /* mov __NR_sigreturn, %g1 */
-               err |= __put_user(0x821020d8, &sf->insns[0]);
+               /* mov __NR_rt_sigreturn, %g1 */
+               err |= __put_user(0x82102065, &sf->insns[0]);
 
                /* t 0x10 */
                err |= __put_user(0x91d02010, &sf->insns[1]);
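
The corrected word differs from the old one only in the 13-bit immediate: both encode the synthetic mov imm, %g1 (that is, or %g0, imm, %g1), with 0xd8 being __NR_sigreturn and 0x65 __NR_rt_sigreturn, as the comments say. The encoding arithmetic is easy to check in user space (SPARC format-3 fields per the V8 manual):

    #include <assert.h>
    #include <stdint.h>

    /* or %g0, simm13, %g1 -- the synthetic "mov simm13, %g1" */
    static uint32_t mov_imm_to_g1(uint32_t imm13)
    {
            return (2u << 30)       /* op:  arithmetic/logical group */
                 | (1u << 25)       /* rd:  %g1                      */
                 | (0x02u << 19)    /* op3: or                       */
                 | (0u << 14)       /* rs1: %g0                      */
                 | (1u << 13)       /* i:   immediate form           */
                 | (imm13 & 0x1fff);
    }

    int main(void)
    {
            assert(mov_imm_to_g1(0xd8) == 0x821020d8);  /* old: __NR_sigreturn    */
            assert(mov_imm_to_g1(0x65) == 0x82102065);  /* new: __NR_rt_sigreturn */
            return 0;
    }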
index c22a21c..283f644 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -22,24 +22,24 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_offset_$(basetarget))'
 
 syshdr_abis_unistd_32 := common,32
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_abis_unistd_64 := common,64
-$(uapi)/unistd_64.h: $(syscall) $(syshdr)
+$(uapi)/unistd_64.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 systbl_abis_syscall_table_32 := common,32
-$(kapi)/syscall_table_32.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_32.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abis_syscall_table_64 := common,64
-$(kapi)/syscall_table_64.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_64.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 systbl_abis_syscall_table_c32 := common,32
 systbl_abi_syscall_table_c32 := c32
-$(kapi)/syscall_table_c32.h: $(syscall) $(systbl)
+$(kapi)/syscall_table_c32.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h unistd_64.h
@@ -47,9 +47,10 @@ kapisyshdr-y         += syscall_table_32.h           \
                           syscall_table_64.h           \
                           syscall_table_c32.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index 40d8c7c..84403a9 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index d92e5ea..a850dcc 100644 (file)
@@ -275,14 +275,13 @@ bool is_no_fault_exception(struct pt_regs *regs)
                        asi = (regs->tstate >> 24); /* saved %asi       */
                else
                        asi = (insn >> 5);          /* immediate asi    */
-               if ((asi & 0xf2) == ASI_PNF) {
-                       if (insn & 0x1000000) {     /* op3[5:4]=3       */
-                               handle_ldf_stq(insn, regs);
-                               return true;
-                       } else if (insn & 0x200000) { /* op3[2], stores */
+               if ((asi & 0xf6) == ASI_PNF) {
+                       if (insn & 0x200000)        /* op3[2], stores   */
                                return false;
-                       }
-                       handle_ld_nf(insn, regs);
+                       if (insn & 0x1000000)       /* op3[5:4]=3 (fp)  */
+                               handle_ldf_stq(insn, regs);
+                       else
+                               handle_ld_nf(insn, regs);
                        return true;
                }
        }
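
ASI_PNF is 0x82, and masking with 0xf6 instead of 0xf2 frees only bits 3 and 0, so the test now matches exactly the four no-fault ASIs (primary/secondary, big/little endian), where the old mask also let 0x86, 0x87, 0x8e and 0x8f through. A quick enumeration of what the tightened test accepts:

    #include <stdio.h>

    #define ASI_PNF 0x82    /* primary no-fault */

    int main(void)
    {
            for (unsigned int asi = 0; asi < 0x100; asi++)
                    if ((asi & 0xf6) == ASI_PNF)
                            printf("0x%02x\n", asi);
            /* prints 0x82 0x83 0x8a 0x8b: the primary/secondary,
             * big/little-endian no-fault ASIs */
            return 0;
    }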
index 83db94c..ef5c520 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/uaccess.h>
 #include <linux/smp.h>
 #include <linux/perf_event.h>
+#include <linux/extable.h>
 
 #include <asm/setup.h>
 
@@ -213,10 +214,10 @@ static inline int ok_for_kernel(unsigned int insn)
 
 static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
 {
-       unsigned long g2 = regs->u_regs [UREG_G2];
-       unsigned long fixup = search_extables_range(regs->pc, &g2);
+       const struct exception_table_entry *entry;
 
-       if (!fixup) {
+       entry = search_exception_tables(regs->pc);
+       if (!entry) {
                unsigned long address = compute_effective_address(regs, insn);
                if(address < PAGE_SIZE) {
                        printk(KERN_ALERT "Unable to handle kernel NULL pointer dereference in mna handler");
@@ -232,9 +233,8 @@ static void kernel_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
                die_if_kernel("Oops", regs);
                /* Not reached */
        }
-       regs->pc = fixup;
+       regs->pc = entry->fixup;
        regs->npc = regs->pc + 4;
-       regs->u_regs [UREG_G2] = g2;
 }
 
 asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
@@ -274,103 +274,9 @@ asmlinkage void kernel_unaligned_trap(struct pt_regs *regs, unsigned int insn)
        }
 }
 
-static inline int ok_for_user(struct pt_regs *regs, unsigned int insn,
-                             enum direction dir)
-{
-       unsigned int reg;
-       int size = ((insn >> 19) & 3) == 3 ? 8 : 4;
-
-       if ((regs->pc | regs->npc) & 3)
-               return 0;
-
-       /* Must access_ok() in all the necessary places. */
-#define WINREG_ADDR(regnum) \
-       ((void __user *)(((unsigned long *)regs->u_regs[UREG_FP])+(regnum)))
-
-       reg = (insn >> 25) & 0x1f;
-       if (reg >= 16) {
-               if (!access_ok(WINREG_ADDR(reg - 16), size))
-                       return -EFAULT;
-       }
-       reg = (insn >> 14) & 0x1f;
-       if (reg >= 16) {
-               if (!access_ok(WINREG_ADDR(reg - 16), size))
-                       return -EFAULT;
-       }
-       if (!(insn & 0x2000)) {
-               reg = (insn & 0x1f);
-               if (reg >= 16) {
-                       if (!access_ok(WINREG_ADDR(reg - 16), size))
-                               return -EFAULT;
-               }
-       }
-#undef WINREG_ADDR
-       return 0;
-}
-
-static void user_mna_trap_fault(struct pt_regs *regs, unsigned int insn)
+asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
 {
        send_sig_fault(SIGBUS, BUS_ADRALN,
                       (void __user *)safe_compute_effective_address(regs, insn),
                       0, current);
 }
-
-asmlinkage void user_unaligned_trap(struct pt_regs *regs, unsigned int insn)
-{
-       enum direction dir;
-
-       if(!(current->thread.flags & SPARC_FLAG_UNALIGNED) ||
-          (((insn >> 30) & 3) != 3))
-               goto kill_user;
-       dir = decode_direction(insn);
-       if(!ok_for_user(regs, insn, dir)) {
-               goto kill_user;
-       } else {
-               int err, size = decode_access_size(insn);
-               unsigned long addr;
-
-               if(floating_point_load_or_store_p(insn)) {
-                       printk("User FPU load/store unaligned unsupported.\n");
-                       goto kill_user;
-               }
-
-               addr = compute_effective_address(regs, insn);
-               perf_sw_event(PERF_COUNT_SW_ALIGNMENT_FAULTS, 1, regs, addr);
-               switch(dir) {
-               case load:
-                       err = do_int_load(fetch_reg_addr(((insn>>25)&0x1f),
-                                                        regs),
-                                         size, (unsigned long *) addr,
-                                         decode_signedness(insn));
-                       break;
-
-               case store:
-                       err = do_int_store(((insn>>25)&0x1f), size,
-                                          (unsigned long *) addr, regs);
-                       break;
-
-               case both:
-                       /*
-                        * This was supported in 2.4. However, we question
-                        * the value of SWAP instruction across word boundaries.
-                        */
-                       printk("Unaligned SWAP unsupported.\n");
-                       err = -EFAULT;
-                       break;
-
-               default:
-                       unaligned_panic("Impossible user unaligned trap.");
-                       goto out;
-               }
-               if (err)
-                       goto kill_user;
-               else
-                       advance(regs);
-               goto out;
-       }
-
-kill_user:
-       user_mna_trap_fault(regs, insn);
-out:
-       ;
-}
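
kernel_mna_trap_fault() now goes through the generic extable: sorted two-word (insn, fixup) pairs searched by the faulting address via search_exception_tables(), replacing the sparc32-specific range format and its %g2 side channel. Roughly what the generic lookup does (a sketch, not lib/extable.c verbatim):

    struct exception_table_entry { unsigned long insn, fixup; };

    /* binary search over a table sorted by insn address */
    static const struct exception_table_entry *
    find_fixup(const struct exception_table_entry *tbl, int num, unsigned long addr)
    {
            int lo = 0, hi = num - 1;

            while (lo <= hi) {
                    int mid = lo + (hi - lo) / 2;

                    if (tbl[mid].insn == addr)
                            return &tbl[mid];
                    if (tbl[mid].insn < addr)
                            lo = mid + 1;
                    else
                            hi = mid - 1;
            }
            return 0;   /* no fixup registered: genuine fault */
    }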
index 7db5aab..e27afd2 100644 (file)
@@ -428,7 +428,7 @@ static int process_dreg_info(struct vio_driver_state *vio,
                             struct vio_dring_register *pkt)
 {
        struct vio_dring_state *dr;
-       int i, len;
+       int i;
 
        viodbg(HS, "GOT DRING_REG INFO ident[%llx] "
               "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n",
@@ -482,9 +482,7 @@ static int process_dreg_info(struct vio_driver_state *vio,
               pkt->num_descr, pkt->descr_size, pkt->options,
               pkt->num_cookies);
 
-       len = (sizeof(*pkt) +
-              (dr->ncookies * sizeof(struct ldc_trans_cookie)));
-       if (send_ctrl(vio, &pkt->tag, len) < 0)
+       if (send_ctrl(vio, &pkt->tag, struct_size(pkt, cookies, dr->ncookies)) < 0)
                goto send_nack;
 
        vio->dr_state |= VIO_DR_STATE_RXREG;
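
struct_size(pkt, cookies, n) collapses the open-coded sizeof(*pkt) + n * sizeof(element) into a single overflow-aware expression for the trailing flexible array. A user-space model of the size it yields (leaving out the kernel's overflow saturation):

    #include <stdint.h>
    #include <stdio.h>

    struct trans_cookie { uint64_t addr, size; };

    struct dring_register {
            uint64_t ident;
            uint32_t num_descr, descr_size;
            struct trans_cookie cookies[];  /* flexible array member */
    };

    /* what struct_size(pkt, cookies, n) evaluates to, minus the overflow checks */
    static size_t pkt_size(size_t ncookies)
    {
            return sizeof(struct dring_register)
                 + ncookies * sizeof(struct trans_cookie);
    }

    int main(void)
    {
            printf("%zu\n", pkt_size(4));   /* header plus four cookies */
            return 0;
    }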
index 7488d13..781e39b 100644 (file)
@@ -155,13 +155,6 @@ cpout:     retl                                            ! get outta here
         .text;                                  \
         .align  4
 
-#define EXT(start,end)                         \
-        .section __ex_table,ALLOC;             \
-        .align  4;                              \
-        .word   start, 0, end, cc_fault;         \
-        .text;                                  \
-        .align  4
-
        /* This aligned version executes typically in 8.5 superscalar cycles, this
         * is the best I can do.  I say 8.5 because the final add will pair with
         * the next ldd in the main unrolled loop.  Thus the pipe is always full.
@@ -169,20 +162,20 @@ cpout:    retl                                            ! get outta here
         * please check the fixup code below as well.
         */
 #define CSUMCOPY_BIGCHUNK_ALIGNED(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)  \
-       ldd     [src + off + 0x00], t0;                                                 \
-       ldd     [src + off + 0x08], t2;                                                 \
+       EX(ldd  [src + off + 0x00], t0);                                                \
+       EX(ldd  [src + off + 0x08], t2);                                                \
        addxcc  t0, sum, sum;                                                           \
-       ldd     [src + off + 0x10], t4;                                                 \
+       EX(ldd  [src + off + 0x10], t4);                                                \
        addxcc  t1, sum, sum;                                                           \
-       ldd     [src + off + 0x18], t6;                                                 \
+       EX(ldd  [src + off + 0x18], t6);                                                \
        addxcc  t2, sum, sum;                                                           \
-       std     t0, [dst + off + 0x00];                                                 \
+       EX(std  t0, [dst + off + 0x00]);                                                \
        addxcc  t3, sum, sum;                                                           \
-       std     t2, [dst + off + 0x08];                                                 \
+       EX(std  t2, [dst + off + 0x08]);                                                \
        addxcc  t4, sum, sum;                                                           \
-       std     t4, [dst + off + 0x10];                                                 \
+       EX(std  t4, [dst + off + 0x10]);                                                \
        addxcc  t5, sum, sum;                                                           \
-       std     t6, [dst + off + 0x18];                                                 \
+       EX(std  t6, [dst + off + 0x18]);                                                \
        addxcc  t6, sum, sum;                                                           \
        addxcc  t7, sum, sum;
 
@@ -191,39 +184,39 @@ cpout:    retl                                            ! get outta here
         * Viking MXCC into streaming mode.  Ho hum...
         */
 #define CSUMCOPY_BIGCHUNK(src, dst, sum, off, t0, t1, t2, t3, t4, t5, t6, t7)  \
-       ldd     [src + off + 0x00], t0;                                         \
-       ldd     [src + off + 0x08], t2;                                         \
-       ldd     [src + off + 0x10], t4;                                         \
-       ldd     [src + off + 0x18], t6;                                         \
-       st      t0, [dst + off + 0x00];                                         \
+       EX(ldd  [src + off + 0x00], t0);                                        \
+       EX(ldd  [src + off + 0x08], t2);                                        \
+       EX(ldd  [src + off + 0x10], t4);                                        \
+       EX(ldd  [src + off + 0x18], t6);                                        \
+       EX(st   t0, [dst + off + 0x00]);                                        \
        addxcc  t0, sum, sum;                                                   \
-       st      t1, [dst + off + 0x04];                                         \
+       EX(st   t1, [dst + off + 0x04]);                                        \
        addxcc  t1, sum, sum;                                                   \
-       st      t2, [dst + off + 0x08];                                         \
+       EX(st   t2, [dst + off + 0x08]);                                        \
        addxcc  t2, sum, sum;                                                   \
-       st      t3, [dst + off + 0x0c];                                         \
+       EX(st   t3, [dst + off + 0x0c]);                                        \
        addxcc  t3, sum, sum;                                                   \
-       st      t4, [dst + off + 0x10];                                         \
+       EX(st   t4, [dst + off + 0x10]);                                        \
        addxcc  t4, sum, sum;                                                   \
-       st      t5, [dst + off + 0x14];                                         \
+       EX(st   t5, [dst + off + 0x14]);                                        \
        addxcc  t5, sum, sum;                                                   \
-       st      t6, [dst + off + 0x18];                                         \
+       EX(st   t6, [dst + off + 0x18]);                                        \
        addxcc  t6, sum, sum;                                                   \
-       st      t7, [dst + off + 0x1c];                                         \
+       EX(st   t7, [dst + off + 0x1c]);                                        \
        addxcc  t7, sum, sum;
 
        /* Yuck, 6 superscalar cycles... */
 #define CSUMCOPY_LASTCHUNK(src, dst, sum, off, t0, t1, t2, t3) \
-       ldd     [src - off - 0x08], t0;                         \
-       ldd     [src - off - 0x00], t2;                         \
+       EX(ldd  [src - off - 0x08], t0);                        \
+       EX(ldd  [src - off - 0x00], t2);                        \
        addxcc  t0, sum, sum;                                   \
-       st      t0, [dst - off - 0x08];                         \
+       EX(st   t0, [dst - off - 0x08]);                        \
        addxcc  t1, sum, sum;                                   \
-       st      t1, [dst - off - 0x04];                         \
+       EX(st   t1, [dst - off - 0x04]);                        \
        addxcc  t2, sum, sum;                                   \
-       st      t2, [dst - off - 0x00];                         \
+       EX(st   t2, [dst - off - 0x00]);                        \
        addxcc  t3, sum, sum;                                   \
-       st      t3, [dst - off + 0x04];
+       EX(st   t3, [dst - off + 0x04]);
 
        /* Handle the end cruft code out of band for better cache patterns. */
 cc_end_cruft:
@@ -331,7 +324,6 @@ __csum_partial_copy_sparc_generic:
        CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
-10:    EXT(5b, 10b)                    ! note for exception handling
        sub     %g1, 128, %g1           ! detract from length
        addx    %g0, %g7, %g7           ! add in last carry bit
        andcc   %g1, 0xffffff80, %g0    ! more to csum?
@@ -356,8 +348,7 @@ cctbl:      CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x68,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x28,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x18,%g2,%g3,%g4,%g5)
        CSUMCOPY_LASTCHUNK(%o0,%o1,%g7,0x08,%g2,%g3,%g4,%g5)
-12:    EXT(cctbl, 12b)                 ! note for exception table handling
-       addx    %g0, %g7, %g7
+12:    addx    %g0, %g7, %g7
        andcc   %o3, 0xf, %g0           ! check for low bits set
 ccte:  bne     cc_end_cruft            ! something left, handle it out of band
         andcc  %o3, 8, %g0             ! begin checks for that code
@@ -367,7 +358,6 @@ ccdbl:      CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x00,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o
        CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x20,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x40,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
        CSUMCOPY_BIGCHUNK_ALIGNED(%o0,%o1,%g7,0x60,%o4,%o5,%g2,%g3,%g4,%g5,%o2,%o3)
-11:    EXT(ccdbl, 11b)                 ! note for exception table handling
        sub     %g1, 128, %g1           ! detract from length
        addx    %g0, %g7, %g7           ! add in last carry bit
        andcc   %g1, 0xffffff80, %g0    ! more to csum?
index dc72f2b..954572c 100644 (file)
 /* Work around cpp -rob */
 #define ALLOC #alloc
 #define EXECINSTR #execinstr
+
+#define EX_ENTRY(l1, l2)                       \
+       .section __ex_table,ALLOC;              \
+       .align  4;                              \
+       .word   l1, l2;                         \
+       .text;
+
 #define EX(x,y,a,b)                            \
 98:    x,y;                                    \
        .section .fixup,ALLOC,EXECINSTR;        \
        .align  4;                              \
-99:    ba fixupretl;                           \
-        a, b, %g3;                             \
-       .section __ex_table,ALLOC;              \
-       .align  4;                              \
-       .word   98b, 99b;                       \
-       .text;                                  \
-       .align  4
+99:    retl;                                   \
+        a, b, %o0;                             \
+       EX_ENTRY(98b, 99b)
 
 #define EX2(x,y,c,d,e,a,b)                     \
 98:    x,y;                                    \
        .section .fixup,ALLOC,EXECINSTR;        \
        .align  4;                              \
 99:    c, d, e;                                \
-       ba fixupretl;                           \
-        a, b, %g3;                             \
-       .section __ex_table,ALLOC;              \
-       .align  4;                              \
-       .word   98b, 99b;                       \
-       .text;                                  \
-       .align  4
+       retl;                                   \
+        a, b, %o0;                             \
+       EX_ENTRY(98b, 99b)
 
 #define EXO2(x,y)                              \
 98:    x, y;                                   \
-       .section __ex_table,ALLOC;              \
-       .align  4;                              \
-       .word   98b, 97f;                       \
-       .text;                                  \
-       .align  4
+       EX_ENTRY(98b, 97f)
 
-#define EXT(start,end,handler)                 \
-       .section __ex_table,ALLOC;              \
-       .align  4;                              \
-       .word   start, 0, end, handler;         \
-       .text;                                  \
-       .align  4
+#define LD(insn, src, offset, reg, label)      \
+98:    insn [%src + (offset)], %reg;           \
+       .section .fixup,ALLOC,EXECINSTR;        \
+99:    ba      label;                          \
+        mov    offset, %g5;                    \
+       EX_ENTRY(98b, 99b)
 
-/* Please do not change following macros unless you change logic used
- * in .fixup at the end of this file as well
- */
+#define ST(insn, dst, offset, reg, label)      \
+98:    insn %reg, [%dst + (offset)];           \
+       .section .fixup,ALLOC,EXECINSTR;        \
+99:    ba      label;                          \
+        mov    offset, %g5;                    \
+       EX_ENTRY(98b, 99b)
 
 /* Both these macros have to start with exactly the same insn */
+/* left: g7 + (g1 % 128) - offset */
 #define MOVE_BIGCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
-       ldd     [%src + (offset) + 0x00], %t0; \
-       ldd     [%src + (offset) + 0x08], %t2; \
-       ldd     [%src + (offset) + 0x10], %t4; \
-       ldd     [%src + (offset) + 0x18], %t6; \
-       st      %t0, [%dst + (offset) + 0x00]; \
-       st      %t1, [%dst + (offset) + 0x04]; \
-       st      %t2, [%dst + (offset) + 0x08]; \
-       st      %t3, [%dst + (offset) + 0x0c]; \
-       st      %t4, [%dst + (offset) + 0x10]; \
-       st      %t5, [%dst + (offset) + 0x14]; \
-       st      %t6, [%dst + (offset) + 0x18]; \
-       st      %t7, [%dst + (offset) + 0x1c];
-
+       LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \
+       LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \
+       LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \
+       LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \
+       ST(st, dst, offset + 0x00, t0, bigchunk_fault)  \
+       ST(st, dst, offset + 0x04, t1, bigchunk_fault)  \
+       ST(st, dst, offset + 0x08, t2, bigchunk_fault)  \
+       ST(st, dst, offset + 0x0c, t3, bigchunk_fault)  \
+       ST(st, dst, offset + 0x10, t4, bigchunk_fault)  \
+       ST(st, dst, offset + 0x14, t5, bigchunk_fault)  \
+       ST(st, dst, offset + 0x18, t6, bigchunk_fault)  \
+       ST(st, dst, offset + 0x1c, t7, bigchunk_fault)
+
+/* left: g7 + (g1 % 128) - offset */
 #define MOVE_BIGALIGNCHUNK(src, dst, offset, t0, t1, t2, t3, t4, t5, t6, t7) \
-       ldd     [%src + (offset) + 0x00], %t0; \
-       ldd     [%src + (offset) + 0x08], %t2; \
-       ldd     [%src + (offset) + 0x10], %t4; \
-       ldd     [%src + (offset) + 0x18], %t6; \
-       std     %t0, [%dst + (offset) + 0x00]; \
-       std     %t2, [%dst + (offset) + 0x08]; \
-       std     %t4, [%dst + (offset) + 0x10]; \
-       std     %t6, [%dst + (offset) + 0x18];
+       LD(ldd, src, offset + 0x00, t0, bigchunk_fault) \
+       LD(ldd, src, offset + 0x08, t2, bigchunk_fault) \
+       LD(ldd, src, offset + 0x10, t4, bigchunk_fault) \
+       LD(ldd, src, offset + 0x18, t6, bigchunk_fault) \
+       ST(std, dst, offset + 0x00, t0, bigchunk_fault) \
+       ST(std, dst, offset + 0x08, t2, bigchunk_fault) \
+       ST(std, dst, offset + 0x10, t4, bigchunk_fault) \
+       ST(std, dst, offset + 0x18, t6, bigchunk_fault)
 
+       .section .fixup,#alloc,#execinstr
+bigchunk_fault:
+       sub     %g7, %g5, %o0
+       and     %g1, 127, %g1
+       retl
+        add    %o0, %g1, %o0
+
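
The shared fault handlers above replace the old EXT() range bookkeeping: each LD()/ST() stashes its chunk offset in %g5 before branching into .fixup, and the handler folds the loop counters into the "bytes not copied" return value. A rough C model of bigchunk_fault, with register roles read directly from the asm and the "left:" comments (the other *_fault handlers below follow the same pattern):

```c
/*
 * Model of bigchunk_fault above.  Register roles, per the "left:"
 * comments in this file: g7 = bytes still owed to the 128-byte loop,
 * g1 = total remaining length, g5 = offset of the faulting LD()/ST()
 * within the current chunk (stored by the macro's fixup stub).
 */
static unsigned long bigchunk_bytes_left(unsigned long g7, unsigned long g1,
					 unsigned long g5)
{
	/* left = g7 + (g1 % 128) - offset, returned in %o0 */
	return (g7 - g5) + (g1 & 127);
}
```
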
+/* left: offset + 16 + (g1 % 16) */
 #define MOVE_LASTCHUNK(src, dst, offset, t0, t1, t2, t3) \
-       ldd     [%src - (offset) - 0x10], %t0; \
-       ldd     [%src - (offset) - 0x08], %t2; \
-       st      %t0, [%dst - (offset) - 0x10]; \
-       st      %t1, [%dst - (offset) - 0x0c]; \
-       st      %t2, [%dst - (offset) - 0x08]; \
-       st      %t3, [%dst - (offset) - 0x04];
+       LD(ldd, src, -(offset + 0x10), t0, lastchunk_fault)     \
+       LD(ldd, src, -(offset + 0x08), t2, lastchunk_fault)     \
+       ST(st, dst, -(offset + 0x10), t0, lastchunk_fault)      \
+       ST(st, dst, -(offset + 0x0c), t1, lastchunk_fault)      \
+       ST(st, dst, -(offset + 0x08), t2, lastchunk_fault)      \
+       ST(st, dst, -(offset + 0x04), t3, lastchunk_fault)
 
-#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
-       lduh    [%src + (offset) + 0x00], %t0; \
-       lduh    [%src + (offset) + 0x02], %t1; \
-       lduh    [%src + (offset) + 0x04], %t2; \
-       lduh    [%src + (offset) + 0x06], %t3; \
-       sth     %t0, [%dst + (offset) + 0x00]; \
-       sth     %t1, [%dst + (offset) + 0x02]; \
-       sth     %t2, [%dst + (offset) + 0x04]; \
-       sth     %t3, [%dst + (offset) + 0x06];
+       .section .fixup,#alloc,#execinstr
+lastchunk_fault:
+       and     %g1, 15, %g1
+       retl
+        sub    %g1, %g5, %o0
 
+/* left: o3 + (o2 % 16) - offset */
+#define MOVE_HALFCHUNK(src, dst, offset, t0, t1, t2, t3) \
+       LD(lduh, src, offset + 0x00, t0, halfchunk_fault)       \
+       LD(lduh, src, offset + 0x02, t1, halfchunk_fault)       \
+       LD(lduh, src, offset + 0x04, t2, halfchunk_fault)       \
+       LD(lduh, src, offset + 0x06, t3, halfchunk_fault)       \
+       ST(sth, dst, offset + 0x00, t0, halfchunk_fault)        \
+       ST(sth, dst, offset + 0x02, t1, halfchunk_fault)        \
+       ST(sth, dst, offset + 0x04, t2, halfchunk_fault)        \
+       ST(sth, dst, offset + 0x06, t3, halfchunk_fault)
+
+/* left: o3 + (o2 % 16) + offset + 2 */
 #define MOVE_SHORTCHUNK(src, dst, offset, t0, t1) \
-       ldub    [%src - (offset) - 0x02], %t0; \
-       ldub    [%src - (offset) - 0x01], %t1; \
-       stb     %t0, [%dst - (offset) - 0x02]; \
-       stb     %t1, [%dst - (offset) - 0x01];
+       LD(ldub, src, -(offset + 0x02), t0, halfchunk_fault)    \
+       LD(ldub, src, -(offset + 0x01), t1, halfchunk_fault)    \
+       ST(stb, dst, -(offset + 0x02), t0, halfchunk_fault)     \
+       ST(stb, dst, -(offset + 0x01), t1, halfchunk_fault)
+
+       .section .fixup,#alloc,#execinstr
+halfchunk_fault:
+       and     %o2, 15, %o2
+       sub     %o3, %g5, %o3
+       retl
+        add    %o2, %o3, %o0
+
+/* left: offset + 2 + (o2 % 2) */
+#define MOVE_LAST_SHORTCHUNK(src, dst, offset, t0, t1) \
+       LD(ldub, src, -(offset + 0x02), t0, last_shortchunk_fault)      \
+       LD(ldub, src, -(offset + 0x01), t1, last_shortchunk_fault)      \
+       ST(stb, dst, -(offset + 0x02), t0, last_shortchunk_fault)       \
+       ST(stb, dst, -(offset + 0x01), t1, last_shortchunk_fault)
+
+       .section .fixup,#alloc,#execinstr
+last_shortchunk_fault:
+       and     %o2, 1, %o2
+       retl
+        sub    %o2, %g5, %o0
 
        .text
        .align  4
@@ -182,8 +218,6 @@ __copy_user:        /* %o0=dst %o1=src %o2=len */
        MOVE_BIGCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-80:
-       EXT(5b, 80b, 50f)
        subcc   %g7, 128, %g7
        add     %o1, 128, %o1
        bne     5b
@@ -201,7 +235,6 @@ __copy_user:        /* %o0=dst %o1=src %o2=len */
        jmpl    %o5 + %lo(copy_user_table_end), %g0
         add    %o0, %g7, %o0
 
-copy_user_table:
        MOVE_LASTCHUNK(o1, o0, 0x60, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x50, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x40, g2, g3, g4, g5)
@@ -210,7 +243,6 @@ copy_user_table:
        MOVE_LASTCHUNK(o1, o0, 0x10, g2, g3, g4, g5)
        MOVE_LASTCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
 copy_user_table_end:
-       EXT(copy_user_table, copy_user_table_end, 51f)
        be      copy_user_last7
         andcc  %g1, 4, %g0
 
@@ -250,8 +282,6 @@ ldd_std:
        MOVE_BIGALIGNCHUNK(o1, o0, 0x20, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x40, o2, o3, o4, o5, g2, g3, g4, g5)
        MOVE_BIGALIGNCHUNK(o1, o0, 0x60, o2, o3, o4, o5, g2, g3, g4, g5)
-81:
-       EXT(ldd_std, 81b, 52f)
        subcc   %g7, 128, %g7
        add     %o1, 128, %o1
        bne     ldd_std
@@ -290,8 +320,6 @@ cannot_optimize:
 10:
        MOVE_HALFCHUNK(o1, o0, 0x00, g2, g3, g4, g5)
        MOVE_HALFCHUNK(o1, o0, 0x08, g2, g3, g4, g5)
-82:
-       EXT(10b, 82b, 53f)
        subcc   %o3, 0x10, %o3
        add     %o1, 0x10, %o1
        bne     10b
@@ -308,8 +336,6 @@ byte_chunk:
        MOVE_SHORTCHUNK(o1, o0, -0x0c, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x0e, g2, g3)
        MOVE_SHORTCHUNK(o1, o0, -0x10, g2, g3)
-83:
-       EXT(byte_chunk, 83b, 54f)
        subcc   %o3, 0x10, %o3
        add     %o1, 0x10, %o1
        bne     byte_chunk
@@ -325,16 +351,14 @@ short_end:
        add     %o1, %o3, %o1
        jmpl    %o5 + %lo(short_table_end), %g0
         andcc  %o2, 1, %g0
-84:
-       MOVE_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
-       MOVE_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
-       MOVE_SHORTCHUNK(o1, o0, 0x08, g2, g3)
-       MOVE_SHORTCHUNK(o1, o0, 0x06, g2, g3)
-       MOVE_SHORTCHUNK(o1, o0, 0x04, g2, g3)
-       MOVE_SHORTCHUNK(o1, o0, 0x02, g2, g3)
-       MOVE_SHORTCHUNK(o1, o0, 0x00, g2, g3)
+       MOVE_LAST_SHORTCHUNK(o1, o0, 0x0c, g2, g3)
+       MOVE_LAST_SHORTCHUNK(o1, o0, 0x0a, g2, g3)
+       MOVE_LAST_SHORTCHUNK(o1, o0, 0x08, g2, g3)
+       MOVE_LAST_SHORTCHUNK(o1, o0, 0x06, g2, g3)
+       MOVE_LAST_SHORTCHUNK(o1, o0, 0x04, g2, g3)
+       MOVE_LAST_SHORTCHUNK(o1, o0, 0x02, g2, g3)
+       MOVE_LAST_SHORTCHUNK(o1, o0, 0x00, g2, g3)
 short_table_end:
-       EXT(84b, short_table_end, 55f)
        be      1f
         nop
        EX(ldub [%o1], %g2, add %g0, 1)
@@ -363,123 +387,8 @@ short_aligned_end:
        .section .fixup,#alloc,#execinstr
        .align  4
 97:
-       mov     %o2, %g3
-fixupretl:
        retl
-        mov    %g3, %o0
-
-/* exception routine sets %g2 to (broken_insn - first_insn)>>2 */
-50:
-/* This magic counts how many bytes are left when crash in MOVE_BIGCHUNK
- * happens. This is derived from the amount ldd reads, st stores, etc.
- * x = g2 % 12;
- * g3 = g1 + g7 - ((g2 / 12) * 32 + (x < 4) ? 0 : (x - 4) * 4);
- * o0 += (g2 / 12) * 32;
- */
-       cmp     %g2, 12
-       add     %o0, %g7, %o0
-       bcs     1f
-        cmp    %g2, 24
-       bcs     2f
-        cmp    %g2, 36
-       bcs     3f
-        nop
-       sub     %g2, 12, %g2
-       sub     %g7, 32, %g7
-3:     sub     %g2, 12, %g2
-       sub     %g7, 32, %g7
-2:     sub     %g2, 12, %g2
-       sub     %g7, 32, %g7
-1:     cmp     %g2, 4
-       bcs,a   60f
-        clr    %g2
-       sub     %g2, 4, %g2
-       sll     %g2, 2, %g2
-60:    and     %g1, 0x7f, %g3
-       sub     %o0, %g7, %o0
-       add     %g3, %g7, %g3
-       ba      fixupretl
-        sub    %g3, %g2, %g3
-51:
-/* i = 41 - g2; j = i % 6;
- * g3 = (g1 & 15) + (i / 6) * 16 + (j < 4) ? (j + 1) * 4 : 16;
- * o0 -= (i / 6) * 16 + 16;
- */
-       neg     %g2
-       and     %g1, 0xf, %g1
-       add     %g2, 41, %g2
-       add     %o0, %g1, %o0
-1:     cmp     %g2, 6
-       bcs,a   2f
-        cmp    %g2, 4
-       add     %g1, 16, %g1
-       b       1b
-        sub    %g2, 6, %g2
-2:     bcc,a   2f
-        mov    16, %g2
-       inc     %g2
-       sll     %g2, 2, %g2
-2:     add     %g1, %g2, %g3
-       ba      fixupretl
-        sub    %o0, %g3, %o0
-52:
-/* g3 = g1 + g7 - (g2 / 8) * 32 + (g2 & 4) ? (g2 & 3) * 8 : 0;
-   o0 += (g2 / 8) * 32 */
-       andn    %g2, 7, %g4
-       add     %o0, %g7, %o0
-       andcc   %g2, 4, %g0
-       and     %g2, 3, %g2
-       sll     %g4, 2, %g4
-       sll     %g2, 3, %g2
-       bne     60b
-        sub    %g7, %g4, %g7
-       ba      60b
-        clr    %g2
-53:
-/* g3 = o3 + (o2 & 15) - (g2 & 8) - (g2 & 4) ? (g2 & 3) * 2 : 0;
-   o0 += (g2 & 8) */
-       and     %g2, 3, %g4
-       andcc   %g2, 4, %g0
-       and     %g2, 8, %g2
-       sll     %g4, 1, %g4
-       be      1f
-        add    %o0, %g2, %o0
-       add     %g2, %g4, %g2
-1:     and     %o2, 0xf, %g3
-       add     %g3, %o3, %g3
-       ba      fixupretl
-        sub    %g3, %g2, %g3
-54:
-/* g3 = o3 + (o2 & 15) - (g2 / 4) * 2 - (g2 & 2) ? (g2 & 1) : 0;
-   o0 += (g2 / 4) * 2 */
-       srl     %g2, 2, %o4
-       and     %g2, 1, %o5
-       srl     %g2, 1, %g2
-       add     %o4, %o4, %o4
-       and     %o5, %g2, %o5
-       and     %o2, 0xf, %o2
-       add     %o0, %o4, %o0
-       sub     %o3, %o5, %o3
-       sub     %o2, %o4, %o2
-       ba      fixupretl
-        add    %o2, %o3, %g3
-55:
-/* i = 27 - g2;
-   g3 = (o2 & 1) + i / 4 * 2 + !(i & 3);
-   o0 -= i / 4 * 2 + 1 */
-       neg     %g2
-       and     %o2, 1, %o2
-       add     %g2, 27, %g2
-       srl     %g2, 2, %o5
-       andcc   %g2, 3, %g0
-       mov     1, %g2
-       add     %o5, %o5, %o5
-       be,a    1f
-        clr    %g2
-1:     add     %g2, %o5, %g3
-       sub     %o0, %g3, %o0
-       ba      fixupretl
-        add    %g3, %o2, %g3
+        mov    %o2, %o0
 
        .globl  __copy_user_end
 __copy_user_end:
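
EX_ENTRY(98b, 99b) is defined earlier in the patch, outside this hunk; it evidently emits one exception-table record per potentially faulting instruction, which is what lets the EXT() range entries and the custom fixup math above disappear. Conceptually each record is just a pair of addresses; a sketch, assuming sparc32 keeps absolute 32-bit addresses (some arches store relative offsets instead):

```c
/* Conceptual layout of the records EX_ENTRY() emits into __ex_table. */
struct exception_table_entry {
	unsigned int insn;	/* address of the insn that may fault (98b) */
	unsigned int fixup;	/* address of the recovery code (99b) */
};
```
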
index b89d42b..eaff682 100644 (file)
@@ -19,7 +19,7 @@
 98:    x,y;                                    \
        .section .fixup,ALLOC,EXECINSTR;        \
        .align  4;                              \
-99:    ba 30f;                                 \
+99:    retl;                                   \
         a, b, %o0;                             \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
        .text;                                  \
        .align  4
 
-#define EXT(start,end,handler)                         \
+#define STORE(source, base, offset, n)         \
+98:    std source, [base + offset + n];        \
+       .section .fixup,ALLOC,EXECINSTR;        \
+       .align  4;                              \
+99:    ba 30f;                                 \
+        sub %o3, n - offset, %o3;              \
        .section __ex_table,ALLOC;              \
        .align  4;                              \
-       .word   start, 0, end, handler;         \
+       .word   98b, 99b;                       \
        .text;                                  \
-       .align  4
+       .align  4;
+
+#define STORE_LAST(source, base, offset, n)    \
+       EX(std source, [base - offset - n],     \
+          add %o1, offset + n);
 
 /* Please don't change these macros, unless you change the logic
  * in the .fixup section below as well.
  * Store 64 bytes at (BASE + OFFSET) using value SOURCE. */
-#define ZERO_BIG_BLOCK(base, offset, source)    \
-       std     source, [base + offset + 0x00]; \
-       std     source, [base + offset + 0x08]; \
-       std     source, [base + offset + 0x10]; \
-       std     source, [base + offset + 0x18]; \
-       std     source, [base + offset + 0x20]; \
-       std     source, [base + offset + 0x28]; \
-       std     source, [base + offset + 0x30]; \
-       std     source, [base + offset + 0x38];
+#define ZERO_BIG_BLOCK(base, offset, source)   \
+       STORE(source, base, offset, 0x00);      \
+       STORE(source, base, offset, 0x08);      \
+       STORE(source, base, offset, 0x10);      \
+       STORE(source, base, offset, 0x18);      \
+       STORE(source, base, offset, 0x20);      \
+       STORE(source, base, offset, 0x28);      \
+       STORE(source, base, offset, 0x30);      \
+       STORE(source, base, offset, 0x38);
 
 #define ZERO_LAST_BLOCKS(base, offset, source) \
-       std     source, [base - offset - 0x38]; \
-       std     source, [base - offset - 0x30]; \
-       std     source, [base - offset - 0x28]; \
-       std     source, [base - offset - 0x20]; \
-       std     source, [base - offset - 0x18]; \
-       std     source, [base - offset - 0x10]; \
-       std     source, [base - offset - 0x08]; \
-       std     source, [base - offset - 0x00];
+       STORE_LAST(source, base, offset, 0x38); \
+       STORE_LAST(source, base, offset, 0x30); \
+       STORE_LAST(source, base, offset, 0x28); \
+       STORE_LAST(source, base, offset, 0x20); \
+       STORE_LAST(source, base, offset, 0x18); \
+       STORE_LAST(source, base, offset, 0x10); \
+       STORE_LAST(source, base, offset, 0x08); \
+       STORE_LAST(source, base, offset, 0x00);
 
        .text
        .align 4
@@ -68,8 +77,6 @@ __bzero_begin:
        .globl  memset
        EXPORT_SYMBOL(__bzero)
        EXPORT_SYMBOL(memset)
-       .globl  __memset_start, __memset_end
-__memset_start:
 memset:
        mov     %o0, %g1
        mov     1, %g4
@@ -122,8 +129,6 @@ __bzero:
        ZERO_BIG_BLOCK(%o0, 0x00, %g2)
        subcc   %o3, 128, %o3
        ZERO_BIG_BLOCK(%o0, 0x40, %g2)
-11:
-       EXT(10b, 11b, 20f)
        bne     10b
         add    %o0, 128, %o0
 
@@ -138,7 +143,6 @@ __bzero:
        jmp     %o4
         add    %o0, %o2, %o0
 
-12:
        ZERO_LAST_BLOCKS(%o0, 0x48, %g2)
        ZERO_LAST_BLOCKS(%o0, 0x08, %g2)
 13:
@@ -181,37 +185,13 @@ __bzero:
 5:
        retl
         clr    %o0
-__memset_end:
 
        .section .fixup,#alloc,#execinstr
        .align  4
-20:
-       cmp     %g2, 8
-       bleu    1f
-        and    %o1, 0x7f, %o1
-       sub     %g2, 9, %g2
-       add     %o3, 64, %o3
-1:
-       sll     %g2, 3, %g2
-       add     %o3, %o1, %o0
-       b 30f
-        sub    %o0, %g2, %o0
-21:
-       mov     8, %o0
-       and     %o1, 7, %o1
-       sub     %o0, %g2, %o0
-       sll     %o0, 3, %o0
-       b 30f
-        add    %o0, %o1, %o0
 30:
-/* %o4 is faulting address, %o5 is %pc where fault occurred */
-       save    %sp, -104, %sp
-       mov     %i5, %o0
-       mov     %i7, %o1
-       call    lookup_fault
-        mov    %i4, %o2
-       ret
-        restore
+       and     %o1, 0x7f, %o1
+       retl
+        add    %o3, %o1, %o0
 
        .globl __bzero_end
 __bzero_end:
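
The same simplification happens in memset/bzero: each STORE() pre-adjusts %o3 in its fixup stub, so the single landing point at 30: only adds back the tail that the 128-byte loop never covered. A hedged C model of that landing point (the exact role of %o1 is inferred from the surrounding asm):

```c
/*
 * Model of label 30 above: o3 = bytes left to the 128-byte loop after
 * the faulting STORE() pre-adjusted it, o1 = original length, of which
 * only the sub-128 remainder is still outstanding.
 */
static unsigned long bzero_bytes_left(unsigned long o3, unsigned long o1)
{
	return o3 + (o1 & 0x7f);	/* returned in %o0 as "not cleared" */
}
```
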
index 68db1f8..871354a 100644 (file)
@@ -8,7 +8,7 @@ ccflags-y := -Werror
 obj-$(CONFIG_SPARC64)   += ultra.o tlb.o tsb.o
 obj-y                   += fault_$(BITS).o
 obj-y                   += init_$(BITS).o
-obj-$(CONFIG_SPARC32)   += extable.o srmmu.o iommu.o io-unit.o
+obj-$(CONFIG_SPARC32)   += srmmu.o iommu.o io-unit.o
 obj-$(CONFIG_SPARC32)   += srmmu_access.o
 obj-$(CONFIG_SPARC32)   += hypersparc.o viking.o tsunami.o swift.o
 obj-$(CONFIG_SPARC32)   += leon_mm.o
diff --git a/arch/sparc/mm/extable.c b/arch/sparc/mm/extable.c
deleted file mode 100644 (file)
index 241b406..0000000
+++ /dev/null
@@ -1,107 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * linux/arch/sparc/mm/extable.c
- */
-
-#include <linux/module.h>
-#include <linux/extable.h>
-#include <linux/uaccess.h>
-
-void sort_extable(struct exception_table_entry *start,
-                 struct exception_table_entry *finish)
-{
-}
-
-/* Caller knows they are in a range if ret->fixup == 0 */
-const struct exception_table_entry *
-search_extable(const struct exception_table_entry *base,
-              const size_t num,
-              unsigned long value)
-{
-       int i;
-
-       /* Single insn entries are encoded as:
-        *      word 1: insn address
-        *      word 2: fixup code address
-        *
-        * Range entries are encoded as:
-        *      word 1: first insn address
-        *      word 2: 0
-        *      word 3: last insn address + 4 bytes
-        *      word 4: fixup code address
-        *
-        * Deleted entries are encoded as:
-        *      word 1: unused
-        *      word 2: -1
-        *
-        * See asm/uaccess.h for more details.
-        */
-
-       /* 1. Try to find an exact match. */
-       for (i = 0; i < num; i++) {
-               if (base[i].fixup == 0) {
-                       /* A range entry, skip both parts. */
-                       i++;
-                       continue;
-               }
-
-               /* A deleted entry; see trim_init_extable */
-               if (base[i].fixup == -1)
-                       continue;
-
-               if (base[i].insn == value)
-                       return &base[i];
-       }
-
-       /* 2. Try to find a range match. */
-       for (i = 0; i < (num - 1); i++) {
-               if (base[i].fixup)
-                       continue;
-
-               if (base[i].insn <= value && base[i + 1].insn > value)
-                       return &base[i];
-
-               i++;
-       }
-
-        return NULL;
-}
-
-#ifdef CONFIG_MODULES
-/* We could memmove them around; easier to mark the trimmed ones. */
-void trim_init_extable(struct module *m)
-{
-       unsigned int i;
-       bool range;
-
-       for (i = 0; i < m->num_exentries; i += range ? 2 : 1) {
-               range = m->extable[i].fixup == 0;
-
-               if (within_module_init(m->extable[i].insn, m)) {
-                       m->extable[i].fixup = -1;
-                       if (range)
-                               m->extable[i+1].fixup = -1;
-               }
-               if (range)
-                       i++;
-       }
-}
-#endif /* CONFIG_MODULES */
-
-/* Special extable search, which handles ranges.  Returns fixup */
-unsigned long search_extables_range(unsigned long addr, unsigned long *g2)
-{
-       const struct exception_table_entry *entry;
-
-       entry = search_exception_tables(addr);
-       if (!entry)
-               return 0;
-
-       /* Inside range?  Fix g2 and return correct fixup */
-       if (!entry->fixup) {
-               *g2 = (addr - entry->insn) / 4;
-               return (entry + 1)->fixup;
-       }
-
-       return entry->fixup;
-}
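
This file can go because the preceding patches eliminated every range-style entry: sparc32 exception tables now hold only simple pairs, so the generic sort and search in lib/extable.c apply unchanged. A simplified sketch of that lookup, using the simple-pair layout and not the generic code verbatim:

```c
#include <stddef.h>

struct exception_table_entry {	/* simple-pair layout, no range entries */
	unsigned int insn;
	unsigned int fixup;
};

/* Binary search of a sorted table for an exact PC match. */
static const struct exception_table_entry *
simple_search_extable(const struct exception_table_entry *base, size_t num,
		      unsigned long pc)
{
	size_t lo = 0, hi = num;

	while (lo < hi) {
		size_t mid = lo + (hi - lo) / 2;

		if (base[mid].insn < pc)
			lo = mid + 1;
		else
			hi = mid;
	}
	return (lo < num && base[lo].insn == pc) ? &base[lo] : NULL;
}
```
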
index 40ce087..de2031c 100644 (file)
@@ -23,6 +23,7 @@
 #include <linux/interrupt.h>
 #include <linux/kdebug.h>
 #include <linux/uaccess.h>
+#include <linux/extable.h>
 
 #include <asm/page.h>
 #include <asm/openprom.h>
@@ -54,54 +55,6 @@ static void __noreturn unhandled_fault(unsigned long address,
        die_if_kernel("Oops", regs);
 }
 
-asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
-                           unsigned long address)
-{
-       struct pt_regs regs;
-       unsigned long g2;
-       unsigned int insn;
-       int i;
-
-       i = search_extables_range(ret_pc, &g2);
-       switch (i) {
-       case 3:
-               /* load & store will be handled by fixup */
-               return 3;
-
-       case 1:
-               /* store will be handled by fixup, load will bump out */
-               /* for _to_ macros */
-               insn = *((unsigned int *) pc);
-               if ((insn >> 21) & 1)
-                       return 1;
-               break;
-
-       case 2:
-               /* load will be handled by fixup, store will bump out */
-               /* for _from_ macros */
-               insn = *((unsigned int *) pc);
-               if (!((insn >> 21) & 1) || ((insn>>19)&0x3f) == 15)
-                       return 2;
-               break;
-
-       default:
-               break;
-       }
-
-       memset(&regs, 0, sizeof(regs));
-       regs.pc = pc;
-       regs.npc = pc + 4;
-       __asm__ __volatile__(
-               "rd %%psr, %0\n\t"
-               "nop\n\t"
-               "nop\n\t"
-               "nop\n" : "=r" (regs.psr));
-       unhandled_fault(address, current, &regs);
-
-       /* Not reached */
-       return 0;
-}
-
 static inline void
 show_signal_msg(struct pt_regs *regs, int sig, int code,
                unsigned long address, struct task_struct *tsk)
@@ -162,8 +115,6 @@ asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
        struct vm_area_struct *vma;
        struct task_struct *tsk = current;
        struct mm_struct *mm = tsk->mm;
-       unsigned int fixup;
-       unsigned long g2;
        int from_user = !(regs->psr & PSR_PS);
        int code;
        vm_fault_t fault;
@@ -281,30 +232,20 @@ bad_area_nosemaphore:
 
        /* Is this in ex_table? */
 no_context:
-       g2 = regs->u_regs[UREG_G2];
        if (!from_user) {
-               fixup = search_extables_range(regs->pc, &g2);
-               /* Values below 10 are reserved for other things */
-               if (fixup > 10) {
-                       extern const unsigned int __memset_start[];
-                       extern const unsigned int __memset_end[];
+               const struct exception_table_entry *entry;
 
+               entry = search_exception_tables(regs->pc);
+               if (entry) {
 #ifdef DEBUG_EXCEPTIONS
                        printk("Exception: PC<%08lx> faddr<%08lx>\n",
                               regs->pc, address);
-                       printk("EX_TABLE: insn<%08lx> fixup<%08x> g2<%08lx>\n",
-                               regs->pc, fixup, g2);
+                       printk("EX_TABLE: insn<%08lx> fixup<%08x>\n",
+                               regs->pc, entry->fixup);
 #endif
-                       if ((regs->pc >= (unsigned long)__memset_start &&
-                            regs->pc < (unsigned long)__memset_end)) {
-                               regs->u_regs[UREG_I4] = address;
-                               regs->u_regs[UREG_I5] = regs->pc;
-                       }
-                       regs->u_regs[UREG_G2] = g2;
-                       regs->pc = fixup;
+                       regs->pc = entry->fixup;
                        regs->npc = regs->pc + 4;
                        return;
                }
        }
 
        unhandled_fault(address, tsk, regs);
index eb2946b..6139c57 100644 (file)
@@ -197,6 +197,9 @@ unsigned long __init bootmem_init(unsigned long *pages_avail)
        size = memblock_phys_mem_size() - memblock_reserved_size();
        *pages_avail = (size >> PAGE_SHIFT) - high_pages;
 
+       /* Only allow low memory to be allocated via memblock allocation */
+       memblock_set_current_limit(max_low_pfn << PAGE_SHIFT);
+
        return max_pfn;
 }
 
index ce750a9..ee55f10 100644 (file)
@@ -1,7 +1,5 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* fault_32.c - visible as they are called from assembler */
-asmlinkage int lookup_fault(unsigned long pc, unsigned long ret_pc,
-                            unsigned long address);
 asmlinkage void do_sparc_fault(struct pt_regs *regs, int text_fault, int write,
                                unsigned long address);
 
index a03caa5..a9aa6a9 100644 (file)
@@ -351,7 +351,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
        pte_t *ptep;
        struct page *page;
 
-       if ((ptep = pte_alloc_one_kernel(mm)) == 0)
+       if (!(ptep = pte_alloc_one_kernel(mm)))
                return NULL;
        page = pfn_to_page(__nocache_pa((unsigned long)ptep) >> PAGE_SHIFT);
        spin_lock(&mm->page_table_lock);
@@ -689,7 +689,7 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
                pgdp = pgd_offset_k(start);
                p4dp = p4d_offset(pgdp, start);
                pudp = pud_offset(p4dp, start);
-               if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
+               if (pud_none(*__nocache_fix(pudp))) {
                        pmdp = __srmmu_get_nocache(
                            SRMMU_PMD_TABLE_SIZE, SRMMU_PMD_TABLE_SIZE);
                        if (pmdp == NULL)
@@ -698,7 +698,7 @@ static void __init srmmu_early_allocate_ptable_skeleton(unsigned long start,
                        pud_set(__nocache_fix(pudp), pmdp);
                }
                pmdp = pmd_offset(__nocache_fix(pudp), start);
-               if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
+               if (srmmu_pmd_none(*__nocache_fix(pmdp))) {
                        ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
                        if (ptep == NULL)
                                early_pgtable_allocfail("pte");
@@ -810,11 +810,11 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
                p4dp = p4d_offset(pgdp, start);
                pudp = pud_offset(p4dp, start);
                if (what == 2) {
-                       *(pgd_t *)__nocache_fix(pgdp) = __pgd(probed);
+                       *__nocache_fix(pgdp) = __pgd(probed);
                        start += PGDIR_SIZE;
                        continue;
                }
-               if (pud_none(*(pud_t *)__nocache_fix(pudp))) {
+               if (pud_none(*__nocache_fix(pudp))) {
                        pmdp = __srmmu_get_nocache(SRMMU_PMD_TABLE_SIZE,
                                                   SRMMU_PMD_TABLE_SIZE);
                        if (pmdp == NULL)
@@ -822,13 +822,13 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
                        memset(__nocache_fix(pmdp), 0, SRMMU_PMD_TABLE_SIZE);
                        pud_set(__nocache_fix(pudp), pmdp);
                }
-               pmdp = pmd_offset(__nocache_fix(pgdp), start);
+               pmdp = pmd_offset(__nocache_fix(pudp), start);
                if (what == 1) {
                        *(pmd_t *)__nocache_fix(pmdp) = __pmd(probed);
                        start += PMD_SIZE;
                        continue;
                }
-               if (srmmu_pmd_none(*(pmd_t *)__nocache_fix(pmdp))) {
+               if (srmmu_pmd_none(*__nocache_fix(pmdp))) {
                        ptep = __srmmu_get_nocache(PTE_SIZE, PTE_SIZE);
                        if (ptep == NULL)
                                early_pgtable_allocfail("pte");
@@ -836,7 +836,7 @@ static void __init srmmu_inherit_prom_mappings(unsigned long start,
                        pmd_set(__nocache_fix(pmdp), ptep);
                }
                ptep = pte_offset_kernel(__nocache_fix(pmdp), start);
-               *(pte_t *)__nocache_fix(ptep) = __pte(probed);
+               *__nocache_fix(ptep) = __pte(probed);
                start += PAGE_SIZE;
        }
 }
@@ -850,7 +850,7 @@ static void __init do_large_mapping(unsigned long vaddr, unsigned long phys_base
        unsigned long big_pte;
 
        big_pte = KERNEL_PTE(phys_base >> 4);
-       *(pgd_t *)__nocache_fix(pgdp) = __pgd(big_pte);
+       *__nocache_fix(pgdp) = __pgd(big_pte);
 }
 
 /* Map sp_bank entry SP_ENTRY, starting at virtual address VBASE. */
@@ -940,7 +940,7 @@ void __init srmmu_paging_init(void)
        srmmu_ctx_table_phys = (ctxd_t *)__nocache_pa(srmmu_context_table);
 
        for (i = 0; i < num_contexts; i++)
-               srmmu_ctxd_set((ctxd_t *)__nocache_fix(&srmmu_context_table[i]), srmmu_swapper_pg_dir);
+               srmmu_ctxd_set(__nocache_fix(&srmmu_context_table[i]), srmmu_swapper_pg_dir);
 
        flush_cache_all();
        srmmu_set_ctable_ptr((unsigned long)srmmu_ctx_table_phys);
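
The cast removals throughout this file only compile if __nocache_fix() was made type-preserving elsewhere in the series. A plausible shape for such a macro, assuming it still wraps the same nocache-to-physical translation (the exact sparc definition may differ):

```c
/*
 * Type-preserving nocache fixup: returns the argument's own pointer
 * type instead of void *, so pud_none(*__nocache_fix(pudp)) needs no
 * cast.  __nocache_pa() is the existing sparc helper used above.
 */
#define __nocache_fix(addr) ((__typeof__(addr))__va(__nocache_pa(addr)))
```
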
index a58811d..d7492e5 100644 (file)
@@ -20,6 +20,7 @@ generic-y += param.h
 generic-y += pci.h
 generic-y += percpu.h
 generic-y += preempt.h
+generic-y += softirq_stack.h
 generic-y += switch_to.h
 generic-y += topology.h
 generic-y += trace_clock.h
index 81d508d..c501106 100644 (file)
@@ -157,7 +157,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp,
                unsigned long arg, struct task_struct * p, unsigned long tls)
 {
        void (*handler)(void);
-       int kthread = current->flags & PF_KTHREAD;
+       int kthread = current->flags & (PF_KTHREAD | PF_IO_WORKER);
        int ret = 0;
 
        p->thread = (struct thread_struct) INIT_THREAD;
index 595193b..2792879 100644 (file)
@@ -97,6 +97,8 @@ config X86
        select ARCH_SUPPORTS_DEBUG_PAGEALLOC
        select ARCH_SUPPORTS_NUMA_BALANCING     if X86_64
        select ARCH_SUPPORTS_KMAP_LOCAL_FORCE_MAP       if NR_CPUS <= 4096
+       select ARCH_SUPPORTS_LTO_CLANG          if X86_64
+       select ARCH_SUPPORTS_LTO_CLANG_THIN     if X86_64
        select ARCH_USE_BUILTIN_BSWAP
        select ARCH_USE_QUEUED_RWLOCKS
        select ARCH_USE_QUEUED_SPINLOCKS
@@ -149,6 +151,7 @@ config X86
        select HAVE_ARCH_JUMP_LABEL_RELATIVE
        select HAVE_ARCH_KASAN                  if X86_64
        select HAVE_ARCH_KASAN_VMALLOC          if X86_64
+       select HAVE_ARCH_KFENCE
        select HAVE_ARCH_KGDB
        select HAVE_ARCH_MMAP_RND_BITS          if MMU
        select HAVE_ARCH_MMAP_RND_COMPAT_BITS   if MMU && COMPAT
@@ -169,6 +172,7 @@ config X86
        select HAVE_CONTEXT_TRACKING            if X86_64
        select HAVE_CONTEXT_TRACKING_OFFSTACK   if HAVE_CONTEXT_TRACKING
        select HAVE_C_RECORDMCOUNT
+       select HAVE_OBJTOOL_MCOUNT              if STACK_VALIDATION
        select HAVE_DEBUG_KMEMLEAK
        select HAVE_DMA_CONTIGUOUS
        select HAVE_DYNAMIC_FTRACE
@@ -188,6 +192,7 @@ config X86
        select HAVE_HW_BREAKPOINT
        select HAVE_IDE
        select HAVE_IOREMAP_PROT
+       select HAVE_IRQ_EXIT_ON_IRQ_STACK       if X86_64
        select HAVE_IRQ_TIME_ACCOUNTING
        select HAVE_KERNEL_BZIP2
        select HAVE_KERNEL_GZIP
@@ -220,6 +225,7 @@ config X86
        select HAVE_REGS_AND_STACK_ACCESS_API
        select HAVE_RELIABLE_STACKTRACE         if X86_64 && (UNWINDER_FRAME_POINTER || UNWINDER_ORC) && STACK_VALIDATION
        select HAVE_FUNCTION_ARG_ACCESS_API
+       select HAVE_SOFTIRQ_ON_OWN_STACK
        select HAVE_STACKPROTECTOR              if CC_HAS_SANE_STACKPROTECTOR
        select HAVE_STACK_VALIDATION            if X86_64
        select HAVE_STATIC_CALL
@@ -445,7 +451,7 @@ config X86_X2APIC
          If you don't know what to do here, say N.
 
 config X86_MPPARSE
-       bool "Enable MPS table" if ACPI || SFI
+       bool "Enable MPS table" if ACPI
        default y
        depends on X86_LOCAL_APIC
        help
@@ -604,7 +610,6 @@ config X86_INTEL_MID
        depends on PCI
        depends on X86_64 || (PCI_GOANY && X86_32)
        depends on X86_IO_APIC
-       select SFI
        select I2C
        select DW_APB_TIMER
        select APB_TIMER
@@ -893,18 +898,6 @@ config HPET_EMULATE_RTC
        def_bool y
        depends on HPET_TIMER && (RTC_DRV_CMOS=m || RTC_DRV_CMOS=y)
 
-config APB_TIMER
-       def_bool y if X86_INTEL_MID
-       prompt "Intel MID APB Timer Support" if X86_INTEL_MID
-       select DW_APB_TIMER
-       depends on X86_INTEL_MID && SFI
-       help
-        APB timer is the replacement for 8254, HPET on X86 MID platforms.
-        The APBT provides a stable time base on SMP
-        systems, unlike the TSC, but it is more expensive to access,
-        as it is off-chip. APB timers are always running regardless of CPU
-        C states, they are used as per CPU clockevent device when possible.
-
 # Mark as expert because too many people got it wrong.
 # The code disables itself when not needed.
 config DMI
@@ -2466,8 +2459,6 @@ source "kernel/power/Kconfig"
 
 source "drivers/acpi/Kconfig"
 
-source "drivers/sfi/Kconfig"
-
 config X86_APM_BOOT
        def_bool y
        depends on APM
@@ -2654,7 +2645,7 @@ config PCI_DIRECT
 config PCI_MMCONFIG
        bool "Support mmconfig PCI config space access" if X86_64
        default y
-       depends on PCI && (ACPI || SFI || JAILHOUSE_GUEST)
+       depends on PCI && (ACPI || JAILHOUSE_GUEST)
        depends on X86_64 || (PCI_GOANY || PCI_GOMMCONFIG)
 
 config PCI_OLPC
index b797f15..9a85eae 100644 (file)
@@ -27,7 +27,7 @@ endif
 REALMODE_CFLAGS        := -m16 -g -Os -DDISABLE_BRANCH_PROFILING \
                   -Wall -Wstrict-prototypes -march=i386 -mregparm=3 \
                   -fno-strict-aliasing -fomit-frame-pointer -fno-pic \
-                  -mno-mmx -mno-sse
+                  -mno-mmx -mno-sse $(call cc-option,-fcf-protection=none)
 
 REALMODE_CFLAGS += -ffreestanding
 REALMODE_CFLAGS += -fno-stack-protector
@@ -169,6 +169,11 @@ ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1)
        KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args,)
 endif
 
+ifdef CONFIG_LTO_CLANG
+KBUILD_LDFLAGS += -plugin-opt=-code-model=kernel \
+                  -plugin-opt=-stack-alignment=$(if $(CONFIG_X86_32),4,8)
+endif
+
 # Workaround for a gcc prelease that unfortunately was shipped in a suse release
 KBUILD_CFLAGS += -Wno-sign-compare
 #
index 7821079..9c9c4a8 100644 (file)
@@ -50,7 +50,6 @@ CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
 CONFIG_PACKET=y
index 9936528..b60bd2d 100644 (file)
@@ -48,7 +48,6 @@ CONFIG_JUMP_LABEL=y
 CONFIG_MODULES=y
 CONFIG_MODULE_UNLOAD=y
 CONFIG_MODULE_FORCE_UNLOAD=y
-# CONFIG_UNUSED_SYMBOLS is not set
 CONFIG_BINFMT_MISC=y
 CONFIG_NET=y
 CONFIG_PACKET=y
index 0904f56..4efd39a 100644 (file)
@@ -128,7 +128,8 @@ static noinstr bool __do_fast_syscall_32(struct pt_regs *regs)
                regs->ax = -EFAULT;
 
                instrumentation_end();
-               syscall_exit_to_user_mode(regs);
+               local_irq_disable();
+               irqentry_exit_to_user_mode(regs);
                return false;
        }
 
@@ -249,30 +250,23 @@ static __always_inline bool get_and_clear_inhcall(void) { return false; }
 static __always_inline void restore_inhcall(bool inhcall) { }
 #endif
 
-static void __xen_pv_evtchn_do_upcall(void)
+static void __xen_pv_evtchn_do_upcall(struct pt_regs *regs)
 {
-       irq_enter_rcu();
+       struct pt_regs *old_regs = set_irq_regs(regs);
+
        inc_irq_stat(irq_hv_callback_count);
 
        xen_hvm_evtchn_do_upcall();
 
-       irq_exit_rcu();
+       set_irq_regs(old_regs);
 }
 
 __visible noinstr void xen_pv_evtchn_do_upcall(struct pt_regs *regs)
 {
-       struct pt_regs *old_regs;
+       irqentry_state_t state = irqentry_enter(regs);
        bool inhcall;
-       irqentry_state_t state;
 
-       state = irqentry_enter(regs);
-       old_regs = set_irq_regs(regs);
-
-       instrumentation_begin();
-       run_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
-       instrumentation_begin();
-
-       set_irq_regs(old_regs);
+       run_sysvec_on_irqstack_cond(__xen_pv_evtchn_do_upcall, regs);
 
        inhcall = get_and_clear_inhcall();
        if (inhcall && !WARN_ON_ONCE(state.exit_rcu)) {
index ce0464d..400908d 100644 (file)
@@ -754,47 +754,6 @@ SYM_CODE_START_LOCAL_NOALIGN(.Lbad_gs)
 SYM_CODE_END(.Lbad_gs)
        .previous
 
-/*
- * rdi: New stack pointer points to the top word of the stack
- * rsi: Function pointer
- * rdx: Function argument (can be NULL if none)
- */
-SYM_FUNC_START(asm_call_on_stack)
-SYM_INNER_LABEL(asm_call_sysvec_on_stack, SYM_L_GLOBAL)
-SYM_INNER_LABEL(asm_call_irq_on_stack, SYM_L_GLOBAL)
-       /*
-        * Save the frame pointer unconditionally. This allows the ORC
-        * unwinder to handle the stack switch.
-        */
-       pushq           %rbp
-       mov             %rsp, %rbp
-
-       /*
-        * The unwinder relies on the word at the top of the new stack
-        * page linking back to the previous RSP.
-        */
-       mov             %rsp, (%rdi)
-       mov             %rdi, %rsp
-       /* Move the argument to the right place */
-       mov             %rdx, %rdi
-
-1:
-       .pushsection .discard.instr_begin
-       .long 1b - .
-       .popsection
-
-       CALL_NOSPEC     rsi
-
-2:
-       .pushsection .discard.instr_end
-       .long 2b - .
-       .popsection
-
-       /* Restore the previous stack pointer from RBP. */
-       leaveq
-       ret
-SYM_FUNC_END(asm_call_on_stack)
-
 #ifdef CONFIG_XEN_PV
 /*
  * A note on the "critical region" in our callback handler.
index 541fdaf..0051cf5 100644 (file)
@@ -210,6 +210,8 @@ SYM_CODE_START(entry_SYSCALL_compat)
        /* Switch to the kernel stack */
        movq    PER_CPU_VAR(cpu_current_top_of_stack), %rsp
 
+SYM_INNER_LABEL(entry_SYSCALL_compat_safe_stack, SYM_L_GLOBAL)
+
        /* Construct struct pt_regs on stack */
        pushq   $__USER32_DS            /* pt_regs->ss */
        pushq   %r8                     /* pt_regs->sp */
index 6fb9b57..d8c4f6c 100644 (file)
@@ -6,8 +6,8 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') \
          $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')
 
-syscall32 := $(srctree)/$(src)/syscall_32.tbl
-syscall64 := $(srctree)/$(src)/syscall_64.tbl
+syscall32 := $(src)/syscall_32.tbl
+syscall64 := $(src)/syscall_64.tbl
 
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
@@ -21,37 +21,37 @@ quiet_cmd_systbl = SYSTBL  $@
       cmd_systbl = $(CONFIG_SHELL) '$(systbl)' $< $@
 
 quiet_cmd_hypercalls = HYPERCALLS $@
-      cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<,$^)
+      cmd_hypercalls = $(CONFIG_SHELL) '$<' $@ $(filter-out $<, $(real-prereqs))
 
 syshdr_abi_unistd_32 := i386
-$(uapi)/unistd_32.h: $(syscall32) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall32) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_abi_unistd_32_ia32 := i386
 syshdr_pfx_unistd_32_ia32 := ia32_
-$(out)/unistd_32_ia32.h: $(syscall32) $(syshdr)
+$(out)/unistd_32_ia32.h: $(syscall32) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_abi_unistd_x32 := common,x32
 syshdr_offset_unistd_x32 := __X32_SYSCALL_BIT
-$(uapi)/unistd_x32.h: $(syscall64) $(syshdr)
+$(uapi)/unistd_x32.h: $(syscall64) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_abi_unistd_64 := common,64
-$(uapi)/unistd_64.h: $(syscall64) $(syshdr)
+$(uapi)/unistd_64.h: $(syscall64) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
 syshdr_abi_unistd_64_x32 := x32
 syshdr_pfx_unistd_64_x32 := x32_
-$(out)/unistd_64_x32.h: $(syscall64) $(syshdr)
+$(out)/unistd_64_x32.h: $(syscall64) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-$(out)/syscalls_32.h: $(syscall32) $(systbl)
+$(out)/syscalls_32.h: $(syscall32) $(systbl) FORCE
        $(call if_changed,systbl)
-$(out)/syscalls_64.h: $(syscall64) $(systbl)
+$(out)/syscalls_64.h: $(syscall64) $(systbl) FORCE
        $(call if_changed,systbl)
 
-$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh
+$(out)/xen-hypercalls.h: $(srctree)/scripts/xen-hypercalls.sh FORCE
        $(call if_changed,hypercalls)
 
 $(out)/xen-hypercalls.h: $(srctree)/include/xen/interface/xen*.h
@@ -62,9 +62,10 @@ syshdr-$(CONFIG_X86_64)              += unistd_32_ia32.h unistd_64_x32.h
 syshdr-$(CONFIG_X86_64)                += syscalls_64.h
 syshdr-$(CONFIG_XEN)           += xen-hypercalls.h
 
-targets        += $(uapisyshdr-y) $(syshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+syshdr-y       := $(addprefix $(out)/, $(syshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(syshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(out)/,$(syshdr-y))
+all: $(uapisyshdr-y) $(syshdr-y)
        @:
index 874aeac..a1c9f49 100644 (file)
 439    i386    faccessat2              sys_faccessat2
 440    i386    process_madvise         sys_process_madvise
 441    i386    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
+442    i386    mount_setattr           sys_mount_setattr
index 7867212..7bf01cb 100644 (file)
 439    common  faccessat2              sys_faccessat2
 440    common  process_madvise         sys_process_madvise
 441    common  epoll_pwait2            sys_epoll_pwait2
+442    common  mount_setattr           sys_mount_setattr
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index 02e3e42..05c4abc 100644 (file)
@@ -91,7 +91,7 @@ ifneq ($(RETPOLINE_VDSO_CFLAGS),)
 endif
 endif
 
-$(vobjs): KBUILD_CFLAGS := $(filter-out $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
+$(vobjs): KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO) $(GCC_PLUGINS_CFLAGS) $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS)) $(CFL)
 
 #
 # vDSO code runs in userspace and -pg doesn't help with profiling anyway.
@@ -150,6 +150,7 @@ KBUILD_CFLAGS_32 := $(filter-out -fno-pic,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out -mfentry,$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(GCC_PLUGINS_CFLAGS),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 := $(filter-out $(RETPOLINE_CFLAGS),$(KBUILD_CFLAGS_32))
+KBUILD_CFLAGS_32 := $(filter-out $(CC_FLAGS_LTO),$(KBUILD_CFLAGS_32))
 KBUILD_CFLAGS_32 += -m32 -msoft-float -mregparm=0 -fpic
 KBUILD_CFLAGS_32 += -fno-stack-protector
 KBUILD_CFLAGS_32 += $(call cc-option, -foptimize-sibling-calls)
index 6ddeed3..18df171 100644 (file)
@@ -81,7 +81,11 @@ DEFINE_STATIC_CALL_NULL(x86_pmu_swap_task_ctx, *x86_pmu.swap_task_ctx);
 DEFINE_STATIC_CALL_NULL(x86_pmu_drain_pebs,   *x86_pmu.drain_pebs);
 DEFINE_STATIC_CALL_NULL(x86_pmu_pebs_aliases, *x86_pmu.pebs_aliases);
 
-DEFINE_STATIC_CALL_NULL(x86_pmu_guest_get_msrs,  *x86_pmu.guest_get_msrs);
+/*
+ * This one is magic, it will get called even when PMU init fails (because
+ * there is no PMU), in which case it should simply return NULL.
+ */
+DEFINE_STATIC_CALL_RET0(x86_pmu_guest_get_msrs, *x86_pmu.guest_get_msrs);
 
 u64 __read_mostly hw_cache_event_ids
                                [PERF_COUNT_HW_CACHE_MAX]
@@ -1944,13 +1948,6 @@ static void _x86_pmu_read(struct perf_event *event)
        x86_perf_event_update(event);
 }
 
-static inline struct perf_guest_switch_msr *
-perf_guest_get_msrs_nop(int *nr)
-{
-       *nr = 0;
-       return NULL;
-}
-
 static int __init init_hw_perf_events(void)
 {
        struct x86_pmu_quirk *quirk;
@@ -2025,7 +2022,7 @@ static int __init init_hw_perf_events(void)
                x86_pmu.read = _x86_pmu_read;
 
        if (!x86_pmu.guest_get_msrs)
-               x86_pmu.guest_get_msrs = perf_guest_get_msrs_nop;
+               x86_pmu.guest_get_msrs = (void *)&__static_call_return0;
 
        x86_pmu_static_call_update();
 
index 5bac48d..37ce384 100644 (file)
@@ -3659,11 +3659,16 @@ static int intel_pmu_hw_config(struct perf_event *event)
                return ret;
 
        if (event->attr.precise_ip) {
+               if ((event->attr.config & INTEL_ARCH_EVENT_MASK) == INTEL_FIXED_VLBR_EVENT)
+                       return -EINVAL;
+
                if (!(event->attr.freq || (event->attr.wakeup_events && !event->attr.watermark))) {
                        event->hw.flags |= PERF_X86_EVENT_AUTO_RELOAD;
                        if (!(event->attr.sample_type &
-                             ~intel_pmu_large_pebs_flags(event)))
+                             ~intel_pmu_large_pebs_flags(event))) {
                                event->hw.flags |= PERF_X86_EVENT_LARGE_PEBS;
+                               event->attach_state |= PERF_ATTACH_SCHED_CB;
+                       }
                }
                if (x86_pmu.pebs_aliases)
                        x86_pmu.pebs_aliases(event);
@@ -3676,6 +3681,7 @@ static int intel_pmu_hw_config(struct perf_event *event)
                ret = intel_pmu_setup_lbr_filter(event);
                if (ret)
                        return ret;
+               event->attach_state |= PERF_ATTACH_SCHED_CB;
 
                /*
                 * BTS is set up earlier in this path, so don't account twice
index 7ebae18..d32b302 100644 (file)
@@ -2010,7 +2010,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs, struct perf_sample_d
                 */
                if (!pebs_status && cpuc->pebs_enabled &&
                        !(cpuc->pebs_enabled & (cpuc->pebs_enabled-1)))
-                       pebs_status = cpuc->pebs_enabled;
+                       pebs_status = p->status = cpuc->pebs_enabled;
 
                bit = find_first_bit((unsigned long *)&pebs_status,
                                        x86_pmu.max_pebs_events);
diff --git a/arch/x86/include/asm/acrn.h b/arch/x86/include/asm/acrn.h
new file mode 100644 (file)
index 0000000..e003a01
--- /dev/null
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_ACRN_H
+#define _ASM_X86_ACRN_H
+
+/*
+ * This CPUID returns feature bitmaps in EAX.
+ * Guest VM uses this to detect the appropriate feature bit.
+ */
+#define        ACRN_CPUID_FEATURES             0x40000001
+/* Bit 0 indicates whether guest VM is privileged */
+#define        ACRN_FEATURE_PRIVILEGED_VM      BIT(0)
+
+void acrn_setup_intr_handler(void (*handler)(void));
+void acrn_remove_intr_handler(void);
+
+static inline u32 acrn_cpuid_base(void)
+{
+       if (boot_cpu_has(X86_FEATURE_HYPERVISOR))
+               return hypervisor_cpuid_base("ACRNACRNACRN", 0);
+
+       return 0;
+}
+
+/*
+ * Hypercalls for ACRN
+ *
+ * - VMCALL instruction is used to implement ACRN hypercalls.
+ * - ACRN hypercall ABI:
+ *   - Hypercall number is passed in R8 register.
+ *   - Up to 2 arguments are passed in RDI, RSI.
+ *   - Return value will be placed in RAX.
+ *
+ * Because GCC doesn't support the R8 register as a direct register
+ * constraint, use a supported constraint as input with an explicit
+ * MOV to R8 at the beginning of the asm.
+ */
+static inline long acrn_hypercall0(unsigned long hcall_id)
+{
+       long result;
+
+       asm volatile("movl %1, %%r8d\n\t"
+                    "vmcall\n\t"
+                    : "=a" (result)
+                    : "g" (hcall_id)
+                    : "r8", "memory");
+
+       return result;
+}
+
+static inline long acrn_hypercall1(unsigned long hcall_id,
+                                  unsigned long param1)
+{
+       long result;
+
+       asm volatile("movl %1, %%r8d\n\t"
+                    "vmcall\n\t"
+                    : "=a" (result)
+                    : "g" (hcall_id), "D" (param1)
+                    : "r8", "memory");
+
+       return result;
+}
+
+static inline long acrn_hypercall2(unsigned long hcall_id,
+                                  unsigned long param1,
+                                  unsigned long param2)
+{
+       long result;
+
+       asm volatile("movl %1, %%r8d\n\t"
+                    "vmcall\n\t"
+                    : "=a" (result)
+                    : "g" (hcall_id), "D" (param1), "S" (param2)
+                    : "r8", "memory");
+
+       return result;
+}
+
+#endif /* _ASM_X86_ACRN_H */
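
A usage sketch for the new header; ACRN_HC_EXAMPLE is a made-up placeholder, not a hypercall ID defined by ACRN:

```c
#include <linux/errno.h>
#include <asm/acrn.h>

#define ACRN_HC_EXAMPLE		0x80000010UL	/* placeholder ID */

static long acrn_hypercall_example(unsigned long vmid)
{
	if (!acrn_cpuid_base())
		return -ENODEV;		/* not running as an ACRN guest */

	/* hcall_id goes to R8, vmid to RDI, result comes back in RAX */
	return acrn_hypercall1(ACRN_HC_EXAMPLE, vmid);
}
```
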
diff --git a/arch/x86/include/asm/apb_timer.h b/arch/x86/include/asm/apb_timer.h
deleted file mode 100644 (file)
index 87ce8e9..0000000
+++ /dev/null
@@ -1,40 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * apb_timer.h: Driver for Langwell APB timer based on Synopsis DesignWare
- *
- * (C) Copyright 2009 Intel Corporation
- * Author: Jacob Pan (jacob.jun.pan@intel.com)
- *
- * Note:
- */
-
-#ifndef ASM_X86_APBT_H
-#define ASM_X86_APBT_H
-#include <linux/sfi.h>
-
-#ifdef CONFIG_APB_TIMER
-
-/* default memory mapped register base */
-#define LNW_SCU_ADDR           0xFF100000
-#define LNW_EXT_TIMER_OFFSET   0x1B800
-#define APBT_DEFAULT_BASE      (LNW_SCU_ADDR+LNW_EXT_TIMER_OFFSET)
-#define LNW_EXT_TIMER_PGOFFSET         0x800
-
-/* APBT clock speed range from PCLK to fabric base, 25-100MHz */
-#define APBT_MAX_FREQ          50000000
-#define APBT_MIN_FREQ          1000000
-#define APBT_MMAP_SIZE         1024
-
-extern void apbt_time_init(void);
-extern void apbt_setup_secondary_clock(void);
-
-extern struct sfi_timer_table_entry *sfi_get_mtmr(int hint);
-extern void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr);
-extern int sfi_mtimer_num;
-
-#else /* CONFIG_APB_TIMER */
-
-static inline void apbt_time_init(void) { }
-
-#endif
-#endif /* ASM_X86_APBT_H */
index 9f1a0a9..d0dcefb 100644 (file)
@@ -108,9 +108,6 @@ enum fixed_addresses {
 #ifdef CONFIG_PARAVIRT_XXL
        FIX_PARAVIRT_BOOTMAP,
 #endif
-#ifdef CONFIG_X86_INTEL_MID
-       FIX_LNW_VRTC,
-#endif
 
 #ifdef CONFIG_ACPI_APEI_GHES
        /* Used for GHES mapping from assorted contexts */
index 41e2e2e..5eb3bdf 100644 (file)
@@ -187,23 +187,22 @@ __visible noinstr void func(struct pt_regs *regs, unsigned long error_code)
  * has to be done in the function body if necessary.
  */
 #define DEFINE_IDTENTRY_IRQ(func)                                      \
-static __always_inline void __##func(struct pt_regs *regs, u8 vector); \
+static void __##func(struct pt_regs *regs, u32 vector);                        \
                                                                        \
 __visible noinstr void func(struct pt_regs *regs,                      \
                            unsigned long error_code)                   \
 {                                                                      \
        irqentry_state_t state = irqentry_enter(regs);                  \
+       u32 vector = (u32)(u8)error_code;                               \
                                                                        \
        instrumentation_begin();                                        \
-       irq_enter_rcu();                                                \
        kvm_set_cpu_l1tf_flush_l1d();                                   \
-       __##func (regs, (u8)error_code);                                \
-       irq_exit_rcu();                                                 \
+       run_irq_on_irqstack_cond(__##func, regs, vector);               \
        instrumentation_end();                                          \
        irqentry_exit(regs, state);                                     \
 }                                                                      \
                                                                        \
-static __always_inline void __##func(struct pt_regs *regs, u8 vector)
+static noinline void __##func(struct pt_regs *regs, u32 vector)
 
 /**
  * DECLARE_IDTENTRY_SYSVEC - Declare functions for system vector entry points
@@ -237,10 +236,8 @@ __visible noinstr void func(struct pt_regs *regs)                  \
        irqentry_state_t state = irqentry_enter(regs);                  \
                                                                        \
        instrumentation_begin();                                        \
-       irq_enter_rcu();                                                \
        kvm_set_cpu_l1tf_flush_l1d();                                   \
        run_sysvec_on_irqstack_cond(__##func, regs);                    \
-       irq_exit_rcu();                                                 \
        instrumentation_end();                                          \
        irqentry_exit(regs, state);                                     \
 }                                                                      \
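
Two things change together here: irq_enter_rcu()/irq_exit_rcu() move out of the macro because the stack-switch helper must bracket the whole call, and the vector widens to u32 so it can travel as an ordinary register argument into the now out-of-line (hence noinline) handler. A loose C model under those assumptions; irqstack_active() and asm_call_on_irqstack() are illustrative names, not the kernel's:

```c
#include <linux/types.h>	/* u32, bool */

struct pt_regs;

/* Illustrative helpers, not the kernel's real names: */
extern bool irqstack_active(void);
extern void asm_call_on_irqstack(void (*fn)(struct pt_regs *, u32),
				 struct pt_regs *regs, u32 vector);
extern void irq_enter_rcu(void);
extern void irq_exit_rcu(void);

/* Loose model of run_irq_on_irqstack_cond(); the real one is asm-assisted. */
static void run_irq_on_irqstack_cond_model(void (*fn)(struct pt_regs *, u32),
					   struct pt_regs *regs, u32 vector)
{
	irq_enter_rcu();			/* accounting now lives here */
	if (irqstack_active())
		fn(regs, vector);		/* already on the irq stack */
	else
		asm_call_on_irqstack(fn, regs, vector);
	irq_exit_rcu();
}
```
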
index a0f839a..98b4dae 100644 (file)
@@ -23,6 +23,8 @@ unsigned long insn_get_seg_base(struct pt_regs *regs, int seg_reg_idx);
 int insn_get_code_seg_params(struct pt_regs *regs);
 int insn_fetch_from_user(struct pt_regs *regs,
                         unsigned char buf[MAX_INSN_SIZE]);
+int insn_fetch_from_user_inatomic(struct pt_regs *regs,
+                                 unsigned char buf[MAX_INSN_SIZE]);
 bool insn_decode(struct insn *insn, struct pt_regs *regs,
                 unsigned char buf[MAX_INSN_SIZE], int buf_size);
 
index a8c3d28..95a448f 100644 (file)
@@ -7,9 +7,12 @@
  * Copyright (C) IBM Corporation, 2009
  */
 
+#include <asm/byteorder.h>
 /* insn_attr_t is defined in inat.h */
 #include <asm/inat.h>
 
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
 struct insn_field {
        union {
                insn_value_t value;
@@ -20,6 +23,48 @@ struct insn_field {
        unsigned char nbytes;
 };
 
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+                                 unsigned char n)
+{
+       p->value = v;
+       p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+                                insn_byte_t v)
+{
+       p->bytes[n] = v;
+}
+
+#else
+
+struct insn_field {
+       insn_value_t value;
+       union {
+               insn_value_t little;
+               insn_byte_t bytes[4];
+       };
+       /* !0 if we've run insn_get_xxx() for this field */
+       unsigned char got;
+       unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+                                 unsigned char n)
+{
+       p->value = v;
+       p->little = __cpu_to_le32(v);
+       p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+                                insn_byte_t v)
+{
+       p->bytes[n] = v;
+       p->value = __le32_to_cpu(p->little);
+}
+#endif
+
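
A worked example of the endianness contract above: instruction bytes are consumed in little-endian order on both variants, and insn_set_byte() must leave value in CPU order either way:

```c
#include <asm/insn.h>

static void insn_field_endian_demo(void)
{
	struct insn_field f = {};

	insn_set_byte(&f, 0, 0x0f);	/* first opcode byte */
	insn_set_byte(&f, 1, 0x1f);	/* second opcode byte */
	/*
	 * f.value == 0x00001f0f on both hosts: on little-endian the
	 * bytes alias value directly; on big-endian, insn_set_byte()
	 * re-derives value from the little-endian byte image.
	 */
}
```
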
 struct insn {
        struct insn_field prefixes;     /*
                                         * Prefixes
index cf0e25f..c201083 100644 (file)
@@ -1,15 +1,13 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * intel-mid.h: Intel MID specific setup code
+ * Intel MID specific setup code
  *
- * (C) Copyright 2009 Intel Corporation
+ * (C) Copyright 2009, 2021 Intel Corporation
  */
 #ifndef _ASM_X86_INTEL_MID_H
 #define _ASM_X86_INTEL_MID_H
 
-#include <linux/sfi.h>
 #include <linux/pci.h>
-#include <linux/platform_device.h>
 
 extern int intel_mid_pci_init(void);
 extern int intel_mid_pci_set_power_state(struct pci_dev *pdev, pci_power_t state);
@@ -22,93 +20,18 @@ extern void intel_mid_pwr_power_off(void);
 
 extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev);
 
-extern int get_gpio_by_name(const char *name);
-extern int __init sfi_parse_mrtc(struct sfi_table_header *table);
-extern int __init sfi_parse_mtmr(struct sfi_table_header *table);
-extern int sfi_mrtc_num;
-extern struct sfi_rtc_table_entry sfi_mrtc_array[];
-
-/*
- * Here defines the array of devices platform data that IAFW would export
- * through SFI "DEVS" table, we use name and type to match the device and
- * its platform data.
- */
-struct devs_id {
-       char name[SFI_NAME_LEN + 1];
-       u8 type;
-       u8 delay;
-       u8 msic;
-       void *(*get_platform_data)(void *info);
-};
-
-#define sfi_device(i)                                                          \
-       static const struct devs_id *const __intel_mid_sfi_##i##_dev __used     \
-       __section(".x86_intel_mid_dev.init") = &i
-
-/**
-* struct mid_sd_board_info - template for SD device creation
-* @name:               identifies the driver
-* @bus_num:            board-specific identifier for a given SD controller
-* @max_clk:            the maximum frequency device supports
-* @platform_data:      the particular data stored there is driver-specific
-*/
-struct mid_sd_board_info {
-       char            name[SFI_NAME_LEN];
-       int             bus_num;
-       unsigned short  addr;
-       u32             max_clk;
-       void            *platform_data;
-};
-
-/*
- * Medfield is the follow-up of Moorestown, it combines two chip solution into
- * one. Other than that it also added always-on and constant tsc and lapic
- * timers. Medfield is the platform name, and the chip name is called Penwell
- * we treat Medfield/Penwell as a variant of Moorestown. Penwell can be
- * identified via MSRs.
- */
-enum intel_mid_cpu_type {
-       /* 1 was Moorestown */
-       INTEL_MID_CPU_CHIP_PENWELL = 2,
-       INTEL_MID_CPU_CHIP_CLOVERVIEW,
-       INTEL_MID_CPU_CHIP_TANGIER,
-};
-
-extern enum intel_mid_cpu_type __intel_mid_cpu_chip;
-
 #ifdef CONFIG_X86_INTEL_MID
 
-static inline enum intel_mid_cpu_type intel_mid_identify_cpu(void)
-{
-       return __intel_mid_cpu_chip;
-}
-
-static inline bool intel_mid_has_msic(void)
-{
-       return (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_PENWELL);
-}
-
 extern void intel_scu_devices_create(void);
 extern void intel_scu_devices_destroy(void);
 
 #else /* !CONFIG_X86_INTEL_MID */
 
-#define intel_mid_identify_cpu()       0
-#define intel_mid_has_msic()           0
-
 static inline void intel_scu_devices_create(void) { }
 static inline void intel_scu_devices_destroy(void) { }
 
 #endif /* !CONFIG_X86_INTEL_MID */
 
-enum intel_mid_timer_options {
-       INTEL_MID_TIMER_DEFAULT,
-       INTEL_MID_TIMER_APBT_ONLY,
-       INTEL_MID_TIMER_LAPIC_APBT,
-};
-
-extern enum intel_mid_timer_options intel_mid_timer_options;
-
 /* Bus Select SoC Fuse value */
 #define BSEL_SOC_FUSE_MASK             0x7
 /* FSB 133MHz */
@@ -118,16 +41,4 @@ extern enum intel_mid_timer_options intel_mid_timer_options;
 /* FSB 83MHz */
 #define BSEL_SOC_FUSE_111              0x7
 
-#define SFI_MTMR_MAX_NUM               8
-#define SFI_MRTC_MAX                   8
-
-/* VRTC timer */
-#define MRST_VRTC_MAP_SZ               1024
-/* #define MRST_VRTC_PGOFFSET          0xc00 */
-
-extern void intel_mid_rtc_init(void);
-
-/* The offset for the mapping of global gpio pin to irq */
-#define INTEL_MID_IRQ_OFFSET           0x100
-
 #endif /* _ASM_X86_INTEL_MID_H */
diff --git a/arch/x86/include/asm/intel_mid_vrtc.h b/arch/x86/include/asm/intel_mid_vrtc.h
deleted file mode 100644 (file)
index 0b44b1a..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _INTEL_MID_VRTC_H
-#define _INTEL_MID_VRTC_H
-
-extern unsigned char vrtc_cmos_read(unsigned char reg);
-extern void vrtc_cmos_write(unsigned char val, unsigned char reg);
-extern void vrtc_get_time(struct timespec64 *now);
-extern int vrtc_set_mmss(const struct timespec64 *now);
-
-#endif
index 11d457a..8537f59 100644 (file)
@@ -65,6 +65,4 @@ static inline int intel_scu_ipc_dev_command(struct intel_scu_ipc_dev *scu, int c
                                                   inlen, out, outlen);
 }
 
-#include <asm/intel_scu_ipc_legacy.h>
-
 #endif
diff --git a/arch/x86/include/asm/intel_scu_ipc_legacy.h b/arch/x86/include/asm/intel_scu_ipc_legacy.h
deleted file mode 100644 (file)
index 4cf13fe..0000000
+++ /dev/null
@@ -1,91 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_X86_INTEL_SCU_IPC_LEGACY_H_
-#define _ASM_X86_INTEL_SCU_IPC_LEGACY_H_
-
-#include <linux/notifier.h>
-
-#define IPCMSG_INDIRECT_READ   0x02
-#define IPCMSG_INDIRECT_WRITE  0x05
-
-#define IPCMSG_COLD_OFF                0x80    /* Only for Tangier */
-
-#define IPCMSG_WARM_RESET      0xF0
-#define IPCMSG_COLD_RESET      0xF1
-#define IPCMSG_SOFT_RESET      0xF2
-#define IPCMSG_COLD_BOOT       0xF3
-
-#define IPCMSG_VRTC            0xFA    /* Set vRTC device */
-/* Command id associated with message IPCMSG_VRTC */
-#define IPC_CMD_VRTC_SETTIME      1    /* Set time */
-#define IPC_CMD_VRTC_SETALARM     2    /* Set alarm */
-
-/* Don't call these in new code - they will be removed eventually */
-
-/* Read single register */
-static inline int intel_scu_ipc_ioread8(u16 addr, u8 *data)
-{
-       return intel_scu_ipc_dev_ioread8(NULL, addr, data);
-}
-
-/* Read a vector */
-static inline int intel_scu_ipc_readv(u16 *addr, u8 *data, int len)
-{
-       return intel_scu_ipc_dev_readv(NULL, addr, data, len);
-}
-
-/* Write single register */
-static inline int intel_scu_ipc_iowrite8(u16 addr, u8 data)
-{
-       return intel_scu_ipc_dev_iowrite8(NULL, addr, data);
-}
-
-/* Write a vector */
-static inline int intel_scu_ipc_writev(u16 *addr, u8 *data, int len)
-{
-       return intel_scu_ipc_dev_writev(NULL, addr, data, len);
-}
-
-/* Update single register based on the mask */
-static inline int intel_scu_ipc_update_register(u16 addr, u8 data, u8 mask)
-{
-       return intel_scu_ipc_dev_update(NULL, addr, data, mask);
-}
-
-/* Issue commands to the SCU with or without data */
-static inline int intel_scu_ipc_simple_command(int cmd, int sub)
-{
-       return intel_scu_ipc_dev_simple_command(NULL, cmd, sub);
-}
-
-static inline int intel_scu_ipc_command(int cmd, int sub, u32 *in, int inlen,
-                                       u32 *out, int outlen)
-{
-       /* New API takes both inlen and outlen as bytes so convert here */
-       size_t inbytes = inlen * sizeof(u32);
-       size_t outbytes = outlen * sizeof(u32);
-
-       return intel_scu_ipc_dev_command_with_size(NULL, cmd, sub, in, inbytes,
-                                                  inlen, out, outbytes);
-}
-
-extern struct blocking_notifier_head intel_scu_notifier;
-
-static inline void intel_scu_notifier_add(struct notifier_block *nb)
-{
-       blocking_notifier_chain_register(&intel_scu_notifier, nb);
-}
-
-static inline void intel_scu_notifier_remove(struct notifier_block *nb)
-{
-       blocking_notifier_chain_unregister(&intel_scu_notifier, nb);
-}
-
-static inline int intel_scu_notifier_post(unsigned long v, void *p)
-{
-       return blocking_notifier_call_chain(&intel_scu_notifier, v, p);
-}
-
-#define                SCU_AVAILABLE           1
-#define                SCU_DOWN                2
-
-#endif
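
The bulk of the deleted header was thin shims from the legacy SCU IPC calls to the per-device API; the one subtle point is the unit conversion, since the legacy API counted 32-bit words while the new one takes bytes. A hedged sketch of that shim shape (new_command() is a hypothetical stand-in for intel_scu_ipc_dev_command_with_size(), with an illustrative signature):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* Hypothetical stand-in for intel_scu_ipc_dev_command_with_size() */
static int new_command(int cmd, int sub, uint32_t *in, size_t inbytes,
		       size_t maxout, uint32_t *out, size_t outbytes)
{
	(void)in; (void)out; (void)maxout;
	printf("cmd %#x sub %d: %zu bytes in, %zu bytes out\n",
	       cmd, sub, inbytes, outbytes);
	return 0;
}

/* The deleted wrapper's shape: legacy lengths are 32-bit word counts */
static inline int legacy_command(int cmd, int sub, uint32_t *in, int inlen,
				 uint32_t *out, int outlen)
{
	return new_command(cmd, sub, in, inlen * sizeof(uint32_t),
			   inlen, out, outlen * sizeof(uint32_t));
}

int main(void)
{
	uint32_t in = 0, out = 0;

	return legacy_command(0xFA, 1, &in, 1, &out, 1);	/* 4 bytes each way */
}
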
index 76d3896..768aa23 100644 (file)
@@ -25,8 +25,6 @@ static inline int irq_canonicalize(int irq)
 
 extern int irq_init_percpu_irqstack(unsigned int cpu);
 
-#define __ARCH_HAS_DO_SOFTIRQ
-
 struct irq_desc;
 
 extern void fixup_irqs(void);
index 7758169..9b2a0ff 100644 (file)
 #include <asm/processor.h>
 
 #ifdef CONFIG_X86_64
-static __always_inline bool irqstack_active(void)
-{
-       return __this_cpu_read(irq_count) != -1;
-}
-
-void asm_call_on_stack(void *sp, void (*func)(void), void *arg);
-void asm_call_sysvec_on_stack(void *sp, void (*func)(struct pt_regs *regs),
-                             struct pt_regs *regs);
-void asm_call_irq_on_stack(void *sp, void (*func)(struct irq_desc *desc),
-                          struct irq_desc *desc);
 
-static __always_inline void __run_on_irqstack(void (*func)(void))
-{
-       void *tos = __this_cpu_read(hardirq_stack_ptr);
-
-       __this_cpu_add(irq_count, 1);
-       asm_call_on_stack(tos - 8, func, NULL);
-       __this_cpu_sub(irq_count, 1);
+/*
+ * Macro to inline switching to an interrupt stack and invoking function
+ * calls from there. The following rules apply:
+ *
+ * - Ordering:
+ *
+ *   1. Write the stack pointer into the topmost slot of the irq
+ *     stack. This ensures that the various unwinders can link back to the
+ *     original stack.
+ *
+ *   2. Switch the stack pointer to the top of the irq stack.
+ *
+ *   3. Invoke whatever needs to be done (@asm_call argument)
+ *
+ *   4. Pop the original stack pointer from the top of the irq stack
+ *     which brings it back to the original stack where it left off.
+ *
+ * - Function invocation:
+ *
+ *   To allow flexible usage of the macro, the actual function code including
+ *   the store of the arguments in the call ABI registers is handed in via
+ *   the @asm_call argument.
+ *
+ * - Local variables:
+ *
+ *   @tos:
+ *     The @tos variable holds a pointer to the top of the irq stack and
+ *     _must_ be allocated in a non-callee saved register as this is a
+ *     restriction coming from objtool.
+ *
+ *     Note that (tos) appears in both the input and output constraints,
+ *     to ensure that the compiler does not assume that R11 is left
+ *     untouched in case this macro is used in some place where the
+ *     per-CPU interrupt stack pointer is used again afterwards.
+ *
+ * - Function arguments:
+ *     The function argument(s), if any, have to be defined in register
+ *     variables at the place where this is invoked. Storing the
+ *     argument(s) in the proper register(s) is part of the @asm_call
+ *
+ * - Constraints:
+ *
+ *   The constraints have to be done very carefully because the compiler
+ *   does not know about the assembly call.
+ *
+ *   output:
+ *     As documented above, the @tos variable is required to be in
+ *     the output constraints to make the compiler aware that R11 cannot be
+ *     reused after the asm() statement.
+ *
+ *     For builds with CONFIG_UNWINDER_FRAME_POINTER, ASM_CALL_CONSTRAINT
+ *     is required as well, as it prevents certain creative GCC variants
+ *     from misplacing the ASM code.
+ *
+ *  input:
+ *    - func:
+ *       Immediate, which tells the compiler that the function is referenced.
+ *
+ *    - tos:
+ *       Register. The actual register is defined by the variable declaration.
+ *
+ *    - function arguments:
+ *       The constraints are handed in via the 'argconstr' argument list. They
+ *       describe the register arguments which are used in @asm_call.
+ *
+ *  clobbers:
+ *     Function calls can clobber anything except the callee-saved
+ *     registers. Tell the compiler.
+ */
+#define call_on_irqstack(func, asm_call, argconstr...)                 \
+{                                                                      \
+       register void *tos asm("r11");                                  \
+                                                                       \
+       tos = ((void *)__this_cpu_read(hardirq_stack_ptr));             \
+                                                                       \
+       asm_inline volatile(                                            \
+       "movq   %%rsp, (%[tos])                         \n"             \
+       "movq   %[tos], %%rsp                           \n"             \
+                                                                       \
+       asm_call                                                        \
+                                                                       \
+       "popq   %%rsp                                   \n"             \
+                                                                       \
+       : "+r" (tos), ASM_CALL_CONSTRAINT                               \
+       : [__func] "i" (func), [tos] "r" (tos) argconstr                \
+       : "cc", "rax", "rcx", "rdx", "rsi", "rdi", "r8", "r9", "r10",   \
+         "memory"                                                      \
+       );                                                              \
 }
 
-static __always_inline void
-__run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs),
-                        struct pt_regs *regs)
-{
-       void *tos = __this_cpu_read(hardirq_stack_ptr);
-
-       __this_cpu_add(irq_count, 1);
-       asm_call_sysvec_on_stack(tos - 8, func, regs);
-       __this_cpu_sub(irq_count, 1);
+/* Macros to assert type correctness for run_*_on_irqstack macros */
+#define assert_function_type(func, proto)                              \
+       static_assert(__builtin_types_compatible_p(typeof(&func), proto))
+
+#define assert_arg_type(arg, proto)                                    \
+       static_assert(__builtin_types_compatible_p(typeof(arg), proto))
+
+/*
+ * Macro to invoke system vector and device interrupt C handlers.
+ */
+#define call_on_irqstack_cond(func, regs, asm_call, constr, c_args...) \
+{                                                                      \
+       /*                                                              \
+        * User mode entry and interrupt on the irq stack do not        \
+        * switch stacks. If from user mode the task stack is empty.    \
+        */                                                             \
+       if (user_mode(regs) || __this_cpu_read(hardirq_stack_inuse)) {  \
+               irq_enter_rcu();                                        \
+               func(c_args);                                           \
+               irq_exit_rcu();                                         \
+       } else {                                                        \
+               /*                                                      \
+                * Mark the irq stack inuse _before_ and unmark _after_ \
+                * switching stacks. Interrupts are disabled in both    \
+                * places. Invoke the stack switch macro with the call  \
+                * sequence which matches the above direct invocation.  \
+                */                                                     \
+               __this_cpu_write(hardirq_stack_inuse, true);            \
+               call_on_irqstack(func, asm_call, constr);               \
+               __this_cpu_write(hardirq_stack_inuse, false);           \
+       }                                                               \
 }
 
-static __always_inline void
-__run_irq_on_irqstack(void (*func)(struct irq_desc *desc),
-                     struct irq_desc *desc)
-{
-       void *tos = __this_cpu_read(hardirq_stack_ptr);
-
-       __this_cpu_add(irq_count, 1);
-       asm_call_irq_on_stack(tos - 8, func, desc);
-       __this_cpu_sub(irq_count, 1);
+/*
+ * Function call sequence for call_on_irqstack() for system vectors.
+ *
+ * Note that irq_enter_rcu() and irq_exit_rcu() do not use the input
+ * mechanism because these functions are global and cannot be optimized out
+ * when compiling a particular source file which uses one of these macros.
+ *
+ * The argument (regs) does not need to be pushed or stashed in a callee
+ * saved register to be safe vs. the irq_enter_rcu() call because the
+ * clobbers already prevent the compiler from storing it in a callee
+ * clobbered register. As the compiler has to preserve @regs for the final
+ * call to idtentry_exit() anyway, it's likely that it does not cause extra
+ * effort for this asm magic.
+ */
+#define ASM_CALL_SYSVEC                                                        \
+       "call irq_enter_rcu                             \n"             \
+       "movq   %[arg1], %%rdi                          \n"             \
+       "call %P[__func]                                \n"             \
+       "call irq_exit_rcu                              \n"
+
+#define SYSVEC_CONSTRAINTS     , [arg1] "r" (regs)
+
+#define run_sysvec_on_irqstack_cond(func, regs)                                \
+{                                                                      \
+       assert_function_type(func, void (*)(struct pt_regs *));         \
+       assert_arg_type(regs, struct pt_regs *);                        \
+                                                                       \
+       call_on_irqstack_cond(func, regs, ASM_CALL_SYSVEC,              \
+                             SYSVEC_CONSTRAINTS, regs);                \
 }
 
-#else /* CONFIG_X86_64 */
-static inline bool irqstack_active(void) { return false; }
-static inline void __run_on_irqstack(void (*func)(void)) { }
-static inline void __run_sysvec_on_irqstack(void (*func)(struct pt_regs *regs),
-                                           struct pt_regs *regs) { }
-static inline void __run_irq_on_irqstack(void (*func)(struct irq_desc *desc),
-                                        struct irq_desc *desc) { }
-#endif /* !CONFIG_X86_64 */
-
-static __always_inline bool irq_needs_irq_stack(struct pt_regs *regs)
-{
-       if (IS_ENABLED(CONFIG_X86_32))
-               return false;
-       if (!regs)
-               return !irqstack_active();
-       return !user_mode(regs) && !irqstack_active();
+/*
+ * As in ASM_CALL_SYSVEC above the clobbers force the compiler to store
+ * @regs and @vector in callee saved registers.
+ */
+#define ASM_CALL_IRQ                                                   \
+       "call irq_enter_rcu                             \n"             \
+       "movq   %[arg1], %%rdi                          \n"             \
+       "movl   %[arg2], %%esi                          \n"             \
+       "call %P[__func]                                \n"             \
+       "call irq_exit_rcu                              \n"
+
+#define IRQ_CONSTRAINTS        , [arg1] "r" (regs), [arg2] "r" (vector)
+
+#define run_irq_on_irqstack_cond(func, regs, vector)                   \
+{                                                                      \
+       assert_function_type(func, void (*)(struct pt_regs *, u32));    \
+       assert_arg_type(regs, struct pt_regs *);                        \
+       assert_arg_type(vector, u32);                                   \
+                                                                       \
+       call_on_irqstack_cond(func, regs, ASM_CALL_IRQ,                 \
+                             IRQ_CONSTRAINTS, regs, vector);           \
 }
 
-
-static __always_inline void run_on_irqstack_cond(void (*func)(void),
-                                                struct pt_regs *regs)
-{
-       lockdep_assert_irqs_disabled();
-
-       if (irq_needs_irq_stack(regs))
-               __run_on_irqstack(func);
-       else
-               func();
+#define ASM_CALL_SOFTIRQ                                               \
+       "call %P[__func]                                \n"
+
+/*
+ * Macro to invoke __do_softirq on the irq stack. This is only called from
+ * task context when bottom halves are about to be reenabled and soft
+ * interrupts are pending to be processed. The interrupt stack cannot be in
+ * use here.
+ */
+#define do_softirq_own_stack()                                         \
+{                                                                      \
+       __this_cpu_write(hardirq_stack_inuse, true);                    \
+       call_on_irqstack(__do_softirq, ASM_CALL_SOFTIRQ);               \
+       __this_cpu_write(hardirq_stack_inuse, false);                   \
 }
 
-static __always_inline void
-run_sysvec_on_irqstack_cond(void (*func)(struct pt_regs *regs),
-                           struct pt_regs *regs)
-{
-       lockdep_assert_irqs_disabled();
-
-       if (irq_needs_irq_stack(regs))
-               __run_sysvec_on_irqstack(func, regs);
-       else
-               func(regs);
+#else /* CONFIG_X86_64 */
+/* System vector handlers always run on the stack they interrupted. */
+#define run_sysvec_on_irqstack_cond(func, regs)                                \
+{                                                                      \
+       irq_enter_rcu();                                                \
+       func(regs);                                                     \
+       irq_exit_rcu();                                                 \
 }
 
-static __always_inline void
-run_irq_on_irqstack_cond(void (*func)(struct irq_desc *desc), struct irq_desc *desc,
-                        struct pt_regs *regs)
-{
-       lockdep_assert_irqs_disabled();
-
-       if (irq_needs_irq_stack(regs))
-               __run_irq_on_irqstack(func, desc);
-       else
-               func(desc);
+/* Switches to the irq stack within func() */
+#define run_irq_on_irqstack_cond(func, regs, vector)                   \
+{                                                                      \
+       irq_enter_rcu();                                                \
+       func(regs, vector);                                             \
+       irq_exit_rcu();                                                 \
 }
 
+#endif /* !CONFIG_X86_64 */
+
 #endif
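
call_on_irqstack() boils down to four steps: save RSP at the top of the new stack, point RSP at it, call the handler, then pop the saved RSP back. A stripped-down user-space sketch of that sequence, assuming x86-64 with GCC/Clang (the malloc'd alternate stack, indirect call and names are illustrative; the real macro uses a direct call and pins @tos to R11 for objtool):

#include <stdio.h>
#include <stdlib.h>

static int ran;

static void on_alt_stack(void)
{
	ran = 1;
}

int main(void)
{
	enum { STACK_SIZE = 64 * 1024 };
	char *stack = malloc(STACK_SIZE);
	register void *tos asm("r11");

	if (!stack)
		return 1;
	/* Top-of-stack slot; kept 16-byte aligned for the user-space ABI */
	tos = stack + STACK_SIZE - 16;

	asm volatile(
		"movq	%%rsp, (%[tos])	\n"	/* 1. save RSP at the top */
		"movq	%[tos], %%rsp	\n"	/* 2. switch to the new stack */
		"call	*%[func]	\n"	/* 3. run the function there */
		"popq	%%rsp		\n"	/* 4. back to the original stack */
		: "+r" (tos)
		: [func] "r" (on_alt_stack), [tos] "r" (tos)
		: "cc", "rax", "rcx", "rdx", "rsi", "rdi",
		  "r8", "r9", "r10", "memory");

	printf("ran on alternate stack: %d\n", ran);
	free(stack);
	return 0;
}
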
diff --git a/arch/x86/include/asm/kfence.h b/arch/x86/include/asm/kfence.h
new file mode 100644 (file)
index 0000000..97bbb4a
--- /dev/null
@@ -0,0 +1,64 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * x86 KFENCE support.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef _ASM_X86_KFENCE_H
+#define _ASM_X86_KFENCE_H
+
+#include <linux/bug.h>
+#include <linux/kfence.h>
+
+#include <asm/pgalloc.h>
+#include <asm/pgtable.h>
+#include <asm/set_memory.h>
+#include <asm/tlbflush.h>
+
+/* Force 4K pages for __kfence_pool. */
+static inline bool arch_kfence_init_pool(void)
+{
+       unsigned long addr;
+
+       for (addr = (unsigned long)__kfence_pool; is_kfence_address((void *)addr);
+            addr += PAGE_SIZE) {
+               unsigned int level;
+
+               if (!lookup_address(addr, &level))
+                       return false;
+
+               if (level != PG_LEVEL_4K)
+                       set_memory_4k(addr, 1);
+       }
+
+       return true;
+}
+
+/* Protect the given page and flush TLB. */
+static inline bool kfence_protect_page(unsigned long addr, bool protect)
+{
+       unsigned int level;
+       pte_t *pte = lookup_address(addr, &level);
+
+       if (WARN_ON(!pte || level != PG_LEVEL_4K))
+               return false;
+
+       /*
+        * We need to avoid IPIs, as we may get KFENCE allocations or faults
+        * with interrupts disabled. Therefore, the below is best-effort, and
+        * does not flush TLBs on all CPUs. We can tolerate some inaccuracy;
+        * lazy fault handling takes care of faults after the page is PRESENT.
+        */
+
+       if (protect)
+               set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_PRESENT));
+       else
+               set_pte(pte, __pte(pte_val(*pte) | _PAGE_PRESENT));
+
+       /* Flush this CPU's TLB. */
+       flush_tlb_one_kernel(addr);
+       return true;
+}
+
+#endif /* _ASM_X86_KFENCE_H */
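
kfence_protect_page() arms a guard page by clearing _PAGE_PRESENT and flushing only the local TLB entry. The closest user-space analogy is toggling a page between PROT_NONE and PROT_READ|PROT_WRITE; a hedged POSIX sketch of that idea (an analogy only, not how KFENCE itself is implemented):

#include <stdio.h>
#include <sys/mman.h>
#include <unistd.h>

int main(void)
{
	long psz = sysconf(_SC_PAGESIZE);
	char *page = mmap(NULL, psz, PROT_READ | PROT_WRITE,
			  MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);

	if (page == MAP_FAILED)
		return 1;

	page[0] = 'x';					/* accessible */
	mprotect(page, psz, PROT_NONE);			/* "protect": any access faults */
	mprotect(page, psz, PROT_READ | PROT_WRITE);	/* "unprotect" again */
	printf("guard byte: %c\n", page[0]);
	munmap(page, psz);
	return 0;
}
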
index 355a2ab..3236410 100644 (file)
@@ -93,11 +93,7 @@ KVM_X86_OP(check_intercept)
 KVM_X86_OP(handle_exit_irqoff)
 KVM_X86_OP_NULL(request_immediate_exit)
 KVM_X86_OP(sched_in)
-KVM_X86_OP_NULL(slot_enable_log_dirty)
-KVM_X86_OP_NULL(slot_disable_log_dirty)
-KVM_X86_OP_NULL(flush_log_dirty)
-KVM_X86_OP_NULL(enable_log_dirty_pt_masked)
-KVM_X86_OP_NULL(cpu_dirty_log_size)
+KVM_X86_OP_NULL(update_cpu_dirty_logging)
 KVM_X86_OP_NULL(pre_block)
 KVM_X86_OP_NULL(post_block)
 KVM_X86_OP_NULL(vcpu_blocking)
index 84499aa..3768819 100644 (file)
@@ -89,6 +89,8 @@
        KVM_ARCH_REQ_FLAGS(27, KVM_REQUEST_NO_WAKEUP)
 #define KVM_REQ_APF_READY              KVM_ARCH_REQ(28)
 #define KVM_REQ_MSR_FILTER_CHANGED     KVM_ARCH_REQ(29)
+#define KVM_REQ_UPDATE_CPU_DIRTY_LOGGING \
+       KVM_ARCH_REQ_FLAGS(30, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP)
 
 #define CR0_RESERVED_BITS                                               \
        (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \
@@ -533,10 +535,16 @@ struct kvm_vcpu_hv {
 /* Xen HVM per vcpu emulation context */
 struct kvm_vcpu_xen {
        u64 hypercall_rip;
+       u32 current_runstate;
        bool vcpu_info_set;
        bool vcpu_time_info_set;
+       bool runstate_set;
        struct gfn_to_hva_cache vcpu_info_cache;
        struct gfn_to_hva_cache vcpu_time_info_cache;
+       struct gfn_to_hva_cache runstate_cache;
+       u64 last_steal;
+       u64 runstate_entry_time;
+       u64 runstate_times[4];
 };
 
 struct kvm_vcpu_arch {
@@ -876,12 +884,29 @@ struct kvm_hv_syndbg {
        u64 options;
 };
 
+/* Current state of Hyper-V TSC page clocksource */
+enum hv_tsc_page_status {
+       /* TSC page was not set up or disabled */
+       HV_TSC_PAGE_UNSET = 0,
+       /* TSC page MSR was written by the guest, update pending */
+       HV_TSC_PAGE_GUEST_CHANGED,
+       /* TSC page MSR was written by KVM userspace, update pending */
+       HV_TSC_PAGE_HOST_CHANGED,
+       /* TSC page was properly set up and is currently active  */
+       HV_TSC_PAGE_SET,
+       /* TSC page is currently being updated and therefore is inactive */
+       HV_TSC_PAGE_UPDATING,
+       /* TSC page was set up with an inaccessible GPA */
+       HV_TSC_PAGE_BROKEN,
+};
+
 /* Hyper-V emulation context */
 struct kvm_hv {
        struct mutex hv_lock;
        u64 hv_guest_os_id;
        u64 hv_hypercall;
        u64 hv_tsc_page;
+       enum hv_tsc_page_status hv_tsc_page_status;
 
        /* Hyper-v based guest crash (NT kernel bugcheck) parameters */
        u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
@@ -923,6 +948,12 @@ enum kvm_irqchip_mode {
        KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
 };
 
+struct kvm_x86_msr_filter {
+       u8 count;
+       bool default_allow:1;
+       struct msr_bitmap_range ranges[16];
+};
+
 #define APICV_INHIBIT_REASON_DISABLE    0
 #define APICV_INHIBIT_REASON_HYPERV     1
 #define APICV_INHIBIT_REASON_NESTED     2
@@ -937,9 +968,6 @@ struct kvm_arch {
        unsigned int indirect_shadow_pages;
        u8 mmu_valid_gen;
        struct hlist_head mmu_page_hash[KVM_NUM_MMU_PAGES];
-       /*
-        * Hash table of struct kvm_mmu_page.
-        */
        struct list_head active_mmu_pages;
        struct list_head zapped_obsolete_pages;
        struct list_head lpage_disallowed_mmu_pages;
@@ -958,7 +986,7 @@ struct kvm_arch {
        struct kvm_pit *vpit;
        atomic_t vapics_in_nmi_mode;
        struct mutex apic_map_lock;
-       struct kvm_apic_map *apic_map;
+       struct kvm_apic_map __rcu *apic_map;
        atomic_t apic_map_dirty;
 
        bool apic_access_page_done;
@@ -1007,6 +1035,7 @@ struct kvm_arch {
        u32 bsp_vcpu_id;
 
        u64 disabled_quirks;
+       int cpu_dirty_logging_count;
 
        enum kvm_irqchip_mode irqchip_mode;
        u8 nr_reserved_ioapic_pins;
@@ -1019,18 +1048,13 @@ struct kvm_arch {
        bool guest_can_read_msr_platform_info;
        bool exception_payload_enabled;
 
+       bool bus_lock_detection_enabled;
+
        /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
        u32 user_space_msr_mask;
+       struct kvm_x86_msr_filter __rcu *msr_filter;
 
-       struct {
-               u8 count;
-               bool default_allow:1;
-               struct msr_bitmap_range ranges[16];
-       } msr_filter;
-
-       bool bus_lock_detection_enabled;
-
-       struct kvm_pmu_event_filter *pmu_event_filter;
+       struct kvm_pmu_event_filter __rcu *pmu_event_filter;
        struct task_struct *nx_lpage_recovery_thread;
 
 #ifdef CONFIG_X86_64
@@ -1271,30 +1295,11 @@ struct kvm_x86_ops {
        void (*sched_in)(struct kvm_vcpu *kvm, int cpu);
 
        /*
-        * Arch-specific dirty logging hooks. These hooks are only supposed to
-        * be valid if the specific arch has hardware-accelerated dirty logging
-        * mechanism. Currently only for PML on VMX.
-        *
-        *  - slot_enable_log_dirty:
-        *      called when enabling log dirty mode for the slot.
-        *  - slot_disable_log_dirty:
-        *      called when disabling log dirty mode for the slot.
-        *      also called when slot is created with log dirty disabled.
-        *  - flush_log_dirty:
-        *      called before reporting dirty_bitmap to userspace.
-        *  - enable_log_dirty_pt_masked:
-        *      called when reenabling log dirty for the GFNs in the mask after
-        *      corresponding bits are cleared in slot->dirty_bitmap.
+        * Size of the CPU's dirty log buffer, i.e. VMX's PML buffer.  A zero
+        * value indicates CPU dirty logging is unsupported or disabled.
         */
-       void (*slot_enable_log_dirty)(struct kvm *kvm,
-                                     struct kvm_memory_slot *slot);
-       void (*slot_disable_log_dirty)(struct kvm *kvm,
-                                      struct kvm_memory_slot *slot);
-       void (*flush_log_dirty)(struct kvm *kvm);
-       void (*enable_log_dirty_pt_masked)(struct kvm *kvm,
-                                          struct kvm_memory_slot *slot,
-                                          gfn_t offset, unsigned long mask);
-       int (*cpu_dirty_log_size)(void);
+       int cpu_dirty_log_size;
+       void (*update_cpu_dirty_logging)(struct kvm_vcpu *vcpu);
 
        /* pmu operations of sub-arch */
        const struct kvm_pmu_ops *pmu_ops;
@@ -1437,11 +1442,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                   struct kvm_memory_slot *memslot);
 void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
                                        struct kvm_memory_slot *memslot);
-void kvm_mmu_slot_set_dirty(struct kvm *kvm,
-                           struct kvm_memory_slot *memslot);
-void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
-                                  struct kvm_memory_slot *slot,
-                                  gfn_t gfn_offset, unsigned long mask);
 void kvm_mmu_zap_all(struct kvm *kvm);
 void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
 unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
@@ -1613,7 +1613,6 @@ void kvm_inject_nmi(struct kvm_vcpu *vcpu);
 void kvm_update_dr7(struct kvm_vcpu *vcpu);
 
 int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
-int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva);
 void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
 int kvm_mmu_load(struct kvm_vcpu *vcpu);
 void kvm_mmu_unload(struct kvm_vcpu *vcpu);
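
Moving the MSR filter behind a separately allocated __rcu pointer lets readers pick up a complete, immutable filter with one pointer load while writers publish a fully built replacement. A hedged C11-atomics stand-in for that publish/consume pattern (names are illustrative; real RCU also defers freeing the old copy until a grace period has elapsed):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct msr_filter {
	unsigned char count;
	bool default_allow;
};

static _Atomic(struct msr_filter *) cur_filter;

static void publish(unsigned char count, bool def)
{
	struct msr_filter *f = malloc(sizeof(*f));

	if (!f)
		return;
	f->count = count;
	f->default_allow = def;
	/* release: the fully built filter becomes visible atomically;
	 * the previous filter would be freed after a grace period */
	atomic_store_explicit(&cur_filter, f, memory_order_release);
}

static bool allowed(void)
{
	struct msr_filter *f =
		atomic_load_explicit(&cur_filter, memory_order_acquire);

	return f ? f->default_allow : true;	/* no filter: allow */
}

int main(void)
{
	publish(2, false);
	printf("%d\n", allowed());	/* 0 */
	return 0;
}
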
index fdbffec..5a2baf2 100644 (file)
@@ -40,6 +40,8 @@
 #define ORC_REG_MAX                    15
 
 #ifndef __ASSEMBLY__
+#include <asm/byteorder.h>
+
 /*
  * This struct is more or less a vastly simplified version of the DWARF Call
  * Frame Information standard.  It contains only the necessary parts of DWARF
 struct orc_entry {
        s16             sp_offset;
        s16             bp_offset;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
        unsigned        sp_reg:4;
        unsigned        bp_reg:4;
        unsigned        type:2;
        unsigned        end:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       unsigned        bp_reg:4;
+       unsigned        sp_reg:4;
+       unsigned        unused:5;
+       unsigned        end:1;
+       unsigned        type:2;
+#endif
 } __packed;
 
 #endif /* __ASSEMBLY__ */
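
The mirrored orc_entry bitfields work because C allocates successive bitfields from the least significant bit on little-endian ABIs and from the most significant bit on big-endian ones. A small sketch that makes the in-memory layout visible (assuming GCC/Clang and the __BYTE_ORDER__ predefines; field names are illustrative):

#include <stdio.h>
#include <string.h>

struct regs {
#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	unsigned char b:4;	/* first field takes the high bits on BE */
	unsigned char a:4;
#else
	unsigned char a:4;	/* first field takes the low bits on LE */
	unsigned char b:4;
#endif
} __attribute__((packed));

int main(void)
{
	struct regs r = { .a = 0x1, .b = 0x2 };
	unsigned char raw;

	memcpy(&raw, &r, 1);
	printf("raw byte: %#x\n", raw);	/* 0x21 with either declaration order */
	return 0;
}
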
index 16b9f22..40f9227 100644 (file)
@@ -10,8 +10,6 @@
 #ifndef _PLATFORM_SST_AUDIO_H_
 #define _PLATFORM_SST_AUDIO_H_
 
-#include <linux/sfi.h>
-
 #define MAX_NUM_STREAMS_MRFLD  25
 #define MAX_NUM_STREAMS        MAX_NUM_STREAMS_MRFLD
 
index c20a52b..f1b9ed5 100644 (file)
@@ -426,8 +426,6 @@ struct irq_stack {
        char            stack[IRQ_STACK_SIZE];
 } __aligned(IRQ_STACK_SIZE);
 
-DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-
 #ifdef CONFIG_X86_32
 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack);
 #else
@@ -454,7 +452,8 @@ static inline unsigned long cpu_kernelmode_gs_base(int cpu)
        return (unsigned long)per_cpu(fixed_percpu_data.gs_base, cpu);
 }
 
-DECLARE_PER_CPU(unsigned int, irq_count);
+DECLARE_PER_CPU(void *, hardirq_stack_ptr);
+DECLARE_PER_CPU(bool, hardirq_stack_inuse);
 extern asmlinkage void ignore_sysret(void);
 
 /* Save actual FS/GS selectors and bases to current->thread */
@@ -473,9 +472,9 @@ struct stack_canary {
 };
 DECLARE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
 #endif
-/* Per CPU softirq stack pointer */
+DECLARE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
 DECLARE_PER_CPU(struct irq_stack *, softirq_stack_ptr);
-#endif /* X86_64 */
+#endif /* !X86_64 */
 
 extern unsigned int fpu_kernel_xstate_size;
 extern unsigned int fpu_user_xstate_size;
@@ -552,15 +551,6 @@ static inline void arch_thread_struct_whitelist(unsigned long *offset,
        *size = fpu_kernel_xstate_size;
 }
 
-/*
- * Thread-synchronous status.
- *
- * This is different from the flags in that nobody else
- * ever touches our thread-synchronous status, so we don't
- * have to worry about atomic accesses.
- */
-#define TS_COMPAT              0x0002  /* 32bit syscall active (64BIT)*/
-
 static inline void
 native_load_sp0(unsigned long sp0)
 {
index 2c35f1c..b6a9d51 100644 (file)
@@ -25,6 +25,7 @@ void __end_SYSENTER_singlestep_region(void);
 void entry_SYSENTER_compat(void);
 void __end_entry_SYSENTER_compat(void);
 void entry_SYSCALL_compat(void);
+void entry_SYSCALL_compat_safe_stack(void);
 void entry_INT80_compat(void);
 #ifdef CONFIG_XEN_PV
 void xen_entry_INT80_compat(void);
index d8324a2..409f661 100644 (file)
@@ -94,6 +94,8 @@ struct pt_regs {
 #include <asm/paravirt_types.h>
 #endif
 
+#include <asm/proto.h>
+
 struct cpuinfo_x86;
 struct task_struct;
 
@@ -175,6 +177,19 @@ static inline bool any_64bit_mode(struct pt_regs *regs)
 #ifdef CONFIG_X86_64
 #define current_user_stack_pointer()   current_pt_regs()->sp
 #define compat_user_stack_pointer()    current_pt_regs()->sp
+
+static inline bool ip_within_syscall_gap(struct pt_regs *regs)
+{
+       bool ret = (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
+                   regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack);
+
+#ifdef CONFIG_IA32_EMULATION
+       ret = ret || (regs->ip >= (unsigned long)entry_SYSCALL_compat &&
+                     regs->ip <  (unsigned long)entry_SYSCALL_compat_safe_stack);
+#endif
+
+       return ret;
+}
 #endif
 
 static inline unsigned long kernel_stack_pointer(struct pt_regs *regs)
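
ip_within_syscall_gap() is a half-open range test on regs->ip between an entry point and its *_safe_stack label. A user-space sketch of the check with stand-in addresses (the real bounds come from the entry-code symbols, not constants):

#include <stdbool.h>
#include <stdio.h>

/* assumption: these stand in for entry_SYSCALL_64{,_safe_stack} */
static const unsigned long entry_start = 0xffffffff81a00000UL;
static const unsigned long entry_safe  = 0xffffffff81a00040UL;

static bool ip_within_gap(unsigned long ip)
{
	return ip >= entry_start && ip < entry_safe;	/* half-open range */
}

int main(void)
{
	printf("%d\n", ip_within_gap(0xffffffff81a00010UL));	/* 1 */
	printf("%d\n", ip_within_gap(0xffffffff81a00040UL));	/* 0 */
	return 0;
}
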
index 8b58d69..0bc9b08 100644 (file)
@@ -58,9 +58,8 @@ static __always_inline unsigned long smap_save(void)
        unsigned long flags;
 
        asm volatile ("# smap_save\n\t"
-                     ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
-                     "pushf; pop %0; " __ASM_CLAC "\n\t"
-                     "1:"
+                     ALTERNATIVE("", "pushf; pop %0; " __ASM_CLAC "\n\t",
+                                 X86_FEATURE_SMAP)
                      : "=rm" (flags) : : "memory", "cc");
 
        return flags;
@@ -69,9 +68,8 @@ static __always_inline unsigned long smap_save(void)
 static __always_inline void smap_restore(unsigned long flags)
 {
        asm volatile ("# smap_restore\n\t"
-                     ALTERNATIVE("jmp 1f", "", X86_FEATURE_SMAP)
-                     "push %0; popf\n\t"
-                     "1:"
+                     ALTERNATIVE("", "push %0; popf\n\t",
+                                 X86_FEATURE_SMAP)
                      : : "g" (flags) : "memory", "cc");
 }
 
index c0538f8..630ff08 100644 (file)
@@ -132,6 +132,7 @@ void native_play_dead(void);
 void play_dead_common(void);
 void wbinvd_on_cpu(int cpu);
 int wbinvd_on_all_cpus(void);
+void cond_wakeup_cpu0(void);
 
 void native_smp_send_reschedule(int cpu);
 void native_send_call_func_ipi(const struct cpumask *mask);
diff --git a/arch/x86/include/asm/softirq_stack.h b/arch/x86/include/asm/softirq_stack.h
new file mode 100644 (file)
index 0000000..889d53d
--- /dev/null
@@ -0,0 +1,11 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_X86_SOFTIRQ_STACK_H
+#define _ASM_X86_SOFTIRQ_STACK_H
+
+#ifdef CONFIG_X86_64
+# include <asm/irq_stack.h>
+#else
+# include <asm-generic/softirq_stack.h>
+#endif
+
+#endif
index 0d751d5..06b740b 100644 (file)
@@ -205,10 +205,23 @@ static inline int arch_within_stack_frames(const void * const stack,
 
 #endif
 
+/*
+ * Thread-synchronous status.
+ *
+ * This is different from the flags in that nobody else
+ * ever touches our thread-synchronous status, so we don't
+ * have to worry about atomic accesses.
+ */
+#define TS_COMPAT              0x0002  /* 32bit syscall active (64BIT)*/
+
+#ifndef __ASSEMBLY__
 #ifdef CONFIG_COMPAT
 #define TS_I386_REGS_POKED     0x0004  /* regs poked by 32-bit ptracer */
+
+#define arch_set_restart_data(restart) \
+       do { restart->arch_data = current_thread_info()->status; } while (0)
+
 #endif
-#ifndef __ASSEMBLY__
 
 #ifdef CONFIG_X86_32
 #define in_ia32_syscall() true
index 664d461..8e574c0 100644 (file)
        UNWIND_HINT_REGS base=\base offset=\offset partial=1
 .endm
 
-.macro UNWIND_HINT_FUNC sp_offset=8
-       UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=\sp_offset type=UNWIND_HINT_TYPE_CALL
-.endm
-
-/*
- * RET_OFFSET: Used on instructions that terminate a function; mostly RETURN
- * and sibling calls. On these, sp_offset denotes the expected offset from
- * initial_func_cfi.
- */
-.macro UNWIND_HINT_RET_OFFSET sp_offset=8
-       UNWIND_HINT sp_reg=ORC_REG_SP type=UNWIND_HINT_TYPE_RET_OFFSET sp_offset=\sp_offset
+.macro UNWIND_HINT_FUNC
+       UNWIND_HINT sp_reg=ORC_REG_SP sp_offset=8 type=UNWIND_HINT_TYPE_FUNC
 .endm
 
 #endif /* __ASSEMBLY__ */
index 5eeb808..2ddf083 100644 (file)
@@ -116,7 +116,6 @@ obj-$(CONFIG_VM86)          += vm86_32.o
 obj-$(CONFIG_EARLY_PRINTK)     += early_printk.o
 
 obj-$(CONFIG_HPET_TIMER)       += hpet.o
-obj-$(CONFIG_APB_TIMER)                += apb_timer.o
 
 obj-$(CONFIG_AMD_NB)           += amd_nb.o
 obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o
index f1bb57b..cf340d8 100644 (file)
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-OBJECT_FILES_NON_STANDARD_wakeup_$(BITS).o := y
 
 obj-$(CONFIG_ACPI)             += boot.o
 obj-$(CONFIG_ACPI_SLEEP)       += sleep.o wakeup_$(BITS).o
index 7bdc023..14cd318 100644 (file)
@@ -1554,10 +1554,18 @@ void __init acpi_boot_table_init(void)
        /*
         * Initialize the ACPI boot-time table parser.
         */
-       if (acpi_table_init()) {
+       if (acpi_locate_initial_tables())
                disable_acpi();
-               return;
-       }
+       else
+               acpi_reserve_initial_tables();
+}
+
+int __init early_acpi_boot_init(void)
+{
+       if (acpi_disabled)
+               return 1;
+
+       acpi_table_init_complete();
 
        acpi_table_parse(ACPI_SIG_BOOT, acpi_parse_sbf);
 
@@ -1570,18 +1578,9 @@ void __init acpi_boot_table_init(void)
                } else {
                        printk(KERN_WARNING PREFIX "Disabling ACPI support\n");
                        disable_acpi();
-                       return;
+                       return 1;
                }
        }
-}
-
-int __init early_acpi_boot_init(void)
-{
-       /*
-        * If acpi_disabled, bail out
-        */
-       if (acpi_disabled)
-               return 1;
 
        /*
         * Process the Multiple APIC Description Table (MADT), if present
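
The split above turns one-shot table setup into two phases: locate and reserve the tables before the allocator is up, then complete initialization and parse from early_acpi_boot_init(). A schematic sketch of that two-phase shape (function names below are stand-ins, not the ACPI core API):

#include <stdbool.h>
#include <stdio.h>

static bool tables_ready;

/* Phase 1: runs before the allocator is up; only find and reserve */
static void boot_table_init(void)
{
	/* a locate step stands in for acpi_locate_initial_tables() */
	tables_ready = true;
}

/* Phase 2: runs once allocations work; finish setup and parse */
static int early_boot_init(void)
{
	if (!tables_ready)
		return 1;
	puts("parsing BOOT/MADT");	/* acpi_table_parse() would go here */
	return 0;
}

int main(void)
{
	boot_table_init();
	return early_boot_init();
}
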
index 5d3a0b8..56b6865 100644 (file)
@@ -1,12 +1,14 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 .text
 #include <linux/linkage.h>
+#include <linux/objtool.h>
 #include <asm/segment.h>
 #include <asm/pgtable_types.h>
 #include <asm/page_types.h>
 #include <asm/msr.h>
 #include <asm/asm-offsets.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
 
 # Copyright 2003 Pavel Machek <pavel@suse.cz
 
@@ -39,6 +41,7 @@ SYM_FUNC_START(wakeup_long64)
        movq    saved_rbp, %rbp
 
        movq    saved_rip, %rax
+       ANNOTATE_RETPOLINE_SAFE
        jmp     *%rax
 SYM_FUNC_END(wakeup_long64)
 
@@ -126,6 +129,7 @@ SYM_FUNC_START(do_suspend_lowlevel)
        FRAME_END
        jmp     restore_processor_state
 SYM_FUNC_END(do_suspend_lowlevel)
+STACK_FRAME_NON_STANDARD do_suspend_lowlevel
 
 .data
 saved_rbp:             .quad   0
diff --git a/arch/x86/kernel/apb_timer.c b/arch/x86/kernel/apb_timer.c
deleted file mode 100644 (file)
index 263eead..0000000
+++ /dev/null
@@ -1,347 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * apb_timer.c: Driver for Langwell APB timers
- *
- * (C) Copyright 2009 Intel Corporation
- * Author: Jacob Pan (jacob.jun.pan@intel.com)
- *
- * Note:
- * Langwell is the south complex of Intel Moorestown MID platform. There are
- * eight external timers in total that can be used by the operating system.
- * The timer information, such as frequency and addresses, is provided to the
- * OS via SFI tables.
- * Timer interrupts are routed via FW/HW emulated IOAPIC independently via
- * individual redirection table entries (RTE).
- * Unlike HPET, there is no master counter, therefore one of the timers are
- * used as clocksource. The overall allocation looks like:
- *  - timer 0 - NR_CPUs for per cpu timer
- *  - one timer for clocksource
- *  - one timer for watchdog driver.
- * It is also worth notice that APB timer does not support true one-shot mode,
- * free-running mode will be used here to emulate one-shot mode.
- * APB timer can also be used as broadcast timer along with per cpu local APIC
- * timer, but by default APB timer has higher rating than local APIC timers.
- */
-
-#include <linux/delay.h>
-#include <linux/dw_apb_timer.h>
-#include <linux/errno.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/pm.h>
-#include <linux/sfi.h>
-#include <linux/interrupt.h>
-#include <linux/cpu.h>
-#include <linux/irq.h>
-
-#include <asm/fixmap.h>
-#include <asm/apb_timer.h>
-#include <asm/intel-mid.h>
-#include <asm/time.h>
-
-#define APBT_CLOCKEVENT_RATING         110
-#define APBT_CLOCKSOURCE_RATING                250
-
-#define APBT_CLOCKEVENT0_NUM   (0)
-#define APBT_CLOCKSOURCE_NUM   (2)
-
-static phys_addr_t apbt_address;
-static int apb_timer_block_enabled;
-static void __iomem *apbt_virt_address;
-
-/*
- * Common DW APB timer info
- */
-static unsigned long apbt_freq;
-
-struct apbt_dev {
-       struct dw_apb_clock_event_device        *timer;
-       unsigned int                            num;
-       int                                     cpu;
-       unsigned int                            irq;
-       char                                    name[10];
-};
-
-static struct dw_apb_clocksource *clocksource_apbt;
-
-static inline void __iomem *adev_virt_addr(struct apbt_dev *adev)
-{
-       return apbt_virt_address + adev->num * APBTMRS_REG_SIZE;
-}
-
-static DEFINE_PER_CPU(struct apbt_dev, cpu_apbt_dev);
-
-#ifdef CONFIG_SMP
-static unsigned int apbt_num_timers_used;
-#endif
-
-static inline void apbt_set_mapping(void)
-{
-       struct sfi_timer_table_entry *mtmr;
-       int phy_cs_timer_id = 0;
-
-       if (apbt_virt_address) {
-               pr_debug("APBT base already mapped\n");
-               return;
-       }
-       mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
-       if (mtmr == NULL) {
-               printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
-                      APBT_CLOCKEVENT0_NUM);
-               return;
-       }
-       apbt_address = (phys_addr_t)mtmr->phys_addr;
-       if (!apbt_address) {
-               printk(KERN_WARNING "No timer base from SFI, use default\n");
-               apbt_address = APBT_DEFAULT_BASE;
-       }
-       apbt_virt_address = ioremap(apbt_address, APBT_MMAP_SIZE);
-       if (!apbt_virt_address) {
-               pr_debug("Failed mapping APBT phy address at %lu\n",\
-                        (unsigned long)apbt_address);
-               goto panic_noapbt;
-       }
-       apbt_freq = mtmr->freq_hz;
-       sfi_free_mtmr(mtmr);
-
-       /* Now figure out the physical timer id for clocksource device */
-       mtmr = sfi_get_mtmr(APBT_CLOCKSOURCE_NUM);
-       if (mtmr == NULL)
-               goto panic_noapbt;
-
-       /* Now figure out the physical timer id */
-       pr_debug("Use timer %d for clocksource\n",
-                (int)(mtmr->phys_addr & 0xff) / APBTMRS_REG_SIZE);
-       phy_cs_timer_id = (unsigned int)(mtmr->phys_addr & 0xff) /
-               APBTMRS_REG_SIZE;
-
-       clocksource_apbt = dw_apb_clocksource_init(APBT_CLOCKSOURCE_RATING,
-               "apbt0", apbt_virt_address + phy_cs_timer_id *
-               APBTMRS_REG_SIZE, apbt_freq);
-       return;
-
-panic_noapbt:
-       panic("Failed to setup APB system timer\n");
-
-}
-
-static inline void apbt_clear_mapping(void)
-{
-       iounmap(apbt_virt_address);
-       apbt_virt_address = NULL;
-}
-
-static int __init apbt_clockevent_register(void)
-{
-       struct sfi_timer_table_entry *mtmr;
-       struct apbt_dev *adev = this_cpu_ptr(&cpu_apbt_dev);
-
-       mtmr = sfi_get_mtmr(APBT_CLOCKEVENT0_NUM);
-       if (mtmr == NULL) {
-               printk(KERN_ERR "Failed to get MTMR %d from SFI\n",
-                      APBT_CLOCKEVENT0_NUM);
-               return -ENODEV;
-       }
-
-       adev->num = smp_processor_id();
-       adev->timer = dw_apb_clockevent_init(smp_processor_id(), "apbt0",
-               intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ?
-               APBT_CLOCKEVENT_RATING - 100 : APBT_CLOCKEVENT_RATING,
-               adev_virt_addr(adev), 0, apbt_freq);
-       /* Firmware does EOI handling for us. */
-       adev->timer->eoi = NULL;
-
-       if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
-               global_clock_event = &adev->timer->ced;
-               printk(KERN_DEBUG "%s clockevent registered as global\n",
-                      global_clock_event->name);
-       }
-
-       dw_apb_clockevent_register(adev->timer);
-
-       sfi_free_mtmr(mtmr);
-       return 0;
-}
-
-#ifdef CONFIG_SMP
-
-static void apbt_setup_irq(struct apbt_dev *adev)
-{
-       irq_modify_status(adev->irq, 0, IRQ_MOVE_PCNTXT);
-       irq_set_affinity(adev->irq, cpumask_of(adev->cpu));
-}
-
-/* Should be called with per cpu */
-void apbt_setup_secondary_clock(void)
-{
-       struct apbt_dev *adev;
-       int cpu;
-
-       /* Don't register boot CPU clockevent */
-       cpu = smp_processor_id();
-       if (!cpu)
-               return;
-
-       adev = this_cpu_ptr(&cpu_apbt_dev);
-       if (!adev->timer) {
-               adev->timer = dw_apb_clockevent_init(cpu, adev->name,
-                       APBT_CLOCKEVENT_RATING, adev_virt_addr(adev),
-                       adev->irq, apbt_freq);
-               adev->timer->eoi = NULL;
-       } else {
-               dw_apb_clockevent_resume(adev->timer);
-       }
-
-       printk(KERN_INFO "Registering CPU %d clockevent device %s, cpu %08x\n",
-              cpu, adev->name, adev->cpu);
-
-       apbt_setup_irq(adev);
-       dw_apb_clockevent_register(adev->timer);
-
-       return;
-}
-
-/*
- * this notify handler process CPU hotplug events. in case of S0i3, nonboot
- * cpus are disabled/enabled frequently, for performance reasons, we keep the
- * per cpu timer irq registered so that we do need to do free_irq/request_irq.
- *
- * TODO: it might be more reliable to directly disable percpu clockevent device
- * without the notifier chain. currently, cpu 0 may get interrupts from other
- * cpu timers during the offline process due to the ordering of notification.
- * the extra interrupt is harmless.
- */
-static int apbt_cpu_dead(unsigned int cpu)
-{
-       struct apbt_dev *adev = &per_cpu(cpu_apbt_dev, cpu);
-
-       dw_apb_clockevent_pause(adev->timer);
-       if (system_state == SYSTEM_RUNNING) {
-               pr_debug("skipping APBT CPU %u offline\n", cpu);
-       } else {
-               pr_debug("APBT clockevent for cpu %u offline\n", cpu);
-               dw_apb_clockevent_stop(adev->timer);
-       }
-       return 0;
-}
-
-static __init int apbt_late_init(void)
-{
-       if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT ||
-               !apb_timer_block_enabled)
-               return 0;
-       return cpuhp_setup_state(CPUHP_X86_APB_DEAD, "x86/apb:dead", NULL,
-                                apbt_cpu_dead);
-}
-fs_initcall(apbt_late_init);
-#else
-
-void apbt_setup_secondary_clock(void) {}
-
-#endif /* CONFIG_SMP */
-
-static int apbt_clocksource_register(void)
-{
-       u64 start, now;
-       u64 t1;
-
-       /* Start the counter, use timer 2 as source, timer 0/1 for event */
-       dw_apb_clocksource_start(clocksource_apbt);
-
-       /* Verify whether apbt counter works */
-       t1 = dw_apb_clocksource_read(clocksource_apbt);
-       start = rdtsc();
-
-       /*
-        * We don't know the TSC frequency yet, but waiting for
-        * 200000 TSC cycles is safe:
-        * 4 GHz == 50us
-        * 1 GHz == 200us
-        */
-       do {
-               rep_nop();
-               now = rdtsc();
-       } while ((now - start) < 200000UL);
-
-       /* APBT is the only always on clocksource, it has to work! */
-       if (t1 == dw_apb_clocksource_read(clocksource_apbt))
-               panic("APBT counter not counting. APBT disabled\n");
-
-       dw_apb_clocksource_register(clocksource_apbt);
-
-       return 0;
-}
-
-/*
- * Early setup the APBT timer, only use timer 0 for booting then switch to
- * per CPU timer if possible.
- * returns 1 if per cpu apbt is setup
- * returns 0 if no per cpu apbt is chosen
- * panic if set up failed, this is the only platform timer on Moorestown.
- */
-void __init apbt_time_init(void)
-{
-#ifdef CONFIG_SMP
-       int i;
-       struct sfi_timer_table_entry *p_mtmr;
-       struct apbt_dev *adev;
-#endif
-
-       if (apb_timer_block_enabled)
-               return;
-       apbt_set_mapping();
-       if (!apbt_virt_address)
-               goto out_noapbt;
-       /*
-        * Read the frequency and check for a sane value, for ESL model
-        * we extend the possible clock range to allow time scaling.
-        */
-
-       if (apbt_freq < APBT_MIN_FREQ || apbt_freq > APBT_MAX_FREQ) {
-               pr_debug("APBT has invalid freq 0x%lx\n", apbt_freq);
-               goto out_noapbt;
-       }
-       if (apbt_clocksource_register()) {
-               pr_debug("APBT has failed to register clocksource\n");
-               goto out_noapbt;
-       }
-       if (!apbt_clockevent_register())
-               apb_timer_block_enabled = 1;
-       else {
-               pr_debug("APBT has failed to register clockevent\n");
-               goto out_noapbt;
-       }
-#ifdef CONFIG_SMP
-       /* kernel cmdline disable apb timer, so we will use lapic timers */
-       if (intel_mid_timer_options == INTEL_MID_TIMER_LAPIC_APBT) {
-               printk(KERN_INFO "apbt: disabled per cpu timer\n");
-               return;
-       }
-       pr_debug("%s: %d CPUs online\n", __func__, num_online_cpus());
-       if (num_possible_cpus() <= sfi_mtimer_num)
-               apbt_num_timers_used = num_possible_cpus();
-       else
-               apbt_num_timers_used = 1;
-       pr_debug("%s: %d APB timers used\n", __func__, apbt_num_timers_used);
-
-       /* here we set up per CPU timer data structure */
-       for (i = 0; i < apbt_num_timers_used; i++) {
-               adev = &per_cpu(cpu_apbt_dev, i);
-               adev->num = i;
-               adev->cpu = i;
-               p_mtmr = sfi_get_mtmr(i);
-               if (p_mtmr)
-                       adev->irq = p_mtmr->irq;
-               else
-                       printk(KERN_ERR "Failed to get timer for cpu %d\n", i);
-               snprintf(adev->name, sizeof(adev->name) - 1, "apbt%d", i);
-       }
-#endif
-
-       return;
-
-out_noapbt:
-       apbt_clear_mapping();
-       apb_timer_block_enabled = 0;
-       panic("failed to enable APB timer\n");
-}
index 819db00..4f26700 100644 (file)
@@ -2138,18 +2138,11 @@ void __init register_lapic_address(unsigned long address)
  * Local APIC interrupts
  */
 
-/**
- * spurious_interrupt - Catch all for interrupts raised on unused vectors
- * @regs:      Pointer to pt_regs on stack
- * @vector:    The vector number
- *
- * This is invoked from ASM entry code to catch all interrupts which
- * trigger on an entry which is routed to the common_spurious idtentry
- * point.
- *
- * Also called from sysvec_spurious_apic_interrupt().
+/*
+ * Common handling code for spurious_interrupt and spurious_vector entry
+ * points below. No point in allowing the compiler to inline it twice.
  */
-DEFINE_IDTENTRY_IRQ(spurious_interrupt)
+static noinline void handle_spurious_interrupt(u8 vector)
 {
        u32 v;
 
@@ -2184,9 +2177,23 @@ out:
        trace_spurious_apic_exit(vector);
 }
 
+/**
+ * spurious_interrupt - Catch all for interrupts raised on unused vectors
+ * @regs:      Pointer to pt_regs on stack
+ * @vector:    The vector number
+ *
+ * This is invoked from ASM entry code to catch all interrupts which
+ * trigger on an entry which is routed to the common_spurious idtentry
+ * point.
+ */
+DEFINE_IDTENTRY_IRQ(spurious_interrupt)
+{
+       handle_spurious_interrupt(vector);
+}
+
 DEFINE_IDTENTRY_SYSVEC(sysvec_spurious_apic_interrupt)
 {
-       __spurious_interrupt(regs, SPURIOUS_APIC_VECTOR);
+       handle_spurious_interrupt(SPURIOUS_APIC_VECTOR);
 }
 
 /*
@@ -2335,6 +2342,11 @@ static int cpuid_to_apicid[] = {
        [0 ... NR_CPUS - 1] = -1,
 };
 
+bool arch_match_cpu_phys_id(int cpu, u64 phys_id)
+{
+       return phys_id == cpuid_to_apicid[cpu];
+}
+
 #ifdef CONFIG_SMP
 /**
  * apic_id_is_primary_thread - Check whether APIC ID belongs to a primary thread
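
Factoring the body into a noinline helper means the compiler emits the spurious-interrupt handling once, and both entry points stay thin. A sketch of the pattern (names illustrative):

#include <stdio.h>

static __attribute__((noinline)) void handle_spurious(unsigned char vector)
{
	printf("spurious vector %#x\n", vector);
}

/* Two thin entry points share the single out-of-line body */
void irq_entry(unsigned char vector) { handle_spurious(vector); }
void sysvec_entry(void)              { handle_spurious(0xff); }

int main(void)
{
	irq_entry(0x20);
	sysvec_entry();
	return 0;
}
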
index e4ab480..73ff4dd 100644 (file)
@@ -198,7 +198,7 @@ static int __init parse_noapic(char *str)
 }
 early_param("noapic", parse_noapic);
 
-/* Will be called in mpparse/acpi/sfi codes for saving IRQ info */
+/* Will be called in mpparse/ACPI code for saving IRQ info */
 void mp_save_irq(struct mpc_intsrc *m)
 {
        int i;
@@ -1032,6 +1032,16 @@ static int mp_map_pin_to_irq(u32 gsi, int idx, int ioapic, int pin,
        if (idx >= 0 && test_bit(mp_irqs[idx].srcbus, mp_bus_not_pci)) {
                irq = mp_irqs[idx].srcbusirq;
                legacy = mp_is_legacy_irq(irq);
+               /*
+                * IRQ2 is unusable for historical reasons on systems which
+                * have a legacy PIC. See the comment vs. IRQ2 further down.
+                *
+                * If this gets removed at some point then the related code
+                * in lapic_assign_system_vectors() needs to be adjusted as
+                * well.
+                */
+               if (legacy && irq == PIC_CASCADE_IR)
+                       return -EINVAL;
        }
 
        mutex_lock(&ioapic_mutex);
@@ -2863,7 +2873,7 @@ int mp_register_ioapic(int id, u32 address, u32 gsi_base,
 
        /*
         * If mp_register_ioapic() is called during early boot stage when
-        * walking ACPI/SFI/DT tables, it's too early to create irqdomain,
+        * walking ACPI/DT tables, it's too early to create irqdomain,
         * we are still using bootmem allocator. So delay it to setup_IO_APIC().
         */
        if (hotplug) {
index 0b2c039..23f5f27 100644 (file)
@@ -10,6 +10,8 @@
  */
 
 #include <linux/interrupt.h>
+
+#include <asm/acrn.h>
 #include <asm/apic.h>
 #include <asm/cpufeatures.h>
 #include <asm/desc.h>
@@ -19,7 +21,7 @@
 
 static u32 __init acrn_detect(void)
 {
-       return hypervisor_cpuid_base("ACRNACRNACRN", 0);
+       return acrn_cpuid_base();
 }
 
 static void __init acrn_init_platform(void)
@@ -55,6 +57,18 @@ DEFINE_IDTENTRY_SYSVEC(sysvec_acrn_hv_callback)
        set_irq_regs(old_regs);
 }
 
+void acrn_setup_intr_handler(void (*handler)(void))
+{
+       acrn_intr_handler = handler;
+}
+EXPORT_SYMBOL_GPL(acrn_setup_intr_handler);
+
+void acrn_remove_intr_handler(void)
+{
+       acrn_intr_handler = NULL;
+}
+EXPORT_SYMBOL_GPL(acrn_remove_intr_handler);
+
 const __initconst struct hypervisor_x86 x86_hyper_acrn = {
        .name                   = "ACRN",
        .detect                 = acrn_detect,
index 9215b91..ab640ab 100644 (file)
@@ -1742,8 +1742,8 @@ DEFINE_PER_CPU(struct task_struct *, current_task) ____cacheline_aligned =
        &init_task;
 EXPORT_PER_CPU_SYMBOL(current_task);
 
-DEFINE_PER_CPU(struct irq_stack *, hardirq_stack_ptr);
-DEFINE_PER_CPU(unsigned int, irq_count) __visible = -1;
+DEFINE_PER_CPU(void *, hardirq_stack_ptr);
+DEFINE_PER_CPU(bool, hardirq_stack_inuse);
 
 DEFINE_PER_CPU(int, __preempt_count) = INIT_PREEMPT_COUNT;
 EXPORT_PER_CPU_SYMBOL(__preempt_count);
index 1dd8513..5601b95 100644 (file)
@@ -128,12 +128,21 @@ static __always_inline bool in_exception_stack(unsigned long *stack, struct stac
 
 static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info *info)
 {
-       unsigned long *end   = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
-       unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));
+       unsigned long *end = (unsigned long *)this_cpu_read(hardirq_stack_ptr);
+       unsigned long *begin;
 
        /*
-        * This is a software stack, so 'end' can be a valid stack pointer.
-        * It just means the stack is empty.
+        * @end points directly to the topmost stack entry to avoid a -8
+        * adjustment in the stack switch hotpath. Adjust it back before
+        * calculating @begin.
+        */
+       end++;
+       begin = end - (IRQ_STACK_SIZE / sizeof(long));
+
+       /*
+        * Due to the switching logic RSP can never be == @end because the
+        * final operation is 'popq %rsp' which means after that RSP points
+        * to the original stack and not to @end.
         */
        if (stack < begin || stack >= end)
                return false;
@@ -143,8 +152,9 @@ static __always_inline bool in_irq_stack(unsigned long *stack, struct stack_info
        info->end       = end;
 
        /*
-        * The next stack pointer is the first thing pushed by the entry code
-        * after switching to the irq stack.
+        * The next stack pointer is stored at the top of the irq stack
+        * before switching to the irq stack. Actual stack entries are all
+        * below that.
         */
        info->next_sp = (unsigned long *)*(end - 1);
 
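
With hardirq_stack_ptr now storing the real top of stack minus 8, in_irq_stack() has to add one slot back before deriving the stack base. A user-space sketch of the adjusted bounds check (assuming LP64, so sizeof(long) == 8; the stack size value is illustrative):

#include <stdbool.h>
#include <stdio.h>

#define IRQ_STACK_SIZE 16384

/* assumption: tos_minus_8 mimics hardirq_stack_ptr after this change */
static bool in_irq_stack(unsigned long *sp, unsigned long *tos_minus_8)
{
	unsigned long *end = tos_minus_8 + 1;	/* undo the -8 bias */
	unsigned long *begin = end - (IRQ_STACK_SIZE / sizeof(long));

	/* sp can never equal @end: the final 'popq %rsp' already left */
	return sp >= begin && sp < end;
}

int main(void)
{
	static unsigned long stack[IRQ_STACK_SIZE / sizeof(long)];
	unsigned long *tos = &stack[IRQ_STACK_SIZE / sizeof(long)] - 1;

	printf("%d\n", in_irq_stack(&stack[10], tos));	/* 1 */
	printf("%d\n", in_irq_stack(tos + 1, tos));	/* 0: == end */
	return 0;
}
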
index 0d54099..7c27384 100644 (file)
@@ -184,6 +184,7 @@ SYM_INNER_LABEL(ftrace_graph_call, SYM_L_GLOBAL)
  * It is also used to copy the retq for trampolines.
  */
 SYM_INNER_LABEL_ALIGN(ftrace_stub, SYM_L_WEAK)
+       UNWIND_HINT_FUNC
        retq
 SYM_FUNC_END(ftrace_epilogue)
 
@@ -276,7 +277,7 @@ SYM_INNER_LABEL(ftrace_regs_caller_end, SYM_L_GLOBAL)
        restore_mcount_regs 8
        /* Restore flags */
        popfq
-       UNWIND_HINT_RET_OFFSET
+       UNWIND_HINT_FUNC
        jmp     ftrace_epilogue
 
 SYM_FUNC_END(ftrace_regs_caller)
@@ -333,8 +334,7 @@ SYM_FUNC_START(ftrace_graph_caller)
        retq
 SYM_FUNC_END(ftrace_graph_caller)
 
-SYM_CODE_START(return_to_handler)
-       UNWIND_HINT_EMPTY
+SYM_FUNC_START(return_to_handler)
        subq  $24, %rsp
 
        /* Save the return values */
@@ -349,5 +349,5 @@ SYM_CODE_START(return_to_handler)
        movq (%rsp), %rax
        addq $24, %rsp
        JMP_NOSPEC rdi
-SYM_CODE_END(return_to_handler)
+SYM_FUNC_END(return_to_handler)
 #endif
index d4ad344..58aa712 100644 (file)
@@ -228,7 +228,7 @@ static __always_inline void handle_irq(struct irq_desc *desc,
                                       struct pt_regs *regs)
 {
        if (IS_ENABLED(CONFIG_X86_64))
-               run_irq_on_irqstack_cond(desc->handle_irq, desc, regs);
+               generic_handle_irq_desc(desc);
        else
                __handle_irq(desc, regs);
 }
index 0b79efc..044902d 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <asm/apic.h>
 #include <asm/nospec-branch.h>
+#include <asm/softirq_stack.h>
 
 #ifdef CONFIG_DEBUG_STACKOVERFLOW
 
index 440eed5..1c0fb96 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/sched/task_stack.h>
 
 #include <asm/cpu_entry_area.h>
+#include <asm/softirq_stack.h>
 #include <asm/irq_stack.h>
 #include <asm/io_apic.h>
 #include <asm/apic.h>
@@ -48,7 +49,8 @@ static int map_irq_stack(unsigned int cpu)
        if (!va)
                return -ENOMEM;
 
-       per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+       /* Store actual TOS to avoid adjustment in the hotpath */
+       per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
        return 0;
 }
 #else
@@ -60,7 +62,8 @@ static int map_irq_stack(unsigned int cpu)
 {
        void *va = per_cpu_ptr(&irq_stack_backing_store, cpu);
 
-       per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE;
+       /* Store actual TOS to avoid adjustment in the hotpath */
+       per_cpu(hardirq_stack_ptr, cpu) = va + IRQ_STACK_SIZE - 8;
        return 0;
 }
 #endif
@@ -71,8 +74,3 @@ int irq_init_percpu_irqstack(unsigned int cpu)
                return 0;
        return map_irq_stack(cpu);
 }
-
-void do_softirq_own_stack(void)
-{
-       run_on_irqstack_cond(__do_softirq, NULL);
-}
index 373e5fa..51c7f52 100644 (file)
@@ -12,7 +12,7 @@
 
 #include "common.h"
 
-/* Ftrace callback handler for kprobes -- called under preepmt disabed */
+/* Ftrace callback handler for kprobes -- called under preempt disabled */
 void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip,
                           struct ftrace_ops *ops, struct ftrace_regs *fregs)
 {
index 5e78e01..78bb0fa 100644 (file)
@@ -836,28 +836,25 @@ static void kvm_kick_cpu(int cpu)
 
 static void kvm_wait(u8 *ptr, u8 val)
 {
-       unsigned long flags;
-
        if (in_nmi())
                return;
 
-       local_irq_save(flags);
-
-       if (READ_ONCE(*ptr) != val)
-               goto out;
-
        /*
         * Halt until it's our turn and we're kicked. Note that we use safe halt
         * for the irq-enabled case to avoid a hang when lock info is overwritten
         * in the irq spinlock slowpath and no spurious interrupt occurs to save us.
         */
-       if (arch_irqs_disabled_flags(flags))
-               halt();
-       else
-               safe_halt();
+       if (irqs_disabled()) {
+               if (READ_ONCE(*ptr) == val)
+                       halt();
+       } else {
+               local_irq_disable();
 
-out:
-       local_irq_restore(flags);
+               if (READ_ONCE(*ptr) == val)
+                       safe_halt();
+
+               local_irq_enable();
+       }
 }
 
 #ifdef CONFIG_X86_32
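
The rework closes a race in the irqs-enabled path: the final check of *ptr
has to happen with interrupts disabled, and safe_halt() re-enables them
atomically with the hlt so a kick cannot slip in between. A userspace model
of that control flow (halt()/safe_halt() and the irqs_off flag are
stand-ins, not kernel code):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_uchar lock_byte;
    static bool irqs_off;

    static void halt(void)      { puts("hlt (irqs stay off)"); }
    static void safe_halt(void) { puts("sti;hlt (enable + halt atomically)"); }

    static void kvm_wait_model(atomic_uchar *ptr, unsigned char val)
    {
            if (irqs_off) {
                    /* Caller already has interrupts off: re-check and halt. */
                    if (atomic_load(ptr) == val)
                            halt();
            } else {
                    /* Disable interrupts so the kick IPI cannot land between
                     * the re-check and the halt; safe_halt() re-enables them
                     * atomically with the hlt. */
                    irqs_off = true;        /* models local_irq_disable() */
                    if (atomic_load(ptr) == val)
                            safe_halt();
                    irqs_off = false;       /* models local_irq_enable() */
            }
    }

    int main(void)
    {
            atomic_store(&lock_byte, 1);
            kvm_wait_model(&lock_byte, 1);
            return 0;
    }
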
index aa59374..1fc0962 100644 (file)
@@ -268,21 +268,20 @@ static void __init kvmclock_init_mem(void)
 
 static int __init kvm_setup_vsyscall_timeinfo(void)
 {
-#ifdef CONFIG_X86_64
-       u8 flags;
+       kvmclock_init_mem();
 
-       if (!per_cpu(hv_clock_per_cpu, 0) || !kvmclock_vsyscall)
-               return 0;
+#ifdef CONFIG_X86_64
+       if (per_cpu(hv_clock_per_cpu, 0) && kvmclock_vsyscall) {
+               u8 flags;
 
-       flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
-       if (!(flags & PVCLOCK_TSC_STABLE_BIT))
-               return 0;
+               flags = pvclock_read_flags(&hv_clock_boot[0].pvti);
+               if (!(flags & PVCLOCK_TSC_STABLE_BIT))
+                       return 0;
 
-       kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+               kvm_clock.vdso_clock_mode = VDSO_CLOCKMODE_PVCLOCK;
+       }
 #endif
 
-       kvmclock_init_mem();
-
        return 0;
 }
 early_initcall(kvm_setup_vsyscall_timeinfo);
index 145a7ac..9c214d7 100644 (file)
@@ -161,7 +161,7 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, unsigned long arg,
 #endif
 
        /* Kernel thread ? */
-       if (unlikely(p->flags & PF_KTHREAD)) {
+       if (unlikely(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                memset(childregs, 0, sizeof(struct pt_regs));
                kthread_frame_init(frame, sp, arg);
                return 0;
index ad582f9..d08307d 100644 (file)
@@ -539,7 +539,7 @@ __switch_to(struct task_struct *prev_p, struct task_struct *next_p)
        int cpu = smp_processor_id();
 
        WARN_ON_ONCE(IS_ENABLED(CONFIG_DEBUG_ENTRY) &&
-                    this_cpu_read(irq_count) != -1);
+                    this_cpu_read(hardirq_stack_inuse));
 
        if (!test_thread_flag(TIF_NEED_FPU_LOAD))
                switch_fpu_prepare(prev_fpu, cpu);
index 740f3bd..5ecd69a 100644 (file)
@@ -16,7 +16,6 @@
 #include <linux/memblock.h>
 #include <linux/pci.h>
 #include <linux/root_dev.h>
-#include <linux/sfi.h>
 #include <linux/hugetlb.h>
 #include <linux/tboot.h>
 #include <linux/usb/xhci-dbgp.h>
@@ -1046,6 +1045,9 @@ void __init setup_arch(char **cmdline_p)
 
        cleanup_highmap();
 
+       /* Look for ACPI tables and reserve memory occupied by them. */
+       acpi_boot_table_init();
+
        memblock_set_current_limit(ISA_END_ADDRESS);
        e820__memblock_setup();
 
@@ -1137,11 +1139,6 @@ void __init setup_arch(char **cmdline_p)
 
        early_platform_quirks();
 
-       /*
-        * Parse the ACPI tables for possible boot-time SMP configuration.
-        */
-       acpi_boot_table_init();
-
        early_acpi_boot_init();
 
        initmem_init();
@@ -1185,7 +1182,6 @@ void __init setup_arch(char **cmdline_p)
         * Read APIC and some other early information from ACPI tables.
         */
        acpi_boot_init();
-       sfi_init();
        x86_dtb_init();
 
        /*
index 84c1821..04a780a 100644 (file)
@@ -121,8 +121,18 @@ static void __init setup_vc_stacks(int cpu)
        cea_set_pte((void *)vaddr, pa, PAGE_KERNEL);
 }
 
-static __always_inline bool on_vc_stack(unsigned long sp)
+static __always_inline bool on_vc_stack(struct pt_regs *regs)
 {
+       unsigned long sp = regs->sp;
+
+       /* User-mode RSP is not trusted */
+       if (user_mode(regs))
+               return false;
+
+       /* SYSCALL gap still has user-mode RSP */
+       if (ip_within_syscall_gap(regs))
+               return false;
+
        return ((sp >= __this_cpu_ist_bottom_va(VC)) && (sp < __this_cpu_ist_top_va(VC)));
 }
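
on_vc_stack() now takes the full pt_regs so it can refuse to trust RSP
whenever it is still user-controlled: directly in user mode, or in the
SYSCALL gap where RSP has not been switched yet. A toy model with made-up
bounds (none of these values or helpers are kernel code):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct regs { uint64_t sp; uint64_t ip; bool user; };

    /* Hypothetical #VC IST stack bounds and SYSCALL gap window. */
    static const uint64_t vc_bottom = 0x1000, vc_top = 0x2000;
    static const uint64_t gap_start = 0x5000, gap_end = 0x5040;

    static bool ip_within_syscall_gap(const struct regs *r)
    {
            return r->ip >= gap_start && r->ip < gap_end;
    }

    static bool on_vc_stack(const struct regs *r)
    {
            if (r->user)                    /* user RSP is not trusted */
                    return false;
            if (ip_within_syscall_gap(r))   /* RSP still holds user value */
                    return false;
            return r->sp >= vc_bottom && r->sp < vc_top;
    }

    int main(void)
    {
            struct regs gap = { .sp = 0x1800, .ip = 0x5010, .user = false };

            printf("%d\n", on_vc_stack(&gap)); /* 0: gap RSP untrusted */
            return 0;
    }
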
 
@@ -144,7 +154,7 @@ void noinstr __sev_es_ist_enter(struct pt_regs *regs)
        old_ist = __this_cpu_read(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC]);
 
        /* Make room on the IST stack */
-       if (on_vc_stack(regs->sp))
+       if (on_vc_stack(regs))
                new_ist = ALIGN_DOWN(regs->sp, 8) - sizeof(old_ist);
        else
                new_ist = old_ist - sizeof(old_ist);
@@ -248,7 +258,7 @@ static enum es_result vc_decode_insn(struct es_em_ctxt *ctxt)
        int res;
 
        if (user_mode(ctxt->regs)) {
-               res = insn_fetch_from_user(ctxt->regs, buffer);
+               res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
                if (!res) {
                        ctxt->fi.vector     = X86_TRAP_PF;
                        ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
@@ -1248,13 +1258,12 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
 DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 {
        struct sev_es_runtime_data *data = this_cpu_read(runtime_data);
+       irqentry_state_t irq_state;
        struct ghcb_state state;
        struct es_em_ctxt ctxt;
        enum es_result result;
        struct ghcb *ghcb;
 
-       lockdep_assert_irqs_disabled();
-
        /*
         * Handle #DB before calling into !noinstr code to avoid recursive #DB.
         */
@@ -1263,6 +1272,8 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
                return;
        }
 
+       irq_state = irqentry_nmi_enter(regs);
+       lockdep_assert_irqs_disabled();
        instrumentation_begin();
 
        /*
@@ -1325,6 +1336,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 
 out:
        instrumentation_end();
+       irqentry_nmi_exit(regs, irq_state);
 
        return;
 
index ea794a0..f306e85 100644 (file)
@@ -766,30 +766,8 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-       /*
-        * This function is fundamentally broken as currently
-        * implemented.
-        *
-        * The idea is that we want to trigger a call to the
-        * restart_block() syscall and that we want in_ia32_syscall(),
-        * in_x32_syscall(), etc. to match whatever they were in the
-        * syscall being restarted.  We assume that the syscall
-        * instruction at (regs->ip - 2) matches whatever syscall
-        * instruction we used to enter in the first place.
-        *
-        * The problem is that we can get here when ptrace pokes
-        * syscall-like values into regs even if we're not in a syscall
-        * at all.
-        *
-        * For now, we maintain historical behavior and guess based on
-        * stored state.  We could do better by saving the actual
-        * syscall arch in restart_block or (with caveats on x32) by
-        * checking if regs->ip points to 'int $0x80'.  The current
-        * behavior is incorrect if a tracer has a different bitness
-        * than the tracee.
-        */
 #ifdef CONFIG_IA32_EMULATION
-       if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
+       if (current->restart_block.arch_data & TS_COMPAT)
                return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
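
The deleted comment spelled out why guessing from current thread state is
broken: a ptracer can poke syscall-like values into regs, and a tracer with
a different bitness than the tracee guesses wrong. The fix consults a
snapshot kept in restart_block instead. A toy model of that idea (where and
how the kernel fills arch_data is not shown in this hunk, so the entry hook
below is an assumption; all names and numbers are stand-ins):

    #include <stdio.h>

    #define TS_COMPAT                0x0002  /* stand-in flag value */
    #define NR_ia32_restart_syscall  0       /* stand-in numbers    */
    #define NR_restart_syscall       219

    struct restart_block { unsigned long arch_data; };
    struct task { struct restart_block restart_block; };

    /* Assumed entry hook: snapshot the syscall ABI when it begins. */
    static void syscall_entry(struct task *t, int is_compat)
    {
            t->restart_block.arch_data = is_compat ? TS_COMPAT : 0;
    }

    /* At restart time, only the recorded snapshot is trusted. */
    static long get_nr_restart_syscall(const struct task *t)
    {
            if (t->restart_block.arch_data & TS_COMPAT)
                    return NR_ia32_restart_syscall;
            return NR_restart_syscall;
    }

    int main(void)
    {
            struct task t;

            syscall_entry(&t, 1);
            printf("%ld\n", get_nr_restart_syscall(&t)); /* 0: ia32     */
            syscall_entry(&t, 0);
            printf("%ld\n", get_nr_restart_syscall(&t)); /* 219: native */
            return 0;
    }
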
index 02813a7..16703c3 100644 (file)
@@ -1659,13 +1659,17 @@ void play_dead_common(void)
        local_irq_disable();
 }
 
-static bool wakeup_cpu0(void)
+/**
+ * cond_wakeup_cpu0 - Wake up CPU0 if needed.
+ *
+ * If NMI wants to wake up CPU0, start CPU0.
+ */
+void cond_wakeup_cpu0(void)
 {
        if (smp_processor_id() == 0 && enable_start_cpu0)
-               return true;
-
-       return false;
+               start_cpu0();
 }
+EXPORT_SYMBOL_GPL(cond_wakeup_cpu0);
 
 /*
  * We need to flush the caches before going to sleep, lest we have
@@ -1734,11 +1738,8 @@ static inline void mwait_play_dead(void)
                __monitor(mwait_ptr, 0, 0);
                mb();
                __mwait(eax, 0);
-               /*
-                * If NMI wants to wake up CPU0, start CPU0.
-                */
-               if (wakeup_cpu0())
-                       start_cpu0();
+
+               cond_wakeup_cpu0();
        }
 }
 
@@ -1749,11 +1750,8 @@ void hlt_play_dead(void)
 
        while (1) {
                native_halt();
-               /*
-                * If NMI wants to wake up CPU0, start CPU0.
-                */
-               if (wakeup_cpu0())
-                       start_cpu0();
+
+               cond_wakeup_cpu0();
        }
 }
 
index 7f5aec7..ac1874a 100644 (file)
@@ -694,8 +694,7 @@ asmlinkage __visible noinstr struct pt_regs *vc_switch_off_ist(struct pt_regs *r
         * In the SYSCALL entry path the RSP value comes from user-space - don't
         * trust it and switch to the current kernel stack
         */
-       if (regs->ip >= (unsigned long)entry_SYSCALL_64 &&
-           regs->ip <  (unsigned long)entry_SYSCALL_64_safe_stack) {
+       if (ip_within_syscall_gap(regs)) {
                sp = this_cpu_read(cpu_current_top_of_stack);
                goto sync;
        }
index 73f8001..a120253 100644 (file)
@@ -13,7 +13,7 @@
 
 #define orc_warn_current(args...)                                      \
 ({                                                                     \
-       if (state->task == current)                                     \
+       if (state->task == current && !state->error)                    \
                orc_warn(args);                                         \
 })
 
@@ -367,8 +367,8 @@ static bool deref_stack_regs(struct unwind_state *state, unsigned long addr,
        if (!stack_access_ok(state, addr, sizeof(struct pt_regs)))
                return false;
 
-       *ip = regs->ip;
-       *sp = regs->sp;
+       *ip = READ_ONCE_NOCHECK(regs->ip);
+       *sp = READ_ONCE_NOCHECK(regs->sp);
        return true;
 }
 
@@ -380,8 +380,8 @@ static bool deref_stack_iret_regs(struct unwind_state *state, unsigned long addr
        if (!stack_access_ok(state, addr, IRET_FRAME_SIZE))
                return false;
 
-       *ip = regs->ip;
-       *sp = regs->sp;
+       *ip = READ_ONCE_NOCHECK(regs->ip);
+       *sp = READ_ONCE_NOCHECK(regs->sp);
        return true;
 }
 
@@ -402,12 +402,12 @@ static bool get_reg(struct unwind_state *state, unsigned int reg_off,
                return false;
 
        if (state->full_regs) {
-               *val = ((unsigned long *)state->regs)[reg];
+               *val = READ_ONCE_NOCHECK(((unsigned long *)state->regs)[reg]);
                return true;
        }
 
        if (state->prev_regs) {
-               *val = ((unsigned long *)state->prev_regs)[reg];
+               *val = READ_ONCE_NOCHECK(((unsigned long *)state->prev_regs)[reg]);
                return true;
        }
 
@@ -471,7 +471,7 @@ bool unwind_next_frame(struct unwind_state *state)
                break;
 
        case ORC_REG_SP_INDIRECT:
-               sp = state->sp + orc->sp_offset;
+               sp = state->sp;
                indirect = true;
                break;
 
@@ -521,6 +521,9 @@ bool unwind_next_frame(struct unwind_state *state)
        if (indirect) {
                if (!deref_stack_reg(state, sp, &sp))
                        goto err;
+
+               if (orc->sp_reg == ORC_REG_SP_INDIRECT)
+                       sp += orc->sp_offset;
        }
 
        /* Find IP, SP and possibly regs: */
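
The ORC_REG_SP_INDIRECT change fixes an ordering bug: for an indirect SP
the previous frame's SP is stored *at* the current SP, so sp_offset must be
added to the value read from the stack, not to the address that is read. A
toy illustration (assumes 8-byte longs; not the real unwinder):

    #include <stdio.h>

    int main(void)
    {
            unsigned long stack[4] = { 0x7000, 0, 0, 0 }; /* fake stack */
            unsigned long *cur_sp = stack;                /* state->sp  */
            long sp_offset = 16;

            /* Old, buggy order: offset the address, then dereference. */
            unsigned long buggy =
                    *(unsigned long *)((char *)cur_sp + sp_offset);

            /* Fixed order: dereference first, then add the offset. */
            unsigned long fixed = *cur_sp + sp_offset;

            printf("buggy=%#lx fixed=%#lx\n", buggy, fixed); /* 0 vs 0x7010 */
            return 0;
    }
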
index 7ac5926..a788d51 100644 (file)
@@ -103,6 +103,15 @@ config KVM_AMD_SEV
          Provides support for launching Encrypted VMs (SEV) and Encrypted VMs
          with Encrypted State (SEV-ES) on AMD processors.
 
+config KVM_XEN
+       bool "Support for Xen hypercall interface"
+       depends on KVM
+       help
+         Provides KVM support for hosting Xen HVM guests and
+         passing Xen hypercalls to userspace.
+
+         If in doubt, say "N".
+
 config KVM_MMU_AUDIT
        bool "Audit KVM MMU"
        depends on KVM && TRACEPOINTS
index aeab168..eafc4d6 100644 (file)
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 
-ccflags-y += -Iarch/x86/kvm
+ccflags-y += -I $(srctree)/arch/x86/kvm
 ccflags-$(CONFIG_KVM_WERROR) += -Werror
 
 ifeq ($(CONFIG_FRAME_POINTER),y)
@@ -14,11 +14,12 @@ kvm-y                       += $(KVM)/kvm_main.o $(KVM)/coalesced_mmio.o \
                                $(KVM)/dirty_ring.o
 kvm-$(CONFIG_KVM_ASYNC_PF)     += $(KVM)/async_pf.o
 
-kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o xen.o \
+kvm-y                  += x86.o emulate.o i8259.o irq.o lapic.o \
                           i8254.o ioapic.o irq_comm.o cpuid.o pmu.o mtrr.o \
                           hyperv.o debugfs.o mmu/mmu.o mmu/page_track.o \
                           mmu/spte.o
 kvm-$(CONFIG_X86_64) += mmu/tdp_iter.o mmu/tdp_mmu.o
+kvm-$(CONFIG_KVM_XEN)  += xen.o
 
 kvm-intel-y            += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
                           vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
index c8f2592..6bd2f8b 100644 (file)
@@ -408,7 +408,7 @@ void kvm_set_cpu_caps(void)
 
        kvm_cpu_cap_mask(CPUID_7_0_EBX,
                F(FSGSBASE) | F(BMI1) | F(HLE) | F(AVX2) | F(SMEP) |
-               F(BMI2) | F(ERMS) | 0 /*INVPCID*/ | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
+               F(BMI2) | F(ERMS) | F(INVPCID) | F(RTM) | 0 /*MPX*/ | F(RDSEED) |
                F(ADX) | F(SMAP) | F(AVX512IFMA) | F(AVX512F) | F(AVX512PF) |
                F(AVX512ER) | F(AVX512CD) | F(CLFLUSHOPT) | F(CLWB) | F(AVX512DQ) |
                F(SHA_NI) | F(AVX512BW) | F(AVX512VL) | 0 /*INTEL_PT*/
index 7d2dae9..f98370a 100644 (file)
@@ -159,7 +159,7 @@ static struct kvm_vcpu_hv_synic *synic_get(struct kvm *kvm, u32 vpidx)
        struct kvm_vcpu_hv_synic *synic;
 
        vcpu = get_vcpu_by_vpidx(kvm, vpidx);
-       if (!vcpu)
+       if (!vcpu || !to_hv_vcpu(vcpu))
                return NULL;
        synic = to_hv_synic(vcpu);
        return (synic->active) ? synic : NULL;
@@ -520,10 +520,10 @@ static u64 get_time_ref_counter(struct kvm *kvm)
        u64 tsc;
 
        /*
-        * The guest has not set up the TSC page or the clock isn't
-        * stable, fall back to get_kvmclock_ns.
+        * Fall back to get_kvmclock_ns() when TSC page hasn't been set up,
+        * is broken, disabled or being updated.
         */
-       if (!hv->tsc_ref.tsc_sequence)
+       if (hv->hv_tsc_page_status != HV_TSC_PAGE_SET)
                return div_u64(get_kvmclock_ns(kvm), 100);
 
        vcpu = kvm_get_vcpu(kvm, 0);
@@ -1077,6 +1077,21 @@ static bool compute_tsc_page_parameters(struct pvclock_vcpu_time_info *hv_clock,
        return true;
 }
 
+/*
+ * Don't touch TSC page values if the guest has opted for TSC emulation after
+ * migration. KVM doesn't fully support reenlightenment notifications and TSC
+ * access emulation and Hyper-V is known to expect the values in the TSC page
+ * to stay constant before TSC access emulation is disabled from the guest side
+ * (HV_X64_MSR_TSC_EMULATION_STATUS). KVM userspace is expected to preserve TSC
+ * frequency and guest visible TSC value across migration (and prevent it when
+ * TSC scaling is unsupported).
+ */
+static inline bool tsc_page_update_unsafe(struct kvm_hv *hv)
+{
+       return (hv->hv_tsc_page_status != HV_TSC_PAGE_GUEST_CHANGED) &&
+               hv->hv_tsc_emulation_control;
+}
+
 void kvm_hv_setup_tsc_page(struct kvm *kvm,
                           struct pvclock_vcpu_time_info *hv_clock)
 {
@@ -1087,7 +1102,8 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
        BUILD_BUG_ON(sizeof(tsc_seq) != sizeof(hv->tsc_ref.tsc_sequence));
        BUILD_BUG_ON(offsetof(struct ms_hyperv_tsc_page, tsc_sequence) != 0);
 
-       if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+       if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
+           hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET)
                return;
 
        mutex_lock(&hv->hv_lock);
@@ -1101,7 +1117,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
         */
        if (unlikely(kvm_read_guest(kvm, gfn_to_gpa(gfn),
                                    &tsc_seq, sizeof(tsc_seq))))
+               goto out_err;
+
+       if (tsc_seq && tsc_page_update_unsafe(hv)) {
+               if (kvm_read_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
+                       goto out_err;
+
+               hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
                goto out_unlock;
+       }
 
        /*
         * While we're computing and writing the parameters, force the
@@ -1110,15 +1134,15 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
        hv->tsc_ref.tsc_sequence = 0;
        if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
                            &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
-               goto out_unlock;
+               goto out_err;
 
        if (!compute_tsc_page_parameters(hv_clock, &hv->tsc_ref))
-               goto out_unlock;
+               goto out_err;
 
        /* Ensure sequence is zero before writing the rest of the struct.  */
        smp_wmb();
        if (kvm_write_guest(kvm, gfn_to_gpa(gfn), &hv->tsc_ref, sizeof(hv->tsc_ref)))
-               goto out_unlock;
+               goto out_err;
 
        /*
         * Now switch to the TSC page mechanism by writing the sequence.
@@ -1131,8 +1155,45 @@ void kvm_hv_setup_tsc_page(struct kvm *kvm,
        smp_wmb();
 
        hv->tsc_ref.tsc_sequence = tsc_seq;
-       kvm_write_guest(kvm, gfn_to_gpa(gfn),
-                       &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence));
+       if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+                           &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
+               goto out_err;
+
+       hv->hv_tsc_page_status = HV_TSC_PAGE_SET;
+       goto out_unlock;
+
+out_err:
+       hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
+out_unlock:
+       mutex_unlock(&hv->hv_lock);
+}
+
+void kvm_hv_invalidate_tsc_page(struct kvm *kvm)
+{
+       struct kvm_hv *hv = to_kvm_hv(kvm);
+       u64 gfn;
+
+       if (hv->hv_tsc_page_status == HV_TSC_PAGE_BROKEN ||
+           hv->hv_tsc_page_status == HV_TSC_PAGE_UNSET ||
+           tsc_page_update_unsafe(hv))
+               return;
+
+       mutex_lock(&hv->hv_lock);
+
+       if (!(hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE))
+               goto out_unlock;
+
+       /* Preserve HV_TSC_PAGE_GUEST_CHANGED/HV_TSC_PAGE_HOST_CHANGED states */
+       if (hv->hv_tsc_page_status == HV_TSC_PAGE_SET)
+               hv->hv_tsc_page_status = HV_TSC_PAGE_UPDATING;
+
+       gfn = hv->hv_tsc_page >> HV_X64_MSR_TSC_REFERENCE_ADDRESS_SHIFT;
+
+       hv->tsc_ref.tsc_sequence = 0;
+       if (kvm_write_guest(kvm, gfn_to_gpa(gfn),
+                           &hv->tsc_ref, sizeof(hv->tsc_ref.tsc_sequence)))
+               hv->hv_tsc_page_status = HV_TSC_PAGE_BROKEN;
+
 out_unlock:
        mutex_unlock(&hv->hv_lock);
 }
@@ -1193,8 +1254,15 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
        }
        case HV_X64_MSR_REFERENCE_TSC:
                hv->hv_tsc_page = data;
-               if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE)
+               if (hv->hv_tsc_page & HV_X64_MSR_TSC_REFERENCE_ENABLE) {
+                       if (!host)
+                               hv->hv_tsc_page_status = HV_TSC_PAGE_GUEST_CHANGED;
+                       else
+                               hv->hv_tsc_page_status = HV_TSC_PAGE_HOST_CHANGED;
                        kvm_make_request(KVM_REQ_MASTERCLOCK_UPDATE, vcpu);
+               } else {
+                       hv->hv_tsc_page_status = HV_TSC_PAGE_UNSET;
+               }
                break;
        case HV_X64_MSR_CRASH_P0 ... HV_X64_MSR_CRASH_P4:
                return kvm_hv_msr_set_crash_data(kvm,
@@ -1229,6 +1297,9 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data,
                hv->hv_tsc_emulation_control = data;
                break;
        case HV_X64_MSR_TSC_EMULATION_STATUS:
+               if (data && !host)
+                       return 1;
+
                hv->hv_tsc_emulation_status = data;
                break;
        case HV_X64_MSR_TIME_REF_COUNT:
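
Taken together, these hunks replace the single tsc_sequence test with an
explicit page status: readers only trust the page once it is fully written,
any failed guest-memory access parks it as broken, and masterclock updates
pass through an updating state. An illustrative sketch of the decision the
readers make (the enum values mirror the HV_TSC_PAGE_* names used above;
the rest is a simplification, not kernel code):

    #include <stdio.h>

    enum hv_tsc_page_status {
            HV_TSC_PAGE_UNSET,          /* MSR written without enable bit */
            HV_TSC_PAGE_GUEST_CHANGED,  /* guest wrote the reference MSR  */
            HV_TSC_PAGE_HOST_CHANGED,   /* host userspace wrote the MSR   */
            HV_TSC_PAGE_SET,            /* contents written and valid     */
            HV_TSC_PAGE_UPDATING,       /* masterclock update in flight   */
            HV_TSC_PAGE_BROKEN,         /* a guest-memory access failed   */
    };

    /* get_time_ref_counter() only trusts the page in the SET state;
     * every other state falls back to get_kvmclock_ns()-style time. */
    static int use_tsc_page(enum hv_tsc_page_status s)
    {
            return s == HV_TSC_PAGE_SET;
    }

    int main(void)
    {
            printf("%d %d\n",
                   use_tsc_page(HV_TSC_PAGE_SET),       /* 1 */
                   use_tsc_page(HV_TSC_PAGE_BROKEN));   /* 0 */
            return 0;
    }
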
index e951af1..60547d5 100644 (file)
@@ -133,6 +133,7 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu);
 
 void kvm_hv_setup_tsc_page(struct kvm *kvm,
                           struct pvclock_vcpu_time_info *hv_clock);
+void kvm_hv_invalidate_tsc_page(struct kvm *kvm);
 
 void kvm_hv_init_vm(struct kvm *kvm);
 void kvm_hv_destroy_vm(struct kvm *kvm);
index 45d40bf..cc369b9 100644 (file)
@@ -1642,7 +1642,16 @@ static void apic_timer_expired(struct kvm_lapic *apic, bool from_timer_fn)
        }
 
        if (kvm_use_posted_timer_interrupt(apic->vcpu)) {
-               kvm_wait_lapic_expire(vcpu);
+               /*
+                * Ensure the guest's timer has truly expired before posting an
+                * interrupt.  Open code the relevant checks to avoid querying
+                * lapic_timer_int_injected(), which will be false since the
+                * interrupt isn't yet injected.  Waiting until after injecting
+                * is not an option since that won't help a posted interrupt.
+                */
+               if (vcpu->arch.apic->lapic_timer.expired_tscdeadline &&
+                   vcpu->arch.apic->lapic_timer.timer_advance_ns)
+                       __kvm_wait_lapic_expire(vcpu);
                kvm_apic_inject_pending_timer_irqs(apic);
                return;
        }
@@ -2595,6 +2604,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s)
 
        apic_update_ppr(apic);
        hrtimer_cancel(&apic->lapic_timer.timer);
+       apic->lapic_timer.expired_tscdeadline = 0;
        apic_update_lvtt(apic);
        apic_manage_nmi_watchdog(apic, kvm_lapic_get_reg(apic, APIC_LVT0));
        update_divide_count(apic);
index e507568..951dae4 100644 (file)
@@ -1165,7 +1165,8 @@ static bool spte_wrprot_for_clear_dirty(u64 *sptep)
  *     - W bit on ad-disabled SPTEs.
  * Returns true iff any D or W bits were cleared.
  */
-static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+                              struct kvm_memory_slot *slot)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -1180,35 +1181,6 @@ static bool __rmap_clear_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
        return flush;
 }
 
-static bool spte_set_dirty(u64 *sptep)
-{
-       u64 spte = *sptep;
-
-       rmap_printk("spte %p %llx\n", sptep, *sptep);
-
-       /*
-        * Similar to the !kvm_x86_ops.slot_disable_log_dirty case,
-        * do not bother adding back write access to pages marked
-        * SPTE_AD_WRPROT_ONLY_MASK.
-        */
-       spte |= shadow_dirty_mask;
-
-       return mmu_spte_update(sptep, spte);
-}
-
-static bool __rmap_set_dirty(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
-{
-       u64 *sptep;
-       struct rmap_iterator iter;
-       bool flush = false;
-
-       for_each_rmap_spte(rmap_head, &iter, sptep)
-               if (spte_ad_enabled(*sptep))
-                       flush |= spte_set_dirty(sptep);
-
-       return flush;
-}
-
 /**
  * kvm_mmu_write_protect_pt_masked - write protect selected PT level pages
  * @kvm: kvm instance
@@ -1248,9 +1220,9 @@ static void kvm_mmu_write_protect_pt_masked(struct kvm *kvm,
  *
  * Used for PML to re-log the dirty GPAs after userspace querying dirty_bitmap.
  */
-void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
-                                    struct kvm_memory_slot *slot,
-                                    gfn_t gfn_offset, unsigned long mask)
+static void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
+                                        struct kvm_memory_slot *slot,
+                                        gfn_t gfn_offset, unsigned long mask)
 {
        struct kvm_rmap_head *rmap_head;
 
@@ -1260,13 +1232,12 @@ void kvm_mmu_clear_dirty_pt_masked(struct kvm *kvm,
        while (mask) {
                rmap_head = __gfn_to_rmap(slot->base_gfn + gfn_offset + __ffs(mask),
                                          PG_LEVEL_4K, slot);
-               __rmap_clear_dirty(kvm, rmap_head);
+               __rmap_clear_dirty(kvm, rmap_head, slot);
 
                /* clear the first set bit */
                mask &= mask - 1;
        }
 }
-EXPORT_SYMBOL_GPL(kvm_mmu_clear_dirty_pt_masked);
 
 /**
  * kvm_arch_mmu_enable_log_dirty_pt_masked - enable dirty logging for selected
@@ -1282,20 +1253,15 @@ void kvm_arch_mmu_enable_log_dirty_pt_masked(struct kvm *kvm,
                                struct kvm_memory_slot *slot,
                                gfn_t gfn_offset, unsigned long mask)
 {
-       if (kvm_x86_ops.enable_log_dirty_pt_masked)
-               static_call(kvm_x86_enable_log_dirty_pt_masked)(kvm, slot,
-                                                               gfn_offset,
-                                                               mask);
+       if (kvm_x86_ops.cpu_dirty_log_size)
+               kvm_mmu_clear_dirty_pt_masked(kvm, slot, gfn_offset, mask);
        else
                kvm_mmu_write_protect_pt_masked(kvm, slot, gfn_offset, mask);
 }
 
 int kvm_cpu_dirty_log_size(void)
 {
-       if (kvm_x86_ops.cpu_dirty_log_size)
-               return static_call(kvm_x86_cpu_dirty_log_size)();
-
-       return 0;
+       return kvm_x86_ops.cpu_dirty_log_size;
 }
 
 bool kvm_mmu_slot_gfn_write_protect(struct kvm *kvm,
@@ -1325,7 +1291,8 @@ static bool rmap_write_protect(struct kvm_vcpu *vcpu, u64 gfn)
        return kvm_mmu_slot_gfn_write_protect(vcpu->kvm, slot, gfn);
 }
 
-static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head)
+static bool kvm_zap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+                         struct kvm_memory_slot *slot)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -1345,7 +1312,7 @@ static int kvm_unmap_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
                           struct kvm_memory_slot *slot, gfn_t gfn, int level,
                           unsigned long data)
 {
-       return kvm_zap_rmapp(kvm, rmap_head);
+       return kvm_zap_rmapp(kvm, rmap_head, slot);
 }
 
 static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head,
@@ -2499,7 +2466,21 @@ int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn)
 
        return r;
 }
-EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page);
+
+static int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
+{
+       gpa_t gpa;
+       int r;
+
+       if (vcpu->arch.mmu->direct_map)
+               return 0;
+
+       gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
+
+       r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
+
+       return r;
+}
 
 static void kvm_unsync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp)
 {
@@ -2753,11 +2734,18 @@ static void direct_pte_prefetch(struct kvm_vcpu *vcpu, u64 *sptep)
        if (sp->role.level > PG_LEVEL_4K)
                return;
 
+       /*
+        * If addresses are being invalidated, skip prefetching to avoid
+        * accidentally prefetching those addresses.
+        */
+       if (unlikely(vcpu->kvm->mmu_notifier_count))
+               return;
+
        __direct_pte_prefetch(vcpu, sp, sptep);
 }
 
-static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
-                                 kvm_pfn_t pfn, struct kvm_memory_slot *slot)
+static int host_pfn_mapping_level(struct kvm *kvm, gfn_t gfn, kvm_pfn_t pfn,
+                                 struct kvm_memory_slot *slot)
 {
        unsigned long hva;
        pte_t *pte;
@@ -2776,19 +2764,36 @@ static int host_pfn_mapping_level(struct kvm_vcpu *vcpu, gfn_t gfn,
         */
        hva = __gfn_to_hva_memslot(slot, gfn);
 
-       pte = lookup_address_in_mm(vcpu->kvm->mm, hva, &level);
+       pte = lookup_address_in_mm(kvm->mm, hva, &level);
        if (unlikely(!pte))
                return PG_LEVEL_4K;
 
        return level;
 }
 
+int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_memory_slot *slot,
+                             gfn_t gfn, kvm_pfn_t pfn, int max_level)
+{
+       struct kvm_lpage_info *linfo;
+
+       max_level = min(max_level, max_huge_page_level);
+       for ( ; max_level > PG_LEVEL_4K; max_level--) {
+               linfo = lpage_info_slot(gfn, slot, max_level);
+               if (!linfo->disallow_lpage)
+                       break;
+       }
+
+       if (max_level == PG_LEVEL_4K)
+               return PG_LEVEL_4K;
+
+       return host_pfn_mapping_level(kvm, gfn, pfn, slot);
+}
+
 int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
                            int max_level, kvm_pfn_t *pfnp,
                            bool huge_page_disallowed, int *req_level)
 {
        struct kvm_memory_slot *slot;
-       struct kvm_lpage_info *linfo;
        kvm_pfn_t pfn = *pfnp;
        kvm_pfn_t mask;
        int level;
@@ -2805,17 +2810,7 @@ int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
        if (!slot)
                return PG_LEVEL_4K;
 
-       max_level = min(max_level, max_huge_page_level);
-       for ( ; max_level > PG_LEVEL_4K; max_level--) {
-               linfo = lpage_info_slot(gfn, slot, max_level);
-               if (!linfo->disallow_lpage)
-                       break;
-       }
-
-       if (max_level == PG_LEVEL_4K)
-               return PG_LEVEL_4K;
-
-       level = host_pfn_mapping_level(vcpu, gfn, pfn, slot);
+       level = kvm_mmu_max_mapping_level(vcpu->kvm, slot, gfn, pfn, max_level);
        if (level == PG_LEVEL_4K)
                return level;
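
Factoring kvm_mmu_max_mapping_level() out of the hugepage-adjust path lets
the zap-collapsible path further down reuse the same answer. A toy model of
the computation: walk down from the requested level while the memslot
disallows large pages, then cap by what the host itself maps (the final
min() is a simplification here; names and tables are stand-ins, not kernel
code):

    #include <stdio.h>

    #define PG_LEVEL_4K 1
    #define PG_LEVEL_2M 2
    #define PG_LEVEL_1G 3

    /* Stand-ins for lpage_info_slot() and host_pfn_mapping_level(). */
    static const int disallow_lpage_at[4] = { 0, 0, 0, 1 }; /* no 1G */
    static const int host_level = PG_LEVEL_2M;

    static int max_mapping_level_model(int max_level)
    {
            for ( ; max_level > PG_LEVEL_4K; max_level--)
                    if (!disallow_lpage_at[max_level])
                            break;

            if (max_level == PG_LEVEL_4K)
                    return PG_LEVEL_4K;

            /* Never map larger than the host page tables do. */
            return host_level < max_level ? host_level : max_level;
    }

    int main(void)
    {
            printf("%d\n", max_mapping_level_model(PG_LEVEL_1G)); /* 2 */
            return 0;
    }
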
 
@@ -3437,7 +3432,6 @@ void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu)
        kvm_mmu_audit(vcpu, AUDIT_POST_SYNC);
        write_unlock(&vcpu->kvm->mmu_lock);
 }
-EXPORT_SYMBOL_GPL(kvm_mmu_sync_roots);
 
 static gpa_t nonpaging_gva_to_gpa(struct kvm_vcpu *vcpu, gpa_t vaddr,
                                  u32 access, struct x86_exception *exception)
@@ -3653,8 +3647,8 @@ static bool kvm_arch_setup_async_pf(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa,
 }
 
 static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
-                        gpa_t cr2_or_gpa, kvm_pfn_t *pfn, bool write,
-                        bool *writable)
+                        gpa_t cr2_or_gpa, kvm_pfn_t *pfn, hva_t *hva,
+                        bool write, bool *writable)
 {
        struct kvm_memory_slot *slot = kvm_vcpu_gfn_to_memslot(vcpu, gfn);
        bool async;
@@ -3667,7 +3661,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
        }
 
        async = false;
-       *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async, write, writable);
+       *pfn = __gfn_to_pfn_memslot(slot, gfn, false, &async,
+                                   write, writable, hva);
        if (!async)
                return false; /* *pfn has correct page already */
 
@@ -3681,7 +3676,8 @@ static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn,
                        return true;
        }
 
-       *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL, write, writable);
+       *pfn = __gfn_to_pfn_memslot(slot, gfn, false, NULL,
+                                   write, writable, hva);
        return false;
 }
 
@@ -3694,6 +3690,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
        gfn_t gfn = gpa >> PAGE_SHIFT;
        unsigned long mmu_seq;
        kvm_pfn_t pfn;
+       hva_t hva;
        int r;
 
        if (page_fault_handle_page_track(vcpu, error_code, gfn))
@@ -3712,7 +3709,8 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
 
-       if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, write, &map_writable))
+       if (try_async_pf(vcpu, prefault, gfn, gpa, &pfn, &hva,
+                        write, &map_writable))
                return RET_PF_RETRY;
 
        if (handle_abnormal_pfn(vcpu, is_tdp ? 0 : gpa, gfn, pfn, ACC_ALL, &r))
@@ -3725,7 +3723,7 @@ static int direct_page_fault(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
        else
                write_lock(&vcpu->kvm->mmu_lock);
 
-       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+       if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
                goto out_unlock;
        r = make_mmu_pages_available(vcpu);
        if (r)
@@ -5003,22 +5001,6 @@ static void kvm_mmu_pte_write(struct kvm_vcpu *vcpu, gpa_t gpa,
        write_unlock(&vcpu->kvm->mmu_lock);
 }
 
-int kvm_mmu_unprotect_page_virt(struct kvm_vcpu *vcpu, gva_t gva)
-{
-       gpa_t gpa;
-       int r;
-
-       if (vcpu->arch.mmu->direct_map)
-               return 0;
-
-       gpa = kvm_mmu_gva_to_gpa_read(vcpu, gva, NULL);
-
-       r = kvm_mmu_unprotect_page(vcpu->kvm, gpa >> PAGE_SHIFT);
-
-       return r;
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_unprotect_page_virt);
-
 int kvm_mmu_page_fault(struct kvm_vcpu *vcpu, gpa_t cr2_or_gpa, u64 error_code,
                       void *insn, int insn_len)
 {
@@ -5117,7 +5099,6 @@ void kvm_mmu_invalidate_gva(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                mmu->invlpg(vcpu, gva, root_hpa);
        }
 }
-EXPORT_SYMBOL_GPL(kvm_mmu_invalidate_gva);
 
 void kvm_mmu_invlpg(struct kvm_vcpu *vcpu, gva_t gva)
 {
@@ -5157,7 +5138,6 @@ void kvm_mmu_invpcid_gva(struct kvm_vcpu *vcpu, gva_t gva, unsigned long pcid)
         * for them.
         */
 }
-EXPORT_SYMBOL_GPL(kvm_mmu_invpcid_gva);
 
 void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
                       int tdp_huge_page_level)
@@ -5182,7 +5162,8 @@ void kvm_configure_mmu(bool enable_tdp, int tdp_max_root_level,
 EXPORT_SYMBOL_GPL(kvm_configure_mmu);
 
 /* The return value indicates if tlb flush on all vcpus is needed. */
-typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head);
+typedef bool (*slot_level_handler) (struct kvm *kvm, struct kvm_rmap_head *rmap_head,
+                                   struct kvm_memory_slot *slot);
 
 /* The caller should hold mmu-lock before calling this function. */
 static __always_inline bool
@@ -5196,7 +5177,7 @@ slot_handle_level_range(struct kvm *kvm, struct kvm_memory_slot *memslot,
        for_each_slot_rmap_range(memslot, start_level, end_level, start_gfn,
                        end_gfn, &iterator) {
                if (iterator.rmap)
-                       flush |= fn(kvm, iterator.rmap);
+                       flush |= fn(kvm, iterator.rmap, memslot);
 
                if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
                        if (flush && lock_flush_tlb) {
@@ -5230,22 +5211,6 @@ slot_handle_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
 }
 
 static __always_inline bool
-slot_handle_all_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                     slot_level_handler fn, bool lock_flush_tlb)
-{
-       return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K,
-                                KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
-}
-
-static __always_inline bool
-slot_handle_large_level(struct kvm *kvm, struct kvm_memory_slot *memslot,
-                       slot_level_handler fn, bool lock_flush_tlb)
-{
-       return slot_handle_level(kvm, memslot, fn, PG_LEVEL_4K + 1,
-                                KVM_MAX_HUGEPAGE_LEVEL, lock_flush_tlb);
-}
-
-static __always_inline bool
 slot_handle_leaf(struct kvm *kvm, struct kvm_memory_slot *memslot,
                 slot_level_handler fn, bool lock_flush_tlb)
 {
@@ -5485,7 +5450,8 @@ void kvm_zap_gfn_range(struct kvm *kvm, gfn_t gfn_start, gfn_t gfn_end)
 }
 
 static bool slot_rmap_write_protect(struct kvm *kvm,
-                                   struct kvm_rmap_head *rmap_head)
+                                   struct kvm_rmap_head *rmap_head,
+                                   struct kvm_memory_slot *slot)
 {
        return __rmap_write_protect(kvm, rmap_head, false);
 }
@@ -5519,7 +5485,8 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
 }
 
 static bool kvm_mmu_zap_collapsible_spte(struct kvm *kvm,
-                                        struct kvm_rmap_head *rmap_head)
+                                        struct kvm_rmap_head *rmap_head,
+                                        struct kvm_memory_slot *slot)
 {
        u64 *sptep;
        struct rmap_iterator iter;
@@ -5540,8 +5507,8 @@ restart:
                 * mapping if the indirect sp has level = 1.
                 */
                if (sp->role.direct && !kvm_is_reserved_pfn(pfn) &&
-                   (kvm_is_zone_device_pfn(pfn) ||
-                    PageCompound(pfn_to_page(pfn)))) {
+                   sp->role.level < kvm_mmu_max_mapping_level(kvm, slot, sp->gfn,
+                                                              pfn, PG_LEVEL_NUM)) {
                        pte_list_remove(rmap_head, sptep);
 
                        if (kvm_available_flush_tlb_with_range())
@@ -5561,12 +5528,13 @@ void kvm_mmu_zap_collapsible_sptes(struct kvm *kvm,
                                   const struct kvm_memory_slot *memslot)
 {
        /* FIXME: const-ify all uses of struct kvm_memory_slot.  */
+       struct kvm_memory_slot *slot = (struct kvm_memory_slot *)memslot;
+
        write_lock(&kvm->mmu_lock);
-       slot_handle_leaf(kvm, (struct kvm_memory_slot *)memslot,
-                        kvm_mmu_zap_collapsible_spte, true);
+       slot_handle_leaf(kvm, slot, kvm_mmu_zap_collapsible_spte, true);
 
        if (is_tdp_mmu_enabled(kvm))
-               kvm_tdp_mmu_zap_collapsible_sptes(kvm, memslot);
+               kvm_tdp_mmu_zap_collapsible_sptes(kvm, slot);
        write_unlock(&kvm->mmu_lock);
 }
 
@@ -5605,40 +5573,6 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
        if (flush)
                kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
 }
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_leaf_clear_dirty);
-
-void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
-                                       struct kvm_memory_slot *memslot)
-{
-       bool flush;
-
-       write_lock(&kvm->mmu_lock);
-       flush = slot_handle_large_level(kvm, memslot, slot_rmap_write_protect,
-                                       false);
-       if (is_tdp_mmu_enabled(kvm))
-               flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_2M);
-       write_unlock(&kvm->mmu_lock);
-
-       if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_largepage_remove_write_access);
-
-void kvm_mmu_slot_set_dirty(struct kvm *kvm,
-                           struct kvm_memory_slot *memslot)
-{
-       bool flush;
-
-       write_lock(&kvm->mmu_lock);
-       flush = slot_handle_all_level(kvm, memslot, __rmap_set_dirty, false);
-       if (is_tdp_mmu_enabled(kvm))
-               flush |= kvm_tdp_mmu_slot_set_dirty(kvm, memslot);
-       write_unlock(&kvm->mmu_lock);
-
-       if (flush)
-               kvm_arch_flush_remote_tlbs_memslot(kvm, memslot);
-}
-EXPORT_SYMBOL_GPL(kvm_mmu_slot_set_dirty);
 
 void kvm_mmu_zap_all(struct kvm *kvm)
 {
@@ -5950,6 +5884,7 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
        struct kvm_mmu_page *sp;
        unsigned int ratio;
        LIST_HEAD(invalid_list);
+       bool flush = false;
        ulong to_zap;
 
        rcu_idx = srcu_read_lock(&kvm->srcu);
@@ -5971,19 +5906,19 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
                                      lpage_disallowed_link);
                WARN_ON_ONCE(!sp->lpage_disallowed);
                if (is_tdp_mmu_page(sp)) {
-                       kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn,
-                               sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level));
+                       flush |= kvm_tdp_mmu_zap_sp(kvm, sp);
                } else {
                        kvm_mmu_prepare_zap_page(kvm, sp, &invalid_list);
                        WARN_ON_ONCE(sp->lpage_disallowed);
                }
 
                if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
-                       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+                       kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
                        cond_resched_rwlock_write(&kvm->mmu_lock);
+                       flush = false;
                }
        }
-       kvm_mmu_commit_zap_page(kvm, &invalid_list);
+       kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
 
        write_unlock(&kvm->mmu_lock);
        srcu_read_unlock(&kvm->srcu, rcu_idx);
index 9e38d3c..1f6f98c 100644 (file)
@@ -78,15 +78,23 @@ static inline struct kvm_mmu_page *sptep_to_sp(u64 *sptep)
        return to_shadow_page(__pa(sptep));
 }
 
+static inline int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
+{
+       return sp->role.smm ? 1 : 0;
+}
+
 static inline bool kvm_vcpu_ad_need_write_protect(struct kvm_vcpu *vcpu)
 {
        /*
-        * When using the EPT page-modification log, the GPAs in the log
-        * would come from L2 rather than L1.  Therefore, we need to rely
-        * on write protection to record dirty pages.  This also bypasses
-        * PML, since writes now result in a vmexit.
+        * When using the EPT page-modification log, the GPAs in the CPU dirty
+        * log would come from L2 rather than L1.  Therefore, we need to rely
+        * on write protection to record dirty pages, which bypasses PML, since
+        * writes now result in a vmexit.  Note, the check on CPU dirty logging
+        * being enabled is mandatory as the bits used to denote WP-only SPTEs
+        * are reserved for NPT w/ PAE (32-bit KVM).
         */
-       return vcpu->arch.mmu == &vcpu->arch.guest_mmu;
+       return vcpu->arch.mmu == &vcpu->arch.guest_mmu &&
+              kvm_x86_ops.cpu_dirty_log_size;
 }
 
 bool is_nx_huge_page_enabled(void);
@@ -138,6 +146,8 @@ enum {
 #define SET_SPTE_NEED_REMOTE_TLB_FLUSH BIT(1)
 #define SET_SPTE_SPURIOUS              BIT(2)
 
+int kvm_mmu_max_mapping_level(struct kvm *kvm, struct kvm_memory_slot *slot,
+                             gfn_t gfn, kvm_pfn_t pfn, int max_level);
 int kvm_mmu_hugepage_adjust(struct kvm_vcpu *vcpu, gfn_t gfn,
                            int max_level, kvm_pfn_t *pfnp,
                            bool huge_page_disallowed, int *req_level);
index d9f66cc..55d7b47 100644 (file)
@@ -601,6 +601,13 @@ static void FNAME(pte_prefetch)(struct kvm_vcpu *vcpu, struct guest_walker *gw,
        if (sp->role.level > PG_LEVEL_4K)
                return;
 
+       /*
+        * If addresses are being invalidated, skip prefetching to avoid
+        * accidentally prefetching those addresses.
+        */
+       if (unlikely(vcpu->kvm->mmu_notifier_count))
+               return;
+
        if (sp->role.direct)
                return __direct_pte_prefetch(vcpu, sp, sptep);
 
@@ -790,6 +797,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
        struct guest_walker walker;
        int r;
        kvm_pfn_t pfn;
+       hva_t hva;
        unsigned long mmu_seq;
        bool map_writable, is_self_change_mapping;
        int max_level;
@@ -840,8 +848,8 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
        mmu_seq = vcpu->kvm->mmu_notifier_seq;
        smp_rmb();
 
-       if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, write_fault,
-                        &map_writable))
+       if (try_async_pf(vcpu, prefault, walker.gfn, addr, &pfn, &hva,
+                        write_fault, &map_writable))
                return RET_PF_RETRY;
 
        if (handle_abnormal_pfn(vcpu, addr, walker.gfn, pfn, walker.pte_access, &r))
@@ -869,7 +877,7 @@ static int FNAME(page_fault)(struct kvm_vcpu *vcpu, gpa_t addr, u32 error_code,
 
        r = RET_PF_RETRY;
        write_lock(&vcpu->kvm->mmu_lock);
-       if (mmu_notifier_retry(vcpu->kvm, mmu_seq))
+       if (!is_noslot_pfn(pfn) && mmu_notifier_retry_hva(vcpu->kvm, mmu_seq, hva))
                goto out_unlock;
 
        kvm_mmu_audit(vcpu, AUDIT_PRE_PAGE_FAULT);
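
Both page-fault paths now skip the retry for no-slot pfns and qualify the
retry by the faulting hva, so an invalidation of an unrelated range no
longer forces a retry. A toy model of the range check (field names are
stand-ins for the kvm->mmu_notifier_* bookkeeping):

    #include <stdbool.h>
    #include <stdio.h>

    struct mmu_notifier_state {
            unsigned long seq;        /* bumped when an invalidation ends */
            unsigned long count;      /* nonzero while one is in flight   */
            unsigned long start, end; /* hva range being invalidated      */
    };

    static bool retry_hva(const struct mmu_notifier_state *s,
                          unsigned long snap_seq, unsigned long hva)
    {
            if (s->count && hva >= s->start && hva < s->end)
                    return true;
            return s->seq != snap_seq;
    }

    int main(void)
    {
            struct mmu_notifier_state s = {
                    .seq = 7, .count = 1, .start = 0x1000, .end = 0x2000,
            };

            printf("%d %d\n",
                   retry_hva(&s, 7, 0x1800),  /* 1: hva in the range       */
                   retry_hva(&s, 7, 0x3000)); /* 0: unrelated invalidation */
            return 0;
    }
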
index e5f1481..b3ed302 100644 (file)
@@ -21,6 +21,21 @@ static gfn_t round_gfn_for_level(gfn_t gfn, int level)
 }
 
 /*
+ * Return the TDP iterator to the root PT and allow it to continue its
+ * traversal over the paging structure from there.
+ */
+void tdp_iter_restart(struct tdp_iter *iter)
+{
+       iter->yielded_gfn = iter->next_last_level_gfn;
+       iter->level = iter->root_level;
+
+       iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
+       tdp_iter_refresh_sptep(iter);
+
+       iter->valid = true;
+}
+
+/*
  * Sets a TDP iterator to walk a pre-order traversal of the paging structure
  * rooted at root_pt, starting with the walk to translate next_last_level_gfn.
  */
@@ -31,16 +46,12 @@ void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
        WARN_ON(root_level > PT64_ROOT_MAX_LEVEL);
 
        iter->next_last_level_gfn = next_last_level_gfn;
-       iter->yielded_gfn = iter->next_last_level_gfn;
        iter->root_level = root_level;
        iter->min_level = min_level;
-       iter->level = root_level;
-       iter->pt_path[iter->level - 1] = (tdp_ptep_t)root_pt;
-
-       iter->gfn = round_gfn_for_level(iter->next_last_level_gfn, iter->level);
-       tdp_iter_refresh_sptep(iter);
+       iter->pt_path[iter->root_level - 1] = (tdp_ptep_t)root_pt;
+       iter->as_id = kvm_mmu_page_as_id(sptep_to_sp(root_pt));
 
-       iter->valid = true;
+       tdp_iter_restart(iter);
 }
 
 /*
@@ -159,8 +170,3 @@ void tdp_iter_next(struct tdp_iter *iter)
        iter->valid = false;
 }
 
-tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter)
-{
-       return iter->pt_path[iter->root_level - 1];
-}
-
index 4cc177d..b1748b9 100644 (file)
@@ -36,6 +36,8 @@ struct tdp_iter {
        int min_level;
        /* The iterator's current level within the paging structure */
        int level;
+       /* The address space ID, i.e. SMM vs. regular. */
+       int as_id;
        /* A snapshot of the value at sptep */
        u64 old_spte;
        /*
@@ -62,6 +64,6 @@ tdp_ptep_t spte_to_child_pt(u64 pte, int level);
 void tdp_iter_start(struct tdp_iter *iter, u64 *root_pt, int root_level,
                    int min_level, gfn_t next_last_level_gfn);
 void tdp_iter_next(struct tdp_iter *iter);
-tdp_ptep_t tdp_iter_root_pt(struct tdp_iter *iter);
+void tdp_iter_restart(struct tdp_iter *iter);
 
 #endif /* __KVM_X86_MMU_TDP_ITER_H */
index 71e100a..018d82e 100644 (file)
@@ -86,7 +86,7 @@ static inline struct kvm_mmu_page *tdp_mmu_next_root(struct kvm *kvm,
        list_for_each_entry(_root, &_kvm->arch.tdp_mmu_roots, link)
 
 static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-                         gfn_t start, gfn_t end, bool can_yield);
+                         gfn_t start, gfn_t end, bool can_yield, bool flush);
 
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 {
@@ -99,7 +99,7 @@ void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root)
 
        list_del(&root->link);
 
-       zap_gfn_range(kvm, root, 0, max_gfn, false);
+       zap_gfn_range(kvm, root, 0, max_gfn, false, false);
 
        free_page((unsigned long)root->spt);
        kmem_cache_free(mmu_page_header_cache, root);
@@ -203,11 +203,6 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
                                u64 old_spte, u64 new_spte, int level,
                                bool shared);
 
-static int kvm_mmu_page_as_id(struct kvm_mmu_page *sp)
-{
-       return sp->role.smm ? 1 : 0;
-}
-
 static void handle_changed_spte_acc_track(u64 old_spte, u64 new_spte, int level)
 {
        bool pfn_changed = spte_to_pfn(old_spte) != spte_to_pfn(new_spte);
@@ -301,11 +296,16 @@ static void tdp_mmu_unlink_page(struct kvm *kvm, struct kvm_mmu_page *sp,
  *
  * Given a page table that has been removed from the TDP paging structure,
  * iterates through the page table to clear SPTEs and free child page tables.
+ *
+ * Note that pt is passed in as a tdp_ptep_t, but it does not need RCU
+ * protection. Since this thread removed it from the paging structure,
+ * this thread will be responsible for ensuring the page is freed. Hence the
+ * early rcu_dereferences in the function.
  */
-static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
+static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
                                        bool shared)
 {
-       struct kvm_mmu_page *sp = sptep_to_sp(pt);
+       struct kvm_mmu_page *sp = sptep_to_sp(rcu_dereference(pt));
        int level = sp->role.level;
        gfn_t base_gfn = sp->gfn;
        u64 old_child_spte;
@@ -318,7 +318,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
        tdp_mmu_unlink_page(kvm, sp, shared);
 
        for (i = 0; i < PT64_ENT_PER_PAGE; i++) {
-               sptep = pt + i;
+               sptep = rcu_dereference(pt) + i;
                gfn = base_gfn + (i * KVM_PAGES_PER_HPAGE(level - 1));
 
                if (shared) {
@@ -337,7 +337,18 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, u64 *pt,
                                cpu_relax();
                        }
                } else {
+                       /*
+                        * If the SPTE is not MMU-present, there is no backing
+                        * page associated with the SPTE and so no side effects
+                        * that need to be recorded, and exclusive ownership of
+                        * mmu_lock ensures the SPTE can't be made present.
+                        * Note, zapping MMIO SPTEs is also unnecessary as they
+                        * are guarded by the memslots generation, not by being
+                        * unreachable.
+                        */
                        old_child_spte = READ_ONCE(*sptep);
+                       if (!is_shadow_present_pte(old_child_spte))
+                               continue;
 
                        /*
                         * Marking the SPTE as a removed SPTE is not
@@ -481,10 +492,6 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
                                           struct tdp_iter *iter,
                                           u64 new_spte)
 {
-       u64 *root_pt = tdp_iter_root_pt(iter);
-       struct kvm_mmu_page *root = sptep_to_sp(root_pt);
-       int as_id = kvm_mmu_page_as_id(root);
-
        lockdep_assert_held_read(&kvm->mmu_lock);
 
        /*
@@ -498,8 +505,8 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
                      new_spte) != iter->old_spte)
                return false;
 
-       handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
-                           iter->level, true);
+       handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+                           new_spte, iter->level, true);
 
        return true;
 }
@@ -527,7 +534,7 @@ static inline bool tdp_mmu_zap_spte_atomic(struct kvm *kvm,
         * here since the SPTE is going from non-present
         * to non-present.
         */
-       WRITE_ONCE(*iter->sptep, 0);
+       WRITE_ONCE(*rcu_dereference(iter->sptep), 0);
 
        return true;
 }
@@ -553,10 +560,6 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
                                      u64 new_spte, bool record_acc_track,
                                      bool record_dirty_log)
 {
-       tdp_ptep_t root_pt = tdp_iter_root_pt(iter);
-       struct kvm_mmu_page *root = sptep_to_sp(root_pt);
-       int as_id = kvm_mmu_page_as_id(root);
-
        lockdep_assert_held_write(&kvm->mmu_lock);
 
        /*
@@ -570,13 +573,13 @@ static inline void __tdp_mmu_set_spte(struct kvm *kvm, struct tdp_iter *iter,
 
        WRITE_ONCE(*rcu_dereference(iter->sptep), new_spte);
 
-       __handle_changed_spte(kvm, as_id, iter->gfn, iter->old_spte, new_spte,
-                             iter->level, false);
+       __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+                             new_spte, iter->level, false);
        if (record_acc_track)
                handle_changed_spte_acc_track(iter->old_spte, new_spte,
                                              iter->level);
        if (record_dirty_log)
-               handle_changed_spte_dirty_log(kvm, as_id, iter->gfn,
+               handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
                                              iter->old_spte, new_spte,
                                              iter->level);
 }
@@ -648,9 +651,7 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
 
                WARN_ON(iter->gfn > iter->next_last_level_gfn);
 
-               tdp_iter_start(iter, iter->pt_path[iter->root_level - 1],
-                              iter->root_level, iter->min_level,
-                              iter->next_last_level_gfn);
+               tdp_iter_restart(iter);
 
                return true;
        }
@@ -667,20 +668,21 @@ static inline bool tdp_mmu_iter_cond_resched(struct kvm *kvm,
  * scheduler needs the CPU or there is contention on the MMU lock. If this
  * function cannot yield, it will not release the MMU lock or reschedule and
  * the caller must ensure it does not supply too large a GFN range, or the
- * operation can cause a soft lockup.
+ * operation can cause a soft lockup.  Note, in some use cases a flush may be
+ * required by prior actions.  Ensure the pending flush is performed prior to
+ * yielding.
  */
 static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-                         gfn_t start, gfn_t end, bool can_yield)
+                         gfn_t start, gfn_t end, bool can_yield, bool flush)
 {
        struct tdp_iter iter;
-       bool flush_needed = false;
 
        rcu_read_lock();
 
        tdp_root_for_each_pte(iter, root, start, end) {
                if (can_yield &&
-                   tdp_mmu_iter_cond_resched(kvm, &iter, flush_needed)) {
-                       flush_needed = false;
+                   tdp_mmu_iter_cond_resched(kvm, &iter, flush)) {
+                       flush = false;
                        continue;
                }
 
@@ -698,11 +700,11 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
                        continue;
 
                tdp_mmu_set_spte(kvm, &iter, 0);
-               flush_needed = true;
+               flush = true;
        }
 
        rcu_read_unlock();
-       return flush_needed;
+       return flush;
 }
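The loop above threads the caller's pending-flush state through the iteration so a TLB flush owed by earlier zaps is never lost across a yield. A minimal user-space model of the pattern, with illustrative names rather than the kernel's:

    #include <stdbool.h>
    #include <stdio.h>

    /* Model of tdp_mmu_iter_cond_resched(): returns true if it yielded,
     * performing any flush the caller still owed before dropping the lock.
     * (The real iterator retries the same gfn after a yield.) */
    static bool cond_resched_flush(bool need_resched, bool flush)
    {
            if (!need_resched)
                    return false;
            if (flush)
                    puts("flush TLBs before yielding");
            /* ...drop mmu_lock, reschedule, reacquire... */
            return true;
    }

    /* Returns true if the caller still owes a TLB flush on exit. */
    static bool zap_range(unsigned long start, unsigned long end, bool flush)
    {
            for (unsigned long gfn = start; gfn < end; gfn++) {
                    if (cond_resched_flush(gfn % 8 == 0, flush)) {
                            flush = false;  /* the yield path flushed */
                            continue;
                    }
                    /* ...zap the SPTE for gfn... */
                    flush = true;           /* zapping requires a flush */
            }
            return flush;
    }

    int main(void)
    {
            if (zap_range(1, 32, false))
                    puts("flush TLBs before releasing mmu_lock");
            return 0;
    }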
 
 /*
@@ -711,13 +713,14 @@ static bool zap_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
  * SPTEs have been cleared and a TLB flush is needed before releasing the
  * MMU lock.
  */
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end)
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                                bool can_yield)
 {
        struct kvm_mmu_page *root;
        bool flush = false;
 
        for_each_tdp_mmu_root_yield_safe(kvm, root)
-               flush |= zap_gfn_range(kvm, root, start, end, true);
+               flush = zap_gfn_range(kvm, root, start, end, can_yield, flush);
 
        return flush;
 }
@@ -929,7 +932,7 @@ static int zap_gfn_range_hva_wrapper(struct kvm *kvm,
                                     struct kvm_mmu_page *root, gfn_t start,
                                     gfn_t end, unsigned long unused)
 {
-       return zap_gfn_range(kvm, root, start, end, false);
+       return zap_gfn_range(kvm, root, start, end, false, false);
 }
 
 int kvm_tdp_mmu_zap_hva_range(struct kvm *kvm, unsigned long start,
@@ -1269,67 +1272,15 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
 }
 
 /*
- * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
- * only used for PML, and so will involve setting the dirty bit on each SPTE.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
- */
-static bool set_dirty_gfn_range(struct kvm *kvm, struct kvm_mmu_page *root,
-                               gfn_t start, gfn_t end)
-{
-       struct tdp_iter iter;
-       u64 new_spte;
-       bool spte_set = false;
-
-       rcu_read_lock();
-
-       tdp_root_for_each_pte(iter, root, start, end) {
-               if (tdp_mmu_iter_cond_resched(kvm, &iter, false))
-                       continue;
-
-               if (!is_shadow_present_pte(iter.old_spte) ||
-                   iter.old_spte & shadow_dirty_mask)
-                       continue;
-
-               new_spte = iter.old_spte | shadow_dirty_mask;
-
-               tdp_mmu_set_spte(kvm, &iter, new_spte);
-               spte_set = true;
-       }
-
-       rcu_read_unlock();
-       return spte_set;
-}
-
-/*
- * Set the dirty status of all the SPTEs mapping GFNs in the memslot. This is
- * only used for PML, and so will involve setting the dirty bit on each SPTE.
- * Returns true if an SPTE has been changed and the TLBs need to be flushed.
- */
-bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot)
-{
-       struct kvm_mmu_page *root;
-       int root_as_id;
-       bool spte_set = false;
-
-       for_each_tdp_mmu_root_yield_safe(kvm, root) {
-               root_as_id = kvm_mmu_page_as_id(root);
-               if (root_as_id != slot->as_id)
-                       continue;
-
-               spte_set |= set_dirty_gfn_range(kvm, root, slot->base_gfn,
-                               slot->base_gfn + slot->npages);
-       }
-       return spte_set;
-}
-
-/*
  * Clear leaf entries which could be replaced by large mappings, for
  * GFNs within the slot.
  */
 static void zap_collapsible_spte_range(struct kvm *kvm,
                                       struct kvm_mmu_page *root,
-                                      gfn_t start, gfn_t end)
+                                      struct kvm_memory_slot *slot)
 {
+       gfn_t start = slot->base_gfn;
+       gfn_t end = start + slot->npages;
        struct tdp_iter iter;
        kvm_pfn_t pfn;
        bool spte_set = false;
@@ -1348,7 +1299,8 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
 
                pfn = spte_to_pfn(iter.old_spte);
                if (kvm_is_reserved_pfn(pfn) ||
-                   !PageTransCompoundMap(pfn_to_page(pfn)))
+                   iter.level >= kvm_mmu_max_mapping_level(kvm, slot, iter.gfn,
+                                                           pfn, PG_LEVEL_NUM))
                        continue;
 
                tdp_mmu_set_spte(kvm, &iter, 0);
@@ -1366,7 +1318,7 @@ static void zap_collapsible_spte_range(struct kvm *kvm,
  * be replaced by large mappings, for GFNs within the slot.
  */
 void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-                                      const struct kvm_memory_slot *slot)
+                                      struct kvm_memory_slot *slot)
 {
        struct kvm_mmu_page *root;
        int root_as_id;
@@ -1376,8 +1328,7 @@ void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
                if (root_as_id != slot->as_id)
                        continue;
 
-               zap_collapsible_spte_range(kvm, root, slot->base_gfn,
-                                          slot->base_gfn + slot->npages);
+               zap_collapsible_spte_range(kvm, root, slot);
        }
 }
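zap_collapsible_spte_range() now asks the generic kvm_mmu_max_mapping_level() helper whether the backing page could be mapped at a larger level, replacing the THP-specific PageTransCompoundMap() test. The per-SPTE predicate it applies, as a trivial sketch:

    #include <stdbool.h>
    #include <stdio.h>

    /* Zap a present leaf only if a strictly larger mapping is now possible;
     * the next fault recreates it as a huge page. */
    static bool should_zap_for_collapse(int spte_level, int max_level)
    {
            return spte_level < max_level;
    }

    int main(void)
    {
            printf("%d\n", should_zap_for_collapse(1, 2)); /* 4K leaf, 2M possible: zap */
            printf("%d\n", should_zap_for_collapse(2, 2)); /* already maximal: keep */
            return 0;
    }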
 
index b4b65e3..31096ec 100644 (file)
@@ -8,7 +8,29 @@
 hpa_t kvm_tdp_mmu_get_vcpu_root_hpa(struct kvm_vcpu *vcpu);
 void kvm_tdp_mmu_free_root(struct kvm *kvm, struct kvm_mmu_page *root);
 
-bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end);
+bool __kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start, gfn_t end,
+                                bool can_yield);
+static inline bool kvm_tdp_mmu_zap_gfn_range(struct kvm *kvm, gfn_t start,
+                                            gfn_t end)
+{
+       return __kvm_tdp_mmu_zap_gfn_range(kvm, start, end, true);
+}
+static inline bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
+{
+       gfn_t end = sp->gfn + KVM_PAGES_PER_HPAGE(sp->role.level);
+
+       /*
+        * Don't allow yielding, as the caller may have a flush pending.  Note,
+        * zapping will never yield while mmu_lock is held for write, but
+        * explicitly disallow it for safety.  The TDP MMU does not yield
+        * until it has made forward progress (steps sideways), and when zapping
+        * a single shadow page that it's guaranteed to see (thus the mmu_lock
+        * requirement), its "step sideways" will always step beyond the bounds
+        * of the shadow page's gfn range and stop iterating before yielding.
+        */
+       lockdep_assert_held_write(&kvm->mmu_lock);
+       return __kvm_tdp_mmu_zap_gfn_range(kvm, sp->gfn, end, false);
+}
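kvm_tdp_mmu_zap_sp() reuses the range-zap worker with yielding pinned off instead of growing a second zap path. The worker-plus-inline-wrapper shape, as a generic stand-alone sketch (names are illustrative):

    #include <stdbool.h>
    #include <stdio.h>

    /* The worker takes every knob; wrappers pin the common values. */
    static bool __zap_range(unsigned long start, unsigned long end, bool can_yield)
    {
            printf("zap [%#lx, %#lx) can_yield=%d\n", start, end, can_yield);
            return end > start;     /* pretend a flush is now needed */
    }

    static inline bool zap_range(unsigned long start, unsigned long end)
    {
            return __zap_range(start, end, true);   /* common case: may yield */
    }

    static inline bool zap_one_sp(unsigned long base, unsigned long npages)
    {
            /* the caller may have a flush pending, so never yield here */
            return __zap_range(base, base + npages, false);
    }

    int main(void)
    {
            zap_range(0, 0x1000);
            zap_one_sp(0x2000, 512);
            return 0;
    }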
 void kvm_tdp_mmu_zap_all(struct kvm *kvm);
 
 int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
@@ -33,9 +55,8 @@ void kvm_tdp_mmu_clear_dirty_pt_masked(struct kvm *kvm,
                                       struct kvm_memory_slot *slot,
                                       gfn_t gfn, unsigned long mask,
                                       bool wrprot);
-bool kvm_tdp_mmu_slot_set_dirty(struct kvm *kvm, struct kvm_memory_slot *slot);
 void kvm_tdp_mmu_zap_collapsible_sptes(struct kvm *kvm,
-                                      const struct kvm_memory_slot *slot);
+                                      struct kvm_memory_slot *slot);
 
 bool kvm_tdp_mmu_write_protect_gfn(struct kvm *kvm,
                                   struct kvm_memory_slot *slot, gfn_t gfn);
index cc91738..fb204ea 100644 (file)
@@ -51,6 +51,23 @@ static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
        nested_svm_vmexit(svm);
 }
 
+static void svm_inject_page_fault_nested(struct kvm_vcpu *vcpu, struct x86_exception *fault)
+{
+       struct vcpu_svm *svm = to_svm(vcpu);
+       WARN_ON(!is_guest_mode(vcpu));
+
+       if (vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_EXCEPTION_OFFSET + PF_VECTOR) &&
+          !svm->nested.nested_run_pending) {
+               svm->vmcb->control.exit_code = SVM_EXIT_EXCP_BASE + PF_VECTOR;
+               svm->vmcb->control.exit_code_hi = 0;
+               svm->vmcb->control.exit_info_1 = fault->error_code;
+               svm->vmcb->control.exit_info_2 = fault->address;
+               nested_svm_vmexit(svm);
+       } else {
+               kvm_inject_page_fault(vcpu, fault);
+       }
+}
+
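With NPT disabled, the new hook decides per fault whether L1 intercepts #PF; if so, the fault is reflected to L1 as a synthesized VM-exit rather than injected into L2. The decision, modeled as a stand-alone sketch (fields and names are hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    struct fault { unsigned long error_code, address; };

    static void inject_pf_nested(bool l1_intercepts_pf, bool nested_run_pending,
                                 const struct fault *f)
    {
            if (l1_intercepts_pf && !nested_run_pending)
                    printf("VM-exit to L1: error_code=%#lx cr2=%#lx\n",
                           f->error_code, f->address);
            else
                    printf("inject #PF into L2: cr2=%#lx\n", f->address);
    }

    int main(void)
    {
            struct fault f = { .error_code = 0x2, .address = 0xdeadb000 };

            inject_pf_nested(true, false, &f);   /* reflected to L1 */
            inject_pf_nested(false, false, &f);  /* delivered to L2 */
            return 0;
    }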
 static u64 nested_svm_get_tdp_pdptr(struct kvm_vcpu *vcpu, int index)
 {
        struct vcpu_svm *svm = to_svm(vcpu);
@@ -229,11 +246,18 @@ static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
        return true;
 }
 
-static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
+static bool nested_vmcb_check_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
 {
        struct kvm_vcpu *vcpu = &svm->vcpu;
        bool vmcb12_lma;
 
+       /*
+        * FIXME: these should be done after copying the fields,
+        * to avoid TOC/TOU races.  For these save area checks
+        * the possible damage is limited since kvm_set_cr0 and
+        * kvm_set_cr4 handle failure; EFER_SVME is an exception
+        * so it is force-set later in nested_prepare_vmcb_save.
+        */
        if ((vmcb12->save.efer & EFER_SVME) == 0)
                return false;
 
@@ -254,7 +278,7 @@ static bool nested_vmcb_checks(struct vcpu_svm *svm, struct vmcb *vmcb12)
        if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
                return false;
 
-       return nested_vmcb_check_controls(&vmcb12->control);
+       return true;
 }
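The FIXME above is a time-of-check/time-of-use concern: vmcb12 lives in guest memory, so the guest can flip bits between the check and the later copy. The safe shape, sketched with hypothetical types, is to snapshot first and then validate and consume only the snapshot:

    #include <stdio.h>

    struct save { unsigned long cr0, cr4, efer; };

    #define EFER_SVME (1UL << 12)   /* architectural EFER.SVME bit */

    static void consume(const struct save *s)
    {
            printf("entering nested mode with efer=%#lx\n", s->efer);
    }

    static int enter_nested(const volatile struct save *guest_vmcb)
    {
            struct save snap;

            /* Copy the guest-writable fields exactly once... */
            snap.cr0  = guest_vmcb->cr0;
            snap.cr4  = guest_vmcb->cr4;
            snap.efer = guest_vmcb->efer;

            /* ...check the snapshot... */
            if (!(snap.efer & EFER_SVME))
                    return -1;

            /* ...and consume the same snapshot, never the original. */
            consume(&snap);
            return 0;
    }

    int main(void)
    {
            volatile struct save vmcb = { .efer = EFER_SVME };

            return enter_nested(&vmcb);
    }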
 
 static void load_nested_vmcb_control(struct vcpu_svm *svm,
@@ -379,7 +403,14 @@ static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
        svm->vmcb->save.gdtr = vmcb12->save.gdtr;
        svm->vmcb->save.idtr = vmcb12->save.idtr;
        kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
-       svm_set_efer(&svm->vcpu, vmcb12->save.efer);
+
+       /*
+        * Force-set EFER_SVME even though it is checked earlier on the
+        * VMCB12, because the guest can flip the bit between the check
+        * and now.  Clearing EFER_SVME would call svm_free_nested.
+        */
+       svm_set_efer(&svm->vcpu, vmcb12->save.efer | EFER_SVME);
+
        svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
        svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
        svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = vmcb12->save.cr2;
@@ -436,16 +467,32 @@ int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
 {
        int ret;
 
+       trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
+                              vmcb12->save.rip,
+                              vmcb12->control.int_ctl,
+                              vmcb12->control.event_inj,
+                              vmcb12->control.nested_ctl);
+
+       trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
+                                   vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
+                                   vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
+                                   vmcb12->control.intercepts[INTERCEPT_WORD3],
+                                   vmcb12->control.intercepts[INTERCEPT_WORD4],
+                                   vmcb12->control.intercepts[INTERCEPT_WORD5]);
+
+
        svm->nested.vmcb12_gpa = vmcb12_gpa;
-       load_nested_vmcb_control(svm, &vmcb12->control);
-       nested_prepare_vmcb_save(svm, vmcb12);
        nested_prepare_vmcb_control(svm);
+       nested_prepare_vmcb_save(svm, vmcb12);
 
        ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
                                  nested_npt_enabled(svm));
        if (ret)
                return ret;
 
+       if (!npt_enabled)
+               svm->vcpu.arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
+
        svm_set_gif(svm, true);
 
        return 0;
@@ -481,7 +528,10 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
        if (WARN_ON_ONCE(!svm->nested.initialized))
                return -EINVAL;
 
-       if (!nested_vmcb_checks(svm, vmcb12)) {
+       load_nested_vmcb_control(svm, &vmcb12->control);
+
+       if (!nested_vmcb_check_save(svm, vmcb12) ||
+           !nested_vmcb_check_controls(&svm->nested.ctl)) {
                vmcb12->control.exit_code    = SVM_EXIT_ERR;
                vmcb12->control.exit_code_hi = 0;
                vmcb12->control.exit_info_1  = 0;
@@ -489,18 +539,6 @@ int nested_svm_vmrun(struct vcpu_svm *svm)
                goto out;
        }
 
-       trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
-                              vmcb12->save.rip,
-                              vmcb12->control.int_ctl,
-                              vmcb12->control.event_inj,
-                              vmcb12->control.nested_ctl);
-
-       trace_kvm_nested_intercepts(vmcb12->control.intercepts[INTERCEPT_CR] & 0xffff,
-                                   vmcb12->control.intercepts[INTERCEPT_CR] >> 16,
-                                   vmcb12->control.intercepts[INTERCEPT_EXCEPTION],
-                                   vmcb12->control.intercepts[INTERCEPT_WORD3],
-                                   vmcb12->control.intercepts[INTERCEPT_WORD4],
-                                   vmcb12->control.intercepts[INTERCEPT_WORD5]);
 
        /* Clear internal status */
        kvm_clear_exception_queue(&svm->vcpu);
@@ -1187,6 +1225,8 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
         */
        if (!(save->cr0 & X86_CR0_PG))
                goto out_free;
+       if (!(save->efer & EFER_SVME))
+               goto out_free;
 
        /*
         * All checks done, we can enter guest mode.  L1 control fields
index 035da07..fdf587f 100644 (file)
@@ -98,6 +98,8 @@ static enum index msr_to_index(u32 msr)
 static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
                                             enum pmu_type type)
 {
+       struct kvm_vcpu *vcpu = pmu_to_vcpu(pmu);
+
        switch (msr) {
        case MSR_F15H_PERF_CTL0:
        case MSR_F15H_PERF_CTL1:
@@ -105,6 +107,9 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
        case MSR_F15H_PERF_CTL3:
        case MSR_F15H_PERF_CTL4:
        case MSR_F15H_PERF_CTL5:
+               if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+                       return NULL;
+               fallthrough;
        case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
                if (type != PMU_TYPE_EVNTSEL)
                        return NULL;
@@ -115,6 +120,9 @@ static inline struct kvm_pmc *get_gp_pmc_amd(struct kvm_pmu *pmu, u32 msr,
        case MSR_F15H_PERF_CTR3:
        case MSR_F15H_PERF_CTR4:
        case MSR_F15H_PERF_CTR5:
+               if (!guest_cpuid_has(vcpu, X86_FEATURE_PERFCTR_CORE))
+                       return NULL;
+               fallthrough;
        case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
                if (type != PMU_TYPE_COUNTER)
                        return NULL;
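The PMU hunks gate the MSR_F15H_PERF_* registers on the guest's CPUID and then fall through to the handling shared with the legacy MSRs. The switch shape, in a self-contained sketch with made-up MSR indices (case ranges are the GNU C extension the kernel itself uses):

    #include <stdbool.h>
    #include <stdio.h>

    enum { MSR_NEW_BASE = 0x200, MSR_OLD_BASE = 0x100 };  /* made-up indices */

    /* Returns true if the MSR exists for this guest. */
    static bool msr_exists(unsigned int msr, bool guest_has_feature)
    {
            switch (msr) {
            case MSR_NEW_BASE ... MSR_NEW_BASE + 5:
                    if (!guest_has_feature)
                            return false;   /* CPUID hides it: no such MSR */
                    /* fall through to the common handling */
            case MSR_OLD_BASE ... MSR_OLD_BASE + 3:
                    return true;
            default:
                    return false;
            }
    }

    int main(void)
    {
            printf("%d %d\n", msr_exists(0x203, false), msr_exists(0x102, false));
            return 0;
    }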
index adb3619..58a45bb 100644 (file)
@@ -115,13 +115,6 @@ static const struct svm_direct_access_msrs {
        { .index = MSR_INVALID,                         .always = false },
 };
 
-/* enable NPT for AMD64 and X86 with PAE */
-#if defined(CONFIG_X86_64) || defined(CONFIG_X86_PAE)
-bool npt_enabled = true;
-#else
-bool npt_enabled;
-#endif
-
 /*
  * These 2 parameters are used to config the controls for Pause-Loop Exiting:
  * pause_filter_count: On processors that support Pause filtering(indicated
@@ -170,9 +163,12 @@ module_param(pause_filter_count_shrink, ushort, 0444);
 static unsigned short pause_filter_count_max = KVM_SVM_DEFAULT_PLE_WINDOW_MAX;
 module_param(pause_filter_count_max, ushort, 0444);
 
-/* allow nested paging (virtualized MMU) for all guests */
-static int npt = true;
-module_param(npt, int, S_IRUGO);
+/*
+ * Use nested page tables by default.  Note, NPT may get forced off by
+ * svm_hardware_setup() if it's unsupported by hardware or the host kernel.
+ */
+bool npt_enabled = true;
+module_param_named(npt, npt_enabled, bool, 0444);
 
 /* allow nested virtualization in KVM/SVM */
 static int nested = true;
@@ -926,9 +922,6 @@ static __init void svm_set_cpu_caps(void)
        if (boot_cpu_has(X86_FEATURE_LS_CFG_SSBD) ||
            boot_cpu_has(X86_FEATURE_AMD_SSBD))
                kvm_cpu_cap_set(X86_FEATURE_VIRT_SSBD);
-
-       /* Enable INVPCID feature */
-       kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
 }
 
 static __init int svm_hardware_setup(void)
@@ -991,10 +984,15 @@ static __init int svm_hardware_setup(void)
                        goto err;
        }
 
-       if (!boot_cpu_has(X86_FEATURE_NPT))
+       /*
+        * KVM's MMU doesn't support using 2-level paging for itself, and thus
+        * NPT isn't supported if the host is using 2-level paging since host
+        * CR4 is unchanged on VMRUN.
+        */
+       if (!IS_ENABLED(CONFIG_X86_64) && !IS_ENABLED(CONFIG_X86_PAE))
                npt_enabled = false;
 
-       if (npt_enabled && !npt)
+       if (!boot_cpu_has(X86_FEATURE_NPT))
                npt_enabled = false;
 
        kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
@@ -1103,12 +1101,12 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
 static void svm_check_invpcid(struct vcpu_svm *svm)
 {
        /*
-        * Intercept INVPCID instruction only if shadow page table is
-        * enabled. Interception is not required with nested page table
-        * enabled.
+        * Intercept INVPCID if shadow paging is enabled to sync/free shadow
+        * roots, or if INVPCID is disabled in the guest to inject #UD.
         */
        if (kvm_cpu_cap_has(X86_FEATURE_INVPCID)) {
-               if (!npt_enabled)
+               if (!npt_enabled ||
+                   !guest_cpuid_has(&svm->vcpu, X86_FEATURE_INVPCID))
                        svm_set_intercept(svm, INTERCEPT_INVPCID);
                else
                        svm_clr_intercept(svm, INTERCEPT_INVPCID);
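svm_check_invpcid() now traps INVPCID in two cases: shadow paging must observe it to sync or free roots, or the guest's CPUID hides it and a #UD has to be synthesized. The decision table, as a small sketch with illustrative inputs:

    #include <stdbool.h>
    #include <stdio.h>

    static void update_invpcid_intercept(bool host_has_invpcid, bool npt_enabled,
                                         bool guest_cpuid_has_invpcid)
    {
            if (!host_has_invpcid)
                    return;         /* nothing to configure */

            if (!npt_enabled || !guest_cpuid_has_invpcid)
                    puts("intercept INVPCID");      /* shadow sync or #UD */
            else
                    puts("let INVPCID run natively");
    }

    int main(void)
    {
            update_invpcid_intercept(true, true, false); /* hidden by CPUID: #UD */
            update_invpcid_intercept(true, true, true);  /* native */
            return 0;
    }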
@@ -1203,6 +1201,7 @@ static void init_vmcb(struct vcpu_svm *svm)
        init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
        init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
 
+       svm_set_cr4(&svm->vcpu, 0);
        svm_set_efer(&svm->vcpu, 0);
        save->dr6 = 0xffff0ff0;
        kvm_set_rflags(&svm->vcpu, X86_EFLAGS_FIXED);
@@ -2214,15 +2213,20 @@ static int emulate_svm_instr(struct kvm_vcpu *vcpu, int opcode)
                [SVM_INSTR_VMSAVE] = vmsave_interception,
        };
        struct vcpu_svm *svm = to_svm(vcpu);
+       int ret;
 
        if (is_guest_mode(vcpu)) {
                svm->vmcb->control.exit_code = guest_mode_exit_codes[opcode];
                svm->vmcb->control.exit_info_1 = 0;
                svm->vmcb->control.exit_info_2 = 0;
 
-               return nested_svm_vmexit(svm);
-       } else
-               return svm_instr_handlers[opcode](svm);
+               /* Returns '1' or -errno on failure, '0' on success. */
+               ret = nested_svm_vmexit(svm);
+               if (ret)
+                       return ret;
+               return 1;
+       }
+       return svm_instr_handlers[opcode](svm);
 }
 
 /*
index b2f0b5e..bcca0b8 100644 (file)
@@ -2167,15 +2167,13 @@ static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
                vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
 
        /*
-        * The PML address never changes, so it is constant in vmcs02.
-        * Conceptually we want to copy the PML index from vmcs01 here,
-        * and then back to vmcs01 on nested vmexit.  But since we flush
-        * the log and reset GUEST_PML_INDEX on each vmexit, the PML
-        * index is also effectively constant in vmcs02.
+        * PML is emulated for L2, but never enabled in hardware as the MMU
+        * handles A/D emulation.  Disabling PML for L2 also avoids having to
+        * deal with filtering out L2 GPAs from the buffer.
         */
        if (enable_pml) {
-               vmcs_write64(PML_ADDRESS, page_to_phys(vmx->pml_pg));
-               vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
+               vmcs_write64(PML_ADDRESS, 0);
+               vmcs_write16(GUEST_PML_INDEX, -1);
        }
 
        if (cpu_has_vmx_encls_vmexit())
@@ -2210,7 +2208,7 @@ static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
 
 static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
 {
-       u32 exec_control, vmcs12_exec_ctrl;
+       u32 exec_control;
        u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
 
        if (vmx->nested.dirty_vmcs12 || vmx->nested.hv_evmcs)
@@ -2284,11 +2282,11 @@ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_ENABLE_VMFUNC);
                if (nested_cpu_has(vmcs12,
-                                  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) {
-                       vmcs12_exec_ctrl = vmcs12->secondary_vm_exec_control &
-                               ~SECONDARY_EXEC_ENABLE_PML;
-                       exec_control |= vmcs12_exec_ctrl;
-               }
+                                  CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
+                       exec_control |= vmcs12->secondary_vm_exec_control;
+
+               /* PML is emulated and never enabled in hardware for L2. */
+               exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
                /* VMCS shadowing for L2 is emulated for now */
                exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
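Rather than masking PML out of L1's requested bits before merging, the merged value is built first and the emulated-only bit is stripped unconditionally afterwards. A sketch of that order of operations (the bit value matches Intel's SECONDARY_EXEC_ENABLE_PML, bit 17, but treat it as illustrative):

    #include <stdint.h>
    #include <stdio.h>

    #define EXEC_ENABLE_PML (1u << 17)

    static uint32_t build_secondary_controls(uint32_t l0_wants, uint32_t l1_wants)
    {
            uint32_t exec = l0_wants | l1_wants;

            /* PML is emulated for L2; never enable it in hardware. */
            exec &= ~EXEC_ENABLE_PML;
            return exec;
    }

    int main(void)
    {
            unsigned int merged = build_secondary_controls(0x2, EXEC_ENABLE_PML | 0x8);

            printf("%#x\n", merged);   /* 0xa: the PML bit is gone */
            return 0;
    }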
@@ -4200,9 +4198,6 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
        if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, &ignored))
                nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
 
-       if (!enable_ept)
-               vcpu->arch.walk_mmu->inject_page_fault = kvm_inject_page_fault;
-
        nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
 
        vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
@@ -4495,6 +4490,11 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
                vmx_set_virtual_apic_mode(vcpu);
        }
 
+       if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
+               vmx->nested.update_vmcs01_cpu_dirty_logging = false;
+               vmx_update_cpu_dirty_logging(vcpu);
+       }
+
        /* Unpin physical memory we referred to in vmcs02 */
        if (vmx->nested.apic_access_page) {
                kvm_release_page_clean(vmx->nested.apic_access_page);
@@ -5793,7 +5793,10 @@ static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
        case EXIT_REASON_PREEMPTION_TIMER:
                return true;
        case EXIT_REASON_PML_FULL:
-               /* We emulate PML support to L1. */
+               /*
+                * PML is emulated for an L1 VMM and should never be enabled in
+                * vmcs02; always "handle" PML_FULL by exiting to userspace.
+                */
                return true;
        case EXIT_REASON_VMFUNC:
                /* VM functions are emulated through L2->L0 vmexits. */
index d1df618..9efc1a6 100644 (file)
@@ -298,7 +298,7 @@ int intel_pmu_create_guest_lbr_event(struct kvm_vcpu *vcpu)
        if (IS_ERR(event)) {
                pr_debug_ratelimited("%s: failed %ld\n",
                                        __func__, PTR_ERR(event));
-               return -ENOENT;
+               return PTR_ERR(event);
        }
        lbr_desc->event = event;
        pmu->event_count++;
@@ -320,7 +320,7 @@ static bool intel_pmu_handle_lbr_msrs_access(struct kvm_vcpu *vcpu,
        if (!intel_pmu_is_valid_lbr_msr(vcpu, index))
                return false;
 
-       if (!lbr_desc->event && !intel_pmu_create_guest_lbr_event(vcpu))
+       if (!lbr_desc->event && intel_pmu_create_guest_lbr_event(vcpu) < 0)
                goto dummy;
 
        /*
index e0a3a9b..32cf828 100644 (file)
@@ -4277,7 +4277,12 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
        */
        exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
 
-       if (!enable_pml)
+       /*
+        * PML is enabled/disabled when dirty logging of memslots changes, but
+        * it needs to be set here when dirty logging is already active, e.g.
+        * if this vCPU was created after dirty logging was enabled.
+        */
+       if (!vcpu->kvm->arch.cpu_dirty_logging_count)
                exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
 
        if (cpu_has_vmx_xsaves()) {
@@ -4295,18 +4300,8 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
        }
 
        vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
-
-       /*
-        * Expose INVPCID if and only if PCID is also exposed to the guest.
-        * INVPCID takes a #UD when it's disabled in the VMCS, but a #GP or #PF
-        * if CR4.PCIDE=0.  Enumerating CPUID.INVPCID=1 would lead to incorrect
-        * behavior from the guest perspective (it would expect #GP or #PF).
-        */
-       if (!guest_cpuid_has(vcpu, X86_FEATURE_PCID))
-               guest_cpuid_clear(vcpu, X86_FEATURE_INVPCID);
        vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
-
        vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
        vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdseed, RDSEED);
 
@@ -5776,24 +5771,6 @@ static void vmx_flush_pml_buffer(struct kvm_vcpu *vcpu)
        vmcs_write16(GUEST_PML_INDEX, PML_ENTITY_NUM - 1);
 }
 
-/*
- * Flush all vcpus' PML buffer and update logged GPAs to dirty_bitmap.
- * Called before reporting dirty_bitmap to userspace.
- */
-static void kvm_flush_pml_buffers(struct kvm *kvm)
-{
-       int i;
-       struct kvm_vcpu *vcpu;
-       /*
-        * We only need to kick vcpu out of guest mode here, as PML buffer
-        * is flushed at beginning of all VMEXITs, and it's obvious that only
-        * vcpus running in guest are possible to have unflushed GPAs in PML
-        * buffer.
-        */
-       kvm_for_each_vcpu(i, vcpu, kvm)
-               kvm_vcpu_kick(vcpu);
-}
-
 static void vmx_dump_sel(char *name, uint32_t sel)
 {
        pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n",
@@ -5976,9 +5953,10 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
         * updated. Another benefit is that in kvm_vm_ioctl_get_dirty_log,
         * before querying dirty_bitmap, we only need to kick all vcpus out of
         * guest mode: once a vcpu is in root mode, the PML buffer must have been
-        * flushed already.
+        * flushed already.  Note, PML is never enabled in hardware while
+        * running L2.
         */
-       if (enable_pml)
+       if (enable_pml && !is_guest_mode(vcpu))
                vmx_flush_pml_buffer(vcpu);
 
        /*
@@ -5995,6 +5973,13 @@ static int __vmx_handle_exit(struct kvm_vcpu *vcpu, fastpath_t exit_fastpath)
 
        if (is_guest_mode(vcpu)) {
                /*
+                * PML is never enabled when running L2, bail immediately if a
+                * PML full exit occurs as something is horribly wrong.
+                */
+               if (exit_reason.basic == EXIT_REASON_PML_FULL)
+                       goto unexpected_vmexit;
+
+               /*
                 * The host physical addresses of some pages of guest memory
                 * are loaded into the vmcs02 (e.g. vmcs12's Virtual APIC
                 * Page). The CPU may write to these pages via their host
@@ -6595,8 +6580,8 @@ static void atomic_switch_perf_msrs(struct vcpu_vmx *vmx)
        int i, nr_msrs;
        struct perf_guest_switch_msr *msrs;
 
+       /* Note, nr_msrs may be garbage if perf_guest_get_msrs() returns NULL. */
        msrs = perf_guest_get_msrs(&nr_msrs);
-
        if (!msrs)
                return;
 
@@ -6851,13 +6836,15 @@ static fastpath_t vmx_vcpu_run(struct kvm_vcpu *vcpu)
        if (unlikely((u16)vmx->exit_reason.basic == EXIT_REASON_MCE_DURING_VMENTRY))
                kvm_machine_check();
 
+       if (likely(!vmx->exit_reason.failed_vmentry))
+               vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+
        trace_kvm_exit(vmx->exit_reason.full, vcpu, KVM_ISA_VMX);
 
        if (unlikely(vmx->exit_reason.failed_vmentry))
                return EXIT_FASTPATH_NONE;
 
        vmx->loaded_vmcs->launched = 1;
-       vmx->idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
 
        vmx_recover_nmi_blocking(vmx);
        vmx_complete_interrupts(vmx);
@@ -7330,8 +7317,8 @@ static __init void vmx_set_cpu_caps(void)
        /* CPUID 0x7 */
        if (kvm_mpx_supported())
                kvm_cpu_cap_check_and_set(X86_FEATURE_MPX);
-       if (cpu_has_vmx_invpcid())
-               kvm_cpu_cap_check_and_set(X86_FEATURE_INVPCID);
+       if (!cpu_has_vmx_invpcid())
+               kvm_cpu_cap_clear(X86_FEATURE_INVPCID);
        if (vmx_pt_mode_is_host_guest())
                kvm_cpu_cap_check_and_set(X86_FEATURE_INTEL_PT);
 
@@ -7509,30 +7496,24 @@ static void vmx_sched_in(struct kvm_vcpu *vcpu, int cpu)
                shrink_ple_window(vcpu);
 }
 
-static void vmx_slot_enable_log_dirty(struct kvm *kvm,
-                                    struct kvm_memory_slot *slot)
-{
-       if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
-               kvm_mmu_slot_leaf_clear_dirty(kvm, slot);
-       kvm_mmu_slot_largepage_remove_write_access(kvm, slot);
-}
-
-static void vmx_slot_disable_log_dirty(struct kvm *kvm,
-                                      struct kvm_memory_slot *slot)
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu)
 {
-       kvm_mmu_slot_set_dirty(kvm, slot);
-}
+       struct vcpu_vmx *vmx = to_vmx(vcpu);
 
-static void vmx_flush_log_dirty(struct kvm *kvm)
-{
-       kvm_flush_pml_buffers(kvm);
-}
+       if (is_guest_mode(vcpu)) {
+               vmx->nested.update_vmcs01_cpu_dirty_logging = true;
+               return;
+       }
 
-static void vmx_enable_log_dirty_pt_masked(struct kvm *kvm,
-                                          struct kvm_memory_slot *memslot,
-                                          gfn_t offset, unsigned long mask)
-{
-       kvm_mmu_clear_dirty_pt_masked(kvm, memslot, offset, mask);
+       /*
+        * Note, cpu_dirty_logging_count can be changed concurrently with this
+        * code, but in that case another update request will be made and so
+        * the guest will never run with a stale PML value.
+        */
+       if (vcpu->kvm->arch.cpu_dirty_logging_count)
+               secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_ENABLE_PML);
+       else
+               secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_ENABLE_PML);
 }
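vmx_update_cpu_dirty_logging() refuses to touch hardware state while vmcs02 is loaded; the request is latched and replayed on the vmcs01 side at nested VM-Exit (the hunk in nested.c above). A stand-alone model of the latch (types and names are hypothetical):

    #include <stdbool.h>
    #include <stdio.h>

    struct vcpu {
            bool in_guest_mode;     /* currently running L2? */
            bool update_on_vmexit;  /* latched request */
            int  dirty_log_count;
    };

    static void update_dirty_logging(struct vcpu *v)
    {
            if (v->in_guest_mode) {
                    v->update_on_vmexit = true;     /* vmcs02 loaded: defer */
                    return;
            }
            printf("PML bit %s in vmcs01\n",
                   v->dirty_log_count ? "set" : "cleared");
    }

    static void nested_vmexit(struct vcpu *v)
    {
            v->in_guest_mode = false;
            if (v->update_on_vmexit) {
                    v->update_on_vmexit = false;
                    update_dirty_logging(v);        /* vmcs01 loaded again */
            }
    }

    int main(void)
    {
            struct vcpu v = { .in_guest_mode = true, .dirty_log_count = 1 };

            update_dirty_logging(&v);       /* latched while L2 runs */
            nested_vmexit(&v);              /* replayed at nested VM-Exit */
            return 0;
    }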
 
 static int vmx_pre_block(struct kvm_vcpu *vcpu)
@@ -7642,11 +7623,6 @@ static bool vmx_check_apicv_inhibit_reasons(ulong bit)
        return supported & BIT(bit);
 }
 
-static int vmx_cpu_dirty_log_size(void)
-{
-       return enable_pml ? PML_ENTITY_NUM : 0;
-}
-
 static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .hardware_unsetup = hardware_unsetup,
 
@@ -7746,10 +7722,8 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
        .sched_in = vmx_sched_in,
 
-       .slot_enable_log_dirty = vmx_slot_enable_log_dirty,
-       .slot_disable_log_dirty = vmx_slot_disable_log_dirty,
-       .flush_log_dirty = vmx_flush_log_dirty,
-       .enable_log_dirty_pt_masked = vmx_enable_log_dirty_pt_masked,
+       .cpu_dirty_log_size = PML_ENTITY_NUM,
+       .update_cpu_dirty_logging = vmx_update_cpu_dirty_logging,
 
        .pre_block = vmx_pre_block,
        .post_block = vmx_post_block,
@@ -7777,7 +7751,6 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
 
        .msr_filter_changed = vmx_msr_filter_changed,
        .complete_emulated_msr = kvm_complete_insn_gp,
-       .cpu_dirty_log_size = vmx_cpu_dirty_log_size,
 
        .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
@@ -7894,13 +7867,8 @@ static __init int hardware_setup(void)
        if (!enable_ept || !enable_ept_ad_bits || !cpu_has_vmx_pml())
                enable_pml = 0;
 
-       if (!enable_pml) {
-               vmx_x86_ops.slot_enable_log_dirty = NULL;
-               vmx_x86_ops.slot_disable_log_dirty = NULL;
-               vmx_x86_ops.flush_log_dirty = NULL;
-               vmx_x86_ops.enable_log_dirty_pt_masked = NULL;
-               vmx_x86_ops.cpu_dirty_log_size = NULL;
-       }
+       if (!enable_pml)
+               vmx_x86_ops.cpu_dirty_log_size = 0;
 
        if (!cpu_has_vmx_preemption_timer())
                enable_preemption_timer = false;
index 12c53d0..89da5e1 100644 (file)
@@ -165,6 +165,7 @@ struct nested_vmx {
 
        bool change_vmcs01_virtual_apic_mode;
        bool reload_vmcs01_apic_access_page;
+       bool update_vmcs01_cpu_dirty_logging;
 
        /*
         * Enlightened VMCS has been enabled. It does not mean that L1 has to
@@ -393,6 +394,7 @@ int vmx_find_loadstore_msr_slot(struct vmx_msrs *m, u32 msr);
 void vmx_ept_load_pdptrs(struct kvm_vcpu *vcpu);
 void vmx_set_intercept_for_msr(struct kvm_vcpu *vcpu,
        u32 msr, int type, bool value);
+void vmx_update_cpu_dirty_logging(struct kvm_vcpu *vcpu);
 
 static inline u8 vmx_get_rvi(void)
 {
index 884e5b3..eca6362 100644 (file)
@@ -271,8 +271,7 @@ static struct kmem_cache *x86_emulator_cache;
  * When called, it means the previous get/set msr reached an invalid msr.
  * Return true if we want to ignore/silent this failed msr access.
  */
-static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
-                                 u64 data, bool write)
+static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
 {
        const char *op = write ? "wrmsr" : "rdmsr";
 
@@ -1445,7 +1444,7 @@ static int do_get_msr_feature(struct kvm_vcpu *vcpu, unsigned index, u64 *data)
        if (r == KVM_MSR_RET_INVALID) {
                /* Unconditionally clear the output for simplicity */
                *data = 0;
-               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+               if (kvm_msr_ignored_check(index, 0, false))
                        r = 0;
        }
 
@@ -1526,35 +1525,44 @@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits);
 
 bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
 {
+       struct kvm_x86_msr_filter *msr_filter;
+       struct msr_bitmap_range *ranges;
        struct kvm *kvm = vcpu->kvm;
-       struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
-       u32 count = kvm->arch.msr_filter.count;
-       u32 i;
-       bool r = kvm->arch.msr_filter.default_allow;
+       bool allowed;
        int idx;
+       u32 i;
 
-       /* MSR filtering not set up or x2APIC enabled, allow everything */
-       if (!count || (index >= 0x800 && index <= 0x8ff))
+       /* x2APIC MSRs do not support filtering. */
+       if (index >= 0x800 && index <= 0x8ff)
                return true;
 
-       /* Prevent collision with set_msr_filter */
        idx = srcu_read_lock(&kvm->srcu);
 
-       for (i = 0; i < count; i++) {
+       msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
+       if (!msr_filter) {
+               allowed = true;
+               goto out;
+       }
+
+       allowed = msr_filter->default_allow;
+       ranges = msr_filter->ranges;
+
+       for (i = 0; i < msr_filter->count; i++) {
                u32 start = ranges[i].base;
                u32 end = start + ranges[i].nmsrs;
                u32 flags = ranges[i].flags;
                unsigned long *bitmap = ranges[i].bitmap;
 
                if ((index >= start) && (index < end) && (flags & type)) {
-                       r = !!test_bit(index - start, bitmap);
+                       allowed = !!test_bit(index - start, bitmap);
                        break;
                }
        }
 
+out:
        srcu_read_unlock(&kvm->srcu, idx);
 
-       return r;
+       return allowed;
 }
 EXPORT_SYMBOL_GPL(kvm_msr_allowed);
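The reader side above walks an array of ranges, each carrying a base, a count, and a per-MSR allow/deny bitmap, with the first match winning. The lookup, as a self-contained sketch of a hypothetical layout:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    struct msr_range {
            uint32_t base, nmsrs, flags;
            const unsigned long *bitmap;    /* one bit per MSR in the range */
    };

    static bool msr_allowed(uint32_t msr, uint32_t type, bool default_allow,
                            const struct msr_range *r, unsigned int count)
    {
            for (unsigned int i = 0; i < count; i++, r++) {
                    if (msr >= r->base && msr < r->base + r->nmsrs &&
                        (r->flags & type)) {
                            uint32_t bit = msr - r->base;

                            return (r->bitmap[bit / (8 * sizeof(long))] >>
                                    (bit % (8 * sizeof(long)))) & 1;
                    }
            }
            return default_allow;   /* no range matched */
    }

    int main(void)
    {
            static const unsigned long bits[] = { 0x5 }; /* allow MSRs 0 and 2 */
            struct msr_range r = { .base = 0x100, .nmsrs = 3,
                                   .flags = 1, .bitmap = bits };

            printf("%d %d\n", msr_allowed(0x101, 1, true, &r, 1),
                              msr_allowed(0x102, 1, true, &r, 1));
            return 0;
    }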
 
@@ -1611,7 +1619,7 @@ static int kvm_set_msr_ignored_check(struct kvm_vcpu *vcpu,
        int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
 
        if (ret == KVM_MSR_RET_INVALID)
-               if (kvm_msr_ignored_check(vcpu, index, data, true))
+               if (kvm_msr_ignored_check(index, data, true))
                        ret = 0;
 
        return ret;
@@ -1649,7 +1657,7 @@ static int kvm_get_msr_ignored_check(struct kvm_vcpu *vcpu,
        if (ret == KVM_MSR_RET_INVALID) {
                /* Unconditionally clear *data for simplicity */
                *data = 0;
-               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+               if (kvm_msr_ignored_check(index, 0, false))
                        ret = 0;
        }
 
@@ -2320,7 +2328,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        kvm_vcpu_write_tsc_offset(vcpu, offset);
        raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
 
-       spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
        if (!matched) {
                kvm->arch.nr_vcpus_matched_tsc = 0;
        } else if (!already_matched) {
@@ -2328,7 +2336,7 @@ static void kvm_synchronize_tsc(struct kvm_vcpu *vcpu, u64 data)
        }
 
        kvm_track_tsc_matching(vcpu);
-       spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
 }
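The conversions to the _irqsave/_irqrestore variants here and below let pvclock_gtod_sync_lock be taken from contexts that may already run with interrupts disabled, without risking a self-deadlock if an interrupt contends for the same lock. The idiom, as a kernel-style fragment (not a buildable module on its own):

    #include <linux/spinlock.h>

    static DEFINE_SPINLOCK(sync_lock);

    static void update_clock_copy(void)
    {
            unsigned long flags;

            /* Saves and disables local IRQs, then restores the prior state on
             * unlock; correct whether the caller runs with IRQs on or off. */
            spin_lock_irqsave(&sync_lock, flags);
            /* ...update the protected clock fields... */
            spin_unlock_irqrestore(&sync_lock, flags);
    }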
 
 static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@ -2550,11 +2558,16 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
        int i;
        struct kvm_vcpu *vcpu;
        struct kvm_arch *ka = &kvm->arch;
+       unsigned long flags;
+
+       kvm_hv_invalidate_tsc_page(kvm);
 
-       spin_lock(&ka->pvclock_gtod_sync_lock);
        kvm_make_mclock_inprogress_request(kvm);
+
        /* no guest entries from this point */
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        pvclock_update_vm_gtod_copy(kvm);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@ -2562,8 +2575,6 @@ static void kvm_gen_update_masterclock(struct kvm *kvm)
        /* guest entries allowed */
        kvm_for_each_vcpu(i, vcpu, kvm)
                kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
-
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
 #endif
 }
 
@@ -2571,17 +2582,18 @@ u64 get_kvmclock_ns(struct kvm *kvm)
 {
        struct kvm_arch *ka = &kvm->arch;
        struct pvclock_vcpu_time_info hv_clock;
+       unsigned long flags;
        u64 ret;
 
-       spin_lock(&ka->pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        if (!ka->use_master_clock) {
-               spin_unlock(&ka->pvclock_gtod_sync_lock);
+               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                return get_kvmclock_base_ns() + ka->kvmclock_offset;
        }
 
        hv_clock.tsc_timestamp = ka->master_cycle_now;
        hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* both __this_cpu_read() and rdtsc() should be on the same cpu */
        get_cpu();
@@ -2675,13 +2687,13 @@ static int kvm_guest_time_update(struct kvm_vcpu *v)
         * If the host uses TSC clock, then passthrough TSC as stable
         * to the guest.
         */
-       spin_lock(&ka->pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
        use_master_clock = ka->use_master_clock;
        if (use_master_clock) {
                host_tsc = ka->master_cycle_now;
                kernel_ns = ka->master_kernel_ns;
        }
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
        /* Keep irq disabled to prevent changes to the clock */
        local_irq_save(flags);
@@ -2957,6 +2969,11 @@ static void record_steal_time(struct kvm_vcpu *vcpu)
        struct kvm_host_map map;
        struct kvm_steal_time *st;
 
+       if (kvm_xen_msr_enabled(vcpu->kvm)) {
+               kvm_xen_runstate_set_running(vcpu);
+               return;
+       }
+
        if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED))
                return;
 
@@ -3756,11 +3773,15 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
        case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
                r = 1;
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_CAP_XEN_HVM:
                r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
                    KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL |
                    KVM_XEN_HVM_CONFIG_SHARED_INFO;
+               if (sched_info_on())
+                       r |= KVM_XEN_HVM_CONFIG_RUNSTATE;
                break;
+#endif
        case KVM_CAP_SYNC_REGS:
                r = KVM_SYNC_X86_VALID_FIELDS;
                break;
@@ -4038,7 +4059,11 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu)
        if (vcpu->preempted && !vcpu->arch.guest_state_protected)
                vcpu->arch.preempted_in_kernel = !static_call(kvm_x86_get_cpl)(vcpu);
 
-       kvm_steal_time_set_preempted(vcpu);
+       if (kvm_xen_msr_enabled(vcpu->kvm))
+               kvm_xen_runstate_set_preempted(vcpu);
+       else
+               kvm_steal_time_set_preempted(vcpu);
+
        static_call(kvm_x86_vcpu_put)(vcpu);
        vcpu->arch.last_host_tsc = rdtsc();
        /*
@@ -5013,6 +5038,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
        case KVM_GET_SUPPORTED_HV_CPUID:
                r = kvm_ioctl_get_supported_hv_cpuid(vcpu, argp);
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_XEN_VCPU_GET_ATTR: {
                struct kvm_xen_vcpu_attr xva;
 
@@ -5033,6 +5059,7 @@ long kvm_arch_vcpu_ioctl(struct file *filp,
                r = kvm_xen_vcpu_set_attr(vcpu, &xva);
                break;
        }
+#endif
        default:
                r = -EINVAL;
        }
@@ -5215,10 +5242,18 @@ static int kvm_vm_ioctl_reinject(struct kvm *kvm,
 
 void kvm_arch_sync_dirty_log(struct kvm *kvm, struct kvm_memory_slot *memslot)
 {
+
        /*
-        * Flush potentially hardware-cached dirty pages to dirty_bitmap.
+        * Flush all CPUs' dirty log buffers to the dirty_bitmap.  Called
+        * before reporting dirty_bitmap to userspace.  KVM flushes the buffers
+        * on all VM-Exits, thus we only need to kick running vCPUs to force a
+        * VM-Exit.
         */
-       static_call_cond(kvm_x86_flush_log_dirty)(kvm);
+       struct kvm_vcpu *vcpu;
+       int i;
+
+       kvm_for_each_vcpu(i, vcpu, kvm)
+               kvm_vcpu_kick(vcpu);
 }
 
 int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event,
@@ -5329,25 +5364,34 @@ split_irqchip_unlock:
        return r;
 }
 
-static void kvm_clear_msr_filter(struct kvm *kvm)
+static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
+{
+       struct kvm_x86_msr_filter *msr_filter;
+
+       msr_filter = kzalloc(sizeof(*msr_filter), GFP_KERNEL_ACCOUNT);
+       if (!msr_filter)
+               return NULL;
+
+       msr_filter->default_allow = default_allow;
+       return msr_filter;
+}
+
+static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 {
        u32 i;
-       u32 count = kvm->arch.msr_filter.count;
-       struct msr_bitmap_range ranges[16];
 
-       mutex_lock(&kvm->lock);
-       kvm->arch.msr_filter.count = 0;
-       memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
-       mutex_unlock(&kvm->lock);
-       synchronize_srcu(&kvm->srcu);
+       if (!msr_filter)
+               return;
+
+       for (i = 0; i < msr_filter->count; i++)
+               kfree(msr_filter->ranges[i].bitmap);
 
-       for (i = 0; i < count; i++)
-               kfree(ranges[i].bitmap);
+       kfree(msr_filter);
 }
 
-static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
+static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
+                             struct kvm_msr_filter_range *user_range)
 {
-       struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
        struct msr_bitmap_range range;
        unsigned long *bitmap = NULL;
        size_t bitmap_size;
@@ -5381,11 +5425,9 @@ static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user
                goto err;
        }
 
-       /* Everything ok, add this range identifier to our global pool */
-       ranges[kvm->arch.msr_filter.count] = range;
-       /* Make sure we filled the array before we tell anyone to walk it */
-       smp_wmb();
-       kvm->arch.msr_filter.count++;
+       /* Everything ok, add this range identifier. */
+       msr_filter->ranges[msr_filter->count] = range;
+       msr_filter->count++;
 
        return 0;
 err:
@@ -5396,10 +5438,11 @@ err:
 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
 {
        struct kvm_msr_filter __user *user_msr_filter = argp;
+       struct kvm_x86_msr_filter *new_filter, *old_filter;
        struct kvm_msr_filter filter;
        bool default_allow;
-       int r = 0;
        bool empty = true;
+       int r = 0;
        u32 i;
 
        if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
@@ -5412,25 +5455,32 @@ static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
        if (empty && !default_allow)
                return -EINVAL;
 
-       kvm_clear_msr_filter(kvm);
-
-       kvm->arch.msr_filter.default_allow = default_allow;
+       new_filter = kvm_alloc_msr_filter(default_allow);
+       if (!new_filter)
+               return -ENOMEM;
 
-       /*
-        * Protect from concurrent calls to this function that could trigger
-        * a TOCTOU violation on kvm->arch.msr_filter.count.
-        */
-       mutex_lock(&kvm->lock);
        for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
-               r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
-               if (r)
-                       break;
+               r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+               if (r) {
+                       kvm_free_msr_filter(new_filter);
+                       return r;
+               }
        }
 
+       mutex_lock(&kvm->lock);
+
+       /* The per-VM filter is protected by kvm->lock... */
+       old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
+
+       rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
+       synchronize_srcu(&kvm->srcu);
+
+       kvm_free_msr_filter(old_filter);
+
        kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
        mutex_unlock(&kvm->lock);
 
-       return r;
+       return 0;
 }
 
 long kvm_arch_vm_ioctl(struct file *filp,
@@ -5646,6 +5696,7 @@ set_pit2_out:
                        kvm->arch.bsp_vcpu_id = arg;
                mutex_unlock(&kvm->lock);
                break;
+#ifdef CONFIG_KVM_XEN
        case KVM_XEN_HVM_CONFIG: {
                struct kvm_xen_hvm_config xhc;
                r = -EFAULT;
@@ -5674,7 +5725,9 @@ set_pit2_out:
                r = kvm_xen_hvm_set_attr(kvm, &xha);
                break;
        }
+#endif
        case KVM_SET_CLOCK: {
+               struct kvm_arch *ka = &kvm->arch;
                struct kvm_clock_data user_ns;
                u64 now_ns;
 
@@ -5693,8 +5746,22 @@ set_pit2_out:
                 * pvclock_update_vm_gtod_copy().
                 */
                kvm_gen_update_masterclock(kvm);
-               now_ns = get_kvmclock_ns(kvm);
-               kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
+
+               /*
+                * This pairs with kvm_guest_time_update(): when masterclock is
+                * in use, we use master_kernel_ns + kvmclock_offset to set
+                * unsigned 'system_time'; if we used get_kvmclock_ns() (which
+                * is slightly ahead) here, the unsigned 'system_time' could go
+                * negative (wrap) when 'user_ns.clock' is very small.
+                */
+               spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+               if (kvm->arch.use_master_clock)
+                       now_ns = ka->master_kernel_ns;
+               else
+                       now_ns = get_kvmclock_base_ns();
+               ka->kvmclock_offset = user_ns.clock - now_ns;
+               spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+
                kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                break;
        }
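The comment above is an overflow argument, and tiny concrete numbers make it visible: deriving the offset from the slightly-ahead scaled clock can push the unsigned system_time past zero. A stand-alone sketch with invented values:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t master_kernel_ns = 1000;
            uint64_t kvmclock_ns = 1500;    /* scaled reading, slightly ahead */
            uint64_t user_clock = 100;      /* tiny value from userspace */

            /* system_time = master_kernel_ns + kvmclock_offset (unsigned) */
            uint64_t bad  = master_kernel_ns + (user_clock - kvmclock_ns);
            uint64_t good = master_kernel_ns + (user_clock - master_kernel_ns);

            /* bad wraps to a huge value; good is the requested 100ns. */
            printf("bad=%llu good=%llu\n",
                   (unsigned long long)bad, (unsigned long long)good);
            return 0;
    }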
@@ -6578,7 +6645,7 @@ static int kvm_emulate_wbinvd_noskip(struct kvm_vcpu *vcpu)
                int cpu = get_cpu();
 
                cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
-               smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
+               on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
                                wbinvd_ipi, NULL, 1);
                put_cpu();
                cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
@@ -7673,6 +7740,7 @@ static void kvm_hyperv_tsc_notifier(void)
        struct kvm *kvm;
        struct kvm_vcpu *vcpu;
        int cpu;
+       unsigned long flags;
 
        mutex_lock(&kvm_lock);
        list_for_each_entry(kvm, &vm_list, vm_list)
@@ -7688,17 +7756,15 @@ static void kvm_hyperv_tsc_notifier(void)
        list_for_each_entry(kvm, &vm_list, vm_list) {
                struct kvm_arch *ka = &kvm->arch;
 
-               spin_lock(&ka->pvclock_gtod_sync_lock);
-
+               spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                pvclock_update_vm_gtod_copy(kvm);
+               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
 
                kvm_for_each_vcpu(cpu, vcpu, kvm)
                        kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
 
                kvm_for_each_vcpu(cpu, vcpu, kvm)
                        kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
-
-               spin_unlock(&ka->pvclock_gtod_sync_lock);
        }
        mutex_unlock(&kvm_lock);
 }
@@ -8032,7 +8098,10 @@ void kvm_arch_exit(void)
        kvm_mmu_module_exit();
        free_percpu(user_return_msrs);
        kmem_cache_destroy(x86_fpu_cache);
+#ifdef CONFIG_KVM_XEN
+       static_key_deferred_flush(&kvm_xen_enabled);
        WARN_ON(static_branch_unlikely(&kvm_xen_enabled.key));
+#endif
 }
 
 static int __kvm_vcpu_halt(struct kvm_vcpu *vcpu, int state, int reason)
@@ -8980,6 +9049,9 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
                        kvm_check_async_pf_completion(vcpu);
                if (kvm_check_request(KVM_REQ_MSR_FILTER_CHANGED, vcpu))
                        static_call(kvm_x86_msr_filter_changed)(vcpu);
+
+               if (kvm_check_request(KVM_REQ_UPDATE_CPU_DIRTY_LOGGING, vcpu))
+                       static_call(kvm_x86_update_cpu_dirty_logging)(vcpu);
        }
 
        if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win ||
@@ -10570,7 +10642,7 @@ void __user * __x86_set_memory_region(struct kvm *kvm, int id, gpa_t gpa,
                        return (void __user *)hva;
        } else {
                if (!slot || !slot->npages)
-                       return 0;
+                       return NULL;
 
                old_npages = slot->npages;
                hva = slot->userspace_addr;
@@ -10603,8 +10675,6 @@ void kvm_arch_pre_destroy_vm(struct kvm *kvm)
 
 void kvm_arch_destroy_vm(struct kvm *kvm)
 {
-       u32 i;
-
        if (current->mm == kvm->mm) {
                /*
                 * Free memory regions allocated on behalf of userspace,
@@ -10620,8 +10690,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm)
                mutex_unlock(&kvm->slots_lock);
        }
        static_call_cond(kvm_x86_vm_destroy)(kvm);
-       for (i = 0; i < kvm->arch.msr_filter.count; i++)
-               kfree(kvm->arch.msr_filter.ranges[i].bitmap);
+       kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
        kvm_pic_destroy(kvm);
        kvm_ioapic_destroy(kvm);
        kvm_free_vcpus(kvm);
@@ -10748,75 +10817,96 @@ int kvm_arch_prepare_memory_region(struct kvm *kvm,
        return 0;
 }
 
+
+static void kvm_mmu_update_cpu_dirty_logging(struct kvm *kvm, bool enable)
+{
+       struct kvm_arch *ka = &kvm->arch;
+
+       if (!kvm_x86_ops.cpu_dirty_log_size)
+               return;
+
+       if ((enable && ++ka->cpu_dirty_logging_count == 1) ||
+           (!enable && --ka->cpu_dirty_logging_count == 0))
+               kvm_make_all_cpus_request(kvm, KVM_REQ_UPDATE_CPU_DIRTY_LOGGING);
+
+       WARN_ON_ONCE(ka->cpu_dirty_logging_count < 0);
+}
+
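kvm_mmu_update_cpu_dirty_logging() keeps a per-VM count of slots with dirty logging enabled and only fans a request out to the vCPUs on the 0 to 1 and 1 to 0 edges. The counting behavior, runnable in isolation:

    #include <stdio.h>

    static int cpu_dirty_logging_count;

    /* Only the edge transitions need to reprogram the vCPUs. */
    static void update_cpu_dirty_logging(int enable)
    {
            if ((enable && ++cpu_dirty_logging_count == 1) ||
                (!enable && --cpu_dirty_logging_count == 0))
                    puts("request KVM_REQ_UPDATE_CPU_DIRTY_LOGGING on all vCPUs");
    }

    int main(void)
    {
            update_cpu_dirty_logging(1);    /* first slot: kick vCPUs */
            update_cpu_dirty_logging(1);    /* second slot: no-op */
            update_cpu_dirty_logging(0);    /* one slot remains: no-op */
            update_cpu_dirty_logging(0);    /* last slot gone: kick vCPUs */
            return 0;
    }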
 static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                                     struct kvm_memory_slot *old,
                                     struct kvm_memory_slot *new,
                                     enum kvm_mr_change change)
 {
+       bool log_dirty_pages = new->flags & KVM_MEM_LOG_DIRTY_PAGES;
+
        /*
-        * Nothing to do for RO slots or CREATE/MOVE/DELETE of a slot.
-        * See comments below.
+        * Update CPU dirty logging if dirty logging is being toggled.  This
+        * applies to all operations.
         */
-       if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
-               return;
+       if ((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)
+               kvm_mmu_update_cpu_dirty_logging(kvm, log_dirty_pages);
 
        /*
-        * Dirty logging tracks sptes in 4k granularity, meaning that large
-        * sptes have to be split.  If live migration is successful, the guest
-        * in the source machine will be destroyed and large sptes will be
-        * created in the destination. However, if the guest continues to run
-        * in the source machine (for example if live migration fails), small
-        * sptes will remain around and cause bad performance.
-        *
-        * Scan sptes if dirty logging has been stopped, dropping those
-        * which can be collapsed into a single large-page spte.  Later
-        * page faults will create the large-page sptes.
+        * Nothing more to do for RO slots (which can't be dirtied and can't be
+        * made writable) or CREATE/MOVE/DELETE of a slot.
         *
-        * There is no need to do this in any of the following cases:
+        * For a memslot with dirty logging disabled:
         * CREATE:      No dirty mappings will already exist.
         * MOVE/DELETE: The old mappings will already have been cleaned up by
         *              kvm_arch_flush_shadow_memslot()
+        *
+        * For a memslot with dirty logging enabled:
+        * CREATE:      No shadow pages exist, thus nothing to write-protect
+        *              and no dirty bits to clear.
+        * MOVE/DELETE: The old mappings will already have been cleaned up by
+        *              kvm_arch_flush_shadow_memslot().
         */
-       if ((old->flags & KVM_MEM_LOG_DIRTY_PAGES) &&
-           !(new->flags & KVM_MEM_LOG_DIRTY_PAGES))
-               kvm_mmu_zap_collapsible_sptes(kvm, new);
+       if ((change != KVM_MR_FLAGS_ONLY) || (new->flags & KVM_MEM_READONLY))
+               return;
 
        /*
-        * Enable or disable dirty logging for the slot.
-        *
-        * For KVM_MR_DELETE and KVM_MR_MOVE, the shadow pages of the old
-        * slot have been zapped so no dirty logging updates are needed for
-        * the old slot.
-        * For KVM_MR_CREATE and KVM_MR_MOVE, once the new slot is visible
-        * any mappings that might be created in it will consume the
-        * properties of the new slot and do not need to be updated here.
-        *
-        * When PML is enabled, the kvm_x86_ops dirty logging hooks are
-        * called to enable/disable dirty logging.
-        *
-        * When disabling dirty logging with PML enabled, the D-bit is set
-        * for sptes in the slot in order to prevent unnecessary GPA
-        * logging in the PML buffer (and potential PML buffer full VMEXIT).
-        * This guarantees leaving PML enabled for the guest's lifetime
-        * won't have any additional overhead from PML when the guest is
-        * running with dirty logging disabled.
-        *
-        * When enabling dirty logging, large sptes are write-protected
-        * so they can be split on first write.  New large sptes cannot
-        * be created for this slot until the end of the logging.
-        * See the comments in fast_page_fault().
-        * For small sptes, nothing is done if the dirty log is in the
-        * initial-all-set state.  Otherwise, depending on whether pml
-        * is enabled the D-bit or the W-bit will be cleared.
+        * READONLY and non-flags changes were filtered out above, and the only
+        * other flag is LOG_DIRTY_PAGES, i.e. something is wrong if dirty
+        * logging isn't being toggled on or off.
         */
-       if (new->flags & KVM_MEM_LOG_DIRTY_PAGES) {
-               if (kvm_x86_ops.slot_enable_log_dirty) {
-                       static_call(kvm_x86_slot_enable_log_dirty)(kvm, new);
-               } else {
-                       int level =
-                               kvm_dirty_log_manual_protect_and_init_set(kvm) ?
-                               PG_LEVEL_2M : PG_LEVEL_4K;
+       if (WARN_ON_ONCE(!((old->flags ^ new->flags) & KVM_MEM_LOG_DIRTY_PAGES)))
+               return;
+
+       if (!log_dirty_pages) {
+               /*
+                * Dirty logging tracks sptes in 4k granularity, meaning that
+                * large sptes have to be split.  If live migration succeeds,
+                * the guest in the source machine will be destroyed and large
+                * sptes will be created in the destination.  However, if the
+                * guest continues to run in the source machine (for example if
+                * live migration fails), small sptes will remain around and
+                * cause bad performance.
+                *
+                * Scan sptes if dirty logging has been stopped, dropping those
+                * which can be collapsed into a single large-page spte.  Later
+                * page faults will create the large-page sptes.
+                */
+               kvm_mmu_zap_collapsible_sptes(kvm, new);
+       } else {
+               /* By default, write-protect everything to log writes. */
+               int level = PG_LEVEL_4K;
 
+               if (kvm_x86_ops.cpu_dirty_log_size) {
+                       /*
+                        * Clear all dirty bits, unless pages are treated as
+                        * dirty from the get-go.
+                        */
+                       if (!kvm_dirty_log_manual_protect_and_init_set(kvm))
+                               kvm_mmu_slot_leaf_clear_dirty(kvm, new);
+
+                       /*
+                        * Write-protect large pages on write so that dirty
+                        * logging happens at 4k granularity.  No need to
+                        * write-protect small SPTEs since write accesses are
+                        * logged by the CPU via dirty bits.
+                        */
+                       level = PG_LEVEL_2M;
+               } else if (kvm_dirty_log_manual_protect_and_init_set(kvm)) {
                        /*
                         * If we're in initial-all-set mode, we don't need
                         * to write-protect any small page because
@@ -10825,10 +10915,9 @@ static void kvm_mmu_slot_apply_flags(struct kvm *kvm,
                         * so that the page split can happen lazily on
                         * the first write to the huge page.
                         */
-                       kvm_mmu_slot_remove_write_access(kvm, new, level);
+                       level = PG_LEVEL_2M;
                }
-       } else {
-               static_call_cond(kvm_x86_slot_disable_log_dirty)(kvm, new);
+               kvm_mmu_slot_remove_write_access(kvm, new, level);
        }
 }
 
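The rewritten kvm_mmu_slot_apply_flags() keys every decision off a single test: a flag was toggled iff it differs between the old and new flag words. The same test also backs the WARN_ON_ONCE() above, since once READONLY and non-flag changes are filtered out, LOG_DIRTY_PAGES is the only bit that can legitimately differ. A minimal userspace sketch of that XOR idiom, using stand-in flag values rather than the real KVM constants:

#include <assert.h>
#include <stdbool.h>

#define MEM_LOG_DIRTY_PAGES	(1u << 0)	/* stand-in for KVM_MEM_LOG_DIRTY_PAGES */
#define MEM_READONLY		(1u << 1)	/* stand-in for KVM_MEM_READONLY */

/* A flag was toggled iff it differs between the old and new flag words. */
static bool flag_toggled(unsigned int old, unsigned int new, unsigned int flag)
{
	return (old ^ new) & flag;
}

int main(void)
{
	/* Enabling dirty logging: bit set in 'new' but not in 'old'. */
	assert(flag_toggled(0, MEM_LOG_DIRTY_PAGES, MEM_LOG_DIRTY_PAGES));

	/* An unrelated flag changing is not a dirty-logging toggle. */
	assert(!flag_toggled(0, MEM_READONLY, MEM_LOG_DIRTY_PAGES));

	/* Set on both sides: XOR clears the bit, so no toggle is seen. */
	assert(!flag_toggled(MEM_LOG_DIRTY_PAGES, MEM_LOG_DIRTY_PAGES,
			     MEM_LOG_DIRTY_PAGES));
	return 0;
}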
index 39eb048..9035e34 100644 (file)
@@ -250,7 +250,6 @@ static inline bool kvm_vcpu_latch_init(struct kvm_vcpu *vcpu)
 void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs);
 void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
 
-void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
 u64 get_kvmclock_ns(struct kvm *kvm);
 
 int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
index af8f656..ae17250 100644 (file)
 #include "hyperv.h"
 
 #include <linux/kvm_host.h>
+#include <linux/sched/stat.h>
 
 #include <trace/events/kvm.h>
 #include <xen/interface/xen.h>
+#include <xen/interface/vcpu.h>
 
 #include "trace.h"
 
@@ -61,6 +63,132 @@ out:
        return ret;
 }
 
+static void kvm_xen_update_runstate(struct kvm_vcpu *v, int state)
+{
+       struct kvm_vcpu_xen *vx = &v->arch.xen;
+       u64 now = get_kvmclock_ns(v->kvm);
+       u64 delta_ns = now - vx->runstate_entry_time;
+       u64 run_delay = current->sched_info.run_delay;
+
+       if (unlikely(!vx->runstate_entry_time))
+               vx->current_runstate = RUNSTATE_offline;
+
+       /*
+        * Time waiting for the scheduler isn't "stolen" if the
+        * vCPU wasn't running anyway.
+        */
+       if (vx->current_runstate == RUNSTATE_running) {
+               u64 steal_ns = run_delay - vx->last_steal;
+
+               delta_ns -= steal_ns;
+
+               vx->runstate_times[RUNSTATE_runnable] += steal_ns;
+       }
+       vx->last_steal = run_delay;
+
+       vx->runstate_times[vx->current_runstate] += delta_ns;
+       vx->current_runstate = state;
+       vx->runstate_entry_time = now;
+}
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *v, int state)
+{
+       struct kvm_vcpu_xen *vx = &v->arch.xen;
+       uint64_t state_entry_time;
+       unsigned int offset;
+
+       kvm_xen_update_runstate(v, state);
+
+       if (!vx->runstate_set)
+               return;
+
+       BUILD_BUG_ON(sizeof(struct compat_vcpu_runstate_info) != 0x2c);
+
+       offset = offsetof(struct compat_vcpu_runstate_info, state_entry_time);
+#ifdef CONFIG_X86_64
+       /*
+        * The only difference is the alignment of uint64_t in 32-bit.
+        * So the first field 'state' is accessed directly using
+        * offsetof() (where its offset happens to be zero), while the
+        * remaining fields, which are all uint64_t, start at 'offset',
+        * which we tweak here by adding 4.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, state_entry_time) + 4);
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) + 4);
+
+       if (v->kvm->arch.xen.long_mode)
+               offset = offsetof(struct vcpu_runstate_info, state_entry_time);
+#endif
+       /*
+        * First write the updated state_entry_time at the appropriate
+        * location determined by 'offset'.
+        */
+       state_entry_time = vx->runstate_entry_time;
+       state_entry_time |= XEN_RUNSTATE_UPDATE;
+
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state_entry_time) !=
+                    sizeof(state_entry_time));
+       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state_entry_time) !=
+                    sizeof(state_entry_time));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state_entry_time, offset,
+                                         sizeof(state_entry_time)))
+               return;
+       smp_wmb();
+
+       /*
+        * Next, write the new runstate. This is in the *same* place
+        * for 32-bit and 64-bit guests, asserted here for paranoia.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state) !=
+                    offsetof(struct compat_vcpu_runstate_info, state));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->state) !=
+                    sizeof(vx->current_runstate));
+       BUILD_BUG_ON(sizeof(((struct compat_vcpu_runstate_info *)0)->state) !=
+                    sizeof(vx->current_runstate));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &vx->current_runstate,
+                                         offsetof(struct vcpu_runstate_info, state),
+                                         sizeof(vx->current_runstate)))
+               return;
+
+       /*
+        * Write the actual runstate times immediately after the
+        * runstate_entry_time.
+        */
+       BUILD_BUG_ON(offsetof(struct vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(offsetof(struct compat_vcpu_runstate_info, state_entry_time) !=
+                    offsetof(struct compat_vcpu_runstate_info, time) - sizeof(u64));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(((struct compat_vcpu_runstate_info *)0)->time));
+       BUILD_BUG_ON(sizeof(((struct vcpu_runstate_info *)0)->time) !=
+                    sizeof(vx->runstate_times));
+
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &vx->runstate_times[0],
+                                         offset + sizeof(u64),
+                                         sizeof(vx->runstate_times)))
+               return;
+
+       smp_wmb();
+
+       /*
+        * Finally, clear the XEN_RUNSTATE_UPDATE bit in the guest's
+        * runstate_entry_time field.
+        */
+
+       state_entry_time &= ~XEN_RUNSTATE_UPDATE;
+       if (kvm_write_guest_offset_cached(v->kvm, &v->arch.xen.runstate_cache,
+                                         &state_entry_time, offset,
+                                         sizeof(state_entry_time)))
+               return;
+}
+
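kvm_xen_update_runstate_guest() above follows a small publication protocol: set XEN_RUNSTATE_UPDATE in state_entry_time, barrier, write the payload, barrier, then clear the bit, so a guest reader that observes the bit knows the times are mid-update. A hedged userspace analogue in C11 atomics (illustrative only; the kernel writes through kvm_write_guest_offset_cached() with smp_wmb() between steps):

#include <stdatomic.h>
#include <stdint.h>

#define RUNSTATE_UPDATE (1ULL << 63)	/* stand-in for XEN_RUNSTATE_UPDATE */

struct runstate_shared {
	_Atomic uint64_t state_entry_time;
	_Atomic uint64_t time[4];	/* running/runnable/blocked/offline */
};

static void publish_runstate(struct runstate_shared *rs, uint64_t now,
			     const uint64_t times[4])
{
	/* 1. Set the update bit so readers treat the record as in flux. */
	atomic_store(&rs->state_entry_time, now | RUNSTATE_UPDATE);
	atomic_thread_fence(memory_order_release);	/* plays the smp_wmb() role */

	/* 2. Write the payload while the bit tells readers to retry. */
	for (int i = 0; i < 4; i++)
		atomic_store_explicit(&rs->time[i], times[i],
				      memory_order_relaxed);
	atomic_thread_fence(memory_order_release);	/* plays the smp_wmb() role */

	/* 3. Clear the bit, publishing a consistent snapshot. */
	atomic_store(&rs->state_entry_time, now);
}

int main(void)
{
	struct runstate_shared rs = { 0 };
	const uint64_t t[4] = { 60, 25, 10, 5 };

	publish_runstate(&rs, 100, t);
	return 0;
}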
 int __kvm_xen_has_interrupt(struct kvm_vcpu *v)
 {
        u8 rc = 0;
@@ -187,9 +315,12 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                /* No compat necessary here. */
                BUILD_BUG_ON(sizeof(struct vcpu_info) !=
                             sizeof(struct compat_vcpu_info));
+               BUILD_BUG_ON(offsetof(struct vcpu_info, time) !=
+                            offsetof(struct compat_vcpu_info, time));
 
                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_info_set = false;
+                       r = 0;
                        break;
                }
 
@@ -206,6 +337,7 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
        case KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO:
                if (data->u.gpa == GPA_INVALID) {
                        vcpu->arch.xen.vcpu_time_info_set = false;
+                       r = 0;
                        break;
                }
 
@@ -219,6 +351,121 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                }
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.gpa == GPA_INVALID) {
+                       vcpu->arch.xen.runstate_set = false;
+                       r = 0;
+                       break;
+               }
+
+               r = kvm_gfn_to_hva_cache_init(vcpu->kvm,
+                                             &vcpu->arch.xen.runstate_cache,
+                                             data->u.gpa,
+                                             sizeof(struct vcpu_runstate_info));
+               if (!r)
+                       vcpu->arch.xen.runstate_set = true;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (data->u.runstate.state_entry_time !=
+                   (data->u.runstate.time_running +
+                    data->u.runstate.time_runnable +
+                    data->u.runstate.time_blocked +
+                    data->u.runstate.time_offline)) {
+                       r = -EINVAL;
+                       break;
+               }
+               if (get_kvmclock_ns(vcpu->kvm) <
+                   data->u.runstate.state_entry_time) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               vcpu->arch.xen.current_runstate = data->u.runstate.state;
+               vcpu->arch.xen.runstate_entry_time =
+                       data->u.runstate.state_entry_time;
+               vcpu->arch.xen.runstate_times[RUNSTATE_running] =
+                       data->u.runstate.time_running;
+               vcpu->arch.xen.runstate_times[RUNSTATE_runnable] =
+                       data->u.runstate.time_runnable;
+               vcpu->arch.xen.runstate_times[RUNSTATE_blocked] =
+                       data->u.runstate.time_blocked;
+               vcpu->arch.xen.runstate_times[RUNSTATE_offline] =
+                       data->u.runstate.time_offline;
+               vcpu->arch.xen.last_steal = current->sched_info.run_delay;
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (data->u.runstate.state > RUNSTATE_offline &&
+                   data->u.runstate.state != (u64)-1) {
+                       r = -EINVAL;
+                       break;
+               }
+               /* The adjustment must add up */
+               if (data->u.runstate.state_entry_time !=
+                   (data->u.runstate.time_running +
+                    data->u.runstate.time_runnable +
+                    data->u.runstate.time_blocked +
+                    data->u.runstate.time_offline)) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               if (get_kvmclock_ns(vcpu->kvm) <
+                   (vcpu->arch.xen.runstate_entry_time +
+                    data->u.runstate.state_entry_time)) {
+                       r = -EINVAL;
+                       break;
+               }
+
+               vcpu->arch.xen.runstate_entry_time +=
+                       data->u.runstate.state_entry_time;
+               vcpu->arch.xen.runstate_times[RUNSTATE_running] +=
+                       data->u.runstate.time_running;
+               vcpu->arch.xen.runstate_times[RUNSTATE_runnable] +=
+                       data->u.runstate.time_runnable;
+               vcpu->arch.xen.runstate_times[RUNSTATE_blocked] +=
+                       data->u.runstate.time_blocked;
+               vcpu->arch.xen.runstate_times[RUNSTATE_offline] +=
+                       data->u.runstate.time_offline;
+
+               if (data->u.runstate.state <= RUNSTATE_offline)
+                       kvm_xen_update_runstate(vcpu, data->u.runstate.state);
+               r = 0;
+               break;
+
        default:
                break;
        }
@@ -251,6 +498,49 @@ int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
                r = 0;
                break;
 
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               if (vcpu->arch.xen.runstate_set) {
+                       data->u.gpa = vcpu->arch.xen.runstate_cache.gpa;
+                       r = 0;
+               }
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               data->u.runstate.state = vcpu->arch.xen.current_runstate;
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA:
+               if (!sched_info_on()) {
+                       r = -EOPNOTSUPP;
+                       break;
+               }
+               data->u.runstate.state = vcpu->arch.xen.current_runstate;
+               data->u.runstate.state_entry_time =
+                       vcpu->arch.xen.runstate_entry_time;
+               data->u.runstate.time_running =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_running];
+               data->u.runstate.time_runnable =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_runnable];
+               data->u.runstate.time_blocked =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_blocked];
+               data->u.runstate.time_offline =
+                       vcpu->arch.xen.runstate_times[RUNSTATE_offline];
+               r = 0;
+               break;
+
+       case KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST:
+               r = -EINVAL;
+               break;
+
        default:
                break;
        }
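Both the RUNSTATE_DATA setter and the RUNSTATE_ADJUST path above enforce the same invariant before touching vCPU state: the (cumulative) state_entry_time must equal the sum of the four per-state buckets, else the ioctl fails with -EINVAL. A minimal sketch of the check, with invented values and field names mirroring kvm_xen_vcpu_attr:

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

struct runstate_data {
	uint64_t state_entry_time;
	uint64_t time_running, time_runnable, time_blocked, time_offline;
};

/* The invariant both setters demand before accepting user input. */
static bool runstate_times_add_up(const struct runstate_data *d)
{
	return d->state_entry_time == d->time_running + d->time_runnable +
				      d->time_blocked + d->time_offline;
}

int main(void)
{
	struct runstate_data d = {
		.state_entry_time = 100,
		.time_running = 60, .time_runnable = 25,
		.time_blocked = 10, .time_offline = 5,
	};

	assert(runstate_times_add_up(&d));	/* 60+25+10+5 == 100 */
	d.time_blocked++;
	assert(!runstate_times_add_up(&d));	/* sum now 101 != 100 */
	return 0;
}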
index b66a921..463a784 100644 (file)
@@ -9,6 +9,7 @@
 #ifndef __ARCH_X86_KVM_XEN_H__
 #define __ARCH_X86_KVM_XEN_H__
 
+#ifdef CONFIG_KVM_XEN
 #include <linux/jump_label_ratelimit.h>
 
 extern struct static_key_false_deferred kvm_xen_enabled;
@@ -18,11 +19,16 @@ int kvm_xen_vcpu_set_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data)
 int kvm_xen_vcpu_get_attr(struct kvm_vcpu *vcpu, struct kvm_xen_vcpu_attr *data);
 int kvm_xen_hvm_set_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
 int kvm_xen_hvm_get_attr(struct kvm *kvm, struct kvm_xen_hvm_attr *data);
-int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data);
 int kvm_xen_hvm_config(struct kvm *kvm, struct kvm_xen_hvm_config *xhc);
 void kvm_xen_destroy_vm(struct kvm *kvm);
 
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+       return static_branch_unlikely(&kvm_xen_enabled.key) &&
+               kvm->arch.xen_hvm_config.msr;
+}
+
 static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
 {
        return static_branch_unlikely(&kvm_xen_enabled.key) &&
@@ -38,11 +44,59 @@ static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
 
        return 0;
 }
+#else
+static inline int kvm_xen_write_hypercall_page(struct kvm_vcpu *vcpu, u64 data)
+{
+       return 1;
+}
+
+static inline void kvm_xen_destroy_vm(struct kvm *kvm)
+{
+}
+
+static inline bool kvm_xen_msr_enabled(struct kvm *kvm)
+{
+       return false;
+}
+
+static inline bool kvm_xen_hypercall_enabled(struct kvm *kvm)
+{
+       return false;
+}
+
+static inline int kvm_xen_has_interrupt(struct kvm_vcpu *vcpu)
+{
+       return 0;
+}
+#endif
+
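The header now uses the standard Kconfig stub pattern: real declarations under CONFIG_KVM_XEN, static inline no-ops otherwise, so call sites need no #ifdefs and dead branches fold away at compile time. A generic sketch of the pattern (CONFIG_DEMO_FEATURE and demo_feature_enable() are made-up names):

#ifdef CONFIG_DEMO_FEATURE
int demo_feature_enable(struct kvm *kvm);	/* real version lives in demo.c */
#else
/*
 * Compiled-out stub: callers keep a plain function call, and the
 * compiler inlines the constant and eliminates dependent branches.
 */
static inline int demo_feature_enable(struct kvm *kvm)
{
	return 0;
}
#endif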
+int kvm_xen_hypercall(struct kvm_vcpu *vcpu);
 
-/* 32-bit compatibility definitions, also used natively in 32-bit build */
 #include <asm/pvclock-abi.h>
 #include <asm/xen/interface.h>
+#include <xen/interface/vcpu.h>
+
+void kvm_xen_update_runstate_guest(struct kvm_vcpu *vcpu, int state);
 
+static inline void kvm_xen_runstate_set_running(struct kvm_vcpu *vcpu)
+{
+       kvm_xen_update_runstate_guest(vcpu, RUNSTATE_running);
+}
+
+static inline void kvm_xen_runstate_set_preempted(struct kvm_vcpu *vcpu)
+{
+       /*
+        * If the vCPU wasn't preempted but took a normal exit for
+        * some reason (hypercalls, I/O, etc.), that is accounted as
+        * still RUNSTATE_running, as the VMM is still operating on
+        * behalf of the vCPU. Only if the VMM does actually block
+        * does it need to enter RUNSTATE_blocked.
+        */
+       if (vcpu->preempted)
+               kvm_xen_update_runstate_guest(vcpu, RUNSTATE_runnable);
+}
+
+/* 32-bit compatibility definitions, also used natively in 32-bit build */
 struct compat_arch_vcpu_info {
        unsigned int cr2;
        unsigned int pad[5];
@@ -75,4 +129,10 @@ struct compat_shared_info {
        struct compat_arch_shared_info arch;
 };
 
+struct compat_vcpu_runstate_info {
+    int state;
+    uint64_t state_entry_time;
+    uint64_t time[4];
+} __attribute__((packed));
+
 #endif /* __ARCH_X86_KVM_XEN_H__ */
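The packed compat struct is what makes the "+ 4" offset dance in kvm_xen_update_runstate_guest() necessary: a 32-bit guest packs state_entry_time right after the 4-byte state field, while natural 64-bit alignment pads it out by 4 bytes. A small compile-time sketch of that layout difference (assumes a 64-bit host, matching the CONFIG_X86_64 guard in the code):

#include <stddef.h>
#include <stdint.h>

/* 32-bit guest view: packed, so state_entry_time follows 'state' directly. */
struct compat_layout {
	int state;
	uint64_t state_entry_time;
	uint64_t time[4];
} __attribute__((packed));

/* 64-bit guest view: natural alignment pads 4 bytes after 'state'. */
struct native_layout {
	int state;
	uint64_t state_entry_time;
	uint64_t time[4];
};

_Static_assert(offsetof(struct compat_layout, state_entry_time) == 4,
	       "packed layout: no padding after 'state'");
_Static_assert(offsetof(struct native_layout, state_entry_time) ==
	       offsetof(struct compat_layout, state_entry_time) + 4,
	       "natural alignment shifts every u64 field by 4 bytes");

int main(void) { return 0; }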
index 4229950..bb0b3fe 100644 (file)
@@ -1415,6 +1415,25 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
        }
 }
 
+static unsigned long insn_get_effective_ip(struct pt_regs *regs)
+{
+       unsigned long seg_base = 0;
+
+       /*
+        * If not in user-space long mode, a custom code segment could be in
+        * use. This is true in protected mode (if the process defined a local
+        * descriptor table) and in virtual-8086 mode. In most cases seg_base
+        * will be zero, as it is for USER_CS.
+        */
+       if (!user_64bit_mode(regs)) {
+               seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
+               if (seg_base == -1L)
+                       return 0;
+       }
+
+       return seg_base + regs->ip;
+}
+
 /**
  * insn_fetch_from_user() - Copy instruction bytes from user-space memory
  * @regs:      Structure with register values as seen when entering kernel mode
@@ -1431,24 +1450,43 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
  */
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 {
-       unsigned long seg_base = 0;
+       unsigned long ip;
        int not_copied;
 
-       /*
-        * If not in user-space long mode, a custom code segment could be in
-        * use. This is true in protected mode (if the process defined a local
-        * descriptor table), or virtual-8086 mode. In most of the cases
-        * seg_base will be zero as in USER_CS.
-        */
-       if (!user_64bit_mode(regs)) {
-               seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
-               if (seg_base == -1L)
-                       return 0;
-       }
+       ip = insn_get_effective_ip(regs);
+       if (!ip)
+               return 0;
+
+       not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);
 
+       return MAX_INSN_SIZE - not_copied;
+}
+
+/**
+ * insn_fetch_from_user_inatomic() - Copy instruction bytes from user-space memory
+ *                                   while in atomic code
+ * @regs:      Structure with register values as seen when entering kernel mode
+ * @buf:       Array to store the fetched instruction
+ *
+ * Gets the linear address of the instruction and copies the instruction bytes
+ * to the buf. This function must be used in atomic context.
+ *
+ * Returns:
+ *
+ * Number of instruction bytes copied.
+ *
+ * 0 if nothing was copied.
+ */
+int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
+{
+       unsigned long ip;
+       int not_copied;
+
+       ip = insn_get_effective_ip(regs);
+       if (!ip)
+               return 0;
 
-       not_copied = copy_from_user(buf, (void __user *)(seg_base + regs->ip),
-                                   MAX_INSN_SIZE);
+       not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
 
        return MAX_INSN_SIZE - not_copied;
 }
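Factoring out insn_get_effective_ip() lets the new inatomic variant share the segment-base logic while swapping copy_from_user(), which may fault and sleep, for __copy_from_user_inatomic(), which must not. A hedged kernel-context sketch of how a caller would pick the variant (in_atomic_path is an invented parameter; real callers know their context statically):

/* Kernel-context sketch, not compilable standalone. */
static int fetch_insn_bytes(struct pt_regs *regs,
			    unsigned char buf[MAX_INSN_SIZE],
			    bool in_atomic_path)
{
	/*
	 * Sleeping is forbidden in NMI-like paths (e.g. the #VC handler),
	 * so the inatomic helper is required there; other callers may
	 * take and service page faults during the copy.
	 */
	if (in_atomic_path)
		return insn_fetch_from_user_inatomic(regs, buf);

	return insn_fetch_from_user(regs, buf);
}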
index 4042795..435630a 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (C) IBM Corporation, 2002, 2004, 2009
  */
 
+#include <linux/kernel.h>
 #ifdef __KERNEL__
 #include <linux/string.h>
 #else
 
 #include <asm/emulate_prefix.h>
 
+#define leXX_to_cpu(t, r)                                              \
+({                                                                     \
+       __typeof__(t) v;                                                \
+       switch (sizeof(t)) {                                            \
+       case 4: v = le32_to_cpu(r); break;                              \
+       case 2: v = le16_to_cpu(r); break;                              \
+       case 1: v = r; break;                                           \
+       default:                                                        \
+               BUILD_BUG(); break;                                     \
+       }                                                               \
+       v;                                                              \
+})
+
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)      \
        ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
 
 #define __get_next(t, insn)    \
-       ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+       ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
 
 #define __peek_nbyte_next(t, insn, n)  \
-       ({ t r = *(t*)((insn)->next_byte + n); r; })
+       ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })
 
 #define get_next(t, insn)      \
        ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
@@ -147,9 +161,9 @@ found:
                        b = insn->prefixes.bytes[3];
                        for (i = 0; i < nb; i++)
                                if (prefixes->bytes[i] == lb)
-                                       prefixes->bytes[i] = b;
+                                       insn_set_byte(prefixes, i, b);
                }
-               insn->prefixes.bytes[3] = lb;
+               insn_set_byte(&insn->prefixes, 3, lb);
        }
 
        /* Decode REX prefix */
@@ -157,8 +171,7 @@ found:
                b = peek_next(insn_byte_t, insn);
                attr = inat_get_opcode_attribute(b);
                if (inat_is_rex_prefix(attr)) {
-                       insn->rex_prefix.value = b;
-                       insn->rex_prefix.nbytes = 1;
+                       insn_field_set(&insn->rex_prefix, b, 1);
                        insn->next_byte++;
                        if (X86_REX_W(b))
                                /* REX.W overrides opnd_size */
@@ -181,13 +194,13 @@ found:
                        if (X86_MODRM_MOD(b2) != 3)
                                goto vex_end;
                }
-               insn->vex_prefix.bytes[0] = b;
-               insn->vex_prefix.bytes[1] = b2;
+               insn_set_byte(&insn->vex_prefix, 0, b);
+               insn_set_byte(&insn->vex_prefix, 1, b2);
                if (inat_is_evex_prefix(attr)) {
                        b2 = peek_nbyte_next(insn_byte_t, insn, 2);
-                       insn->vex_prefix.bytes[2] = b2;
+                       insn_set_byte(&insn->vex_prefix, 2, b2);
                        b2 = peek_nbyte_next(insn_byte_t, insn, 3);
-                       insn->vex_prefix.bytes[3] = b2;
+                       insn_set_byte(&insn->vex_prefix, 3, b2);
                        insn->vex_prefix.nbytes = 4;
                        insn->next_byte += 4;
                        if (insn->x86_64 && X86_VEX_W(b2))
@@ -195,7 +208,7 @@ found:
                                insn->opnd_bytes = 8;
                } else if (inat_is_vex3_prefix(attr)) {
                        b2 = peek_nbyte_next(insn_byte_t, insn, 2);
-                       insn->vex_prefix.bytes[2] = b2;
+                       insn_set_byte(&insn->vex_prefix, 2, b2);
                        insn->vex_prefix.nbytes = 3;
                        insn->next_byte += 3;
                        if (insn->x86_64 && X86_VEX_W(b2))
@@ -207,7 +220,7 @@ found:
                         * Makes it easier to decode vex.W, vex.vvvv,
                         * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
                         */
-                       insn->vex_prefix.bytes[2] = b2 & 0x7f;
+                       insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f);
                        insn->vex_prefix.nbytes = 2;
                        insn->next_byte += 2;
                }
@@ -243,7 +256,7 @@ void insn_get_opcode(struct insn *insn)
 
        /* Get first opcode */
        op = get_next(insn_byte_t, insn);
-       opcode->bytes[0] = op;
+       insn_set_byte(opcode, 0, op);
        opcode->nbytes = 1;
 
        /* Check if there is VEX prefix or not */
@@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn)
 
        if (inat_has_modrm(insn->attr)) {
                mod = get_next(insn_byte_t, insn);
-               modrm->value = mod;
-               modrm->nbytes = 1;
+               insn_field_set(modrm, mod, 1);
                if (inat_is_group(insn->attr)) {
                        pfx_id = insn_last_prefix_id(insn);
                        insn->attr = inat_get_group_attribute(mod, pfx_id,
@@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn)
         * For rip-relative instructions, the mod field (top 2 bits)
         * is zero and the r/m field (bottom 3 bits) is 0x5.
         */
-       return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+       return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5);
 }
 
 /**
@@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn)
        if (!insn->modrm.got)
                insn_get_modrm(insn);
        if (insn->modrm.nbytes) {
-               modrm = (insn_byte_t)insn->modrm.value;
+               modrm = insn->modrm.bytes[0];
                if (insn->addr_bytes != 2 &&
                    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
-                       insn->sib.value = get_next(insn_byte_t, insn);
-                       insn->sib.nbytes = 1;
+                       insn_field_set(&insn->sib,
+                                      get_next(insn_byte_t, insn), 1);
                }
        }
        insn->sib.got = 1;
@@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn)
                if (mod == 3)
                        goto out;
                if (mod == 1) {
-                       insn->displacement.value = get_next(signed char, insn);
-                       insn->displacement.nbytes = 1;
+                       insn_field_set(&insn->displacement,
+                                      get_next(signed char, insn), 1);
                } else if (insn->addr_bytes == 2) {
                        if ((mod == 0 && rm == 6) || mod == 2) {
-                               insn->displacement.value =
-                                        get_next(short, insn);
-                               insn->displacement.nbytes = 2;
+                               insn_field_set(&insn->displacement,
+                                              get_next(short, insn), 2);
                        }
                } else {
                        if ((mod == 0 && rm == 5) || mod == 2 ||
                            (mod == 0 && base == 5)) {
-                               insn->displacement.value = get_next(int, insn);
-                               insn->displacement.nbytes = 4;
+                               insn_field_set(&insn->displacement,
+                                              get_next(int, insn), 4);
                        }
                }
        }
@@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn)
 {
        switch (insn->addr_bytes) {
        case 2:
-               insn->moffset1.value = get_next(short, insn);
-               insn->moffset1.nbytes = 2;
+               insn_field_set(&insn->moffset1, get_next(short, insn), 2);
                break;
        case 4:
-               insn->moffset1.value = get_next(int, insn);
-               insn->moffset1.nbytes = 4;
+               insn_field_set(&insn->moffset1, get_next(int, insn), 4);
                break;
        case 8:
-               insn->moffset1.value = get_next(int, insn);
-               insn->moffset1.nbytes = 4;
-               insn->moffset2.value = get_next(int, insn);
-               insn->moffset2.nbytes = 4;
+               insn_field_set(&insn->moffset1, get_next(int, insn), 4);
+               insn_field_set(&insn->moffset2, get_next(int, insn), 4);
                break;
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
@@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn)
 {
        switch (insn->opnd_bytes) {
        case 2:
-               insn->immediate.value = get_next(short, insn);
-               insn->immediate.nbytes = 2;
+               insn_field_set(&insn->immediate, get_next(short, insn), 2);
                break;
        case 4:
        case 8:
-               insn->immediate.value = get_next(int, insn);
-               insn->immediate.nbytes = 4;
+               insn_field_set(&insn->immediate, get_next(int, insn), 4);
                break;
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
@@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn)
 {
        switch (insn->opnd_bytes) {
        case 2:
-               insn->immediate1.value = get_next(short, insn);
-               insn->immediate1.nbytes = 2;
+               insn_field_set(&insn->immediate1, get_next(short, insn), 2);
                break;
        case 4:
-               insn->immediate1.value = get_next(int, insn);
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
                insn->immediate1.nbytes = 4;
                break;
        case 8:
-               insn->immediate1.value = get_next(int, insn);
-               insn->immediate1.nbytes = 4;
-               insn->immediate2.value = get_next(int, insn);
-               insn->immediate2.nbytes = 4;
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+               insn_field_set(&insn->immediate2, get_next(int, insn), 4);
                break;
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
@@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn)
 {
        switch (insn->opnd_bytes) {
        case 2:
-               insn->immediate1.value = get_next(short, insn);
-               insn->immediate1.nbytes = 2;
+               insn_field_set(&insn->immediate1, get_next(short, insn), 2);
                break;
        case 4:
-               insn->immediate1.value = get_next(int, insn);
-               insn->immediate1.nbytes = 4;
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
                break;
        case 8:
                /* ptr16:64 does not exist (no segment) */
@@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn)
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
        }
-       insn->immediate2.value = get_next(unsigned short, insn);
-       insn->immediate2.nbytes = 2;
+       insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2);
        insn->immediate1.got = insn->immediate2.got = 1;
 
        return 1;
@@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn)
 
        switch (inat_immediate_size(insn->attr)) {
        case INAT_IMM_BYTE:
-               insn->immediate.value = get_next(signed char, insn);
-               insn->immediate.nbytes = 1;
+               insn_field_set(&insn->immediate, get_next(signed char, insn), 1);
                break;
        case INAT_IMM_WORD:
-               insn->immediate.value = get_next(short, insn);
-               insn->immediate.nbytes = 2;
+               insn_field_set(&insn->immediate, get_next(short, insn), 2);
                break;
        case INAT_IMM_DWORD:
-               insn->immediate.value = get_next(int, insn);
-               insn->immediate.nbytes = 4;
+               insn_field_set(&insn->immediate, get_next(int, insn), 4);
                break;
        case INAT_IMM_QWORD:
-               insn->immediate1.value = get_next(int, insn);
-               insn->immediate1.nbytes = 4;
-               insn->immediate2.value = get_next(int, insn);
-               insn->immediate2.nbytes = 4;
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+               insn_field_set(&insn->immediate2, get_next(int, insn), 4);
                break;
        case INAT_IMM_PTR:
                if (!__get_immptr(insn))
@@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn)
                goto err_out;
        }
        if (inat_has_second_immediate(insn->attr)) {
-               insn->immediate2.value = get_next(signed char, insn);
-               insn->immediate2.nbytes = 1;
+               insn_field_set(&insn->immediate2, get_next(signed char, insn), 1);
        }
 done:
        insn->immediate.got = 1;
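The leXX_to_cpu() conversions above exist because instruction bytes are always little-endian while the decoder is also built for big-endian hosts (objtool and perf cross-compile this file). A small userspace demonstration of the same idea, assembling a 32-bit immediate byte by byte so the result is independent of host byte order:

#include <stdint.h>
#include <stdio.h>

/* Assemble a little-endian 32-bit value regardless of host endianness. */
static uint32_t read_le32(const unsigned char *p)
{
	return (uint32_t)p[0]       | (uint32_t)p[1] << 8 |
	       (uint32_t)p[2] << 16 | (uint32_t)p[3] << 24;
}

int main(void)
{
	/* Immediate bytes as they appear in the instruction stream. */
	const unsigned char imm[4] = { 0x78, 0x56, 0x34, 0x12 };

	printf("0x%08x\n", read_le32(imm));	/* 0x12345678 on any host */
	return 0;
}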
index b4c43a9..f6fb1d2 100644 (file)
@@ -28,7 +28,7 @@ SYM_FUNC_START_NOALIGN(__x86_retpoline_\reg)
        jmp     .Lspec_trap_\@
 .Ldo_rop_\@:
        mov     %\reg, (%_ASM_SP)
-       UNWIND_HINT_RET_OFFSET
+       UNWIND_HINT_FUNC
        ret
 SYM_FUNC_END(__x86_retpoline_\reg)
 
index 5251973..a73347e 100644 (file)
@@ -9,6 +9,7 @@
 #include <linux/kdebug.h>              /* oops_begin/end, ...          */
 #include <linux/extable.h>             /* search_exception_tables      */
 #include <linux/memblock.h>            /* max_low_pfn                  */
+#include <linux/kfence.h>              /* kfence_handle_page_fault     */
 #include <linux/kprobes.h>             /* NOKPROBE_SYMBOL, ...         */
 #include <linux/mmiotrace.h>           /* kmmio_handler, ...           */
 #include <linux/perf_event.h>          /* perf_sw_event                */
@@ -680,6 +681,11 @@ page_fault_oops(struct pt_regs *regs, unsigned long error_code,
        if (IS_ENABLED(CONFIG_EFI))
                efi_crash_gracefully_on_page_fault(address);
 
+       /* Only not-present faults should be handled by KFENCE. */
+       if (!(error_code & X86_PF_PROT) &&
+           kfence_handle_page_fault(address, error_code & X86_PF_WRITE, regs))
+               return;
+
 oops:
        /*
         * Oops. The kernel tried to access some bad page. We'll have to
index 4b01f7d..ae78cef 100644 (file)
@@ -262,7 +262,7 @@ static void __init __set_clr_pte_enc(pte_t *kpte, int level, bool enc)
        if (pgprot_val(old_prot) == pgprot_val(new_prot))
                return;
 
-       pa = pfn << page_level_shift(level);
+       pa = pfn << PAGE_SHIFT;
        size = page_level_size(level);
 
        /*
index 8f665c3..ca311aa 100644 (file)
@@ -1164,12 +1164,14 @@ static void *memtype_seq_start(struct seq_file *seq, loff_t *pos)
 
 static void *memtype_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 {
+       kfree(v);
        ++*pos;
        return memtype_get_idx(*pos);
 }
 
 static void memtype_seq_stop(struct seq_file *seq, void *v)
 {
+       kfree(v);
 }
 
 static int memtype_seq_show(struct seq_file *seq, void *v)
@@ -1181,8 +1183,6 @@ static int memtype_seq_show(struct seq_file *seq, void *v)
                        entry_print->end,
                        cattr_name(entry_print->type));
 
-       kfree(entry_print);
-
        return 0;
 }
 
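The memtype fix moves the kfree() of each snapshot record out of ->show and into ->next and ->stop: seq_file may invoke ->show zero times for a record (early termination) or replay it after a buffer overflow, so only ->next/->stop can safely own the allocation. A hedged sketch of that ownership rule (demo_get_record() is a hypothetical allocator returning a kmalloc'd copy or NULL):

static void *demo_seq_next(struct seq_file *seq, void *v, loff_t *pos)
{
	kfree(v);			/* drop the record we just walked past */
	++*pos;
	return demo_get_record(*pos);	/* hypothetical: kmalloc'd copy or NULL */
}

static void demo_seq_stop(struct seq_file *seq, void *v)
{
	kfree(v);			/* also reached on early termination */
}

static int demo_seq_show(struct seq_file *seq, void *v)
{
	seq_printf(seq, "%p\n", v);	/* never frees: may be called again */
	return 0;
}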
index 79e7a0e..b35fc80 100644 (file)
@@ -1349,6 +1349,7 @@ st:                       if (is_imm8(insn->off))
                            insn->imm == (BPF_XOR | BPF_FETCH)) {
                                u8 *branch_target;
                                bool is64 = BPF_SIZE(insn->code) == BPF_DW;
+                               u32 real_src_reg = src_reg;
 
                                /*
                                 * Can't be implemented with a single x86 insn.
@@ -1357,6 +1358,9 @@ st:                       if (is_imm8(insn->off))
 
                                /* Will need RAX as a CMPXCHG operand so save R0 */
                                emit_mov_reg(&prog, true, BPF_REG_AX, BPF_REG_0);
+                               if (src_reg == BPF_REG_0)
+                                       real_src_reg = BPF_REG_AX;
+
                                branch_target = prog;
                                /* Load old value */
                                emit_ldx(&prog, BPF_SIZE(insn->code),
@@ -1366,9 +1370,9 @@ st:                       if (is_imm8(insn->off))
                                 * put the result in the AUX_REG.
                                 */
                                emit_mov_reg(&prog, is64, AUX_REG, BPF_REG_0);
-                               maybe_emit_mod(&prog, AUX_REG, src_reg, is64);
+                               maybe_emit_mod(&prog, AUX_REG, real_src_reg, is64);
                                EMIT2(simple_alu_opcodes[BPF_OP(insn->imm)],
-                                     add_2reg(0xC0, AUX_REG, src_reg));
+                                     add_2reg(0xC0, AUX_REG, real_src_reg));
                                /* Attempt to swap in new value */
                                err = emit_atomic(&prog, BPF_CMPXCHG,
                                                  dst_reg, AUX_REG, insn->off,
@@ -1381,7 +1385,7 @@ st:                       if (is_imm8(insn->off))
                                 */
                                EMIT2(X86_JNE, -(prog - branch_target) - 2);
                                /* Return the pre-modification value */
-                               emit_mov_reg(&prog, is64, src_reg, BPF_REG_0);
+                               emit_mov_reg(&prog, is64, real_src_reg, BPF_REG_0);
                                /* Restore R0 after clobbering RAX */
                                emit_mov_reg(&prog, true, BPF_REG_0, BPF_REG_AX);
                                break;
@@ -1932,7 +1936,7 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog,
  * add rsp, 8                      // skip eth_type_trans's frame
  * ret                             // return to its caller
  */
-int arch_prepare_bpf_trampoline(void *image, void *image_end,
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end,
                                const struct btf_func_model *m, u32 flags,
                                struct bpf_tramp_progs *tprogs,
                                void *orig_call)
@@ -1971,6 +1975,15 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
 
        save_regs(m, &prog, nr_args, stack_size);
 
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+               /* arg1: mov rdi, im */
+               emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
+               if (emit_call(&prog, __bpf_tramp_enter, prog)) {
+                       ret = -EINVAL;
+                       goto cleanup;
+               }
+       }
+
        if (fentry->nr_progs)
                if (invoke_bpf(m, &prog, fentry, stack_size))
                        return -EINVAL;
@@ -1989,8 +2002,7 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
        }
 
        if (flags & BPF_TRAMP_F_CALL_ORIG) {
-               if (fentry->nr_progs || fmod_ret->nr_progs)
-                       restore_regs(m, &prog, nr_args, stack_size);
+               restore_regs(m, &prog, nr_args, stack_size);
 
                /* call original function */
                if (emit_call(&prog, orig_call, prog)) {
@@ -1999,6 +2011,9 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
                }
                /* remember return value in a stack for bpf prog to access */
                emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -8);
+               im->ip_after_call = prog;
+               memcpy(prog, ideal_nops[NOP_ATOMIC5], X86_PATCH_SIZE);
+               prog += X86_PATCH_SIZE;
        }
 
        if (fmod_ret->nr_progs) {
@@ -2029,9 +2044,17 @@ int arch_prepare_bpf_trampoline(void *image, void *image_end,
         * the return value is only updated on the stack and still needs to be
         * restored to R0.
         */
-       if (flags & BPF_TRAMP_F_CALL_ORIG)
+       if (flags & BPF_TRAMP_F_CALL_ORIG) {
+               im->ip_epilogue = prog;
+               /* arg1: mov rdi, im */
+               emit_mov_imm64(&prog, BPF_REG_1, (long) im >> 32, (u32) (long) im);
+               if (emit_call(&prog, __bpf_tramp_exit, prog)) {
+                       ret = -EINVAL;
+                       goto cleanup;
+               }
                /* restore original return value back into RAX */
                emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, -8);
+       }
 
        EMIT1(0x5B); /* pop rbx */
        EMIT1(0xC9); /* leave */
@@ -2221,7 +2244,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
                padding = true;
                goto skip_init_addrs;
        }
-       addrs = kmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
+       addrs = kvmalloc_array(prog->len + 1, sizeof(*addrs), GFP_KERNEL);
        if (!addrs) {
                prog = orig_prog;
                goto out_addrs;
@@ -2313,7 +2336,7 @@ out_image:
                if (image)
                        bpf_prog_fill_jited_linfo(prog, addrs + 1);
 out_addrs:
-               kfree(addrs);
+               kvfree(addrs);
                kfree(jit_data);
                prog->aux->jit_data = NULL;
        }
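Two independent JIT fixes land above: the cmpxchg fallback now redirects src_reg to the saved copy in BPF_REG_AX when it aliases R0 (which the sequence clobbers), and the per-instruction addrs array switches to kvmalloc_array(), whose vmalloc() fallback tolerates allocations too large for contiguous kmalloc(). A short sketch of the kvmalloc pairing (demo helpers, not the JIT code itself):

#include <linux/mm.h>
#include <linux/slab.h>

/*
 * kvmalloc_array() tries kmalloc() first and falls back to vmalloc()
 * for large or fragmented allocations; kvfree() releases either kind.
 */
static u32 *alloc_insn_addrs(u32 len)
{
	return kvmalloc_array(len + 1, sizeof(u32), GFP_KERNEL);
}

static void free_insn_addrs(u32 *addrs)
{
	kvfree(addrs);
}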
index 95e2e6b..8edd622 100644 (file)
 #include <linux/io.h>
 #include <linux/smp.h>
 
+#include <asm/cpu_device_id.h>
 #include <asm/segment.h>
 #include <asm/pci_x86.h>
 #include <asm/hw_irq.h>
 #include <asm/io_apic.h>
+#include <asm/intel-family.h>
 #include <asm/intel-mid.h>
 #include <asm/acpi.h>
 
@@ -140,6 +142,7 @@ static int pci_device_update_fixed(struct pci_bus *bus, unsigned int devfn,
  * type1_access_ok - check whether to use type 1
  * @bus: bus number
  * @devfn: device & function in question
+ * @reg: configuration register offset
  *
  * If the bus is on a Lincroft chip and it exists, or is not on a Lincroft at
  * all, then we can go ahead with any reads & writes.  If it's on a Lincroft,
@@ -212,10 +215,17 @@ static int pci_write(struct pci_bus *bus, unsigned int devfn, int where,
                               where, size, value);
 }
 
+static const struct x86_cpu_id intel_mid_cpu_ids[] = {
+       X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, NULL),
+       {}
+};
+
 static int intel_mid_pci_irq_enable(struct pci_dev *dev)
 {
+       const struct x86_cpu_id *id;
        struct irq_alloc_info info;
        bool polarity_low;
+       u16 model = 0;
        int ret;
        u8 gsi;
 
@@ -228,8 +238,12 @@ static int intel_mid_pci_irq_enable(struct pci_dev *dev)
                return ret;
        }
 
-       switch (intel_mid_identify_cpu()) {
-       case INTEL_MID_CPU_CHIP_TANGIER:
+       id = x86_match_cpu(intel_mid_cpu_ids);
+       if (id)
+               model = id->model;
+
+       switch (model) {
+       case INTEL_FAM6_ATOM_SILVERMONT_MID:
                polarity_low = false;
 
                /* Special treatment for IRQ0 */
index 234998f..de6bf0e 100644 (file)
@@ -11,9 +11,9 @@
  * themselves.
  */
 
+#include <linux/acpi.h>
 #include <linux/pci.h>
 #include <linux/init.h>
-#include <linux/sfi_acpi.h>
 #include <linux/bitmap.h>
 #include <linux/dmi.h>
 #include <linux/slab.h>
@@ -665,7 +665,7 @@ void __init pci_mmcfg_early_init(void)
                if (pci_mmcfg_check_hostbridge())
                        known_bridge = 1;
                else
-                       acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
+                       acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
                __pci_mmcfg_init(1);
 
                set_apei_filter();
@@ -683,7 +683,7 @@ void __init pci_mmcfg_late_init(void)
 
        /* MMCONFIG hasn't been enabled yet, try again */
        if (pci_probe & PCI_PROBE_MASK & ~PCI_PROBE_MMCONF) {
-               acpi_sfi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
+               acpi_table_parse(ACPI_SIG_MCFG, pci_parse_mcfg);
                __pci_mmcfg_init(0);
        }
 }
index b2f90a1..3ed03a2 100644 (file)
@@ -10,6 +10,5 @@ obj-y += intel-mid/
 obj-y  += intel-quark/
 obj-y  += olpc/
 obj-y  += scx200/
-obj-y  += sfi/
 obj-y  += ts5500/
 obj-y  += uv/
index cc2549f..ddfc087 100644 (file)
@@ -1,7 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o intel_mid_vrtc.o pwr.o
-
-# SFI specific code
-ifdef CONFIG_X86_INTEL_MID
-obj-$(CONFIG_SFI) += sfi.o device_libs/
-endif
+obj-$(CONFIG_X86_INTEL_MID) += intel-mid.o pwr.o
diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile
deleted file mode 100644 (file)
index 918edac..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-# Family-Level Interface Shim (FLIS)
-obj-$(subst m,y,$(CONFIG_PINCTRL_MERRIFIELD)) += platform_mrfld_pinctrl.o
-# SDHCI Devices
-obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += platform_mrfld_sd.o
-# WiFi + BT
-obj-$(subst m,y,$(CONFIG_BRCMFMAC_SDIO)) += platform_bcm43xx.o
-obj-$(subst m,y,$(CONFIG_BT_HCIUART_BCM)) += platform_bt.o
-# IPC Devices
-obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o
-obj-$(subst m,y,$(CONFIG_SND_MFLD_MACHINE)) += platform_msic_audio.o
-obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o
-obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_ocd.o
-obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o
-obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o
-obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o
-# SPI Devices
-obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o
-# I2C Devices
-obj-$(subst m,y,$(CONFIG_SENSORS_EMC1403)) += platform_emc1403.o
-obj-$(subst m,y,$(CONFIG_SENSORS_LIS3LV02D)) += platform_lis331.o
-obj-$(subst m,y,$(CONFIG_MPU3050_I2C)) += platform_mpu3050.o
-obj-$(subst m,y,$(CONFIG_INPUT_BMA150)) += platform_bma023.o
-obj-$(subst m,y,$(CONFIG_DRM_MEDFIELD)) += platform_tc35876x.o
-# I2C GPIO Expanders
-obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_max7315.o
-obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o
-obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o
-# MISC Devices
-obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o
-obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o
-obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c b/arch/x86/platform/intel-mid/device_libs/platform_bcm43xx.c
deleted file mode 100644 (file)
index 564c47c..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_bcm43xx.c: bcm43xx platform data initialization file
- *
- * (C) Copyright 2016 Intel Corporation
- * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- */
-
-#include <linux/gpio/machine.h>
-#include <linux/platform_device.h>
-#include <linux/regulator/machine.h>
-#include <linux/regulator/fixed.h>
-#include <linux/sfi.h>
-
-#include <asm/intel-mid.h>
-
-#define WLAN_SFI_GPIO_IRQ_NAME         "WLAN-interrupt"
-#define WLAN_SFI_GPIO_ENABLE_NAME      "WLAN-enable"
-
-#define WLAN_DEV_NAME                  "0000:00:01.3"
-
-static struct regulator_consumer_supply bcm43xx_vmmc_supply = {
-       .dev_name               = WLAN_DEV_NAME,
-       .supply                 = "vmmc",
-};
-
-static struct regulator_init_data bcm43xx_vmmc_data = {
-       .constraints = {
-               .valid_ops_mask         = REGULATOR_CHANGE_STATUS,
-       },
-       .num_consumer_supplies  = 1,
-       .consumer_supplies      = &bcm43xx_vmmc_supply,
-};
-
-static struct fixed_voltage_config bcm43xx_vmmc = {
-       .supply_name            = "bcm43xx-vmmc-regulator",
-       /*
-        * Announce 2.0V here to be compatible with SDIO specification. The
-        * real voltage and signaling are still 1.8V.
-        */
-       .microvolts             = 2000000,              /* 1.8V */
-       .startup_delay          = 250 * 1000,           /* 250ms */
-       .enabled_at_boot        = 0,                    /* disabled at boot */
-       .init_data              = &bcm43xx_vmmc_data,
-};
-
-static struct platform_device bcm43xx_vmmc_regulator = {
-       .name           = "reg-fixed-voltage",
-       .id             = PLATFORM_DEVID_AUTO,
-       .dev = {
-               .platform_data  = &bcm43xx_vmmc,
-       },
-};
-
-static struct gpiod_lookup_table bcm43xx_vmmc_gpio_table = {
-       .dev_id = "reg-fixed-voltage.0",
-       .table  = {
-               GPIO_LOOKUP("0000:00:0c.0", -1, NULL, GPIO_ACTIVE_LOW),
-               {}
-       },
-};
-
-static int __init bcm43xx_regulator_register(void)
-{
-       struct gpiod_lookup_table *table = &bcm43xx_vmmc_gpio_table;
-       struct gpiod_lookup *lookup = table->table;
-       int ret;
-
-       lookup[0].chip_hwnum = get_gpio_by_name(WLAN_SFI_GPIO_ENABLE_NAME);
-       gpiod_add_lookup_table(table);
-
-       ret = platform_device_register(&bcm43xx_vmmc_regulator);
-       if (ret) {
-               pr_err("%s: vmmc regulator register failed\n", __func__);
-               return ret;
-       }
-
-       return 0;
-}
-
-static void __init *bcm43xx_platform_data(void *info)
-{
-       int ret;
-
-       ret = bcm43xx_regulator_register();
-       if (ret)
-               return NULL;
-
-       pr_info("Using generic wifi platform data\n");
-
-       /* For now it's empty */
-       return NULL;
-}
-
-static const struct devs_id bcm43xx_clk_vmmc_dev_id __initconst = {
-       .name                   = "bcm43xx_clk_vmmc",
-       .type                   = SFI_DEV_TYPE_SD,
-       .get_platform_data      = &bcm43xx_platform_data,
-};
-
-sfi_device(bcm43xx_clk_vmmc_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bma023.c b/arch/x86/platform/intel-mid/device_libs/platform_bma023.c
deleted file mode 100644 (file)
index 32912a1..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_bma023.c: bma023 platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- */
-
-#include <asm/intel-mid.h>
-
-static const struct devs_id bma023_dev_id __initconst = {
-       .name = "bma023",
-       .type = SFI_DEV_TYPE_I2C,
-       .delay = 1,
-};
-
-sfi_device(bma023_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_bt.c b/arch/x86/platform/intel-mid/device_libs/platform_bt.c
deleted file mode 100644 (file)
index 2930b6e..0000000
+++ /dev/null
@@ -1,101 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Bluetooth platform data initialization file
- *
- * (C) Copyright 2017 Intel Corporation
- * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- */
-
-#include <linux/gpio/machine.h>
-#include <linux/pci.h>
-#include <linux/platform_device.h>
-
-#include <asm/cpu_device_id.h>
-#include <asm/intel-family.h>
-#include <asm/intel-mid.h>
-
-struct bt_sfi_data {
-       struct device *dev;
-       const char *name;
-       int (*setup)(struct bt_sfi_data *ddata);
-};
-
-static struct gpiod_lookup_table tng_bt_sfi_gpio_table = {
-       .dev_id = "hci_bcm",
-       .table  = {
-               GPIO_LOOKUP("0000:00:0c.0", -1, "device-wakeup", GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("0000:00:0c.0", -1, "shutdown",      GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("0000:00:0c.0", -1, "host-wakeup",   GPIO_ACTIVE_HIGH),
-               { },
-       },
-};
-
-#define TNG_BT_SFI_GPIO_DEVICE_WAKEUP  "bt_wakeup"
-#define TNG_BT_SFI_GPIO_SHUTDOWN       "BT-reset"
-#define TNG_BT_SFI_GPIO_HOST_WAKEUP    "bt_uart_enable"
-
-static int __init tng_bt_sfi_setup(struct bt_sfi_data *ddata)
-{
-       struct gpiod_lookup_table *table = &tng_bt_sfi_gpio_table;
-       struct gpiod_lookup *lookup = table->table;
-       struct pci_dev *pdev;
-
-       /* Connected to /dev/ttyS0 */
-       pdev = pci_get_domain_bus_and_slot(0, 0, PCI_DEVFN(4, 1));
-       if (!pdev)
-               return -ENODEV;
-
-       ddata->dev = &pdev->dev;
-       ddata->name = table->dev_id;
-
-       lookup[0].chip_hwnum = get_gpio_by_name(TNG_BT_SFI_GPIO_DEVICE_WAKEUP);
-       lookup[1].chip_hwnum = get_gpio_by_name(TNG_BT_SFI_GPIO_SHUTDOWN);
-       lookup[2].chip_hwnum = get_gpio_by_name(TNG_BT_SFI_GPIO_HOST_WAKEUP);
-
-       gpiod_add_lookup_table(table);
-       return 0;
-}
-
-static struct bt_sfi_data tng_bt_sfi_data __initdata = {
-       .setup  = tng_bt_sfi_setup,
-};
-
-static const struct x86_cpu_id bt_sfi_cpu_ids[] = {
-       X86_MATCH_INTEL_FAM6_MODEL(ATOM_SILVERMONT_MID, &tng_bt_sfi_data),
-       {}
-};
-
-static int __init bt_sfi_init(void)
-{
-       struct platform_device_info info;
-       struct platform_device *pdev;
-       const struct x86_cpu_id *id;
-       struct bt_sfi_data *ddata;
-       int ret;
-
-       id = x86_match_cpu(bt_sfi_cpu_ids);
-       if (!id)
-               return -ENODEV;
-
-       ddata = (struct bt_sfi_data *)id->driver_data;
-       if (!ddata)
-               return -ENODEV;
-
-       ret = ddata->setup(ddata);
-       if (ret)
-               return ret;
-
-       memset(&info, 0, sizeof(info));
-       info.fwnode     = ddata->dev->fwnode;
-       info.parent     = ddata->dev;
-       info.name       = ddata->name;
-       info.id         = PLATFORM_DEVID_NONE;
-
-       pdev = platform_device_register_full(&info);
-       if (IS_ERR(pdev))
-               return PTR_ERR(pdev);
-
-       dev_info(ddata->dev, "Registered Bluetooth device: %s\n", ddata->name);
-       return 0;
-}
-device_initcall(bt_sfi_init);
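The board file above resolves SFI pin names into the chip_hwnum fields of a gpiod_lookup_table, so the consumer driver only needs the connection IDs. A minimal consumer-side sketch (illustrative function and device names, not part of this series), assuming the driver bound to "hci_bcm" requests the "device-wakeup" line:

#include <linux/device.h>
#include <linux/err.h>
#include <linux/gpio/consumer.h>

static int example_bt_probe(struct device *dev)
{
        struct gpio_desc *wakeup;

        /* con_id matches the "device-wakeup" lookup entry above */
        wakeup = devm_gpiod_get(dev, "device-wakeup", GPIOD_OUT_LOW);
        if (IS_ERR(wakeup))
                return PTR_ERR(wakeup);

        gpiod_set_value(wakeup, 1);     /* assert device wakeup */
        return 0;
}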
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c b/arch/x86/platform/intel-mid/device_libs/platform_emc1403.c
deleted file mode 100644 (file)
index a250858..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_emc1403.c: emc1403 platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/init.h>
-#include <linux/gpio.h>
-#include <linux/i2c.h>
-#include <asm/intel-mid.h>
-
-static void __init *emc1403_platform_data(void *info)
-{
-       static short intr2nd_pdata;
-       struct i2c_board_info *i2c_info = info;
-       int intr = get_gpio_by_name("thermal_int");
-       int intr2nd = get_gpio_by_name("thermal_alert");
-
-       if (intr < 0)
-               return NULL;
-       if (intr2nd < 0)
-               return NULL;
-
-       i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-       intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
-
-       return &intr2nd_pdata;
-}
-
-static const struct devs_id emc1403_dev_id __initconst = {
-       .name = "emc1403",
-       .type = SFI_DEV_TYPE_I2C,
-       .delay = 1,
-       .get_platform_data = &emc1403_platform_data,
-};
-
-sfi_device(emc1403_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c
deleted file mode 100644 (file)
index d9435d2..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_gpio_keys.c: gpio_keys platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/input.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/gpio.h>
-#include <linux/gpio_keys.h>
-#include <linux/platform_device.h>
-#include <asm/intel-mid.h>
-
-#define DEVICE_NAME "gpio-keys"
-
-/*
- * We will search for these buttons in the SFI GPIO table (by name)
- * and register them dynamically. Please add all possible
- * buttons here; we will shrink the list if no GPIO is found.
- */
-static struct gpio_keys_button gpio_button[] = {
-       {KEY_POWER,             -1, 1, "power_btn",     EV_KEY, 0, 3000},
-       {KEY_PROG1,             -1, 1, "prog_btn1",     EV_KEY, 0, 20},
-       {KEY_PROG2,             -1, 1, "prog_btn2",     EV_KEY, 0, 20},
-       {SW_LID,                -1, 1, "lid_switch",    EV_SW,  0, 20},
-       {KEY_VOLUMEUP,          -1, 1, "vol_up",        EV_KEY, 0, 20},
-       {KEY_VOLUMEDOWN,        -1, 1, "vol_down",      EV_KEY, 0, 20},
-       {KEY_MUTE,              -1, 1, "mute_enable",   EV_KEY, 0, 20},
-       {KEY_VOLUMEUP,          -1, 1, "volume_up",     EV_KEY, 0, 20},
-       {KEY_VOLUMEDOWN,        -1, 1, "volume_down",   EV_KEY, 0, 20},
-       {KEY_CAMERA,            -1, 1, "camera_full",   EV_KEY, 0, 20},
-       {KEY_CAMERA_FOCUS,      -1, 1, "camera_half",   EV_KEY, 0, 20},
-       {SW_KEYPAD_SLIDE,       -1, 1, "MagSw1",        EV_SW,  0, 20},
-       {SW_KEYPAD_SLIDE,       -1, 1, "MagSw2",        EV_SW,  0, 20},
-};
-
-static struct gpio_keys_platform_data gpio_keys = {
-       .buttons        = gpio_button,
-       .rep            = 1,
-       .nbuttons       = -1, /* will fill it after search */
-};
-
-static struct platform_device pb_device = {
-       .name           = DEVICE_NAME,
-       .id             = -1,
-       .dev            = {
-               .platform_data  = &gpio_keys,
-       },
-};
-
-/*
- * Shrink out the non-existent buttons and register the gpio button
- * device if any remain
- */
-static int __init pb_keys_init(void)
-{
-       struct gpio_keys_button *gb = gpio_button;
-       int i, good = 0;
-
-       for (i = 0; i < ARRAY_SIZE(gpio_button); i++) {
-               gb[i].gpio = get_gpio_by_name(gb[i].desc);
-               pr_debug("info[%2d]: name = %s, gpio = %d\n", i, gb[i].desc,
-                                       gb[i].gpio);
-               if (gb[i].gpio < 0)
-                       continue;
-
-               if (i != good)
-                       gb[good] = gb[i];
-               good++;
-       }
-
-       if (good) {
-               gpio_keys.nbuttons = good;
-               return platform_device_register(&pb_device);
-       }
-       return 0;
-}
-late_initcall(pb_keys_init);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_lis331.c b/arch/x86/platform/intel-mid/device_libs/platform_lis331.c
deleted file mode 100644 (file)
index a4485cd..0000000
+++ /dev/null
@@ -1,37 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_lis331.c:  lis331 platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/i2c.h>
-#include <linux/gpio.h>
-#include <asm/intel-mid.h>
-
-static void __init *lis331dl_platform_data(void *info)
-{
-       static short intr2nd_pdata;
-       struct i2c_board_info *i2c_info = info;
-       int intr = get_gpio_by_name("accel_int");
-       int intr2nd = get_gpio_by_name("accel_2");
-
-       if (intr < 0)
-               return NULL;
-       if (intr2nd < 0)
-               return NULL;
-
-       i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-       intr2nd_pdata = intr2nd + INTEL_MID_IRQ_OFFSET;
-
-       return &intr2nd_pdata;
-}
-
-static const struct devs_id lis331dl_dev_id __initconst = {
-       .name = "i2c_accel",
-       .type = SFI_DEV_TYPE_I2C,
-       .get_platform_data = &lis331dl_platform_data,
-};
-
-sfi_device(lis331dl_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_max7315.c b/arch/x86/platform/intel-mid/device_libs/platform_max7315.c
deleted file mode 100644 (file)
index e9287c3..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_max7315.c: max7315 platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/init.h>
-#include <linux/gpio.h>
-#include <linux/i2c.h>
-#include <linux/platform_data/pca953x.h>
-#include <asm/intel-mid.h>
-
-#define MAX7315_NUM 2
-
-static void __init *max7315_platform_data(void *info)
-{
-       static struct pca953x_platform_data max7315_pdata[MAX7315_NUM];
-       static int nr;
-       struct pca953x_platform_data *max7315 = &max7315_pdata[nr];
-       struct i2c_board_info *i2c_info = info;
-       int gpio_base, intr;
-       char base_pin_name[SFI_NAME_LEN + 1];
-       char intr_pin_name[SFI_NAME_LEN + 1];
-
-       if (nr == MAX7315_NUM) {
-               pr_err("too many max7315s, we only support %d\n",
-                               MAX7315_NUM);
-               return NULL;
-       }
-       /* We have several max7315s on the board; we only need to load
-        * several instances of the same pca953x driver to cover them.
-        */
-       strcpy(i2c_info->type, "max7315");
-       if (nr++) {
-               snprintf(base_pin_name, sizeof(base_pin_name),
-                        "max7315_%d_base", nr);
-               snprintf(intr_pin_name, sizeof(intr_pin_name),
-                        "max7315_%d_int", nr);
-       } else {
-               strcpy(base_pin_name, "max7315_base");
-               strcpy(intr_pin_name, "max7315_int");
-       }
-
-       gpio_base = get_gpio_by_name(base_pin_name);
-       intr = get_gpio_by_name(intr_pin_name);
-
-       if (gpio_base < 0)
-               return NULL;
-       max7315->gpio_base = gpio_base;
-       if (intr != -1) {
-               i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-               max7315->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
-       } else {
-               i2c_info->irq = -1;
-               max7315->irq_base = -1;
-       }
-       return max7315;
-}
-
-static const struct devs_id max7315_dev_id __initconst = {
-       .name = "i2c_max7315",
-       .type = SFI_DEV_TYPE_I2C,
-       .delay = 1,
-       .get_platform_data = &max7315_platform_data,
-};
-
-static const struct devs_id max7315_2_dev_id __initconst = {
-       .name = "i2c_max7315_2",
-       .type = SFI_DEV_TYPE_I2C,
-       .delay = 1,
-       .get_platform_data = &max7315_platform_data,
-};
-
-sfi_device(max7315_dev_id);
-sfi_device(max7315_2_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c b/arch/x86/platform/intel-mid/device_libs/platform_mpu3050.c
deleted file mode 100644 (file)
index 28a1827..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_mpu3050.c: mpu3050 platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/gpio.h>
-#include <linux/i2c.h>
-#include <asm/intel-mid.h>
-
-static void *mpu3050_platform_data(void *info)
-{
-       struct i2c_board_info *i2c_info = info;
-       int intr = get_gpio_by_name("mpu3050_int");
-
-       if (intr < 0)
-               return NULL;
-
-       i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-       return NULL;
-}
-
-static const struct devs_id mpu3050_dev_id __initconst = {
-       .name = "mpu3050",
-       .type = SFI_DEV_TYPE_I2C,
-       .delay = 1,
-       .get_platform_data = &mpu3050_platform_data,
-};
-
-sfi_device(mpu3050_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_pinctrl.c
deleted file mode 100644 (file)
index 605e1f9..0000000
+++ /dev/null
@@ -1,39 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Merrifield FLIS platform device initialization file
- *
- * Copyright (C) 2016, Intel Corporation
- *
- * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- */
-
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-
-#include <asm/intel-mid.h>
-
-#define FLIS_BASE_ADDR                 0xff0c0000
-#define FLIS_LENGTH                    0x8000
-
-static struct resource mrfld_pinctrl_mmio_resource = {
-       .start          = FLIS_BASE_ADDR,
-       .end            = FLIS_BASE_ADDR + FLIS_LENGTH - 1,
-       .flags          = IORESOURCE_MEM,
-};
-
-static struct platform_device mrfld_pinctrl_device = {
-       .name           = "pinctrl-merrifield",
-       .id             = PLATFORM_DEVID_NONE,
-       .resource       = &mrfld_pinctrl_mmio_resource,
-       .num_resources  = 1,
-};
-
-static int __init mrfld_pinctrl_init(void)
-{
-       if (intel_mid_identify_cpu() == INTEL_MID_CPU_CHIP_TANGIER)
-               return platform_device_register(&mrfld_pinctrl_device);
-
-       return -ENODEV;
-}
-arch_initcall(mrfld_pinctrl_init);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c
deleted file mode 100644 (file)
index ec2afb4..0000000
+++ /dev/null
@@ -1,78 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Merrifield power button support
- *
- * (C) Copyright 2017 Intel Corporation
- *
- * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- */
-
-#include <linux/init.h>
-#include <linux/ioport.h>
-#include <linux/platform_device.h>
-#include <linux/sfi.h>
-
-#include <asm/intel-mid.h>
-#include <asm/intel_scu_ipc.h>
-
-static struct resource mrfld_power_btn_resources[] = {
-       {
-               .flags          = IORESOURCE_IRQ,
-       },
-};
-
-static struct platform_device mrfld_power_btn_dev = {
-       .name           = "msic_power_btn",
-       .id             = PLATFORM_DEVID_NONE,
-       .num_resources  = ARRAY_SIZE(mrfld_power_btn_resources),
-       .resource       = mrfld_power_btn_resources,
-};
-
-static int mrfld_power_btn_scu_status_change(struct notifier_block *nb,
-                                            unsigned long code, void *data)
-{
-       if (code == SCU_DOWN) {
-               platform_device_unregister(&mrfld_power_btn_dev);
-               return 0;
-       }
-
-       return platform_device_register(&mrfld_power_btn_dev);
-}
-
-static struct notifier_block mrfld_power_btn_scu_notifier = {
-       .notifier_call  = mrfld_power_btn_scu_status_change,
-};
-
-static int __init register_mrfld_power_btn(void)
-{
-       if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
-               return -ENODEV;
-
-       /*
-        * We need to be sure that the SCU IPC is ready before
-        * PMIC power button device can be registered:
-        */
-       intel_scu_notifier_add(&mrfld_power_btn_scu_notifier);
-
-       return 0;
-}
-arch_initcall(register_mrfld_power_btn);
-
-static void __init *mrfld_power_btn_platform_data(void *info)
-{
-       struct resource *res = mrfld_power_btn_resources;
-       struct sfi_device_table_entry *pentry = info;
-
-       res->start = res->end = pentry->irq;
-       return NULL;
-}
-
-static const struct devs_id mrfld_power_btn_dev_id __initconst = {
-       .name                   = "bcove_power_btn",
-       .type                   = SFI_DEV_TYPE_IPC,
-       .delay                  = 1,
-       .msic                   = 1,
-       .get_platform_data      = &mrfld_power_btn_platform_data,
-};
-
-sfi_device(mrfld_power_btn_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c
deleted file mode 100644 (file)
index 40e9808..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Intel Merrifield legacy RTC initialization file
- *
- * (C) Copyright 2017 Intel Corporation
- *
- * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- */
-
-#include <linux/init.h>
-
-#include <asm/hw_irq.h>
-#include <asm/intel-mid.h>
-#include <asm/io_apic.h>
-#include <asm/time.h>
-#include <asm/x86_init.h>
-
-static int __init mrfld_legacy_rtc_alloc_irq(void)
-{
-       struct irq_alloc_info info;
-       int ret;
-
-       if (!x86_platform.legacy.rtc)
-               return -ENODEV;
-
-       ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, 0);
-       ret = mp_map_gsi_to_irq(RTC_IRQ, IOAPIC_MAP_ALLOC, &info);
-       if (ret < 0) {
-               pr_info("Failed to allocate RTC interrupt. Disabling RTC\n");
-               x86_platform.legacy.rtc = 0;
-               return ret;
-       }
-
-       return 0;
-}
-
-static int __init mrfld_legacy_rtc_init(void)
-{
-       if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
-               return -ENODEV;
-
-       return mrfld_legacy_rtc_alloc_irq();
-}
-arch_initcall(mrfld_legacy_rtc_init);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_sd.c
deleted file mode 100644 (file)
index fe3b7ff..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * SDHCI platform data initialisation file
- *
- * (C) Copyright 2016 Intel Corporation
- * Author: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- */
-
-#include <linux/init.h>
-#include <linux/pci.h>
-
-#include <linux/mmc/sdhci-pci-data.h>
-
-#include <asm/intel-mid.h>
-
-#define INTEL_MRFLD_SD                 2
-#define INTEL_MRFLD_SD_CD_GPIO         77
-
-static struct sdhci_pci_data mrfld_sdhci_pci_data = {
-       .rst_n_gpio     = -EINVAL,
-       .cd_gpio        = INTEL_MRFLD_SD_CD_GPIO,
-};
-
-static struct sdhci_pci_data *
-mrfld_sdhci_pci_get_data(struct pci_dev *pdev, int slotno)
-{
-       unsigned int func = PCI_FUNC(pdev->devfn);
-
-       if (func == INTEL_MRFLD_SD)
-               return &mrfld_sdhci_pci_data;
-
-       return NULL;
-}
-
-static int __init mrfld_sd_init(void)
-{
-       if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
-               return -ENODEV;
-
-       sdhci_pci_get_data = mrfld_sdhci_pci_get_data;
-       return 0;
-}
-arch_initcall(mrfld_sd_init);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_spidev.c
deleted file mode 100644 (file)
index b828f4f..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * spidev platform data initialization file
- *
- * (C) Copyright 2014, 2016 Intel Corporation
- * Authors: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *         Dan O'Donovan <dan@emutex.com>
- */
-
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/spi/pxa2xx_spi.h>
-#include <linux/spi/spi.h>
-
-#include <asm/intel-mid.h>
-
-#define MRFLD_SPI_DEFAULT_DMA_BURST    8
-#define MRFLD_SPI_DEFAULT_TIMEOUT      500
-
-/* GPIO pin for spidev chipselect */
-#define MRFLD_SPIDEV_GPIO_CS           111
-
-static struct pxa2xx_spi_chip spidev_spi_chip = {
-       .dma_burst_size         = MRFLD_SPI_DEFAULT_DMA_BURST,
-       .timeout                = MRFLD_SPI_DEFAULT_TIMEOUT,
-       .gpio_cs                = MRFLD_SPIDEV_GPIO_CS,
-};
-
-static void __init *spidev_platform_data(void *info)
-{
-       struct spi_board_info *spi_info = info;
-
-       if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER)
-               return ERR_PTR(-ENODEV);
-
-       spi_info->mode = SPI_MODE_0;
-       spi_info->controller_data = &spidev_spi_chip;
-
-       return NULL;
-}
-
-static const struct devs_id spidev_dev_id __initconst = {
-       .name                   = "spidev",
-       .type                   = SFI_DEV_TYPE_SPI,
-       .delay                  = 0,
-       .get_platform_data      = &spidev_platform_data,
-};
-
-sfi_device(spidev_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.c b/arch/x86/platform/intel-mid/device_libs/platform_msic.c
deleted file mode 100644 (file)
index b17783d..0000000
+++ /dev/null
@@ -1,83 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_msic.c: MSIC platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/mfd/intel_msic.h>
-#include <asm/intel_scu_ipc.h>
-#include <asm/intel-mid.h>
-#include "platform_msic.h"
-
-struct intel_msic_platform_data msic_pdata;
-
-static struct resource msic_resources[] = {
-       {
-               .start  = INTEL_MSIC_IRQ_PHYS_BASE,
-               .end    = INTEL_MSIC_IRQ_PHYS_BASE + 64 - 1,
-               .flags  = IORESOURCE_MEM,
-       },
-};
-
-static struct platform_device msic_device = {
-       .name           = "intel_msic",
-       .id             = -1,
-       .dev            = {
-               .platform_data  = &msic_pdata,
-       },
-       .num_resources  = ARRAY_SIZE(msic_resources),
-       .resource       = msic_resources,
-};
-
-static int msic_scu_status_change(struct notifier_block *nb,
-                                 unsigned long code, void *data)
-{
-       if (code == SCU_DOWN) {
-               platform_device_unregister(&msic_device);
-               return 0;
-       }
-
-       return platform_device_register(&msic_device);
-}
-
-static int __init msic_init(void)
-{
-       static struct notifier_block msic_scu_notifier = {
-               .notifier_call  = msic_scu_status_change,
-       };
-
-       /*
-        * We need to be sure that the SCU IPC is ready before MSIC device
-        * can be registered.
-        */
-       if (intel_mid_has_msic())
-               intel_scu_notifier_add(&msic_scu_notifier);
-
-       return 0;
-}
-arch_initcall(msic_init);
-
-/*
- * msic_generic_platform_data - sets generic platform data for the block
- * @info: pointer to the SFI device table entry for this block
- * @block: MSIC block
- *
- * Sets the IRQ number from the SFI table entry for the given device in
- * the MSIC platform data.
- */
-void *msic_generic_platform_data(void *info, enum intel_msic_block block)
-{
-       struct sfi_device_table_entry *entry = info;
-
-       BUG_ON(block < 0 || block >= INTEL_MSIC_BLOCK_LAST);
-       msic_pdata.irq[block] = entry->irq;
-
-       return NULL;
-}
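Both this file and platform_mrfld_power_btn.c above defer device registration until the SCU comes up, via the blocking notifier chain on which sfi.c (below) posts SCU_AVAILABLE/SCU_DOWN events. A generic subscriber sketch (illustrative only, using the declarations pulled in by asm/intel_scu_ipc.h as included above):

#include <linux/kernel.h>
#include <linux/notifier.h>
#include <asm/intel_scu_ipc.h>

static int example_scu_event(struct notifier_block *nb,
                             unsigned long code, void *data)
{
        if (code == SCU_AVAILABLE)
                pr_info("SCU is up; IPC-backed devices may be registered\n");
        return NOTIFY_OK;
}

static struct notifier_block example_scu_nb = {
        .notifier_call = example_scu_event,
};

/* at init time: intel_scu_notifier_add(&example_scu_nb); */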
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic.h b/arch/x86/platform/intel-mid/device_libs/platform_msic.h
deleted file mode 100644 (file)
index 91deb2e..0000000
+++ /dev/null
@@ -1,15 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * platform_msic.h: MSIC platform data header file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-#ifndef _PLATFORM_MSIC_H_
-#define _PLATFORM_MSIC_H_
-
-extern struct intel_msic_platform_data msic_pdata;
-
-void *msic_generic_platform_data(void *info, enum intel_msic_block block);
-
-#endif
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c
deleted file mode 100644 (file)
index e765da7..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_msic_audio.c: MSIC audio platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/platform_device.h>
-#include <linux/mfd/intel_msic.h>
-#include <asm/intel-mid.h>
-
-#include "platform_msic.h"
-
-static void *msic_audio_platform_data(void *info)
-{
-       struct platform_device *pdev;
-
-       pdev = platform_device_register_simple("sst-platform", -1, NULL, 0);
-
-       if (IS_ERR(pdev)) {
-               pr_err("failed to create audio platform device\n");
-               return NULL;
-       }
-
-       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_AUDIO);
-}
-
-static const struct devs_id msic_audio_dev_id __initconst = {
-       .name = "msic_audio",
-       .type = SFI_DEV_TYPE_IPC,
-       .delay = 1,
-       .msic = 1,
-       .get_platform_data = &msic_audio_platform_data,
-};
-
-sfi_device(msic_audio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c
deleted file mode 100644 (file)
index f461f84..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_msic_battery.c: MSIC battery platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/mfd/intel_msic.h>
-#include <asm/intel-mid.h>
-
-#include "platform_msic.h"
-
-static void __init *msic_battery_platform_data(void *info)
-{
-       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_BATTERY);
-}
-
-static const struct devs_id msic_battery_dev_id __initconst = {
-       .name = "msic_battery",
-       .type = SFI_DEV_TYPE_IPC,
-       .delay = 1,
-       .msic = 1,
-       .get_platform_data = &msic_battery_platform_data,
-};
-
-sfi_device(msic_battery_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c
deleted file mode 100644 (file)
index 71a7d6d..0000000
+++ /dev/null
@@ -1,43 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_msic_gpio.c: MSIC GPIO platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/sfi.h>
-#include <linux/init.h>
-#include <linux/gpio.h>
-#include <linux/mfd/intel_msic.h>
-#include <asm/intel-mid.h>
-
-#include "platform_msic.h"
-
-static void __init *msic_gpio_platform_data(void *info)
-{
-       static struct intel_msic_gpio_pdata msic_gpio_pdata;
-
-       int gpio = get_gpio_by_name("msic_gpio_base");
-
-       if (gpio < 0)
-               return NULL;
-
-       msic_gpio_pdata.gpio_base = gpio;
-       msic_pdata.gpio = &msic_gpio_pdata;
-
-       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_GPIO);
-}
-
-static const struct devs_id msic_gpio_dev_id __initconst = {
-       .name = "msic_gpio",
-       .type = SFI_DEV_TYPE_IPC,
-       .delay = 1,
-       .msic = 1,
-       .get_platform_data = &msic_gpio_platform_data,
-};
-
-sfi_device(msic_gpio_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c
deleted file mode 100644 (file)
index 558c0d9..0000000
+++ /dev/null
@@ -1,44 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_msic_ocd.c: MSIC OCD platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/sfi.h>
-#include <linux/init.h>
-#include <linux/gpio.h>
-#include <linux/mfd/intel_msic.h>
-#include <asm/intel-mid.h>
-
-#include "platform_msic.h"
-
-static void __init *msic_ocd_platform_data(void *info)
-{
-       static struct intel_msic_ocd_pdata msic_ocd_pdata;
-       int gpio;
-
-       gpio = get_gpio_by_name("ocd_gpio");
-
-       if (gpio < 0)
-               return NULL;
-
-       msic_ocd_pdata.gpio = gpio;
-       msic_pdata.ocd = &msic_ocd_pdata;
-
-       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_OCD);
-}
-
-static const struct devs_id msic_ocd_dev_id __initconst = {
-       .name = "msic_ocd",
-       .type = SFI_DEV_TYPE_IPC,
-       .delay = 1,
-       .msic = 1,
-       .get_platform_data = &msic_ocd_platform_data,
-};
-
-sfi_device(msic_ocd_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c
deleted file mode 100644 (file)
index 3d3de2d..0000000
+++ /dev/null
@@ -1,31 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_msic_power_btn.c: MSIC power btn platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/sfi.h>
-#include <linux/init.h>
-#include <linux/mfd/intel_msic.h>
-#include <asm/intel-mid.h>
-
-#include "platform_msic.h"
-
-static void __init *msic_power_btn_platform_data(void *info)
-{
-       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_POWER_BTN);
-}
-
-static const struct devs_id msic_power_btn_dev_id __initconst = {
-       .name = "msic_power_btn",
-       .type = SFI_DEV_TYPE_IPC,
-       .delay = 1,
-       .msic = 1,
-       .get_platform_data = &msic_power_btn_platform_data,
-};
-
-sfi_device(msic_power_btn_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c
deleted file mode 100644 (file)
index 4858da1..0000000
+++ /dev/null
@@ -1,32 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_msic_thermal.c: msic_thermal platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/input.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/gpio.h>
-#include <linux/platform_device.h>
-#include <linux/mfd/intel_msic.h>
-#include <asm/intel-mid.h>
-
-#include "platform_msic.h"
-
-static void __init *msic_thermal_platform_data(void *info)
-{
-       return msic_generic_platform_data(info, INTEL_MSIC_BLOCK_THERMAL);
-}
-
-static const struct devs_id msic_thermal_dev_id __initconst = {
-       .name = "msic_thermal",
-       .type = SFI_DEV_TYPE_IPC,
-       .delay = 1,
-       .msic = 1,
-       .get_platform_data = &msic_thermal_platform_data,
-};
-
-sfi_device(msic_thermal_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c b/arch/x86/platform/intel-mid/device_libs/platform_pcal9555a.c
deleted file mode 100644 (file)
index 5609d8d..0000000
+++ /dev/null
@@ -1,95 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * PCAL9555a platform data initialization file
- *
- * Copyright (C) 2016, Intel Corporation
- *
- * Authors: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
- *         Dan O'Donovan <dan@emutex.com>
- */
-
-#include <linux/gpio.h>
-#include <linux/init.h>
-#include <linux/i2c.h>
-#include <linux/platform_data/pca953x.h>
-#include <linux/sfi.h>
-
-#include <asm/intel-mid.h>
-
-#define PCAL9555A_NUM  4
-
-static struct pca953x_platform_data pcal9555a_pdata[PCAL9555A_NUM];
-static int nr;
-
-static void __init *pcal9555a_platform_data(void *info)
-{
-       struct i2c_board_info *i2c_info = info;
-       char *type = i2c_info->type;
-       struct pca953x_platform_data *pcal9555a;
-       char base_pin_name[SFI_NAME_LEN + 1];
-       char intr_pin_name[SFI_NAME_LEN + 1];
-       int gpio_base, intr;
-
-       snprintf(base_pin_name, sizeof(base_pin_name), "%s_base", type);
-       snprintf(intr_pin_name, sizeof(intr_pin_name), "%s_int", type);
-
-       gpio_base = get_gpio_by_name(base_pin_name);
-       intr = get_gpio_by_name(intr_pin_name);
-
-       /* Check if the SFI record is valid */
-       if (gpio_base == -1)
-               return NULL;
-
-       if (nr >= PCAL9555A_NUM) {
-               pr_err("%s: Too many instances, only %d supported\n", __func__,
-                      PCAL9555A_NUM);
-               return NULL;
-       }
-
-       pcal9555a = &pcal9555a_pdata[nr++];
-       pcal9555a->gpio_base = gpio_base;
-
-       if (intr >= 0) {
-               i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-               pcal9555a->irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
-       } else {
-               i2c_info->irq = -1;
-               pcal9555a->irq_base = -1;
-       }
-
-       strcpy(type, "pcal9555a");
-       return pcal9555a;
-}
-
-static const struct devs_id pcal9555a_1_dev_id __initconst = {
-       .name                   = "pcal9555a-1",
-       .type                   = SFI_DEV_TYPE_I2C,
-       .delay                  = 1,
-       .get_platform_data      = &pcal9555a_platform_data,
-};
-
-static const struct devs_id pcal9555a_2_dev_id __initconst = {
-       .name                   = "pcal9555a-2",
-       .type                   = SFI_DEV_TYPE_I2C,
-       .delay                  = 1,
-       .get_platform_data      = &pcal9555a_platform_data,
-};
-
-static const struct devs_id pcal9555a_3_dev_id __initconst = {
-       .name                   = "pcal9555a-3",
-       .type                   = SFI_DEV_TYPE_I2C,
-       .delay                  = 1,
-       .get_platform_data      = &pcal9555a_platform_data,
-};
-
-static const struct devs_id pcal9555a_4_dev_id __initconst = {
-       .name                   = "pcal9555a-4",
-       .type                   = SFI_DEV_TYPE_I2C,
-       .delay                  = 1,
-       .get_platform_data      = &pcal9555a_platform_data,
-};
-
-sfi_device(pcal9555a_1_dev_id);
-sfi_device(pcal9555a_2_dev_id);
-sfi_device(pcal9555a_3_dev_id);
-sfi_device(pcal9555a_4_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c b/arch/x86/platform/intel-mid/device_libs/platform_tc35876x.c
deleted file mode 100644 (file)
index 139738b..0000000
+++ /dev/null
@@ -1,42 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_tc35876x.c: tc35876x platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/gpio/machine.h>
-#include <asm/intel-mid.h>
-
-static struct gpiod_lookup_table tc35876x_gpio_table = {
-       .dev_id = "i2c_disp_brig",
-       .table  = {
-               GPIO_LOOKUP("0000:00:0c.0", -1, "bridge-reset", GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("0000:00:0c.0", -1, "bl-en", GPIO_ACTIVE_HIGH),
-               GPIO_LOOKUP("0000:00:0c.0", -1, "vadd", GPIO_ACTIVE_HIGH),
-               { },
-       },
-};
-
-/* tc35876x DSI_LVDS bridge chip and panel platform data */
-static void *tc35876x_platform_data(void *data)
-{
-       struct gpiod_lookup_table *table = &tc35876x_gpio_table;
-       struct gpiod_lookup *lookup = table->table;
-
-       lookup[0].chip_hwnum = get_gpio_by_name("LCMB_RXEN");
-       lookup[1].chip_hwnum = get_gpio_by_name("6S6P_BL_EN");
-       lookup[2].chip_hwnum = get_gpio_by_name("EN_VREG_LCD_V3P3");
-       gpiod_add_lookup_table(table);
-
-       return NULL;
-}
-
-static const struct devs_id tc35876x_dev_id __initconst = {
-       .name = "i2c_disp_brig",
-       .type = SFI_DEV_TYPE_I2C,
-       .get_platform_data = &tc35876x_platform_data,
-};
-
-sfi_device(tc35876x_dev_id);
diff --git a/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c b/arch/x86/platform/intel-mid/device_libs/platform_tca6416.c
deleted file mode 100644 (file)
index e689d8f..0000000
+++ /dev/null
@@ -1,53 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * platform_tca6416.c: tca6416 platform data initialization file
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/platform_data/pca953x.h>
-#include <linux/i2c.h>
-#include <linux/gpio.h>
-#include <asm/intel-mid.h>
-
-#define TCA6416_NAME   "tca6416"
-#define TCA6416_BASE   "tca6416_base"
-#define TCA6416_INTR   "tca6416_int"
-
-static void *tca6416_platform_data(void *info)
-{
-       static struct pca953x_platform_data tca6416;
-       struct i2c_board_info *i2c_info = info;
-       int gpio_base, intr;
-       char base_pin_name[SFI_NAME_LEN + 1];
-       char intr_pin_name[SFI_NAME_LEN + 1];
-
-       strcpy(i2c_info->type, TCA6416_NAME);
-       strcpy(base_pin_name, TCA6416_BASE);
-       strcpy(intr_pin_name, TCA6416_INTR);
-
-       gpio_base = get_gpio_by_name(base_pin_name);
-       intr = get_gpio_by_name(intr_pin_name);
-
-       if (gpio_base < 0)
-               return NULL;
-       tca6416.gpio_base = gpio_base;
-       if (intr >= 0) {
-               i2c_info->irq = intr + INTEL_MID_IRQ_OFFSET;
-               tca6416.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET;
-       } else {
-               i2c_info->irq = -1;
-               tca6416.irq_base = -1;
-       }
-       return &tca6416;
-}
-
-static const struct devs_id tca6416_dev_id __initconst = {
-       .name = "tca6416",
-       .type = SFI_DEV_TYPE_I2C,
-       .delay = 1,
-       .get_platform_data = &tca6416_platform_data,
-};
-
-sfi_device(tca6416_dev_id);
diff --git a/arch/x86/platform/intel-mid/intel-mid.c b/arch/x86/platform/intel-mid/intel-mid.c
index 7807281..f4592dc 100644 (file)
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * intel-mid.c: Intel MID platform setup code
+ * Intel MID platform setup code
  *
- * (C) Copyright 2008, 2012 Intel Corporation
+ * (C) Copyright 2008, 2012, 2021 Intel Corporation
  * Author: Jacob Pan (jacob.jun.pan@intel.com)
  * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
  */
@@ -14,7 +14,6 @@
 #include <linux/interrupt.h>
 #include <linux/regulator/machine.h>
 #include <linux/scatterlist.h>
-#include <linux/sfi.h>
 #include <linux/irq.h>
 #include <linux/export.h>
 #include <linux/notifier.h>
 #include <asm/apic.h>
 #include <asm/io_apic.h>
 #include <asm/intel-mid.h>
-#include <asm/intel_mid_vrtc.h>
 #include <asm/io.h>
 #include <asm/i8259.h>
 #include <asm/intel_scu_ipc.h>
-#include <asm/apb_timer.h>
 #include <asm/reboot.h>
 
-/*
- * The clockevent devices on Moorestown/Medfield can be the APBT or the LAPIC
- * clock; the cmdline option x86_intel_mid_timer can be used to override the
- * configuration and prefer one or the other.
- * At runtime, there are basically three timer configurations:
- * 1. per-cpu APBT clock only
- * 2. per-cpu always-on LAPIC clocks only (Penwell/Medfield only)
- * 3. per-cpu LAPIC clock (C3STOP) plus one APBT clock, with broadcast.
- *
- * By default (without the cmdline option), platform code first detects the
- * cpu type to see whether we are on Lincroft or Penwell, then sets up the
- * LAPIC or APBT clocks accordingly;
- * i.e. by default, Medfield uses configuration #2 and Moorestown uses #1.
- * Config #3 is supported but not recommended on Medfield.
- *
- * Rating and feature summary:
- * lapic (with C3STOP) --------- 100
- * apbt (always-on) ------------ 110
- * lapic (always-on,ARAT) ------ 150
- */
-
-enum intel_mid_timer_options intel_mid_timer_options;
-
-enum intel_mid_cpu_type __intel_mid_cpu_chip;
-EXPORT_SYMBOL_GPL(__intel_mid_cpu_chip);
+#define IPCMSG_COLD_OFF                0x80    /* Only for Tangier */
+#define IPCMSG_COLD_RESET      0xF1
 
 static void intel_mid_power_off(void)
 {
@@ -64,69 +38,32 @@ static void intel_mid_power_off(void)
        intel_mid_pwr_power_off();
 
        /* Only for Tangier, the rest will ignore this command */
-       intel_scu_ipc_simple_command(IPCMSG_COLD_OFF, 1);
+       intel_scu_ipc_dev_simple_command(NULL, IPCMSG_COLD_OFF, 1);
 };
 
 static void intel_mid_reboot(void)
 {
-       intel_scu_ipc_simple_command(IPCMSG_COLD_RESET, 0);
-}
-
-static void __init intel_mid_setup_bp_timer(void)
-{
-       apbt_time_init();
-       setup_boot_APIC_clock();
+       intel_scu_ipc_dev_simple_command(NULL, IPCMSG_COLD_RESET, 0);
 }
 
 static void __init intel_mid_time_init(void)
 {
-       sfi_table_parse(SFI_SIG_MTMR, NULL, NULL, sfi_parse_mtmr);
-
-       switch (intel_mid_timer_options) {
-       case INTEL_MID_TIMER_APBT_ONLY:
-               break;
-       case INTEL_MID_TIMER_LAPIC_APBT:
-               /* Use apbt and local apic */
-               x86_init.timers.setup_percpu_clockev = intel_mid_setup_bp_timer;
-               x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
-               return;
-       default:
-               if (!boot_cpu_has(X86_FEATURE_ARAT))
-                       break;
-               /* Lapic only, no apbt */
-               x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
-               x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
-               return;
-       }
-
-       x86_init.timers.setup_percpu_clockev = apbt_time_init;
+       /* Lapic only, no apbt */
+       x86_init.timers.setup_percpu_clockev = setup_boot_APIC_clock;
+       x86_cpuinit.setup_percpu_clockev = setup_secondary_APIC_clock;
 }
 
 static void intel_mid_arch_setup(void)
 {
-       if (boot_cpu_data.x86 != 6) {
-               pr_err("Unknown Intel MID CPU (%d:%d), default to Penwell\n",
-                       boot_cpu_data.x86, boot_cpu_data.x86_model);
-               __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
-               goto out;
-       }
-
        switch (boot_cpu_data.x86_model) {
-       case 0x35:
-               __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_CLOVERVIEW;
-               break;
        case 0x3C:
        case 0x4A:
-               __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_TANGIER;
                x86_platform.legacy.rtc = 1;
                break;
-       case 0x27:
        default:
-               __intel_mid_cpu_chip = INTEL_MID_CPU_CHIP_PENWELL;
                break;
        }
 
-out:
        /*
         * Intel MID platforms are using explicitly defined regulators.
         *
@@ -159,14 +96,11 @@ void __init x86_intel_mid_early_setup(void)
 
        x86_init.timers.timer_init = intel_mid_time_init;
        x86_init.timers.setup_percpu_clockev = x86_init_noop;
-       x86_init.timers.wallclock_init = intel_mid_rtc_init;
 
        x86_init.irqs.pre_vector_init = x86_init_noop;
 
        x86_init.oem.arch_setup = intel_mid_arch_setup;
 
-       x86_cpuinit.setup_percpu_clockev = apbt_setup_secondary_clock;
-
        x86_platform.get_nmi_reason = intel_mid_get_nmi_reason;
 
        x86_init.pci.arch_init = intel_mid_pci_init;
@@ -188,25 +122,3 @@ void __init x86_intel_mid_early_setup(void)
        x86_init.mpparse.get_smp_config = x86_init_uint_noop;
        set_bit(MP_BUS_ISA, mp_bus_not_pci);
 }
-
-/*
- * If the user does not want to use the per-CPU APB timer, just give it a lower
- * rating than the local APIC timer and skip the late per-cpu timer init.
- */
-static inline int __init setup_x86_intel_mid_timer(char *arg)
-{
-       if (!arg)
-               return -EINVAL;
-
-       if (strcmp("apbt_only", arg) == 0)
-               intel_mid_timer_options = INTEL_MID_TIMER_APBT_ONLY;
-       else if (strcmp("lapic_and_apbt", arg) == 0)
-               intel_mid_timer_options = INTEL_MID_TIMER_LAPIC_APBT;
-       else {
-               pr_warn("X86 INTEL_MID timer option %s not recognised use x86_intel_mid_timer=apbt_only or lapic_and_apbt\n",
-                       arg);
-               return -EINVAL;
-       }
-       return 0;
-}
-__setup("x86_intel_mid_timer=", setup_x86_intel_mid_timer);
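Besides dropping the SFI timer and vRTC hooks, the hunks above migrate the power-off and reboot paths from the legacy intel_scu_ipc_simple_command() to the instance-aware intel_scu_ipc_dev_simple_command(), passing NULL to select the default SCU instance, as the patch itself does. A minimal sketch of the new call shape (the IPCMSG value is copied from the hunk above; names otherwise illustrative):

#include <asm/intel_scu_ipc.h>

#define IPCMSG_COLD_RESET       0xF1

static void example_cold_reset(void)
{
        /* NULL selects the default SCU IPC instance */
        intel_scu_ipc_dev_simple_command(NULL, IPCMSG_COLD_RESET, 0);
}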
diff --git a/arch/x86/platform/intel-mid/intel_mid_vrtc.c b/arch/x86/platform/intel-mid/intel_mid_vrtc.c
deleted file mode 100644 (file)
index 2226da4..0000000
+++ /dev/null
@@ -1,173 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * intel_mid_vrtc.c: Driver for virtual RTC device on Intel MID platform
- *
- * (C) Copyright 2009 Intel Corporation
- *
- * Note:
- * The vRTC is emulated by system controller firmware; the real HW
- * RTC is located in the PMIC device. The SCU FW shadows the PMIC RTC
- * in a memory-mapped IO space that is visible to the host IA
- * processor.
- *
- * This driver is based on RTC CMOS driver.
- */
-
-#include <linux/kernel.h>
-#include <linux/export.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/platform_device.h>
-#include <linux/mc146818rtc.h>
-
-#include <asm/intel-mid.h>
-#include <asm/intel_mid_vrtc.h>
-#include <asm/time.h>
-#include <asm/fixmap.h>
-
-static unsigned char __iomem *vrtc_virt_base;
-
-unsigned char vrtc_cmos_read(unsigned char reg)
-{
-       unsigned char retval;
-
-       /* vRTC's registers range from 0x0 to 0xD */
-       if (reg > 0xd || !vrtc_virt_base)
-               return 0xff;
-
-       lock_cmos_prefix(reg);
-       retval = __raw_readb(vrtc_virt_base + (reg << 2));
-       lock_cmos_suffix(reg);
-       return retval;
-}
-EXPORT_SYMBOL_GPL(vrtc_cmos_read);
-
-void vrtc_cmos_write(unsigned char val, unsigned char reg)
-{
-       if (reg > 0xd || !vrtc_virt_base)
-               return;
-
-       lock_cmos_prefix(reg);
-       __raw_writeb(val, vrtc_virt_base + (reg << 2));
-       lock_cmos_suffix(reg);
-}
-EXPORT_SYMBOL_GPL(vrtc_cmos_write);
-
-void vrtc_get_time(struct timespec64 *now)
-{
-       u8 sec, min, hour, mday, mon;
-       unsigned long flags;
-       u32 year;
-
-       spin_lock_irqsave(&rtc_lock, flags);
-
-       while ((vrtc_cmos_read(RTC_FREQ_SELECT) & RTC_UIP))
-               cpu_relax();
-
-       sec = vrtc_cmos_read(RTC_SECONDS);
-       min = vrtc_cmos_read(RTC_MINUTES);
-       hour = vrtc_cmos_read(RTC_HOURS);
-       mday = vrtc_cmos_read(RTC_DAY_OF_MONTH);
-       mon = vrtc_cmos_read(RTC_MONTH);
-       year = vrtc_cmos_read(RTC_YEAR);
-
-       spin_unlock_irqrestore(&rtc_lock, flags);
-
-       /* vRTC YEAR reg contains the offset to 1972 */
-       year += 1972;
-
-       pr_info("vRTC: sec: %d min: %d hour: %d day: %d "
-               "mon: %d year: %d\n", sec, min, hour, mday, mon, year);
-
-       now->tv_sec = mktime64(year, mon, mday, hour, min, sec);
-       now->tv_nsec = 0;
-}
-
-int vrtc_set_mmss(const struct timespec64 *now)
-{
-       unsigned long flags;
-       struct rtc_time tm;
-       int year;
-       int retval = 0;
-
-       rtc_time64_to_tm(now->tv_sec, &tm);
-       if (!rtc_valid_tm(&tm) && tm.tm_year >= 72) {
-               /*
-                * tm.tm_year is the number of years since 1900, and the
-                * vRTC needs the years since 1972.
-                */
-               year = tm.tm_year - 72;
-               spin_lock_irqsave(&rtc_lock, flags);
-               vrtc_cmos_write(year, RTC_YEAR);
-               vrtc_cmos_write(tm.tm_mon, RTC_MONTH);
-               vrtc_cmos_write(tm.tm_mday, RTC_DAY_OF_MONTH);
-               vrtc_cmos_write(tm.tm_hour, RTC_HOURS);
-               vrtc_cmos_write(tm.tm_min, RTC_MINUTES);
-               vrtc_cmos_write(tm.tm_sec, RTC_SECONDS);
-               spin_unlock_irqrestore(&rtc_lock, flags);
-       } else {
-               pr_err("%s: Invalid vRTC value: write of %llx to vRTC failed\n",
-                       __func__, (s64)now->tv_sec);
-               retval = -EINVAL;
-       }
-       return retval;
-}
-
-void __init intel_mid_rtc_init(void)
-{
-       unsigned long vrtc_paddr;
-
-       sfi_table_parse(SFI_SIG_MRTC, NULL, NULL, sfi_parse_mrtc);
-
-       vrtc_paddr = sfi_mrtc_array[0].phys_addr;
-       if (!sfi_mrtc_num || !vrtc_paddr)
-               return;
-
-       vrtc_virt_base = (void __iomem *)set_fixmap_offset_nocache(FIX_LNW_VRTC,
-                                                               vrtc_paddr);
-       x86_platform.get_wallclock = vrtc_get_time;
-       x86_platform.set_wallclock = vrtc_set_mmss;
-}
-
-/*
- * The Moorestown platform has a memory mapped virtual RTC device that emulates
- * the programming interface of the RTC.
- */
-
-static struct resource vrtc_resources[] = {
-       [0] = {
-               .flags  = IORESOURCE_MEM,
-       },
-       [1] = {
-               .flags  = IORESOURCE_IRQ,
-       }
-};
-
-static struct platform_device vrtc_device = {
-       .name           = "rtc_mrst",
-       .id             = -1,
-       .resource       = vrtc_resources,
-       .num_resources  = ARRAY_SIZE(vrtc_resources),
-};
-
-/* Register the RTC device if appropriate */
-static int __init intel_mid_device_create(void)
-{
-       /* No Moorestown, no device */
-       if (!intel_mid_identify_cpu())
-               return -ENODEV;
-       /* No timer, no device */
-       if (!sfi_mrtc_num)
-               return -ENODEV;
-
-       /* iomem resource */
-       vrtc_resources[0].start = sfi_mrtc_array[0].phys_addr;
-       vrtc_resources[0].end = sfi_mrtc_array[0].phys_addr +
-                               MRST_VRTC_MAP_SZ;
-       /* irq resource */
-       vrtc_resources[1].start = sfi_mrtc_array[0].irq;
-       vrtc_resources[1].end = sfi_mrtc_array[0].irq;
-
-       return platform_device_register(&vrtc_device);
-}
-device_initcall(intel_mid_device_create);
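Two details of the removed vRTC code merit a worked example: the registers are byte-wide but mapped on a 4-byte stride (hence the reg << 2 in vrtc_cmos_read/write above), and RTC_YEAR stores years since 1972 while struct rtc_time's tm_year counts years since 1900. A sketch of the two year conversions (not kernel code):

static inline int vrtc_reg_to_year(unsigned char reg)
{
        return reg + 1972;              /* read path: e.g. 49 -> 2021 */
}

static inline unsigned char year_to_vrtc_reg(int tm_year)
{
        return tm_year - 72;            /* write path: e.g. 121 (2021) -> 49 */
}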
diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c
deleted file mode 100644 (file)
index 30bd571..0000000
+++ /dev/null
@@ -1,543 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * sfi.c: Intel MID SFI initialization code
- *
- * (C) Copyright 2013 Intel Corporation
- * Author: Sathyanarayanan Kuppuswamy <sathyanarayanan.kuppuswamy@intel.com>
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/interrupt.h>
-#include <linux/scatterlist.h>
-#include <linux/sfi.h>
-#include <linux/spi/spi.h>
-#include <linux/i2c.h>
-#include <linux/skbuff.h>
-#include <linux/gpio.h>
-#include <linux/gpio_keys.h>
-#include <linux/input.h>
-#include <linux/platform_device.h>
-#include <linux/irq.h>
-#include <linux/export.h>
-#include <linux/notifier.h>
-#include <linux/mmc/core.h>
-#include <linux/mmc/card.h>
-#include <linux/blkdev.h>
-
-#include <asm/setup.h>
-#include <asm/mpspec_def.h>
-#include <asm/hw_irq.h>
-#include <asm/apic.h>
-#include <asm/io_apic.h>
-#include <asm/intel-mid.h>
-#include <asm/intel_mid_vrtc.h>
-#include <asm/io.h>
-#include <asm/i8259.h>
-#include <asm/intel_scu_ipc.h>
-#include <asm/apb_timer.h>
-#include <asm/reboot.h>
-
-#define        SFI_SIG_OEM0    "OEM0"
-#define MAX_IPCDEVS    24
-#define MAX_SCU_SPI    24
-#define MAX_SCU_I2C    24
-
-static struct platform_device *ipc_devs[MAX_IPCDEVS];
-static struct spi_board_info *spi_devs[MAX_SCU_SPI];
-static struct i2c_board_info *i2c_devs[MAX_SCU_I2C];
-static struct sfi_gpio_table_entry *gpio_table;
-static struct sfi_timer_table_entry sfi_mtimer_array[SFI_MTMR_MAX_NUM];
-static int ipc_next_dev;
-static int spi_next_dev;
-static int i2c_next_dev;
-static int i2c_bus[MAX_SCU_I2C];
-static int gpio_num_entry;
-static u32 sfi_mtimer_usage[SFI_MTMR_MAX_NUM];
-int sfi_mrtc_num;
-int sfi_mtimer_num;
-
-struct sfi_rtc_table_entry sfi_mrtc_array[SFI_MRTC_MAX];
-EXPORT_SYMBOL_GPL(sfi_mrtc_array);
-
-struct blocking_notifier_head intel_scu_notifier =
-                       BLOCKING_NOTIFIER_INIT(intel_scu_notifier);
-EXPORT_SYMBOL_GPL(intel_scu_notifier);
-
-#define intel_mid_sfi_get_pdata(dev, priv)     \
-       ((dev)->get_platform_data ? (dev)->get_platform_data(priv) : NULL)
-
-/* Parse all the mtimer info into a static mtimer array */
-int __init sfi_parse_mtmr(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_timer_table_entry *pentry;
-       struct mpc_intsrc mp_irq;
-       int totallen;
-
-       sb = (struct sfi_table_simple *)table;
-       if (!sfi_mtimer_num) {
-               sfi_mtimer_num = SFI_GET_NUM_ENTRIES(sb,
-                                       struct sfi_timer_table_entry);
-               pentry = (struct sfi_timer_table_entry *) sb->pentry;
-               totallen = sfi_mtimer_num * sizeof(*pentry);
-               memcpy(sfi_mtimer_array, pentry, totallen);
-       }
-
-       pr_debug("SFI MTIMER info (num = %d):\n", sfi_mtimer_num);
-       pentry = sfi_mtimer_array;
-       for (totallen = 0; totallen < sfi_mtimer_num; totallen++, pentry++) {
-               pr_debug("timer[%d]: paddr = 0x%08x, freq = %dHz, irq = %d\n",
-                       totallen, (u32)pentry->phys_addr,
-                       pentry->freq_hz, pentry->irq);
-               mp_irq.type = MP_INTSRC;
-               mp_irq.irqtype = mp_INT;
-               mp_irq.irqflag = MP_IRQTRIG_EDGE | MP_IRQPOL_ACTIVE_HIGH;
-               mp_irq.srcbus = MP_BUS_ISA;
-               mp_irq.srcbusirq = pentry->irq; /* IRQ */
-               mp_irq.dstapic = MP_APIC_ALL;
-               mp_irq.dstirq = pentry->irq;
-               mp_save_irq(&mp_irq);
-               mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL);
-       }
-
-       return 0;
-}
-
-struct sfi_timer_table_entry *sfi_get_mtmr(int hint)
-{
-       int i;
-       if (hint < sfi_mtimer_num) {
-               if (!sfi_mtimer_usage[hint]) {
-                       pr_debug("hint taken for timer %d irq %d\n",
-                               hint, sfi_mtimer_array[hint].irq);
-                       sfi_mtimer_usage[hint] = 1;
-                       return &sfi_mtimer_array[hint];
-               }
-       }
-       /* take the first timer available */
-       for (i = 0; i < sfi_mtimer_num;) {
-               if (!sfi_mtimer_usage[i]) {
-                       sfi_mtimer_usage[i] = 1;
-                       return &sfi_mtimer_array[i];
-               }
-               i++;
-       }
-       return NULL;
-}
-
-void sfi_free_mtmr(struct sfi_timer_table_entry *mtmr)
-{
-       int i;
-       for (i = 0; i < sfi_mtimer_num;) {
-               if (mtmr->irq == sfi_mtimer_array[i].irq) {
-                       sfi_mtimer_usage[i] = 0;
-                       return;
-               }
-               i++;
-       }
-}
-
-/* Parse all the mrtc info into a global mrtc array */
-int __init sfi_parse_mrtc(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_rtc_table_entry *pentry;
-       struct mpc_intsrc mp_irq;
-
-       int totallen;
-
-       sb = (struct sfi_table_simple *)table;
-       if (!sfi_mrtc_num) {
-               sfi_mrtc_num = SFI_GET_NUM_ENTRIES(sb,
-                                               struct sfi_rtc_table_entry);
-               pentry = (struct sfi_rtc_table_entry *)sb->pentry;
-               totallen = sfi_mrtc_num * sizeof(*pentry);
-               memcpy(sfi_mrtc_array, pentry, totallen);
-       }
-
-       pr_debug("SFI RTC info (num = %d):\n", sfi_mrtc_num);
-       pentry = sfi_mrtc_array;
-       for (totallen = 0; totallen < sfi_mrtc_num; totallen++, pentry++) {
-               pr_debug("RTC[%d]: paddr = 0x%08x, irq = %d\n",
-                       totallen, (u32)pentry->phys_addr, pentry->irq);
-               mp_irq.type = MP_INTSRC;
-               mp_irq.irqtype = mp_INT;
-               mp_irq.irqflag = MP_IRQTRIG_LEVEL | MP_IRQPOL_ACTIVE_LOW;
-               mp_irq.srcbus = MP_BUS_ISA;
-               mp_irq.srcbusirq = pentry->irq; /* IRQ */
-               mp_irq.dstapic = MP_APIC_ALL;
-               mp_irq.dstirq = pentry->irq;
-               mp_save_irq(&mp_irq);
-               mp_map_gsi_to_irq(pentry->irq, IOAPIC_MAP_ALLOC, NULL);
-       }
-       return 0;
-}
-
-
-/*
- * Parse the GPIO table first, since the DEVS table will need it
- * to map the pin names to the actual pins.
- */
-static int __init sfi_parse_gpio(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_gpio_table_entry *pentry;
-       int num, i;
-
-       if (gpio_table)
-               return 0;
-       sb = (struct sfi_table_simple *)table;
-       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_gpio_table_entry);
-       pentry = (struct sfi_gpio_table_entry *)sb->pentry;
-
-       gpio_table = kmemdup(pentry, num * sizeof(*pentry), GFP_KERNEL);
-       if (!gpio_table)
-               return -1;
-       gpio_num_entry = num;
-
-       pr_debug("GPIO pin info:\n");
-       for (i = 0; i < num; i++, pentry++)
-               pr_debug("info[%2d]: controller = %16.16s, pin_name = %16.16s,"
-               " pin = %d\n", i,
-                       pentry->controller_name,
-                       pentry->pin_name,
-                       pentry->pin_no);
-       return 0;
-}
-
-int get_gpio_by_name(const char *name)
-{
-       struct sfi_gpio_table_entry *pentry = gpio_table;
-       int i;
-
-       if (!pentry)
-               return -1;
-       for (i = 0; i < gpio_num_entry; i++, pentry++) {
-               if (!strncmp(name, pentry->pin_name, SFI_NAME_LEN))
-                       return pentry->pin_no;
-       }
-       return -EINVAL;
-}
-
-static void __init intel_scu_ipc_device_register(struct platform_device *pdev)
-{
-       if (ipc_next_dev == MAX_IPCDEVS)
-               pr_err("too many SCU IPC devices");
-       else
-               ipc_devs[ipc_next_dev++] = pdev;
-}
-
-static void __init intel_scu_spi_device_register(struct spi_board_info *sdev)
-{
-       struct spi_board_info *new_dev;
-
-       if (spi_next_dev == MAX_SCU_SPI) {
-               pr_err("too many SCU SPI devices");
-               return;
-       }
-
-       new_dev = kzalloc(sizeof(*sdev), GFP_KERNEL);
-       if (!new_dev) {
-               pr_err("failed to alloc mem for delayed spi dev %s\n",
-                       sdev->modalias);
-               return;
-       }
-       *new_dev = *sdev;
-
-       spi_devs[spi_next_dev++] = new_dev;
-}
-
-static void __init intel_scu_i2c_device_register(int bus,
-                                               struct i2c_board_info *idev)
-{
-       struct i2c_board_info *new_dev;
-
-       if (i2c_next_dev == MAX_SCU_I2C) {
-               pr_err("too many SCU I2C devices");
-               return;
-       }
-
-       new_dev = kzalloc(sizeof(*idev), GFP_KERNEL);
-       if (!new_dev) {
-               pr_err("failed to alloc mem for delayed i2c dev %s\n",
-                       idev->type);
-               return;
-       }
-       *new_dev = *idev;
-
-       i2c_bus[i2c_next_dev] = bus;
-       i2c_devs[i2c_next_dev++] = new_dev;
-}
-
-/* Called by IPC driver */
-void intel_scu_devices_create(void)
-{
-       int i;
-
-       for (i = 0; i < ipc_next_dev; i++)
-               platform_device_add(ipc_devs[i]);
-
-       for (i = 0; i < spi_next_dev; i++)
-               spi_register_board_info(spi_devs[i], 1);
-
-       for (i = 0; i < i2c_next_dev; i++) {
-               struct i2c_adapter *adapter;
-               struct i2c_client *client;
-
-               adapter = i2c_get_adapter(i2c_bus[i]);
-               if (adapter) {
-                       client = i2c_new_client_device(adapter, i2c_devs[i]);
-                       if (IS_ERR(client))
-                               pr_err("can't create i2c device %s\n",
-                                       i2c_devs[i]->type);
-               } else
-                       i2c_register_board_info(i2c_bus[i], i2c_devs[i], 1);
-       }
-       intel_scu_notifier_post(SCU_AVAILABLE, NULL);
-}
-EXPORT_SYMBOL_GPL(intel_scu_devices_create);
-
-/* Called by IPC driver */
-void intel_scu_devices_destroy(void)
-{
-       int i;
-
-       intel_scu_notifier_post(SCU_DOWN, NULL);
-
-       for (i = 0; i < ipc_next_dev; i++)
-               platform_device_del(ipc_devs[i]);
-}
-EXPORT_SYMBOL_GPL(intel_scu_devices_destroy);
-
-static void __init install_irq_resource(struct platform_device *pdev, int irq)
-{
-       /* Single threaded */
-       static struct resource res __initdata = {
-               .name = "IRQ",
-               .flags = IORESOURCE_IRQ,
-       };
-       res.start = irq;
-       platform_device_add_resources(pdev, &res, 1);
-}
-
-static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry,
-                                       struct devs_id *dev)
-{
-       struct platform_device *pdev;
-       void *pdata = NULL;
-
-       pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n",
-               pentry->name, pentry->irq);
-
-       /*
-        * We need to call platform init of IPC devices to fill misc_pdata
-        * structure. It will be used in msic_init for initialization.
-        */
-       pdata = intel_mid_sfi_get_pdata(dev, pentry);
-       if (IS_ERR(pdata))
-               return;
-
-       /*
-        * On Medfield the platform device creation is handled by the MSIC
-        * MFD driver so we don't need to do it here.
-        */
-       if (dev->msic && intel_mid_has_msic())
-               return;
-
-       pdev = platform_device_alloc(pentry->name, 0);
-       if (pdev == NULL) {
-               pr_err("out of memory for SFI platform device '%s'.\n",
-                       pentry->name);
-               return;
-       }
-       install_irq_resource(pdev, pentry->irq);
-
-       pdev->dev.platform_data = pdata;
-       if (dev->delay)
-               intel_scu_ipc_device_register(pdev);
-       else
-               platform_device_add(pdev);
-}
-
-static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry,
-                                       struct devs_id *dev)
-{
-       struct spi_board_info spi_info;
-       void *pdata = NULL;
-
-       memset(&spi_info, 0, sizeof(spi_info));
-       strncpy(spi_info.modalias, pentry->name, SFI_NAME_LEN);
-       spi_info.irq = ((pentry->irq == (u8)0xff) ? 0 : pentry->irq);
-       spi_info.bus_num = pentry->host_num;
-       spi_info.chip_select = pentry->addr;
-       spi_info.max_speed_hz = pentry->max_freq;
-       pr_debug("SPI bus=%d, name=%16.16s, irq=0x%2x, max_freq=%d, cs=%d\n",
-               spi_info.bus_num,
-               spi_info.modalias,
-               spi_info.irq,
-               spi_info.max_speed_hz,
-               spi_info.chip_select);
-
-       pdata = intel_mid_sfi_get_pdata(dev, &spi_info);
-       if (IS_ERR(pdata))
-               return;
-
-       spi_info.platform_data = pdata;
-       if (dev->delay)
-               intel_scu_spi_device_register(&spi_info);
-       else
-               spi_register_board_info(&spi_info, 1);
-}
-
-static void __init sfi_handle_i2c_dev(struct sfi_device_table_entry *pentry,
-                                       struct devs_id *dev)
-{
-       struct i2c_board_info i2c_info;
-       void *pdata = NULL;
-
-       memset(&i2c_info, 0, sizeof(i2c_info));
-       strncpy(i2c_info.type, pentry->name, SFI_NAME_LEN);
-       i2c_info.irq = ((pentry->irq == (u8)0xff) ? 0 : pentry->irq);
-       i2c_info.addr = pentry->addr;
-       pr_debug("I2C bus = %d, name = %16.16s, irq = 0x%2x, addr = 0x%x\n",
-               pentry->host_num,
-               i2c_info.type,
-               i2c_info.irq,
-               i2c_info.addr);
-       pdata = intel_mid_sfi_get_pdata(dev, &i2c_info);
-       i2c_info.platform_data = pdata;
-       if (IS_ERR(pdata))
-               return;
-
-       if (dev->delay)
-               intel_scu_i2c_device_register(pentry->host_num, &i2c_info);
-       else
-               i2c_register_board_info(pentry->host_num, &i2c_info, 1);
-}
-
-static void __init sfi_handle_sd_dev(struct sfi_device_table_entry *pentry,
-                                       struct devs_id *dev)
-{
-       struct mid_sd_board_info sd_info;
-       void *pdata;
-
-       memset(&sd_info, 0, sizeof(sd_info));
-       strncpy(sd_info.name, pentry->name, SFI_NAME_LEN);
-       sd_info.bus_num = pentry->host_num;
-       sd_info.max_clk = pentry->max_freq;
-       sd_info.addr = pentry->addr;
-       pr_debug("SD bus = %d, name = %16.16s, max_clk = %d, addr = 0x%x\n",
-                sd_info.bus_num,
-                sd_info.name,
-                sd_info.max_clk,
-                sd_info.addr);
-       pdata = intel_mid_sfi_get_pdata(dev, &sd_info);
-       if (IS_ERR(pdata))
-               return;
-
-       /* Nothing we can do with this for now */
-       sd_info.platform_data = pdata;
-
-       pr_debug("Successfully registered %16.16s", sd_info.name);
-}
-
-extern struct devs_id *const __x86_intel_mid_dev_start[],
-                     *const __x86_intel_mid_dev_end[];
-
-static struct devs_id __init *get_device_id(u8 type, char *name)
-{
-       struct devs_id *const *dev_table;
-
-       for (dev_table = __x86_intel_mid_dev_start;
-                       dev_table < __x86_intel_mid_dev_end; dev_table++) {
-               struct devs_id *dev = *dev_table;
-               if (dev->type == type &&
-                       !strncmp(dev->name, name, SFI_NAME_LEN)) {
-                       return dev;
-               }
-       }
-
-       return NULL;
-}
-
-static int __init sfi_parse_devs(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_device_table_entry *pentry;
-       struct devs_id *dev = NULL;
-       int num, i, ret;
-       int polarity;
-       struct irq_alloc_info info;
-
-       sb = (struct sfi_table_simple *)table;
-       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_device_table_entry);
-       pentry = (struct sfi_device_table_entry *)sb->pentry;
-
-       for (i = 0; i < num; i++, pentry++) {
-               int irq = pentry->irq;
-
-               if (irq != (u8)0xff) { /* native RTE case */
-                       /* these SPI2 devices are not exposed to system as PCI
-                        * devices, but they have separate RTE entry in IOAPIC
-                        * so we have to enable them one by one here
-                        */
-                       if (intel_mid_identify_cpu() ==
-                                       INTEL_MID_CPU_CHIP_TANGIER) {
-                               if (!strncmp(pentry->name, "r69001-ts-i2c", 13))
-                                       /* active low */
-                                       polarity = 1;
-                               else if (!strncmp(pentry->name,
-                                               "synaptics_3202", 14))
-                                       /* active low */
-                                       polarity = 1;
-                               else if (irq == 41)
-                                       /* fast_int_1 */
-                                       polarity = 1;
-                               else
-                                       /* active high */
-                                       polarity = 0;
-                       } else {
-                               /* PNW and CLV go with active low */
-                               polarity = 1;
-                       }
-
-                       ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, polarity);
-                       ret = mp_map_gsi_to_irq(irq, IOAPIC_MAP_ALLOC, &info);
-                       WARN_ON(ret < 0);
-               }
-
-               dev = get_device_id(pentry->type, pentry->name);
-
-               if (!dev)
-                       continue;
-
-               switch (pentry->type) {
-               case SFI_DEV_TYPE_IPC:
-                       sfi_handle_ipc_dev(pentry, dev);
-                       break;
-               case SFI_DEV_TYPE_SPI:
-                       sfi_handle_spi_dev(pentry, dev);
-                       break;
-               case SFI_DEV_TYPE_I2C:
-                       sfi_handle_i2c_dev(pentry, dev);
-                       break;
-               case SFI_DEV_TYPE_SD:
-                       sfi_handle_sd_dev(pentry, dev);
-                       break;
-               case SFI_DEV_TYPE_UART:
-               case SFI_DEV_TYPE_HSI:
-               default:
-                       break;
-               }
-       }
-       return 0;
-}
-
-static int __init intel_mid_platform_init(void)
-{
-       sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
-       sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs);
-       return 0;
-}
-arch_initcall(intel_mid_platform_init);
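For context, get_gpio_by_name() above was the lookup this now-removed file exported to board code; a minimal sketch of a consumer, assuming a hypothetical pin name and the legacy integer GPIO API of the era:

        /* Hypothetical board-code consumer of the SFI GPIO table above. */
        int gpio = get_gpio_by_name("ts_rst");  /* "ts_rst" is a made-up pin name */

        if (gpio < 0)
                pr_err("pin not described in the SFI GPIO table\n");
        else if (gpio_request_one(gpio, GPIOF_OUT_INIT_HIGH, "ts_rst"))
                pr_err("failed to claim pin %d\n", gpio);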
index 1ac8578..b42bfda 100644 (file)
@@ -27,7 +27,6 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Sébastien Hinderer <Sebastien.Hinderer@ens-lyon.org>");
 MODULE_DESCRIPTION("A power_off handler for Iris devices from EuroBraille");
-MODULE_SUPPORTED_DEVICE("Eurobraille/Iris");
 
 static bool force;
 
index 43b4d86..d2ccadc 100644 (file)
@@ -16,6 +16,7 @@
 #include <asm/boot.h>
 #include <asm/processor-flags.h>
 #include <asm/msr.h>
+#include <asm/nospec-branch.h>
 #include <xen/interface/elfnote.h>
 
        __HEAD
@@ -105,6 +106,7 @@ SYM_CODE_START_LOCAL(pvh_start_xen)
        /* startup_64 expects boot_params in %rsi. */
        mov $_pa(pvh_bootparams), %rsi
        mov $_pa(startup_64), %rax
+       ANNOTATE_RETPOLINE_SAFE
        jmp *%rax
 
 #else /* CONFIG_X86_64 */
diff --git a/arch/x86/platform/sfi/sfi.c b/arch/x86/platform/sfi/sfi.c
deleted file mode 100644 (file)
index 6259563..0000000
+++ /dev/null
@@ -1,100 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * sfi.c - x86 architecture SFI support.
- *
- * Copyright (c) 2009, Intel Corporation.
- */
-
-#define KMSG_COMPONENT "SFI"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/io.h>
-
-#include <asm/irqdomain.h>
-#include <asm/io_apic.h>
-#include <asm/mpspec.h>
-#include <asm/setup.h>
-#include <asm/apic.h>
-
-#ifdef CONFIG_X86_LOCAL_APIC
-static unsigned long sfi_lapic_addr __initdata = APIC_DEFAULT_PHYS_BASE;
-
-/* All CPUs enumerated by SFI must be present and enabled */
-static void __init mp_sfi_register_lapic(u8 id)
-{
-       if (MAX_LOCAL_APIC - id <= 0) {
-               pr_warn("Processor #%d invalid (max %d)\n", id, MAX_LOCAL_APIC);
-               return;
-       }
-
-       pr_info("registering lapic[%d]\n", id);
-
-       generic_processor_info(id, GET_APIC_VERSION(apic_read(APIC_LVR)));
-}
-
-static int __init sfi_parse_cpus(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_cpu_table_entry *pentry;
-       int i;
-       int cpu_num;
-
-       sb = (struct sfi_table_simple *)table;
-       cpu_num = SFI_GET_NUM_ENTRIES(sb, struct sfi_cpu_table_entry);
-       pentry = (struct sfi_cpu_table_entry *)sb->pentry;
-
-       for (i = 0; i < cpu_num; i++) {
-               mp_sfi_register_lapic(pentry->apic_id);
-               pentry++;
-       }
-
-       smp_found_config = 1;
-       return 0;
-}
-#endif /* CONFIG_X86_LOCAL_APIC */
-
-#ifdef CONFIG_X86_IO_APIC
-
-static int __init sfi_parse_ioapic(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_apic_table_entry *pentry;
-       int i, num;
-       struct ioapic_domain_cfg cfg = {
-               .type = IOAPIC_DOMAIN_STRICT,
-               .ops = &mp_ioapic_irqdomain_ops,
-       };
-
-       sb = (struct sfi_table_simple *)table;
-       num = SFI_GET_NUM_ENTRIES(sb, struct sfi_apic_table_entry);
-       pentry = (struct sfi_apic_table_entry *)sb->pentry;
-
-       for (i = 0; i < num; i++) {
-               mp_register_ioapic(i, pentry->phys_addr, gsi_top, &cfg);
-               pentry++;
-       }
-
-       WARN(pic_mode, KERN_WARNING
-               "SFI: pic_mod shouldn't be 1 when IOAPIC table is present\n");
-       pic_mode = 0;
-       return 0;
-}
-#endif /* CONFIG_X86_IO_APIC */
-
-/*
- * sfi_platform_init(): register lapics & io-apics
- */
-int __init sfi_platform_init(void)
-{
-#ifdef CONFIG_X86_LOCAL_APIC
-       register_lapic_address(sfi_lapic_addr);
-       sfi_table_parse(SFI_SIG_CPUS, NULL, NULL, sfi_parse_cpus);
-#endif
-#ifdef CONFIG_X86_IO_APIC
-       sfi_table_parse(SFI_SIG_APIC, NULL, NULL, sfi_parse_ioapic);
-#endif
-       return 0;
-}
index 6907b52..3797775 100644 (file)
@@ -1,9 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0
-OBJECT_FILES_NON_STANDARD_hibernate_asm_$(BITS).o := y
 
 # __restore_processor_state() restores %gs after S3 resume and so should not
 # itself be stack-protected
 CFLAGS_cpu.o   := -fno-stack-protector
 
+# Clang may incorrectly inline functions with stack protector enabled into
+# __restore_processor_state(): https://bugs.llvm.org/show_bug.cgi?id=47479
+CFLAGS_REMOVE_cpu.o := $(CC_FLAGS_LTO)
+
 obj-$(CONFIG_PM_SLEEP)         += cpu.o
 obj-$(CONFIG_HIBERNATION)      += hibernate_$(BITS).o hibernate_asm_$(BITS).o hibernate.o
index 7918b84..d9bed59 100644 (file)
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
 #include <asm/frame.h>
+#include <asm/nospec-branch.h>
+
+        /* code below belongs to the image kernel */
+       .align PAGE_SIZE
+SYM_FUNC_START(restore_registers)
+       /* go back to the original page tables */
+       movq    %r9, %cr3
+
+       /* Flush TLB, including "global" things (vmalloc) */
+       movq    mmu_cr4_features(%rip), %rax
+       movq    %rax, %rdx
+       andq    $~(X86_CR4_PGE), %rdx
+       movq    %rdx, %cr4;  # turn off PGE
+       movq    %cr3, %rcx;  # flush TLB
+       movq    %rcx, %cr3
+       movq    %rax, %cr4;  # turn PGE back on
+
+       /* We don't restore %rax, it must be 0 anyway */
+       movq    $saved_context, %rax
+       movq    pt_regs_sp(%rax), %rsp
+       movq    pt_regs_bp(%rax), %rbp
+       movq    pt_regs_si(%rax), %rsi
+       movq    pt_regs_di(%rax), %rdi
+       movq    pt_regs_bx(%rax), %rbx
+       movq    pt_regs_cx(%rax), %rcx
+       movq    pt_regs_dx(%rax), %rdx
+       movq    pt_regs_r8(%rax), %r8
+       movq    pt_regs_r9(%rax), %r9
+       movq    pt_regs_r10(%rax), %r10
+       movq    pt_regs_r11(%rax), %r11
+       movq    pt_regs_r12(%rax), %r12
+       movq    pt_regs_r13(%rax), %r13
+       movq    pt_regs_r14(%rax), %r14
+       movq    pt_regs_r15(%rax), %r15
+       pushq   pt_regs_flags(%rax)
+       popfq
+
+       /* Saved in save_processor_state. */
+       lgdt    saved_context_gdt_desc(%rax)
+
+       xorl    %eax, %eax
+
+       /* tell the hibernation core that we've just restored the memory */
+       movq    %rax, in_suspend(%rip)
+
+       ret
+SYM_FUNC_END(restore_registers)
 
 SYM_FUNC_START(swsusp_arch_suspend)
        movq    $saved_context, %rax
@@ -52,7 +99,7 @@ SYM_FUNC_START(swsusp_arch_suspend)
        ret
 SYM_FUNC_END(swsusp_arch_suspend)
 
-SYM_CODE_START(restore_image)
+SYM_FUNC_START(restore_image)
        /* prepare to jump to the image kernel */
        movq    restore_jump_address(%rip), %r8
        movq    restore_cr3(%rip), %r9
@@ -66,11 +113,12 @@ SYM_CODE_START(restore_image)
 
        /* jump to relocated restore code */
        movq    relocated_restore_code(%rip), %rcx
+       ANNOTATE_RETPOLINE_SAFE
        jmpq    *%rcx
-SYM_CODE_END(restore_image)
+SYM_FUNC_END(restore_image)
 
        /* code below has been relocated to a safe page */
-SYM_CODE_START(core_restore_code)
+SYM_FUNC_START(core_restore_code)
        /* switch to temporary page tables */
        movq    %rax, %cr3
        /* flush TLB */
@@ -97,51 +145,6 @@ SYM_CODE_START(core_restore_code)
 
 .Ldone:
        /* jump to the restore_registers address from the image header */
+       ANNOTATE_RETPOLINE_SAFE
        jmpq    *%r8
-SYM_CODE_END(core_restore_code)
-
-        /* code below belongs to the image kernel */
-       .align PAGE_SIZE
-SYM_FUNC_START(restore_registers)
-       /* go back to the original page tables */
-       movq    %r9, %cr3
-
-       /* Flush TLB, including "global" things (vmalloc) */
-       movq    mmu_cr4_features(%rip), %rax
-       movq    %rax, %rdx
-       andq    $~(X86_CR4_PGE), %rdx
-       movq    %rdx, %cr4;  # turn off PGE
-       movq    %cr3, %rcx;  # flush TLB
-       movq    %rcx, %cr3
-       movq    %rax, %cr4;  # turn PGE back on
-
-       /* We don't restore %rax, it must be 0 anyway */
-       movq    $saved_context, %rax
-       movq    pt_regs_sp(%rax), %rsp
-       movq    pt_regs_bp(%rax), %rbp
-       movq    pt_regs_si(%rax), %rsi
-       movq    pt_regs_di(%rax), %rdi
-       movq    pt_regs_bx(%rax), %rbx
-       movq    pt_regs_cx(%rax), %rcx
-       movq    pt_regs_dx(%rax), %rdx
-       movq    pt_regs_r8(%rax), %r8
-       movq    pt_regs_r9(%rax), %r9
-       movq    pt_regs_r10(%rax), %r10
-       movq    pt_regs_r11(%rax), %r11
-       movq    pt_regs_r12(%rax), %r12
-       movq    pt_regs_r13(%rax), %r13
-       movq    pt_regs_r14(%rax), %r14
-       movq    pt_regs_r15(%rax), %r15
-       pushq   pt_regs_flags(%rax)
-       popfq
-
-       /* Saved in save_processor_state. */
-       lgdt    saved_context_gdt_desc(%rax)
-
-       xorl    %eax, %eax
-
-       /* tell the hibernation core that we've just restored the memory */
-       movq    %rax, in_suspend(%rip)
-
-       ret
-SYM_FUNC_END(restore_registers)
+SYM_FUNC_END(core_restore_code)
index 55b1ab3..bddfc9a 100644 (file)
@@ -29,14 +29,14 @@ posttest: $(obj)/insn_decoder_test vmlinux $(obj)/insn_sanity
 hostprogs += insn_decoder_test insn_sanity
 
 # -I needed for generated C source and C source which is in the kernel tree.
-HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/uapi/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/uapi/
+HOSTCFLAGS_insn_decoder_test.o := -Wall -I$(srctree)/tools/arch/x86/lib/ -I$(srctree)/tools/arch/x86/include/ -I$(objtree)/arch/x86/lib/
 
-HOSTCFLAGS_insn_sanity.o := -Wall -I$(objtree)/arch/x86/lib/ -I$(srctree)/arch/x86/include/ -I$(srctree)/arch/x86/lib/ -I$(srctree)/include/
+HOSTCFLAGS_insn_sanity.o := -Wall -I$(srctree)/tools/arch/x86/lib/ -I$(srctree)/tools/arch/x86/include/ -I$(objtree)/arch/x86/lib/
 
 # Dependencies are also needed.
-$(obj)/insn_decoder_test.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+$(obj)/insn_decoder_test.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tools/arch/x86/lib/inat.c $(srctree)/tools/arch/x86/include/asm/inat_types.h $(srctree)/tools/arch/x86/include/asm/inat.h $(srctree)/tools/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
 
-$(obj)/insn_sanity.o: $(srctree)/arch/x86/lib/insn.c $(srctree)/arch/x86/lib/inat.c $(srctree)/arch/x86/include/asm/inat_types.h $(srctree)/arch/x86/include/asm/inat.h $(srctree)/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
+$(obj)/insn_sanity.o: $(srctree)/tools/arch/x86/lib/insn.c $(srctree)/tools/arch/x86/lib/inat.c $(srctree)/tools/arch/x86/include/asm/inat_types.h $(srctree)/tools/arch/x86/include/asm/inat.h $(srctree)/tools/arch/x86/include/asm/insn.h $(objtree)/arch/x86/lib/inat-tables.c
 
 HOST_EXTRACFLAGS += -I$(srctree)/tools/include
 hostprogs      += relocs
index 185ceba..c6a0000 100644 (file)
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
-
-#define unlikely(cond) (cond)
-#define ARRAY_SIZE(a)  (sizeof(a)/sizeof(a[0]))
-
 #include <asm/insn.h>
 #include <inat.c>
 #include <insn.c>
index 1c3a196..04c5a44 100644 (file)
@@ -61,8 +61,8 @@ static const char * const sym_regex_kernel[S_NSYMTYPES] = {
        "(__iommu_table|__apicdrivers|__smp_locks)(|_end)|"
        "__(start|end)_pci_.*|"
        "__(start|end)_builtin_fw|"
-       "__(start|stop)___ksymtab(|_gpl|_unused|_unused_gpl|_gpl_future)|"
-       "__(start|stop)___kcrctab(|_gpl|_unused|_unused_gpl|_gpl_future)|"
+       "__(start|stop)___ksymtab(|_gpl)|"
+       "__(start|stop)___kcrctab(|_gpl)|"
        "__(start|stop)___param|"
        "__(start|stop)___modver|"
        "__(start|stop)___bug_table|"
index fc5c5ba..40b5779 100644 (file)
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0
-OBJECT_FILES_NON_STANDARD_xen-asm.o := y
 
 ifdef CONFIG_FUNCTION_TRACER
 # Do not profile debug and lowlevel utilities
index b5949e5..ac06ca3 100644 (file)
@@ -98,8 +98,8 @@ EXPORT_SYMBOL_GPL(xen_p2m_size);
 unsigned long xen_max_p2m_pfn __read_mostly;
 EXPORT_SYMBOL_GPL(xen_max_p2m_pfn);
 
-#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
-#define P2M_LIMIT CONFIG_XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+#ifdef CONFIG_XEN_MEMORY_HOTPLUG_LIMIT
+#define P2M_LIMIT CONFIG_XEN_MEMORY_HOTPLUG_LIMIT
 #else
 #define P2M_LIMIT 0
 #endif
@@ -652,10 +652,9 @@ bool __set_phys_to_machine(unsigned long pfn, unsigned long mfn)
        pte_t *ptep;
        unsigned int level;
 
-       if (unlikely(pfn >= xen_p2m_size)) {
-               BUG_ON(mfn != INVALID_P2M_ENTRY);
-               return true;
-       }
+       /* Only invalid entries allowed above the highest p2m covered frame. */
+       if (unlikely(pfn >= xen_p2m_size))
+               return mfn == INVALID_P2M_ENTRY;
 
        /*
         * The interface requires atomic updates on p2m elements.
@@ -710,6 +709,8 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 
        for (i = 0; i < count; i++) {
                unsigned long mfn, pfn;
+               struct gnttab_unmap_grant_ref unmap[2];
+               int rc;
 
                /* Do not add to override if the map failed. */
                if (map_ops[i].status != GNTST_okay ||
@@ -727,16 +728,51 @@ int set_foreign_p2m_mapping(struct gnttab_map_grant_ref *map_ops,
 
                WARN(pfn_to_mfn(pfn) != INVALID_P2M_ENTRY, "page must be ballooned");
 
-               if (unlikely(!set_phys_to_machine(pfn, FOREIGN_FRAME(mfn)))) {
-                       ret = -ENOMEM;
-                       goto out;
+               if (likely(set_phys_to_machine(pfn, FOREIGN_FRAME(mfn))))
+                       continue;
+
+               /*
+                * Signal an error for this slot. This in turn requires
+                * immediate unmapping.
+                */
+               map_ops[i].status = GNTST_general_error;
+               unmap[0].host_addr = map_ops[i].host_addr;
+               unmap[0].handle = map_ops[i].handle;
+               map_ops[i].handle = INVALID_GRANT_HANDLE;
+               if (map_ops[i].flags & GNTMAP_device_map)
+                       unmap[0].dev_bus_addr = map_ops[i].dev_bus_addr;
+               else
+                       unmap[0].dev_bus_addr = 0;
+
+               if (kmap_ops) {
+                       kmap_ops[i].status = GNTST_general_error;
+                       unmap[1].host_addr = kmap_ops[i].host_addr;
+                       unmap[1].handle = kmap_ops[i].handle;
+                       kmap_ops[i].handle = INVALID_GRANT_HANDLE;
+                       if (kmap_ops[i].flags & GNTMAP_device_map)
+                               unmap[1].dev_bus_addr = kmap_ops[i].dev_bus_addr;
+                       else
+                               unmap[1].dev_bus_addr = 0;
                }
+
+               /*
+                * Pre-populate both status fields, to be recognizable in
+                * the log message below.
+                */
+               unmap[0].status = 1;
+               unmap[1].status = 1;
+
+               rc = HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref,
+                                              unmap, 1 + !!kmap_ops);
+               if (rc || unmap[0].status != GNTST_okay ||
+                   unmap[1].status != GNTST_okay)
+                       pr_err_once("gnttab unmap failed: rc=%d st0=%d st1=%d\n",
+                                   rc, unmap[0].status, unmap[1].status);
        }
 
 out:
        return ret;
 }
-EXPORT_SYMBOL_GPL(set_foreign_p2m_mapping);
 
 int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
                              struct gnttab_unmap_grant_ref *kunmap_ops,
@@ -762,7 +798,6 @@ int clear_foreign_p2m_mapping(struct gnttab_unmap_grant_ref *unmap_ops,
 
        return ret;
 }
-EXPORT_SYMBOL_GPL(clear_foreign_p2m_mapping);
 
 #ifdef CONFIG_XEN_DEBUG_FS
 #include <linux/debugfs.h>
index 7eab14d..8bfc103 100644 (file)
@@ -59,13 +59,13 @@ static struct {
 } xen_remap_buf __initdata __aligned(PAGE_SIZE);
 static unsigned long xen_remap_mfn __initdata = INVALID_P2M_ENTRY;
 
-/* 
+/*
  * The maximum amount of extra memory compared to the base size.  The
  * main scaling factor is the size of struct page.  At extreme ratios
  * of base:extra, all the base memory can be filled with page
  * structures for the extra memory, leaving no space for anything
  * else.
- * 
+ *
  * 10x seems like a reasonable balance between scaling flexibility and
  * leaving a practically usable system.
  */
@@ -791,17 +791,10 @@ char * __init xen_memory_setup(void)
 
        /*
         * Clamp the amount of extra memory to an EXTRA_MEM_RATIO
-        * factor the base size.  On non-highmem systems, the base
-        * size is the full initial memory allocation; on highmem it
-        * is limited to the max size of lowmem, so that it doesn't
-        * get completely filled.
+        * factor of the base size.
         *
         * Make sure we have no memory above max_pages, as this area
         * isn't handled by the p2m management.
-        *
-        * In principle there could be a problem in lowmem systems if
-        * the initial memory is also very large with respect to
-        * lowmem, but we won't try to deal with that here.
         */
        extra_pages = min3(EXTRA_MEM_RATIO * min(max_pfn, PFN_DOWN(MAXMEM)),
                           extra_pages, max_pages - max_pfn);
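To make the clamp above concrete (EXTRA_MEM_RATIO is the 10x balance described earlier in this file; the page counts are illustrative):

        /*
         * Illustrative: a 1 GiB base allocation is 262144 4 KiB pages, so
         * the first min3() operand permits up to 10 * 262144 = 2621440
         * pages (10 GiB) of extra memory, further clamped by the
         * host-provided extra_pages and by max_pages - max_pfn.
         */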
index 02f3134..1e62644 100644 (file)
@@ -14,6 +14,7 @@
 #include <asm/thread_info.h>
 #include <asm/asm.h>
 #include <asm/frame.h>
+#include <asm/unwind_hints.h>
 
 #include <xen/interface/xen.h>
 
@@ -118,6 +119,7 @@ SYM_FUNC_END(xen_read_cr2_direct);
 
 .macro xen_pv_trap name
 SYM_CODE_START(xen_\name)
+       UNWIND_HINT_EMPTY
        pop %rcx
        pop %r11
        jmp  \name
@@ -157,6 +159,7 @@ xen_pv_trap asm_exc_xen_hypervisor_callback
 SYM_CODE_START(xen_early_idt_handler_array)
        i = 0
        .rept NUM_EXCEPTION_VECTORS
+       UNWIND_HINT_EMPTY
        pop %rcx
        pop %r11
        jmp early_idt_handler_array + i*EARLY_IDT_HANDLER_SIZE
@@ -183,6 +186,7 @@ hypercall_iret = hypercall_page + __HYPERVISOR_iret * 32
  * rsp->rax            }
  */
 SYM_CODE_START(xen_iret)
+       UNWIND_HINT_EMPTY
        pushq $0
        jmp hypercall_iret
 SYM_CODE_END(xen_iret)
@@ -203,7 +207,8 @@ SYM_CODE_END(xen_iret)
  */
 
 /* Normal 64-bit system call target */
-SYM_FUNC_START(xen_syscall_target)
+SYM_CODE_START(xen_syscall_target)
+       UNWIND_HINT_EMPTY
        popq %rcx
        popq %r11
 
@@ -216,12 +221,13 @@ SYM_FUNC_START(xen_syscall_target)
        movq $__USER_CS, 1*8(%rsp)
 
        jmp entry_SYSCALL_64_after_hwframe
-SYM_FUNC_END(xen_syscall_target)
+SYM_CODE_END(xen_syscall_target)
 
 #ifdef CONFIG_IA32_EMULATION
 
 /* 32-bit compat syscall target */
-SYM_FUNC_START(xen_syscall32_target)
+SYM_CODE_START(xen_syscall32_target)
+       UNWIND_HINT_EMPTY
        popq %rcx
        popq %r11
 
@@ -234,10 +240,11 @@ SYM_FUNC_START(xen_syscall32_target)
        movq $__USER32_CS, 1*8(%rsp)
 
        jmp entry_SYSCALL_compat_after_hwframe
-SYM_FUNC_END(xen_syscall32_target)
+SYM_CODE_END(xen_syscall32_target)
 
 /* 32-bit compat sysenter target */
-SYM_FUNC_START(xen_sysenter_target)
+SYM_CODE_START(xen_sysenter_target)
+       UNWIND_HINT_EMPTY
        /*
         * NB: Xen is polite and clears TF from EFLAGS for us.  This means
         * that we don't need to guard against single step exceptions here.
@@ -254,17 +261,18 @@ SYM_FUNC_START(xen_sysenter_target)
        movq $__USER32_CS, 1*8(%rsp)
 
        jmp entry_SYSENTER_compat_after_hwframe
-SYM_FUNC_END(xen_sysenter_target)
+SYM_CODE_END(xen_sysenter_target)
 
 #else /* !CONFIG_IA32_EMULATION */
 
-SYM_FUNC_START_ALIAS(xen_syscall32_target)
-SYM_FUNC_START(xen_sysenter_target)
+SYM_CODE_START(xen_syscall32_target)
+SYM_CODE_START(xen_sysenter_target)
+       UNWIND_HINT_EMPTY
        lea 16(%rsp), %rsp      /* strip %rcx, %r11 */
        mov $-ENOSYS, %rax
        pushq $0
        jmp hypercall_iret
-SYM_FUNC_END(xen_sysenter_target)
-SYM_FUNC_END_ALIAS(xen_syscall32_target)
+SYM_CODE_END(xen_sysenter_target)
+SYM_CODE_END(xen_syscall32_target)
 
 #endif /* CONFIG_IA32_EMULATION */
index 2d7c8f3..cb6538a 100644 (file)
@@ -68,8 +68,9 @@ SYM_CODE_END(asm_cpu_bringup_and_idle)
        .balign PAGE_SIZE
 SYM_CODE_START(hypercall_page)
        .rept (PAGE_SIZE / 32)
-               UNWIND_HINT_EMPTY
-               .skip 32
+               UNWIND_HINT_FUNC
+               .skip 31, 0x90
+               ret
        .endr
 
 #define HYPERCALL(n) \
index c426b84..45cc0ae 100644 (file)
        LOAD_CP_REGS_TAB(7)
 
 /*
- * coprocessor_flush(struct thread_info*, index)
- *                             a2        a3
- *
- * Save coprocessor registers for coprocessor 'index'.
- * The register values are saved to or loaded from the coprocessor area 
- * inside the task_info structure.
- *
- * Note that this function doesn't update the coprocessor_owner information!
- *
- */
-
-ENTRY(coprocessor_flush)
-
-       /* reserve 4 bytes on stack to save a0 */
-       abi_entry(4)
-
-       s32i    a0, a1, 0
-       movi    a0, .Lsave_cp_regs_jump_table
-       addx8   a3, a3, a0
-       l32i    a4, a3, 4
-       l32i    a3, a3, 0
-       add     a2, a2, a4
-       beqz    a3, 1f
-       callx0  a3
-1:     l32i    a0, a1, 0
-
-       abi_ret(4)
-
-ENDPROC(coprocessor_flush)
-
-/*
  * Entry condition:
  *
  *   a0:       trashed, original value saved on stack (PT_AREG0)
@@ -245,6 +214,39 @@ ENTRY(fast_coprocessor)
 
 ENDPROC(fast_coprocessor)
 
+       .text
+
+/*
+ * coprocessor_flush(struct thread_info*, index)
+ *                             a2        a3
+ *
+ * Save coprocessor registers for coprocessor 'index'.
+ * The register values are saved to or loaded from the coprocessor area
+ * inside the task_info structure.
+ *
+ * Note that this function doesn't update the coprocessor_owner information!
+ *
+ */
+
+ENTRY(coprocessor_flush)
+
+       /* reserve 4 bytes on stack to save a0 */
+       abi_entry(4)
+
+       s32i    a0, a1, 0
+       movi    a0, .Lsave_cp_regs_jump_table
+       addx8   a3, a3, a0
+       l32i    a4, a3, 4
+       l32i    a3, a3, 0
+       add     a2, a2, a4
+       beqz    a3, 1f
+       callx0  a3
+1:     l32i    a0, a1, 0
+
+       abi_ret(4)
+
+ENDPROC(coprocessor_flush)
+
        .data
 
 ENTRY(coprocessor_owner)
index 397a7de..9534ef5 100644 (file)
@@ -217,7 +217,7 @@ int copy_thread(unsigned long clone_flags, unsigned long usp_thread_fn,
 
        p->thread.sp = (unsigned long)childregs;
 
-       if (!(p->flags & PF_KTHREAD)) {
+       if (!(p->flags & (PF_KTHREAD | PF_IO_WORKER))) {
                struct pt_regs *regs = current_pt_regs();
                unsigned long usp = usp_thread_fn ?
                        usp_thread_fn : regs->areg[1];
index 659faef..285aaba 100644 (file)
@@ -5,7 +5,7 @@ uapi := arch/$(SRCARCH)/include/generated/uapi/asm
 _dummy := $(shell [ -d '$(uapi)' ] || mkdir -p '$(uapi)')      \
          $(shell [ -d '$(kapi)' ] || mkdir -p '$(kapi)')
 
-syscall := $(srctree)/$(src)/syscall.tbl
+syscall := $(src)/syscall.tbl
 syshdr := $(srctree)/$(src)/syscallhdr.sh
 systbl := $(srctree)/$(src)/syscalltbl.sh
 
@@ -21,18 +21,19 @@ quiet_cmd_systbl = SYSTBL  $@
                   '$(systbl_abi_$(basetarget))'                \
                   '$(systbl_offset_$(basetarget))'
 
-$(uapi)/unistd_32.h: $(syscall) $(syshdr)
+$(uapi)/unistd_32.h: $(syscall) $(syshdr) FORCE
        $(call if_changed,syshdr)
 
-$(kapi)/syscall_table.h: $(syscall) $(systbl)
+$(kapi)/syscall_table.h: $(syscall) $(systbl) FORCE
        $(call if_changed,systbl)
 
 uapisyshdr-y           += unistd_32.h
 kapisyshdr-y           += syscall_table.h
 
-targets        += $(uapisyshdr-y) $(kapisyshdr-y)
+uapisyshdr-y   := $(addprefix $(uapi)/, $(uapisyshdr-y))
+kapisyshdr-y   := $(addprefix $(kapi)/, $(kapisyshdr-y))
+targets                += $(addprefix ../../../../, $(uapisyshdr-y) $(kapisyshdr-y))
 
 PHONY += all
-all: $(addprefix $(uapi)/,$(uapisyshdr-y))
-all: $(addprefix $(kapi)/,$(kapisyshdr-y))
+all: $(uapisyshdr-y) $(kapisyshdr-y)
        @:
index 46116a2..365a9b8 100644 (file)
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
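The new syscall can be exercised from userspace without a libc wrapper; a hedged sketch, assuming headers new enough to provide SYS_mount_setattr, struct mount_attr, MOUNT_ATTR_RDONLY, and AT_RECURSIVE:

        #include <linux/mount.h>        /* struct mount_attr, MOUNT_ATTR_RDONLY */
        #include <linux/fcntl.h>        /* AT_RECURSIVE */
        #include <sys/syscall.h>
        #include <unistd.h>

        struct mount_attr attr = { .attr_set = MOUNT_ATTR_RDONLY };

        /* Recursively make the subtree at /mnt read-only (path illustrative). */
        syscall(SYS_mount_setattr, -1, "/mnt", AT_RECURSIVE, &attr, sizeof(attr));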
index 7666408..95a7489 100644 (file)
@@ -112,8 +112,11 @@ good_area:
         */
        fault = handle_mm_fault(vma, address, flags, regs);
 
-       if (fault_signal_pending(fault, regs))
+       if (fault_signal_pending(fault, regs)) {
+               if (!user_mode(regs))
+                       goto bad_page_fault;
                return;
+       }
 
        if (unlikely(fault & VM_FAULT_ERROR)) {
                if (fault & VM_FAULT_OOM)
index b398dde..9558613 100644 (file)
 #include <linux/delay.h>
 #include <linux/backing-dev.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-tag.h"
@@ -160,7 +162,7 @@ BFQ_BFQQ_FNS(split_coop);
 BFQ_BFQQ_FNS(softrt_update);
 #undef BFQ_BFQQ_FNS
 
-/* Expiration time of sync (0) and async (1) requests, in ns. */
+/* Expiration time of async (0) and sync (1) requests, in ns. */
 static const u64 bfq_fifo_expire[2] = { NSEC_PER_SEC / 4, NSEC_PER_SEC / 8 };
 
 /* Maximum backwards seek (magic number lifted from CFQ), in KiB. */
@@ -5621,7 +5623,7 @@ static void bfq_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
 
        spin_unlock_irq(&bfqd->lock);
 
-       blk_mq_sched_request_inserted(rq);
+       trace_block_rq_insert(rq);
 
        spin_lock_irq(&bfqd->lock);
        bfqq = bfq_init_rq(rq);
index a1c4d29..50e5790 100644 (file)
@@ -33,7 +33,7 @@ static struct biovec_slab {
        { .nr_vecs = 16, .name = "biovec-16" },
        { .nr_vecs = 64, .name = "biovec-64" },
        { .nr_vecs = 128, .name = "biovec-128" },
-       { .nr_vecs = BIO_MAX_PAGES, .name = "biovec-max" },
+       { .nr_vecs = BIO_MAX_VECS, .name = "biovec-max" },
 };
 
 static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
@@ -46,7 +46,7 @@ static struct biovec_slab *biovec_slab(unsigned short nr_vecs)
                return &bvec_slabs[1];
        case 65 ... 128:
                return &bvec_slabs[2];
-       case 129 ... BIO_MAX_PAGES:
+       case 129 ... BIO_MAX_VECS:
                return &bvec_slabs[3];
        default:
                BUG();
@@ -151,9 +151,9 @@ out:
 
 void bvec_free(mempool_t *pool, struct bio_vec *bv, unsigned short nr_vecs)
 {
-       BIO_BUG_ON(nr_vecs > BIO_MAX_PAGES);
+       BIO_BUG_ON(nr_vecs > BIO_MAX_VECS);
 
-       if (nr_vecs == BIO_MAX_PAGES)
+       if (nr_vecs == BIO_MAX_VECS)
                mempool_free(bv, pool);
        else if (nr_vecs > BIO_INLINE_VECS)
                kmem_cache_free(biovec_slab(nr_vecs)->slab, bv);
@@ -186,15 +186,15 @@ struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
        /*
         * Try a slab allocation first for all smaller allocations.  If that
         * fails and __GFP_DIRECT_RECLAIM is set retry with the mempool.
-        * The mempool is sized to handle up to BIO_MAX_PAGES entries.
+        * The mempool is sized to handle up to BIO_MAX_VECS entries.
         */
-       if (*nr_vecs < BIO_MAX_PAGES) {
+       if (*nr_vecs < BIO_MAX_VECS) {
                struct bio_vec *bvl;
 
                bvl = kmem_cache_alloc(bvs->slab, bvec_alloc_gfp(gfp_mask));
                if (likely(bvl) || !(gfp_mask & __GFP_DIRECT_RECLAIM))
                        return bvl;
-               *nr_vecs = BIO_MAX_PAGES;
+               *nr_vecs = BIO_MAX_VECS;
        }
 
        return mempool_alloc(pool, gfp_mask);
@@ -277,7 +277,7 @@ static struct bio *__bio_chain_endio(struct bio *bio)
 {
        struct bio *parent = bio->bi_private;
 
-       if (!parent->bi_status)
+       if (bio->bi_status && !parent->bi_status)
                parent->bi_status = bio->bi_status;
        bio_put(bio);
        return parent;
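__bio_chain_endio() above now folds a child's status into the parent only when the child actually failed and the parent is still clean, so the first error in a chain wins. The chained-split pattern it serves appears later in this series (see the bounce code) and looks roughly like:

        /* Split off the front of a bio and chain the two halves. */
        struct bio *split = bio_split(bio, sectors, GFP_NOIO, bs);

        bio_chain(split, bio);          /* 'bio' completes only after 'split' does */
        submit_bio_noacct(bio);         /* send the trimmed remainder on its way */
        /* the caller carries on with 'split' */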
@@ -949,7 +949,7 @@ void bio_release_pages(struct bio *bio, bool mark_dirty)
 }
 EXPORT_SYMBOL_GPL(bio_release_pages);
 
-static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+static void __bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
 {
        WARN_ON_ONCE(bio->bi_max_vecs);
 
@@ -959,11 +959,26 @@ static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
        bio->bi_iter.bi_size = iter->count;
        bio_set_flag(bio, BIO_NO_PAGE_REF);
        bio_set_flag(bio, BIO_CLONED);
+}
 
+static int bio_iov_bvec_set(struct bio *bio, struct iov_iter *iter)
+{
+       __bio_iov_bvec_set(bio, iter);
        iov_iter_advance(iter, iter->count);
        return 0;
 }
 
+static int bio_iov_bvec_set_append(struct bio *bio, struct iov_iter *iter)
+{
+       struct request_queue *q = bio->bi_bdev->bd_disk->queue;
+       struct iov_iter i = *iter;
+
+       iov_iter_truncate(&i, queue_max_zone_append_sectors(q) << 9);
+       __bio_iov_bvec_set(bio, &i);
+       iov_iter_advance(iter, i.count);
+       return 0;
+}
+
 #define PAGE_PTRS_PER_BVEC     (sizeof(struct bio_vec) / sizeof(struct page *))
 
 /**
@@ -1094,8 +1109,8 @@ int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter)
        int ret = 0;
 
        if (iov_iter_is_bvec(iter)) {
-               if (WARN_ON_ONCE(bio_op(bio) == REQ_OP_ZONE_APPEND))
-                       return -EINVAL;
+               if (bio_op(bio) == REQ_OP_ZONE_APPEND)
+                       return bio_iov_bvec_set_append(bio, iter);
                return bio_iov_bvec_set(bio, iter);
        }
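The `<< 9` in bio_iov_bvec_set_append() above converts the queue's zone-append limit from 512-byte sectors to bytes before truncating a private copy of the iterator; worked through with illustrative numbers:

        /*
         * Illustrative: if queue_max_zone_append_sectors(q) is 1024, the
         * copied iterator is truncated to 1024 << 9 = 512 KiB; after the
         * bio is set up, iov_iter_advance() moves the caller's iterator
         * by exactly the bytes consumed, leaving the tail for a follow-up
         * bio instead of failing with -EINVAL as before.
         */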
 
index 85d5790..3304e84 100644 (file)
@@ -109,6 +109,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
 
        lockdep_assert_held(&blkg->q->queue_lock);
 
+       memset(sum, 0, sizeof(*sum));
        rcu_read_lock();
        blkg_for_each_descendant_pre(pos_blkg, pos_css, blkg) {
                struct blkg_rwstat *rwstat;
@@ -122,7 +123,7 @@ void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol,
                        rwstat = (void *)pos_blkg + off;
 
                for (i = 0; i < BLKG_RWSTAT_NR; i++)
-                       sum->cnt[i] = blkg_rwstat_read_counter(rwstat, i);
+                       sum->cnt[i] += blkg_rwstat_read_counter(rwstat, i);
        }
        rcu_read_unlock();
 }
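To see why both halves of this fix matter: with two descendant groups whose read counters are 3 and 4, the old assignment left sum->cnt holding only the last value visited (4), while memset-then-accumulate yields the intended 7; and without the memset, += would build on whatever stale contents the caller left in *sum.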
index 5e75284..fc60ff2 100644 (file)
@@ -59,6 +59,7 @@ EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_remap);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_split);
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_unplug);
+EXPORT_TRACEPOINT_SYMBOL_GPL(block_rq_insert);
 
 DEFINE_IDA(blk_queue_ida);
 
index e8327c5..c322176 100644 (file)
@@ -80,6 +80,7 @@ static struct blk_crypto_keyslot {
 static struct blk_keyslot_manager blk_crypto_ksm;
 static struct workqueue_struct *blk_crypto_wq;
 static mempool_t *blk_crypto_bounce_page_pool;
+static struct bio_set crypto_bio_split;
 
 /*
  * This is the key we set when evicting a keyslot. This *should* be the all 0's
@@ -218,13 +219,14 @@ static bool blk_crypto_split_bio_if_needed(struct bio **bio_ptr)
 
        bio_for_each_segment(bv, bio, iter) {
                num_sectors += bv.bv_len >> SECTOR_SHIFT;
-               if (++i == BIO_MAX_PAGES)
+               if (++i == BIO_MAX_VECS)
                        break;
        }
        if (num_sectors < bio_sectors(bio)) {
                struct bio *split_bio;
 
-               split_bio = bio_split(bio, num_sectors, GFP_NOIO, NULL);
+               split_bio = bio_split(bio, num_sectors, GFP_NOIO,
+                                     &crypto_bio_split);
                if (!split_bio) {
                        bio->bi_status = BLK_STS_RESOURCE;
                        return false;
@@ -538,9 +540,13 @@ static int blk_crypto_fallback_init(void)
 
        prandom_bytes(blank_key, BLK_CRYPTO_MAX_KEY_SIZE);
 
-       err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+       err = bioset_init(&crypto_bio_split, 64, 0, 0);
        if (err)
                goto out;
+
+       err = blk_ksm_init(&blk_crypto_ksm, blk_crypto_num_keyslots);
+       if (err)
+               goto fail_free_bioset;
        err = -ENOMEM;
 
        blk_crypto_ksm.ksm_ll_ops = blk_crypto_ksm_ll_ops;
@@ -591,6 +597,8 @@ fail_free_wq:
        destroy_workqueue(blk_crypto_wq);
 fail_free_ksm:
        blk_ksm_destroy(&blk_crypto_ksm);
+fail_free_bioset:
+       bioset_exit(&crypto_bio_split);
 out:
        return err;
 }
index 752f9c7..7b25613 100644 (file)
@@ -296,7 +296,7 @@ static unsigned int __blkdev_sectors_to_bio_pages(sector_t nr_sects)
 {
        sector_t pages = DIV_ROUND_UP_SECTOR_T(nr_sects, PAGE_SIZE / 512);
 
-       return min(pages, (sector_t)BIO_MAX_PAGES);
+       return min(pages, (sector_t)BIO_MAX_VECS);
 }
 
 static int __blkdev_issue_zero_pages(struct block_device *bdev,
index 21630dc..1ffef78 100644 (file)
@@ -150,9 +150,7 @@ static int bio_copy_user_iov(struct request *rq, struct rq_map_data *map_data,
        bmd->is_our_pages = !map_data;
        bmd->is_null_mapped = (map_data && map_data->null_mapped);
 
-       nr_pages = DIV_ROUND_UP(offset + len, PAGE_SIZE);
-       if (nr_pages > BIO_MAX_PAGES)
-               nr_pages = BIO_MAX_PAGES;
+       nr_pages = bio_max_segs(DIV_ROUND_UP(offset + len, PAGE_SIZE));
 
        ret = -ENOMEM;
        bio = bio_kmalloc(gfp_mask, nr_pages);
@@ -251,7 +249,7 @@ static int bio_map_user_iov(struct request *rq, struct iov_iter *iter,
        if (!iov_iter_count(iter))
                return -EINVAL;
 
-       bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_PAGES));
+       bio = bio_kmalloc(gfp_mask, iov_iter_npages(iter, BIO_MAX_VECS));
        if (!bio)
                return -ENOMEM;
        bio->bi_opf |= req_op(rq);
index ffb4aa0..4d97fb6 100644 (file)
@@ -382,6 +382,14 @@ unsigned int blk_recalc_rq_segments(struct request *rq)
        switch (bio_op(rq->bio)) {
        case REQ_OP_DISCARD:
        case REQ_OP_SECURE_ERASE:
+               if (queue_max_discard_segments(rq->q) > 1) {
+                       struct bio *bio = rq->bio;
+
+                       for_each_bio(bio)
+                               nr_phys_segs++;
+                       return nr_phys_segs;
+               }
+               return 1;
        case REQ_OP_WRITE_ZEROES:
                return 0;
        case REQ_OP_WRITE_SAME:
index 4de03da..271f659 100644 (file)
@@ -292,7 +292,6 @@ static const char *const cmd_flag_name[] = {
 
 #define RQF_NAME(name) [ilog2((__force u32)RQF_##name)] = #name
 static const char *const rqf_name[] = {
-       RQF_NAME(SORTED),
        RQF_NAME(STARTED),
        RQF_NAME(SOFTBARRIER),
        RQF_NAME(FLUSH_SEQ),
@@ -303,7 +302,6 @@ static const char *const rqf_name[] = {
        RQF_NAME(QUIET),
        RQF_NAME(ELVPRIV),
        RQF_NAME(IO_STAT),
-       RQF_NAME(ALLOCED),
        RQF_NAME(PM),
        RQF_NAME(HASHED),
        RQF_NAME(STATS),
index deff4e8..e1e997a 100644 (file)
@@ -384,14 +384,7 @@ bool blk_mq_sched_try_insert_merge(struct request_queue *q, struct request *rq)
 }
 EXPORT_SYMBOL_GPL(blk_mq_sched_try_insert_merge);
 
-void blk_mq_sched_request_inserted(struct request *rq)
-{
-       trace_block_rq_insert(rq);
-}
-EXPORT_SYMBOL_GPL(blk_mq_sched_request_inserted);
-
 static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
-                                      bool has_sched,
                                       struct request *rq)
 {
        /*
@@ -408,9 +401,6 @@ static bool blk_mq_sched_bypass_insert(struct blk_mq_hw_ctx *hctx,
        if ((rq->rq_flags & RQF_FLUSH_SEQ) || blk_rq_is_passthrough(rq))
                return true;
 
-       if (has_sched)
-               rq->rq_flags |= RQF_SORTED;
-
        return false;
 }
 
@@ -424,7 +414,7 @@ void blk_mq_sched_insert_request(struct request *rq, bool at_head,
 
        WARN_ON(e && (rq->tag != BLK_MQ_NO_TAG));
 
-       if (blk_mq_sched_bypass_insert(hctx, !!e, rq)) {
+       if (blk_mq_sched_bypass_insert(hctx, rq)) {
                /*
                 * Firstly normal IO request is inserted to scheduler queue or
                 * sw queue, meantime we add flush request to dispatch queue(
index 0476360..5b18ab9 100644 (file)
@@ -7,7 +7,6 @@
 
 void blk_mq_sched_assign_ioc(struct request *rq);
 
-void blk_mq_sched_request_inserted(struct request *rq);
 bool blk_mq_sched_try_merge(struct request_queue *q, struct bio *bio,
                unsigned int nr_segs, struct request **merged_request);
 bool __blk_mq_sched_bio_merge(struct request_queue *q, struct bio *bio,
index a2283cc..8a5a0d4 100644 (file)
@@ -21,31 +21,6 @@ static inline void blk_pm_mark_last_busy(struct request *rq)
        if (rq->q->dev && !(rq->rq_flags & RQF_PM))
                pm_runtime_mark_last_busy(rq->q->dev);
 }
-
-static inline void blk_pm_requeue_request(struct request *rq)
-{
-       lockdep_assert_held(&rq->q->queue_lock);
-
-       if (rq->q->dev && !(rq->rq_flags & RQF_PM))
-               rq->q->nr_pending--;
-}
-
-static inline void blk_pm_add_request(struct request_queue *q,
-                                     struct request *rq)
-{
-       lockdep_assert_held(&q->queue_lock);
-
-       if (q->dev && !(rq->rq_flags & RQF_PM))
-               q->nr_pending++;
-}
-
-static inline void blk_pm_put_request(struct request *rq)
-{
-       lockdep_assert_held(&rq->q->queue_lock);
-
-       if (rq->q->dev && !(rq->rq_flags & RQF_PM))
-               --rq->q->nr_pending;
-}
 #else
 static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 {
@@ -55,19 +30,6 @@ static inline int blk_pm_resume_queue(const bool pm, struct request_queue *q)
 static inline void blk_pm_mark_last_busy(struct request *rq)
 {
 }
-
-static inline void blk_pm_requeue_request(struct request *rq)
-{
-}
-
-static inline void blk_pm_add_request(struct request_queue *q,
-                                     struct request *rq)
-{
-}
-
-static inline void blk_pm_put_request(struct request *rq)
-{
-}
 #endif
 
 #endif /* _BLOCK_BLK_PM_H_ */
index 7dd8be3..b4aa2f3 100644 (file)
@@ -504,6 +504,14 @@ void blk_queue_io_opt(struct request_queue *q, unsigned int opt)
 }
 EXPORT_SYMBOL(blk_queue_io_opt);
 
+static unsigned int blk_round_down_sectors(unsigned int sectors, unsigned int lbs)
+{
+       sectors = round_down(sectors, lbs >> SECTOR_SHIFT);
+       if (sectors < PAGE_SIZE >> SECTOR_SHIFT)
+               sectors = PAGE_SIZE >> SECTOR_SHIFT;
+       return sectors;
+}
+
 /**
  * blk_stack_limits - adjust queue_limits for stacked devices
  * @t: the stacking driver limits (top device)
@@ -630,6 +638,10 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
                ret = -1;
        }
 
+       t->max_sectors = blk_round_down_sectors(t->max_sectors, t->logical_block_size);
+       t->max_hw_sectors = blk_round_down_sectors(t->max_hw_sectors, t->logical_block_size);
+       t->max_dev_sectors = blk_round_down_sectors(t->max_dev_sectors, t->logical_block_size);
+
        /* Discard alignment and granularity */
        if (b->discard_granularity) {
                alignment = queue_limit_discard_alignment(b, start);
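As a worked example of the blk_round_down_sectors() helper added above (numbers illustrative):

        /*
         * Illustrative: with a 4096-byte logical block size,
         * lbs >> SECTOR_SHIFT is 8 sectors, so a stacked max_sectors of
         * 1025 rounds down to 1024; the PAGE_SIZE >> SECTOR_SHIFT floor
         * (8 on 4 KiB-page systems) keeps the limit from collapsing to
         * zero for very large logical block sizes.
         */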
index ae39c7f..0f4f0c8 100644 (file)
@@ -434,10 +434,13 @@ static ssize_t queue_poll_store(struct request_queue *q, const char *page,
        if (ret < 0)
                return ret;
 
-       if (poll_on)
+       if (poll_on) {
                blk_queue_flag_set(QUEUE_FLAG_POLL, q);
-       else
+       } else {
+               blk_mq_freeze_queue(q);
                blk_queue_flag_clear(QUEUE_FLAG_POLL, q);
+               blk_mq_unfreeze_queue(q);
+       }
 
        return ret;
 }
index 833978c..c0276b4 100644 (file)
@@ -240,7 +240,7 @@ int blkdev_zone_mgmt(struct block_device *bdev, enum req_opf op,
                 */
                if (op == REQ_OP_ZONE_RESET &&
                    blkdev_allow_reset_all_zones(bdev, sector, nr_sectors)) {
-                       bio->bi_opf = REQ_OP_ZONE_RESET_ALL;
+                       bio->bi_opf = REQ_OP_ZONE_RESET_ALL | REQ_SYNC;
                        break;
                }
 
@@ -318,6 +318,22 @@ int blkdev_report_zones_ioctl(struct block_device *bdev, fmode_t mode,
        return 0;
 }
 
+static int blkdev_truncate_zone_range(struct block_device *bdev, fmode_t mode,
+                                     const struct blk_zone_range *zrange)
+{
+       loff_t start, end;
+
+       if (zrange->sector + zrange->nr_sectors <= zrange->sector ||
+           zrange->sector + zrange->nr_sectors > get_capacity(bdev->bd_disk))
+               /* Out of range */
+               return -EINVAL;
+
+       start = zrange->sector << SECTOR_SHIFT;
+       end = ((zrange->sector + zrange->nr_sectors) << SECTOR_SHIFT) - 1;
+
+       return truncate_bdev_range(bdev, mode, start, end);
+}
+
 /*
  * BLKRESETZONE, BLKOPENZONE, BLKCLOSEZONE and BLKFINISHZONE ioctl processing.
  * Called from blkdev_ioctl.
@@ -329,6 +345,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
        struct request_queue *q;
        struct blk_zone_range zrange;
        enum req_opf op;
+       int ret;
 
        if (!argp)
                return -EINVAL;
@@ -352,6 +369,11 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
        switch (cmd) {
        case BLKRESETZONE:
                op = REQ_OP_ZONE_RESET;
+
+               /* Invalidate the page cache, including dirty pages. */
+               ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+               if (ret)
+                       return ret;
                break;
        case BLKOPENZONE:
                op = REQ_OP_ZONE_OPEN;
@@ -366,8 +388,20 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, fmode_t mode,
                return -ENOTTY;
        }
 
-       return blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
-                               GFP_KERNEL);
+       ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors,
+                              GFP_KERNEL);
+
+       /*
+        * Invalidate the page cache again for zone reset: writes can only be
+        * direct for zoned devices so concurrent writes would not add any page
+        * to the page cache after/during reset. The page cache may be filled
+        * again due to concurrent reads though and dropping the pages for
+        * these is fine.
+        */
+       if (!ret && cmd == BLKRESETZONE)
+               ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
+
+       return ret;
 }
 
 static inline unsigned long *blk_alloc_zone_bitmap(int node,
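From userspace the reset path above is reached via the BLKRESETZONE ioctl; a hedged sketch (device path and zone geometry are illustrative):

        #include <fcntl.h>
        #include <sys/ioctl.h>
        #include <linux/blkzoned.h>     /* BLKRESETZONE, struct blk_zone_range */

        struct blk_zone_range zr = {
                .sector     = 0,        /* start of the first zone */
                .nr_sectors = 524288,   /* e.g. one 256 MiB zone */
        };
        int fd = open("/dev/nullb0", O_RDWR);

        ioctl(fd, BLKRESETZONE, &zr);   /* now also invalidates cached pages */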
index fc55314..6c441f4 100644 (file)
@@ -214,8 +214,7 @@ static void bounce_end_io_read_isa(struct bio *bio)
        __bounce_end_io_read(bio, &isa_page_pool);
 }
 
-static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
-               struct bio_set *bs)
+static struct bio *bounce_clone_bio(struct bio *bio_src)
 {
        struct bvec_iter iter;
        struct bio_vec bv;
@@ -230,10 +229,10 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
         *  - The point of cloning the biovec is to produce a bio with a biovec
         *    the caller can modify: bi_idx and bi_bvec_done should be 0.
         *
-        *  - The original bio could've had more than BIO_MAX_PAGES biovecs; if
+        *  - The original bio could've had more than BIO_MAX_VECS biovecs; if
         *    we tried to clone the whole thing bio_alloc_bioset() would fail.
         *    But the clone should succeed as long as the number of biovecs we
-        *    actually need to allocate is fewer than BIO_MAX_PAGES.
+        *    actually need to allocate is fewer than BIO_MAX_VECS.
         *
         *  - Lastly, bi_vcnt should not be looked at or relied upon by code
         *    that does not own the bio - reason being drivers don't use it for
@@ -242,10 +241,12 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
         *    asking for trouble and would force extra work on
         *    __bio_clone_fast() anyways.
         */
-
-       bio = bio_alloc_bioset(gfp_mask, bio_segments(bio_src), bs);
-       if (!bio)
-               return NULL;
+       if (bio_is_passthrough(bio_src))
+               bio = bio_kmalloc(GFP_NOIO | __GFP_NOFAIL,
+                                 bio_segments(bio_src));
+       else
+               bio = bio_alloc_bioset(GFP_NOIO, bio_segments(bio_src),
+                                      &bounce_bio_set);
        bio->bi_bdev            = bio_src->bi_bdev;
        if (bio_flagged(bio_src, BIO_REMAPPED))
                bio_set_flag(bio, BIO_REMAPPED);
@@ -269,11 +270,11 @@ static struct bio *bounce_clone_bio(struct bio *bio_src, gfp_t gfp_mask,
                break;
        }
 
-       if (bio_crypt_clone(bio, bio_src, gfp_mask) < 0)
+       if (bio_crypt_clone(bio, bio_src, GFP_NOIO) < 0)
                goto err_put;
 
        if (bio_integrity(bio_src) &&
-           bio_integrity_clone(bio, bio_src, gfp_mask) < 0)
+           bio_integrity_clone(bio, bio_src, GFP_NOIO) < 0)
                goto err_put;
 
        bio_clone_blkg_association(bio, bio_src);
@@ -296,10 +297,9 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        unsigned i = 0;
        bool bounce = false;
        int sectors = 0;
-       bool passthrough = bio_is_passthrough(*bio_orig);
 
        bio_for_each_segment(from, *bio_orig, iter) {
-               if (i++ < BIO_MAX_PAGES)
+               if (i++ < BIO_MAX_VECS)
                        sectors += from.bv_len >> 9;
                if (page_to_pfn(from.bv_page) > q->limits.bounce_pfn)
                        bounce = true;
@@ -307,14 +307,14 @@ static void __blk_queue_bounce(struct request_queue *q, struct bio **bio_orig,
        if (!bounce)
                return;
 
-       if (!passthrough && sectors < bio_sectors(*bio_orig)) {
+       if (!bio_is_passthrough(*bio_orig) &&
+           sectors < bio_sectors(*bio_orig)) {
                bio = bio_split(*bio_orig, sectors, GFP_NOIO, &bounce_bio_split);
                bio_chain(bio, *bio_orig);
                submit_bio_noacct(*bio_orig);
                *bio_orig = bio;
        }
-       bio = bounce_clone_bio(*bio_orig, GFP_NOIO, passthrough ? NULL :
-                       &bounce_bio_set);
+       bio = bounce_clone_bio(*bio_orig);
 
        /*
         * Bvec table can't be updated by bio_for_each_segment_all(),
index 36ff45b..8c8f543 100644 (file)
@@ -45,11 +45,10 @@ static void disk_release_events(struct gendisk *disk);
 void set_capacity(struct gendisk *disk, sector_t sectors)
 {
        struct block_device *bdev = disk->part0;
-       unsigned long flags;
 
-       spin_lock_irqsave(&bdev->bd_size_lock, flags);
+       spin_lock(&bdev->bd_size_lock);
        i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-       spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
+       spin_unlock(&bdev->bd_size_lock);
 }
 EXPORT_SYMBOL(set_capacity);
 
@@ -74,7 +73,7 @@ bool set_capacity_and_notify(struct gendisk *disk, sector_t size)
                return false;
 
        pr_info("%s: detected capacity change from %lld to %lld\n",
-               disk->disk_name, size, capacity);
+               disk->disk_name, capacity, size);
 
        /*
         * Historically we did not send a uevent for changes to/from an empty
@@ -476,7 +475,7 @@ void disk_uevent(struct gendisk *disk, enum kobject_action action)
        struct disk_part_iter piter;
        struct block_device *part;
 
-       disk_part_iter_init(&piter, disk, DISK_PITER_INCL_PART0);
+       disk_part_iter_init(&piter, disk, DISK_PITER_INCL_EMPTY_PART0);
        while ((part = disk_part_iter_next(&piter)))
                kobject_uevent(bdev_kobj(part), action);
        disk_part_iter_exit(&piter);
@@ -535,10 +534,8 @@ static void register_disk(struct device *parent, struct gendisk *disk,
                kobject_create_and_add("holders", &ddev->kobj);
        disk->slave_dir = kobject_create_and_add("slaves", &ddev->kobj);
 
-       if (disk->flags & GENHD_FL_HIDDEN) {
-               dev_set_uevent_suppress(ddev, 0);
+       if (disk->flags & GENHD_FL_HIDDEN)
                return;
-       }
 
        disk_scan_partitions(disk);
 
index d61d652..ff241e6 100644 (file)
@@ -81,20 +81,27 @@ static int compat_blkpg_ioctl(struct block_device *bdev,
 }
 #endif
 
-static int blkdev_reread_part(struct block_device *bdev)
+static int blkdev_reread_part(struct block_device *bdev, fmode_t mode)
 {
-       int ret;
+       struct block_device *tmp;
 
        if (!disk_part_scan_enabled(bdev->bd_disk) || bdev_is_partition(bdev))
                return -EINVAL;
        if (!capable(CAP_SYS_ADMIN))
                return -EACCES;
 
-       mutex_lock(&bdev->bd_mutex);
-       ret = bdev_disk_changed(bdev, false);
-       mutex_unlock(&bdev->bd_mutex);
+       /*
+        * Reopen the device to revalidate the driver state and force a
+        * partition rescan.
+        */
+       mode &= ~FMODE_EXCL;
+       set_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
 
-       return ret;
+       tmp = blkdev_get_by_dev(bdev->bd_dev, mode, NULL);
+       if (IS_ERR(tmp))
+               return PTR_ERR(tmp);
+       blkdev_put(tmp, mode);
+       return 0;
 }
 
 static int blk_ioctl_discard(struct block_device *bdev, fmode_t mode,
@@ -498,7 +505,7 @@ static int blkdev_common_ioctl(struct block_device *bdev, fmode_t mode,
                bdev->bd_bdi->ra_pages = (arg * 512) / PAGE_SIZE;
                return 0;
        case BLKRRPART:
-               return blkdev_reread_part(bdev);
+               return blkdev_reread_part(bdev, mode);
        case BLKTRACESTART:
        case BLKTRACESTOP:
        case BLKTRACETEARDOWN:
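
The userspace contract for BLKRRPART is unchanged by the reopen-based
implementation above; only the kernel side differs. A minimal, runnable
sketch of triggering a partition rescan (/dev/sda is an assumed example
device; the ioctl still requires CAP_SYS_ADMIN):

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>
        #include <sys/ioctl.h>
        #include <linux/fs.h>           /* BLKRRPART */

        int main(void)
        {
                int fd = open("/dev/sda", O_RDONLY);

                if (fd < 0) {
                        perror("open");
                        return 1;
                }
                /* ask the kernel to re-read the partition table */
                if (ioctl(fd, BLKRRPART) < 0)
                        perror("ioctl(BLKRRPART)");
                close(fd);
                return 0;
        }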
index c25c41d..33d34d6 100644 (file)
@@ -13,6 +13,8 @@
 #include <linux/module.h>
 #include <linux/sbitmap.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -353,19 +355,9 @@ static void kyber_timer_fn(struct timer_list *t)
        }
 }
 
-static unsigned int kyber_sched_tags_shift(struct request_queue *q)
-{
-       /*
-        * All of the hardware queues have the same depth, so we can just grab
-        * the shift of the first one.
-        */
-       return q->queue_hw_ctx[0]->sched_tags->bitmap_tags->sb.shift;
-}
-
 static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
 {
        struct kyber_queue_data *kqd;
-       unsigned int shift;
        int ret = -ENOMEM;
        int i;
 
@@ -400,9 +392,6 @@ static struct kyber_queue_data *kyber_queue_data_alloc(struct request_queue *q)
                kqd->latency_targets[i] = kyber_latency_targets[i];
        }
 
-       shift = kyber_sched_tags_shift(q);
-       kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
-
        return kqd;
 
 err_buckets:
@@ -458,9 +447,19 @@ static void kyber_ctx_queue_init(struct kyber_ctx_queue *kcq)
                INIT_LIST_HEAD(&kcq->rq_list[i]);
 }
 
-static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+static void kyber_depth_updated(struct blk_mq_hw_ctx *hctx)
 {
        struct kyber_queue_data *kqd = hctx->queue->elevator->elevator_data;
+       struct blk_mq_tags *tags = hctx->sched_tags;
+       unsigned int shift = tags->bitmap_tags->sb.shift;
+
+       kqd->async_depth = (1U << shift) * KYBER_ASYNC_PERCENT / 100U;
+
+       sbitmap_queue_min_shallow_depth(tags->bitmap_tags, kqd->async_depth);
+}
+
+static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
+{
        struct kyber_hctx_data *khd;
        int i;
 
@@ -502,8 +501,7 @@ static int kyber_init_hctx(struct blk_mq_hw_ctx *hctx, unsigned int hctx_idx)
        khd->batching = 0;
 
        hctx->sched_data = khd;
-       sbitmap_queue_min_shallow_depth(hctx->sched_tags->bitmap_tags,
-                                       kqd->async_depth);
+       kyber_depth_updated(hctx);
 
        return 0;
 
@@ -602,7 +600,7 @@ static void kyber_insert_requests(struct blk_mq_hw_ctx *hctx,
                        list_move_tail(&rq->queuelist, head);
                sbitmap_set_bit(&khd->kcq_map[sched_domain],
                                rq->mq_ctx->index_hw[hctx->type]);
-               blk_mq_sched_request_inserted(rq);
+               trace_block_rq_insert(rq);
                spin_unlock(&kcq->lock);
        }
 }
@@ -1022,6 +1020,7 @@ static struct elevator_type kyber_sched = {
                .completed_request = kyber_completed_request,
                .dispatch_request = kyber_dispatch_request,
                .has_work = kyber_has_work,
+               .depth_updated = kyber_depth_updated,
        },
 #ifdef CONFIG_BLK_DEBUG_FS
        .queue_debugfs_attrs = kyber_queue_debugfs_attrs,
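
As a worked example of the computation kyber_depth_updated() now performs
on every depth change: assuming a sched_tags sbitmap shift of 8 and
KYBER_ASYNC_PERCENT of 75 (its value in this file), async requests are
capped at 192 of the 256 per-word tags:

        unsigned int shift = 8;                 /* hypothetical */
        unsigned int async_depth = (1U << shift) * 75 / 100U;
        /* async_depth == 192: synchronous requests always keep
         * at least 25% of the tag space
         */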
index b57470e..f3631a2 100644 (file)
@@ -18,6 +18,8 @@
 #include <linux/rbtree.h>
 #include <linux/sbitmap.h>
 
+#include <trace/events/block.h>
+
 #include "blk.h"
 #include "blk-mq.h"
 #include "blk-mq-debugfs.h"
@@ -496,7 +498,7 @@ static void dd_insert_request(struct blk_mq_hw_ctx *hctx, struct request *rq,
        if (blk_mq_sched_try_insert_merge(q, rq))
                return;
 
-       blk_mq_sched_request_inserted(rq);
+       trace_block_rq_insert(rq);
 
        if (at_head || blk_rq_is_passthrough(rq)) {
                if (at_head)
index f3d9ff2..46f055b 100644 (file)
@@ -88,11 +88,9 @@ static int (*check_part[])(struct parsed_partitions *) = {
 
 static void bdev_set_nr_sectors(struct block_device *bdev, sector_t sectors)
 {
-       unsigned long flags;
-
-       spin_lock_irqsave(&bdev->bd_size_lock, flags);
+       spin_lock(&bdev->bd_size_lock);
        i_size_write(bdev->bd_inode, (loff_t)sectors << SECTOR_SHIFT);
-       spin_unlock_irqrestore(&bdev->bd_size_lock, flags);
+       spin_unlock(&bdev->bd_size_lock);
 }
 
 static struct parsed_partitions *allocate_partitions(struct gendisk *hd)
@@ -325,6 +323,13 @@ static struct block_device *add_partition(struct gendisk *disk, int partno,
        int err;
 
        /*
+        * disk_max_parts() won't be zero: either GENHD_FL_EXT_DEVT is set
+        * or 'minors' is passed to alloc_disk().
+        */
+       if (partno >= disk_max_parts(disk))
+               return ERR_PTR(-EINVAL);
+
+       /*
         * Partitions are not supported on zoned block devices that are used as
         * such.
         */
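
For illustration of the new early bound check: a disk registered with
alloc_disk(16) reports disk_max_parts() == 16, so partition numbers up to
15 are addressable and a request for partno 16 now fails fast (disk,
start and len below are assumed values):

        /* hypothetical caller of add_partition() */
        struct block_device *part = add_partition(disk, 16, start, len,
                                                  0, NULL);
        /* IS_ERR(part), PTR_ERR(part) == -EINVAL: partno out of range */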
index 6514f9e..bffe4c6 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/ctype.h>
 #include <linux/err.h>
 #include <linux/seq_file.h>
+#include <linux/uidgid.h>
 #include <keys/system_keyring.h>
 #include "blacklist.h"
 
@@ -37,7 +38,7 @@ static int blacklist_vet_description(const char *desc)
 found_colon:
        desc++;
        for (; *desc; desc++) {
-               if (!isxdigit(*desc))
+               if (!isxdigit(*desc) || isupper(*desc))
                        return -EINVAL;
                n++;
        }
@@ -78,7 +79,7 @@ static struct key_type key_type_blacklist = {
 
 /**
  * mark_hash_blacklisted - Add a hash to the system blacklist
- * @hash - The hash as a hex string with a type prefix (eg. "tbs:23aa429783")
+ * @hash: The hash as a hex string with a type prefix (eg. "tbs:23aa429783")
  */
 int mark_hash_blacklisted(const char *hash)
 {
@@ -156,13 +157,12 @@ static int __init blacklist_init(void)
 
        blacklist_keyring =
                keyring_alloc(".blacklist",
-                             KUIDT_INIT(0), KGIDT_INIT(0),
-                             current_cred(),
+                             GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
                              (KEY_POS_ALL & ~KEY_POS_SETATTR) |
                              KEY_USR_VIEW | KEY_USR_READ |
                              KEY_USR_SEARCH,
                              KEY_ALLOC_NOT_IN_QUOTA |
-                             KEY_FLAG_KEEP,
+                             KEY_ALLOC_SET_KEEP,
                              NULL, NULL);
        if (IS_ERR(blacklist_keyring))
                panic("Can't allocate system blacklist keyring\n");
index 7982911..4b693da 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/cred.h>
 #include <linux/err.h>
 #include <linux/slab.h>
+#include <linux/uidgid.h>
 #include <linux/verification.h>
 #include <keys/asymmetric-type.h>
 #include <keys/system_keyring.h>
@@ -98,7 +99,7 @@ static __init int system_trusted_keyring_init(void)
 
        builtin_trusted_keys =
                keyring_alloc(".builtin_trusted_keys",
-                             KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+                             GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
                              ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
                              KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH),
                              KEY_ALLOC_NOT_IN_QUOTA,
@@ -109,7 +110,7 @@ static __init int system_trusted_keyring_init(void)
 #ifdef CONFIG_SECONDARY_TRUSTED_KEYRING
        secondary_trusted_keys =
                keyring_alloc(".secondary_trusted_keys",
-                             KUIDT_INIT(0), KGIDT_INIT(0), current_cred(),
+                             GLOBAL_ROOT_UID, GLOBAL_ROOT_GID, current_cred(),
                              ((KEY_POS_ALL & ~KEY_POS_SETATTR) |
                               KEY_USR_VIEW | KEY_USR_READ | KEY_USR_SEARCH |
                               KEY_USR_WRITE),
index 15c9c28..5809cc1 100644 (file)
@@ -767,7 +767,7 @@ config CRYPTO_POLY1305_X86_64
 
 config CRYPTO_POLY1305_MIPS
        tristate "Poly1305 authenticator algorithm (MIPS optimized)"
-       depends on CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+       depends on MIPS
        select CRYPTO_ARCH_HAVE_LIB_POLY1305
 
 config CRYPTO_MD4
index 33e77d8..ad8af3d 100644 (file)
@@ -152,7 +152,8 @@ EXPORT_SYMBOL_GPL(asymmetric_key_generate_id);
 
 /**
  * asymmetric_key_id_same - Return true if two asymmetric keys IDs are the same.
- * @kid_1, @kid_2: The key IDs to compare
+ * @kid1: The key ID to compare
+ * @kid2: The key ID to compare
  */
 bool asymmetric_key_id_same(const struct asymmetric_key_id *kid1,
                            const struct asymmetric_key_id *kid2)
@@ -168,7 +169,8 @@ EXPORT_SYMBOL_GPL(asymmetric_key_id_same);
 /**
  * asymmetric_key_id_partial - Return true if two asymmetric keys IDs
  * partially match
- * @kid_1, @kid_2: The key IDs to compare
+ * @kid1: The key ID to compare
+ * @kid2: The key ID to compare
  */
 bool asymmetric_key_id_partial(const struct asymmetric_key_id *kid1,
                               const struct asymmetric_key_id *kid2)
index 6565fdc..e17f7ce 100644 (file)
@@ -41,10 +41,9 @@ struct pkcs7_signed_info {
         *
         * This contains the generated digest of _either_ the Content Data or
         * the Authenticated Attributes [RFC2315 9.3].  If the latter, one of
-        * the attributes contains the digest of the the Content Data within
-        * it.
+        * the attributes contains the digest of the Content Data within it.
         *
-        * THis also contains the issuing cert serial number and issuer's name
+        * This also contains the issuing cert serial number and issuer's name
         * [PKCS#7 or CMS ver 1] or issuing cert's SKID [CMS ver 3].
         */
        struct public_key_signature *sig;
index 61af3c4..b531df2 100644 (file)
@@ -16,7 +16,7 @@
 #include <crypto/public_key.h>
 #include "pkcs7_parser.h"
 
-/**
+/*
  * Check the trust on one PKCS#7 SignedInfo block.
  */
 static int pkcs7_validate_trust_one(struct pkcs7_message *pkcs7,
index ce49820..0b4d07a 100644 (file)
@@ -141,11 +141,10 @@ int pkcs7_get_digest(struct pkcs7_message *pkcs7, const u8 **buf, u32 *len,
        *buf = sinfo->sig->digest;
        *len = sinfo->sig->digest_size;
 
-       for (i = 0; i < HASH_ALGO__LAST; i++)
-               if (!strcmp(hash_algo_name[i], sinfo->sig->hash_algo)) {
-                       *hash_algo = i;
-                       break;
-               }
+       i = match_string(hash_algo_name, HASH_ALGO__LAST,
+                        sinfo->sig->hash_algo);
+       if (i >= 0)
+               *hash_algo = i;
 
        return 0;
 }
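
match_string() returns the array index on a hit and a negative errno
otherwise, so *hash_algo is only written when the name is recognized. A
minimal sketch of the same pattern with an illustrative table (the kernel
code uses hash_algo_name[HASH_ALGO__LAST]):

        static const char * const names[] = { "md4", "sha1", "sha256" };
        int i = match_string(names, ARRAY_SIZE(names), "sha256");

        if (i >= 0)
                /* i == 2 here; a miss returns -EINVAL */
                *hash_algo = i;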
index dcecc9f..62c753a 100644 (file)
@@ -6,6 +6,7 @@ menu "Device Drivers"
 source "drivers/amba/Kconfig"
 source "drivers/eisa/Kconfig"
 source "drivers/pci/Kconfig"
+source "drivers/cxl/Kconfig"
 source "drivers/pcmcia/Kconfig"
 source "drivers/rapidio/Kconfig"
 
index fd11b9a..6fba7da 100644 (file)
@@ -27,7 +27,7 @@ obj-y                         += idle/
 obj-y                          += char/ipmi/
 
 obj-$(CONFIG_ACPI)             += acpi/
-obj-$(CONFIG_SFI)              += sfi/
+
 # PnP must come after ACPI since it will eventually need to check if acpi
 # was used and do nothing if so
 obj-$(CONFIG_PNP)              += pnp/
@@ -73,6 +73,7 @@ obj-$(CONFIG_NVM)             += lightnvm/
 obj-y                          += base/ block/ misc/ mfd/ nfc/
 obj-$(CONFIG_LIBNVDIMM)                += nvdimm/
 obj-$(CONFIG_DAX)              += dax/
+obj-$(CONFIG_CXL_BUS)          += cxl/
 obj-$(CONFIG_DMA_SHARED_BUFFER) += dma-buf/
 obj-$(CONFIG_NUBUS)            += nubus/
 obj-y                          += macintosh/
index 403b01d..53580bd 100644 (file)
@@ -27,11 +27,11 @@ static const struct old_serial_port *serstate;
 static int timeouts;
 
 static int spk_serial_out(struct spk_synth *in_synth, const char ch);
-static void spk_serial_send_xchar(char ch);
-static void spk_serial_tiocmset(unsigned int set, unsigned int clear);
-static unsigned char spk_serial_in(void);
-static unsigned char spk_serial_in_nowait(void);
-static void spk_serial_flush_buffer(void);
+static void spk_serial_send_xchar(struct spk_synth *in_synth, char ch);
+static void spk_serial_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear);
+static unsigned char spk_serial_in(struct spk_synth *in_synth);
+static unsigned char spk_serial_in_nowait(struct spk_synth *in_synth);
+static void spk_serial_flush_buffer(struct spk_synth *in_synth);
 static int spk_serial_wait_for_xmitr(struct spk_synth *in_synth);
 
 struct spk_io_ops spk_serial_io_ops = {
@@ -150,7 +150,7 @@ static void start_serial_interrupt(int irq)
        outb(1, speakup_info.port_tts + UART_FCR);      /* Turn FIFO On */
 }
 
-static void spk_serial_send_xchar(char ch)
+static void spk_serial_send_xchar(struct spk_synth *synth, char ch)
 {
        int timeout = SPK_XMITR_TIMEOUT;
 
@@ -162,7 +162,7 @@ static void spk_serial_send_xchar(char ch)
        outb(ch, speakup_info.port_tts);
 }
 
-static void spk_serial_tiocmset(unsigned int set, unsigned int clear)
+static void spk_serial_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear)
 {
        int old = inb(speakup_info.port_tts + UART_MCR);
 
@@ -251,7 +251,7 @@ static int spk_serial_wait_for_xmitr(struct spk_synth *in_synth)
        return 1;
 }
 
-static unsigned char spk_serial_in(void)
+static unsigned char spk_serial_in(struct spk_synth *in_synth)
 {
        int tmout = SPK_SERIAL_TIMEOUT;
 
@@ -265,7 +265,7 @@ static unsigned char spk_serial_in(void)
        return inb_p(speakup_info.port_tts + UART_RX);
 }
 
-static unsigned char spk_serial_in_nowait(void)
+static unsigned char spk_serial_in_nowait(struct spk_synth *in_synth)
 {
        unsigned char lsr;
 
@@ -275,7 +275,7 @@ static unsigned char spk_serial_in_nowait(void)
        return inb_p(speakup_info.port_tts + UART_RX);
 }
 
-static void spk_serial_flush_buffer(void)
+static void spk_serial_flush_buffer(struct spk_synth *in_synth)
 {
        /* TODO: flush the UART 16550 buffer */
 }
@@ -307,7 +307,7 @@ const char *spk_serial_synth_immediate(struct spk_synth *synth,
 }
 EXPORT_SYMBOL_GPL(spk_serial_synth_immediate);
 
-void spk_serial_release(void)
+void spk_serial_release(struct spk_synth *synth)
 {
        spk_stop_serial_interrupt();
        if (speakup_info.port_tts == 0)
index c94328a..c1ec087 100644 (file)
@@ -25,7 +25,7 @@
 #define PROCSPEECH '\r'
 
 static int synth_probe(struct spk_synth *synth);
-static void accent_release(void);
+static void accent_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -294,7 +294,7 @@ static int synth_probe(struct spk_synth *synth)
        return 0;
 }
 
-static void accent_release(void)
+static void accent_release(struct spk_synth *synth)
 {
        spk_stop_serial_interrupt();
        if (speakup_info.port_tts)
index 0877b40..cd63581 100644 (file)
@@ -163,8 +163,8 @@ static void do_catch_up(struct spk_synth *synth)
                full_time_val = full_time->u.n.value;
                spin_unlock_irqrestore(&speakup_info.spinlock, flags);
                if (!synth->io_ops->synth_out(synth, ch)) {
-                       synth->io_ops->tiocmset(0, UART_MCR_RTS);
-                       synth->io_ops->tiocmset(UART_MCR_RTS, 0);
+                       synth->io_ops->tiocmset(synth, 0, UART_MCR_RTS);
+                       synth->io_ops->tiocmset(synth, UART_MCR_RTS, 0);
                        schedule_timeout(msecs_to_jiffies(full_time_val));
                        continue;
                }
index e6a6a96..e89fd72 100644 (file)
@@ -119,8 +119,8 @@ static struct spk_synth synth_audptr = {
 
 static void synth_flush(struct spk_synth *synth)
 {
-       synth->io_ops->flush_buffer();
-       synth->io_ops->send_xchar(SYNTH_CLEAR);
+       synth->io_ops->flush_buffer(synth);
+       synth->io_ops->send_xchar(synth, SYNTH_CLEAR);
        synth->io_ops->synth_out(synth, PROCSPEECH);
 }
 
@@ -130,11 +130,11 @@ static void synth_version(struct spk_synth *synth)
        char synth_id[40] = "";
 
        synth->synth_immediate(synth, "\x05[Q]");
-       synth_id[test] = synth->io_ops->synth_in();
+       synth_id[test] = synth->io_ops->synth_in(synth);
        if (synth_id[test] == 'A') {
                do {
                        /* read version string from synth */
-                       synth_id[++test] = synth->io_ops->synth_in();
+                       synth_id[++test] = synth->io_ops->synth_in(synth);
                } while (synth_id[test] != '\n' && test < 32);
                synth_id[++test] = 0x00;
        }
index 7408eb2..092cfd0 100644 (file)
@@ -218,7 +218,7 @@ static void do_catch_up(struct spk_synth *synth)
 static void synth_flush(struct spk_synth *synth)
 {
        in_escape = 0;
-       synth->io_ops->flush_buffer();
+       synth->io_ops->flush_buffer(synth);
        synth->synth_immediate(synth, "\033P;10z\033\\");
 }
 
index 96f24c8..dec314d 100644 (file)
@@ -125,7 +125,7 @@ enum {      PRIMARY_DIC     = 0, USER_DIC, COMMAND_DIC, ABBREV_DIC };
 #define SYNTH_IO_EXTENT 8
 
 static int synth_probe(struct spk_synth *synth);
-static void dtpc_release(void);
+static void dtpc_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -474,7 +474,7 @@ static int synth_probe(struct spk_synth *synth)
        return 0;
 }
 
-static void dtpc_release(void)
+static void dtpc_release(struct spk_synth *synth)
 {
        spk_stop_serial_interrupt();
        if (speakup_info.port_tts)
index ab6d61e..580ec79 100644 (file)
@@ -78,6 +78,8 @@ static struct kobj_attribute direct_attribute =
        __ATTR(direct, 0644, spk_var_show, spk_var_store);
 static struct kobj_attribute full_time_attribute =
        __ATTR(full_time, 0644, spk_var_show, spk_var_store);
+static struct kobj_attribute flush_time_attribute =
+       __ATTR(flush_time, 0644, spk_var_show, spk_var_store);
 static struct kobj_attribute jiffy_delta_attribute =
        __ATTR(jiffy_delta, 0644, spk_var_show, spk_var_store);
 static struct kobj_attribute trigger_time_attribute =
@@ -99,6 +101,7 @@ static struct attribute *synth_attrs[] = {
        &delay_time_attribute.attr,
        &direct_attribute.attr,
        &full_time_attribute.attr,
+       &flush_time_attribute.attr,
        &jiffy_delta_attribute.attr,
        &trigger_time_attribute.attr,
        NULL,   /* need to NULL terminate the list of attributes */
@@ -118,6 +121,7 @@ static struct spk_synth synth_dectlk = {
        .trigger = 50,
        .jiffies = 50,
        .full = 40000,
+       .flush_time = 4000,
        .dev_name = SYNTH_DEFAULT_DEV,
        .startup = SYNTH_START,
        .checkval = SYNTH_CHECK,
@@ -200,18 +204,23 @@ static void do_catch_up(struct spk_synth *synth)
        static u_char last = '\0';
        unsigned long flags;
        unsigned long jiff_max;
-       unsigned long timeout = msecs_to_jiffies(4000);
+       unsigned long timeout;
        DEFINE_WAIT(wait);
        struct var_t *jiffy_delta;
        struct var_t *delay_time;
+       struct var_t *flush_time;
        int jiffy_delta_val;
        int delay_time_val;
+       int timeout_val;
 
        jiffy_delta = spk_get_var(JIFFY);
        delay_time = spk_get_var(DELAY);
+       flush_time = spk_get_var(FLUSH);
        spin_lock_irqsave(&speakup_info.spinlock, flags);
        jiffy_delta_val = jiffy_delta->u.n.value;
+       timeout_val = flush_time->u.n.value;
        spin_unlock_irqrestore(&speakup_info.spinlock, flags);
+       timeout = msecs_to_jiffies(timeout_val);
        jiff_max = jiffies + jiffy_delta_val;
 
        while (!kthread_should_stop()) {
@@ -289,7 +298,7 @@ static void synth_flush(struct spk_synth *synth)
                synth->io_ops->synth_out(synth, ']');
        in_escape = 0;
        is_flushing = 1;
-       synth->io_ops->flush_buffer();
+       synth->io_ops->flush_buffer(synth);
        synth->io_ops->synth_out(synth, SYNTH_CLEAR);
 }
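
With the FLUSH variable plumbed in above, the flush timeout in
do_catch_up() becomes tunable instead of a hardcoded 4000 ms. A worked
sketch of the conversion, assuming the default value and HZ == 250:

        int timeout_val = 4000;         /* flush_time default */
        unsigned long timeout = msecs_to_jiffies(timeout_val);
        /* with HZ == 250, timeout == 1000 jiffies */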
 
index dbebed0..92838d3 100644 (file)
@@ -24,7 +24,7 @@
 #define PROCSPEECH 0x00
 
 static int synth_probe(struct spk_synth *synth);
-static void dtlk_release(void);
+static void dtlk_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -365,7 +365,7 @@ static int synth_probe(struct spk_synth *synth)
        return 0;
 }
 
-static void dtlk_release(void)
+static void dtlk_release(struct spk_synth *synth)
 {
        spk_stop_serial_interrupt();
        if (speakup_info.port_tts)
index 414827e..311f4aa 100644 (file)
@@ -24,7 +24,7 @@
 #define SYNTH_CLEAR 0x03
 
 static int synth_probe(struct spk_synth *synth);
-static void keynote_release(void);
+static void keynote_release(struct spk_synth *synth);
 static const char *synth_immediate(struct spk_synth *synth, const char *buf);
 static void do_catch_up(struct spk_synth *synth);
 static void synth_flush(struct spk_synth *synth);
@@ -295,7 +295,7 @@ static int synth_probe(struct spk_synth *synth)
        return 0;
 }
 
-static void keynote_release(void)
+static void keynote_release(struct spk_synth *synth)
 {
        spk_stop_serial_interrupt();
        if (synth_port)
index 3c59519..3e59b38 100644 (file)
@@ -132,7 +132,7 @@ static void synth_interrogate(struct spk_synth *synth)
 
        synth->synth_immediate(synth, "\x18\x01?");
        for (i = 0; i < 50; i++) {
-               buf[i] = synth->io_ops->synth_in();
+               buf[i] = synth->io_ops->synth_in(synth);
                if (i > 2 && buf[i] == 0x7f)
                        break;
        }
index 9a70295..c3f97c5 100644 (file)
@@ -24,7 +24,7 @@
 #define CLEAR_SYNTH 0x18
 
 static int softsynth_probe(struct spk_synth *synth);
-static void softsynth_release(void);
+static void softsynth_release(struct spk_synth *synth);
 static int softsynth_is_alive(struct spk_synth *synth);
 static unsigned char get_index(struct spk_synth *synth);
 
@@ -402,7 +402,7 @@ static int softsynth_probe(struct spk_synth *synth)
        return 0;
 }
 
-static void softsynth_release(void)
+static void softsynth_release(struct spk_synth *synth)
 {
        misc_deregister(&synth_device);
        misc_deregister(&synthu_device);
index 6e933bf..bd3d8dc 100644 (file)
@@ -117,8 +117,8 @@ static struct spk_synth synth_spkout = {
 
 static void synth_flush(struct spk_synth *synth)
 {
-       synth->io_ops->flush_buffer();
-       synth->io_ops->send_xchar(SYNTH_CLEAR);
+       synth->io_ops->flush_buffer(synth);
+       synth->io_ops->send_xchar(synth, SYNTH_CLEAR);
 }
 
 module_param_named(ser, synth_spkout.ser, int, 0444);
index 0f4bcbe..9da57ea 100644 (file)
@@ -34,8 +34,8 @@
 
 const struct old_serial_port *spk_serial_init(int index);
 void spk_stop_serial_interrupt(void);
-void spk_serial_release(void);
-void spk_ttyio_release(void);
+void spk_serial_release(struct spk_synth *synth);
+void spk_ttyio_release(struct spk_synth *synth);
 void spk_ttyio_register_ldisc(void);
 void spk_ttyio_unregister_ldisc(void);
 
index 835d174..9af1d4c 100644 (file)
@@ -12,14 +12,15 @@ struct spk_ldisc_data {
        char buf;
        struct completion completion;
        bool buf_free;
+       struct spk_synth *synth;
 };
 
-static struct spk_synth *spk_ttyio_synth;
-static struct tty_struct *speakup_tty;
-/* mutex to protect against speakup_tty disappearing from underneath us while
- * we are using it. this can happen when the device physically unplugged,
- * while in use. it also serialises access to speakup_tty.
+/*
+ * This allows spk_ttyio_ldisc_open() to detect whether the line
+ * discipline is being set on a speakup-driven device.
  */
+static struct tty_struct *speakup_tty;
+/* This mutex serializes the use of the global speakup_tty variable */
 static DEFINE_MUTEX(speakup_tty_mutex);
 
 static int ser_to_dev(int ser, dev_t *dev_no)
@@ -67,22 +68,20 @@ static int spk_ttyio_ldisc_open(struct tty_struct *tty)
 
 static void spk_ttyio_ldisc_close(struct tty_struct *tty)
 {
-       mutex_lock(&speakup_tty_mutex);
-       kfree(speakup_tty->disc_data);
-       speakup_tty = NULL;
-       mutex_unlock(&speakup_tty_mutex);
+       kfree(tty->disc_data);
 }
 
 static int spk_ttyio_receive_buf2(struct tty_struct *tty,
                                  const unsigned char *cp, char *fp, int count)
 {
        struct spk_ldisc_data *ldisc_data = tty->disc_data;
+       struct spk_synth *synth = ldisc_data->synth;
 
-       if (spk_ttyio_synth->read_buff_add) {
+       if (synth->read_buff_add) {
                int i;
 
                for (i = 0; i < count; i++)
-                       spk_ttyio_synth->read_buff_add(cp[i]);
+                       synth->read_buff_add(cp[i]);
 
                return count;
        }
@@ -114,11 +113,11 @@ static struct tty_ldisc_ops spk_ttyio_ldisc_ops = {
 
 static int spk_ttyio_out(struct spk_synth *in_synth, const char ch);
 static int spk_ttyio_out_unicode(struct spk_synth *in_synth, u16 ch);
-static void spk_ttyio_send_xchar(char ch);
-static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear);
-static unsigned char spk_ttyio_in(void);
-static unsigned char spk_ttyio_in_nowait(void);
-static void spk_ttyio_flush_buffer(void);
+static void spk_ttyio_send_xchar(struct spk_synth *in_synth, char ch);
+static void spk_ttyio_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear);
+static unsigned char spk_ttyio_in(struct spk_synth *in_synth);
+static unsigned char spk_ttyio_in_nowait(struct spk_synth *in_synth);
+static void spk_ttyio_flush_buffer(struct spk_synth *in_synth);
 static int spk_ttyio_wait_for_xmitr(struct spk_synth *in_synth);
 
 struct spk_io_ops spk_ttyio_ops = {
@@ -187,13 +186,17 @@ static int spk_ttyio_initialise_ldisc(struct spk_synth *synth)
        mutex_lock(&speakup_tty_mutex);
        speakup_tty = tty;
        ret = tty_set_ldisc(tty, N_SPEAKUP);
-       if (ret)
-               speakup_tty = NULL;
+       speakup_tty = NULL;
        mutex_unlock(&speakup_tty_mutex);
 
-       if (!ret)
+       if (!ret) {
                /* Success */
+               struct spk_ldisc_data *ldisc_data = tty->disc_data;
+
+               ldisc_data->synth = synth;
+               synth->dev = tty;
                return 0;
+       }
 
        pr_err("speakup: Failed to set N_SPEAKUP on tty\n");
 
@@ -221,29 +224,30 @@ void spk_ttyio_unregister_ldisc(void)
 
 static int spk_ttyio_out(struct spk_synth *in_synth, const char ch)
 {
-       mutex_lock(&speakup_tty_mutex);
-       if (in_synth->alive && speakup_tty && speakup_tty->ops->write) {
-               int ret = speakup_tty->ops->write(speakup_tty, &ch, 1);
-
-               mutex_unlock(&speakup_tty_mutex);
-               if (ret == 0)
-                       /* No room */
-                       return 0;
-               if (ret < 0) {
-                       pr_warn("%s: I/O error, deactivating speakup\n",
-                               in_synth->long_name);
-                       /* No synth any more, so nobody will restart TTYs,
-                        * and we thus need to do it ourselves.  Now that there
-                        * is no synth we can let application flood anyway
-                        */
-                       in_synth->alive = 0;
-                       speakup_start_ttys();
-                       return 0;
-               }
+       struct tty_struct *tty = in_synth->dev;
+       int ret;
+
+       if (!in_synth->alive || !tty->ops->write)
+               return 0;
+
+       ret = tty->ops->write(tty, &ch, 1);
+
+       if (ret == 0)
+               /* No room */
+               return 0;
+
+       if (ret > 0)
+               /* Success */
                return 1;
-       }
 
-       mutex_unlock(&speakup_tty_mutex);
+       pr_warn("%s: I/O error, deactivating speakup\n",
+               in_synth->long_name);
+       /* No synth any more, so nobody will restart TTYs,
+        * and we thus need to do it ourselves.  Now that there
+        * is no synth we can let application flood anyway
+        */
+       in_synth->alive = 0;
+       speakup_start_ttys();
        return 0;
 }
 
@@ -264,47 +268,20 @@ static int spk_ttyio_out_unicode(struct spk_synth *in_synth, u16 ch)
        return ret;
 }
 
-static int check_tty(struct tty_struct *tty)
-{
-       if (!tty) {
-               pr_warn("%s: I/O error, deactivating speakup\n",
-                       spk_ttyio_synth->long_name);
-               /* No synth any more, so nobody will restart TTYs, and we thus
-                * need to do it ourselves.  Now that there is no synth we can
-                * let application flood anyway
-                */
-               spk_ttyio_synth->alive = 0;
-               speakup_start_ttys();
-               return 1;
-       }
-
-       return 0;
-}
-
-static void spk_ttyio_send_xchar(char ch)
+static void spk_ttyio_send_xchar(struct spk_synth *in_synth, char ch)
 {
-       mutex_lock(&speakup_tty_mutex);
-       if (check_tty(speakup_tty)) {
-               mutex_unlock(&speakup_tty_mutex);
-               return;
-       }
+       struct tty_struct *tty = in_synth->dev;
 
-       if (speakup_tty->ops->send_xchar)
-               speakup_tty->ops->send_xchar(speakup_tty, ch);
-       mutex_unlock(&speakup_tty_mutex);
+       if (tty->ops->send_xchar)
+               tty->ops->send_xchar(tty, ch);
 }
 
-static void spk_ttyio_tiocmset(unsigned int set, unsigned int clear)
+static void spk_ttyio_tiocmset(struct spk_synth *in_synth, unsigned int set, unsigned int clear)
 {
-       mutex_lock(&speakup_tty_mutex);
-       if (check_tty(speakup_tty)) {
-               mutex_unlock(&speakup_tty_mutex);
-               return;
-       }
+       struct tty_struct *tty = in_synth->dev;
 
-       if (speakup_tty->ops->tiocmset)
-               speakup_tty->ops->tiocmset(speakup_tty, set, clear);
-       mutex_unlock(&speakup_tty_mutex);
+       if (tty->ops->tiocmset)
+               tty->ops->tiocmset(tty, set, clear);
 }
 
 static int spk_ttyio_wait_for_xmitr(struct spk_synth *in_synth)
@@ -312,9 +289,10 @@ static int spk_ttyio_wait_for_xmitr(struct spk_synth *in_synth)
        return 1;
 }
 
-static unsigned char ttyio_in(int timeout)
+static unsigned char ttyio_in(struct spk_synth *in_synth, int timeout)
 {
-       struct spk_ldisc_data *ldisc_data = speakup_tty->disc_data;
+       struct tty_struct *tty = in_synth->dev;
+       struct spk_ldisc_data *ldisc_data = tty->disc_data;
        char rv;
 
        if (!timeout) {
@@ -334,35 +312,29 @@ static unsigned char ttyio_in(int timeout)
        mb();
        ldisc_data->buf_free = true;
        /* Let TTY push more characters */
-       tty_schedule_flip(speakup_tty->port);
+       tty_schedule_flip(tty->port);
 
        return rv;
 }
 
-static unsigned char spk_ttyio_in(void)
+static unsigned char spk_ttyio_in(struct spk_synth *in_synth)
 {
-       return ttyio_in(SPK_SYNTH_TIMEOUT);
+       return ttyio_in(in_synth, SPK_SYNTH_TIMEOUT);
 }
 
-static unsigned char spk_ttyio_in_nowait(void)
+static unsigned char spk_ttyio_in_nowait(struct spk_synth *in_synth)
 {
-       u8 rv = ttyio_in(0);
+       u8 rv = ttyio_in(in_synth, 0);
 
        return (rv == 0xff) ? 0 : rv;
 }
 
-static void spk_ttyio_flush_buffer(void)
+static void spk_ttyio_flush_buffer(struct spk_synth *in_synth)
 {
-       mutex_lock(&speakup_tty_mutex);
-       if (check_tty(speakup_tty)) {
-               mutex_unlock(&speakup_tty_mutex);
-               return;
-       }
+       struct tty_struct *tty = in_synth->dev;
 
-       if (speakup_tty->ops->flush_buffer)
-               speakup_tty->ops->flush_buffer(speakup_tty);
-
-       mutex_unlock(&speakup_tty_mutex);
+       if (tty->ops->flush_buffer)
+               tty->ops->flush_buffer(tty);
 }
 
 int spk_ttyio_synth_probe(struct spk_synth *synth)
@@ -373,37 +345,38 @@ int spk_ttyio_synth_probe(struct spk_synth *synth)
                return rv;
 
        synth->alive = 1;
-       spk_ttyio_synth = synth;
 
        return 0;
 }
 EXPORT_SYMBOL_GPL(spk_ttyio_synth_probe);
 
-void spk_ttyio_release(void)
+void spk_ttyio_release(struct spk_synth *in_synth)
 {
-       if (!speakup_tty)
-               return;
+       struct tty_struct *tty = in_synth->dev;
 
-       tty_lock(speakup_tty);
+       tty_lock(tty);
 
-       if (speakup_tty->ops->close)
-               speakup_tty->ops->close(speakup_tty, NULL);
+       if (tty->ops->close)
+               tty->ops->close(tty, NULL);
+
+       tty_ldisc_flush(tty);
+       tty_unlock(tty);
+       tty_kclose(tty);
 
-       tty_ldisc_flush(speakup_tty);
-       tty_unlock(speakup_tty);
-       tty_kclose(speakup_tty);
+       in_synth->dev = NULL;
 }
 EXPORT_SYMBOL_GPL(spk_ttyio_release);
 
-const char *spk_ttyio_synth_immediate(struct spk_synth *synth, const char *buff)
+const char *spk_ttyio_synth_immediate(struct spk_synth *in_synth, const char *buff)
 {
+       struct tty_struct *tty = in_synth->dev;
        u_char ch;
 
        while ((ch = *buff)) {
                if (ch == '\n')
-                       ch = synth->procspeech;
-               if (tty_write_room(speakup_tty) < 1 ||
-                   !synth->io_ops->synth_out(synth, ch))
+                       ch = in_synth->procspeech;
+               if (tty_write_room(tty) < 1 ||
+                   !in_synth->io_ops->synth_out(in_synth, ch))
                        return buff;
                buff++;
        }
index 91fca30..6a96ad9 100644 (file)
@@ -48,7 +48,7 @@ enum var_id_t {
        ATTRIB_BLEEP, BLEEPS,
        RATE, PITCH, VOL, TONE, PUNCT, VOICE, FREQUENCY, LANG,
        DIRECT, PAUSE,
-       CAPS_START, CAPS_STOP, CHARTAB, INFLECTION,
+       CAPS_START, CAPS_STOP, CHARTAB, INFLECTION, FLUSH,
        MAXVARS
 };
 
@@ -157,11 +157,11 @@ struct spk_synth;
 struct spk_io_ops {
        int (*synth_out)(struct spk_synth *synth, const char ch);
        int (*synth_out_unicode)(struct spk_synth *synth, u16 ch);
-       void (*send_xchar)(char ch);
-       void (*tiocmset)(unsigned int set, unsigned int clear);
-       unsigned char (*synth_in)(void);
-       unsigned char (*synth_in_nowait)(void);
-       void (*flush_buffer)(void);
+       void (*send_xchar)(struct spk_synth *synth, char ch);
+       void (*tiocmset)(struct spk_synth *synth, unsigned int set, unsigned int clear);
+       unsigned char (*synth_in)(struct spk_synth *synth);
+       unsigned char (*synth_in_nowait)(struct spk_synth *synth);
+       void (*flush_buffer)(struct spk_synth *synth);
        int (*wait_for_xmitr)(struct spk_synth *synth);
 };
 
@@ -178,6 +178,7 @@ struct spk_synth {
        int trigger;
        int jiffies;
        int full;
+       int flush_time;
        int ser;
        char *dev_name;
        short flags;
@@ -188,7 +189,7 @@ struct spk_synth {
        int *default_vol;
        struct spk_io_ops *io_ops;
        int (*probe)(struct spk_synth *synth);
-       void (*release)(void);
+       void (*release)(struct spk_synth *synth);
        const char *(*synth_immediate)(struct spk_synth *synth,
                                       const char *buff);
        void (*catch_up)(struct spk_synth *synth);
@@ -200,6 +201,8 @@ struct spk_synth {
        struct synth_indexing indexing;
        int alive;
        struct attribute_group attributes;
+
+       void *dev;
 };
 
 /*
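
The new dev member is a bare void * so that non-tty backends can leave it
unused; for ttyio-based synths it carries the tty installed at ldisc
setup. A hedged sketch of the access pattern (spk_synth_tty() is a
hypothetical helper; the driver open-codes the assignment instead):

        static inline struct tty_struct *spk_synth_tty(struct spk_synth *synth)
        {
                return synth->dev;
        }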
index ac47dba..2b86996 100644 (file)
@@ -137,14 +137,14 @@ EXPORT_SYMBOL_GPL(spk_do_catch_up_unicode);
 
 void spk_synth_flush(struct spk_synth *synth)
 {
-       synth->io_ops->flush_buffer();
+       synth->io_ops->flush_buffer(synth);
        synth->io_ops->synth_out(synth, synth->clear);
 }
 EXPORT_SYMBOL_GPL(spk_synth_flush);
 
 unsigned char spk_synth_get_index(struct spk_synth *synth)
 {
-       return synth->io_ops->synth_in_nowait();
+       return synth->io_ops->synth_in_nowait(synth);
 }
 EXPORT_SYMBOL_GPL(spk_synth_get_index);
 
@@ -348,6 +348,7 @@ struct var_t synth_time_vars[] = {
        { TRIGGER, .u.n = {NULL, 20, 10, 2000, 0, 0, NULL } },
        { JIFFY, .u.n = {NULL, 50, 20, 200, 0, 0, NULL } },
        { FULL, .u.n = {NULL, 400, 200, 60000, 0, 0, NULL } },
+       { FLUSH, .u.n = {NULL, 4000, 100, 4000, 0, 0, NULL } },
        V_LAST_VAR
 };
 
@@ -408,6 +409,8 @@ static int do_synth_init(struct spk_synth *in_synth)
                synth_time_vars[2].u.n.default_val = synth->jiffies;
        synth_time_vars[3].u.n.value =
                synth_time_vars[3].u.n.default_val = synth->full;
+       synth_time_vars[4].u.n.value =
+               synth_time_vars[4].u.n.default_val = synth->flush_time;
        synth_printf("%s", synth->init);
        for (var = synth->vars;
                (var->var_id >= 0) && (var->var_id < MAXVARS); var++)
@@ -440,7 +443,7 @@ void synth_release(void)
                sysfs_remove_group(speakup_kobj, &synth->attributes);
        for (var = synth->vars; var->var_id != MAXVARS; var++)
                speakup_unregister_var(var->var_id);
-       synth->release();
+       synth->release(synth);
        synth = NULL;
 }
 
index d7f6bec..067c0da 100644 (file)
@@ -23,6 +23,7 @@ static struct st_var_header var_headers[] = {
        { "trigger_time", TRIGGER, VAR_TIME, NULL, NULL },
        { "jiffy_delta", JIFFY, VAR_TIME, NULL, NULL },
        { "full_time", FULL, VAR_TIME, NULL, NULL },
+       { "flush_time", FLUSH, VAR_TIME, NULL, NULL },
        { "spell_delay", SPELL_DELAY, VAR_NUM, &spk_spell_delay, NULL },
        { "bleeps", BLEEPS, VAR_NUM, &spk_bleeps, NULL },
        { "attrib_bleep", ATTRIB_BLEEP, VAR_NUM, &spk_attrib_bleep, NULL },
index fc06945..eedec61 100644 (file)
@@ -87,6 +87,14 @@ config ACPI_SPCR_TABLE
          This table provides information about the configuration of the
          earlycon console.
 
+config ACPI_FPDT
+       bool "ACPI Firmware Performance Data Table (FPDT) support"
+       depends on X86_64
+       help
+         Enable support for the Firmware Performance Data Table (FPDT).
+         This table provides information on the timing of the system
+         boot, S3 suspend and S3 resume firmware code paths.
+
 config ACPI_LPIT
        bool
        depends on X86_64
@@ -327,21 +335,7 @@ config ACPI_THERMAL
          the module will be called thermal.
 
 config ACPI_PLATFORM_PROFILE
-       tristate "ACPI Platform Profile Driver"
-       default m
-       help
-         This driver adds support for platform-profiles on platforms that
-         support it.
-
-         Platform-profiles can be used to control the platform behaviour. For
-         example whether to operate in a lower power mode, in a higher
-         power performance mode or between the two.
-
-         This driver provides the sysfs interface and is used as the registration
-         point for platform specific drivers.
-
-         Which profiles are supported is determined on a per-platform basis and
-         should be obtained from the platform specific driver.
+       tristate
 
 config ACPI_CUSTOM_DSDT_FILE
        string "Custom DSDT Table file to include"
index 52b627c..700b41a 100644 (file)
@@ -57,6 +57,7 @@ acpi-$(CONFIG_X86)            += x86/utils.o
 acpi-$(CONFIG_X86)             += x86/s2idle.o
 acpi-$(CONFIG_DEBUG_FS)                += debugfs.o
 acpi-y                         += acpi_lpat.o
+acpi-$(CONFIG_ACPI_FPDT)       += acpi_fpdt.o
 acpi-$(CONFIG_ACPI_LPIT)       += acpi_lpit.o
 acpi-$(CONFIG_ACPI_GENERIC_GSI) += irq.o
 acpi-$(CONFIG_ACPI_WATCHDOG)   += acpi_watchdog.o
diff --git a/drivers/acpi/acpi_fpdt.c b/drivers/acpi/acpi_fpdt.c
new file mode 100644 (file)
index 0000000..a89a806
--- /dev/null
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+/*
+ * FPDT support for exporting boot and suspend/resume performance data
+ *
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ */
+
+#define pr_fmt(fmt) "ACPI FPDT: " fmt
+
+#include <linux/acpi.h>
+
+/*
+ * The FPDT consists of an ACPI table header followed by a number of
+ * fpdt_subtable_entries. Each fpdt_subtable_entry points to a subtable:
+ * FBPT or S3PT. Each FPDT subtable (FBPT/S3PT) is composed of an
+ * fpdt_subtable_header and a number of FPDT performance records.
+ * Each FPDT performance record is composed of an fpdt_record_header and
+ * performance data fields for the boot, suspend or resume phase.
+ */
+enum fpdt_subtable_type {
+       SUBTABLE_FBPT,
+       SUBTABLE_S3PT,
+};
+
+struct fpdt_subtable_entry {
+       u16 type;               /* refer to enum fpdt_subtable_type */
+       u8 length;
+       u8 revision;
+       u32 reserved;
+       u64 address;            /* physical address of the S3PT/FBPT table */
+};
+
+struct fpdt_subtable_header {
+       u32 signature;
+       u32 length;
+};
+
+enum fpdt_record_type {
+       RECORD_S3_RESUME,
+       RECORD_S3_SUSPEND,
+       RECORD_BOOT,
+};
+
+struct fpdt_record_header {
+       u16 type;               /* refer to enum fpdt_record_type */
+       u8 length;
+       u8 revision;
+};
+
+struct resume_performance_record {
+       struct fpdt_record_header header;
+       u32 resume_count;
+       u64 resume_prev;
+       u64 resume_avg;
+} __attribute__((packed));
+
+struct boot_performance_record {
+       struct fpdt_record_header header;
+       u32 reserved;
+       u64 firmware_start;
+       u64 bootloader_load;
+       u64 bootloader_launch;
+       u64 exitbootservice_start;
+       u64 exitbootservice_end;
+} __attribute__((packed));
+
+struct suspend_performance_record {
+       struct fpdt_record_header header;
+       u64 suspend_start;
+       u64 suspend_end;
+} __attribute__((packed));
+
+
+static struct resume_performance_record *record_resume;
+static struct suspend_performance_record *record_suspend;
+static struct boot_performance_record *record_boot;
+
+#define FPDT_ATTR(phase, name) \
+static ssize_t name##_show(struct kobject *kobj,       \
+                struct kobj_attribute *attr, char *buf)        \
+{      \
+       return sprintf(buf, "%llu\n", record_##phase->name);    \
+}      \
+static struct kobj_attribute name##_attr =     \
+__ATTR(name##_ns, 0444, name##_show, NULL)
+
+FPDT_ATTR(resume, resume_prev);
+FPDT_ATTR(resume, resume_avg);
+FPDT_ATTR(suspend, suspend_start);
+FPDT_ATTR(suspend, suspend_end);
+FPDT_ATTR(boot, firmware_start);
+FPDT_ATTR(boot, bootloader_load);
+FPDT_ATTR(boot, bootloader_launch);
+FPDT_ATTR(boot, exitbootservice_start);
+FPDT_ATTR(boot, exitbootservice_end);
+
+static ssize_t resume_count_show(struct kobject *kobj,
+                                struct kobj_attribute *attr, char *buf)
+{
+       return sprintf(buf, "%u\n", record_resume->resume_count);
+}
+
+static struct kobj_attribute resume_count_attr =
+__ATTR_RO(resume_count);
+
+static struct attribute *resume_attrs[] = {
+       &resume_count_attr.attr,
+       &resume_prev_attr.attr,
+       &resume_avg_attr.attr,
+       NULL
+};
+
+static const struct attribute_group resume_attr_group = {
+       .attrs = resume_attrs,
+       .name = "resume",
+};
+
+static struct attribute *suspend_attrs[] = {
+       &suspend_start_attr.attr,
+       &suspend_end_attr.attr,
+       NULL
+};
+
+static const struct attribute_group suspend_attr_group = {
+       .attrs = suspend_attrs,
+       .name = "suspend",
+};
+
+static struct attribute *boot_attrs[] = {
+       &firmware_start_attr.attr,
+       &bootloader_load_attr.attr,
+       &bootloader_launch_attr.attr,
+       &exitbootservice_start_attr.attr,
+       &exitbootservice_end_attr.attr,
+       NULL
+};
+
+static const struct attribute_group boot_attr_group = {
+       .attrs = boot_attrs,
+       .name = "boot",
+};
+
+static struct kobject *fpdt_kobj;
+
+static int fpdt_process_subtable(u64 address, u32 subtable_type)
+{
+       struct fpdt_subtable_header *subtable_header;
+       struct fpdt_record_header *record_header;
+       char *signature = (subtable_type == SUBTABLE_FBPT ? "FBPT" : "S3PT");
+       u32 length, offset;
+       int result;
+
+       subtable_header = acpi_os_map_memory(address, sizeof(*subtable_header));
+       if (!subtable_header)
+               return -ENOMEM;
+
+       if (strncmp((char *)&subtable_header->signature, signature, 4)) {
+               pr_info(FW_BUG "subtable signature and type mismatch!\n");
+               return -EINVAL;
+       }
+
+       length = subtable_header->length;
+       acpi_os_unmap_memory(subtable_header, sizeof(*subtable_header));
+
+       subtable_header = acpi_os_map_memory(address, length);
+       if (!subtable_header)
+               return -ENOMEM;
+
+       offset = sizeof(*subtable_header);
+       while (offset < length) {
+               record_header = (void *)subtable_header + offset;
+               offset += record_header->length;
+
+               switch (record_header->type) {
+               case RECORD_S3_RESUME:
+                       if (subtable_type != SUBTABLE_S3PT) {
+                               pr_err(FW_BUG "Invalid record %d for subtable %s\n",
+                                    record_header->type, signature);
+                               return -EINVAL;
+                       }
+                       if (record_resume) {
+                               pr_err("Duplicate resume performance record found.\n");
+                               continue;
+                       }
+                       record_resume = (struct resume_performance_record *)record_header;
+                       result = sysfs_create_group(fpdt_kobj, &resume_attr_group);
+                       if (result)
+                               return result;
+                       break;
+               case RECORD_S3_SUSPEND:
+                       if (subtable_type != SUBTABLE_S3PT) {
+                               pr_err(FW_BUG "Invalid %d for subtable %s\n",
+                                    record_header->type, signature);
+                               continue;
+                       }
+                       if (record_suspend) {
+                               pr_err("Duplicate suspend performance record found.\n");
+                               continue;
+                       }
+                       record_suspend = (struct suspend_performance_record *)record_header;
+                       result = sysfs_create_group(fpdt_kobj, &suspend_attr_group);
+                       if (result)
+                               return result;
+                       break;
+               case RECORD_BOOT:
+                       if (subtable_type != SUBTABLE_FBPT) {
+                               pr_err(FW_BUG "Invalid %d for subtable %s\n",
+                                    record_header->type, signature);
+                               return -EINVAL;
+                       }
+                       if (record_boot) {
+                               pr_err("Duplicate boot performance record found.\n");
+                               continue;
+                       }
+                       record_boot = (struct boot_performance_record *)record_header;
+                       result = sysfs_create_group(fpdt_kobj, &boot_attr_group);
+                       if (result)
+                               return result;
+                       break;
+
+               default:
+                       pr_err(FW_BUG "Invalid record %d found.\n", record_header->type);
+                       return -EINVAL;
+               }
+       }
+       return 0;
+}
+
+static int __init acpi_init_fpdt(void)
+{
+       acpi_status status;
+       struct acpi_table_header *header;
+       struct fpdt_subtable_entry *subtable;
+       u32 offset = sizeof(*header);
+
+       status = acpi_get_table(ACPI_SIG_FPDT, 0, &header);
+
+       if (ACPI_FAILURE(status))
+               return 0;
+
+       fpdt_kobj = kobject_create_and_add("fpdt", acpi_kobj);
+       if (!fpdt_kobj)
+               return -ENOMEM;
+
+       while (offset < header->length) {
+               subtable = (void *)header + offset;
+               switch (subtable->type) {
+               case SUBTABLE_FBPT:
+               case SUBTABLE_S3PT:
+                       fpdt_process_subtable(subtable->address,
+                                             subtable->type);
+                       break;
+               default:
+                       pr_info(FW_BUG "Invalid subtable type %d found.\n",
+                              subtable->type);
+                       break;
+               }
+               offset += sizeof(*subtable);
+       }
+       return 0;
+}
+
+fs_initcall(acpi_init_fpdt);
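
Once the table parses, the records surface as read-only sysfs attributes
under /sys/firmware/acpi/fpdt/{boot,suspend,resume}, named per the
name##_ns convention above. A minimal, runnable userspace sketch reading
one boot timestamp:

        #include <stdio.h>

        int main(void)
        {
                FILE *f = fopen("/sys/firmware/acpi/fpdt/boot/firmware_start_ns",
                                "r");
                unsigned long long ns;

                if (!f)
                        return 1;
                if (fscanf(f, "%llu", &ns) == 1)
                        printf("firmware start: %llu ns\n", ns);
                fclose(f);
                return 0;
        }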
index af47a3f..9db5ae0 100644 (file)
@@ -284,6 +284,7 @@ struct acpi_object_addr_handler {
        acpi_adr_space_handler handler;
        struct acpi_namespace_node *node;       /* Parent device */
        void *context;
+       acpi_mutex context_mutex;
        acpi_adr_space_setup setup;
        union acpi_operand_object *region_list; /* Regions using this handler */
        union acpi_operand_object *next;
index ea9485e..c0cd714 100644 (file)
@@ -489,6 +489,13 @@ acpi_ev_install_space_handler(struct acpi_namespace_node *node,
 
        /* Init handler obj */
 
+       status =
+           acpi_os_create_mutex(&handler_obj->address_space.context_mutex);
+       if (ACPI_FAILURE(status)) {
+               acpi_ut_remove_reference(handler_obj);
+               goto unlock_and_exit;
+       }
+
        handler_obj->address_space.space_id = (u8)space_id;
        handler_obj->address_space.handler_flags = flags;
        handler_obj->address_space.region_list = NULL;
index 3ed7d9a..4ef43c8 100644 (file)
@@ -112,6 +112,8 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
        union acpi_operand_object *region_obj2;
        void *region_context = NULL;
        struct acpi_connection_info *context;
+       acpi_mutex context_mutex;
+       u8 context_locked;
        acpi_physical_address address;
 
        ACPI_FUNCTION_TRACE(ev_address_space_dispatch);
@@ -136,6 +138,8 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
        }
 
        context = handler_desc->address_space.context;
+       context_mutex = handler_desc->address_space.context_mutex;
+       context_locked = FALSE;
 
        /*
         * It may be the case that the region has never been initialized.
@@ -204,6 +208,23 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
        handler = handler_desc->address_space.handler;
        address = (region_obj->region.address + region_offset);
 
+       ACPI_DEBUG_PRINT((ACPI_DB_OPREGION,
+                         "Handler %p (@%p) Address %8.8X%8.8X [%s]\n",
+                         &region_obj->region.handler->address_space, handler,
+                         ACPI_FORMAT_UINT64(address),
+                         acpi_ut_get_region_name(region_obj->region.
+                                                 space_id)));
+
+       if (!(handler_desc->address_space.handler_flags &
+             ACPI_ADDR_HANDLER_DEFAULT_INSTALLED)) {
+               /*
+                * For handlers other than the default (supplied) handlers, we must
+                * exit the interpreter because the handler *might* block -- we don't
+                * know what it will do, so we can't hold the lock on the interpreter.
+                */
+               acpi_ex_exit_interpreter();
+       }
+
        /*
         * Special handling for generic_serial_bus and general_purpose_io:
         * There are three extra parameters that must be passed to the
@@ -212,48 +233,39 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
         *   2) Length of the above buffer
         *   3) Actual access length from the access_as() op
         *
+        * Since we pass these extra parameters via the context, which is
+        * shared between threads, we must lock the context to avoid these
+        * parameters being changed from another thread before the handler
+        * has completed running.
+        *
         * In addition, for general_purpose_io, the Address and bit_width fields
         * are defined as follows:
         *   1) Address is the pin number index of the field (bit offset from
         *      the previous Connection)
         *   2) bit_width is the actual bit length of the field (number of pins)
         */
-       if ((region_obj->region.space_id == ACPI_ADR_SPACE_GSBUS) &&
+       if ((region_obj->region.space_id == ACPI_ADR_SPACE_GSBUS ||
+            region_obj->region.space_id == ACPI_ADR_SPACE_GPIO) &&
            context && field_obj) {
 
-               /* Get the Connection (resource_template) buffer */
+               status =
+                   acpi_os_acquire_mutex(context_mutex, ACPI_WAIT_FOREVER);
+               if (ACPI_FAILURE(status)) {
+                       goto re_enter_interpreter;
+               }
 
-               context->connection = field_obj->field.resource_buffer;
-               context->length = field_obj->field.resource_length;
-               context->access_length = field_obj->field.access_length;
-       }
-       if ((region_obj->region.space_id == ACPI_ADR_SPACE_GPIO) &&
-           context && field_obj) {
+               context_locked = TRUE;
 
                /* Get the Connection (resource_template) buffer */
 
                context->connection = field_obj->field.resource_buffer;
                context->length = field_obj->field.resource_length;
                context->access_length = field_obj->field.access_length;
-               address = field_obj->field.pin_number_index;
-               bit_width = field_obj->field.bit_length;
-       }
-
-       ACPI_DEBUG_PRINT((ACPI_DB_OPREGION,
-                         "Handler %p (@%p) Address %8.8X%8.8X [%s]\n",
-                         &region_obj->region.handler->address_space, handler,
-                         ACPI_FORMAT_UINT64(address),
-                         acpi_ut_get_region_name(region_obj->region.
-                                                 space_id)));
 
-       if (!(handler_desc->address_space.handler_flags &
-             ACPI_ADDR_HANDLER_DEFAULT_INSTALLED)) {
-               /*
-                * For handlers other than the default (supplied) handlers, we must
-                * exit the interpreter because the handler *might* block -- we don't
-                * know what it will do, so we can't hold the lock on the interpreter.
-                */
-               acpi_ex_exit_interpreter();
+               if (region_obj->region.space_id == ACPI_ADR_SPACE_GPIO) {
+                       address = field_obj->field.pin_number_index;
+                       bit_width = field_obj->field.bit_length;
+               }
        }
 
        /* Call the handler */
@@ -261,6 +273,10 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
        status = handler(function, address, bit_width, value, context,
                         region_obj2->extra.region_context);
 
+       if (context_locked) {
+               acpi_os_release_mutex(context_mutex);
+       }
+
        if (ACPI_FAILURE(status)) {
                ACPI_EXCEPTION((AE_INFO, status, "Returned by Handler for [%s]",
                                acpi_ut_get_region_name(region_obj->region.
@@ -277,6 +293,7 @@ acpi_ev_address_space_dispatch(union acpi_operand_object *region_obj,
                }
        }
 
+re_enter_interpreter:
        if (!(handler_desc->address_space.handler_flags &
              ACPI_ADDR_HANDLER_DEFAULT_INSTALLED)) {
                /*
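
The hunk above pins the shared handler context for the whole handler call: the extra GSBUS/GPIO parameters live in a context structure visible to every thread, so they are now filled in and consumed under context_mutex, with the early-exit path rejoining interpreter handling through the new re_enter_interpreter label. A minimal userspace sketch of the same lock-across-callback pattern, with a pthread mutex standing in for acpi_os_acquire_mutex()/acpi_os_release_mutex() and an invented handler_ctx type:

    #include <pthread.h>
    #include <stdio.h>

    /* Stand-in for the per-handler context shared between threads. */
    struct handler_ctx {
            pthread_mutex_t lock;
            unsigned int length;    /* connection/access_length elided */
    };

    static int dispatch(struct handler_ctx *ctx,
                        int (*handler)(struct handler_ctx *))
    {
            int locked = 0;
            int status;

            /* Publish the per-field parameters only while holding the lock. */
            if (pthread_mutex_lock(&ctx->lock) != 0)
                    return -1;
            locked = 1;
            ctx->length = 4;

            status = handler(ctx);  /* ctx cannot change under the handler */

            if (locked)
                    pthread_mutex_unlock(&ctx->lock);
            return status;
    }

    static int demo_handler(struct handler_ctx *ctx)
    {
            printf("handler sees length=%u\n", ctx->length);
            return 0;
    }

    int main(void)
    {
            struct handler_ctx ctx = { .lock = PTHREAD_MUTEX_INITIALIZER };
            return dispatch(&ctx, demo_handler);
    }
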
index 7672d70..b1ff0a8 100644 (file)
@@ -201,6 +201,8 @@ acpi_remove_address_space_handler(acpi_handle device,
 
                        /* Now we can delete the handler object */
 
+                       acpi_os_release_mutex(handler_obj->address_space.
+                                             context_mutex);
                        acpi_ut_remove_reference(handler_obj);
                        goto unlock_and_exit;
                }
index 3f045b5..a0c1a66 100644 (file)
@@ -99,13 +99,12 @@ acpi_status acpi_ns_root_initialize(void)
                 * just create and link the new node(s) here.
                 */
                new_node =
-                   ACPI_ALLOCATE_ZEROED(sizeof(struct acpi_namespace_node));
+                   acpi_ns_create_node(*ACPI_CAST_PTR(u32, init_val->name));
                if (!new_node) {
                        status = AE_NO_MEMORY;
                        goto unlock_and_exit;
                }
 
-               ACPI_COPY_NAMESEG(new_node->name.ascii, init_val->name);
                new_node->descriptor_type = ACPI_DESC_TYPE_NAMED;
                new_node->type = init_val->type;
 
index e6a5d99..cb8f708 100644 (file)
@@ -9,6 +9,8 @@
 #ifndef _ACPI_INTERNAL_H_
 #define _ACPI_INTERNAL_H_
 
+#include <linux/idr.h>
+
 #define PREFIX "ACPI: "
 
 int early_acpi_osi_init(void);
@@ -96,9 +98,11 @@ void acpi_scan_table_handler(u32 event, void *table, void *context);
 
 extern struct list_head acpi_bus_id_list;
 
+#define ACPI_MAX_DEVICE_INSTANCES      4096
+
 struct acpi_device_bus_id {
        const char *bus_id;
-       unsigned int instance_no;
+       struct ida instance_ida;
        struct list_head node;
 };
 
index 0bf072c..dcd5937 100644 (file)
@@ -56,8 +56,6 @@ static struct acpi_scan_handler pci_root_handler = {
        },
 };
 
-static DEFINE_MUTEX(osc_lock);
-
 /**
  * acpi_is_root_bridge - determine whether an ACPI CA node is a PCI root bridge
  * @handle:  the ACPI CA node in question.
@@ -223,12 +221,7 @@ static acpi_status acpi_pci_query_osc(struct acpi_pci_root *root,
 
 static acpi_status acpi_pci_osc_support(struct acpi_pci_root *root, u32 flags)
 {
-       acpi_status status;
-
-       mutex_lock(&osc_lock);
-       status = acpi_pci_query_osc(root, flags, NULL);
-       mutex_unlock(&osc_lock);
-       return status;
+       return acpi_pci_query_osc(root, flags, NULL);
 }
 
 struct acpi_pci_root *acpi_pci_find_root(acpi_handle handle)
@@ -353,10 +346,10 @@ EXPORT_SYMBOL_GPL(acpi_get_pci_dev);
  * _OSC bits the BIOS has granted control of, but its contents are meaningless
  * on failure.
  **/
-acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req)
+static acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req)
 {
        struct acpi_pci_root *root;
-       acpi_status status = AE_OK;
+       acpi_status status;
        u32 ctrl, capbuf[3];
 
        if (!mask)
@@ -370,18 +363,16 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req)
        if (!root)
                return AE_NOT_EXIST;
 
-       mutex_lock(&osc_lock);
-
        *mask = ctrl | root->osc_control_set;
        /* No need to evaluate _OSC if the control was already granted. */
        if ((root->osc_control_set & ctrl) == ctrl)
-               goto out;
+               return AE_OK;
 
        /* Need to check the available controls bits before requesting them. */
        while (*mask) {
                status = acpi_pci_query_osc(root, root->osc_support_set, mask);
                if (ACPI_FAILURE(status))
-                       goto out;
+                       return status;
                if (ctrl == *mask)
                        break;
                decode_osc_control(root, "platform does not support",
@@ -392,21 +383,19 @@ acpi_status acpi_pci_osc_control_set(acpi_handle handle, u32 *mask, u32 req)
        if ((ctrl & req) != req) {
                decode_osc_control(root, "not requesting control; platform does not support",
                                   req & ~(ctrl));
-               status = AE_SUPPORT;
-               goto out;
+               return AE_SUPPORT;
        }
 
        capbuf[OSC_QUERY_DWORD] = 0;
        capbuf[OSC_SUPPORT_DWORD] = root->osc_support_set;
        capbuf[OSC_CONTROL_DWORD] = ctrl;
        status = acpi_pci_run_osc(handle, capbuf, mask);
-       if (ACPI_SUCCESS(status))
-               root->osc_control_set = *mask;
-out:
-       mutex_unlock(&osc_lock);
-       return status;
+       if (ACPI_FAILURE(status))
+               return status;
+
+       root->osc_control_set = *mask;
+       return AE_OK;
 }
-EXPORT_SYMBOL(acpi_pci_osc_control_set);
 
 static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm,
                                 bool is_pcie)
@@ -452,9 +441,8 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm,
                if ((status == AE_NOT_FOUND) && !is_pcie)
                        return;
 
-               dev_info(&device->dev, "_OSC failed (%s)%s\n",
-                        acpi_format_exception(status),
-                        pcie_aspm_support_enabled() ? "; disabling ASPM" : "");
+               dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n",
+                        acpi_format_exception(status));
                return;
        }
 
@@ -510,7 +498,7 @@ static void negotiate_os_control(struct acpi_pci_root *root, int *no_aspm,
        } else {
                decode_osc_control(root, "OS requested", requested);
                decode_osc_control(root, "platform willing to grant", control);
-               dev_info(&device->dev, "_OSC failed (%s); disabling ASPM\n",
+               dev_info(&device->dev, "_OSC: platform retains control of PCIe features (%s)\n",
                        acpi_format_exception(status));
 
                /*
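
With the global osc_lock gone, acpi_pci_osc_control_set() no longer needs a single unlock-on-exit label, so the goto-out idiom in the hunks above collapses into plain early returns. A generic before/after sketch of that refactor (struct state, do_query() and apply() are invented stand-ins, not the ACPI code):

    #include <pthread.h>

    struct state { pthread_mutex_t lock; int granted; };

    static int do_query(struct state *s) { (void)s; return 0; }
    static int apply(struct state *s)    { s->granted = 1; return 0; }

    /* Before: one exit label so the lock is always dropped. */
    static int op_locked(struct state *s)
    {
            int err;

            pthread_mutex_lock(&s->lock);
            err = do_query(s);
            if (err)
                    goto out;
            err = apply(s);
    out:
            pthread_mutex_unlock(&s->lock);
            return err;
    }

    /* After the lock is removed: each failure can simply return. */
    static int op(struct state *s)
    {
            int err = do_query(s);

            if (err)
                    return err;
            return apply(s);
    }

    int main(void)
    {
            struct state s = { .lock = PTHREAD_MUTEX_INITIALIZER };
            return op_locked(&s) || op(&s);
    }
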
index 4a59c59..dd2fbf3 100644 (file)
@@ -17,6 +17,7 @@ static const char * const profile_names[] = {
        [PLATFORM_PROFILE_COOL] = "cool",
        [PLATFORM_PROFILE_QUIET] = "quiet",
        [PLATFORM_PROFILE_BALANCED] = "balanced",
+       [PLATFORM_PROFILE_BALANCED_PERFORMANCE] = "balanced-performance",
        [PLATFORM_PROFILE_PERFORMANCE] = "performance",
 };
 static_assert(ARRAY_SIZE(profile_names) == PLATFORM_PROFILE_LAST);
index d93e400..4e2d76b 100644 (file)
@@ -29,6 +29,7 @@
  */
 #ifdef CONFIG_X86
 #include <asm/apic.h>
+#include <asm/cpu.h>
 #endif
 
 #define _COMPONENT              ACPI_PROCESSOR_COMPONENT
@@ -541,6 +542,10 @@ static int acpi_idle_play_dead(struct cpuidle_device *dev, int index)
                        wait_for_freeze();
                } else
                        return -ENODEV;
+
+#if defined(CONFIG_X86) && defined(CONFIG_HOTPLUG_CPU)
+               cond_wakeup_cpu0();
+#endif
        }
 
        /* Never reached */
index a184529..6efe7ed 100644 (file)
@@ -479,9 +479,8 @@ static void acpi_device_del(struct acpi_device *device)
        list_for_each_entry(acpi_device_bus_id, &acpi_bus_id_list, node)
                if (!strcmp(acpi_device_bus_id->bus_id,
                            acpi_device_hid(device))) {
-                       if (acpi_device_bus_id->instance_no > 0)
-                               acpi_device_bus_id->instance_no--;
-                       else {
+                       ida_simple_remove(&acpi_device_bus_id->instance_ida, device->pnp.instance_no);
+                       if (ida_is_empty(&acpi_device_bus_id->instance_ida)) {
                                list_del(&acpi_device_bus_id->node);
                                kfree_const(acpi_device_bus_id->bus_id);
                                kfree(acpi_device_bus_id);
@@ -631,6 +630,21 @@ static struct acpi_device_bus_id *acpi_device_bus_id_match(const char *dev_id)
        return NULL;
 }
 
+static int acpi_device_set_name(struct acpi_device *device,
+                               struct acpi_device_bus_id *acpi_device_bus_id)
+{
+       struct ida *instance_ida = &acpi_device_bus_id->instance_ida;
+       int result;
+
+       result = ida_simple_get(instance_ida, 0, ACPI_MAX_DEVICE_INSTANCES, GFP_KERNEL);
+       if (result < 0)
+               return result;
+
+       device->pnp.instance_no = result;
+       dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, result);
+       return 0;
+}
+
 int acpi_device_add(struct acpi_device *device,
                    void (*release)(struct device *))
 {
@@ -665,7 +679,9 @@ int acpi_device_add(struct acpi_device *device,
 
        acpi_device_bus_id = acpi_device_bus_id_match(acpi_device_hid(device));
        if (acpi_device_bus_id) {
-               acpi_device_bus_id->instance_no++;
+               result = acpi_device_set_name(device, acpi_device_bus_id);
+               if (result)
+                       goto err_unlock;
        } else {
                acpi_device_bus_id = kzalloc(sizeof(*acpi_device_bus_id),
                                             GFP_KERNEL);
@@ -681,9 +697,16 @@ int acpi_device_add(struct acpi_device *device,
                        goto err_unlock;
                }
 
+               ida_init(&acpi_device_bus_id->instance_ida);
+
+               result = acpi_device_set_name(device, acpi_device_bus_id);
+               if (result) {
+                       kfree(acpi_device_bus_id);
+                       goto err_unlock;
+               }
+
                list_add_tail(&acpi_device_bus_id->node, &acpi_bus_id_list);
        }
-       dev_set_name(&device->dev, "%s:%02x", acpi_device_bus_id->bus_id, acpi_device_bus_id->instance_no);
 
        if (device->parent)
                list_add_tail(&device->node, &device->parent->children);
@@ -1647,6 +1670,8 @@ void acpi_init_device_object(struct acpi_device *device, acpi_handle handle,
        device_initialize(&device->dev);
        dev_set_uevent_suppress(&device->dev, true);
        acpi_init_coherency(device);
+       /* Assume there are unmet deps to start with. */
+       device->dep_unmet = 1;
 }
 
 void acpi_device_add_finalize(struct acpi_device *device)
@@ -1910,6 +1935,8 @@ static void acpi_scan_dep_init(struct acpi_device *adev)
 {
        struct acpi_dep_data *dep;
 
+       adev->dep_unmet = 0;
+
        mutex_lock(&acpi_dep_list_lock);
 
        list_for_each_entry(dep, &acpi_dep_list, node) {
@@ -1957,7 +1984,13 @@ static acpi_status acpi_bus_check_add(acpi_handle handle, bool check_dep,
                return AE_CTRL_DEPTH;
 
        acpi_scan_init_hotplug(device);
-       if (!check_dep)
+       /*
+        * If check_dep is true at this point, the device has no dependencies,
+        * or the creation of the device object would have been postponed above.
+        */
+       if (check_dep)
+               device->dep_unmet = 0;
+       else
                acpi_scan_dep_init(device);
 
 out:
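
Replacing the bare instance_no counter with a per-bus-id struct ida means freed instance numbers are recycled, the "%s:%02x" device name stays unique across out-of-order removals, and ACPI_MAX_DEVICE_INSTANCES bounds the namespace. A condensed kernel-context sketch of the allocate/release lifecycle used above (bus_id_entry and MAX_INSTANCES are illustrative names; this is a fragment, not a standalone module):

    #include <linux/idr.h>
    #include <linux/slab.h>

    #define MAX_INSTANCES 4096      /* mirrors ACPI_MAX_DEVICE_INSTANCES */

    struct bus_id_entry {
            const char *bus_id;
            struct ida instance_ida;        /* ida_init() at creation time */
    };

    /* Lowest free instance in [0, MAX_INSTANCES); recycled after removal. */
    static int bus_id_take_instance(struct bus_id_entry *e)
    {
            return ida_simple_get(&e->instance_ida, 0, MAX_INSTANCES,
                                  GFP_KERNEL);
    }

    static void bus_id_drop_instance(struct bus_id_entry *e, int no)
    {
            ida_simple_remove(&e->instance_ida, no);
            if (ida_is_empty(&e->instance_ida)) {
                    /* Last instance gone: the whole bus-id entry can go too. */
                    kfree(e);
            }
    }

Because ida_simple_get() hands out the lowest free ID, removing device X:03 and adding a new one reuses 03 instead of growing the counter forever.
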
index e48690a..9d58104 100644 (file)
@@ -780,7 +780,7 @@ acpi_status acpi_os_table_override(struct acpi_table_header *existing_table,
 }
 
 /*
- * acpi_table_init()
+ * acpi_locate_initial_tables()
  *
  * find RSDP, find and checksum SDT/XSDT.
  * checksum all tables, print SDT/XSDT
@@ -788,7 +788,7 @@ acpi_status acpi_os_table_override(struct acpi_table_header *existing_table,
  * result: sdt_entry[] is initialized
  */
 
-int __init acpi_table_init(void)
+int __init acpi_locate_initial_tables(void)
 {
        acpi_status status;
 
@@ -803,9 +803,45 @@ int __init acpi_table_init(void)
        status = acpi_initialize_tables(initial_tables, ACPI_MAX_TABLES, 0);
        if (ACPI_FAILURE(status))
                return -EINVAL;
-       acpi_table_initrd_scan();
 
+       return 0;
+}
+
+void __init acpi_reserve_initial_tables(void)
+{
+       int i;
+
+       for (i = 0; i < ACPI_MAX_TABLES; i++) {
+               struct acpi_table_desc *table_desc = &initial_tables[i];
+               u64 start = table_desc->address;
+               u64 size = table_desc->length;
+
+               if (!start || !size)
+                       break;
+
+               pr_info("Reserving %4s table memory at [mem 0x%llx-0x%llx]\n",
+                       table_desc->signature.ascii, start, start + size - 1);
+
+               memblock_reserve(start, size);
+       }
+}
+
+void __init acpi_table_init_complete(void)
+{
+       acpi_table_initrd_scan();
        check_multiple_madt();
+}
+
+int __init acpi_table_init(void)
+{
+       int ret;
+
+       ret = acpi_locate_initial_tables();
+       if (ret)
+               return ret;
+
+       acpi_table_init_complete();
+
        return 0;
 }
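
Splitting acpi_table_init() into locate, reserve and complete phases lets early architecture code call memblock_reserve() on the table memory before the page allocator can hand it out, while the combined acpi_table_init() preserves the old single-call behaviour. A small userspace mock of the three-phase staging (the descriptor array and reserve() are stand-ins for initial_tables[] and memblock_reserve()):

    #include <stdio.h>

    struct table_desc { unsigned long long addr, len; };
    static struct table_desc tables[] = {
            { 0x1000, 0x200 }, { 0x2000, 0x100 }, { 0, 0 },
    };

    static void reserve(unsigned long long s, unsigned long long n)
    {
            /* stand-in for memblock_reserve() */
            printf("reserving [mem %#llx-%#llx]\n", s, s + n - 1);
    }

    static int locate_initial_tables(void) { return 0; } /* phase 1: find + checksum */

    static void reserve_initial_tables(void)             /* phase 2: pin the memory */
    {
            for (int i = 0; tables[i].addr && tables[i].len; i++)
                    reserve(tables[i].addr, tables[i].len);
    }

    static void table_init_complete(void) { }            /* phase 3: initrd scan etc. */

    int main(void)
    {
            if (locate_initial_tables())
                    return 1;
            reserve_initial_tables();
            table_init_complete();
            return 0;
    }
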
 
index 811d298..83cd4c9 100644 (file)
@@ -147,6 +147,7 @@ static const struct dmi_system_id video_detect_dmi_table[] = {
                },
        },
        {
+       .callback = video_detect_force_vendor,
        .ident = "Sony VPCEH3U1E",
        .matches = {
                DMI_MATCH(DMI_SYS_VENDOR, "Sony Corporation"),
index 7b4f154..e80ba93 100644 (file)
@@ -355,7 +355,8 @@ static inline bool is_binderfs_control_device(const struct dentry *dentry)
        return info->control_dentry == dentry;
 }
 
-static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int binderfs_rename(struct user_namespace *mnt_userns,
+                          struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry,
                           unsigned int flags)
 {
@@ -363,7 +364,8 @@ static int binderfs_rename(struct inode *old_dir, struct dentry *old_dentry,
            is_binderfs_control_device(new_dentry))
                return -EPERM;
 
-       return simple_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+       return simple_rename(&init_user_ns, old_dir, old_dentry, new_dir,
+                            new_dentry, flags);
 }
 
 static int binderfs_unlink(struct inode *dir, struct dentry *dentry)
index 316a994..b574cce 100644 (file)
@@ -2260,7 +2260,8 @@ out:
        return rc;
 
 err_eni_release:
-       eni_do_release(dev);
+       dev->phy = NULL;
+       iounmap(ENI_DEV(dev)->ioaddr);
 err_unregister:
        atm_dev_deregister(dev);
 err_free_consistent:
index 9a70bee..495fd0a 100644 (file)
@@ -100,8 +100,6 @@ static LIST_HEAD(fore200e_boards);
 
 MODULE_AUTHOR("Christophe Lizzi - credits to Uwe Dannowski and Heikki Vatiainen");
 MODULE_DESCRIPTION("FORE Systems 200E-series ATM driver - version " FORE200E_VERSION);
-MODULE_SUPPORTED_DEVICE("PCA-200E, SBA-200E");
-
 
 static const int fore200e_rx_buf_nbr[ BUFFER_SCHEME_NBR ][ BUFFER_MAGN_NBR ] = {
     { BUFFER_S1_NBR, BUFFER_L1_NBR },
index 3c081b6..bfca7b8 100644 (file)
@@ -262,7 +262,7 @@ static int idt77105_start(struct atm_dev *dev)
 {
        unsigned long flags;
 
-       if (!(dev->dev_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL)))
+       if (!(dev->phy_data = kmalloc(sizeof(struct idt77105_priv),GFP_KERNEL)))
                return -ENOMEM;
        PRIV(dev)->dev = dev;
        spin_lock_irqsave(&idt77105_priv_lock, flags);
@@ -337,7 +337,7 @@ static int idt77105_stop(struct atm_dev *dev)
                 else
                     idt77105_all = walk->next;
                dev->phy = NULL;
-                dev->dev_data = NULL;
+                dev->phy_data = NULL;
                 kfree(walk);
                 break;
             }
index d7277c2..32d7aa1 100644 (file)
@@ -2233,6 +2233,7 @@ static int lanai_dev_open(struct atm_dev *atmdev)
        conf1_write(lanai);
 #endif
        iounmap(lanai->base);
+       lanai->base = NULL;
     error_pci:
        pci_disable_device(lanai->pci);
     error:
@@ -2245,6 +2246,8 @@ static int lanai_dev_open(struct atm_dev *atmdev)
 static void lanai_dev_close(struct atm_dev *atmdev)
 {
        struct lanai_dev *lanai = (struct lanai_dev *) atmdev->dev_data;
+       if (lanai->base == NULL)
+               return;
        printk(KERN_INFO DEV_LABEL "(itf %d): shutting down interface\n",
            lanai->number);
        lanai_timed_poll_stop(lanai);
@@ -2552,7 +2555,7 @@ static int lanai_init_one(struct pci_dev *pci,
        struct atm_dev *atmdev;
        int result;
 
-       lanai = kmalloc(sizeof(*lanai), GFP_KERNEL);
+       lanai = kzalloc(sizeof(*lanai), GFP_KERNEL);
        if (lanai == NULL) {
                printk(KERN_ERR DEV_LABEL
                       ": couldn't allocate dev_data structure!\n");
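
The lanai fixes pair two defensive idioms: allocate the device state zeroed with kzalloc() so untouched pointers read as NULL, and NULL the mapping after iounmap() on the failure path so a later close can recognize a device that never finished opening. A userspace analogue with calloc()/free() in place of kzalloc()/iounmap() (dev_state and the helpers are invented):

    #include <stdlib.h>

    struct dev_state { void *base; };

    static struct dev_state *dev_open(int fail)
    {
            struct dev_state *d = calloc(1, sizeof(*d)); /* zeroed, like kzalloc */
            if (!d)
                    return NULL;
            d->base = malloc(64);                        /* stand-in for ioremap */
            if (fail) {
                    free(d->base);
                    d->base = NULL; /* mark as not open; close() will skip */
            }
            return d;
    }

    static void dev_close(struct dev_state *d)
    {
            if (d->base == NULL)    /* open never completed: nothing to undo */
                    return;
            free(d->base);
            d->base = NULL;
    }

    int main(void)
    {
            struct dev_state *d = dev_open(1);
            dev_close(d);           /* safe even though open failed mid-way */
            free(d);
            return 0;
    }
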
index 7850758..239852d 100644 (file)
@@ -211,7 +211,7 @@ static void uPD98402_int(struct atm_dev *dev)
 static int uPD98402_start(struct atm_dev *dev)
 {
        DPRINTK("phy_start\n");
-       if (!(dev->dev_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL)))
+       if (!(dev->phy_data = kmalloc(sizeof(struct uPD98402_priv),GFP_KERNEL)))
                return -ENOMEM;
        spin_lock_init(&PRIV(dev)->lock);
        memset(&PRIV(dev)->sonet_stats,0,sizeof(struct k_sonet_stats));
index 7eebae7..fd430e6 100644 (file)
@@ -5,7 +5,7 @@
  * Description: cfag12864b LCD driver
  *     Depends: ks0108
  *
- *      Author: Copyright (C) Miguel Ojeda Sandonis
+ *      Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
  *        Date: 2006-10-31
  */
 
@@ -376,5 +376,5 @@ module_init(cfag12864b_init);
 module_exit(cfag12864b_exit);
 
 MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
+MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
 MODULE_DESCRIPTION("cfag12864b LCD driver");
index 2002291..d66821a 100644 (file)
@@ -5,7 +5,7 @@
  * Description: cfag12864b LCD framebuffer driver
  *     Depends: cfag12864b
  *
- *      Author: Copyright (C) Miguel Ojeda Sandonis
+ *      Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
  *        Date: 2006-10-31
  */
 
@@ -171,5 +171,5 @@ module_init(cfag12864bfb_init);
 module_exit(cfag12864bfb_exit);
 
 MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
+MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
 MODULE_DESCRIPTION("cfag12864b LCD framebuffer driver");
index f43430e..24fd6f3 100644 (file)
@@ -470,12 +470,14 @@ static ssize_t charlcd_write(struct file *file, const char __user *buf,
        char c;
 
        for (; count-- > 0; (*ppos)++, tmp++) {
-               if (!in_interrupt() && (((count + 1) & 0x1f) == 0))
+               if (((count + 1) & 0x1f) == 0) {
                        /*
-                        * let's be a little nice with other processes
-                        * that need some CPU
+                        * charlcd_write() is invoked as a VFS->write() callback
+                        * and as such it is always invoked from preemptible
+                        * context and may sleep.
                         */
-                       schedule();
+                       cond_resched();
+               }
 
                if (get_user(c, tmp))
                        return -EFAULT;
@@ -537,12 +539,8 @@ static void charlcd_puts(struct charlcd *lcd, const char *s)
        int count = strlen(s);
 
        for (; count-- > 0; tmp++) {
-               if (!in_interrupt() && (((count + 1) & 0x1f) == 0))
-                       /*
-                        * let's be a little nice with other processes
-                        * that need some CPU
-                        */
-                       schedule();
+               if (((count + 1) & 0x1f) == 0)
+                       cond_resched();
 
                charlcd_write_char(lcd, *tmp);
        }
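
The charlcd change drops the in_interrupt() guard because a VFS write() callback always runs in preemptible process context, and replaces schedule() with cond_resched() every 32 characters. A userspace analogue of the yield-periodically-in-a-long-loop pattern, with sched_yield() standing in for cond_resched():

    #include <sched.h>
    #include <stddef.h>

    static void emit(char c) { (void)c; /* slow device write goes here */ }

    static void write_buf(const char *buf, size_t count)
    {
            for (; count-- > 0; buf++) {
                    /* Yield to other runnable tasks every 32 iterations. */
                    if (((count + 1) & 0x1f) == 0)
                            sched_yield();
                    emit(*buf);
            }
    }

    int main(void)
    {
            char buf[256] = { 0 };
            write_buf(buf, sizeof(buf));
            return 0;
    }
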
index abfe3fa..03c95ad 100644 (file)
@@ -5,7 +5,7 @@
  * Description: ks0108 LCD Controller driver
  *     Depends: parport
  *
- *      Author: Copyright (C) Miguel Ojeda Sandonis
+ *      Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
  *        Date: 2006-10-31
  */
 
@@ -182,6 +182,6 @@ module_init(ks0108_init);
 module_exit(ks0108_exit);
 
 MODULE_LICENSE("GPL v2");
-MODULE_AUTHOR("Miguel Ojeda Sandonis <miguel.ojeda.sandonis@gmail.com>");
+MODULE_AUTHOR("Miguel Ojeda <ojeda@kernel.org>");
 MODULE_DESCRIPTION("ks0108 LCD Controller driver");
 
index 040be48..ffcbe2b 100644 (file)
@@ -161,7 +161,7 @@ config HMEM_REPORTING
        default n
        depends on NUMA
        help
-         Enable reporting for heterogenous memory access attributes under
+         Enable reporting for heterogeneous memory access attributes under
          their non-uniform memory nodes.
 
 source "drivers/base/test/Kconfig"
@@ -213,4 +213,10 @@ config GENERIC_ARCH_TOPOLOGY
          appropriate scaling, sysfs interface for reading capacity values at
          runtime.
 
+config GENERIC_ARCH_NUMA
+       bool
+       help
+         Enable support for generic NUMA implementation. Currently, RISC-V
+         and ARM64 use it.
+
 endmenu
index 5e7bf96..8b93a7f 100644 (file)
@@ -24,6 +24,7 @@ obj-$(CONFIG_PINCTRL) += pinctrl.o
 obj-$(CONFIG_DEV_COREDUMP) += devcoredump.o
 obj-$(CONFIG_GENERIC_MSI_IRQ_DOMAIN) += platform-msi.o
 obj-$(CONFIG_GENERIC_ARCH_TOPOLOGY) += arch_topology.o
+obj-$(CONFIG_GENERIC_ARCH_NUMA) += arch_numa.o
 
 obj-y                  += test/
 
similarity index 93%
rename from arch/arm64/mm/numa.c
rename to drivers/base/arch_numa.c
index a8303bc..4cc4e11 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/module.h>
 #include <linux/of.h>
 
-#include <asm/acpi.h>
 #include <asm/sections.h>
 
 struct pglist_data *node_data[MAX_NUMNODES] __read_mostly;
@@ -356,11 +355,12 @@ static int __init numa_register_nodes(void)
        /* Check that valid nid is set to memblks */
        for_each_mem_region(mblk) {
                int mblk_nid = memblock_get_region_node(mblk);
+               phys_addr_t start = mblk->base;
+               phys_addr_t end = mblk->base + mblk->size - 1;
 
                if (mblk_nid == NUMA_NO_NODE || mblk_nid >= MAX_NUMNODES) {
-                       pr_warn("Warning: invalid memblk node %d [mem %#010Lx-%#010Lx]\n",
-                               mblk_nid, mblk->base,
-                               mblk->base + mblk->size - 1);
+                       pr_warn("Warning: invalid memblk node %d [mem %pap-%pap]\n",
+                               mblk_nid, &start, &end);
                        return -EINVAL;
                }
        }
@@ -428,14 +428,14 @@ out_free_distance:
 static int __init dummy_numa_init(void)
 {
        phys_addr_t start = memblock_start_of_DRAM();
-       phys_addr_t end = memblock_end_of_DRAM();
+       phys_addr_t end = memblock_end_of_DRAM() - 1;
        int ret;
 
        if (numa_off)
                pr_info("NUMA disabled\n"); /* Forced off on command line. */
-       pr_info("Faking a node at [mem %#018Lx-%#018Lx]\n", start, end - 1);
+       pr_info("Faking a node at [mem %pap-%pap]\n", &start, &end);
 
-       ret = numa_add_memblk(0, start, end);
+       ret = numa_add_memblk(0, start, end + 1);
        if (ret) {
                pr_err("NUMA init failed\n");
                return ret;
@@ -445,16 +445,36 @@ static int __init dummy_numa_init(void)
        return 0;
 }
 
+#ifdef CONFIG_ACPI_NUMA
+static int __init arch_acpi_numa_init(void)
+{
+       int ret;
+
+       ret = acpi_numa_init();
+       if (ret) {
+               pr_info("Failed to initialise from firmware\n");
+               return ret;
+       }
+
+       return srat_disabled() ? -EINVAL : 0;
+}
+#else
+static int __init arch_acpi_numa_init(void)
+{
+       return -EOPNOTSUPP;
+}
+#endif
+
 /**
- * arm64_numa_init() - Initialize NUMA
+ * arch_numa_init() - Initialize NUMA
  *
  * Try each configured NUMA initialization method until one succeeds. The
  * last fallback is dummy single node config encompassing whole memory.
  */
-void __init arm64_numa_init(void)
+void __init arch_numa_init(void)
 {
        if (!numa_off) {
-               if (!acpi_disabled && !numa_init(arm64_acpi_numa_init))
+               if (!acpi_disabled && !numa_init(arch_acpi_numa_init))
                        return;
                if (acpi_disabled && !numa_init(of_numa_init))
                        return;
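
arch_acpi_numa_init() illustrates the standard shape for config-gated init methods: a real implementation under CONFIG_ACPI_NUMA and a stub returning -EOPNOTSUPP otherwise, so arch_numa_init()'s fallback chain (firmware, then devicetree, then a dummy node) needs no #ifdefs at the call site. A compile-time sketch of that shape (build with -DCONFIG_ACPI_NUMA to select the real branch):

    #include <errno.h>
    #include <stdio.h>

    #ifdef CONFIG_ACPI_NUMA
    static int arch_acpi_numa_init(void)
    {
            /* real firmware parsing would live here */
            return 0;
    }
    #else
    static int arch_acpi_numa_init(void)
    {
            return -EOPNOTSUPP; /* caller falls through to the next method */
    }
    #endif

    int main(void)
    {
            if (arch_acpi_numa_init() == 0)
                    printf("NUMA from firmware\n");
            else
                    printf("falling back to DT / dummy node\n");
            return 0;
    }
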
index 8336535..d8b314e 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/pm_runtime.h>
 #include <linux/string.h>
 #include <linux/auxiliary_bus.h>
+#include "base.h"
 
 static const struct auxiliary_device_id *auxiliary_match_id(const struct auxiliary_device_id *id,
                                                            const struct auxiliary_device *auxdev)
@@ -260,19 +261,11 @@ void auxiliary_driver_unregister(struct auxiliary_driver *auxdrv)
 }
 EXPORT_SYMBOL_GPL(auxiliary_driver_unregister);
 
-static int __init auxiliary_bus_init(void)
+void __init auxiliary_bus_init(void)
 {
-       return bus_register(&auxiliary_bus_type);
+       WARN_ON(bus_register(&auxiliary_bus_type));
 }
 
-static void __exit auxiliary_bus_exit(void)
-{
-       bus_unregister(&auxiliary_bus_type);
-}
-
-module_init(auxiliary_bus_init);
-module_exit(auxiliary_bus_exit);
-
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Auxiliary Bus");
 MODULE_AUTHOR("David Ertman <david.m.ertman@intel.com>");
index f5600a8..52b3d7b 100644 (file)
@@ -119,6 +119,11 @@ static inline int hypervisor_init(void) { return 0; }
 extern int platform_bus_init(void);
 extern void cpu_dev_init(void);
 extern void container_dev_init(void);
+#ifdef CONFIG_AUXILIARY_BUS
+extern void auxiliary_bus_init(void);
+#else
+static inline void auxiliary_bus_init(void) { }
+#endif
 
 struct kobject *virtual_device_parent(struct device *dev);
 
index a9c23ec..36d0c65 100644 (file)
@@ -633,7 +633,7 @@ int bus_add_driver(struct device_driver *drv)
        error = driver_add_groups(drv, bus->drv_groups);
        if (error) {
                /* How the hell do we get out of this pickle? Give up */
-               printk(KERN_ERR "%s: driver_create_groups(%s) failed\n",
+               printk(KERN_ERR "%s: driver_add_groups(%s) failed\n",
                        __func__, drv->name);
        }
 
@@ -729,23 +729,6 @@ int device_reprobe(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(device_reprobe);
 
-/**
- * find_bus - locate bus by name.
- * @name: name of bus.
- *
- * Call kset_find_obj() to iterate over list of buses to
- * find a bus by name. Return bus if found.
- *
- * Note that kset_find_obj increments bus' reference count.
- */
-#if 0
-struct bus_type *find_bus(char *name)
-{
-       struct kobject *k = kset_find_obj(bus_kset, name);
-       return k ? to_bus(k) : NULL;
-}
-#endif  /*  0  */
-
 static int bus_add_groups(struct bus_type *bus,
                          const struct attribute_group **groups)
 {
index 7c0406e..f298393 100644 (file)
@@ -149,6 +149,21 @@ void fwnode_links_purge(struct fwnode_handle *fwnode)
        fwnode_links_purge_consumers(fwnode);
 }
 
+static void fw_devlink_purge_absent_suppliers(struct fwnode_handle *fwnode)
+{
+       struct fwnode_handle *child;
+
+       /* Don't purge consumer links of an added child */
+       if (fwnode->dev)
+               return;
+
+       fwnode->flags |= FWNODE_FLAG_NOT_DEVICE;
+       fwnode_links_purge_consumers(fwnode);
+
+       fwnode_for_each_available_child_node(fwnode, child)
+               fw_devlink_purge_absent_suppliers(child);
+}
+
 #ifdef CONFIG_SRCU
 static DEFINE_MUTEX(device_links_lock);
 DEFINE_STATIC_SRCU(device_links_srcu);
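
fw_devlink_purge_absent_suppliers() is a guarded recursion: a subtree whose root already has a struct device is left alone, since its children may yet probe, while provably absent suppliers get flagged FWNODE_FLAG_NOT_DEVICE and their consumer links dropped so nobody defers on them forever. A userspace sketch of the walk (the fwnode layout here is invented for illustration):

    #include <stdbool.h>
    #include <stdio.h>

    struct fwnode {
            bool has_device;        /* fwnode->dev != NULL in the real code */
            bool not_device;        /* FWNODE_FLAG_NOT_DEVICE analogue */
            struct fwnode *child, *sibling;
    };

    static void purge_consumers(struct fwnode *n)
    {
            printf("dropping consumer links of %p\n", (void *)n);
    }

    static void purge_absent_suppliers(struct fwnode *n)
    {
            /* A child that already became a device may still probe normally. */
            if (n->has_device)
                    return;

            n->not_device = true;
            purge_consumers(n);

            for (struct fwnode *c = n->child; c; c = c->sibling)
                    purge_absent_suppliers(c);
    }

    int main(void)
    {
            struct fwnode leaf = { 0 }, bound = { .has_device = true };
            struct fwnode root = { .child = &bound };

            bound.sibling = &leaf;
            purge_absent_suppliers(&root);
            return 0;
    }
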
@@ -245,7 +260,8 @@ int device_is_dependent(struct device *dev, void *target)
                return ret;
 
        list_for_each_entry(link, &dev->links.consumers, s_node) {
-               if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
+               if ((link->flags & ~DL_FLAG_INFERRED) ==
+                   (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
                        continue;
 
                if (link->consumer == target)
@@ -318,7 +334,8 @@ static int device_reorder_to_tail(struct device *dev, void *not_used)
 
        device_for_each_child(dev, NULL, device_reorder_to_tail);
        list_for_each_entry(link, &dev->links.consumers, s_node) {
-               if (link->flags == (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
+               if ((link->flags & ~DL_FLAG_INFERRED) ==
+                   (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
                        continue;
                device_reorder_to_tail(link->consumer, NULL);
        }
@@ -566,7 +583,8 @@ postcore_initcall(devlink_class_init);
 #define DL_MANAGED_LINK_FLAGS (DL_FLAG_AUTOREMOVE_CONSUMER | \
                               DL_FLAG_AUTOREMOVE_SUPPLIER | \
                               DL_FLAG_AUTOPROBE_CONSUMER  | \
-                              DL_FLAG_SYNC_STATE_ONLY)
+                              DL_FLAG_SYNC_STATE_ONLY | \
+                              DL_FLAG_INFERRED)
 
 #define DL_ADD_VALID_FLAGS (DL_MANAGED_LINK_FLAGS | DL_FLAG_STATELESS | \
                            DL_FLAG_PM_RUNTIME | DL_FLAG_RPM_ACTIVE)
@@ -635,7 +653,7 @@ struct device_link *device_link_add(struct device *consumer,
        if (!consumer || !supplier || flags & ~DL_ADD_VALID_FLAGS ||
            (flags & DL_FLAG_STATELESS && flags & DL_MANAGED_LINK_FLAGS) ||
            (flags & DL_FLAG_SYNC_STATE_ONLY &&
-            flags != DL_FLAG_SYNC_STATE_ONLY) ||
+            (flags & ~DL_FLAG_INFERRED) != DL_FLAG_SYNC_STATE_ONLY) ||
            (flags & DL_FLAG_AUTOPROBE_CONSUMER &&
             flags & (DL_FLAG_AUTOREMOVE_CONSUMER |
                      DL_FLAG_AUTOREMOVE_SUPPLIER)))
@@ -691,6 +709,10 @@ struct device_link *device_link_add(struct device *consumer,
                if (link->consumer != consumer)
                        continue;
 
+               if (link->flags & DL_FLAG_INFERRED &&
+                   !(flags & DL_FLAG_INFERRED))
+                       link->flags &= ~DL_FLAG_INFERRED;
+
                if (flags & DL_FLAG_PM_RUNTIME) {
                        if (!(link->flags & DL_FLAG_PM_RUNTIME)) {
                                pm_runtime_new_link(consumer);
@@ -950,6 +972,10 @@ int device_links_check_suppliers(struct device *dev)
        mutex_lock(&fwnode_link_lock);
        if (dev->fwnode && !list_empty(&dev->fwnode->suppliers) &&
            !fw_devlink_is_permissive()) {
+               dev_dbg(dev, "probe deferral - wait for supplier %pfwP\n",
+                       list_first_entry(&dev->fwnode->suppliers,
+                       struct fwnode_link,
+                       c_hook)->supplier);
                mutex_unlock(&fwnode_link_lock);
                return -EPROBE_DEFER;
        }
@@ -964,6 +990,8 @@ int device_links_check_suppliers(struct device *dev)
                if (link->status != DL_STATE_AVAILABLE &&
                    !(link->flags & DL_FLAG_SYNC_STATE_ONLY)) {
                        device_links_missing_supplier(dev);
+                       dev_dbg(dev, "probe deferral - supplier %s not ready\n",
+                               dev_name(link->supplier));
                        ret = -EPROBE_DEFER;
                        break;
                }
@@ -1142,12 +1170,22 @@ void device_links_driver_bound(struct device *dev)
        LIST_HEAD(sync_list);
 
        /*
-        * If a device probes successfully, it's expected to have created all
+        * If a device binds successfully, it's expected to have created all
         * the device links it needs to or make new device links as it needs
-        * them. So, it no longer needs to wait on any suppliers.
+        * them. So, fw_devlink no longer needs to create device links to any
+        * of the device's suppliers.
+        *
+        * Also, if a child firmware node of this bound device is not added as
+        * a device by now, assume it is never going to be added and make sure
+        * other devices don't defer probe indefinitely by waiting for such a
+        * child device.
         */
-       if (dev->fwnode && dev->fwnode->dev == dev)
+       if (dev->fwnode && dev->fwnode->dev == dev) {
+               struct fwnode_handle *child;
                fwnode_links_purge_suppliers(dev->fwnode);
+               fwnode_for_each_available_child_node(dev->fwnode, child)
+                       fw_devlink_purge_absent_suppliers(child);
+       }
        device_remove_file(dev, &dev_attr_waiting_for_supplier);
 
        device_links_write_lock();
@@ -1458,7 +1496,14 @@ static void device_links_purge(struct device *dev)
        device_links_write_unlock();
 }
 
-static u32 fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY;
+#define FW_DEVLINK_FLAGS_PERMISSIVE    (DL_FLAG_INFERRED | \
+                                        DL_FLAG_SYNC_STATE_ONLY)
+#define FW_DEVLINK_FLAGS_ON            (DL_FLAG_INFERRED | \
+                                        DL_FLAG_AUTOPROBE_CONSUMER)
+#define FW_DEVLINK_FLAGS_RPM           (FW_DEVLINK_FLAGS_ON | \
+                                        DL_FLAG_PM_RUNTIME)
+
+static u32 fw_devlink_flags = FW_DEVLINK_FLAGS_PERMISSIVE;
 static int __init fw_devlink_setup(char *arg)
 {
        if (!arg)
@@ -1467,17 +1512,23 @@ static int __init fw_devlink_setup(char *arg)
        if (strcmp(arg, "off") == 0) {
                fw_devlink_flags = 0;
        } else if (strcmp(arg, "permissive") == 0) {
-               fw_devlink_flags = DL_FLAG_SYNC_STATE_ONLY;
+               fw_devlink_flags = FW_DEVLINK_FLAGS_PERMISSIVE;
        } else if (strcmp(arg, "on") == 0) {
-               fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER;
+               fw_devlink_flags = FW_DEVLINK_FLAGS_ON;
        } else if (strcmp(arg, "rpm") == 0) {
-               fw_devlink_flags = DL_FLAG_AUTOPROBE_CONSUMER |
-                                  DL_FLAG_PM_RUNTIME;
+               fw_devlink_flags = FW_DEVLINK_FLAGS_RPM;
        }
        return 0;
 }
 early_param("fw_devlink", fw_devlink_setup);
 
+static bool fw_devlink_strict;
+static int __init fw_devlink_strict_setup(char *arg)
+{
+       return strtobool(arg, &fw_devlink_strict);
+}
+early_param("fw_devlink.strict", fw_devlink_strict_setup);
+
 u32 fw_devlink_get_flags(void)
 {
        return fw_devlink_flags;
@@ -1485,7 +1536,12 @@ u32 fw_devlink_get_flags(void)
 
 static bool fw_devlink_is_permissive(void)
 {
-       return fw_devlink_flags == DL_FLAG_SYNC_STATE_ONLY;
+       return fw_devlink_flags == FW_DEVLINK_FLAGS_PERMISSIVE;
+}
+
+bool fw_devlink_is_strict(void)
+{
+       return fw_devlink_strict && !fw_devlink_is_permissive();
 }
 
 static void fw_devlink_parse_fwnode(struct fwnode_handle *fwnode)
@@ -1508,6 +1564,53 @@ static void fw_devlink_parse_fwtree(struct fwnode_handle *fwnode)
 }
 
 /**
+ * fw_devlink_relax_cycle - Convert cyclic links to SYNC_STATE_ONLY links
+ * @con: Device to check dependencies for.
+ * @sup: Device to check against.
+ *
+ * Check if @sup depends on @con or any device dependent on it (its child or
+ * its consumer, etc.). When such a cyclic dependency is found, convert all
+ * device links created solely by fw_devlink into SYNC_STATE_ONLY device links.
+ * This is the equivalent of doing fw_devlink=permissive just between the
+ * devices in the cycle. We need to do this because, at this point, fw_devlink
+ * can't tell which of these dependencies is not a real dependency.
+ *
+ * Return 1 if a cycle is found. Otherwise, return 0.
+ */
+static int fw_devlink_relax_cycle(struct device *con, void *sup)
+{
+       struct device_link *link;
+       int ret;
+
+       if (con == sup)
+               return 1;
+
+       ret = device_for_each_child(con, sup, fw_devlink_relax_cycle);
+       if (ret)
+               return ret;
+
+       list_for_each_entry(link, &con->links.consumers, s_node) {
+               if ((link->flags & ~DL_FLAG_INFERRED) ==
+                   (DL_FLAG_SYNC_STATE_ONLY | DL_FLAG_MANAGED))
+                       continue;
+
+               if (!fw_devlink_relax_cycle(link->consumer, sup))
+                       continue;
+
+               ret = 1;
+
+               if (!(link->flags & DL_FLAG_INFERRED))
+                       continue;
+
+               pm_runtime_drop_link(link);
+               link->flags = DL_FLAG_MANAGED | FW_DEVLINK_FLAGS_PERMISSIVE;
+               dev_dbg(link->consumer, "Relaxing link with %s\n",
+                       dev_name(link->supplier));
+       }
+       return ret;
+}
+
+/**
  * fw_devlink_create_devlink - Create a device link from a consumer to fwnode
  * @con - Consumer device for the device link
  * @sup_handle - fwnode handle of supplier
@@ -1535,15 +1638,39 @@ static int fw_devlink_create_devlink(struct device *con,
        sup_dev = get_dev_from_fwnode(sup_handle);
        if (sup_dev) {
                /*
+                * If it's one of those drivers that don't actually bind to
+                * their device using driver core, then don't wait on this
+                * supplier device indefinitely.
+                */
+               if (sup_dev->links.status == DL_DEV_NO_DRIVER &&
+                   sup_handle->flags & FWNODE_FLAG_INITIALIZED) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+
+               /*
                 * If this fails, it is due to cycles in device links.  Just
                 * give up on this link and treat it as invalid.
                 */
-               if (!device_link_add(con, sup_dev, flags))
+               if (!device_link_add(con, sup_dev, flags) &&
+                   !(flags & DL_FLAG_SYNC_STATE_ONLY)) {
+                       dev_info(con, "Fixing up cyclic dependency with %s\n",
+                                dev_name(sup_dev));
+                       device_links_write_lock();
+                       fw_devlink_relax_cycle(con, sup_dev);
+                       device_links_write_unlock();
+                       device_link_add(con, sup_dev,
+                                       FW_DEVLINK_FLAGS_PERMISSIVE);
                        ret = -EINVAL;
+               }
 
                goto out;
        }
 
+       /* Supplier that's already initialized without a struct device. */
+       if (sup_handle->flags & FWNODE_FLAG_INITIALIZED)
+               return -EINVAL;
+
        /*
         * DL_FLAG_SYNC_STATE_ONLY doesn't block probing and supports
         * cycles. So cycle detection isn't necessary and shouldn't be
@@ -1632,7 +1759,7 @@ static void __fw_devlink_link_to_consumers(struct device *dev)
                                con_dev = NULL;
                        } else {
                                own_link = false;
-                               dl_flags = DL_FLAG_SYNC_STATE_ONLY;
+                               dl_flags = FW_DEVLINK_FLAGS_PERMISSIVE;
                        }
                }
 
@@ -1687,7 +1814,7 @@ static void __fw_devlink_link_to_suppliers(struct device *dev,
        if (own_link)
                dl_flags = fw_devlink_get_flags();
        else
-               dl_flags = DL_FLAG_SYNC_STATE_ONLY;
+               dl_flags = FW_DEVLINK_FLAGS_PERMISSIVE;
 
        list_for_each_entry_safe(link, tmp, &fwnode->suppliers, c_hook) {
                int ret;
index 9179825..e2cf3b2 100644 (file)
@@ -97,6 +97,9 @@ static void deferred_probe_work_func(struct work_struct *work)
 
                get_device(dev);
 
+               kfree(dev->p->deferred_probe_reason);
+               dev->p->deferred_probe_reason = NULL;
+
                /*
                 * Drop the mutex while probing each device; the probe path may
                 * manipulate the deferred list
index eac184e..653c8c6 100644 (file)
@@ -162,7 +162,7 @@ static int dev_mkdir(const char *name, umode_t mode)
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       err = vfs_mkdir(d_inode(path.dentry), dentry, mode);
+       err = vfs_mkdir(&init_user_ns, d_inode(path.dentry), dentry, mode);
        if (!err)
                /* mark as kernel-created inode */
                d_inode(dentry)->i_private = &thread;
@@ -212,7 +212,8 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
        if (IS_ERR(dentry))
                return PTR_ERR(dentry);
 
-       err = vfs_mknod(d_inode(path.dentry), dentry, mode, dev->devt);
+       err = vfs_mknod(&init_user_ns, d_inode(path.dentry), dentry, mode,
+                       dev->devt);
        if (!err) {
                struct iattr newattrs;
 
@@ -221,7 +222,7 @@ static int handle_create(const char *nodename, umode_t mode, kuid_t uid,
                newattrs.ia_gid = gid;
                newattrs.ia_valid = ATTR_MODE|ATTR_UID|ATTR_GID;
                inode_lock(d_inode(dentry));
-               notify_change(dentry, &newattrs, NULL);
+               notify_change(&init_user_ns, dentry, &newattrs, NULL);
                inode_unlock(d_inode(dentry));
 
                /* mark as kernel-created inode */
@@ -242,7 +243,8 @@ static int dev_rmdir(const char *name)
                return PTR_ERR(dentry);
        if (d_really_is_positive(dentry)) {
                if (d_inode(dentry)->i_private == &thread)
-                       err = vfs_rmdir(d_inode(parent.dentry), dentry);
+                       err = vfs_rmdir(&init_user_ns, d_inode(parent.dentry),
+                                       dentry);
                else
                        err = -EPERM;
        } else {
@@ -328,9 +330,10 @@ static int handle_remove(const char *nodename, struct device *dev)
                        newattrs.ia_valid =
                                ATTR_UID|ATTR_GID|ATTR_MODE;
                        inode_lock(d_inode(dentry));
-                       notify_change(dentry, &newattrs, NULL);
+                       notify_change(&init_user_ns, dentry, &newattrs, NULL);
                        inode_unlock(d_inode(dentry));
-                       err = vfs_unlink(d_inode(parent.dentry), dentry, NULL);
+                       err = vfs_unlink(&init_user_ns, d_inode(parent.dentry),
+                                        dentry, NULL);
                        if (!err || err == -ENOENT)
                                deleted = 1;
                }
index 908e652..a9f57c2 100644 (file)
@@ -32,6 +32,7 @@ void __init driver_init(void)
         */
        of_core_init();
        platform_bus_init();
+       auxiliary_bus_init();
        cpu_dev_init();
        memory_dev_init();
        container_dev_init();
index eef4ffb..f352984 100644 (file)
@@ -35,7 +35,7 @@ static const char *const online_type_to_str[] = {
        [MMOP_ONLINE_MOVABLE] = "online_movable",
 };
 
-int memhp_online_type_from_str(const char *str)
+int mhp_online_type_from_str(const char *str)
 {
        int i;
 
@@ -253,7 +253,7 @@ static int memory_subsys_offline(struct device *dev)
 static ssize_t state_store(struct device *dev, struct device_attribute *attr,
                           const char *buf, size_t count)
 {
-       const int online_type = memhp_online_type_from_str(buf);
+       const int online_type = mhp_online_type_from_str(buf);
        struct memory_block *mem = to_memory_block(dev);
        int ret;
 
@@ -290,20 +290,20 @@ static ssize_t state_store(struct device *dev, struct device_attribute *attr,
 }
 
 /*
- * phys_device is a bad name for this.  What I really want
- * is a way to differentiate between memory ranges that
- * are part of physical devices that constitute
- * a complete removable unit or fru.
- * i.e. do these ranges belong to the same physical device,
- * s.t. if I offline all of these sections I can then
- * remove the physical device?
+ * Legacy interface that we cannot remove: s390x exposes the storage increment
+ * covered by a memory block, allowing for identifying which memory blocks
+ * comprise a storage increment. Since a memory block spans complete
+ * storage increments nowadays, this interface is basically unused. Other
+ * archs never exposed != 0.
  */
 static ssize_t phys_device_show(struct device *dev,
                                struct device_attribute *attr, char *buf)
 {
        struct memory_block *mem = to_memory_block(dev);
+       unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
 
-       return sysfs_emit(buf, "%d\n", mem->phys_device);
+       return sysfs_emit(buf, "%d\n",
+                         arch_get_memory_phys_device(start_pfn));
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
@@ -387,19 +387,19 @@ static ssize_t auto_online_blocks_show(struct device *dev,
                                       struct device_attribute *attr, char *buf)
 {
        return sysfs_emit(buf, "%s\n",
-                         online_type_to_str[memhp_default_online_type]);
+                         online_type_to_str[mhp_default_online_type]);
 }
 
 static ssize_t auto_online_blocks_store(struct device *dev,
                                        struct device_attribute *attr,
                                        const char *buf, size_t count)
 {
-       const int online_type = memhp_online_type_from_str(buf);
+       const int online_type = mhp_online_type_from_str(buf);
 
        if (online_type < 0)
                return -EINVAL;
 
-       memhp_default_online_type = online_type;
+       mhp_default_online_type = online_type;
        return count;
 }
 
@@ -488,11 +488,7 @@ static DEVICE_ATTR_WO(soft_offline_page);
 static DEVICE_ATTR_WO(hard_offline_page);
 #endif
 
-/*
- * Note that phys_device is optional.  It is here to allow for
- * differentiation between which *physical* devices each
- * section belongs to...
- */
+/* See phys_device_show(). */
 int __weak arch_get_memory_phys_device(unsigned long start_pfn)
 {
        return 0;
@@ -574,7 +570,6 @@ int register_memory(struct memory_block *memory)
 static int init_memory_block(unsigned long block_id, unsigned long state)
 {
        struct memory_block *mem;
-       unsigned long start_pfn;
        int ret = 0;
 
        mem = find_memory_block_by_id(block_id);
@@ -588,8 +583,6 @@ static int init_memory_block(unsigned long block_id, unsigned long state)
 
        mem->start_section_nr = block_id * sections_per_block;
        mem->state = state;
-       start_pfn = section_nr_to_pfn(mem->start_section_nr);
-       mem->phys_device = arch_get_memory_phys_device(start_pfn);
        mem->nid = NUMA_NO_NODE;
 
        ret = register_memory(mem);
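
Dropping the cached mem->phys_device in favour of calling arch_get_memory_phys_device() from the show() routine is the usual derive-on-read cleanup for sysfs attributes that are cheap to compute: one less field that can go stale. A toy sketch of the shape (memblk and the section-size factor are invented):

    #include <stdio.h>

    struct memblk { unsigned long start_section_nr; };

    /* Weak arch hook in the kernel; constant 0 everywhere but s390. */
    static int arch_phys_device(unsigned long start_pfn)
    {
            (void)start_pfn;
            return 0;
    }

    /* Derive the value at read time instead of caching it at init. */
    static int phys_device_show(const struct memblk *m)
    {
            unsigned long start_pfn = m->start_section_nr * 32768;
            return arch_phys_device(start_pfn);
    }

    int main(void)
    {
            struct memblk m = { .start_section_nr = 3 };
            printf("%d\n", phys_device_show(&m));
            return 0;
    }
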
index 04f71c7..f449dbb 100644 (file)
@@ -372,14 +372,19 @@ static ssize_t node_read_meminfo(struct device *dev,
        struct pglist_data *pgdat = NODE_DATA(nid);
        struct sysinfo i;
        unsigned long sreclaimable, sunreclaimable;
+       unsigned long swapcached = 0;
 
        si_meminfo_node(&i, nid);
        sreclaimable = node_page_state_pages(pgdat, NR_SLAB_RECLAIMABLE_B);
        sunreclaimable = node_page_state_pages(pgdat, NR_SLAB_UNRECLAIMABLE_B);
+#ifdef CONFIG_SWAP
+       swapcached = node_page_state_pages(pgdat, NR_SWAPCACHE);
+#endif
        len = sysfs_emit_at(buf, len,
                            "Node %d MemTotal:       %8lu kB\n"
                            "Node %d MemFree:        %8lu kB\n"
                            "Node %d MemUsed:        %8lu kB\n"
+                           "Node %d SwapCached:     %8lu kB\n"
                            "Node %d Active:         %8lu kB\n"
                            "Node %d Inactive:       %8lu kB\n"
                            "Node %d Active(anon):   %8lu kB\n"
@@ -391,6 +396,7 @@ static ssize_t node_read_meminfo(struct device *dev,
                            nid, K(i.totalram),
                            nid, K(i.freeram),
                            nid, K(i.totalram - i.freeram),
+                           nid, K(swapcached),
                            nid, K(node_page_state(pgdat, NR_ACTIVE_ANON) +
                                   node_page_state(pgdat, NR_ACTIVE_FILE)),
                            nid, K(node_page_state(pgdat, NR_INACTIVE_ANON) +
@@ -461,16 +467,11 @@ static ssize_t node_read_meminfo(struct device *dev,
                             nid, K(sunreclaimable)
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
                             ,
-                            nid, K(node_page_state(pgdat, NR_ANON_THPS) *
-                                   HPAGE_PMD_NR),
-                            nid, K(node_page_state(pgdat, NR_SHMEM_THPS) *
-                                   HPAGE_PMD_NR),
-                            nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED) *
-                                   HPAGE_PMD_NR),
-                            nid, K(node_page_state(pgdat, NR_FILE_THPS) *
-                                   HPAGE_PMD_NR),
-                            nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED) *
-                                   HPAGE_PMD_NR)
+                            nid, K(node_page_state(pgdat, NR_ANON_THPS)),
+                            nid, K(node_page_state(pgdat, NR_SHMEM_THPS)),
+                            nid, K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
+                            nid, K(node_page_state(pgdat, NR_FILE_THPS)),
+                            nid, K(node_page_state(pgdat, NR_FILE_PMDMAPPED))
 #endif
                            );
        len += hugetlb_report_node_meminfo(buf, len, nid);
@@ -519,10 +520,14 @@ static ssize_t node_read_vmstat(struct device *dev,
                                     sum_zone_numa_state(nid, i));
 
 #endif
-       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
-               len += sysfs_emit_at(buf, len, "%s %lu\n",
-                                    node_stat_name(i),
-                                    node_page_state_pages(pgdat, i));
+       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+               unsigned long pages = node_page_state_pages(pgdat, i);
+
+               if (vmstat_item_print_in_thp(i))
+                       pages /= HPAGE_PMD_NR;
+               len += sysfs_emit_at(buf, len, "%s %lu\n", node_stat_name(i),
+                                    pages);
+       }
 
        return len;
 }
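
The meminfo/vmstat hunks follow a counter-unit change: THP counters are now stored in base pages, so the kB conversion drops its HPAGE_PMD_NR multiplication, while vmstat items flagged by vmstat_item_print_in_thp() are divided back down to huge-page units. A tiny sketch of both conversions, assuming 4 KiB pages and 512-page PMDs:

    #include <stdio.h>

    #define PAGE_SHIFT   12
    #define HPAGE_PMD_NR 512
    #define K(x) ((x) << (PAGE_SHIFT - 10)) /* pages -> kB */

    int main(void)
    {
            unsigned long anon_thps = 2 * HPAGE_PMD_NR; /* in base pages */

            printf("AnonHugePages: %8lu kB\n", K(anon_thps));   /* meminfo */
            printf("anon_thp %lu\n", anon_thps / HPAGE_PMD_NR); /* vmstat  */
            return 0;
    }
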
index ac68328..6e1f8e0 100644 (file)
@@ -1463,13 +1463,16 @@ static int platform_remove(struct device *_dev)
 {
        struct platform_driver *drv = to_platform_driver(_dev->driver);
        struct platform_device *dev = to_platform_device(_dev);
-       int ret = 0;
 
-       if (drv->remove)
-               ret = drv->remove(dev);
+       if (drv->remove) {
+               int ret = drv->remove(dev);
+
+               if (ret)
+                       dev_warn(_dev, "remove callback returned a non-zero value. This will be ignored.\n");
+       }
        dev_pm_domain_detach(_dev, true);
 
-       return ret;
+       return 0;
 }
 
 static void platform_shutdown(struct device *_dev)
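
platform_remove() now treats a non-zero return from ->remove() as a driver bug to warn about rather than a status to propagate, because the core removes the device regardless of what the callback returns. A userspace sketch of that contract (pdev and the callback type are invented):

    #include <stdio.h>

    struct pdev { const char *name; int (*remove)(struct pdev *); };

    static int bus_remove(struct pdev *dev)
    {
            if (dev->remove) {
                    int ret = dev->remove(dev);

                    if (ret)
                            fprintf(stderr,
                                    "%s: remove returned %d; ignored\n",
                                    dev->name, ret);
            }
            /* detach/cleanup happens unconditionally */
            return 0;
    }

    static int broken_remove(struct pdev *d) { (void)d; return -5; }

    int main(void)
    {
            struct pdev d = { "demo", broken_remove };
            return bus_remove(&d);
    }
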
index aaf6c83..78c310d 100644 (file)
@@ -2196,6 +2196,7 @@ static int genpd_add_provider(struct device_node *np, genpd_xlate_t xlate,
        cp->node = of_node_get(np);
        cp->data = data;
        cp->xlate = xlate;
+       fwnode_dev_initialized(&np->fwnode, true);
 
        mutex_lock(&of_genpd_mutex);
        list_add(&cp->link, &of_genpd_providers);
@@ -2385,6 +2386,7 @@ void of_genpd_del_provider(struct device_node *np)
                                }
                        }
 
+                       fwnode_dev_initialized(&cp->node->fwnode, false);
                        list_del(&cp->link);
                        of_node_put(cp->node);
                        kfree(cp);
index a46a7e3..fe1dad6 100644 (file)
@@ -305,7 +305,7 @@ static int rpm_get_suppliers(struct device *dev)
        return 0;
 }
 
-static void rpm_put_suppliers(struct device *dev)
+static void __rpm_put_suppliers(struct device *dev, bool try_to_suspend)
 {
        struct device_link *link;
 
@@ -313,10 +313,30 @@ static void rpm_put_suppliers(struct device *dev)
                                device_links_read_lock_held()) {
 
                while (refcount_dec_not_one(&link->rpm_active))
-                       pm_runtime_put(link->supplier);
+                       pm_runtime_put_noidle(link->supplier);
+
+               if (try_to_suspend)
+                       pm_request_idle(link->supplier);
        }
 }
 
+static void rpm_put_suppliers(struct device *dev)
+{
+       __rpm_put_suppliers(dev, true);
+}
+
+static void rpm_suspend_suppliers(struct device *dev)
+{
+       struct device_link *link;
+       int idx = device_links_read_lock();
+
+       list_for_each_entry_rcu(link, &dev->links.suppliers, c_node,
+                               device_links_read_lock_held())
+               pm_request_idle(link->supplier);
+
+       device_links_read_unlock(idx);
+}
+
 /**
  * __rpm_callback - Run a given runtime PM callback for a given device.
  * @cb: Runtime PM callback to run.
@@ -344,8 +364,10 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
                        idx = device_links_read_lock();
 
                        retval = rpm_get_suppliers(dev);
-                       if (retval)
+                       if (retval) {
+                               rpm_put_suppliers(dev);
                                goto fail;
+                       }
 
                        device_links_read_unlock(idx);
                }
@@ -368,9 +390,9 @@ static int __rpm_callback(int (*cb)(struct device *), struct device *dev)
                    || (dev->power.runtime_status == RPM_RESUMING && retval))) {
                        idx = device_links_read_lock();
 
- fail:
-                       rpm_put_suppliers(dev);
+                       __rpm_put_suppliers(dev, false);
 
+fail:
                        device_links_read_unlock(idx);
                }
 
@@ -642,8 +664,11 @@ static int rpm_suspend(struct device *dev, int rpmflags)
                goto out;
        }
 
+       if (dev->power.irq_safe)
+               goto out;
+
        /* Maybe the parent is now able to suspend. */
-       if (parent && !parent->power.ignore_children && !dev->power.irq_safe) {
+       if (parent && !parent->power.ignore_children) {
                spin_unlock(&dev->power.lock);
 
                spin_lock(&parent->power.lock);
@@ -652,6 +677,14 @@ static int rpm_suspend(struct device *dev, int rpmflags)
 
                spin_lock(&dev->power.lock);
        }
+       /* Maybe the suppliers are now able to suspend. */
+       if (dev->power.links_count > 0) {
+               spin_unlock_irq(&dev->power.lock);
+
+               rpm_suspend_suppliers(dev);
+
+               spin_lock_irq(&dev->power.lock);
+       }
 
  out:
        trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
@@ -1657,8 +1690,8 @@ void pm_runtime_get_suppliers(struct device *dev)
                                device_links_read_lock_held())
                if (link->flags & DL_FLAG_PM_RUNTIME) {
                        link->supplier_preactivated = true;
-                       refcount_inc(&link->rpm_active);
                        pm_runtime_get_sync(link->supplier);
+                       refcount_inc(&link->rpm_active);
                }
 
        device_links_read_unlock(idx);
@@ -1671,6 +1704,8 @@ void pm_runtime_get_suppliers(struct device *dev)
 void pm_runtime_put_suppliers(struct device *dev)
 {
        struct device_link *link;
+       unsigned long flags;
+       bool put;
        int idx;
 
        idx = device_links_read_lock();
@@ -1679,7 +1714,11 @@ void pm_runtime_put_suppliers(struct device *dev)
                                device_links_read_lock_held())
                if (link->supplier_preactivated) {
                        link->supplier_preactivated = false;
-                       if (refcount_dec_not_one(&link->rpm_active))
+                       spin_lock_irqsave(&dev->power.lock, flags);
+                       put = pm_runtime_status_suspended(dev) &&
+                             refcount_dec_not_one(&link->rpm_active);
+                       spin_unlock_irqrestore(&dev->power.lock, flags);
+                       if (put)
                                pm_runtime_put(link->supplier);
                }
 
index 8ce3065..fe3ac26 100644 (file)
@@ -15,11 +15,11 @@ static int regmap_sdw_mbq_write(void *context, unsigned int reg, unsigned int va
        struct sdw_slave *slave = dev_to_sdw_dev(dev);
        int ret;
 
-       ret = sdw_write(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff);
+       ret = sdw_write_no_pm(slave, SDW_SDCA_MBQ_CTL(reg), (val >> 8) & 0xff);
        if (ret < 0)
                return ret;
 
-       return sdw_write(slave, reg, val & 0xff);
+       return sdw_write_no_pm(slave, reg, val & 0xff);
 }
 
 static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *val)
@@ -29,11 +29,11 @@ static int regmap_sdw_mbq_read(void *context, unsigned int reg, unsigned int *va
        int read0;
        int read1;
 
-       read0 = sdw_read(slave, reg);
+       read0 = sdw_read_no_pm(slave, reg);
        if (read0 < 0)
                return read0;
 
-       read1 = sdw_read(slave, SDW_SDCA_MBQ_CTL(reg));
+       read1 = sdw_read_no_pm(slave, SDW_SDCA_MBQ_CTL(reg));
        if (read1 < 0)
                return read1;
 
@@ -98,4 +98,4 @@ struct regmap *__devm_regmap_init_sdw_mbq(struct sdw_slave *sdw,
 EXPORT_SYMBOL_GPL(__devm_regmap_init_sdw_mbq);
 
 MODULE_DESCRIPTION("Regmap SoundWire MBQ Module");
-MODULE_LICENSE("GPL v2");
+MODULE_LICENSE("GPL");
index c83be26..966de8a 100644 (file)
@@ -13,7 +13,7 @@ static int regmap_sdw_write(void *context, unsigned int reg, unsigned int val)
        struct device *dev = context;
        struct sdw_slave *slave = dev_to_sdw_dev(dev);
 
-       return sdw_write(slave, reg, val);
+       return sdw_write_no_pm(slave, reg, val);
 }
 
 static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val)
@@ -22,7 +22,7 @@ static int regmap_sdw_read(void *context, unsigned int reg, unsigned int *val)
        struct sdw_slave *slave = dev_to_sdw_dev(dev);
        int read;
 
-       read = sdw_read(slave, reg);
+       read = sdw_read_no_pm(slave, reg);
        if (read < 0)
                return read;
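
Switching both regmap SoundWire bus implementations to the no-PM accessors
matters because regmap I/O is routinely issued from a codec driver's own
runtime-PM callbacks (a regcache sync on resume, for instance); if the
accessor itself took a runtime-PM reference there, it would re-enter the PM
core it is already running under. A toy userspace sketch of that
re-entrancy hazard, where a plain mutex stands in for the runtime-PM state
machine and every name is illustrative:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t power_lock = PTHREAD_MUTEX_INITIALIZER;

static int bus_read(int reg)                    /* sdw_read_no_pm() analogue */
{
        return reg ^ 0xff;                      /* pretend bus access */
}

static int bus_read_with_pm(int reg)            /* sdw_read() analogue */
{
        pthread_mutex_lock(&power_lock);        /* pm_runtime_get_sync() */
        int val = bus_read(reg);
        pthread_mutex_unlock(&power_lock);      /* pm_runtime_put() */
        return val;
}

static void runtime_resume(void)
{
        pthread_mutex_lock(&power_lock);        /* PM core holds the state */
        /* bus_read_with_pm() here would re-enter and self-deadlock;
         * only the no-PM variant is safe inside the callback: */
        printf("restored 0x%x\n", bus_read(0x10));
        pthread_mutex_unlock(&power_lock);
}

int main(void)
{
        printf("normal read 0x%x\n", bus_read_with_pm(0x10));
        runtime_resume();
        return 0;
}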
 
index 37179a8..fa3719e 100644 (file)
@@ -938,6 +938,9 @@ int software_node_register(const struct software_node *node)
        if (software_node_to_swnode(node))
                return -EEXIST;
 
+       if (node->parent && !parent)
+               return -EINVAL;
+
        return PTR_ERR_OR_ZERO(swnode_register(node, parent, 0));
 }
 EXPORT_SYMBOL_GPL(software_node_register);
@@ -1002,25 +1005,33 @@ EXPORT_SYMBOL_GPL(fwnode_remove_software_node);
 /**
  * device_add_software_node - Assign software node to a device
  * @dev: The device the software node is meant for.
- * @swnode: The software node.
+ * @node: The software node.
  *
- * This function will register @swnode and make it the secondary firmware node
- * pointer of @dev. If @dev has no primary node, then @swnode will become the primary
- * node.
+ * This function will make @node the secondary firmware node pointer of @dev. If
+ * @dev has no primary node, then @node will become the primary node. The
+ * function will register @node automatically if it wasn't already registered.
  */
-int device_add_software_node(struct device *dev, const struct software_node *swnode)
+int device_add_software_node(struct device *dev, const struct software_node *node)
 {
+       struct swnode *swnode;
        int ret;
 
        /* Only one software node per device. */
        if (dev_to_swnode(dev))
                return -EBUSY;
 
-       ret = software_node_register(swnode);
-       if (ret)
-               return ret;
+       swnode = software_node_to_swnode(node);
+       if (swnode) {
+               kobject_get(&swnode->kobj);
+       } else {
+               ret = software_node_register(node);
+               if (ret)
+                       return ret;
+
+               swnode = software_node_to_swnode(node);
+       }
 
-       set_secondary_fwnode(dev, software_node_fwnode(swnode));
+       set_secondary_fwnode(dev, &swnode->fwnode);
 
        return 0;
 }
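
device_add_software_node() no longer fails when the node was registered
ahead of time: it takes a reference on the existing swnode and registers
the node itself only as a fallback, so one statically described node can
now back several devices. A minimal userspace model of that get-or-register
flow, with a fixed-size table and illustrative names standing in for the
kobject machinery:

#include <stdio.h>
#include <string.h>

struct swnode { const char *name; int refs; };

static struct swnode registry[8];

static struct swnode *lookup(const char *name)
{
        for (int i = 0; i < 8; i++)
                if (registry[i].name && !strcmp(registry[i].name, name))
                        return &registry[i];
        return NULL;
}

static struct swnode *get_or_register(const char *name)
{
        struct swnode *n = lookup(name);

        if (n) {                                /* kobject_get() analogue */
                n->refs++;
                return n;
        }
        for (int i = 0; i < 8; i++) {           /* register as fallback */
                if (!registry[i].name) {
                        registry[i] = (struct swnode){ name, 1 };
                        return &registry[i];
                }
        }
        return NULL;
}

int main(void)
{
        struct swnode *a = get_or_register("shared-node");
        struct swnode *b = get_or_register("shared-node");

        printf("same node: %d, refs: %d\n", a == b, a->refs);  /* 1, 2 */
        return 0;
}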
index 3ca5636..2f15fae 100644 (file)
@@ -2,3 +2,4 @@
 obj-$(CONFIG_TEST_ASYNC_DRIVER_PROBE)  += test_async_driver_probe.o
 
 obj-$(CONFIG_KUNIT_DRIVER_PE_TEST) += property-entry-test.o
+CFLAGS_REMOVE_property-entry-test.o += -fplugin-arg-structleak_plugin-byref -fplugin-arg-structleak_plugin-byref-all
index 2779e85..fd23615 100644 (file)
@@ -66,6 +66,12 @@ config AMIGA_Z2RAM
          To compile this driver as a module, choose M here: the
          module will be called z2ram.
 
+config N64CART
+       bool "N64 cart support"
+       depends on MACH_NINTENDO64
+       help
+         Support for the N64 cart.
+
 config CDROM
        tristate
        select BLK_SCSI_REQUEST
index b501b87..e3e3f1c 100644 (file)
@@ -17,6 +17,7 @@ obj-$(CONFIG_PS3_DISK)                += ps3disk.o
 obj-$(CONFIG_PS3_VRAM)         += ps3vram.o
 obj-$(CONFIG_ATARI_FLOPPY)     += ataflop.o
 obj-$(CONFIG_AMIGA_Z2RAM)      += z2ram.o
+obj-$(CONFIG_N64CART)          += n64cart.o
 obj-$(CONFIG_BLK_DEV_RAM)      += brd.o
 obj-$(CONFIG_BLK_DEV_LOOP)     += loop.o
 obj-$(CONFIG_XILINX_SYSACE)    += xsysace.o
index 7d9cc43..5d91813 100644 (file)
@@ -1324,7 +1324,7 @@ struct bm_extent {
  * A followup commit may allow even bigger BIO sizes,
  * once we thought that through. */
 #define DRBD_MAX_BIO_SIZE (1U << 20)
-#if DRBD_MAX_BIO_SIZE > (BIO_MAX_PAGES << PAGE_SHIFT)
+#if DRBD_MAX_BIO_SIZE > (BIO_MAX_VECS << PAGE_SHIFT)
 #error Architecture not supported: DRBD_MAX_BIO_SIZE > BIO_MAX_SIZE
 #endif
 #define DRBD_MAX_BIO_SIZE_SAFE (1U << 12)       /* Works always = 4k */
index 0b71292..4aa9683 100644 (file)
@@ -5091,7 +5091,6 @@ module_param(floppy, charp, 0);
 module_param(FLOPPY_IRQ, int, 0);
 module_param(FLOPPY_DMA, int, 0);
 MODULE_AUTHOR("Alain L. Knaff");
-MODULE_SUPPORTED_DEVICE("fd");
 MODULE_LICENSE("GPL");
 
 /* This doesn't actually get used other than for module information */
index 578fc03..a370cde 100644 (file)
@@ -663,7 +663,7 @@ static inline int is_loop_device(struct file *file)
 {
        struct inode *i = file->f_mapping->host;
 
-       return i && S_ISBLK(i->i_mode) && MAJOR(i->i_rdev) == LOOP_MAJOR;
+       return i && S_ISBLK(i->i_mode) && imajor(i) == LOOP_MAJOR;
 }
 
 static int loop_validate_file(struct file *file, struct block_device *bdev)
@@ -1212,6 +1212,9 @@ static int __loop_clr_fd(struct loop_device *lo, bool release)
                goto out_unlock;
        }
 
+       if (test_bit(QUEUE_FLAG_WC, &lo->lo_queue->queue_flags))
+               blk_queue_write_cache(lo->lo_queue, false, false);
+
        /* freeze request queue during the transition */
        blk_mq_freeze_queue(lo->lo_queue);
 
diff --git a/drivers/block/n64cart.c b/drivers/block/n64cart.c
new file mode 100644 (file)
index 0000000..47bdf32
--- /dev/null
@@ -0,0 +1,178 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for the N64 cart.
+ *
+ * Copyright (c) 2021 Lauri Kasanen
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+#include <linux/bitops.h>
+#include <linux/blkdev.h>
+#include <linux/dma-mapping.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+
+enum {
+       PI_DRAM_REG = 0,
+       PI_CART_REG,
+       PI_READ_REG,
+       PI_WRITE_REG,
+       PI_STATUS_REG,
+};
+
+#define PI_STATUS_DMA_BUSY     (1 << 0)
+#define PI_STATUS_IO_BUSY      (1 << 1)
+
+#define CART_DOMAIN            0x10000000
+#define CART_MAX               0x1FFFFFFF
+
+#define MIN_ALIGNMENT          8
+
+static u32 __iomem *reg_base;
+
+static unsigned int start;
+module_param(start, uint, 0);
+MODULE_PARM_DESC(start, "Start address of the cart block data");
+
+static unsigned int size;
+module_param(size, uint, 0);
+MODULE_PARM_DESC(size, "Size of the cart block data, in bytes");
+
+static void n64cart_write_reg(const u8 reg, const u32 value)
+{
+       writel(value, reg_base + reg);
+}
+
+static u32 n64cart_read_reg(const u8 reg)
+{
+       return readl(reg_base + reg);
+}
+
+static void n64cart_wait_dma(void)
+{
+       while (n64cart_read_reg(PI_STATUS_REG) &
+               (PI_STATUS_DMA_BUSY | PI_STATUS_IO_BUSY))
+               cpu_relax();
+}
+
+/*
+ * Process a single bvec of a bio.
+ */
+static bool n64cart_do_bvec(struct device *dev, struct bio_vec *bv, u32 pos)
+{
+       dma_addr_t dma_addr;
+       const u32 bstart = pos + start;
+
+       /* Alignment check */
+       WARN_ON_ONCE((bv->bv_offset & (MIN_ALIGNMENT - 1)) ||
+                    (bv->bv_len & (MIN_ALIGNMENT - 1)));
+
+       dma_addr = dma_map_bvec(dev, bv, DMA_FROM_DEVICE, 0);
+       if (dma_mapping_error(dev, dma_addr))
+               return false;
+
+       n64cart_wait_dma();
+
+       n64cart_write_reg(PI_DRAM_REG, dma_addr + bv->bv_offset);
+       n64cart_write_reg(PI_CART_REG, (bstart | CART_DOMAIN) & CART_MAX);
+       n64cart_write_reg(PI_WRITE_REG, bv->bv_len - 1);
+
+       n64cart_wait_dma();
+
+       dma_unmap_page(dev, dma_addr, bv->bv_len, DMA_FROM_DEVICE);
+       return true;
+}
+
+static blk_qc_t n64cart_submit_bio(struct bio *bio)
+{
+       struct bio_vec bvec;
+       struct bvec_iter iter;
+       struct device *dev = bio->bi_disk->private_data;
+       u32 pos = bio->bi_iter.bi_sector << SECTOR_SHIFT;
+
+       bio_for_each_segment(bvec, bio, iter) {
+               if (!n64cart_do_bvec(dev, &bvec, pos))
+                       goto io_error;
+               pos += bvec.bv_len;
+       }
+
+       bio_endio(bio);
+       return BLK_QC_T_NONE;
+io_error:
+       bio_io_error(bio);
+       return BLK_QC_T_NONE;
+}
+
+static const struct block_device_operations n64cart_fops = {
+       .owner          = THIS_MODULE,
+       .submit_bio     = n64cart_submit_bio,
+};
+
+/*
+ * The target device is embedded and RAM-constrained. We save RAM
+ * by initializing in __init code that gets dropped late in boot.
+ * For the same reason there is no module or unloading support.
+ */
+static int __init n64cart_probe(struct platform_device *pdev)
+{
+       struct gendisk *disk;
+
+       if (!start || !size) {
+               pr_err("start or size not specified\n");
+               return -ENODEV;
+       }
+
+       if (size & 4095) {
+               pr_err("size must be a multiple of 4K\n");
+               return -ENODEV;
+       }
+
+       reg_base = devm_platform_ioremap_resource(pdev, 0);
+       if (!reg_base)
+               return -EINVAL;
+
+       disk = alloc_disk(0);
+       if (!disk)
+               return -ENOMEM;
+
+       disk->queue = blk_alloc_queue(NUMA_NO_NODE);
+       if (!disk->queue)
+               return -ENOMEM;
+
+       disk->first_minor = 0;
+       disk->flags = GENHD_FL_NO_PART_SCAN | GENHD_FL_EXT_DEVT;
+       disk->fops = &n64cart_fops;
+       disk->private_data = &pdev->dev;
+       strcpy(disk->disk_name, "n64cart");
+
+       set_capacity(disk, size >> SECTOR_SHIFT);
+       set_disk_ro(disk, 1);
+
+       blk_queue_flag_set(QUEUE_FLAG_NONROT, disk->queue);
+       blk_queue_physical_block_size(disk->queue, 4096);
+       blk_queue_logical_block_size(disk->queue, 4096);
+
+       add_disk(disk);
+
+       pr_info("%u KiB disk\n", size / 1024);
+
+       return 0;
+}
+
+static struct platform_driver n64cart_driver = {
+       .driver = {
+               .name = "n64cart",
+       },
+};
+
+static int __init n64cart_init(void)
+{
+       return platform_driver_probe(&n64cart_driver, n64cart_probe);
+}
+
+module_init(n64cart_init);
+
+MODULE_AUTHOR("Lauri Kasanen <cand@gmx.com>");
+MODULE_DESCRIPTION("Driver for the N64 cart");
+MODULE_LICENSE("GPL");
index 8b9622e..4ff71b5 100644 (file)
@@ -78,8 +78,7 @@ struct link_dead_args {
 #define NBD_RT_HAS_PID_FILE            3
 #define NBD_RT_HAS_CONFIG_REF          4
 #define NBD_RT_BOUND                   5
-#define NBD_RT_DESTROY_ON_DISCONNECT   6
-#define NBD_RT_DISCONNECT_ON_CLOSE     7
+#define NBD_RT_DISCONNECT_ON_CLOSE     6
 
 #define NBD_DESTROY_ON_DISCONNECT      0
 #define NBD_DISCONNECT_REQUESTED       1
@@ -1904,12 +1903,21 @@ again:
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-                       set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
-                               &config->runtime_flags);
-                       set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
-                       put_dev = true;
+                       /*
+                        * We have 1 ref to keep the device around, and then 1
+                        * ref for our current operation here, which will be
+                        * inherited by the config.  If DESTROY_ON_DISCONNECT
+                        * was already set, that extra reference was donated
+                        * by an earlier configure, so it must not be put
+                        * again.
+                        */
+                       if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+                                             &nbd->flags))
+                               put_dev = true;
                } else {
-                       clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
+                       if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+                                              &nbd->flags))
+                               refcount_inc(&nbd->refs);
                }
                if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
                        set_bit(NBD_RT_DISCONNECT_ON_CLOSE,
@@ -2080,15 +2088,13 @@ static int nbd_genl_reconfigure(struct sk_buff *skb, struct genl_info *info)
        if (info->attrs[NBD_ATTR_CLIENT_FLAGS]) {
                u64 flags = nla_get_u64(info->attrs[NBD_ATTR_CLIENT_FLAGS]);
                if (flags & NBD_CFLAG_DESTROY_ON_DISCONNECT) {
-                       if (!test_and_set_bit(NBD_RT_DESTROY_ON_DISCONNECT,
-                                             &config->runtime_flags))
+                       if (!test_and_set_bit(NBD_DESTROY_ON_DISCONNECT,
+                                             &nbd->flags))
                                put_dev = true;
-                       set_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                } else {
-                       if (test_and_clear_bit(NBD_RT_DESTROY_ON_DISCONNECT,
-                                              &config->runtime_flags))
+                       if (test_and_clear_bit(NBD_DESTROY_ON_DISCONNECT,
+                                              &nbd->flags))
                                refcount_inc(&nbd->refs);
-                       clear_bit(NBD_DESTROY_ON_DISCONNECT, &nbd->flags);
                }
 
                if (flags & NBD_CFLAG_DISCONNECT_ON_CLOSE) {
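
Dropping NBD_RT_DESTROY_ON_DISCONNECT leaves the destroy-on-disconnect
state only in the device-wide nbd->flags, and the reference juggling keys
off the flag transitions: setting the flag donates one device reference,
clearing it takes one back, and reconfiguring with the same value is a
no-op. A small model of that invariant, with plain bools and an int
standing in for the atomic bitops and refcount_t:

#include <stdbool.h>
#include <stdio.h>

static bool destroy_on_disconnect;
static int refs = 1;                    /* the "keep device around" ref */

static bool test_and_set(bool *b)   { bool old = *b; *b = true;  return old; }
static bool test_and_clear(bool *b) { bool old = *b; *b = false; return old; }

static void configure(bool destroy)
{
        if (destroy) {
                if (!test_and_set(&destroy_on_disconnect))
                        refs--;         /* donate the caller's extra ref */
        } else {
                if (test_and_clear(&destroy_on_disconnect))
                        refs++;         /* take it back */
        }
}

int main(void)
{
        configure(true);
        configure(true);                /* repeated set: no double put */
        configure(false);
        printf("refs = %d\n", refs);    /* balanced: back to 1 */
        return 0;
}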
index d6c821d..51bfd77 100644 (file)
@@ -1369,10 +1369,13 @@ static blk_status_t null_handle_cmd(struct nullb_cmd *cmd, sector_t sector,
        }
 
        if (dev->zoned)
-               cmd->error = null_process_zoned_cmd(cmd, op,
-                                                   sector, nr_sectors);
+               sts = null_process_zoned_cmd(cmd, op, sector, nr_sectors);
        else
-               cmd->error = null_process_cmd(cmd, op, sector, nr_sectors);
+               sts = null_process_cmd(cmd, op, sector, nr_sectors);
+
+       /* Do not overwrite errors (e.g. timeout errors) */
+       if (cmd->error == BLK_STS_OK)
+               cmd->error = sts;
 
 out:
        nullb_complete_cmd(cmd);
@@ -1451,8 +1454,20 @@ static bool should_requeue_request(struct request *rq)
 
 static enum blk_eh_timer_return null_timeout_rq(struct request *rq, bool res)
 {
+       struct nullb_cmd *cmd = blk_mq_rq_to_pdu(rq);
+
        pr_info("rq %p timed out\n", rq);
-       blk_mq_complete_request(rq);
+
+       /*
+        * If the device is marked as blocking (i.e. memory backed or zoned
+        * device), the submission path may be blocked waiting for resources
+        * and cause real timeouts. For these real timeouts, the submission
+        * path will complete the request using blk_mq_complete_request().
+        * Only fake timeouts need to execute blk_mq_complete_request() here.
+        */
+       cmd->error = BLK_STS_TIMEOUT;
+       if (cmd->fake_timeout)
+               blk_mq_complete_request(rq);
        return BLK_EH_DONE;
 }
 
@@ -1473,6 +1488,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
        cmd->rq = bd->rq;
        cmd->error = BLK_STS_OK;
        cmd->nq = nq;
+       cmd->fake_timeout = should_timeout_request(bd->rq);
 
        blk_mq_start_request(bd->rq);
 
@@ -1489,7 +1505,7 @@ static blk_status_t null_queue_rq(struct blk_mq_hw_ctx *hctx,
                        return BLK_STS_OK;
                }
        }
-       if (should_timeout_request(bd->rq))
+       if (cmd->fake_timeout)
                return BLK_STS_OK;
 
        return null_handle_cmd(cmd, sector, nr_sectors, req_op(bd->rq));
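
Together with the fake_timeout flag added to struct nullb_cmd just below,
the rule is: the timeout handler always records BLK_STS_TIMEOUT, but it
completes the request itself only for injected timeouts; a real,
resource-starved submission completes the request later and must not
overwrite the recorded status. A compact model of that precedence
(simplified names, not the driver's types):

#include <stdbool.h>
#include <stdio.h>

enum sts { STS_OK, STS_TIMEOUT };

struct cmd { enum sts error; bool fake_timeout; };

static void timeout_handler(struct cmd *c)
{
        c->error = STS_TIMEOUT;         /* always record the timeout */
        if (c->fake_timeout)
                printf("completed from timeout handler\n");
}

static void submission_completes(struct cmd *c, enum sts sts)
{
        if (c->error == STS_OK)         /* first recorded status wins */
                c->error = sts;
        printf("final status: %d\n", c->error);
}

int main(void)
{
        struct cmd c = { STS_OK, false };       /* a real, blocked request */

        timeout_handler(&c);                    /* records, does not complete */
        submission_completes(&c, STS_OK);       /* prints 1: timeout kept */
        return 0;
}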
index 83504f3..4876d5a 100644 (file)
@@ -22,6 +22,7 @@ struct nullb_cmd {
        blk_status_t error;
        struct nullb_queue *nq;
        struct hrtimer timer;
+       bool fake_timeout;
 };
 
 struct nullb_queue {
index 63f5498..227e1be 100644 (file)
@@ -165,15 +165,17 @@ static ssize_t rsxx_cram_read(struct file *fp, char __user *ubuf,
 {
        struct rsxx_cardinfo *card = file_inode(fp)->i_private;
        char *buf;
-       ssize_t st;
+       int st;
 
        buf = kzalloc(cnt, GFP_KERNEL);
        if (!buf)
                return -ENOMEM;
 
        st = rsxx_creg_read(card, CREG_ADD_CRAM + (u32)*ppos, cnt, buf, 1);
-       if (!st)
-               st = copy_to_user(ubuf, buf, cnt);
+       if (!st) {
+               if (copy_to_user(ubuf, buf, cnt))
+                       st = -EFAULT;
+       }
        kfree(buf);
        if (st)
                return st;
@@ -869,6 +871,7 @@ static int rsxx_pci_probe(struct pci_dev *dev,
        card->event_wq = create_singlethread_workqueue(DRIVER_NAME"_event");
        if (!card->event_wq) {
                dev_err(CARD_TO_DEV(card), "Failed card event setup.\n");
+               st = -ENOMEM;
                goto failed_event_handler;
        }
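
Both rsxx fixes are about return-value discipline: copy_to_user() returns
the number of bytes it could not copy rather than an errno, so a non-zero
result has to be folded into -EFAULT, and the probe error path needs an
explicit -ENOMEM instead of falling through with a stale status. A tiny
model of the copy_to_user() convention (EFAULT is 14 on Linux):

#include <stdio.h>
#include <string.h>

/* Copies up to fault_after bytes, then "faults"; returns bytes NOT
 * copied, mirroring copy_to_user()'s contract. */
static unsigned long copy_out(char *dst, const char *src, unsigned long n,
                              unsigned long fault_after)
{
        unsigned long ok = n < fault_after ? n : fault_after;

        memcpy(dst, src, ok);
        return n - ok;
}

int main(void)
{
        char dst[8];
        const char src[8] = "payload";
        int st = 0;

        if (copy_out(dst, src, sizeof(dst), 4)) /* fault after 4 bytes */
                st = -14;                       /* -EFAULT, not raw count */
        printf("st = %d\n", st);
        return 0;
}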
 
index 4861669..6147977 100644 (file)
@@ -11,7 +11,6 @@
 #ifndef __RSXX_PRIV_H__
 #define __RSXX_PRIV_H__
 
-#include <linux/version.h>
 #include <linux/semaphore.h>
 
 #include <linux/fs.h>
index 982732d..664280f 100644 (file)
@@ -877,6 +877,7 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
        if (card->mm_pages[0].desc == NULL ||
            card->mm_pages[1].desc == NULL) {
                dev_printk(KERN_ERR, &card->dev->dev, "alloc failed\n");
+               ret = -ENOMEM;
                goto failed_alloc;
        }
        reset_page(&card->mm_pages[0]);
@@ -888,8 +889,10 @@ static int mm_pci_probe(struct pci_dev *dev, const struct pci_device_id *id)
        spin_lock_init(&card->lock);
 
        card->queue = blk_alloc_queue(NUMA_NO_NODE);
-       if (!card->queue)
+       if (!card->queue) {
+               ret = -ENOMEM;
                goto failed_alloc;
+       }
 
        tasklet_init(&card->tasklet, process_page, (unsigned long)card);
 
index b0285db..b9fa3ef 100644 (file)
@@ -705,6 +705,7 @@ static int virtblk_probe(struct virtio_device *vdev)
        u32 v, blk_size, max_size, sg_elems, opt_io_size;
        u16 min_io_size;
        u8 physical_block_exp, alignment_offset;
+       unsigned int queue_depth;
 
        if (!vdev->config->get) {
                dev_err(&vdev->dev, "%s failure: config access disabled\n",
@@ -756,16 +757,18 @@ static int virtblk_probe(struct virtio_device *vdev)
        }
 
        /* Default queue sizing is to fill the ring. */
-       if (!virtblk_queue_depth) {
-               virtblk_queue_depth = vblk->vqs[0].vq->num_free;
+       if (likely(!virtblk_queue_depth)) {
+               queue_depth = vblk->vqs[0].vq->num_free;
                /* ... but without indirect descs, we use 2 descs per req */
                if (!virtio_has_feature(vdev, VIRTIO_RING_F_INDIRECT_DESC))
-                       virtblk_queue_depth /= 2;
+                       queue_depth /= 2;
+       } else {
+               queue_depth = virtblk_queue_depth;
        }
 
        memset(&vblk->tag_set, 0, sizeof(vblk->tag_set));
        vblk->tag_set.ops = &virtio_mq_ops;
-       vblk->tag_set.queue_depth = virtblk_queue_depth;
+       vblk->tag_set.queue_depth = queue_depth;
        vblk->tag_set.numa_node = NUMA_NO_NODE;
        vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
        vblk->tag_set.cmd_size =
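
The new queue_depth local exists because module parameters are global while
ring sizes are per-device: writing the computed default back into
virtblk_queue_depth made the first probed device's value leak into every
later probe. A plain-C sketch of the corrected flow (the indirect-descriptor
halving is omitted for brevity):

#include <stdio.h>

static unsigned int virtblk_queue_depth;        /* module param; 0 = auto */

static unsigned int probe(unsigned int ring_free)
{
        unsigned int queue_depth;

        if (!virtblk_queue_depth)
                queue_depth = ring_free;        /* per-device default */
        else
                queue_depth = virtblk_queue_depth; /* user override */
        return queue_depth;                     /* param left untouched */
}

int main(void)
{
        printf("dev0 depth: %u\n", probe(256)); /* 256 */
        printf("dev1 depth: %u\n", probe(64));  /* 64, not 256 */
        return 0;
}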
index da16121..14e4528 100644 (file)
@@ -891,7 +891,7 @@ next:
 out:
        for (i = last_map; i < num; i++) {
                /* Don't zap current batch's valid persistent grants. */
-               if(i >= last_map + segs_to_map)
+               if (i >= map_until)
                        pages[i]->persistent_gnt = NULL;
                pages[i]->handle = BLKBACK_INVALID_HANDLE;
        }
@@ -1326,9 +1326,7 @@ static int dispatch_rw_block_io(struct xen_blkif_ring *ring,
                                     pages[i]->page,
                                     seg[i].nsec << 9,
                                     seg[i].offset) == 0)) {
-
-                       int nr_iovecs = min_t(int, (nseg-i), BIO_MAX_PAGES);
-                       bio = bio_alloc(GFP_KERNEL, nr_iovecs);
+                       bio = bio_alloc(GFP_KERNEL, bio_max_segs(nseg - i));
                        if (unlikely(bio == NULL))
                                goto fail_put_bio;
 
index d701854..cf8deec 100644 (file)
@@ -627,7 +627,7 @@ static ssize_t writeback_store(struct device *dev,
        struct bio_vec bio_vec;
        struct page *page;
        ssize_t ret = len;
-       int mode;
+       int mode, err;
        unsigned long blk_idx = 0;
 
        if (sysfs_streq(buf, "idle"))
@@ -638,8 +638,8 @@ static ssize_t writeback_store(struct device *dev,
                if (strncmp(buf, PAGE_WB_SIG, sizeof(PAGE_WB_SIG) - 1))
                        return -EINVAL;
 
-               ret = kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index);
-               if (ret || index >= nr_pages)
+               if (kstrtol(buf + sizeof(PAGE_WB_SIG) - 1, 10, &index) ||
+                               index >= nr_pages)
                        return -EINVAL;
 
                nr_pages = 1;
@@ -663,7 +663,7 @@ static ssize_t writeback_store(struct device *dev,
                goto release_init_lock;
        }
 
-       while (nr_pages--) {
+       for (; nr_pages != 0; index++, nr_pages--) {
                struct bio_vec bvec;
 
                bvec.bv_page = page;
@@ -728,12 +728,17 @@ static ssize_t writeback_store(struct device *dev,
                 * XXX: A single page IO would be inefficient for write
                 * but it is not a bad starting point.
                 */
-               ret = submit_bio_wait(&bio);
-               if (ret) {
+               err = submit_bio_wait(&bio);
+               if (err) {
                        zram_slot_lock(zram, index);
                        zram_clear_flag(zram, index, ZRAM_UNDER_WB);
                        zram_clear_flag(zram, index, ZRAM_IDLE);
                        zram_slot_unlock(zram, index);
+                       /*
+                        * Return the last IO error unless no IO
+                        * succeeded at all.
+                        */
+                       ret = err;
                        continue;
                }
 
@@ -1081,7 +1086,7 @@ static ssize_t mm_stat_show(struct device *dev,
                        zram->limit_pages << PAGE_SHIFT,
                        max_used << PAGE_SHIFT,
                        (u64)atomic64_read(&zram->stats.same_pages),
-                       pool_stats.pages_compacted,
+                       atomic_long_read(&pool_stats.pages_compacted),
                        (u64)atomic64_read(&zram->stats.huge_pages),
                        (u64)atomic64_read(&zram->stats.huge_pages_since));
        up_read(&zram->init_lock);
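
The writeback loop now keeps the sysfs return value (ret) separate from the
per-page IO status (err): a failed page is skipped but remembered, so the
store reports the last IO error instead of silent success, and the explicit
index increment keeps the slot bookkeeping in step with the page being
written. A userspace sketch of that error-aggregation policy:

#include <stdio.h>

static int write_page(unsigned long index)
{
        return index == 2 ? -5 /* -EIO */ : 0;  /* fail one page for demo */
}

int main(void)
{
        long ret = 4096;        /* optimistic: byte count for sysfs */
        unsigned long index = 0, nr_pages = 4;
        int err;

        for (; nr_pages != 0; index++, nr_pages--) {
                err = write_page(index);
                if (err) {
                        ret = err;      /* remember, keep writing the rest */
                        continue;
                }
                /* mark slot at this index as written back ... */
        }
        printf("store returns %ld\n", ret);     /* -5: last error wins */
        return 0;
}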
index 3951f7b..bea1595 100644 (file)
@@ -194,5 +194,4 @@ module_init(rsi_91x_bt_module_init);
 module_exit(rsi_91x_bt_module_exit);
 MODULE_AUTHOR("Redpine Signals Inc");
 MODULE_DESCRIPTION("RSI BT driver");
-MODULE_SUPPORTED_DEVICE("RSI-BT");
 MODULE_LICENSE("Dual BSD/GPL");
index 52683fd..5cbfbd9 100644 (file)
@@ -4849,8 +4849,8 @@ static int btusb_probe(struct usb_interface *intf,
                        data->diag = NULL;
        }
 
-       if (!enable_autosuspend)
-               usb_disable_autosuspend(data->udev);
+       if (enable_autosuspend)
+               usb_enable_autosuspend(data->udev);
 
        err = hci_register_dev(hdev);
        if (err < 0)
@@ -4910,9 +4910,6 @@ static void btusb_disconnect(struct usb_interface *intf)
                gpiod_put(data->reset_gpio);
 
        hci_free_dev(hdev);
-
-       if (!enable_autosuspend)
-               usb_enable_autosuspend(data->udev);
 }
 
 #ifdef CONFIG_PM
index c23c77c..b1fd559 100644 (file)
@@ -14,3 +14,10 @@ config FSL_MC_BUS
          architecture.  The fsl-mc bus driver handles discovery of
          DPAA2 objects (which are represented as Linux devices) and
          binding objects to drivers.
+
+config FSL_MC_UAPI_SUPPORT
+       bool "Management Complex (MC) userspace support"
+       depends on FSL_MC_BUS
+       help
+         Provides userspace support for interrogating, creating, destroying or
+         configuring DPAA2 objects exported by the Management Complex.
index 3c518c7..4ae292a 100644 (file)
@@ -16,3 +16,6 @@ mc-bus-driver-objs := fsl-mc-bus.o \
                      fsl-mc-allocator.o \
                      fsl-mc-msi.o \
                      dpmcp.o
+
+# MC userspace support
+obj-$(CONFIG_FSL_MC_UAPI_SUPPORT) += fsl-mc-uapi.o
index 68488a7..e3e2ae4 100644 (file)
@@ -237,8 +237,8 @@ static void dprc_add_new_devices(struct fsl_mc_device *mc_bus_dev,
  * populated before they can get allocation requests from probe callbacks
  * of the device drivers for the non-allocatable devices.
  */
-static int dprc_scan_objects(struct fsl_mc_device *mc_bus_dev,
-                           bool alloc_interrupts)
+int dprc_scan_objects(struct fsl_mc_device *mc_bus_dev,
+                     bool alloc_interrupts)
 {
        int num_child_objects;
        int dprc_get_obj_failures;
@@ -458,8 +458,9 @@ out:
 /*
  * Disable and clear interrupt for a given DPRC object
  */
-static int disable_dprc_irq(struct fsl_mc_device *mc_dev)
+int disable_dprc_irq(struct fsl_mc_device *mc_dev)
 {
+       struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
        int error;
        struct fsl_mc_io *mc_io = mc_dev->mc_io;
 
@@ -496,9 +497,18 @@ static int disable_dprc_irq(struct fsl_mc_device *mc_dev)
                return error;
        }
 
+       mc_bus->irq_enabled = 0;
+
        return 0;
 }
 
+int get_dprc_irq_state(struct fsl_mc_device *mc_dev)
+{
+       struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
+
+       return mc_bus->irq_enabled;
+}
+
 static int register_dprc_irq_handler(struct fsl_mc_device *mc_dev)
 {
        int error;
@@ -525,8 +535,9 @@ static int register_dprc_irq_handler(struct fsl_mc_device *mc_dev)
        return 0;
 }
 
-static int enable_dprc_irq(struct fsl_mc_device *mc_dev)
+int enable_dprc_irq(struct fsl_mc_device *mc_dev)
 {
+       struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
        int error;
 
        /*
@@ -554,6 +565,8 @@ static int enable_dprc_irq(struct fsl_mc_device *mc_dev)
                return error;
        }
 
+       mc_bus->irq_enabled = 1;
+
        return 0;
 }
 
@@ -603,6 +616,7 @@ int dprc_setup(struct fsl_mc_device *mc_dev)
        struct irq_domain *mc_msi_domain;
        bool mc_io_created = false;
        bool msi_domain_set = false;
+       bool uapi_created = false;
        u16 major_ver, minor_ver;
        size_t region_size;
        int error;
@@ -635,6 +649,11 @@ int dprc_setup(struct fsl_mc_device *mc_dev)
                        return error;
 
                mc_io_created = true;
+       } else {
+               error = fsl_mc_uapi_create_device_file(mc_bus);
+               if (error < 0)
+                       return -EPROBE_DEFER;
+               uapi_created = true;
        }
 
        mc_msi_domain = fsl_mc_find_msi_domain(&mc_dev->dev);
@@ -692,6 +711,9 @@ error_cleanup_msi_domain:
                mc_dev->mc_io = NULL;
        }
 
+       if (uapi_created)
+               fsl_mc_uapi_remove_device_file(mc_bus);
+
        return error;
 }
 EXPORT_SYMBOL_GPL(dprc_setup);
@@ -763,6 +785,7 @@ static void dprc_teardown_irq(struct fsl_mc_device *mc_dev)
 
 int dprc_cleanup(struct fsl_mc_device *mc_dev)
 {
+       struct fsl_mc_bus *mc_bus = to_fsl_mc_bus(mc_dev);
        int error;
 
        /* this function should be called only for DPRCs, it
@@ -793,6 +816,8 @@ int dprc_cleanup(struct fsl_mc_device *mc_dev)
        if (!fsl_mc_is_root_dprc(&mc_dev->dev)) {
                fsl_destroy_mc_io(mc_dev->mc_io);
                mc_dev->mc_io = NULL;
+       } else {
+               fsl_mc_uapi_remove_device_file(mc_bus);
        }
 
        return 0;
index 8af978b..380ad1f 100644 (file)
@@ -41,7 +41,7 @@ struct fsl_mc {
        struct fsl_mc_device *root_mc_bus_dev;
        u8 num_translation_ranges;
        struct fsl_mc_addr_translation_range *translation_ranges;
-       void *fsl_mc_regs;
+       void __iomem *fsl_mc_regs;
 };
 
 /**
@@ -208,12 +208,108 @@ static struct attribute *fsl_mc_dev_attrs[] = {
 
 ATTRIBUTE_GROUPS(fsl_mc_dev);
 
+static int scan_fsl_mc_bus(struct device *dev, void *data)
+{
+       struct fsl_mc_device *root_mc_dev;
+       struct fsl_mc_bus *root_mc_bus;
+
+       if (!fsl_mc_is_root_dprc(dev))
+               goto exit;
+
+       root_mc_dev = to_fsl_mc_device(dev);
+       root_mc_bus = to_fsl_mc_bus(root_mc_dev);
+       mutex_lock(&root_mc_bus->scan_mutex);
+       dprc_scan_objects(root_mc_dev, false);
+       mutex_unlock(&root_mc_bus->scan_mutex);
+
+exit:
+       return 0;
+}
+
+static ssize_t rescan_store(struct bus_type *bus,
+                           const char *buf, size_t count)
+{
+       unsigned long val;
+
+       if (kstrtoul(buf, 0, &val) < 0)
+               return -EINVAL;
+
+       if (val)
+               bus_for_each_dev(bus, NULL, NULL, scan_fsl_mc_bus);
+
+       return count;
+}
+static BUS_ATTR_WO(rescan);
+
+static int fsl_mc_bus_set_autorescan(struct device *dev, void *data)
+{
+       struct fsl_mc_device *root_mc_dev;
+       unsigned long val;
+       char *buf = data;
+
+       if (!fsl_mc_is_root_dprc(dev))
+               goto exit;
+
+       root_mc_dev = to_fsl_mc_device(dev);
+
+       if (kstrtoul(buf, 0, &val) < 0)
+               return -EINVAL;
+
+       if (val)
+               enable_dprc_irq(root_mc_dev);
+       else
+               disable_dprc_irq(root_mc_dev);
+
+exit:
+       return 0;
+}
+
+static int fsl_mc_bus_get_autorescan(struct device *dev, void *data)
+{
+       struct fsl_mc_device *root_mc_dev;
+       char *buf = data;
+
+       if (!fsl_mc_is_root_dprc(dev))
+               goto exit;
+
+       root_mc_dev = to_fsl_mc_device(dev);
+
+       sprintf(buf, "%d\n", get_dprc_irq_state(root_mc_dev));
+exit:
+       return 0;
+}
+
+static ssize_t autorescan_store(struct bus_type *bus,
+                               const char *buf, size_t count)
+{
+       bus_for_each_dev(bus, NULL, (void *)buf, fsl_mc_bus_set_autorescan);
+
+       return count;
+}
+
+static ssize_t autorescan_show(struct bus_type *bus, char *buf)
+{
+       bus_for_each_dev(bus, NULL, (void *)buf, fsl_mc_bus_get_autorescan);
+       return strlen(buf);
+}
+
+static BUS_ATTR_RW(autorescan);
+
+static struct attribute *fsl_mc_bus_attrs[] = {
+       &bus_attr_rescan.attr,
+       &bus_attr_autorescan.attr,
+       NULL,
+};
+
+ATTRIBUTE_GROUPS(fsl_mc_bus);
+
 struct bus_type fsl_mc_bus_type = {
        .name = "fsl-mc",
        .match = fsl_mc_bus_match,
        .uevent = fsl_mc_bus_uevent,
        .dma_configure  = fsl_mc_dma_configure,
        .dev_groups = fsl_mc_dev_groups,
+       .bus_groups = fsl_mc_bus_groups,
 };
 EXPORT_SYMBOL_GPL(fsl_mc_bus_type);
 
@@ -292,6 +388,11 @@ struct device_type fsl_mc_bus_dpdmai_type = {
 };
 EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdmai_type);
 
+struct device_type fsl_mc_bus_dpdbg_type = {
+       .name = "fsl_mc_bus_dpdbg"
+};
+EXPORT_SYMBOL_GPL(fsl_mc_bus_dpdbg_type);
+
 static struct device_type *fsl_mc_get_device_type(const char *type)
 {
        static const struct {
@@ -313,6 +414,7 @@ static struct device_type *fsl_mc_get_device_type(const char *type)
                { &fsl_mc_bus_dpaiop_type, "dpaiop" },
                { &fsl_mc_bus_dpci_type, "dpci" },
                { &fsl_mc_bus_dpdmai_type, "dpdmai" },
+               { &fsl_mc_bus_dpdbg_type, "dpdbg" },
                { NULL, NULL }
        };
        int i;
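
The new bus attributes follow the usual sysfs layout for bus-level files,
so they should surface as /sys/bus/fsl-mc/rescan and
/sys/bus/fsl-mc/autorescan (paths inferred from the attribute names, not
spelled out in this diff). A minimal userspace exerciser:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/sys/bus/fsl-mc/rescan", O_WRONLY);

        if (fd >= 0) {
                if (write(fd, "1", 1) < 0)      /* any non-zero value rescans */
                        perror("rescan");
                close(fd);
        }

        FILE *f = fopen("/sys/bus/fsl-mc/autorescan", "r");

        if (f) {
                char state[4] = "";

                if (fgets(state, sizeof(state), f))
                        printf("autorescan: %s", state); /* 1 = DPRC IRQ on */
                fclose(f);
        }
        return 0;
}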
index c932387..1958fa0 100644 (file)
@@ -10,6 +10,8 @@
 
 #include <linux/fsl/mc.h>
 #include <linux/mutex.h>
+#include <linux/ioctl.h>
+#include <linux/miscdevice.h>
 
 /*
  * Data Path Management Complex (DPMNG) General API
@@ -543,6 +545,22 @@ struct fsl_mc_resource_pool {
 };
 
 /**
+ * struct fsl_mc_uapi - information associated with a device file
+ * @misc: struct miscdevice linked to the root dprc
+ * @device: newly created device in /dev
+ * @mutex: mutex lock to serialize the open/release operations
+ * @local_instance_in_use: local MC I/O instance in use or not
+ * @static_mc_io: pointer to the static MC I/O object
+ */
+struct fsl_mc_uapi {
+       struct miscdevice misc;
+       struct device *device;
+       struct mutex mutex; /* serialize open/release operations */
+       u32 local_instance_in_use;
+       struct fsl_mc_io *static_mc_io;
+};
+
+/**
  * struct fsl_mc_bus - logical bus that corresponds to a physical DPRC
  * @mc_dev: fsl-mc device for the bus device itself.
  * @resource_pools: array of resource pools (one pool per resource type)
@@ -551,6 +569,7 @@ struct fsl_mc_resource_pool {
  * @irq_resources: Pointer to array of IRQ objects for the IRQ pool
  * @scan_mutex: Serializes bus scanning
  * @dprc_attr: DPRC attributes
+ * @uapi_misc: struct that abstracts the interaction with userspace
  */
 struct fsl_mc_bus {
        struct fsl_mc_device mc_dev;
@@ -558,6 +577,8 @@ struct fsl_mc_bus {
        struct fsl_mc_device_irq *irq_resources;
        struct mutex scan_mutex;    /* serializes bus scanning */
        struct dprc_attributes dprc_attr;
+       struct fsl_mc_uapi uapi_misc;
+       int irq_enabled;
 };
 
 #define to_fsl_mc_bus(_mc_dev) \
@@ -574,6 +595,9 @@ int __init dprc_driver_init(void);
 
 void dprc_driver_exit(void);
 
+int dprc_scan_objects(struct fsl_mc_device *mc_bus_dev,
+                     bool alloc_interrupts);
+
 int __init fsl_mc_allocator_driver_init(void);
 
 void fsl_mc_allocator_driver_exit(void);
@@ -612,4 +636,29 @@ void fsl_mc_get_root_dprc(struct device *dev,
 struct fsl_mc_device *fsl_mc_device_lookup(struct fsl_mc_obj_desc *obj_desc,
                                           struct fsl_mc_device *mc_bus_dev);
 
+u16 mc_cmd_hdr_read_cmdid(struct fsl_mc_command *cmd);
+
+#ifdef CONFIG_FSL_MC_UAPI_SUPPORT
+
+int fsl_mc_uapi_create_device_file(struct fsl_mc_bus *mc_bus);
+
+void fsl_mc_uapi_remove_device_file(struct fsl_mc_bus *mc_bus);
+
+#else
+
+static inline int fsl_mc_uapi_create_device_file(struct fsl_mc_bus *mc_bus)
+{
+       return 0;
+}
+
+static inline void fsl_mc_uapi_remove_device_file(struct fsl_mc_bus *mc_bus)
+{
+}
+
+#endif
+
+int disable_dprc_irq(struct fsl_mc_device *mc_dev);
+int enable_dprc_irq(struct fsl_mc_device *mc_dev);
+int get_dprc_irq_state(struct fsl_mc_device *mc_dev);
+
 #endif /* _FSL_MC_PRIVATE_H_ */
diff --git a/drivers/bus/fsl-mc/fsl-mc-uapi.c b/drivers/bus/fsl-mc/fsl-mc-uapi.c
new file mode 100644 (file)
index 0000000..9c4c139
--- /dev/null
@@ -0,0 +1,597 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Management Complex (MC) userspace support
+ *
+ * Copyright 2021 NXP
+ *
+ */
+
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/uaccess.h>
+#include <linux/miscdevice.h>
+
+#include "fsl-mc-private.h"
+
+struct uapi_priv_data {
+       struct fsl_mc_uapi *uapi;
+       struct fsl_mc_io *mc_io;
+};
+
+struct fsl_mc_cmd_desc {
+       u16 cmdid_value;
+       u16 cmdid_mask;
+       int size;
+       bool token;
+       int flags;
+};
+
+#define FSL_MC_CHECK_MODULE_ID         BIT(0)
+#define FSL_MC_CAP_NET_ADMIN_NEEDED    BIT(1)
+
+enum fsl_mc_cmd_index {
+       DPDBG_DUMP = 0,
+       DPDBG_SET,
+       DPRC_GET_CONTAINER_ID,
+       DPRC_CREATE_CONT,
+       DPRC_DESTROY_CONT,
+       DPRC_ASSIGN,
+       DPRC_UNASSIGN,
+       DPRC_GET_OBJ_COUNT,
+       DPRC_GET_OBJ,
+       DPRC_GET_RES_COUNT,
+       DPRC_GET_RES_IDS,
+       DPRC_SET_OBJ_LABEL,
+       DPRC_SET_LOCKED,
+       DPRC_CONNECT,
+       DPRC_DISCONNECT,
+       DPRC_GET_POOL,
+       DPRC_GET_POOL_COUNT,
+       DPRC_GET_CONNECTION,
+       DPCI_GET_LINK_STATE,
+       DPCI_GET_PEER_ATTR,
+       DPAIOP_GET_SL_VERSION,
+       DPAIOP_GET_STATE,
+       DPMNG_GET_VERSION,
+       DPSECI_GET_TX_QUEUE,
+       DPMAC_GET_COUNTER,
+       DPMAC_GET_MAC_ADDR,
+       DPNI_SET_PRIM_MAC,
+       DPNI_GET_PRIM_MAC,
+       DPNI_GET_STATISTICS,
+       DPNI_GET_LINK_STATE,
+       DPNI_GET_MAX_FRAME_LENGTH,
+       DPSW_GET_TAILDROP,
+       DPSW_SET_TAILDROP,
+       DPSW_IF_GET_COUNTER,
+       DPSW_IF_GET_MAX_FRAME_LENGTH,
+       DPDMUX_GET_COUNTER,
+       DPDMUX_IF_GET_MAX_FRAME_LENGTH,
+       GET_ATTR,
+       GET_IRQ_MASK,
+       GET_IRQ_STATUS,
+       CLOSE,
+       OPEN,
+       GET_API_VERSION,
+       DESTROY,
+       CREATE,
+};
+
+static struct fsl_mc_cmd_desc fsl_mc_accepted_cmds[] = {
+       [DPDBG_DUMP] = {
+               .cmdid_value = 0x1300,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 28,
+       },
+       [DPDBG_SET] = {
+               .cmdid_value = 0x1400,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 28,
+       },
+       [DPRC_GET_CONTAINER_ID] = {
+               .cmdid_value = 0x8300,
+               .cmdid_mask = 0xFFF0,
+               .token = false,
+               .size = 8,
+       },
+       [DPRC_CREATE_CONT] = {
+               .cmdid_value = 0x1510,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 40,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_DESTROY_CONT] = {
+               .cmdid_value = 0x1520,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 12,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_ASSIGN] = {
+               .cmdid_value = 0x1570,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 40,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_UNASSIGN] = {
+               .cmdid_value = 0x1580,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 40,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_GET_OBJ_COUNT] = {
+               .cmdid_value = 0x1590,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 16,
+       },
+       [DPRC_GET_OBJ] = {
+               .cmdid_value = 0x15A0,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 12,
+       },
+       [DPRC_GET_RES_COUNT] = {
+               .cmdid_value = 0x15B0,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 32,
+       },
+       [DPRC_GET_RES_IDS] = {
+               .cmdid_value = 0x15C0,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 40,
+       },
+       [DPRC_SET_OBJ_LABEL] = {
+               .cmdid_value = 0x1610,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 48,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_SET_LOCKED] = {
+               .cmdid_value = 0x16B0,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 16,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_CONNECT] = {
+               .cmdid_value = 0x1670,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 56,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_DISCONNECT] = {
+               .cmdid_value = 0x1680,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 32,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPRC_GET_POOL] = {
+               .cmdid_value = 0x1690,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 12,
+       },
+       [DPRC_GET_POOL_COUNT] = {
+               .cmdid_value = 0x16A0,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPRC_GET_CONNECTION] = {
+               .cmdid_value = 0x16C0,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 32,
+       },
+
+       [DPCI_GET_LINK_STATE] = {
+               .cmdid_value = 0x0E10,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPCI_GET_PEER_ATTR] = {
+               .cmdid_value = 0x0E20,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPAIOP_GET_SL_VERSION] = {
+               .cmdid_value = 0x2820,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPAIOP_GET_STATE] = {
+               .cmdid_value = 0x2830,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPMNG_GET_VERSION] = {
+               .cmdid_value = 0x8310,
+               .cmdid_mask = 0xFFF0,
+               .token = false,
+               .size = 8,
+       },
+       [DPSECI_GET_TX_QUEUE] = {
+               .cmdid_value = 0x1970,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 14,
+       },
+       [DPMAC_GET_COUNTER] = {
+               .cmdid_value = 0x0c40,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 9,
+       },
+       [DPMAC_GET_MAC_ADDR] = {
+               .cmdid_value = 0x0c50,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPNI_SET_PRIM_MAC] = {
+               .cmdid_value = 0x2240,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 16,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPNI_GET_PRIM_MAC] = {
+               .cmdid_value = 0x2250,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPNI_GET_STATISTICS] = {
+               .cmdid_value = 0x25D0,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 10,
+       },
+       [DPNI_GET_LINK_STATE] = {
+               .cmdid_value = 0x2150,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPNI_GET_MAX_FRAME_LENGTH] = {
+               .cmdid_value = 0x2170,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [DPSW_GET_TAILDROP] = {
+               .cmdid_value = 0x0A80,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 14,
+       },
+       [DPSW_SET_TAILDROP] = {
+               .cmdid_value = 0x0A90,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 24,
+               .flags = FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [DPSW_IF_GET_COUNTER] = {
+               .cmdid_value = 0x0340,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 11,
+       },
+       [DPSW_IF_GET_MAX_FRAME_LENGTH] = {
+               .cmdid_value = 0x0450,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 10,
+       },
+       [DPDMUX_GET_COUNTER] = {
+               .cmdid_value = 0x0b20,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 11,
+       },
+       [DPDMUX_IF_GET_MAX_FRAME_LENGTH] = {
+               .cmdid_value = 0x0a20,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 10,
+       },
+       [GET_ATTR] = {
+               .cmdid_value = 0x0040,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+       [GET_IRQ_MASK] = {
+               .cmdid_value = 0x0150,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 13,
+       },
+       [GET_IRQ_STATUS] = {
+               .cmdid_value = 0x0160,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 13,
+       },
+       [CLOSE] = {
+               .cmdid_value = 0x8000,
+               .cmdid_mask = 0xFFF0,
+               .token = true,
+               .size = 8,
+       },
+
+       /* Common commands amongst all types of objects. Must be checked last. */
+       [OPEN] = {
+               .cmdid_value = 0x8000,
+               .cmdid_mask = 0xFC00,
+               .token = false,
+               .size = 12,
+               .flags = FSL_MC_CHECK_MODULE_ID,
+       },
+       [GET_API_VERSION] = {
+               .cmdid_value = 0xA000,
+               .cmdid_mask = 0xFC00,
+               .token = false,
+               .size = 8,
+               .flags = FSL_MC_CHECK_MODULE_ID,
+       },
+       [DESTROY] = {
+               .cmdid_value = 0x9800,
+               .cmdid_mask = 0xFC00,
+               .token = true,
+               .size = 12,
+               .flags = FSL_MC_CHECK_MODULE_ID | FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+       [CREATE] = {
+               .cmdid_value = 0x9000,
+               .cmdid_mask = 0xFC00,
+               .token = true,
+               .size = 64,
+               .flags = FSL_MC_CHECK_MODULE_ID | FSL_MC_CAP_NET_ADMIN_NEEDED,
+       },
+};
+
+#define FSL_MC_NUM_ACCEPTED_CMDS ARRAY_SIZE(fsl_mc_accepted_cmds)
+
+#define FSL_MC_MAX_MODULE_ID 0x10
+
+static int fsl_mc_command_check(struct fsl_mc_device *mc_dev,
+                               struct fsl_mc_command *mc_cmd)
+{
+       struct fsl_mc_cmd_desc *desc = NULL;
+       int mc_cmd_max_size, i;
+       bool token_provided;
+       u16 cmdid, module_id;
+       char *mc_cmd_end;
+       char sum = 0;
+
+       /* Check if this is an accepted MC command */
+       cmdid = mc_cmd_hdr_read_cmdid(mc_cmd);
+       for (i = 0; i < FSL_MC_NUM_ACCEPTED_CMDS; i++) {
+               desc = &fsl_mc_accepted_cmds[i];
+               if ((cmdid & desc->cmdid_mask) == desc->cmdid_value)
+                       break;
+       }
+       if (i == FSL_MC_NUM_ACCEPTED_CMDS) {
+               dev_err(&mc_dev->dev, "MC command 0x%04x: cmdid not accepted\n", cmdid);
+               return -EACCES;
+       }
+
+       /* Check if the size of the command is honored. Anything beyond the
+        * last valid byte of the command should be zeroed.
+        */
+       mc_cmd_max_size = sizeof(*mc_cmd);
+       mc_cmd_end = ((char *)mc_cmd) + desc->size;
+       for (i = desc->size; i < mc_cmd_max_size; i++)
+               sum |= *mc_cmd_end++;
+       if (sum) {
+               dev_err(&mc_dev->dev, "MC command 0x%04x: garbage beyond max size of %d bytes!\n",
+                       cmdid, desc->size);
+               return -EACCES;
+       }
+
+       /* Some MC commands request a token to be passed so that object
+        * identification is possible. Check if the token passed in the command
+        * is as expected.
+        */
+       token_provided = mc_cmd_hdr_read_token(mc_cmd) ? true : false;
+       if (token_provided != desc->token) {
+               dev_err(&mc_dev->dev, "MC command 0x%04x: token 0x%04x is invalid!\n",
+                       cmdid, mc_cmd_hdr_read_token(mc_cmd));
+               return -EACCES;
+       }
+
+       /* If needed, check if the module ID passed is valid */
+       if (desc->flags & FSL_MC_CHECK_MODULE_ID) {
+               /* The module ID is represented by bits [4:9] from the cmdid */
+               module_id = (cmdid & GENMASK(9, 4)) >> 4;
+               if (module_id == 0 || module_id > FSL_MC_MAX_MODULE_ID) {
+                       dev_err(&mc_dev->dev, "MC command 0x%04x: unknown module ID 0x%x\n",
+                               cmdid, module_id);
+                       return -EACCES;
+               }
+       }
+
+       /* Some commands alter how hardware resources are managed. For these
+        * commands, check for CAP_NET_ADMIN.
+        */
+       if (desc->flags & FSL_MC_CAP_NET_ADMIN_NEEDED) {
+               if (!capable(CAP_NET_ADMIN)) {
+                       dev_err(&mc_dev->dev, "MC command 0x%04x: needs CAP_NET_ADMIN!\n",
+                               cmdid);
+                       return -EPERM;
+               }
+       }
+
+       return 0;
+}
+
+static int fsl_mc_uapi_send_command(struct fsl_mc_device *mc_dev, unsigned long arg,
+                                   struct fsl_mc_io *mc_io)
+{
+       struct fsl_mc_command mc_cmd;
+       int error;
+
+       error = copy_from_user(&mc_cmd, (void __user *)arg, sizeof(mc_cmd));
+       if (error)
+               return -EFAULT;
+
+       error = fsl_mc_command_check(mc_dev, &mc_cmd);
+       if (error)
+               return error;
+
+       error = mc_send_command(mc_io, &mc_cmd);
+       if (error)
+               return error;
+
+       error = copy_to_user((void __user *)arg, &mc_cmd, sizeof(mc_cmd));
+       if (error)
+               return -EFAULT;
+
+       return 0;
+}
+
+static int fsl_mc_uapi_dev_open(struct inode *inode, struct file *filep)
+{
+       struct fsl_mc_device *root_mc_device;
+       struct uapi_priv_data *priv_data;
+       struct fsl_mc_io *dynamic_mc_io;
+       struct fsl_mc_uapi *mc_uapi;
+       struct fsl_mc_bus *mc_bus;
+       int error;
+
+       priv_data = kzalloc(sizeof(*priv_data), GFP_KERNEL);
+       if (!priv_data)
+               return -ENOMEM;
+
+       mc_uapi = container_of(filep->private_data, struct fsl_mc_uapi, misc);
+       mc_bus = container_of(mc_uapi, struct fsl_mc_bus, uapi_misc);
+       root_mc_device = &mc_bus->mc_dev;
+
+       mutex_lock(&mc_uapi->mutex);
+
+       if (!mc_uapi->local_instance_in_use) {
+               priv_data->mc_io = mc_uapi->static_mc_io;
+               mc_uapi->local_instance_in_use = 1;
+       } else {
+               error = fsl_mc_portal_allocate(root_mc_device, 0,
+                                              &dynamic_mc_io);
+               if (error) {
+                       dev_dbg(&root_mc_device->dev,
+                               "Could not allocate MC portal\n");
+                       goto error_portal_allocate;
+               }
+
+               priv_data->mc_io = dynamic_mc_io;
+       }
+       priv_data->uapi = mc_uapi;
+       filep->private_data = priv_data;
+
+       mutex_unlock(&mc_uapi->mutex);
+
+       return 0;
+
+error_portal_allocate:
+       mutex_unlock(&mc_uapi->mutex);
+       kfree(priv_data);
+
+       return error;
+}
+
+static int fsl_mc_uapi_dev_release(struct inode *inode, struct file *filep)
+{
+       struct uapi_priv_data *priv_data;
+       struct fsl_mc_uapi *mc_uapi;
+       struct fsl_mc_io *mc_io;
+
+       priv_data = filep->private_data;
+       mc_uapi = priv_data->uapi;
+       mc_io = priv_data->mc_io;
+
+       mutex_lock(&mc_uapi->mutex);
+
+       if (mc_io == mc_uapi->static_mc_io)
+               mc_uapi->local_instance_in_use = 0;
+       else
+               fsl_mc_portal_free(mc_io);
+
+       kfree(filep->private_data);
+       filep->private_data = NULL;
+
+       mutex_unlock(&mc_uapi->mutex);
+
+       return 0;
+}
+
+static long fsl_mc_uapi_dev_ioctl(struct file *file,
+                                 unsigned int cmd,
+                                 unsigned long arg)
+{
+       struct uapi_priv_data *priv_data = file->private_data;
+       struct fsl_mc_device *root_mc_device;
+       struct fsl_mc_bus *mc_bus;
+       int error;
+
+       mc_bus = container_of(priv_data->uapi, struct fsl_mc_bus, uapi_misc);
+       root_mc_device = &mc_bus->mc_dev;
+
+       switch (cmd) {
+       case FSL_MC_SEND_MC_COMMAND:
+               error = fsl_mc_uapi_send_command(root_mc_device, arg, priv_data->mc_io);
+               break;
+       default:
+               dev_dbg(&root_mc_device->dev, "unexpected ioctl call number\n");
+               error = -EINVAL;
+       }
+
+       return error;
+}
+
+static const struct file_operations fsl_mc_uapi_dev_fops = {
+       .owner = THIS_MODULE,
+       .open = fsl_mc_uapi_dev_open,
+       .release = fsl_mc_uapi_dev_release,
+       .unlocked_ioctl = fsl_mc_uapi_dev_ioctl,
+};
+
+int fsl_mc_uapi_create_device_file(struct fsl_mc_bus *mc_bus)
+{
+       struct fsl_mc_device *mc_dev = &mc_bus->mc_dev;
+       struct fsl_mc_uapi *mc_uapi = &mc_bus->uapi_misc;
+       int error;
+
+       mc_uapi->misc.minor = MISC_DYNAMIC_MINOR;
+       mc_uapi->misc.name = dev_name(&mc_dev->dev);
+       mc_uapi->misc.fops = &fsl_mc_uapi_dev_fops;
+
+       error = misc_register(&mc_uapi->misc);
+       if (error)
+               return error;
+
+       mc_uapi->static_mc_io = mc_bus->mc_dev.mc_io;
+
+       mutex_init(&mc_uapi->mutex);
+
+       return 0;
+}
+
+void fsl_mc_uapi_remove_device_file(struct fsl_mc_bus *mc_bus)
+{
+       misc_deregister(&mc_bus->uapi_misc.misc);
+}
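
Userspace reaches this ioctl through the misc device named after the root
DPRC, typically /dev/dprc.1 (the instance number here is an assumption). A
hedged example sending the get-version command matched by the 0x8310/0xFFF0
table entry above: it assumes the uapi header exports FSL_MC_SEND_MC_COMMAND
and struct fsl_mc_command, assumes a little-endian host (the 16-bit cmdid
sits in bytes 6..7 of the 64-bit header, per mc_cmd_header), and the version
nibble in the cmdid is firmware-dependent:

#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include <linux/fsl_mc.h>       /* FSL_MC_SEND_MC_COMMAND, fsl_mc_command */

int main(void)
{
        /* cmdid 0x8311: get-version, no token, rest of header zero */
        struct fsl_mc_command cmd = { .header = (uint64_t)0x8311 << 48 };
        int fd = open("/dev/dprc.1", O_RDWR);

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (ioctl(fd, FSL_MC_SEND_MC_COMMAND, &cmd) == 0)
                printf("raw version words: %#llx %#llx\n",
                       (unsigned long long)cmd.params[0],
                       (unsigned long long)cmd.params[1]);
        close(fd);
        return 0;
}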
index 85a0225..b291b35 100644 (file)
@@ -35,7 +35,7 @@ static enum mc_cmd_status mc_cmd_hdr_read_status(struct fsl_mc_command *cmd)
        return (enum mc_cmd_status)hdr->status;
 }
 
-static u16 mc_cmd_hdr_read_cmdid(struct fsl_mc_command *cmd)
+u16 mc_cmd_hdr_read_cmdid(struct fsl_mc_command *cmd)
 {
        struct mc_cmd_header *hdr = (struct mc_cmd_header *)&cmd->header;
        u16 cmd_id = le16_to_cpu(hdr->cmd_id);
index f0697f4..be4eebb 100644 (file)
@@ -151,12 +151,17 @@ int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl)
 {
        struct mhi_event *mhi_event = mhi_cntrl->mhi_event;
        struct device *dev = &mhi_cntrl->mhi_dev->dev;
+       unsigned long irq_flags = IRQF_SHARED | IRQF_NO_SUSPEND;
        int i, ret;
 
+       /* if controller driver has set irq_flags, use it */
+       if (mhi_cntrl->irq_flags)
+               irq_flags = mhi_cntrl->irq_flags;
+
        /* Setup BHI_INTVEC IRQ */
        ret = request_threaded_irq(mhi_cntrl->irq[0], mhi_intvec_handler,
                                   mhi_intvec_threaded_handler,
-                                  IRQF_SHARED | IRQF_NO_SUSPEND,
+                                  irq_flags,
                                   "bhi", mhi_cntrl);
        if (ret)
                return ret;
@@ -174,7 +179,7 @@ int mhi_init_irq_setup(struct mhi_controller *mhi_cntrl)
 
                ret = request_irq(mhi_cntrl->irq[mhi_event->irq],
                                  mhi_irq_handler,
-                                 IRQF_SHARED | IRQF_NO_SUSPEND,
+                                 irq_flags,
                                  "mhi", mhi_event);
                if (ret) {
                        dev_err(dev, "Error requesting irq:%d for ev:%d\n",
@@ -552,6 +557,9 @@ void mhi_deinit_chan_ctxt(struct mhi_controller *mhi_cntrl,
        tre_ring = &mhi_chan->tre_ring;
        chan_ctxt = &mhi_cntrl->mhi_ctxt->chan_ctxt[mhi_chan->chan];
 
+       if (!chan_ctxt->rbase) /* Already uninitialized */
+               return;
+
        mhi_free_coherent(mhi_cntrl, tre_ring->alloc_size,
                          tre_ring->pre_aligned, tre_ring->dma_handle);
        vfree(buf_ring->base);
index 1202433..4e0131b 100644 (file)
@@ -111,7 +111,14 @@ void mhi_ring_chan_db(struct mhi_controller *mhi_cntrl,
        dma_addr_t db;
 
        db = ring->iommu_base + (ring->wp - ring->base);
+
+       /*
+        * Writes to the new ring element must be visible to the hardware
+        * before letting h/w know there is new element to fetch.
+        */
+       dma_wmb();
        *ring->ctxt_wp = db;
+
        mhi_chan->db_cfg.process_db(mhi_cntrl, &mhi_chan->db_cfg,
                                    ring->db_addr, db);
 }
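
The dma_wmb() guarantees the ring element's contents are visible to the
device before the context write pointer that advertises them; on a weakly
ordered CPU the pointer store could otherwise be observed first and the
device would fetch a stale element. A userspace model of the required
ordering, with C11 release/acquire standing in for dma_wmb() and the
device's pointer fetch:

#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static uint64_t ring_element;           /* the TRE payload */
static _Atomic uint64_t ctxt_wp;        /* *ring->ctxt_wp analogue */

static void ring_db(uint64_t db)
{
        ring_element = 0xdeadbeef;      /* fill the element first */
        /* release: everything above is visible before this store,
         * the role dma_wmb() plays before *ring->ctxt_wp = db */
        atomic_store_explicit(&ctxt_wp, db, memory_order_release);
}

static void device_fetch(void)
{
        uint64_t wp = atomic_load_explicit(&ctxt_wp, memory_order_acquire);

        if (wp)
                printf("element %#llx at wp %#llx\n",
                       (unsigned long long)ring_element,
                       (unsigned long long)wp);
}

int main(void)
{
        ring_db(0x1000);
        device_fetch();
        return 0;
}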
@@ -135,6 +142,19 @@ enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl)
 }
 EXPORT_SYMBOL_GPL(mhi_get_mhi_state);
 
+void mhi_soc_reset(struct mhi_controller *mhi_cntrl)
+{
+       if (mhi_cntrl->reset) {
+               mhi_cntrl->reset(mhi_cntrl);
+               return;
+       }
+
+       /* Generic MHI SoC reset */
+       mhi_write_reg(mhi_cntrl, mhi_cntrl->regs, MHI_SOC_RESET_REQ_OFFSET,
+                     MHI_SOC_RESET_REQ);
+}
+EXPORT_SYMBOL_GPL(mhi_soc_reset);
+
 int mhi_map_single_no_bb(struct mhi_controller *mhi_cntrl,
                         struct mhi_buf_info *buf_info)
 {
@@ -959,118 +979,88 @@ static bool mhi_is_ring_full(struct mhi_controller *mhi_cntrl,
        return (tmp == ring->rp);
 }
 
-int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir,
-                 struct sk_buff *skb, size_t len, enum mhi_flags mflags)
+static int mhi_queue(struct mhi_device *mhi_dev, struct mhi_buf_info *buf_info,
+                    enum dma_data_direction dir, enum mhi_flags mflags)
 {
        struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
        struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
                                                             mhi_dev->dl_chan;
        struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
-       struct mhi_buf_info buf_info = { };
+       unsigned long flags;
        int ret;
 
-       /* If MHI host pre-allocates buffers then client drivers cannot queue */
-       if (mhi_chan->pre_alloc)
-               return -EINVAL;
+       if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)))
+               return -EIO;
 
-       if (mhi_is_ring_full(mhi_cntrl, tre_ring))
-               return -ENOMEM;
+       read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
 
-       read_lock_bh(&mhi_cntrl->pm_lock);
-       if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))) {
-               read_unlock_bh(&mhi_cntrl->pm_lock);
-               return -EIO;
+       ret = mhi_is_ring_full(mhi_cntrl, tre_ring);
+       if (unlikely(ret)) {
+               ret = -ENOMEM;
+               goto exit_unlock;
        }
 
-       /* we're in M3 or transitioning to M3 */
+       ret = mhi_gen_tre(mhi_cntrl, mhi_chan, buf_info, mflags);
+       if (unlikely(ret))
+               goto exit_unlock;
+
+       /* Trigger M3 exit if necessary */
        if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
                mhi_trigger_resume(mhi_cntrl);
 
-       /* Toggle wake to exit out of M2 */
+       /* Assert dev_wake (to exit/prevent M1/M2) */
        mhi_cntrl->wake_toggle(mhi_cntrl);
 
-       buf_info.v_addr = skb->data;
-       buf_info.cb_buf = skb;
-       buf_info.len = len;
-
-       ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &buf_info, mflags);
-       if (unlikely(ret)) {
-               read_unlock_bh(&mhi_cntrl->pm_lock);
-               return ret;
-       }
-
        if (mhi_chan->dir == DMA_TO_DEVICE)
                atomic_inc(&mhi_cntrl->pending_pkts);
 
-       if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl))) {
-               read_lock_bh(&mhi_chan->lock);
-               mhi_ring_chan_db(mhi_cntrl, mhi_chan);
-               read_unlock_bh(&mhi_chan->lock);
+       if (unlikely(!MHI_DB_ACCESS_VALID(mhi_cntrl))) {
+               ret = -EIO;
+               goto exit_unlock;
        }
 
-       read_unlock_bh(&mhi_cntrl->pm_lock);
+       mhi_ring_chan_db(mhi_cntrl, mhi_chan);
 
-       return 0;
+exit_unlock:
+       read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags);
+
+       return ret;
 }
-EXPORT_SYMBOL_GPL(mhi_queue_skb);
 
-int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir,
-                 struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags)
+int mhi_queue_skb(struct mhi_device *mhi_dev, enum dma_data_direction dir,
+                 struct sk_buff *skb, size_t len, enum mhi_flags mflags)
 {
-       struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
        struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
                                                             mhi_dev->dl_chan;
-       struct device *dev = &mhi_cntrl->mhi_dev->dev;
-       struct mhi_ring *tre_ring = &mhi_chan->tre_ring;
        struct mhi_buf_info buf_info = { };
-       int ret;
-
-       /* If MHI host pre-allocates buffers then client drivers cannot queue */
-       if (mhi_chan->pre_alloc)
-               return -EINVAL;
 
-       if (mhi_is_ring_full(mhi_cntrl, tre_ring))
-               return -ENOMEM;
-
-       read_lock_bh(&mhi_cntrl->pm_lock);
-       if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state))) {
-               dev_err(dev, "MHI is not in activate state, PM state: %s\n",
-                       to_mhi_pm_state_str(mhi_cntrl->pm_state));
-               read_unlock_bh(&mhi_cntrl->pm_lock);
+       buf_info.v_addr = skb->data;
+       buf_info.cb_buf = skb;
+       buf_info.len = len;
 
-               return -EIO;
-       }
+       if (unlikely(mhi_chan->pre_alloc))
+               return -EINVAL;
 
-       /* we're in M3 or transitioning to M3 */
-       if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
-               mhi_trigger_resume(mhi_cntrl);
+       return mhi_queue(mhi_dev, &buf_info, dir, mflags);
+}
+EXPORT_SYMBOL_GPL(mhi_queue_skb);
 
-       /* Toggle wake to exit out of M2 */
-       mhi_cntrl->wake_toggle(mhi_cntrl);
+int mhi_queue_dma(struct mhi_device *mhi_dev, enum dma_data_direction dir,
+                 struct mhi_buf *mhi_buf, size_t len, enum mhi_flags mflags)
+{
+       struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
+                                                            mhi_dev->dl_chan;
+       struct mhi_buf_info buf_info = { };
 
        buf_info.p_addr = mhi_buf->dma_addr;
        buf_info.cb_buf = mhi_buf;
        buf_info.pre_mapped = true;
        buf_info.len = len;
 
-       ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &buf_info, mflags);
-       if (unlikely(ret)) {
-               read_unlock_bh(&mhi_cntrl->pm_lock);
-               return ret;
-       }
-
-       if (mhi_chan->dir == DMA_TO_DEVICE)
-               atomic_inc(&mhi_cntrl->pending_pkts);
-
-       if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl))) {
-               read_lock_bh(&mhi_chan->lock);
-               mhi_ring_chan_db(mhi_cntrl, mhi_chan);
-               read_unlock_bh(&mhi_chan->lock);
-       }
-
-       read_unlock_bh(&mhi_cntrl->pm_lock);
+       if (unlikely(mhi_chan->pre_alloc))
+               return -EINVAL;
 
-       return 0;
+       return mhi_queue(mhi_dev, &buf_info, dir, mflags);
 }
 EXPORT_SYMBOL_GPL(mhi_queue_dma);
 
@@ -1124,57 +1114,13 @@ int mhi_gen_tre(struct mhi_controller *mhi_cntrl, struct mhi_chan *mhi_chan,
 int mhi_queue_buf(struct mhi_device *mhi_dev, enum dma_data_direction dir,
                  void *buf, size_t len, enum mhi_flags mflags)
 {
-       struct mhi_controller *mhi_cntrl = mhi_dev->mhi_cntrl;
-       struct mhi_chan *mhi_chan = (dir == DMA_TO_DEVICE) ? mhi_dev->ul_chan :
-                                                            mhi_dev->dl_chan;
-       struct mhi_ring *tre_ring;
        struct mhi_buf_info buf_info = { };
-       unsigned long flags;
-       int ret;
-
-       /*
-        * this check here only as a guard, it's always
-        * possible mhi can enter error while executing rest of function,
-        * which is not fatal so we do not need to hold pm_lock
-        */
-       if (unlikely(MHI_PM_IN_ERROR_STATE(mhi_cntrl->pm_state)))
-               return -EIO;
-
-       tre_ring = &mhi_chan->tre_ring;
-       if (mhi_is_ring_full(mhi_cntrl, tre_ring))
-               return -ENOMEM;
 
        buf_info.v_addr = buf;
        buf_info.cb_buf = buf;
        buf_info.len = len;
 
-       ret = mhi_gen_tre(mhi_cntrl, mhi_chan, &buf_info, mflags);
-       if (unlikely(ret))
-               return ret;
-
-       read_lock_irqsave(&mhi_cntrl->pm_lock, flags);
-
-       /* we're in M3 or transitioning to M3 */
-       if (MHI_PM_IN_SUSPEND_STATE(mhi_cntrl->pm_state))
-               mhi_trigger_resume(mhi_cntrl);
-
-       /* Toggle wake to exit out of M2 */
-       mhi_cntrl->wake_toggle(mhi_cntrl);
-
-       if (mhi_chan->dir == DMA_TO_DEVICE)
-               atomic_inc(&mhi_cntrl->pending_pkts);
-
-       if (likely(MHI_DB_ACCESS_VALID(mhi_cntrl))) {
-               unsigned long flags;
-
-               read_lock_irqsave(&mhi_chan->lock, flags);
-               mhi_ring_chan_db(mhi_cntrl, mhi_chan);
-               read_unlock_irqrestore(&mhi_chan->lock, flags);
-       }
-
-       read_unlock_irqrestore(&mhi_cntrl->pm_lock, flags);
-
-       return 0;
+       return mhi_queue(mhi_dev, &buf_info, dir, mflags);
 }
 EXPORT_SYMBOL_GPL(mhi_queue_buf);
 
index f5bee76..20673a4 100644 (file)
@@ -8,13 +8,21 @@
  * Copyright (C) 2020 Linaro Ltd <loic.poulain@linaro.org>
  */
 
+#include <linux/aer.h>
+#include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/mhi.h>
 #include <linux/module.h>
 #include <linux/pci.h>
+#include <linux/timer.h>
+#include <linux/workqueue.h>
 
 #define MHI_PCI_DEFAULT_BAR_NUM 0
 
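+/* Delay (in ms) granted to the device to complete a requested SoC reset */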
+#define MHI_POST_RESET_DELAY_MS 500
+
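+/* Health check period: device liveness is polled every two seconds */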
+#define HEALTH_CHECK_PERIOD (HZ * 2)
+
 /**
  * struct mhi_pci_dev_info - MHI PCI device specific information
  * @config: MHI controller configuration
@@ -76,6 +84,36 @@ struct mhi_pci_dev_info {
                .offload_channel = false,       \
        }
 
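+/*
+ * Hardware-accelerated channels: burst doorbell mode with doorbell mode
+ * switching, only usable in the AMSS execution environment.
+ */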
+#define MHI_CHANNEL_CONFIG_HW_UL(ch_num, ch_name, el_count, ev_ring) \
+       {                                               \
+               .num = ch_num,                          \
+               .name = ch_name,                        \
+               .num_elements = el_count,               \
+               .event_ring = ev_ring,                  \
+               .dir = DMA_TO_DEVICE,                   \
+               .ee_mask = BIT(MHI_EE_AMSS),            \
+               .pollcfg = 0,                           \
+               .doorbell = MHI_DB_BRST_ENABLE,         \
+               .lpm_notify = false,                    \
+               .offload_channel = false,               \
+               .doorbell_mode_switch = true,           \
+       }
+
+#define MHI_CHANNEL_CONFIG_HW_DL(ch_num, ch_name, el_count, ev_ring) \
+       {                                               \
+               .num = ch_num,                          \
+               .name = ch_name,                        \
+               .num_elements = el_count,               \
+               .event_ring = ev_ring,                  \
+               .dir = DMA_FROM_DEVICE,                 \
+               .ee_mask = BIT(MHI_EE_AMSS),            \
+               .pollcfg = 0,                           \
+               .doorbell = MHI_DB_BRST_ENABLE,         \
+               .lpm_notify = false,                    \
+               .offload_channel = false,               \
+               .doorbell_mode_switch = true,           \
+       }
+
 #define MHI_EVENT_CONFIG_DATA(ev_ring)         \
        {                                       \
                .num_elements = 128,            \
@@ -91,8 +129,8 @@ struct mhi_pci_dev_info {
 
 #define MHI_EVENT_CONFIG_HW_DATA(ev_ring, ch_num) \
        {                                       \
-               .num_elements = 128,            \
-               .irq_moderation_ms = 5,         \
+               .num_elements = 2048,           \
+               .irq_moderation_ms = 1,         \
                .irq = (ev_ring) + 1,           \
                .priority = 1,                  \
                .mode = MHI_DB_BRST_DISABLE,    \
@@ -104,27 +142,31 @@ struct mhi_pci_dev_info {
        }
 
 static const struct mhi_channel_config modem_qcom_v1_mhi_channels[] = {
+       MHI_CHANNEL_CONFIG_UL(4, "DIAG", 16, 1),
+       MHI_CHANNEL_CONFIG_DL(5, "DIAG", 16, 1),
        MHI_CHANNEL_CONFIG_UL(12, "MBIM", 4, 0),
        MHI_CHANNEL_CONFIG_DL(13, "MBIM", 4, 0),
        MHI_CHANNEL_CONFIG_UL(14, "QMI", 4, 0),
        MHI_CHANNEL_CONFIG_DL(15, "QMI", 4, 0),
        MHI_CHANNEL_CONFIG_UL(20, "IPCR", 8, 0),
        MHI_CHANNEL_CONFIG_DL(21, "IPCR", 8, 0),
-       MHI_CHANNEL_CONFIG_UL(100, "IP_HW0", 128, 1),
-       MHI_CHANNEL_CONFIG_DL(101, "IP_HW0", 128, 2),
+       MHI_CHANNEL_CONFIG_HW_UL(100, "IP_HW0", 128, 2),
+       MHI_CHANNEL_CONFIG_HW_DL(101, "IP_HW0", 128, 3),
 };
 
-static const struct mhi_event_config modem_qcom_v1_mhi_events[] = {
+static struct mhi_event_config modem_qcom_v1_mhi_events[] = {
        /* first ring is control+data ring */
        MHI_EVENT_CONFIG_CTRL(0),
+       /* DIAG dedicated event ring */
+       MHI_EVENT_CONFIG_DATA(1),
        /* Hardware channels request dedicated hardware event rings */
-       MHI_EVENT_CONFIG_HW_DATA(1, 100),
-       MHI_EVENT_CONFIG_HW_DATA(2, 101)
+       MHI_EVENT_CONFIG_HW_DATA(2, 100),
+       MHI_EVENT_CONFIG_HW_DATA(3, 101)
 };
 
-static const struct mhi_controller_config modem_qcom_v1_mhiv_config = {
+static struct mhi_controller_config modem_qcom_v1_mhiv_config = {
        .max_channels = 128,
-       .timeout_ms = 5000,
+       .timeout_ms = 8000,
        .num_channels = ARRAY_SIZE(modem_qcom_v1_mhi_channels),
        .ch_cfg = modem_qcom_v1_mhi_channels,
        .num_events = ARRAY_SIZE(modem_qcom_v1_mhi_events),
@@ -147,6 +189,18 @@ static const struct pci_device_id mhi_pci_id_table[] = {
 };
 MODULE_DEVICE_TABLE(pci, mhi_pci_id_table);
 
+enum mhi_pci_device_status {
+       MHI_PCI_DEV_STARTED,
+};
+
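+/*
+ * Driver state wrapping the MHI controller together with the saved PCI
+ * config space, the recovery work and the health check timer.
+ */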
+struct mhi_pci_device {
+       struct mhi_controller mhi_cntrl;
+       struct pci_saved_state *pci_state;
+       struct work_struct recovery_work;
+       struct timer_list health_check_timer;
+       unsigned long status;
+};
+
 static int mhi_pci_read_reg(struct mhi_controller *mhi_cntrl,
                            void __iomem *addr, u32 *out)
 {
@@ -163,7 +217,31 @@ static void mhi_pci_write_reg(struct mhi_controller *mhi_cntrl,
 static void mhi_pci_status_cb(struct mhi_controller *mhi_cntrl,
                              enum mhi_callback cb)
 {
+       struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+
-       /* Nothing to do for now */
+       switch (cb) {
+       case MHI_CB_FATAL_ERROR:
+       case MHI_CB_SYS_ERROR:
+               dev_warn(&pdev->dev, "firmware crashed (%u)\n", cb);
+               break;
+       default:
+               break;
+       }
+}
+
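+/*
+ * Check that the device is still reachable by reading its PCI vendor ID:
+ * an all-ones or all-zeros value means the device is dead or gone.
+ */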
+static bool mhi_pci_is_alive(struct mhi_controller *mhi_cntrl)
+{
+       struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+       u16 vendor = 0;
+
+       if (pci_read_config_word(pdev, PCI_VENDOR_ID, &vendor))
+               return false;
+
+       if (vendor == (u16) ~0 || vendor == 0)
+               return false;
+
+       return true;
 }
 
 static int mhi_pci_claim(struct mhi_controller *mhi_cntrl,
@@ -227,8 +305,12 @@ static int mhi_pci_get_irqs(struct mhi_controller *mhi_cntrl,
        }
 
        if (nr_vectors < mhi_cntrl->nr_irqs) {
-               dev_warn(&pdev->dev, "Not enough MSI vectors (%d/%d), use shared MSI\n",
-                        nr_vectors, mhi_cntrl_config->num_events);
+               dev_warn(&pdev->dev, "using shared MSI\n");
+
+               /* Patch MSI vectors: use only one, shared across all events */
+               for (i = 0; i < mhi_cntrl_config->num_events; i++)
+                       mhi_cntrl_config->event_cfg[i].irq = 0;
+               mhi_cntrl->nr_irqs = 1;
        }
 
        irq = devm_kcalloc(&pdev->dev, mhi_cntrl->nr_irqs, sizeof(int), GFP_KERNEL);
@@ -257,20 +339,89 @@ static void mhi_pci_runtime_put(struct mhi_controller *mhi_cntrl)
        /* no PM for now */
 }
 
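+/*
+ * Recovery: tear down the MHI stack, restore the saved PCI state and try
+ * to power the device back up, falling back to a full function reset if
+ * the device does not respond.
+ */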
+static void mhi_pci_recovery_work(struct work_struct *work)
+{
+       struct mhi_pci_device *mhi_pdev = container_of(work, struct mhi_pci_device,
+                                                      recovery_work);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+       struct pci_dev *pdev = to_pci_dev(mhi_cntrl->cntrl_dev);
+       int err;
+
+       dev_warn(&pdev->dev, "device recovery started\n");
+
+       del_timer(&mhi_pdev->health_check_timer);
+
+       /* Clean up MHI state */
+       if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+               mhi_power_down(mhi_cntrl, false);
+               mhi_unprepare_after_power_down(mhi_cntrl);
+       }
+
+       /* Check if we can recover without full reset */
+       pci_set_power_state(pdev, PCI_D0);
+       pci_load_saved_state(pdev, mhi_pdev->pci_state);
+       pci_restore_state(pdev);
+
+       if (!mhi_pci_is_alive(mhi_cntrl))
+               goto err_try_reset;
+
+       err = mhi_prepare_for_power_up(mhi_cntrl);
+       if (err)
+               goto err_try_reset;
+
+       err = mhi_sync_power_up(mhi_cntrl);
+       if (err)
+               goto err_unprepare;
+
+       dev_dbg(&pdev->dev, "Recovery completed\n");
+
+       set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+       mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+       return;
+
+err_unprepare:
+       mhi_unprepare_after_power_down(mhi_cntrl);
+err_try_reset:
+       if (pci_reset_function(pdev))
+               dev_err(&pdev->dev, "Recovery failed\n");
+}
+
+static void health_check(struct timer_list *t)
+{
+       struct mhi_pci_device *mhi_pdev = from_timer(mhi_pdev, t, health_check_timer);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+       if (!mhi_pci_is_alive(mhi_cntrl)) {
+               dev_err(mhi_cntrl->cntrl_dev, "Device died\n");
+               queue_work(system_long_wq, &mhi_pdev->recovery_work);
+               return;
+       }
+
+       /* reschedule in two seconds */
+       mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+}
+
 static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 {
        const struct mhi_pci_dev_info *info = (struct mhi_pci_dev_info *) id->driver_data;
        const struct mhi_controller_config *mhi_cntrl_config;
+       struct mhi_pci_device *mhi_pdev;
        struct mhi_controller *mhi_cntrl;
        int err;
 
        dev_dbg(&pdev->dev, "MHI PCI device found: %s\n", info->name);
 
-       mhi_cntrl = mhi_alloc_controller();
-       if (!mhi_cntrl)
+       /* mhi_pdev.mhi_cntrl must be zero-initialized */
+       mhi_pdev = devm_kzalloc(&pdev->dev, sizeof(*mhi_pdev), GFP_KERNEL);
+       if (!mhi_pdev)
                return -ENOMEM;
 
+       INIT_WORK(&mhi_pdev->recovery_work, mhi_pci_recovery_work);
+       timer_setup(&mhi_pdev->health_check_timer, health_check, 0);
+
        mhi_cntrl_config = info->config;
+       mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
        mhi_cntrl->cntrl_dev = &pdev->dev;
        mhi_cntrl->iova_start = 0;
        mhi_cntrl->iova_stop = (dma_addr_t)DMA_BIT_MASK(info->dma_data_width);
@@ -285,17 +436,23 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 
        err = mhi_pci_claim(mhi_cntrl, info->bar_num, DMA_BIT_MASK(info->dma_data_width));
        if (err)
-               goto err_release;
+               return err;
 
        err = mhi_pci_get_irqs(mhi_cntrl, mhi_cntrl_config);
        if (err)
-               goto err_release;
+               return err;
+
+       pci_set_drvdata(pdev, mhi_pdev);
+
+       /* Keep the saved PCI config space at hand for restore after a sudden PCI error */
+       pci_save_state(pdev);
+       mhi_pdev->pci_state = pci_store_saved_state(pdev);
 
-       pci_set_drvdata(pdev, mhi_cntrl);
+       pci_enable_pcie_error_reporting(pdev);
 
        err = mhi_register_controller(mhi_cntrl, mhi_cntrl_config);
        if (err)
-               goto err_release;
+               return err;
 
        /* MHI bus does not power up the controller by default */
        err = mhi_prepare_for_power_up(mhi_cntrl);
@@ -310,33 +467,209 @@ static int mhi_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
                goto err_unprepare;
        }
 
+       set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+
+       /* start health check */
+       mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
        return 0;
 
 err_unprepare:
        mhi_unprepare_after_power_down(mhi_cntrl);
 err_unregister:
        mhi_unregister_controller(mhi_cntrl);
-err_release:
-       mhi_free_controller(mhi_cntrl);
 
        return err;
 }
 
 static void mhi_pci_remove(struct pci_dev *pdev)
 {
-       struct mhi_controller *mhi_cntrl = pci_get_drvdata(pdev);
+       struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+       del_timer(&mhi_pdev->health_check_timer);
+       cancel_work_sync(&mhi_pdev->recovery_work);
+
+       if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+               mhi_power_down(mhi_cntrl, true);
+               mhi_unprepare_after_power_down(mhi_cntrl);
+       }
 
-       mhi_power_down(mhi_cntrl, true);
-       mhi_unprepare_after_power_down(mhi_cntrl);
        mhi_unregister_controller(mhi_cntrl);
-       mhi_free_controller(mhi_cntrl);
 }
 
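+/*
+ * PCI reset hooks: power MHI down and request an internal SoC reset before
+ * the PCI-level reset, then restore the saved state and power MHI back up
+ * once the reset is done.
+ */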
+static void mhi_pci_reset_prepare(struct pci_dev *pdev)
+{
+       struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+       dev_info(&pdev->dev, "reset\n");
+
+       del_timer(&mhi_pdev->health_check_timer);
+
+       /* Clean up MHI state */
+       if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+               mhi_power_down(mhi_cntrl, false);
+               mhi_unprepare_after_power_down(mhi_cntrl);
+       }
+
+       /* cause internal device reset */
+       mhi_soc_reset(mhi_cntrl);
+
+       /* Make sure the device reset has completed */
+       msleep(MHI_POST_RESET_DELAY_MS);
+}
+
+static void mhi_pci_reset_done(struct pci_dev *pdev)
+{
+       struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+       int err;
+
+       /* Restore initial known working PCI state */
+       pci_load_saved_state(pdev, mhi_pdev->pci_state);
+       pci_restore_state(pdev);
+
+       /* Is the device status available? */
+       if (!mhi_pci_is_alive(mhi_cntrl)) {
+               dev_err(&pdev->dev, "reset failed\n");
+               return;
+       }
+
+       err = mhi_prepare_for_power_up(mhi_cntrl);
+       if (err) {
+               dev_err(&pdev->dev, "failed to prepare MHI controller\n");
+               return;
+       }
+
+       err = mhi_sync_power_up(mhi_cntrl);
+       if (err) {
+               dev_err(&pdev->dev, "failed to power up MHI controller\n");
+               mhi_unprepare_after_power_down(mhi_cntrl);
+               return;
+       }
+
+       set_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status);
+       mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+}
+
+static pci_ers_result_t mhi_pci_error_detected(struct pci_dev *pdev,
+                                              pci_channel_state_t state)
+{
+       struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+       dev_err(&pdev->dev, "PCI error detected, state = %u\n", state);
+
+       if (state == pci_channel_io_perm_failure)
+               return PCI_ERS_RESULT_DISCONNECT;
+
+       /* Clean up MHI state */
+       if (test_and_clear_bit(MHI_PCI_DEV_STARTED, &mhi_pdev->status)) {
+               mhi_power_down(mhi_cntrl, false);
+               mhi_unprepare_after_power_down(mhi_cntrl);
+       } else {
+               /* Nothing to do */
+               return PCI_ERS_RESULT_RECOVERED;
+       }
+
+       pci_disable_device(pdev);
+
+       return PCI_ERS_RESULT_NEED_RESET;
+}
+
+static pci_ers_result_t mhi_pci_slot_reset(struct pci_dev *pdev)
+{
+       if (pci_enable_device(pdev)) {
+               dev_err(&pdev->dev, "Cannot re-enable PCI device after reset.\n");
+               return PCI_ERS_RESULT_DISCONNECT;
+       }
+
+       return PCI_ERS_RESULT_RECOVERED;
+}
+
+static void mhi_pci_io_resume(struct pci_dev *pdev)
+{
+       struct mhi_pci_device *mhi_pdev = pci_get_drvdata(pdev);
+
+       dev_err(&pdev->dev, "PCI slot reset done\n");
+
+       queue_work(system_long_wq, &mhi_pdev->recovery_work);
+}
+
+static const struct pci_error_handlers mhi_pci_err_handler = {
+       .error_detected = mhi_pci_error_detected,
+       .slot_reset = mhi_pci_slot_reset,
+       .resume = mhi_pci_io_resume,
+       .reset_prepare = mhi_pci_reset_prepare,
+       .reset_done = mhi_pci_reset_done,
+};
+
+static int __maybe_unused mhi_pci_suspend(struct device *dev)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+
+       del_timer(&mhi_pdev->health_check_timer);
+       cancel_work_sync(&mhi_pdev->recovery_work);
+
+       /* Transition to M3 state */
+       mhi_pm_suspend(mhi_cntrl);
+
+       pci_save_state(pdev);
+       pci_disable_device(pdev);
+       pci_wake_from_d3(pdev, true);
+       pci_set_power_state(pdev, PCI_D3hot);
+
+       return 0;
+}
+
+static int __maybe_unused mhi_pci_resume(struct device *dev)
+{
+       struct pci_dev *pdev = to_pci_dev(dev);
+       struct mhi_pci_device *mhi_pdev = dev_get_drvdata(dev);
+       struct mhi_controller *mhi_cntrl = &mhi_pdev->mhi_cntrl;
+       int err;
+
+       pci_set_power_state(pdev, PCI_D0);
+       pci_restore_state(pdev);
+       pci_set_master(pdev);
+
+       err = pci_enable_device(pdev);
+       if (err)
+               goto err_recovery;
+
+       /* Exit M3, transition to M0 state */
+       err = mhi_pm_resume(mhi_cntrl);
+       if (err) {
+               dev_err(&pdev->dev, "failed to resume device: %d\n", err);
+               goto err_recovery;
+       }
+
+       /* Resume health check */
+       mod_timer(&mhi_pdev->health_check_timer, jiffies + HEALTH_CHECK_PERIOD);
+
+       return 0;
+
+err_recovery:
+       /* The device may have lost power or crashed; try recovering it */
+       queue_work(system_long_wq, &mhi_pdev->recovery_work);
+
+       return err;
+}
+
+static const struct dev_pm_ops mhi_pci_pm_ops = {
+       SET_SYSTEM_SLEEP_PM_OPS(mhi_pci_suspend, mhi_pci_resume)
+};
+
 static struct pci_driver mhi_pci_driver = {
        .name           = "mhi-pci-generic",
        .id_table       = mhi_pci_id_table,
        .probe          = mhi_pci_probe,
-       .remove         = mhi_pci_remove
+       .remove         = mhi_pci_remove,
+       .err_handler    = &mhi_pci_err_handler,
+       .driver.pm      = &mhi_pci_pm_ops
 };
 module_pci_driver(mhi_pci_driver);
 
index dd9e734..ea04249 100644 (file)
@@ -618,7 +618,7 @@ mvebu_mbus_find_bridge_hole(uint64_t *start, uint64_t *end)
                 * This part of the memory is above 4 GB, so we don't
                 * care for the MBus bridge hole.
                 */
-               if (reg_start >= 0x100000000ULL)
+               if ((u64)reg_start >= 0x100000000ULL)
                        continue;
 
                /*
index b040447..dcfb32e 100644 (file)
@@ -285,7 +285,7 @@ static int omap_l3_probe(struct platform_device *pdev)
         */
        l3->debug_irq = platform_get_irq(pdev, 0);
        ret = devm_request_irq(l3->dev, l3->debug_irq, l3_interrupt_handler,
-                              0x0, "l3-dbg-irq", l3);
+                              IRQF_NO_THREAD, "l3-dbg-irq", l3);
        if (ret) {
                dev_err(l3->dev, "request_irq failed for %d\n",
                        l3->debug_irq);
@@ -294,7 +294,7 @@ static int omap_l3_probe(struct platform_device *pdev)
 
        l3->app_irq = platform_get_irq(pdev, 1);
        ret = devm_request_irq(l3->dev, l3->app_irq, l3_interrupt_handler,
-                              0x0, "l3-app-irq", l3);
+                              IRQF_NO_THREAD, "l3-app-irq", l3);
        if (ret)
                dev_err(l3->dev, "request_irq failed for %d\n", l3->app_irq);
 
index a27d751..3d74f23 100644 (file)
@@ -3053,7 +3053,9 @@ static int sysc_remove(struct platform_device *pdev)
 
        pm_runtime_put_sync(&pdev->dev);
        pm_runtime_disable(&pdev->dev);
-       reset_control_assert(ddata->rsts);
+
+       if (!reset_control_status(ddata->rsts))
+               reset_control_assert(ddata->rsts);
 
 unprepare:
        sysc_unprepare(ddata);
index a086dd3..4f501e4 100644 (file)
@@ -125,7 +125,7 @@ config AGP_HP_ZX1
 
 config AGP_PARISC
        tristate "HP Quicksilver AGP support"
-       depends on AGP && PARISC && 64BIT
+       depends on AGP && PARISC && 64BIT && IOMMU_SBA
        help
          This option gives you AGP GART support for the HP Quicksilver
          AGP bus adapter on HP PA-RISC machines (Ok, just on the C8000
index 14b2d80..45ac7ab 100644 (file)
@@ -81,9 +81,6 @@ MODULE_DESCRIPTION("Driver for Applicom Profibus card");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS_MISCDEV(AC_MINOR);
 
-MODULE_SUPPORTED_DEVICE("ac");
-
-
 static struct applicom_board {
        unsigned long PhysIO;
        void __iomem *RamIO;
index 8038a8a..f4949b6 100644 (file)
@@ -54,10 +54,9 @@ static int pseries_rng_probe(struct vio_dev *dev,
        return hwrng_register(&pseries_rng);
 }
 
-static int pseries_rng_remove(struct vio_dev *dev)
+static void pseries_rng_remove(struct vio_dev *dev)
 {
        hwrng_unregister(&pseries_rng);
-       return 0;
 }
 
 static const struct vio_device_id pseries_rng_driver_ids[] = {
index 84e2498..0fe9e20 100644 (file)
@@ -1959,7 +1959,7 @@ static long random_ioctl(struct file *f, unsigned int cmd, unsigned long arg)
                        return -EPERM;
                if (crng_init < 2)
                        return -ENODATA;
-               crng_reseed(&primary_crng, NULL);
+               crng_reseed(&primary_crng, &input_pool);
                crng_global_init_time = jiffies - 1;
                return 0;
        default:
index aff0a8e..776abbf 100644 (file)
@@ -64,7 +64,6 @@
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Jonathan Buzzard <jonathan@buzzard.org.uk>");
 MODULE_DESCRIPTION("Toshiba laptop SMM driver");
-MODULE_SUPPORTED_DEVICE("toshiba");
 
 static DEFINE_MUTEX(tosh_mutex);
 static int tosh_fn;
index 19e23fc..ddaeceb 100644 (file)
@@ -278,8 +278,6 @@ static void tpm_devs_release(struct device *dev)
 {
        struct tpm_chip *chip = container_of(dev, struct tpm_chip, devs);
 
-       dump_stack();
-
        /* release the master device reference */
        put_device(&chip->dev);
 }
index 994385b..9036047 100644 (file)
@@ -343,7 +343,7 @@ static int ibmvtpm_crq_send_init_complete(struct ibmvtpm_dev *ibmvtpm)
  *
  * Return: Always 0.
  */
-static int tpm_ibmvtpm_remove(struct vio_dev *vdev)
+static void tpm_ibmvtpm_remove(struct vio_dev *vdev)
 {
        struct tpm_chip *chip = dev_get_drvdata(&vdev->dev);
        struct ibmvtpm_dev *ibmvtpm = dev_get_drvdata(&chip->dev);
@@ -372,8 +372,6 @@ static int tpm_ibmvtpm_remove(struct vio_dev *vdev)
        kfree(ibmvtpm);
        /* For tpm_ibmvtpm_get_desired_dma */
        dev_set_drvdata(&vdev->dev, NULL);
-
-       return 0;
 }
 
 /**
index 431919d..a2e0395 100644 (file)
@@ -707,12 +707,22 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
        const char *desc = "attempting to generate an interrupt";
        u32 cap2;
        cap_t cap;
+       int ret;
 
+       /* TPM 2.0 */
        if (chip->flags & TPM_CHIP_FLAG_TPM2)
                return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
-       else
-               return tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc,
-                                 0);
+
+       /* TPM 1.2 */
+       ret = request_locality(chip, 0);
+       if (ret < 0)
+               return ret;
+
+       ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+
+       release_locality(chip, 0);
+
+       return ret;
 }
 
 /* Register the IRQ and issue a command that will cause an interrupt. If an
@@ -1019,11 +1029,21 @@ int tpm_tis_core_init(struct device *dev, struct tpm_tis_data *priv, int irq,
        init_waitqueue_head(&priv->read_queue);
        init_waitqueue_head(&priv->int_queue);
        if (irq != -1) {
-               /* Before doing irq testing issue a command to the TPM in polling mode
+               /*
+                * Before doing irq testing issue a command to the TPM in polling mode
                 * to make sure it works. May as well use that command to set the
                 * proper timeouts for the driver.
                 */
-               if (tpm_get_timeouts(chip)) {
+
+               rc = request_locality(chip, 0);
+               if (rc < 0)
+                       goto out_err;
+
+               rc = tpm_get_timeouts(chip);
+
+               release_locality(chip, 0);
+
+               if (rc) {
                        dev_err(dev, "Could not get TPM timeouts and durations\n");
                        rc = -ENODEV;
                        goto out_err;
index 4b47170..a588d56 100644 (file)
@@ -369,6 +369,13 @@ config COMMON_CLK_FIXED_MMIO
        help
          Support for Memory Mapped IO Fixed clocks
 
+config COMMON_CLK_K210
+       bool "Clock driver for the Canaan Kendryte K210 SoC"
+       depends on OF && RISCV && SOC_CANAAN
+       default SOC_CANAAN
+       help
+         Support for the Canaan Kendryte K210 RISC-V SoC clocks.
+
 source "drivers/clk/actions/Kconfig"
 source "drivers/clk/analogbits/Kconfig"
 source "drivers/clk/baikal-t1/Kconfig"
index 71c1fa2..b22ae4f 100644 (file)
@@ -36,6 +36,7 @@ obj-$(CONFIG_COMMON_CLK_ASPEED)               += clk-aspeed.o
 obj-$(CONFIG_MACH_ASPEED_G6)           += clk-ast2600.o
 obj-$(CONFIG_ARCH_HIGHBANK)            += clk-highbank.o
 obj-$(CONFIG_CLK_HSDK)                 += clk-hsdk-pll.o
+obj-$(CONFIG_COMMON_CLK_K210)          += clk-k210.o
 obj-$(CONFIG_COMMON_CLK_LOCHNAGAR)     += clk-lochnagar.o
 obj-$(CONFIG_COMMON_CLK_MAX77686)      += clk-max77686.o
 obj-$(CONFIG_COMMON_CLK_MAX9485)       += clk-max9485.o
diff --git a/drivers/clk/clk-k210.c b/drivers/clk/clk-k210.c
new file mode 100644 (file)
index 0000000..6c84abf
--- /dev/null
@@ -0,0 +1,1007 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ */
+#define pr_fmt(fmt)     "k210-clk: " fmt
+
+#include <linux/io.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/platform_device.h>
+#include <linux/of.h>
+#include <linux/of_clk.h>
+#include <linux/of_platform.h>
+#include <linux/of_address.h>
+#include <linux/clk-provider.h>
+#include <linux/bitfield.h>
+#include <linux/delay.h>
+#include <soc/canaan/k210-sysctl.h>
+
+#include <dt-bindings/clock/k210-clk.h>
+
+struct k210_sysclk;
+
+struct k210_clk {
+       int id;
+       struct k210_sysclk *ksc;
+       struct clk_hw hw;
+};
+
+struct k210_clk_cfg {
+       const char *name;
+       u8 gate_reg;
+       u8 gate_bit;
+       u8 div_reg;
+       u8 div_shift;
+       u8 div_width;
+       u8 div_type;
+       u8 mux_reg;
+       u8 mux_bit;
+};
+
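+/*
+ * Divider types: one-based dividers divide by (value + 1), double one-based
+ * dividers by (2 * (value + 1)), and power-of-two dividers by 2^(value + 1).
+ */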
+enum k210_clk_div_type {
+       K210_DIV_NONE,
+       K210_DIV_ONE_BASED,
+       K210_DIV_DOUBLE_ONE_BASED,
+       K210_DIV_POWER_OF_TWO,
+};
+
+#define K210_GATE(_reg, _bit)  \
+       .gate_reg = (_reg),     \
+       .gate_bit = (_bit)
+
+#define K210_DIV(_reg, _shift, _width, _type)  \
+       .div_reg = (_reg),                      \
+       .div_shift = (_shift),                  \
+       .div_width = (_width),                  \
+       .div_type = (_type)
+
+#define K210_MUX(_reg, _bit)   \
+       .mux_reg = (_reg),      \
+       .mux_bit = (_bit)
+
+static struct k210_clk_cfg k210_clk_cfgs[K210_NUM_CLKS] = {
+       /* Gated clocks, no mux, no divider */
+       [K210_CLK_CPU] = {
+               .name = "cpu",
+               K210_GATE(K210_SYSCTL_EN_CENT, 0)
+       },
+       [K210_CLK_DMA] = {
+               .name = "dma",
+               K210_GATE(K210_SYSCTL_EN_PERI, 1)
+       },
+       [K210_CLK_FFT] = {
+               .name = "fft",
+               K210_GATE(K210_SYSCTL_EN_PERI, 4)
+       },
+       [K210_CLK_GPIO] = {
+               .name = "gpio",
+               K210_GATE(K210_SYSCTL_EN_PERI, 5)
+       },
+       [K210_CLK_UART1] = {
+               .name = "uart1",
+               K210_GATE(K210_SYSCTL_EN_PERI, 16)
+       },
+       [K210_CLK_UART2] = {
+               .name = "uart2",
+               K210_GATE(K210_SYSCTL_EN_PERI, 17)
+       },
+       [K210_CLK_UART3] = {
+               .name = "uart3",
+               K210_GATE(K210_SYSCTL_EN_PERI, 18)
+       },
+       [K210_CLK_FPIOA] = {
+               .name = "fpioa",
+               K210_GATE(K210_SYSCTL_EN_PERI, 20)
+       },
+       [K210_CLK_SHA] = {
+               .name = "sha",
+               K210_GATE(K210_SYSCTL_EN_PERI, 26)
+       },
+       [K210_CLK_AES] = {
+               .name = "aes",
+               K210_GATE(K210_SYSCTL_EN_PERI, 19)
+       },
+       [K210_CLK_OTP] = {
+               .name = "otp",
+               K210_GATE(K210_SYSCTL_EN_PERI, 27)
+       },
+       [K210_CLK_RTC] = {
+               .name = "rtc",
+               K210_GATE(K210_SYSCTL_EN_PERI, 29)
+       },
+
+       /* Gated divider clocks */
+       [K210_CLK_SRAM0] = {
+               .name = "sram0",
+               K210_GATE(K210_SYSCTL_EN_CENT, 1),
+               K210_DIV(K210_SYSCTL_THR0, 0, 4, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_SRAM1] = {
+               .name = "sram1",
+               K210_GATE(K210_SYSCTL_EN_CENT, 2),
+               K210_DIV(K210_SYSCTL_THR0, 4, 4, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_ROM] = {
+               .name = "rom",
+               K210_GATE(K210_SYSCTL_EN_PERI, 0),
+               K210_DIV(K210_SYSCTL_THR0, 16, 4, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_DVP] = {
+               .name = "dvp",
+               K210_GATE(K210_SYSCTL_EN_PERI, 3),
+               K210_DIV(K210_SYSCTL_THR0, 12, 4, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_APB0] = {
+               .name = "apb0",
+               K210_GATE(K210_SYSCTL_EN_CENT, 3),
+               K210_DIV(K210_SYSCTL_SEL0, 3, 3, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_APB1] = {
+               .name = "apb1",
+               K210_GATE(K210_SYSCTL_EN_CENT, 4),
+               K210_DIV(K210_SYSCTL_SEL0, 6, 3, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_APB2] = {
+               .name = "apb2",
+               K210_GATE(K210_SYSCTL_EN_CENT, 5),
+               K210_DIV(K210_SYSCTL_SEL0, 9, 3, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_AI] = {
+               .name = "ai",
+               K210_GATE(K210_SYSCTL_EN_PERI, 2),
+               K210_DIV(K210_SYSCTL_THR0, 8, 4, K210_DIV_ONE_BASED)
+       },
+       [K210_CLK_SPI0] = {
+               .name = "spi0",
+               K210_GATE(K210_SYSCTL_EN_PERI, 6),
+               K210_DIV(K210_SYSCTL_THR1, 0, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_SPI1] = {
+               .name = "spi1",
+               K210_GATE(K210_SYSCTL_EN_PERI, 7),
+               K210_DIV(K210_SYSCTL_THR1, 8, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_SPI2] = {
+               .name = "spi2",
+               K210_GATE(K210_SYSCTL_EN_PERI, 8),
+               K210_DIV(K210_SYSCTL_THR1, 16, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2C0] = {
+               .name = "i2c0",
+               K210_GATE(K210_SYSCTL_EN_PERI, 13),
+               K210_DIV(K210_SYSCTL_THR5, 8, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2C1] = {
+               .name = "i2c1",
+               K210_GATE(K210_SYSCTL_EN_PERI, 14),
+               K210_DIV(K210_SYSCTL_THR5, 16, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2C2] = {
+               .name = "i2c2",
+               K210_GATE(K210_SYSCTL_EN_PERI, 15),
+               K210_DIV(K210_SYSCTL_THR5, 24, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_WDT0] = {
+               .name = "wdt0",
+               K210_GATE(K210_SYSCTL_EN_PERI, 24),
+               K210_DIV(K210_SYSCTL_THR6, 0, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_WDT1] = {
+               .name = "wdt1",
+               K210_GATE(K210_SYSCTL_EN_PERI, 25),
+               K210_DIV(K210_SYSCTL_THR6, 8, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2S0] = {
+               .name = "i2s0",
+               K210_GATE(K210_SYSCTL_EN_PERI, 10),
+               K210_DIV(K210_SYSCTL_THR3, 0, 16, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2S1] = {
+               .name = "i2s1",
+               K210_GATE(K210_SYSCTL_EN_PERI, 11),
+               K210_DIV(K210_SYSCTL_THR3, 16, 16, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2S2] = {
+               .name = "i2s2",
+               K210_GATE(K210_SYSCTL_EN_PERI, 12),
+               K210_DIV(K210_SYSCTL_THR4, 0, 16, K210_DIV_DOUBLE_ONE_BASED)
+       },
+
+       /* Divider clocks, no gate, no mux */
+       [K210_CLK_I2S0_M] = {
+               .name = "i2s0_m",
+               K210_DIV(K210_SYSCTL_THR4, 16, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2S1_M] = {
+               .name = "i2s1_m",
+               K210_DIV(K210_SYSCTL_THR4, 24, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+       [K210_CLK_I2S2_M] = {
+               .name = "i2s2_m",
+               K210_DIV(K210_SYSCTL_THR4, 0, 8, K210_DIV_DOUBLE_ONE_BASED)
+       },
+
+       /* Muxed gated divider clocks */
+       [K210_CLK_SPI3] = {
+               .name = "spi3",
+               K210_GATE(K210_SYSCTL_EN_PERI, 9),
+               K210_DIV(K210_SYSCTL_THR1, 24, 8, K210_DIV_DOUBLE_ONE_BASED),
+               K210_MUX(K210_SYSCTL_SEL0, 12)
+       },
+       [K210_CLK_TIMER0] = {
+               .name = "timer0",
+               K210_GATE(K210_SYSCTL_EN_PERI, 21),
+               K210_DIV(K210_SYSCTL_THR2,  0, 8, K210_DIV_DOUBLE_ONE_BASED),
+               K210_MUX(K210_SYSCTL_SEL0, 13)
+       },
+       [K210_CLK_TIMER1] = {
+               .name = "timer1",
+               K210_GATE(K210_SYSCTL_EN_PERI, 22),
+               K210_DIV(K210_SYSCTL_THR2, 8, 8, K210_DIV_DOUBLE_ONE_BASED),
+               K210_MUX(K210_SYSCTL_SEL0, 14)
+       },
+       [K210_CLK_TIMER2] = {
+               .name = "timer2",
+               K210_GATE(K210_SYSCTL_EN_PERI, 23),
+               K210_DIV(K210_SYSCTL_THR2, 16, 8, K210_DIV_DOUBLE_ONE_BASED),
+               K210_MUX(K210_SYSCTL_SEL0, 15)
+       },
+};
+
+/*
+ * PLL control register bits.
+ */
+#define K210_PLL_CLKR          GENMASK(3, 0)
+#define K210_PLL_CLKF          GENMASK(9, 4)
+#define K210_PLL_CLKOD         GENMASK(13, 10)
+#define K210_PLL_BWADJ         GENMASK(19, 14)
+#define K210_PLL_RESET         (1 << 20)
+#define K210_PLL_PWRD          (1 << 21)
+#define K210_PLL_INTFB         (1 << 22)
+#define K210_PLL_BYPASS                (1 << 23)
+#define K210_PLL_TEST          (1 << 24)
+#define K210_PLL_EN            (1 << 25)
+#define K210_PLL_SEL           GENMASK(27, 26) /* PLL2 only */
+
+/*
+ * PLL lock register bits.
+ */
+#define K210_PLL_LOCK          0
+#define K210_PLL_CLEAR_SLIP    2
+#define K210_PLL_TEST_OUT      3
+
+/*
+ * Clock selector register bits.
+ */
+#define K210_ACLK_SEL          BIT(0)
+#define K210_ACLK_DIV          GENMASK(2, 1)
+
+/*
+ * PLLs.
+ */
+enum k210_pll_id {
+       K210_PLL0, K210_PLL1, K210_PLL2, K210_PLL_NUM
+};
+
+struct k210_pll {
+       enum k210_pll_id id;
+       struct k210_sysclk *ksc;
+       void __iomem *base;
+       void __iomem *reg;
+       void __iomem *lock;
+       u8 lock_shift;
+       u8 lock_width;
+       struct clk_hw hw;
+};
+#define to_k210_pll(_hw)       container_of(_hw, struct k210_pll, hw)
+
+/*
+ * PLLs configuration: by default PLL0 runs at 780 MHz and PLL1 at 299 MHz.
+ * The first 2 SRAM banks depend on ACLK/CPU clock which is by default PLL0
+ * rate divided by 2. Set PLL1 to 390 MHz so that the third SRAM bank has the
+ * same clock as the first 2.
+ */
+struct k210_pll_cfg {
+       u32 reg;
+       u8 lock_shift;
+       u8 lock_width;
+       u32 r;
+       u32 f;
+       u32 od;
+       u32 bwadj;
+};
+
+static struct k210_pll_cfg k210_plls_cfg[] = {
+       { K210_SYSCTL_PLL0,  0, 2, 0, 59, 1, 59 }, /* 780 MHz */
+       { K210_SYSCTL_PLL1,  8, 1, 0, 59, 3, 59 }, /* 390 MHz */
+       { K210_SYSCTL_PLL2, 16, 1, 0, 22, 1, 22 }, /* 299 MHz */
+};
+
+/**
+ * struct k210_sysclk - sysclk driver data
+ * @regs: system controller registers start address
+ * @clk_lock: clock setting spinlock
+ * @plls: SoC PLLs descriptors
+ * @aclk: ACLK clock
+ * @clks: All other clocks
+ */
+struct k210_sysclk {
+       void __iomem                    *regs;
+       spinlock_t                      clk_lock;
+       struct k210_pll                 plls[K210_PLL_NUM];
+       struct clk_hw                   aclk;
+       struct k210_clk                 clks[K210_NUM_CLKS];
+};
+
+#define to_k210_sysclk(_hw)    container_of(_hw, struct k210_sysclk, aclk)
+
+/*
+ * Set ACLK parent selector: 0 for IN0, 1 for PLL0.
+ */
+static void k210_aclk_set_selector(void __iomem *regs, u8 sel)
+{
+       u32 reg = readl(regs + K210_SYSCTL_SEL0);
+
+       if (sel)
+               reg |= K210_ACLK_SEL;
+       else
+               reg &= ~K210_ACLK_SEL;
+       writel(reg, regs + K210_SYSCTL_SEL0);
+}
+
+static void k210_init_pll(void __iomem *regs, enum k210_pll_id pllid,
+                         struct k210_pll *pll)
+{
+       pll->id = pllid;
+       pll->reg = regs + k210_plls_cfg[pllid].reg;
+       pll->lock = regs + K210_SYSCTL_PLL_LOCK;
+       pll->lock_shift = k210_plls_cfg[pllid].lock_shift;
+       pll->lock_width = k210_plls_cfg[pllid].lock_width;
+}
+
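+/*
+ * Busy-wait until all lock bits of the PLL are set, clearing any reported
+ * PLL slip between polls.
+ */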
+static void k210_pll_wait_for_lock(struct k210_pll *pll)
+{
+       u32 reg, mask = GENMASK(pll->lock_shift + pll->lock_width - 1,
+                               pll->lock_shift);
+
+       while (true) {
+               reg = readl(pll->lock);
+               if ((reg & mask) == mask)
+                       break;
+
+               reg |= BIT(pll->lock_shift + K210_PLL_CLEAR_SLIP);
+               writel(reg, pll->lock);
+       }
+}
+
+static bool k210_pll_hw_is_enabled(struct k210_pll *pll)
+{
+       u32 reg = readl(pll->reg);
+       u32 mask = K210_PLL_PWRD | K210_PLL_EN;
+
+       if (reg & K210_PLL_RESET)
+               return false;
+
+       return (reg & mask) == mask;
+}
+
+static void k210_pll_enable_hw(void __iomem *regs, struct k210_pll *pll)
+{
+       struct k210_pll_cfg *pll_cfg = &k210_plls_cfg[pll->id];
+       u32 reg;
+
+       if (k210_pll_hw_is_enabled(pll))
+               return;
+
+       /*
+        * For PLL0, we need to re-parent ACLK to IN0 to keep the CPU cores and
+        * SRAM running.
+        */
+       if (pll->id == K210_PLL0)
+               k210_aclk_set_selector(regs, 0);
+
+       /* Set PLL factors */
+       reg = readl(pll->reg);
+       reg &= ~GENMASK(19, 0);
+       reg |= FIELD_PREP(K210_PLL_CLKR, pll_cfg->r);
+       reg |= FIELD_PREP(K210_PLL_CLKF, pll_cfg->f);
+       reg |= FIELD_PREP(K210_PLL_CLKOD, pll_cfg->od);
+       reg |= FIELD_PREP(K210_PLL_BWADJ, pll_cfg->bwadj);
+       reg |= K210_PLL_PWRD;
+       writel(reg, pll->reg);
+
+       /*
+        * Reset the PLL: ensure reset is low before asserting it.
+        * The magic NOPs come from the Kendryte reference SDK.
+        */
+       reg &= ~K210_PLL_RESET;
+       writel(reg, pll->reg);
+       reg |= K210_PLL_RESET;
+       writel(reg, pll->reg);
+       nop();
+       nop();
+       reg &= ~K210_PLL_RESET;
+       writel(reg, pll->reg);
+
+       k210_pll_wait_for_lock(pll);
+
+       reg &= ~K210_PLL_BYPASS;
+       reg |= K210_PLL_EN;
+       writel(reg, pll->reg);
+
+       if (pll->id == K210_PLL0)
+               k210_aclk_set_selector(regs, 1);
+}
+
+static int k210_pll_enable(struct clk_hw *hw)
+{
+       struct k210_pll *pll = to_k210_pll(hw);
+       struct k210_sysclk *ksc = pll->ksc;
+       unsigned long flags;
+
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+
+       k210_pll_enable_hw(ksc->regs, pll);
+
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+       return 0;
+}
+
+static void k210_pll_disable(struct clk_hw *hw)
+{
+       struct k210_pll *pll = to_k210_pll(hw);
+       struct k210_sysclk *ksc = pll->ksc;
+       unsigned long flags;
+       u32 reg;
+
+       /*
+        * Bypassing before powering off is important so child clocks do not
+        * stop working. This is especially important for pll0, the indirect
+        * parent of the cpu clock.
+        */
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+       reg = readl(pll->reg);
+       reg |= K210_PLL_BYPASS;
+       writel(reg, pll->reg);
+
+       reg &= ~K210_PLL_PWRD;
+       reg &= ~K210_PLL_EN;
+       writel(reg, pll->reg);
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+}
+
+static int k210_pll_is_enabled(struct clk_hw *hw)
+{
+       return k210_pll_hw_is_enabled(to_k210_pll(hw));
+}
+
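+/*
+ * The PLL output rate is parent_rate * (CLKF + 1) / ((CLKR + 1) * (CLKOD + 1)),
+ * unless the PLL is bypassed, in which case the parent rate passes through.
+ */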
+static unsigned long k210_pll_get_rate(struct clk_hw *hw,
+                                      unsigned long parent_rate)
+{
+       struct k210_pll *pll = to_k210_pll(hw);
+       u32 reg = readl(pll->reg);
+       u32 r, f, od;
+
+       if (reg & K210_PLL_BYPASS)
+               return parent_rate;
+
+       if (!(reg & K210_PLL_PWRD))
+               return 0;
+
+       r = FIELD_GET(K210_PLL_CLKR, reg) + 1;
+       f = FIELD_GET(K210_PLL_CLKF, reg) + 1;
+       od = FIELD_GET(K210_PLL_CLKOD, reg) + 1;
+
+       return (u64)parent_rate * f / (r * od);
+}
+
+static const struct clk_ops k210_pll_ops = {
+       .enable         = k210_pll_enable,
+       .disable        = k210_pll_disable,
+       .is_enabled     = k210_pll_is_enabled,
+       .recalc_rate    = k210_pll_get_rate,
+};
+
+static int k210_pll2_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct k210_pll *pll = to_k210_pll(hw);
+       struct k210_sysclk *ksc = pll->ksc;
+       unsigned long flags;
+       u32 reg;
+
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+
+       reg = readl(pll->reg);
+       reg &= ~K210_PLL_SEL;
+       reg |= FIELD_PREP(K210_PLL_SEL, index);
+       writel(reg, pll->reg);
+
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+       return 0;
+}
+
+static u8 k210_pll2_get_parent(struct clk_hw *hw)
+{
+       struct k210_pll *pll = to_k210_pll(hw);
+       u32 reg = readl(pll->reg);
+
+       return FIELD_GET(K210_PLL_SEL, reg);
+}
+
+static const struct clk_ops k210_pll2_ops = {
+       .enable         = k210_pll_enable,
+       .disable        = k210_pll_disable,
+       .is_enabled     = k210_pll_is_enabled,
+       .recalc_rate    = k210_pll_get_rate,
+       .set_parent     = k210_pll2_set_parent,
+       .get_parent     = k210_pll2_get_parent,
+};
+
+static int __init k210_register_pll(struct device_node *np,
+                                   struct k210_sysclk *ksc,
+                                   enum k210_pll_id pllid, const char *name,
+                                   int num_parents, const struct clk_ops *ops)
+{
+       struct k210_pll *pll = &ksc->plls[pllid];
+       struct clk_init_data init = {};
+       const struct clk_parent_data parent_data[] = {
+               { /* .index = 0 for in0 */ },
+               { .hw = &ksc->plls[K210_PLL0].hw },
+               { .hw = &ksc->plls[K210_PLL1].hw },
+       };
+
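+       /* The clk framework copies the init data during registration */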
+       init.name = name;
+       init.parent_data = parent_data;
+       init.num_parents = num_parents;
+       init.ops = ops;
+
+       pll->hw.init = &init;
+       pll->ksc = ksc;
+
+       return of_clk_hw_register(np, &pll->hw);
+}
+
+static int __init k210_register_plls(struct device_node *np,
+                                    struct k210_sysclk *ksc)
+{
+       int i, ret;
+
+       for (i = 0; i < K210_PLL_NUM; i++)
+               k210_init_pll(ksc->regs, i, &ksc->plls[i]);
+
+       /* PLL0 and PLL1 only have IN0 as parent */
+       ret = k210_register_pll(np, ksc, K210_PLL0, "pll0", 1, &k210_pll_ops);
+       if (ret) {
+               pr_err("%pOFP: register PLL0 failed\n", np);
+               return ret;
+       }
+       ret = k210_register_pll(np, ksc, K210_PLL1, "pll1", 1, &k210_pll_ops);
+       if (ret) {
+               pr_err("%pOFP: register PLL1 failed\n", np);
+               return ret;
+       }
+
+       /* PLL2 has IN0, PLL0 and PLL1 as parents */
+       ret = k210_register_pll(np, ksc, K210_PLL2, "pll2", 3, &k210_pll2_ops);
+       if (ret) {
+               pr_err("%pOFP: register PLL2 failed\n", np);
+               return ret;
+       }
+
+       return 0;
+}
+
+static int k210_aclk_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct k210_sysclk *ksc = to_k210_sysclk(hw);
+       unsigned long flags;
+
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+
+       k210_aclk_set_selector(ksc->regs, index);
+
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+       return 0;
+}
+
+static u8 k210_aclk_get_parent(struct clk_hw *hw)
+{
+       struct k210_sysclk *ksc = to_k210_sysclk(hw);
+       u32 sel;
+
+       sel = readl(ksc->regs + K210_SYSCTL_SEL0) & K210_ACLK_SEL;
+
+       return sel ? 1 : 0;
+}
+
+static unsigned long k210_aclk_get_rate(struct clk_hw *hw,
+                                       unsigned long parent_rate)
+{
+       struct k210_sysclk *ksc = to_k210_sysclk(hw);
+       u32 reg = readl(ksc->regs + K210_SYSCTL_SEL0);
+       unsigned int shift;
+
+       if (!(reg & K210_ACLK_SEL))
+               return parent_rate;
+
+       shift = FIELD_GET(K210_ACLK_DIV, reg);
+
+       return parent_rate / (2UL << shift);
+}
+
+static const struct clk_ops k210_aclk_ops = {
+       .set_parent     = k210_aclk_set_parent,
+       .get_parent     = k210_aclk_get_parent,
+       .recalc_rate    = k210_aclk_get_rate,
+};
+
+/*
+ * ACLK has IN0 and PLL0 as parents.
+ */
+static int __init k210_register_aclk(struct device_node *np,
+                                    struct k210_sysclk *ksc)
+{
+       struct clk_init_data init = {};
+       const struct clk_parent_data parent_data[] = {
+               { /* .index = 0 for in0 */ },
+               { .hw = &ksc->plls[K210_PLL0].hw },
+       };
+       int ret;
+
+       init.name = "aclk";
+       init.parent_data = parent_data;
+       init.num_parents = 2;
+       init.ops = &k210_aclk_ops;
+       ksc->aclk.init = &init;
+
+       ret = of_clk_hw_register(np, &ksc->aclk);
+       if (ret) {
+               pr_err("%pOFP: register aclk failed\n", np);
+               return ret;
+       }
+
+       return 0;
+}
+
+#define to_k210_clk(_hw)       container_of(_hw, struct k210_clk, hw)
+
+static int k210_clk_enable(struct clk_hw *hw)
+{
+       struct k210_clk *kclk = to_k210_clk(hw);
+       struct k210_sysclk *ksc = kclk->ksc;
+       struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+       unsigned long flags;
+       u32 reg;
+
+       if (!cfg->gate_reg)
+               return 0;
+
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+       reg = readl(ksc->regs + cfg->gate_reg);
+       reg |= BIT(cfg->gate_bit);
+       writel(reg, ksc->regs + cfg->gate_reg);
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+       return 0;
+}
+
+static void k210_clk_disable(struct clk_hw *hw)
+{
+       struct k210_clk *kclk = to_k210_clk(hw);
+       struct k210_sysclk *ksc = kclk->ksc;
+       struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+       unsigned long flags;
+       u32 reg;
+
+       if (!cfg->gate_reg)
+               return;
+
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+       reg = readl(ksc->regs + cfg->gate_reg);
+       reg &= ~BIT(cfg->gate_bit);
+       writel(reg, ksc->regs + cfg->gate_reg);
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+}
+
+static int k210_clk_set_parent(struct clk_hw *hw, u8 index)
+{
+       struct k210_clk *kclk = to_k210_clk(hw);
+       struct k210_sysclk *ksc = kclk->ksc;
+       struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+       unsigned long flags;
+       u32 reg;
+
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+       reg = readl(ksc->regs + cfg->mux_reg);
+       if (index)
+               reg |= BIT(cfg->mux_bit);
+       else
+               reg &= ~BIT(cfg->mux_bit);
+       writel(reg, ksc->regs + cfg->mux_reg);
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+       return 0;
+}
+
+static u8 k210_clk_get_parent(struct clk_hw *hw)
+{
+       struct k210_clk *kclk = to_k210_clk(hw);
+       struct k210_sysclk *ksc = kclk->ksc;
+       struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+       unsigned long flags;
+       u32 reg, idx;
+
+       spin_lock_irqsave(&ksc->clk_lock, flags);
+       reg = readl(ksc->regs + cfg->mux_reg);
+       idx = (reg & BIT(cfg->mux_bit)) ? 1 : 0;
+       spin_unlock_irqrestore(&ksc->clk_lock, flags);
+
+       return idx;
+}
+
+static unsigned long k210_clk_get_rate(struct clk_hw *hw,
+                                      unsigned long parent_rate)
+{
+       struct k210_clk *kclk = to_k210_clk(hw);
+       struct k210_sysclk *ksc = kclk->ksc;
+       struct k210_clk_cfg *cfg = &k210_clk_cfgs[kclk->id];
+       u32 reg, div_val;
+
+       if (!cfg->div_reg)
+               return parent_rate;
+
+       reg = readl(ksc->regs + cfg->div_reg);
+       div_val = (reg >> cfg->div_shift) & GENMASK(cfg->div_width - 1, 0);
+
+       switch (cfg->div_type) {
+       case K210_DIV_ONE_BASED:
+               return parent_rate / (div_val + 1);
+       case K210_DIV_DOUBLE_ONE_BASED:
+               return parent_rate / ((div_val + 1) * 2);
+       case K210_DIV_POWER_OF_TWO:
+               return parent_rate / (2UL << div_val);
+       case K210_DIV_NONE:
+       default:
+               return 0;
+       }
+}
+
+static const struct clk_ops k210_clk_mux_ops = {
+       .enable         = k210_clk_enable,
+       .disable        = k210_clk_disable,
+       .set_parent     = k210_clk_set_parent,
+       .get_parent     = k210_clk_get_parent,
+       .recalc_rate    = k210_clk_get_rate,
+};
+
+static const struct clk_ops k210_clk_ops = {
+       .enable         = k210_clk_enable,
+       .disable        = k210_clk_disable,
+       .recalc_rate    = k210_clk_get_rate,
+};
+
+static void __init k210_register_clk(struct device_node *np,
+                                    struct k210_sysclk *ksc, int id,
+                                    const struct clk_parent_data *parent_data,
+                                    int num_parents, unsigned long flags)
+{
+       struct k210_clk *kclk = &ksc->clks[id];
+       struct clk_init_data init = {};
+       int ret;
+
+       init.name = k210_clk_cfgs[id].name;
+       init.flags = flags;
+       init.parent_data = parent_data;
+       init.num_parents = num_parents;
+       if (num_parents > 1)
+               init.ops = &k210_clk_mux_ops;
+       else
+               init.ops = &k210_clk_ops;
+
+       kclk->id = id;
+       kclk->ksc = ksc;
+       kclk->hw.init = &init;
+
+       ret = of_clk_hw_register(np, &kclk->hw);
+       if (ret) {
+               pr_err("%pOFP: register clock %s failed\n",
+                      np, k210_clk_cfgs[id].name);
+               kclk->id = -1;
+       }
+}
+
+/*
+ * All muxed clocks have IN0 and PLL0 as parents.
+ */
+static inline void __init k210_register_mux_clk(struct device_node *np,
+                                               struct k210_sysclk *ksc, int id)
+{
+       const struct clk_parent_data parent_data[2] = {
+               { /* .index = 0 for in0 */ },
+               { .hw = &ksc->plls[K210_PLL0].hw }
+       };
+
+       k210_register_clk(np, ksc, id, parent_data, 2, 0);
+}
+
+static inline void __init k210_register_in0_child(struct device_node *np,
+                                               struct k210_sysclk *ksc, int id)
+{
+       const struct clk_parent_data parent_data = {
+               /* .index = 0 for in0 */
+       };
+
+       k210_register_clk(np, ksc, id, &parent_data, 1, 0);
+}
+
+static inline void __init k210_register_pll_child(struct device_node *np,
+                                               struct k210_sysclk *ksc, int id,
+                                               enum k210_pll_id pllid,
+                                               unsigned long flags)
+{
+       const struct clk_parent_data parent_data = {
+               .hw = &ksc->plls[pllid].hw,
+       };
+
+       k210_register_clk(np, ksc, id, &parent_data, 1, flags);
+}
+
+static inline void __init k210_register_aclk_child(struct device_node *np,
+                                               struct k210_sysclk *ksc, int id,
+                                               unsigned long flags)
+{
+       const struct clk_parent_data parent_data = {
+               .hw = &ksc->aclk,
+       };
+
+       k210_register_clk(np, ksc, id, &parent_data, 1, flags);
+}
+
+static inline void __init k210_register_clk_child(struct device_node *np,
+                                               struct k210_sysclk *ksc, int id,
+                                               int parent_id)
+{
+       const struct clk_parent_data parent_data = {
+               .hw = &ksc->clks[parent_id].hw,
+       };
+
+       k210_register_clk(np, ksc, id, &parent_data, 1, 0);
+}
+
+static struct clk_hw *k210_clk_hw_onecell_get(struct of_phandle_args *clkspec,
+                                             void *data)
+{
+       struct k210_sysclk *ksc = data;
+       unsigned int idx = clkspec->args[0];
+
+       if (idx >= K210_NUM_CLKS)
+               return ERR_PTR(-EINVAL);
+
+       return &ksc->clks[idx].hw;
+}
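+
+/*
+ * A sketch (hypothetical node, not taken from a real dts) of how a
+ * consumer resolves a clock through this one-cell provider, using the
+ * K210_CLK_* binding indices:
+ *
+ *   uart1: serial@50210000 {
+ *           clocks = <&sysclk K210_CLK_UART1>;
+ *   };
+ */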
+
+static void __init k210_clk_init(struct device_node *np)
+{
+       struct device_node *sysctl_np;
+       struct k210_sysclk *ksc;
+       int i, ret;
+
+       ksc = kzalloc(sizeof(*ksc), GFP_KERNEL);
+       if (!ksc)
+               return;
+
+       spin_lock_init(&ksc->clk_lock);
+       sysctl_np = of_get_parent(np);
+       ksc->regs = of_iomap(sysctl_np, 0);
+       of_node_put(sysctl_np);
+       if (!ksc->regs) {
+               pr_err("%pOFP: failed to map registers\n", np);
+               return;
+       }
+
+       ret = k210_register_plls(np, ksc);
+       if (ret)
+               return;
+
+       ret = k210_register_aclk(np, ksc);
+       if (ret)
+               return;
+
+       /*
+        * Critical clocks: there are no consumers of the SRAM clocks,
+        * including the AI clock for the third SRAM bank. The CPU clock
+        * is only referenced by the uarths serial device and so would be
+        * disabled if the serial console were disabled in favor of another
+        * console. Mark all these clocks as critical so that they are never
+        * disabled by the core clock management.
+        */
+       k210_register_aclk_child(np, ksc, K210_CLK_CPU, CLK_IS_CRITICAL);
+       k210_register_aclk_child(np, ksc, K210_CLK_SRAM0, CLK_IS_CRITICAL);
+       k210_register_aclk_child(np, ksc, K210_CLK_SRAM1, CLK_IS_CRITICAL);
+       k210_register_pll_child(np, ksc, K210_CLK_AI, K210_PLL1,
+                               CLK_IS_CRITICAL);
+
+       /* Clocks with aclk as source */
+       k210_register_aclk_child(np, ksc, K210_CLK_DMA, 0);
+       k210_register_aclk_child(np, ksc, K210_CLK_FFT, 0);
+       k210_register_aclk_child(np, ksc, K210_CLK_ROM, 0);
+       k210_register_aclk_child(np, ksc, K210_CLK_DVP, 0);
+       k210_register_aclk_child(np, ksc, K210_CLK_APB0, 0);
+       k210_register_aclk_child(np, ksc, K210_CLK_APB1, 0);
+       k210_register_aclk_child(np, ksc, K210_CLK_APB2, 0);
+
+       /* Clocks with PLL0 as source */
+       k210_register_pll_child(np, ksc, K210_CLK_SPI0, K210_PLL0, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_SPI1, K210_PLL0, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_SPI2, K210_PLL0, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2C0, K210_PLL0, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2C1, K210_PLL0, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2C2, K210_PLL0, 0);
+
+       /* Clocks with PLL2 as source */
+       k210_register_pll_child(np, ksc, K210_CLK_I2S0, K210_PLL2, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2S1, K210_PLL2, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2S2, K210_PLL2, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2S0_M, K210_PLL2, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2S1_M, K210_PLL2, 0);
+       k210_register_pll_child(np, ksc, K210_CLK_I2S2_M, K210_PLL2, 0);
+
+       /* Clocks with IN0 as source */
+       k210_register_in0_child(np, ksc, K210_CLK_WDT0);
+       k210_register_in0_child(np, ksc, K210_CLK_WDT1);
+       k210_register_in0_child(np, ksc, K210_CLK_RTC);
+
+       /* Clocks with APB0 as source */
+       k210_register_clk_child(np, ksc, K210_CLK_GPIO, K210_CLK_APB0);
+       k210_register_clk_child(np, ksc, K210_CLK_UART1, K210_CLK_APB0);
+       k210_register_clk_child(np, ksc, K210_CLK_UART2, K210_CLK_APB0);
+       k210_register_clk_child(np, ksc, K210_CLK_UART3, K210_CLK_APB0);
+       k210_register_clk_child(np, ksc, K210_CLK_FPIOA, K210_CLK_APB0);
+       k210_register_clk_child(np, ksc, K210_CLK_SHA, K210_CLK_APB0);
+
+       /* Clocks with APB1 as source */
+       k210_register_clk_child(np, ksc, K210_CLK_AES, K210_CLK_APB1);
+       k210_register_clk_child(np, ksc, K210_CLK_OTP, K210_CLK_APB1);
+
+       /* Mux clocks with in0 or pll0 as source */
+       k210_register_mux_clk(np, ksc, K210_CLK_SPI3);
+       k210_register_mux_clk(np, ksc, K210_CLK_TIMER0);
+       k210_register_mux_clk(np, ksc, K210_CLK_TIMER1);
+       k210_register_mux_clk(np, ksc, K210_CLK_TIMER2);
+
+       /* Check for registration errors */
+       for (i = 0; i < K210_NUM_CLKS; i++) {
+               if (ksc->clks[i].id != i)
+                       return;
+       }
+
+       ret = of_clk_add_hw_provider(np, k210_clk_hw_onecell_get, ksc);
+       if (ret) {
+               pr_err("%pOFP: add clock provider failed %d\n", np, ret);
+               return;
+       }
+
+       pr_info("%pOFP: CPU running at %lu MHz\n",
+               np, clk_hw_get_rate(&ksc->clks[K210_CLK_CPU].hw) / 1000000);
+}
+
+CLK_OF_DECLARE(k210_clk, "canaan,k210-clk", k210_clk_init);
+
+/*
+ * Enable PLL1 to be able to use the AI SRAM.
+ */
+void __init k210_clk_early_init(void __iomem *regs)
+{
+       struct k210_pll pll1;
+
+       /* Make sure ACLK selector is set to PLL0 */
+       k210_aclk_set_selector(regs, 1);
+
+       /* Startup PLL1 to enable the aisram bank for general memory use */
+       k210_init_pll(regs, K210_PLL1, &pll1);
+       k210_pll_enable_hw(regs, &pll1);
+}
index 3d751ae..5052541 100644 (file)
@@ -4576,6 +4576,8 @@ int of_clk_add_provider(struct device_node *np,
        if (ret < 0)
                of_clk_del_provider(np);
 
+       fwnode_dev_initialized(&np->fwnode, true);
+
        return ret;
 }
 EXPORT_SYMBOL_GPL(of_clk_add_provider);
@@ -4693,6 +4695,7 @@ void of_clk_del_provider(struct device_node *np)
        list_for_each_entry(cp, &of_clk_providers, link) {
                if (cp->node == np) {
                        list_del(&cp->link);
+                       fwnode_dev_initialized(&np->fwnode, false);
                        of_node_put(cp->node);
                        kfree(cp);
                        break;
index 42f13a2..05ff3b0 100644 (file)
@@ -730,7 +730,8 @@ static int clk_gfx3d_determine_rate(struct clk_hw *hw,
        struct clk_rate_request parent_req = { };
        struct clk_rcg2_gfx3d *cgfx = to_clk_rcg2_gfx3d(hw);
        struct clk_hw *xo, *p0, *p1, *p2;
-       unsigned long request, p0_rate;
+       unsigned long p0_rate;
+       u8 mux_div = cgfx->div;
        int ret;
 
        p0 = cgfx->hws[0];
@@ -750,14 +751,15 @@ static int clk_gfx3d_determine_rate(struct clk_hw *hw,
                return 0;
        }
 
-       request = req->rate;
-       if (cgfx->div > 1)
-               parent_req.rate = request = request * cgfx->div;
+       if (mux_div == 0)
+               mux_div = 1;
+
+       parent_req.rate = req->rate * mux_div;
 
        /* This has to be a fixed rate PLL */
        p0_rate = clk_hw_get_rate(p0);
 
-       if (request == p0_rate) {
+       if (parent_req.rate == p0_rate) {
                req->rate = req->best_parent_rate = p0_rate;
                req->best_parent_hw = p0;
                return 0;
@@ -765,7 +767,7 @@ static int clk_gfx3d_determine_rate(struct clk_hw *hw,
 
        if (req->best_parent_hw == p0) {
                /* Are we going back to a previously used rate? */
-               if (clk_hw_get_rate(p2) == request)
+               if (clk_hw_get_rate(p2) == parent_req.rate)
                        req->best_parent_hw = p2;
                else
                        req->best_parent_hw = p1;
@@ -780,8 +782,7 @@ static int clk_gfx3d_determine_rate(struct clk_hw *hw,
                return ret;
 
        req->rate = req->best_parent_rate = parent_req.rate;
-       if (cgfx->div > 1)
-               req->rate /= cgfx->div;
+       req->rate /= mux_div;
 
        return 0;
 }
index 91dc390..c623ce9 100644 (file)
@@ -510,9 +510,12 @@ static const struct clk_rpmh_desc clk_rpmh_sm8350 = {
        .num_clks = ARRAY_SIZE(sm8350_rpmh_clocks),
 };
 
+/* Resource name must match resource id present in cmd-db */
+DEFINE_CLK_RPMH_ARC(sc7280, bi_tcxo, bi_tcxo_ao, "xo.lvl", 0x3, 4);
+
 static struct clk_hw *sc7280_rpmh_clocks[] = {
-       [RPMH_CXO_CLK]      = &sdm845_bi_tcxo.hw,
-       [RPMH_CXO_CLK_A]    = &sdm845_bi_tcxo_ao.hw,
+       [RPMH_CXO_CLK]      = &sc7280_bi_tcxo.hw,
+       [RPMH_CXO_CLK_A]    = &sc7280_bi_tcxo_ao.hw,
        [RPMH_LN_BB_CLK2]   = &sdm845_ln_bb_clk2.hw,
        [RPMH_LN_BB_CLK2_A] = &sdm845_ln_bb_clk2_ao.hw,
        [RPMH_RF_CLK1]      = &sdm845_rf_clk1.hw,
index 88e896a..da8b627 100644 (file)
@@ -620,7 +620,7 @@ static struct clk_rcg2 gcc_sdcc1_apps_clk_src = {
                .name = "gcc_sdcc1_apps_clk_src",
                .parent_data = gcc_parent_data_1,
                .num_parents = 5,
-               .ops = &clk_rcg2_ops,
+               .ops = &clk_rcg2_floor_ops,
        },
 };
 
@@ -642,7 +642,7 @@ static struct clk_rcg2 gcc_sdcc1_ice_core_clk_src = {
                .name = "gcc_sdcc1_ice_core_clk_src",
                .parent_data = gcc_parent_data_0,
                .num_parents = 4,
-               .ops = &clk_rcg2_floor_ops,
+               .ops = &clk_rcg2_ops,
        },
 };
 
index ef2a974..75bc401 100644 (file)
@@ -31,7 +31,7 @@ struct stm32_timer_cnt {
        struct counter_device counter;
        struct regmap *regmap;
        struct clk *clk;
-       u32 ceiling;
+       u32 max_arr;
        bool enabled;
        struct stm32_timer_regs bak;
 };
@@ -44,13 +44,14 @@ struct stm32_timer_cnt {
  * @STM32_COUNT_ENCODER_MODE_3: counts on both TI1FP1 and TI2FP2 edges
  */
 enum stm32_count_function {
-       STM32_COUNT_SLAVE_MODE_DISABLED = -1,
+       STM32_COUNT_SLAVE_MODE_DISABLED,
        STM32_COUNT_ENCODER_MODE_1,
        STM32_COUNT_ENCODER_MODE_2,
        STM32_COUNT_ENCODER_MODE_3,
 };
 
 static enum counter_count_function stm32_count_functions[] = {
+       [STM32_COUNT_SLAVE_MODE_DISABLED] = COUNTER_COUNT_FUNCTION_INCREASE,
        [STM32_COUNT_ENCODER_MODE_1] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_A,
        [STM32_COUNT_ENCODER_MODE_2] = COUNTER_COUNT_FUNCTION_QUADRATURE_X2_B,
        [STM32_COUNT_ENCODER_MODE_3] = COUNTER_COUNT_FUNCTION_QUADRATURE_X4,
@@ -73,8 +74,10 @@ static int stm32_count_write(struct counter_device *counter,
                             const unsigned long val)
 {
        struct stm32_timer_cnt *const priv = counter->priv;
+       u32 ceiling;
 
-       if (val > priv->ceiling)
+       regmap_read(priv->regmap, TIM_ARR, &ceiling);
+       if (val > ceiling)
                return -EINVAL;
 
        return regmap_write(priv->regmap, TIM_CNT, val);
@@ -90,6 +93,9 @@ static int stm32_count_function_get(struct counter_device *counter,
        regmap_read(priv->regmap, TIM_SMCR, &smcr);
 
        switch (smcr & TIM_SMCR_SMS) {
+       case 0:
+               *function = STM32_COUNT_SLAVE_MODE_DISABLED;
+               return 0;
        case 1:
                *function = STM32_COUNT_ENCODER_MODE_1;
                return 0;
@@ -99,9 +105,9 @@ static int stm32_count_function_get(struct counter_device *counter,
        case 3:
                *function = STM32_COUNT_ENCODER_MODE_3;
                return 0;
+       default:
+               return -EINVAL;
        }
-
-       return -EINVAL;
 }
 
 static int stm32_count_function_set(struct counter_device *counter,
@@ -112,6 +118,9 @@ static int stm32_count_function_set(struct counter_device *counter,
        u32 cr1, sms;
 
        switch (function) {
+       case STM32_COUNT_SLAVE_MODE_DISABLED:
+               sms = 0;
+               break;
        case STM32_COUNT_ENCODER_MODE_1:
                sms = 1;
                break;
@@ -122,8 +131,7 @@ static int stm32_count_function_set(struct counter_device *counter,
                sms = 3;
                break;
        default:
-               sms = 0;
-               break;
+               return -EINVAL;
        }
 
        /* Store enable status */
@@ -131,10 +139,6 @@ static int stm32_count_function_set(struct counter_device *counter,
 
        regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_CEN, 0);
 
-       /* TIMx_ARR register shouldn't be buffered (ARPE=0) */
-       regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0);
-       regmap_write(priv->regmap, TIM_ARR, priv->ceiling);
-
        regmap_update_bits(priv->regmap, TIM_SMCR, TIM_SMCR_SMS, sms);
 
        /* Make sure that registers are updated */
@@ -185,11 +189,13 @@ static ssize_t stm32_count_ceiling_write(struct counter_device *counter,
        if (ret)
                return ret;
 
+       if (ceiling > priv->max_arr)
+               return -ERANGE;
+
        /* TIMx_ARR register shouldn't be buffered (ARPE=0) */
        regmap_update_bits(priv->regmap, TIM_CR1, TIM_CR1_ARPE, 0);
        regmap_write(priv->regmap, TIM_ARR, ceiling);
 
-       priv->ceiling = ceiling;
        return len;
 }
 
@@ -274,31 +280,36 @@ static int stm32_action_get(struct counter_device *counter,
        size_t function;
        int err;
 
-       /* Default action mode (e.g. STM32_COUNT_SLAVE_MODE_DISABLED) */
-       *action = STM32_SYNAPSE_ACTION_NONE;
-
        err = stm32_count_function_get(counter, count, &function);
        if (err)
-               return 0;
+               return err;
 
        switch (function) {
+       case STM32_COUNT_SLAVE_MODE_DISABLED:
+               /* counts on internal clock when CEN=1 */
+               *action = STM32_SYNAPSE_ACTION_NONE;
+               return 0;
        case STM32_COUNT_ENCODER_MODE_1:
                /* counts up/down on TI1FP1 edge depending on TI2FP2 level */
                if (synapse->signal->id == count->synapses[0].signal->id)
                        *action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
-               break;
+               else
+                       *action = STM32_SYNAPSE_ACTION_NONE;
+               return 0;
        case STM32_COUNT_ENCODER_MODE_2:
                /* counts up/down on TI2FP2 edge depending on TI1FP1 level */
                if (synapse->signal->id == count->synapses[1].signal->id)
                        *action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
-               break;
+               else
+                       *action = STM32_SYNAPSE_ACTION_NONE;
+               return 0;
        case STM32_COUNT_ENCODER_MODE_3:
                /* counts up/down on both TI1FP1 and TI2FP2 edges */
                *action = STM32_SYNAPSE_ACTION_BOTH_EDGES;
-               break;
+               return 0;
+       default:
+               return -EINVAL;
        }
-
-       return 0;
 }
 
 static const struct counter_ops stm32_timer_cnt_ops = {
@@ -359,7 +370,7 @@ static int stm32_timer_cnt_probe(struct platform_device *pdev)
 
        priv->regmap = ddata->regmap;
        priv->clk = ddata->clk;
-       priv->ceiling = ddata->max_arr;
+       priv->max_arr = ddata->max_arr;
 
        priv->counter.name = dev_name(dev);
        priv->counter.parent = dev;
index 3995262..92701a1 100644 (file)
@@ -62,16 +62,6 @@ config X86_ACPI_CPUFREQ_CPB
          By enabling this option the acpi_cpufreq driver provides the old
          entry in addition to the new boost ones, for compatibility reasons.
 
-config X86_SFI_CPUFREQ
-       tristate "SFI Performance-States driver"
-       depends on X86_INTEL_MID && SFI
-       help
-         This adds a CPUFreq driver for some Silvermont based Intel Atom
-         architectures like Z34xx and Z35xx which enumerate processor
-         performance states through SFI.
-
-         If in doubt, say N.
-
 config ELAN_CPUFREQ
        tristate "AMD Elan SC400 and SC410"
        depends on MELAN
index 1ab9b15..27d3bd7 100644 (file)
@@ -43,7 +43,6 @@ obj-$(CONFIG_X86_P4_CLOCKMOD)         += p4-clockmod.o
 obj-$(CONFIG_X86_CPUFREQ_NFORCE2)      += cpufreq-nforce2.o
 obj-$(CONFIG_X86_INTEL_PSTATE)         += intel_pstate.o
 obj-$(CONFIG_X86_AMD_FREQ_SENSITIVITY) += amd_freq_sensitivity.o
-obj-$(CONFIG_X86_SFI_CPUFREQ)          += sfi-cpufreq.o
 
 ##################################################################################
 # ARM SoC drivers
index d3e5a6f..d1bbc16 100644 (file)
@@ -54,7 +54,6 @@ struct acpi_cpufreq_data {
        unsigned int resume;
        unsigned int cpu_feature;
        unsigned int acpi_perf_cpu;
-       unsigned int first_perf_state;
        cpumask_var_t freqdomain_cpus;
        void (*cpu_freq_write)(struct acpi_pct_register *reg, u32 val);
        u32 (*cpu_freq_read)(struct acpi_pct_register *reg);
@@ -223,10 +222,10 @@ static unsigned extract_msr(struct cpufreq_policy *policy, u32 msr)
 
        perf = to_perf_data(data);
 
-       cpufreq_for_each_entry(pos, policy->freq_table + data->first_perf_state)
+       cpufreq_for_each_entry(pos, policy->freq_table)
                if (msr == perf->states[pos->driver_data].status)
                        return pos->frequency;
-       return policy->freq_table[data->first_perf_state].frequency;
+       return policy->freq_table[0].frequency;
 }
 
 static unsigned extract_freq(struct cpufreq_policy *policy, u32 val)
@@ -365,7 +364,6 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
        struct cpufreq_policy *policy;
        unsigned int freq;
        unsigned int cached_freq;
-       unsigned int state;
 
        pr_debug("%s (%d)\n", __func__, cpu);
 
@@ -377,11 +375,7 @@ static unsigned int get_cur_freq_on_cpu(unsigned int cpu)
        if (unlikely(!data || !policy->freq_table))
                return 0;
 
-       state = to_perf_data(data)->state;
-       if (state < data->first_perf_state)
-               state = data->first_perf_state;
-
-       cached_freq = policy->freq_table[state].frequency;
+       cached_freq = policy->freq_table[to_perf_data(data)->state].frequency;
        freq = extract_freq(policy, get_cur_val(cpumask_of(cpu), data));
        if (freq != cached_freq) {
                /*
@@ -680,7 +674,6 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
        struct cpuinfo_x86 *c = &cpu_data(cpu);
        unsigned int valid_states = 0;
        unsigned int result = 0;
-       unsigned int state_count;
        u64 max_boost_ratio;
        unsigned int i;
 #ifdef CONFIG_SMP
@@ -795,28 +788,8 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
                goto err_unreg;
        }
 
-       state_count = perf->state_count + 1;
-
-       max_boost_ratio = get_max_boost_ratio(cpu);
-       if (max_boost_ratio) {
-               /*
-                * Make a room for one more entry to represent the highest
-                * available "boost" frequency.
-                */
-               state_count++;
-               valid_states++;
-               data->first_perf_state = valid_states;
-       } else {
-               /*
-                * If the maximum "boost" frequency is unknown, ask the arch
-                * scale-invariance code to use the "nominal" performance for
-                * CPU utilization scaling so as to prevent the schedutil
-                * governor from selecting inadequate CPU frequencies.
-                */
-               arch_set_max_freq_ratio(true);
-       }
-
-       freq_table = kcalloc(state_count, sizeof(*freq_table), GFP_KERNEL);
+       freq_table = kcalloc(perf->state_count + 1, sizeof(*freq_table),
+                            GFP_KERNEL);
        if (!freq_table) {
                result = -ENOMEM;
                goto err_unreg;
@@ -851,27 +824,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy)
        }
        freq_table[valid_states].frequency = CPUFREQ_TABLE_END;
 
+       max_boost_ratio = get_max_boost_ratio(cpu);
        if (max_boost_ratio) {
-               unsigned int state = data->first_perf_state;
-               unsigned int freq = freq_table[state].frequency;
+               unsigned int freq = freq_table[0].frequency;
 
                /*
                 * Because the loop above sorts the freq_table entries in the
                 * descending order, freq is the maximum frequency in the table.
                 * Assume that it corresponds to the CPPC nominal frequency and
-                * use it to populate the frequency field of the extra "boost"
-                * frequency entry.
+                * use it to set cpuinfo.max_freq.
                 */
-               freq_table[0].frequency = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
+               policy->cpuinfo.max_freq = freq * max_boost_ratio >> SCHED_CAPACITY_SHIFT;
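+               /*
+                * Worked example with hypothetical numbers: if freq (the
+                * nominal frequency) is 2000000 kHz and max_boost_ratio is
+                * 1280 (in units of 1/2^SCHED_CAPACITY_SHIFT == 1/1024),
+                * cpuinfo.max_freq becomes 2000000 * 1280 >> 10 == 2500000 kHz.
+                */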
+       } else {
                /*
-                * The purpose of the extra "boost" frequency entry is to make
-                * the rest of cpufreq aware of the real maximum frequency, but
-                * the way to request it is the same as for the first_perf_state
-                * entry that is expected to cover the entire range of "boost"
-                * frequencies of the CPU, so copy the driver_data value from
-                * that entry.
+                * If the maximum "boost" frequency is unknown, ask the arch
+                * scale-invariance code to use the "nominal" performance for
+                * CPU utilization scaling so as to prevent the schedutil
+                * governor from selecting inadequate CPU frequencies.
                 */
-               freq_table[0].driver_data = freq_table[state].driver_data;
+               arch_set_max_freq_ratio(true);
        }
 
        policy->freq_table = freq_table;
@@ -947,8 +918,7 @@ static void acpi_cpufreq_cpu_ready(struct cpufreq_policy *policy)
 {
        struct acpi_processor_performance *perf = per_cpu_ptr(acpi_perf_data,
                                                              policy->cpu);
-       struct acpi_cpufreq_data *data = policy->driver_data;
-       unsigned int freq = policy->freq_table[data->first_perf_state].frequency;
+       unsigned int freq = policy->freq_table[0].frequency;
 
        if (perf->states[0].core_frequency * 1000 != freq)
                pr_warn(FW_WARN "P-state 0 is not max freq\n");
index 3ba2f71..5e07065 100644 (file)
@@ -103,6 +103,8 @@ static const struct of_device_id whitelist[] __initconst = {
 static const struct of_device_id blacklist[] __initconst = {
        { .compatible = "allwinner,sun50i-h6", },
 
+       { .compatible = "arm,vexpress", },
+
        { .compatible = "calxeda,highbank", },
        { .compatible = "calxeda,ecx-2000", },
 
index 7d0ae96..1d1b563 100644 (file)
@@ -2101,7 +2101,7 @@ EXPORT_SYMBOL_GPL(cpufreq_driver_fast_switch);
  * cpufreq_driver_adjust_perf - Adjust CPU performance level in one go.
  * @cpu: Target CPU.
  * @min_perf: Minimum (required) performance level (units of @capacity).
- * @target_perf: Terget (desired) performance level (units of @capacity).
+ * @target_perf: Target (desired) performance level (units of @capacity).
  * @capacity: Capacity of the target CPU.
  *
  * Carry out a fast performance level switch of @cpu without sleeping.
index f839dc9..67e56cf 100644 (file)
@@ -52,7 +52,13 @@ int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy,
        }
 
        policy->min = policy->cpuinfo.min_freq = min_freq;
-       policy->max = policy->cpuinfo.max_freq = max_freq;
+       policy->max = max_freq;
+       /*
+        * If the driver has set its own cpuinfo.max_freq above max_freq, leave
+        * it as is.
+        */
+       if (policy->cpuinfo.max_freq < max_freq)
+               policy->max = policy->cpuinfo.max_freq = max_freq;
 
        if (policy->min == ~0)
                return -EINVAL;
@@ -261,7 +267,7 @@ struct freq_attr cpufreq_freq_attr_##_name##_freqs =     \
 __ATTR_RO(_name##_frequencies)
 
 /*
- * show_scaling_available_frequencies - show available normal frequencies for
+ * scaling_available_frequencies_show - show available normal frequencies for
  * the specified CPU
  */
 static ssize_t scaling_available_frequencies_show(struct cpufreq_policy *policy,
@@ -273,7 +279,7 @@ cpufreq_attr_available_freq(scaling_available);
 EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs);
 
 /*
- * show_available_boost_freqs - show available boost frequencies for
+ * scaling_boost_frequencies_show - show available boost frequencies for
  * the specified CPU
  */
 static ssize_t scaling_boost_frequencies_show(struct cpufreq_policy *policy,
index 3562445..f86859b 100644 (file)
@@ -32,6 +32,7 @@ struct qcom_cpufreq_soc_data {
 
 struct qcom_cpufreq_data {
        void __iomem *base;
+       struct resource *res;
        const struct qcom_cpufreq_soc_data *soc_data;
 };
 
@@ -280,6 +281,7 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
        struct of_phandle_args args;
        struct device_node *cpu_np;
        struct device *cpu_dev;
+       struct resource *res;
        void __iomem *base;
        struct qcom_cpufreq_data *data;
        int ret, index;
@@ -303,18 +305,33 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 
        index = args.args[0];
 
-       base = devm_platform_ioremap_resource(pdev, index);
-       if (IS_ERR(base))
-               return PTR_ERR(base);
+       res = platform_get_resource(pdev, IORESOURCE_MEM, index);
+       if (!res) {
+               dev_err(dev, "failed to get mem resource %d\n", index);
+               return -ENODEV;
+       }
 
-       data = devm_kzalloc(dev, sizeof(*data), GFP_KERNEL);
+       if (!request_mem_region(res->start, resource_size(res), res->name)) {
+               dev_err(dev, "failed to request resource %pR\n", res);
+               return -EBUSY;
+       }
+
+       base = ioremap(res->start, resource_size(res));
+       if (!base) {
+               dev_err(dev, "failed to map resource %pR\n", res);
+               ret = -ENOMEM;
+               goto release_region;
+       }
+
+       data = kzalloc(sizeof(*data), GFP_KERNEL);
        if (!data) {
                ret = -ENOMEM;
-               goto error;
+               goto unmap_base;
        }
 
        data->soc_data = of_device_get_match_data(&pdev->dev);
        data->base = base;
+       data->res = res;
 
        /* HW should be in enabled state to proceed */
        if (!(readl_relaxed(base + data->soc_data->reg_enable) & 0x1)) {
@@ -355,7 +372,11 @@ static int qcom_cpufreq_hw_cpu_init(struct cpufreq_policy *policy)
 
        return 0;
 error:
-       devm_iounmap(dev, base);
+       kfree(data);
+unmap_base:
+       iounmap(base);
+release_region:
+       release_mem_region(res->start, resource_size(res));
        return ret;
 }
 
@@ -363,12 +384,15 @@ static int qcom_cpufreq_hw_cpu_exit(struct cpufreq_policy *policy)
 {
        struct device *cpu_dev = get_cpu_device(policy->cpu);
        struct qcom_cpufreq_data *data = policy->driver_data;
-       struct platform_device *pdev = cpufreq_get_driver_data();
+       struct resource *res = data->res;
+       void __iomem *base = data->base;
 
        dev_pm_opp_remove_all_dynamic(cpu_dev);
        dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
        kfree(policy->freq_table);
-       devm_iounmap(&pdev->dev, data->base);
+       kfree(data);
+       iounmap(base);
+       release_mem_region(res->start, resource_size(res));
 
        return 0;
 }
diff --git a/drivers/cpufreq/sfi-cpufreq.c b/drivers/cpufreq/sfi-cpufreq.c
deleted file mode 100644 (file)
index 45cfdf6..0000000
+++ /dev/null
@@ -1,127 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  SFI Performance States Driver
- *
- *  Author: Vishwesh M Rudramuni <vishwesh.m.rudramuni@intel.com>
- *  Author: Srinidhi Kasagar <srinidhi.kasagar@intel.com>
- */
-
-#include <linux/cpufreq.h>
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/sfi.h>
-#include <linux/slab.h>
-#include <linux/smp.h>
-
-#include <asm/msr.h>
-
-static struct cpufreq_frequency_table *freq_table;
-static struct sfi_freq_table_entry *sfi_cpufreq_array;
-static int num_freq_table_entries;
-
-static int sfi_parse_freq(struct sfi_table_header *table)
-{
-       struct sfi_table_simple *sb;
-       struct sfi_freq_table_entry *pentry;
-       int totallen;
-
-       sb = (struct sfi_table_simple *)table;
-       num_freq_table_entries = SFI_GET_NUM_ENTRIES(sb,
-                       struct sfi_freq_table_entry);
-       if (num_freq_table_entries <= 1) {
-               pr_err("No p-states discovered\n");
-               return -ENODEV;
-       }
-
-       pentry = (struct sfi_freq_table_entry *)sb->pentry;
-       totallen = num_freq_table_entries * sizeof(*pentry);
-
-       sfi_cpufreq_array = kmemdup(pentry, totallen, GFP_KERNEL);
-       if (!sfi_cpufreq_array)
-               return -ENOMEM;
-
-       return 0;
-}
-
-static int sfi_cpufreq_target(struct cpufreq_policy *policy, unsigned int index)
-{
-       unsigned int next_perf_state = 0; /* Index into perf table */
-       u32 lo, hi;
-
-       next_perf_state = policy->freq_table[index].driver_data;
-
-       rdmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, &lo, &hi);
-       lo = (lo & ~INTEL_PERF_CTL_MASK) |
-               ((u32) sfi_cpufreq_array[next_perf_state].ctrl_val &
-               INTEL_PERF_CTL_MASK);
-       wrmsr_on_cpu(policy->cpu, MSR_IA32_PERF_CTL, lo, hi);
-
-       return 0;
-}
-
-static int sfi_cpufreq_cpu_init(struct cpufreq_policy *policy)
-{
-       policy->shared_type = CPUFREQ_SHARED_TYPE_HW;
-       policy->cpuinfo.transition_latency = 100000;    /* 100us */
-       policy->freq_table = freq_table;
-
-       return 0;
-}
-
-static struct cpufreq_driver sfi_cpufreq_driver = {
-       .flags          = CPUFREQ_CONST_LOOPS,
-       .verify         = cpufreq_generic_frequency_table_verify,
-       .target_index   = sfi_cpufreq_target,
-       .init           = sfi_cpufreq_cpu_init,
-       .name           = "sfi-cpufreq",
-       .attr           = cpufreq_generic_attr,
-};
-
-static int __init sfi_cpufreq_init(void)
-{
-       int ret, i;
-
-       /* parse the freq table from SFI */
-       ret = sfi_table_parse(SFI_SIG_FREQ, NULL, NULL, sfi_parse_freq);
-       if (ret)
-               return ret;
-
-       freq_table = kcalloc(num_freq_table_entries + 1, sizeof(*freq_table),
-                            GFP_KERNEL);
-       if (!freq_table) {
-               ret = -ENOMEM;
-               goto err_free_array;
-       }
-
-       for (i = 0; i < num_freq_table_entries; i++) {
-               freq_table[i].driver_data = i;
-               freq_table[i].frequency = sfi_cpufreq_array[i].freq_mhz * 1000;
-       }
-       freq_table[i].frequency = CPUFREQ_TABLE_END;
-
-       ret = cpufreq_register_driver(&sfi_cpufreq_driver);
-       if (ret)
-               goto err_free_tbl;
-
-       return ret;
-
-err_free_tbl:
-       kfree(freq_table);
-err_free_array:
-       kfree(sfi_cpufreq_array);
-       return ret;
-}
-late_initcall(sfi_cpufreq_init);
-
-static void __exit sfi_cpufreq_exit(void)
-{
-       cpufreq_unregister_driver(&sfi_cpufreq_driver);
-       kfree(freq_table);
-       kfree(sfi_cpufreq_array);
-}
-module_exit(sfi_cpufreq_exit);
-
-MODULE_AUTHOR("Vishwesh M Rudramuni <vishwesh.m.rudramuni@intel.com>");
-MODULE_DESCRIPTION("SFI Performance-States Driver");
-MODULE_LICENSE("GPL");
index 2de5e36..cc8dd30 100644 (file)
@@ -1042,7 +1042,7 @@ error:
        return ret;
 }
 
-static int nx842_remove(struct vio_dev *viodev)
+static void nx842_remove(struct vio_dev *viodev)
 {
        struct nx842_devdata *old_devdata;
        unsigned long flags;
@@ -1063,8 +1063,6 @@ static int nx842_remove(struct vio_dev *viodev)
        if (old_devdata)
                kfree(old_devdata->counters);
        kfree(old_devdata);
-
-       return 0;
 }
 
 static const struct vio_device_id nx842_vio_driver_ids[] = {
index 0d2dc5b..1d0e8a1 100644 (file)
@@ -783,7 +783,7 @@ static int nx_probe(struct vio_dev *viodev, const struct vio_device_id *id)
        return nx_register_algs();
 }
 
-static int nx_remove(struct vio_dev *viodev)
+static void nx_remove(struct vio_dev *viodev)
 {
        dev_dbg(&viodev->dev, "entering nx_remove for UA 0x%x\n",
                viodev->unit_address);
@@ -811,8 +811,6 @@ static int nx_remove(struct vio_dev *viodev)
                nx_unregister_skcipher(&nx_ecb_aes_alg, NX_FC_AES,
                                       NX_MODE_AES_ECB);
        }
-
-       return 0;
 }
 
 
diff --git a/drivers/cxl/Kconfig b/drivers/cxl/Kconfig
new file mode 100644 (file)
index 0000000..97dc4d7
--- /dev/null
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0-only
+menuconfig CXL_BUS
+       tristate "CXL (Compute Express Link) Devices Support"
+       depends on PCI
+       help
+         CXL is a bus that is electrically compatible with PCI Express, but
+         layers three protocols on that signalling (CXL.io, CXL.cache, and
+         CXL.mem). The CXL.cache protocol allows devices to hold cachelines
+         locally, the CXL.mem protocol allows devices to be fully coherent
+         memory targets, the CXL.io protocol is equivalent to PCI Express.
+         Say 'y' to enable support for the configuration and management of
+         devices supporting these protocols.
+
+if CXL_BUS
+
+config CXL_MEM
+       tristate "CXL.mem: Memory Devices"
+       help
+         The CXL.mem protocol allows a device to act as a provider of
+         "System RAM" and/or "Persistent Memory" that is fully coherent
+         as if the memory was attached to the typical CPU memory
+         controller.
+
+         Say 'y/m' to enable a driver (named "cxl_mem.ko" when built as
+         a module) that will attach to CXL.mem devices for
+         configuration, provisioning, and health monitoring. This
+         driver is required for dynamic provisioning of CXL.mem
+         attached memory which is a prerequisite for persistent memory
+         support. Typically volatile memory is mapped by platform
+         firmware and included in the platform memory map, but in some
+         cases the OS is responsible for mapping that memory. See
+         Chapter 2.3 Type 3 CXL Device in the CXL 2.0 specification.
+
+         If unsure say 'm'.
+
+config CXL_MEM_RAW_COMMANDS
+       bool "RAW Command Interface for Memory Devices"
+       depends on CXL_MEM
+       help
+         Enable CXL RAW command interface.
+
+         The CXL driver ioctl interface may assign a kernel ioctl command
+         number for each specification defined opcode. At any given point in
+         time the number of opcodes that the specification defines and a device
+         may implement may exceed the kernel's set of associated ioctl function
+         numbers. The mismatch is either by omission, specification is too new,
+         or by design. When prototyping new hardware, or developing / debugging
+         the driver it is useful to be able to submit any possible command to
+         the hardware, even commands that may crash the kernel due to their
+         potential impact to memory currently in use by the kernel.
+
+         If developing CXL hardware or the driver say Y, otherwise say N.
+endif
diff --git a/drivers/cxl/Makefile b/drivers/cxl/Makefile
new file mode 100644 (file)
index 0000000..a314a18
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_CXL_BUS) += cxl_bus.o
+obj-$(CONFIG_CXL_MEM) += cxl_mem.o
+
+ccflags-y += -DDEFAULT_SYMBOL_NAMESPACE=CXL
+cxl_bus-y := bus.o
+cxl_mem-y := mem.o
diff --git a/drivers/cxl/bus.c b/drivers/cxl/bus.c
new file mode 100644 (file)
index 0000000..58f7479
--- /dev/null
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <linux/device.h>
+#include <linux/module.h>
+
+/**
+ * DOC: cxl bus
+ *
+ * The CXL bus provides a namespace for control devices and a rendezvous
+ * point for cross-device interleave coordination.
+ */
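+
+/*
+ * A sketch of how a device object joins this bus (hypothetical device,
+ * following the usual driver-core pattern):
+ *
+ *   cxlmd->dev.bus = &cxl_bus_type;
+ *   rc = device_add(&cxlmd->dev);
+ */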
+struct bus_type cxl_bus_type = {
+       .name = "cxl",
+};
+EXPORT_SYMBOL_GPL(cxl_bus_type);
+
+static __init int cxl_bus_init(void)
+{
+       return bus_register(&cxl_bus_type);
+}
+
+static void cxl_bus_exit(void)
+{
+       bus_unregister(&cxl_bus_type);
+}
+
+module_init(cxl_bus_init);
+module_exit(cxl_bus_exit);
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/cxl/cxl.h b/drivers/cxl/cxl.h
new file mode 100644 (file)
index 0000000..6f14838
--- /dev/null
@@ -0,0 +1,95 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2020 Intel Corporation. */
+
+#ifndef __CXL_H__
+#define __CXL_H__
+
+#include <linux/bitfield.h>
+#include <linux/bitops.h>
+#include <linux/io.h>
+
+/* CXL 2.0 8.2.8.1 Device Capabilities Array Register */
+#define CXLDEV_CAP_ARRAY_OFFSET 0x0
+#define   CXLDEV_CAP_ARRAY_CAP_ID 0
+#define   CXLDEV_CAP_ARRAY_ID_MASK GENMASK_ULL(15, 0)
+#define   CXLDEV_CAP_ARRAY_COUNT_MASK GENMASK_ULL(47, 32)
+/* CXL 2.0 8.2.8.2 CXL Device Capability Header Register */
+#define CXLDEV_CAP_HDR_CAP_ID_MASK GENMASK(15, 0)
+/* CXL 2.0 8.2.8.2.1 CXL Device Capabilities */
+#define CXLDEV_CAP_CAP_ID_DEVICE_STATUS 0x1
+#define CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX 0x2
+#define CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX 0x3
+#define CXLDEV_CAP_CAP_ID_MEMDEV 0x4000
+
+/* CXL 2.0 8.2.8.4 Mailbox Registers */
+#define CXLDEV_MBOX_CAPS_OFFSET 0x00
+#define   CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK GENMASK(4, 0)
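+/*
+ * Note: the payload size field encodes a power of two, e.g. a field
+ * value of 8 means a 2^8 = 256 byte payload area.
+ */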
+#define CXLDEV_MBOX_CTRL_OFFSET 0x04
+#define   CXLDEV_MBOX_CTRL_DOORBELL BIT(0)
+#define CXLDEV_MBOX_CMD_OFFSET 0x08
+#define   CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK GENMASK_ULL(15, 0)
+#define   CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK GENMASK_ULL(36, 16)
+#define CXLDEV_MBOX_STATUS_OFFSET 0x10
+#define   CXLDEV_MBOX_STATUS_RET_CODE_MASK GENMASK_ULL(47, 32)
+#define CXLDEV_MBOX_BG_CMD_STATUS_OFFSET 0x18
+#define CXLDEV_MBOX_PAYLOAD_OFFSET 0x20
+
+/* CXL 2.0 8.2.8.5.1.1 Memory Device Status Register */
+#define CXLMDEV_STATUS_OFFSET 0x0
+#define   CXLMDEV_DEV_FATAL BIT(0)
+#define   CXLMDEV_FW_HALT BIT(1)
+#define   CXLMDEV_STATUS_MEDIA_STATUS_MASK GENMASK(3, 2)
+#define     CXLMDEV_MS_NOT_READY 0
+#define     CXLMDEV_MS_READY 1
+#define     CXLMDEV_MS_ERROR 2
+#define     CXLMDEV_MS_DISABLED 3
+#define CXLMDEV_READY(status)                                                  \
+       (FIELD_GET(CXLMDEV_STATUS_MEDIA_STATUS_MASK, status) ==                \
+        CXLMDEV_MS_READY)
+#define   CXLMDEV_MBOX_IF_READY BIT(4)
+#define   CXLMDEV_RESET_NEEDED_MASK GENMASK(7, 5)
+#define     CXLMDEV_RESET_NEEDED_NOT 0
+#define     CXLMDEV_RESET_NEEDED_COLD 1
+#define     CXLMDEV_RESET_NEEDED_WARM 2
+#define     CXLMDEV_RESET_NEEDED_HOT 3
+#define     CXLMDEV_RESET_NEEDED_CXL 4
+#define CXLMDEV_RESET_NEEDED(status)                                           \
+       (FIELD_GET(CXLMDEV_RESET_NEEDED_MASK, status) !=                       \
+        CXLMDEV_RESET_NEEDED_NOT)
+
+struct cxl_memdev;
+/**
+ * struct cxl_mem - A CXL memory device
+ * @pdev: The PCI device associated with this CXL device.
+ * @regs: IO mappings to the device's MMIO
+ * @cxlmd: The CXL.mem chardev object associated with this device.
+ * @status_regs: CXL 2.0 8.2.8.3 Device Status Registers
+ * @mbox_regs: CXL 2.0 8.2.8.4 Mailbox Registers
+ * @memdev_regs: CXL 2.0 8.2.8.5 Memory Device Registers
+ * @payload_size: Size of space for payload
+ *                (CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register)
+ * @mbox_mutex: Mutex to synchronize mailbox access.
+ * @firmware_version: Firmware version for the memory device.
+ * @enabled_cmds: Hardware commands found enabled in CEL.
+ * @pmem_range: Persistent memory capacity information.
+ * @ram_range: Volatile memory capacity information.
+ */
+struct cxl_mem {
+       struct pci_dev *pdev;
+       void __iomem *regs;
+       struct cxl_memdev *cxlmd;
+
+       void __iomem *status_regs;
+       void __iomem *mbox_regs;
+       void __iomem *memdev_regs;
+
+       size_t payload_size;
+       struct mutex mbox_mutex; /* Protects device mailbox and firmware */
+       char firmware_version[0x10];
+       unsigned long *enabled_cmds;
+
+       struct range pmem_range;
+       struct range ram_range;
+};
+
+extern struct bus_type cxl_bus_type;
+#endif /* __CXL_H__ */
diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
new file mode 100644 (file)
index 0000000..244cb7d
--- /dev/null
@@ -0,0 +1,1552 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#include <uapi/linux/cxl_mem.h>
+#include <linux/security.h>
+#include <linux/debugfs.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/cdev.h>
+#include <linux/idr.h>
+#include <linux/pci.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include "pci.h"
+#include "cxl.h"
+
+/**
+ * DOC: cxl mem
+ *
+ * This implements a CXL memory device ("type-3") as it is defined by the
+ * Compute Express Link specification.
+ *
+ * The driver has several responsibilities, mainly:
+ *  - Create the memX device and register on the CXL bus.
+ *  - Enumerate the device's register interfaces and map them.
+ *  - Probe the device attributes to establish a sysfs interface.
+ *  - Provide an IOCTL interface to userspace to communicate with the device for
+ *    things like firmware update.
+ *  - Support management of interleave sets.
+ *  - Handle and manage error conditions.
+ */
+
+/*
+ * An entire PCI topology full of devices should be enough for any
+ * config
+ */
+#define CXL_MEM_MAX_DEVS 65536
+
+#define cxl_doorbell_busy(cxlm)                                                \
+       (readl((cxlm)->mbox_regs + CXLDEV_MBOX_CTRL_OFFSET) &                  \
+        CXLDEV_MBOX_CTRL_DOORBELL)
+
+/* CXL 2.0 - 8.2.8.4 */
+#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)
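+/* Note: despite the _MS suffix, this is a duration in jiffies (2 * HZ == 2s) */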
+
+enum opcode {
+       CXL_MBOX_OP_INVALID             = 0x0000,
+       CXL_MBOX_OP_RAW                 = CXL_MBOX_OP_INVALID,
+       CXL_MBOX_OP_GET_FW_INFO         = 0x0200,
+       CXL_MBOX_OP_ACTIVATE_FW         = 0x0202,
+       CXL_MBOX_OP_GET_SUPPORTED_LOGS  = 0x0400,
+       CXL_MBOX_OP_GET_LOG             = 0x0401,
+       CXL_MBOX_OP_IDENTIFY            = 0x4000,
+       CXL_MBOX_OP_GET_PARTITION_INFO  = 0x4100,
+       CXL_MBOX_OP_SET_PARTITION_INFO  = 0x4101,
+       CXL_MBOX_OP_GET_LSA             = 0x4102,
+       CXL_MBOX_OP_SET_LSA             = 0x4103,
+       CXL_MBOX_OP_GET_HEALTH_INFO     = 0x4200,
+       CXL_MBOX_OP_SET_SHUTDOWN_STATE  = 0x4204,
+       CXL_MBOX_OP_SCAN_MEDIA          = 0x4304,
+       CXL_MBOX_OP_GET_SCAN_MEDIA      = 0x4305,
+       CXL_MBOX_OP_MAX                 = 0x10000
+};
+
+/**
+ * struct mbox_cmd - A command to be submitted to hardware.
+ * @opcode: (input) The command set and command submitted to hardware.
+ * @payload_in: (input) Pointer to the input payload.
+ * @payload_out: (output) Pointer to the output payload. Must be allocated by
+ *              the caller.
+ * @size_in: (input) Number of bytes to load from @payload_in.
+ * @size_out: (input) Max number of bytes loaded into @payload_out.
+ *            (output) Number of bytes generated by the device. For fixed size
+ *            output commands this is always expected to be deterministic. For
+ *            variable sized output commands, it tells the exact number of bytes
+ *            written.
+ * @return_code: (output) Error code returned from hardware.
+ *
+ * This is the primary mechanism used to send commands to the hardware.
+ * All the fields except @payload_* correspond exactly to the fields described
+ * in the Command Register section of CXL 2.0 8.2.8.4.5. @payload_in and
+ * @payload_out are written to, and read from the Command Payload Registers
+ * defined in CXL 2.0 8.2.8.4.8.
+ */
+struct mbox_cmd {
+       u16 opcode;
+       void *payload_in;
+       void *payload_out;
+       size_t size_in;
+       size_t size_out;
+       u16 return_code;
+#define CXL_MBOX_SUCCESS 0
+};
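+
+/*
+ * A sketch of submitting a fixed-size command with this structure
+ * (IDENTIFY takes no input payload and returns 0x43 bytes; error
+ * handling elided):
+ *
+ *   u8 out[0x43];
+ *   struct mbox_cmd cmd = {
+ *           .opcode = CXL_MBOX_OP_IDENTIFY,
+ *           .payload_out = out,
+ *           .size_out = sizeof(out),
+ *   };
+ *
+ *   rc = __cxl_mem_mbox_send_cmd(cxlm, &cmd);
+ */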
+
+/**
+ * struct cxl_memdev - CXL bus object representing a Type-3 Memory Device
+ * @dev: driver core device object
+ * @cdev: char dev core object for ioctl operations
+ * @cxlm: pointer to the parent device driver data
+ * @ops_active: active user of @cxlm in ops handlers
+ * @ops_dead: completion when all @cxlm ops users have exited
+ * @id: id number of this memdev instance.
+ */
+struct cxl_memdev {
+       struct device dev;
+       struct cdev cdev;
+       struct cxl_mem *cxlm;
+       struct percpu_ref ops_active;
+       struct completion ops_dead;
+       int id;
+};
+
+static int cxl_mem_major;
+static DEFINE_IDA(cxl_memdev_ida);
+static struct dentry *cxl_debugfs;
+static bool cxl_raw_allow_all;
+
+enum {
+       CEL_UUID,
+       VENDOR_DEBUG_UUID,
+};
+
+/* See CXL 2.0 Table 170. Get Log Input Payload */
+static const uuid_t log_uuid[] = {
+       [CEL_UUID] = UUID_INIT(0xda9c0b5, 0xbf41, 0x4b78, 0x8f, 0x79, 0x96,
+                              0xb1, 0x62, 0x3b, 0x3f, 0x17),
+       [VENDOR_DEBUG_UUID] = UUID_INIT(0xe1819d9, 0x11a9, 0x400c, 0x81, 0x1f,
+                                       0xd6, 0x07, 0x19, 0x40, 0x3d, 0x86),
+};
+
+/**
+ * struct cxl_mem_command - Driver representation of a memory device command
+ * @info: Command information as it exists for the UAPI
+ * @opcode: The actual bits used for the mailbox protocol
+ * @flags: Set of flags affecting driver behavior.
+ *
+ *  * %CXL_CMD_FLAG_FORCE_ENABLE: In cases of error, commands with this flag
+ *    will be enabled by the driver regardless of what hardware may have
+ *    advertised.
+ *
+ * The cxl_mem_command is the driver's internal representation of commands that
+ * are supported by the driver. Some of these commands may not be supported by
+ * the hardware. The driver will use @info to validate the fields passed in by
+ * the user then submit the @opcode to the hardware.
+ *
+ * See struct cxl_command_info.
+ */
+struct cxl_mem_command {
+       struct cxl_command_info info;
+       enum opcode opcode;
+       u32 flags;
+#define CXL_CMD_FLAG_NONE 0
+#define CXL_CMD_FLAG_FORCE_ENABLE BIT(0)
+};
+
+#define CXL_CMD(_id, sin, sout, _flags)                                        \
+       [CXL_MEM_COMMAND_ID_##_id] = {                                         \
+       .info = {                                                              \
+                       .id = CXL_MEM_COMMAND_ID_##_id,                        \
+                       .size_in = sin,                                        \
+                       .size_out = sout,                                      \
+               },                                                             \
+       .opcode = CXL_MBOX_OP_##_id,                                           \
+       .flags = _flags,                                                       \
+       }
+
+/*
+ * This table defines the supported mailbox commands for the driver. This table
+ * is made up of a UAPI structure. Non-negative values as parameters in the
+ * table will be validated against the user's input. For example, if size_in is
+ * 0, and the user passed in 1, it is an error.
+ */
+static struct cxl_mem_command mem_commands[] = {
+       CXL_CMD(IDENTIFY, 0, 0x43, CXL_CMD_FLAG_FORCE_ENABLE),
+#ifdef CONFIG_CXL_MEM_RAW_COMMANDS
+       CXL_CMD(RAW, ~0, ~0, 0),
+#endif
+       CXL_CMD(GET_SUPPORTED_LOGS, 0, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
+       CXL_CMD(GET_FW_INFO, 0, 0x50, 0),
+       CXL_CMD(GET_PARTITION_INFO, 0, 0x20, 0),
+       CXL_CMD(GET_LSA, 0x8, ~0, 0),
+       CXL_CMD(GET_HEALTH_INFO, 0, 0x12, 0),
+       CXL_CMD(GET_LOG, 0x18, ~0, CXL_CMD_FLAG_FORCE_ENABLE),
+};
+
+/*
+ * Commands that RAW doesn't permit. The rationale for each:
+ *
+ * CXL_MBOX_OP_ACTIVATE_FW: Firmware activation requires adjustment /
+ * coordination of transaction timeout values at the root bridge level.
+ *
+ * CXL_MBOX_OP_SET_PARTITION_INFO: The device memory map may change live
+ * and needs to be coordinated with HDM updates.
+ *
+ * CXL_MBOX_OP_SET_LSA: The label storage area may be cached by the
+ * driver and any writes from userspace invalidates those contents.
+ *
+ * CXL_MBOX_OP_SET_SHUTDOWN_STATE: Set shutdown state assumes no writes
+ * to the device after it is marked clean, userspace can not make that
+ * assertion.
+ *
+ * CXL_MBOX_OP_[GET_]SCAN_MEDIA: The kernel provides a native error list that
+ * is kept up to date with patrol notifications and error management.
+ */
+static u16 cxl_disabled_raw_commands[] = {
+       CXL_MBOX_OP_ACTIVATE_FW,
+       CXL_MBOX_OP_SET_PARTITION_INFO,
+       CXL_MBOX_OP_SET_LSA,
+       CXL_MBOX_OP_SET_SHUTDOWN_STATE,
+       CXL_MBOX_OP_SCAN_MEDIA,
+       CXL_MBOX_OP_GET_SCAN_MEDIA,
+};
+
+/*
+ * Command sets that RAW doesn't permit. All opcodes in this set are
+ * disabled because they pass plain text security payloads over the
+ * user/kernel boundary. This functionality is intended to be wrapped
+ * behind the keys ABI which allows for encrypted payloads in the UAPI
+ */
+static u8 security_command_sets[] = {
+       0x44, /* Sanitize */
+       0x45, /* Persistent Memory Data-at-rest Security */
+       0x46, /* Security Passthrough */
+};
+
+#define cxl_for_each_cmd(cmd)                                                  \
+       for ((cmd) = &mem_commands[0];                                         \
+            ((cmd) - mem_commands) < ARRAY_SIZE(mem_commands); (cmd)++)
+
+#define cxl_cmd_count ARRAY_SIZE(mem_commands)
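+
+/*
+ * A sketch of the iterator above, e.g. to look up a command by opcode:
+ *
+ *   struct cxl_mem_command *c;
+ *
+ *   cxl_for_each_cmd(c)
+ *           if (c->opcode == opcode)
+ *                   return c;
+ */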
+
+static int cxl_mem_wait_for_doorbell(struct cxl_mem *cxlm)
+{
+       const unsigned long start = jiffies;
+       unsigned long end = start;
+
+       while (cxl_doorbell_busy(cxlm)) {
+               end = jiffies;
+
+               if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
+                       /* Check again in case preempted before timeout test */
+                       if (!cxl_doorbell_busy(cxlm))
+                               break;
+                       return -ETIMEDOUT;
+               }
+               cpu_relax();
+       }
+
+       dev_dbg(&cxlm->pdev->dev, "Doorbell wait took %dms",
+               jiffies_to_msecs(end) - jiffies_to_msecs(start));
+       return 0;
+}
+
+static bool cxl_is_security_command(u16 opcode)
+{
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
+               if (security_command_sets[i] == (opcode >> 8))
+                       return true;
+       return false;
+}
+
+static void cxl_mem_mbox_timeout(struct cxl_mem *cxlm,
+                                struct mbox_cmd *mbox_cmd)
+{
+       struct device *dev = &cxlm->pdev->dev;
+
+       dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
+               mbox_cmd->opcode, mbox_cmd->size_in);
+}
+
+/**
+ * __cxl_mem_mbox_send_cmd() - Execute a mailbox command
+ * @cxlm: The CXL memory device to communicate with.
+ * @mbox_cmd: Command to send to the memory device.
+ *
+ * Context: Any context. Expects mbox_mutex to be held.
+ * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
+ *         Caller should check the return code in @mbox_cmd to make sure it
+ *         succeeded.
+ *
+ * This is a generic form of the CXL mailbox send command thus only using the
+ * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
+ * devices, and perhaps other types of CXL devices may have further information
+ * available upon error conditions. Driver facilities wishing to send mailbox
+ * commands should use the wrapper command.
+ *
+ * The CXL spec allows for up to two mailboxes. The intention is for the primary
+ * mailbox to be OS controlled and the secondary mailbox to be used by system
+ * firmware. This allows the OS and firmware to communicate with the device and
+ * not need to coordinate with each other. The driver only uses the primary
+ * mailbox.
+ */
+static int __cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm,
+                                  struct mbox_cmd *mbox_cmd)
+{
+       void __iomem *payload = cxlm->mbox_regs + CXLDEV_MBOX_PAYLOAD_OFFSET;
+       u64 cmd_reg, status_reg;
+       size_t out_len;
+       int rc;
+
+       lockdep_assert_held(&cxlm->mbox_mutex);
+
+       /*
+        * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
+        *   1. Caller reads MB Control Register to verify doorbell is clear
+        *   2. Caller writes Command Register
+        *   3. Caller writes Command Payload Registers if input payload is non-empty
+        *   4. Caller writes MB Control Register to set doorbell
+        *   5. Caller either polls for doorbell to be clear or waits for interrupt if configured
+        *   6. Caller reads MB Status Register to fetch Return code
+        *   7. If command successful, Caller reads Command Register to get Payload Length
+        *   8. If output payload is non-empty, host reads Command Payload Registers
+        *
+        * Hardware is free to do whatever it wants before the doorbell is rung,
+        * and isn't allowed to change anything after it clears the doorbell. As
+        * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
+        * also happen in any order (though some orders might not make sense).
+        */
+
+       /* #1 */
+       if (cxl_doorbell_busy(cxlm)) {
+               dev_err_ratelimited(&cxlm->pdev->dev,
+                                   "Mailbox re-busy after acquiring\n");
+               return -EBUSY;
+       }
+
+       cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
+                            mbox_cmd->opcode);
+       if (mbox_cmd->size_in) {
+               if (WARN_ON(!mbox_cmd->payload_in))
+                       return -EINVAL;
+
+               cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
+                                     mbox_cmd->size_in);
+               memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
+       }
+
+       /* #2, #3 */
+       writeq(cmd_reg, cxlm->mbox_regs + CXLDEV_MBOX_CMD_OFFSET);
+
+       /* #4 */
+       dev_dbg(&cxlm->pdev->dev, "Sending command\n");
+       writel(CXLDEV_MBOX_CTRL_DOORBELL,
+              cxlm->mbox_regs + CXLDEV_MBOX_CTRL_OFFSET);
+
+       /* #5 */
+       rc = cxl_mem_wait_for_doorbell(cxlm);
+       if (rc == -ETIMEDOUT) {
+               cxl_mem_mbox_timeout(cxlm, mbox_cmd);
+               return rc;
+       }
+
+       /* #6 */
+       status_reg = readq(cxlm->mbox_regs + CXLDEV_MBOX_STATUS_OFFSET);
+       mbox_cmd->return_code =
+               FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);
+
+       if (mbox_cmd->return_code != 0) {
+               dev_dbg(&cxlm->pdev->dev, "Mailbox operation had an error\n");
+               return 0;
+       }
+
+       /* #7 */
+       cmd_reg = readq(cxlm->mbox_regs + CXLDEV_MBOX_CMD_OFFSET);
+       out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);
+
+       /* #8 */
+       if (out_len && mbox_cmd->payload_out) {
+               /*
+                * Sanitize the copy. If hardware misbehaves, out_len can
+                * exceed the spec-defined maximum (the field has 21 bits
+                * available, but the spec caps payloads at 1M). The caller
+                * also may have requested less data than the hardware
+                * supplied, even within spec.
+                */
+               size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);
+
+               memcpy_fromio(mbox_cmd->payload_out, payload, n);
+               mbox_cmd->size_out = n;
+       } else {
+               mbox_cmd->size_out = 0;
+       }
+
+       return 0;
+}
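+
+/*
+ * For reference, cxl_mem_wait_for_doorbell() is defined earlier in this
+ * file; conceptually it is a bounded poll of the doorbell (a sketch, with
+ * the timeout budget assumed):
+ *
+ *	while (cxl_doorbell_busy(cxlm)) {
+ *		if (time_after(jiffies, timeout))
+ *			return -ETIMEDOUT;
+ *		cpu_relax();
+ *	}
+ *	return 0;
+ */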
+
+/**
+ * cxl_mem_mbox_get() - Acquire exclusive access to the mailbox.
+ * @cxlm: The memory device to gain access to.
+ *
+ * Context: Any context. Takes the mbox_mutex.
+ * Return: 0 if exclusive access was acquired.
+ */
+static int cxl_mem_mbox_get(struct cxl_mem *cxlm)
+{
+       struct device *dev = &cxlm->pdev->dev;
+       u64 md_status;
+       int rc;
+
+       mutex_lock_io(&cxlm->mbox_mutex);
+
+       /*
+        * XXX: There is some amount of ambiguity in the 2.0 version of the spec
+        * around the mailbox interface ready bit (8.2.8.5.1.1). The purpose
+        * of the bit is to allow firmware running on the device to notify the
+        * driver that it's ready to receive commands. It is unclear if the
+        * bit needs to be read for each mailbox transaction, i.e. whether the
+        * firmware can switch it on and off as needed. Second, there is no
+        * defined timeout for mailbox ready, like there is for the doorbell
+        * interface.
+        *
+        * Assumptions:
+        * 1. The firmware might toggle the Mailbox Interface Ready bit, check
+        *    it for every command.
+        *
+        * 2. If the doorbell is clear, the firmware should have first set the
+        *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
+        *    to be ready is sufficient.
+        */
+       rc = cxl_mem_wait_for_doorbell(cxlm);
+       if (rc) {
+               dev_warn(dev, "Mailbox interface not ready\n");
+               goto out;
+       }
+
+       md_status = readq(cxlm->memdev_regs + CXLMDEV_STATUS_OFFSET);
+       if (!((md_status & CXLMDEV_MBOX_IF_READY) && CXLMDEV_READY(md_status))) {
+               dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
+               rc = -EBUSY;
+               goto out;
+       }
+
+       /*
+        * Hardware shouldn't report a ready status while also having failure
+        * bits set. Spit out an error; this warrants a bug report.
+        */
+       rc = -EFAULT;
+       if (md_status & CXLMDEV_DEV_FATAL) {
+               dev_err(dev, "mbox: reported ready, but fatal\n");
+               goto out;
+       }
+       if (md_status & CXLMDEV_FW_HALT) {
+               dev_err(dev, "mbox: reported ready, but halted\n");
+               goto out;
+       }
+       if (CXLMDEV_RESET_NEEDED(md_status)) {
+               dev_err(dev, "mbox: reported ready, but reset needed\n");
+               goto out;
+       }
+
+       /* with lock held */
+       return 0;
+
+out:
+       mutex_unlock(&cxlm->mbox_mutex);
+       return rc;
+}
+
+/**
+ * cxl_mem_mbox_put() - Release exclusive access to the mailbox.
+ * @cxlm: The CXL memory device to communicate with.
+ *
+ * Context: Any context. Expects mbox_mutex to be held.
+ */
+static void cxl_mem_mbox_put(struct cxl_mem *cxlm)
+{
+       mutex_unlock(&cxlm->mbox_mutex);
+}
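+
+/*
+ * Typical pairing of the two helpers above (a sketch; the real callers
+ * appear later in this file):
+ *
+ *	rc = cxl_mem_mbox_get(cxlm);
+ *	if (rc)
+ *		return rc;
+ *	rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
+ *	cxl_mem_mbox_put(cxlm);
+ */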
+
+/**
+ * handle_mailbox_cmd_from_user() - Dispatch a mailbox command for userspace.
+ * @cxlm: The CXL memory device to communicate with.
+ * @cmd: The validated command.
+ * @in_payload: Pointer to userspace's input payload.
+ * @out_payload: Pointer to userspace's output payload.
+ * @size_out: (Input) Max payload size to copy out.
+ *            (Output) Payload size hardware generated.
+ * @retval: Hardware generated return code from the operation.
+ *
+ * Return:
+ *  * %0       - Mailbox transaction succeeded. This implies the mailbox
+ *               protocol completed successfully, not that the operation
+ *               itself was successful.
+ *  * %-ENOMEM - Couldn't allocate a bounce buffer.
+ *  * %-EFAULT - Something happened with copy_to/from_user.
+ *  * %-EINTR  - Mailbox acquisition interrupted.
+ *  * %-EXXX   - Transaction level failures.
+ *
+ * Creates the appropriate mailbox command and dispatches it on behalf of a
+ * userspace request. The input and output payloads are copied to and from
+ * userspace.
+ *
+ * See cxl_send_cmd().
+ */
+static int handle_mailbox_cmd_from_user(struct cxl_mem *cxlm,
+                                       const struct cxl_mem_command *cmd,
+                                       u64 in_payload, u64 out_payload,
+                                       s32 *size_out, u32 *retval)
+{
+       struct device *dev = &cxlm->pdev->dev;
+       struct mbox_cmd mbox_cmd = {
+               .opcode = cmd->opcode,
+               .size_in = cmd->info.size_in,
+               .size_out = cmd->info.size_out,
+       };
+       int rc;
+
+       if (cmd->info.size_out) {
+               mbox_cmd.payload_out = kvzalloc(cmd->info.size_out, GFP_KERNEL);
+               if (!mbox_cmd.payload_out)
+                       return -ENOMEM;
+       }
+
+       if (cmd->info.size_in) {
+               mbox_cmd.payload_in = vmemdup_user(u64_to_user_ptr(in_payload),
+                                                  cmd->info.size_in);
+               if (IS_ERR(mbox_cmd.payload_in)) {
+                       kvfree(mbox_cmd.payload_out);
+                       return PTR_ERR(mbox_cmd.payload_in);
+               }
+       }
+
+       rc = cxl_mem_mbox_get(cxlm);
+       if (rc)
+               goto out;
+
+       dev_dbg(dev,
+               "Submitting %s command for user\n"
+               "\topcode: %x\n"
+               "\tsize: %ub\n",
+               cxl_command_names[cmd->info.id].name, mbox_cmd.opcode,
+               cmd->info.size_in);
+
+       dev_WARN_ONCE(dev, cmd->info.id == CXL_MEM_COMMAND_ID_RAW,
+                     "raw command path used\n");
+
+       rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
+       cxl_mem_mbox_put(cxlm);
+       if (rc)
+               goto out;
+
+       /*
+        * @size_out contains the max size that's allowed to be written back out
+        * to userspace. While the hardware may have produced more output than
+        * this, the excess is ignored.
+        */
+       if (mbox_cmd.size_out) {
+               dev_WARN_ONCE(dev, mbox_cmd.size_out > *size_out,
+                             "Invalid return size\n");
+               if (copy_to_user(u64_to_user_ptr(out_payload),
+                                mbox_cmd.payload_out, mbox_cmd.size_out)) {
+                       rc = -EFAULT;
+                       goto out;
+               }
+       }
+
+       *size_out = mbox_cmd.size_out;
+       *retval = mbox_cmd.return_code;
+
+out:
+       kvfree(mbox_cmd.payload_in);
+       kvfree(mbox_cmd.payload_out);
+       return rc;
+}
+
+static bool cxl_mem_raw_command_allowed(u16 opcode)
+{
+       int i;
+
+       if (!IS_ENABLED(CONFIG_CXL_MEM_RAW_COMMANDS))
+               return false;
+
+       if (security_locked_down(LOCKDOWN_PCI_ACCESS))
+               return false;
+
+       if (cxl_raw_allow_all)
+               return true;
+
+       if (cxl_is_security_command(opcode))
+               return false;
+
+       for (i = 0; i < ARRAY_SIZE(cxl_disabled_raw_commands); i++)
+               if (cxl_disabled_raw_commands[i] == opcode)
+                       return false;
+
+       return true;
+}
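+
+/*
+ * cxl_is_security_command() is defined earlier in this file; conceptually it
+ * checks whether the opcode's command set (its high byte) is one of the
+ * security-related sets (a sketch; the set values here are assumptions):
+ *
+ *	static const u8 security_command_sets[] = { 0x44, 0x45, 0x46 };
+ *
+ *	for (i = 0; i < ARRAY_SIZE(security_command_sets); i++)
+ *		if (security_command_sets[i] == (opcode >> 8))
+ *			return true;
+ *	return false;
+ */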
+
+/**
+ * cxl_validate_cmd_from_user() - Check fields for CXL_MEM_SEND_COMMAND.
+ * @cxlm: &struct cxl_mem device whose mailbox will be used.
+ * @send_cmd: &struct cxl_send_command copied in from userspace.
+ * @out_cmd: Sanitized and populated &struct cxl_mem_command.
+ *
+ * Return:
+ *  * %0       - @out_cmd is ready to send.
+ *  * %-ENOTTY - Invalid command specified.
+ *  * %-EINVAL - Reserved fields or invalid values were used.
+ *  * %-ENOMEM - Input or output buffer wasn't sized properly.
+ *  * %-EPERM  - Attempted to use a protected command.
+ *
+ * The result of this command is a fully validated command in @out_cmd that is
+ * safe to send to the hardware.
+ *
+ * See handle_mailbox_cmd_from_user()
+ */
+static int cxl_validate_cmd_from_user(struct cxl_mem *cxlm,
+                                     const struct cxl_send_command *send_cmd,
+                                     struct cxl_mem_command *out_cmd)
+{
+       const struct cxl_command_info *info;
+       struct cxl_mem_command *c;
+
+       if (send_cmd->id == 0 || send_cmd->id >= CXL_MEM_COMMAND_ID_MAX)
+               return -ENOTTY;
+
+       /*
+        * The user can never specify an input payload larger than what hardware
+        * supports, but output can be arbitrarily large (simply write out as
+        * much data as the hardware provides).
+        */
+       if (send_cmd->in.size > cxlm->payload_size)
+               return -EINVAL;
+
+       /*
+        * Checks are bypassed for raw commands, but a WARN/taint will occur
+        * later in the call chain.
+        */
+       if (send_cmd->id == CXL_MEM_COMMAND_ID_RAW) {
+               const struct cxl_mem_command temp = {
+                       .info = {
+                               .id = CXL_MEM_COMMAND_ID_RAW,
+                               .flags = 0,
+                               .size_in = send_cmd->in.size,
+                               .size_out = send_cmd->out.size,
+                       },
+                       .opcode = send_cmd->raw.opcode
+               };
+
+               if (send_cmd->raw.rsvd)
+                       return -EINVAL;
+
+               /*
+                * Unlike supported commands, the output size of RAW commands
+                * gets passed along without further checking, so it must be
+                * validated here.
+                */
+               if (send_cmd->out.size > cxlm->payload_size)
+                       return -EINVAL;
+
+               if (!cxl_mem_raw_command_allowed(send_cmd->raw.opcode))
+                       return -EPERM;
+
+               memcpy(out_cmd, &temp, sizeof(temp));
+
+               return 0;
+       }
+
+       if (send_cmd->flags & ~CXL_MEM_COMMAND_FLAG_MASK)
+               return -EINVAL;
+
+       if (send_cmd->rsvd)
+               return -EINVAL;
+
+       if (send_cmd->in.rsvd || send_cmd->out.rsvd)
+               return -EINVAL;
+
+       /* Convert user's command into the internal representation */
+       c = &mem_commands[send_cmd->id];
+       info = &c->info;
+
+       /* Check that the command is enabled for hardware */
+       if (!test_bit(info->id, cxlm->enabled_cmds))
+               return -ENOTTY;
+
+       /* Check the input buffer is the expected size */
+       if (info->size_in >= 0 && info->size_in != send_cmd->in.size)
+               return -ENOMEM;
+
+       /* Check the output buffer is at least large enough */
+       if (info->size_out >= 0 && send_cmd->out.size < info->size_out)
+               return -ENOMEM;
+
+       memcpy(out_cmd, c, sizeof(*c));
+       out_cmd->info.size_in = send_cmd->in.size;
+       /*
+        * XXX: out_cmd->info.size_out will be controlled by the driver, and the
+        * specified number of bytes @send_cmd->out.size will be copied back out
+        * to userspace.
+        */
+
+       return 0;
+}
+
+static int cxl_query_cmd(struct cxl_memdev *cxlmd,
+                        struct cxl_mem_query_commands __user *q)
+{
+       struct device *dev = &cxlmd->dev;
+       struct cxl_mem_command *cmd;
+       u32 n_commands;
+       int j = 0;
+
+       dev_dbg(dev, "Query IOCTL\n");
+
+       if (get_user(n_commands, &q->n_commands))
+               return -EFAULT;
+
+       /* Return the total command count if 0 elements were requested. */
+       if (n_commands == 0)
+               return put_user(cxl_cmd_count, &q->n_commands);
+
+       /*
+        * otherwise, return min(n_commands, total commands) cxl_command_info
+        * structures.
+        */
+       cxl_for_each_cmd(cmd) {
+               const struct cxl_command_info *info = &cmd->info;
+
+               if (copy_to_user(&q->commands[j++], info, sizeof(*info)))
+                       return -EFAULT;
+
+               if (j == n_commands)
+                       break;
+       }
+
+       return 0;
+}
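+
+/*
+ * Userspace view of the query (a sketch; struct layout per the UAPI header
+ * added by this patch, error handling elided):
+ *
+ *	struct cxl_mem_query_commands *q;
+ *	__u32 n;
+ *
+ *	q = calloc(1, sizeof(*q));
+ *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);	(n_commands == 0 fetches count)
+ *	n = q->n_commands;
+ *	q = realloc(q, sizeof(*q) + n * sizeof(q->commands[0]));
+ *	q->n_commands = n;
+ *	ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);	(fetches n info entries)
+ */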
+
+static int cxl_send_cmd(struct cxl_memdev *cxlmd,
+                       struct cxl_send_command __user *s)
+{
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+       struct device *dev = &cxlmd->dev;
+       struct cxl_send_command send;
+       struct cxl_mem_command c;
+       int rc;
+
+       dev_dbg(dev, "Send IOCTL\n");
+
+       if (copy_from_user(&send, s, sizeof(send)))
+               return -EFAULT;
+
+       rc = cxl_validate_cmd_from_user(cxlmd->cxlm, &send, &c);
+       if (rc)
+               return rc;
+
+       /* Prepare to handle a full payload for variable sized output */
+       if (c.info.size_out < 0)
+               c.info.size_out = cxlm->payload_size;
+
+       rc = handle_mailbox_cmd_from_user(cxlm, &c, send.in.payload,
+                                         send.out.payload, &send.out.size,
+                                         &send.retval);
+       if (rc)
+               return rc;
+
+       if (copy_to_user(s, &send, sizeof(send)))
+               return -EFAULT;
+
+       return 0;
+}
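+
+/*
+ * Userspace view of a send (a sketch; field names per the UAPI header added
+ * by this patch, a fixed-size output command assumed):
+ *
+ *	struct cxl_send_command send = {
+ *		.id = CXL_MEM_COMMAND_ID_IDENTIFY,
+ *		.out.size = sizeof(id_out),
+ *		.out.payload = (__u64)(uintptr_t)&id_out,
+ *	};
+ *
+ *	if (ioctl(fd, CXL_MEM_SEND_COMMAND, &send) == 0 && send.retval == 0)
+ *		use the first send.out.size bytes of id_out;
+ */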
+
+static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
+                              unsigned long arg)
+{
+       switch (cmd) {
+       case CXL_MEM_QUERY_COMMANDS:
+               return cxl_query_cmd(cxlmd, (void __user *)arg);
+       case CXL_MEM_SEND_COMMAND:
+               return cxl_send_cmd(cxlmd, (void __user *)arg);
+       default:
+               return -ENOTTY;
+       }
+}
+
+static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
+                            unsigned long arg)
+{
+       struct cxl_memdev *cxlmd;
+       struct inode *inode;
+       int rc;
+
+       inode = file_inode(file);
+       cxlmd = container_of(inode->i_cdev, typeof(*cxlmd), cdev);
+
+       if (!percpu_ref_tryget_live(&cxlmd->ops_active))
+               return -ENXIO;
+
+       rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
+
+       percpu_ref_put(&cxlmd->ops_active);
+
+       return rc;
+}
+
+static const struct file_operations cxl_memdev_fops = {
+       .owner = THIS_MODULE,
+       .unlocked_ioctl = cxl_memdev_ioctl,
+       .compat_ioctl = compat_ptr_ioctl,
+       .llseek = noop_llseek,
+};
+
+static inline struct cxl_mem_command *cxl_mem_find_command(u16 opcode)
+{
+       struct cxl_mem_command *c;
+
+       cxl_for_each_cmd(c)
+               if (c->opcode == opcode)
+                       return c;
+
+       return NULL;
+}
+
+/**
+ * cxl_mem_mbox_send_cmd() - Send a mailbox command to a memory device.
+ * @cxlm: The CXL memory device to communicate with.
+ * @opcode: Opcode for the mailbox command.
+ * @in: The input payload for the mailbox command.
+ * @in_size: The length of the input payload.
+ * @out: Caller allocated buffer for the output.
+ * @out_size: Expected size of output.
+ *
+ * Context: Any context. Will acquire and release mbox_mutex.
+ * Return:
+ *  * %0       - Command completed successfully.
+ *  * %-E2BIG  - Payload is too large for hardware.
+ *  * %-EBUSY  - Couldn't acquire exclusive mailbox access.
+ *  * %-EFAULT - Hardware error occurred.
+ *  * %-ENXIO  - Command completed, but device reported an error.
+ *  * %-EIO    - Unexpected output size.
+ *
+ * A mailbox command may execute successfully even though the device itself
+ * reported an error. While this distinction can be useful for commands from
+ * userspace, the kernel will only be able to use the results when both are
+ * successful.
+ *
+ * See __cxl_mem_mbox_send_cmd()
+ */
+static int cxl_mem_mbox_send_cmd(struct cxl_mem *cxlm, u16 opcode,
+                                void *in, size_t in_size,
+                                void *out, size_t out_size)
+{
+       const struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
+       struct mbox_cmd mbox_cmd = {
+               .opcode = opcode,
+               .payload_in = in,
+               .size_in = in_size,
+               .size_out = out_size,
+               .payload_out = out,
+       };
+       int rc;
+
+       if (out_size > cxlm->payload_size)
+               return -E2BIG;
+
+       rc = cxl_mem_mbox_get(cxlm);
+       if (rc)
+               return rc;
+
+       rc = __cxl_mem_mbox_send_cmd(cxlm, &mbox_cmd);
+       cxl_mem_mbox_put(cxlm);
+       if (rc)
+               return rc;
+
+       /* TODO: Map return code to proper kernel style errno */
+       if (mbox_cmd.return_code != CXL_MBOX_SUCCESS)
+               return -ENXIO;
+
+       /*
+        * Variable sized commands can't be validated and so it's up to the
+        * caller to do that if they wish.
+        */
+       if (cmd->info.size_out >= 0 && mbox_cmd.size_out != out_size)
+               return -EIO;
+
+       return 0;
+}
+
+/**
+ * cxl_mem_setup_regs() - Setup necessary MMIO.
+ * @cxlm: The CXL memory device to communicate with.
+ *
+ * Return: 0 if all necessary registers mapped.
+ *
+ * A memory device is required by spec to implement a certain set of MMIO
+ * regions. The purpose of this function is to enumerate and map those
+ * registers.
+ */
+static int cxl_mem_setup_regs(struct cxl_mem *cxlm)
+{
+       struct device *dev = &cxlm->pdev->dev;
+       int cap, cap_count;
+       u64 cap_array;
+
+       cap_array = readq(cxlm->regs + CXLDEV_CAP_ARRAY_OFFSET);
+       if (FIELD_GET(CXLDEV_CAP_ARRAY_ID_MASK, cap_array) !=
+           CXLDEV_CAP_ARRAY_CAP_ID)
+               return -ENODEV;
+
+       cap_count = FIELD_GET(CXLDEV_CAP_ARRAY_COUNT_MASK, cap_array);
+
+       for (cap = 1; cap <= cap_count; cap++) {
+               void __iomem *register_block;
+               u32 offset;
+               u16 cap_id;
+
+               cap_id = FIELD_GET(CXLDEV_CAP_HDR_CAP_ID_MASK,
+                                  readl(cxlm->regs + cap * 0x10));
+               offset = readl(cxlm->regs + cap * 0x10 + 0x4);
+               register_block = cxlm->regs + offset;
+
+               switch (cap_id) {
+               case CXLDEV_CAP_CAP_ID_DEVICE_STATUS:
+                       dev_dbg(dev, "found Status capability (0x%x)\n", offset);
+                       cxlm->status_regs = register_block;
+                       break;
+               case CXLDEV_CAP_CAP_ID_PRIMARY_MAILBOX:
+                       dev_dbg(dev, "found Mailbox capability (0x%x)\n", offset);
+                       cxlm->mbox_regs = register_block;
+                       break;
+               case CXLDEV_CAP_CAP_ID_SECONDARY_MAILBOX:
+                       dev_dbg(dev, "found Secondary Mailbox capability (0x%x)\n", offset);
+                       break;
+               case CXLDEV_CAP_CAP_ID_MEMDEV:
+                       dev_dbg(dev, "found Memory Device capability (0x%x)\n", offset);
+                       cxlm->memdev_regs = register_block;
+                       break;
+               default:
+                       dev_dbg(dev, "Unknown cap ID: %d (0x%x)\n", cap_id, offset);
+                       break;
+               }
+       }
+
+       if (!cxlm->status_regs || !cxlm->mbox_regs || !cxlm->memdev_regs) {
+               dev_err(dev, "registers not found: %s%s%s\n",
+                       !cxlm->status_regs ? "status " : "",
+                       !cxlm->mbox_regs ? "mbox " : "",
+                       !cxlm->memdev_regs ? "memdev" : "");
+               return -ENXIO;
+       }
+
+       return 0;
+}
+
+static int cxl_mem_setup_mailbox(struct cxl_mem *cxlm)
+{
+       const int cap = readl(cxlm->mbox_regs + CXLDEV_MBOX_CAPS_OFFSET);
+
+       cxlm->payload_size =
+               1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);
+
+       /*
+        * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
+        *
+        * If the size is too small, mandatory commands will not work and so
+        * there's no point in going forward. If the size is too large, there's
+        * no harm in soft limiting it.
+        */
+       cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
+       if (cxlm->payload_size < 256) {
+               dev_err(&cxlm->pdev->dev, "Mailbox is too small (%zub)",
+                       cxlm->payload_size);
+               return -ENXIO;
+       }
+
+       dev_dbg(&cxlm->pdev->dev, "Mailbox payload sized %zu",
+               cxlm->payload_size);
+
+       return 0;
+}
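+
+/*
+ * Worked example for the sizing above: a Payload Size field of 8 yields
+ * 1 << 8 = 256 bytes, the smallest size accepted; a field of 20 yields
+ * 1 MiB (SZ_1M), which is also where the soft limit caps larger values.
+ */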
+
+static struct cxl_mem *cxl_mem_create(struct pci_dev *pdev, u32 reg_lo,
+                                     u32 reg_hi)
+{
+       struct device *dev = &pdev->dev;
+       struct cxl_mem *cxlm;
+       void __iomem *regs;
+       u64 offset;
+       u8 bar;
+       int rc;
+
+       cxlm = devm_kzalloc(&pdev->dev, sizeof(*cxlm), GFP_KERNEL);
+       if (!cxlm) {
+               dev_err(dev, "No memory available\n");
+               return NULL;
+       }
+
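+       /*
+        * The offset field is 64K-aligned, hence reg_lo is masked in place
+        * rather than shifted; its low bits carry the BIR and block type.
+        */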
+       offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
+       bar = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
+
+       /* Basic sanity check that BAR is big enough */
+       if (pci_resource_len(pdev, bar) < offset) {
+               dev_err(dev, "BAR%d: %pr: too small (offset: %#llx)\n", bar,
+                       &pdev->resource[bar], (unsigned long long)offset);
+               return NULL;
+       }
+
+       rc = pcim_iomap_regions(pdev, BIT(bar), pci_name(pdev));
+       if (rc) {
+               dev_err(dev, "failed to map registers\n");
+               return NULL;
+       }
+       regs = pcim_iomap_table(pdev)[bar];
+
+       mutex_init(&cxlm->mbox_mutex);
+       cxlm->pdev = pdev;
+       cxlm->regs = regs + offset;
+       cxlm->enabled_cmds =
+               devm_kmalloc_array(dev, BITS_TO_LONGS(cxl_cmd_count),
+                                  sizeof(unsigned long),
+                                  GFP_KERNEL | __GFP_ZERO);
+       if (!cxlm->enabled_cmds) {
+               dev_err(dev, "No memory available for bitmap\n");
+               return NULL;
+       }
+
+       dev_dbg(dev, "Mapped CXL Memory Device resource\n");
+       return cxlm;
+}
+
+static int cxl_mem_dvsec(struct pci_dev *pdev, int dvsec)
+{
+       int pos;
+
+       pos = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DVSEC);
+       if (!pos)
+               return 0;
+
+       while (pos) {
+               u16 vendor, id;
+
+               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER1, &vendor);
+               pci_read_config_word(pdev, pos + PCI_DVSEC_HEADER2, &id);
+               if (vendor == PCI_DVSEC_VENDOR_ID_CXL && dvsec == id)
+                       return pos;
+
+               pos = pci_find_next_ext_capability(pdev, pos,
+                                                  PCI_EXT_CAP_ID_DVSEC);
+       }
+
+       return 0;
+}
+
+static struct cxl_memdev *to_cxl_memdev(struct device *dev)
+{
+       return container_of(dev, struct cxl_memdev, dev);
+}
+
+static void cxl_memdev_release(struct device *dev)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+
+       percpu_ref_exit(&cxlmd->ops_active);
+       ida_free(&cxl_memdev_ida, cxlmd->id);
+       kfree(cxlmd);
+}
+
+static char *cxl_memdev_devnode(struct device *dev, umode_t *mode, kuid_t *uid,
+                               kgid_t *gid)
+{
+       return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
+}
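+
+/*
+ * The devnode callback above places nodes under a "cxl/" directory: the
+ * device named "mem0" surfaces as /dev/cxl/mem0.
+ */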
+
+static ssize_t firmware_version_show(struct device *dev,
+                                    struct device_attribute *attr, char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+
+       return sprintf(buf, "%.16s\n", cxlm->firmware_version);
+}
+static DEVICE_ATTR_RO(firmware_version);
+
+static ssize_t payload_max_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+
+       return sprintf(buf, "%zu\n", cxlm->payload_size);
+}
+static DEVICE_ATTR_RO(payload_max);
+
+static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
+                            char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+       unsigned long long len = range_len(&cxlm->ram_range);
+
+       return sprintf(buf, "%#llx\n", len);
+}
+
+static struct device_attribute dev_attr_ram_size =
+       __ATTR(size, 0444, ram_size_show, NULL);
+
+static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
+                             char *buf)
+{
+       struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
+       struct cxl_mem *cxlm = cxlmd->cxlm;
+       unsigned long long len = range_len(&cxlm->pmem_range);
+
+       return sprintf(buf, "%#llx\n", len);
+}
+
+static struct device_attribute dev_attr_pmem_size =
+       __ATTR(size, 0444, pmem_size_show, NULL);
+
+static struct attribute *cxl_memdev_attributes[] = {
+       &dev_attr_firmware_version.attr,
+       &dev_attr_payload_max.attr,
+       NULL,
+};
+
+static struct attribute *cxl_memdev_pmem_attributes[] = {
+       &dev_attr_pmem_size.attr,
+       NULL,
+};
+
+static struct attribute *cxl_memdev_ram_attributes[] = {
+       &dev_attr_ram_size.attr,
+       NULL,
+};
+
+static struct attribute_group cxl_memdev_attribute_group = {
+       .attrs = cxl_memdev_attributes,
+};
+
+static struct attribute_group cxl_memdev_ram_attribute_group = {
+       .name = "ram",
+       .attrs = cxl_memdev_ram_attributes,
+};
+
+static struct attribute_group cxl_memdev_pmem_attribute_group = {
+       .name = "pmem",
+       .attrs = cxl_memdev_pmem_attributes,
+};
+
+static const struct attribute_group *cxl_memdev_attribute_groups[] = {
+       &cxl_memdev_attribute_group,
+       &cxl_memdev_ram_attribute_group,
+       &cxl_memdev_pmem_attribute_group,
+       NULL,
+};
+
+static const struct device_type cxl_memdev_type = {
+       .name = "cxl_memdev",
+       .release = cxl_memdev_release,
+       .devnode = cxl_memdev_devnode,
+       .groups = cxl_memdev_attribute_groups,
+};
+
+static void cxlmdev_unregister(void *_cxlmd)
+{
+       struct cxl_memdev *cxlmd = _cxlmd;
+       struct device *dev = &cxlmd->dev;
+
+       percpu_ref_kill(&cxlmd->ops_active);
+       cdev_device_del(&cxlmd->cdev, dev);
+       wait_for_completion(&cxlmd->ops_dead);
+       cxlmd->cxlm = NULL;
+       put_device(dev);
+}
+
+static void cxlmdev_ops_active_release(struct percpu_ref *ref)
+{
+       struct cxl_memdev *cxlmd =
+               container_of(ref, typeof(*cxlmd), ops_active);
+
+       complete(&cxlmd->ops_dead);
+}
+
+static int cxl_mem_add_memdev(struct cxl_mem *cxlm)
+{
+       struct pci_dev *pdev = cxlm->pdev;
+       struct cxl_memdev *cxlmd;
+       struct device *dev;
+       struct cdev *cdev;
+       int rc;
+
+       cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
+       if (!cxlmd)
+               return -ENOMEM;
+       init_completion(&cxlmd->ops_dead);
+
+       /*
+        * @cxlm is deallocated when the driver unbinds so operations
+        * that are using it need to hold a live reference.
+        */
+       cxlmd->cxlm = cxlm;
+       rc = percpu_ref_init(&cxlmd->ops_active, cxlmdev_ops_active_release, 0,
+                            GFP_KERNEL);
+       if (rc)
+               goto err_ref;
+
+       rc = ida_alloc_range(&cxl_memdev_ida, 0, CXL_MEM_MAX_DEVS, GFP_KERNEL);
+       if (rc < 0)
+               goto err_id;
+       cxlmd->id = rc;
+
+       dev = &cxlmd->dev;
+       device_initialize(dev);
+       dev->parent = &pdev->dev;
+       dev->bus = &cxl_bus_type;
+       dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
+       dev->type = &cxl_memdev_type;
+       dev_set_name(dev, "mem%d", cxlmd->id);
+
+       cdev = &cxlmd->cdev;
+       cdev_init(cdev, &cxl_memdev_fops);
+
+       rc = cdev_device_add(cdev, dev);
+       if (rc)
+               goto err_add;
+
+       return devm_add_action_or_reset(dev->parent, cxlmdev_unregister, cxlmd);
+
+err_add:
+       ida_free(&cxl_memdev_ida, cxlmd->id);
+err_id:
+       /*
+        * Theoretically userspace could have already entered the fops,
+        * so flush ops_active.
+        */
+       percpu_ref_kill(&cxlmd->ops_active);
+       wait_for_completion(&cxlmd->ops_dead);
+       percpu_ref_exit(&cxlmd->ops_active);
+err_ref:
+       kfree(cxlmd);
+
+       return rc;
+}
+
+static int cxl_xfer_log(struct cxl_mem *cxlm, uuid_t *uuid, u32 size, u8 *out)
+{
+       u32 remaining = size;
+       u32 offset = 0;
+
+       while (remaining) {
+               u32 xfer_size = min_t(u32, remaining, cxlm->payload_size);
+               struct cxl_mbox_get_log {
+                       uuid_t uuid;
+                       __le32 offset;
+                       __le32 length;
+               } __packed log = {
+                       .uuid = *uuid,
+                       .offset = cpu_to_le32(offset),
+                       .length = cpu_to_le32(xfer_size)
+               };
+               int rc;
+
+               rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_LOG, &log,
+                                          sizeof(log), out, xfer_size);
+               if (rc < 0)
+                       return rc;
+
+               out += xfer_size;
+               remaining -= xfer_size;
+               offset += xfer_size;
+       }
+
+       return 0;
+}
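+
+/*
+ * Worked example of the chunking above: fetching a 5000 byte log through a
+ * 2048 byte mailbox takes three GET_LOG commands, with (offset, length)
+ * pairs of (0, 2048), (2048, 2048) and (4096, 904).
+ */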
+
+/**
+ * cxl_walk_cel() - Walk through the Command Effects Log.
+ * @cxlm: Device.
+ * @size: Length of the Command Effects Log.
+ * @cel: Buffer containing the Command Effects Log entries.
+ *
+ * Iterate over each entry in the CEL and determine if the driver supports the
+ * command. If so, the command is enabled for the device and can be used later.
+ */
+static void cxl_walk_cel(struct cxl_mem *cxlm, size_t size, u8 *cel)
+{
+       struct cel_entry {
+               __le16 opcode;
+               __le16 effect;
+       } __packed *cel_entry;
+       const int cel_entries = size / sizeof(*cel_entry);
+       int i;
+
+       cel_entry = (struct cel_entry *)cel;
+
+       for (i = 0; i < cel_entries; i++) {
+               u16 opcode = le16_to_cpu(cel_entry[i].opcode);
+               struct cxl_mem_command *cmd = cxl_mem_find_command(opcode);
+
+               if (!cmd) {
+                       dev_dbg(&cxlm->pdev->dev,
+                               "Opcode 0x%04x unsupported by driver", opcode);
+                       continue;
+               }
+
+               set_bit(cmd->info.id, cxlm->enabled_cmds);
+       }
+}
+
+struct cxl_mbox_get_supported_logs {
+       __le16 entries;
+       u8 rsvd[6];
+       struct gsl_entry {
+               uuid_t uuid;
+               __le32 size;
+       } __packed entry[];
+} __packed;
+
+static struct cxl_mbox_get_supported_logs *cxl_get_gsl(struct cxl_mem *cxlm)
+{
+       struct cxl_mbox_get_supported_logs *ret;
+       int rc;
+
+       ret = kvmalloc(cxlm->payload_size, GFP_KERNEL);
+       if (!ret)
+               return ERR_PTR(-ENOMEM);
+
+       rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_GET_SUPPORTED_LOGS, NULL,
+                                  0, ret, cxlm->payload_size);
+       if (rc < 0) {
+               kvfree(ret);
+               return ERR_PTR(rc);
+       }
+
+       return ret;
+}
+
+/**
+ * cxl_mem_enumerate_cmds() - Enumerate commands for a device.
+ * @cxlm: The device.
+ *
+ * Return: 0 if enumeration completed successfully.
+ *
+ * CXL devices have optional support for certain commands. This function will
+ * determine the set of supported commands for the hardware and update the
+ * enabled_cmds bitmap in the @cxlm.
+ */
+static int cxl_mem_enumerate_cmds(struct cxl_mem *cxlm)
+{
+       struct cxl_mbox_get_supported_logs *gsl;
+       struct device *dev = &cxlm->pdev->dev;
+       struct cxl_mem_command *cmd;
+       int i, rc;
+
+       gsl = cxl_get_gsl(cxlm);
+       if (IS_ERR(gsl))
+               return PTR_ERR(gsl);
+
+       rc = -ENOENT;
+       for (i = 0; i < le16_to_cpu(gsl->entries); i++) {
+               u32 size = le32_to_cpu(gsl->entry[i].size);
+               uuid_t uuid = gsl->entry[i].uuid;
+               u8 *log;
+
+               dev_dbg(dev, "Found LOG type %pU of size %d", &uuid, size);
+
+               if (!uuid_equal(&uuid, &log_uuid[CEL_UUID]))
+                       continue;
+
+               log = kvmalloc(size, GFP_KERNEL);
+               if (!log) {
+                       rc = -ENOMEM;
+                       goto out;
+               }
+
+               rc = cxl_xfer_log(cxlm, &uuid, size, log);
+               if (rc) {
+                       kvfree(log);
+                       goto out;
+               }
+
+               cxl_walk_cel(cxlm, size, log);
+               kvfree(log);
+
+               /* In case CEL was bogus, enable some default commands. */
+               cxl_for_each_cmd(cmd)
+                       if (cmd->flags & CXL_CMD_FLAG_FORCE_ENABLE)
+                               set_bit(cmd->info.id, cxlm->enabled_cmds);
+
+               /* Found the required CEL */
+               rc = 0;
+       }
+
+out:
+       kvfree(gsl);
+       return rc;
+}
+
+/**
+ * cxl_mem_identify() - Send the IDENTIFY command to the device.
+ * @cxlm: The device to identify.
+ *
+ * Return: 0 if identify was executed successfully.
+ *
+ * This will dispatch the identify command to the device and, on success,
+ * populate structures to be exported to sysfs.
+ */
+static int cxl_mem_identify(struct cxl_mem *cxlm)
+{
+       struct cxl_mbox_identify {
+               char fw_revision[0x10];
+               __le64 total_capacity;
+               __le64 volatile_capacity;
+               __le64 persistent_capacity;
+               __le64 partition_align;
+               __le16 info_event_log_size;
+               __le16 warning_event_log_size;
+               __le16 failure_event_log_size;
+               __le16 fatal_event_log_size;
+               __le32 lsa_size;
+               u8 poison_list_max_mer[3];
+               __le16 inject_poison_limit;
+               u8 poison_caps;
+               u8 qos_telemetry_caps;
+       } __packed id;
+       int rc;
+
+       rc = cxl_mem_mbox_send_cmd(cxlm, CXL_MBOX_OP_IDENTIFY, NULL, 0, &id,
+                                  sizeof(id));
+       if (rc < 0)
+               return rc;
+
+       /*
+        * TODO: enumerate DPA map, as 'ram' and 'pmem' do not alias.
+        * For now, only the capacity is exported in sysfs
+        */
+       cxlm->ram_range.start = 0;
+       cxlm->ram_range.end = le64_to_cpu(id.volatile_capacity) - 1;
+
+       cxlm->pmem_range.start = 0;
+       cxlm->pmem_range.end = le64_to_cpu(id.persistent_capacity) - 1;
+
+       memcpy(cxlm->firmware_version, id.fw_revision, sizeof(id.fw_revision));
+
+       return 0;
+}
+
+static int cxl_mem_probe(struct pci_dev *pdev, const struct pci_device_id *id)
+{
+       struct device *dev = &pdev->dev;
+       struct cxl_mem *cxlm = NULL;
+       u32 regloc_size, regblocks;
+       int rc, regloc, i;
+
+       rc = pcim_enable_device(pdev);
+       if (rc)
+               return rc;
+
+       regloc = cxl_mem_dvsec(pdev, PCI_DVSEC_ID_CXL_REGLOC_OFFSET);
+       if (!regloc) {
+               dev_err(dev, "register location dvsec not found\n");
+               return -ENXIO;
+       }
+
+       /* Get the size of the Register Locator DVSEC */
+       pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
+       regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);
+
+       regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
+       regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;
+
+       for (i = 0; i < regblocks; i++, regloc += 8) {
+               u32 reg_lo, reg_hi;
+               u8 reg_type;
+
+               /* "register low and high" contain other bits */
+               pci_read_config_dword(pdev, regloc, &reg_lo);
+               pci_read_config_dword(pdev, regloc + 4, &reg_hi);
+
+               reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
+
+               if (reg_type == CXL_REGLOC_RBI_MEMDEV) {
+                       cxlm = cxl_mem_create(pdev, reg_lo, reg_hi);
+                       break;
+               }
+       }
+
+       if (!cxlm)
+               return -ENODEV;
+
+       rc = cxl_mem_setup_regs(cxlm);
+       if (rc)
+               return rc;
+
+       rc = cxl_mem_setup_mailbox(cxlm);
+       if (rc)
+               return rc;
+
+       rc = cxl_mem_enumerate_cmds(cxlm);
+       if (rc)
+               return rc;
+
+       rc = cxl_mem_identify(cxlm);
+       if (rc)
+               return rc;
+
+       return cxl_mem_add_memdev(cxlm);
+}
+
+static const struct pci_device_id cxl_mem_pci_tbl[] = {
+       /* PCI class code for CXL.mem Type-3 Devices */
+       { PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
+       { /* terminate list */ },
+};
+MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);
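+
+/*
+ * The class match above selects PCI class 0x0502 (memory controller, CXL
+ * subclass) with programming interface 0x10, i.e. any spec-compliant
+ * CXL.mem type-3 device rather than a specific vendor/device ID.
+ */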
+
+static struct pci_driver cxl_mem_driver = {
+       .name                   = KBUILD_MODNAME,
+       .id_table               = cxl_mem_pci_tbl,
+       .probe                  = cxl_mem_probe,
+       .driver = {
+               .probe_type     = PROBE_PREFER_ASYNCHRONOUS,
+       },
+};
+
+static __init int cxl_mem_init(void)
+{
+       struct dentry *mbox_debugfs;
+       dev_t devt;
+       int rc;
+
+       rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
+       if (rc)
+               return rc;
+
+       cxl_mem_major = MAJOR(devt);
+
+       rc = pci_register_driver(&cxl_mem_driver);
+       if (rc) {
+               unregister_chrdev_region(MKDEV(cxl_mem_major, 0),
+                                        CXL_MEM_MAX_DEVS);
+               return rc;
+       }
+
+       cxl_debugfs = debugfs_create_dir("cxl", NULL);
+       mbox_debugfs = debugfs_create_dir("mbox", cxl_debugfs);
+       debugfs_create_bool("raw_allow_all", 0600, mbox_debugfs,
+                           &cxl_raw_allow_all);
+
+       return 0;
+}
+
+static __exit void cxl_mem_exit(void)
+{
+       debugfs_remove_recursive(cxl_debugfs);
+       pci_unregister_driver(&cxl_mem_driver);
+       unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
+}
+
+MODULE_LICENSE("GPL v2");
+module_init(cxl_mem_init);
+module_exit(cxl_mem_exit);
+MODULE_IMPORT_NS(CXL);
diff --git a/drivers/cxl/pci.h b/drivers/cxl/pci.h
new file mode 100644 (file)
index 0000000..af3ec07
--- /dev/null
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
+#ifndef __CXL_PCI_H__
+#define __CXL_PCI_H__
+
+#define CXL_MEMORY_PROGIF      0x10
+
+/*
+ * See section 8.1 Configuration Space Registers in the CXL 2.0
+ * Specification
+ */
+#define PCI_DVSEC_HEADER1_LENGTH_MASK  GENMASK(31, 20)
+#define PCI_DVSEC_VENDOR_ID_CXL                0x1E98
+#define PCI_DVSEC_ID_CXL               0x0
+
+#define PCI_DVSEC_ID_CXL_REGLOC_OFFSET         0x8
+#define PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET  0xC
+
+/* BAR Indicator Register (BIR) */
+#define CXL_REGLOC_BIR_MASK GENMASK(2, 0)
+
+/* Register Block Identifier (RBI) */
+#define CXL_REGLOC_RBI_MASK GENMASK(15, 8)
+#define CXL_REGLOC_RBI_EMPTY 0
+#define CXL_REGLOC_RBI_COMPONENT 1
+#define CXL_REGLOC_RBI_VIRT 2
+#define CXL_REGLOC_RBI_MEMDEV 3
+
+#define CXL_REGLOC_ADDR_MASK GENMASK(31, 16)
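+
+/*
+ * Example decode of one Register Locator entry (a sketch; mirrors
+ * cxl_mem_create() in mem.c):
+ *
+ *	bar    = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
+ *	type   = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
+ *	offset = ((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
+ */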
+
+#endif /* __CXL_PCI_H__ */
index 737b207..452e85a 100644 (file)
@@ -179,7 +179,10 @@ static int dax_bus_remove(struct device *dev)
        struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
        struct dev_dax *dev_dax = to_dev_dax(dev);
 
-       return dax_drv->remove(dev_dax);
+       if (dax_drv->remove)
+               dax_drv->remove(dev_dax);
+
+       return 0;
 }
 
 static struct bus_type dax_bus_type = {
@@ -1038,7 +1041,7 @@ static ssize_t range_parse(const char *opt, size_t len, struct range *range)
 {
        unsigned long long addr = 0;
        char *start, *end, *str;
-       ssize_t rc = EINVAL;
+       ssize_t rc = -EINVAL;
 
        str = kstrdup(opt, GFP_KERNEL);
        if (!str)
@@ -1392,6 +1395,13 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
        struct device_driver *drv = &dax_drv->drv;
        int rc = 0;
 
+       /*
+        * dax_bus_probe() calls dax_drv->probe() unconditionally.
+        * So better be safe than sorry and ensure it is provided.
+        */
+       if (!dax_drv->probe)
+               return -EINVAL;
+
        INIT_LIST_HEAD(&dax_drv->ids);
        drv->owner = module;
        drv->name = mod_name;
@@ -1409,7 +1419,15 @@ int __dax_driver_register(struct dax_device_driver *dax_drv,
        mutex_unlock(&dax_bus_lock);
        if (rc)
                return rc;
-       return driver_register(drv);
+
+       rc = driver_register(drv);
+       if (rc && dax_drv->match_always) {
+               mutex_lock(&dax_bus_lock);
+               match_always_count -= dax_drv->match_always;
+               mutex_unlock(&dax_bus_lock);
+       }
+
+       return rc;
 }
 EXPORT_SYMBOL_GPL(__dax_driver_register);
 
index 72b92f9..1e946ad 100644 (file)
@@ -39,7 +39,7 @@ struct dax_device_driver {
        struct list_head ids;
        int match_always;
        int (*probe)(struct dev_dax *dev);
-       int (*remove)(struct dev_dax *dev);
+       void (*remove)(struct dev_dax *dev);
 };
 
 int __dax_driver_register(struct dax_device_driver *dax_drv,
index 5da2980..db92573 100644 (file)
@@ -452,15 +452,9 @@ int dev_dax_probe(struct dev_dax *dev_dax)
 }
 EXPORT_SYMBOL_GPL(dev_dax_probe);
 
-static int dev_dax_remove(struct dev_dax *dev_dax)
-{
-       /* all probe actions are unwound by devm */
-       return 0;
-}
-
 static struct dax_device_driver device_dax_driver = {
        .probe = dev_dax_probe,
-       .remove = dev_dax_remove,
+       /* all probe actions are unwound by devm, so .remove isn't necessary */
        .match_always = 1,
 };
 
index 403ec42..ac231cc 100644 (file)
@@ -136,7 +136,7 @@ err_res_name:
 }
 
 #ifdef CONFIG_MEMORY_HOTREMOVE
-static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
+static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 {
        int i, success = 0;
        struct device *dev = &dev_dax->dev;
@@ -176,11 +176,9 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
                kfree(data);
                dev_set_drvdata(dev, NULL);
        }
-
-       return 0;
 }
 #else
-static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
+static void dev_dax_kmem_remove(struct dev_dax *dev_dax)
 {
        /*
         * Without hotremove purposely leak the request_mem_region() for the
@@ -190,7 +188,6 @@ static int dev_dax_kmem_remove(struct dev_dax *dev_dax)
         * request_mem_region().
         */
        any_hotremove_failed = true;
-       return 0;
 }
 #endif /* CONFIG_MEMORY_HOTREMOVE */
 
index 863c114..d81dc35 100644 (file)
@@ -41,10 +41,9 @@ static int dax_pmem_compat_release(struct device *dev, void *data)
        return 0;
 }
 
-static int dax_pmem_compat_remove(struct device *dev)
+static void dax_pmem_compat_remove(struct device *dev)
 {
        device_for_each_child(dev, NULL, dax_pmem_compat_release);
-       return 0;
 }
 
 static struct nd_device_driver dax_pmem_compat_driver = {
index cadbd0a..5fa6ae9 100644 (file)
@@ -480,7 +480,7 @@ static void dax_free_inode(struct inode *inode)
        kfree(dax_dev->host);
        dax_dev->host = NULL;
        if (inode->i_rdev)
-               ida_simple_remove(&dax_minor_ida, MINOR(inode->i_rdev));
+               ida_simple_remove(&dax_minor_ida, iminor(inode));
        kmem_cache_free(dax_cache, dax_dev);
 }
 
index 7475e09..d64fc03 100644 (file)
@@ -312,22 +312,25 @@ void __dma_fence_might_wait(void)
 
 
 /**
- * dma_fence_signal_locked - signal completion of a fence
+ * dma_fence_signal_timestamp_locked - signal completion of a fence
  * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
  *
  * Signal completion for software callbacks on a fence, this will unblock
  * dma_fence_wait() calls and run all the callbacks added with
  * dma_fence_add_callback(). Can be called multiple times, but since a fence
  * can only go from the unsignaled to the signaled state and not back, it will
- * only be effective the first time.
+ * only be effective the first time. Set the timestamp provided as the fence
+ * signal timestamp.
  *
- * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock
- * held.
+ * Unlike dma_fence_signal_timestamp(), this function must be called with
+ * &dma_fence.lock held.
  *
  * Returns 0 on success and a negative error value when @fence has been
  * signalled already.
  */
-int dma_fence_signal_locked(struct dma_fence *fence)
+int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
+                                     ktime_t timestamp)
 {
        struct dma_fence_cb *cur, *tmp;
        struct list_head cb_list;
@@ -341,7 +344,7 @@ int dma_fence_signal_locked(struct dma_fence *fence)
        /* Stash the cb_list before replacing it with the timestamp */
        list_replace(&fence->cb_list, &cb_list);
 
-       fence->timestamp = ktime_get();
+       fence->timestamp = timestamp;
        set_bit(DMA_FENCE_FLAG_TIMESTAMP_BIT, &fence->flags);
        trace_dma_fence_signaled(fence);
 
@@ -352,6 +355,59 @@ int dma_fence_signal_locked(struct dma_fence *fence)
 
        return 0;
 }
+EXPORT_SYMBOL(dma_fence_signal_timestamp_locked);
+
+/**
+ * dma_fence_signal_timestamp - signal completion of a fence
+ * @fence: the fence to signal
+ * @timestamp: fence signal timestamp in kernel's CLOCK_MONOTONIC time domain
+ *
+ * Signal completion for software callbacks on a fence, this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time. Set the timestamp provided as the fence
+ * signal timestamp.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp)
+{
+       unsigned long flags;
+       int ret;
+
+       if (!fence)
+               return -EINVAL;
+
+       spin_lock_irqsave(fence->lock, flags);
+       ret = dma_fence_signal_timestamp_locked(fence, timestamp);
+       spin_unlock_irqrestore(fence->lock, flags);
+
+       return ret;
+}
+EXPORT_SYMBOL(dma_fence_signal_timestamp);
+
+/**
+ * dma_fence_signal_locked - signal completion of a fence
+ * @fence: the fence to signal
+ *
+ * Signal completion for software callbacks on a fence, this will unblock
+ * dma_fence_wait() calls and run all the callbacks added with
+ * dma_fence_add_callback(). Can be called multiple times, but since a fence
+ * can only go from the unsignaled to the signaled state and not back, it will
+ * only be effective the first time.
+ *
+ * Unlike dma_fence_signal(), this function must be called with &dma_fence.lock
+ * held.
+ *
+ * Returns 0 on success and a negative error value when @fence has been
+ * signalled already.
+ */
+int dma_fence_signal_locked(struct dma_fence *fence)
+{
+       return dma_fence_signal_timestamp_locked(fence, ktime_get());
+}
 EXPORT_SYMBOL(dma_fence_signal_locked);
 
 /**
@@ -379,7 +435,7 @@ int dma_fence_signal(struct dma_fence *fence)
        tmp = dma_fence_begin_signalling();
 
        spin_lock_irqsave(fence->lock, flags);
-       ret = dma_fence_signal_locked(fence);
+       ret = dma_fence_signal_timestamp_locked(fence, ktime_get());
        spin_unlock_irqrestore(fence->lock, flags);
 
        dma_fence_end_signalling(tmp);
index afd22c9..6b5db95 100644 (file)
@@ -52,6 +52,9 @@ static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len,
                                 unsigned int fd_flags,
                                 unsigned int heap_flags)
 {
+       struct dma_buf *dmabuf;
+       int fd;
+
        /*
         * Allocations from all heaps have to begin
         * and end on page boundaries.
@@ -60,7 +63,16 @@ static int dma_heap_buffer_alloc(struct dma_heap *heap, size_t len,
        if (!len)
                return -EINVAL;
 
-       return heap->ops->allocate(heap, len, fd_flags, heap_flags);
+       dmabuf = heap->ops->allocate(heap, len, fd_flags, heap_flags);
+       if (IS_ERR(dmabuf))
+               return PTR_ERR(dmabuf);
+
+       fd = dma_buf_fd(dmabuf, fd_flags);
+       if (fd < 0) {
+               dma_buf_put(dmabuf);
+               /* just return, as put will call release and that will free */
+       }
+       return fd;
 }
 
 static int dma_heap_open(struct inode *inode, struct file *file)
index 364fc2f..5d64ecc 100644 (file)
@@ -271,10 +271,10 @@ static const struct dma_buf_ops cma_heap_buf_ops = {
        .release = cma_heap_dma_buf_release,
 };
 
-static int cma_heap_allocate(struct dma_heap *heap,
-                                 unsigned long len,
-                                 unsigned long fd_flags,
-                                 unsigned long heap_flags)
+static struct dma_buf *cma_heap_allocate(struct dma_heap *heap,
+                                        unsigned long len,
+                                        unsigned long fd_flags,
+                                        unsigned long heap_flags)
 {
        struct cma_heap *cma_heap = dma_heap_get_drvdata(heap);
        struct cma_heap_buffer *buffer;
@@ -289,7 +289,7 @@ static int cma_heap_allocate(struct dma_heap *heap,
 
        buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
        if (!buffer)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&buffer->attachments);
        mutex_init(&buffer->lock);
@@ -348,15 +348,7 @@ static int cma_heap_allocate(struct dma_heap *heap,
                ret = PTR_ERR(dmabuf);
                goto free_pages;
        }
-
-       ret = dma_buf_fd(dmabuf, fd_flags);
-       if (ret < 0) {
-               dma_buf_put(dmabuf);
-               /* just return, as put will call release and that will free */
-               return ret;
-       }
-
-       return ret;
+       return dmabuf;
 
 free_pages:
        kfree(buffer->pages);
@@ -365,7 +357,7 @@ free_cma:
 free_buffer:
        kfree(buffer);
 
-       return ret;
+       return ERR_PTR(ret);
 }
 
 static const struct dma_heap_ops cma_heap_ops = {
index 17e0e9a..29e49ac 100644 (file)
@@ -331,10 +331,10 @@ static struct page *alloc_largest_available(unsigned long size,
        return NULL;
 }
 
-static int system_heap_allocate(struct dma_heap *heap,
-                               unsigned long len,
-                               unsigned long fd_flags,
-                               unsigned long heap_flags)
+static struct dma_buf *system_heap_allocate(struct dma_heap *heap,
+                                           unsigned long len,
+                                           unsigned long fd_flags,
+                                           unsigned long heap_flags)
 {
        struct system_heap_buffer *buffer;
        DEFINE_DMA_BUF_EXPORT_INFO(exp_info);
@@ -349,7 +349,7 @@ static int system_heap_allocate(struct dma_heap *heap,
 
        buffer = kzalloc(sizeof(*buffer), GFP_KERNEL);
        if (!buffer)
-               return -ENOMEM;
+               return ERR_PTR(-ENOMEM);
 
        INIT_LIST_HEAD(&buffer->attachments);
        mutex_init(&buffer->lock);
@@ -363,8 +363,10 @@ static int system_heap_allocate(struct dma_heap *heap,
                 * Avoid trying to allocate memory if the process
                 * has been killed by SIGKILL
                 */
-               if (fatal_signal_pending(current))
+               if (fatal_signal_pending(current)) {
+                       ret = -EINTR;
                        goto free_buffer;
+               }
 
                page = alloc_largest_available(size_remaining, max_order);
                if (!page)
@@ -397,14 +399,7 @@ static int system_heap_allocate(struct dma_heap *heap,
                ret = PTR_ERR(dmabuf);
                goto free_pages;
        }
-
-       ret = dma_buf_fd(dmabuf, fd_flags);
-       if (ret < 0) {
-               dma_buf_put(dmabuf);
-               /* just return, as put will call release and that will free */
-               return ret;
-       }
-       return ret;
+       return dmabuf;
 
 free_pages:
        for_each_sgtable_sg(table, sg, i) {
@@ -418,7 +413,7 @@ free_buffer:
                __free_pages(page, compound_order(page));
        kfree(buffer);
 
-       return ret;
+       return ERR_PTR(ret);
 }
 
 static const struct dma_heap_ops system_heap_ops = {
index d242c76..0c2827f 100644 (file)
@@ -124,13 +124,6 @@ config BCM_SBA_RAID
          has the capability to offload memcpy, xor and pq computation
          for raid5/6.
 
-config COH901318
-       bool "ST-Ericsson COH901318 DMA support"
-       select DMA_ENGINE
-       depends on ARCH_U300 || COMPILE_TEST
-       help
-         Enable support for ST-Ericsson COH 901 318 DMA.
-
 config DMA_BCM2835
        tristate "BCM2835 DMA engine support"
        depends on ARCH_BCM2835
@@ -179,6 +172,7 @@ config DMA_SUN6I
 config DW_AXI_DMAC
        tristate "Synopsys DesignWare AXI DMA support"
        depends on OF || COMPILE_TEST
+       depends on HAS_IOMEM
        select DMA_ENGINE
        select DMA_VIRTUAL_CHANNELS
        help
@@ -378,14 +372,14 @@ config MILBEAUT_XDMAC
          XDMAC device.
 
 config MMP_PDMA
-       bool "MMP PDMA support"
+       tristate "MMP PDMA support"
        depends on ARCH_MMP || ARCH_PXA || COMPILE_TEST
        select DMA_ENGINE
        help
          Support the MMP PDMA engine for PXA and MMP platform.
 
 config MMP_TDMA
-       bool "MMP Two-Channel DMA support"
+       tristate "MMP Two-Channel DMA support"
        depends on ARCH_MMP || COMPILE_TEST
        select DMA_ENGINE
        select GENERIC_ALLOCATOR
@@ -519,13 +513,6 @@ config PLX_DMA
          These are exposed via extra functions on the switch's
          upstream port. Each function exposes one DMA channel.
 
-config SIRF_DMA
-       tristate "CSR SiRFprimaII/SiRFmarco DMA support"
-       depends on ARCH_SIRF
-       select DMA_ENGINE
-       help
-         Enable support for the CSR SiRFprimaII DMA engine.
-
 config STE_DMA40
        bool "ST-Ericsson DMA40 support"
        depends on ARCH_U8500
@@ -710,15 +697,6 @@ config XILINX_ZYNQMP_DPDMA
          driver provides the dmaengine required by the DisplayPort subsystem
          display driver.
 
-config ZX_DMA
-       tristate "ZTE ZX DMA support"
-       depends on ARCH_ZX || COMPILE_TEST
-       select DMA_ENGINE
-       select DMA_VIRTUAL_CHANNELS
-       help
-         Support the DMA engine for ZTE ZX family platform devices.
-
-
 # driver files
 source "drivers/dma/bestcomm/Kconfig"
 
@@ -740,6 +718,8 @@ source "drivers/dma/ti/Kconfig"
 
 source "drivers/dma/fsl-dpaa2-qdma/Kconfig"
 
+source "drivers/dma/lgm/Kconfig"
+
 # clients
 comment "DMA Clients"
        depends on DMA_ENGINE
index 948a8da..aa69094 100644 (file)
@@ -20,7 +20,6 @@ obj-$(CONFIG_AT_HDMAC) += at_hdmac.o
 obj-$(CONFIG_AT_XDMAC) += at_xdmac.o
 obj-$(CONFIG_AXI_DMAC) += dma-axi-dmac.o
 obj-$(CONFIG_BCM_SBA_RAID) += bcm-sba-raid.o
-obj-$(CONFIG_COH901318) += coh901318.o coh901318_lli.o
 obj-$(CONFIG_DMA_BCM2835) += bcm2835-dma.o
 obj-$(CONFIG_DMA_JZ4780) += dma-jz4780.o
 obj-$(CONFIG_DMA_SA11X0) += sa11x0-dma.o
@@ -65,7 +64,6 @@ obj-$(CONFIG_PPC_BESTCOMM) += bestcomm/
 obj-$(CONFIG_PXA_DMA) += pxa_dma.o
 obj-$(CONFIG_RENESAS_DMA) += sh/
 obj-$(CONFIG_SF_PDMA) += sf-pdma/
-obj-$(CONFIG_SIRF_DMA) += sirf-dma.o
 obj-$(CONFIG_STE_DMA40) += ste_dma40.o ste_dma40_ll.o
 obj-$(CONFIG_STM32_DMA) += stm32-dma.o
 obj-$(CONFIG_STM32_DMAMUX) += stm32-dmamux.o
@@ -79,9 +77,9 @@ obj-$(CONFIG_TIMB_DMA) += timb_dma.o
 obj-$(CONFIG_UNIPHIER_MDMAC) += uniphier-mdmac.o
 obj-$(CONFIG_UNIPHIER_XDMAC) += uniphier-xdmac.o
 obj-$(CONFIG_XGENE_DMA) += xgene-dma.o
-obj-$(CONFIG_ZX_DMA) += zx_dma.o
 obj-$(CONFIG_ST_FDMA) += st_fdma.o
 obj-$(CONFIG_FSL_DPAA2_QDMA) += fsl-dpaa2-qdma/
+obj-$(CONFIG_INTEL_LDMA) += lgm/
 
 obj-y += mediatek/
 obj-y += qcom/
index 7eaee5b..30ae361 100644 (file)
@@ -54,6 +54,25 @@ module_param(init_nr_desc_per_channel, uint, 0644);
 MODULE_PARM_DESC(init_nr_desc_per_channel,
                 "initial descriptors per channel (default: 64)");
 
+/**
+ * struct at_dma_platform_data - Controller configuration parameters
+ * @nr_channels: Number of channels supported by hardware (max 8)
+ * @cap_mask: dma_capability flags supported by the platform
+ */
+struct at_dma_platform_data {
+       unsigned int    nr_channels;
+       dma_cap_mask_t  cap_mask;
+};
+
+/**
+ * struct at_dma_slave - Controller-specific information about a slave
+ * @dma_dev: required DMA master device
+ * @cfg: Platform-specific initializer for the CFG register
+ */
+struct at_dma_slave {
+       struct device           *dma_dev;
+       u32                     cfg;
+};
 
 /* prototypes */
 static dma_cookie_t atc_tx_submit(struct dma_async_tx_descriptor *tx);
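
The two structures added here previously lived in
<linux/platform_data/dma-atmel.h>, whose include is dropped in the next
hunk; with no board files left using them, they become private to the
driver. A hedged sketch of how a consumer might once have populated
at_dma_slave, using the ATC_*_PER() macros from at_hdmac_regs.h (shown
below); the variable name and values are made up:

/* Illustrative only: slave config for a hypothetical peripheral. */
static struct at_dma_slave example_atslave = {
	.dma_dev = NULL,	/* would point at the DMA controller's device */
	.cfg	 = ATC_SRC_PER(1) | ATC_DST_PER(2),	/* handshaking ifcs */
};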
index 80fc2fe..4d1ebc0 100644 (file)
@@ -7,8 +7,6 @@
 #ifndef AT_HDMAC_REGS_H
 #define        AT_HDMAC_REGS_H
 
-#include <linux/platform_data/dma-atmel.h>
-
 #define        AT_DMA_MAX_NR_CHANNELS  8
 
 
 #define        ATC_AUTO                (0x1 << 31)     /* Auto multiple buffer tx enable */
 
 /* Bitfields in CFG */
-/* are in at_hdmac.h */
+#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4)    /* Extract most significant bits of a handshaking identifier */
+
+#define        ATC_SRC_PER(h)          (0xFU & (h))    /* Channel src rq associated with periph handshaking ifc h */
+#define        ATC_DST_PER(h)          ((0xFU & (h)) <<  4)    /* Channel dst rq associated with periph handshaking ifc h */
+#define	ATC_SRC_REP		(0x1 <<  8)	/* Source Replay Mode */
+#define	ATC_SRC_H2SEL		(0x1 <<  9)	/* Source Handshaking Mode */
+#define		ATC_SRC_H2SEL_SW	(0x0 <<  9)
+#define		ATC_SRC_H2SEL_HW	(0x1 <<  9)
+#define	ATC_SRC_PER_MSB(h)	(ATC_PER_MSB(h) << 10)	/* Channel src rq (most significant bits) */
+#define	ATC_DST_REP		(0x1 << 12)	/* Destination Replay Mode */
+#define	ATC_DST_H2SEL		(0x1 << 13)	/* Destination Handshaking Mode */
+#define                ATC_DST_H2SEL_SW        (0x0 << 13)
+#define                ATC_DST_H2SEL_HW        (0x1 << 13)
+#define        ATC_DST_PER_MSB(h)      (ATC_PER_MSB(h) << 14)  /* Channel dst rq (most significant bits) */
+#define        ATC_SOD                 (0x1 << 16)     /* Stop On Done */
+#define        ATC_LOCK_IF             (0x1 << 20)     /* Interface Lock */
+#define        ATC_LOCK_B              (0x1 << 21)     /* AHB Bus Lock */
+#define        ATC_LOCK_IF_L           (0x1 << 22)     /* Master Interface Arbiter Lock */
+#define                ATC_LOCK_IF_L_CHUNK     (0x0 << 22)
+#define                ATC_LOCK_IF_L_BUFFER    (0x1 << 22)
+#define        ATC_AHB_PROT_MASK       (0x7 << 24)     /* AHB Protection */
+#define        ATC_FIFOCFG_MASK        (0x3 << 28)     /* FIFO Request Configuration */
+#define                ATC_FIFOCFG_LARGESTBURST        (0x0 << 28)
+#define                ATC_FIFOCFG_HALFFIFO            (0x1 << 28)
+#define                ATC_FIFOCFG_ENOUGHSPACE         (0x2 << 28)
 
 /* Bitfields in SPIP */
 #define        ATC_SPIP_HOLE(x)        (0xFFFFU & (x))
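
Since a handshaking interface identifier can be wider than the 4-bit
SRC_PER/DST_PER fields, its two top bits are routed through the new
*_PER_MSB() helpers above. A short sketch of composing a CFG word this
way; the interface number and flag choices are illustrative:

#include <linux/types.h>

/* Illustrative only: CFG for a peripheral on handshaking interface 18. */
static const u32 example_cfg = ATC_SRC_PER(18) | ATC_SRC_PER_MSB(18) |
			       ATC_SRC_H2SEL_HW |	/* hardware handshaking */
			       ATC_FIFOCFG_HALFFIFO;	/* FIFO request config */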
diff --git a/drivers/dma/coh901318.c b/drivers/dma/coh901318.c
deleted file mode 100644 (file)
index 95b9b2f..0000000
+++ /dev/null
@@ -1,2808 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * driver/dma/coh901318.c
- *
- * Copyright (C) 2007-2009 ST-Ericsson
- * DMA driver for COH 901 318
- * Author: Per Friden <per.friden@stericsson.com>
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/kernel.h> /* printk() */
-#include <linux/fs.h> /* everything... */
-#include <linux/scatterlist.h>
-#include <linux/slab.h> /* kmalloc() */
-#include <linux/dmaengine.h>
-#include <linux/platform_device.h>
-#include <linux/device.h>
-#include <linux/irqreturn.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/uaccess.h>
-#include <linux/debugfs.h>
-#include <linux/platform_data/dma-coh901318.h>
-#include <linux/of_dma.h>
-
-#include "coh901318.h"
-#include "dmaengine.h"
-
-#define COH901318_MOD32_MASK                                   (0x1F)
-#define COH901318_WORD_MASK                                    (0xFFFFFFFF)
-/* INT_STATUS - Interrupt Status Registers 32bit (R/-) */
-#define COH901318_INT_STATUS1                                  (0x0000)
-#define COH901318_INT_STATUS2                                  (0x0004)
-/* TC_INT_STATUS - Terminal Count Interrupt Status Registers 32bit (R/-) */
-#define COH901318_TC_INT_STATUS1                               (0x0008)
-#define COH901318_TC_INT_STATUS2                               (0x000C)
-/* TC_INT_CLEAR - Terminal Count Interrupt Clear Registers 32bit (-/W) */
-#define COH901318_TC_INT_CLEAR1                                        (0x0010)
-#define COH901318_TC_INT_CLEAR2                                        (0x0014)
-/* RAW_TC_INT_STATUS - Raw Term Count Interrupt Status Registers 32bit (R/-) */
-#define COH901318_RAW_TC_INT_STATUS1                           (0x0018)
-#define COH901318_RAW_TC_INT_STATUS2                           (0x001C)
-/* BE_INT_STATUS - Bus Error Interrupt Status Registers 32bit (R/-) */
-#define COH901318_BE_INT_STATUS1                               (0x0020)
-#define COH901318_BE_INT_STATUS2                               (0x0024)
-/* BE_INT_CLEAR - Bus Error Interrupt Clear Registers 32bit (-/W) */
-#define COH901318_BE_INT_CLEAR1                                        (0x0028)
-#define COH901318_BE_INT_CLEAR2                                        (0x002C)
-/* RAW_BE_INT_STATUS - Raw Term Count Interrupt Status Registers 32bit (R/-) */
-#define COH901318_RAW_BE_INT_STATUS1                           (0x0030)
-#define COH901318_RAW_BE_INT_STATUS2                           (0x0034)
-
-/*
- * CX_CFG - Channel Configuration Registers 32bit (R/W)
- */
-#define COH901318_CX_CFG                                       (0x0100)
-#define COH901318_CX_CFG_SPACING                               (0x04)
-/* Channel enable activates the dma job */
-#define COH901318_CX_CFG_CH_ENABLE                             (0x00000001)
-#define COH901318_CX_CFG_CH_DISABLE                            (0x00000000)
-/* Request Mode */
-#define COH901318_CX_CFG_RM_MASK                               (0x00000006)
-#define COH901318_CX_CFG_RM_MEMORY_TO_MEMORY                   (0x0 << 1)
-#define COH901318_CX_CFG_RM_PRIMARY_TO_MEMORY                  (0x1 << 1)
-#define COH901318_CX_CFG_RM_MEMORY_TO_PRIMARY                  (0x1 << 1)
-#define COH901318_CX_CFG_RM_PRIMARY_TO_SECONDARY               (0x3 << 1)
-#define COH901318_CX_CFG_RM_SECONDARY_TO_PRIMARY               (0x3 << 1)
-/* Linked channel request field. RM must == 11 */
-#define COH901318_CX_CFG_LCRF_SHIFT                            3
-#define COH901318_CX_CFG_LCRF_MASK                             (0x000001F8)
-#define COH901318_CX_CFG_LCR_DISABLE                           (0x00000000)
-/* Terminal Counter Interrupt Request Mask */
-#define COH901318_CX_CFG_TC_IRQ_ENABLE                         (0x00000200)
-#define COH901318_CX_CFG_TC_IRQ_DISABLE                                (0x00000000)
-/* Bus Error interrupt Mask */
-#define COH901318_CX_CFG_BE_IRQ_ENABLE                         (0x00000400)
-#define COH901318_CX_CFG_BE_IRQ_DISABLE                                (0x00000000)
-
-/*
- * CX_STAT - Channel Status Registers 32bit (R/-)
- */
-#define COH901318_CX_STAT                                      (0x0200)
-#define COH901318_CX_STAT_SPACING                              (0x04)
-#define COH901318_CX_STAT_RBE_IRQ_IND                          (0x00000008)
-#define COH901318_CX_STAT_RTC_IRQ_IND                          (0x00000004)
-#define COH901318_CX_STAT_ACTIVE                               (0x00000002)
-#define COH901318_CX_STAT_ENABLED                              (0x00000001)
-
-/*
- * CX_CTRL - Channel Control Registers 32bit (R/W)
- */
-#define COH901318_CX_CTRL                                      (0x0400)
-#define COH901318_CX_CTRL_SPACING                              (0x10)
-/* Transfer Count Enable */
-#define COH901318_CX_CTRL_TC_ENABLE                            (0x00001000)
-#define COH901318_CX_CTRL_TC_DISABLE                           (0x00000000)
-/* Transfer Count Value 0 - 4095 */
-#define COH901318_CX_CTRL_TC_VALUE_MASK                                (0x00000FFF)
-/* Burst count */
-#define COH901318_CX_CTRL_BURST_COUNT_MASK                     (0x0000E000)
-#define COH901318_CX_CTRL_BURST_COUNT_64_BYTES                 (0x7 << 13)
-#define COH901318_CX_CTRL_BURST_COUNT_48_BYTES                 (0x6 << 13)
-#define COH901318_CX_CTRL_BURST_COUNT_32_BYTES                 (0x5 << 13)
-#define COH901318_CX_CTRL_BURST_COUNT_16_BYTES                 (0x4 << 13)
-#define COH901318_CX_CTRL_BURST_COUNT_8_BYTES                  (0x3 << 13)
-#define COH901318_CX_CTRL_BURST_COUNT_4_BYTES                  (0x2 << 13)
-#define COH901318_CX_CTRL_BURST_COUNT_2_BYTES                  (0x1 << 13)
-#define COH901318_CX_CTRL_BURST_COUNT_1_BYTE                   (0x0 << 13)
-/* Source bus size  */
-#define COH901318_CX_CTRL_SRC_BUS_SIZE_MASK                    (0x00030000)
-#define COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS                 (0x2 << 16)
-#define COH901318_CX_CTRL_SRC_BUS_SIZE_16_BITS                 (0x1 << 16)
-#define COH901318_CX_CTRL_SRC_BUS_SIZE_8_BITS                  (0x0 << 16)
-/* Source address increment */
-#define COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE                  (0x00040000)
-#define COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE                 (0x00000000)
-/* Destination Bus Size */
-#define COH901318_CX_CTRL_DST_BUS_SIZE_MASK                    (0x00180000)
-#define COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS                 (0x2 << 19)
-#define COH901318_CX_CTRL_DST_BUS_SIZE_16_BITS                 (0x1 << 19)
-#define COH901318_CX_CTRL_DST_BUS_SIZE_8_BITS                  (0x0 << 19)
-/* Destination address increment */
-#define COH901318_CX_CTRL_DST_ADDR_INC_ENABLE                  (0x00200000)
-#define COH901318_CX_CTRL_DST_ADDR_INC_DISABLE                 (0x00000000)
-/* Master Mode (Master2 is only connected to MSL) */
-#define COH901318_CX_CTRL_MASTER_MODE_MASK                     (0x00C00000)
-#define COH901318_CX_CTRL_MASTER_MODE_M2R_M1W                  (0x3 << 22)
-#define COH901318_CX_CTRL_MASTER_MODE_M1R_M2W                  (0x2 << 22)
-#define COH901318_CX_CTRL_MASTER_MODE_M2RW                     (0x1 << 22)
-#define COH901318_CX_CTRL_MASTER_MODE_M1RW                     (0x0 << 22)
-/* Terminal Count flag to PER enable */
-#define COH901318_CX_CTRL_TCP_ENABLE                           (0x01000000)
-#define COH901318_CX_CTRL_TCP_DISABLE                          (0x00000000)
-/* Terminal Count flags to CPU enable */
-#define COH901318_CX_CTRL_TC_IRQ_ENABLE                                (0x02000000)
-#define COH901318_CX_CTRL_TC_IRQ_DISABLE                       (0x00000000)
-/* Hand shake to peripheral */
-#define COH901318_CX_CTRL_HSP_ENABLE                           (0x04000000)
-#define COH901318_CX_CTRL_HSP_DISABLE                          (0x00000000)
-#define COH901318_CX_CTRL_HSS_ENABLE                           (0x08000000)
-#define COH901318_CX_CTRL_HSS_DISABLE                          (0x00000000)
-/* DMA mode */
-#define COH901318_CX_CTRL_DDMA_MASK                            (0x30000000)
-#define COH901318_CX_CTRL_DDMA_LEGACY                          (0x0 << 28)
-#define COH901318_CX_CTRL_DDMA_DEMAND_DMA1                     (0x1 << 28)
-#define COH901318_CX_CTRL_DDMA_DEMAND_DMA2                     (0x2 << 28)
-/* Primary Request Data Destination */
-#define COH901318_CX_CTRL_PRDD_MASK                            (0x40000000)
-#define COH901318_CX_CTRL_PRDD_DEST                            (0x1 << 30)
-#define COH901318_CX_CTRL_PRDD_SOURCE                          (0x0 << 30)
-
-/*
- * CX_SRC_ADDR - Channel Source Address Registers 32bit (R/W)
- */
-#define COH901318_CX_SRC_ADDR                                  (0x0404)
-#define COH901318_CX_SRC_ADDR_SPACING                          (0x10)
-
-/*
- * CX_DST_ADDR - Channel Destination Address Registers 32bit R/W
- */
-#define COH901318_CX_DST_ADDR                                  (0x0408)
-#define COH901318_CX_DST_ADDR_SPACING                          (0x10)
-
-/*
- * CX_LNK_ADDR - Channel Link Address Registers 32bit (R/W)
- */
-#define COH901318_CX_LNK_ADDR                                  (0x040C)
-#define COH901318_CX_LNK_ADDR_SPACING                          (0x10)
-#define COH901318_CX_LNK_LINK_IMMEDIATE                                (0x00000001)
-
-/**
- * struct coh901318_params - parameters for DMAC configuration
- * @config: DMA config register
- * @ctrl_lli_last: DMA control register for the last lli in the list
- * @ctrl_lli: DMA control register for an lli
- * @ctrl_lli_chained: DMA control register for a chained lli
- */
-struct coh901318_params {
-       u32 config;
-       u32 ctrl_lli_last;
-       u32 ctrl_lli;
-       u32 ctrl_lli_chained;
-};
-
-/**
- * struct coh_dma_channel - dma channel base
- * @name: ascii name of dma channel
- * @number: channel id number
- * @desc_nbr_max: number of preallocated descriptors
- * @priority_high: prio of channel, 0 low otherwise high.
- * @param: configuration parameters
- */
-struct coh_dma_channel {
-       const char name[32];
-       const int number;
-       const int desc_nbr_max;
-       const int priority_high;
-       const struct coh901318_params param;
-};
-
-/**
- * struct powersave - DMA power save structure
- * @lock: lock protecting data in this struct
- * @started_channels: bit mask indicating active dma channels
- */
-struct powersave {
-       spinlock_t lock;
-       u64 started_channels;
-};
-
-/* points out all dma slave channels.
- * Syntax is [A1, B1, A2, B2, .... ,-1,-1]
- * Select all channels from A to B, end of list is marked with -1,-1
- */
-static int dma_slave_channels[] = {
-       U300_DMA_MSL_TX_0, U300_DMA_SPI_RX,
-       U300_DMA_UART1_TX, U300_DMA_UART1_RX, -1, -1};
-
-/* points out all dma memcpy channels. */
-static int dma_memcpy_channels[] = {
-       U300_DMA_GENERAL_PURPOSE_0, U300_DMA_GENERAL_PURPOSE_8, -1, -1};
-
-#define flags_memcpy_config (COH901318_CX_CFG_CH_DISABLE | \
-                       COH901318_CX_CFG_RM_MEMORY_TO_MEMORY | \
-                       COH901318_CX_CFG_LCR_DISABLE | \
-                       COH901318_CX_CFG_TC_IRQ_ENABLE | \
-                       COH901318_CX_CFG_BE_IRQ_ENABLE)
-#define flags_memcpy_lli_chained (COH901318_CX_CTRL_TC_ENABLE | \
-                       COH901318_CX_CTRL_BURST_COUNT_32_BYTES | \
-                       COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | \
-                       COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE | \
-                       COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS | \
-                       COH901318_CX_CTRL_DST_ADDR_INC_ENABLE | \
-                       COH901318_CX_CTRL_MASTER_MODE_M1RW | \
-                       COH901318_CX_CTRL_TCP_DISABLE | \
-                       COH901318_CX_CTRL_TC_IRQ_DISABLE | \
-                       COH901318_CX_CTRL_HSP_DISABLE | \
-                       COH901318_CX_CTRL_HSS_DISABLE | \
-                       COH901318_CX_CTRL_DDMA_LEGACY | \
-                       COH901318_CX_CTRL_PRDD_SOURCE)
-#define flags_memcpy_lli (COH901318_CX_CTRL_TC_ENABLE | \
-                       COH901318_CX_CTRL_BURST_COUNT_32_BYTES | \
-                       COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | \
-                       COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE | \
-                       COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS | \
-                       COH901318_CX_CTRL_DST_ADDR_INC_ENABLE | \
-                       COH901318_CX_CTRL_MASTER_MODE_M1RW | \
-                       COH901318_CX_CTRL_TCP_DISABLE | \
-                       COH901318_CX_CTRL_TC_IRQ_DISABLE | \
-                       COH901318_CX_CTRL_HSP_DISABLE | \
-                       COH901318_CX_CTRL_HSS_DISABLE | \
-                       COH901318_CX_CTRL_DDMA_LEGACY | \
-                       COH901318_CX_CTRL_PRDD_SOURCE)
-#define flags_memcpy_lli_last (COH901318_CX_CTRL_TC_ENABLE | \
-                       COH901318_CX_CTRL_BURST_COUNT_32_BYTES | \
-                       COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS | \
-                       COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE | \
-                       COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS | \
-                       COH901318_CX_CTRL_DST_ADDR_INC_ENABLE | \
-                       COH901318_CX_CTRL_MASTER_MODE_M1RW | \
-                       COH901318_CX_CTRL_TCP_DISABLE | \
-                       COH901318_CX_CTRL_TC_IRQ_ENABLE | \
-                       COH901318_CX_CTRL_HSP_DISABLE | \
-                       COH901318_CX_CTRL_HSS_DISABLE | \
-                       COH901318_CX_CTRL_DDMA_LEGACY | \
-                       COH901318_CX_CTRL_PRDD_SOURCE)
-
-static const struct coh_dma_channel chan_config[U300_DMA_CHANNELS] = {
-       {
-               .number = U300_DMA_MSL_TX_0,
-               .name = "MSL TX 0",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_MSL_TX_1,
-               .name = "MSL TX 1",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-       },
-       {
-               .number = U300_DMA_MSL_TX_2,
-               .name = "MSL TX 2",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .desc_nbr_max = 10,
-       },
-       {
-               .number = U300_DMA_MSL_TX_3,
-               .name = "MSL TX 3",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-       },
-       {
-               .number = U300_DMA_MSL_TX_4,
-               .name = "MSL TX 4",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1R_M2W |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-       },
-       {
-               .number = U300_DMA_MSL_TX_5,
-               .name = "MSL TX 5",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_MSL_TX_6,
-               .name = "MSL TX 6",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_MSL_RX_0,
-               .name = "MSL RX 0",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_MSL_RX_1,
-               .name = "MSL RX 1",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli = 0,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-       },
-       {
-               .number = U300_DMA_MSL_RX_2,
-               .name = "MSL RX 2",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-       },
-       {
-               .number = U300_DMA_MSL_RX_3,
-               .name = "MSL RX 3",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-       },
-       {
-               .number = U300_DMA_MSL_RX_4,
-               .name = "MSL RX 4",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-       },
-       {
-               .number = U300_DMA_MSL_RX_5,
-               .name = "MSL RX 5",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_32_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M2R_M1W |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_DEMAND_DMA1 |
-                               COH901318_CX_CTRL_PRDD_DEST,
-       },
-       {
-               .number = U300_DMA_MSL_RX_6,
-               .name = "MSL RX 6",
-               .priority_high = 0,
-       },
-       /*
-        * Don't set up device address, burst count or size of src
-        * or dst bus for this peripheral - handled by PrimeCell
-        * DMA extension.
-        */
-       {
-               .number = U300_DMA_MMCSD_RX_TX,
-               .name = "MMCSD RX TX",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-
-       },
-       {
-               .number = U300_DMA_MSPRO_TX,
-               .name = "MSPRO TX",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_MSPRO_RX,
-               .name = "MSPRO RX",
-               .priority_high = 0,
-       },
-       /*
-        * Don't set up device address, burst count or size of src
-        * or dst bus for this peripheral - handled by PrimeCell
-        * DMA extension.
-        */
-       {
-               .number = U300_DMA_UART0_TX,
-               .name = "UART0 TX",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-       },
-       {
-               .number = U300_DMA_UART0_RX,
-               .name = "UART0 RX",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-       },
-       {
-               .number = U300_DMA_APEX_TX,
-               .name = "APEX TX",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_APEX_RX,
-               .name = "APEX RX",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_PCM_I2S0_TX,
-               .name = "PCM I2S0 TX",
-               .priority_high = 1,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-       },
-       {
-               .number = U300_DMA_PCM_I2S0_RX,
-               .name = "PCM I2S0 RX",
-               .priority_high = 1,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_DEST,
-       },
-       {
-               .number = U300_DMA_PCM_I2S1_TX,
-               .name = "PCM I2S1 TX",
-               .priority_high = 1,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_SOURCE,
-       },
-       {
-               .number = U300_DMA_PCM_I2S1_RX,
-               .name = "PCM I2S1 RX",
-               .priority_high = 1,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_DEST,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_BURST_COUNT_16_BYTES |
-                               COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_SRC_ADDR_INC_DISABLE |
-                               COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS |
-                               COH901318_CX_CTRL_DST_ADDR_INC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_ENABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY |
-                               COH901318_CX_CTRL_PRDD_DEST,
-       },
-       {
-               .number = U300_DMA_XGAM_CDI,
-               .name = "XGAM CDI",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_XGAM_PDI,
-               .name = "XGAM PDI",
-               .priority_high = 0,
-       },
-       /*
-        * Don't set up device address, burst count or size of src
-        * or dst bus for this peripheral - handled by PrimeCell
-        * DMA extension.
-        */
-       {
-               .number = U300_DMA_SPI_TX,
-               .name = "SPI TX",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-       },
-       {
-               .number = U300_DMA_SPI_RX,
-               .name = "SPI RX",
-               .priority_high = 0,
-               .param.config = COH901318_CX_CFG_CH_DISABLE |
-                               COH901318_CX_CFG_LCR_DISABLE |
-                               COH901318_CX_CFG_TC_IRQ_ENABLE |
-                               COH901318_CX_CFG_BE_IRQ_ENABLE,
-               .param.ctrl_lli_chained = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_DISABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-               .param.ctrl_lli_last = 0 |
-                               COH901318_CX_CTRL_TC_ENABLE |
-                               COH901318_CX_CTRL_MASTER_MODE_M1RW |
-                               COH901318_CX_CTRL_TCP_DISABLE |
-                               COH901318_CX_CTRL_TC_IRQ_ENABLE |
-                               COH901318_CX_CTRL_HSP_ENABLE |
-                               COH901318_CX_CTRL_HSS_DISABLE |
-                               COH901318_CX_CTRL_DDMA_LEGACY,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_0,
-               .name = "GENERAL 00",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_1,
-               .name = "GENERAL 01",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_2,
-               .name = "GENERAL 02",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_3,
-               .name = "GENERAL 03",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_4,
-               .name = "GENERAL 04",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_5,
-               .name = "GENERAL 05",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_6,
-               .name = "GENERAL 06",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_7,
-               .name = "GENERAL 07",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_GENERAL_PURPOSE_8,
-               .name = "GENERAL 08",
-               .priority_high = 0,
-
-               .param.config = flags_memcpy_config,
-               .param.ctrl_lli_chained = flags_memcpy_lli_chained,
-               .param.ctrl_lli = flags_memcpy_lli,
-               .param.ctrl_lli_last = flags_memcpy_lli_last,
-       },
-       {
-               .number = U300_DMA_UART1_TX,
-               .name = "UART1 TX",
-               .priority_high = 0,
-       },
-       {
-               .number = U300_DMA_UART1_RX,
-               .name = "UART1 RX",
-               .priority_high = 0,
-       }
-};
-
-#define COHC_2_DEV(cohc) (&(cohc)->chan.dev->device)
-
-#ifdef VERBOSE_DEBUG
-#define COH_DBG(x) ({ if (1) x; 0; })
-#else
-#define COH_DBG(x) ({ if (0) x; 0; })
-#endif
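-
-/*
- * Usage sketch, a note added for clarity rather than original driver
- * text: the statement-expression form means the debug call compiles
- * away entirely unless VERBOSE_DEBUG is defined, while the argument
- * is still type-checked in both configurations, e.g.:
- *
- *     COH_DBG(coh901318_list_print(cohc, lli));
- */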
-
-struct coh901318_desc {
-       struct dma_async_tx_descriptor desc;
-       struct list_head node;
-       struct scatterlist *sg;
-       unsigned int sg_len;
-       struct coh901318_lli *lli;
-       enum dma_transfer_direction dir;
-       unsigned long flags;
-       u32 head_config;
-       u32 head_ctrl;
-};
-
-struct coh901318_base {
-       struct device *dev;
-       void __iomem *virtbase;
-       unsigned int irq;
-       struct coh901318_pool pool;
-       struct powersave pm;
-       struct dma_device dma_slave;
-       struct dma_device dma_memcpy;
-       struct coh901318_chan *chans;
-};
-
-struct coh901318_chan {
-       spinlock_t lock;
-       int allocated;
-       int id;
-       int stopped;
-
-       struct work_struct free_work;
-       struct dma_chan chan;
-
-       struct tasklet_struct tasklet;
-
-       struct list_head active;
-       struct list_head queue;
-       struct list_head free;
-
-       unsigned long nbr_active_done;
-       unsigned long busy;
-
-       struct dma_slave_config config;
-       u32 addr;
-       u32 ctrl;
-
-       struct coh901318_base *base;
-};
-
-static void coh901318_list_print(struct coh901318_chan *cohc,
-                                struct coh901318_lli *lli)
-{
-       struct coh901318_lli *l = lli;
-       int i = 0;
-
-       while (l) {
-               dev_vdbg(COHC_2_DEV(cohc), "i %d, lli %p, ctrl 0x%x, src %pad"
-                        ", dst %pad, link %pad virt_link_addr 0x%p\n",
-                        i, l, l->control, &l->src_addr, &l->dst_addr,
-                        &l->link_addr, l->virt_link_addr);
-               i++;
-               l = l->virt_link_addr;
-       }
-}
-
-#ifdef CONFIG_DEBUG_FS
-
-#define COH901318_DEBUGFS_ASSIGN(x, y) (x = y)
-
-static struct coh901318_base *debugfs_dma_base;
-static struct dentry *dma_dentry;
-
-static ssize_t coh901318_debugfs_read(struct file *file, char __user *buf,
-                                 size_t count, loff_t *f_pos)
-{
-       u64 started_channels = debugfs_dma_base->pm.started_channels;
-       int pool_count = debugfs_dma_base->pool.debugfs_pool_counter;
-       char *dev_buf;
-       char *tmp;
-       int ret;
-       int i;
-
-       dev_buf = kmalloc(4*1024, GFP_KERNEL);
-       if (dev_buf == NULL)
-               return -ENOMEM;
-       tmp = dev_buf;
-
-       tmp += sprintf(tmp, "DMA -- enabled dma channels\n");
-
-       for (i = 0; i < U300_DMA_CHANNELS; i++) {
-               if (started_channels & (1ULL << i))
-                       tmp += sprintf(tmp, "channel %d\n", i);
-       }
-
-       tmp += sprintf(tmp, "Pool alloc nbr %d\n", pool_count);
-
-       ret = simple_read_from_buffer(buf, count, f_pos, dev_buf,
-                                       tmp - dev_buf);
-       kfree(dev_buf);
-       return ret;
-}
-
-static const struct file_operations coh901318_debugfs_status_operations = {
-       .open           = simple_open,
-       .read           = coh901318_debugfs_read,
-       .llseek         = default_llseek,
-};
-
-static int __init init_coh901318_debugfs(void)
-{
-       dma_dentry = debugfs_create_dir("dma", NULL);
-
-       debugfs_create_file("status", S_IFREG | S_IRUGO, dma_dentry, NULL,
-                           &coh901318_debugfs_status_operations);
-       return 0;
-}
-
-static void __exit exit_coh901318_debugfs(void)
-{
-       debugfs_remove_recursive(dma_dentry);
-}
-
-module_init(init_coh901318_debugfs);
-module_exit(exit_coh901318_debugfs);
-#else
-
-#define COH901318_DEBUGFS_ASSIGN(x, y)
-
-#endif /* CONFIG_DEBUG_FS */
-
-static inline struct coh901318_chan *to_coh901318_chan(struct dma_chan *chan)
-{
-       return container_of(chan, struct coh901318_chan, chan);
-}
-
-static int coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
-                                          struct dma_slave_config *config,
-                                          enum dma_transfer_direction direction);
-
-static inline const struct coh901318_params *
-cohc_chan_param(struct coh901318_chan *cohc)
-{
-       return &chan_config[cohc->id].param;
-}
-
-static inline const struct coh_dma_channel *
-cohc_chan_conf(struct coh901318_chan *cohc)
-{
-       return &chan_config[cohc->id];
-}
-
-/* Mark the channel as stopped: power save may then gate its clock */
-static void enable_powersave(struct coh901318_chan *cohc)
-{
-       unsigned long flags;
-       struct powersave *pm = &cohc->base->pm;
-
-       spin_lock_irqsave(&pm->lock, flags);
-
-       pm->started_channels &= ~(1ULL << cohc->id);
-
-       spin_unlock_irqrestore(&pm->lock, flags);
-}
-
-/* Mark the channel as started: power save must leave its clock on */
-static void disable_powersave(struct coh901318_chan *cohc)
-{
-       unsigned long flags;
-       struct powersave *pm = &cohc->base->pm;
-
-       spin_lock_irqsave(&pm->lock, flags);
-
-       pm->started_channels |= (1ULL << cohc->id);
-
-       spin_unlock_irqrestore(&pm->lock, flags);
-}
-
-static inline int coh901318_set_ctrl(struct coh901318_chan *cohc, u32 control)
-{
-       int channel = cohc->id;
-       void __iomem *virtbase = cohc->base->virtbase;
-
-       writel(control,
-              virtbase + COH901318_CX_CTRL +
-              COH901318_CX_CTRL_SPACING * channel);
-       return 0;
-}
-
-static inline int coh901318_set_conf(struct coh901318_chan *cohc, u32 conf)
-{
-       int channel = cohc->id;
-       void __iomem *virtbase = cohc->base->virtbase;
-
-       writel(conf,
-              virtbase + COH901318_CX_CFG +
-              COH901318_CX_CFG_SPACING*channel);
-       return 0;
-}
-
-static int coh901318_start(struct coh901318_chan *cohc)
-{
-       u32 val;
-       int channel = cohc->id;
-       void __iomem *virtbase = cohc->base->virtbase;
-
-       disable_powersave(cohc);
-
-       val = readl(virtbase + COH901318_CX_CFG +
-                   COH901318_CX_CFG_SPACING * channel);
-
-       /* Enable channel */
-       val |= COH901318_CX_CFG_CH_ENABLE;
-       writel(val, virtbase + COH901318_CX_CFG +
-              COH901318_CX_CFG_SPACING * channel);
-
-       return 0;
-}
-
-static int coh901318_prep_linked_list(struct coh901318_chan *cohc,
-                                     struct coh901318_lli *lli)
-{
-       int channel = cohc->id;
-       void __iomem *virtbase = cohc->base->virtbase;
-
-       BUG_ON(readl(virtbase + COH901318_CX_STAT +
-                    COH901318_CX_STAT_SPACING*channel) &
-              COH901318_CX_STAT_ACTIVE);
-
-       writel(lli->src_addr,
-              virtbase + COH901318_CX_SRC_ADDR +
-              COH901318_CX_SRC_ADDR_SPACING * channel);
-
-       writel(lli->dst_addr, virtbase +
-              COH901318_CX_DST_ADDR +
-              COH901318_CX_DST_ADDR_SPACING * channel);
-
-       writel(lli->link_addr, virtbase + COH901318_CX_LNK_ADDR +
-              COH901318_CX_LNK_ADDR_SPACING * channel);
-
-       writel(lli->control, virtbase + COH901318_CX_CTRL +
-              COH901318_CX_CTRL_SPACING * channel);
-
-       return 0;
-}
-
-static struct coh901318_desc *
-coh901318_desc_get(struct coh901318_chan *cohc)
-{
-       struct coh901318_desc *desc;
-
-       if (list_empty(&cohc->free)) {
-               /* Allocate a new desc since the free list is empty.
-                * TODO: allocate a pile of descs instead of just one,
-                * to avoid many small allocations.
-                */
-               desc = kzalloc(sizeof(struct coh901318_desc), GFP_NOWAIT);
-               if (desc == NULL)
-                       goto out;
-               INIT_LIST_HEAD(&desc->node);
-               dma_async_tx_descriptor_init(&desc->desc, &cohc->chan);
-       } else {
-               /* Reuse an old desc. */
-               desc = list_first_entry(&cohc->free,
-                                       struct coh901318_desc,
-                                       node);
-               list_del(&desc->node);
-               /* Initialize it a bit so it's not insane */
-               desc->sg = NULL;
-               desc->sg_len = 0;
-               desc->desc.callback = NULL;
-               desc->desc.callback_param = NULL;
-       }
-
- out:
-       return desc;
-}
-
-static void
-coh901318_desc_free(struct coh901318_chan *cohc, struct coh901318_desc *cohd)
-{
-       list_add_tail(&cohd->node, &cohc->free);
-}
-
-/* call with irq lock held */
-static void
-coh901318_desc_submit(struct coh901318_chan *cohc, struct coh901318_desc *desc)
-{
-       list_add_tail(&desc->node, &cohc->active);
-}
-
-static struct coh901318_desc *
-coh901318_first_active_get(struct coh901318_chan *cohc)
-{
-       return list_first_entry_or_null(&cohc->active, struct coh901318_desc,
-                                       node);
-}
-
-static void
-coh901318_desc_remove(struct coh901318_desc *cohd)
-{
-       list_del(&cohd->node);
-}
-
-static void
-coh901318_desc_queue(struct coh901318_chan *cohc, struct coh901318_desc *desc)
-{
-       list_add_tail(&desc->node, &cohc->queue);
-}
-
-static struct coh901318_desc *
-coh901318_first_queued(struct coh901318_chan *cohc)
-{
-       return list_first_entry_or_null(&cohc->queue, struct coh901318_desc,
-                                       node);
-}
-
-static inline u32 coh901318_get_bytes_in_lli(struct coh901318_lli *in_lli)
-{
-       struct coh901318_lli *lli = in_lli;
-       u32 bytes = 0;
-
-       while (lli) {
-               bytes += lli->control & COH901318_CX_CTRL_TC_VALUE_MASK;
-               lli = lli->virt_link_addr;
-       }
-       return bytes;
-}
-
-/*
- * Get the number of bytes left to transfer on this channel. For an
- * exact figure it is unwise to call this before stopping the channel,
- * but it can still be called for a rough estimate.
- */
-static u32 coh901318_get_bytes_left(struct dma_chan *chan)
-{
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       struct coh901318_desc *cohd;
-       struct list_head *pos;
-       unsigned long flags;
-       u32 left = 0;
-       int i = 0;
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       /*
-        * If there are many queued jobs, we iterate and add up the
-        * sizes of them all. We take a special look at the first
-        * job though, since it is probably active.
-        */
-       list_for_each(pos, &cohc->active) {
-               /*
-                * The first job in the list will be working on the
-                * hardware. The job can be stopped but still active,
-                * so that the transfer counter is somewhere inside
-                * the buffer.
-                */
-               cohd = list_entry(pos, struct coh901318_desc, node);
-
-               if (i == 0) {
-                       struct coh901318_lli *lli;
-                       dma_addr_t ladd;
-
-                       /* Read current transfer count value */
-                       left = readl(cohc->base->virtbase +
-                                    COH901318_CX_CTRL +
-                                    COH901318_CX_CTRL_SPACING * cohc->id) &
-                               COH901318_CX_CTRL_TC_VALUE_MASK;
-
-                       /* See if the transfer is linked... */
-                       ladd = readl(cohc->base->virtbase +
-                                    COH901318_CX_LNK_ADDR +
-                                    COH901318_CX_LNK_ADDR_SPACING *
-                                    cohc->id) &
-                               ~COH901318_CX_LNK_LINK_IMMEDIATE;
-                       /* Single transaction */
-                       if (!ladd)
-                               continue;
-
-                       /*
-                        * Linked transaction, follow the lli, find the
-                        * currently processing lli, and proceed to the next
-                        */
-                       lli = cohd->lli;
-                       while (lli && lli->link_addr != ladd)
-                               lli = lli->virt_link_addr;
-
-                       if (lli)
-                               lli = lli->virt_link_addr;
-
-                       /*
-                        * Follow remaining lli links around to count the total
-                        * number of bytes left
-                        */
-                       left += coh901318_get_bytes_in_lli(lli);
-               } else {
-                       left += coh901318_get_bytes_in_lli(cohd->lli);
-               }
-               i++;
-       }
-
-       /* Also count bytes in the queued jobs */
-       list_for_each(pos, &cohc->queue) {
-               cohd = list_entry(pos, struct coh901318_desc, node);
-               left += coh901318_get_bytes_in_lli(cohd->lli);
-       }
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-
-       return left;
-}
-
-/*
- * Pauses a transfer without losing data. Enables power save.
- * Use this function in conjunction with coh901318_resume.
- */
-static int coh901318_pause(struct dma_chan *chan)
-{
-       u32 val;
-       unsigned long flags;
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       int channel = cohc->id;
-       void __iomem *virtbase = cohc->base->virtbase;
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       /* Disable channel in HW */
-       val = readl(virtbase + COH901318_CX_CFG +
-                   COH901318_CX_CFG_SPACING * channel);
-
-       /* Stopping infinite transfer */
-       if ((val & COH901318_CX_CTRL_TC_ENABLE) == 0 &&
-           (val & COH901318_CX_CFG_CH_ENABLE))
-               cohc->stopped = 1;
-
-       val &= ~COH901318_CX_CFG_CH_ENABLE;
-       /* Write the disable twice, HW bug workaround */
-       writel(val, virtbase + COH901318_CX_CFG +
-              COH901318_CX_CFG_SPACING * channel);
-       writel(val, virtbase + COH901318_CX_CFG +
-              COH901318_CX_CFG_SPACING * channel);
-
-       /* Spin-wait for it to actually go inactive */
-       while (readl(virtbase + COH901318_CX_STAT+COH901318_CX_STAT_SPACING *
-                    channel) & COH901318_CX_STAT_ACTIVE)
-               cpu_relax();
-
-       /* Check if we stopped an active job */
-       if ((readl(virtbase + COH901318_CX_CTRL+COH901318_CX_CTRL_SPACING *
-                  channel) & COH901318_CX_CTRL_TC_VALUE_MASK) > 0)
-               cohc->stopped = 1;
-
-       enable_powersave(cohc);
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-       return 0;
-}
-
-/*
- * Resumes a transfer that has been paused via coh901318_pause().
- * Power save is handled.
- */
-static int coh901318_resume(struct dma_chan *chan)
-{
-       u32 val;
-       unsigned long flags;
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       int channel = cohc->id;
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       disable_powersave(cohc);
-
-       if (cohc->stopped) {
-               /* Enable channel in HW */
-               val = readl(cohc->base->virtbase + COH901318_CX_CFG +
-                           COH901318_CX_CFG_SPACING * channel);
-
-               val |= COH901318_CX_CFG_CH_ENABLE;
-
-               writel(val, cohc->base->virtbase + COH901318_CX_CFG +
-                      COH901318_CX_CFG_SPACING*channel);
-
-               cohc->stopped = 0;
-       }
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-       return 0;
-}
-
-bool coh901318_filter_id(struct dma_chan *chan, void *chan_id)
-{
-       unsigned long ch_nr = (unsigned long) chan_id;
-
-       if (ch_nr == to_coh901318_chan(chan)->id)
-               return true;
-
-       return false;
-}
-EXPORT_SYMBOL(coh901318_filter_id);
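-
-/*
- * Illustrative client-side sketch, not part of the original file: a
- * peripheral driver would typically request one of the channels from
- * the platform table above through the generic dmaengine filter
- * interface. The channel picked here is just an example.
- */
-#if 0
-static struct dma_chan *example_request_channel(void)
-{
-       dma_cap_mask_t mask;
-
-       dma_cap_zero(mask);
-       dma_cap_set(DMA_SLAVE, mask);
-       /* U300_DMA_UART1_TX is one of the channels declared above */
-       return dma_request_channel(mask, coh901318_filter_id,
-                                  (void *)(unsigned long)U300_DMA_UART1_TX);
-}
-#endif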
-
-struct coh901318_filter_args {
-       struct coh901318_base *base;
-       unsigned int ch_nr;
-};
-
-static bool coh901318_filter_base_and_id(struct dma_chan *chan, void *data)
-{
-       struct coh901318_filter_args *args = data;
-
-       if (&args->base->dma_slave == chan->device &&
-           args->ch_nr == to_coh901318_chan(chan)->id)
-               return true;
-
-       return false;
-}
-
-static struct dma_chan *coh901318_xlate(struct of_phandle_args *dma_spec,
-                                       struct of_dma *ofdma)
-{
-       struct coh901318_filter_args args = {
-               .base = ofdma->of_dma_data,
-               .ch_nr = dma_spec->args[0],
-       };
-       dma_cap_mask_t cap;
-       dma_cap_zero(cap);
-       dma_cap_set(DMA_SLAVE, cap);
-
-       return dma_request_channel(cap, coh901318_filter_base_and_id, &args);
-}
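-
-/*
- * For reference, an assumption based on the single-cell xlate above
- * rather than original file text: a device tree consumer names a
- * channel with one cell holding the channel number, e.g.
- *
- *     dmas = <&dmac 17>;
- *     dma-names = "tx";
- *
- * where the cell value (dma_spec->args[0]) selects the COH 901 318
- * channel; 17 is purely illustrative.
- */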
-
-/*
- * DMA channel configuration and allocation
- */
-static int coh901318_config(struct coh901318_chan *cohc,
-                           struct coh901318_params *param)
-{
-       const struct coh901318_params *p;
-       int channel = cohc->id;
-       void __iomem *virtbase = cohc->base->virtbase;
-
-       if (param)
-               p = param;
-       else
-               p = cohc_chan_param(cohc);
-
-       /* Clear any pending BE or TC interrupt */
-       if (channel < 32) {
-               writel(1 << channel, virtbase + COH901318_BE_INT_CLEAR1);
-               writel(1 << channel, virtbase + COH901318_TC_INT_CLEAR1);
-       } else {
-               writel(1 << (channel - 32), virtbase +
-                      COH901318_BE_INT_CLEAR2);
-               writel(1 << (channel - 32), virtbase +
-                      COH901318_TC_INT_CLEAR2);
-       }
-
-       coh901318_set_conf(cohc, p->config);
-       coh901318_set_ctrl(cohc, p->ctrl_lli_last);
-
-       return 0;
-}
-
-/*
- * Must be called with the channel lock held.
- * Starts the first queued job, if any.
- * TODO: start all queued jobs in one go
- *
- * Returns the descriptor if a queued job was started, or NULL if
- * the queue is empty.
- */
-static struct coh901318_desc *coh901318_queue_start(struct coh901318_chan *cohc)
-{
-       struct coh901318_desc *cohd;
-
-       /*
-        * start queued jobs, if any
-        * TODO: transmit all queued jobs in one go
-        */
-       cohd = coh901318_first_queued(cohc);
-
-       if (cohd != NULL) {
-               /* Remove from queue */
-               coh901318_desc_remove(cohd);
-               /* initiate DMA job */
-               cohc->busy = 1;
-
-               coh901318_desc_submit(cohc, cohd);
-
-               /* Program the transaction head */
-               coh901318_set_conf(cohc, cohd->head_config);
-               coh901318_set_ctrl(cohc, cohd->head_ctrl);
-               coh901318_prep_linked_list(cohc, cohd->lli);
-
-               /* start dma job on this channel */
-               coh901318_start(cohc);
-       }
-
-       return cohd;
-}
-
-/*
- * This tasklet is called from the interrupt handler to
- * handle each descriptor (DMA job) that is sent to a channel.
- */
-static void dma_tasklet(struct tasklet_struct *t)
-{
-       struct coh901318_chan *cohc = from_tasklet(cohc, t, tasklet);
-       struct coh901318_desc *cohd_fin;
-       unsigned long flags;
-       struct dmaengine_desc_callback cb;
-
-       dev_vdbg(COHC_2_DEV(cohc), "[%s] chan_id %d"
-                " nbr_active_done %ld\n", __func__,
-                cohc->id, cohc->nbr_active_done);
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       /* get first active descriptor entry from list */
-       cohd_fin = coh901318_first_active_get(cohc);
-
-       if (cohd_fin == NULL)
-               goto err;
-
-       /* locate callback to client */
-       dmaengine_desc_get_callback(&cohd_fin->desc, &cb);
-
-       /* sign this job as completed on the channel */
-       dma_cookie_complete(&cohd_fin->desc);
-
-       /* release the lli allocation and remove the descriptor */
-       coh901318_lli_free(&cohc->base->pool, &cohd_fin->lli);
-
-       /* return desc to free-list */
-       coh901318_desc_remove(cohd_fin);
-       coh901318_desc_free(cohc, cohd_fin);
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-
-       /* Call the callback when we're done */
-       dmaengine_desc_callback_invoke(&cb, NULL);
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       /*
-        * If another interrupt fired while the tasklet was being
-        * scheduled, we do not get called twice. This counter therefore
-        * keeps track of the number of IRQs expected to be handled for
-        * this channel. If more than one IRQ remains to be acked, we
-        * simply schedule this tasklet again.
-        */
-       cohc->nbr_active_done--;
-       if (cohc->nbr_active_done) {
-               dev_dbg(COHC_2_DEV(cohc), "scheduling tasklet again, new IRQs "
-                       "came in while we were scheduling this tasklet\n");
-               if (cohc_chan_conf(cohc)->priority_high)
-                       tasklet_hi_schedule(&cohc->tasklet);
-               else
-                       tasklet_schedule(&cohc->tasklet);
-       }
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-
-       return;
-
- err:
-       spin_unlock_irqrestore(&cohc->lock, flags);
-       dev_err(COHC_2_DEV(cohc), "[%s] No active dma desc\n", __func__);
-}
-
-/* called from interrupt context */
-static void dma_tc_handle(struct coh901318_chan *cohc)
-{
-       /*
-        * If the channel is not allocated, then we shouldn't have
-        * any TC interrupts on it.
-        */
-       if (!cohc->allocated) {
-               dev_err(COHC_2_DEV(cohc), "spurious interrupt from "
-                       "unallocated channel\n");
-               return;
-       }
-
-       /*
-        * When we reach this point, at least one queue item should
-        * have been moved over from cohc->queue to cohc->active and
-        * run to completion; that is why we are getting a terminal
-        * count interrupt. If you hit this BUG() the most probable
-        * cause is that the individual nodes in the lli chain have
-        * IRQ enabled, so check your platform config for lli chain
-        * ctrl.
-        */
-       BUG_ON(list_empty(&cohc->active));
-
-       cohc->nbr_active_done++;
-
-       /*
-        * This attempts to take a job from cohc->queue, put it
-        * into cohc->active and start it.
-        */
-       if (coh901318_queue_start(cohc) == NULL)
-               cohc->busy = 0;
-
-       /*
-        * The tasklet will remove items from cohc->active
-        * and thus terminate them.
-        */
-       if (cohc_chan_conf(cohc)->priority_high)
-               tasklet_hi_schedule(&cohc->tasklet);
-       else
-               tasklet_schedule(&cohc->tasklet);
-}
-
-static irqreturn_t dma_irq_handler(int irq, void *dev_id)
-{
-       u32 status1;
-       u32 status2;
-       int i;
-       int ch;
-       struct coh901318_base *base  = dev_id;
-       struct coh901318_chan *cohc;
-       void __iomem *virtbase = base->virtbase;
-
-       status1 = readl(virtbase + COH901318_INT_STATUS1);
-       status2 = readl(virtbase + COH901318_INT_STATUS2);
-
-       if (unlikely(status1 == 0 && status2 == 0)) {
-               dev_warn(base->dev, "spurious DMA IRQ from no channel!\n");
-               return IRQ_HANDLED;
-       }
-
-       /* TODO: consider handling the IRQ in a tasklet here to
-        *       minimize interrupt latency */
-
-       /* Check the first 32 DMA channels for IRQ */
-       while (status1) {
-               /* Find first bit set, return as a number. */
-               i = ffs(status1) - 1;
-               ch = i;
-
-               cohc = &base->chans[ch];
-               spin_lock(&cohc->lock);
-
-               /* Mask off this bit */
-               status1 &= ~(1 << i);
-               /* Check the individual channel bits */
-               if (test_bit(i, virtbase + COH901318_BE_INT_STATUS1)) {
-                       dev_crit(COHC_2_DEV(cohc),
-                                "DMA bus error on channel %d!\n", ch);
-                       BUG_ON(1);
-                       /* Clear BE interrupt */
-                       __set_bit(i, virtbase + COH901318_BE_INT_CLEAR1);
-               } else {
-                       /* Caused by TC, really? */
-                       if (unlikely(!test_bit(i, virtbase +
-                                              COH901318_TC_INT_STATUS1))) {
-                               dev_warn(COHC_2_DEV(cohc),
-                                        "ignoring interrupt not caused by terminal count on channel %d\n", ch);
-                               /* Clear TC interrupt */
-                               BUG_ON(1);
-                               __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
-                       } else {
-                               /* Enable powersave if transfer has finished */
-                               if (!(readl(virtbase + COH901318_CX_STAT +
-                                           COH901318_CX_STAT_SPACING*ch) &
-                                     COH901318_CX_STAT_ENABLED)) {
-                                       enable_powersave(cohc);
-                               }
-
-                               /* Must clear the TC interrupt before
-                                * calling dma_tc_handle, in case
-                                * tc_handle initiates a new dma job
-                                */
-                               __set_bit(i, virtbase + COH901318_TC_INT_CLEAR1);
-
-                               dma_tc_handle(cohc);
-                       }
-               }
-               spin_unlock(&cohc->lock);
-       }
-
-       /* Check the remaining 32 DMA channels for IRQ */
-       while (status2) {
-               /* Find first bit set, return as a number. */
-               i = ffs(status2) - 1;
-               ch = i + 32;
-               cohc = &base->chans[ch];
-               spin_lock(&cohc->lock);
-
-               /* Mask off this bit */
-               status2 &= ~(1 << i);
-               /* Check the individual channel bits */
-               if (test_bit(i, virtbase + COH901318_BE_INT_STATUS2)) {
-                       dev_crit(COHC_2_DEV(cohc),
-                                "DMA bus error on channel %d!\n", ch);
-                       /* Clear BE interrupt */
-                       BUG_ON(1);
-                       __set_bit(i, virtbase + COH901318_BE_INT_CLEAR2);
-               } else {
-                       /* Caused by TC, really? */
-                       if (unlikely(!test_bit(i, virtbase +
-                                              COH901318_TC_INT_STATUS2))) {
-                               dev_warn(COHC_2_DEV(cohc),
-                                        "ignoring interrupt not caused by terminal count on channel %d\n", ch);
-                               /* Clear TC interrupt */
-                               __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
-                               BUG_ON(1);
-                       } else {
-                               /* Enable powersave if transfer has finished */
-                               if (!(readl(virtbase + COH901318_CX_STAT +
-                                           COH901318_CX_STAT_SPACING*ch) &
-                                     COH901318_CX_STAT_ENABLED)) {
-                                       enable_powersave(cohc);
-                               }
-                               /* Must clear the TC interrupt before
-                                * calling dma_tc_handle, in case
-                                * tc_handle initiates a new dma job
-                                */
-                               __set_bit(i, virtbase + COH901318_TC_INT_CLEAR2);
-
-                               dma_tc_handle(cohc);
-                       }
-               }
-               spin_unlock(&cohc->lock);
-       }
-
-       return IRQ_HANDLED;
-}
-
-static int coh901318_terminate_all(struct dma_chan *chan)
-{
-       unsigned long flags;
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       struct coh901318_desc *cohd;
-       void __iomem *virtbase = cohc->base->virtbase;
-
-       /* Pause the channel, then tear the transfer down */
-       coh901318_pause(chan);
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       /* Clear any pending BE or TC interrupt */
-       if (cohc->id < 32) {
-               writel(1 << cohc->id, virtbase + COH901318_BE_INT_CLEAR1);
-               writel(1 << cohc->id, virtbase + COH901318_TC_INT_CLEAR1);
-       } else {
-               writel(1 << (cohc->id - 32), virtbase +
-                      COH901318_BE_INT_CLEAR2);
-               writel(1 << (cohc->id - 32), virtbase +
-                      COH901318_TC_INT_CLEAR2);
-       }
-
-       enable_powersave(cohc);
-
-       while ((cohd = coh901318_first_active_get(cohc))) {
-               /* release the lli allocation */
-               coh901318_lli_free(&cohc->base->pool, &cohd->lli);
-
-               /* return desc to free-list */
-               coh901318_desc_remove(cohd);
-               coh901318_desc_free(cohc, cohd);
-       }
-
-       while ((cohd = coh901318_first_queued(cohc))) {
-               /* release the lli allocation */
-               coh901318_lli_free(&cohc->base->pool, &cohd->lli);
-
-               /* return desc to free-list */
-               coh901318_desc_remove(cohd);
-               coh901318_desc_free(cohc, cohd);
-       }
-
-       cohc->nbr_active_done = 0;
-       cohc->busy = 0;
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-
-       return 0;
-}
-
-static int coh901318_alloc_chan_resources(struct dma_chan *chan)
-{
-       struct coh901318_chan   *cohc = to_coh901318_chan(chan);
-       unsigned long flags;
-
-       dev_vdbg(COHC_2_DEV(cohc), "[%s] DMA channel %d\n",
-                __func__, cohc->id);
-
-       if (chan->client_count > 1)
-               return -EBUSY;
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       coh901318_config(cohc, NULL);
-
-       cohc->allocated = 1;
-       dma_cookie_init(chan);
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-
-       return 1;
-}
-
-static void
-coh901318_free_chan_resources(struct dma_chan *chan)
-{
-       struct coh901318_chan   *cohc = to_coh901318_chan(chan);
-       int channel = cohc->id;
-       unsigned long flags;
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       /* Disable HW */
-       writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CFG +
-              COH901318_CX_CFG_SPACING*channel);
-       writel(0x00000000U, cohc->base->virtbase + COH901318_CX_CTRL +
-              COH901318_CX_CTRL_SPACING*channel);
-
-       cohc->allocated = 0;
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-
-       coh901318_terminate_all(chan);
-}
-
-static dma_cookie_t
-coh901318_tx_submit(struct dma_async_tx_descriptor *tx)
-{
-       struct coh901318_desc *cohd = container_of(tx, struct coh901318_desc,
-                                                  desc);
-       struct coh901318_chan *cohc = to_coh901318_chan(tx->chan);
-       unsigned long flags;
-       dma_cookie_t cookie;
-
-       spin_lock_irqsave(&cohc->lock, flags);
-       cookie = dma_cookie_assign(tx);
-
-       coh901318_desc_queue(cohc, cohd);
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-
-       return cookie;
-}
-
-static struct dma_async_tx_descriptor *
-coh901318_prep_memcpy(struct dma_chan *chan, dma_addr_t dest, dma_addr_t src,
-                     size_t size, unsigned long flags)
-{
-       struct coh901318_lli *lli;
-       struct coh901318_desc *cohd;
-       unsigned long flg;
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       int lli_len;
-       u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
-       int ret;
-
-       spin_lock_irqsave(&cohc->lock, flg);
-
-       dev_vdbg(COHC_2_DEV(cohc),
-                "[%s] channel %d src %pad dest %pad size %zu\n",
-                __func__, cohc->id, &src, &dest, size);
-
-       if (flags & DMA_PREP_INTERRUPT)
-               /* Trigger interrupt after last lli */
-               ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
-
-       lli_len = size >> MAX_DMA_PACKET_SIZE_SHIFT;
-       if ((lli_len << MAX_DMA_PACKET_SIZE_SHIFT) < size)
-               lli_len++;
-
-       lli = coh901318_lli_alloc(&cohc->base->pool, lli_len);
-
-       if (lli == NULL)
-               goto err;
-
-       ret = coh901318_lli_fill_memcpy(
-               &cohc->base->pool, lli, src, size, dest,
-               cohc_chan_param(cohc)->ctrl_lli_chained,
-               ctrl_last);
-       if (ret)
-               goto err;
-
-       COH_DBG(coh901318_list_print(cohc, lli));
-
-       /* Pick a descriptor to handle this transfer */
-       cohd = coh901318_desc_get(cohc);
-       if (cohd == NULL)
-               goto err;
-       cohd->lli = lli;
-       cohd->flags = flags;
-       cohd->desc.tx_submit = coh901318_tx_submit;
-
-       spin_unlock_irqrestore(&cohc->lock, flg);
-
-       return &cohd->desc;
- err:
-       spin_unlock_irqrestore(&cohc->lock, flg);
-       return NULL;
-}
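-
-/*
- * Illustrative client-side flow, a sketch rather than original driver
- * code: a memcpy transfer prepared by the routine above goes through
- * the usual dmaengine steps of prepare, submit and issue. All names
- * and values below are placeholders.
- */
-#if 0
-static int example_memcpy(struct dma_chan *chan, dma_addr_t dst,
-                         dma_addr_t src, size_t len)
-{
-       struct dma_async_tx_descriptor *tx;
-       dma_cookie_t cookie;
-
-       tx = dmaengine_prep_dma_memcpy(chan, dst, src, len,
-                                      DMA_PREP_INTERRUPT);
-       if (!tx)
-               return -EBUSY;
-       cookie = dmaengine_submit(tx);  /* ends up in coh901318_tx_submit() */
-       dma_async_issue_pending(chan);  /* ends up in coh901318_issue_pending() */
-       return dma_submit_error(cookie);
-}
-#endif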
-
-static struct dma_async_tx_descriptor *
-coh901318_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
-                       unsigned int sg_len, enum dma_transfer_direction direction,
-                       unsigned long flags, void *context)
-{
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       struct coh901318_lli *lli;
-       struct coh901318_desc *cohd;
-       const struct coh901318_params *params;
-       struct scatterlist *sg;
-       int len = 0;
-       int size;
-       int i;
-       u32 ctrl_chained = cohc_chan_param(cohc)->ctrl_lli_chained;
-       u32 ctrl = cohc_chan_param(cohc)->ctrl_lli;
-       u32 ctrl_last = cohc_chan_param(cohc)->ctrl_lli_last;
-       u32 config;
-       unsigned long flg;
-       int ret;
-
-       if (!sgl)
-               goto out;
-       if (sg_dma_len(sgl) == 0)
-               goto out;
-
-       spin_lock_irqsave(&cohc->lock, flg);
-
-       dev_vdbg(COHC_2_DEV(cohc), "[%s] sg_len %d dir %d\n",
-                __func__, sg_len, direction);
-
-       if (flags & DMA_PREP_INTERRUPT)
-               /* Trigger interrupt after last lli */
-               ctrl_last |= COH901318_CX_CTRL_TC_IRQ_ENABLE;
-
-       params = cohc_chan_param(cohc);
-       config = params->config;
-       /*
-        * Add runtime-specific control on top; make sure that the
-        * bits you set per peripheral channel are cleared in the
-        * default config from the platform.
-        */
-       ctrl_chained |= cohc->ctrl;
-       ctrl_last |= cohc->ctrl;
-       ctrl |= cohc->ctrl;
-
-       if (direction == DMA_MEM_TO_DEV) {
-               u32 tx_flags = COH901318_CX_CTRL_PRDD_SOURCE |
-                       COH901318_CX_CTRL_SRC_ADDR_INC_ENABLE;
-
-               config |= COH901318_CX_CFG_RM_MEMORY_TO_PRIMARY;
-               ctrl_chained |= tx_flags;
-               ctrl_last |= tx_flags;
-               ctrl |= tx_flags;
-       } else if (direction == DMA_DEV_TO_MEM) {
-               u32 rx_flags = COH901318_CX_CTRL_PRDD_DEST |
-                       COH901318_CX_CTRL_DST_ADDR_INC_ENABLE;
-
-               config |= COH901318_CX_CFG_RM_PRIMARY_TO_MEMORY;
-               ctrl_chained |= rx_flags;
-               ctrl_last |= rx_flags;
-               ctrl |= rx_flags;
-       } else
-               goto err_direction;
-
-       /* The DMA controller only supports transferring packets up to
-        * MAX_DMA_PACKET_SIZE. Calculate the total number of dma
-        * elements required to send the entire sg list.
-        */
-       for_each_sg(sgl, sg, sg_len, i) {
-               unsigned int factor;
-               size = sg_dma_len(sg);
-
-               if (size <= MAX_DMA_PACKET_SIZE) {
-                       len++;
-                       continue;
-               }
-
-               factor = size >> MAX_DMA_PACKET_SIZE_SHIFT;
-               if ((factor << MAX_DMA_PACKET_SIZE_SHIFT) < size)
-                       factor++;
-
-               len += factor;
-       }
-
-       pr_debug("Allocate %d lli:s for this transfer\n", len);
-       lli = coh901318_lli_alloc(&cohc->base->pool, len);
-
-       if (lli == NULL)
-               goto err_dma_alloc;
-
-       coh901318_dma_set_runtimeconfig(chan, &cohc->config, direction);
-
-       /* initialize the allocated lli list */
-       ret = coh901318_lli_fill_sg(&cohc->base->pool, lli, sgl, sg_len,
-                                   cohc->addr,
-                                   ctrl_chained,
-                                   ctrl,
-                                   ctrl_last,
-                                   direction, COH901318_CX_CTRL_TC_IRQ_ENABLE);
-       if (ret)
-               goto err_lli_fill;
-
-       COH_DBG(coh901318_list_print(cohc, lli));
-
-       /* Pick a descriptor to handle this transfer */
-       cohd = coh901318_desc_get(cohc);
-       if (cohd == NULL)
-               goto err_lli_fill;
-       cohd->head_config = config;
-       /*
-        * Set the default head ctrl for the channel to the one from the
-        * lli, things may have changed due to odd buffer alignment
-        * etc.
-        */
-       cohd->head_ctrl = lli->control;
-       cohd->dir = direction;
-       cohd->flags = flags;
-       cohd->desc.tx_submit = coh901318_tx_submit;
-       cohd->lli = lli;
-
-       spin_unlock_irqrestore(&cohc->lock, flg);
-
-       return &cohd->desc;
- err_lli_fill:
- err_dma_alloc:
- err_direction:
-       spin_unlock_irqrestore(&cohc->lock, flg);
- out:
-       return NULL;
-}
-
-static enum dma_status
-coh901318_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
-                struct dma_tx_state *txstate)
-{
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       enum dma_status ret;
-
-       ret = dma_cookie_status(chan, cookie, txstate);
-       if (ret == DMA_COMPLETE || !txstate)
-               return ret;
-
-       dma_set_residue(txstate, coh901318_get_bytes_left(chan));
-
-       if (ret == DMA_IN_PROGRESS && cohc->stopped)
-               ret = DMA_PAUSED;
-
-       return ret;
-}
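-
-/*
- * Sketch of how a client reads back the residue reported above; an
- * illustration, not original file text:
- *
- *     struct dma_tx_state state;
- *     enum dma_status status;
- *
- *     status = dmaengine_tx_status(chan, cookie, &state);
- *     if (status == DMA_IN_PROGRESS || status == DMA_PAUSED)
- *             pr_info("%u bytes left\n", state.residue);
- */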
-
-static void
-coh901318_issue_pending(struct dma_chan *chan)
-{
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       unsigned long flags;
-
-       spin_lock_irqsave(&cohc->lock, flags);
-
-       /*
-        * Busy means that pending jobs are already being processed,
-        * so there is no point in starting the queue here: the
-        * terminal count interrupt on the channel will take the next
-        * job on the queue and execute it anyway.
-        */
-       if (!cohc->busy)
-               coh901318_queue_start(cohc);
-
-       spin_unlock_irqrestore(&cohc->lock, flags);
-}
-
-/*
- * Here we wrap in the runtime dma control interface
- */
-struct burst_table {
-       int burst_8bit;
-       int burst_16bit;
-       int burst_32bit;
-       u32 reg;
-};
-
-static const struct burst_table burst_sizes[] = {
-       {
-               .burst_8bit = 64,
-               .burst_16bit = 32,
-               .burst_32bit = 16,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_64_BYTES,
-       },
-       {
-               .burst_8bit = 48,
-               .burst_16bit = 24,
-               .burst_32bit = 12,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_48_BYTES,
-       },
-       {
-               .burst_8bit = 32,
-               .burst_16bit = 16,
-               .burst_32bit = 8,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_32_BYTES,
-       },
-       {
-               .burst_8bit = 16,
-               .burst_16bit = 8,
-               .burst_32bit = 4,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_16_BYTES,
-       },
-       {
-               .burst_8bit = 8,
-               .burst_16bit = 4,
-               .burst_32bit = 2,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_8_BYTES,
-       },
-       {
-               .burst_8bit = 4,
-               .burst_16bit = 2,
-               .burst_32bit = 1,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_4_BYTES,
-       },
-       {
-               .burst_8bit = 2,
-               .burst_16bit = 1,
-               .burst_32bit = 0,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_2_BYTES,
-       },
-       {
-               .burst_8bit = 1,
-               .burst_16bit = 0,
-               .burst_32bit = 0,
-               .reg = COH901318_CX_CTRL_BURST_COUNT_1_BYTE,
-       },
-};
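-
-/*
- * Note added for clarity: the table above is ordered from largest to
- * smallest burst, so the loops in coh901318_dma_set_runtimeconfig()
- * below perform a first-fit descending search; the first entry whose
- * burst count does not exceed maxburst wins. For example, a 32-bit
- * bus with maxburst = 8 skips the 16- and 12-beat entries and picks
- * COH901318_CX_CTRL_BURST_COUNT_32_BYTES (8 beats of 4 bytes).
- */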
-
-static int coh901318_dma_set_runtimeconfig(struct dma_chan *chan,
-                                          struct dma_slave_config *config,
-                                          enum dma_transfer_direction direction)
-{
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-       dma_addr_t addr;
-       enum dma_slave_buswidth addr_width;
-       u32 maxburst;
-       u32 ctrl = 0;
-       int i = 0;
-
-       /* We only support mem to per or per to mem transfers */
-       if (direction == DMA_DEV_TO_MEM) {
-               addr = config->src_addr;
-               addr_width = config->src_addr_width;
-               maxburst = config->src_maxburst;
-       } else if (direction == DMA_MEM_TO_DEV) {
-               addr = config->dst_addr;
-               addr_width = config->dst_addr_width;
-               maxburst = config->dst_maxburst;
-       } else {
-               dev_err(COHC_2_DEV(cohc), "illegal channel mode\n");
-               return -EINVAL;
-       }
-
-       dev_dbg(COHC_2_DEV(cohc), "configure channel for %d byte transfers\n",
-               addr_width);
-       switch (addr_width)  {
-       case DMA_SLAVE_BUSWIDTH_1_BYTE:
-               ctrl |=
-                       COH901318_CX_CTRL_SRC_BUS_SIZE_8_BITS |
-                       COH901318_CX_CTRL_DST_BUS_SIZE_8_BITS;
-
-               while (i < ARRAY_SIZE(burst_sizes)) {
-                       if (burst_sizes[i].burst_8bit <= maxburst)
-                               break;
-                       i++;
-               }
-
-               break;
-       case DMA_SLAVE_BUSWIDTH_2_BYTES:
-               ctrl |=
-                       COH901318_CX_CTRL_SRC_BUS_SIZE_16_BITS |
-                       COH901318_CX_CTRL_DST_BUS_SIZE_16_BITS;
-
-               while (i < ARRAY_SIZE(burst_sizes)) {
-                       if (burst_sizes[i].burst_16bit <= maxburst)
-                               break;
-                       i++;
-               }
-
-               break;
-       case DMA_SLAVE_BUSWIDTH_4_BYTES:
-               /* Direction doesn't matter here, it's 32/32 bits */
-               ctrl |=
-                       COH901318_CX_CTRL_SRC_BUS_SIZE_32_BITS |
-                       COH901318_CX_CTRL_DST_BUS_SIZE_32_BITS;
-
-               while (i < ARRAY_SIZE(burst_sizes)) {
-                       if (burst_sizes[i].burst_32bit <= maxburst)
-                               break;
-                       i++;
-               }
-
-               break;
-       default:
-               dev_err(COHC_2_DEV(cohc),
-                       "bad runtimeconfig: alien address width\n");
-               return -EINVAL;
-       }
-
-       ctrl |= burst_sizes[i].reg;
-       dev_dbg(COHC_2_DEV(cohc),
-               "selected burst size %d bytes for address width %d bytes, maxburst %d\n",
-               burst_sizes[i].burst_8bit, addr_width, maxburst);
-
-       cohc->addr = addr;
-       cohc->ctrl = ctrl;
-
-       return 0;
-}
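
The descending burst_sizes[] table reduces maxburst selection to a first-match scan. A standalone sketch of the 16-bit case, mirroring the loop above (illustrative only, not driver code):

	/* Pick the largest burst count that does not exceed maxburst; the
	 * final table entry (burst_16bit = 0) guarantees the scan stops. */
	static int pick_burst_16bit(u32 maxburst)
	{
		int i;

		for (i = 0; i < ARRAY_SIZE(burst_sizes); i++)
			if (burst_sizes[i].burst_16bit <= maxburst)
				break;
		/* e.g. maxburst = 20 skips 32 and 24 and selects 16, giving
		 * COH901318_CX_CTRL_BURST_COUNT_32_BYTES */
		return i;
	}
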
-
-static int coh901318_dma_slave_config(struct dma_chan *chan,
-                                          struct dma_slave_config *config)
-{
-       struct coh901318_chan *cohc = to_coh901318_chan(chan);
-
-       memcpy(&cohc->config, config, sizeof(*config));
-
-       return 0;
-}
-
-static void coh901318_base_init(struct dma_device *dma, const int *pick_chans,
-                               struct coh901318_base *base)
-{
-       int chans_i;
-       int i = 0;
-       struct coh901318_chan *cohc;
-
-       INIT_LIST_HEAD(&dma->channels);
-
-       for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) {
-               for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) {
-                       cohc = &base->chans[i];
-
-                       cohc->base = base;
-                       cohc->chan.device = dma;
-                       cohc->id = i;
-
-                       /* TODO: do we really need this lock if only one
-                        * client is connected to each channel?
-                        */
-
-                       spin_lock_init(&cohc->lock);
-
-                       cohc->nbr_active_done = 0;
-                       cohc->busy = 0;
-                       INIT_LIST_HEAD(&cohc->free);
-                       INIT_LIST_HEAD(&cohc->active);
-                       INIT_LIST_HEAD(&cohc->queue);
-
-                       tasklet_setup(&cohc->tasklet, dma_tasklet);
-
-                       list_add_tail(&cohc->chan.device_node,
-                                     &dma->channels);
-               }
-       }
-}
-
-static int __init coh901318_probe(struct platform_device *pdev)
-{
-       int err = 0;
-       struct coh901318_base *base;
-       int irq;
-       struct resource *io;
-
-       io = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       if (!io)
-               return -ENODEV;
-
-       /* Map DMA controller registers to virtual memory */
-       if (devm_request_mem_region(&pdev->dev,
-                                   io->start,
-                                   resource_size(io),
-                                   pdev->dev.driver->name) == NULL)
-               return -ENOMEM;
-
-       base = devm_kzalloc(&pdev->dev,
-                           ALIGN(sizeof(struct coh901318_base), 4) +
-                           U300_DMA_CHANNELS *
-                           sizeof(struct coh901318_chan),
-                           GFP_KERNEL);
-       if (!base)
-               return -ENOMEM;
-
-       base->chans = ((void *)base) + ALIGN(sizeof(struct coh901318_base), 4);
-
-       base->virtbase = devm_ioremap(&pdev->dev, io->start, resource_size(io));
-       if (!base->virtbase)
-               return -ENOMEM;
-
-       base->dev = &pdev->dev;
-       spin_lock_init(&base->pm.lock);
-       base->pm.started_channels = 0;
-
-       COH901318_DEBUGFS_ASSIGN(debugfs_dma_base, base);
-
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0)
-               return irq;
-
-       err = devm_request_irq(&pdev->dev, irq, dma_irq_handler, 0,
-                              "coh901318", base);
-       if (err)
-               return err;
-
-       base->irq = irq;
-
-       err = coh901318_pool_create(&base->pool, &pdev->dev,
-                                   sizeof(struct coh901318_lli),
-                                   32);
-       if (err)
-               return err;
-
-       /* init channels for device transfers */
-       coh901318_base_init(&base->dma_slave, dma_slave_channels,
-                           base);
-
-       dma_cap_zero(base->dma_slave.cap_mask);
-       dma_cap_set(DMA_SLAVE, base->dma_slave.cap_mask);
-
-       base->dma_slave.device_alloc_chan_resources = coh901318_alloc_chan_resources;
-       base->dma_slave.device_free_chan_resources = coh901318_free_chan_resources;
-       base->dma_slave.device_prep_slave_sg = coh901318_prep_slave_sg;
-       base->dma_slave.device_tx_status = coh901318_tx_status;
-       base->dma_slave.device_issue_pending = coh901318_issue_pending;
-       base->dma_slave.device_config = coh901318_dma_slave_config;
-       base->dma_slave.device_pause = coh901318_pause;
-       base->dma_slave.device_resume = coh901318_resume;
-       base->dma_slave.device_terminate_all = coh901318_terminate_all;
-       base->dma_slave.dev = &pdev->dev;
-
-       err = dma_async_device_register(&base->dma_slave);
-
-       if (err)
-               goto err_register_slave;
-
-       /* init channels for memcpy */
-       coh901318_base_init(&base->dma_memcpy, dma_memcpy_channels,
-                           base);
-
-       dma_cap_zero(base->dma_memcpy.cap_mask);
-       dma_cap_set(DMA_MEMCPY, base->dma_memcpy.cap_mask);
-
-       base->dma_memcpy.device_alloc_chan_resources = coh901318_alloc_chan_resources;
-       base->dma_memcpy.device_free_chan_resources = coh901318_free_chan_resources;
-       base->dma_memcpy.device_prep_dma_memcpy = coh901318_prep_memcpy;
-       base->dma_memcpy.device_tx_status = coh901318_tx_status;
-       base->dma_memcpy.device_issue_pending = coh901318_issue_pending;
-       base->dma_memcpy.device_config = coh901318_dma_slave_config;
-       base->dma_memcpy.device_pause = coh901318_pause;
-       base->dma_memcpy.device_resume = coh901318_resume;
-       base->dma_memcpy.device_terminate_all = coh901318_terminate_all;
-       base->dma_memcpy.dev = &pdev->dev;
-       /*
-        * This controller can only access addresses at even 32-bit
-        * boundaries, i.e. 2^2-byte aligned
-        */
-       base->dma_memcpy.copy_align = DMAENGINE_ALIGN_4_BYTES;
-       err = dma_async_device_register(&base->dma_memcpy);
-
-       if (err)
-               goto err_register_memcpy;
-
-       err = of_dma_controller_register(pdev->dev.of_node, coh901318_xlate,
-                                        base);
-       if (err)
-               goto err_register_of_dma;
-
-       platform_set_drvdata(pdev, base);
-       dev_info(&pdev->dev, "Initialized COH901318 DMA on virtual base 0x%p\n",
-               base->virtbase);
-
-       return err;
-
- err_register_of_dma:
-       dma_async_device_unregister(&base->dma_memcpy);
- err_register_memcpy:
-       dma_async_device_unregister(&base->dma_slave);
- err_register_slave:
-       coh901318_pool_destroy(&base->pool);
-       return err;
-}
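
The devm_kzalloc() in probe above carves both the base struct and the channel array out of a single allocation; a sketch of the resulting layout:

	base --> +---------------------------------------------+
	         | struct coh901318_base                       |
	         +---------------------------------------------+  <- base + ALIGN(sizeof(struct coh901318_base), 4)
	         | struct coh901318_chan chans[U300_DMA_CHANNELS]
	         +---------------------------------------------+
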
-static void coh901318_base_remove(struct coh901318_base *base, const int *pick_chans)
-{
-       int chans_i;
-       int i = 0;
-       struct coh901318_chan *cohc;
-
-       for (chans_i = 0; pick_chans[chans_i] != -1; chans_i += 2) {
-               for (i = pick_chans[chans_i]; i <= pick_chans[chans_i+1]; i++) {
-                       cohc = &base->chans[i];
-
-                       tasklet_kill(&cohc->tasklet);
-               }
-       }
-
-}
-
-static int coh901318_remove(struct platform_device *pdev)
-{
-       struct coh901318_base *base = platform_get_drvdata(pdev);
-
-       devm_free_irq(&pdev->dev, base->irq, base);
-
-       coh901318_base_remove(base, dma_slave_channels);
-       coh901318_base_remove(base, dma_memcpy_channels);
-
-       of_dma_controller_free(pdev->dev.of_node);
-       dma_async_device_unregister(&base->dma_memcpy);
-       dma_async_device_unregister(&base->dma_slave);
-       coh901318_pool_destroy(&base->pool);
-       return 0;
-}
-
-static const struct of_device_id coh901318_dt_match[] = {
-       { .compatible = "stericsson,coh901318" },
-       {},
-};
-
-static struct platform_driver coh901318_driver = {
-       .remove = coh901318_remove,
-       .driver = {
-               .name   = "coh901318",
-               .of_match_table = coh901318_dt_match,
-       },
-};
-
-static int __init coh901318_init(void)
-{
-       return platform_driver_probe(&coh901318_driver, coh901318_probe);
-}
-subsys_initcall(coh901318_init);
-
-static void __exit coh901318_exit(void)
-{
-       platform_driver_unregister(&coh901318_driver);
-}
-module_exit(coh901318_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Per Friden");
diff --git a/drivers/dma/coh901318.h b/drivers/dma/coh901318.h
deleted file mode 100644 (file)
index bbf5336..0000000
+++ /dev/null
@@ -1,141 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Copyright (C) 2007-2013 ST-Ericsson
- * DMA driver for COH 901 318
- * Author: Per Friden <per.friden@stericsson.com>
- */
-
-#ifndef COH901318_H
-#define COH901318_H
-
-#define MAX_DMA_PACKET_SIZE_SHIFT 11
-#define MAX_DMA_PACKET_SIZE (1 << MAX_DMA_PACKET_SIZE_SHIFT)
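
With MAX_DMA_PACKET_SIZE_SHIFT = 11, a single lli moves at most 1 << 11 = 2048 bytes; transfers larger than that are split across a chain of lli:s by the fill helpers declared below.
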
-
-struct device;
-
-struct coh901318_pool {
-       spinlock_t lock;
-       struct dma_pool *dmapool;
-       struct device *dev;
-
-#ifdef CONFIG_DEBUG_FS
-       int debugfs_pool_counter;
-#endif
-};
-
-/**
- * struct coh901318_lli - linked list item for DMAC
- * @control: control settings for DMAC
- * @src_addr: transfer source address
- * @dst_addr: transfer destination address
- * @link_addr: physical address of the next lli
- * @virt_link_addr: virtual address of next lli (only used by pool_free)
- * @phy_this: physical address of current lli (only used by pool_free)
- */
-struct coh901318_lli {
-       u32 control;
-       dma_addr_t src_addr;
-       dma_addr_t dst_addr;
-       dma_addr_t link_addr;
-
-       void *virt_link_addr;
-       dma_addr_t phy_this;
-};
-
-/**
- * coh901318_pool_create() - Creates a dma pool for lli:s
- * @pool: pool handle
- * @dev: dma device
- * @lli_nbr: number of lli:s in the pool
- * @align: address alignment of lli:s
- * returns 0 on success, otherwise non-zero
- */
-int coh901318_pool_create(struct coh901318_pool *pool,
-                         struct device *dev,
-                         size_t lli_nbr, size_t align);
-
-/**
- * coh901318_pool_destroy() - Destroys the dma pool
- * @pool: pool handle
- * returns 0 on success, otherwise non-zero
- */
-int coh901318_pool_destroy(struct coh901318_pool *pool);
-
-/**
- * coh901318_lli_alloc() - Allocates a linked list
- *
- * @pool: pool handle
- * @len: length of the list (number of lli:s)
- * return: non-NULL on success, otherwise NULL
- */
-struct coh901318_lli *
-coh901318_lli_alloc(struct coh901318_pool *pool,
-                   unsigned int len);
-
-/**
- * coh901318_lli_free() - Returns the linked list items to the pool
- * @pool: pool handle
- * @lli: reference to lli pointer to be freed
- */
-void coh901318_lli_free(struct coh901318_pool *pool,
-                       struct coh901318_lli **lli);
-
-/**
- * coh901318_lli_fill_memcpy() - Prepares the lli:s for dma memcpy
- * @pool: pool handle
- * @lli: allocated lli
- * @src: src address
- * @size: transfer size
- * @dst: destination address
- * @ctrl_chained: ctrl for chained lli
- * @ctrl_last: ctrl for the last lli
- * returns number of CPU interrupts for the lli, negative on error.
- */
-int
-coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
-                         struct coh901318_lli *lli,
-                         dma_addr_t src, unsigned int size,
-                         dma_addr_t dst, u32 ctrl_chained, u32 ctrl_last);
-
-/**
- * coh901318_lli_fill_single() - Prepares the lli:s for dma single transfer
- * @pool: pool handle
- * @lli: allocated lli
- * @buf: transfer buffer
- * @size: transfer size
- * @dev_addr: address of peripheral
- * @ctrl_chained: ctrl for chained lli
- * @ctrl_last: ctrl for the last lli
- * @dir: direction of transfer (to or from device)
- * returns number of CPU interrupts for the lli, negative on error.
- */
-int
-coh901318_lli_fill_single(struct coh901318_pool *pool,
-                         struct coh901318_lli *lli,
-                         dma_addr_t buf, unsigned int size,
-                         dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_last,
-                         enum dma_transfer_direction dir);
-
-/**
- * coh901318_lli_fill_sg() - Prepares the lli:s for dma scatter list transfer
- * @pool: pool handle
- * @lli: allocated lli
- * @sg: scatter gather list
- * @nents: number of entries in sg
- * @dev_addr: address of peripheral
- * @ctrl_chained: ctrl for chained lli
- * @ctrl: ctrl of middle lli
- * @ctrl_last: ctrl for the last lli
- * @dir: direction of transfer (to or from device)
- * @ctrl_irq_mask: ctrl mask for CPU interrupt
- * returns number of CPU interrupts for the lli, negative on error.
- */
-int
-coh901318_lli_fill_sg(struct coh901318_pool *pool,
-                     struct coh901318_lli *lli,
-                     struct scatterlist *sg, unsigned int nents,
-                     dma_addr_t dev_addr, u32 ctrl_chained,
-                     u32 ctrl, u32 ctrl_last,
-                     enum dma_transfer_direction dir, u32 ctrl_irq_mask);
-
-#endif /* COH901318_H */
diff --git a/drivers/dma/coh901318_lli.c b/drivers/dma/coh901318_lli.c
deleted file mode 100644 (file)
index 6b6c2fd..0000000
+++ /dev/null
@@ -1,313 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * drivers/dma/coh901318_lli.c
- *
- * Copyright (C) 2007-2009 ST-Ericsson
- * Support functions for handling lli for dma
- * Author: Per Friden <per.friden@stericsson.com>
- */
-
-#include <linux/spinlock.h>
-#include <linux/memory.h>
-#include <linux/gfp.h>
-#include <linux/dmapool.h>
-#include <linux/dmaengine.h>
-
-#include "coh901318.h"
-
-#if (defined(CONFIG_DEBUG_FS) && defined(CONFIG_U300_DEBUG))
-#define DEBUGFS_POOL_COUNTER_RESET(pool) (pool->debugfs_pool_counter = 0)
-#define DEBUGFS_POOL_COUNTER_ADD(pool, add) (pool->debugfs_pool_counter += add)
-#else
-#define DEBUGFS_POOL_COUNTER_RESET(pool)
-#define DEBUGFS_POOL_COUNTER_ADD(pool, add)
-#endif
-
-static struct coh901318_lli *
-coh901318_lli_next(struct coh901318_lli *data)
-{
-       if (data == NULL || data->link_addr == 0)
-               return NULL;
-
-       return (struct coh901318_lli *) data->virt_link_addr;
-}
-
-int coh901318_pool_create(struct coh901318_pool *pool,
-                         struct device *dev,
-                         size_t size, size_t align)
-{
-       spin_lock_init(&pool->lock);
-       pool->dev = dev;
-       pool->dmapool = dma_pool_create("lli_pool", dev, size, align, 0);
-
-       DEBUGFS_POOL_COUNTER_RESET(pool);
-       return 0;
-}
-
-int coh901318_pool_destroy(struct coh901318_pool *pool)
-{
-
-       dma_pool_destroy(pool->dmapool);
-       return 0;
-}
-
-struct coh901318_lli *
-coh901318_lli_alloc(struct coh901318_pool *pool, unsigned int len)
-{
-       int i;
-       struct coh901318_lli *head;
-       struct coh901318_lli *lli;
-       struct coh901318_lli *lli_prev;
-       dma_addr_t phy;
-
-       if (len == 0)
-               return NULL;
-
-       spin_lock(&pool->lock);
-
-       head = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
-
-       if (head == NULL)
-               goto err;
-
-       DEBUGFS_POOL_COUNTER_ADD(pool, 1);
-
-       lli = head;
-       lli->phy_this = phy;
-       lli->link_addr = 0x00000000;
-       lli->virt_link_addr = NULL;
-
-       for (i = 1; i < len; i++) {
-               lli_prev = lli;
-
-               lli = dma_pool_alloc(pool->dmapool, GFP_NOWAIT, &phy);
-
-               if (lli == NULL)
-                       goto err_clean_up;
-
-               DEBUGFS_POOL_COUNTER_ADD(pool, 1);
-               lli->phy_this = phy;
-               lli->link_addr = 0x00000000;
-               lli->virt_link_addr = NULL;
-
-               lli_prev->link_addr = phy;
-               lli_prev->virt_link_addr = lli;
-       }
-
-       spin_unlock(&pool->lock);
-
-       return head;
-
- err:
-       spin_unlock(&pool->lock);
-       return NULL;
-
- err_clean_up:
-       lli_prev->link_addr = 0x00000000U;
-       spin_unlock(&pool->lock);
-       coh901318_lli_free(pool, &head);
-       return NULL;
-}
-
-void coh901318_lli_free(struct coh901318_pool *pool,
-                       struct coh901318_lli **lli)
-{
-       struct coh901318_lli *l;
-       struct coh901318_lli *next;
-
-       if (lli == NULL)
-               return;
-
-       l = *lli;
-
-       if (l == NULL)
-               return;
-
-       spin_lock(&pool->lock);
-
-       while (l->link_addr) {
-               next = l->virt_link_addr;
-               dma_pool_free(pool->dmapool, l, l->phy_this);
-               DEBUGFS_POOL_COUNTER_ADD(pool, -1);
-               l = next;
-       }
-       dma_pool_free(pool->dmapool, l, l->phy_this);
-       DEBUGFS_POOL_COUNTER_ADD(pool, -1);
-
-       spin_unlock(&pool->lock);
-       *lli = NULL;
-}
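
A minimal lifecycle sketch for the pool helpers above (hypothetical caller; pool, src, dst, size and the ctrl words are assumed to be set up elsewhere, hardware programming elided):

	struct coh901318_lli *lli;
	unsigned int nbr = DIV_ROUND_UP(size, MAX_DMA_PACKET_SIZE);

	lli = coh901318_lli_alloc(&pool, nbr);
	if (lli) {
		coh901318_lli_fill_memcpy(&pool, lli, src, size, dst,
					  ctrl_chained, ctrl_eom);
		/* ... hand the chain to the hardware, wait for completion ... */
		coh901318_lli_free(&pool, &lli);	/* lli is NULL afterwards */
	}
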
-
-int
-coh901318_lli_fill_memcpy(struct coh901318_pool *pool,
-                         struct coh901318_lli *lli,
-                         dma_addr_t source, unsigned int size,
-                         dma_addr_t destination, u32 ctrl_chained,
-                         u32 ctrl_eom)
-{
-       int s = size;
-       dma_addr_t src = source;
-       dma_addr_t dst = destination;
-
-       lli->src_addr = src;
-       lli->dst_addr = dst;
-
-       while (lli->link_addr) {
-               lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
-               lli->src_addr = src;
-               lli->dst_addr = dst;
-
-               s -= MAX_DMA_PACKET_SIZE;
-               lli = coh901318_lli_next(lli);
-
-               src += MAX_DMA_PACKET_SIZE;
-               dst += MAX_DMA_PACKET_SIZE;
-       }
-
-       lli->control = ctrl_eom | s;
-       lli->src_addr = src;
-       lli->dst_addr = dst;
-
-       return 0;
-}
-
-int
-coh901318_lli_fill_single(struct coh901318_pool *pool,
-                         struct coh901318_lli *lli,
-                         dma_addr_t buf, unsigned int size,
-                         dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl_eom,
-                         enum dma_transfer_direction dir)
-{
-       int s = size;
-       dma_addr_t src;
-       dma_addr_t dst;
-
-
-       if (dir == DMA_MEM_TO_DEV) {
-               src = buf;
-               dst = dev_addr;
-
-       } else if (dir == DMA_DEV_TO_MEM) {
-
-               src = dev_addr;
-               dst = buf;
-       } else {
-               return -EINVAL;
-       }
-
-       while (lli->link_addr) {
-               size_t block_size = MAX_DMA_PACKET_SIZE;
-               lli->control = ctrl_chained | MAX_DMA_PACKET_SIZE;
-
-               /* If we are on the next-to-final block and there will
-                * be less than half a DMA packet left for the last
-                * block, then we want to make this block a little
-                * smaller to balance the sizes. This is meant to
-                * avoid too small transfers if the buffer size is
-                * (MAX_DMA_PACKET_SIZE*N + 1) */
-               if (s < (MAX_DMA_PACKET_SIZE + MAX_DMA_PACKET_SIZE/2))
-                       block_size = MAX_DMA_PACKET_SIZE/2;
-
-               s -= block_size;
-               lli->src_addr = src;
-               lli->dst_addr = dst;
-
-               lli = coh901318_lli_next(lli);
-
-               if (dir == DMA_MEM_TO_DEV)
-                       src += block_size;
-               else if (dir == DMA_DEV_TO_MEM)
-                       dst += block_size;
-       }
-
-       lli->control = ctrl_eom | s;
-       lli->src_addr = src;
-       lli->dst_addr = dst;
-
-       return 0;
-}
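
A worked example of the balancing above, assuming MAX_DMA_PACKET_SIZE = 2048: a 4097-byte transfer over a three-lli chain would naively split as 2048/2048/1; the next-to-final check instead yields 2048/1024/1025, so no block degenerates to a single-byte transfer.
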
-
-int
-coh901318_lli_fill_sg(struct coh901318_pool *pool,
-                     struct coh901318_lli *lli,
-                     struct scatterlist *sgl, unsigned int nents,
-                     dma_addr_t dev_addr, u32 ctrl_chained, u32 ctrl,
-                     u32 ctrl_last,
-                     enum dma_transfer_direction dir, u32 ctrl_irq_mask)
-{
-       int i;
-       struct scatterlist *sg;
-       u32 ctrl_sg;
-       dma_addr_t src = 0;
-       dma_addr_t dst = 0;
-       u32 bytes_to_transfer;
-       u32 elem_size;
-
-       if (lli == NULL)
-               goto err;
-
-       spin_lock(&pool->lock);
-
-       if (dir == DMA_MEM_TO_DEV)
-               dst = dev_addr;
-       else if (dir == DMA_DEV_TO_MEM)
-               src = dev_addr;
-       else
-               goto err;
-
-       for_each_sg(sgl, sg, nents, i) {
-               if (sg_is_chain(sg)) {
-                       /* sg continues to the next sg-element; don't
-                        * send ctrl_finish until the last
-                        * sg-element in the chain
-                        */
-                       ctrl_sg = ctrl_chained;
-               } else if (i == nents - 1)
-                       ctrl_sg = ctrl_last;
-               else
-                       ctrl_sg = ctrl ? ctrl : ctrl_last;
-
-
-               if (dir == DMA_MEM_TO_DEV)
-                       /* set the source address from the sg entry */
-                       src = sg_dma_address(sg);
-               else
-                       /* set the destination address from the sg entry */
-                       dst = sg_dma_address(sg);
-
-               bytes_to_transfer = sg_dma_len(sg);
-
-               while (bytes_to_transfer) {
-                       u32 val;
-
-                       if (bytes_to_transfer > MAX_DMA_PACKET_SIZE) {
-                               elem_size = MAX_DMA_PACKET_SIZE;
-                               val = ctrl_chained;
-                       } else {
-                               elem_size = bytes_to_transfer;
-                               val = ctrl_sg;
-                       }
-
-                       lli->control = val | elem_size;
-                       lli->src_addr = src;
-                       lli->dst_addr = dst;
-
-                       if (dir == DMA_DEV_TO_MEM)
-                               dst += elem_size;
-                       else
-                               src += elem_size;
-
-                       BUG_ON(lli->link_addr & 3);
-
-                       bytes_to_transfer -= elem_size;
-                       lli = coh901318_lli_next(lli);
-               }
-
-       }
-       spin_unlock(&pool->lock);
-
-       return 0;
- err:
-       spin_unlock(&pool->lock);
-       return -EINVAL;
-}
index 612d353..ebee94d 100644 (file)
@@ -1004,6 +1004,18 @@ static const struct jz4780_dma_soc_data jz4725b_dma_soc_data = {
                 JZ_SOC_DATA_BREAK_LINKS,
 };
 
+static const struct jz4780_dma_soc_data jz4760_dma_soc_data = {
+       .nb_channels = 5,
+       .transfer_ord_max = 6,
+       .flags = JZ_SOC_DATA_PER_CHAN_PM | JZ_SOC_DATA_NO_DCKES_DCKEC,
+};
+
+static const struct jz4780_dma_soc_data jz4760b_dma_soc_data = {
+       .nb_channels = 5,
+       .transfer_ord_max = 6,
+       .flags = JZ_SOC_DATA_PER_CHAN_PM,
+};
+
 static const struct jz4780_dma_soc_data jz4770_dma_soc_data = {
        .nb_channels = 6,
        .transfer_ord_max = 6,
@@ -1031,6 +1043,8 @@ static const struct jz4780_dma_soc_data x1830_dma_soc_data = {
 static const struct of_device_id jz4780_dma_dt_match[] = {
        { .compatible = "ingenic,jz4740-dma", .data = &jz4740_dma_soc_data },
        { .compatible = "ingenic,jz4725b-dma", .data = &jz4725b_dma_soc_data },
+       { .compatible = "ingenic,jz4760-dma", .data = &jz4760_dma_soc_data },
+       { .compatible = "ingenic,jz4760b-dma", .data = &jz4760b_dma_soc_data },
        { .compatible = "ingenic,jz4770-dma", .data = &jz4770_dma_soc_data },
        { .compatible = "ingenic,jz4780-dma", .data = &jz4780_dma_soc_data },
        { .compatible = "ingenic,x1000-dma", .data = &x1000_dma_soc_data },
index e164f32..d9e4ac3 100644 (file)
 #include <linux/device.h>
 #include <linux/dmaengine.h>
 #include <linux/dmapool.h>
+#include <linux/dma-mapping.h>
 #include <linux/err.h>
 #include <linux/interrupt.h>
 #include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_dma.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
+#include <linux/slab.h>
 #include <linux/types.h>
 
 #include "dw-axi-dmac.h"
@@ -195,43 +200,56 @@ static inline const char *axi_chan_name(struct axi_dma_chan *chan)
        return dma_chan_name(&chan->vc.chan);
 }
 
-static struct axi_dma_desc *axi_desc_get(struct axi_dma_chan *chan)
+static struct axi_dma_desc *axi_desc_alloc(u32 num)
 {
-       struct dw_axi_dma *dw = chan->chip->dw;
        struct axi_dma_desc *desc;
+
+       desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
+       if (!desc)
+               return NULL;
+
+       desc->hw_desc = kcalloc(num, sizeof(*desc->hw_desc), GFP_NOWAIT);
+       if (!desc->hw_desc) {
+               kfree(desc);
+               return NULL;
+       }
+
+       return desc;
+}
+
+static struct axi_dma_lli *axi_desc_get(struct axi_dma_chan *chan,
+                                       dma_addr_t *addr)
+{
+       struct axi_dma_lli *lli;
        dma_addr_t phys;
 
-       desc = dma_pool_zalloc(dw->desc_pool, GFP_NOWAIT, &phys);
-       if (unlikely(!desc)) {
+       lli = dma_pool_zalloc(chan->desc_pool, GFP_NOWAIT, &phys);
+       if (unlikely(!lli)) {
                dev_err(chan2dev(chan), "%s: not enough descriptors available\n",
                        axi_chan_name(chan));
                return NULL;
        }
 
        atomic_inc(&chan->descs_allocated);
-       INIT_LIST_HEAD(&desc->xfer_list);
-       desc->vd.tx.phys = phys;
-       desc->chan = chan;
+       *addr = phys;
 
-       return desc;
+       return lli;
 }
 
 static void axi_desc_put(struct axi_dma_desc *desc)
 {
        struct axi_dma_chan *chan = desc->chan;
-       struct dw_axi_dma *dw = chan->chip->dw;
-       struct axi_dma_desc *child, *_next;
-       unsigned int descs_put = 0;
+       int count = atomic_read(&chan->descs_allocated);
+       struct axi_dma_hw_desc *hw_desc;
+       int descs_put;
 
-       list_for_each_entry_safe(child, _next, &desc->xfer_list, xfer_list) {
-               list_del(&child->xfer_list);
-               dma_pool_free(dw->desc_pool, child, child->vd.tx.phys);
-               descs_put++;
+       for (descs_put = 0; descs_put < count; descs_put++) {
+               hw_desc = &desc->hw_desc[descs_put];
+               dma_pool_free(chan->desc_pool, hw_desc->lli, hw_desc->llp);
        }
 
-       dma_pool_free(dw->desc_pool, desc, desc->vd.tx.phys);
-       descs_put++;
-
+       kfree(desc->hw_desc);
+       kfree(desc);
        atomic_sub(descs_put, &chan->descs_allocated);
        dev_vdbg(chan2dev(chan), "%s: %d descs put, %d still allocated\n",
                axi_chan_name(chan), descs_put,
@@ -248,19 +266,41 @@ dma_chan_tx_status(struct dma_chan *dchan, dma_cookie_t cookie,
                  struct dma_tx_state *txstate)
 {
        struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
-       enum dma_status ret;
+       struct virt_dma_desc *vdesc;
+       enum dma_status status;
+       u32 completed_length;
+       unsigned long flags;
+       u32 completed_blocks;
+       size_t bytes = 0;
+       u32 length;
+       u32 len;
 
-       ret = dma_cookie_status(dchan, cookie, txstate);
+       status = dma_cookie_status(dchan, cookie, txstate);
+       if (status == DMA_COMPLETE || !txstate)
+               return status;
 
-       if (chan->is_paused && ret == DMA_IN_PROGRESS)
-               ret = DMA_PAUSED;
+       spin_lock_irqsave(&chan->vc.lock, flags);
 
-       return ret;
+       vdesc = vchan_find_desc(&chan->vc, cookie);
+       if (vdesc) {
+               length = vd_to_axi_desc(vdesc)->length;
+               completed_blocks = vd_to_axi_desc(vdesc)->completed_blocks;
+               len = vd_to_axi_desc(vdesc)->hw_desc[0].len;
+               completed_length = completed_blocks * len;
+               bytes = length - completed_length;
+       } else {
+               bytes = vd_to_axi_desc(vdesc)->length;
+       }
+
+       spin_unlock_irqrestore(&chan->vc.lock, flags);
+       dma_set_residue(txstate, bytes);
+
+       return status;
 }
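
Residue arithmetic for the reworked tx_status, with assumed numbers: a descriptor of length 4096 built from 1024-byte segments that has completed_blocks = 2 reports completed_length = 2 * 1024 = 2048 and hands residue = 4096 - 2048 = 2048 bytes to dma_set_residue().
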
 
-static void write_desc_llp(struct axi_dma_desc *desc, dma_addr_t adr)
+static void write_desc_llp(struct axi_dma_hw_desc *desc, dma_addr_t adr)
 {
-       desc->lli.llp = cpu_to_le64(adr);
+       desc->lli->llp = cpu_to_le64(adr);
 }
 
 static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr)
@@ -268,6 +308,29 @@ static void write_chan_llp(struct axi_dma_chan *chan, dma_addr_t adr)
        axi_chan_iowrite64(chan, CH_LLP, adr);
 }
 
+static void dw_axi_dma_set_byte_halfword(struct axi_dma_chan *chan, bool set)
+{
+       u32 offset = DMAC_APB_BYTE_WR_CH_EN;
+       u32 reg_width, val;
+
+       if (!chan->chip->apb_regs) {
+               dev_dbg(chan->chip->dev, "apb_regs not initialized\n");
+               return;
+       }
+
+       reg_width = __ffs(chan->config.dst_addr_width);
+       if (reg_width == DWAXIDMAC_TRANS_WIDTH_16)
+               offset = DMAC_APB_HALFWORD_WR_CH_EN;
+
+       val = ioread32(chan->chip->apb_regs + offset);
+
+       if (set)
+               val |= BIT(chan->id);
+       else
+               val &= ~BIT(chan->id);
+
+       iowrite32(val, chan->chip->apb_regs + offset);
+}
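
As an example (assuming the usual enum encoding where DWAXIDMAC_TRANS_WIDTH_16 == 1): a channel configured with dst_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES gives reg_width = __ffs(2) = 1, steering the update to DMAC_APB_HALFWORD_WR_CH_EN; every other width keeps the byte-enable register at DMAC_APB_BYTE_WR_CH_EN.
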
 /* Called in chan locked context */
 static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
                                      struct axi_dma_desc *first)
@@ -293,9 +356,26 @@ static void axi_chan_block_xfer_start(struct axi_dma_chan *chan,
               priority << CH_CFG_H_PRIORITY_POS |
               DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_DST_POS |
               DWAXIDMAC_HS_SEL_HW << CH_CFG_H_HS_SEL_SRC_POS);
+       switch (chan->direction) {
+       case DMA_MEM_TO_DEV:
+               dw_axi_dma_set_byte_halfword(chan, true);
+               reg |= (chan->config.device_fc ?
+                       DWAXIDMAC_TT_FC_MEM_TO_PER_DST :
+                       DWAXIDMAC_TT_FC_MEM_TO_PER_DMAC)
+                       << CH_CFG_H_TT_FC_POS;
+               break;
+       case DMA_DEV_TO_MEM:
+               reg |= (chan->config.device_fc ?
+                       DWAXIDMAC_TT_FC_PER_TO_MEM_SRC :
+                       DWAXIDMAC_TT_FC_PER_TO_MEM_DMAC)
+                       << CH_CFG_H_TT_FC_POS;
+               break;
+       default:
+               break;
+       }
        axi_chan_iowrite32(chan, CH_CFG_H, reg);
 
-       write_chan_llp(chan, first->vd.tx.phys | lms);
+       write_chan_llp(chan, first->hw_desc[0].llp | lms);
 
        irq_mask = DWAXIDMAC_IRQ_DMA_TRF | DWAXIDMAC_IRQ_ALL_ERR;
        axi_chan_irq_sig_set(chan, irq_mask);
@@ -333,6 +413,13 @@ static void dma_chan_issue_pending(struct dma_chan *dchan)
        spin_unlock_irqrestore(&chan->vc.lock, flags);
 }
 
+static void dw_axi_dma_synchronize(struct dma_chan *dchan)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+       vchan_synchronize(&chan->vc);
+}
+
 static int dma_chan_alloc_chan_resources(struct dma_chan *dchan)
 {
        struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
@@ -344,6 +431,15 @@ static int dma_chan_alloc_chan_resources(struct dma_chan *dchan)
                return -EBUSY;
        }
 
+       /* LLI address must be aligned to a 64-byte boundary */
+       chan->desc_pool = dma_pool_create(dev_name(chan2dev(chan)),
+                                         chan->chip->dev,
+                                         sizeof(struct axi_dma_lli),
+                                         64, 0);
+       if (!chan->desc_pool) {
+               dev_err(chan2dev(chan), "No memory for descriptors\n");
+               return -ENOMEM;
+       }
        dev_vdbg(dchan2dev(dchan), "%s: allocating\n", axi_chan_name(chan));
 
        pm_runtime_get(chan->chip->dev);
@@ -365,6 +461,8 @@ static void dma_chan_free_chan_resources(struct dma_chan *dchan)
 
        vchan_free_chan_resources(&chan->vc);
 
+       dma_pool_destroy(chan->desc_pool);
+       chan->desc_pool = NULL;
        dev_vdbg(dchan2dev(dchan),
                 "%s: free resources, descriptor still allocated: %u\n",
                 axi_chan_name(chan), atomic_read(&chan->descs_allocated));
@@ -372,73 +470,398 @@ static void dma_chan_free_chan_resources(struct dma_chan *dchan)
        pm_runtime_put(chan->chip->dev);
 }
 
+static void dw_axi_dma_set_hw_channel(struct axi_dma_chip *chip,
+                                     u32 handshake_num, bool set)
+{
+       unsigned long start = 0;
+       unsigned long reg_value;
+       unsigned long reg_mask;
+       unsigned long reg_set;
+       unsigned long mask;
+       unsigned long val;
+
+       if (!chip->apb_regs) {
+               dev_dbg(chip->dev, "apb_regs not initialized\n");
+               return;
+       }
+
+       /*
+        * An unused DMA channel has a default value of 0x3F.
+        * Lock the DMA channel by assigning a handshake number to it.
+        * Unlock the DMA channel by assigning 0x3F to it.
+        */
+       if (set) {
+               reg_set = UNUSED_CHANNEL;
+               val = handshake_num;
+       } else {
+               reg_set = handshake_num;
+               val = UNUSED_CHANNEL;
+       }
+
+       reg_value = lo_hi_readq(chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
+
+       for_each_set_clump8(start, reg_mask, &reg_value, 64) {
+               if (reg_mask == reg_set) {
+                       mask = GENMASK_ULL(start + 7, start);
+                       reg_value &= ~mask;
+                       reg_value |= rol64(val, start);
+                       lo_hi_writeq(reg_value,
+                                    chip->apb_regs + DMAC_APB_HW_HS_SEL_0);
+                       break;
+               }
+       }
+}
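
Concretely, assuming UNUSED_CHANNEL encodes the 0x3F idle pattern: with all slots free, DMAC_APB_HW_HS_SEL_0 reads 0x3F in each 8-bit clump; locking handshake 5 rewrites the first 0x3F clump to 0x05, and the matching unlock scans for the 0x05 clump and restores 0x3F.
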
+
 /*
  * If DW_axi_dmac sees CHx_CTL.ShadowReg_Or_LLI_Last bit of the fetched LLI
  * as 1, it understands that the current block is the final block in the
  * transfer and completes the DMA transfer operation at the end of the
  * current block transfer.
  */
-static void set_desc_last(struct axi_dma_desc *desc)
+static void set_desc_last(struct axi_dma_hw_desc *desc)
 {
        u32 val;
 
-       val = le32_to_cpu(desc->lli.ctl_hi);
+       val = le32_to_cpu(desc->lli->ctl_hi);
        val |= CH_CTL_H_LLI_LAST;
-       desc->lli.ctl_hi = cpu_to_le32(val);
+       desc->lli->ctl_hi = cpu_to_le32(val);
 }
 
-static void write_desc_sar(struct axi_dma_desc *desc, dma_addr_t adr)
+static void write_desc_sar(struct axi_dma_hw_desc *desc, dma_addr_t adr)
 {
-       desc->lli.sar = cpu_to_le64(adr);
+       desc->lli->sar = cpu_to_le64(adr);
 }
 
-static void write_desc_dar(struct axi_dma_desc *desc, dma_addr_t adr)
+static void write_desc_dar(struct axi_dma_hw_desc *desc, dma_addr_t adr)
 {
-       desc->lli.dar = cpu_to_le64(adr);
+       desc->lli->dar = cpu_to_le64(adr);
 }
 
-static void set_desc_src_master(struct axi_dma_desc *desc)
+static void set_desc_src_master(struct axi_dma_hw_desc *desc)
 {
        u32 val;
 
        /* Select AXI0 for source master */
-       val = le32_to_cpu(desc->lli.ctl_lo);
+       val = le32_to_cpu(desc->lli->ctl_lo);
        val &= ~CH_CTL_L_SRC_MAST;
-       desc->lli.ctl_lo = cpu_to_le32(val);
+       desc->lli->ctl_lo = cpu_to_le32(val);
 }
 
-static void set_desc_dest_master(struct axi_dma_desc *desc)
+static void set_desc_dest_master(struct axi_dma_hw_desc *hw_desc,
+                                struct axi_dma_desc *desc)
 {
        u32 val;
 
        /* Select AXI1 for source master if available */
-       val = le32_to_cpu(desc->lli.ctl_lo);
+       val = le32_to_cpu(hw_desc->lli->ctl_lo);
        if (desc->chan->chip->dw->hdata->nr_masters > 1)
                val |= CH_CTL_L_DST_MAST;
        else
                val &= ~CH_CTL_L_DST_MAST;
 
-       desc->lli.ctl_lo = cpu_to_le32(val);
+       hw_desc->lli->ctl_lo = cpu_to_le32(val);
+}
+
+static int dw_axi_dma_set_hw_desc(struct axi_dma_chan *chan,
+                                 struct axi_dma_hw_desc *hw_desc,
+                                 dma_addr_t mem_addr, size_t len)
+{
+       unsigned int data_width = BIT(chan->chip->dw->hdata->m_data_width);
+       unsigned int reg_width;
+       unsigned int mem_width;
+       dma_addr_t device_addr;
+       size_t axi_block_ts;
+       size_t block_ts;
+       u32 ctllo, ctlhi;
+       u32 burst_len;
+
+       axi_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+       mem_width = __ffs(data_width | mem_addr | len);
+       if (mem_width > DWAXIDMAC_TRANS_WIDTH_32)
+               mem_width = DWAXIDMAC_TRANS_WIDTH_32;
+
+       if (!IS_ALIGNED(mem_addr, 4)) {
+               dev_err(chan->chip->dev, "invalid buffer alignment\n");
+               return -EINVAL;
+       }
+
+       switch (chan->direction) {
+       case DMA_MEM_TO_DEV:
+               reg_width = __ffs(chan->config.dst_addr_width);
+               device_addr = chan->config.dst_addr;
+               ctllo = reg_width << CH_CTL_L_DST_WIDTH_POS |
+                       mem_width << CH_CTL_L_SRC_WIDTH_POS |
+                       DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_DST_INC_POS |
+                       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS;
+               block_ts = len >> mem_width;
+               break;
+       case DMA_DEV_TO_MEM:
+               reg_width = __ffs(chan->config.src_addr_width);
+               device_addr = chan->config.src_addr;
+               ctllo = reg_width << CH_CTL_L_SRC_WIDTH_POS |
+                       mem_width << CH_CTL_L_DST_WIDTH_POS |
+                       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
+                       DWAXIDMAC_CH_CTL_L_NOINC << CH_CTL_L_SRC_INC_POS;
+               block_ts = len >> reg_width;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       if (block_ts > axi_block_ts)
+               return -EINVAL;
+
+       hw_desc->lli = axi_desc_get(chan, &hw_desc->llp);
+       if (unlikely(!hw_desc->lli))
+               return -ENOMEM;
+
+       ctlhi = CH_CTL_H_LLI_VALID;
+
+       if (chan->chip->dw->hdata->restrict_axi_burst_len) {
+               burst_len = chan->chip->dw->hdata->axi_rw_burst_len;
+               ctlhi |= CH_CTL_H_ARLEN_EN | CH_CTL_H_AWLEN_EN |
+                        burst_len << CH_CTL_H_ARLEN_POS |
+                        burst_len << CH_CTL_H_AWLEN_POS;
+       }
+
+       hw_desc->lli->ctl_hi = cpu_to_le32(ctlhi);
+
+       if (chan->direction == DMA_MEM_TO_DEV) {
+               write_desc_sar(hw_desc, mem_addr);
+               write_desc_dar(hw_desc, device_addr);
+       } else {
+               write_desc_sar(hw_desc, device_addr);
+               write_desc_dar(hw_desc, mem_addr);
+       }
+
+       hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1);
+
+       ctllo |= DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
+                DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS;
+       hw_desc->lli->ctl_lo = cpu_to_le32(ctllo);
+
+       set_desc_src_master(hw_desc);
+
+       hw_desc->len = len;
+       return 0;
+}
+
+static size_t calculate_block_len(struct axi_dma_chan *chan,
+                                 dma_addr_t dma_addr, size_t buf_len,
+                                 enum dma_transfer_direction direction)
+{
+       u32 data_width, reg_width, mem_width;
+       size_t axi_block_ts, block_len;
+
+       axi_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+
+       switch (direction) {
+       case DMA_MEM_TO_DEV:
+               data_width = BIT(chan->chip->dw->hdata->m_data_width);
+               mem_width = __ffs(data_width | dma_addr | buf_len);
+               if (mem_width > DWAXIDMAC_TRANS_WIDTH_32)
+                       mem_width = DWAXIDMAC_TRANS_WIDTH_32;
+
+               block_len = axi_block_ts << mem_width;
+               break;
+       case DMA_DEV_TO_MEM:
+               reg_width = __ffs(chan->config.src_addr_width);
+               block_len = axi_block_ts << reg_width;
+               break;
+       default:
+               block_len = 0;
+       }
+
+       return block_len;
+}
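
Worked numbers for calculate_block_len(): with a per-channel block_size of 4096 transfers and the memory width clamped to 32 bits (mem_width = 2), DMA_MEM_TO_DEV yields block_len = 4096 << 2 = 16384 bytes; DMA_DEV_TO_MEM with a 2-byte src_addr_width yields 4096 << 1 = 8192 bytes.
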
+
+static struct dma_async_tx_descriptor *
+dw_axi_dma_chan_prep_cyclic(struct dma_chan *dchan, dma_addr_t dma_addr,
+                           size_t buf_len, size_t period_len,
+                           enum dma_transfer_direction direction,
+                           unsigned long flags)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       struct axi_dma_hw_desc *hw_desc = NULL;
+       struct axi_dma_desc *desc = NULL;
+       dma_addr_t src_addr = dma_addr;
+       u32 num_periods, num_segments;
+       size_t axi_block_len;
+       u32 total_segments;
+       u32 segment_len;
+       unsigned int i;
+       int status;
+       u64 llp = 0;
+       u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+       num_periods = buf_len / period_len;
+
+       axi_block_len = calculate_block_len(chan, dma_addr, buf_len, direction);
+       if (axi_block_len == 0)
+               return NULL;
+
+       num_segments = DIV_ROUND_UP(period_len, axi_block_len);
+       segment_len = DIV_ROUND_UP(period_len, num_segments);
+
+       total_segments = num_periods * num_segments;
+
+       desc = axi_desc_alloc(total_segments);
+       if (unlikely(!desc))
+               goto err_desc_get;
+
+       chan->direction = direction;
+       desc->chan = chan;
+       chan->cyclic = true;
+       desc->length = 0;
+       desc->period_len = period_len;
+
+       for (i = 0; i < total_segments; i++) {
+               hw_desc = &desc->hw_desc[i];
+
+               status = dw_axi_dma_set_hw_desc(chan, hw_desc, src_addr,
+                                               segment_len);
+               if (status < 0)
+                       goto err_desc_get;
+
+               desc->length += hw_desc->len;
+               /* Set end-of-link to the linked descriptor, so that cyclic
+                * callback function can be triggered during interrupt.
+                */
+               set_desc_last(hw_desc);
+
+               src_addr += segment_len;
+       }
+
+       llp = desc->hw_desc[0].llp;
+
+       /* Link the hw descriptors into a ring: the last entry points back to the first */
+       do {
+               hw_desc = &desc->hw_desc[--total_segments];
+               write_desc_llp(hw_desc, llp | lms);
+               llp = hw_desc->llp;
+       } while (total_segments);
+
+       dw_axi_dma_set_hw_channel(chan->chip, chan->hw_handshake_num, true);
+
+       return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+err_desc_get:
+       if (desc)
+               axi_desc_put(desc);
+
+       return NULL;
+}
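
Sizing example for the cyclic path, with assumed numbers: buf_len = 8192 and period_len = 2048 give num_periods = 4; if axi_block_len is 16384, each period fits in one segment, so four hw descriptors are built and the llp rewrite loop links the last one back to desc->hw_desc[0], closing the list into a ring.
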
+
+static struct dma_async_tx_descriptor *
+dw_axi_dma_chan_prep_slave_sg(struct dma_chan *dchan, struct scatterlist *sgl,
+                             unsigned int sg_len,
+                             enum dma_transfer_direction direction,
+                             unsigned long flags, void *context)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       struct axi_dma_hw_desc *hw_desc = NULL;
+       struct axi_dma_desc *desc = NULL;
+       u32 num_segments, segment_len;
+       unsigned int loop = 0;
+       struct scatterlist *sg;
+       size_t axi_block_len;
+       u32 len, num_sgs = 0;
+       unsigned int i;
+       dma_addr_t mem;
+       int status;
+       u64 llp = 0;
+       u8 lms = 0; /* Select AXI0 master for LLI fetching */
+
+       if (unlikely(!is_slave_direction(direction) || !sg_len))
+               return NULL;
+
+       mem = sg_dma_address(sgl);
+       len = sg_dma_len(sgl);
+
+       axi_block_len = calculate_block_len(chan, mem, len, direction);
+       if (axi_block_len == 0)
+               return NULL;
+
+       for_each_sg(sgl, sg, sg_len, i)
+               num_sgs += DIV_ROUND_UP(sg_dma_len(sg), axi_block_len);
+
+       desc = axi_desc_alloc(num_sgs);
+       if (unlikely(!desc))
+               goto err_desc_get;
+
+       desc->chan = chan;
+       desc->length = 0;
+       chan->direction = direction;
+
+       for_each_sg(sgl, sg, sg_len, i) {
+               mem = sg_dma_address(sg);
+               len = sg_dma_len(sg);
+               num_segments = DIV_ROUND_UP(sg_dma_len(sg), axi_block_len);
+               segment_len = DIV_ROUND_UP(sg_dma_len(sg), num_segments);
+
+               do {
+                       hw_desc = &desc->hw_desc[loop++];
+                       status = dw_axi_dma_set_hw_desc(chan, hw_desc, mem, segment_len);
+                       if (status < 0)
+                               goto err_desc_get;
+
+                       desc->length += hw_desc->len;
+                       len -= segment_len;
+                       mem += segment_len;
+               } while (len >= segment_len);
+       }
+
+       /* Set end-of-link to the last link descriptor of list */
+       set_desc_last(&desc->hw_desc[num_sgs - 1]);
+
+       /* Link the hw descriptors into a chain; llp starts at 0 so the last entry terminates */
+       do {
+               hw_desc = &desc->hw_desc[--num_sgs];
+               write_desc_llp(hw_desc, llp | lms);
+               llp = hw_desc->llp;
+       } while (num_sgs);
+
+       dw_axi_dma_set_hw_channel(chan->chip, chan->hw_handshake_num, true);
+
+       return vchan_tx_prep(&chan->vc, &desc->vd, flags);
+
+err_desc_get:
+       if (desc)
+               axi_desc_put(desc);
+
+       return NULL;
 }
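
Segmentation example for the slave_sg path: an sg entry of 6000 bytes against axi_block_len = 4096 gives num_segments = DIV_ROUND_UP(6000, 4096) = 2 and segment_len = 3000, producing two balanced 3000-byte hw descriptors instead of a lopsided 4096 + 1904 split.
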
 
 static struct dma_async_tx_descriptor *
 dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
                         dma_addr_t src_adr, size_t len, unsigned long flags)
 {
-       struct axi_dma_desc *first = NULL, *desc = NULL, *prev = NULL;
        struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
        size_t block_ts, max_block_ts, xfer_len;
-       u32 xfer_width, reg;
+       struct axi_dma_hw_desc *hw_desc = NULL;
+       struct axi_dma_desc *desc = NULL;
+       u32 xfer_width, reg, num;
+       u64 llp = 0;
        u8 lms = 0; /* Select AXI0 master for LLI fetching */
 
        dev_dbg(chan2dev(chan), "%s: memcpy: src: %pad dst: %pad length: %zd flags: %#lx",
                axi_chan_name(chan), &src_adr, &dst_adr, len, flags);
 
        max_block_ts = chan->chip->dw->hdata->block_size[chan->id];
+       xfer_width = axi_chan_get_xfer_width(chan, src_adr, dst_adr, len);
+       num = DIV_ROUND_UP(len, max_block_ts << xfer_width);
+       desc = axi_desc_alloc(num);
+       if (unlikely(!desc))
+               goto err_desc_get;
 
+       desc->chan = chan;
+       num = 0;
+       desc->length = 0;
        while (len) {
                xfer_len = len;
 
+               hw_desc = &desc->hw_desc[num];
                /*
                 * Take care for the alignment.
                 * Actually source and destination widths can be different, but
@@ -457,13 +880,13 @@ dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
                        xfer_len = max_block_ts << xfer_width;
                }
 
-               desc = axi_desc_get(chan);
-               if (unlikely(!desc))
+               hw_desc->lli = axi_desc_get(chan, &hw_desc->llp);
+               if (unlikely(!hw_desc->lli))
                        goto err_desc_get;
 
-               write_desc_sar(desc, src_adr);
-               write_desc_dar(desc, dst_adr);
-               desc->lli.block_ts_lo = cpu_to_le32(block_ts - 1);
+               write_desc_sar(hw_desc, src_adr);
+               write_desc_dar(hw_desc, dst_adr);
+               hw_desc->lli->block_ts_lo = cpu_to_le32(block_ts - 1);
 
                reg = CH_CTL_H_LLI_VALID;
                if (chan->chip->dw->hdata->restrict_axi_burst_len) {
@@ -474,7 +897,7 @@ dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
                                CH_CTL_H_AWLEN_EN |
                                burst_len << CH_CTL_H_AWLEN_POS);
                }
-               desc->lli.ctl_hi = cpu_to_le32(reg);
+               hw_desc->lli->ctl_hi = cpu_to_le32(reg);
 
                reg = (DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_DST_MSIZE_POS |
                       DWAXIDMAC_BURST_TRANS_LEN_4 << CH_CTL_L_SRC_MSIZE_POS |
@@ -482,62 +905,68 @@ dma_chan_prep_dma_memcpy(struct dma_chan *dchan, dma_addr_t dst_adr,
                       xfer_width << CH_CTL_L_SRC_WIDTH_POS |
                       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_DST_INC_POS |
                       DWAXIDMAC_CH_CTL_L_INC << CH_CTL_L_SRC_INC_POS);
-               desc->lli.ctl_lo = cpu_to_le32(reg);
+               hw_desc->lli->ctl_lo = cpu_to_le32(reg);
 
-               set_desc_src_master(desc);
-               set_desc_dest_master(desc);
-
-               /* Manage transfer list (xfer_list) */
-               if (!first) {
-                       first = desc;
-               } else {
-                       list_add_tail(&desc->xfer_list, &first->xfer_list);
-                       write_desc_llp(prev, desc->vd.tx.phys | lms);
-               }
-               prev = desc;
+               set_desc_src_master(hw_desc);
+               set_desc_dest_master(hw_desc, desc);
 
+               hw_desc->len = xfer_len;
+               desc->length += hw_desc->len;
                /* update the length and addresses for the next loop cycle */
                len -= xfer_len;
                dst_adr += xfer_len;
                src_adr += xfer_len;
+               num++;
        }
 
-       /* Total len of src/dest sg == 0, so no descriptor were allocated */
-       if (unlikely(!first))
-               return NULL;
-
        /* Set end-of-link to the last link descriptor of list */
-       set_desc_last(desc);
+       set_desc_last(&desc->hw_desc[num - 1]);
+       /* Link the hw descriptors into a chain; llp starts at 0 so the last entry terminates */
+       do {
+               hw_desc = &desc->hw_desc[--num];
+               write_desc_llp(hw_desc, llp | lms);
+               llp = hw_desc->llp;
+       } while (num);
 
-       return vchan_tx_prep(&chan->vc, &first->vd, flags);
+       return vchan_tx_prep(&chan->vc, &desc->vd, flags);
 
 err_desc_get:
-       if (first)
-               axi_desc_put(first);
+       if (desc)
+               axi_desc_put(desc);
        return NULL;
 }
 
+static int dw_axi_dma_chan_slave_config(struct dma_chan *dchan,
+                                       struct dma_slave_config *config)
+{
+       struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+
+       memcpy(&chan->config, config, sizeof(*config));
+
+       return 0;
+}
+
 static void axi_chan_dump_lli(struct axi_dma_chan *chan,
-                             struct axi_dma_desc *desc)
+                             struct axi_dma_hw_desc *desc)
 {
        dev_err(dchan2dev(&chan->vc.chan),
                "SAR: 0x%llx DAR: 0x%llx LLP: 0x%llx BTS 0x%x CTL: 0x%x:%08x",
-               le64_to_cpu(desc->lli.sar),
-               le64_to_cpu(desc->lli.dar),
-               le64_to_cpu(desc->lli.llp),
-               le32_to_cpu(desc->lli.block_ts_lo),
-               le32_to_cpu(desc->lli.ctl_hi),
-               le32_to_cpu(desc->lli.ctl_lo));
+               le64_to_cpu(desc->lli->sar),
+               le64_to_cpu(desc->lli->dar),
+               le64_to_cpu(desc->lli->llp),
+               le32_to_cpu(desc->lli->block_ts_lo),
+               le32_to_cpu(desc->lli->ctl_hi),
+               le32_to_cpu(desc->lli->ctl_lo));
 }
 
 static void axi_chan_list_dump_lli(struct axi_dma_chan *chan,
                                   struct axi_dma_desc *desc_head)
 {
-       struct axi_dma_desc *desc;
+       int count = atomic_read(&chan->descs_allocated);
+       int i;
 
-       axi_chan_dump_lli(chan, desc_head);
-       list_for_each_entry(desc, &desc_head->xfer_list, xfer_list)
-               axi_chan_dump_lli(chan, desc);
+       for (i = 0; i < count; i++)
+               axi_chan_dump_lli(chan, &desc_head->hw_desc[i]);
 }
 
 static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
@@ -570,8 +999,13 @@ static noinline void axi_chan_handle_err(struct axi_dma_chan *chan, u32 status)
 
 static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
 {
+       int count = atomic_read(&chan->descs_allocated);
+       struct axi_dma_hw_desc *hw_desc;
+       struct axi_dma_desc *desc;
        struct virt_dma_desc *vd;
        unsigned long flags;
+       u64 llp;
+       int i;
 
        spin_lock_irqsave(&chan->vc.lock, flags);
        if (unlikely(axi_chan_is_hw_enable(chan))) {
@@ -582,12 +1016,34 @@ static void axi_chan_block_xfer_complete(struct axi_dma_chan *chan)
 
        /* The completed descriptor is currently at the head of the vc list */
        vd = vchan_next_desc(&chan->vc);
-       /* Remove the completed descriptor from issued list before completing */
-       list_del(&vd->node);
-       vchan_cookie_complete(vd);
 
-       /* Submit queued descriptors after processing the completed ones */
-       axi_chan_start_first_queued(chan);
+       if (chan->cyclic) {
+               desc = vd_to_axi_desc(vd);
+               if (desc) {
+                       llp = lo_hi_readq(chan->chan_regs + CH_LLP);
+                       for (i = 0; i < count; i++) {
+                               hw_desc = &desc->hw_desc[i];
+                               if (hw_desc->llp == llp) {
+                                       axi_chan_irq_clear(chan, hw_desc->lli->status_lo);
+                                       hw_desc->lli->ctl_hi |= CH_CTL_H_LLI_VALID;
+                                       desc->completed_blocks = i;
+
+                                       if (((hw_desc->len * (i + 1)) % desc->period_len) == 0)
+                                               vchan_cyclic_callback(vd);
+                                       break;
+                               }
+                       }
+
+                       axi_chan_enable(chan);
+               }
+       } else {
+               /* Remove the completed descriptor from issued list before completing */
+               list_del(&vd->node);
+               vchan_cookie_complete(vd);
+
+               /* Submit queued descriptors after processing the completed ones */
+               axi_chan_start_first_queued(chan);
+       }
 
        spin_unlock_irqrestore(&chan->vc.lock, flags);
 }
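
In the cyclic branch, the CH_LLP register identifies the block the hardware just fetched; with four 1024-byte segments and period_len = 2048, the (hw_desc->len * (i + 1)) % desc->period_len test fires vchan_cyclic_callback() at segments i = 1 and i = 3, i.e. once per completed period.
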
@@ -627,15 +1083,31 @@ static irqreturn_t dw_axi_dma_interrupt(int irq, void *dev_id)
 static int dma_chan_terminate_all(struct dma_chan *dchan)
 {
        struct axi_dma_chan *chan = dchan_to_axi_dma_chan(dchan);
+       u32 chan_active = BIT(chan->id) << DMAC_CHAN_EN_SHIFT;
        unsigned long flags;
+       u32 val;
+       int ret;
        LIST_HEAD(head);
 
-       spin_lock_irqsave(&chan->vc.lock, flags);
-
        axi_chan_disable(chan);
 
+       ret = readl_poll_timeout_atomic(chan->chip->regs + DMAC_CHEN, val,
+                                       !(val & chan_active), 1000, 10000);
+       if (ret == -ETIMEDOUT)
+               dev_warn(dchan2dev(dchan),
+                        "%s failed to stop\n", axi_chan_name(chan));
+
+       if (chan->direction != DMA_MEM_TO_MEM)
+               dw_axi_dma_set_hw_channel(chan->chip,
+                                         chan->hw_handshake_num, false);
+       if (chan->direction == DMA_MEM_TO_DEV)
+               dw_axi_dma_set_byte_halfword(chan, false);
+
+       spin_lock_irqsave(&chan->vc.lock, flags);
+
        vchan_get_all_descriptors(&chan->vc, &head);
 
+       chan->cyclic = false;
        spin_unlock_irqrestore(&chan->vc.lock, flags);
 
        vchan_dma_desc_free_list(&chan->vc, &head);
@@ -746,6 +1218,22 @@ static int __maybe_unused axi_dma_runtime_resume(struct device *dev)
        return axi_dma_resume(chip);
 }
 
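+/*
+ * Translate a device tree DMA specifier into a channel. Any free channel
+ * can serve any peripheral, so take the first available one and record
+ * the hardware handshake number from the first specifier cell.
+ */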
+static struct dma_chan *dw_axi_dma_of_xlate(struct of_phandle_args *dma_spec,
+                                           struct of_dma *ofdma)
+{
+       struct dw_axi_dma *dw = ofdma->of_dma_data;
+       struct axi_dma_chan *chan;
+       struct dma_chan *dchan;
+
+       dchan = dma_get_any_slave_channel(&dw->dma);
+       if (!dchan)
+               return NULL;
+
+       chan = dchan_to_axi_dma_chan(dchan);
+       chan->hw_handshake_num = dma_spec->args[0];
+       return dchan;
+}
+
 static int parse_device_properties(struct axi_dma_chip *chip)
 {
        struct device *dev = chip->dev;
@@ -816,6 +1304,7 @@ static int parse_device_properties(struct axi_dma_chip *chip)
 
 static int dw_probe(struct platform_device *pdev)
 {
+       struct device_node *node = pdev->dev.of_node;
        struct axi_dma_chip *chip;
        struct resource *mem;
        struct dw_axi_dma *dw;
@@ -848,6 +1337,12 @@ static int dw_probe(struct platform_device *pdev)
        if (IS_ERR(chip->regs))
                return PTR_ERR(chip->regs);
 
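+       /*
+        * The Intel KeemBay integration exposes extra handshake/control
+        * registers through a second (APB) register region.
+        */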
+       if (of_device_is_compatible(node, "intel,kmb-axi-dma")) {
+               chip->apb_regs = devm_platform_ioremap_resource(pdev, 1);
+               if (IS_ERR(chip->apb_regs))
+                       return PTR_ERR(chip->apb_regs);
+       }
+
        chip->core_clk = devm_clk_get(chip->dev, "core-clk");
        if (IS_ERR(chip->core_clk))
                return PTR_ERR(chip->core_clk);
@@ -870,13 +1365,6 @@ static int dw_probe(struct platform_device *pdev)
        if (ret)
                return ret;
 
-       /* Lli address must be aligned to a 64-byte boundary */
-       dw->desc_pool = dmam_pool_create(KBUILD_MODNAME, chip->dev,
-                                        sizeof(struct axi_dma_desc), 64, 0);
-       if (!dw->desc_pool) {
-               dev_err(chip->dev, "No memory for descriptors dma pool\n");
-               return -ENOMEM;
-       }
 
        INIT_LIST_HEAD(&dw->dma.channels);
        for (i = 0; i < hdata->nr_channels; i++) {
@@ -893,13 +1381,16 @@ static int dw_probe(struct platform_device *pdev)
 
        /* Set capabilities */
        dma_cap_set(DMA_MEMCPY, dw->dma.cap_mask);
+       dma_cap_set(DMA_SLAVE, dw->dma.cap_mask);
+       dma_cap_set(DMA_CYCLIC, dw->dma.cap_mask);
 
        /* DMA capabilities */
        dw->dma.chancnt = hdata->nr_channels;
        dw->dma.src_addr_widths = AXI_DMA_BUSWIDTHS;
        dw->dma.dst_addr_widths = AXI_DMA_BUSWIDTHS;
        dw->dma.directions = BIT(DMA_MEM_TO_MEM);
-       dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+       dw->dma.directions |= BIT(DMA_MEM_TO_DEV) | BIT(DMA_DEV_TO_MEM);
+       dw->dma.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
 
        dw->dma.dev = chip->dev;
        dw->dma.device_tx_status = dma_chan_tx_status;
@@ -912,7 +1403,18 @@ static int dw_probe(struct platform_device *pdev)
        dw->dma.device_free_chan_resources = dma_chan_free_chan_resources;
 
        dw->dma.device_prep_dma_memcpy = dma_chan_prep_dma_memcpy;
+       dw->dma.device_synchronize = dw_axi_dma_synchronize;
+       dw->dma.device_config = dw_axi_dma_chan_slave_config;
+       dw->dma.device_prep_slave_sg = dw_axi_dma_chan_prep_slave_sg;
+       dw->dma.device_prep_dma_cyclic = dw_axi_dma_chan_prep_cyclic;
 
+       /*
+        * The Synopsys DesignWare AXI DMA datasheet specifies a maximum
+        * of 1024 supported blocks. The device register width is 4 bytes,
+        * so constrain the maximum segment size to 1024 * 4.
+        */
+       dw->dma.dev->dma_parms = &dw->dma_parms;
+       dma_set_max_seg_size(&pdev->dev, MAX_BLOCK_SIZE);
        platform_set_drvdata(pdev, chip);
 
        pm_runtime_enable(chip->dev);
@@ -935,6 +1437,13 @@ static int dw_probe(struct platform_device *pdev)
        if (ret)
                goto err_pm_disable;
 
+       /* Register with OF helpers for DMA lookups */
+       ret = of_dma_controller_register(pdev->dev.of_node,
+                                        dw_axi_dma_of_xlate, dw);
+       if (ret < 0)
+               dev_warn(&pdev->dev,
+                        "Failed to register OF DMA controller, fallback to MEM_TO_MEM mode\n");
+
        dev_info(chip->dev, "DesignWare AXI DMA Controller, %d channels\n",
                 dw->hdata->nr_channels);
 
@@ -968,6 +1477,8 @@ static int dw_remove(struct platform_device *pdev)
 
        devm_free_irq(chip->dev, chip->irq, chip);
 
+       of_dma_controller_free(chip->dev->of_node);
+
        list_for_each_entry_safe(chan, _chan, &dw->dma.channels,
                        vc.chan.device_node) {
                list_del(&chan->vc.chan.device_node);
@@ -983,6 +1494,7 @@ static const struct dev_pm_ops dw_axi_dma_pm_ops = {
 
 static const struct of_device_id dw_dma_of_id_table[] = {
        { .compatible = "snps,axi-dma-1.01a" },
+       { .compatible = "intel,kmb-axi-dma" },
        {}
 };
 MODULE_DEVICE_TABLE(of, dw_dma_of_id_table);
index 18b6014..b698978 100644 (file)
@@ -37,10 +37,16 @@ struct axi_dma_chan {
        struct axi_dma_chip             *chip;
        void __iomem                    *chan_regs;
        u8                              id;
+       u8                              hw_handshake_num;
        atomic_t                        descs_allocated;
 
+       struct dma_pool                 *desc_pool;
        struct virt_dma_chan            vc;
 
+       struct axi_dma_desc             *desc;
+       struct dma_slave_config         config;
+       enum dma_transfer_direction     direction;
+       bool                            cyclic;
        /* these other elements are all protected by vc.lock */
        bool                            is_paused;
 };
@@ -48,7 +54,7 @@ struct axi_dma_chan {
 struct dw_axi_dma {
        struct dma_device       dma;
        struct dw_axi_dma_hcfg  *hdata;
-       struct dma_pool         *desc_pool;
+       struct device_dma_parameters    dma_parms;
 
        /* channels */
        struct axi_dma_chan     *chan;
@@ -58,6 +64,7 @@ struct axi_dma_chip {
        struct device           *dev;
        int                     irq;
        void __iomem            *regs;
+       void __iomem            *apb_regs;
        struct clk              *core_clk;
        struct clk              *cfgr_clk;
        struct dw_axi_dma       *dw;
@@ -80,12 +87,20 @@ struct __packed axi_dma_lli {
        __le32          reserved_hi;
 };
 
+struct axi_dma_hw_desc {
+       struct axi_dma_lli      *lli;
+       dma_addr_t              llp;
+       u32                     len;
+};
+
 struct axi_dma_desc {
-       struct axi_dma_lli              lli;
+       struct axi_dma_hw_desc  *hw_desc;
 
        struct virt_dma_desc            vd;
        struct axi_dma_chan             *chan;
-       struct list_head                xfer_list;
+       u32                             completed_blocks;
+       u32                             length;
+       u32                             period_len;
 };
 
 static inline struct device *dchan2dev(struct dma_chan *dchan)
@@ -157,6 +172,19 @@ static inline struct axi_dma_chan *dchan_to_axi_dma_chan(struct dma_chan *dchan)
 #define CH_INTSIGNAL_ENA       0x090 /* R/W Chan Interrupt Signal Enable */
 #define CH_INTCLEAR            0x098 /* W Chan Interrupt Clear */
 
+/* These Apb registers are used by Intel KeemBay SoC */
+#define DMAC_APB_CFG           0x000 /* DMAC Apb Configuration Register */
+#define DMAC_APB_STAT          0x004 /* DMAC Apb Status Register */
+#define DMAC_APB_DEBUG_STAT_0  0x008 /* DMAC Apb Debug Status Register 0 */
+#define DMAC_APB_DEBUG_STAT_1  0x00C /* DMAC Apb Debug Status Register 1 */
+#define DMAC_APB_HW_HS_SEL_0   0x010 /* DMAC Apb HW HS register 0 */
+#define DMAC_APB_HW_HS_SEL_1   0x014 /* DMAC Apb HW HS register 1 */
+#define DMAC_APB_LPI           0x018 /* DMAC Apb Low Power Interface Reg */
+#define DMAC_APB_BYTE_WR_CH_EN 0x01C /* DMAC Apb Byte Write Enable */
+#define DMAC_APB_HALFWORD_WR_CH_EN     0x020 /* DMAC Halfword write enables */
+
+#define UNUSED_CHANNEL         0x3F /* Set unused DMA channel to 0x3F */
+#define MAX_BLOCK_SIZE         0x1000 /* 1024 blocks * 4 bytes data width */
 
 /* DMAC_CFG */
 #define DMAC_EN_POS                    0
index 0feb323..f8459cc 100644 (file)
@@ -1214,6 +1214,7 @@ static int fsldma_of_probe(struct platform_device *op)
 {
        struct fsldma_device *fdev;
        struct device_node *child;
+       unsigned int i;
        int err;
 
        fdev = kzalloc(sizeof(*fdev), GFP_KERNEL);
@@ -1292,6 +1293,10 @@ static int fsldma_of_probe(struct platform_device *op)
        return 0;
 
 out_free_fdev:
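+       /* Unwind any channels that were initialized before the failure */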
+       for (i = 0; i < FSL_DMA_MAX_CHANS_PER_DEVICE; i++) {
+               if (fdev->chan[i])
+                       fsl_dma_chan_remove(fdev->chan[i]);
+       }
        irq_dispose_mapping(fdev->irq);
        iounmap(fdev->regs);
 out_free:
@@ -1314,6 +1319,7 @@ static int fsldma_of_remove(struct platform_device *op)
                if (fdev->chan[i])
                        fsl_dma_chan_remove(fdev->chan[i]);
        }
+       irq_dispose_mapping(fdev->irq);
 
        iounmap(fdev->regs);
        kfree(fdev);
index 07cc732..9045a6f 100644 (file)
 static irqreturn_t hsu_pci_irq(int irq, void *dev)
 {
        struct hsu_dma_chip *chip = dev;
-       struct pci_dev *pdev = to_pci_dev(chip->dev);
        u32 dmaisr;
        u32 status;
        unsigned short i;
        int ret = 0;
        int err;
 
-       /*
-        * On Intel Tangier B0 and Anniedale the interrupt line, disregarding
-        * to have different numbers, is shared between HSU DMA and UART IPs.
-        * Thus on such SoCs we are expecting that IRQ handler is called in
-        * UART driver only.
-        */
-       if (pdev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA)
-               return IRQ_HANDLED;
-
        dmaisr = readl(chip->regs + HSU_PCI_DMAISR);
        for (i = 0; i < chip->hsu->nr_channels; i++) {
                if (dmaisr & 0x1) {
@@ -105,6 +95,17 @@ static int hsu_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        if (ret)
                goto err_register_irq;
 
+       /*
+        * On Intel Tangier B0 and Anniedale the interrupt line, despite
+        * having different numbers, is shared between the HSU DMA and UART
+        * IPs. Thus on such SoCs we expect the IRQ handler to be called
+        * only from the UART driver. Instead of handling the spurious
+        * interrupt from HSU DMA here, wasting CPU time and delaying the
+        * HSU UART interrupt handling, disable the interrupt entirely.
+        */
+       if (pdev->device == PCI_DEVICE_ID_INTEL_MRFLD_HSU_DMA)
+               disable_irq_nosync(chip->irq);
+
        pci_set_drvdata(pdev, chip);
 
        return 0;
index 71fd6e4..a15e501 100644 (file)
@@ -165,6 +165,7 @@ int idxd_register_dma_device(struct idxd_device *idxd)
        INIT_LIST_HEAD(&dma->channels);
        dma->dev = &idxd->pdev->dev;
 
+       dma_cap_set(DMA_PRIVATE, dma->cap_mask);
        dma_cap_set(DMA_COMPLETION_NO_ORDER, dma->cap_mask);
        dma->device_release = idxd_dma_release;
 
index fa04acd..085a0c3 100644 (file)
@@ -26,12 +26,16 @@ MODULE_VERSION(IDXD_DRIVER_VERSION);
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Intel Corporation");
 
+static bool sva = true;
+module_param(sva, bool, 0644);
+MODULE_PARM_DESC(sva, "Toggle SVA support on/off");
+
 #define DRV_NAME "idxd"
 
 bool support_enqcmd;
 
 static struct idr idxd_idrs[IDXD_TYPE_MAX];
-static struct mutex idxd_idr_lock;
+static DEFINE_MUTEX(idxd_idr_lock);
 
 static struct pci_device_id idxd_pci_tbl[] = {
        /* DSA ver 1.0 platforms */
@@ -341,12 +345,14 @@ static int idxd_probe(struct idxd_device *idxd)
 
        dev_dbg(dev, "IDXD reset complete\n");
 
-       if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM)) {
+       if (IS_ENABLED(CONFIG_INTEL_IDXD_SVM) && sva) {
                rc = idxd_enable_system_pasid(idxd);
                if (rc < 0)
                        dev_warn(dev, "Failed to enable PASID. No SVA support: %d\n", rc);
                else
                        set_bit(IDXD_FLAG_PASID_ENABLED, &idxd->flags);
+       } else if (!sva) {
+               dev_warn(dev, "User forced SVA off via module param.\n");
        }
 
        idxd_read_caps(idxd);
@@ -547,7 +553,6 @@ static int __init idxd_init_module(void)
        else
                support_enqcmd = true;
 
-       mutex_init(&idxd_idr_lock);
        for (i = 0; i < IDXD_TYPE_MAX; i++)
                idr_init(&idxd_idrs[i]);
 
index 41ba21e..d5590c0 100644 (file)
@@ -1952,8 +1952,6 @@ static struct dma_chan *sdma_xlate(struct of_phandle_args *dma_spec,
 
 static int sdma_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id =
-                       of_match_device(sdma_dt_ids, &pdev->dev);
        struct device_node *np = pdev->dev.of_node;
        struct device_node *spba_bus;
        const char *fw_name;
@@ -1961,17 +1959,9 @@ static int sdma_probe(struct platform_device *pdev)
        int irq;
        struct resource *iores;
        struct resource spba_res;
-       struct sdma_platform_data *pdata = dev_get_platdata(&pdev->dev);
        int i;
        struct sdma_engine *sdma;
        s32 *saddr_arr;
-       const struct sdma_driver_data *drvdata = NULL;
-
-       drvdata = of_id->data;
-       if (!drvdata) {
-               dev_err(&pdev->dev, "unable to find driver data\n");
-               return -EINVAL;
-       }
 
        ret = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(32));
        if (ret)
@@ -1984,7 +1974,7 @@ static int sdma_probe(struct platform_device *pdev)
        spin_lock_init(&sdma->channel_0_lock);
 
        sdma->dev = &pdev->dev;
-       sdma->drvdata = drvdata;
+       sdma->drvdata = of_device_get_match_data(sdma->dev);
 
        irq = platform_get_irq(pdev, 0);
        if (irq < 0)
@@ -2063,8 +2053,6 @@ static int sdma_probe(struct platform_device *pdev)
 
        if (sdma->drvdata->script_addrs)
                sdma_add_scripts(sdma, sdma->drvdata->script_addrs);
-       if (pdata && pdata->script_addrs)
-               sdma_add_scripts(sdma, pdata->script_addrs);
 
        sdma->dma_device.dev = &pdev->dev;
 
@@ -2110,30 +2098,18 @@ static int sdma_probe(struct platform_device *pdev)
        }
 
        /*
-        * Kick off firmware loading as the very last step:
-        * attempt to load firmware only if we're not on the error path, because
-        * the firmware callback requires a fully functional and allocated sdma
-        * instance.
+        * Because the device tree does not encode the ROM script address,
+        * the RAM script in the firmware is mandatory for a device tree
+        * probe; otherwise probing fails.
         */
-       if (pdata) {
-               ret = sdma_get_firmware(sdma, pdata->fw_name);
-               if (ret)
-                       dev_warn(&pdev->dev, "failed to get firmware from platform data\n");
+       ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
+                                     &fw_name);
+       if (ret) {
+               dev_warn(&pdev->dev, "failed to get firmware name\n");
        } else {
-               /*
-                * Because that device tree does not encode ROM script address,
-                * the RAM script in firmware is mandatory for device tree
-                * probe, otherwise it fails.
-                */
-               ret = of_property_read_string(np, "fsl,sdma-ram-script-name",
-                                             &fw_name);
-               if (ret) {
-                       dev_warn(&pdev->dev, "failed to get firmware name\n");
-               } else {
-                       ret = sdma_get_firmware(sdma, fw_name);
-                       if (ret)
-                               dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
-               }
+               ret = sdma_get_firmware(sdma, fw_name);
+               if (ret)
+                       dev_warn(&pdev->dev, "failed to get firmware from device tree\n");
        }
 
        return 0;
diff --git a/drivers/dma/lgm/Kconfig b/drivers/dma/lgm/Kconfig
new file mode 100644 (file)
index 0000000..9194330
--- /dev/null
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0-only
+config INTEL_LDMA
+       bool "Lightning Mountain centralized DMA controllers"
+       depends on X86 || COMPILE_TEST
+       select DMA_ENGINE
+       select DMA_VIRTUAL_CHANNELS
+       help
+         Enable support for Intel Lightning Mountain SOC DMA controllers.
+         These controllers provide DMA capabilities for a variety of on-chip
+         devices such as HSNAND and GSWIP (Gigabit Switch IP).
diff --git a/drivers/dma/lgm/Makefile b/drivers/dma/lgm/Makefile
new file mode 100644 (file)
index 0000000..f318a8e
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_INTEL_LDMA)       += lgm-dma.o
diff --git a/drivers/dma/lgm/lgm-dma.c b/drivers/dma/lgm/lgm-dma.c
new file mode 100644 (file)
index 0000000..efe8bd3
--- /dev/null
@@ -0,0 +1,1739 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Lightning Mountain centralized DMA controller driver
+ *
+ * Copyright (c) 2016 - 2020 Intel Corporation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/dma-mapping.h>
+#include <linux/dmapool.h>
+#include <linux/err.h>
+#include <linux/export.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/iopoll.h>
+#include <linux/of_dma.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/reset.h>
+
+#include "../dmaengine.h"
+#include "../virt-dma.h"
+
+#define DRIVER_NAME                    "lgm-dma"
+
+#define DMA_ID                         0x0008
+#define DMA_ID_REV                     GENMASK(7, 0)
+#define DMA_ID_PNR                     GENMASK(19, 16)
+#define DMA_ID_CHNR                    GENMASK(26, 20)
+#define DMA_ID_DW_128B                 BIT(27)
+#define DMA_ID_AW_36B                  BIT(28)
+#define DMA_VER32                      0x32
+#define DMA_VER31                      0x31
+#define DMA_VER22                      0x0A
+
+#define DMA_CTRL                       0x0010
+#define DMA_CTRL_RST                   BIT(0)
+#define DMA_CTRL_DSRAM_PATH            BIT(1)
+#define DMA_CTRL_DBURST_WR             BIT(3)
+#define DMA_CTRL_VLD_DF_ACK            BIT(4)
+#define DMA_CTRL_CH_FL                 BIT(6)
+#define DMA_CTRL_DS_FOD                        BIT(7)
+#define DMA_CTRL_DRB                   BIT(8)
+#define DMA_CTRL_ENBE                  BIT(9)
+#define DMA_CTRL_DESC_TMOUT_CNT_V31    GENMASK(27, 16)
+#define DMA_CTRL_DESC_TMOUT_EN_V31     BIT(30)
+#define DMA_CTRL_PKTARB                        BIT(31)
+
+#define DMA_CPOLL                      0x0014
+#define DMA_CPOLL_CNT                  GENMASK(15, 4)
+#define DMA_CPOLL_EN                   BIT(31)
+
+#define DMA_CS                         0x0018
+#define DMA_CS_MASK                    GENMASK(5, 0)
+
+#define DMA_CCTRL                      0x001C
+#define DMA_CCTRL_ON                   BIT(0)
+#define DMA_CCTRL_RST                  BIT(1)
+#define DMA_CCTRL_CH_POLL_EN           BIT(2)
+#define DMA_CCTRL_CH_ABC               BIT(3) /* Adaptive Burst Chop */
+#define DMA_CDBA_MSB                   GENMASK(7, 4)
+#define DMA_CCTRL_DIR_TX               BIT(8)
+#define DMA_CCTRL_CLASS                        GENMASK(11, 9)
+#define DMA_CCTRL_CLASSH               GENMASK(19, 18)
+#define DMA_CCTRL_WR_NP_EN             BIT(21)
+#define DMA_CCTRL_PDEN                 BIT(23)
+#define DMA_MAX_CLASS                  (SZ_32 - 1)
+
+#define DMA_CDBA                       0x0020
+#define DMA_CDLEN                      0x0024
+#define DMA_CIS                                0x0028
+#define DMA_CIE                                0x002C
+#define DMA_CI_EOP                     BIT(1)
+#define DMA_CI_DUR                     BIT(2)
+#define DMA_CI_DESCPT                  BIT(3)
+#define DMA_CI_CHOFF                   BIT(4)
+#define DMA_CI_RDERR                   BIT(5)
+#define DMA_CI_ALL                                                     \
+       (DMA_CI_EOP | DMA_CI_DUR | DMA_CI_DESCPT | DMA_CI_CHOFF | DMA_CI_RDERR)
+
+#define DMA_PS                         0x0040
+#define DMA_PCTRL                      0x0044
+#define DMA_PCTRL_RXBL16               BIT(0)
+#define DMA_PCTRL_TXBL16               BIT(1)
+#define DMA_PCTRL_RXBL                 GENMASK(3, 2)
+#define DMA_PCTRL_RXBL_8               3
+#define DMA_PCTRL_TXBL                 GENMASK(5, 4)
+#define DMA_PCTRL_TXBL_8               3
+#define DMA_PCTRL_PDEN                 BIT(6)
+#define DMA_PCTRL_RXBL32               BIT(7)
+#define DMA_PCTRL_RXENDI               GENMASK(9, 8)
+#define DMA_PCTRL_TXENDI               GENMASK(11, 10)
+#define DMA_PCTRL_TXBL32               BIT(15)
+#define DMA_PCTRL_MEM_FLUSH            BIT(16)
+
+#define DMA_IRNEN1                     0x00E8
+#define DMA_IRNCR1                     0x00EC
+#define DMA_IRNEN                      0x00F4
+#define DMA_IRNCR                      0x00F8
+#define DMA_C_DP_TICK                  0x100
+#define DMA_C_DP_TICK_TIKNARB          GENMASK(15, 0)
+#define DMA_C_DP_TICK_TIKARB           GENMASK(31, 16)
+
+#define DMA_C_HDRM                     0x110
+/*
+ * If header mode is set in the DMA descriptor:
+ *   If bit 30 is disabled, HDR_LEN must be configured according to the
+ *     channel's requirements.
+ *   If bit 30 is enabled (checksum with header mode), HDR_LEN does not
+ *     need to be configured; checksum offload is enabled for the switch.
+ * If header mode is not set in the DMA descriptor,
+ *   this register setting doesn't matter.
+ */
+#define DMA_C_HDRM_HDR_SUM             BIT(30)
+
+#define DMA_C_BOFF                     0x120
+#define DMA_C_BOFF_BOF_LEN             GENMASK(7, 0)
+#define DMA_C_BOFF_EN                  BIT(31)
+
+#define DMA_ORRC                       0x190
+#define DMA_ORRC_ORRCNT                        GENMASK(8, 4)
+#define DMA_ORRC_EN                    BIT(31)
+
+#define DMA_C_ENDIAN                   0x200
+#define DMA_C_END_DATAENDI             GENMASK(1, 0)
+#define DMA_C_END_DE_EN                        BIT(7)
+#define DMA_C_END_DESENDI              GENMASK(9, 8)
+#define DMA_C_END_DES_EN               BIT(16)
+
+/* DMA controller capability */
+#define DMA_ADDR_36BIT                 BIT(0)
+#define DMA_DATA_128BIT                        BIT(1)
+#define DMA_CHAN_FLOW_CTL              BIT(2)
+#define DMA_DESC_FOD                   BIT(3)
+#define DMA_DESC_IN_SRAM               BIT(4)
+#define DMA_EN_BYTE_EN                 BIT(5)
+#define DMA_DBURST_WR                  BIT(6)
+#define DMA_VALID_DESC_FETCH_ACK       BIT(7)
+#define DMA_DFT_DRB                    BIT(8)
+
+#define DMA_ORRC_MAX_CNT               (SZ_32 - 1)
+#define DMA_DFT_POLL_CNT               SZ_4
+#define DMA_DFT_BURST_V22              SZ_2
+#define DMA_BURSTL_8DW                 SZ_8
+#define DMA_BURSTL_16DW                        SZ_16
+#define DMA_BURSTL_32DW                        SZ_32
+#define DMA_DFT_BURST                  DMA_BURSTL_16DW
+#define DMA_MAX_DESC_NUM               (SZ_8K - 1)
+#define DMA_CHAN_BOFF_MAX              (SZ_256 - 1)
+#define DMA_DFT_ENDIAN                 0
+
+#define DMA_DFT_DESC_TCNT              50
+#define DMA_HDR_LEN_MAX                        (SZ_16K - 1)
+
+/* DMA flags */
+#define DMA_TX_CH                      BIT(0)
+#define DMA_RX_CH                      BIT(1)
+#define DEVICE_ALLOC_DESC              BIT(2)
+#define CHAN_IN_USE                    BIT(3)
+#define DMA_HW_DESC                    BIT(4)
+
+/* Descriptor fields */
+#define DESC_DATA_LEN                  GENMASK(15, 0)
+#define DESC_BYTE_OFF                  GENMASK(25, 23)
+#define DESC_EOP                       BIT(28)
+#define DESC_SOP                       BIT(29)
+#define DESC_C                         BIT(30)
+#define DESC_OWN                       BIT(31)
+
+#define DMA_CHAN_RST                   1
+#define DMA_MAX_SIZE                   (BIT(16) - 1)
+#define MAX_LOWER_CHANS                        32
+#define MASK_LOWER_CHANS               GENMASK(4, 0)
+#define DMA_OWN                                1
+#define HIGH_4_BITS                    GENMASK(3, 0)
+#define DMA_DFT_DESC_NUM               1
+#define DMA_PKT_DROP_DIS               0
+
+enum ldma_chan_on_off {
+       DMA_CH_OFF = 0,
+       DMA_CH_ON = 1,
+};
+
+enum {
+       DMA_TYPE_TX = 0,
+       DMA_TYPE_RX,
+       DMA_TYPE_MCPY,
+};
+
+struct ldma_dev;
+struct ldma_port;
+
+struct ldma_chan {
+       struct virt_dma_chan    vchan;
+       struct ldma_port        *port; /* back pointer */
+       char                    name[8]; /* Channel name */
+       int                     nr; /* Channel id in hardware */
+       u32                     flags; /* DMA_* channel flag bits */
+       enum ldma_chan_on_off   onoff;
+       dma_addr_t              desc_phys;
+       void                    *desc_base; /* Virtual address */
+       u32                     desc_cnt; /* Number of descriptors */
+       int                     rst;
+       u32                     hdrm_len;
+       bool                    hdrm_csum;
+       u32                     boff_len;
+       u32                     data_endian;
+       u32                     desc_endian;
+       bool                    pden;
+       bool                    desc_rx_np;
+       bool                    data_endian_en;
+       bool                    desc_endian_en;
+       bool                    abc_en;
+       bool                    desc_init;
+       struct dma_pool         *desc_pool; /* Descriptors pool */
+       u32                     desc_num;
+       struct dw2_desc_sw      *ds;
+       struct work_struct      work;
+       struct dma_slave_config config;
+};
+
+struct ldma_port {
+       struct ldma_dev         *ldev; /* back pointer */
+       u32                     portid;
+       u32                     rxbl;
+       u32                     txbl;
+       u32                     rxendi;
+       u32                     txendi;
+       u32                     pkt_drop;
+};
+
+/* Instance specific data */
+struct ldma_inst_data {
+       bool                    desc_in_sram;
+       bool                    chan_fc;
+       bool                    desc_fod; /* Fetch On Demand */
+       bool                    valid_desc_fetch_ack;
+       u32                     orrc; /* Outstanding read count */
+       const char              *name;
+       u32                     type;
+};
+
+struct ldma_dev {
+       struct device           *dev;
+       void __iomem            *base;
+       struct reset_control    *rst;
+       struct clk              *core_clk;
+       struct dma_device       dma_dev;
+       u32                     ver;
+       int                     irq;
+       struct ldma_port        *ports;
+       struct ldma_chan        *chans; /* channel list on this DMA or port */
+       spinlock_t              dev_lock; /* Controller register exclusive */
+       u32                     chan_nrs;
+       u32                     port_nrs;
+       u32                     channels_mask;
+       u32                     flags;
+       u32                     pollcnt;
+       const struct ldma_inst_data *inst;
+       struct workqueue_struct *wq;
+};
+
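+/*
+ * Hardware descriptor: one control word (DESC_* fields) plus the
+ * 32-bit buffer address.
+ */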
+struct dw2_desc {
+       u32 field;
+       u32 addr;
+} __packed __aligned(8);
+
+struct dw2_desc_sw {
+       struct virt_dma_desc    vdesc;
+       struct ldma_chan        *chan;
+       dma_addr_t              desc_phys;
+       size_t                  desc_cnt;
+       size_t                  size;
+       struct dw2_desc         *desc_hw;
+};
+
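+/*
+ * Read-modify-write a register; the write is skipped when the value
+ * would not change.
+ */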
+static inline void
+ldma_update_bits(struct ldma_dev *d, u32 mask, u32 val, u32 ofs)
+{
+       u32 old_val, new_val;
+
+       old_val = readl(d->base +  ofs);
+       new_val = (old_val & ~mask) | (val & mask);
+
+       if (new_val != old_val)
+               writel(new_val, d->base + ofs);
+}
+
+static inline struct ldma_chan *to_ldma_chan(struct dma_chan *chan)
+{
+       return container_of(chan, struct ldma_chan, vchan.chan);
+}
+
+static inline struct ldma_dev *to_ldma_dev(struct dma_device *dma_dev)
+{
+       return container_of(dma_dev, struct ldma_dev, dma_dev);
+}
+
+static inline struct dw2_desc_sw *to_lgm_dma_desc(struct virt_dma_desc *vdesc)
+{
+       return container_of(vdesc, struct dw2_desc_sw, vdesc);
+}
+
+static inline bool ldma_chan_tx(struct ldma_chan *c)
+{
+       return !!(c->flags & DMA_TX_CH);
+}
+
+static inline bool ldma_chan_is_hw_desc(struct ldma_chan *c)
+{
+       return !!(c->flags & DMA_HW_DESC);
+}
+
+static void ldma_dev_reset(struct ldma_dev *d)
+{
+       unsigned long flags;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, DMA_CTRL_RST, DMA_CTRL_RST, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_pkt_arb_cfg(struct ldma_dev *d, bool enable)
+{
+       unsigned long flags;
+       u32 mask = DMA_CTRL_PKTARB;
+       u32 val = enable ? DMA_CTRL_PKTARB : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_sram_desc_cfg(struct ldma_dev *d, bool enable)
+{
+       unsigned long flags;
+       u32 mask = DMA_CTRL_DSRAM_PATH;
+       u32 val = enable ? DMA_CTRL_DSRAM_PATH : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_chan_flow_ctl_cfg(struct ldma_dev *d, bool enable)
+{
+       unsigned long flags;
+       u32 mask, val;
+
+       if (d->inst->type != DMA_TYPE_TX)
+               return;
+
+       mask = DMA_CTRL_CH_FL;
+       val = enable ? DMA_CTRL_CH_FL : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_global_polling_enable(struct ldma_dev *d)
+{
+       unsigned long flags;
+       u32 mask = DMA_CPOLL_EN | DMA_CPOLL_CNT;
+       u32 val = DMA_CPOLL_EN;
+
+       val |= FIELD_PREP(DMA_CPOLL_CNT, d->pollcnt);
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CPOLL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_desc_fetch_on_demand_cfg(struct ldma_dev *d, bool enable)
+{
+       unsigned long flags;
+       u32 mask, val;
+
+       if (d->inst->type == DMA_TYPE_MCPY)
+               return;
+
+       mask = DMA_CTRL_DS_FOD;
+       val = enable ? DMA_CTRL_DS_FOD : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_byte_enable_cfg(struct ldma_dev *d, bool enable)
+{
+       unsigned long flags;
+       u32 mask = DMA_CTRL_ENBE;
+       u32 val = enable ? DMA_CTRL_ENBE : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_orrc_cfg(struct ldma_dev *d)
+{
+       unsigned long flags;
+       u32 val = 0;
+       u32 mask;
+
+       if (d->inst->type == DMA_TYPE_RX)
+               return;
+
+       mask = DMA_ORRC_EN | DMA_ORRC_ORRCNT;
+       if (d->inst->orrc > 0 && d->inst->orrc <= DMA_ORRC_MAX_CNT)
+               val = DMA_ORRC_EN | FIELD_PREP(DMA_ORRC_ORRCNT, d->inst->orrc);
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_ORRC);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_df_tout_cfg(struct ldma_dev *d, bool enable, int tcnt)
+{
+       u32 mask = DMA_CTRL_DESC_TMOUT_CNT_V31;
+       unsigned long flags;
+       u32 val;
+
+       if (enable)
+               val = DMA_CTRL_DESC_TMOUT_EN_V31 | FIELD_PREP(DMA_CTRL_DESC_TMOUT_CNT_V31, tcnt);
+       else
+               val = 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_dburst_wr_cfg(struct ldma_dev *d, bool enable)
+{
+       unsigned long flags;
+       u32 mask, val;
+
+       if (d->inst->type != DMA_TYPE_RX && d->inst->type != DMA_TYPE_MCPY)
+               return;
+
+       mask = DMA_CTRL_DBURST_WR;
+       val = enable ? DMA_CTRL_DBURST_WR : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_vld_fetch_ack_cfg(struct ldma_dev *d, bool enable)
+{
+       unsigned long flags;
+       u32 mask, val;
+
+       if (d->inst->type != DMA_TYPE_TX)
+               return;
+
+       mask = DMA_CTRL_VLD_DF_ACK;
+       val = enable ? DMA_CTRL_VLD_DF_ACK : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_dev_drb_cfg(struct ldma_dev *d, int enable)
+{
+       unsigned long flags;
+       u32 mask = DMA_CTRL_DRB;
+       u32 val = enable ? DMA_CTRL_DRB : 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, mask, val, DMA_CTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static int ldma_dev_cfg(struct ldma_dev *d)
+{
+       bool enable;
+
+       ldma_dev_pkt_arb_cfg(d, true);
+       ldma_dev_global_polling_enable(d);
+
+       enable = !!(d->flags & DMA_DFT_DRB);
+       ldma_dev_drb_cfg(d, enable);
+
+       enable = !!(d->flags & DMA_EN_BYTE_EN);
+       ldma_dev_byte_enable_cfg(d, enable);
+
+       enable = !!(d->flags & DMA_CHAN_FLOW_CTL);
+       ldma_dev_chan_flow_ctl_cfg(d, enable);
+
+       enable = !!(d->flags & DMA_DESC_FOD);
+       ldma_dev_desc_fetch_on_demand_cfg(d, enable);
+
+       enable = !!(d->flags & DMA_DESC_IN_SRAM);
+       ldma_dev_sram_desc_cfg(d, enable);
+
+       enable = !!(d->flags & DMA_DBURST_WR);
+       ldma_dev_dburst_wr_cfg(d, enable);
+
+       enable = !!(d->flags & DMA_VALID_DESC_FETCH_ACK);
+       ldma_dev_vld_fetch_ack_cfg(d, enable);
+
+       if (d->ver > DMA_VER22) {
+               ldma_dev_orrc_cfg(d);
+               ldma_dev_df_tout_cfg(d, true, DMA_DFT_DESC_TCNT);
+       }
+
+       dev_dbg(d->dev, "%s Controller 0x%08x configuration done\n",
+               d->inst->name, readl(d->base + DMA_CTRL));
+
+       return 0;
+}
+
+static int ldma_chan_cctrl_cfg(struct ldma_chan *c, u32 val)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 class_low, class_high;
+       unsigned long flags;
+       u32 reg;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       reg = readl(d->base + DMA_CCTRL);
+       /* Read from hardware */
+       if (reg & DMA_CCTRL_DIR_TX)
+               c->flags |= DMA_TX_CH;
+       else
+               c->flags |= DMA_RX_CH;
+
+       /* Keep the class value unchanged */
+       class_low = FIELD_GET(DMA_CCTRL_CLASS, reg);
+       class_high = FIELD_GET(DMA_CCTRL_CLASSH, reg);
+       val &= ~DMA_CCTRL_CLASS;
+       val |= FIELD_PREP(DMA_CCTRL_CLASS, class_low);
+       val &= ~DMA_CCTRL_CLASSH;
+       val |= FIELD_PREP(DMA_CCTRL_CLASSH, class_high);
+       writel(val, d->base + DMA_CCTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+
+       return 0;
+}
+
+static void ldma_chan_irq_init(struct ldma_chan *c)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+       u32 enofs, crofs;
+       u32 cn_bit;
+
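+       /* Channels 0-31 use the first enable/status bank, 32 and up the second */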
+       if (c->nr < MAX_LOWER_CHANS) {
+               enofs = DMA_IRNEN;
+               crofs = DMA_IRNCR;
+       } else {
+               enofs = DMA_IRNEN1;
+               crofs = DMA_IRNCR1;
+       }
+
+       cn_bit = BIT(c->nr & MASK_LOWER_CHANS);
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+
+       /* Clear all channel interrupts and disable them */
+       writel(0, d->base + DMA_CIE);
+       writel(DMA_CI_ALL, d->base + DMA_CIS);
+
+       ldma_update_bits(d, cn_bit, 0, enofs);
+       writel(cn_bit, d->base + crofs);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_chan_set_class(struct ldma_chan *c, u32 val)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 class_val;
+
+       if (d->inst->type == DMA_TYPE_MCPY || val > DMA_MAX_CLASS)
+               return;
+
+       /* 3 bits low */
+       class_val = FIELD_PREP(DMA_CCTRL_CLASS, val & 0x7);
+       /* 2 bits high */
+       class_val |= FIELD_PREP(DMA_CCTRL_CLASSH, (val >> 3) & 0x3);
+
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, DMA_CCTRL_CLASS | DMA_CCTRL_CLASSH, class_val,
+                        DMA_CCTRL);
+}
+
+static int ldma_chan_on(struct ldma_chan *c)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+
+       /* Don't allow the channel to be turned on before its descriptors are configured */
+       if (WARN_ON(!c->desc_init))
+               return -EINVAL;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, DMA_CCTRL_ON, DMA_CCTRL_ON, DMA_CCTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+
+       c->onoff = DMA_CH_ON;
+
+       return 0;
+}
+
+static int ldma_chan_off(struct ldma_chan *c)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+       u32 val;
+       int ret;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, DMA_CCTRL_ON, 0, DMA_CCTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+
+       ret = readl_poll_timeout_atomic(d->base + DMA_CCTRL, val,
+                                       !(val & DMA_CCTRL_ON), 0, 10000);
+       if (ret)
+               return ret;
+
+       c->onoff = DMA_CH_OFF;
+
+       return 0;
+}
+
+static void ldma_chan_desc_hw_cfg(struct ldma_chan *c, dma_addr_t desc_base,
+                                 int desc_num)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       writel(lower_32_bits(desc_base), d->base + DMA_CDBA);
+
+       /* Higher 4 bits of 36 bit addressing */
+       if (IS_ENABLED(CONFIG_64BIT)) {
+               u32 hi = upper_32_bits(desc_base) & HIGH_4_BITS;
+
+               ldma_update_bits(d, DMA_CDBA_MSB,
+                                FIELD_PREP(DMA_CDBA_MSB, hi), DMA_CCTRL);
+       }
+       writel(desc_num, d->base + DMA_CDLEN);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+
+       c->desc_init = true;
+}
+
+static struct dma_async_tx_descriptor *
+ldma_chan_desc_cfg(struct dma_chan *chan, dma_addr_t desc_base, int desc_num)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       struct dma_async_tx_descriptor *tx;
+       struct dw2_desc_sw *ds;
+
+       if (!desc_num) {
+               dev_err(d->dev, "Channel %d must allocate descriptor first\n",
+                       c->nr);
+               return NULL;
+       }
+
+       if (desc_num > DMA_MAX_DESC_NUM) {
+               dev_err(d->dev, "Channel %d descriptor number out of range %d\n",
+                       c->nr, desc_num);
+               return NULL;
+       }
+
+       ldma_chan_desc_hw_cfg(c, desc_base, desc_num);
+
+       c->flags |= DMA_HW_DESC;
+       c->desc_cnt = desc_num;
+       c->desc_phys = desc_base;
+
+       ds = kzalloc(sizeof(*ds), GFP_NOWAIT);
+       if (!ds)
+               return NULL;
+
+       tx = &ds->vdesc.tx;
+       dma_async_tx_descriptor_init(tx, chan);
+
+       return tx;
+}
+
+static int ldma_chan_reset(struct ldma_chan *c)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+       u32 val;
+       int ret;
+
+       ret = ldma_chan_off(c);
+       if (ret)
+               return ret;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, DMA_CCTRL_RST, DMA_CCTRL_RST, DMA_CCTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+
+       ret = readl_poll_timeout_atomic(d->base + DMA_CCTRL, val,
+                                       !(val & DMA_CCTRL_RST), 0, 10000);
+       if (ret)
+               return ret;
+
+       c->rst = 1;
+       c->desc_init = false;
+
+       return 0;
+}
+
+static void ldma_chan_byte_offset_cfg(struct ldma_chan *c, u32 boff_len)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 mask = DMA_C_BOFF_EN | DMA_C_BOFF_BOF_LEN;
+       u32 val;
+
+       if (boff_len > 0 && boff_len <= DMA_CHAN_BOFF_MAX)
+               val = FIELD_PREP(DMA_C_BOFF_BOF_LEN, boff_len) | DMA_C_BOFF_EN;
+       else
+               val = 0;
+
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, mask, val, DMA_C_BOFF);
+}
+
+static void ldma_chan_data_endian_cfg(struct ldma_chan *c, bool enable,
+                                     u32 endian_type)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 mask = DMA_C_END_DE_EN | DMA_C_END_DATAENDI;
+       u32 val;
+
+       if (enable)
+               val = DMA_C_END_DE_EN | FIELD_PREP(DMA_C_END_DATAENDI, endian_type);
+       else
+               val = 0;
+
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, mask, val, DMA_C_ENDIAN);
+}
+
+static void ldma_chan_desc_endian_cfg(struct ldma_chan *c, bool enable,
+                                     u32 endian_type)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 mask = DMA_C_END_DES_EN | DMA_C_END_DESENDI;
+       u32 val;
+
+       if (enable)
+               val = DMA_C_END_DES_EN | FIELD_PREP(DMA_C_END_DESENDI, endian_type);
+       else
+               val = 0;
+
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, mask, val, DMA_C_ENDIAN);
+}
+
+static void ldma_chan_hdr_mode_cfg(struct ldma_chan *c, u32 hdr_len, bool csum)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 mask, val;
+
+       /* NB: if checksum is disabled, a valid header length must be provided */
+       if (!csum && (!hdr_len || hdr_len > DMA_HDR_LEN_MAX))
+               return;
+
+       mask = DMA_C_HDRM_HDR_SUM;
+       val = DMA_C_HDRM_HDR_SUM;
+
+       if (!csum && hdr_len)
+               val = hdr_len;
+
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, mask, val, DMA_C_HDRM);
+}
+
+static void ldma_chan_rxwr_np_cfg(struct ldma_chan *c, bool enable)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 mask, val;
+
+       /* Only valid for RX channel */
+       if (ldma_chan_tx(c))
+               return;
+
+       mask = DMA_CCTRL_WR_NP_EN;
+       val = enable ? DMA_CCTRL_WR_NP_EN : 0;
+
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, mask, val, DMA_CCTRL);
+}
+
+static void ldma_chan_abc_cfg(struct ldma_chan *c, bool enable)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 mask, val;
+
+       if (d->ver < DMA_VER32 || ldma_chan_tx(c))
+               return;
+
+       mask = DMA_CCTRL_CH_ABC;
+       val = enable ? DMA_CCTRL_CH_ABC : 0;
+
+       ldma_update_bits(d, DMA_CS_MASK, c->nr, DMA_CS);
+       ldma_update_bits(d, mask, val, DMA_CCTRL);
+}
+
+static int ldma_port_cfg(struct ldma_port *p)
+{
+       unsigned long flags;
+       struct ldma_dev *d;
+       u32 reg;
+
+       d = p->ldev;
+       reg = FIELD_PREP(DMA_PCTRL_TXENDI, p->txendi);
+       reg |= FIELD_PREP(DMA_PCTRL_RXENDI, p->rxendi);
+
+       if (d->ver == DMA_VER22) {
+               reg |= FIELD_PREP(DMA_PCTRL_TXBL, p->txbl);
+               reg |= FIELD_PREP(DMA_PCTRL_RXBL, p->rxbl);
+       } else {
+               reg |= FIELD_PREP(DMA_PCTRL_PDEN, p->pkt_drop);
+
+               if (p->txbl == DMA_BURSTL_32DW)
+                       reg |= DMA_PCTRL_TXBL32;
+               else if (p->txbl == DMA_BURSTL_16DW)
+                       reg |= DMA_PCTRL_TXBL16;
+               else
+                       reg |= FIELD_PREP(DMA_PCTRL_TXBL, DMA_PCTRL_TXBL_8);
+
+               if (p->rxbl == DMA_BURSTL_32DW)
+                       reg |= DMA_PCTRL_RXBL32;
+               else if (p->rxbl == DMA_BURSTL_16DW)
+                       reg |= DMA_PCTRL_RXBL16;
+               else
+                       reg |= FIELD_PREP(DMA_PCTRL_RXBL, DMA_PCTRL_RXBL_8);
+       }
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       writel(p->portid, d->base + DMA_PS);
+       writel(reg, d->base + DMA_PCTRL);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+
+       reg = readl(d->base + DMA_PCTRL); /* read back */
+       dev_dbg(d->dev, "Port Control 0x%08x configuration done\n", reg);
+
+       return 0;
+}
+
+static int ldma_chan_cfg(struct ldma_chan *c)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+       u32 reg;
+
+       reg = c->pden ? DMA_CCTRL_PDEN : 0;
+       reg |= c->onoff ? DMA_CCTRL_ON : 0;
+       reg |= c->rst ? DMA_CCTRL_RST : 0;
+
+       ldma_chan_cctrl_cfg(c, reg);
+       ldma_chan_irq_init(c);
+
+       if (d->ver <= DMA_VER22)
+               return 0;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       ldma_chan_set_class(c, c->nr);
+       ldma_chan_byte_offset_cfg(c, c->boff_len);
+       ldma_chan_data_endian_cfg(c, c->data_endian_en, c->data_endian);
+       ldma_chan_desc_endian_cfg(c, c->desc_endian_en, c->desc_endian);
+       ldma_chan_hdr_mode_cfg(c, c->hdrm_len, c->hdrm_csum);
+       ldma_chan_rxwr_np_cfg(c, c->desc_rx_np);
+       ldma_chan_abc_cfg(c, c->abc_en);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+
+       if (ldma_chan_is_hw_desc(c))
+               ldma_chan_desc_hw_cfg(c, c->desc_phys, c->desc_cnt);
+
+       return 0;
+}
+
+static void ldma_dev_init(struct ldma_dev *d)
+{
+       unsigned long ch_mask = (unsigned long)d->channels_mask;
+       struct ldma_port *p;
+       struct ldma_chan *c;
+       int i;
+       u32 j;
+
+       spin_lock_init(&d->dev_lock);
+       ldma_dev_reset(d);
+       ldma_dev_cfg(d);
+
+       /* DMA port initialization */
+       for (i = 0; i < d->port_nrs; i++) {
+               p = &d->ports[i];
+               ldma_port_cfg(p);
+       }
+
+       /* DMA channel initialization */
+       for_each_set_bit(j, &ch_mask, d->chan_nrs) {
+               c = &d->chans[j];
+               ldma_chan_cfg(c);
+       }
+}
+
+static int ldma_cfg_init(struct ldma_dev *d)
+{
+       struct fwnode_handle *fwnode = dev_fwnode(d->dev);
+       struct ldma_port *p;
+       int i;
+
+       if (fwnode_property_read_bool(fwnode, "intel,dma-byte-en"))
+               d->flags |= DMA_EN_BYTE_EN;
+
+       if (fwnode_property_read_bool(fwnode, "intel,dma-dburst-wr"))
+               d->flags |= DMA_DBURST_WR;
+
+       if (fwnode_property_read_bool(fwnode, "intel,dma-drb"))
+               d->flags |= DMA_DFT_DRB;
+
+       if (fwnode_property_read_u32(fwnode, "intel,dma-poll-cnt",
+                                    &d->pollcnt))
+               d->pollcnt = DMA_DFT_POLL_CNT;
+
+       if (d->inst->chan_fc)
+               d->flags |= DMA_CHAN_FLOW_CTL;
+
+       if (d->inst->desc_fod)
+               d->flags |= DMA_DESC_FOD;
+
+       if (d->inst->desc_in_sram)
+               d->flags |= DMA_DESC_IN_SRAM;
+
+       if (d->inst->valid_desc_fetch_ack)
+               d->flags |= DMA_VALID_DESC_FETCH_ACK;
+
+       if (d->ver > DMA_VER22) {
+               if (!d->port_nrs)
+                       return -EINVAL;
+
+               for (i = 0; i < d->port_nrs; i++) {
+                       p = &d->ports[i];
+                       p->rxendi = DMA_DFT_ENDIAN;
+                       p->txendi = DMA_DFT_ENDIAN;
+                       p->rxbl = DMA_DFT_BURST;
+                       p->txbl = DMA_DFT_BURST;
+                       p->pkt_drop = DMA_PKT_DROP_DIS;
+               }
+       }
+
+       return 0;
+}
+
+static void dma_free_desc_resource(struct virt_dma_desc *vdesc)
+{
+       struct dw2_desc_sw *ds = to_lgm_dma_desc(vdesc);
+       struct ldma_chan *c = ds->chan;
+
+       dma_pool_free(c->desc_pool, ds->desc_hw, ds->desc_phys);
+       kfree(ds);
+}
+
+static struct dw2_desc_sw *
+dma_alloc_desc_resource(int num, struct ldma_chan *c)
+{
+       struct device *dev = c->vchan.chan.device->dev;
+       struct dw2_desc_sw *ds;
+
+       if (num > c->desc_num) {
+               dev_err(dev, "sg num %d exceed max %d\n", num, c->desc_num);
+               return NULL;
+       }
+
+       ds = kzalloc(sizeof(*ds), GFP_NOWAIT);
+       if (!ds)
+               return NULL;
+
+       ds->chan = c;
+       ds->desc_hw = dma_pool_zalloc(c->desc_pool, GFP_ATOMIC,
+                                     &ds->desc_phys);
+       if (!ds->desc_hw) {
+               dev_dbg(dev, "out of memory for link descriptor\n");
+               kfree(ds);
+               return NULL;
+       }
+       ds->desc_cnt = num;
+
+       return ds;
+}
+
+static void ldma_chan_irq_en(struct ldma_chan *c)
+{
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+
+       spin_lock_irqsave(&d->dev_lock, flags);
+       writel(c->nr, d->base + DMA_CS);
+       writel(DMA_CI_EOP, d->base + DMA_CIE);
+       writel(BIT(c->nr), d->base + DMA_IRNEN);
+       spin_unlock_irqrestore(&d->dev_lock, flags);
+}
+
+static void ldma_issue_pending(struct dma_chan *chan)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       unsigned long flags;
+
+       if (d->ver == DMA_VER22) {
+               spin_lock_irqsave(&c->vchan.lock, flags);
+               if (vchan_issue_pending(&c->vchan)) {
+                       struct virt_dma_desc *vdesc;
+
+                       /* Get the next descriptor */
+                       vdesc = vchan_next_desc(&c->vchan);
+                       if (!vdesc) {
+                               c->ds = NULL;
+                               spin_unlock_irqrestore(&c->vchan.lock, flags);
+                               return;
+                       }
+                       list_del(&vdesc->node);
+                       c->ds = to_lgm_dma_desc(vdesc);
+                       ldma_chan_desc_hw_cfg(c, c->ds->desc_phys, c->ds->desc_cnt);
+                       ldma_chan_irq_en(c);
+               }
+               spin_unlock_irqrestore(&c->vchan.lock, flags);
+       }
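+       /*
+        * On versions above DMA_VER22 the descriptors were already
+        * programmed at prepare time, so switching the channel on is
+        * all that is left to do.
+        */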
+       ldma_chan_on(c);
+}
+
+static void ldma_synchronize(struct dma_chan *chan)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+
+       /*
+        * Cancel any pending work, if any; in that case
+        * the descriptor resources need to be freed here.
+        */
+       cancel_work_sync(&c->work);
+       vchan_synchronize(&c->vchan);
+       if (c->ds)
+               dma_free_desc_resource(&c->ds->vdesc);
+}
+
+static int ldma_terminate_all(struct dma_chan *chan)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       spin_lock_irqsave(&c->vchan.lock, flags);
+       vchan_get_all_descriptors(&c->vchan, &head);
+       spin_unlock_irqrestore(&c->vchan.lock, flags);
+       vchan_dma_desc_free_list(&c->vchan, &head);
+
+       return ldma_chan_reset(c);
+}
+
+static int ldma_resume_chan(struct dma_chan *chan)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+
+       ldma_chan_on(c);
+
+       return 0;
+}
+
+static int ldma_pause_chan(struct dma_chan *chan)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+
+       return ldma_chan_off(c);
+}
+
+static enum dma_status
+ldma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
+              struct dma_tx_state *txstate)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       enum dma_status status = DMA_COMPLETE;
+
+       if (d->ver == DMA_VER22)
+               status = dma_cookie_status(chan, cookie, txstate);
+
+       return status;
+}
+
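+/*
+ * Per-channel interrupt: mask and acknowledge the channel's interrupts,
+ * then defer completion handling to the workqueue.
+ */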
+static void dma_chan_irq(int irq, void *data)
+{
+       struct ldma_chan *c = data;
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       u32 stat;
+
+       /* Disable channel interrupts */
+       writel(c->nr, d->base + DMA_CS);
+       stat = readl(d->base + DMA_CIS);
+       if (!stat)
+               return;
+
+       writel(readl(d->base + DMA_CIE) & ~DMA_CI_ALL, d->base + DMA_CIE);
+       writel(stat, d->base + DMA_CIS);
+       queue_work(d->wq, &c->work);
+}
+
+static irqreturn_t dma_interrupt(int irq, void *dev_id)
+{
+       struct ldma_dev *d = dev_id;
+       struct ldma_chan *c;
+       unsigned long irncr;
+       u32 cid;
+
+       irncr = readl(d->base + DMA_IRNCR);
+       if (!irncr) {
+               dev_err(d->dev, "dummy interrupt\n");
+               return IRQ_NONE;
+       }
+
+       for_each_set_bit(cid, &irncr, d->chan_nrs) {
+               /* Mask */
+               writel(readl(d->base + DMA_IRNEN) & ~BIT(cid), d->base + DMA_IRNEN);
+               /* Ack */
+               writel(readl(d->base + DMA_IRNCR) | BIT(cid), d->base + DMA_IRNCR);
+
+               c = &d->chans[cid];
+               dma_chan_irq(irq, c);
+       }
+
+       return IRQ_HANDLED;
+}
+
+static void prep_slave_burst_len(struct ldma_chan *c)
+{
+       struct ldma_port *p = c->port;
+       struct dma_slave_config *cfg = &c->config;
+
+       if (cfg->dst_maxburst)
+               cfg->src_maxburst = cfg->dst_maxburst;
+
+       /* TX and RX use the same burst length, stored as log2 of the maxburst */
+       p->txbl = ilog2(cfg->src_maxburst);
+       p->rxbl = p->txbl;
+}
+
+static struct dma_async_tx_descriptor *
+ldma_prep_slave_sg(struct dma_chan *chan, struct scatterlist *sgl,
+                  unsigned int sglen, enum dma_transfer_direction dir,
+                  unsigned long flags, void *context)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       size_t len, avail, total = 0;
+       struct dw2_desc *hw_ds;
+       struct dw2_desc_sw *ds;
+       struct scatterlist *sg;
+       int num = sglen, i;
+       dma_addr_t addr;
+
+       if (!sgl)
+               return NULL;
+
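+       /*
+        * Above DMA_VER22 the client supplies the hardware descriptors;
+        * just program the channel with them.
+        */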
+       if (d->ver > DMA_VER22)
+               return ldma_chan_desc_cfg(chan, sgl->dma_address, sglen);
+
+       for_each_sg(sgl, sg, sglen, i) {
+               avail = sg_dma_len(sg);
+               if (avail > DMA_MAX_SIZE)
+                       num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
+       }
+
+       ds = dma_alloc_desc_resource(num, c);
+       if (!ds)
+               return NULL;
+
+       c->ds = ds;
+
+       num = 0;
+       /* SOP and EOP must be set correctly on the first and last descriptors */
+       for_each_sg(sgl, sg, sglen, i) {
+               addr = sg_dma_address(sg);
+               avail = sg_dma_len(sg);
+               total += avail;
+
+               do {
+                       len = min_t(size_t, avail, DMA_MAX_SIZE);
+
+                       hw_ds = &ds->desc_hw[num];
+                       switch (sglen) {
+                       case 1:
+                               hw_ds->field &= ~DESC_SOP;
+                               hw_ds->field |= FIELD_PREP(DESC_SOP, 1);
+
+                               hw_ds->field &= ~DESC_EOP;
+                               hw_ds->field |= FIELD_PREP(DESC_EOP, 1);
+                               break;
+                       default:
+                               if (num == 0) {
+                                       hw_ds->field &= ~DESC_SOP;
+                                       hw_ds->field |= FIELD_PREP(DESC_SOP, 1);
+
+                                       hw_ds->field &= ~DESC_EOP;
+                                       hw_ds->field |= FIELD_PREP(DESC_EOP, 0);
+                               } else if (num == (sglen - 1)) {
+                                       hw_ds->field &= ~DESC_SOP;
+                                       hw_ds->field |= FIELD_PREP(DESC_SOP, 0);
+                                       hw_ds->field &= ~DESC_EOP;
+                                       hw_ds->field |= FIELD_PREP(DESC_EOP, 1);
+                               } else {
+                                       hw_ds->field &= ~DESC_SOP;
+                                       hw_ds->field |= FIELD_PREP(DESC_SOP, 0);
+
+                                       hw_ds->field &= ~DESC_EOP;
+                                       hw_ds->field |= FIELD_PREP(DESC_EOP, 0);
+                               }
+                               break;
+                       }
+                       /* Only 32-bit addresses are supported */
+                       hw_ds->addr = (u32)addr;
+
+                       hw_ds->field &= ~DESC_DATA_LEN;
+                       hw_ds->field |= FIELD_PREP(DESC_DATA_LEN, len);
+
+                       hw_ds->field &= ~DESC_C;
+                       hw_ds->field |= FIELD_PREP(DESC_C, 0);
+
+                       hw_ds->field &= ~DESC_BYTE_OFF;
+                       hw_ds->field |= FIELD_PREP(DESC_BYTE_OFF, addr & 0x3);
+
+                       /* Ensure data ready before ownership change */
+                       wmb();
+                       hw_ds->field &= ~DESC_OWN;
+                       hw_ds->field |= FIELD_PREP(DESC_OWN, DMA_OWN);
+
+                       /* Ensure ownership changed before moving forward */
+                       wmb();
+                       num++;
+                       addr += len;
+                       avail -= len;
+               } while (avail);
+       }
+
+       ds->size = total;
+       prep_slave_burst_len(c);
+
+       return vchan_tx_prep(&c->vchan, &ds->vdesc, DMA_CTRL_ACK);
+}
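
The switch above sets start-of-packet on the first hardware descriptor only and end-of-packet on the last only; a single-entry list carries both. The same selection, condensed into a hypothetical helper (not part of the driver) using the DESC_* masks this driver defines:

static u32 ldma_sop_eop_bits(unsigned int idx, unsigned int last_idx)
{
	u32 field = 0;

	/* SOP only on the first descriptor, EOP only on the last */
	field |= FIELD_PREP(DESC_SOP, idx == 0 ? 1 : 0);
	field |= FIELD_PREP(DESC_EOP, idx == last_idx ? 1 : 0);

	return field;
}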
+
+static int
+ldma_slave_config(struct dma_chan *chan, struct dma_slave_config *cfg)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+
+       memcpy(&c->config, cfg, sizeof(c->config));
+
+       return 0;
+}
+
+static int ldma_alloc_chan_resources(struct dma_chan *chan)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+       struct device *dev = c->vchan.chan.device->dev;
+       size_t  desc_sz;
+
+       if (d->ver > DMA_VER22) {
+               c->flags |= CHAN_IN_USE;
+               return 0;
+       }
+
+       if (c->desc_pool)
+               return c->desc_num;
+
+       desc_sz = c->desc_num * sizeof(struct dw2_desc);
+       c->desc_pool = dma_pool_create(c->name, dev, desc_sz,
+                                      __alignof__(struct dw2_desc), 0);
+
+       if (!c->desc_pool) {
+               dev_err(dev, "unable to allocate descriptor pool\n");
+               return -ENOMEM;
+       }
+
+       return c->desc_num;
+}
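
For the v2.2 hardware the per-channel descriptor ring comes out of a dma_pool sized for desc_num entries. A minimal, self-contained sketch of the dma_pool lifecycle with invented names, in case the create/alloc/free pairing is unfamiliar:

#include <linux/dmapool.h>

static int example_pool_usage(struct device *dev)
{
	struct dma_pool *pool;
	dma_addr_t phys;
	void *vaddr;

	pool = dma_pool_create("example", dev, 64, 8, 0);
	if (!pool)
		return -ENOMEM;

	/* coherent, device-visible memory; phys is what the hardware sees */
	vaddr = dma_pool_zalloc(pool, GFP_KERNEL, &phys);
	if (vaddr)
		dma_pool_free(pool, vaddr, phys);

	dma_pool_destroy(pool);
	return 0;
}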
+
+static void ldma_free_chan_resources(struct dma_chan *chan)
+{
+       struct ldma_chan *c = to_ldma_chan(chan);
+       struct ldma_dev *d = to_ldma_dev(c->vchan.chan.device);
+
+       if (d->ver == DMA_VER22) {
+               dma_pool_destroy(c->desc_pool);
+               c->desc_pool = NULL;
+               vchan_free_chan_resources(to_virt_chan(chan));
+               ldma_chan_reset(c);
+       } else {
+               c->flags &= ~CHAN_IN_USE;
+       }
+}
+
+static void dma_work(struct work_struct *work)
+{
+       struct ldma_chan *c = container_of(work, struct ldma_chan, work);
+       struct dma_async_tx_descriptor *tx = &c->ds->vdesc.tx;
+       struct virt_dma_chan *vc = &c->vchan;
+       struct dmaengine_desc_callback cb;
+       struct virt_dma_desc *vd, *_vd;
+       unsigned long flags;
+       LIST_HEAD(head);
+
+       spin_lock_irqsave(&c->vchan.lock, flags);
+       list_splice_tail_init(&vc->desc_completed, &head);
+       spin_unlock_irqrestore(&c->vchan.lock, flags);
+       dmaengine_desc_get_callback(tx, &cb);
+       dma_cookie_complete(tx);
+       dmaengine_desc_callback_invoke(&cb, NULL);
+
+       list_for_each_entry_safe(vd, _vd, &head, node) {
+               dmaengine_desc_get_callback(tx, &cb);
+               dma_cookie_complete(tx);
+               list_del(&vd->node);
+               dmaengine_desc_callback_invoke(&cb, NULL);
+
+               vchan_vdesc_fini(vd);
+       }
+       c->ds = NULL;
+}
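
On the v2.2 hardware, completion processing is pushed from the interrupt handler to an ordered workqueue (allocated in ldma_init_v22() below), so the callbacks run in sleepable context instead of a tasklet. The bare pattern, reduced to a hypothetical skeleton:

struct foo_chan {
	struct workqueue_struct *wq;	/* alloc_ordered_workqueue(...) */
	struct work_struct work;	/* INIT_WORK(&c->work, foo_work) */
};

static void foo_work(struct work_struct *work)
{
	struct foo_chan *c = container_of(work, struct foo_chan, work);

	/* splice completed descriptors under the lock, then run callbacks */
}

/* interrupt path, once the source is masked and acked:
 *	queue_work(c->wq, &c->work);
 */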
+
+static void
+update_burst_len_v22(struct ldma_chan *c, struct ldma_port *p, u32 burst)
+{
+       if (ldma_chan_tx(c))
+               p->txbl = ilog2(burst);
+       else
+               p->rxbl = ilog2(burst);
+}
+
+static void
+update_burst_len_v3X(struct ldma_chan *c, struct ldma_port *p, u32 burst)
+{
+       if (ldma_chan_tx(c))
+               p->txbl = burst;
+       else
+               p->rxbl = burst;
+}
+
+static int
+update_client_configs(struct of_dma *ofdma, struct of_phandle_args *spec)
+{
+       struct ldma_dev *d = ofdma->of_dma_data;
+       u32 chan_id =  spec->args[0];
+       u32 port_id =  spec->args[1];
+       u32 burst = spec->args[2];
+       struct ldma_port *p;
+       struct ldma_chan *c;
+
+       if (chan_id >= d->chan_nrs || port_id >= d->port_nrs)
+               return 0;
+
+       p = &d->ports[port_id];
+       c = &d->chans[chan_id];
+       c->port = p;
+
+       if (d->ver == DMA_VER22)
+               update_burst_len_v22(c, p, burst);
+       else
+               update_burst_len_v3X(c, p, burst);
+
+       ldma_port_cfg(p);
+
+       return 1;
+}
+
+static struct dma_chan *ldma_xlate(struct of_phandle_args *spec,
+                                  struct of_dma *ofdma)
+{
+       struct ldma_dev *d = ofdma->of_dma_data;
+       u32 chan_id =  spec->args[0];
+       int ret;
+
+       if (!spec->args_count)
+               return NULL;
+
+       /* If args_count is 1, the driver uses its default settings */
+       if (spec->args_count > 1) {
+               ret = update_client_configs(ofdma, spec);
+               if (!ret)
+                       return NULL;
+       }
+
+       return dma_get_slave_channel(&d->chans[chan_id].vchan.chan);
+}
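
From the consumer side this is the standard dmaengine flow; the three cells (channel, port, burst) in the phandle are what update_client_configs() above consumes. A hypothetical client sketch, with the channel name and config values invented for illustration:

static int example_client_setup(struct device *dev)
{
	struct dma_slave_config cfg = {
		.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
		.dst_maxburst = 8,
	};
	struct dma_chan *ch;
	int ret;

	ch = dma_request_chan(dev, "tx");	/* name from the client's DT node */
	if (IS_ERR(ch))
		return PTR_ERR(ch);

	ret = dmaengine_slave_config(ch, &cfg);
	if (ret) {
		dma_release_channel(ch);
		return ret;
	}

	/* ... dmaengine_prep_slave_sg(), submit, dma_async_issue_pending() ... */
	dma_release_channel(ch);
	return 0;
}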
+
+static void ldma_dma_init_v22(int i, struct ldma_dev *d)
+{
+       struct ldma_chan *c;
+
+       c = &d->chans[i];
+       c->nr = i; /* Real channel number */
+       c->rst = DMA_CHAN_RST;
+       c->desc_num = DMA_DFT_DESC_NUM;
+       snprintf(c->name, sizeof(c->name), "chan%d", c->nr);
+       INIT_WORK(&c->work, dma_work);
+       c->vchan.desc_free = dma_free_desc_resource;
+       vchan_init(&c->vchan, &d->dma_dev);
+}
+
+static void ldma_dma_init_v3X(int i, struct ldma_dev *d)
+{
+       struct ldma_chan *c;
+
+       c = &d->chans[i];
+       c->data_endian = DMA_DFT_ENDIAN;
+       c->desc_endian = DMA_DFT_ENDIAN;
+       c->data_endian_en = false;
+       c->desc_endian_en = false;
+       c->desc_rx_np = false;
+       c->flags |= DEVICE_ALLOC_DESC;
+       c->onoff = DMA_CH_OFF;
+       c->rst = DMA_CHAN_RST;
+       c->abc_en = true;
+       c->hdrm_csum = false;
+       c->boff_len = 0;
+       c->nr = i;
+       c->vchan.desc_free = dma_free_desc_resource;
+       vchan_init(&c->vchan, &d->dma_dev);
+}
+
+static int ldma_init_v22(struct ldma_dev *d, struct platform_device *pdev)
+{
+       int ret;
+
+       ret = device_property_read_u32(d->dev, "dma-channels", &d->chan_nrs);
+       if (ret < 0) {
+               dev_err(d->dev, "unable to read dma-channels property\n");
+               return ret;
+       }
+
+       d->irq = platform_get_irq(pdev, 0);
+       if (d->irq < 0)
+               return d->irq;
+
+       ret = devm_request_irq(&pdev->dev, d->irq, dma_interrupt, 0,
+                              DRIVER_NAME, d);
+       if (ret)
+               return ret;
+
+       d->wq = alloc_ordered_workqueue("dma_wq", WQ_MEM_RECLAIM |
+                       WQ_HIGHPRI);
+       if (!d->wq)
+               return -ENOMEM;
+
+       return 0;
+}
+
+static void ldma_clk_disable(void *data)
+{
+       struct ldma_dev *d = data;
+
+       clk_disable_unprepare(d->core_clk);
+       reset_control_assert(d->rst);
+}
+
+static const struct ldma_inst_data dma0 = {
+       .name = "dma0",
+       .chan_fc = false,
+       .desc_fod = false,
+       .desc_in_sram = false,
+       .valid_desc_fetch_ack = false,
+};
+
+static const struct ldma_inst_data dma2tx = {
+       .name = "dma2tx",
+       .type = DMA_TYPE_TX,
+       .orrc = 16,
+       .chan_fc = true,
+       .desc_fod = true,
+       .desc_in_sram = true,
+       .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data dma1rx = {
+       .name = "dma1rx",
+       .type = DMA_TYPE_RX,
+       .orrc = 16,
+       .chan_fc = false,
+       .desc_fod = true,
+       .desc_in_sram = true,
+       .valid_desc_fetch_ack = false,
+};
+
+static const struct ldma_inst_data dma1tx = {
+       .name = "dma1tx",
+       .type = DMA_TYPE_TX,
+       .orrc = 16,
+       .chan_fc = true,
+       .desc_fod = true,
+       .desc_in_sram = true,
+       .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data dma0tx = {
+       .name = "dma0tx",
+       .type = DMA_TYPE_TX,
+       .orrc = 16,
+       .chan_fc = true,
+       .desc_fod = true,
+       .desc_in_sram = true,
+       .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data dma3 = {
+       .name = "dma3",
+       .type = DMA_TYPE_MCPY,
+       .orrc = 16,
+       .chan_fc = false,
+       .desc_fod = false,
+       .desc_in_sram = true,
+       .valid_desc_fetch_ack = false,
+};
+
+static const struct ldma_inst_data toe_dma30 = {
+       .name = "toe_dma30",
+       .type = DMA_TYPE_MCPY,
+       .orrc = 16,
+       .chan_fc = false,
+       .desc_fod = false,
+       .desc_in_sram = true,
+       .valid_desc_fetch_ack = true,
+};
+
+static const struct ldma_inst_data toe_dma31 = {
+       .name = "toe_dma31",
+       .type = DMA_TYPE_MCPY,
+       .orrc = 16,
+       .chan_fc = false,
+       .desc_fod = false,
+       .desc_in_sram = true,
+       .valid_desc_fetch_ack = true,
+};
+
+static const struct of_device_id intel_ldma_match[] = {
+       { .compatible = "intel,lgm-cdma", .data = &dma0},
+       { .compatible = "intel,lgm-dma2tx", .data = &dma2tx},
+       { .compatible = "intel,lgm-dma1rx", .data = &dma1rx},
+       { .compatible = "intel,lgm-dma1tx", .data = &dma1tx},
+       { .compatible = "intel,lgm-dma0tx", .data = &dma0tx},
+       { .compatible = "intel,lgm-dma3", .data = &dma3},
+       { .compatible = "intel,lgm-toe-dma30", .data = &toe_dma30},
+       { .compatible = "intel,lgm-toe-dma31", .data = &toe_dma31},
+       {}
+};
+
+static int intel_ldma_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct dma_device *dma_dev;
+       unsigned long ch_mask;
+       struct ldma_chan *c;
+       struct ldma_port *p;
+       struct ldma_dev *d;
+       u32 id, bitn = 32, j;
+       int i, ret;
+
+       d = devm_kzalloc(dev, sizeof(*d), GFP_KERNEL);
+       if (!d)
+               return -ENOMEM;
+
+       /* Link controller to platform device */
+       d->dev = &pdev->dev;
+
+       d->inst = device_get_match_data(dev);
+       if (!d->inst) {
+               dev_err(dev, "No device match found\n");
+               return -ENODEV;
+       }
+
+       d->base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(d->base))
+               return PTR_ERR(d->base);
+
+       /* Power up and reset the DMA engine; some instances may be always on */
+       d->core_clk = devm_clk_get_optional(dev, NULL);
+       if (IS_ERR(d->core_clk))
+               return PTR_ERR(d->core_clk);
+       clk_prepare_enable(d->core_clk);
+
+       d->rst = devm_reset_control_get_optional(dev, NULL);
+       if (IS_ERR(d->rst))
+               return PTR_ERR(d->rst);
+       reset_control_deassert(d->rst);
+
+       ret = devm_add_action_or_reset(dev, ldma_clk_disable, d);
+       if (ret) {
+               dev_err(dev, "Failed to devm_add_action_or_reset, %d\n", ret);
+               return ret;
+       }
+
+       id = readl(d->base + DMA_ID);
+       d->chan_nrs = FIELD_GET(DMA_ID_CHNR, id);
+       d->port_nrs = FIELD_GET(DMA_ID_PNR, id);
+       d->ver = FIELD_GET(DMA_ID_REV, id);
+
+       if (id & DMA_ID_AW_36B)
+               d->flags |= DMA_ADDR_36BIT;
+
+       if (IS_ENABLED(CONFIG_64BIT) && (id & DMA_ID_AW_36B))
+               bitn = 36;
+
+       if (id & DMA_ID_DW_128B)
+               d->flags |= DMA_DATA_128BIT;
+
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(bitn));
+       if (ret) {
+               dev_err(dev, "No usable DMA configuration\n");
+               return ret;
+       }
+
+       if (d->ver == DMA_VER22) {
+               ret = ldma_init_v22(d, pdev);
+               if (ret)
+                       return ret;
+       }
+
+       ret = device_property_read_u32(dev, "dma-channel-mask", &d->channels_mask);
+       if (ret < 0)
+               d->channels_mask = GENMASK(d->chan_nrs - 1, 0);
+
+       dma_dev = &d->dma_dev;
+
+       dma_cap_zero(dma_dev->cap_mask);
+       dma_cap_set(DMA_SLAVE, dma_dev->cap_mask);
+
+       /* Channel list initialization */
+       INIT_LIST_HEAD(&dma_dev->channels);
+
+       /* Port initialization */
+       d->ports = devm_kcalloc(dev, d->port_nrs, sizeof(*p), GFP_KERNEL);
+       if (!d->ports)
+               return -ENOMEM;
+
+       /* Channel initialization */
+       d->chans = devm_kcalloc(d->dev, d->chan_nrs, sizeof(*c), GFP_KERNEL);
+       if (!d->chans)
+               return -ENOMEM;
+
+       for (i = 0; i < d->port_nrs; i++) {
+               p = &d->ports[i];
+               p->portid = i;
+               p->ldev = d;
+       }
+
+       ret = ldma_cfg_init(d);
+       if (ret)
+               return ret;
+
+       dma_dev->dev = &pdev->dev;
+
+       ch_mask = (unsigned long)d->channels_mask;
+       for_each_set_bit(j, &ch_mask, d->chan_nrs) {
+               if (d->ver == DMA_VER22)
+                       ldma_dma_init_v22(j, d);
+               else
+                       ldma_dma_init_v3X(j, d);
+       }
+
+       dma_dev->device_alloc_chan_resources = ldma_alloc_chan_resources;
+       dma_dev->device_free_chan_resources = ldma_free_chan_resources;
+       dma_dev->device_terminate_all = ldma_terminate_all;
+       dma_dev->device_issue_pending = ldma_issue_pending;
+       dma_dev->device_tx_status = ldma_tx_status;
+       dma_dev->device_resume = ldma_resume_chan;
+       dma_dev->device_pause = ldma_pause_chan;
+       dma_dev->device_prep_slave_sg = ldma_prep_slave_sg;
+
+       if (d->ver == DMA_VER22) {
+               dma_dev->device_config = ldma_slave_config;
+               dma_dev->device_synchronize = ldma_synchronize;
+               dma_dev->src_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+               dma_dev->dst_addr_widths = BIT(DMA_SLAVE_BUSWIDTH_4_BYTES);
+               dma_dev->directions = BIT(DMA_MEM_TO_DEV) |
+                                     BIT(DMA_DEV_TO_MEM);
+               dma_dev->residue_granularity =
+                                       DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+       }
+
+       platform_set_drvdata(pdev, d);
+
+       ldma_dev_init(d);
+
+       ret = dma_async_device_register(dma_dev);
+       if (ret) {
+               dev_err(dev, "Failed to register slave DMA engine device\n");
+               return ret;
+       }
+
+       ret = of_dma_controller_register(pdev->dev.of_node, ldma_xlate, d);
+       if (ret) {
+               dev_err(dev, "Failed to register of DMA controller\n");
+               dma_async_device_unregister(dma_dev);
+               return ret;
+       }
+
+       dev_info(dev, "Init done - rev: %x, ports: %d channels: %d\n", d->ver,
+                d->port_nrs, d->chan_nrs);
+
+       return 0;
+}
+
+static struct platform_driver intel_ldma_driver = {
+       .probe = intel_ldma_probe,
+       .driver = {
+               .name = DRIVER_NAME,
+               .of_match_table = intel_ldma_match,
+       },
+};
+
+/*
+ * Register this driver via device_initcall() so that it is initialized before
+ * its DMA clients, some of which are platform specific, and so that the
+ * registered DMA channels and DMA capabilities are available to those clients
+ * before they initialize.
+ */
+static int __init intel_ldma_init(void)
+{
+       return platform_driver_register(&intel_ldma_driver);
+}
+
+device_initcall(intel_ldma_init);
index b84303b..89f1814 100644
@@ -18,7 +18,6 @@
 #include <linux/of_device.h>
 #include <linux/of_dma.h>
 #include <linux/of.h>
-#include <linux/dma/mmp-pdma.h>
 
 #include "dmaengine.h"
 
@@ -1148,19 +1147,6 @@ static struct platform_driver mmp_pdma_driver = {
        .remove         = mmp_pdma_remove,
 };
 
-bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param)
-{
-       struct mmp_pdma_chan *c = to_mmp_pdma_chan(chan);
-
-       if (chan->device->dev->driver != &mmp_pdma_driver.driver)
-               return false;
-
-       c->drcmr = *(unsigned int *)param;
-
-       return true;
-}
-EXPORT_SYMBOL_GPL(mmp_pdma_filter_fn);
-
 module_platform_driver(mmp_pdma_driver);
 
 MODULE_DESCRIPTION("MARVELL MMP Peripheral DMA Driver");
index 9fede32..1f0bbae 100644
@@ -1080,8 +1080,9 @@ static struct dma_chan *owl_dma_of_xlate(struct of_phandle_args *dma_spec,
 }
 
 static const struct of_device_id owl_dma_match[] = {
-       { .compatible = "actions,s900-dma", .data = (void *)S900_DMA,},
+       { .compatible = "actions,s500-dma", .data = (void *)S900_DMA,},
        { .compatible = "actions,s700-dma", .data = (void *)S700_DMA,},
+       { .compatible = "actions,s900-dma", .data = (void *)S900_DMA,},
        { /* sentinel */ },
 };
 MODULE_DEVICE_TABLE(of, owl_dma_match);
@@ -1245,6 +1246,7 @@ static int owl_dma_remove(struct platform_device *pdev)
        owl_dma_free(od);
 
        clk_disable_unprepare(od->clk);
+       dma_pool_destroy(od->lli_pool);
 
        return 0;
 }
index 8857985..c8a77b4 100644
@@ -1270,13 +1270,13 @@ static int bam_dma_probe(struct platform_device *pdev)
                        dev_err(bdev->dev, "num-ees unspecified in dt\n");
        }
 
-       bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
-       if (IS_ERR(bdev->bamclk)) {
-               if (!bdev->controlled_remotely)
-                       return PTR_ERR(bdev->bamclk);
+       if (bdev->controlled_remotely)
+               bdev->bamclk = devm_clk_get_optional(bdev->dev, "bam_clk");
+       else
+               bdev->bamclk = devm_clk_get(bdev->dev, "bam_clk");
 
-               bdev->bamclk = NULL;
-       }
+       if (IS_ERR(bdev->bamclk))
+               return PTR_ERR(bdev->bamclk);
 
        ret = clk_prepare_enable(bdev->bamclk);
        if (ret) {
@@ -1350,7 +1350,7 @@ static int bam_dma_probe(struct platform_device *pdev)
        if (ret)
                goto err_unregister_dma;
 
-       if (bdev->controlled_remotely) {
+       if (!bdev->bamclk) {
                pm_runtime_disable(&pdev->dev);
                return 0;
        }
@@ -1438,10 +1438,10 @@ static int __maybe_unused bam_dma_suspend(struct device *dev)
 {
        struct bam_device *bdev = dev_get_drvdata(dev);
 
-       if (!bdev->controlled_remotely)
+       if (bdev->bamclk) {
                pm_runtime_force_suspend(dev);
-
-       clk_unprepare(bdev->bamclk);
+               clk_unprepare(bdev->bamclk);
+       }
 
        return 0;
 }
@@ -1451,12 +1451,13 @@ static int __maybe_unused bam_dma_resume(struct device *dev)
        struct bam_device *bdev = dev_get_drvdata(dev);
        int ret;
 
-       ret = clk_prepare(bdev->bamclk);
-       if (ret)
-               return ret;
+       if (bdev->bamclk) {
+               ret = clk_prepare(bdev->bamclk);
+               if (ret)
+                       return ret;
 
-       if (!bdev->controlled_remotely)
                pm_runtime_force_resume(dev);
+       }
 
        return 0;
 }
index 1a0bf6b..57f5ee4 100644
@@ -584,7 +584,7 @@ static inline void gpi_write_reg_field(struct gpii *gpii, void __iomem *addr,
        gpi_write_reg(gpii, addr, val);
 }
 
-static inline void
+static __always_inline void
 gpi_update_reg(struct gpii *gpii, u32 offset, u32 mask, u32 val)
 {
        void __iomem *addr = gpii->regs + offset;
@@ -1700,7 +1700,7 @@ static int gpi_create_i2c_tre(struct gchan *chan, struct gpi_desc *desc,
 
                tre->dword[3] = u32_encode_bits(TRE_TYPE_DMA, TRE_FLAGS_TYPE);
                tre->dword[3] |= u32_encode_bits(1, TRE_FLAGS_IEOT);
-       };
+       }
 
        for (i = 0; i < tre_idx; i++)
                dev_dbg(dev, "TRE:%d %x:%x:%x:%x\n", i, desc->tre[i].dword[0],
index a577053..d530c1b 100644
@@ -189,7 +189,8 @@ struct rcar_dmac_chan {
  * struct rcar_dmac - R-Car Gen2 DMA Controller
  * @engine: base DMA engine object
  * @dev: the hardware device
- * @iomem: remapped I/O memory base
+ * @dmac_base: remapped base register block
+ * @chan_base: remapped channel register block (optional)
  * @n_channels: number of available channels
  * @channels: array of DMAC channels
  * @channels_mask: bitfield of which DMA channels are managed by this driver
@@ -198,7 +199,8 @@ struct rcar_dmac_chan {
 struct rcar_dmac {
        struct dma_device engine;
        struct device *dev;
-       void __iomem *iomem;
+       void __iomem *dmac_base;
+       void __iomem *chan_base;
 
        unsigned int n_channels;
        struct rcar_dmac_chan *channels;
@@ -209,6 +211,10 @@ struct rcar_dmac {
 
 #define to_rcar_dmac(d)                container_of(d, struct rcar_dmac, engine)
 
+#define for_each_rcar_dmac_chan(i, dmac, chan)                                         \
+       for (i = 0, chan = &(dmac)->channels[0]; i < (dmac)->n_channels; i++, chan++)   \
+               if (!((dmac)->channels_mask & BIT(i))) continue; else
+
 /*
  * struct rcar_dmac_of_data - This driver's OF data
  * @chan_offset_base: DMAC channels base offset
@@ -230,7 +236,7 @@ struct rcar_dmac_of_data {
 #define RCAR_DMAOR_PRI_ROUND_ROBIN     (3 << 8)
 #define RCAR_DMAOR_AE                  (1 << 2)
 #define RCAR_DMAOR_DME                 (1 << 0)
-#define RCAR_DMACHCLR                  0x0080
+#define RCAR_DMACHCLR                  0x0080  /* Not on R-Car V3U */
 #define RCAR_DMADPSEC                  0x00a0
 
 #define RCAR_DMASAR                    0x0000
@@ -293,6 +299,9 @@ struct rcar_dmac_of_data {
 #define RCAR_DMAFIXDAR                 0x0014
 #define RCAR_DMAFIXDPBASE              0x0060
 
+/* For R-Car V3U */
+#define RCAR_V3U_DMACHCLR              0x0100
+
 /* Hardcode the MEMCPY transfer size to 4 bytes. */
 #define RCAR_DMAC_MEMCPY_XFER_SIZE     4
 
@@ -303,17 +312,17 @@ struct rcar_dmac_of_data {
 static void rcar_dmac_write(struct rcar_dmac *dmac, u32 reg, u32 data)
 {
        if (reg == RCAR_DMAOR)
-               writew(data, dmac->iomem + reg);
+               writew(data, dmac->dmac_base + reg);
        else
-               writel(data, dmac->iomem + reg);
+               writel(data, dmac->dmac_base + reg);
 }
 
 static u32 rcar_dmac_read(struct rcar_dmac *dmac, u32 reg)
 {
        if (reg == RCAR_DMAOR)
-               return readw(dmac->iomem + reg);
+               return readw(dmac->dmac_base + reg);
        else
-               return readl(dmac->iomem + reg);
+               return readl(dmac->dmac_base + reg);
 }
 
 static u32 rcar_dmac_chan_read(struct rcar_dmac_chan *chan, u32 reg)
@@ -332,6 +341,28 @@ static void rcar_dmac_chan_write(struct rcar_dmac_chan *chan, u32 reg, u32 data)
                writel(data, chan->iomem + reg);
 }
 
+static void rcar_dmac_chan_clear(struct rcar_dmac *dmac,
+                                struct rcar_dmac_chan *chan)
+{
+       if (dmac->chan_base)
+               rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1);
+       else
+               rcar_dmac_write(dmac, RCAR_DMACHCLR, BIT(chan->index));
+}
+
+static void rcar_dmac_chan_clear_all(struct rcar_dmac *dmac)
+{
+       struct rcar_dmac_chan *chan;
+       unsigned int i;
+
+       if (dmac->chan_base) {
+               for_each_rcar_dmac_chan(i, dmac, chan)
+                       rcar_dmac_chan_write(chan, RCAR_V3U_DMACHCLR, 1);
+       } else {
+               rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask);
+       }
+}
+
 /* -----------------------------------------------------------------------------
  * Initialization and configuration
  */
@@ -447,7 +478,7 @@ static int rcar_dmac_init(struct rcar_dmac *dmac)
        u16 dmaor;
 
        /* Clear all channels and enable the DMAC globally. */
-       rcar_dmac_write(dmac, RCAR_DMACHCLR, dmac->channels_mask);
+       rcar_dmac_chan_clear_all(dmac);
        rcar_dmac_write(dmac, RCAR_DMAOR,
                        RCAR_DMAOR_PRI_FIXED | RCAR_DMAOR_DME);
 
@@ -817,15 +848,11 @@ static void rcar_dmac_chan_reinit(struct rcar_dmac_chan *chan)
 
 static void rcar_dmac_stop_all_chan(struct rcar_dmac *dmac)
 {
+       struct rcar_dmac_chan *chan;
        unsigned int i;
 
        /* Stop all channels. */
-       for (i = 0; i < dmac->n_channels; ++i) {
-               struct rcar_dmac_chan *chan = &dmac->channels[i];
-
-               if (!(dmac->channels_mask & BIT(i)))
-                       continue;
-
+       for_each_rcar_dmac_chan(i, dmac, chan) {
                /* Stop and reinitialize the channel. */
                spin_lock_irq(&chan->lock);
                rcar_dmac_chan_halt(chan);
@@ -1566,7 +1593,7 @@ static irqreturn_t rcar_dmac_isr_channel(int irq, void *dev)
                 * because channel is already stopped in error case.
                 * We need to clear register and check DE bit as recovery.
                 */
-               rcar_dmac_write(dmac, RCAR_DMACHCLR, 1 << chan->index);
+               rcar_dmac_chan_clear(dmac, chan);
                rcar_dmac_chcr_de_barrier(chan);
                reinit = true;
                goto spin_lock_end;
@@ -1732,9 +1759,7 @@ static const struct dev_pm_ops rcar_dmac_pm = {
  */
 
 static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
-                               struct rcar_dmac_chan *rchan,
-                               const struct rcar_dmac_of_data *data,
-                               unsigned int index)
+                               struct rcar_dmac_chan *rchan)
 {
        struct platform_device *pdev = to_platform_device(dmac->dev);
        struct dma_chan *chan = &rchan->chan;
@@ -1742,9 +1767,6 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
        char *irqname;
        int ret;
 
-       rchan->index = index;
-       rchan->iomem = dmac->iomem + data->chan_offset_base +
-                      data->chan_offset_stride * index;
        rchan->mid_rid = -EINVAL;
 
        spin_lock_init(&rchan->lock);
@@ -1756,13 +1778,13 @@ static int rcar_dmac_chan_probe(struct rcar_dmac *dmac,
        INIT_LIST_HEAD(&rchan->desc.wait);
 
        /* Request the channel interrupt. */
-       sprintf(pdev_irqname, "ch%u", index);
+       sprintf(pdev_irqname, "ch%u", rchan->index);
        rchan->irq = platform_get_irq_byname(pdev, pdev_irqname);
        if (rchan->irq < 0)
                return -ENODEV;
 
        irqname = devm_kasprintf(dmac->dev, GFP_KERNEL, "%s:%u",
-                                dev_name(dmac->dev), index);
+                                dev_name(dmac->dev), rchan->index);
        if (!irqname)
                return -ENOMEM;
 
@@ -1828,9 +1850,11 @@ static int rcar_dmac_probe(struct platform_device *pdev)
                DMA_SLAVE_BUSWIDTH_2_BYTES | DMA_SLAVE_BUSWIDTH_4_BYTES |
                DMA_SLAVE_BUSWIDTH_8_BYTES | DMA_SLAVE_BUSWIDTH_16_BYTES |
                DMA_SLAVE_BUSWIDTH_32_BYTES | DMA_SLAVE_BUSWIDTH_64_BYTES;
+       const struct rcar_dmac_of_data *data;
+       struct rcar_dmac_chan *chan;
        struct dma_device *engine;
+       void __iomem *chan_base;
        struct rcar_dmac *dmac;
-       const struct rcar_dmac_of_data *data;
        unsigned int i;
        int ret;
 
@@ -1868,9 +1892,24 @@ static int rcar_dmac_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        /* Request resources. */
-       dmac->iomem = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(dmac->iomem))
-               return PTR_ERR(dmac->iomem);
+       dmac->dmac_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(dmac->dmac_base))
+               return PTR_ERR(dmac->dmac_base);
+
+       if (!data->chan_offset_base) {
+               dmac->chan_base = devm_platform_ioremap_resource(pdev, 1);
+               if (IS_ERR(dmac->chan_base))
+                       return PTR_ERR(dmac->chan_base);
+
+               chan_base = dmac->chan_base;
+       } else {
+               chan_base = dmac->dmac_base + data->chan_offset_base;
+       }
+
+       for_each_rcar_dmac_chan(i, dmac, chan) {
+               chan->index = i;
+               chan->iomem = chan_base + i * data->chan_offset_stride;
+       }
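
A worked example of the two layouts this computes, using the OF data shown further down:

	/* legacy (rcar_dmac_data):  chan 3 -> dmac_base + 0x8000 + 3 * 0x80
	 *                                  = dmac_base + 0x8180
	 * V3U (rcar_v3u_dmac_data): chan 3 -> chan_base + 3 * 0x1000,
	 *                           where chan_base maps MMIO resource 1
	 */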
 
        /* Enable runtime PM and initialize the device. */
        pm_runtime_enable(&pdev->dev);
@@ -1916,11 +1955,8 @@ static int rcar_dmac_probe(struct platform_device *pdev)
 
        INIT_LIST_HEAD(&engine->channels);
 
-       for (i = 0; i < dmac->n_channels; ++i) {
-               if (!(dmac->channels_mask & BIT(i)))
-                       continue;
-
-               ret = rcar_dmac_chan_probe(dmac, &dmac->channels[i], data, i);
+       for_each_rcar_dmac_chan(i, dmac, chan) {
+               ret = rcar_dmac_chan_probe(dmac, chan);
                if (ret < 0)
                        goto error;
        }
@@ -1968,14 +2004,22 @@ static void rcar_dmac_shutdown(struct platform_device *pdev)
 }
 
 static const struct rcar_dmac_of_data rcar_dmac_data = {
-       .chan_offset_base = 0x8000,
-       .chan_offset_stride = 0x80,
+       .chan_offset_base       = 0x8000,
+       .chan_offset_stride     = 0x80,
+};
+
+static const struct rcar_dmac_of_data rcar_v3u_dmac_data = {
+       .chan_offset_base       = 0x0,
+       .chan_offset_stride     = 0x1000,
 };
 
 static const struct of_device_id rcar_dmac_of_ids[] = {
        {
                .compatible = "renesas,rcar-dmac",
                .data = &rcar_dmac_data,
+       }, {
+               .compatible = "renesas,dmac-r8a779a0",
+               .data = &rcar_v3u_dmac_data,
        },
        { /* Sentinel */ }
 };
diff --git a/drivers/dma/sirf-dma.c b/drivers/dma/sirf-dma.c
deleted file mode 100644
index a5c2843..0000000
+++ /dev/null
@@ -1,1170 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * DMA controller driver for CSR SiRFprimaII
- *
- * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
- */
-
-#include <linux/module.h>
-#include <linux/dmaengine.h>
-#include <linux/dma-mapping.h>
-#include <linux/pm_runtime.h>
-#include <linux/interrupt.h>
-#include <linux/io.h>
-#include <linux/slab.h>
-#include <linux/of_irq.h>
-#include <linux/of_address.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
-#include <linux/clk.h>
-#include <linux/of_dma.h>
-#include <linux/sirfsoc_dma.h>
-
-#include "dmaengine.h"
-
-#define SIRFSOC_DMA_VER_A7V1                    1
-#define SIRFSOC_DMA_VER_A7V2                    2
-#define SIRFSOC_DMA_VER_A6                      4
-
-#define SIRFSOC_DMA_DESCRIPTORS                 16
-#define SIRFSOC_DMA_CHANNELS                    16
-#define SIRFSOC_DMA_TABLE_NUM                   256
-
-#define SIRFSOC_DMA_CH_ADDR                     0x00
-#define SIRFSOC_DMA_CH_XLEN                     0x04
-#define SIRFSOC_DMA_CH_YLEN                     0x08
-#define SIRFSOC_DMA_CH_CTRL                     0x0C
-
-#define SIRFSOC_DMA_WIDTH_0                     0x100
-#define SIRFSOC_DMA_CH_VALID                    0x140
-#define SIRFSOC_DMA_CH_INT                      0x144
-#define SIRFSOC_DMA_INT_EN                      0x148
-#define SIRFSOC_DMA_INT_EN_CLR                  0x14C
-#define SIRFSOC_DMA_CH_LOOP_CTRL                0x150
-#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR            0x154
-#define SIRFSOC_DMA_WIDTH_ATLAS7                0x10
-#define SIRFSOC_DMA_VALID_ATLAS7                0x14
-#define SIRFSOC_DMA_INT_ATLAS7                  0x18
-#define SIRFSOC_DMA_INT_EN_ATLAS7               0x1c
-#define SIRFSOC_DMA_LOOP_CTRL_ATLAS7            0x20
-#define SIRFSOC_DMA_CUR_DATA_ADDR               0x34
-#define SIRFSOC_DMA_MUL_ATLAS7                  0x38
-#define SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7         0x158
-#define SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7     0x15C
-#define SIRFSOC_DMA_IOBG_SCMD_EN               0x800
-#define SIRFSOC_DMA_EARLY_RESP_SET             0x818
-#define SIRFSOC_DMA_EARLY_RESP_CLR             0x81C
-
-#define SIRFSOC_DMA_MODE_CTRL_BIT               4
-#define SIRFSOC_DMA_DIR_CTRL_BIT                5
-#define SIRFSOC_DMA_MODE_CTRL_BIT_ATLAS7        2
-#define SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7       3
-#define SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7         4
-#define SIRFSOC_DMA_TAB_NUM_ATLAS7              7
-#define SIRFSOC_DMA_CHAIN_INT_BIT_ATLAS7        5
-#define SIRFSOC_DMA_CHAIN_FLAG_SHIFT_ATLAS7     25
-#define SIRFSOC_DMA_CHAIN_ADDR_SHIFT            32
-
-#define SIRFSOC_DMA_INT_FINI_INT_ATLAS7         BIT(0)
-#define SIRFSOC_DMA_INT_CNT_INT_ATLAS7          BIT(1)
-#define SIRFSOC_DMA_INT_PAU_INT_ATLAS7          BIT(2)
-#define SIRFSOC_DMA_INT_LOOP_INT_ATLAS7         BIT(3)
-#define SIRFSOC_DMA_INT_INV_INT_ATLAS7          BIT(4)
-#define SIRFSOC_DMA_INT_END_INT_ATLAS7          BIT(5)
-#define SIRFSOC_DMA_INT_ALL_ATLAS7              0x3F
-
-/* xlen and dma_width register is in 4 bytes boundary */
-#define SIRFSOC_DMA_WORD_LEN                   4
-#define SIRFSOC_DMA_XLEN_MAX_V1         0x800
-#define SIRFSOC_DMA_XLEN_MAX_V2         0x1000
-
-struct sirfsoc_dma_desc {
-       struct dma_async_tx_descriptor  desc;
-       struct list_head                node;
-
-       /* SiRFprimaII 2D-DMA parameters */
-
-       int             xlen;           /* DMA xlen */
-       int             ylen;           /* DMA ylen */
-       int             width;          /* DMA width */
-       int             dir;
-       bool            cyclic;         /* is loop DMA? */
-       bool            chain;          /* is chain DMA? */
-       u32             addr;           /* DMA buffer address */
-       u64 chain_table[SIRFSOC_DMA_TABLE_NUM]; /* chain tbl */
-};
-
-struct sirfsoc_dma_chan {
-       struct dma_chan                 chan;
-       struct list_head                free;
-       struct list_head                prepared;
-       struct list_head                queued;
-       struct list_head                active;
-       struct list_head                completed;
-       unsigned long                   happened_cyclic;
-       unsigned long                   completed_cyclic;
-
-       /* Lock for this structure */
-       spinlock_t                      lock;
-
-       int                             mode;
-};
-
-struct sirfsoc_dma_regs {
-       u32                             ctrl[SIRFSOC_DMA_CHANNELS];
-       u32                             interrupt_en;
-};
-
-struct sirfsoc_dma {
-       struct dma_device               dma;
-       struct tasklet_struct           tasklet;
-       struct sirfsoc_dma_chan         channels[SIRFSOC_DMA_CHANNELS];
-       void __iomem                    *base;
-       int                             irq;
-       struct clk                      *clk;
-       int                             type;
-       void (*exec_desc)(struct sirfsoc_dma_desc *sdesc,
-               int cid, int burst_mode, void __iomem *base);
-       struct sirfsoc_dma_regs         regs_save;
-};
-
-struct sirfsoc_dmadata {
-       void (*exec)(struct sirfsoc_dma_desc *sdesc,
-               int cid, int burst_mode, void __iomem *base);
-       int type;
-};
-
-enum sirfsoc_dma_chain_flag {
-       SIRFSOC_DMA_CHAIN_NORMAL = 0x01,
-       SIRFSOC_DMA_CHAIN_PAUSE = 0x02,
-       SIRFSOC_DMA_CHAIN_LOOP = 0x03,
-       SIRFSOC_DMA_CHAIN_END = 0x04
-};
-
-#define DRV_NAME       "sirfsoc_dma"
-
-static int sirfsoc_dma_runtime_suspend(struct device *dev);
-
-/* Convert struct dma_chan to struct sirfsoc_dma_chan */
-static inline
-struct sirfsoc_dma_chan *dma_chan_to_sirfsoc_dma_chan(struct dma_chan *c)
-{
-       return container_of(c, struct sirfsoc_dma_chan, chan);
-}
-
-/* Convert struct dma_chan to struct sirfsoc_dma */
-static inline struct sirfsoc_dma *dma_chan_to_sirfsoc_dma(struct dma_chan *c)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(c);
-       return container_of(schan, struct sirfsoc_dma, channels[c->chan_id]);
-}
-
-static void sirfsoc_dma_execute_hw_a7v2(struct sirfsoc_dma_desc *sdesc,
-               int cid, int burst_mode, void __iomem *base)
-{
-       if (sdesc->chain) {
-               /* DMA v2 HW chain mode */
-               writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) |
-                              (sdesc->chain <<
-                               SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) |
-                              (0x8 << SIRFSOC_DMA_TAB_NUM_ATLAS7) | 0x3,
-                              base + SIRFSOC_DMA_CH_CTRL);
-       } else {
-               /* DMA v2 legacy mode */
-               writel_relaxed(sdesc->xlen, base + SIRFSOC_DMA_CH_XLEN);
-               writel_relaxed(sdesc->ylen, base + SIRFSOC_DMA_CH_YLEN);
-               writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_ATLAS7);
-               writel_relaxed((sdesc->width*((sdesc->ylen+1)>>1)),
-                               base + SIRFSOC_DMA_MUL_ATLAS7);
-               writel_relaxed((sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT_ATLAS7) |
-                              (sdesc->chain <<
-                               SIRFSOC_DMA_CHAIN_CTRL_BIT_ATLAS7) |
-                              0x3, base + SIRFSOC_DMA_CH_CTRL);
-       }
-       writel_relaxed(sdesc->chain ? SIRFSOC_DMA_INT_END_INT_ATLAS7 :
-                      (SIRFSOC_DMA_INT_FINI_INT_ATLAS7 |
-                       SIRFSOC_DMA_INT_LOOP_INT_ATLAS7),
-                      base + SIRFSOC_DMA_INT_EN_ATLAS7);
-       writel(sdesc->addr, base + SIRFSOC_DMA_CH_ADDR);
-       if (sdesc->cyclic)
-               writel(0x10001, base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
-}
-
-static void sirfsoc_dma_execute_hw_a7v1(struct sirfsoc_dma_desc *sdesc,
-               int cid, int burst_mode, void __iomem *base)
-{
-       writel_relaxed(1, base + SIRFSOC_DMA_IOBG_SCMD_EN);
-       writel_relaxed((1 << cid), base + SIRFSOC_DMA_EARLY_RESP_SET);
-       writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4);
-       writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
-                      (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
-                      base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
-       writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN);
-       writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN);
-       writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) |
-                      (1 << cid), base + SIRFSOC_DMA_INT_EN);
-       writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
-       if (sdesc->cyclic) {
-               writel((1 << cid) | 1 << (cid + 16) |
-                      readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7),
-                      base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7);
-       }
-
-}
-
-static void sirfsoc_dma_execute_hw_a6(struct sirfsoc_dma_desc *sdesc,
-               int cid, int burst_mode, void __iomem *base)
-{
-       writel_relaxed(sdesc->width, base + SIRFSOC_DMA_WIDTH_0 + cid * 4);
-       writel_relaxed(cid | (burst_mode << SIRFSOC_DMA_MODE_CTRL_BIT) |
-                      (sdesc->dir << SIRFSOC_DMA_DIR_CTRL_BIT),
-                      base + cid * 0x10 + SIRFSOC_DMA_CH_CTRL);
-       writel_relaxed(sdesc->xlen, base + cid * 0x10 + SIRFSOC_DMA_CH_XLEN);
-       writel_relaxed(sdesc->ylen, base + cid * 0x10 + SIRFSOC_DMA_CH_YLEN);
-       writel_relaxed(readl_relaxed(base + SIRFSOC_DMA_INT_EN) |
-                      (1 << cid), base + SIRFSOC_DMA_INT_EN);
-       writel(sdesc->addr >> 2, base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR);
-       if (sdesc->cyclic) {
-               writel((1 << cid) | 1 << (cid + 16) |
-                      readl_relaxed(base + SIRFSOC_DMA_CH_LOOP_CTRL),
-                      base + SIRFSOC_DMA_CH_LOOP_CTRL);
-       }
-
-}
-
-/* Execute all queued DMA descriptors */
-static void sirfsoc_dma_execute(struct sirfsoc_dma_chan *schan)
-{
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
-       int cid = schan->chan.chan_id;
-       struct sirfsoc_dma_desc *sdesc = NULL;
-       void __iomem *base;
-
-       /*
-        * lock has been held by functions calling this, so we don't hold
-        * lock again
-        */
-       base = sdma->base;
-       sdesc = list_first_entry(&schan->queued, struct sirfsoc_dma_desc,
-                                node);
-       /* Move the first queued descriptor to active list */
-       list_move_tail(&sdesc->node, &schan->active);
-
-       if (sdma->type == SIRFSOC_DMA_VER_A7V2)
-               cid = 0;
-
-       /* Start the DMA transfer */
-       sdma->exec_desc(sdesc, cid, schan->mode, base);
-
-       if (sdesc->cyclic)
-               schan->happened_cyclic = schan->completed_cyclic = 0;
-}
-
-/* Interrupt handler */
-static irqreturn_t sirfsoc_dma_irq(int irq, void *data)
-{
-       struct sirfsoc_dma *sdma = data;
-       struct sirfsoc_dma_chan *schan;
-       struct sirfsoc_dma_desc *sdesc = NULL;
-       u32 is;
-       bool chain;
-       int ch;
-       void __iomem *reg;
-
-       switch (sdma->type) {
-       case SIRFSOC_DMA_VER_A6:
-       case SIRFSOC_DMA_VER_A7V1:
-               is = readl(sdma->base + SIRFSOC_DMA_CH_INT);
-               reg = sdma->base + SIRFSOC_DMA_CH_INT;
-               while ((ch = fls(is) - 1) >= 0) {
-                       is &= ~(1 << ch);
-                       writel_relaxed(1 << ch, reg);
-                       schan = &sdma->channels[ch];
-                       spin_lock(&schan->lock);
-                       sdesc = list_first_entry(&schan->active,
-                                                struct sirfsoc_dma_desc, node);
-                       if (!sdesc->cyclic) {
-                               /* Execute queued descriptors */
-                               list_splice_tail_init(&schan->active,
-                                                     &schan->completed);
-                               dma_cookie_complete(&sdesc->desc);
-                               if (!list_empty(&schan->queued))
-                                       sirfsoc_dma_execute(schan);
-                       } else
-                               schan->happened_cyclic++;
-                       spin_unlock(&schan->lock);
-               }
-               break;
-
-       case SIRFSOC_DMA_VER_A7V2:
-               is = readl(sdma->base + SIRFSOC_DMA_INT_ATLAS7);
-
-               reg = sdma->base + SIRFSOC_DMA_INT_ATLAS7;
-               writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7, reg);
-               schan = &sdma->channels[0];
-               spin_lock(&schan->lock);
-               sdesc = list_first_entry(&schan->active,
-                                        struct sirfsoc_dma_desc, node);
-               if (!sdesc->cyclic) {
-                       chain = sdesc->chain;
-                       if ((chain && (is & SIRFSOC_DMA_INT_END_INT_ATLAS7)) ||
-                               (!chain &&
-                               (is & SIRFSOC_DMA_INT_FINI_INT_ATLAS7))) {
-                               /* Execute queued descriptors */
-                               list_splice_tail_init(&schan->active,
-                                                     &schan->completed);
-                               dma_cookie_complete(&sdesc->desc);
-                               if (!list_empty(&schan->queued))
-                                       sirfsoc_dma_execute(schan);
-                       }
-               } else if (sdesc->cyclic && (is &
-                                       SIRFSOC_DMA_INT_LOOP_INT_ATLAS7))
-                       schan->happened_cyclic++;
-
-               spin_unlock(&schan->lock);
-               break;
-
-       default:
-               break;
-       }
-
-       /* Schedule tasklet */
-       tasklet_schedule(&sdma->tasklet);
-
-       return IRQ_HANDLED;
-}
-
-/* process completed descriptors */
-static void sirfsoc_dma_process_completed(struct sirfsoc_dma *sdma)
-{
-       dma_cookie_t last_cookie = 0;
-       struct sirfsoc_dma_chan *schan;
-       struct sirfsoc_dma_desc *sdesc;
-       struct dma_async_tx_descriptor *desc;
-       unsigned long flags;
-       unsigned long happened_cyclic;
-       LIST_HEAD(list);
-       int i;
-
-       for (i = 0; i < sdma->dma.chancnt; i++) {
-               schan = &sdma->channels[i];
-
-               /* Get all completed descriptors */
-               spin_lock_irqsave(&schan->lock, flags);
-               if (!list_empty(&schan->completed)) {
-                       list_splice_tail_init(&schan->completed, &list);
-                       spin_unlock_irqrestore(&schan->lock, flags);
-
-                       /* Execute callbacks and run dependencies */
-                       list_for_each_entry(sdesc, &list, node) {
-                               desc = &sdesc->desc;
-
-                               dmaengine_desc_get_callback_invoke(desc, NULL);
-                               last_cookie = desc->cookie;
-                               dma_run_dependencies(desc);
-                       }
-
-                       /* Free descriptors */
-                       spin_lock_irqsave(&schan->lock, flags);
-                       list_splice_tail_init(&list, &schan->free);
-                       schan->chan.completed_cookie = last_cookie;
-                       spin_unlock_irqrestore(&schan->lock, flags);
-               } else {
-                       if (list_empty(&schan->active)) {
-                               spin_unlock_irqrestore(&schan->lock, flags);
-                               continue;
-                       }
-
-                       /* for cyclic channel, desc is always in active list */
-                       sdesc = list_first_entry(&schan->active,
-                               struct sirfsoc_dma_desc, node);
-
-                       /* cyclic DMA */
-                       happened_cyclic = schan->happened_cyclic;
-                       spin_unlock_irqrestore(&schan->lock, flags);
-
-                       desc = &sdesc->desc;
-                       while (happened_cyclic != schan->completed_cyclic) {
-                               dmaengine_desc_get_callback_invoke(desc, NULL);
-                               schan->completed_cyclic++;
-                       }
-               }
-       }
-}
-
-/* DMA Tasklet */
-static void sirfsoc_dma_tasklet(struct tasklet_struct *t)
-{
-       struct sirfsoc_dma *sdma = from_tasklet(sdma, t, tasklet);
-
-       sirfsoc_dma_process_completed(sdma);
-}
-
-/* Submit descriptor to hardware */
-static dma_cookie_t sirfsoc_dma_tx_submit(struct dma_async_tx_descriptor *txd)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(txd->chan);
-       struct sirfsoc_dma_desc *sdesc;
-       unsigned long flags;
-       dma_cookie_t cookie;
-
-       sdesc = container_of(txd, struct sirfsoc_dma_desc, desc);
-
-       spin_lock_irqsave(&schan->lock, flags);
-
-       /* Move descriptor to queue */
-       list_move_tail(&sdesc->node, &schan->queued);
-
-       cookie = dma_cookie_assign(txd);
-
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       return cookie;
-}
-
-static int sirfsoc_dma_slave_config(struct dma_chan *chan,
-                                   struct dma_slave_config *config)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       unsigned long flags;
-
-       if ((config->src_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES) ||
-               (config->dst_addr_width != DMA_SLAVE_BUSWIDTH_4_BYTES))
-               return -EINVAL;
-
-       spin_lock_irqsave(&schan->lock, flags);
-       schan->mode = (config->src_maxburst == 4 ? 1 : 0);
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       return 0;
-}
-
-static int sirfsoc_dma_terminate_all(struct dma_chan *chan)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
-       int cid = schan->chan.chan_id;
-       unsigned long flags;
-
-       spin_lock_irqsave(&schan->lock, flags);
-
-       switch (sdma->type) {
-       case SIRFSOC_DMA_VER_A7V1:
-               writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_INT_EN_CLR);
-               writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_INT);
-               writel_relaxed((1 << cid) | 1 << (cid + 16),
-                              sdma->base +
-                              SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7);
-               writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
-               break;
-       case SIRFSOC_DMA_VER_A7V2:
-               writel_relaxed(0, sdma->base + SIRFSOC_DMA_INT_EN_ATLAS7);
-               writel_relaxed(SIRFSOC_DMA_INT_ALL_ATLAS7,
-                              sdma->base + SIRFSOC_DMA_INT_ATLAS7);
-               writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
-               writel_relaxed(0, sdma->base + SIRFSOC_DMA_VALID_ATLAS7);
-               break;
-       case SIRFSOC_DMA_VER_A6:
-               writel_relaxed(readl_relaxed(sdma->base + SIRFSOC_DMA_INT_EN) &
-                              ~(1 << cid), sdma->base + SIRFSOC_DMA_INT_EN);
-               writel_relaxed(readl_relaxed(sdma->base +
-                                            SIRFSOC_DMA_CH_LOOP_CTRL) &
-                              ~((1 << cid) | 1 << (cid + 16)),
-                              sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
-               writel_relaxed(1 << cid, sdma->base + SIRFSOC_DMA_CH_VALID);
-               break;
-       default:
-               break;
-       }
-
-       list_splice_tail_init(&schan->active, &schan->free);
-       list_splice_tail_init(&schan->queued, &schan->free);
-
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       return 0;
-}
-
-static int sirfsoc_dma_pause_chan(struct dma_chan *chan)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
-       int cid = schan->chan.chan_id;
-       unsigned long flags;
-
-       spin_lock_irqsave(&schan->lock, flags);
-
-       switch (sdma->type) {
-       case SIRFSOC_DMA_VER_A7V1:
-               writel_relaxed((1 << cid) | 1 << (cid + 16),
-                              sdma->base +
-                              SIRFSOC_DMA_CH_LOOP_CTRL_CLR_ATLAS7);
-               break;
-       case SIRFSOC_DMA_VER_A7V2:
-               writel_relaxed(0, sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
-               break;
-       case SIRFSOC_DMA_VER_A6:
-               writel_relaxed(readl_relaxed(sdma->base +
-                                            SIRFSOC_DMA_CH_LOOP_CTRL) &
-                              ~((1 << cid) | 1 << (cid + 16)),
-                              sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
-               break;
-
-       default:
-               break;
-       }
-
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       return 0;
-}
-
-static int sirfsoc_dma_resume_chan(struct dma_chan *chan)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(&schan->chan);
-       int cid = schan->chan.chan_id;
-       unsigned long flags;
-
-       spin_lock_irqsave(&schan->lock, flags);
-       switch (sdma->type) {
-       case SIRFSOC_DMA_VER_A7V1:
-               writel_relaxed((1 << cid) | 1 << (cid + 16),
-                              sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL_ATLAS7);
-               break;
-       case SIRFSOC_DMA_VER_A7V2:
-               writel_relaxed(0x10001,
-                              sdma->base + SIRFSOC_DMA_LOOP_CTRL_ATLAS7);
-               break;
-       case SIRFSOC_DMA_VER_A6:
-               writel_relaxed(readl_relaxed(sdma->base +
-                                            SIRFSOC_DMA_CH_LOOP_CTRL) |
-                              ((1 << cid) | 1 << (cid + 16)),
-                              sdma->base + SIRFSOC_DMA_CH_LOOP_CTRL);
-               break;
-
-       default:
-               break;
-       }
-
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       return 0;
-}
-
-/* Alloc channel resources */
-static int sirfsoc_dma_alloc_chan_resources(struct dma_chan *chan)
-{
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       struct sirfsoc_dma_desc *sdesc;
-       unsigned long flags;
-       LIST_HEAD(descs);
-       int i;
-
-       pm_runtime_get_sync(sdma->dma.dev);
-
-       /* Alloc descriptors for this channel */
-       for (i = 0; i < SIRFSOC_DMA_DESCRIPTORS; i++) {
-               sdesc = kzalloc(sizeof(*sdesc), GFP_KERNEL);
-               if (!sdesc) {
-                       dev_notice(sdma->dma.dev, "Memory allocation error. "
-                               "Allocated only %u descriptors\n", i);
-                       break;
-               }
-
-               dma_async_tx_descriptor_init(&sdesc->desc, chan);
-               sdesc->desc.flags = DMA_CTRL_ACK;
-               sdesc->desc.tx_submit = sirfsoc_dma_tx_submit;
-
-               list_add_tail(&sdesc->node, &descs);
-       }
-
-       /* Return error only if no descriptors were allocated */
-       if (i == 0)
-               return -ENOMEM;
-
-       spin_lock_irqsave(&schan->lock, flags);
-
-       list_splice_tail_init(&descs, &schan->free);
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       return i;
-}
-
-/* Free channel resources */
-static void sirfsoc_dma_free_chan_resources(struct dma_chan *chan)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
-       struct sirfsoc_dma_desc *sdesc, *tmp;
-       unsigned long flags;
-       LIST_HEAD(descs);
-
-       spin_lock_irqsave(&schan->lock, flags);
-
-       /* Channel must be idle */
-       BUG_ON(!list_empty(&schan->prepared));
-       BUG_ON(!list_empty(&schan->queued));
-       BUG_ON(!list_empty(&schan->active));
-       BUG_ON(!list_empty(&schan->completed));
-
-       /* Move data */
-       list_splice_tail_init(&schan->free, &descs);
-
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       /* Free descriptors */
-       list_for_each_entry_safe(sdesc, tmp, &descs, node)
-               kfree(sdesc);
-
-       pm_runtime_put(sdma->dma.dev);
-}
-
-/* Send pending descriptor to hardware */
-static void sirfsoc_dma_issue_pending(struct dma_chan *chan)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       unsigned long flags;
-
-       spin_lock_irqsave(&schan->lock, flags);
-
-       if (list_empty(&schan->active) && !list_empty(&schan->queued))
-               sirfsoc_dma_execute(schan);
-
-       spin_unlock_irqrestore(&schan->lock, flags);
-}
-
-/* Check request completion status */
-static enum dma_status
-sirfsoc_dma_tx_status(struct dma_chan *chan, dma_cookie_t cookie,
-       struct dma_tx_state *txstate)
-{
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       unsigned long flags;
-       enum dma_status ret;
-       struct sirfsoc_dma_desc *sdesc;
-       int cid = schan->chan.chan_id;
-       unsigned long dma_pos;
-       unsigned long dma_request_bytes;
-       unsigned long residue;
-
-       spin_lock_irqsave(&schan->lock, flags);
-
-       if (list_empty(&schan->active)) {
-               ret = dma_cookie_status(chan, cookie, txstate);
-               dma_set_residue(txstate, 0);
-               spin_unlock_irqrestore(&schan->lock, flags);
-               return ret;
-       }
-       sdesc = list_first_entry(&schan->active, struct sirfsoc_dma_desc, node);
-       if (sdesc->cyclic)
-               dma_request_bytes = (sdesc->xlen + 1) * (sdesc->ylen + 1) *
-                       (sdesc->width * SIRFSOC_DMA_WORD_LEN);
-       else
-               dma_request_bytes = sdesc->xlen * SIRFSOC_DMA_WORD_LEN;
-
-       ret = dma_cookie_status(chan, cookie, txstate);
-
-       if (sdma->type == SIRFSOC_DMA_VER_A7V2)
-               cid = 0;
-
-       if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
-               dma_pos = readl_relaxed(sdma->base + SIRFSOC_DMA_CUR_DATA_ADDR);
-       } else {
-               dma_pos = readl_relaxed(
-                       sdma->base + cid * 0x10 + SIRFSOC_DMA_CH_ADDR) << 2;
-       }
-
-       residue = dma_request_bytes - (dma_pos - sdesc->addr);
-       dma_set_residue(txstate, residue);
-
-       spin_unlock_irqrestore(&schan->lock, flags);
-
-       return ret;
-}
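
The residue reported above is what a dmaengine client sees when it polls a cookie. A minimal client-side sketch, assuming chan and cookie come from an earlier prep/submit (the example_ helper name is illustrative, not part of this driver):

        static size_t example_pending_bytes(struct dma_chan *chan,
                                            dma_cookie_t cookie)
        {
                struct dma_tx_state state;

                if (dmaengine_tx_status(chan, cookie, &state) == DMA_COMPLETE)
                        return 0;

                /* residue as filled in by dma_set_residue() above */
                return state.residue;
        }
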
-
-static struct dma_async_tx_descriptor *sirfsoc_dma_prep_interleaved(
-       struct dma_chan *chan, struct dma_interleaved_template *xt,
-       unsigned long flags)
-{
-       struct sirfsoc_dma *sdma = dma_chan_to_sirfsoc_dma(chan);
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       struct sirfsoc_dma_desc *sdesc = NULL;
-       unsigned long iflags;
-       int ret;
-
-       if ((xt->dir != DMA_MEM_TO_DEV) && (xt->dir != DMA_DEV_TO_MEM)) {
-               ret = -EINVAL;
-               goto err_dir;
-       }
-
-       /* Get free descriptor */
-       spin_lock_irqsave(&schan->lock, iflags);
-       if (!list_empty(&schan->free)) {
-               sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
-                       node);
-               list_del(&sdesc->node);
-       }
-       spin_unlock_irqrestore(&schan->lock, iflags);
-
-       if (!sdesc) {
-               /* try to free completed descriptors */
-               sirfsoc_dma_process_completed(sdma);
-               ret = 0;
-               goto no_desc;
-       }
-
-       /* Place descriptor in prepared list */
-       spin_lock_irqsave(&schan->lock, iflags);
-
-       /*
-        * Number of chunks in a frame can only be 1 for prima2
-        * and ylen (number of frames - 1) must be at least 0
-        */
-       if ((xt->frame_size == 1) && (xt->numf > 0)) {
-               sdesc->cyclic = 0;
-               sdesc->xlen = xt->sgl[0].size / SIRFSOC_DMA_WORD_LEN;
-               sdesc->width = (xt->sgl[0].size + xt->sgl[0].icg) /
-                               SIRFSOC_DMA_WORD_LEN;
-               sdesc->ylen = xt->numf - 1;
-               if (xt->dir == DMA_MEM_TO_DEV) {
-                       sdesc->addr = xt->src_start;
-                       sdesc->dir = 1;
-               } else {
-                       sdesc->addr = xt->dst_start;
-                       sdesc->dir = 0;
-               }
-
-               list_add_tail(&sdesc->node, &schan->prepared);
-       } else {
-               pr_err("sirfsoc DMA: invalid transfer\n");
-               ret = -EINVAL;
-               goto err_xfer;
-       }
-       spin_unlock_irqrestore(&schan->lock, iflags);
-
-       return &sdesc->desc;
-err_xfer:
-       spin_unlock_irqrestore(&schan->lock, iflags);
-no_desc:
-err_dir:
-       return ERR_PTR(ret);
-}
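
On the client side, the constraints checked above (frame_size == 1 and numf > 0) dictate how the dma_interleaved_template must be filled in. A hedged sketch, assuming a channel was already requested; all example_ names and parameters are illustrative:

        static int example_submit_interleaved(struct dma_chan *chan,
                                              dma_addr_t src, size_t frame_bytes,
                                              size_t gap_bytes, int nframes)
        {
                struct dma_interleaved_template *xt;
                struct dma_async_tx_descriptor *desc;
                int ret;

                xt = kzalloc(struct_size(xt, sgl, 1), GFP_KERNEL);
                if (!xt)
                        return -ENOMEM;

                /* this driver only honours dir, *_start, numf, frame_size
                 * and sgl[0] */
                xt->dir = DMA_MEM_TO_DEV;
                xt->src_start = src;
                xt->numf = nframes;             /* becomes ylen + 1 */
                xt->frame_size = 1;             /* prima2: one chunk per frame */
                xt->sgl[0].size = frame_bytes;  /* becomes xlen (in words) */
                xt->sgl[0].icg = gap_bytes;     /* size + icg becomes the width */

                desc = dmaengine_prep_interleaved_dma(chan, xt, DMA_PREP_INTERRUPT);
                kfree(xt);
                if (IS_ERR_OR_NULL(desc))
                        return -EINVAL;

                ret = dma_submit_error(dmaengine_submit(desc));
                dma_async_issue_pending(chan);
                return ret;
        }
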
-
-static struct dma_async_tx_descriptor *
-sirfsoc_dma_prep_cyclic(struct dma_chan *chan, dma_addr_t addr,
-       size_t buf_len, size_t period_len,
-       enum dma_transfer_direction direction, unsigned long flags)
-{
-       struct sirfsoc_dma_chan *schan = dma_chan_to_sirfsoc_dma_chan(chan);
-       struct sirfsoc_dma_desc *sdesc = NULL;
-       unsigned long iflags;
-
-       /*
-        * we only support cyclic transfers with 2 periods.
-        * If the X-length is set to 0, it would be the loop mode.
-        * The DMA address keeps increasing until reaching the end of a loop
-        * area whose size is defined by (DMA_WIDTH x (Y_LENGTH + 1)). Then
-        * the DMA address goes back to the beginning of this area.
-        * In loop mode, the DMA data region is divided into two parts, BUFA
-        * and BUFB. The DMA controller generates two interrupts per loop:
-        * one when the DMA address reaches the end of BUFA and one when it
-        * reaches the end of BUFB.
-        */
-       if (buf_len !=  2 * period_len)
-               return ERR_PTR(-EINVAL);
-
-       /* Get free descriptor */
-       spin_lock_irqsave(&schan->lock, iflags);
-       if (!list_empty(&schan->free)) {
-               sdesc = list_first_entry(&schan->free, struct sirfsoc_dma_desc,
-                       node);
-               list_del(&sdesc->node);
-       }
-       spin_unlock_irqrestore(&schan->lock, iflags);
-
-       if (!sdesc)
-               return NULL;
-
-       /* Place descriptor in prepared list */
-       spin_lock_irqsave(&schan->lock, iflags);
-       sdesc->addr = addr;
-       sdesc->cyclic = 1;
-       sdesc->xlen = 0;
-       sdesc->ylen = buf_len / SIRFSOC_DMA_WORD_LEN - 1;
-       sdesc->width = 1;
-       list_add_tail(&sdesc->node, &schan->prepared);
-       spin_unlock_irqrestore(&schan->lock, iflags);
-
-       return &sdesc->desc;
-}
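
Because of the BUFA/BUFB scheme described above, a client must hand in a buffer of exactly two periods. A hedged sketch (example_ names are illustrative, not part of this driver):

        static void example_period_done(void *param)
        {
                /* one half of the loop buffer has completed */
        }

        static int example_start_cyclic(struct dma_chan *chan, dma_addr_t buf,
                                        size_t period_len)
        {
                struct dma_async_tx_descriptor *desc;

                /* buf_len must be exactly 2 * period_len for this driver */
                desc = dmaengine_prep_dma_cyclic(chan, buf, 2 * period_len,
                                                 period_len, DMA_DEV_TO_MEM,
                                                 DMA_PREP_INTERRUPT);
                if (IS_ERR_OR_NULL(desc))
                        return -EINVAL;

                desc->callback = example_period_done;
                dmaengine_submit(desc);
                dma_async_issue_pending(chan);
                return 0;
        }
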
-
-/*
- * The DMA controller consists of 16 independent DMA channels.
- * Each channel is allocated to a different function
- */
-bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id)
-{
-       unsigned int ch_nr = (unsigned int) chan_id;
-
-       if (ch_nr == chan->chan_id +
-               chan->device->dev_id * SIRFSOC_DMA_CHANNELS)
-               return true;
-
-       return false;
-}
-EXPORT_SYMBOL(sirfsoc_dma_filter_id);
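
For non-DT clients, the filter encodes a global channel number as dev_id * SIRFSOC_DMA_CHANNELS + chan_id. A hedged sketch of requesting channel 12 on DMAC 1 this way (the example_ name is illustrative):

        static struct dma_chan *example_request_chan(void)
        {
                dma_cap_mask_t mask;

                dma_cap_zero(mask);
                dma_cap_set(DMA_SLAVE, mask);

                /* global id = dev_id * SIRFSOC_DMA_CHANNELS + chan_id */
                return dma_request_channel(mask, sirfsoc_dma_filter_id,
                        (void *)(unsigned long)(1 * SIRFSOC_DMA_CHANNELS + 12));
        }
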
-
-#define SIRFSOC_DMA_BUSWIDTHS \
-       (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
-       BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
-       BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
-       BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
-       BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
-
-static struct dma_chan *of_dma_sirfsoc_xlate(struct of_phandle_args *dma_spec,
-       struct of_dma *ofdma)
-{
-       struct sirfsoc_dma *sdma = ofdma->of_dma_data;
-       unsigned int request = dma_spec->args[0];
-
-       if (request >= SIRFSOC_DMA_CHANNELS)
-               return NULL;
-
-       return dma_get_slave_channel(&sdma->channels[request].chan);
-}
-
-static int sirfsoc_dma_probe(struct platform_device *op)
-{
-       struct device_node *dn = op->dev.of_node;
-       struct device *dev = &op->dev;
-       struct dma_device *dma;
-       struct sirfsoc_dma *sdma;
-       struct sirfsoc_dma_chan *schan;
-       struct sirfsoc_dmadata *data;
-       struct resource res;
-       ulong regs_start, regs_size;
-       u32 id;
-       int ret, i;
-
-       sdma = devm_kzalloc(dev, sizeof(*sdma), GFP_KERNEL);
-       if (!sdma)
-               return -ENOMEM;
-
-       data = (struct sirfsoc_dmadata *)
-               (of_match_device(op->dev.driver->of_match_table,
-                                &op->dev)->data);
-       sdma->exec_desc = data->exec;
-       sdma->type = data->type;
-
-       if (of_property_read_u32(dn, "cell-index", &id)) {
-               dev_err(dev, "Fail to get DMAC index\n");
-               return -ENODEV;
-       }
-
-       sdma->irq = irq_of_parse_and_map(dn, 0);
-       if (!sdma->irq) {
-               dev_err(dev, "Error mapping IRQ!\n");
-               return -EINVAL;
-       }
-
-       sdma->clk = devm_clk_get(dev, NULL);
-       if (IS_ERR(sdma->clk)) {
-               dev_err(dev, "failed to get a clock.\n");
-               return PTR_ERR(sdma->clk);
-       }
-
-       ret = of_address_to_resource(dn, 0, &res);
-       if (ret) {
-               dev_err(dev, "Error parsing memory region!\n");
-               goto irq_dispose;
-       }
-
-       regs_start = res.start;
-       regs_size = resource_size(&res);
-
-       sdma->base = devm_ioremap(dev, regs_start, regs_size);
-       if (!sdma->base) {
-               dev_err(dev, "Error mapping memory region!\n");
-               ret = -ENOMEM;
-               goto irq_dispose;
-       }
-
-       ret = request_irq(sdma->irq, &sirfsoc_dma_irq, 0, DRV_NAME, sdma);
-       if (ret) {
-               dev_err(dev, "Error requesting IRQ!\n");
-               ret = -EINVAL;
-               goto irq_dispose;
-       }
-
-       dma = &sdma->dma;
-       dma->dev = dev;
-
-       dma->device_alloc_chan_resources = sirfsoc_dma_alloc_chan_resources;
-       dma->device_free_chan_resources = sirfsoc_dma_free_chan_resources;
-       dma->device_issue_pending = sirfsoc_dma_issue_pending;
-       dma->device_config = sirfsoc_dma_slave_config;
-       dma->device_pause = sirfsoc_dma_pause_chan;
-       dma->device_resume = sirfsoc_dma_resume_chan;
-       dma->device_terminate_all = sirfsoc_dma_terminate_all;
-       dma->device_tx_status = sirfsoc_dma_tx_status;
-       dma->device_prep_interleaved_dma = sirfsoc_dma_prep_interleaved;
-       dma->device_prep_dma_cyclic = sirfsoc_dma_prep_cyclic;
-       dma->src_addr_widths = SIRFSOC_DMA_BUSWIDTHS;
-       dma->dst_addr_widths = SIRFSOC_DMA_BUSWIDTHS;
-       dma->directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
-
-       INIT_LIST_HEAD(&dma->channels);
-       dma_cap_set(DMA_SLAVE, dma->cap_mask);
-       dma_cap_set(DMA_CYCLIC, dma->cap_mask);
-       dma_cap_set(DMA_INTERLEAVE, dma->cap_mask);
-       dma_cap_set(DMA_PRIVATE, dma->cap_mask);
-
-       for (i = 0; i < SIRFSOC_DMA_CHANNELS; i++) {
-               schan = &sdma->channels[i];
-
-               schan->chan.device = dma;
-               dma_cookie_init(&schan->chan);
-
-               INIT_LIST_HEAD(&schan->free);
-               INIT_LIST_HEAD(&schan->prepared);
-               INIT_LIST_HEAD(&schan->queued);
-               INIT_LIST_HEAD(&schan->active);
-               INIT_LIST_HEAD(&schan->completed);
-
-               spin_lock_init(&schan->lock);
-               list_add_tail(&schan->chan.device_node, &dma->channels);
-       }
-
-       tasklet_setup(&sdma->tasklet, sirfsoc_dma_tasklet);
-
-       /* Register DMA engine */
-       dev_set_drvdata(dev, sdma);
-
-       ret = dma_async_device_register(dma);
-       if (ret)
-               goto free_irq;
-
-       /* Device-tree DMA controller registration */
-       ret = of_dma_controller_register(dn, of_dma_sirfsoc_xlate, sdma);
-       if (ret) {
-               dev_err(dev, "failed to register DMA controller\n");
-               goto unreg_dma_dev;
-       }
-
-       pm_runtime_enable(&op->dev);
-       dev_info(dev, "initialized SIRFSOC DMAC driver\n");
-
-       return 0;
-
-unreg_dma_dev:
-       dma_async_device_unregister(dma);
-free_irq:
-       free_irq(sdma->irq, sdma);
-irq_dispose:
-       irq_dispose_mapping(sdma->irq);
-       return ret;
-}
-
-static int sirfsoc_dma_remove(struct platform_device *op)
-{
-       struct device *dev = &op->dev;
-       struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
-
-       of_dma_controller_free(op->dev.of_node);
-       dma_async_device_unregister(&sdma->dma);
-       free_irq(sdma->irq, sdma);
-       tasklet_kill(&sdma->tasklet);
-       irq_dispose_mapping(sdma->irq);
-       pm_runtime_disable(&op->dev);
-       if (!pm_runtime_status_suspended(&op->dev))
-               sirfsoc_dma_runtime_suspend(&op->dev);
-
-       return 0;
-}
-
-static int __maybe_unused sirfsoc_dma_runtime_suspend(struct device *dev)
-{
-       struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
-
-       clk_disable_unprepare(sdma->clk);
-       return 0;
-}
-
-static int __maybe_unused sirfsoc_dma_runtime_resume(struct device *dev)
-{
-       struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
-       int ret;
-
-       ret = clk_prepare_enable(sdma->clk);
-       if (ret < 0) {
-               dev_err(dev, "clk_enable failed: %d\n", ret);
-               return ret;
-       }
-       return 0;
-}
-
-static int __maybe_unused sirfsoc_dma_pm_suspend(struct device *dev)
-{
-       struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
-       struct sirfsoc_dma_regs *save = &sdma->regs_save;
-       struct sirfsoc_dma_chan *schan;
-       int ch;
-       int ret;
-       int count;
-       u32 int_offset;
-
-       /*
-        * if we were runtime-suspended before, resume to enable the clock
-        * before accessing registers
-        */
-       if (pm_runtime_status_suspended(dev)) {
-               ret = sirfsoc_dma_runtime_resume(dev);
-               if (ret < 0)
-                       return ret;
-       }
-
-       if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
-               count = 1;
-               int_offset = SIRFSOC_DMA_INT_EN_ATLAS7;
-       } else {
-               count = SIRFSOC_DMA_CHANNELS;
-               int_offset = SIRFSOC_DMA_INT_EN;
-       }
-
-       /*
-        * The DMA controller loses all register state while suspended,
-        * so we need to save the registers of the active channels
-        */
-       for (ch = 0; ch < count; ch++) {
-               schan = &sdma->channels[ch];
-               if (list_empty(&schan->active))
-                       continue;
-               save->ctrl[ch] = readl_relaxed(sdma->base +
-                       ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
-       }
-       save->interrupt_en = readl_relaxed(sdma->base + int_offset);
-
-       /* Disable clock */
-       sirfsoc_dma_runtime_suspend(dev);
-
-       return 0;
-}
-
-static int __maybe_unused sirfsoc_dma_pm_resume(struct device *dev)
-{
-       struct sirfsoc_dma *sdma = dev_get_drvdata(dev);
-       struct sirfsoc_dma_regs *save = &sdma->regs_save;
-       struct sirfsoc_dma_desc *sdesc;
-       struct sirfsoc_dma_chan *schan;
-       int ch;
-       int ret;
-       int count;
-       u32 int_offset;
-       u32 width_offset;
-
-       /* Enable clock before accessing register */
-       ret = sirfsoc_dma_runtime_resume(dev);
-       if (ret < 0)
-               return ret;
-
-       if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
-               count = 1;
-               int_offset = SIRFSOC_DMA_INT_EN_ATLAS7;
-               width_offset = SIRFSOC_DMA_WIDTH_ATLAS7;
-       } else {
-               count = SIRFSOC_DMA_CHANNELS;
-               int_offset = SIRFSOC_DMA_INT_EN;
-               width_offset = SIRFSOC_DMA_WIDTH_0;
-       }
-
-       writel_relaxed(save->interrupt_en, sdma->base + int_offset);
-       for (ch = 0; ch < count; ch++) {
-               schan = &sdma->channels[ch];
-               if (list_empty(&schan->active))
-                       continue;
-               sdesc = list_first_entry(&schan->active,
-                       struct sirfsoc_dma_desc,
-                       node);
-               writel_relaxed(sdesc->width,
-                       sdma->base + width_offset + ch * 4);
-               writel_relaxed(sdesc->xlen,
-                       sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_XLEN);
-               writel_relaxed(sdesc->ylen,
-                       sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_YLEN);
-               writel_relaxed(save->ctrl[ch],
-                       sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_CTRL);
-               if (sdma->type == SIRFSOC_DMA_VER_A7V2) {
-                       writel_relaxed(sdesc->addr,
-                               sdma->base + SIRFSOC_DMA_CH_ADDR);
-               } else {
-                       writel_relaxed(sdesc->addr >> 2,
-                               sdma->base + ch * 0x10 + SIRFSOC_DMA_CH_ADDR);
-
-               }
-       }
-
-       /* if we were runtime-suspended before, suspend again */
-       if (pm_runtime_status_suspended(dev))
-               sirfsoc_dma_runtime_suspend(dev);
-
-       return 0;
-}
-
-static const struct dev_pm_ops sirfsoc_dma_pm_ops = {
-       SET_RUNTIME_PM_OPS(sirfsoc_dma_runtime_suspend, sirfsoc_dma_runtime_resume, NULL)
-       SET_SYSTEM_SLEEP_PM_OPS(sirfsoc_dma_pm_suspend, sirfsoc_dma_pm_resume)
-};
-
-static struct sirfsoc_dmadata sirfsoc_dmadata_a6 = {
-       .exec = sirfsoc_dma_execute_hw_a6,
-       .type = SIRFSOC_DMA_VER_A6,
-};
-
-static struct sirfsoc_dmadata sirfsoc_dmadata_a7v1 = {
-       .exec = sirfsoc_dma_execute_hw_a7v1,
-       .type = SIRFSOC_DMA_VER_A7V1,
-};
-
-static struct sirfsoc_dmadata sirfsoc_dmadata_a7v2 = {
-       .exec = sirfsoc_dma_execute_hw_a7v2,
-       .type = SIRFSOC_DMA_VER_A7V2,
-};
-
-static const struct of_device_id sirfsoc_dma_match[] = {
-       { .compatible = "sirf,prima2-dmac", .data = &sirfsoc_dmadata_a6,},
-       { .compatible = "sirf,atlas7-dmac", .data = &sirfsoc_dmadata_a7v1,},
-       { .compatible = "sirf,atlas7-dmac-v2", .data = &sirfsoc_dmadata_a7v2,},
-       {},
-};
-MODULE_DEVICE_TABLE(of, sirfsoc_dma_match);
-
-static struct platform_driver sirfsoc_dma_driver = {
-       .probe          = sirfsoc_dma_probe,
-       .remove         = sirfsoc_dma_remove,
-       .driver = {
-               .name = DRV_NAME,
-               .pm = &sirfsoc_dma_pm_ops,
-               .of_match_table = sirfsoc_dma_match,
-       },
-};
-
-static __init int sirfsoc_dma_init(void)
-{
-       return platform_driver_register(&sirfsoc_dma_driver);
-}
-
-static void __exit sirfsoc_dma_exit(void)
-{
-       platform_driver_unregister(&sirfsoc_dma_driver);
-}
-
-subsys_initcall(sirfsoc_dma_init);
-module_exit(sirfsoc_dma_exit);
-
-MODULE_AUTHOR("Rongjun Ying <rongjun.ying@csr.com>");
-MODULE_AUTHOR("Barry Song <baohua.song@csr.com>");
-MODULE_DESCRIPTION("SIRFSOC DMA control driver");
-MODULE_LICENSE("GPL v2");
index 4256e55..265d7c0 100644 (file)
@@ -78,7 +78,7 @@ static int dma40_memcpy_channels[] = {
        DB8500_DMA_MEMCPY_EV_5,
 };
 
-/* Default configuration for physcial memcpy */
+/* Default configuration for physical memcpy */
 static const struct stedma40_chan_cfg dma40_memcpy_conf_phy = {
        .mode = STEDMA40_MODE_PHYSICAL,
        .dir = DMA_MEM_TO_MEM,
index f474a12..96ad218 100644 (file)
@@ -121,6 +121,11 @@ struct udma_oes_offsets {
 #define UDMA_FLAG_PDMA_ACC32           BIT(0)
 #define UDMA_FLAG_PDMA_BURST           BIT(1)
 #define UDMA_FLAG_TDTYPE               BIT(2)
+#define UDMA_FLAG_BURST_SIZE           BIT(3)
+#define UDMA_FLAGS_J7_CLASS            (UDMA_FLAG_PDMA_ACC32 | \
+                                        UDMA_FLAG_PDMA_BURST | \
+                                        UDMA_FLAG_TDTYPE | \
+                                        UDMA_FLAG_BURST_SIZE)
 
 struct udma_match_data {
        enum k3_dma_type type;
@@ -128,6 +133,7 @@ struct udma_match_data {
        bool enable_memcpy_support;
        u32 flags;
        u32 statictr_z_mask;
+       u8 burst_size[3];
 };
 
 struct udma_soc_data {
@@ -436,6 +442,18 @@ static void k3_configure_chan_coherency(struct dma_chan *chan, u32 asel)
        }
 }
 
+static u8 udma_get_chan_tpl_index(struct udma_tpl *tpl_map, int chan_id)
+{
+       int i;
+
+       for (i = 0; i < tpl_map->levels; i++) {
+               if (chan_id >= tpl_map->start_idx[i])
+                       return i;
+       }
+
+       return 0;
+}
+
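
The start_idx[] thresholds are ordered so that lower channel ids resolve to higher throughput levels. Illustrative values only (not taken from a real SoC), assuming the udma_tpl layout used elsewhere in this driver (a levels count plus per-level start_idx[] thresholds):

        /* levels = 2, start_idx = { 8, 0 }: channels 8 and up are level 0,
         * channels 0..7 are level 1 (the higher-throughput ones) */
        struct udma_tpl tpl = { .levels = 2, .start_idx = { 8, 0 } };

        udma_get_chan_tpl_index(&tpl, 12);      /* -> 0 */
        udma_get_chan_tpl_index(&tpl, 3);       /* -> 1 */
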
 static void udma_reset_uchan(struct udma_chan *uc)
 {
        memset(&uc->config, 0, sizeof(uc->config));
@@ -1811,13 +1829,21 @@ static int udma_tisci_m2m_channel_config(struct udma_chan *uc)
        const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
        struct udma_tchan *tchan = uc->tchan;
        struct udma_rchan *rchan = uc->rchan;
-       int ret = 0;
+       u8 burst_size = 0;
+       int ret;
+       u8 tpl;
 
        /* Non synchronized - mem to mem type of transfer */
        int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
        struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
        struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
 
+       if (ud->match_data->flags & UDMA_FLAG_BURST_SIZE) {
+               tpl = udma_get_chan_tpl_index(&ud->tchan_tpl, tchan->id);
+
+               burst_size = ud->match_data->burst_size[tpl];
+       }
+
        req_tx.valid_params = TISCI_UDMA_TCHAN_VALID_PARAMS;
        req_tx.nav_id = tisci_rm->tisci_dev_id;
        req_tx.index = tchan->id;
@@ -1825,6 +1851,10 @@ static int udma_tisci_m2m_channel_config(struct udma_chan *uc)
        req_tx.tx_fetch_size = sizeof(struct cppi5_desc_hdr_t) >> 2;
        req_tx.txcq_qnum = tc_ring;
        req_tx.tx_atype = ud->atype;
+       if (burst_size) {
+               req_tx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID;
+               req_tx.tx_burst_size = burst_size;
+       }
 
        ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
        if (ret) {
@@ -1839,6 +1869,10 @@ static int udma_tisci_m2m_channel_config(struct udma_chan *uc)
        req_rx.rxcq_qnum = tc_ring;
        req_rx.rx_chan_type = TI_SCI_RM_UDMAP_CHAN_TYPE_3RDP_BCOPY_PBRR;
        req_rx.rx_atype = ud->atype;
+       if (burst_size) {
+               req_rx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID;
+               req_rx.rx_burst_size = burst_size;
+       }
 
        ret = tisci_ops->rx_ch_cfg(tisci_rm->tisci, &req_rx);
        if (ret)
@@ -1854,12 +1888,24 @@ static int bcdma_tisci_m2m_channel_config(struct udma_chan *uc)
        const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
        struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
        struct udma_bchan *bchan = uc->bchan;
-       int ret = 0;
+       u8 burst_size = 0;
+       int ret;
+       u8 tpl;
+
+       if (ud->match_data->flags & UDMA_FLAG_BURST_SIZE) {
+               tpl = udma_get_chan_tpl_index(&ud->bchan_tpl, bchan->id);
+
+               burst_size = ud->match_data->burst_size[tpl];
+       }
 
        req_tx.valid_params = TISCI_BCDMA_BCHAN_VALID_PARAMS;
        req_tx.nav_id = tisci_rm->tisci_dev_id;
        req_tx.extended_ch_type = TI_SCI_RM_BCDMA_EXTENDED_CH_TYPE_BCHAN;
        req_tx.index = bchan->id;
+       if (burst_size) {
+               req_tx.valid_params |= TI_SCI_MSG_VALUE_RM_UDMAP_CH_BURST_SIZE_VALID;
+               req_tx.tx_burst_size = burst_size;
+       }
 
        ret = tisci_ops->tx_ch_cfg(tisci_rm->tisci, &req_tx);
        if (ret)
@@ -1877,7 +1923,7 @@ static int udma_tisci_tx_channel_config(struct udma_chan *uc)
        int tc_ring = k3_ringacc_get_ring_id(tchan->tc_ring);
        struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
        u32 mode, fetch_size;
-       int ret = 0;
+       int ret;
 
        if (uc->config.pkt_mode) {
                mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
@@ -1918,7 +1964,7 @@ static int bcdma_tisci_tx_channel_config(struct udma_chan *uc)
        const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
        struct udma_tchan *tchan = uc->tchan;
        struct ti_sci_msg_rm_udmap_tx_ch_cfg req_tx = { 0 };
-       int ret = 0;
+       int ret;
 
        req_tx.valid_params = TISCI_BCDMA_TCHAN_VALID_PARAMS;
        req_tx.nav_id = tisci_rm->tisci_dev_id;
@@ -1951,7 +1997,7 @@ static int udma_tisci_rx_channel_config(struct udma_chan *uc)
        struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
        struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 };
        u32 mode, fetch_size;
-       int ret = 0;
+       int ret;
 
        if (uc->config.pkt_mode) {
                mode = TI_SCI_RM_UDMAP_CHAN_TYPE_PKT_PBRR;
@@ -2028,7 +2074,7 @@ static int bcdma_tisci_rx_channel_config(struct udma_chan *uc)
        const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
        struct udma_rchan *rchan = uc->rchan;
        struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
-       int ret = 0;
+       int ret;
 
        req_rx.valid_params = TISCI_BCDMA_RCHAN_VALID_PARAMS;
        req_rx.nav_id = tisci_rm->tisci_dev_id;
@@ -2048,7 +2094,7 @@ static int pktdma_tisci_rx_channel_config(struct udma_chan *uc)
        const struct ti_sci_rm_udmap_ops *tisci_ops = tisci_rm->tisci_udmap_ops;
        struct ti_sci_msg_rm_udmap_rx_ch_cfg req_rx = { 0 };
        struct ti_sci_msg_rm_udmap_flow_cfg flow_req = { 0 };
-       int ret = 0;
+       int ret;
 
        req_rx.valid_params = TISCI_BCDMA_RCHAN_VALID_PARAMS;
        req_rx.nav_id = tisci_rm->tisci_dev_id;
@@ -4168,6 +4214,11 @@ static struct udma_match_data am654_main_data = {
        .psil_base = 0x1000,
        .enable_memcpy_support = true,
        .statictr_z_mask = GENMASK(11, 0),
+       .burst_size = {
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* H Channels */
+               0, /* No UH Channels */
+       },
 };
 
 static struct udma_match_data am654_mcu_data = {
@@ -4175,38 +4226,63 @@ static struct udma_match_data am654_mcu_data = {
        .psil_base = 0x6000,
        .enable_memcpy_support = false,
        .statictr_z_mask = GENMASK(11, 0),
+       .burst_size = {
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* H Channels */
+               0, /* No UH Channels */
+       },
 };
 
 static struct udma_match_data j721e_main_data = {
        .type = DMA_TYPE_UDMA,
        .psil_base = 0x1000,
        .enable_memcpy_support = true,
-       .flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST | UDMA_FLAG_TDTYPE,
+       .flags = UDMA_FLAGS_J7_CLASS,
        .statictr_z_mask = GENMASK(23, 0),
+       .burst_size = {
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES, /* H Channels */
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES, /* UH Channels */
+       },
 };
 
 static struct udma_match_data j721e_mcu_data = {
        .type = DMA_TYPE_UDMA,
        .psil_base = 0x6000,
        .enable_memcpy_support = false, /* MEM_TO_MEM is slow via MCU UDMA */
-       .flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST | UDMA_FLAG_TDTYPE,
+       .flags = UDMA_FLAGS_J7_CLASS,
        .statictr_z_mask = GENMASK(23, 0),
+       .burst_size = {
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES, /* H Channels */
+               0, /* No UH Channels */
+       },
 };
 
 static struct udma_match_data am64_bcdma_data = {
        .type = DMA_TYPE_BCDMA,
        .psil_base = 0x2000, /* for tchan and rchan, not applicable to bchan */
        .enable_memcpy_support = true, /* Supported via bchan */
-       .flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST | UDMA_FLAG_TDTYPE,
+       .flags = UDMA_FLAGS_J7_CLASS,
        .statictr_z_mask = GENMASK(23, 0),
+       .burst_size = {
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+               0, /* No H Channels */
+               0, /* No UH Channels */
+       },
 };
 
 static struct udma_match_data am64_pktdma_data = {
        .type = DMA_TYPE_PKTDMA,
        .psil_base = 0x1000,
        .enable_memcpy_support = false, /* PKTDMA does not support MEM_TO_MEM */
-       .flags = UDMA_FLAG_PDMA_ACC32 | UDMA_FLAG_PDMA_BURST | UDMA_FLAG_TDTYPE,
+       .flags = UDMA_FLAGS_J7_CLASS,
        .statictr_z_mask = GENMASK(23, 0),
+       .burst_size = {
+               TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES, /* Normal Channels */
+               0, /* No H Channels */
+               0, /* No UH Channels */
+       },
 };
 
 static const struct of_device_id udma_of_match[] = {
@@ -4306,6 +4382,7 @@ static int udma_get_mmrs(struct platform_device *pdev, struct udma_dev *ud)
                ud->bchan_cnt = BCDMA_CAP2_BCHAN_CNT(cap2);
                ud->tchan_cnt = BCDMA_CAP2_TCHAN_CNT(cap2);
                ud->rchan_cnt = BCDMA_CAP2_RCHAN_CNT(cap2);
+               ud->rflow_cnt = ud->rchan_cnt;
                break;
        case DMA_TYPE_PKTDMA:
                cap4 = udma_read(ud->mmrs[MMR_GCFG], 0x30);
@@ -5046,6 +5123,34 @@ static void udma_dbg_summary_show(struct seq_file *s,
 }
 #endif /* CONFIG_DEBUG_FS */
 
+static enum dmaengine_alignment udma_get_copy_align(struct udma_dev *ud)
+{
+       const struct udma_match_data *match_data = ud->match_data;
+       u8 tpl;
+
+       if (!match_data->enable_memcpy_support)
+               return DMAENGINE_ALIGN_8_BYTES;
+
+       /* Get the highest TPL level the device supports for memcpy */
+       if (ud->bchan_cnt)
+               tpl = udma_get_chan_tpl_index(&ud->bchan_tpl, 0);
+       else if (ud->tchan_cnt)
+               tpl = udma_get_chan_tpl_index(&ud->tchan_tpl, 0);
+       else
+               return DMAENGINE_ALIGN_8_BYTES;
+
+       switch (match_data->burst_size[tpl]) {
+       case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_256_BYTES:
+               return DMAENGINE_ALIGN_256_BYTES;
+       case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_128_BYTES:
+               return DMAENGINE_ALIGN_128_BYTES;
+       case TI_SCI_RM_UDMAP_CHAN_BURST_SIZE_64_BYTES:
+       fallthrough;
+       default:
+               return DMAENGINE_ALIGN_64_BYTES;
+       }
+}
+
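
enum dmaengine_alignment values are log2 shifts (DMAENGINE_ALIGN_64_BYTES is 6), so the byte alignment a memcpy client must honour is 1 << copy_align. A hedged client-side sketch, not part of the driver (the example_ name is illustrative):

        static bool example_memcpy_aligned(struct dma_chan *chan,
                                           dma_addr_t src, dma_addr_t dst,
                                           size_t len)
        {
                size_t align = 1UL << chan->device->copy_align;

                return IS_ALIGNED(src, align) && IS_ALIGNED(dst, align) &&
                       IS_ALIGNED(len, align);
        }
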
 #define TI_UDMAC_BUSWIDTHS     (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
                                 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
                                 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
@@ -5202,7 +5307,6 @@ static int udma_probe(struct platform_device *pdev)
        ud->ddev.dst_addr_widths = TI_UDMAC_BUSWIDTHS;
        ud->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
        ud->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
-       ud->ddev.copy_align = DMAENGINE_ALIGN_8_BYTES;
        ud->ddev.desc_metadata_modes = DESC_METADATA_CLIENT |
                                       DESC_METADATA_ENGINE;
        if (ud->match_data->enable_memcpy_support &&
@@ -5284,6 +5388,9 @@ static int udma_probe(struct platform_device *pdev)
                INIT_DELAYED_WORK(&uc->tx_drain.work, udma_check_tx_completion);
        }
 
+       /* Configure the copy_align to the maximum burst size the device supports */
+       ud->ddev.copy_align = udma_get_copy_align(ud);
+
        ret = dma_async_device_register(&ud->ddev);
        if (ret) {
                dev_err(dev, "failed to register slave DMA engine: %d\n", ret);
index 7977755..3aded78 100644 (file)
@@ -800,7 +800,7 @@ xilinx_dma_alloc_tx_descriptor(struct xilinx_dma_chan *chan)
 {
        struct xilinx_dma_tx_descriptor *desc;
 
-       desc = kzalloc(sizeof(*desc), GFP_KERNEL);
+       desc = kzalloc(sizeof(*desc), GFP_NOWAIT);
        if (!desc)
                return NULL;
 
diff --git a/drivers/dma/zx_dma.c b/drivers/dma/zx_dma.c
deleted file mode 100644 (file)
index b057582..0000000
+++ /dev/null
@@ -1,941 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright 2015 Linaro.
- */
-#include <linux/sched.h>
-#include <linux/device.h>
-#include <linux/dmaengine.h>
-#include <linux/dma-mapping.h>
-#include <linux/dmapool.h>
-#include <linux/init.h>
-#include <linux/interrupt.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-#include <linux/spinlock.h>
-#include <linux/of_device.h>
-#include <linux/of.h>
-#include <linux/clk.h>
-#include <linux/of_dma.h>
-
-#include "virt-dma.h"
-
-#define DRIVER_NAME            "zx-dma"
-#define DMA_ALIGN              4
-#define DMA_MAX_SIZE           (0x10000 - 512)
-#define LLI_BLOCK_SIZE         (4 * PAGE_SIZE)
-
-#define REG_ZX_SRC_ADDR                        0x00
-#define REG_ZX_DST_ADDR                        0x04
-#define REG_ZX_TX_X_COUNT              0x08
-#define REG_ZX_TX_ZY_COUNT             0x0c
-#define REG_ZX_SRC_ZY_STEP             0x10
-#define REG_ZX_DST_ZY_STEP             0x14
-#define REG_ZX_LLI_ADDR                        0x1c
-#define REG_ZX_CTRL                    0x20
-#define REG_ZX_TC_IRQ                  0x800
-#define REG_ZX_SRC_ERR_IRQ             0x804
-#define REG_ZX_DST_ERR_IRQ             0x808
-#define REG_ZX_CFG_ERR_IRQ             0x80c
-#define REG_ZX_TC_IRQ_RAW              0x810
-#define REG_ZX_SRC_ERR_IRQ_RAW         0x814
-#define REG_ZX_DST_ERR_IRQ_RAW         0x818
-#define REG_ZX_CFG_ERR_IRQ_RAW         0x81c
-#define REG_ZX_STATUS                  0x820
-#define REG_ZX_DMA_GRP_PRIO            0x824
-#define REG_ZX_DMA_ARB                 0x828
-
-#define ZX_FORCE_CLOSE                 BIT(31)
-#define ZX_DST_BURST_WIDTH(x)          (((x) & 0x7) << 13)
-#define ZX_MAX_BURST_LEN               16
-#define ZX_SRC_BURST_LEN(x)            (((x) & 0xf) << 9)
-#define ZX_SRC_BURST_WIDTH(x)          (((x) & 0x7) << 6)
-#define ZX_IRQ_ENABLE_ALL              (3 << 4)
-#define ZX_DST_FIFO_MODE               BIT(3)
-#define ZX_SRC_FIFO_MODE               BIT(2)
-#define ZX_SOFT_REQ                    BIT(1)
-#define ZX_CH_ENABLE                   BIT(0)
-
-#define ZX_DMA_BUSWIDTHS \
-       (BIT(DMA_SLAVE_BUSWIDTH_UNDEFINED) | \
-       BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
-       BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
-       BIT(DMA_SLAVE_BUSWIDTH_4_BYTES) | \
-       BIT(DMA_SLAVE_BUSWIDTH_8_BYTES))
-
-enum zx_dma_burst_width {
-       ZX_DMA_WIDTH_8BIT       = 0,
-       ZX_DMA_WIDTH_16BIT      = 1,
-       ZX_DMA_WIDTH_32BIT      = 2,
-       ZX_DMA_WIDTH_64BIT      = 3,
-};
-
-struct zx_desc_hw {
-       u32 saddr;
-       u32 daddr;
-       u32 src_x;
-       u32 src_zy;
-       u32 src_zy_step;
-       u32 dst_zy_step;
-       u32 reserved1;
-       u32 lli;
-       u32 ctr;
-       u32 reserved[7]; /* pack as hardware registers region size */
-} __aligned(32);
-
-struct zx_dma_desc_sw {
-       struct virt_dma_desc    vd;
-       dma_addr_t              desc_hw_lli;
-       size_t                  desc_num;
-       size_t                  size;
-       struct zx_desc_hw       *desc_hw;
-};
-
-struct zx_dma_phy;
-
-struct zx_dma_chan {
-       struct dma_slave_config slave_cfg;
-       int                     id; /* Request phy chan id */
-       u32                     ccfg;
-       u32                     cyclic;
-       struct virt_dma_chan    vc;
-       struct zx_dma_phy       *phy;
-       struct list_head        node;
-       dma_addr_t              dev_addr;
-       enum dma_status         status;
-};
-
-struct zx_dma_phy {
-       u32                     idx;
-       void __iomem            *base;
-       struct zx_dma_chan      *vchan;
-       struct zx_dma_desc_sw   *ds_run;
-       struct zx_dma_desc_sw   *ds_done;
-};
-
-struct zx_dma_dev {
-       struct dma_device       slave;
-       void __iomem            *base;
-       spinlock_t              lock; /* lock for ch and phy */
-       struct list_head        chan_pending;
-       struct zx_dma_phy       *phy;
-       struct zx_dma_chan      *chans;
-       struct clk              *clk;
-       struct dma_pool         *pool;
-       u32                     dma_channels;
-       u32                     dma_requests;
-       int                     irq;
-};
-
-#define to_zx_dma(dmadev) container_of(dmadev, struct zx_dma_dev, slave)
-
-static struct zx_dma_chan *to_zx_chan(struct dma_chan *chan)
-{
-       return container_of(chan, struct zx_dma_chan, vc.chan);
-}
-
-static void zx_dma_terminate_chan(struct zx_dma_phy *phy, struct zx_dma_dev *d)
-{
-       u32 val = 0;
-
-       val = readl_relaxed(phy->base + REG_ZX_CTRL);
-       val &= ~ZX_CH_ENABLE;
-       val |= ZX_FORCE_CLOSE;
-       writel_relaxed(val, phy->base + REG_ZX_CTRL);
-
-       val = 0x1 << phy->idx;
-       writel_relaxed(val, d->base + REG_ZX_TC_IRQ_RAW);
-       writel_relaxed(val, d->base + REG_ZX_SRC_ERR_IRQ_RAW);
-       writel_relaxed(val, d->base + REG_ZX_DST_ERR_IRQ_RAW);
-       writel_relaxed(val, d->base + REG_ZX_CFG_ERR_IRQ_RAW);
-}
-
-static void zx_dma_set_desc(struct zx_dma_phy *phy, struct zx_desc_hw *hw)
-{
-       writel_relaxed(hw->saddr, phy->base + REG_ZX_SRC_ADDR);
-       writel_relaxed(hw->daddr, phy->base + REG_ZX_DST_ADDR);
-       writel_relaxed(hw->src_x, phy->base + REG_ZX_TX_X_COUNT);
-       writel_relaxed(0, phy->base + REG_ZX_TX_ZY_COUNT);
-       writel_relaxed(0, phy->base + REG_ZX_SRC_ZY_STEP);
-       writel_relaxed(0, phy->base + REG_ZX_DST_ZY_STEP);
-       writel_relaxed(hw->lli, phy->base + REG_ZX_LLI_ADDR);
-       writel_relaxed(hw->ctr, phy->base + REG_ZX_CTRL);
-}
-
-static u32 zx_dma_get_curr_lli(struct zx_dma_phy *phy)
-{
-       return readl_relaxed(phy->base + REG_ZX_LLI_ADDR);
-}
-
-static u32 zx_dma_get_chan_stat(struct zx_dma_dev *d)
-{
-       return readl_relaxed(d->base + REG_ZX_STATUS);
-}
-
-static void zx_dma_init_state(struct zx_dma_dev *d)
-{
-       /* set same priority */
-       writel_relaxed(0x0, d->base + REG_ZX_DMA_ARB);
-       /* clear all irq */
-       writel_relaxed(0xffffffff, d->base + REG_ZX_TC_IRQ_RAW);
-       writel_relaxed(0xffffffff, d->base + REG_ZX_SRC_ERR_IRQ_RAW);
-       writel_relaxed(0xffffffff, d->base + REG_ZX_DST_ERR_IRQ_RAW);
-       writel_relaxed(0xffffffff, d->base + REG_ZX_CFG_ERR_IRQ_RAW);
-}
-
-static int zx_dma_start_txd(struct zx_dma_chan *c)
-{
-       struct zx_dma_dev *d = to_zx_dma(c->vc.chan.device);
-       struct virt_dma_desc *vd = vchan_next_desc(&c->vc);
-
-       if (!c->phy)
-               return -EAGAIN;
-
-       if (BIT(c->phy->idx) & zx_dma_get_chan_stat(d))
-               return -EAGAIN;
-
-       if (vd) {
-               struct zx_dma_desc_sw *ds =
-                       container_of(vd, struct zx_dma_desc_sw, vd);
-               /*
-                * fetch and remove request from vc->desc_issued
-                * so vc->desc_issued only contains pending descriptors
-                */
-               list_del(&ds->vd.node);
-               c->phy->ds_run = ds;
-               c->phy->ds_done = NULL;
-               /* start dma */
-               zx_dma_set_desc(c->phy, ds->desc_hw);
-               return 0;
-       }
-       c->phy->ds_done = NULL;
-       c->phy->ds_run = NULL;
-       return -EAGAIN;
-}
-
-static void zx_dma_task(struct zx_dma_dev *d)
-{
-       struct zx_dma_phy *p;
-       struct zx_dma_chan *c, *cn;
-       unsigned pch, pch_alloc = 0;
-       unsigned long flags;
-
-       /* check new dma request of running channel in vc->desc_issued */
-       list_for_each_entry_safe(c, cn, &d->slave.channels,
-                                vc.chan.device_node) {
-               spin_lock_irqsave(&c->vc.lock, flags);
-               p = c->phy;
-               if (p && p->ds_done && zx_dma_start_txd(c)) {
-                       /* No current txd associated with this channel */
-                       dev_dbg(d->slave.dev, "pchan %u: free\n", p->idx);
-                       /* Mark this channel free */
-                       c->phy = NULL;
-                       p->vchan = NULL;
-               }
-               spin_unlock_irqrestore(&c->vc.lock, flags);
-       }
-
-       /* check new channel request in d->chan_pending */
-       spin_lock_irqsave(&d->lock, flags);
-       while (!list_empty(&d->chan_pending)) {
-               c = list_first_entry(&d->chan_pending,
-                                    struct zx_dma_chan, node);
-               p = &d->phy[c->id];
-               if (!p->vchan) {
-                       /* remove from d->chan_pending */
-                       list_del_init(&c->node);
-                       pch_alloc |= 1 << c->id;
-                       /* Mark this channel allocated */
-                       p->vchan = c;
-                       c->phy = p;
-               } else {
-                       dev_dbg(d->slave.dev, "pchan %u: busy!\n", c->id);
-               }
-       }
-       spin_unlock_irqrestore(&d->lock, flags);
-
-       for (pch = 0; pch < d->dma_channels; pch++) {
-               if (pch_alloc & (1 << pch)) {
-                       p = &d->phy[pch];
-                       c = p->vchan;
-                       if (c) {
-                               spin_lock_irqsave(&c->vc.lock, flags);
-                               zx_dma_start_txd(c);
-                               spin_unlock_irqrestore(&c->vc.lock, flags);
-                       }
-               }
-       }
-}
-
-static irqreturn_t zx_dma_int_handler(int irq, void *dev_id)
-{
-       struct zx_dma_dev *d = (struct zx_dma_dev *)dev_id;
-       struct zx_dma_phy *p;
-       struct zx_dma_chan *c;
-       u32 tc = readl_relaxed(d->base + REG_ZX_TC_IRQ);
-       u32 serr = readl_relaxed(d->base + REG_ZX_SRC_ERR_IRQ);
-       u32 derr = readl_relaxed(d->base + REG_ZX_DST_ERR_IRQ);
-       u32 cfg = readl_relaxed(d->base + REG_ZX_CFG_ERR_IRQ);
-       u32 i, irq_chan = 0, task = 0;
-
-       while (tc) {
-               i = __ffs(tc);
-               tc &= ~BIT(i);
-               p = &d->phy[i];
-               c = p->vchan;
-               if (c) {
-                       spin_lock(&c->vc.lock);
-                       if (c->cyclic) {
-                               vchan_cyclic_callback(&p->ds_run->vd);
-                       } else {
-                               vchan_cookie_complete(&p->ds_run->vd);
-                               p->ds_done = p->ds_run;
-                               task = 1;
-                       }
-                       spin_unlock(&c->vc.lock);
-                       irq_chan |= BIT(i);
-               }
-       }
-
-       if (serr || derr || cfg)
-               dev_warn(d->slave.dev, "DMA ERR src 0x%x, dst 0x%x, cfg 0x%x\n",
-                        serr, derr, cfg);
-
-       writel_relaxed(irq_chan, d->base + REG_ZX_TC_IRQ_RAW);
-       writel_relaxed(serr, d->base + REG_ZX_SRC_ERR_IRQ_RAW);
-       writel_relaxed(derr, d->base + REG_ZX_DST_ERR_IRQ_RAW);
-       writel_relaxed(cfg, d->base + REG_ZX_CFG_ERR_IRQ_RAW);
-
-       if (task)
-               zx_dma_task(d);
-       return IRQ_HANDLED;
-}
-
-static void zx_dma_free_chan_resources(struct dma_chan *chan)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_dev *d = to_zx_dma(chan->device);
-       unsigned long flags;
-
-       spin_lock_irqsave(&d->lock, flags);
-       list_del_init(&c->node);
-       spin_unlock_irqrestore(&d->lock, flags);
-
-       vchan_free_chan_resources(&c->vc);
-       c->ccfg = 0;
-}
-
-static enum dma_status zx_dma_tx_status(struct dma_chan *chan,
-                                       dma_cookie_t cookie,
-                                       struct dma_tx_state *state)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_phy *p;
-       struct virt_dma_desc *vd;
-       unsigned long flags;
-       enum dma_status ret;
-       size_t bytes = 0;
-
-       ret = dma_cookie_status(&c->vc.chan, cookie, state);
-       if (ret == DMA_COMPLETE || !state)
-               return ret;
-
-       spin_lock_irqsave(&c->vc.lock, flags);
-       p = c->phy;
-       ret = c->status;
-
-       /*
-        * If the cookie is on our issue queue, then the residue is
-        * its total size.
-        */
-       vd = vchan_find_desc(&c->vc, cookie);
-       if (vd) {
-               bytes = container_of(vd, struct zx_dma_desc_sw, vd)->size;
-       } else if ((!p) || (!p->ds_run)) {
-               bytes = 0;
-       } else {
-               struct zx_dma_desc_sw *ds = p->ds_run;
-               u32 clli = 0, index = 0;
-
-               bytes = 0;
-               clli = zx_dma_get_curr_lli(p);
-               index = (clli - ds->desc_hw_lli) /
-                               sizeof(struct zx_desc_hw) + 1;
-               for (; index < ds->desc_num; index++) {
-                       bytes += ds->desc_hw[index].src_x;
-                       /* end of lli */
-                       if (!ds->desc_hw[index].lli)
-                               break;
-               }
-       }
-       spin_unlock_irqrestore(&c->vc.lock, flags);
-       dma_set_residue(state, bytes);
-       return ret;
-}
-
-static void zx_dma_issue_pending(struct dma_chan *chan)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_dev *d = to_zx_dma(chan->device);
-       unsigned long flags;
-       int issue = 0;
-
-       spin_lock_irqsave(&c->vc.lock, flags);
-       /* add request to vc->desc_issued */
-       if (vchan_issue_pending(&c->vc)) {
-               spin_lock(&d->lock);
-               if (!c->phy && list_empty(&c->node)) {
-                       /* if new channel, add chan_pending */
-                       list_add_tail(&c->node, &d->chan_pending);
-                       issue = 1;
-                       dev_dbg(d->slave.dev, "vchan %p: issued\n", &c->vc);
-               }
-               spin_unlock(&d->lock);
-       } else {
-               dev_dbg(d->slave.dev, "vchan %p: nothing to issue\n", &c->vc);
-       }
-       spin_unlock_irqrestore(&c->vc.lock, flags);
-
-       if (issue)
-               zx_dma_task(d);
-}
-
-static void zx_dma_fill_desc(struct zx_dma_desc_sw *ds, dma_addr_t dst,
-                            dma_addr_t src, size_t len, u32 num, u32 ccfg)
-{
-       if ((num + 1) < ds->desc_num)
-               ds->desc_hw[num].lli = ds->desc_hw_lli + (num + 1) *
-                       sizeof(struct zx_desc_hw);
-       ds->desc_hw[num].saddr = src;
-       ds->desc_hw[num].daddr = dst;
-       ds->desc_hw[num].src_x = len;
-       ds->desc_hw[num].ctr = ccfg;
-}
-
-static struct zx_dma_desc_sw *zx_alloc_desc_resource(int num,
-                                                    struct dma_chan *chan)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_desc_sw *ds;
-       struct zx_dma_dev *d = to_zx_dma(chan->device);
-       int lli_limit = LLI_BLOCK_SIZE / sizeof(struct zx_desc_hw);
-
-       if (num > lli_limit) {
-               dev_dbg(chan->device->dev, "vch %p: sg num %d exceeds max %d\n",
-                       &c->vc, num, lli_limit);
-               return NULL;
-       }
-
-       ds = kzalloc(sizeof(*ds), GFP_ATOMIC);
-       if (!ds)
-               return NULL;
-
-       ds->desc_hw = dma_pool_zalloc(d->pool, GFP_NOWAIT, &ds->desc_hw_lli);
-       if (!ds->desc_hw) {
-               dev_dbg(chan->device->dev, "vch %p: dma alloc fail\n", &c->vc);
-               kfree(ds);
-               return NULL;
-       }
-       ds->desc_num = num;
-       return ds;
-}
-
-static enum zx_dma_burst_width zx_dma_burst_width(enum dma_slave_buswidth width)
-{
-       switch (width) {
-       case DMA_SLAVE_BUSWIDTH_1_BYTE:
-       case DMA_SLAVE_BUSWIDTH_2_BYTES:
-       case DMA_SLAVE_BUSWIDTH_4_BYTES:
-       case DMA_SLAVE_BUSWIDTH_8_BYTES:
-               return ffs(width) - 1;
-       default:
-               return ZX_DMA_WIDTH_32BIT;
-       }
-}
-
-static int zx_pre_config(struct zx_dma_chan *c, enum dma_transfer_direction dir)
-{
-       struct dma_slave_config *cfg = &c->slave_cfg;
-       enum zx_dma_burst_width src_width;
-       enum zx_dma_burst_width dst_width;
-       u32 maxburst = 0;
-
-       switch (dir) {
-       case DMA_MEM_TO_MEM:
-               c->ccfg = ZX_CH_ENABLE | ZX_SOFT_REQ
-                       | ZX_SRC_BURST_LEN(ZX_MAX_BURST_LEN - 1)
-                       | ZX_SRC_BURST_WIDTH(ZX_DMA_WIDTH_32BIT)
-                       | ZX_DST_BURST_WIDTH(ZX_DMA_WIDTH_32BIT);
-               break;
-       case DMA_MEM_TO_DEV:
-               c->dev_addr = cfg->dst_addr;
-               /* dst len is calculated from src width, len and dst width.
-                * We need to make sure dst len does not exceed DMA_MAX_SIZE.
-                * A trailing single transaction that does not fill a full
-                * burst also requires identical src/dst data widths.
-                */
-               dst_width = zx_dma_burst_width(cfg->dst_addr_width);
-               maxburst = cfg->dst_maxburst;
-               maxburst = maxburst < ZX_MAX_BURST_LEN ?
-                               maxburst : ZX_MAX_BURST_LEN;
-               c->ccfg = ZX_DST_FIFO_MODE | ZX_CH_ENABLE
-                       | ZX_SRC_BURST_LEN(maxburst - 1)
-                       | ZX_SRC_BURST_WIDTH(dst_width)
-                       | ZX_DST_BURST_WIDTH(dst_width);
-               break;
-       case DMA_DEV_TO_MEM:
-               c->dev_addr = cfg->src_addr;
-               src_width = zx_dma_burst_width(cfg->src_addr_width);
-               maxburst = cfg->src_maxburst;
-               maxburst = maxburst < ZX_MAX_BURST_LEN ?
-                               maxburst : ZX_MAX_BURST_LEN;
-               c->ccfg = ZX_SRC_FIFO_MODE | ZX_CH_ENABLE
-                       | ZX_SRC_BURST_LEN(maxburst - 1)
-                       | ZX_SRC_BURST_WIDTH(src_width)
-                       | ZX_DST_BURST_WIDTH(src_width);
-               break;
-       default:
-               return -EINVAL;
-       }
-       return 0;
-}
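
The per-direction fields consumed above come from the dma_slave_config the client set beforehand; the transfer direction itself is taken from the prep call, not from the config. A hedged sketch (the fifo_addr parameter and example_ name are illustrative):

        static int example_config_tx(struct dma_chan *chan, dma_addr_t fifo_addr)
        {
                struct dma_slave_config cfg = {
                        .dst_addr = fifo_addr,
                        .dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES,
                        .dst_maxburst = 8,      /* clamped to ZX_MAX_BURST_LEN */
                };

                return dmaengine_slave_config(chan, &cfg);
        }
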
-
-static struct dma_async_tx_descriptor *zx_dma_prep_memcpy(
-       struct dma_chan *chan,  dma_addr_t dst, dma_addr_t src,
-       size_t len, unsigned long flags)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_desc_sw *ds;
-       size_t copy = 0;
-       int num = 0;
-
-       if (!len)
-               return NULL;
-
-       if (zx_pre_config(c, DMA_MEM_TO_MEM))
-               return NULL;
-
-       num = DIV_ROUND_UP(len, DMA_MAX_SIZE);
-
-       ds = zx_alloc_desc_resource(num, chan);
-       if (!ds)
-               return NULL;
-
-       ds->size = len;
-       num = 0;
-
-       do {
-               copy = min_t(size_t, len, DMA_MAX_SIZE);
-               zx_dma_fill_desc(ds, dst, src, copy, num++, c->ccfg);
-
-               src += copy;
-               dst += copy;
-               len -= copy;
-       } while (len);
-
-       c->cyclic = 0;
-       ds->desc_hw[num - 1].lli = 0;   /* end of link */
-       ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL;
-       return vchan_tx_prep(&c->vc, &ds->vd, flags);
-}
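
A copy longer than DMA_MAX_SIZE is transparently split into a chain of LLI nodes by the loop above; the client still issues a single prep call. A hedged sketch (the example_ name is illustrative):

        static dma_cookie_t example_memcpy(struct dma_chan *chan, dma_addr_t dst,
                                           dma_addr_t src, size_t len)
        {
                struct dma_async_tx_descriptor *desc;

                desc = dmaengine_prep_dma_memcpy(chan, dst, src, len,
                                                 DMA_PREP_INTERRUPT);
                if (!desc)
                        return -EBUSY;

                return dmaengine_submit(desc);
        }
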
-
-static struct dma_async_tx_descriptor *zx_dma_prep_slave_sg(
-       struct dma_chan *chan, struct scatterlist *sgl, unsigned int sglen,
-       enum dma_transfer_direction dir, unsigned long flags, void *context)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_desc_sw *ds;
-       size_t len, avail, total = 0;
-       struct scatterlist *sg;
-       dma_addr_t addr, src = 0, dst = 0;
-       int num = sglen, i;
-
-       if (!sgl)
-               return NULL;
-
-       if (zx_pre_config(c, dir))
-               return NULL;
-
-       for_each_sg(sgl, sg, sglen, i) {
-               avail = sg_dma_len(sg);
-               if (avail > DMA_MAX_SIZE)
-                       num += DIV_ROUND_UP(avail, DMA_MAX_SIZE) - 1;
-       }
-
-       ds = zx_alloc_desc_resource(num, chan);
-       if (!ds)
-               return NULL;
-
-       c->cyclic = 0;
-       num = 0;
-       for_each_sg(sgl, sg, sglen, i) {
-               addr = sg_dma_address(sg);
-               avail = sg_dma_len(sg);
-               total += avail;
-
-               do {
-                       len = min_t(size_t, avail, DMA_MAX_SIZE);
-
-                       if (dir == DMA_MEM_TO_DEV) {
-                               src = addr;
-                               dst = c->dev_addr;
-                       } else if (dir == DMA_DEV_TO_MEM) {
-                               src = c->dev_addr;
-                               dst = addr;
-                       }
-
-                       zx_dma_fill_desc(ds, dst, src, len, num++, c->ccfg);
-
-                       addr += len;
-                       avail -= len;
-               } while (avail);
-       }
-
-       ds->desc_hw[num - 1].lli = 0;   /* end of link */
-       ds->desc_hw[num - 1].ctr |= ZX_IRQ_ENABLE_ALL;
-       ds->size = total;
-       return vchan_tx_prep(&c->vc, &ds->vd, flags);
-}
-
-static struct dma_async_tx_descriptor *zx_dma_prep_dma_cyclic(
-               struct dma_chan *chan, dma_addr_t dma_addr, size_t buf_len,
-               size_t period_len, enum dma_transfer_direction dir,
-               unsigned long flags)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_desc_sw *ds;
-       dma_addr_t src = 0, dst = 0;
-       int num_periods = buf_len / period_len;
-       int buf = 0, num = 0;
-
-       if (period_len > DMA_MAX_SIZE) {
-               dev_err(chan->device->dev, "maximum period size exceeded\n");
-               return NULL;
-       }
-
-       if (zx_pre_config(c, dir))
-               return NULL;
-
-       ds = zx_alloc_desc_resource(num_periods, chan);
-       if (!ds)
-               return NULL;
-       c->cyclic = 1;
-
-       while (buf < buf_len) {
-               if (dir == DMA_MEM_TO_DEV) {
-                       src = dma_addr;
-                       dst = c->dev_addr;
-               } else if (dir == DMA_DEV_TO_MEM) {
-                       src = c->dev_addr;
-                       dst = dma_addr;
-               }
-               zx_dma_fill_desc(ds, dst, src, period_len, num++,
-                                c->ccfg | ZX_IRQ_ENABLE_ALL);
-               dma_addr += period_len;
-               buf += period_len;
-       }
-
-       ds->desc_hw[num - 1].lli = ds->desc_hw_lli;
-       ds->size = buf_len;
-       return vchan_tx_prep(&c->vc, &ds->vd, flags);
-}
-
-static int zx_dma_config(struct dma_chan *chan,
-                        struct dma_slave_config *cfg)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-
-       if (!cfg)
-               return -EINVAL;
-
-       memcpy(&c->slave_cfg, cfg, sizeof(*cfg));
-
-       return 0;
-}
-
-static int zx_dma_terminate_all(struct dma_chan *chan)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       struct zx_dma_dev *d = to_zx_dma(chan->device);
-       struct zx_dma_phy *p = c->phy;
-       unsigned long flags;
-       LIST_HEAD(head);
-
-       dev_dbg(d->slave.dev, "vchan %p: terminate all\n", &c->vc);
-
-       /* Prevent this channel being scheduled */
-       spin_lock(&d->lock);
-       list_del_init(&c->node);
-       spin_unlock(&d->lock);
-
-       /* Clear the tx descriptor lists */
-       spin_lock_irqsave(&c->vc.lock, flags);
-       vchan_get_all_descriptors(&c->vc, &head);
-       if (p) {
-               /* vchan is assigned to a pchan - stop the channel */
-               zx_dma_terminate_chan(p, d);
-               c->phy = NULL;
-               p->vchan = NULL;
-               p->ds_run = NULL;
-               p->ds_done = NULL;
-       }
-       spin_unlock_irqrestore(&c->vc.lock, flags);
-       vchan_dma_desc_free_list(&c->vc, &head);
-
-       return 0;
-}
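
Clients normally reach this path through dmaengine_terminate_sync(), which invokes device_terminate_all and then synchronizes against any in-flight completion callbacks. A minimal teardown sketch (illustrative):

        static void example_stop_and_release(struct dma_chan *chan)
        {
                dmaengine_terminate_sync(chan);
                dma_release_channel(chan);
        }
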
-
-static int zx_dma_transfer_pause(struct dma_chan *chan)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       u32 val = 0;
-
-       val = readl_relaxed(c->phy->base + REG_ZX_CTRL);
-       val &= ~ZX_CH_ENABLE;
-       writel_relaxed(val, c->phy->base + REG_ZX_CTRL);
-
-       return 0;
-}
-
-static int zx_dma_transfer_resume(struct dma_chan *chan)
-{
-       struct zx_dma_chan *c = to_zx_chan(chan);
-       u32 val = 0;
-
-       val = readl_relaxed(c->phy->base + REG_ZX_CTRL);
-       val |= ZX_CH_ENABLE;
-       writel_relaxed(val, c->phy->base + REG_ZX_CTRL);
-
-       return 0;
-}
-
-static void zx_dma_free_desc(struct virt_dma_desc *vd)
-{
-       struct zx_dma_desc_sw *ds =
-               container_of(vd, struct zx_dma_desc_sw, vd);
-       struct zx_dma_dev *d = to_zx_dma(vd->tx.chan->device);
-
-       dma_pool_free(d->pool, ds->desc_hw, ds->desc_hw_lli);
-       kfree(ds);
-}
-
-static const struct of_device_id zx6702_dma_dt_ids[] = {
-       { .compatible = "zte,zx296702-dma", },
-       {}
-};
-MODULE_DEVICE_TABLE(of, zx6702_dma_dt_ids);
-
-static struct dma_chan *zx_of_dma_simple_xlate(struct of_phandle_args *dma_spec,
-                                              struct of_dma *ofdma)
-{
-       struct zx_dma_dev *d = ofdma->of_dma_data;
-       unsigned int request = dma_spec->args[0];
-       struct dma_chan *chan;
-       struct zx_dma_chan *c;
-
-       if (request >= d->dma_requests)
-               return NULL;
-
-       chan = dma_get_any_slave_channel(&d->slave);
-       if (!chan) {
-               dev_err(d->slave.dev, "get channel fail in %s.\n", __func__);
-               return NULL;
-       }
-       c = to_zx_chan(chan);
-       c->id = request;
-       dev_info(d->slave.dev, "zx_dma: pchan %u: alloc vchan %p\n",
-                c->id, &c->vc);
-       return chan;
-}
-
-static int zx_dma_probe(struct platform_device *op)
-{
-       struct zx_dma_dev *d;
-       int i, ret = 0;
-
-       d = devm_kzalloc(&op->dev, sizeof(*d), GFP_KERNEL);
-       if (!d)
-               return -ENOMEM;
-
-       d->base = devm_platform_ioremap_resource(op, 0);
-       if (IS_ERR(d->base))
-               return PTR_ERR(d->base);
-
-       of_property_read_u32((&op->dev)->of_node,
-                            "dma-channels", &d->dma_channels);
-       of_property_read_u32((&op->dev)->of_node,
-                            "dma-requests", &d->dma_requests);
-       if (!d->dma_requests || !d->dma_channels)
-               return -EINVAL;
-
-       d->clk = devm_clk_get(&op->dev, NULL);
-       if (IS_ERR(d->clk)) {
-               dev_err(&op->dev, "no dma clk\n");
-               return PTR_ERR(d->clk);
-       }
-
-       d->irq = platform_get_irq(op, 0);
-       ret = devm_request_irq(&op->dev, d->irq, zx_dma_int_handler,
-                              0, DRIVER_NAME, d);
-       if (ret)
-               return ret;
-
-       /* A DMA memory pool for LLIs, aligned on a 32-byte boundary */
-       d->pool = dmam_pool_create(DRIVER_NAME, &op->dev,
-                       LLI_BLOCK_SIZE, 32, 0);
-       if (!d->pool)
-               return -ENOMEM;
-
-       /* init phy channel */
-       d->phy = devm_kcalloc(&op->dev,
-               d->dma_channels, sizeof(struct zx_dma_phy), GFP_KERNEL);
-       if (!d->phy)
-               return -ENOMEM;
-
-       for (i = 0; i < d->dma_channels; i++) {
-               struct zx_dma_phy *p = &d->phy[i];
-
-               p->idx = i;
-               p->base = d->base + i * 0x40;
-       }
-
-       INIT_LIST_HEAD(&d->slave.channels);
-       dma_cap_set(DMA_SLAVE, d->slave.cap_mask);
-       dma_cap_set(DMA_MEMCPY, d->slave.cap_mask);
-       dma_cap_set(DMA_CYCLIC, d->slave.cap_mask);
-       dma_cap_set(DMA_PRIVATE, d->slave.cap_mask);
-       d->slave.dev = &op->dev;
-       d->slave.device_free_chan_resources = zx_dma_free_chan_resources;
-       d->slave.device_tx_status = zx_dma_tx_status;
-       d->slave.device_prep_dma_memcpy = zx_dma_prep_memcpy;
-       d->slave.device_prep_slave_sg = zx_dma_prep_slave_sg;
-       d->slave.device_prep_dma_cyclic = zx_dma_prep_dma_cyclic;
-       d->slave.device_issue_pending = zx_dma_issue_pending;
-       d->slave.device_config = zx_dma_config;
-       d->slave.device_terminate_all = zx_dma_terminate_all;
-       d->slave.device_pause = zx_dma_transfer_pause;
-       d->slave.device_resume = zx_dma_transfer_resume;
-       d->slave.copy_align = DMA_ALIGN;
-       d->slave.src_addr_widths = ZX_DMA_BUSWIDTHS;
-       d->slave.dst_addr_widths = ZX_DMA_BUSWIDTHS;
-       d->slave.directions = BIT(DMA_MEM_TO_MEM) | BIT(DMA_MEM_TO_DEV)
-                       | BIT(DMA_DEV_TO_MEM);
-       d->slave.residue_granularity = DMA_RESIDUE_GRANULARITY_SEGMENT;
-
-       /* init virtual channel */
-       d->chans = devm_kcalloc(&op->dev,
-               d->dma_requests, sizeof(struct zx_dma_chan), GFP_KERNEL);
-       if (!d->chans)
-               return -ENOMEM;
-
-       for (i = 0; i < d->dma_requests; i++) {
-               struct zx_dma_chan *c = &d->chans[i];
-
-               c->status = DMA_IN_PROGRESS;
-               INIT_LIST_HEAD(&c->node);
-               c->vc.desc_free = zx_dma_free_desc;
-               vchan_init(&c->vc, &d->slave);
-       }
-
-       /* Enable clock before accessing registers */
-       ret = clk_prepare_enable(d->clk);
-       if (ret < 0) {
-               dev_err(&op->dev, "clk_prepare_enable failed: %d\n", ret);
-               goto zx_dma_out;
-       }
-
-       zx_dma_init_state(d);
-
-       spin_lock_init(&d->lock);
-       INIT_LIST_HEAD(&d->chan_pending);
-       platform_set_drvdata(op, d);
-
-       ret = dma_async_device_register(&d->slave);
-       if (ret)
-               goto clk_dis;
-
-       ret = of_dma_controller_register((&op->dev)->of_node,
-                                        zx_of_dma_simple_xlate, d);
-       if (ret)
-               goto of_dma_register_fail;
-
-       dev_info(&op->dev, "initialized\n");
-       return 0;
-
-of_dma_register_fail:
-       dma_async_device_unregister(&d->slave);
-clk_dis:
-       clk_disable_unprepare(d->clk);
-zx_dma_out:
-       return ret;
-}
-
-static int zx_dma_remove(struct platform_device *op)
-{
-       struct zx_dma_chan *c, *cn;
-       struct zx_dma_dev *d = platform_get_drvdata(op);
-
-       /* explicitly free the irq */
-       devm_free_irq(&op->dev, d->irq, d);
-
-       dma_async_device_unregister(&d->slave);
-       of_dma_controller_free((&op->dev)->of_node);
-
-       list_for_each_entry_safe(c, cn, &d->slave.channels,
-                                vc.chan.device_node) {
-               list_del(&c->vc.chan.device_node);
-       }
-       clk_disable_unprepare(d->clk);
-
-       return 0;
-}
-
-#ifdef CONFIG_PM_SLEEP
-static int zx_dma_suspend_dev(struct device *dev)
-{
-       struct zx_dma_dev *d = dev_get_drvdata(dev);
-       u32 stat = 0;
-
-       stat = zx_dma_get_chan_stat(d);
-       if (stat) {
-               dev_warn(d->slave.dev,
-                        "chan %d is running fail to suspend\n", stat);
-               return -1;
-       }
-       clk_disable_unprepare(d->clk);
-       return 0;
-}
-
-static int zx_dma_resume_dev(struct device *dev)
-{
-       struct zx_dma_dev *d = dev_get_drvdata(dev);
-       int ret = 0;
-
-       ret = clk_prepare_enable(d->clk);
-       if (ret < 0) {
-               dev_err(d->slave.dev, "clk_prepare_enable failed: %d\n", ret);
-               return ret;
-       }
-       zx_dma_init_state(d);
-       return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(zx_dma_pmops, zx_dma_suspend_dev, zx_dma_resume_dev);
-
-static struct platform_driver zx_pdma_driver = {
-       .driver         = {
-               .name   = DRIVER_NAME,
-               .pm     = &zx_dma_pmops,
-               .of_match_table = zx6702_dma_dt_ids,
-       },
-       .probe          = zx_dma_probe,
-       .remove         = zx_dma_remove,
-};
-
-module_platform_driver(zx_pdma_driver);
-
-MODULE_DESCRIPTION("ZTE ZX296702 DMA Driver");
-MODULE_AUTHOR("Jun Nie jun.nie@linaro.org");
-MODULE_LICENSE("GPL v2");
index 0a6438c..e7a9561 100644 (file)
@@ -1241,6 +1241,7 @@ int extcon_dev_register(struct extcon_dev *edev)
                                sizeof(*edev->nh), GFP_KERNEL);
        if (!edev->nh) {
                ret = -ENOMEM;
+               device_unregister(&edev->dev);
                goto err_dev;
        }
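
The one-line fix above follows the usual unwind rule for device_register(): once registration has succeeded, a later allocation failure must tear the device down through device_unregister() rather than jumping to a plain free label. A minimal sketch of the pattern, with a hypothetical struct example_dev mirroring the extcon fields:

/*
 * Hedged sketch of the unwind rule the fix applies, hypothetical names:
 * after device_register() succeeds, error paths must call
 * device_unregister() so the refcount and sysfs state stay balanced.
 */
struct example_dev {
	struct device dev;
	unsigned int max_supported;
	struct raw_notifier_head *nh;
};

static int example_register(struct example_dev *edev)
{
	int ret;

	ret = device_register(&edev->dev);
	if (ret)
		return ret;

	edev->nh = kcalloc(edev->max_supported, sizeof(*edev->nh), GFP_KERNEL);
	if (!edev->nh) {
		device_unregister(&edev->dev);	/* unwind the registration */
		return -ENOMEM;
	}

	return 0;
}
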
 
index 80db43a..6821698 100644 (file)
@@ -192,7 +192,9 @@ static int fw_unit_remove(struct device *dev)
        struct fw_driver *driver =
                        container_of(dev->driver, struct fw_driver, driver);
 
-       return driver->remove(fw_unit(dev)), 0;
+       driver->remove(fw_unit(dev));
+
+       return 0;
 }
 
 static int get_modalias(struct fw_unit *unit, char *buffer, size_t buffer_size)
index 5fd6a60..88ed971 100644 (file)
@@ -346,6 +346,7 @@ nosy_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
        struct client *client = file->private_data;
        spinlock_t *client_list_lock = &client->lynx->client_list_lock;
        struct nosy_stats stats;
+       int ret;
 
        switch (cmd) {
        case NOSY_IOC_GET_STATS:
@@ -360,11 +361,15 @@ nosy_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                        return 0;
 
        case NOSY_IOC_START:
+               ret = -EBUSY;
                spin_lock_irq(client_list_lock);
-               list_add_tail(&client->link, &client->lynx->client_list);
+               if (list_empty(&client->link)) {
+                       list_add_tail(&client->link, &client->lynx->client_list);
+                       ret = 0;
+               }
                spin_unlock_irq(client_list_lock);
 
-               return 0;
+               return ret;
 
        case NOSY_IOC_STOP:
                spin_lock_irq(client_list_lock);
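
The START path now refuses a second NOSY_IOC_START on the same client: calling list_add_tail() on an entry that is already on the list would corrupt it, hence the list_empty() guard and -EBUSY. For the guard to work, the client's link must start out (and be returned to) an empty list head; a hedged sketch of the assumed setup, since the open path is not part of this hunk:

/*
 * Sketch of the precondition the list_empty() guard relies on: the
 * link is initialized as an empty list head at open time, and STOP is
 * expected to use list_del_init() so a later START sees it empty again.
 * The open path itself is not shown in this diff.
 */
static int example_open(struct client *client)
{
	INIT_LIST_HEAD(&client->link);	/* list_empty() is true until START */
	return 0;
}
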
index df3f9bc..4b7ee3f 100644 (file)
@@ -927,7 +927,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
        }
 
        /* first try to find a slot in an existing linked list entry */
-       for (prsv = efi_memreserve_root->next; prsv; prsv = rsv->next) {
+       for (prsv = efi_memreserve_root->next; prsv; ) {
                rsv = memremap(prsv, sizeof(*rsv), MEMREMAP_WB);
                index = atomic_fetch_add_unless(&rsv->count, 1, rsv->size);
                if (index < rsv->size) {
@@ -937,6 +937,7 @@ int __ref efi_mem_reserve_persistent(phys_addr_t addr, u64 size)
                        memunmap(rsv);
                        return efi_mem_reserve_iomem(addr, size);
                }
+               prsv = rsv->next;
                memunmap(rsv);
        }
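
The loop rewrite fixes a use-after-unmap: the old post-iteration expression `prsv = rsv->next` ran after memunmap(rsv) had already torn down the mapping that rsv pointed into. Moving the load before memunmap() keeps every dereference inside the mapping's lifetime. Reduced to its essentials, with a hypothetical node type for illustration:

/*
 * Minimal shape of the fix, hypothetical node type: the pointer to the
 * next element must be read while the current element is still mapped,
 * never from the for-loop's post-iteration expression.
 */
struct node {
	phys_addr_t next;
	/* ... payload ... */
};

static void example_walk(phys_addr_t head)
{
	phys_addr_t phys = head;

	while (phys) {
		struct node *n = memremap(phys, sizeof(*n), MEMREMAP_WB);

		if (!n)
			break;
		/* ... inspect or update *n ... */
		phys = n->next;	/* load before the mapping goes away */
		memunmap(n);
	}
}
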
 
index 8a94388..c23466e 100644 (file)
@@ -38,6 +38,8 @@ KBUILD_CFLAGS                 := $(cflags-y) -Os -DDISABLE_BRANCH_PROFILING \
 
 # remove SCS flags from all objects in this directory
 KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_SCS), $(KBUILD_CFLAGS))
+# disable LTO
+KBUILD_CFLAGS := $(filter-out $(CC_FLAGS_LTO), $(KBUILD_CFLAGS))
 
 GCOV_PROFILE                   := n
 # Sanitizer runtimes are unavailable and cannot be linked here.
index b69d631..7bf0a7a 100644 (file)
@@ -24,7 +24,7 @@ efi_status_t check_platform_features(void)
                return EFI_SUCCESS;
 
        tg = (read_cpuid(ID_AA64MMFR0_EL1) >> ID_AA64MMFR0_TGRAN_SHIFT) & 0xf;
-       if (tg != ID_AA64MMFR0_TGRAN_SUPPORTED) {
+       if (tg < ID_AA64MMFR0_TGRAN_SUPPORTED_MIN || tg > ID_AA64MMFR0_TGRAN_SUPPORTED_MAX) {
                if (IS_ENABLED(CONFIG_ARM64_64K_PAGES))
                        efi_err("This 64 KB granular kernel is not supported by your CPU\n");
                else
index ec2f398..26e6978 100644 (file)
@@ -96,6 +96,18 @@ static void install_memreserve_table(void)
                efi_err("Failed to install memreserve config table!\n");
 }
 
+static u32 get_supported_rt_services(void)
+{
+       const efi_rt_properties_table_t *rt_prop_table;
+       u32 supported = EFI_RT_SUPPORTED_ALL;
+
+       rt_prop_table = get_efi_config_table(EFI_RT_PROPERTIES_TABLE_GUID);
+       if (rt_prop_table)
+               supported &= rt_prop_table->runtime_services_supported;
+
+       return supported;
+}
+
 /*
  * EFI entry point for the arm/arm64 EFI stubs.  This is the entrypoint
  * that is described in the PE/COFF header.  Most of the code is the same
@@ -250,6 +262,10 @@ efi_status_t __efiapi efi_pe_entry(efi_handle_t handle,
                          (prop_tbl->memory_protection_attribute &
                           EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA);
 
+       /* force efi_novamap if SetVirtualAddressMap() is unsupported */
+       efi_novamap |= !(get_supported_rt_services() &
+                        EFI_RT_SUPPORTED_SET_VIRTUAL_ADDRESS_MAP);
+
        /* hibernation expects the runtime regions to stay in the same place */
        if (!IS_ENABLED(CONFIG_HIBERNATION) && !efi_nokaslr && !flat_va_mapping) {
                /*
index 41c1d00..abdc8a6 100644 (file)
@@ -485,6 +485,10 @@ int efivar_init(int (*func)(efi_char16_t *, efi_guid_t, unsigned long, void *),
                        }
 
                        break;
+               case EFI_UNSUPPORTED:
+                       err = -EOPNOTSUPP;
+                       status = EFI_NOT_FOUND;
+                       break;
                case EFI_NOT_FOUND:
                        break;
                default:
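
The new case handles firmware that does not implement GetNextVariableName(): instead of propagating a hard failure, the status is rewritten so the enumeration ends as if the variable list were simply exhausted, while err records -EOPNOTSUPP for the caller. A compressed reading of the control flow, paraphrased rather than quoted from the file:

/*
 * Paraphrased control flow, not verbatim: EFI_UNSUPPORTED is translated
 * into "stop cleanly, but remember why the list came back empty".
 */
status = ops->get_next_variable(&len, name, &guid);
switch (status) {
case EFI_UNSUPPORTED:		/* firmware lacks GetNextVariableName() */
	err = -EOPNOTSUPP;
	status = EFI_NOT_FOUND;	/* treated as the normal end of the list */
	break;
case EFI_NOT_FOUND:		/* genuine end of the variable list */
	break;
default:
	/* other errors handled as before */
	break;
}
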
index 0205987..dc83ea1 100644 (file)
@@ -46,14 +46,13 @@ static int coreboot_bus_probe(struct device *dev)
 
 static int coreboot_bus_remove(struct device *dev)
 {
-       int ret = 0;
        struct coreboot_device *device = CB_DEV(dev);
        struct coreboot_driver *driver = CB_DRV(dev->driver);
 
        if (driver->remove)
-               ret = driver->remove(device);
+               driver->remove(device);
 
-       return ret;
+       return 0;
 }
 
 static struct bus_type coreboot_bus_type = {
index 7b7b4a6..beb7786 100644 (file)
@@ -72,7 +72,7 @@ struct coreboot_device {
 /* A driver for handling devices described in coreboot tables. */
 struct coreboot_driver {
        int (*probe)(struct coreboot_device *);
-       int (*remove)(struct coreboot_device *);
+       void (*remove)(struct coreboot_device *);
        struct device_driver drv;
        u32 tag;
 };
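
With the bus-level remove callback now always returning 0 (see coreboot_bus_remove above), individual drivers switch to a void remove, making it explicit that remove cannot fail. A minimal sketch of a driver written against the new prototype, with hypothetical names and tag value:

/*
 * Hypothetical driver against the new void-remove prototype; having
 * nothing to return means having nothing for the bus to silently drop.
 */
static int example_probe(struct coreboot_device *dev)
{
	return 0;	/* probe may still fail */
}

static void example_remove(struct coreboot_device *dev)
{
	/* undo probe; failure is not an option here */
}

static struct coreboot_driver example_driver = {
	.probe  = example_probe,
	.remove = example_remove,
	.drv    = { .name = "example" },
	.tag    = 0x99,	/* hypothetical coreboot table tag */
};
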
index 916f26a..c6dcc1e 100644 (file)
@@ -72,13 +72,11 @@ static int framebuffer_probe(struct coreboot_device *dev)
        return PTR_ERR_OR_ZERO(pdev);
 }
 
-static int framebuffer_remove(struct coreboot_device *dev)
+static void framebuffer_remove(struct coreboot_device *dev)
 {
        struct platform_device *pdev = dev_get_drvdata(&dev->dev);
 
        platform_device_unregister(pdev);
-
-       return 0;
 }
 
 static struct coreboot_driver framebuffer_driver = {
index d17e4d6..74b5286 100644 (file)
@@ -91,11 +91,9 @@ static int memconsole_probe(struct coreboot_device *dev)
        return memconsole_sysfs_init();
 }
 
-static int memconsole_remove(struct coreboot_device *dev)
+static void memconsole_remove(struct coreboot_device *dev)
 {
        memconsole_exit();
-
-       return 0;
 }
 
 static struct coreboot_driver memconsole_driver = {
index d23c5c6..ee6e08c 100644 (file)
@@ -298,14 +298,12 @@ static int vpd_probe(struct coreboot_device *dev)
        return 0;
 }
 
-static int vpd_remove(struct coreboot_device *dev)
+static void vpd_remove(struct coreboot_device *dev)
 {
        vpd_section_destroy(&ro_vpd);
        vpd_section_destroy(&rw_vpd);
 
        kobject_put(vpd_kobj);
-
-       return 0;
 }
 
 static struct coreboot_driver vpd_driver = {
index 5645226..5ff9438 100644 (file)
@@ -192,6 +192,17 @@ config FPGA_DFL_AFU
          to the FPGA infrastructure via a Port. There may be more than one
          Port/AFU per DFL based FPGA device.
 
+config FPGA_DFL_NIOS_INTEL_PAC_N3000
+       tristate "FPGA DFL NIOS Driver for Intel PAC N3000"
+       depends on FPGA_DFL
+       select REGMAP
+       help
+         This is the driver for the N3000 Nios private feature on Intel
+         PAC (Programmable Acceleration Card) N3000. It communicates
+         with the embedded Nios processor to configure the retimers on
+         the card. It also instantiates the SPI master (spi-altera) for
+         the card's BMC (Board Management Controller).
+
 config FPGA_DFL_PCI
        tristate "FPGA DFL PCIe Device Driver"
        depends on PCI && FPGA_DFL
index d8e21df..18dc988 100644 (file)
@@ -44,5 +44,7 @@ dfl-fme-objs += dfl-fme-perf.o
 dfl-afu-objs := dfl-afu-main.o dfl-afu-region.o dfl-afu-dma-region.o
 dfl-afu-objs += dfl-afu-error.o
 
+obj-$(CONFIG_FPGA_DFL_NIOS_INTEL_PAC_N3000)    += dfl-n3000-nios.o
+
 # Drivers for FPGAs which implement DFL
 obj-$(CONFIG_FPGA_DFL_PCI)             += dfl-pci.o
index 5312662..4299145 100644 (file)
@@ -192,7 +192,7 @@ static struct attribute *fme_perf_cpumask_attrs[] = {
        NULL,
 };
 
-static struct attribute_group fme_perf_cpumask_group = {
+static const struct attribute_group fme_perf_cpumask_group = {
        .attrs = fme_perf_cpumask_attrs,
 };
 
@@ -225,7 +225,7 @@ static struct attribute *fme_perf_format_attrs[] = {
        NULL,
 };
 
-static struct attribute_group fme_perf_format_group = {
+static const struct attribute_group fme_perf_format_group = {
        .name = "format",
        .attrs = fme_perf_format_attrs,
 };
@@ -239,7 +239,7 @@ static struct attribute *fme_perf_events_attrs_empty[] = {
        NULL,
 };
 
-static struct attribute_group fme_perf_events_group = {
+static const struct attribute_group fme_perf_events_group = {
        .name = "events",
        .attrs = fme_perf_events_attrs_empty,
 };
diff --git a/drivers/fpga/dfl-n3000-nios.c b/drivers/fpga/dfl-n3000-nios.c
new file mode 100644 (file)
index 0000000..7a95366
--- /dev/null
@@ -0,0 +1,588 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DFL device driver for Nios private feature on Intel PAC (Programmable
+ * Acceleration Card) N3000
+ *
+ * Copyright (C) 2019-2020 Intel Corporation, Inc.
+ *
+ * Authors:
+ *   Wu Hao <hao.wu@intel.com>
+ *   Xu Yilun <yilun.xu@intel.com>
+ */
+#include <linux/bitfield.h>
+#include <linux/dfl.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/regmap.h>
+#include <linux/stddef.h>
+#include <linux/spi/altera.h>
+#include <linux/spi/spi.h>
+#include <linux/types.h>
+
+/*
+ * N3000 Nios private feature registers, named NIOS_SPI_XX in the spec.
+ * NS is an abbreviation of NIOS_SPI.
+ */
+#define N3000_NS_PARAM                         0x8
+#define N3000_NS_PARAM_SHIFT_MODE_MSK          BIT_ULL(1)
+#define N3000_NS_PARAM_SHIFT_MODE_MSB          0
+#define N3000_NS_PARAM_SHIFT_MODE_LSB          1
+#define N3000_NS_PARAM_DATA_WIDTH              GENMASK_ULL(7, 2)
+#define N3000_NS_PARAM_NUM_CS                  GENMASK_ULL(13, 8)
+#define N3000_NS_PARAM_CLK_POL                 BIT_ULL(14)
+#define N3000_NS_PARAM_CLK_PHASE               BIT_ULL(15)
+#define N3000_NS_PARAM_PERIPHERAL_ID           GENMASK_ULL(47, 32)
+
+#define N3000_NS_CTRL                          0x10
+#define N3000_NS_CTRL_WR_DATA                  GENMASK_ULL(31, 0)
+#define N3000_NS_CTRL_ADDR                     GENMASK_ULL(44, 32)
+#define N3000_NS_CTRL_CMD_MSK                  GENMASK_ULL(63, 62)
+#define N3000_NS_CTRL_CMD_NOP                  0
+#define N3000_NS_CTRL_CMD_RD                   1
+#define N3000_NS_CTRL_CMD_WR                   2
+
+#define N3000_NS_STAT                          0x18
+#define N3000_NS_STAT_RD_DATA                  GENMASK_ULL(31, 0)
+#define N3000_NS_STAT_RW_VAL                   BIT_ULL(32)
+
+/* Nios handshake registers, indirect access */
+#define N3000_NIOS_INIT                                0x1000
+#define N3000_NIOS_INIT_DONE                   BIT(0)
+#define N3000_NIOS_INIT_START                  BIT(1)
+/* Mode for retimer A, link 0, the same below */
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK    GENMASK(9, 8)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK    GENMASK(11, 10)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK    GENMASK(13, 12)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK    GENMASK(15, 14)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK    GENMASK(17, 16)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK    GENMASK(19, 18)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK    GENMASK(21, 20)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK    GENMASK(23, 22)
+#define N3000_NIOS_INIT_REQ_FEC_MODE_NO                0x0
+#define N3000_NIOS_INIT_REQ_FEC_MODE_KR                0x1
+#define N3000_NIOS_INIT_REQ_FEC_MODE_RS                0x2
+
+#define N3000_NIOS_FW_VERSION                  0x1004
+#define N3000_NIOS_FW_VERSION_PATCH            GENMASK(23, 20)
+#define N3000_NIOS_FW_VERSION_MINOR            GENMASK(27, 24)
+#define N3000_NIOS_FW_VERSION_MAJOR            GENMASK(31, 28)
+
+/* The retimers we use on the Intel PAC N3000 are Parkvale, abbreviated to PKVL */
+#define N3000_NIOS_PKVL_A_MODE_STS             0x1020
+#define N3000_NIOS_PKVL_B_MODE_STS             0x1024
+#define N3000_NIOS_PKVL_MODE_STS_GROUP_MSK     GENMASK(15, 8)
+#define N3000_NIOS_PKVL_MODE_STS_GROUP_OK      0x0
+#define N3000_NIOS_PKVL_MODE_STS_ID_MSK                GENMASK(7, 0)
+/* Mode IDs, valid when the GROUP field == GROUP_OK */
+#define N3000_NIOS_PKVL_MODE_ID_RESET          0x0
+#define N3000_NIOS_PKVL_MODE_ID_4X10G          0x1
+#define N3000_NIOS_PKVL_MODE_ID_4X25G          0x2
+#define N3000_NIOS_PKVL_MODE_ID_2X25G          0x3
+#define N3000_NIOS_PKVL_MODE_ID_2X25G_2X10G    0x4
+#define N3000_NIOS_PKVL_MODE_ID_1X25G          0x5
+
+#define N3000_NIOS_REGBUS_RETRY_COUNT          10000   /* loop count */
+
+#define N3000_NIOS_INIT_TIMEOUT                        10000000        /* usec */
+#define N3000_NIOS_INIT_TIME_INTV              100000          /* usec */
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_MSK_ALL   \
+       (N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK |  \
+        N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK |  \
+        N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK |  \
+        N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK |  \
+        N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK |  \
+        N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK |  \
+        N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK |  \
+        N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK)
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_NO_ALL                    \
+       (FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_NO))
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_KR_ALL                    \
+       (FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_KR))
+
+#define N3000_NIOS_INIT_REQ_FEC_MODE_RS_ALL                    \
+       (FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A0_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A1_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A2_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_A3_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B0_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B1_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B2_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS) |          \
+        FIELD_PREP(N3000_NIOS_INIT_REQ_FEC_MODE_B3_MSK,        \
+                   N3000_NIOS_INIT_REQ_FEC_MODE_RS))
+
+struct n3000_nios {
+       void __iomem *base;
+       struct regmap *regmap;
+       struct device *dev;
+       struct platform_device *altera_spi;
+};
+
+static ssize_t nios_fw_version_show(struct device *dev,
+                                   struct device_attribute *attr, char *buf)
+{
+       struct n3000_nios *nn = dev_get_drvdata(dev);
+       unsigned int val;
+       int ret;
+
+       ret = regmap_read(nn->regmap, N3000_NIOS_FW_VERSION, &val);
+       if (ret)
+               return ret;
+
+       return sysfs_emit(buf, "%x.%x.%x\n",
+                         (u8)FIELD_GET(N3000_NIOS_FW_VERSION_MAJOR, val),
+                         (u8)FIELD_GET(N3000_NIOS_FW_VERSION_MINOR, val),
+                         (u8)FIELD_GET(N3000_NIOS_FW_VERSION_PATCH, val));
+}
+static DEVICE_ATTR_RO(nios_fw_version);
+
+#define IS_MODE_STATUS_OK(mode_stat)                                   \
+       (FIELD_GET(N3000_NIOS_PKVL_MODE_STS_GROUP_MSK, (mode_stat)) ==  \
+        N3000_NIOS_PKVL_MODE_STS_GROUP_OK)
+
+#define IS_RETIMER_FEC_SUPPORTED(retimer_mode)                 \
+       ((retimer_mode) != N3000_NIOS_PKVL_MODE_ID_RESET &&     \
+        (retimer_mode) != N3000_NIOS_PKVL_MODE_ID_4X10G)
+
+static int get_retimer_mode(struct n3000_nios *nn, unsigned int mode_stat_reg,
+                           unsigned int *retimer_mode)
+{
+       unsigned int val;
+       int ret;
+
+       ret = regmap_read(nn->regmap, mode_stat_reg, &val);
+       if (ret)
+               return ret;
+
+       if (!IS_MODE_STATUS_OK(val))
+               return -EFAULT;
+
+       *retimer_mode = FIELD_GET(N3000_NIOS_PKVL_MODE_STS_ID_MSK, val);
+
+       return 0;
+}
+
+static ssize_t retimer_A_mode_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct n3000_nios *nn = dev_get_drvdata(dev);
+       unsigned int mode;
+       int ret;
+
+       ret = get_retimer_mode(nn, N3000_NIOS_PKVL_A_MODE_STS, &mode);
+       if (ret)
+               return ret;
+
+       return sysfs_emit(buf, "0x%x\n", mode);
+}
+static DEVICE_ATTR_RO(retimer_A_mode);
+
+static ssize_t retimer_B_mode_show(struct device *dev,
+                                  struct device_attribute *attr, char *buf)
+{
+       struct n3000_nios *nn = dev_get_drvdata(dev);
+       unsigned int mode;
+       int ret;
+
+       ret = get_retimer_mode(nn, N3000_NIOS_PKVL_B_MODE_STS, &mode);
+       if (ret)
+               return ret;
+
+       return sysfs_emit(buf, "0x%x\n", mode);
+}
+static DEVICE_ATTR_RO(retimer_B_mode);
+
+static ssize_t fec_mode_show(struct device *dev,
+                            struct device_attribute *attr, char *buf)
+{
+       unsigned int val, retimer_a_mode, retimer_b_mode, fec_modes;
+       struct n3000_nios *nn = dev_get_drvdata(dev);
+       int ret;
+
+       /* FEC mode setting is not supported in early FW versions */
+       ret = regmap_read(nn->regmap, N3000_NIOS_FW_VERSION, &val);
+       if (ret)
+               return ret;
+
+       if (FIELD_GET(N3000_NIOS_FW_VERSION_MAJOR, val) < 3)
+               return sysfs_emit(buf, "not supported\n");
+
+       /* If there are no 25G links, FEC mode setting is not supported either */
+       ret = get_retimer_mode(nn, N3000_NIOS_PKVL_A_MODE_STS, &retimer_a_mode);
+       if (ret)
+               return ret;
+
+       ret = get_retimer_mode(nn, N3000_NIOS_PKVL_B_MODE_STS, &retimer_b_mode);
+       if (ret)
+               return ret;
+
+       if (!IS_RETIMER_FEC_SUPPORTED(retimer_a_mode) &&
+           !IS_RETIMER_FEC_SUPPORTED(retimer_b_mode))
+               return sysfs_emit(buf, "not supported\n");
+
+       /* get the valid FEC mode for 25G links */
+       ret = regmap_read(nn->regmap, N3000_NIOS_INIT, &val);
+       if (ret)
+               return ret;
+
+       /*
+        * The FEC mode should always be the same for all links, since the
+        * driver sets them that way.
+        */
+       fec_modes = (val & N3000_NIOS_INIT_REQ_FEC_MODE_MSK_ALL);
+       if (fec_modes == N3000_NIOS_INIT_REQ_FEC_MODE_NO_ALL)
+               return sysfs_emit(buf, "no\n");
+       else if (fec_modes == N3000_NIOS_INIT_REQ_FEC_MODE_KR_ALL)
+               return sysfs_emit(buf, "kr\n");
+       else if (fec_modes == N3000_NIOS_INIT_REQ_FEC_MODE_RS_ALL)
+               return sysfs_emit(buf, "rs\n");
+
+       return -EFAULT;
+}
+static DEVICE_ATTR_RO(fec_mode);
+
+static struct attribute *n3000_nios_attrs[] = {
+       &dev_attr_nios_fw_version.attr,
+       &dev_attr_retimer_A_mode.attr,
+       &dev_attr_retimer_B_mode.attr,
+       &dev_attr_fec_mode.attr,
+       NULL,
+};
+ATTRIBUTE_GROUPS(n3000_nios);
+
+static int n3000_nios_init_done_check(struct n3000_nios *nn)
+{
+       unsigned int val, state_a, state_b;
+       struct device *dev = nn->dev;
+       int ret, ret2;
+
+       /*
+        * The SPI master is shared with the Nios core inside the FPGA. After
+        * power-up, Nios uses this SPI master to do some one-time
+        * initialization and then releases control to the OS. The driver
+        * needs to poll INIT_DONE to see when it can take control.
+        *
+        * Note that Nios firmware version 3.0.0 and later introduce
+        * INIT_START, so the driver needs to trigger START first and then
+        * check INIT_DONE.
+        */
+
+       ret = regmap_read(nn->regmap, N3000_NIOS_FW_VERSION, &val);
+       if (ret)
+               return ret;
+
+       /*
+        * If the Nios version register is entirely uninitialized (== 0x0),
+        * the Nios firmware is missing. The host can then take control of
+        * the SPI master safely, but the Nios initialization work has not
+        * been done. To restore the card, a new Nios firmware must be
+        * reprogrammed via the BMC chip on the SPI bus, so the driver
+        * doesn't error out; it continues to create the spi controller
+        * device and spi_board_info for the BMC.
+        */
+       if (val == 0) {
+               dev_err(dev, "Nios version reg = 0x%x, skip INIT_DONE check, but the retimer may be uninitialized\n",
+                       val);
+               return 0;
+       }
+
+       if (FIELD_GET(N3000_NIOS_FW_VERSION_MAJOR, val) >= 3) {
+               /* read NIOS_INIT to check if retimer initialization is done */
+               ret = regmap_read(nn->regmap, N3000_NIOS_INIT, &val);
+               if (ret)
+                       return ret;
+
+               /* check if retimers are initialized already */
+               if (val & (N3000_NIOS_INIT_DONE | N3000_NIOS_INIT_START))
+                       goto nios_init_done;
+
+               /* trigger the init start and configure the FEC mode */
+               val = N3000_NIOS_INIT_START;
+
+               /*
+                * When the retimer is to be set to 10G mode, there is no FEC
+                * mode setting, so the Nios firmware ignores the REQ_FEC_MODE
+                * field in that case. We should still fill the FEC mode
+                * field, because the host cannot know the retimer's working
+                * mode until the Nios init is done.
+                *
+                * For now the driver doesn't support switching the retimer
+                * FEC mode on the user's request; it is always set to
+                * Reed-Solomon FEC.
+                *
+                * The driver sets the same FEC mode for all links.
+                */
+               val |= N3000_NIOS_INIT_REQ_FEC_MODE_RS_ALL;
+
+               ret = regmap_write(nn->regmap, N3000_NIOS_INIT, val);
+               if (ret)
+                       return ret;
+       }
+
+nios_init_done:
+       /* poll on NIOS_INIT_DONE */
+       ret = regmap_read_poll_timeout(nn->regmap, N3000_NIOS_INIT, val,
+                                      val & N3000_NIOS_INIT_DONE,
+                                      N3000_NIOS_INIT_TIME_INTV,
+                                      N3000_NIOS_INIT_TIMEOUT);
+       if (ret)
+               dev_err(dev, "NIOS_INIT_DONE %s\n",
+                       (ret == -ETIMEDOUT) ? "timed out" : "check error");
+
+       ret2 = regmap_read(nn->regmap, N3000_NIOS_PKVL_A_MODE_STS, &state_a);
+       if (ret2)
+               return ret2;
+
+       ret2 = regmap_read(nn->regmap, N3000_NIOS_PKVL_B_MODE_STS, &state_b);
+       if (ret2)
+               return ret2;
+
+       if (!ret) {
+               /*
+                * After INIT_DONE is detected, the driver still needs to
+                * check whether the Nios firmware reported any error during
+                * the retimer configuration.
+                */
+               if (IS_MODE_STATUS_OK(state_a) && IS_MODE_STATUS_OK(state_b))
+                       return 0;
+
+               /*
+                * If the retimer configuration failed, the Nios firmware
+                * still releases the spi controller so the host can
+                * communicate with the BMC. This makes it possible to
+                * reprogram a new Nios firmware and restore the card, so the
+                * driver doesn't error out; it continues to create the spi
+                * controller device and spi_board_info for the BMC.
+                */
+               dev_err(dev, "NIOS_INIT_DONE OK, but err on retimer init\n");
+       }
+
+       dev_err(nn->dev, "PKVL_A_MODE_STS 0x%x\n", state_a);
+       dev_err(nn->dev, "PKVL_B_MODE_STS 0x%x\n", state_b);
+
+       return ret;
+}
+
+static struct spi_board_info m10_n3000_info = {
+       .modalias = "m10-n3000",
+       .max_speed_hz = 12500000,
+       .bus_num = 0,
+       .chip_select = 0,
+};
+
+static int create_altera_spi_controller(struct n3000_nios *nn)
+{
+       struct altera_spi_platform_data pdata = { 0 };
+       struct platform_device_info pdevinfo = { 0 };
+       void __iomem *base = nn->base;
+       u64 v;
+
+       v = readq(base + N3000_NS_PARAM);
+
+       pdata.mode_bits = SPI_CS_HIGH;
+       if (FIELD_GET(N3000_NS_PARAM_CLK_POL, v))
+               pdata.mode_bits |= SPI_CPOL;
+       if (FIELD_GET(N3000_NS_PARAM_CLK_PHASE, v))
+               pdata.mode_bits |= SPI_CPHA;
+
+       pdata.num_chipselect = FIELD_GET(N3000_NS_PARAM_NUM_CS, v);
+       pdata.bits_per_word_mask =
+               SPI_BPW_RANGE_MASK(1, FIELD_GET(N3000_NS_PARAM_DATA_WIDTH, v));
+
+       pdata.num_devices = 1;
+       pdata.devices = &m10_n3000_info;
+
+       dev_dbg(nn->dev, "%s cs %u bpm 0x%x mode 0x%x\n", __func__,
+               pdata.num_chipselect, pdata.bits_per_word_mask,
+               pdata.mode_bits);
+
+       pdevinfo.name = "subdev_spi_altera";
+       pdevinfo.id = PLATFORM_DEVID_AUTO;
+       pdevinfo.parent = nn->dev;
+       pdevinfo.data = &pdata;
+       pdevinfo.size_data = sizeof(pdata);
+
+       nn->altera_spi = platform_device_register_full(&pdevinfo);
+       return PTR_ERR_OR_ZERO(nn->altera_spi);
+}
+
+static void destroy_altera_spi_controller(struct n3000_nios *nn)
+{
+       platform_device_unregister(nn->altera_spi);
+}
+
+static int n3000_nios_poll_stat_timeout(void __iomem *base, u64 *v)
+{
+       int loops;
+
+       /*
+        * We don't use a time-based timeout here, for performance reasons.
+        *
+        * The regbus read/write is on the critical path of Intel PAC N3000
+        * image programming, and time-based timeout checking would add too
+        * much overhead to it. The state usually changes within 1 or 2 loop
+        * iterations on the test server; the 10000-iteration limit here is
+        * for safety.
+        */
+       for (loops = N3000_NIOS_REGBUS_RETRY_COUNT; loops > 0 ; loops--) {
+               *v = readq(base + N3000_NS_STAT);
+               if (*v & N3000_NS_STAT_RW_VAL)
+                       break;
+               cpu_relax();
+       }
+
+       return (loops > 0) ? 0 : -ETIMEDOUT;
+}
+
+static int n3000_nios_reg_write(void *context, unsigned int reg, unsigned int val)
+{
+       struct n3000_nios *nn = context;
+       u64 v;
+       int ret;
+
+       v = FIELD_PREP(N3000_NS_CTRL_CMD_MSK, N3000_NS_CTRL_CMD_WR) |
+           FIELD_PREP(N3000_NS_CTRL_ADDR, reg) |
+           FIELD_PREP(N3000_NS_CTRL_WR_DATA, val);
+       writeq(v, nn->base + N3000_NS_CTRL);
+
+       ret = n3000_nios_poll_stat_timeout(nn->base, &v);
+       if (ret)
+               dev_err(nn->dev, "fail to write reg 0x%x val 0x%x: %d\n",
+                       reg, val, ret);
+
+       return ret;
+}
+
+static int n3000_nios_reg_read(void *context, unsigned int reg, unsigned int *val)
+{
+       struct n3000_nios *nn = context;
+       u64 v;
+       int ret;
+
+       v = FIELD_PREP(N3000_NS_CTRL_CMD_MSK, N3000_NS_CTRL_CMD_RD) |
+           FIELD_PREP(N3000_NS_CTRL_ADDR, reg);
+       writeq(v, nn->base + N3000_NS_CTRL);
+
+       ret = n3000_nios_poll_stat_timeout(nn->base, &v);
+       if (ret)
+               dev_err(nn->dev, "fail to read reg 0x%x: %d\n", reg, ret);
+       else
+               *val = FIELD_GET(N3000_NS_STAT_RD_DATA, v);
+
+       return ret;
+}
+
+static const struct regmap_config n3000_nios_regbus_cfg = {
+       .reg_bits = 32,
+       .reg_stride = 4,
+       .val_bits = 32,
+       .fast_io = true,
+
+       .reg_write = n3000_nios_reg_write,
+       .reg_read = n3000_nios_reg_read,
+};
+
+static int n3000_nios_probe(struct dfl_device *ddev)
+{
+       struct device *dev = &ddev->dev;
+       struct n3000_nios *nn;
+       int ret;
+
+       nn = devm_kzalloc(dev, sizeof(*nn), GFP_KERNEL);
+       if (!nn)
+               return -ENOMEM;
+
+       dev_set_drvdata(&ddev->dev, nn);
+
+       nn->dev = dev;
+
+       nn->base = devm_ioremap_resource(&ddev->dev, &ddev->mmio_res);
+       if (IS_ERR(nn->base))
+               return PTR_ERR(nn->base);
+
+       nn->regmap = devm_regmap_init(dev, NULL, nn, &n3000_nios_regbus_cfg);
+       if (IS_ERR(nn->regmap))
+               return PTR_ERR(nn->regmap);
+
+       ret = n3000_nios_init_done_check(nn);
+       if (ret)
+               return ret;
+
+       ret = create_altera_spi_controller(nn);
+       if (ret)
+               dev_err(dev, "altera spi controller create failed: %d\n", ret);
+
+       return ret;
+}
+
+static void n3000_nios_remove(struct dfl_device *ddev)
+{
+       struct n3000_nios *nn = dev_get_drvdata(&ddev->dev);
+
+       destroy_altera_spi_controller(nn);
+}
+
+#define FME_FEATURE_ID_N3000_NIOS      0xd
+
+static const struct dfl_device_id n3000_nios_ids[] = {
+       { FME_ID, FME_FEATURE_ID_N3000_NIOS },
+       { }
+};
+MODULE_DEVICE_TABLE(dfl, n3000_nios_ids);
+
+static struct dfl_driver n3000_nios_driver = {
+       .drv    = {
+               .name       = "dfl-n3000-nios",
+               .dev_groups = n3000_nios_groups,
+       },
+       .id_table = n3000_nios_ids,
+       .probe   = n3000_nios_probe,
+       .remove  = n3000_nios_remove,
+};
+
+module_dfl_driver(n3000_nios_driver);
+
+MODULE_DESCRIPTION("Driver for Nios private feature on Intel PAC N3000");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
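
For readers unfamiliar with regmap_read_poll_timeout(), the INIT_DONE wait in n3000_nios_init_done_check() is roughly equivalent to the open-coded loop below, using the driver's own interval and timeout constants. This is a hedged sketch of the macro's semantics (sleep between reads, bail after the timeout budget), not a drop-in replacement, and it assumes linux/delay.h for usleep_range():

/*
 * Rough open-coded equivalent of the regmap_read_poll_timeout() call in
 * n3000_nios_init_done_check(); a sketch of the semantics only.
 */
static int example_wait_init_done(struct n3000_nios *nn)
{
	s64 budget = N3000_NIOS_INIT_TIMEOUT;	/* usec */
	unsigned int val;
	int ret;

	for (;;) {
		ret = regmap_read(nn->regmap, N3000_NIOS_INIT, &val);
		if (ret)
			return ret;
		if (val & N3000_NIOS_INIT_DONE)
			return 0;
		if (budget <= 0)
			return -ETIMEDOUT;
		usleep_range(N3000_NIOS_INIT_TIME_INTV / 2,
			     N3000_NIOS_INIT_TIME_INTV);
		budget -= N3000_NIOS_INIT_TIME_INTV;
	}
}
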
index a2203d0..04e47e2 100644 (file)
 #define DRV_VERSION    "0.8"
 #define DRV_NAME       "dfl-pci"
 
+#define PCI_VSEC_ID_INTEL_DFLS 0x43
+
+#define PCI_VNDR_DFLS_CNT 0x8
+#define PCI_VNDR_DFLS_RES 0xc
+
+#define PCI_VNDR_DFLS_RES_BAR_MASK GENMASK(2, 0)
+#define PCI_VNDR_DFLS_RES_OFF_MASK GENMASK(31, 3)
+
 struct cci_drvdata {
        struct dfl_fpga_cdev *cdev;     /* container device */
 };
@@ -119,49 +127,94 @@ static int *cci_pci_create_irq_table(struct pci_dev *pcidev, unsigned int nvec)
        return table;
 }
 
-/* enumerate feature devices under pci device */
-static int cci_enumerate_feature_devs(struct pci_dev *pcidev)
+static int find_dfls_by_vsec(struct pci_dev *pcidev, struct dfl_fpga_enum_info *info)
 {
-       struct cci_drvdata *drvdata = pci_get_drvdata(pcidev);
-       int port_num, bar, i, nvec, ret = 0;
-       struct dfl_fpga_enum_info *info;
-       struct dfl_fpga_cdev *cdev;
+       u32 bir, offset, vndr_hdr, dfl_cnt, dfl_res;
+       int dfl_res_off, i, bars, voff = 0;
        resource_size_t start, len;
-       void __iomem *base;
-       int *irq_table;
-       u32 offset;
-       u64 v;
 
-       /* allocate enumeration info via pci_dev */
-       info = dfl_fpga_enum_info_alloc(&pcidev->dev);
-       if (!info)
-               return -ENOMEM;
+       while ((voff = pci_find_next_ext_capability(pcidev, voff, PCI_EXT_CAP_ID_VNDR))) {
+               vndr_hdr = 0;
+               pci_read_config_dword(pcidev, voff + PCI_VNDR_HEADER, &vndr_hdr);
 
-       /* add irq info for enumeration if the device support irq */
-       nvec = cci_pci_alloc_irq(pcidev);
-       if (nvec < 0) {
-               dev_err(&pcidev->dev, "Fail to alloc irq %d.\n", nvec);
-               ret = nvec;
-               goto enum_info_free_exit;
-       } else if (nvec) {
-               irq_table = cci_pci_create_irq_table(pcidev, nvec);
-               if (!irq_table) {
-                       ret = -ENOMEM;
-                       goto irq_free_exit;
+               if (PCI_VNDR_HEADER_ID(vndr_hdr) == PCI_VSEC_ID_INTEL_DFLS &&
+                   pcidev->vendor == PCI_VENDOR_ID_INTEL)
+                       break;
+       }
+
+       if (!voff) {
+               dev_dbg(&pcidev->dev, "%s no DFL VSEC found\n", __func__);
+               return -ENODEV;
+       }
+
+       dfl_cnt = 0;
+       pci_read_config_dword(pcidev, voff + PCI_VNDR_DFLS_CNT, &dfl_cnt);
+       if (dfl_cnt > PCI_STD_NUM_BARS) {
+               dev_err(&pcidev->dev, "%s too many DFLs %d > %d\n",
+                       __func__, dfl_cnt, PCI_STD_NUM_BARS);
+               return -EINVAL;
+       }
+
+       dfl_res_off = voff + PCI_VNDR_DFLS_RES;
+       if (dfl_res_off + (dfl_cnt * sizeof(u32)) > PCI_CFG_SPACE_EXP_SIZE) {
+               dev_err(&pcidev->dev, "%s DFL VSEC too big for PCIe config space\n",
+                       __func__);
+               return -EINVAL;
+       }
+
+       for (i = 0, bars = 0; i < dfl_cnt; i++, dfl_res_off += sizeof(u32)) {
+               dfl_res = GENMASK(31, 0);
+               pci_read_config_dword(pcidev, dfl_res_off, &dfl_res);
+
+               bir = dfl_res & PCI_VNDR_DFLS_RES_BAR_MASK;
+               if (bir >= PCI_STD_NUM_BARS) {
+                       dev_err(&pcidev->dev, "%s bad bir number %d\n",
+                               __func__, bir);
+                       return -EINVAL;
                }
 
-               ret = dfl_fpga_enum_info_add_irq(info, nvec, irq_table);
-               kfree(irq_table);
-               if (ret)
-                       goto irq_free_exit;
+               if (bars & BIT(bir)) {
+                       dev_err(&pcidev->dev, "%s DFL for BAR %d already specified\n",
+                               __func__, bir);
+                       return -EINVAL;
+               }
+
+               bars |= BIT(bir);
+
+               len = pci_resource_len(pcidev, bir);
+               offset = dfl_res & PCI_VNDR_DFLS_RES_OFF_MASK;
+               if (offset >= len) {
+                       dev_err(&pcidev->dev, "%s bad offset %u >= %pa\n",
+                               __func__, offset, &len);
+                       return -EINVAL;
+               }
+
+               dev_dbg(&pcidev->dev, "%s BAR %d offset 0x%x\n", __func__, bir, offset);
+
+               len -= offset;
+
+               start = pci_resource_start(pcidev, bir) + offset;
+
+               dfl_fpga_enum_info_add_dfl(info, start, len);
        }
 
-       /* start to find Device Feature List in Bar 0 */
+       return 0;
+}
+
+/* default method: find DFLs starting at offset 0 of BAR 0 */
+static int find_dfls_by_default(struct pci_dev *pcidev,
+                               struct dfl_fpga_enum_info *info)
+{
+       int port_num, bar, i, ret = 0;
+       resource_size_t start, len;
+       void __iomem *base;
+       u32 offset;
+       u64 v;
+
+       /* start to find Device Feature List from Bar 0 */
        base = cci_pci_ioremap_bar0(pcidev);
-       if (!base) {
-               ret = -ENOMEM;
-               goto irq_free_exit;
-       }
+       if (!base)
+               return -ENOMEM;
 
        /*
         * PF device has FME and Ports/AFUs, and VF device only has one
@@ -208,12 +261,54 @@ static int cci_enumerate_feature_devs(struct pci_dev *pcidev)
                dfl_fpga_enum_info_add_dfl(info, start, len);
        } else {
                ret = -ENODEV;
-               goto irq_free_exit;
        }
 
        /* release I/O mappings for next step enumeration */
        pcim_iounmap_regions(pcidev, BIT(0));
 
+       return ret;
+}
+
+/* enumerate feature devices under pci device */
+static int cci_enumerate_feature_devs(struct pci_dev *pcidev)
+{
+       struct cci_drvdata *drvdata = pci_get_drvdata(pcidev);
+       struct dfl_fpga_enum_info *info;
+       struct dfl_fpga_cdev *cdev;
+       int nvec, ret = 0;
+       int *irq_table;
+
+       /* allocate enumeration info via pci_dev */
+       info = dfl_fpga_enum_info_alloc(&pcidev->dev);
+       if (!info)
+               return -ENOMEM;
+
+       /* add irq info for enumeration if the device supports irq */
+       nvec = cci_pci_alloc_irq(pcidev);
+       if (nvec < 0) {
+               dev_err(&pcidev->dev, "Fail to alloc irq %d.\n", nvec);
+               ret = nvec;
+               goto enum_info_free_exit;
+       } else if (nvec) {
+               irq_table = cci_pci_create_irq_table(pcidev, nvec);
+               if (!irq_table) {
+                       ret = -ENOMEM;
+                       goto irq_free_exit;
+               }
+
+               ret = dfl_fpga_enum_info_add_irq(info, nvec, irq_table);
+               kfree(irq_table);
+               if (ret)
+                       goto irq_free_exit;
+       }
+
+       ret = find_dfls_by_vsec(pcidev, info);
+       if (ret == -ENODEV)
+               ret = find_dfls_by_default(pcidev, info);
+
+       if (ret)
+               goto irq_free_exit;
+
        /* start enumeration with prepared enumeration information */
        cdev = dfl_fpga_feature_devs_enumerate(info);
        if (IS_ERR(cdev)) {
index b450870..511b20f 100644 (file)
@@ -10,6 +10,7 @@
  *   Wu Hao <hao.wu@intel.com>
  *   Xiao Guangrong <guangrong.xiao@linux.intel.com>
  */
+#include <linux/dfl.h>
 #include <linux/fpga-dfl.h>
 #include <linux/module.h>
 #include <linux/uaccess.h>
@@ -298,8 +299,7 @@ static int dfl_bus_uevent(struct device *dev, struct kobj_uevent_env *env)
 {
        struct dfl_device *ddev = to_dfl_dev(dev);
 
-       /* The type has 4 valid bits and feature_id has 12 valid bits */
-       return add_uevent_var(env, "MODALIAS=dfl:t%01Xf%03X",
+       return add_uevent_var(env, "MODALIAS=dfl:t%04Xf%04X",
                              ddev->type, ddev->feature_id);
 }
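
As an illustration of the widened format, take the N3000 Nios feature from the driver above (type FME_ID == 0, feature_id 0xd): the uevent now carries four hex digits per field, where the old format packed the type into one digit and the feature into three.

/*
 * Illustration with the N3000 Nios IDs (FME_ID == 0, feature_id 0xd):
 *
 *   old format "dfl:t%01Xf%03X"  ->  MODALIAS=dfl:t0f00D
 *   new format "dfl:t%04Xf%04X"  ->  MODALIAS=dfl:t0000f000D
 *
 * The wider fields leave headroom for the 16-bit feature_id that
 * struct dfl_device carries (see the dfl.h hunks later in this diff).
 */
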
 
index 5dc758f..2b82c96 100644 (file)
@@ -22,6 +22,7 @@
 #include <linux/interrupt.h>
 #include <linux/iopoll.h>
 #include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/mod_devicetable.h>
 #include <linux/platform_device.h>
 #include <linux/slab.h>
 #include <linux/uuid.h>
@@ -516,88 +517,4 @@ long dfl_feature_ioctl_set_irq(struct platform_device *pdev,
                               struct dfl_feature *feature,
                               unsigned long arg);
 
-/**
- * enum dfl_id_type - define the DFL FIU types
- */
-enum dfl_id_type {
-       FME_ID,
-       PORT_ID,
-       DFL_ID_MAX,
-};
-
-/**
- * struct dfl_device_id -  dfl device identifier
- * @type: contains 4 bits DFL FIU type of the device. See enum dfl_id_type.
- * @feature_id: contains 12 bits feature identifier local to its DFL FIU type.
- * @driver_data: driver specific data.
- */
-struct dfl_device_id {
-       u8 type;
-       u16 feature_id;
-       unsigned long driver_data;
-};
-
-/**
- * struct dfl_device - represent an dfl device on dfl bus
- *
- * @dev: generic device interface.
- * @id: id of the dfl device.
- * @type: type of DFL FIU of the device. See enum dfl_id_type.
- * @feature_id: 16 bits feature identifier local to its DFL FIU type.
- * @mmio_res: mmio resource of this dfl device.
- * @irqs: list of Linux IRQ numbers of this dfl device.
- * @num_irqs: number of IRQs supported by this dfl device.
- * @cdev: pointer to DFL FPGA container device this dfl device belongs to.
- * @id_entry: matched id entry in dfl driver's id table.
- */
-struct dfl_device {
-       struct device dev;
-       int id;
-       u8 type;
-       u16 feature_id;
-       struct resource mmio_res;
-       int *irqs;
-       unsigned int num_irqs;
-       struct dfl_fpga_cdev *cdev;
-       const struct dfl_device_id *id_entry;
-};
-
-/**
- * struct dfl_driver - represent an dfl device driver
- *
- * @drv: driver model structure.
- * @id_table: pointer to table of device IDs the driver is interested in.
- *           { } member terminated.
- * @probe: mandatory callback for device binding.
- * @remove: callback for device unbinding.
- */
-struct dfl_driver {
-       struct device_driver drv;
-       const struct dfl_device_id *id_table;
-
-       int (*probe)(struct dfl_device *dfl_dev);
-       void (*remove)(struct dfl_device *dfl_dev);
-};
-
-#define to_dfl_dev(d) container_of(d, struct dfl_device, dev)
-#define to_dfl_drv(d) container_of(d, struct dfl_driver, drv)
-
-/*
- * use a macro to avoid include chaining to get THIS_MODULE.
- */
-#define dfl_driver_register(drv) \
-       __dfl_driver_register(drv, THIS_MODULE)
-int __dfl_driver_register(struct dfl_driver *dfl_drv, struct module *owner);
-void dfl_driver_unregister(struct dfl_driver *dfl_drv);
-
-/*
- * module_dfl_driver() - Helper macro for drivers that don't do
- * anything special in module init/exit.  This eliminates a lot of
- * boilerplate.  Each module may only use this macro once, and
- * calling it replaces module_init() and module_exit().
- */
-#define module_dfl_driver(__dfl_driver) \
-       module_driver(__dfl_driver, dfl_driver_register, \
-                     dfl_driver_unregister)
-
 #endif /* __FPGA_DFL_H */
index 2deccac..e9266b2 100644 (file)
@@ -17,7 +17,7 @@ static DEFINE_IDA(fpga_bridge_ida);
 static struct class *fpga_bridge_class;
 
 /* Lock for adding/removing bridges to linked lists*/
-static spinlock_t bridge_list_lock;
+static DEFINE_SPINLOCK(bridge_list_lock);
 
 /**
  * fpga_bridge_enable - Enable transactions on the bridge
@@ -479,8 +479,6 @@ static void fpga_bridge_dev_release(struct device *dev)
 
 static int __init fpga_bridge_dev_init(void)
 {
-       spin_lock_init(&bridge_list_lock);
-
        fpga_bridge_class = class_create(THIS_MODULE, "fpga_bridge");
        if (IS_ERR(fpga_bridge_class))
                return PTR_ERR(fpga_bridge_class);
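
The change replaces a runtime spin_lock_init() in the module init path with compile-time initialization, which also covers any (theoretical) use of the lock before fpga_bridge_dev_init() runs. The general rule, as a hedged sketch:

/*
 * Sketch of the general rule: a file-scope spinlock is either defined
 * with DEFINE_SPINLOCK() or explicitly spin_lock_init()'d before its
 * first use; a bare "static spinlock_t" is uninitialized and trips
 * lockdep/debug checks.
 */
static DEFINE_SPINLOCK(example_lock);	/* valid from the first access */

static void example_critical_section(void)
{
	unsigned long flags;

	spin_lock_irqsave(&example_lock, flags);
	/* ... touch the shared state ... */
	spin_unlock_irqrestore(&example_lock, flags);
}
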
index 5ea09fd..c91d056 100644 (file)
@@ -113,8 +113,29 @@ MODULE_DEVICE_TABLE(i2c, pca953x_id);
 #ifdef CONFIG_GPIO_PCA953X_IRQ
 
 #include <linux/dmi.h>
-#include <linux/gpio.h>
-#include <linux/list.h>
+
+static const struct acpi_gpio_params pca953x_irq_gpios = { 0, 0, true };
+
+static const struct acpi_gpio_mapping pca953x_acpi_irq_gpios[] = {
+       { "irq-gpios", &pca953x_irq_gpios, 1, ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER },
+       { }
+};
+
+static int pca953x_acpi_get_irq(struct device *dev)
+{
+       int ret;
+
+       ret = devm_acpi_dev_add_driver_gpios(dev, pca953x_acpi_irq_gpios);
+       if (ret)
+               dev_warn(dev, "can't add GPIO ACPI mapping\n");
+
+       ret = acpi_dev_gpio_irq_get_by(ACPI_COMPANION(dev), "irq-gpios", 0);
+       if (ret < 0)
+               return ret;
+
+       dev_info(dev, "ACPI interrupt quirk (IRQ %d)\n", ret);
+       return ret;
+}
 
 static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = {
        {
@@ -133,59 +154,6 @@ static const struct dmi_system_id pca953x_dmi_acpi_irq_info[] = {
        },
        {}
 };
-
-#ifdef CONFIG_ACPI
-static int pca953x_acpi_get_pin(struct acpi_resource *ares, void *data)
-{
-       struct acpi_resource_gpio *agpio;
-       int *pin = data;
-
-       if (acpi_gpio_get_irq_resource(ares, &agpio))
-               *pin = agpio->pin_table[0];
-       return 1;
-}
-
-static int pca953x_acpi_find_pin(struct device *dev)
-{
-       struct acpi_device *adev = ACPI_COMPANION(dev);
-       int pin = -ENOENT, ret;
-       LIST_HEAD(r);
-
-       ret = acpi_dev_get_resources(adev, &r, pca953x_acpi_get_pin, &pin);
-       acpi_dev_free_resource_list(&r);
-       if (ret < 0)
-               return ret;
-
-       return pin;
-}
-#else
-static inline int pca953x_acpi_find_pin(struct device *dev) { return -ENXIO; }
-#endif
-
-static int pca953x_acpi_get_irq(struct device *dev)
-{
-       int pin, ret;
-
-       pin = pca953x_acpi_find_pin(dev);
-       if (pin < 0)
-               return pin;
-
-       dev_info(dev, "Applying ACPI interrupt quirk (GPIO %d)\n", pin);
-
-       if (!gpio_is_valid(pin))
-               return -EINVAL;
-
-       ret = gpio_request(pin, "pca953x interrupt");
-       if (ret)
-               return ret;
-
-       ret = gpio_to_irq(pin);
-
-       /* When pin is used as an IRQ, no need to keep it requested */
-       gpio_free(pin);
-
-       return ret;
-}
 #endif
 
 static const struct acpi_device_id pca953x_acpi_ids[] = {
index e37a57d..1aacd2a 100644 (file)
@@ -174,7 +174,7 @@ static void acpi_gpiochip_request_irq(struct acpi_gpio_chip *acpi_gpio,
        int ret, value;
 
        ret = request_threaded_irq(event->irq, NULL, event->handler,
-                                  event->irqflags, "ACPI:Event", event);
+                                  event->irqflags | IRQF_ONESHOT, "ACPI:Event", event);
        if (ret) {
                dev_err(acpi_gpio->chip->parent,
                        "Failed to setup interrupt handler for %d\n",
@@ -677,6 +677,7 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
        if (!lookup->desc) {
                const struct acpi_resource_gpio *agpio = &ares->data.gpio;
                bool gpioint = agpio->connection_type == ACPI_RESOURCE_GPIO_TYPE_INT;
+               struct gpio_desc *desc;
                u16 pin_index;
 
                if (lookup->info.quirks & ACPI_GPIO_QUIRK_ONLY_GPIOIO && gpioint)
@@ -689,8 +690,12 @@ static int acpi_populate_gpio_lookup(struct acpi_resource *ares, void *data)
                if (pin_index >= agpio->pin_table_length)
                        return 1;
 
-               lookup->desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
+               if (lookup->info.quirks & ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER)
+                       desc = gpio_to_desc(agpio->pin_table[pin_index]);
+               else
+                       desc = acpi_get_gpiod(agpio->resource_source.string_ptr,
                                              agpio->pin_table[pin_index]);
+               lookup->desc = desc;
                lookup->info.pin_config = agpio->pin_config;
                lookup->info.debounce = agpio->debounce_timeout;
                lookup->info.gpioint = gpioint;
@@ -940,8 +945,9 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
 }
 
 /**
- * acpi_dev_gpio_irq_get() - Find GpioInt and translate it to Linux IRQ number
+ * acpi_dev_gpio_irq_get_by() - Find GpioInt and translate it to Linux IRQ number
  * @adev: pointer to a ACPI device to get IRQ from
+ * @name: optional name of GpioInt resource
  * @index: index of GpioInt resource (starting from %0)
  *
  * If the device has one or more GpioInt resources, this function can be
@@ -951,9 +957,12 @@ struct gpio_desc *acpi_node_get_gpiod(struct fwnode_handle *fwnode,
  * The function is idempotent, though each time it runs it will configure GPIO
  * pin direction according to the flags in GpioInt resource.
  *
+ * The function takes an optional @name parameter. If @name is given, only
+ * GpioInt resources with that name are taken into account.
+ *
  * Return: Linux IRQ number (> %0) on success, negative errno on failure.
  */
-int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
+int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index)
 {
        int idx, i;
        unsigned int irq_flags;
@@ -963,7 +972,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
                struct acpi_gpio_info info;
                struct gpio_desc *desc;
 
-               desc = acpi_get_gpiod_by_index(adev, NULL, i, &info);
+               desc = acpi_get_gpiod_by_index(adev, name, i, &info);
 
                /* Ignore -EPROBE_DEFER, it only matters if idx matches */
                if (IS_ERR(desc) && PTR_ERR(desc) != -EPROBE_DEFER)
@@ -1008,7 +1017,7 @@ int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
        }
        return -ENOENT;
 }
-EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get);
+EXPORT_SYMBOL_GPL(acpi_dev_gpio_irq_get_by);
 
 static acpi_status
 acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address,
index b4a7111..baf0153 100644 (file)
@@ -1039,3 +1039,14 @@ void of_gpiochip_remove(struct gpio_chip *chip)
 {
        of_node_put(chip->of_node);
 }
+
+void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev)
+{
+       /* If the gpiochip has an assigned OF node this takes precedence */
+       if (gc->of_node)
+               gdev->dev.of_node = gc->of_node;
+       else
+               gc->of_node = gdev->dev.of_node;
+       if (gdev->dev.of_node)
+               gdev->dev.fwnode = of_fwnode_handle(gdev->dev.of_node);
+}
index ed26664..8af2bc8 100644 (file)
@@ -15,6 +15,7 @@ int of_gpiochip_add(struct gpio_chip *gc);
 void of_gpiochip_remove(struct gpio_chip *gc);
 int of_gpio_get_count(struct device *dev, const char *con_id);
 bool of_gpio_need_valid_mask(const struct gpio_chip *gc);
+void of_gpio_dev_init(struct gpio_chip *gc, struct gpio_device *gdev);
 #else
 static inline struct gpio_desc *of_find_gpio(struct device *dev,
                                             const char *con_id,
@@ -33,6 +34,10 @@ static inline bool of_gpio_need_valid_mask(const struct gpio_chip *gc)
 {
        return false;
 }
+static inline void of_gpio_dev_init(struct gpio_chip *gc,
+                                   struct gpio_device *gdev)
+{
+}
 #endif /* CONFIG_OF_GPIO */
 
 extern struct notifier_block gpio_of_notifier;
index 844198c..6367646 100644 (file)
 static DEFINE_IDA(gpio_ida);
 static dev_t gpio_devt;
 #define GPIO_DEV_MAX 256 /* 256 GPIO chip devices supported */
+static int gpio_bus_match(struct device *dev, struct device_driver *drv);
 static struct bus_type gpio_bus_type = {
        .name = "gpio",
+       .match = gpio_bus_match,
 };
 
 /*
@@ -365,22 +367,18 @@ static int gpiochip_set_desc_names(struct gpio_chip *gc)
  *
  * Looks for device property "gpio-line-names" and if it exists assigns
  * GPIO line names for the chip. The memory allocated for the assigned
- * names belong to the underlying software node and should not be released
+ * names belongs to the underlying firmware node and should not be released
  * by the caller.
  */
 static int devprop_gpiochip_set_names(struct gpio_chip *chip)
 {
        struct gpio_device *gdev = chip->gpiodev;
-       struct device *dev = chip->parent;
+       struct fwnode_handle *fwnode = dev_fwnode(&gdev->dev);
        const char **names;
        int ret, i;
        int count;
 
-       /* GPIO chip may not have a parent device whose properties we inspect. */
-       if (!dev)
-               return 0;
-
-       count = device_property_string_array_count(dev, "gpio-line-names");
+       count = fwnode_property_string_array_count(fwnode, "gpio-line-names");
        if (count < 0)
                return 0;
 
@@ -394,7 +392,7 @@ static int devprop_gpiochip_set_names(struct gpio_chip *chip)
        if (!names)
                return -ENOMEM;
 
-       ret = device_property_read_string_array(dev, "gpio-line-names",
+       ret = fwnode_property_read_string_array(fwnode, "gpio-line-names",
                                                names, count);
        if (ret < 0) {
                dev_warn(&gdev->dev, "failed to read GPIO line names\n");
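Since the lookup now goes through the chip's firmware node rather than the parent device, the names may also come from a software node attached to the GPIO device. A hedged sketch of such a property set (the line names are illustrative):

	static const char * const demo_line_names[] = {
		"sda", "scl", "", "reset-n",
	};
	static const struct property_entry demo_props[] = {
		PROPERTY_ENTRY_STRING_ARRAY("gpio-line-names", demo_line_names),
		{ }
	};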
@@ -472,9 +470,13 @@ EXPORT_SYMBOL_GPL(gpiochip_line_is_valid);
 
 static void gpiodevice_release(struct device *dev)
 {
-       struct gpio_device *gdev = dev_get_drvdata(dev);
+       struct gpio_device *gdev = container_of(dev, struct gpio_device, dev);
+       unsigned long flags;
 
+       spin_lock_irqsave(&gpio_lock, flags);
        list_del(&gdev->list);
+       spin_unlock_irqrestore(&gpio_lock, flags);
+
        ida_free(&gpio_ida, gdev->id);
        kfree_const(gdev->label);
        kfree(gdev->descs);
@@ -569,6 +571,7 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
                               struct lock_class_key *lock_key,
                               struct lock_class_key *request_key)
 {
+       struct fwnode_handle *fwnode = gc->parent ? dev_fwnode(gc->parent) : NULL;
        unsigned long   flags;
        int             ret = 0;
        unsigned        i;
@@ -590,13 +593,13 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
                gdev->dev.of_node = gc->parent->of_node;
        }
 
-#ifdef CONFIG_OF_GPIO
-       /* If the gpiochip has an assigned OF node this takes precedence */
-       if (gc->of_node)
-               gdev->dev.of_node = gc->of_node;
-       else
-               gc->of_node = gdev->dev.of_node;
-#endif
+       of_gpio_dev_init(gc, gdev);
+
+       /*
+        * Assign the fwnode depending on the result of the previous calls;
+        * if none of them succeeded, fall back to the parent's fwnode.
+        */
+       gdev->dev.fwnode = dev_fwnode(&gdev->dev) ?: fwnode;
 
        gdev->id = ida_alloc(&gpio_ida, GFP_KERNEL);
        if (gdev->id < 0) {
@@ -609,7 +612,6 @@ int gpiochip_add_data_with_key(struct gpio_chip *gc, void *data,
                goto err_free_ida;
 
        device_initialize(&gdev->dev);
-       dev_set_drvdata(&gdev->dev, gdev);
        if (gc->parent && gc->parent->driver)
                gdev->owner = gc->parent->driver->owner;
        else if (gc->owner)
@@ -4215,6 +4217,41 @@ void gpiod_put_array(struct gpio_descs *descs)
 }
 EXPORT_SYMBOL_GPL(gpiod_put_array);
 
+static int gpio_bus_match(struct device *dev, struct device_driver *drv)
+{
+       /*
+        * Only match if the fwnode doesn't already have a proper struct device
+        * created for it.
+        */
+       if (dev->fwnode && dev->fwnode->dev != dev)
+               return 0;
+       return 1;
+}
+
+static int gpio_stub_drv_probe(struct device *dev)
+{
+       /*
+        * The DT nodes of some GPIO chips have a "compatible" property but
+        * never get a struct device added and probed by a driver to register
+        * the GPIO chip with gpiolib. In such cases, fw_devlink=on will cause
+        * the consumers of the GPIO chip to get probe deferred forever because
+        * they will be waiting for a device associated with the GPIO chip
+        * firmware node to get added and bound to a driver.
+        *
+        * To allow these consumers to probe, we associate the struct
+        * gpio_device of the GPIO chip with the firmware node and then simply
+        * bind it to this stub driver.
+        */
+       return 0;
+}
+
+static struct device_driver gpio_stub_drv = {
+       .name = "gpio_stub_drv",
+       .bus = &gpio_bus_type,
+       .probe = gpio_stub_drv_probe,
+};
+
 static int __init gpiolib_dev_init(void)
 {
        int ret;
@@ -4226,9 +4263,17 @@ static int __init gpiolib_dev_init(void)
                return ret;
        }
 
+       ret = driver_register(&gpio_stub_drv);
+       if (ret < 0) {
+               pr_err("gpiolib: could not register GPIO stub driver\n");
+               bus_unregister(&gpio_bus_type);
+               return ret;
+       }
+
        ret = alloc_chrdev_region(&gpio_devt, 0, GPIO_DEV_MAX, GPIOCHIP_NAME);
        if (ret < 0) {
                pr_err("gpiolib: failed to allocate char dev region\n");
+               driver_unregister(&gpio_stub_drv);
                bus_unregister(&gpio_bus_type);
                return ret;
        }
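The stub driver exists only so that fw_devlink sees the gpiochip's firmware node as having a bound device. A sketch of the consumer side that would otherwise defer forever; the driver shape and the "reset" con_id are illustrative:

	static int demo_consumer_probe(struct platform_device *pdev)
	{
		/* With fw_devlink=on, this probe is held back until the
		 * GPIO supplier's fwnode has a bound device; for a
		 * driverless gpiochip node, gpio_stub_drv provides it.
		 */
		struct gpio_desc *reset = devm_gpiod_get(&pdev->dev, "reset",
							 GPIOD_OUT_LOW);

		if (IS_ERR(reset))
			return PTR_ERR(reset);
		return 0;
	}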
index e392a90..85b79a7 100644 (file)
@@ -228,6 +228,7 @@ source "drivers/gpu/drm/arm/Kconfig"
 config DRM_RADEON
        tristate "ATI Radeon"
        depends on DRM && PCI && MMU
+       depends on AGP || !AGP
        select FW_LOADER
         select DRM_KMS_HELPER
         select DRM_TTM
index 86452f4..29885fe 100644 (file)
@@ -180,6 +180,7 @@ extern uint amdgpu_smu_memory_pool_size;
 extern uint amdgpu_dc_feature_mask;
 extern uint amdgpu_dc_debug_mask;
 extern uint amdgpu_dm_abm_level;
+extern int amdgpu_backlight;
 extern struct amdgpu_mgpu_info mgpu_info;
 extern int amdgpu_ras_enable;
 extern uint amdgpu_ras_mask;
@@ -1006,7 +1007,9 @@ struct amdgpu_device {
 
        /* s3/s4 mask */
        bool                            in_suspend;
-       bool                            in_hibernate;
+       bool                            in_s3;
+       bool                            in_s4;
+       bool                            in_s0ix;
 
        atomic_t                        in_gpu_reset;
        enum pp_mp1_state               mp1_state;
index 8155c54..2e9b16f 100644 (file)
@@ -903,10 +903,11 @@ void amdgpu_acpi_fini(struct amdgpu_device *adev)
  */
 bool amdgpu_acpi_is_s0ix_supported(struct amdgpu_device *adev)
 {
+#if defined(CONFIG_AMD_PMC) || defined(CONFIG_AMD_PMC_MODULE)
        if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
                if (adev->flags & AMD_IS_APU)
                        return true;
        }
-
+#endif
        return false;
 }
index 0a25fec..43059ea 100644 (file)
@@ -357,7 +357,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_read(struct file *f, char __user *buf,
        while (size) {
                uint32_t value;
 
-               value = RREG32_PCIE(*pos >> 2);
+               value = RREG32_PCIE(*pos);
                r = put_user(value, (uint32_t *)buf);
                if (r) {
                        pm_runtime_mark_last_busy(adev_to_drm(adev)->dev);
@@ -424,7 +424,7 @@ static ssize_t amdgpu_debugfs_regs_pcie_write(struct file *f, const char __user
                        return r;
                }
 
-               WREG32_PCIE(*pos >> 2, value);
+               WREG32_PCIE(*pos, value);
 
                result += 4;
                buf += 4;
index 7052dc3..8a5a8ff 100644 (file)
@@ -2371,6 +2371,10 @@ static int amdgpu_device_set_cg_state(struct amdgpu_device *adev,
                i = state == AMD_CG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
                if (!adev->ip_blocks[i].status.late_initialized)
                        continue;
+               /* skip CG for GFX on S0ix */
+               if (adev->in_s0ix &&
+                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+                       continue;
                /* skip CG for VCE/UVD, it's handled specially */
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
@@ -2402,6 +2406,10 @@ static int amdgpu_device_set_pg_state(struct amdgpu_device *adev, enum amd_power
                i = state == AMD_PG_STATE_GATE ? j : adev->num_ip_blocks - j - 1;
                if (!adev->ip_blocks[i].status.late_initialized)
                        continue;
+               /* skip PG for GFX on S0ix */
+               if (adev->in_s0ix &&
+                   adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX)
+                       continue;
                /* skip CG for VCE/UVD, it's handled specially */
                if (adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_UVD &&
                    adev->ip_blocks[i].version->type != AMD_IP_BLOCK_TYPE_VCE &&
@@ -2678,10 +2686,8 @@ static int amdgpu_device_ip_suspend_phase1(struct amdgpu_device *adev)
 {
        int i, r;
 
-       if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev)) {
-               amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
-               amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
-       }
+       amdgpu_device_set_pg_state(adev, AMD_PG_STATE_UNGATE);
+       amdgpu_device_set_cg_state(adev, AMD_CG_STATE_UNGATE);
 
        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
                if (!adev->ip_blocks[i].status.valid)
@@ -2721,6 +2727,9 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
 {
        int i, r;
 
+       if (adev->in_s0ix)
+               amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
+
        for (i = adev->num_ip_blocks - 1; i >= 0; i--) {
                if (!adev->ip_blocks[i].status.valid)
                        continue;
@@ -2733,6 +2742,17 @@ static int amdgpu_device_ip_suspend_phase2(struct amdgpu_device *adev)
                        adev->ip_blocks[i].status.hw = false;
                        continue;
                }
+
+               /* skip suspend of gfx and psp for S0ix:
+                * gfx is in the gfxoff state, so on resume it will exit gfxoff
+                * just like at runtime. PSP is also part of the always-on
+                * hardware, so there is no need to suspend it.
+                */
+               if (adev->in_s0ix &&
+                   (adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_PSP ||
+                    adev->ip_blocks[i].version->type == AMD_IP_BLOCK_TYPE_GFX))
+                       continue;
+
                /* XXX handle errors */
                r = adev->ip_blocks[i].version->funcs->suspend(adev);
                /* XXX handle errors */
@@ -3672,14 +3692,9 @@ void amdgpu_device_fini(struct amdgpu_device *adev)
  */
 int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 {
-       struct amdgpu_device *adev;
-       struct drm_crtc *crtc;
-       struct drm_connector *connector;
-       struct drm_connector_list_iter iter;
+       struct amdgpu_device *adev = drm_to_adev(dev);
        int r;
 
-       adev = drm_to_adev(dev);
-
        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
                return 0;
 
@@ -3691,60 +3706,19 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
 
        cancel_delayed_work_sync(&adev->delayed_init_work);
 
-       if (!amdgpu_device_has_dc_support(adev)) {
-               /* turn off display hw */
-               drm_modeset_lock_all(dev);
-               drm_connector_list_iter_begin(dev, &iter);
-               drm_for_each_connector_iter(connector, &iter)
-                       drm_helper_connector_dpms(connector,
-                                                 DRM_MODE_DPMS_OFF);
-               drm_connector_list_iter_end(&iter);
-               drm_modeset_unlock_all(dev);
-                       /* unpin the front buffers and cursors */
-               list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-                       struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-                       struct drm_framebuffer *fb = crtc->primary->fb;
-                       struct amdgpu_bo *robj;
-
-                       if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
-                               struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-                               r = amdgpu_bo_reserve(aobj, true);
-                               if (r == 0) {
-                                       amdgpu_bo_unpin(aobj);
-                                       amdgpu_bo_unreserve(aobj);
-                               }
-                       }
-
-                       if (fb == NULL || fb->obj[0] == NULL) {
-                               continue;
-                       }
-                       robj = gem_to_amdgpu_bo(fb->obj[0]);
-                       /* don't unpin kernel fb objects */
-                       if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
-                               r = amdgpu_bo_reserve(robj, true);
-                               if (r == 0) {
-                                       amdgpu_bo_unpin(robj);
-                                       amdgpu_bo_unreserve(robj);
-                               }
-                       }
-               }
-       }
-
        amdgpu_ras_suspend(adev);
 
        r = amdgpu_device_ip_suspend_phase1(adev);
 
-       amdgpu_amdkfd_suspend(adev, adev->in_runpm);
+       if (!adev->in_s0ix)
+               amdgpu_amdkfd_suspend(adev, adev->in_runpm);
 
        /* evict vram memory */
        amdgpu_bo_evict_vram(adev);
 
        amdgpu_fence_driver_suspend(adev);
 
-       if (!amdgpu_acpi_is_s0ix_supported(adev) || amdgpu_in_reset(adev))
-               r = amdgpu_device_ip_suspend_phase2(adev);
-       else
-               amdgpu_gfx_state_change_set(adev, sGpuChangeState_D3Entry);
+       r = amdgpu_device_ip_suspend_phase2(adev);
        /* evict remaining vram memory
         * This second call to evict vram is to evict the gart page table
         * using the CPU.
@@ -3766,16 +3740,13 @@ int amdgpu_device_suspend(struct drm_device *dev, bool fbcon)
  */
 int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
 {
-       struct drm_connector *connector;
-       struct drm_connector_list_iter iter;
        struct amdgpu_device *adev = drm_to_adev(dev);
-       struct drm_crtc *crtc;
        int r = 0;
 
        if (dev->switch_power_state == DRM_SWITCH_POWER_OFF)
                return 0;
 
-       if (amdgpu_acpi_is_s0ix_supported(adev))
+       if (adev->in_s0ix)
                amdgpu_gfx_state_change_set(adev, sGpuChangeState_D0Entry);
 
        /* post card */
@@ -3800,50 +3771,17 @@ int amdgpu_device_resume(struct drm_device *dev, bool fbcon)
        queue_delayed_work(system_wq, &adev->delayed_init_work,
                           msecs_to_jiffies(AMDGPU_RESUME_MS));
 
-       if (!amdgpu_device_has_dc_support(adev)) {
-               /* pin cursors */
-               list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
-                       struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
-
-                       if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
-                               struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
-                               r = amdgpu_bo_reserve(aobj, true);
-                               if (r == 0) {
-                                       r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
-                                       if (r != 0)
-                                               dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
-                                       amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
-                                       amdgpu_bo_unreserve(aobj);
-                               }
-                       }
-               }
+       if (!adev->in_s0ix) {
+               r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
+               if (r)
+                       return r;
        }
-       r = amdgpu_amdkfd_resume(adev, adev->in_runpm);
-       if (r)
-               return r;
 
        /* Make sure IB tests flushed */
        flush_delayed_work(&adev->delayed_init_work);
 
-       /* blat the mode back in */
-       if (fbcon) {
-               if (!amdgpu_device_has_dc_support(adev)) {
-                       /* pre DCE11 */
-                       drm_helper_resume_force_mode(dev);
-
-                       /* turn on display hw */
-                       drm_modeset_lock_all(dev);
-
-                       drm_connector_list_iter_begin(dev, &iter);
-                       drm_for_each_connector_iter(connector, &iter)
-                               drm_helper_connector_dpms(connector,
-                                                         DRM_MODE_DPMS_ON);
-                       drm_connector_list_iter_end(&iter);
-
-                       drm_modeset_unlock_all(dev);
-               }
+       if (fbcon)
                amdgpu_fbdev_set_suspend(adev, 0);
-       }
 
        drm_kms_helper_poll_enable(dev);
 
index 48cb33e..f753e04 100644 (file)
@@ -1310,3 +1310,92 @@ bool amdgpu_crtc_get_scanout_position(struct drm_crtc *crtc,
        return amdgpu_display_get_crtc_scanoutpos(dev, pipe, 0, vpos, hpos,
                                                  stime, etime, mode);
 }
+
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev)
+{
+       struct drm_device *dev = adev_to_drm(adev);
+       struct drm_crtc *crtc;
+       struct drm_connector *connector;
+       struct drm_connector_list_iter iter;
+       int r = 0;
+
+       /* turn off display hw */
+       drm_modeset_lock_all(dev);
+       drm_connector_list_iter_begin(dev, &iter);
+       drm_for_each_connector_iter(connector, &iter)
+               drm_helper_connector_dpms(connector,
+                                         DRM_MODE_DPMS_OFF);
+       drm_connector_list_iter_end(&iter);
+       drm_modeset_unlock_all(dev);
+       /* unpin the front buffers and cursors */
+       list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+               struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+               struct drm_framebuffer *fb = crtc->primary->fb;
+               struct amdgpu_bo *robj;
+
+               if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+                       struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+                       r = amdgpu_bo_reserve(aobj, true);
+                       if (r == 0) {
+                               amdgpu_bo_unpin(aobj);
+                               amdgpu_bo_unreserve(aobj);
+                       }
+               }
+
+               if (fb == NULL || fb->obj[0] == NULL) {
+                       continue;
+               }
+               robj = gem_to_amdgpu_bo(fb->obj[0]);
+               /* don't unpin kernel fb objects */
+               if (!amdgpu_fbdev_robj_is_fb(adev, robj)) {
+                       r = amdgpu_bo_reserve(robj, true);
+                       if (r == 0) {
+                               amdgpu_bo_unpin(robj);
+                               amdgpu_bo_unreserve(robj);
+                       }
+               }
+       }
+       return r;
+}
+
+int amdgpu_display_resume_helper(struct amdgpu_device *adev)
+{
+       struct drm_device *dev = adev_to_drm(adev);
+       struct drm_connector *connector;
+       struct drm_connector_list_iter iter;
+       struct drm_crtc *crtc;
+       int r;
+
+       /* pin cursors */
+       list_for_each_entry(crtc, &dev->mode_config.crtc_list, head) {
+               struct amdgpu_crtc *amdgpu_crtc = to_amdgpu_crtc(crtc);
+
+               if (amdgpu_crtc->cursor_bo && !adev->enable_virtual_display) {
+                       struct amdgpu_bo *aobj = gem_to_amdgpu_bo(amdgpu_crtc->cursor_bo);
+                       r = amdgpu_bo_reserve(aobj, true);
+                       if (r == 0) {
+                               r = amdgpu_bo_pin(aobj, AMDGPU_GEM_DOMAIN_VRAM);
+                               if (r != 0)
+                                       dev_err(adev->dev, "Failed to pin cursor BO (%d)\n", r);
+                               amdgpu_crtc->cursor_addr = amdgpu_bo_gpu_offset(aobj);
+                               amdgpu_bo_unreserve(aobj);
+                       }
+               }
+       }
+
+       drm_helper_resume_force_mode(dev);
+
+       /* turn on display hw */
+       drm_modeset_lock_all(dev);
+
+       drm_connector_list_iter_begin(dev, &iter);
+       drm_for_each_connector_iter(connector, &iter)
+               drm_helper_connector_dpms(connector,
+                                         DRM_MODE_DPMS_ON);
+       drm_connector_list_iter_end(&iter);
+
+       drm_modeset_unlock_all(dev);
+
+       return 0;
+}
+
index dc7b7d1..7b6d83e 100644 (file)
@@ -47,4 +47,7 @@ amdgpu_display_user_framebuffer_create(struct drm_device *dev,
 const struct drm_format_info *
 amdgpu_lookup_format_info(u32 format, uint64_t modifier);
 
+int amdgpu_display_suspend_helper(struct amdgpu_device *adev);
+int amdgpu_display_resume_helper(struct amdgpu_device *adev);
+
 #endif
index 03c5296..e92e7de 100644 (file)
@@ -781,6 +781,10 @@ uint amdgpu_dm_abm_level;
 MODULE_PARM_DESC(abmlevel, "ABM level (0 = off (default), 1-4 = backlight reduction level) ");
 module_param_named(abmlevel, amdgpu_dm_abm_level, uint, 0444);
 
+int amdgpu_backlight = -1;
+MODULE_PARM_DESC(backlight, "Backlight control (0 = pwm, 1 = aux, -1 = auto (default))");
+module_param_named(backlight, amdgpu_backlight, bint, 0444);
+
 /**
  * DOC: tmz (int)
  * Trusted Memory Zone (TMZ) is a method to protect data being written
@@ -1103,6 +1107,7 @@ static const struct pci_device_id pciidlist[] = {
        {0x1002, 0x73A3, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73AB, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73AE, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
+       {0x1002, 0x73AF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
        {0x1002, 0x73BF, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CHIP_SIENNA_CICHLID},
 
        /* Van Gogh */
@@ -1277,15 +1282,28 @@ amdgpu_pci_shutdown(struct pci_dev *pdev)
 static int amdgpu_pmops_suspend(struct device *dev)
 {
        struct drm_device *drm_dev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
+       int r;
 
-       return amdgpu_device_suspend(drm_dev, true);
+       if (amdgpu_acpi_is_s0ix_supported(adev))
+               adev->in_s0ix = true;
+       adev->in_s3 = true;
+       r = amdgpu_device_suspend(drm_dev, true);
+       adev->in_s3 = false;
+
+       return r;
 }
 
 static int amdgpu_pmops_resume(struct device *dev)
 {
        struct drm_device *drm_dev = dev_get_drvdata(dev);
+       struct amdgpu_device *adev = drm_to_adev(drm_dev);
+       int r;
 
-       return amdgpu_device_resume(drm_dev, true);
+       r = amdgpu_device_resume(drm_dev, true);
+       if (amdgpu_acpi_is_s0ix_supported(adev))
+               adev->in_s0ix = false;
+       return r;
 }
 
 static int amdgpu_pmops_freeze(struct device *dev)
@@ -1294,9 +1312,9 @@ static int amdgpu_pmops_freeze(struct device *dev)
        struct amdgpu_device *adev = drm_to_adev(drm_dev);
        int r;
 
-       adev->in_hibernate = true;
+       adev->in_s4 = true;
        r = amdgpu_device_suspend(drm_dev, true);
-       adev->in_hibernate = false;
+       adev->in_s4 = false;
        if (r)
                return r;
        return amdgpu_asic_reset(adev);
index 51cd49c..24010ca 100644 (file)
@@ -146,7 +146,7 @@ static int amdgpufb_create_pinned_object(struct amdgpu_fbdev *rfbdev,
        size = mode_cmd->pitches[0] * height;
        aligned_size = ALIGN(size, PAGE_SIZE);
        ret = amdgpu_gem_object_create(adev, aligned_size, 0, domain, flags,
-                                      ttm_bo_type_kernel, NULL, &gobj);
+                                      ttm_bo_type_device, NULL, &gobj);
        if (ret) {
                pr_err("failed to allocate framebuffer (%d)\n", aligned_size);
                return -ENOMEM;
index 3c37cf1..a4e2cf7 100644 (file)
@@ -173,8 +173,6 @@ int amdgpu_driver_load_kms(struct amdgpu_device *adev, unsigned long flags)
                switch (adev->asic_type) {
                case CHIP_VEGA20:
                case CHIP_ARCTURUS:
-               case CHIP_SIENNA_CICHLID:
-               case CHIP_NAVY_FLOUNDER:
                        /* enable runpm if runpm=1 */
                        if (amdgpu_runtime_pm > 0)
                                adev->runpm = true;
@@ -780,9 +778,9 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
                        dev_info->high_va_offset = AMDGPU_GMC_HOLE_END;
                        dev_info->high_va_max = AMDGPU_GMC_HOLE_END | vm_size;
                }
-               dev_info->virtual_address_alignment = max((int)PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
+               dev_info->virtual_address_alignment = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
                dev_info->pte_fragment_size = (1 << adev->vm_manager.fragment_size) * AMDGPU_GPU_PAGE_SIZE;
-               dev_info->gart_page_size = AMDGPU_GPU_PAGE_SIZE;
+               dev_info->gart_page_size = max_t(u32, PAGE_SIZE, AMDGPU_GPU_PAGE_SIZE);
                dev_info->cu_active_number = adev->gfx.cu_info.number;
                dev_info->cu_ao_mask = adev->gfx.cu_info.ao_cu_mask;
                dev_info->ce_ram_size = adev->gfx.ce_ram_size;
index 4b29b82..0720504 100644 (file)
@@ -1028,13 +1028,10 @@ int amdgpu_bo_evict_vram(struct amdgpu_device *adev)
 {
        struct ttm_resource_manager *man;
 
-       /* late 2.6.33 fix IGP hibernate - we need pm ops to do this correct */
-#ifndef CONFIG_HIBERNATION
-       if (adev->flags & AMD_IS_APU) {
-               /* Useless to evict on IGP chips */
+       if (adev->in_s3 && (adev->flags & AMD_IS_APU)) {
+               /* No need to evict vram on APUs for suspend to ram */
                return 0;
        }
-#endif
 
        man = ttm_manager_type(&adev->mman.bdev, TTM_PL_VRAM);
        return ttm_resource_manager_evict_all(&adev->mman.bdev, man);
index 9fd2157..5efa331 100644 (file)
@@ -906,7 +906,7 @@ static int amdgpu_ttm_tt_pin_userptr(struct ttm_bo_device *bdev,
 
        /* Allocate an SG array and squash pages into it */
        r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
-                                     ttm->num_pages << PAGE_SHIFT,
+                                     (u64)ttm->num_pages << PAGE_SHIFT,
                                      GFP_KERNEL);
        if (r)
                goto release_sg;
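The (u64) cast matters because the page count is a 32-bit quantity here, so the shift would otherwise be performed in 32-bit arithmetic and wrap for userptr ranges of 4 GiB and up. A standalone illustration, assuming 4 KiB pages:

	u32 num_pages = 0x100000;                    /* 1M pages = 4 GiB */
	u64 wrapped = num_pages << PAGE_SHIFT;       /* 32-bit shift wraps to 0 */
	u64 correct = (u64)num_pages << PAGE_SHIFT;  /* 0x100000000 as intended */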
index ad91c0c..7d2c8b1 100644 (file)
@@ -2197,8 +2197,8 @@ int amdgpu_vm_bo_map(struct amdgpu_device *adev,
        uint64_t eaddr;
 
        /* validate the parameters */
-       if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
-           size == 0 || size & AMDGPU_GPU_PAGE_MASK)
+       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
+           size == 0 || size & ~PAGE_MASK)
                return -EINVAL;
 
        /* make sure object fit at this offset */
@@ -2263,8 +2263,8 @@ int amdgpu_vm_bo_replace_map(struct amdgpu_device *adev,
        int r;
 
        /* validate the parameters */
-       if (saddr & AMDGPU_GPU_PAGE_MASK || offset & AMDGPU_GPU_PAGE_MASK ||
-           size == 0 || size & AMDGPU_GPU_PAGE_MASK)
+       if (saddr & ~PAGE_MASK || offset & ~PAGE_MASK ||
+           size == 0 || size & ~PAGE_MASK)
                return -EINVAL;
 
        /* make sure object fit at this offset */
@@ -2409,7 +2409,7 @@ int amdgpu_vm_bo_clear_mappings(struct amdgpu_device *adev,
                        after->start = eaddr + 1;
                        after->last = tmp->last;
                        after->offset = tmp->offset;
-                       after->offset += after->start - tmp->start;
+                       after->offset += (after->start - tmp->start) << PAGE_SHIFT;
                        after->flags = tmp->flags;
                        after->bo_va = tmp->bo_va;
                        list_add(&after->list, &tmp->bo_va->invalids);
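The offset adjustment fix reflects the units involved: mapping start/last are page numbers while a BO offset is in bytes, so the page delta must be scaled. A small illustration with arbitrary values, assuming 4 KiB pages:

	u64 tmp_start = 100, after_start = 110;              /* page numbers */
	u64 delta = (after_start - tmp_start) << PAGE_SHIFT; /* 10 pages = 40960 bytes */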
index 7944781..19abb74 100644 (file)
@@ -2897,6 +2897,11 @@ static int dce_v10_0_hw_fini(void *handle)
 static int dce_v10_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = amdgpu_display_suspend_helper(adev);
+       if (r)
+               return r;
 
        adev->mode_info.bl_level =
                amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
@@ -2921,8 +2926,10 @@ static int dce_v10_0_resume(void *handle)
                amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
                                                    bl_level);
        }
+       if (ret)
+               return ret;
 
-       return ret;
+       return amdgpu_display_resume_helper(adev);
 }
 
 static bool dce_v10_0_is_idle(void *handle)
index 1b6ff04..320ec35 100644 (file)
@@ -3027,6 +3027,11 @@ static int dce_v11_0_hw_fini(void *handle)
 static int dce_v11_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = amdgpu_display_suspend_helper(adev);
+       if (r)
+               return r;
 
        adev->mode_info.bl_level =
                amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
@@ -3051,8 +3056,10 @@ static int dce_v11_0_resume(void *handle)
                amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
                                                    bl_level);
        }
+       if (ret)
+               return ret;
 
-       return ret;
+       return amdgpu_display_resume_helper(adev);
 }
 
 static bool dce_v11_0_is_idle(void *handle)
index 83a8838..1332200 100644 (file)
@@ -2770,7 +2770,11 @@ static int dce_v6_0_hw_fini(void *handle)
 static int dce_v6_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
 
+       r = amdgpu_display_suspend_helper(adev);
+       if (r)
+               return r;
        adev->mode_info.bl_level =
                amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
 
@@ -2794,8 +2798,10 @@ static int dce_v6_0_resume(void *handle)
                amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
                                                    bl_level);
        }
+       if (ret)
+               return ret;
 
-       return ret;
+       return amdgpu_display_resume_helper(adev);
 }
 
 static bool dce_v6_0_is_idle(void *handle)
index 224b302..04ebf02 100644 (file)
@@ -2796,6 +2796,11 @@ static int dce_v8_0_hw_fini(void *handle)
 static int dce_v8_0_suspend(void *handle)
 {
        struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = amdgpu_display_suspend_helper(adev);
+       if (r)
+               return r;
 
        adev->mode_info.bl_level =
                amdgpu_atombios_encoder_get_backlight_level_from_reg(adev);
@@ -2820,8 +2825,10 @@ static int dce_v8_0_resume(void *handle)
                amdgpu_display_backlight_set_level(adev, adev->mode_info.bl_encoder,
                                                    bl_level);
        }
+       if (ret)
+               return ret;
 
-       return ret;
+       return amdgpu_display_resume_helper(adev);
 }
 
 static bool dce_v8_0_is_idle(void *handle)
index 9810af7..5c11144 100644 (file)
@@ -39,6 +39,7 @@
 #include "dce_v11_0.h"
 #include "dce_virtual.h"
 #include "ivsrcid/ivsrcid_vislands30.h"
+#include "amdgpu_display.h"
 
 #define DCE_VIRTUAL_VBLANK_PERIOD 16666666
 
@@ -491,12 +492,24 @@ static int dce_virtual_hw_fini(void *handle)
 
 static int dce_virtual_suspend(void *handle)
 {
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = amdgpu_display_suspend_helper(adev);
+       if (r)
+               return r;
        return dce_virtual_hw_fini(handle);
 }
 
 static int dce_virtual_resume(void *handle)
 {
-       return dce_virtual_hw_init(handle);
+       struct amdgpu_device *adev = (struct amdgpu_device *)handle;
+       int r;
+
+       r = dce_virtual_hw_init(handle);
+       if (r)
+               return r;
+       return amdgpu_display_resume_helper(adev);
 }
 
 static bool dce_virtual_is_idle(void *handle)
index 160fa5f..c625c5d 100644 (file)
@@ -558,7 +558,8 @@ static bool nv_is_headless_sku(struct pci_dev *pdev)
 {
        if ((pdev->device == 0x731E &&
            (pdev->revision == 0xC6 || pdev->revision == 0xC7)) ||
-           (pdev->device == 0x7340 && pdev->revision == 0xC9))
+           (pdev->device == 0x7340 && pdev->revision == 0xC9)  ||
+           (pdev->device == 0x7360 && pdev->revision == 0xC7))
                return true;
        return false;
 }
@@ -634,7 +635,8 @@ int nv_set_ip_blocks(struct amdgpu_device *adev)
                if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT &&
                    !amdgpu_sriov_vf(adev))
                        amdgpu_device_ip_block_add(adev, &smu_v11_0_ip_block);
-               amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
+               if (!nv_is_headless_sku(adev->pdev))
+                       amdgpu_device_ip_block_add(adev, &vcn_v2_0_ip_block);
                if (!amdgpu_sriov_vf(adev))
                        amdgpu_device_ip_block_add(adev, &jpeg_v2_0_ip_block);
                break;
index b258a3d..159add0 100644 (file)
@@ -155,7 +155,7 @@ static int dbgdev_diq_submit_ib(struct kfd_dbgdev *dbgdev,
 
        /* Wait till CP writes sync code: */
        status = amdkfd_fence_wait_timeout(
-                       (unsigned int *) rm_state,
+                       rm_state,
                        QUEUESTATE__ACTIVE, 1500);
 
        kfd_gtt_sa_free(dbgdev->dev, mem_obj);
index e686ce2..4598a9a 100644 (file)
@@ -1167,7 +1167,7 @@ static int start_cpsch(struct device_queue_manager *dqm)
        if (retval)
                goto fail_allocate_vidmem;
 
-       dqm->fence_addr = dqm->fence_mem->cpu_ptr;
+       dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
        dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;
 
        init_interrupts(dqm);
@@ -1340,8 +1340,8 @@ out:
        return retval;
 }
 
-int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
-                               unsigned int fence_value,
+int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+                               uint64_t fence_value,
                                unsigned int timeout_ms)
 {
        unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;
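Widening the fence plumbing to 64 bits only works if every read of the fence location is also 64-bit; a 32-bit access would compare just the low word and could signal early. A minimal sketch of the polling idiom, assuming the CP writes the fence atomically:

	static bool demo_fence_signaled(const u64 *fence_addr, u64 fence_value)
	{
		/* One full-width read of the fence location. */
		return READ_ONCE(*fence_addr) == fence_value;
	}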
index 7351dd1..45f8159 100644 (file)
@@ -192,7 +192,7 @@ struct device_queue_manager {
        uint16_t                vmid_pasid[VMID_NUM];
        uint64_t                pipelines_addr;
        uint64_t                fence_gpu_addr;
-       unsigned int            *fence_addr;
+       uint64_t                *fence_addr;
        struct kfd_mem_obj      *fence_mem;
        bool                    active_runlist;
        int                     sched_policy;
index 5d541e0..f71a7fa 100644 (file)
@@ -347,7 +347,7 @@ fail_create_runlist_ib:
 }
 
 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
-                       uint32_t fence_value)
+                       uint64_t fence_value)
 {
        uint32_t *buffer, size;
        int retval = 0;
index dfaf771..e3ba0cd 100644 (file)
@@ -283,7 +283,7 @@ static int pm_unmap_queues_v9(struct packet_manager *pm, uint32_t *buffer,
 }
 
 static int pm_query_status_v9(struct packet_manager *pm, uint32_t *buffer,
-                       uint64_t fence_address, uint32_t fence_value)
+                       uint64_t fence_address, uint64_t fence_value)
 {
        struct pm4_mes_query_status *packet;
 
index a852e0d..08442e7 100644 (file)
@@ -263,7 +263,7 @@ static int pm_unmap_queues_vi(struct packet_manager *pm, uint32_t *buffer,
 }
 
 static int pm_query_status_vi(struct packet_manager *pm, uint32_t *buffer,
-                       uint64_t fence_address, uint32_t fence_value)
+                       uint64_t fence_address, uint64_t fence_value)
 {
        struct pm4_mes_query_status *packet;
 
index 09599ef..f304d1f 100644 (file)
@@ -1003,8 +1003,8 @@ int pqm_get_wave_state(struct process_queue_manager *pqm,
                       u32 *ctl_stack_used_size,
                       u32 *save_area_used_size);
 
-int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
-                             unsigned int fence_value,
+int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
+                             uint64_t fence_value,
                              unsigned int timeout_ms);
 
 /* Packet Manager */
@@ -1040,7 +1040,7 @@ struct packet_manager_funcs {
                        uint32_t filter_param, bool reset,
                        unsigned int sdma_engine);
        int (*query_status)(struct packet_manager *pm, uint32_t *buffer,
-                       uint64_t fence_address, uint32_t fence_value);
+                       uint64_t fence_address, uint64_t fence_value);
        int (*release_mem)(uint64_t gpu_addr, uint32_t *buffer);
 
        /* Packet sizes */
@@ -1062,7 +1062,7 @@ int pm_send_set_resources(struct packet_manager *pm,
                                struct scheduling_resources *res);
 int pm_send_runlist(struct packet_manager *pm, struct list_head *dqm_queues);
 int pm_send_query_status(struct packet_manager *pm, uint64_t fence_address,
-                               uint32_t fence_value);
+                               uint64_t fence_value);
 
 int pm_send_unmap_queue(struct packet_manager *pm, enum kfd_queue_type type,
                        enum kfd_unmap_queues_filter mode,
index 94cd5dd..573cf17 100644 (file)
@@ -937,7 +937,49 @@ static void mmhub_read_system_context(struct amdgpu_device *adev, struct dc_phy_
 
 }
 #endif
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+static void event_mall_stutter(struct work_struct *work)
+{
+
+       struct vblank_workqueue *vblank_work = container_of(work, struct vblank_workqueue, mall_work);
+       struct amdgpu_display_manager *dm = vblank_work->dm;
+
+       mutex_lock(&dm->dc_lock);
+
+       if (vblank_work->enable)
+               dm->active_vblank_irq_count++;
+       else
+               dm->active_vblank_irq_count--;
+
+       dc_allow_idle_optimizations(dm->dc, dm->active_vblank_irq_count == 0);
+
+       DRM_DEBUG_DRIVER("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0);
+
+       mutex_unlock(&dm->dc_lock);
+}
+
+static struct vblank_workqueue *vblank_create_workqueue(struct amdgpu_device *adev, struct dc *dc)
+{
+       int max_caps = dc->caps.max_links;
+       struct vblank_workqueue *vblank_work;
+       int i = 0;
+
+       vblank_work = kcalloc(max_caps, sizeof(*vblank_work), GFP_KERNEL);
+       if (ZERO_OR_NULL_PTR(vblank_work)) {
+               kfree(vblank_work);
+               return NULL;
+       }
+
+       for (i = 0; i < max_caps; i++)
+               INIT_WORK(&vblank_work[i].mall_work, event_mall_stutter);
+
+       return vblank_work;
+}
+#endif
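Moving the MALL stutter update into a work item lets dc_allow_idle_optimizations() run in process context, where taking the dm->dc_lock mutex is legal, rather than in the vblank enable/disable path, which may run in atomic context. The underlying pattern, as a sketch with illustrative names:

	struct demo_work {
		struct work_struct work;
		bool enable;
	};

	static void demo_fn(struct work_struct *w)
	{
		struct demo_work *dw = container_of(w, struct demo_work, work);

		/* Process context: sleeping locks are fine here. */
		(void)dw->enable;
	}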
 static int amdgpu_dm_init(struct amdgpu_device *adev)
 {
        struct dc_init_data init_data;
@@ -957,6 +999,9 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 
        mutex_init(&adev->dm.dc_lock);
        mutex_init(&adev->dm.audio_lock);
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+       spin_lock_init(&adev->dm.vblank_lock);
+#endif
 
        if(amdgpu_dm_irq_init(adev)) {
                DRM_ERROR("amdgpu: failed to initialize DM IRQ support.\n");
@@ -1071,6 +1116,17 @@ static int amdgpu_dm_init(struct amdgpu_device *adev)
 
        amdgpu_dm_init_color_mod();
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+       if (adev->dm.dc->caps.max_links > 0) {
+               adev->dm.vblank_workqueue = vblank_create_workqueue(adev, adev->dm.dc);
+
+               if (!adev->dm.vblank_workqueue)
+                       DRM_ERROR("amdgpu: failed to initialize vblank_workqueue.\n");
+               else
+                       DRM_DEBUG_DRIVER("amdgpu: vblank_workqueue init done %p.\n", adev->dm.vblank_workqueue);
+       }
+#endif
+
 #ifdef CONFIG_DRM_AMD_DC_HDCP
        if (adev->dm.dc->caps.max_links > 0 && adev->asic_type >= CHIP_RAVEN) {
                adev->dm.hdcp_workqueue = hdcp_create_workqueue(adev, &init_params.cp_psp, adev->dm.dc);
@@ -1936,7 +1992,7 @@ static void dm_gpureset_commit_state(struct dc_state *dc_state,
                dc_commit_updates_for_stream(
                        dm->dc, bundle->surface_updates,
                        dc_state->stream_status->plane_count,
-                       dc_state->streams[k], &bundle->stream_update);
+                       dc_state->streams[k], &bundle->stream_update, dc_state);
        }
 
 cleanup:
@@ -1967,7 +2023,8 @@ static void dm_set_dpms_off(struct dc_link *link)
 
        stream_update.stream = stream_state;
        dc_commit_updates_for_stream(stream_state->ctx->dc, NULL, 0,
-                                    stream_state, &stream_update);
+                                    stream_state, &stream_update,
+                                    stream_state->ctx->dc->current_state);
        mutex_unlock(&adev->dm.dc_lock);
 }
 
@@ -2210,6 +2267,11 @@ static void update_connector_ext_caps(struct amdgpu_dm_connector *aconnector)
            caps->ext_caps->bits.hdr_aux_backlight_control == 1)
                caps->aux_support = true;
 
+       if (amdgpu_backlight == 0)
+               caps->aux_support = false;
+       else if (amdgpu_backlight == 1)
+               caps->aux_support = true;
+
        /* From the specification (CTA-861-G), for calculating the maximum
         * luminance we need to use:
         *      Luminance = 50*2**(CV/32)
@@ -3128,19 +3190,6 @@ static void amdgpu_dm_update_backlight_caps(struct amdgpu_display_manager *dm)
 #endif
 }
 
-static int set_backlight_via_aux(struct dc_link *link, uint32_t brightness)
-{
-       bool rc;
-
-       if (!link)
-               return 1;
-
-       rc = dc_link_set_backlight_level_nits(link, true, brightness,
-                                             AUX_BL_DEFAULT_TRANSITION_TIME_MS);
-
-       return rc ? 0 : 1;
-}
-
 static int get_brightness_range(const struct amdgpu_dm_backlight_caps *caps,
                                unsigned *min, unsigned *max)
 {
@@ -3203,9 +3252,10 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
        brightness = convert_brightness_from_user(&caps, bd->props.brightness);
        // Change brightness based on AUX property
        if (caps.aux_support)
-               return set_backlight_via_aux(link, brightness);
-
-       rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0);
+               rc = dc_link_set_backlight_level_nits(link, true, brightness,
+                                                     AUX_BL_DEFAULT_TRANSITION_TIME_MS);
+       else
+               rc = dc_link_set_backlight_level(dm->backlight_link, brightness, 0);
 
        return rc ? 0 : 1;
 }
@@ -3213,11 +3263,27 @@ static int amdgpu_dm_backlight_update_status(struct backlight_device *bd)
 static int amdgpu_dm_backlight_get_brightness(struct backlight_device *bd)
 {
        struct amdgpu_display_manager *dm = bl_get_data(bd);
-       int ret = dc_link_get_backlight_level(dm->backlight_link);
+       struct amdgpu_dm_backlight_caps caps;
+
+       amdgpu_dm_update_backlight_caps(dm);
+       caps = dm->backlight_caps;
 
-       if (ret == DC_ERROR_UNEXPECTED)
-               return bd->props.brightness;
-       return convert_brightness_to_user(&dm->backlight_caps, ret);
+       if (caps.aux_support) {
+               struct dc_link *link = (struct dc_link *)dm->backlight_link;
+               u32 avg, peak;
+               bool rc;
+
+               rc = dc_link_get_backlight_level_nits(link, &avg, &peak);
+               if (!rc)
+                       return bd->props.brightness;
+               return convert_brightness_to_user(&caps, avg);
+       } else {
+               int ret = dc_link_get_backlight_level(dm->backlight_link);
+
+               if (ret == DC_ERROR_UNEXPECTED)
+                       return bd->props.brightness;
+               return convert_brightness_to_user(&caps, ret);
+       }
 }
 
 static const struct backlight_ops amdgpu_dm_backlight_ops = {
@@ -4659,6 +4725,7 @@ static int fill_dc_plane_attributes(struct amdgpu_device *adev,
        dc_plane_state->global_alpha_value = plane_info.global_alpha_value;
        dc_plane_state->dcc = plane_info.dcc;
        dc_plane_state->layer_index = plane_info.layer_index; // Always returns 0
+       dc_plane_state->flip_int_enabled = true;
 
        /*
         * Always set input transfer function, since plane state is refreshed
@@ -5374,7 +5441,10 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
        struct amdgpu_crtc *acrtc = to_amdgpu_crtc(crtc);
        struct amdgpu_device *adev = drm_to_adev(crtc->dev);
        struct dm_crtc_state *acrtc_state = to_dm_crtc_state(crtc->state);
+#if defined(CONFIG_DRM_AMD_DC_DCN)
        struct amdgpu_display_manager *dm = &adev->dm;
+       unsigned long flags;
+#endif
        int rc = 0;
 
        if (enable) {
@@ -5397,22 +5467,15 @@ static inline int dm_set_vblank(struct drm_crtc *crtc, bool enable)
        if (amdgpu_in_reset(adev))
                return 0;
 
-       mutex_lock(&dm->dc_lock);
-
-       if (enable)
-               dm->active_vblank_irq_count++;
-       else
-               dm->active_vblank_irq_count--;
-
 #if defined(CONFIG_DRM_AMD_DC_DCN)
-       dc_allow_idle_optimizations(
-               adev->dm.dc, dm->active_vblank_irq_count == 0 ? true : false);
-
-       DRM_DEBUG_DRIVER("Allow idle optimizations (MALL): %d\n", dm->active_vblank_irq_count == 0);
+       spin_lock_irqsave(&dm->vblank_lock, flags);
+       dm->vblank_workqueue->dm = dm;
+       dm->vblank_workqueue->otg_inst = acrtc->otg_inst;
+       dm->vblank_workqueue->enable = enable;
+       spin_unlock_irqrestore(&dm->vblank_lock, flags);
+       schedule_work(&dm->vblank_workqueue->mall_work);
 #endif
 
-       mutex_unlock(&dm->dc_lock);
-
        return 0;
 }
 
@@ -7663,7 +7726,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                                    struct drm_crtc *pcrtc,
                                    bool wait_for_vblank)
 {
-       int i;
+       uint32_t i;
        uint64_t timestamp_ns;
        struct drm_plane *plane;
        struct drm_plane_state *old_plane_state, *new_plane_state;
@@ -7704,7 +7767,7 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                amdgpu_dm_commit_cursors(state);
 
        /* update planes when needed */
-       for_each_oldnew_plane_in_state_reverse(state, plane, old_plane_state, new_plane_state, i) {
+       for_each_oldnew_plane_in_state(state, plane, old_plane_state, new_plane_state, i) {
                struct drm_crtc *crtc = new_plane_state->crtc;
                struct drm_crtc_state *new_crtc_state;
                struct drm_framebuffer *fb = new_plane_state->fb;
@@ -7927,7 +7990,8 @@ static void amdgpu_dm_commit_planes(struct drm_atomic_state *state,
                                                     bundle->surface_updates,
                                                     planes_count,
                                                     acrtc_state->stream,
-                                                    &bundle->stream_update);
+                                                    &bundle->stream_update,
+                                                    dc_state);
 
                /**
                 * Enable or disable the interrupts on the backend.
@@ -8263,13 +8327,13 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
                struct dm_connector_state *dm_new_con_state = to_dm_connector_state(new_con_state);
                struct dm_connector_state *dm_old_con_state = to_dm_connector_state(old_con_state);
                struct amdgpu_crtc *acrtc = to_amdgpu_crtc(dm_new_con_state->base.crtc);
-               struct dc_surface_update surface_updates[MAX_SURFACES];
+               struct dc_surface_update dummy_updates[MAX_SURFACES];
                struct dc_stream_update stream_update;
                struct dc_info_packet hdr_packet;
                struct dc_stream_status *status = NULL;
                bool abm_changed, hdr_changed, scaling_changed;
 
-               memset(&surface_updates, 0, sizeof(surface_updates));
+               memset(&dummy_updates, 0, sizeof(dummy_updates));
                memset(&stream_update, 0, sizeof(stream_update));
 
                if (acrtc) {
@@ -8326,15 +8390,16 @@ static void amdgpu_dm_atomic_commit_tail(struct drm_atomic_state *state)
                 * To fix this, DC should permit updating only stream properties.
                 */
                for (j = 0; j < status->plane_count; j++)
-                       surface_updates[j].surface = status->plane_states[j];
+                       dummy_updates[j].surface = status->plane_states[0];
 
 
                mutex_lock(&dm->dc_lock);
                dc_commit_updates_for_stream(dm->dc,
-                                               surface_updates,
+                                                    dummy_updates,
                                                     status->plane_count,
                                                     dm_new_crtc_state->stream,
-                                                    &stream_update);
+                                                    &stream_update,
+                                                    dc_state);
                mutex_unlock(&dm->dc_lock);
        }
 
index f72930c..8bfe901 100644 (file)
@@ -93,6 +93,20 @@ struct dm_compressor_info {
 };
 
 /**
+ * struct vblank_workqueue - Work to be executed in a separate thread during vblank
+ * @mall_work: work for MALL stutter
+ * @dm: amdgpu display manager device
+ * @otg_inst: OTG instance for which vblank is being set
+ * @enable: true if vblank is being enabled
+ */
+struct vblank_workqueue {
+       struct work_struct mall_work;
+       struct amdgpu_display_manager *dm;
+       int otg_inst;
+       bool enable;
+};
+
+/**
  * struct amdgpu_dm_backlight_caps - Information about backlight
  *
  * Describe the backlight support for ACPI or eDP AUX.
@@ -244,6 +258,15 @@ struct amdgpu_display_manager {
        struct mutex audio_lock;
 
        /**
+        * @vblank_lock:
+        *
+        * Guards access to deferred vblank work state.
+        */
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+       spinlock_t vblank_lock;
+#endif
+
+       /**
         * @audio_component:
         *
         * Used to notify ELD changes to sound driver.
@@ -321,6 +344,10 @@ struct amdgpu_display_manager {
        struct hdcp_workqueue *hdcp_workqueue;
 #endif
 
+#if defined(CONFIG_DRM_AMD_DC_DCN)
+       struct vblank_workqueue *vblank_workqueue;
+#endif
+
        struct drm_atomic_state *cached_state;
        struct dc_state *cached_dc_state;
 
index 5159399..5750818 100644 (file)
@@ -530,7 +530,7 @@ bool dm_helpers_dp_write_dsc_enable(
 {
        uint8_t enable_dsc = enable ? 1 : 0;
        struct amdgpu_dm_connector *aconnector;
-       uint8_t ret;
+       uint8_t ret = 0;
 
        if (!stream)
                return false;
index c9aede2..8f8a13c 100644 (file)
@@ -2697,7 +2697,8 @@ void dc_commit_updates_for_stream(struct dc *dc,
                struct dc_surface_update *srf_updates,
                int surface_count,
                struct dc_stream_state *stream,
-               struct dc_stream_update *stream_update)
+               struct dc_stream_update *stream_update,
+               struct dc_state *state)
 {
        const struct dc_stream_status *stream_status;
        enum surface_update_type update_type;
@@ -2716,12 +2717,6 @@ void dc_commit_updates_for_stream(struct dc *dc,
 
 
        if (update_type >= UPDATE_TYPE_FULL) {
-               struct dc_plane_state *new_planes[MAX_SURFACES];
-
-               memset(new_planes, 0, sizeof(new_planes));
-
-               for (i = 0; i < surface_count; i++)
-                       new_planes[i] = srf_updates[i].surface;
 
                /* initialize scratch memory for building context */
                context = dc_create_state(dc);
@@ -2730,21 +2725,15 @@ void dc_commit_updates_for_stream(struct dc *dc,
                        return;
                }
 
-               dc_resource_state_copy_construct(
-                               dc->current_state, context);
+               dc_resource_state_copy_construct(state, context);
 
-               /*remove old surfaces from context */
-               if (!dc_rem_all_planes_for_stream(dc, stream, context)) {
-                       DC_ERROR("Failed to remove streams for new validate context!\n");
-                       return;
-               }
+               for (i = 0; i < dc->res_pool->pipe_count; i++) {
+                       struct pipe_ctx *new_pipe = &context->res_ctx.pipe_ctx[i];
+                       struct pipe_ctx *old_pipe = &dc->current_state->res_ctx.pipe_ctx[i];
 
-               /* add surface to context */
-               if (!dc_add_all_planes_for_stream(dc, stream, new_planes, surface_count, context)) {
-                       DC_ERROR("Failed to add streams for new validate context!\n");
-                       return;
+                       if (new_pipe->plane_state && new_pipe->plane_state != old_pipe->plane_state)
+                               new_pipe->plane_state->force_full_update = true;
                }
-
        }
 
 
index fa5059f..bd01010 100644 (file)
@@ -2602,7 +2602,6 @@ bool dc_link_set_backlight_level(const struct dc_link *link,
                        if (pipe_ctx->plane_state == NULL)
                                frame_ramp = 0;
                } else {
-                       ASSERT(false);
                        return false;
                }
 
index 4eee3a5..18ed0d3 100644 (file)
@@ -887,6 +887,7 @@ struct dc_plane_state {
        int layer_index;
 
        union surface_update_flags update_flags;
+       bool flip_int_enabled;
        /* private to DC core */
        struct dc_plane_status status;
        struct dc_context *ctx;
index a4f7ec8..80b67b8 100644 (file)
@@ -294,7 +294,8 @@ void dc_commit_updates_for_stream(struct dc *dc,
                struct dc_surface_update *srf_updates,
                int surface_count,
                struct dc_stream_state *stream,
-               struct dc_stream_update *stream_update);
+               struct dc_stream_update *stream_update,
+               struct dc_state *state);
 /*
  * Log the current stream state.
  */
index 9e796df..714c71a 100644 (file)
@@ -1257,6 +1257,16 @@ void hubp1_soft_reset(struct hubp *hubp, bool reset)
        REG_UPDATE(DCHUBP_CNTL, HUBP_DISABLE, reset ? 1 : 0);
 }
 
+void hubp1_set_flip_int(struct hubp *hubp)
+{
+       struct dcn10_hubp *hubp1 = TO_DCN10_HUBP(hubp);
+
+       REG_UPDATE(DCSURF_SURFACE_FLIP_INTERRUPT,
+               SURFACE_FLIP_INT_MASK, 1);
+}
+
 void hubp1_init(struct hubp *hubp)
 {
        //do nothing
@@ -1290,6 +1300,7 @@ static const struct hubp_funcs dcn10_hubp_funcs = {
        .dmdata_load = NULL,
        .hubp_soft_reset = hubp1_soft_reset,
        .hubp_in_blank = hubp1_in_blank,
+       .hubp_set_flip_int = hubp1_set_flip_int,
 };
 
 /*****************************************/
index a9a6ed7..e2f2f69 100644 (file)
@@ -74,6 +74,7 @@
        SRI(DCSURF_SURFACE_EARLIEST_INUSE_C, HUBPREQ, id),\
        SRI(DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C, HUBPREQ, id),\
        SRI(DCSURF_SURFACE_CONTROL, HUBPREQ, id),\
+       SRI(DCSURF_SURFACE_FLIP_INTERRUPT, HUBPREQ, id),\
        SRI(HUBPRET_CONTROL, HUBPRET, id),\
        SRI(DCN_EXPANSION_MODE, HUBPREQ, id),\
        SRI(DCHUBP_REQ_SIZE_CONFIG, HUBP, id),\
        uint32_t DCSURF_SURFACE_EARLIEST_INUSE_C; \
        uint32_t DCSURF_SURFACE_EARLIEST_INUSE_HIGH_C; \
        uint32_t DCSURF_SURFACE_CONTROL; \
+       uint32_t DCSURF_SURFACE_FLIP_INTERRUPT; \
        uint32_t HUBPRET_CONTROL; \
        uint32_t DCN_EXPANSION_MODE; \
        uint32_t DCHUBP_REQ_SIZE_CONFIG; \
        HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_META_SURFACE_TMZ_C, mask_sh),\
        HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_EN, mask_sh),\
        HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_IND_64B_BLK, mask_sh),\
+       HUBP_SF(HUBPREQ0_DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK, mask_sh),\
        HUBP_SF(HUBPRET0_HUBPRET_CONTROL, DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\
        HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CB_B, mask_sh),\
        HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CR_R, mask_sh),\
        type PRIMARY_SURFACE_DCC_IND_64B_BLK;\
        type SECONDARY_SURFACE_DCC_EN;\
        type SECONDARY_SURFACE_DCC_IND_64B_BLK;\
+       type SURFACE_FLIP_INT_MASK;\
        type DET_BUF_PLANE1_BASE_ADDRESS;\
        type CROSSBAR_SRC_CB_B;\
        type CROSSBAR_SRC_CR_R;\
@@ -777,4 +781,6 @@ void hubp1_read_state_common(struct hubp *hubp);
 bool hubp1_in_blank(struct hubp *hubp);
 void hubp1_soft_reset(struct hubp *hubp, bool reset);
 
+void hubp1_set_flip_int(struct hubp *hubp);
+
 #endif
index 89912bb..9ba5c62 100644 (file)
@@ -2196,6 +2196,13 @@ static void dcn10_enable_plane(
        if (dc->debug.sanity_checks) {
                hws->funcs.verify_allow_pstate_change_high(dc);
        }
+
+       if (!pipe_ctx->top_pipe
+               && pipe_ctx->plane_state
+               && pipe_ctx->plane_state->flip_int_enabled
+               && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int)
+                       pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp);
+
 }
 
 void dcn10_program_gamut_remap(struct pipe_ctx *pipe_ctx)
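
The block added to dcn10_enable_plane() re-arms the surface-flip interrupt, but only for the top pipe of a plane that has opted in via flip_int_enabled. The same guard, unrolled into a local variable for readability (a sketch of the logic above, not a proposed change):

    struct hubp *hubp = pipe_ctx->plane_res.hubp;

    if (!pipe_ctx->top_pipe &&                   /* top-most pipe only  */
        pipe_ctx->plane_state &&                 /* a plane is attached */
        pipe_ctx->plane_state->flip_int_enabled &&
        hubp->funcs->hubp_set_flip_int)          /* hook implemented    */
            hubp->funcs->hubp_set_flip_int(hubp);
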
index 5902465..e470182 100644 (file)
@@ -480,7 +480,6 @@ unsigned int dcn10_get_dig_frontend(struct link_encoder *enc)
                break;
        default:
                // invalid source select DIG
-               ASSERT(false);
                result = ENGINE_ID_UNKNOWN;
        }
 
index 0df0da2..bec7059 100644 (file)
@@ -1597,6 +1597,7 @@ static struct hubp_funcs dcn20_hubp_funcs = {
        .validate_dml_output = hubp2_validate_dml_output,
        .hubp_in_blank = hubp1_in_blank,
        .hubp_soft_reset = hubp1_soft_reset,
+       .hubp_set_flip_int = hubp1_set_flip_int,
 };
 
 
index 0726fb4..aece110 100644 (file)
@@ -1146,6 +1146,12 @@ void dcn20_enable_plane(
                pipe_ctx->plane_res.hubp->funcs->hubp_set_vm_system_aperture_settings(pipe_ctx->plane_res.hubp, &apt);
        }
 
+       if (!pipe_ctx->top_pipe
+               && pipe_ctx->plane_state
+               && pipe_ctx->plane_state->flip_int_enabled
+               && pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int)
+                       pipe_ctx->plane_res.hubp->funcs->hubp_set_flip_int(pipe_ctx->plane_res.hubp);
+
 //     if (dc->debug.sanity_checks) {
 //             dcn10_verify_allow_pstate_change_high(dc);
 //     }
@@ -1501,38 +1507,8 @@ static void dcn20_update_dchubp_dpp(
        if (pipe_ctx->update_flags.bits.enable || pipe_ctx->update_flags.bits.opp_changed
                        || pipe_ctx->stream->update_flags.bits.gamut_remap
                        || pipe_ctx->stream->update_flags.bits.out_csc) {
-               struct mpc *mpc = pipe_ctx->stream_res.opp->ctx->dc->res_pool->mpc;
-
-               if (mpc->funcs->set_gamut_remap) {
-                       int i;
-                       int mpcc_id = hubp->inst;
-                       struct mpc_grph_gamut_adjustment adjust;
-                       bool enable_remap_dpp = false;
-
-                       memset(&adjust, 0, sizeof(adjust));
-                       adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_BYPASS;
-
-                       /* save the enablement of gamut remap for dpp */
-                       enable_remap_dpp = pipe_ctx->stream->gamut_remap_matrix.enable_remap;
-
-                       /* force bypass gamut remap for dpp/cm */
-                       pipe_ctx->stream->gamut_remap_matrix.enable_remap = false;
-                       dc->hwss.program_gamut_remap(pipe_ctx);
-
-                       /* restore gamut remap flag and use this remap into mpc */
-                       pipe_ctx->stream->gamut_remap_matrix.enable_remap = enable_remap_dpp;
-
-                       /* build remap matrix for top plane if enabled */
-                       if (enable_remap_dpp && pipe_ctx->top_pipe == NULL) {
-                                       adjust.gamut_adjust_type = GRAPHICS_GAMUT_ADJUST_TYPE_SW;
-                                       for (i = 0; i < CSC_TEMPERATURE_MATRIX_SIZE; i++)
-                                               adjust.temperature_matrix[i] =
-                                                               pipe_ctx->stream->gamut_remap_matrix.matrix[i];
-                       }
-                       mpc->funcs->set_gamut_remap(mpc, mpcc_id, &adjust);
-               } else
-                       /* dpp/cm gamut remap*/
-                       dc->hwss.program_gamut_remap(pipe_ctx);
+               /* dpp/cm gamut remap */
+               dc->hwss.program_gamut_remap(pipe_ctx);
 
                /*call the dcn2 method which uses mpc csc*/
                dc->hwss.program_output_csc(dc,
index fa01349..2f9bfae 100644 (file)
@@ -341,8 +341,7 @@ void enc2_hw_init(struct link_encoder *enc)
        } else {
                AUX_REG_WRITE(AUX_DPHY_RX_CONTROL0, 0x103d1110);
 
-               AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c4d);
-
+               AUX_REG_WRITE(AUX_DPHY_TX_CONTROL, 0x21c7a);
        }
 
        //AUX_DPHY_TX_REF_CONTROL'AUX_TX_REF_DIV HW default is 0x32;
index f904585..b0c9180 100644 (file)
@@ -838,6 +838,7 @@ static struct hubp_funcs dcn21_hubp_funcs = {
        .hubp_set_flip_control_surface_gsl = hubp2_set_flip_control_surface_gsl,
        .hubp_init = hubp21_init,
        .validate_dml_output = hubp21_validate_dml_output,
+       .hubp_set_flip_int = hubp1_set_flip_int,
 };
 
 bool hubp21_construct(
index 072f8c8..4a3df13 100644 (file)
@@ -296,7 +296,7 @@ struct _vcs_dpi_soc_bounding_box_st dcn2_1_soc = {
        .num_banks = 8,
        .num_chans = 4,
        .vmm_page_size_bytes = 4096,
-       .dram_clock_change_latency_us = 11.72,
+       .dram_clock_change_latency_us = 23.84,
        .return_bus_width_bytes = 64,
        .dispclk_dppclk_vco_speed_mhz = 3600,
        .xfc_bus_transport_time_us = 4,
@@ -1062,8 +1062,6 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s
 {
        int i;
 
-       DC_FP_START();
-
        if (dc->bb_overrides.sr_exit_time_ns) {
                for (i = 0; i < WM_SET_COUNT; i++) {
                          dc->clk_mgr->bw_params->wm_table.entries[i].sr_exit_time_us =
@@ -1088,8 +1086,6 @@ static void patch_bounding_box(struct dc *dc, struct _vcs_dpi_soc_bounding_box_s
                                dc->bb_overrides.dram_clock_change_latency_ns / 1000.0;
                }
        }
-
-       DC_FP_END();
 }
 
 void dcn21_calculate_wm(
@@ -1339,7 +1335,7 @@ static noinline bool dcn21_validate_bandwidth_fp(struct dc *dc,
        int vlevel = 0;
        int pipe_split_from[MAX_PIPES];
        int pipe_cnt = 0;
-       display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_KERNEL);
+       display_e2e_pipe_params_st *pipes = kzalloc(dc->res_pool->pipe_count * sizeof(display_e2e_pipe_params_st), GFP_ATOMIC);
        DC_LOGGER_INIT(dc->ctx->logger);
 
        BW_VAL_TRACE_COUNT();
@@ -1599,6 +1595,11 @@ static void update_bw_bounding_box(struct dc *dc, struct clk_bw_params *bw_param
        dcn2_1_soc.num_chans = bw_params->num_channels;
 
        ASSERT(clk_table->num_entries);
+       /* Copy dcn2_1_soc.clock_limits to clock_limits to avoid copying over null states later */
+       for (i = 0; i < dcn2_1_soc.num_states + 1; i++) {
+               clock_limits[i] = dcn2_1_soc.clock_limits[i];
+       }
+
        for (i = 0; i < clk_table->num_entries; i++) {
                /* loop backwards*/
                for (closest_clk_lvl = 0, j = dcn2_1_soc.num_states - 1; j >= 0; j--) {
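
Two changes in this file belong together: patch_bounding_box() drops its own DC_FP_START()/DC_FP_END() bracketing, and the pipe array in the _fp validation path is now allocated with GFP_ATOMIC. Both make sense if the caller owns the kernel-FPU section, which must not sleep while it is open. A sketch of the assumed calling pattern (the call shape is an assumption; the argument list of dcn21_validate_bandwidth_fp() is abridged in the hunk above):

    DC_FP_START();                    /* FPU section: must not sleep   */
    dcn21_validate_bandwidth_fp(dc, context, fast_validate);
    /* any allocation in here has to be GFP_ATOMIC, hence the change  */
    DC_FP_END();
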
index 41a1d0e..e0df9b0 100644 (file)
@@ -113,6 +113,7 @@ bool cm3_helper_translate_curve_to_hw_format(
        struct pwl_result_data *rgb_resulted;
        struct pwl_result_data *rgb;
        struct pwl_result_data *rgb_plus_1;
+       struct pwl_result_data *rgb_minus_1;
        struct fixed31_32 end_value;
 
        int32_t region_start, region_end;
@@ -140,7 +141,7 @@ bool cm3_helper_translate_curve_to_hw_format(
                region_start = -MAX_LOW_POINT;
                region_end   = NUMBER_REGIONS - MAX_LOW_POINT;
        } else {
-               /* 10 segments
+               /* 11 segments
                 * segment is from 2^-10 to 2^0
                 * There are less than 256 points, for optimization
                 */
@@ -154,9 +155,10 @@ bool cm3_helper_translate_curve_to_hw_format(
                seg_distr[7] = 4;
                seg_distr[8] = 4;
                seg_distr[9] = 4;
+               seg_distr[10] = 1;
 
                region_start = -10;
-               region_end = 0;
+               region_end = 1;
        }
 
        for (i = region_end - region_start; i < MAX_REGIONS_NUMBER ; i++)
@@ -189,6 +191,10 @@ bool cm3_helper_translate_curve_to_hw_format(
        rgb_resulted[hw_points - 1].green = output_tf->tf_pts.green[start_index];
        rgb_resulted[hw_points - 1].blue = output_tf->tf_pts.blue[start_index];
 
+       rgb_resulted[hw_points].red = rgb_resulted[hw_points - 1].red;
+       rgb_resulted[hw_points].green = rgb_resulted[hw_points - 1].green;
+       rgb_resulted[hw_points].blue = rgb_resulted[hw_points - 1].blue;
+
        // All 3 color channels have same x
        corner_points[0].red.x = dc_fixpt_pow(dc_fixpt_from_int(2),
                                             dc_fixpt_from_int(region_start));
@@ -259,15 +265,18 @@ bool cm3_helper_translate_curve_to_hw_format(
 
        rgb = rgb_resulted;
        rgb_plus_1 = rgb_resulted + 1;
+       rgb_minus_1 = rgb;
 
        i = 1;
        while (i != hw_points + 1) {
-               if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
-                       rgb_plus_1->red = rgb->red;
-               if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
-                       rgb_plus_1->green = rgb->green;
-               if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
-                       rgb_plus_1->blue = rgb->blue;
+               if (i >= hw_points - 1) {
+                       if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
+                               rgb_plus_1->red = dc_fixpt_add(rgb->red, rgb_minus_1->delta_red);
+                       if (dc_fixpt_lt(rgb_plus_1->green, rgb->green))
+                               rgb_plus_1->green = dc_fixpt_add(rgb->green, rgb_minus_1->delta_green);
+                       if (dc_fixpt_lt(rgb_plus_1->blue, rgb->blue))
+                               rgb_plus_1->blue = dc_fixpt_add(rgb->blue, rgb_minus_1->delta_blue);
+               }
 
                rgb->delta_red   = dc_fixpt_sub(rgb_plus_1->red,   rgb->red);
                rgb->delta_green = dc_fixpt_sub(rgb_plus_1->green, rgb->green);
@@ -283,6 +292,7 @@ bool cm3_helper_translate_curve_to_hw_format(
                }
 
                ++rgb_plus_1;
+               rgb_minus_1 = rgb;
                ++rgb;
                ++i;
        }
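
The rewritten endpoint handling stops flattening the tail of the PWL curve: a non-monotonic last point used to be clamped to its predecessor, producing a zero final delta, whereas it is now extended by the previous segment's delta. A worked illustration with assumed values (numbers are made up for the example):

    /* say red ends 0.998, 0.997 and the previous delta was 0.004:
     * old clamp  -> 0.998, 0.998 (final delta collapses to 0)
     * new extend -> 0.998, 1.002 (monotonic, delta preserved)     */
    if (dc_fixpt_lt(rgb_plus_1->red, rgb->red))
            rgb_plus_1->red = dc_fixpt_add(rgb->red, rgb_minus_1->delta_red);
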
index 88ffa9f..f246125 100644 (file)
@@ -511,6 +511,7 @@ static struct hubp_funcs dcn30_hubp_funcs = {
        .hubp_init = hubp3_init,
        .hubp_in_blank = hubp1_in_blank,
        .hubp_soft_reset = hubp1_soft_reset,
+       .hubp_set_flip_int = hubp1_set_flip_int,
 };
 
 bool hubp3_construct(
index 705fbfc..8a32772 100644 (file)
        HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_EN, mask_sh),\
        HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_IND_BLK, mask_sh),\
        HUBP_SF(HUBPREQ0_DCSURF_SURFACE_CONTROL, SECONDARY_SURFACE_DCC_IND_BLK_C, mask_sh),\
+       HUBP_SF(HUBPREQ0_DCSURF_SURFACE_FLIP_INTERRUPT, SURFACE_FLIP_INT_MASK, mask_sh),\
        HUBP_SF(HUBPRET0_HUBPRET_CONTROL, DET_BUF_PLANE1_BASE_ADDRESS, mask_sh),\
        HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CB_B, mask_sh),\
        HUBP_SF(HUBPRET0_HUBPRET_CONTROL, CROSSBAR_SRC_CR_R, mask_sh),\
index 9620fb8..06dc1e2 100644 (file)
@@ -539,6 +539,8 @@ void dcn30_init_hw(struct dc *dc)
 
                                        fe = dc->links[i]->link_enc->funcs->get_dig_frontend(
                                                                                dc->links[i]->link_enc);
+                                       if (fe == ENGINE_ID_UNKNOWN)
+                                               continue;
 
                                        for (j = 0; j < dc->res_pool->stream_enc_count; j++) {
                                                if (fe == dc->res_pool->stream_enc[j]->id) {
index 8d0f663..fb7f1de 100644 (file)
@@ -2508,6 +2508,19 @@ static const struct resource_funcs dcn30_res_pool_funcs = {
        .patch_unknown_plane_state = dcn20_patch_unknown_plane_state,
 };
 
+#define CTX ctx
+
+#define REG(reg_name) \
+       (DCN_BASE.instance[0].segment[mm ## reg_name ## _BASE_IDX] + mm ## reg_name)
+
+static uint32_t read_pipe_fuses(struct dc_context *ctx)
+{
+       uint32_t value = REG_READ(CC_DC_PIPE_DIS);
+       /* Support for max 6 pipes */
+       value = value & 0x3f;
+       return value;
+}
+
 static bool dcn30_resource_construct(
        uint8_t num_virtual_links,
        struct dc *dc,
@@ -2517,6 +2530,15 @@ static bool dcn30_resource_construct(
        struct dc_context *ctx = dc->ctx;
        struct irq_service_init_data init_data;
        struct ddc_service_init_data ddc_init_data;
+       uint32_t pipe_fuses = read_pipe_fuses(ctx);
+       uint32_t num_pipes = 0;
+
+       if (!(pipe_fuses == 0 || pipe_fuses == 0x3e)) {
+               BREAK_TO_DEBUGGER();
+               dm_error("DC: Unexpected fuse recipe for navi2x!\n");
+               /* fall back to a single-pipe configuration */
+               pipe_fuses = 0x3e;
+       }
 
        DC_FP_START();
 
@@ -2650,6 +2672,15 @@ static bool dcn30_resource_construct(
        /* PP Lib and SMU interfaces */
        init_soc_bounding_box(dc, pool);
 
+       num_pipes = dcn3_0_ip.max_num_dpp;
+
+       for (i = 0; i < dcn3_0_ip.max_num_dpp; i++)
+               if (pipe_fuses & 1 << i)
+                       num_pipes--;
+
+       dcn3_0_ip.max_num_dpp = num_pipes;
+       dcn3_0_ip.max_num_otg = num_pipes;
+
        dml_init_instance(&dc->dml, &dcn3_0_soc, &dcn3_0_ip, DML_PROJECT_DCN30);
 
        /* IRQ */
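
read_pipe_fuses() masks the harvesting fuse down to six pipe bits, and the loop above subtracts one usable pipe per set bit. The arithmetic in isolation (illustrative; hweight32() is the kernel's population count, equivalent to the loop):

    uint32_t pipe_fuses = 0x3e & 0x3f;               /* pipes 1-5 fused off */
    uint32_t num_pipes  = 6 - hweight32(pipe_fuses); /* 6 - 5 == 1 pipe     */
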
index 5d4b2c6..c494235 100644 (file)
@@ -1619,12 +1619,106 @@ static void dcn301_update_bw_bounding_box(struct dc *dc, struct clk_bw_params *b
        dml_init_instance(&dc->dml, &dcn3_01_soc, &dcn3_01_ip, DML_PROJECT_DCN30);
 }
 
+static void calculate_wm_set_for_vlevel(
+               int vlevel,
+               struct wm_range_table_entry *table_entry,
+               struct dcn_watermarks *wm_set,
+               struct display_mode_lib *dml,
+               display_e2e_pipe_params_st *pipes,
+               int pipe_cnt)
+{
+       double dram_clock_change_latency_cached = dml->soc.dram_clock_change_latency_us;
+
+       ASSERT(vlevel < dml->soc.num_states);
+       /* only pipe 0 is read for voltage and dcf/soc clocks */
+       pipes[0].clks_cfg.voltage = vlevel;
+       pipes[0].clks_cfg.dcfclk_mhz = dml->soc.clock_limits[vlevel].dcfclk_mhz;
+       pipes[0].clks_cfg.socclk_mhz = dml->soc.clock_limits[vlevel].socclk_mhz;
+
+       dml->soc.dram_clock_change_latency_us = table_entry->pstate_latency_us;
+       dml->soc.sr_exit_time_us = table_entry->sr_exit_time_us;
+       dml->soc.sr_enter_plus_exit_time_us = table_entry->sr_enter_plus_exit_time_us;
+
+       wm_set->urgent_ns = get_wm_urgent(dml, pipes, pipe_cnt) * 1000;
+       wm_set->cstate_pstate.cstate_enter_plus_exit_ns = get_wm_stutter_enter_exit(dml, pipes, pipe_cnt) * 1000;
+       wm_set->cstate_pstate.cstate_exit_ns = get_wm_stutter_exit(dml, pipes, pipe_cnt) * 1000;
+       wm_set->cstate_pstate.pstate_change_ns = get_wm_dram_clock_change(dml, pipes, pipe_cnt) * 1000;
+       wm_set->pte_meta_urgent_ns = get_wm_memory_trip(dml, pipes, pipe_cnt) * 1000;
+       wm_set->frac_urg_bw_nom = get_fraction_of_urgent_bandwidth(dml, pipes, pipe_cnt) * 1000;
+       wm_set->frac_urg_bw_flip = get_fraction_of_urgent_bandwidth_imm_flip(dml, pipes, pipe_cnt) * 1000;
+       wm_set->urgent_latency_ns = get_urgent_latency(dml, pipes, pipe_cnt) * 1000;
+       dml->soc.dram_clock_change_latency_us = dram_clock_change_latency_cached;
+}
+
+static void dcn301_calculate_wm_and_dlg(
+               struct dc *dc, struct dc_state *context,
+               display_e2e_pipe_params_st *pipes,
+               int pipe_cnt,
+               int vlevel_req)
+{
+       int i, pipe_idx;
+       int vlevel, vlevel_max;
+       struct wm_range_table_entry *table_entry;
+       struct clk_bw_params *bw_params = dc->clk_mgr->bw_params;
+
+       ASSERT(bw_params);
+
+       vlevel_max = bw_params->clk_table.num_entries - 1;
+
+       /* WM Set D */
+       table_entry = &bw_params->wm_table.entries[WM_D];
+       if (table_entry->wm_type == WM_TYPE_RETRAINING)
+               vlevel = 0;
+       else
+               vlevel = vlevel_max;
+       calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.d,
+                                               &context->bw_ctx.dml, pipes, pipe_cnt);
+       /* WM Set C */
+       table_entry = &bw_params->wm_table.entries[WM_C];
+       vlevel = min(max(vlevel_req, 2), vlevel_max);
+       calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.c,
+                                               &context->bw_ctx.dml, pipes, pipe_cnt);
+       /* WM Set B */
+       table_entry = &bw_params->wm_table.entries[WM_B];
+       vlevel = min(max(vlevel_req, 1), vlevel_max);
+       calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.b,
+                                               &context->bw_ctx.dml, pipes, pipe_cnt);
+
+       /* WM Set A */
+       table_entry = &bw_params->wm_table.entries[WM_A];
+       vlevel = min(vlevel_req, vlevel_max);
+       calculate_wm_set_for_vlevel(vlevel, table_entry, &context->bw_ctx.bw.dcn.watermarks.a,
+                                               &context->bw_ctx.dml, pipes, pipe_cnt);
+
+       for (i = 0, pipe_idx = 0; i < dc->res_pool->pipe_count; i++) {
+               if (!context->res_ctx.pipe_ctx[i].stream)
+                       continue;
+
+               pipes[pipe_idx].clks_cfg.dispclk_mhz = get_dispclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt);
+               pipes[pipe_idx].clks_cfg.dppclk_mhz = get_dppclk_calculated(&context->bw_ctx.dml, pipes, pipe_cnt, pipe_idx);
+
+               if (dc->config.forced_clocks) {
+                       pipes[pipe_idx].clks_cfg.dispclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dispclk_mhz;
+                       pipes[pipe_idx].clks_cfg.dppclk_mhz = context->bw_ctx.dml.soc.clock_limits[0].dppclk_mhz;
+               }
+               if (dc->debug.min_disp_clk_khz > pipes[pipe_idx].clks_cfg.dispclk_mhz * 1000)
+                       pipes[pipe_idx].clks_cfg.dispclk_mhz = dc->debug.min_disp_clk_khz / 1000.0;
+               if (dc->debug.min_dpp_clk_khz > pipes[pipe_idx].clks_cfg.dppclk_mhz * 1000)
+                       pipes[pipe_idx].clks_cfg.dppclk_mhz = dc->debug.min_dpp_clk_khz / 1000.0;
+
+               pipe_idx++;
+       }
+
+       dcn20_calculate_dlg_params(dc, context, pipes, pipe_cnt, vlevel);
+}
+
 static struct resource_funcs dcn301_res_pool_funcs = {
        .destroy = dcn301_destroy_resource_pool,
        .link_enc_create = dcn301_link_encoder_create,
        .panel_cntl_create = dcn301_panel_cntl_create,
        .validate_bandwidth = dcn30_validate_bandwidth,
-       .calculate_wm_and_dlg = dcn30_calculate_wm_and_dlg,
+       .calculate_wm_and_dlg = dcn301_calculate_wm_and_dlg,
        .populate_dml_pipes = dcn30_populate_dml_pipes_from_context,
        .acquire_idle_pipe_for_layer = dcn20_acquire_idle_pipe_for_layer,
        .add_stream_to_ctx = dcn30_add_stream_to_ctx,
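
dcn301_calculate_wm_and_dlg() computes one watermark set per table entry, flooring the requested voltage level so the slower sets are never evaluated below a minimum DPM level. How the clamps resolve for a four-entry clock table (vlevel_max = 3) with vlevel_req = 0 (a worked illustration only):

    int a = min(vlevel_req, vlevel_max);             /* WM A -> 0 */
    int b = min(max(vlevel_req, 1), vlevel_max);     /* WM B -> 1 */
    int c = min(max(vlevel_req, 2), vlevel_max);     /* WM C -> 2 */
    /* WM D uses vlevel_max, or 0 for a WM_TYPE_RETRAINING entry  */
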
index 22f3f64..346dcd8 100644 (file)
@@ -191,6 +191,8 @@ struct hubp_funcs {
        bool (*hubp_in_blank)(struct hubp *hubp);
        void (*hubp_soft_reset)(struct hubp *hubp, bool reset);
 
+       void (*hubp_set_flip_int)(struct hubp *hubp);
+
 };
 
 #endif
index 1b97126..0e0f494 100644 (file)
@@ -168,6 +168,11 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = {
        .ack = NULL
 };
 
+static const struct irq_source_info_funcs vupdate_no_lock_irq_info_funcs = {
+       .set = NULL,
+       .ack = NULL
+};
+
 #undef BASE_INNER
 #define BASE_INNER(seg) DMU_BASE__INST0_SEG ## seg
 
@@ -230,6 +235,17 @@ static const struct irq_source_info_funcs vblank_irq_info_funcs = {
                .funcs = &vblank_irq_info_funcs\
        }
 
+/* vupdate_no_lock_int_entry maps to DC_IRQ_SOURCE_VUPDATEx, to match the
+ * semantics of DCE's DC_IRQ_SOURCE_VUPDATEx.
+ */
+#define vupdate_no_lock_int_entry(reg_num)\
+       [DC_IRQ_SOURCE_VUPDATE1 + reg_num] = {\
+               IRQ_REG_ENTRY(OTG, reg_num,\
+                       OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_INT_EN,\
+                       OTG_GLOBAL_SYNC_STATUS, VUPDATE_NO_LOCK_EVENT_CLEAR),\
+               .funcs = &vupdate_no_lock_irq_info_funcs\
+       }
+
 #define vblank_int_entry(reg_num)\
        [DC_IRQ_SOURCE_VBLANK1 + reg_num] = {\
                IRQ_REG_ENTRY(OTG, reg_num,\
@@ -338,6 +354,12 @@ irq_source_info_dcn21[DAL_IRQ_SOURCES_NUMBER] = {
        vupdate_int_entry(3),
        vupdate_int_entry(4),
        vupdate_int_entry(5),
+       vupdate_no_lock_int_entry(0),
+       vupdate_no_lock_int_entry(1),
+       vupdate_no_lock_int_entry(2),
+       vupdate_no_lock_int_entry(3),
+       vupdate_no_lock_int_entry(4),
+       vupdate_no_lock_int_entry(5),
        vblank_int_entry(0),
        vblank_int_entry(1),
        vblank_int_entry(2),
index c57dc9a..c0565a9 100644 (file)
@@ -587,6 +587,48 @@ static int smu7_force_switch_to_arbf0(struct pp_hwmgr *hwmgr)
                        tmp, MC_CG_ARB_FREQ_F0);
 }
 
+static uint16_t smu7_override_pcie_speed(struct pp_hwmgr *hwmgr)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+       uint16_t pcie_gen = 0;
+
+       if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4 &&
+           adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN4)
+               pcie_gen = 3;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3 &&
+               adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN3)
+               pcie_gen = 2;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2 &&
+               adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN2)
+               pcie_gen = 1;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1 &&
+               adev->pm.pcie_gen_mask & CAIL_ASIC_PCIE_LINK_SPEED_SUPPORT_GEN1)
+               pcie_gen = 0;
+
+       return pcie_gen;
+}
+
+static uint16_t smu7_override_pcie_width(struct pp_hwmgr *hwmgr)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+       uint16_t pcie_width = 0;
+
+       if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+               pcie_width = 16;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
+               pcie_width = 12;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
+               pcie_width = 8;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
+               pcie_width = 4;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
+               pcie_width = 2;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
+               pcie_width = 1;
+
+       return pcie_width;
+}
+
 static int smu7_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
 {
        struct smu7_hwmgr *data = (struct smu7_hwmgr *)(hwmgr->backend);
@@ -683,6 +725,11 @@ static int smu7_setup_default_pcie_table(struct pp_hwmgr *hwmgr)
                                        PP_Min_PCIEGen),
                        get_pcie_lane_support(data->pcie_lane_cap,
                                        PP_Max_PCIELane));
+
+               if (data->pcie_dpm_key_disabled)
+                       phm_setup_pcie_table_entry(&data->dpm_table.pcie_speed_table,
+                               data->dpm_table.pcie_speed_table.count,
+                               smu7_override_pcie_speed(hwmgr), smu7_override_pcie_width(hwmgr));
        }
        return 0;
 }
@@ -1177,7 +1224,8 @@ static int smu7_enable_sclk_mclk_dpm(struct pp_hwmgr *hwmgr)
                    (hwmgr->chip_id == CHIP_POLARIS10) ||
                    (hwmgr->chip_id == CHIP_POLARIS11) ||
                    (hwmgr->chip_id == CHIP_POLARIS12) ||
-                   (hwmgr->chip_id == CHIP_TONGA))
+                   (hwmgr->chip_id == CHIP_TONGA) ||
+                   (hwmgr->chip_id == CHIP_TOPAZ))
                        PHM_WRITE_FIELD(hwmgr->device, MC_SEQ_CNTL_3, CAC_EN, 0x1);
 
 
@@ -1248,6 +1296,13 @@ static int smu7_start_dpm(struct pp_hwmgr *hwmgr)
                                                NULL)),
                                "Failed to enable pcie DPM during DPM Start Function!",
                                return -EINVAL);
+       } else {
+               PP_ASSERT_WITH_CODE(
+                               (0 == smum_send_msg_to_smc(hwmgr,
+                                               PPSMC_MSG_PCIeDPM_Disable,
+                                               NULL)),
+                               "Failed to disable pcie DPM during DPM Start Function!",
+                               return -EINVAL);
        }
 
        if (phm_cap_enabled(hwmgr->platform_descriptor.platformCaps,
@@ -3276,7 +3331,8 @@ static int smu7_apply_state_adjust_rules(struct pp_hwmgr *hwmgr,
 
        disable_mclk_switching_for_display = ((1 < hwmgr->display_config->num_display) &&
                                                !hwmgr->display_config->multi_monitor_in_sync) ||
-                                               smu7_vblank_too_short(hwmgr, hwmgr->display_config->min_vblank_time);
+                                               (hwmgr->display_config->num_display &&
+                                               smu7_vblank_too_short(hwmgr, hwmgr->display_config->min_vblank_time));
 
        disable_mclk_switching = disable_mclk_switching_for_frame_lock ||
                                         disable_mclk_switching_for_display;
@@ -5216,10 +5272,10 @@ static int smu7_set_watermarks_for_clocks_ranges(struct pp_hwmgr *hwmgr,
                for (j = 0; j < dep_sclk_table->count; j++) {
                        valid_entry = false;
                        for (k = 0; k < watermarks->num_wm_sets; k++) {
-                               if (dep_sclk_table->entries[i].clk / 10 >= watermarks->wm_clk_ranges[k].wm_min_eng_clk_in_khz &&
-                                   dep_sclk_table->entries[i].clk / 10 < watermarks->wm_clk_ranges[k].wm_max_eng_clk_in_khz &&
-                                   dep_mclk_table->entries[i].clk / 10 >= watermarks->wm_clk_ranges[k].wm_min_mem_clk_in_khz &&
-                                   dep_mclk_table->entries[i].clk / 10 < watermarks->wm_clk_ranges[k].wm_max_mem_clk_in_khz) {
+                               if (dep_sclk_table->entries[i].clk >= watermarks->wm_clk_ranges[k].wm_min_eng_clk_in_khz / 10 &&
+                                   dep_sclk_table->entries[i].clk < watermarks->wm_clk_ranges[k].wm_max_eng_clk_in_khz / 10 &&
+                                   dep_mclk_table->entries[i].clk >= watermarks->wm_clk_ranges[k].wm_min_mem_clk_in_khz / 10 &&
+                                   dep_mclk_table->entries[i].clk < watermarks->wm_clk_ranges[k].wm_max_mem_clk_in_khz / 10) {
                                        valid_entry = true;
                                        table->DisplayWatermark[i][j] = watermarks->wm_clk_ranges[k].wm_set_id;
                                        break;
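
The rewritten comparison divides the kHz watermark bounds by ten instead of dividing the clock entry, which fixes a unit mismatch: assuming the dependency-table clocks are in 10 kHz units (consistent with the factor used here), the old form compared values two orders of magnitude apart. A boundary case with illustrative numbers:

    uint32_t clk = 29999;          /* 10 kHz units, i.e. 299.99 MHz     */
    uint32_t wm_min_khz = 299990;
    /* old: clk / 10 >= wm_min_khz   -> 2999  >= 299990, never true     */
    /* new: clk >= wm_min_khz / 10   -> 29999 >= 29999,  true           */
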
index 29c9964..599ec97 100644 (file)
@@ -54,6 +54,9 @@
 #include "smuio/smuio_9_0_offset.h"
 #include "smuio/smuio_9_0_sh_mask.h"
 
+#define smnPCIE_LC_SPEED_CNTL                  0x11140290
+#define smnPCIE_LC_LINK_WIDTH_CNTL             0x11140288
+
 #define HBM_MEMORY_CHANNEL_WIDTH    128
 
 static const uint32_t channel_number[] = {1, 2, 0, 4, 0, 8, 0, 16, 2};
@@ -443,8 +446,7 @@ static void vega10_init_dpm_defaults(struct pp_hwmgr *hwmgr)
        if (PP_CAP(PHM_PlatformCaps_VCEDPM))
                data->smu_features[GNLD_DPM_VCE].supported = true;
 
-       if (!data->registry_data.pcie_dpm_key_disabled)
-               data->smu_features[GNLD_DPM_LINK].supported = true;
+       data->smu_features[GNLD_DPM_LINK].supported = true;
 
        if (!data->registry_data.dcefclk_dpm_key_disabled)
                data->smu_features[GNLD_DPM_DCEFCLK].supported = true;
@@ -1505,6 +1507,55 @@ static int vega10_populate_single_lclk_level(struct pp_hwmgr *hwmgr,
        return 0;
 }
 
+static int vega10_override_pcie_parameters(struct pp_hwmgr *hwmgr)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+       struct vega10_hwmgr *data =
+                       (struct vega10_hwmgr *)(hwmgr->backend);
+       uint32_t pcie_gen = 0, pcie_width = 0;
+       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
+       int i;
+
+       if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
+               pcie_gen = 3;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
+               pcie_gen = 2;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
+               pcie_gen = 1;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1)
+               pcie_gen = 0;
+
+       if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+               pcie_width = 6;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
+               pcie_width = 5;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
+               pcie_width = 4;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
+               pcie_width = 3;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
+               pcie_width = 2;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
+               pcie_width = 1;
+
+       for (i = 0; i < NUM_LINK_LEVELS; i++) {
+               if (pp_table->PcieGenSpeed[i] > pcie_gen)
+                       pp_table->PcieGenSpeed[i] = pcie_gen;
+
+               if (pp_table->PcieLaneCount[i] > pcie_width)
+                       pp_table->PcieLaneCount[i] = pcie_width;
+       }
+
+       if (data->registry_data.pcie_dpm_key_disabled) {
+               for (i = 0; i < NUM_LINK_LEVELS; i++) {
+                       pp_table->PcieGenSpeed[i] = pcie_gen;
+                       pp_table->PcieLaneCount[i] = pcie_width;
+               }
+       }
+
+       return 0;
+}
+
 static int vega10_populate_smc_link_levels(struct pp_hwmgr *hwmgr)
 {
        int result = -1;
@@ -2556,6 +2607,11 @@ static int vega10_init_smc_table(struct pp_hwmgr *hwmgr)
                        "Failed to initialize Link Level!",
                        return result);
 
+       result = vega10_override_pcie_parameters(hwmgr);
+       PP_ASSERT_WITH_CODE(!result,
+                       "Failed to override pcie parameters!",
+                       return result);
+
        result = vega10_populate_all_graphic_levels(hwmgr);
        PP_ASSERT_WITH_CODE(!result,
                        "Failed to initialize Graphics Level!",
@@ -2919,9 +2975,18 @@ static int vega10_start_dpm(struct pp_hwmgr *hwmgr, uint32_t bitmap)
                }
        }
 
+       if (data->registry_data.pcie_dpm_key_disabled) {
+               PP_ASSERT_WITH_CODE(!vega10_enable_smc_features(hwmgr,
+                               false, data->smu_features[GNLD_DPM_LINK].smu_feature_bitmap),
+               "Attempt to Disable Link DPM feature Failed!", return -EINVAL);
+               data->smu_features[GNLD_DPM_LINK].enabled = false;
+               data->smu_features[GNLD_DPM_LINK].supported = false;
+       }
+
        return 0;
 }
 
 static int vega10_enable_disable_PCC_limit_feature(struct pp_hwmgr *hwmgr, bool enable)
 {
        struct vega10_hwmgr *data = hwmgr->backend;
@@ -4536,6 +4601,24 @@ static int vega10_set_ppfeature_status(struct pp_hwmgr *hwmgr, uint64_t new_ppfe
        return 0;
 }
 
+static int vega10_get_current_pcie_link_width_level(struct pp_hwmgr *hwmgr)
+{
+       struct amdgpu_device *adev = hwmgr->adev;
+
+       return (RREG32_PCIE(smnPCIE_LC_LINK_WIDTH_CNTL) &
+               PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD_MASK)
+               >> PCIE_LC_LINK_WIDTH_CNTL__LC_LINK_WIDTH_RD__SHIFT;
+}
+
+static int vega10_get_current_pcie_link_speed_level(struct pp_hwmgr *hwmgr)
+{
+       struct amdgpu_device *adev = hwmgr->adev;
+
+       return (RREG32_PCIE(smnPCIE_LC_SPEED_CNTL) &
+               PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK)
+               >> PSWUSP0_PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT;
+}
+
 static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
                enum pp_clock_type type, char *buf)
 {
@@ -4544,8 +4627,9 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
        struct vega10_single_dpm_table *mclk_table = &(data->dpm_table.mem_table);
        struct vega10_single_dpm_table *soc_table = &(data->dpm_table.soc_table);
        struct vega10_single_dpm_table *dcef_table = &(data->dpm_table.dcef_table);
-       struct vega10_pcie_table *pcie_table = &(data->dpm_table.pcie_table);
        struct vega10_odn_clock_voltage_dependency_table *podn_vdd_dep = NULL;
+       uint32_t gen_speed, lane_width, current_gen_speed, current_lane_width;
+       PPTable_t *pptable = &(data->smc_state_table.pp_table);
 
        int i, now, size = 0, count = 0;
 
@@ -4602,15 +4686,31 @@ static int vega10_print_clock_levels(struct pp_hwmgr *hwmgr,
                                        "*" : "");
                break;
        case PP_PCIE:
-               smum_send_msg_to_smc(hwmgr, PPSMC_MSG_GetCurrentLinkIndex, &now);
-
-               for (i = 0; i < pcie_table->count; i++)
-                       size += sprintf(buf + size, "%d: %s %s\n", i,
-                                       (pcie_table->pcie_gen[i] == 0) ? "2.5GT/s, x1" :
-                                       (pcie_table->pcie_gen[i] == 1) ? "5.0GT/s, x16" :
-                                       (pcie_table->pcie_gen[i] == 2) ? "8.0GT/s, x16" : "",
-                                       (i == now) ? "*" : "");
+               current_gen_speed =
+                       vega10_get_current_pcie_link_speed_level(hwmgr);
+               current_lane_width =
+                       vega10_get_current_pcie_link_width_level(hwmgr);
+               for (i = 0; i < NUM_LINK_LEVELS; i++) {
+                       gen_speed = pptable->PcieGenSpeed[i];
+                       lane_width = pptable->PcieLaneCount[i];
+
+                       size += sprintf(buf + size, "%d: %s %s %s\n", i,
+                                       (gen_speed == 0) ? "2.5GT/s," :
+                                       (gen_speed == 1) ? "5.0GT/s," :
+                                       (gen_speed == 2) ? "8.0GT/s," :
+                                       (gen_speed == 3) ? "16.0GT/s," : "",
+                                       (lane_width == 1) ? "x1" :
+                                       (lane_width == 2) ? "x2" :
+                                       (lane_width == 3) ? "x4" :
+                                       (lane_width == 4) ? "x8" :
+                                       (lane_width == 5) ? "x12" :
+                                       (lane_width == 6) ? "x16" : "",
+                                       (current_gen_speed == gen_speed) &&
+                                       (current_lane_width == lane_width) ?
+                                       "*" : "");
+               }
                break;
        case OD_SCLK:
                if (hwmgr->od_enabled) {
                        size = sprintf(buf, "%s:\n", "OD_SCLK");
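
The PP_PCIE branch now reads gen and lane values from the SMC pptable and decodes the lane-count index through the ternary chain above. The same encoding as a lookup table (a sketch; it mirrors the link_width[] array that smu_v11_0.c below already carries):

    static const int lane_count[] = { 0, 1, 2, 4, 8, 12, 16 };  /* idx 1..6 */
    /* e.g. PcieLaneCount[i] == 6 -> lane_count[6] == 16 -> "x16" */
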
index c075302..4f6da11 100644 (file)
@@ -133,6 +133,7 @@ static void vega12_set_default_registry_data(struct pp_hwmgr *hwmgr)
        data->registry_data.auto_wattman_debug = 0;
        data->registry_data.auto_wattman_sample_period = 100;
        data->registry_data.auto_wattman_threshold = 50;
+       data->registry_data.pcie_dpm_key_disabled = !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
 }
 
 static int vega12_set_features_platform_caps(struct pp_hwmgr *hwmgr)
@@ -481,6 +482,90 @@ static void vega12_init_dpm_state(struct vega12_dpm_state *dpm_state)
        dpm_state->hard_max_level = 0xffff;
 }
 
+static int vega12_override_pcie_parameters(struct pp_hwmgr *hwmgr)
+{
+       struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
+       struct vega12_hwmgr *data =
+                       (struct vega12_hwmgr *)(hwmgr->backend);
+       uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg, pcie_gen_arg, pcie_width_arg;
+       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
+       int i;
+       int ret;
+
+       if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
+               pcie_gen = 3;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN3)
+               pcie_gen = 2;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN2)
+               pcie_gen = 1;
+       else if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN1)
+               pcie_gen = 0;
+
+       if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X16)
+               pcie_width = 6;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X12)
+               pcie_width = 5;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X8)
+               pcie_width = 4;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X4)
+               pcie_width = 3;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X2)
+               pcie_width = 2;
+       else if (adev->pm.pcie_mlw_mask & CAIL_PCIE_LINK_WIDTH_SUPPORT_X1)
+               pcie_width = 1;
+
+       /* Bit 31:16: LCLK DPM level. 0 is DPM0, and 1 is DPM1
+        * Bit 15:8:  PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4
+        * Bit 7:0:   PCIE lane width, 1 to 7 corresponds to x1 to x32
+        */
+       for (i = 0; i < NUM_LINK_LEVELS; i++) {
+               pcie_gen_arg = (pp_table->PcieGenSpeed[i] > pcie_gen) ? pcie_gen :
+                       pp_table->PcieGenSpeed[i];
+               pcie_width_arg = (pp_table->PcieLaneCount[i] > pcie_width) ? pcie_width :
+                       pp_table->PcieLaneCount[i];
+
+               if (pcie_gen_arg != pp_table->PcieGenSpeed[i] || pcie_width_arg !=
+                   pp_table->PcieLaneCount[i]) {
+                       smu_pcie_arg = (i << 16) | (pcie_gen_arg << 8) | pcie_width_arg;
+                       ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+                               PPSMC_MSG_OverridePcieParameters, smu_pcie_arg,
+                               NULL);
+                       PP_ASSERT_WITH_CODE(!ret,
+                               "[OverridePcieParameters] Attempt to override pcie params failed!",
+                               return ret);
+               }
+
+               /* update the pptable */
+               pp_table->PcieGenSpeed[i] = pcie_gen_arg;
+               pp_table->PcieLaneCount[i] = pcie_width_arg;
+       }
+
+       /* override to the highest if it's disabled from ppfeaturemask */
+       if (data->registry_data.pcie_dpm_key_disabled) {
+               for (i = 0; i < NUM_LINK_LEVELS; i++) {
+                       smu_pcie_arg = (i << 16) | (pcie_gen << 8) | pcie_width;
+                       ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+                               PPSMC_MSG_OverridePcieParameters, smu_pcie_arg,
+                               NULL);
+                       PP_ASSERT_WITH_CODE(!ret,
+                               "[OverridePcieParameters] Attempt to override pcie params failed!",
+                               return ret);
+
+                       pp_table->PcieGenSpeed[i] = pcie_gen;
+                       pp_table->PcieLaneCount[i] = pcie_width;
+               }
+               ret = vega12_enable_smc_features(hwmgr,
+                               false,
+                               data->smu_features[GNLD_DPM_LINK].smu_feature_bitmap);
+               PP_ASSERT_WITH_CODE(!ret,
+                               "Attempt to Disable DPM LINK Failed!",
+                               return ret);
+               data->smu_features[GNLD_DPM_LINK].enabled = false;
+               data->smu_features[GNLD_DPM_LINK].supported = false;
+       }
+       return 0;
+}
+
 static int vega12_get_number_of_dpm_level(struct pp_hwmgr *hwmgr,
                PPCLK_e clk_id, uint32_t *num_of_levels)
 {
@@ -968,6 +1053,11 @@ static int vega12_enable_dpm_tasks(struct pp_hwmgr *hwmgr)
                        "Failed to enable all smu features!",
                        return result);
 
+       result = vega12_override_pcie_parameters(hwmgr);
+       PP_ASSERT_WITH_CODE(!result,
+                       "[EnableDPMTasks] Failed to override pcie parameters!",
+                       return result);
+
        tmp_result = vega12_power_control_set_level(hwmgr);
        PP_ASSERT_WITH_CODE(!tmp_result,
                        "Failed to power control set level!",
index 87811b0..b6ee3a2 100644 (file)
@@ -171,6 +171,7 @@ static void vega20_set_default_registry_data(struct pp_hwmgr *hwmgr)
        data->registry_data.gfxoff_controlled_by_driver = 1;
        data->gfxoff_allowed = false;
        data->counter_gfxoff = 0;
+       data->registry_data.pcie_dpm_key_disabled = !(hwmgr->feature_mask & PP_PCIE_DPM_MASK);
 }
 
 static int vega20_set_features_platform_caps(struct pp_hwmgr *hwmgr)
@@ -831,7 +832,9 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
        struct amdgpu_device *adev = (struct amdgpu_device *)(hwmgr->adev);
        struct vega20_hwmgr *data =
                        (struct vega20_hwmgr *)(hwmgr->backend);
-       uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg;
+       uint32_t pcie_gen = 0, pcie_width = 0, smu_pcie_arg, pcie_gen_arg, pcie_width_arg;
+       PPTable_t *pp_table = &(data->smc_state_table.pp_table);
+       int i;
        int ret;
 
        if (adev->pm.pcie_gen_mask & CAIL_PCIE_LINK_SPEED_SUPPORT_GEN4)
@@ -860,17 +863,51 @@ static int vega20_override_pcie_parameters(struct pp_hwmgr *hwmgr)
         * Bit 15:8:  PCIE GEN, 0 to 3 corresponds to GEN1 to GEN4
         * Bit 7:0:   PCIE lane width, 1 to 7 corresponds to x1 to x32
         */
-       smu_pcie_arg = (1 << 16) | (pcie_gen << 8) | pcie_width;
-       ret = smum_send_msg_to_smc_with_parameter(hwmgr,
-                       PPSMC_MSG_OverridePcieParameters, smu_pcie_arg,
-                       NULL);
-       PP_ASSERT_WITH_CODE(!ret,
-               "[OverridePcieParameters] Attempt to override pcie params failed!",
-               return ret);
+       for (i = 0; i < NUM_LINK_LEVELS; i++) {
+               pcie_gen_arg = (pp_table->PcieGenSpeed[i] > pcie_gen) ? pcie_gen :
+                       pp_table->PcieGenSpeed[i];
+               pcie_width_arg = (pp_table->PcieLaneCount[i] > pcie_width) ? pcie_width :
+                       pp_table->PcieLaneCount[i];
+
+               if (pcie_gen_arg != pp_table->PcieGenSpeed[i] || pcie_width_arg !=
+                   pp_table->PcieLaneCount[i]) {
+                       smu_pcie_arg = (i << 16) | (pcie_gen_arg << 8) | pcie_width_arg;
+                       ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+                               PPSMC_MSG_OverridePcieParameters, smu_pcie_arg,
+                               NULL);
+                       PP_ASSERT_WITH_CODE(!ret,
+                               "[OverridePcieParameters] Attempt to override pcie params failed!",
+                               return ret);
+               }
+
+               /* update the pptable */
+               pp_table->PcieGenSpeed[i] = pcie_gen_arg;
+               pp_table->PcieLaneCount[i] = pcie_width_arg;
+       }
+
+       /* override to the highest if it's disabled from ppfeaturemask */
+       if (data->registry_data.pcie_dpm_key_disabled) {
+               for (i = 0; i < NUM_LINK_LEVELS; i++) {
+                       smu_pcie_arg = (i << 16) | (pcie_gen << 8) | pcie_width;
+                       ret = smum_send_msg_to_smc_with_parameter(hwmgr,
+                               PPSMC_MSG_OverridePcieParameters, smu_pcie_arg,
+                               NULL);
+                       PP_ASSERT_WITH_CODE(!ret,
+                               "[OverridePcieParameters] Attempt to override pcie params failed!",
+                               return ret);
 
-       data->pcie_parameters_override = true;
-       data->pcie_gen_level1 = pcie_gen;
-       data->pcie_width_level1 = pcie_width;
+                       pp_table->PcieGenSpeed[i] = pcie_gen;
+                       pp_table->PcieLaneCount[i] = pcie_width;
+               }
+               ret = vega20_enable_smc_features(hwmgr,
+                               false,
+                               data->smu_features[GNLD_DPM_LINK].smu_feature_bitmap);
+               PP_ASSERT_WITH_CODE(!ret,
+                               "Attempt to Disable DPM LINK Failed!",
+                               return ret);
+               data->smu_features[GNLD_DPM_LINK].enabled = false;
+               data->smu_features[GNLD_DPM_LINK].supported = false;
+       }
 
        return 0;
 }
@@ -3319,9 +3356,7 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                        data->od8_settings.od8_settings_array;
        OverDriveTable_t *od_table =
                        &(data->smc_state_table.overdrive_table);
-       struct phm_ppt_v3_information *pptable_information =
-               (struct phm_ppt_v3_information *)hwmgr->pptable;
-       PPTable_t *pptable = (PPTable_t *)pptable_information->smc_pptable;
+       PPTable_t *pptable = &(data->smc_state_table.pp_table);
        struct pp_clock_levels_with_latency clocks;
        struct vega20_single_dpm_table *fclk_dpm_table =
                        &(data->dpm_table.fclk_table);
@@ -3420,13 +3455,9 @@ static int vega20_print_clock_levels(struct pp_hwmgr *hwmgr,
                current_lane_width =
                        vega20_get_current_pcie_link_width_level(hwmgr);
                for (i = 0; i < NUM_LINK_LEVELS; i++) {
-                       if (i == 1 && data->pcie_parameters_override) {
-                               gen_speed = data->pcie_gen_level1;
-                               lane_width = data->pcie_width_level1;
-                       } else {
-                               gen_speed = pptable->PcieGenSpeed[i];
-                               lane_width = pptable->PcieLaneCount[i];
-                       }
+                       gen_speed = pptable->PcieGenSpeed[i];
+                       lane_width = pptable->PcieLaneCount[i];
+
                        size += sprintf(buf + size, "%d: %s %s %dMhz %s\n", i,
                                        (gen_speed == 0) ? "2.5GT/s," :
                                        (gen_speed == 1) ? "5.0GT/s," :
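
The vega20 path applies the same per-level override and message-argument encoding documented in the comment earlier in this hunk. One worked instance of the packing (values illustrative): link level 1, Gen3 (pcie_gen = 2), x16 (pcie_width = 6):

    uint32_t smu_pcie_arg = (1 << 16) | (2 << 8) | 6;   /* == 0x00010206 */
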
index d143ef1..cd905e4 100644 (file)
@@ -1294,7 +1294,7 @@ static int smu_disable_dpms(struct smu_context *smu)
        bool use_baco = !smu->is_apu &&
                ((amdgpu_in_reset(adev) &&
                  (amdgpu_asic_reset_method(adev) == AMD_RESET_METHOD_BACO)) ||
-                ((adev->in_runpm || adev->in_hibernate) && amdgpu_asic_supports_baco(adev)));
+                ((adev->in_runpm || adev->in_s4) && amdgpu_asic_supports_baco(adev)));
 
        /*
         * For custom pptable uploading, skip the DPM features
@@ -1431,7 +1431,8 @@ static int smu_suspend(void *handle)
 
        smu->watermarks_bitmap &= ~(WATERMARKS_LOADED);
 
-       if (smu->is_apu)
+       /* skip CGPG when in S0ix */
+       if (smu->is_apu && !adev->in_s0ix)
                smu_set_gfx_cgpg(&adev->smu, false);
 
        return 0;
index 45564a7..9f0d03a 100644 (file)
@@ -1322,7 +1322,7 @@ static int arcturus_set_power_profile_mode(struct smu_context *smu,
                                                       CMN2ASIC_MAPPING_WORKLOAD,
                                                       profile_mode);
        if (workload_type < 0) {
-               dev_err(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
+               dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on arcturus\n", profile_mode);
                return -EINVAL;
        }
 
index 9058546..a621185 100644 (file)
@@ -78,6 +78,9 @@ MODULE_FIRMWARE("amdgpu/dimgrey_cavefish_smc.bin");
 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE_MASK 0xC000
 #define PCIE_LC_SPEED_CNTL__LC_CURRENT_DATA_RATE__SHIFT 0xE
 
+#define mmTHM_BACO_CNTL_ARCT                   0xA7
+#define mmTHM_BACO_CNTL_ARCT_BASE_IDX          0
+
 static int link_width[] = {0, 1, 2, 4, 8, 12, 16};
 static int link_speed[] = {25, 50, 80, 160};
 
@@ -1532,9 +1535,15 @@ int smu_v11_0_baco_set_state(struct smu_context *smu, enum smu_baco_state state)
                        break;
                default:
                        if (!ras || !ras->supported) {
-                               data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
-                               data |= 0x80000000;
-                               WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+                               if (adev->asic_type == CHIP_ARCTURUS) {
+                                       data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT);
+                                       data |= 0x80000000;
+                                       WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL_ARCT, data);
+                               } else {
+                                       data = RREG32_SOC15(THM, 0, mmTHM_BACO_CNTL);
+                                       data |= 0x80000000;
+                                       WREG32_SOC15(THM, 0, mmTHM_BACO_CNTL, data);
+                               }
 
                                ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_EnterBaco, 0, NULL);
                        } else {
index 093b011..101eaa2 100644 (file)
@@ -384,10 +384,15 @@ static int vangogh_dpm_set_jpeg_enable(struct smu_context *smu, bool enable)
 
 static bool vangogh_is_dpm_running(struct smu_context *smu)
 {
+       struct amdgpu_device *adev = smu->adev;
        int ret = 0;
        uint32_t feature_mask[2];
        uint64_t feature_enabled;
 
+       /* we need to re-init after suspend, so report DPM as not running */
+       if (adev->in_suspend)
+               return false;
+
        ret = smu_cmn_get_enabled_32_bits_mask(smu, feature_mask, 2);
 
        if (ret)
@@ -810,7 +815,7 @@ static int vangogh_set_power_profile_mode(struct smu_context *smu, long *input,
                                                       CMN2ASIC_MAPPING_WORKLOAD,
                                                       profile_mode);
        if (workload_type < 0) {
-               dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
+               dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on VANGOGH\n",
                                        profile_mode);
                return -EINVAL;
        }
@@ -1685,9 +1690,9 @@ static int vangogh_system_features_control(struct smu_context *smu, bool en)
        uint32_t feature_mask[2];
        int ret = 0;
 
-       if (adev->pm.fw_version >= 0x43f1700)
+       if (adev->pm.fw_version >= 0x43f1700 && !en)
                ret = smu_cmn_send_smc_msg_with_param(smu, SMU_MSG_RlcPowerNotify,
-                                                     en ? RLC_STATUS_NORMAL : RLC_STATUS_OFF, NULL);
+                                                     RLC_STATUS_OFF, NULL);
 
        bitmap_zero(feature->enabled, feature->feature_num);
        bitmap_zero(feature->supported, feature->feature_num);
index 5faa509..5493388 100644 (file)
@@ -844,7 +844,7 @@ static int renoir_set_power_profile_mode(struct smu_context *smu, long *input, u
                 * TODO: If some case need switch to powersave/default power mode
                 * then can consider enter WORKLOAD_COMPUTE/WORKLOAD_CUSTOM for power saving.
                 */
-               dev_err_once(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
+               dev_dbg(smu->adev->dev, "Unsupported power profile mode %d on RENOIR\n", profile_mode);
                return -EINVAL;
        }
 
index bb620fd..bcedd4d 100644 (file)
@@ -762,7 +762,7 @@ void smu_cmn_init_soft_gpu_metrics(void *table, uint8_t frev, uint8_t crev)
                structure_size = sizeof(struct gpu_metrics_v2_0);
                break;
        default:
-               break;
+               return;
        }
 
 #undef METRICS_VERSION
index 25ce42e..61e09f8 100644 (file)
 #include <drm/drm_encoder.h>
 #include <drm/drm_modes.h>
 
-/* drm_fb_helper.c */
-#ifdef CONFIG_DRM_FBDEV_EMULATION
-int drm_fb_helper_modinit(void);
-#else
-static inline int drm_fb_helper_modinit(void)
-{
-       return 0;
-}
-#endif
-
 /* drm_dp_aux_dev.c */
 #ifdef CONFIG_DRM_DP_AUX_CHARDEV
 int drm_dp_aux_dev_init(void);
index a44c3a4..f6baa20 100644 (file)
@@ -2048,7 +2048,7 @@ static void drm_fbdev_cleanup(struct drm_fb_helper *fb_helper)
 
        if (shadow)
                vfree(shadow);
-       else
+       else if (fb_helper->buffer)
                drm_client_buffer_vunmap(fb_helper->buffer);
 
        drm_client_framebuffer_delete(fb_helper->buffer);
@@ -2514,24 +2514,3 @@ void drm_fbdev_generic_setup(struct drm_device *dev,
        drm_client_register(&fb_helper->client);
 }
 EXPORT_SYMBOL(drm_fbdev_generic_setup);
-
-/* The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EXPERT)
- * but the module doesn't depend on any fb console symbols.  At least
- * attempt to load fbcon to avoid leaving the system without a usable console.
- */
-int __init drm_fb_helper_modinit(void)
-{
-#if defined(CONFIG_FRAMEBUFFER_CONSOLE_MODULE) && !defined(CONFIG_EXPERT)
-       const char name[] = "fbcon";
-       struct module *fbcon;
-
-       mutex_lock(&module_mutex);
-       fbcon = find_module(name);
-       mutex_unlock(&module_mutex);
-
-       if (!fbcon)
-               request_module_nowait(name);
-#endif
-       return 0;
-}
-EXPORT_SYMBOL(drm_fb_helper_modinit);
index 6b116bf..7efbccf 100644 (file)
@@ -775,20 +775,19 @@ void drm_event_cancel_free(struct drm_device *dev,
 EXPORT_SYMBOL(drm_event_cancel_free);
 
 /**
- * drm_send_event_locked - send DRM event to file descriptor
+ * drm_send_event_helper - send DRM event to file descriptor
  * @dev: DRM device
  * @e: DRM event to deliver
+ * @timestamp: timestamp to set for the fence event, in the kernel's
+ * CLOCK_MONOTONIC time domain
  *
- * This function sends the event @e, initialized with drm_event_reserve_init(),
- * to its associated userspace DRM file. Callers must already hold
- * &drm_device.event_lock, see drm_send_event() for the unlocked version.
- *
- * Note that the core will take care of unlinking and disarming events when the
- * corresponding DRM file is closed. Drivers need not worry about whether the
- * DRM file for this event still exists and can call this function upon
- * completion of the asynchronous work unconditionally.
+ * This helper function sends the event @e, initialized with
+ * drm_event_reserve_init(), to its associated userspace DRM file.
+ * The timestamp variant of dma_fence_signal() is used when the caller
+ * supplies a valid timestamp.
  */
-void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e)
+void drm_send_event_helper(struct drm_device *dev,
+                          struct drm_pending_event *e, ktime_t timestamp)
 {
        assert_spin_locked(&dev->event_lock);
 
@@ -799,7 +798,10 @@ void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e)
        }
 
        if (e->fence) {
-               dma_fence_signal(e->fence);
+               if (timestamp)
+                       dma_fence_signal_timestamp(e->fence, timestamp);
+               else
+                       dma_fence_signal(e->fence);
                dma_fence_put(e->fence);
        }
 
@@ -814,6 +816,48 @@ void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e)
        wake_up_interruptible_poll(&e->file_priv->event_wait,
                EPOLLIN | EPOLLRDNORM);
 }
+
+/**
+ * drm_send_event_timestamp_locked - send DRM event to file descriptor
+ * @dev: DRM device
+ * @e: DRM event to deliver
+ * @timestamp: timestamp to set for the fence event, in the kernel's
+ * CLOCK_MONOTONIC time domain
+ *
+ * This function sends the event @e, initialized with drm_event_reserve_init(),
+ * to its associated userspace DRM file. Callers must already hold
+ * &drm_device.event_lock.
+ *
+ * Note that the core will take care of unlinking and disarming events when the
+ * corresponding DRM file is closed. Drivers need not worry about whether the
+ * DRM file for this event still exists and can call this function upon
+ * completion of the asynchronous work unconditionally.
+ */
+void drm_send_event_timestamp_locked(struct drm_device *dev,
+                                    struct drm_pending_event *e, ktime_t timestamp)
+{
+       drm_send_event_helper(dev, e, timestamp);
+}
+EXPORT_SYMBOL(drm_send_event_timestamp_locked);
+
+/**
+ * drm_send_event_locked - send DRM event to file descriptor
+ * @dev: DRM device
+ * @e: DRM event to deliver
+ *
+ * This function sends the event @e, initialized with drm_event_reserve_init(),
+ * to its associated userspace DRM file. Callers must already hold
+ * &drm_device.event_lock, see drm_send_event() for the unlocked version.
+ *
+ * Note that the core will take care of unlinking and disarming events when the
+ * corresponding DRM file is closed. Drivers need not worry about whether the
+ * DRM file for this event still exists and can call this function upon
+ * completion of the asynchronous work unconditionally.
+ */
+void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e)
+{
+       drm_send_event_helper(dev, e, 0);
+}
 EXPORT_SYMBOL(drm_send_event_locked);
 
 /**
@@ -836,7 +880,7 @@ void drm_send_event(struct drm_device *dev, struct drm_pending_event *e)
        unsigned long irqflags;
 
        spin_lock_irqsave(&dev->event_lock, irqflags);
-       drm_send_event_locked(dev, e);
+       drm_send_event_helper(dev, e, 0);
        spin_unlock_irqrestore(&dev->event_lock, irqflags);
 }
 EXPORT_SYMBOL(drm_send_event);
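
A minimal usage sketch (not part of the patch) of the new timestamp-aware helper: a driver's flip-completion path, already holding dev->event_lock, passes the hardware vblank timestamp so any attached fence is signalled with the same CLOCK_MONOTONIC time; passing 0 falls back to plain dma_fence_signal(). The function name here is hypothetical.

	/* Sketch only; "my_driver_complete_flip" is a made-up driver hook. */
	static void my_driver_complete_flip(struct drm_device *dev,
					    struct drm_pending_event *e,
					    ktime_t hw_vblank_time)
	{
		assert_spin_locked(&dev->event_lock);
		drm_send_event_timestamp_locked(dev, e, hw_vblank_time);
	}
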
index 9825c37..6d625ce 100644 (file)
@@ -357,13 +357,14 @@ static void drm_gem_shmem_vunmap_locked(struct drm_gem_shmem_object *shmem,
        if (--shmem->vmap_use_count > 0)
                return;
 
-       if (obj->import_attach)
+       if (obj->import_attach) {
                dma_buf_vunmap(obj->import_attach->dmabuf, map);
-       else
+       } else {
                vunmap(shmem->vaddr);
+               drm_gem_shmem_put_pages(shmem);
+       }
 
        shmem->vaddr = NULL;
-       drm_gem_shmem_put_pages(shmem);
 }
 
 /*
@@ -525,14 +526,28 @@ static vm_fault_t drm_gem_shmem_fault(struct vm_fault *vmf)
        struct drm_gem_object *obj = vma->vm_private_data;
        struct drm_gem_shmem_object *shmem = to_drm_gem_shmem_obj(obj);
        loff_t num_pages = obj->size >> PAGE_SHIFT;
+       vm_fault_t ret;
        struct page *page;
+       pgoff_t page_offset;
 
-       if (vmf->pgoff >= num_pages || WARN_ON_ONCE(!shmem->pages))
-               return VM_FAULT_SIGBUS;
+       /* We don't use vmf->pgoff since that has the fake offset */
+       page_offset = (vmf->address - vma->vm_start) >> PAGE_SHIFT;
 
-       page = shmem->pages[vmf->pgoff];
+       mutex_lock(&shmem->pages_lock);
 
-       return vmf_insert_page(vma, vmf->address, page);
+       if (page_offset >= num_pages ||
+           WARN_ON_ONCE(!shmem->pages) ||
+           shmem->madv < 0) {
+               ret = VM_FAULT_SIGBUS;
+       } else {
+               page = shmem->pages[page_offset];
+
+               ret = vmf_insert_page(vma, vmf->address, page);
+       }
+
+       mutex_unlock(&shmem->pages_lock);
+
+       return ret;
 }
 
 static void drm_gem_shmem_vm_open(struct vm_area_struct *vma)
@@ -581,9 +596,6 @@ int drm_gem_shmem_mmap(struct drm_gem_object *obj, struct vm_area_struct *vma)
        struct drm_gem_shmem_object *shmem;
        int ret;
 
-       /* Remove the fake offset */
-       vma->vm_pgoff -= drm_vma_node_start(&obj->vma_node);
-
        if (obj->import_attach) {
                /* Drop the reference drm_gem_mmap_obj() acquired. */
                drm_gem_object_put(obj);
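
The fault handler above stops trusting vmf->pgoff: since mmap no longer subtracts the fake GEM offset from vm_pgoff, the page index must be derived from the faulting address. A standalone sketch of that translation (the helper name is hypothetical):

	/* Sketch: vmf->pgoff still carries the fake mmap offset, so index
	 * the page array via the faulting address instead.
	 */
	static pgoff_t shmem_fault_page_index(const struct vm_fault *vmf)
	{
		return (vmf->address - vmf->vma->vm_start) >> PAGE_SHIFT;
	}
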
index f86448a..dc734d4 100644 (file)
@@ -99,6 +99,8 @@ static int compat_drm_version(struct file *file, unsigned int cmd,
        if (copy_from_user(&v32, (void __user *)arg, sizeof(v32)))
                return -EFAULT;
 
+       memset(&v, 0, sizeof(v));
+
        v = (struct drm_version) {
                .name_len = v32.name_len,
                .name = compat_ptr(v32.name),
@@ -137,6 +139,9 @@ static int compat_drm_getunique(struct file *file, unsigned int cmd,
 
        if (copy_from_user(&uq32, (void __user *)arg, sizeof(uq32)))
                return -EFAULT;
+
+       memset(&uq, 0, sizeof(uq));
+
        uq = (struct drm_unique){
                .unique_len = uq32.unique_len,
                .unique = compat_ptr(uq32.unique),
@@ -265,6 +270,8 @@ static int compat_drm_getclient(struct file *file, unsigned int cmd,
        if (copy_from_user(&c32, argp, sizeof(c32)))
                return -EFAULT;
 
+       memset(&client, 0, sizeof(client));
+
        client.idx = c32.idx;
 
        err = drm_ioctl_kernel(file, drm_getclient, &client, 0);
@@ -852,6 +859,8 @@ static int compat_drm_wait_vblank(struct file *file, unsigned int cmd,
        if (copy_from_user(&req32, argp, sizeof(req32)))
                return -EFAULT;
 
+       memset(&req, 0, sizeof(req));
+
        req.request.type = req32.request.type;
        req.request.sequence = req32.request.sequence;
        req.request.signal = req32.request.signal;
@@ -889,6 +898,8 @@ static int compat_drm_mode_addfb2(struct file *file, unsigned int cmd,
        struct drm_mode_fb_cmd2 req64;
        int err;
 
+       memset(&req64, 0, sizeof(req64));
+
        if (copy_from_user(&req64, argp,
                           offsetof(drm_mode_fb_cmd232_t, modifier)))
                return -EFAULT;
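
Every compat handler above gains the same hardening: zero the kernel-side structure before filling it from the 32-bit layout, so padding and any members the handler never writes can't leak stale kernel stack back to userspace. The bare pattern, with a hypothetical pair of structs:

	/* Sketch: memset() before populating. A designated initializer only
	 * defines the named members; explicit zeroing covers the rest of the
	 * struct (including padding) before it is copied back to userspace.
	 */
	static void fill_from_compat(struct foo *k, const struct foo32 *c)
	{
		memset(k, 0, sizeof(*k));
		k->len = c->len;
		k->ptr = compat_ptr(c->ptr);
	}
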
index 221a852..f933da1 100644 (file)
@@ -64,19 +64,18 @@ MODULE_PARM_DESC(edid_firmware,
 
 static int __init drm_kms_helper_init(void)
 {
-       int ret;
-
-       /* Call init functions from specific kms helpers here */
-       ret = drm_fb_helper_modinit();
-       if (ret < 0)
-               goto out;
-
-       ret = drm_dp_aux_dev_init();
-       if (ret < 0)
-               goto out;
-
-out:
-       return ret;
+       /*
+        * The Kconfig DRM_KMS_HELPER selects FRAMEBUFFER_CONSOLE (if !EXPERT)
+        * but the module doesn't depend on any fb console symbols.  At least
+        * attempt to load fbcon to avoid leaving the system without a usable
+        * console.
+        */
+       if (IS_ENABLED(CONFIG_DRM_FBDEV_EMULATION) &&
+           IS_MODULE(CONFIG_FRAMEBUFFER_CONSOLE) &&
+           !IS_ENABLED(CONFIG_EXPERT))
+               request_module_nowait("fbcon");
+
+       return drm_dp_aux_dev_init();
 }
 
 static void __exit drm_kms_helper_exit(void)
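
This rewrite also replaces the #ifdef block deleted from drm_fb_helper.c earlier in this diff with IS_ENABLED()/IS_MODULE() checks, so both branches are compiled and type-checked in every configuration and the dead one is discarded as a constant condition. The idiom in isolation, with a made-up config option:

	/* Sketch: compile-time constant conditions instead of #ifdef. */
	static void maybe_load_console(void)
	{
		if (IS_ENABLED(CONFIG_MY_FBDEV) &&
		    IS_MODULE(CONFIG_MY_FB_CONSOLE))
			request_module_nowait("fbcon");
	}
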
index 30912d8..893165e 100644 (file)
@@ -1006,7 +1006,14 @@ static void send_vblank_event(struct drm_device *dev,
                break;
        }
        trace_drm_vblank_event_delivered(e->base.file_priv, e->pipe, seq);
-       drm_send_event_locked(dev, &e->base);
+       /*
+        * Use the same timestamp for any associated fence signal to avoid
+        * mismatch in timestamps for vsync & fence events triggered by the
+        * same HW event. Frameworks like SurfaceFlinger in Android expect the
+        * retire-fence timestamp to match the HW vsync exactly, as they use it
+        * for their software vsync modeling.
+        */
+       drm_send_event_timestamp_locked(dev, &e->base, now);
 }
 
 /**
index 6d38c5c..db69f19 100644 (file)
@@ -689,7 +689,8 @@ static int etnaviv_gem_userptr_get_pages(struct etnaviv_gem_object *etnaviv_obj)
                struct page **pages = pvec + pinned;
 
                ret = pin_user_pages_fast(ptr, num_pages,
-                                         !userptr->ro ? FOLL_WRITE : 0, pages);
+                                         FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM,
+                                         pages);
                if (ret < 0) {
                        unpin_user_pages(pvec, pinned);
                        kvfree(pvec);
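
The etnaviv change pins userptr pages with FOLL_LONGTERM (plus unconditional FOLL_WRITE | FOLL_FORCE), telling the core MM the pin may outlive the syscall so it can first migrate the pages out of CMA/ZONE_MOVABLE. The call pattern on its own (hypothetical wrapper):

	/* Sketch: long-term pin; release later with unpin_user_pages(). */
	static int pin_user_buffer(unsigned long uaddr, int npages,
				   struct page **pages)
	{
		return pin_user_pages_fast(uaddr, npages,
					   FOLL_WRITE | FOLL_FORCE | FOLL_LONGTERM,
					   pages);
	}
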
index 1f79bc2..1510e4e 100644 (file)
@@ -13,7 +13,6 @@
 #include <linux/irq.h>
 #include <linux/mfd/syscon.h>
 #include <linux/of_device.h>
-#include <linux/of_gpio.h>
 #include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/regmap.h>
index 32bd1fd..2385a75 100644 (file)
@@ -21,7 +21,6 @@ subdir-ccflags-y += $(call cc-disable-warning, unused-but-set-variable)
 subdir-ccflags-y += $(call cc-disable-warning, sign-compare)
 subdir-ccflags-y += $(call cc-disable-warning, sometimes-uninitialized)
 subdir-ccflags-y += $(call cc-disable-warning, initializer-overrides)
-subdir-ccflags-y += $(call cc-disable-warning, uninitialized)
 subdir-ccflags-y += $(call cc-disable-warning, frame-address)
 subdir-ccflags-$(CONFIG_DRM_I915_WERROR) += -Werror
 
@@ -298,7 +297,7 @@ obj-$(CONFIG_DRM_I915_GVT_KVMGT) += gvt/kvmgt.o
 no-header-test := \
        display/intel_vbt_defs.h
 
-extra-$(CONFIG_DRM_I915_WERROR) += \
+always-$(CONFIG_DRM_I915_WERROR) += \
        $(patsubst %.h,%.hdrtest, $(filter-out $(no-header-test), \
                $(shell cd $(srctree)/$(src) && find * -name '*.h')))
 
index e21fb14..833d0c1 100644 (file)
@@ -84,13 +84,31 @@ static void intel_dsm_platform_mux_info(acpi_handle dhandle)
                return;
        }
 
+       if (!pkg->package.count) {
+               DRM_DEBUG_DRIVER("no connection in _DSM\n");
+               return;
+       }
+
        connector_count = &pkg->package.elements[0];
        DRM_DEBUG_DRIVER("MUX info connectors: %lld\n",
                  (unsigned long long)connector_count->integer.value);
        for (i = 1; i < pkg->package.count; i++) {
                union acpi_object *obj = &pkg->package.elements[i];
-               union acpi_object *connector_id = &obj->package.elements[0];
-               union acpi_object *info = &obj->package.elements[1];
+               union acpi_object *connector_id;
+               union acpi_object *info;
+
+               if (obj->type != ACPI_TYPE_PACKAGE || obj->package.count < 2) {
+                       DRM_DEBUG_DRIVER("Invalid object for MUX #%d\n", i);
+                       continue;
+               }
+
+               connector_id = &obj->package.elements[0];
+               info = &obj->package.elements[1];
+               if (info->type != ACPI_TYPE_BUFFER || info->buffer.length < 4) {
+                       DRM_DEBUG_DRIVER("Invalid info for MUX obj #%d\n", i);
+                       continue;
+               }
+
                DRM_DEBUG_DRIVER("Connector id: 0x%016llx\n",
                          (unsigned long long)connector_id->integer.value);
                DRM_DEBUG_DRIVER("  port id: %s\n",
index 4683f98..c3f2962 100644 (file)
@@ -317,12 +317,13 @@ int intel_plane_atomic_check_with_state(const struct intel_crtc_state *old_crtc_
        if (!new_plane_state->hw.crtc && !old_plane_state->hw.crtc)
                return 0;
 
-       new_crtc_state->enabled_planes |= BIT(plane->id);
-
        ret = plane->check_plane(new_crtc_state, new_plane_state);
        if (ret)
                return ret;
 
+       if (fb)
+               new_crtc_state->enabled_planes |= BIT(plane->id);
+
        /* FIXME pre-g4x don't work like this */
        if (new_plane_state->uapi.visible)
                new_crtc_state->active_planes |= BIT(plane->id);
index 57b0a3e..8e77ca7 100644 (file)
@@ -109,7 +109,6 @@ void intel_crtc_state_reset(struct intel_crtc_state *crtc_state,
        crtc_state->cpu_transcoder = INVALID_TRANSCODER;
        crtc_state->master_transcoder = INVALID_TRANSCODER;
        crtc_state->hsw_workaround_pipe = INVALID_PIPE;
-       crtc_state->output_format = INTEL_OUTPUT_FORMAT_INVALID;
        crtc_state->scaler_state.scaler_id = -1;
        crtc_state->mst_master_transcoder = INVALID_TRANSCODER;
 }
index 7ea1e5b..8d7aaa6 100644 (file)
@@ -10211,7 +10211,6 @@ static void snprintf_output_types(char *buf, size_t len,
 }
 
 static const char * const output_format_str[] = {
-       [INTEL_OUTPUT_FORMAT_INVALID] = "Invalid",
        [INTEL_OUTPUT_FORMAT_RGB] = "RGB",
        [INTEL_OUTPUT_FORMAT_YCBCR420] = "YCBCR4:2:0",
        [INTEL_OUTPUT_FORMAT_YCBCR444] = "YCBCR4:4:4",
@@ -10220,7 +10219,7 @@ static const char * const output_format_str[] = {
 static const char *output_formats(enum intel_output_format format)
 {
        if (format >= ARRAY_SIZE(output_format_str))
-               format = INTEL_OUTPUT_FORMAT_INVALID;
+               return "invalid";
        return output_format_str[format];
 }
 
index 3939774..184ecbb 100644 (file)
@@ -830,7 +830,6 @@ struct intel_crtc_wm_state {
 };
 
 enum intel_output_format {
-       INTEL_OUTPUT_FORMAT_INVALID,
        INTEL_OUTPUT_FORMAT_RGB,
        INTEL_OUTPUT_FORMAT_YCBCR420,
        INTEL_OUTPUT_FORMAT_YCBCR444,
index 8c12d53..775d89b 100644 (file)
@@ -3619,9 +3619,7 @@ intel_dp_get_dpcd(struct intel_dp *intel_dp)
 {
        int ret;
 
-       intel_dp_lttpr_init(intel_dp);
-
-       if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd))
+       if (intel_dp_init_lttpr_and_dprx_caps(intel_dp) < 0)
                return false;
 
        /*
index eaebf12..10fe17b 100644 (file)
@@ -133,6 +133,7 @@ static u32 g4x_get_aux_send_ctl(struct intel_dp *intel_dp,
        else
                precharge = 5;
 
+       /* Max timeout value on G4x-BDW: 1.6ms */
        if (IS_BROADWELL(dev_priv))
                timeout = DP_AUX_CH_CTL_TIME_OUT_600us;
        else
@@ -159,6 +160,12 @@ static u32 skl_get_aux_send_ctl(struct intel_dp *intel_dp,
        enum phy phy = intel_port_to_phy(i915, dig_port->base.port);
        u32 ret;
 
+       /*
+        * Max timeout values:
+        * SKL-GLK: 1.6ms
+        * CNL: 3.2ms
+        * ICL+: 4ms
+        */
        ret = DP_AUX_CH_CTL_SEND_BUSY |
              DP_AUX_CH_CTL_DONE |
              DP_AUX_CH_CTL_INTERRUPT |
index 892d7db..be6ac0d 100644 (file)
@@ -34,6 +34,11 @@ intel_dp_dump_link_status(const u8 link_status[DP_LINK_STATUS_SIZE])
                      link_status[3], link_status[4], link_status[5]);
 }
 
+static void intel_dp_reset_lttpr_common_caps(struct intel_dp *intel_dp)
+{
+       memset(&intel_dp->lttpr_common_caps, 0, sizeof(intel_dp->lttpr_common_caps));
+}
+
 static void intel_dp_reset_lttpr_count(struct intel_dp *intel_dp)
 {
        intel_dp->lttpr_common_caps[DP_PHY_REPEATER_CNT -
@@ -81,19 +86,36 @@ static void intel_dp_read_lttpr_phy_caps(struct intel_dp *intel_dp,
 
 static bool intel_dp_read_lttpr_common_caps(struct intel_dp *intel_dp)
 {
-       if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
-                                         intel_dp->lttpr_common_caps) < 0) {
-               memset(intel_dp->lttpr_common_caps, 0,
-                      sizeof(intel_dp->lttpr_common_caps));
+       struct drm_i915_private *i915 = dp_to_i915(intel_dp);
+
+       if (intel_dp_is_edp(intel_dp))
                return false;
-       }
+
+       /*
+        * Detecting LTTPRs must be avoided on platforms with an AUX timeout
+        * period < 3.2ms. (see DP Standard v2.0, 2.11.2, 3.6.6.1).
+        */
+       if (INTEL_GEN(i915) < 10)
+               return false;
+
+       if (drm_dp_read_lttpr_common_caps(&intel_dp->aux,
+                                         intel_dp->lttpr_common_caps) < 0)
+               goto reset_caps;
 
        drm_dbg_kms(&dp_to_i915(intel_dp)->drm,
                    "LTTPR common capabilities: %*ph\n",
                    (int)sizeof(intel_dp->lttpr_common_caps),
                    intel_dp->lttpr_common_caps);
 
+       /* The minimum value of LT_TUNABLE_PHY_REPEATER_FIELD_DATA_STRUCTURE_REV is 1.4 */
+       if (intel_dp->lttpr_common_caps[0] < 0x14)
+               goto reset_caps;
+
        return true;
+
+reset_caps:
+       intel_dp_reset_lttpr_common_caps(intel_dp);
+       return false;
 }
 
 static bool
@@ -106,33 +128,49 @@ intel_dp_set_lttpr_transparent_mode(struct intel_dp *intel_dp, bool enable)
 }
 
 /**
- * intel_dp_lttpr_init - detect LTTPRs and init the LTTPR link training mode
+ * intel_dp_init_lttpr_and_dprx_caps - detect LTTPR and DPRX caps, init the LTTPR link training mode
  * @intel_dp: Intel DP struct
  *
- * Read the LTTPR common capabilities, switch to non-transparent link training
- * mode if any is detected and read the PHY capabilities for all detected
- * LTTPRs. In case of an LTTPR detection error or if the number of
+ * Read the LTTPR common and DPRX capabilities and switch to non-transparent
+ * link training mode if any is detected and read the PHY capabilities for all
+ * detected LTTPRs. In case of an LTTPR detection error or if the number of
  * LTTPRs is more than is supported (8), fall back to the no-LTTPR,
  * transparent mode link training mode.
  *
  * Returns:
- *   >0  if LTTPRs were detected and the non-transparent LT mode was set
+ *   >0  if LTTPRs were detected and the non-transparent LT mode was set. The
+ *       DPRX capabilities are read out.
  *    0  if no LTTPRs or more than 8 LTTPRs were detected or in case of a
- *       detection failure and the transparent LT mode was set
+ *       detection failure and the transparent LT mode was set. The DPRX
+ *       capabilities are read out.
+ *   <0  Reading out the DPRX capabilities failed.
  */
-int intel_dp_lttpr_init(struct intel_dp *intel_dp)
+int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp)
 {
        int lttpr_count;
        bool ret;
        int i;
 
-       if (intel_dp_is_edp(intel_dp))
-               return 0;
-
        ret = intel_dp_read_lttpr_common_caps(intel_dp);
+
+       /* The DPTX shall read the DPRX caps after LTTPR detection. */
+       if (drm_dp_read_dpcd_caps(&intel_dp->aux, intel_dp->dpcd)) {
+               intel_dp_reset_lttpr_common_caps(intel_dp);
+               return -EIO;
+       }
+
        if (!ret)
                return 0;
 
+       /*
+        * The 0xF0000-0xF02FF range is only valid if the DPCD revision is
+        * at least 1.4.
+        */
+       if (intel_dp->dpcd[DP_DPCD_REV] < 0x14) {
+               intel_dp_reset_lttpr_common_caps(intel_dp);
+               return 0;
+       }
+
        lttpr_count = drm_dp_lttpr_count(intel_dp->lttpr_common_caps);
        /*
         * Prevent setting LTTPR transparent mode explicitly if no LTTPRs are
@@ -172,7 +210,7 @@ int intel_dp_lttpr_init(struct intel_dp *intel_dp)
 
        return lttpr_count;
 }
-EXPORT_SYMBOL(intel_dp_lttpr_init);
+EXPORT_SYMBOL(intel_dp_init_lttpr_and_dprx_caps);
 
 static u8 dp_voltage_max(u8 preemph)
 {
@@ -807,7 +845,10 @@ void intel_dp_start_link_train(struct intel_dp *intel_dp,
         * TODO: Reiniting LTTPRs here won't be needed once proper connector
         * HW state readout is added.
         */
-       int lttpr_count = intel_dp_lttpr_init(intel_dp);
+       int lttpr_count = intel_dp_init_lttpr_and_dprx_caps(intel_dp);
+
+       if (lttpr_count < 0)
+               return;
 
        if (!intel_dp_link_train_all_phys(intel_dp, crtc_state, lttpr_count))
                intel_dp_schedule_fallback_link_training(intel_dp, crtc_state);
index 6a1f76b..9cb7c28 100644 (file)
@@ -11,7 +11,7 @@
 struct intel_crtc_state;
 struct intel_dp;
 
-int intel_dp_lttpr_init(struct intel_dp *intel_dp);
+int intel_dp_init_lttpr_and_dprx_caps(struct intel_dp *intel_dp);
 
 void intel_dp_get_adjust_train(struct intel_dp *intel_dp,
                               const struct intel_crtc_state *crtc_state,
index f58cc57..a86c57d 100644 (file)
@@ -1014,20 +1014,14 @@ static i915_reg_t dss_ctl1_reg(const struct intel_crtc_state *crtc_state)
 {
        enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
 
-       if (crtc_state->cpu_transcoder == TRANSCODER_EDP)
-               return DSS_CTL1;
-
-       return ICL_PIPE_DSS_CTL1(pipe);
+       return is_pipe_dsc(crtc_state) ? ICL_PIPE_DSS_CTL1(pipe) : DSS_CTL1;
 }
 
 static i915_reg_t dss_ctl2_reg(const struct intel_crtc_state *crtc_state)
 {
        enum pipe pipe = to_intel_crtc(crtc_state->uapi.crtc)->pipe;
 
-       if (crtc_state->cpu_transcoder == TRANSCODER_EDP)
-               return DSS_CTL2;
-
-       return ICL_PIPE_DSS_CTL2(pipe);
+       return is_pipe_dsc(crtc_state) ? ICL_PIPE_DSS_CTL2(pipe) : DSS_CTL2;
 }
 
 void intel_dsc_enable(struct intel_encoder *encoder,
index fb1b1d0..9cf555d 100644 (file)
@@ -713,9 +713,12 @@ static int engine_setup_common(struct intel_engine_cs *engine)
                goto err_status;
        }
 
+       err = intel_engine_init_cmd_parser(engine);
+       if (err)
+               goto err_cmd_parser;
+
        intel_engine_init_active(engine, ENGINE_PHYSICAL);
        intel_engine_init_execlists(engine);
-       intel_engine_init_cmd_parser(engine);
        intel_engine_init__pm(engine);
        intel_engine_init_retire(engine);
 
@@ -732,6 +735,8 @@ static int engine_setup_common(struct intel_engine_cs *engine)
 
        return 0;
 
+err_cmd_parser:
+       intel_breadcrumbs_free(engine->breadcrumbs);
 err_status:
        cleanup_status_page(engine);
        return err;
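
The hunk above follows the usual goto-ladder rule: once intel_engine_init_cmd_parser() can fail, every step initialised before it needs a matching unwind label, torn down in reverse order. Schematically (all names hypothetical):

	/* Sketch: reverse-order unwind for multi-step setup. */
	static int setup(void)
	{
		int err;

		err = init_a();
		if (err)
			return err;

		err = init_b();
		if (err)
			goto err_a;

		err = init_c();
		if (err)
			goto err_b;

		return 0;

	err_b:
		teardown_b();
	err_a:
		teardown_a();
		return err;
	}
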
index a357bb4..67de2b1 100644 (file)
@@ -316,7 +316,18 @@ void i915_vma_revoke_fence(struct i915_vma *vma)
        WRITE_ONCE(fence->vma, NULL);
        vma->fence = NULL;
 
-       with_intel_runtime_pm_if_in_use(fence_to_uncore(fence)->rpm, wakeref)
+       /*
+        * Skip the write to HW if and only if the device is currently
+        * suspended.
+        *
+        * If the driver does not currently hold a wakeref (if_in_use == 0),
+        * the device may currently be runtime suspended, or it may be woken
+        * up before the suspend takes place. If the device is not suspended
+        * (powered down) and we skip clearing the fence register, the HW is
+        * left in an undefined state where we may end up with multiple
+        * registers overlapping.
+        */
+       with_intel_runtime_pm_if_active(fence_to_uncore(fence)->rpm, wakeref)
                fence_write(fence);
 }
 
index d54ea0e..fef1e85 100644 (file)
@@ -41,6 +41,7 @@
 #include "gt/intel_lrc.h"
 #include "gt/intel_ring.h"
 #include "gt/intel_gt_requests.h"
+#include "gt/shmem_utils.h"
 #include "gvt.h"
 #include "i915_pvinfo.h"
 #include "trace.h"
@@ -3094,71 +3095,28 @@ int intel_gvt_scan_and_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
  */
 void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
 {
+       const unsigned long start = LRC_STATE_PN * PAGE_SIZE;
        struct intel_gvt *gvt = vgpu->gvt;
-       struct drm_i915_private *dev_priv = gvt->gt->i915;
        struct intel_engine_cs *engine;
        enum intel_engine_id id;
-       const unsigned long start = LRC_STATE_PN * PAGE_SIZE;
-       struct i915_request *rq;
-       struct intel_vgpu_submission *s = &vgpu->submission;
-       struct i915_request *requests[I915_NUM_ENGINES] = {};
-       bool is_ctx_pinned[I915_NUM_ENGINES] = {};
-       int ret = 0;
 
        if (gvt->is_reg_whitelist_updated)
                return;
 
-       for_each_engine(engine, &dev_priv->gt, id) {
-               ret = intel_context_pin(s->shadow[id]);
-               if (ret) {
-                       gvt_vgpu_err("fail to pin shadow ctx\n");
-                       goto out;
-               }
-               is_ctx_pinned[id] = true;
-
-               rq = i915_request_create(s->shadow[id]);
-               if (IS_ERR(rq)) {
-                       gvt_vgpu_err("fail to alloc default request\n");
-                       ret = -EIO;
-                       goto out;
-               }
-               requests[id] = i915_request_get(rq);
-               i915_request_add(rq);
-       }
-
-       if (intel_gt_wait_for_idle(&dev_priv->gt,
-                               I915_GEM_IDLE_TIMEOUT) == -ETIME) {
-               ret = -EIO;
-               goto out;
-       }
-
        /* scan init ctx to update cmd accessible list */
-       for_each_engine(engine, &dev_priv->gt, id) {
-               int size = engine->context_size - PAGE_SIZE;
-               void *vaddr;
+       for_each_engine(engine, gvt->gt, id) {
                struct parser_exec_state s;
-               struct drm_i915_gem_object *obj;
-               struct i915_request *rq;
-
-               rq = requests[id];
-               GEM_BUG_ON(!i915_request_completed(rq));
-               GEM_BUG_ON(!intel_context_is_pinned(rq->context));
-               obj = rq->context->state->obj;
-
-               if (!obj) {
-                       ret = -EIO;
-                       goto out;
-               }
+               void *vaddr;
+               int ret;
 
-               i915_gem_object_set_cache_coherency(obj,
-                                                   I915_CACHE_LLC);
+               if (!engine->default_state)
+                       continue;
 
-               vaddr = i915_gem_object_pin_map(obj, I915_MAP_WB);
+               vaddr = shmem_pin_map(engine->default_state);
                if (IS_ERR(vaddr)) {
-                       gvt_err("failed to pin init ctx obj, ring=%d, err=%lx\n",
-                               id, PTR_ERR(vaddr));
-                       ret = PTR_ERR(vaddr);
-                       goto out;
+                       gvt_err("failed to map %s->default state, err:%zd\n",
+                               engine->name, PTR_ERR(vaddr));
+                       return;
                }
 
                s.buf_type = RING_BUFFER_CTX;
@@ -3166,9 +3124,9 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
                s.vgpu = vgpu;
                s.engine = engine;
                s.ring_start = 0;
-               s.ring_size = size;
+               s.ring_size = engine->context_size - start;
                s.ring_head = 0;
-               s.ring_tail = size;
+               s.ring_tail = s.ring_size;
                s.rb_va = vaddr + start;
                s.workload = NULL;
                s.is_ctx_wa = false;
@@ -3176,29 +3134,18 @@ void intel_gvt_update_reg_whitelist(struct intel_vgpu *vgpu)
 
                /* skipping the first RING_CTX_SIZE(0x50) dwords */
                ret = ip_gma_set(&s, RING_CTX_SIZE);
-               if (ret) {
-                       i915_gem_object_unpin_map(obj);
-                       goto out;
+               if (ret == 0) {
+                       ret = command_scan(&s, 0, s.ring_size, 0, s.ring_size);
+                       if (ret)
+                               gvt_err("Scan init ctx error\n");
                }
 
-               ret = command_scan(&s, 0, size, 0, size);
+               shmem_unpin_map(engine->default_state, vaddr);
                if (ret)
-                       gvt_err("Scan init ctx error\n");
-
-               i915_gem_object_unpin_map(obj);
+                       return;
        }
 
-out:
-       if (!ret)
-               gvt->is_reg_whitelist_updated = true;
-
-       for (id = 0; id < I915_NUM_ENGINES ; id++) {
-               if (requests[id])
-                       i915_request_put(requests[id]);
-
-               if (is_ctx_pinned[id])
-                       intel_context_unpin(s->shadow[id]);
-       }
+       gvt->is_reg_whitelist_updated = true;
 }
 
 int intel_gvt_scan_engine_context(struct intel_vgpu_workload *workload)
index 158873f..c8dcda6 100644 (file)
@@ -522,12 +522,11 @@ static void init_vgpu_execlist(struct intel_vgpu *vgpu,
 static void clean_execlist(struct intel_vgpu *vgpu,
                           intel_engine_mask_t engine_mask)
 {
-       struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
-       struct intel_engine_cs *engine;
        struct intel_vgpu_submission *s = &vgpu->submission;
+       struct intel_engine_cs *engine;
        intel_engine_mask_t tmp;
 
-       for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
+       for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) {
                kfree(s->ring_scan_buffer[engine->id]);
                s->ring_scan_buffer[engine->id] = NULL;
                s->ring_scan_buffer_size[engine->id] = 0;
@@ -537,11 +536,10 @@ static void clean_execlist(struct intel_vgpu *vgpu,
 static void reset_execlist(struct intel_vgpu *vgpu,
                           intel_engine_mask_t engine_mask)
 {
-       struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
        struct intel_engine_cs *engine;
        intel_engine_mask_t tmp;
 
-       for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp)
+       for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp)
                init_vgpu_execlist(vgpu, engine);
 }
 
index 43f31c2..fc73569 100644 (file)
@@ -412,7 +412,9 @@ static void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
        if (!wa_ctx->indirect_ctx.obj)
                return;
 
+       i915_gem_object_lock(wa_ctx->indirect_ctx.obj, NULL);
        i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
+       i915_gem_object_unlock(wa_ctx->indirect_ctx.obj);
        i915_gem_object_put(wa_ctx->indirect_ctx.obj);
 
        wa_ctx->indirect_ctx.obj = NULL;
@@ -520,6 +522,7 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
        struct intel_gvt *gvt = workload->vgpu->gvt;
        const int gmadr_bytes = gvt->device_info.gmadr_bytes_in_cmd;
        struct intel_vgpu_shadow_bb *bb;
+       struct i915_gem_ww_ctx ww;
        int ret;
 
        list_for_each_entry(bb, &workload->shadow_bb, list) {
@@ -544,10 +547,19 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
                 * directly
                 */
                if (!bb->ppgtt) {
-                       bb->vma = i915_gem_object_ggtt_pin(bb->obj,
-                                                          NULL, 0, 0, 0);
+                       i915_gem_ww_ctx_init(&ww, false);
+retry:
+                       i915_gem_object_lock(bb->obj, &ww);
+
+                       bb->vma = i915_gem_object_ggtt_pin_ww(bb->obj, &ww,
+                                                             NULL, 0, 0, 0);
                        if (IS_ERR(bb->vma)) {
                                ret = PTR_ERR(bb->vma);
+                               if (ret == -EDEADLK) {
+                                       ret = i915_gem_ww_ctx_backoff(&ww);
+                                       if (!ret)
+                                               goto retry;
+                               }
                                goto err;
                        }
 
@@ -561,13 +573,15 @@ static int prepare_shadow_batch_buffer(struct intel_vgpu_workload *workload)
                                                      0);
                        if (ret)
                                goto err;
-               }
 
-               /* No one is going to touch shadow bb from now on. */
-               i915_gem_object_flush_map(bb->obj);
+                       /* No one is going to touch shadow bb from now on. */
+                       i915_gem_object_flush_map(bb->obj);
+                       i915_gem_object_unlock(bb->obj);
+               }
        }
        return 0;
 err:
+       i915_gem_ww_ctx_fini(&ww);
        release_shadow_batch_buffer(workload);
        return ret;
 }
@@ -594,14 +608,29 @@ static int prepare_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
        unsigned char *per_ctx_va =
                (unsigned char *)wa_ctx->indirect_ctx.shadow_va +
                wa_ctx->indirect_ctx.size;
+       struct i915_gem_ww_ctx ww;
+       int ret;
 
        if (wa_ctx->indirect_ctx.size == 0)
                return 0;
 
-       vma = i915_gem_object_ggtt_pin(wa_ctx->indirect_ctx.obj, NULL,
-                                      0, CACHELINE_BYTES, 0);
-       if (IS_ERR(vma))
-               return PTR_ERR(vma);
+       i915_gem_ww_ctx_init(&ww, false);
+retry:
+       i915_gem_object_lock(wa_ctx->indirect_ctx.obj, &ww);
+
+       vma = i915_gem_object_ggtt_pin_ww(wa_ctx->indirect_ctx.obj, &ww, NULL,
+                                         0, CACHELINE_BYTES, 0);
+       if (IS_ERR(vma)) {
+               ret = PTR_ERR(vma);
+               if (ret == -EDEADLK) {
+                       ret = i915_gem_ww_ctx_backoff(&ww);
+                       if (!ret)
+                               goto retry;
+               }
+               return ret;
+       }
+
+       i915_gem_object_unlock(wa_ctx->indirect_ctx.obj);
 
        /* FIXME: we are not tracking our pinned VMA leaving it
         * up to the core to fix up the stray pin_count upon
@@ -635,12 +664,14 @@ static void release_shadow_batch_buffer(struct intel_vgpu_workload *workload)
 
        list_for_each_entry_safe(bb, pos, &workload->shadow_bb, list) {
                if (bb->obj) {
+                       i915_gem_object_lock(bb->obj, NULL);
                        if (bb->va && !IS_ERR(bb->va))
                                i915_gem_object_unpin_map(bb->obj);
 
                        if (bb->vma && !IS_ERR(bb->vma))
                                i915_vma_unpin(bb->vma);
 
+                       i915_gem_object_unlock(bb->obj);
                        i915_gem_object_put(bb->obj);
                }
                list_del(&bb->list);
@@ -1015,13 +1046,12 @@ void intel_vgpu_clean_workloads(struct intel_vgpu *vgpu,
                                intel_engine_mask_t engine_mask)
 {
        struct intel_vgpu_submission *s = &vgpu->submission;
-       struct drm_i915_private *dev_priv = vgpu->gvt->gt->i915;
        struct intel_engine_cs *engine;
        struct intel_vgpu_workload *pos, *n;
        intel_engine_mask_t tmp;
 
        /* free the unsubmitted workloads in the queues. */
-       for_each_engine_masked(engine, &dev_priv->gt, engine_mask, tmp) {
+       for_each_engine_masked(engine, vgpu->gvt->gt, engine_mask, tmp) {
                list_for_each_entry_safe(pos, n,
                        &s->workload_q_head[engine->id], list) {
                        list_del_init(&pos->list);
index ced9a96..5f86f5b 100644 (file)
@@ -940,7 +940,7 @@ static void fini_hash_table(struct intel_engine_cs *engine)
  * struct intel_engine_cs based on whether the platform requires software
  * command parsing.
  */
-void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
+int intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 {
        const struct drm_i915_cmd_table *cmd_tables;
        int cmd_table_count;
@@ -948,7 +948,7 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
 
        if (!IS_GEN(engine->i915, 7) && !(IS_GEN(engine->i915, 9) &&
                                          engine->class == COPY_ENGINE_CLASS))
-               return;
+               return 0;
 
        switch (engine->class) {
        case RENDER_CLASS:
@@ -1013,19 +1013,19 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
                break;
        default:
                MISSING_CASE(engine->class);
-               return;
+               goto out;
        }
 
        if (!validate_cmds_sorted(engine, cmd_tables, cmd_table_count)) {
                drm_err(&engine->i915->drm,
                        "%s: command descriptions are not sorted\n",
                        engine->name);
-               return;
+               goto out;
        }
        if (!validate_regs_sorted(engine)) {
                drm_err(&engine->i915->drm,
                        "%s: registers are not sorted\n", engine->name);
-               return;
+               goto out;
        }
 
        ret = init_hash_table(engine, cmd_tables, cmd_table_count);
@@ -1033,10 +1033,17 @@ void intel_engine_init_cmd_parser(struct intel_engine_cs *engine)
                drm_err(&engine->i915->drm,
                        "%s: initialised failed!\n", engine->name);
                fini_hash_table(engine);
-               return;
+               goto out;
        }
 
        engine->flags |= I915_ENGINE_USING_CMD_PARSER;
+
+out:
+       if (intel_engine_requires_cmd_parser(engine) &&
+           !intel_engine_using_cmd_parser(engine))
+               return -EINVAL;
+
+       return 0;
 }
 
 /**
index 26d69d0..cb62ddb 100644 (file)
@@ -1952,7 +1952,7 @@ const char *i915_cache_level_str(struct drm_i915_private *i915, int type);
 
 /* i915_cmd_parser.c */
 int i915_cmd_parser_get_version(struct drm_i915_private *dev_priv);
-void intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
+int intel_engine_init_cmd_parser(struct intel_engine_cs *engine);
 void intel_engine_cleanup_cmd_parser(struct intel_engine_cs *engine);
 int intel_engine_cmd_parser(struct intel_engine_cs *engine,
                            struct i915_vma *batch,
index 112ba5f..e62ad69 100644 (file)
@@ -603,7 +603,6 @@ static int append_oa_sample(struct i915_perf_stream *stream,
 {
        int report_size = stream->oa_buffer.format_size;
        struct drm_i915_perf_record_header header;
-       u32 sample_flags = stream->sample_flags;
 
        header.type = DRM_I915_PERF_RECORD_SAMPLE;
        header.pad = 0;
@@ -617,10 +616,8 @@ static int append_oa_sample(struct i915_perf_stream *stream,
                return -EFAULT;
        buf += sizeof(header);
 
-       if (sample_flags & SAMPLE_OA_REPORT) {
-               if (copy_to_user(buf, report, report_size))
-                       return -EFAULT;
-       }
+       if (copy_to_user(buf, report, report_size))
+               return -EFAULT;
 
        (*offset) += header.size;
 
@@ -2682,7 +2679,7 @@ static void i915_oa_stream_enable(struct i915_perf_stream *stream)
 
        stream->perf->ops.oa_enable(stream);
 
-       if (stream->periodic)
+       if (stream->sample_flags & SAMPLE_OA_REPORT)
                hrtimer_start(&stream->poll_check_timer,
                              ns_to_ktime(stream->poll_oa_period),
                              HRTIMER_MODE_REL_PINNED);
@@ -2745,7 +2742,7 @@ static void i915_oa_stream_disable(struct i915_perf_stream *stream)
 {
        stream->perf->ops.oa_disable(stream);
 
-       if (stream->periodic)
+       if (stream->sample_flags & SAMPLE_OA_REPORT)
                hrtimer_cancel(&stream->poll_check_timer);
 }
 
@@ -3028,7 +3025,7 @@ static ssize_t i915_perf_read(struct file *file,
         * disabled stream as an error. In particular it might otherwise lead
         * to a deadlock for blocking file descriptors...
         */
-       if (!stream->enabled)
+       if (!stream->enabled || !(stream->sample_flags & SAMPLE_OA_REPORT))
                return -EIO;
 
        if (!(file->f_flags & O_NONBLOCK)) {
index 7146cd0..aaf1f00 100644 (file)
@@ -3316,7 +3316,18 @@ static inline bool i915_mmio_reg_valid(i915_reg_t reg)
 
 #define ILK_DISPLAY_CHICKEN1   _MMIO(0x42000)
 #define   ILK_FBCQ_DIS         (1 << 22)
-#define          ILK_PABSTRETCH_DIS    (1 << 21)
+#define   ILK_PABSTRETCH_DIS   REG_BIT(21)
+#define   ILK_SABSTRETCH_DIS   REG_BIT(20)
+#define   IVB_PRI_STRETCH_MAX_MASK     REG_GENMASK(21, 20)
+#define   IVB_PRI_STRETCH_MAX_X8       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 0)
+#define   IVB_PRI_STRETCH_MAX_X4       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 1)
+#define   IVB_PRI_STRETCH_MAX_X2       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 2)
+#define   IVB_PRI_STRETCH_MAX_X1       REG_FIELD_PREP(IVB_PRI_STRETCH_MAX_MASK, 3)
+#define   IVB_SPR_STRETCH_MAX_MASK     REG_GENMASK(19, 18)
+#define   IVB_SPR_STRETCH_MAX_X8       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 0)
+#define   IVB_SPR_STRETCH_MAX_X4       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 1)
+#define   IVB_SPR_STRETCH_MAX_X2       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 2)
+#define   IVB_SPR_STRETCH_MAX_X1       REG_FIELD_PREP(IVB_SPR_STRETCH_MAX_MASK, 3)
 
 
 /*
@@ -8039,6 +8050,16 @@ enum {
 
 #define _CHICKEN_PIPESL_1_A    0x420b0
 #define _CHICKEN_PIPESL_1_B    0x420b4
+#define  HSW_PRI_STRETCH_MAX_MASK      REG_GENMASK(28, 27)
+#define  HSW_PRI_STRETCH_MAX_X8                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 0)
+#define  HSW_PRI_STRETCH_MAX_X4                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 1)
+#define  HSW_PRI_STRETCH_MAX_X2                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 2)
+#define  HSW_PRI_STRETCH_MAX_X1                REG_FIELD_PREP(HSW_PRI_STRETCH_MAX_MASK, 3)
+#define  HSW_SPR_STRETCH_MAX_MASK      REG_GENMASK(26, 25)
+#define  HSW_SPR_STRETCH_MAX_X8                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 0)
+#define  HSW_SPR_STRETCH_MAX_X4                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 1)
+#define  HSW_SPR_STRETCH_MAX_X2                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 2)
+#define  HSW_SPR_STRETCH_MAX_X1                REG_FIELD_PREP(HSW_SPR_STRETCH_MAX_MASK, 3)
 #define  HSW_FBCQ_DIS                  (1 << 22)
 #define  BDW_DPRS_MASK_VBLANK_SRD      (1 << 0)
 #define CHICKEN_PIPESL_1(pipe) _MMIO_PIPE(pipe, _CHICKEN_PIPESL_1_A, _CHICKEN_PIPESL_1_B)
index 0c3e63f..97b57ac 100644 (file)
@@ -7245,11 +7245,16 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
        intel_uncore_write(&dev_priv->uncore, CHICKEN_PAR1_1,
                   intel_uncore_read(&dev_priv->uncore, CHICKEN_PAR1_1) | DPA_MASK_VBLANK_SRD);
 
-       /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
        for_each_pipe(dev_priv, pipe) {
+               /* WaPsrDPRSUnmaskVBlankInSRD:bdw */
                intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
                           intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe)) |
                           BDW_DPRS_MASK_VBLANK_SRD);
+
+               /* Undocumented but fixes async flip + VT-d corruption */
+               if (intel_vtd_active())
+                       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
+                                        HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1);
        }
 
        /* WaVSRefCountFullforceMissDisable:bdw */
@@ -7285,11 +7290,20 @@ static void bdw_init_clock_gating(struct drm_i915_private *dev_priv)
 
 static void hsw_init_clock_gating(struct drm_i915_private *dev_priv)
 {
+       enum pipe pipe;
+
        /* WaFbcAsynchFlipDisableFbcQueue:hsw,bdw */
        intel_uncore_write(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A),
                   intel_uncore_read(&dev_priv->uncore, CHICKEN_PIPESL_1(PIPE_A)) |
                   HSW_FBCQ_DIS);
 
+       for_each_pipe(dev_priv, pipe) {
+               /* Undocumented but fixes async flip + VT-d corruption */
+               if (intel_vtd_active())
+                       intel_uncore_rmw(&dev_priv->uncore, CHICKEN_PIPESL_1(pipe),
+                                        HSW_PRI_STRETCH_MAX_MASK, HSW_PRI_STRETCH_MAX_X1);
+       }
+
        /* This is required by WaCatErrorRejectionIssue:hsw */
        intel_uncore_write(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG,
                   intel_uncore_read(&dev_priv->uncore, GEN7_SQ_CHICKEN_MBCUNIT_CONFIG) |
index 153ca9e..8b725ef 100644 (file)
@@ -412,12 +412,20 @@ intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm)
 }
 
 /**
- * intel_runtime_pm_get_if_in_use - grab a runtime pm reference if device in use
+ * __intel_runtime_pm_get_if_active - grab a runtime pm reference if device is active
  * @rpm: the intel_runtime_pm structure
+ * @ignore_usecount: get a ref even if dev->power.usage_count is 0
  *
  * This function grabs a device-level runtime pm reference if the device is
- * already in use and ensures that it is powered up. It is illegal to try
- * and access the HW should intel_runtime_pm_get_if_in_use() report failure.
+ * already active and ensures that it is powered up. It is illegal to try
+ * and access the HW should intel_runtime_pm_get_if_active() report failure.
+ *
+ * If @ignore_usecount=true, a reference will be acquired even if there is no
+ * user requiring the device to be powered up (dev->power.usage_count == 0).
+ * If the function returns false in this case then it's guaranteed that the
+ * device's runtime suspend hook has been called already or that it will be
+ * called (and hence it's also guaranteed that the device's runtime resume
+ * hook will be called eventually).
  *
  * Any runtime pm reference obtained by this function must have a symmetric
  * call to intel_runtime_pm_put() to release the reference again.
@@ -425,7 +433,8 @@ intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm)
  * Returns: the wakeref cookie to pass to intel_runtime_pm_put(), evaluates
  * as True if the wakeref was acquired, or False otherwise.
  */
-intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm)
+static intel_wakeref_t __intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm,
+                                                       bool ignore_usecount)
 {
        if (IS_ENABLED(CONFIG_PM)) {
                /*
@@ -434,7 +443,7 @@ intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm)
                 * function, since the power state is undefined. This applies
                 * atm to the late/early system suspend/resume handlers.
                 */
-               if (pm_runtime_get_if_in_use(rpm->kdev) <= 0)
+               if (pm_runtime_get_if_active(rpm->kdev, ignore_usecount) <= 0)
                        return 0;
        }
 
@@ -443,6 +452,16 @@ intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm)
        return track_intel_runtime_pm_wakeref(rpm);
 }
 
+intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm)
+{
+       return __intel_runtime_pm_get_if_active(rpm, false);
+}
+
+intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm)
+{
+       return __intel_runtime_pm_get_if_active(rpm, true);
+}
+
 /**
  * intel_runtime_pm_get_noresume - grab a runtime pm reference
  * @rpm: the intel_runtime_pm structure
index ae64ff1..1e4ddd1 100644 (file)
@@ -177,6 +177,7 @@ void intel_runtime_pm_driver_release(struct intel_runtime_pm *rpm);
 
 intel_wakeref_t intel_runtime_pm_get(struct intel_runtime_pm *rpm);
 intel_wakeref_t intel_runtime_pm_get_if_in_use(struct intel_runtime_pm *rpm);
+intel_wakeref_t intel_runtime_pm_get_if_active(struct intel_runtime_pm *rpm);
 intel_wakeref_t intel_runtime_pm_get_noresume(struct intel_runtime_pm *rpm);
 intel_wakeref_t intel_runtime_pm_get_raw(struct intel_runtime_pm *rpm);
 
@@ -188,6 +189,10 @@ intel_wakeref_t intel_runtime_pm_get_raw(struct intel_runtime_pm *rpm);
        for ((wf) = intel_runtime_pm_get_if_in_use(rpm); (wf); \
             intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
 
+#define with_intel_runtime_pm_if_active(rpm, wf) \
+       for ((wf) = intel_runtime_pm_get_if_active(rpm); (wf); \
+            intel_runtime_pm_put((rpm), (wf)), (wf) = 0)
+
 void intel_runtime_pm_put_unchecked(struct intel_runtime_pm *rpm);
 #if IS_ENABLED(CONFIG_DRM_I915_DEBUG_RUNTIME_PM)
 void intel_runtime_pm_put(struct intel_runtime_pm *rpm, intel_wakeref_t wref);
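
A usage sketch of the new if-active variant, mirroring the fence-register fix earlier in this diff: the body runs only while the device is actually powered, regardless of usage count, and the wakeref is released automatically ("write_fence_to_hw" is a hypothetical stand-in):

	/* Sketch: touch HW only if it is not runtime suspended; otherwise
	 * rely on the resume path to restore the state.
	 */
	static void my_update_hw(struct intel_runtime_pm *rpm)
	{
		intel_wakeref_t wakeref;

		with_intel_runtime_pm_if_active(rpm, wakeref)
			write_fence_to_hw();
	}
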
index d1a9841..e6a88c8 100644 (file)
@@ -215,7 +215,7 @@ static int imx_drm_bind(struct device *dev)
 
        ret = drmm_mode_config_init(drm);
        if (ret)
-               return ret;
+               goto err_kms;
 
        ret = drm_vblank_init(drm, MAX_CRTC);
        if (ret)
index dbfe39e..ffdc492 100644 (file)
@@ -197,6 +197,11 @@ static void imx_ldb_encoder_enable(struct drm_encoder *encoder)
        int dual = ldb->ldb_ctrl & LDB_SPLIT_MODE_EN;
        int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
 
+       if (mux < 0 || mux >= ARRAY_SIZE(ldb->clk_sel)) {
+               dev_warn(ldb->dev, "%s: invalid mux %d\n", __func__, mux);
+               return;
+       }
+
        drm_panel_prepare(imx_ldb_ch->panel);
 
        if (dual) {
@@ -255,6 +260,11 @@ imx_ldb_encoder_atomic_mode_set(struct drm_encoder *encoder,
        int mux = drm_of_encoder_active_port_id(imx_ldb_ch->child, encoder);
        u32 bus_format = imx_ldb_ch->bus_format;
 
+       if (mux < 0 || mux >= ARRAY_SIZE(ldb->clk_sel)) {
+               dev_warn(ldb->dev, "%s: invalid mux %d\n", __func__, mux);
+               return;
+       }
+
        if (mode->clock > 170000) {
                dev_warn(ldb->dev,
                         "%s: mode exceeds 170 MHz pixel clock\n", __func__);
@@ -583,7 +593,7 @@ static int imx_ldb_bind(struct device *dev, struct device *master, void *data)
                struct imx_ldb_channel *channel = &imx_ldb->channel[i];
 
                if (!channel->ldb)
-                       break;
+                       continue;
 
                ret = imx_ldb_register(drm, channel);
                if (ret)
index 42c5d32..453d8b4 100644 (file)
@@ -482,6 +482,16 @@ static int meson_probe_remote(struct platform_device *pdev,
        return count;
 }
 
+static void meson_drv_shutdown(struct platform_device *pdev)
+{
+       struct meson_drm *priv = dev_get_drvdata(&pdev->dev);
+       struct drm_device *drm = priv->drm;
+
+       DRM_DEBUG_DRIVER("\n");
+       drm_kms_helper_poll_fini(drm);
+       drm_atomic_helper_shutdown(drm);
+}
+
 static int meson_drv_probe(struct platform_device *pdev)
 {
        struct component_match *match = NULL;
@@ -553,6 +563,7 @@ static const struct dev_pm_ops meson_drv_pm_ops = {
 
 static struct platform_driver meson_drm_platform_driver = {
        .probe      = meson_drv_probe,
+       .shutdown   = meson_drv_shutdown,
        .driver     = {
                .name   = "meson-drm",
                .of_match_table = dt_match,
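
The meson hook above is the now-standard KMS shutdown sequence, keeping the display pipe from scanning out freed memory across reboot/kexec; for most drivers the .shutdown callback reduces to the same two calls (sketch, assuming the drm_device pointer is stored directly as drvdata):

	/* Sketch: generic KMS platform-driver shutdown. */
	static void my_drm_shutdown(struct platform_device *pdev)
	{
		struct drm_device *drm = platform_get_drvdata(pdev);

		drm_kms_helper_poll_fini(drm);
		drm_atomic_helper_shutdown(drm);
	}
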
index 346cc6f..7b9fcfe 100644 (file)
@@ -2367,6 +2367,8 @@ static inline uint32_t A5XX_VSC_RESOLVE_CNTL_Y(uint32_t val)
 
 #define REG_A5XX_UCHE_ADDR_MODE_CNTL                           0x00000e80
 
+#define REG_A5XX_UCHE_MODE_CNTL                                        0x00000e81
+
 #define REG_A5XX_UCHE_SVM_CNTL                                 0x00000e82
 
 #define REG_A5XX_UCHE_WRITE_THRU_BASE_LO                       0x00000e87
index a5af223..ce13d49 100644 (file)
@@ -222,7 +222,7 @@ static void a5xx_submit(struct msm_gpu *gpu, struct msm_gem_submit *submit)
        a5xx_preempt_trigger(gpu);
 }
 
-static const struct {
+static const struct adreno_five_hwcg_regs {
        u32 offset;
        u32 value;
 } a5xx_hwcg[] = {
@@ -318,16 +318,124 @@ static const struct {
        {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
        {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
        {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222}
+}, a50x_hwcg[] = {
+       {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+       {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+       {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+       {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+       {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+       {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+       {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+       {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+       {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+       {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00FFFFF4},
+       {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+       {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+       {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+       {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+       {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+       {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+       {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+       {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+       {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+       {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+       {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+       {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+       {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+       {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+       {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+       {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+       {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
+}, a512_hwcg[] = {
+       {REG_A5XX_RBBM_CLOCK_CNTL_SP0, 0x02222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL_SP1, 0x02222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_SP0, 0x02222220},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_SP1, 0x02222220},
+       {REG_A5XX_RBBM_CLOCK_HYST_SP0, 0x0000F3CF},
+       {REG_A5XX_RBBM_CLOCK_HYST_SP1, 0x0000F3CF},
+       {REG_A5XX_RBBM_CLOCK_DELAY_SP0, 0x00000080},
+       {REG_A5XX_RBBM_CLOCK_DELAY_SP1, 0x00000080},
+       {REG_A5XX_RBBM_CLOCK_CNTL_TP0, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL_TP1, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_TP0, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_TP1, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL3_TP0, 0x00002222},
+       {REG_A5XX_RBBM_CLOCK_CNTL3_TP1, 0x00002222},
+       {REG_A5XX_RBBM_CLOCK_HYST_TP0, 0x77777777},
+       {REG_A5XX_RBBM_CLOCK_HYST_TP1, 0x77777777},
+       {REG_A5XX_RBBM_CLOCK_HYST2_TP0, 0x77777777},
+       {REG_A5XX_RBBM_CLOCK_HYST2_TP1, 0x77777777},
+       {REG_A5XX_RBBM_CLOCK_HYST3_TP0, 0x00007777},
+       {REG_A5XX_RBBM_CLOCK_HYST3_TP1, 0x00007777},
+       {REG_A5XX_RBBM_CLOCK_DELAY_TP0, 0x11111111},
+       {REG_A5XX_RBBM_CLOCK_DELAY_TP1, 0x11111111},
+       {REG_A5XX_RBBM_CLOCK_DELAY2_TP0, 0x11111111},
+       {REG_A5XX_RBBM_CLOCK_DELAY2_TP1, 0x11111111},
+       {REG_A5XX_RBBM_CLOCK_DELAY3_TP0, 0x00001111},
+       {REG_A5XX_RBBM_CLOCK_DELAY3_TP1, 0x00001111},
+       {REG_A5XX_RBBM_CLOCK_CNTL_UCHE, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_UCHE, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL3_UCHE, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL4_UCHE, 0x00222222},
+       {REG_A5XX_RBBM_CLOCK_HYST_UCHE, 0x00444444},
+       {REG_A5XX_RBBM_CLOCK_DELAY_UCHE, 0x00000002},
+       {REG_A5XX_RBBM_CLOCK_CNTL_RB0, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL_RB1, 0x22222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_RB0, 0x00222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_RB1, 0x00222222},
+       {REG_A5XX_RBBM_CLOCK_CNTL_CCU0, 0x00022220},
+       {REG_A5XX_RBBM_CLOCK_CNTL_CCU1, 0x00022220},
+       {REG_A5XX_RBBM_CLOCK_CNTL_RAC, 0x05522222},
+       {REG_A5XX_RBBM_CLOCK_CNTL2_RAC, 0x00505555},
+       {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU0, 0x04040404},
+       {REG_A5XX_RBBM_CLOCK_HYST_RB_CCU1, 0x04040404},
+       {REG_A5XX_RBBM_CLOCK_HYST_RAC, 0x07444044},
+       {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_0, 0x00000002},
+       {REG_A5XX_RBBM_CLOCK_DELAY_RB_CCU_L1_1, 0x00000002},
+       {REG_A5XX_RBBM_CLOCK_DELAY_RAC, 0x00010011},
+       {REG_A5XX_RBBM_CLOCK_CNTL_TSE_RAS_RBBM, 0x04222222},
+       {REG_A5XX_RBBM_CLOCK_MODE_GPC, 0x02222222},
+       {REG_A5XX_RBBM_CLOCK_MODE_VFD, 0x00002222},
+       {REG_A5XX_RBBM_CLOCK_HYST_TSE_RAS_RBBM, 0x00000000},
+       {REG_A5XX_RBBM_CLOCK_HYST_GPC, 0x04104004},
+       {REG_A5XX_RBBM_CLOCK_HYST_VFD, 0x00000000},
+       {REG_A5XX_RBBM_CLOCK_DELAY_HLSQ, 0x00000000},
+       {REG_A5XX_RBBM_CLOCK_DELAY_TSE_RAS_RBBM, 0x00004000},
+       {REG_A5XX_RBBM_CLOCK_DELAY_GPC, 0x00000200},
+       {REG_A5XX_RBBM_CLOCK_DELAY_VFD, 0x00002222},
 };
 
 void a5xx_set_hwcg(struct msm_gpu *gpu, bool state)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
-       unsigned int i;
+       const struct adreno_five_hwcg_regs *regs;
+       unsigned int i, sz;
+
+       if (adreno_is_a508(adreno_gpu)) {
+               regs = a50x_hwcg;
+               sz = ARRAY_SIZE(a50x_hwcg);
+       } else if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu)) {
+               regs = a512_hwcg;
+               sz = ARRAY_SIZE(a512_hwcg);
+       } else {
+               regs = a5xx_hwcg;
+               sz = ARRAY_SIZE(a5xx_hwcg);
+       }
 
-       for (i = 0; i < ARRAY_SIZE(a5xx_hwcg); i++)
-               gpu_write(gpu, a5xx_hwcg[i].offset,
-                       state ? a5xx_hwcg[i].value : 0);
+       for (i = 0; i < sz; i++)
+               gpu_write(gpu, regs[i].offset,
+                         state ? regs[i].value : 0);
 
        if (adreno_is_a540(adreno_gpu)) {
                gpu_write(gpu, REG_A5XX_RBBM_CLOCK_DELAY_GPMU, state ? 0x00000770 : 0);
@@ -538,11 +646,13 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a5xx_gpu *a5xx_gpu = to_a5xx_gpu(adreno_gpu);
+       u32 regbit;
        int ret;
 
        gpu_write(gpu, REG_A5XX_VBIF_ROUND_ROBIN_QOS_ARB, 0x00000003);
 
-       if (adreno_is_a540(adreno_gpu))
+       if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
+           adreno_is_a540(adreno_gpu))
                gpu_write(gpu, REG_A5XX_VBIF_GATE_OFF_WRREQ_EN, 0x00000009);
 
        /* Make all blocks contribute to the GPU BUSY perf counter */
@@ -604,29 +714,48 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
                0x00100000 + adreno_gpu->gmem - 1);
        gpu_write(gpu, REG_A5XX_UCHE_GMEM_RANGE_MAX_HI, 0x00000000);
 
-       if (adreno_is_a510(adreno_gpu)) {
+       if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu)) {
                gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x20);
-               gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
+               if (adreno_is_a508(adreno_gpu))
+                       gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
+               else
+                       gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x20);
                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x40000030);
                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x20100D0A);
-               gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
-                         (0x200 << 11 | 0x200 << 22));
        } else {
                gpu_write(gpu, REG_A5XX_CP_MEQ_THRESHOLDS, 0x40);
                if (adreno_is_a530(adreno_gpu))
                        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x40);
-               if (adreno_is_a540(adreno_gpu))
+               else
                        gpu_write(gpu, REG_A5XX_CP_MERCIU_SIZE, 0x400);
                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_2, 0x80000060);
                gpu_write(gpu, REG_A5XX_CP_ROQ_THRESHOLDS_1, 0x40201B16);
+       }
+
+       if (adreno_is_a508(adreno_gpu))
+               gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+                         (0x100 << 11 | 0x100 << 22));
+       else if (adreno_is_a509(adreno_gpu) || adreno_is_a510(adreno_gpu) ||
+                adreno_is_a512(adreno_gpu))
+               gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
+                         (0x200 << 11 | 0x200 << 22));
+       else
                gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL,
                          (0x400 << 11 | 0x300 << 22));
-       }
 
        if (adreno_gpu->info->quirks & ADRENO_QUIRK_TWO_PASS_USE_WFI)
                gpu_rmw(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0, (1 << 8));
 
-       gpu_write(gpu, REG_A5XX_PC_DBG_ECO_CNTL, 0xc0200100);
+       /*
+        * Disable the RB sampler datapath DP2 clock gating optimization
+        * for 1-SP GPUs, as it is enabled by default.
+        */
+       if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
+           adreno_is_a512(adreno_gpu))
+               gpu_rmw(gpu, REG_A5XX_RB_DBG_ECO_CNTL, 0, (1 << 9));
+
+       /* Disable UCHE global filter as SP can invalidate/flush independently */
+       gpu_write(gpu, REG_A5XX_UCHE_MODE_CNTL, BIT(29));
 
        /* Enable USE_RETENTION_FLOPS */
        gpu_write(gpu, REG_A5XX_CP_CHICKEN_DBG, 0x02000000);
@@ -653,10 +782,20 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
        gpu_write(gpu, REG_A5XX_RBBM_AHB_CNTL2, 0x0000003F);
 
        /* Set the highest bank bit */
-       gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, 2 << 7);
-       gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, 2 << 1);
        if (adreno_is_a540(adreno_gpu))
-               gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, 2);
+               regbit = 2;
+       else
+               regbit = 1;
+
+       gpu_write(gpu, REG_A5XX_TPL1_MODE_CNTL, regbit << 7);
+       gpu_write(gpu, REG_A5XX_RB_MODE_CNTL, regbit << 1);
+
+       if (adreno_is_a509(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
+           adreno_is_a540(adreno_gpu))
+               gpu_write(gpu, REG_A5XX_UCHE_DBG_ECO_CNTL_2, regbit);
+
+       /* Disable All flat shading optimization (ALLFLATOPTDIS) */
+       gpu_rmw(gpu, REG_A5XX_VPC_DBG_ECO_CNTL, 0, (1 << 10));
 
        /* Protect registers from the CP */
        gpu_write(gpu, REG_A5XX_CP_PROTECT_CNTL, 0x00000007);
@@ -688,12 +827,14 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
 
        /* VPC */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(14), ADRENO_PROTECT_RW(0xE68, 8));
-       gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 4));
+       gpu_write(gpu, REG_A5XX_CP_PROTECT(15), ADRENO_PROTECT_RW(0xE70, 16));
 
        /* UCHE */
        gpu_write(gpu, REG_A5XX_CP_PROTECT(16), ADRENO_PROTECT_RW(0xE80, 16));
 
-       if (adreno_is_a530(adreno_gpu) || adreno_is_a510(adreno_gpu))
+       if (adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
+           adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu) ||
+           adreno_is_a530(adreno_gpu))
                gpu_write(gpu, REG_A5XX_CP_PROTECT(17),
                        ADRENO_PROTECT_RW(0x10000, 0x8000));
 
@@ -735,7 +876,8 @@ static int a5xx_hw_init(struct msm_gpu *gpu)
        if (ret)
                return ret;
 
-       if (!adreno_is_a510(adreno_gpu))
+       if (!(adreno_is_a508(adreno_gpu) || adreno_is_a509(adreno_gpu) ||
+             adreno_is_a510(adreno_gpu) || adreno_is_a512(adreno_gpu)))
                a5xx_gpmu_ucode_init(gpu);
 
        ret = a5xx_ucode_init(gpu);
@@ -1168,7 +1310,8 @@ static int a5xx_pm_resume(struct msm_gpu *gpu)
        if (ret)
                return ret;
 
-       if (adreno_is_a510(adreno_gpu)) {
+       /* Adreno 508, 509, 510 and 512 need manual RBBM suspend/resume control */
+       if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu))) {
                /* Halt the sp_input_clk at HM level */
                gpu_write(gpu, REG_A5XX_RBBM_CLOCK_CNTL, 0x00000055);
                a5xx_set_hwcg(gpu, true);
@@ -1210,8 +1353,8 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
        u32 mask = 0xf;
        int i, ret;
 
-       /* A510 has 3 XIN ports in VBIF */
-       if (adreno_is_a510(adreno_gpu))
+       /* A508, A510 have 3 XIN ports in VBIF */
+       if (adreno_is_a508(adreno_gpu) || adreno_is_a510(adreno_gpu))
                mask = 0x7;
 
        /* Clear the VBIF pipe before shutting down */
@@ -1223,10 +1366,12 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
 
        /*
         * Reset the VBIF before power collapse to avoid issue with FIFO
-        * entries
+        * entries on Adreno A510 and A530 (the others will tend to lock up)
         */
-       gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
-       gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
+       if (adreno_is_a510(adreno_gpu) || adreno_is_a530(adreno_gpu)) {
+               gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x003C0000);
+               gpu_write(gpu, REG_A5XX_RBBM_BLOCK_SW_RESET_CMD, 0x00000000);
+       }
 
        ret = msm_gpu_pm_suspend(gpu);
        if (ret)
@@ -1241,8 +1386,8 @@ static int a5xx_pm_suspend(struct msm_gpu *gpu)
 
 static int a5xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
-       *value = gpu_read64(gpu, REG_A5XX_RBBM_PERFCTR_CP_0_LO,
-               REG_A5XX_RBBM_PERFCTR_CP_0_HI);
+       *value = gpu_read64(gpu, REG_A5XX_RBBM_ALWAYSON_COUNTER_LO,
+               REG_A5XX_RBBM_ALWAYSON_COUNTER_HI);
 
        return 0;
 }
index f176a6f..c35b06b 100644 (file)
@@ -298,13 +298,13 @@ int a5xx_power_init(struct msm_gpu *gpu)
        int ret;
 
        /* Not all A5xx chips have a GPMU */
-       if (adreno_is_a510(adreno_gpu))
+       if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu)))
                return 0;
 
        /* Set up the limits management */
        if (adreno_is_a530(adreno_gpu))
                a530_lm_setup(gpu);
-       else
+       else if (adreno_is_a540(adreno_gpu))
                a540_lm_setup(gpu);
 
        /* Set up SP/TP power collapse */
@@ -330,7 +330,7 @@ void a5xx_gpmu_ucode_init(struct msm_gpu *gpu)
        unsigned int *data, *ptr, *cmds;
        unsigned int cmds_size;
 
-       if (adreno_is_a510(adreno_gpu))
+       if (!(adreno_is_a530(adreno_gpu) || adreno_is_a540(adreno_gpu)))
                return;
 
        if (a5xx_gpu->gpmu_bo)
index 05e0ef5..91cf46f 100644 (file)
@@ -245,37 +245,66 @@ static int a6xx_gmu_hfi_start(struct a6xx_gmu *gmu)
        return ret;
 }
 
+struct a6xx_gmu_oob_bits {
+       int set, ack, set_new, ack_new;
+       const char *name;
+};
+
+/* These are the interrupt / ack bits for each OOB request that are set
+ * in a6xx_gmu_set_oob() and a6xx_gmu_clear_oob()
+ */
+static const struct a6xx_gmu_oob_bits a6xx_gmu_oob_bits[] = {
+       [GMU_OOB_GPU_SET] = {
+               .name = "GPU_SET",
+               .set = 16,
+               .ack = 24,
+               .set_new = 30,
+               .ack_new = 31,
+       },
+
+       [GMU_OOB_PERFCOUNTER_SET] = {
+               .name = "PERFCOUNTER",
+               .set = 17,
+               .ack = 25,
+               .set_new = 28,
+               .ack_new = 30,
+       },
+
+       [GMU_OOB_BOOT_SLUMBER] = {
+               .name = "BOOT_SLUMBER",
+               .set = 22,
+               .ack = 30,
+       },
+
+       [GMU_OOB_DCVS_SET] = {
+               .name = "GPU_DCVS",
+               .set = 23,
+               .ack = 31,
+       },
+};
+
 /* Trigger a OOB (out of band) request to the GMU */
 int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
 {
        int ret;
        u32 val;
        int request, ack;
-       const char *name;
 
-       switch (state) {
-       case GMU_OOB_GPU_SET:
-               if (gmu->legacy) {
-                       request = GMU_OOB_GPU_SET_REQUEST;
-                       ack = GMU_OOB_GPU_SET_ACK;
-               } else {
-                       request = GMU_OOB_GPU_SET_REQUEST_NEW;
-                       ack = GMU_OOB_GPU_SET_ACK_NEW;
-               }
-               name = "GPU_SET";
-               break;
-       case GMU_OOB_BOOT_SLUMBER:
-               request = GMU_OOB_BOOT_SLUMBER_REQUEST;
-               ack = GMU_OOB_BOOT_SLUMBER_ACK;
-               name = "BOOT_SLUMBER";
-               break;
-       case GMU_OOB_DCVS_SET:
-               request = GMU_OOB_DCVS_REQUEST;
-               ack = GMU_OOB_DCVS_ACK;
-               name = "GPU_DCVS";
-               break;
-       default:
+       if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
                return -EINVAL;
+
+       if (gmu->legacy) {
+               request = a6xx_gmu_oob_bits[state].set;
+               ack = a6xx_gmu_oob_bits[state].ack;
+       } else {
+               request = a6xx_gmu_oob_bits[state].set_new;
+               ack = a6xx_gmu_oob_bits[state].ack_new;
+               if (!request || !ack) {
+                       DRM_DEV_ERROR(gmu->dev,
+                                     "Invalid non-legacy GMU request %s\n",
+                                     a6xx_gmu_oob_bits[state].name);
+                       return -EINVAL;
+               }
        }
 
        /* Trigger the requested OOB operation */
@@ -288,7 +317,7 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
        if (ret)
                DRM_DEV_ERROR(gmu->dev,
                        "Timeout waiting for GMU OOB set %s: 0x%x\n",
-                               name,
+                               a6xx_gmu_oob_bits[state].name,
                                gmu_read(gmu, REG_A6XX_GMU_GMU2HOST_INTR_INFO));
 
        /* Clear the acknowledge interrupt */
@@ -300,27 +329,17 @@ int a6xx_gmu_set_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
 /* Clear a pending OOB state in the GMU */
 void a6xx_gmu_clear_oob(struct a6xx_gmu *gmu, enum a6xx_gmu_oob_state state)
 {
-       if (!gmu->legacy) {
-               WARN_ON(state != GMU_OOB_GPU_SET);
-               gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-                       1 << GMU_OOB_GPU_SET_CLEAR_NEW);
+       int bit;
+
+       if (state >= ARRAY_SIZE(a6xx_gmu_oob_bits))
                return;
-       }
 
-       switch (state) {
-       case GMU_OOB_GPU_SET:
-               gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-                       1 << GMU_OOB_GPU_SET_CLEAR);
-               break;
-       case GMU_OOB_BOOT_SLUMBER:
-               gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-                       1 << GMU_OOB_BOOT_SLUMBER_CLEAR);
-               break;
-       case GMU_OOB_DCVS_SET:
-               gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET,
-                       1 << GMU_OOB_DCVS_CLEAR);
-               break;
-       }
+       if (gmu->legacy)
+               bit = a6xx_gmu_oob_bits[state].ack;
+       else
+               bit = a6xx_gmu_oob_bits[state].ack_new;
+
+       gmu_write(gmu, REG_A6XX_GMU_HOST2GMU_INTR_SET, 1 << bit);
 }
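
Both helpers now index the same a6xx_gmu_oob_bits table instead of open-coded switches; an out-of-range state is rejected, and for BOOT_SLUMBER and DCVS the zero-initialized set_new/ack_new fields make any non-legacy request fail with -EINVAL. A minimal caller sketch, modeled on the perf-counter read added later in this series (error handling elided):

        a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
        /* ... read registers while the GMU keeps the GPU powered ... */
        a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
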
 
 /* Enable CPU control of SPTP power collapse */
index c6d2bce..71dfa60 100644 (file)
@@ -153,44 +153,27 @@ static inline void gmu_write_rscc(struct a6xx_gmu *gmu, u32 offset, u32 value)
  */
 
 enum a6xx_gmu_oob_state {
+       /*
+        * Let the GMU know that a boot or slumber operation has started. The value in
+        * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
+        * doing
+        */
        GMU_OOB_BOOT_SLUMBER = 0,
+       /*
+        * Let the GMU know to not turn off any GPU registers while the CPU is in a
+        * critical section
+        */
        GMU_OOB_GPU_SET,
+       /*
+        * Set a new power level for the GPU when the CPU is doing frequency scaling
+        */
        GMU_OOB_DCVS_SET,
+       /*
+        * Used to keep the GPU on for CPU-side reads of performance counters.
+        */
+       GMU_OOB_PERFCOUNTER_SET,
 };
 
-/* These are the interrupt / ack bits for each OOB request that are set
- * in a6xx_gmu_set_oob and a6xx_clear_oob
- */
-
-/*
- * Let the GMU know that a boot or slumber operation has started. The value in
- * REG_A6XX_GMU_BOOT_SLUMBER_OPTION lets the GMU know which operation we are
- * doing
- */
-#define GMU_OOB_BOOT_SLUMBER_REQUEST   22
-#define GMU_OOB_BOOT_SLUMBER_ACK       30
-#define GMU_OOB_BOOT_SLUMBER_CLEAR     30
-
-/*
- * Set a new power level for the GPU when the CPU is doing frequency scaling
- */
-#define GMU_OOB_DCVS_REQUEST   23
-#define GMU_OOB_DCVS_ACK       31
-#define GMU_OOB_DCVS_CLEAR     31
-
-/*
- * Let the GMU know to not turn off any GPU registers while the CPU is in a
- * critical section
- */
-#define GMU_OOB_GPU_SET_REQUEST        16
-#define GMU_OOB_GPU_SET_ACK    24
-#define GMU_OOB_GPU_SET_CLEAR  24
-
-#define GMU_OOB_GPU_SET_REQUEST_NEW    30
-#define GMU_OOB_GPU_SET_ACK_NEW                31
-#define GMU_OOB_GPU_SET_CLEAR_NEW      31
-
-
 void a6xx_hfi_init(struct a6xx_gmu *gmu);
 int a6xx_hfi_start(struct a6xx_gmu *gmu, int boot_state);
 void a6xx_hfi_stop(struct a6xx_gmu *gmu);
index 1306618..d553f62 100644 (file)
@@ -10,6 +10,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/devfreq.h>
+#include <linux/nvmem-consumer.h>
 #include <linux/soc/qcom/llcc-qcom.h>
 
 #define GPU_PAS_ID 13
@@ -521,28 +522,73 @@ static int a6xx_cp_init(struct msm_gpu *gpu)
        return a6xx_idle(gpu, ring) ? 0 : -EINVAL;
 }
 
-static void a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
+/*
+ * Check that the microcode version is new enough to include several key
+ * security fixes. Return true if the ucode is safe.
+ */
+static bool a6xx_ucode_check_version(struct a6xx_gpu *a6xx_gpu,
                struct drm_gem_object *obj)
 {
+       struct adreno_gpu *adreno_gpu = &a6xx_gpu->base;
+       struct msm_gpu *gpu = &adreno_gpu->base;
        u32 *buf = msm_gem_get_vaddr(obj);
+       bool ret = false;
 
        if (IS_ERR(buf))
-               return;
+               return false;
 
        /*
-        * If the lowest nibble is 0xa that is an indication that this microcode
-        * has been patched. The actual version is in dword [3] but we only care
-        * about the patchlevel which is the lowest nibble of dword [3]
-        *
-        * Otherwise check that the firmware is greater than or equal to 1.90
-        * which was the first version that had this fix built in
+        * Targets up to a640 (a618, a630 and a640) need to check for a
+        * microcode version that is patched to support the whereami opcode or
+        * one that is new enough to include it by default.
         */
-       if (((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1)
-               a6xx_gpu->has_whereami = true;
-       else if ((buf[0] & 0xfff) > 0x190)
-               a6xx_gpu->has_whereami = true;
+       if (adreno_is_a618(adreno_gpu) || adreno_is_a630(adreno_gpu) ||
+               adreno_is_a640(adreno_gpu)) {
+               /*
+                * If the lowest nibble is 0xa that is an indication that this
+                * microcode has been patched. The actual version is in dword
+                * [3] but we only care about the patchlevel which is the lowest
+                * nibble of dword [3]
+                *
+                * Otherwise check that the firmware is greater than or equal
+                * to 1.90 which was the first version that had this fix built
+                * in
+                */
+               if ((((buf[0] & 0xf) == 0xa) && (buf[2] & 0xf) >= 1) ||
+                       (buf[0] & 0xfff) >= 0x190) {
+                       a6xx_gpu->has_whereami = true;
+                       ret = true;
+                       goto out;
+               }
+
+               DRM_DEV_ERROR(&gpu->pdev->dev,
+                       "a630 SQE ucode is too old. Have version %x need at least %x\n",
+                       buf[0] & 0xfff, 0x190);
+       } else {
+               /*
+                * a650 tier targets don't need whereami but still need to be
+                * equal to or newer than 0.95 for other security fixes
+                */
+               if (adreno_is_a650(adreno_gpu)) {
+                       if ((buf[0] & 0xfff) >= 0x095) {
+                               ret = true;
+                               goto out;
+                       }
 
+                       DRM_DEV_ERROR(&gpu->pdev->dev,
+                               "a650 SQE ucode is too old. Have version %x need at least %x\n",
+                               buf[0] & 0xfff, 0x095);
+               }
+
+               /*
+                * When a660 is added those targets should return true here
+                * since those have all the critical security fixes built in
+                * from the start
+                */
+       }
+out:
        msm_gem_put_vaddr(obj);
+       return ret;
 }
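
To make the masking concrete, a hedged worked example with hypothetical SQE version words (the real values come from the firmware image):

        /*
         * buf[0] = 0x00000195 -> (0x195 & 0xfff) >= 0x190: accepted on the
         *                        a618/a630/a640 path
         * buf[0] = 0x0000018a,
         * buf[2] = 0x00000001 -> lowest nibble 0xa marks a patched image,
         *                        patchlevel 1 >= 1: accepted, whereami set
         * buf[0] = 0x00000170 -> rejected; a6xx_ucode_init() below drops
         *                        the BO and returns -EPERM
         */
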
 
 static int a6xx_ucode_init(struct msm_gpu *gpu)
@@ -565,7 +611,13 @@ static int a6xx_ucode_init(struct msm_gpu *gpu)
                }
 
                msm_gem_object_set_name(a6xx_gpu->sqe_bo, "sqefw");
-               a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo);
+               if (!a6xx_ucode_check_version(a6xx_gpu, a6xx_gpu->sqe_bo)) {
+                       msm_gem_unpin_iova(a6xx_gpu->sqe_bo, gpu->aspace);
+                       drm_gem_object_put(a6xx_gpu->sqe_bo);
+
+                       a6xx_gpu->sqe_bo = NULL;
+                       return -EPERM;
+               }
        }
 
        gpu_write64(gpu, REG_A6XX_CP_SQE_INSTR_BASE_LO,
@@ -1117,7 +1169,7 @@ static void a6xx_llc_slices_init(struct platform_device *pdev,
        a6xx_gpu->llc_slice = llcc_slice_getd(LLCC_GPU);
        a6xx_gpu->htw_llc_slice = llcc_slice_getd(LLCC_GPUHTW);
 
-       if (IS_ERR(a6xx_gpu->llc_slice) && IS_ERR(a6xx_gpu->htw_llc_slice))
+       if (IS_ERR_OR_NULL(a6xx_gpu->llc_slice) && IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
                a6xx_gpu->llc_mmio = ERR_PTR(-EINVAL);
 }
 
@@ -1169,14 +1221,18 @@ static int a6xx_get_timestamp(struct msm_gpu *gpu, uint64_t *value)
 {
        struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+       static DEFINE_MUTEX(perfcounter_oob);
+
+       mutex_lock(&perfcounter_oob);
 
        /* Force the GPU power on so we can read this register */
-       a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+       a6xx_gmu_set_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
 
-       *value = gpu_read64(gpu, REG_A6XX_RBBM_PERFCTR_CP_0_LO,
-               REG_A6XX_RBBM_PERFCTR_CP_0_HI);
+       *value = gpu_read64(gpu, REG_A6XX_CP_ALWAYS_ON_COUNTER_LO,
+               REG_A6XX_CP_ALWAYS_ON_COUNTER_HI);
 
-       a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_GPU_SET);
+       a6xx_gmu_clear_oob(&a6xx_gpu->gmu, GMU_OOB_PERFCOUNTER_SET);
+       mutex_unlock(&perfcounter_oob);
        return 0;
 }
 
@@ -1208,6 +1264,10 @@ static void a6xx_destroy(struct msm_gpu *gpu)
        a6xx_gmu_remove(a6xx_gpu);
 
        adreno_gpu_cleanup(adreno_gpu);
+
+       if (a6xx_gpu->opp_table)
+               dev_pm_opp_put_supported_hw(a6xx_gpu->opp_table);
+
        kfree(a6xx_gpu);
 }
 
@@ -1240,6 +1300,50 @@ static unsigned long a6xx_gpu_busy(struct msm_gpu *gpu)
 }
 
 static struct msm_gem_address_space *
+a6xx_create_address_space(struct msm_gpu *gpu, struct platform_device *pdev)
+{
+       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
+       struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
+       struct iommu_domain *iommu;
+       struct msm_mmu *mmu;
+       struct msm_gem_address_space *aspace;
+       u64 start, size;
+
+       iommu = iommu_domain_alloc(&platform_bus_type);
+       if (!iommu)
+               return NULL;
+
+       /*
+        * This allows the GPU to set the bus attributes required to use system
+        * cache on behalf of the iommu page table walker.
+        */
+       if (!IS_ERR_OR_NULL(a6xx_gpu->htw_llc_slice))
+               adreno_set_llc_attributes(iommu);
+
+       mmu = msm_iommu_new(&pdev->dev, iommu);
+       if (IS_ERR(mmu)) {
+               iommu_domain_free(iommu);
+               return ERR_CAST(mmu);
+       }
+
+       /*
+        * Use the aperture start or SZ_16M, whichever is greater. This will
+        * ensure that we align with the allocated pagetable range while still
+        * allowing room in the lower 32 bits for GMEM and whatnot
+        */
+       start = max_t(u64, SZ_16M, iommu->geometry.aperture_start);
+       size = iommu->geometry.aperture_end - start + 1;
+
+       aspace = msm_gem_address_space_create(mmu, "gpu",
+               start & GENMASK_ULL(48, 0), size);
+
+       if (IS_ERR(aspace) && !IS_ERR(mmu))
+               mmu->funcs->destroy(mmu);
+
+       return aspace;
+}
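
A hedged worked example of the start/size math, assuming the IOMMU reports a full 48-bit aperture:

        /* aperture = [0, 0xffffffffffff]:
         *   start = max(SZ_16M, 0)              = 0x0000000001000000
         *   size  = 0xffffffffffff - start + 1  = 0x0000ffffff000000
         * i.e. the bottom 16MB of GPU VA stays free for GMEM and friends.
         */
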
+
+static struct msm_gem_address_space *
 a6xx_create_private_address_space(struct msm_gpu *gpu)
 {
        struct msm_mmu *mmu;
@@ -1264,6 +1368,69 @@ static uint32_t a6xx_get_rptr(struct msm_gpu *gpu, struct msm_ringbuffer *ring)
        return ring->memptrs->rptr = gpu_read(gpu, REG_A6XX_CP_RB_RPTR);
 }
 
+static u32 a618_get_speed_bin(u32 fuse)
+{
+       if (fuse == 0)
+               return 0;
+       else if (fuse == 169)
+               return 1;
+       else if (fuse == 174)
+               return 2;
+
+       return UINT_MAX;
+}
+
+static u32 fuse_to_supp_hw(struct device *dev, u32 revn, u32 fuse)
+{
+       u32 val = UINT_MAX;
+
+       if (revn == 618)
+               val = a618_get_speed_bin(fuse);
+
+       if (val == UINT_MAX) {
+               DRM_DEV_ERROR(dev,
+                       "missing support for speed-bin: %u. Some OPPs may not be supported by hardware",
+                       fuse);
+               return UINT_MAX;
+       }
+
+       return (1 << val);
+}
+
+static int a6xx_set_supported_hw(struct device *dev, struct a6xx_gpu *a6xx_gpu,
+               u32 revn)
+{
+       struct opp_table *opp_table;
+       u32 supp_hw = UINT_MAX;
+       u16 speedbin;
+       int ret;
+
+       ret = nvmem_cell_read_u16(dev, "speed_bin", &speedbin);
+       /*
+        * -ENOENT means that the platform doesn't support speedbin which is
+        * fine
+        */
+       if (ret == -ENOENT) {
+               return 0;
+       } else if (ret) {
+               DRM_DEV_ERROR(dev,
+                             "failed to read speed-bin (%d). Some OPPs may not be supported by hardware",
+                             ret);
+               goto done;
+       }
+       speedbin = le16_to_cpu(speedbin);
+
+       supp_hw = fuse_to_supp_hw(dev, revn, speedbin);
+
+done:
+       opp_table = dev_pm_opp_set_supported_hw(dev, &supp_hw, 1);
+       if (IS_ERR(opp_table))
+               return PTR_ERR(opp_table);
+
+       a6xx_gpu->opp_table = opp_table;
+       return 0;
+}
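
Putting the pieces together for a618, the only target wired up here so far:

        /* fuse   0 -> bin 0 -> supp_hw = 1 << 0 = 0x1
         * fuse 169 -> bin 1 -> supp_hw = 1 << 1 = 0x2
         * fuse 174 -> bin 2 -> supp_hw = 1 << 2 = 0x4
         * unknown  -> supp_hw = UINT_MAX, every OPP stays available
         */

dev_pm_opp_set_supported_hw() then matches this mask against each OPP's opp-supported-hw DT property, so only the OPPs blessed for the fused speed bin remain enabled.
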
+
 static const struct adreno_gpu_funcs funcs = {
        .base = {
                .get_param = adreno_get_param,
@@ -1285,7 +1452,7 @@ static const struct adreno_gpu_funcs funcs = {
                .gpu_state_get = a6xx_gpu_state_get,
                .gpu_state_put = a6xx_gpu_state_put,
 #endif
-               .create_address_space = adreno_iommu_create_address_space,
+               .create_address_space = a6xx_create_address_space,
                .create_private_address_space = a6xx_create_private_address_space,
                .get_rptr = a6xx_get_rptr,
        },
@@ -1325,6 +1492,12 @@ struct msm_gpu *a6xx_gpu_init(struct drm_device *dev)
 
        a6xx_llc_slices_init(pdev, a6xx_gpu);
 
+       ret = a6xx_set_supported_hw(&pdev->dev, a6xx_gpu, info->revn);
+       if (ret) {
+               a6xx_destroy(&(a6xx_gpu->base.base));
+               return ERR_PTR(ret);
+       }
+
        ret = adreno_gpu_init(dev, pdev, adreno_gpu, &funcs, 1);
        if (ret) {
                a6xx_destroy(&(a6xx_gpu->base.base));
index e793d32..ce0610c 100644 (file)
@@ -33,6 +33,8 @@ struct a6xx_gpu {
        void *llc_slice;
        void *htw_llc_slice;
        bool have_mmu500;
+
+       struct opp_table *opp_table;
 };
 
 #define to_a6xx_gpu(x) container_of(x, struct a6xx_gpu, base)
index 12e75ba..600d445 100644 (file)
@@ -134,6 +134,41 @@ static const struct adreno_info gpulist[] = {
                .inactive_period = DRM_MSM_INACTIVE_PERIOD,
                .init  = a4xx_gpu_init,
        }, {
+               .rev   = ADRENO_REV(5, 0, 8, ANY_ID),
+               .revn = 508,
+               .name = "A508",
+               .fw = {
+                       [ADRENO_FW_PM4] = "a530_pm4.fw",
+                       [ADRENO_FW_PFP] = "a530_pfp.fw",
+               },
+               .gmem = (SZ_128K + SZ_8K),
+               /*
+                * Increase inactive period to 250 to avoid bouncing
+                * the GDSC which appears to make it grumpy
+                */
+               .inactive_period = 250,
+               .quirks = ADRENO_QUIRK_LMLOADKILL_DISABLE,
+               .init = a5xx_gpu_init,
+               .zapfw = "a508_zap.mdt",
+       }, {
+               .rev   = ADRENO_REV(5, 0, 9, ANY_ID),
+               .revn = 509,
+               .name = "A509",
+               .fw = {
+                       [ADRENO_FW_PM4] = "a530_pm4.fw",
+                       [ADRENO_FW_PFP] = "a530_pfp.fw",
+               },
+               .gmem = (SZ_256K + SZ_16K),
+               /*
+                * Increase inactive period to 250 to avoid bouncing
+                * the GDSC which appears to make it grumpy
+                */
+               .inactive_period = 250,
+               .quirks = ADRENO_QUIRK_LMLOADKILL_DISABLE,
+               .init = a5xx_gpu_init,
+               /* Adreno 509 uses the same ZAP as 512 */
+               .zapfw = "a512_zap.mdt",
+       }, {
                .rev   = ADRENO_REV(5, 1, 0, ANY_ID),
                .revn = 510,
                .name = "A510",
@@ -149,6 +184,23 @@ static const struct adreno_info gpulist[] = {
                .inactive_period = 250,
                .init = a5xx_gpu_init,
        }, {
+               .rev   = ADRENO_REV(5, 1, 2, ANY_ID),
+               .revn = 512,
+               .name = "A512",
+               .fw = {
+                       [ADRENO_FW_PM4] = "a530_pm4.fw",
+                       [ADRENO_FW_PFP] = "a530_pfp.fw",
+               },
+               .gmem = (SZ_256K + SZ_16K),
+               /*
+                * Increase inactive period to 250 to avoid bouncing
+                * the GDSC which appears to make it grumpy
+                */
+               .inactive_period = 250,
+               .quirks = ADRENO_QUIRK_LMLOADKILL_DISABLE,
+               .init = a5xx_gpu_init,
+               .zapfw = "a512_zap.mdt",
+       }, {
                .rev = ADRENO_REV(5, 3, 0, 2),
                .revn = 530,
                .name = "A530",
@@ -168,7 +220,7 @@ static const struct adreno_info gpulist[] = {
                .init = a5xx_gpu_init,
                .zapfw = "a530_zap.mdt",
        }, {
-               .rev = ADRENO_REV(5, 4, 0, 2),
+               .rev = ADRENO_REV(5, 4, 0, ANY_ID),
                .revn = 540,
                .name = "A540",
                .fw = {
index f091756..0f184c3 100644 (file)
@@ -186,11 +186,18 @@ int adreno_zap_shader_load(struct msm_gpu *gpu, u32 pasid)
        return zap_shader_load_mdt(gpu, adreno_gpu->info->zapfw, pasid);
 }
 
+void adreno_set_llc_attributes(struct iommu_domain *iommu)
+{
+       struct io_pgtable_domain_attr pgtbl_cfg;
+
+       pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
+       iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
+}
+
 struct msm_gem_address_space *
 adreno_iommu_create_address_space(struct msm_gpu *gpu,
                struct platform_device *pdev)
 {
-       struct adreno_gpu *adreno_gpu = to_adreno_gpu(gpu);
        struct iommu_domain *iommu;
        struct msm_mmu *mmu;
        struct msm_gem_address_space *aspace;
@@ -200,20 +207,6 @@ adreno_iommu_create_address_space(struct msm_gpu *gpu,
        if (!iommu)
                return NULL;
 
-
-       if (adreno_is_a6xx(adreno_gpu)) {
-               struct a6xx_gpu *a6xx_gpu = to_a6xx_gpu(adreno_gpu);
-               struct io_pgtable_domain_attr pgtbl_cfg;
-               /*
-               * This allows GPU to set the bus attributes required to use system
-               * cache on behalf of the iommu page table walker.
-               */
-               if (!IS_ERR(a6xx_gpu->htw_llc_slice)) {
-                       pgtbl_cfg.quirks = IO_PGTABLE_QUIRK_ARM_OUTER_WBWA;
-                       iommu_domain_set_attr(iommu, DOMAIN_ATTR_IO_PGTABLE_CFG, &pgtbl_cfg);
-               }
-       }
-
        mmu = msm_iommu_new(&pdev->dev, iommu);
        if (IS_ERR(mmu)) {
                iommu_domain_free(iommu);
index b3d9a33..ccac275 100644 (file)
@@ -197,11 +197,26 @@ static inline int adreno_is_a430(struct adreno_gpu *gpu)
        return gpu->revn == 430;
 }
 
+static inline int adreno_is_a508(struct adreno_gpu *gpu)
+{
+       return gpu->revn == 508;
+}
+
+static inline int adreno_is_a509(struct adreno_gpu *gpu)
+{
+       return gpu->revn == 509;
+}
+
 static inline int adreno_is_a510(struct adreno_gpu *gpu)
 {
        return gpu->revn == 510;
 }
 
+static inline int adreno_is_a512(struct adreno_gpu *gpu)
+{
+       return gpu->revn == 512;
+}
+
 static inline int adreno_is_a530(struct adreno_gpu *gpu)
 {
        return gpu->revn == 530;
@@ -212,11 +227,6 @@ static inline int adreno_is_a540(struct adreno_gpu *gpu)
        return gpu->revn == 540;
 }
 
-static inline bool adreno_is_a6xx(struct adreno_gpu *gpu)
-{
-       return ((gpu->revn < 700 && gpu->revn > 599));
-}
-
 static inline int adreno_is_a618(struct adreno_gpu *gpu)
 {
        return gpu->revn == 618;
@@ -278,6 +288,8 @@ struct msm_gem_address_space *
 adreno_iommu_create_address_space(struct msm_gpu *gpu,
                struct platform_device *pdev);
 
+void adreno_set_llc_attributes(struct iommu_domain *iommu);
+
 /*
  * For a5xx and a6xx targets load the zap shader that is used to pull the GPU
  * out of secure mode
index 5a056c1..b2be39b 100644 (file)
@@ -4,8 +4,10 @@
  */
 
 #define pr_fmt(fmt)    "[drm:%s:%d] " fmt, __func__, __LINE__
+#include <linux/delay.h>
 #include "dpu_encoder_phys.h"
 #include "dpu_hw_interrupts.h"
+#include "dpu_hw_pingpong.h"
 #include "dpu_core_irq.h"
 #include "dpu_formats.h"
 #include "dpu_trace.h"
@@ -35,6 +37,8 @@
 
 #define DPU_ENC_WR_PTR_START_TIMEOUT_US 20000
 
+#define DPU_ENC_MAX_POLL_TIMEOUT_US    2000
+
 static bool dpu_encoder_phys_cmd_is_master(struct dpu_encoder_phys *phys_enc)
 {
        return (phys_enc->split_role != ENC_ROLE_SLAVE) ? true : false;
@@ -368,15 +372,12 @@ static void dpu_encoder_phys_cmd_tearcheck_config(
        tc_cfg.vsync_count = vsync_hz /
                                (mode->vtotal * drm_mode_vrefresh(mode));
 
-       /* enable external TE after kickoff to avoid premature autorefresh */
-       tc_cfg.hw_vsync_mode = 0;
-
        /*
-        * By setting sync_cfg_height to near max register value, we essentially
-        * disable dpu hw generated TE signal, since hw TE will arrive first.
-        * Only caveat is if due to error, we hit wrap-around.
+        * Set the sync_cfg_height to twice vtotal so that if we lose a
+        * TE event coming from the display TE pin we won't stall immediately
         */
-       tc_cfg.sync_cfg_height = 0xFFF0;
+       tc_cfg.hw_vsync_mode = 1;
+       tc_cfg.sync_cfg_height = mode->vtotal * 2;
        tc_cfg.vsync_init_val = mode->vdisplay;
        tc_cfg.sync_threshold_start = DEFAULT_TEARCHECK_SYNC_THRESH_START;
        tc_cfg.sync_threshold_continue = DEFAULT_TEARCHECK_SYNC_THRESH_CONTINUE;
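
For scale, a hedged example assuming a 19.2 MHz vsync counter clock and a 1080x2160 command-mode panel (vtotal of roughly 2208) at 60 Hz:

        /* tc_cfg.vsync_count     = 19200000 / (2208 * 60) ~= 144
         * tc_cfg.sync_cfg_height = 2208 * 2 = 4416, so the HW-generated TE
         *                          only fires after about two frames
         *                          without a panel TE
         * tc_cfg.vsync_init_val  = 2160 (vdisplay)
         */
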
@@ -580,6 +581,69 @@ static void dpu_encoder_phys_cmd_prepare_for_kickoff(
                        atomic_read(&phys_enc->pending_kickoff_cnt));
 }
 
+static bool dpu_encoder_phys_cmd_is_ongoing_pptx(
+               struct dpu_encoder_phys *phys_enc)
+{
+       struct dpu_hw_pp_vsync_info info;
+
+       if (!phys_enc)
+               return false;
+
+       phys_enc->hw_pp->ops.get_vsync_info(phys_enc->hw_pp, &info);
+       if (info.wr_ptr_line_count > 0 &&
+           info.wr_ptr_line_count < phys_enc->cached_mode.vdisplay)
+               return true;
+
+       return false;
+}
+
+static void dpu_encoder_phys_cmd_prepare_commit(
+               struct dpu_encoder_phys *phys_enc)
+{
+       struct dpu_encoder_phys_cmd *cmd_enc =
+               to_dpu_encoder_phys_cmd(phys_enc);
+       int trial = 0;
+
+       if (!phys_enc)
+               return;
+       if (!phys_enc->hw_pp)
+               return;
+       if (!dpu_encoder_phys_cmd_is_master(phys_enc))
+               return;
+
+       /* If autorefresh is already disabled, we have nothing to do */
+       if (!phys_enc->hw_pp->ops.get_autorefresh(phys_enc->hw_pp, NULL))
+               return;
+
+       /*
+        * If autorefresh is enabled, disable it and make sure it is safe to
+        * proceed with the current frame commit/push. The sequence followed is:
+        * 1. Disable TE
+        * 2. Disable the autorefresh config
+        * 3. Poll until the ongoing frame transfer completes
+        * 4. Enable TE back
+        */
+       _dpu_encoder_phys_cmd_connect_te(phys_enc, false);
+       phys_enc->hw_pp->ops.setup_autorefresh(phys_enc->hw_pp, 0, false);
+
+       do {
+               udelay(DPU_ENC_MAX_POLL_TIMEOUT_US);
+               if ((trial * DPU_ENC_MAX_POLL_TIMEOUT_US)
+                               > (KICKOFF_TIMEOUT_MS * USEC_PER_MSEC)) {
+                       DPU_ERROR_CMDENC(cmd_enc,
+                                       "disable autorefresh failed\n");
+                       break;
+               }
+
+               trial++;
+       } while (dpu_encoder_phys_cmd_is_ongoing_pptx(phys_enc));
+
+       _dpu_encoder_phys_cmd_connect_te(phys_enc, true);
+
+       DPU_DEBUG_CMDENC(to_dpu_encoder_phys_cmd(phys_enc),
+                        "disabled autorefresh\n");
+}
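
The poll budget falls out of the two constants (KICKOFF_TIMEOUT_MS is defined elsewhere in the driver):

        /* Each pass costs udelay(DPU_ENC_MAX_POLL_TIMEOUT_US) = 2 ms, and
         * the loop aborts once trial * 2000 us exceeds
         * KICKOFF_TIMEOUT_MS * USEC_PER_MSEC, i.e. after roughly
         * KICKOFF_TIMEOUT_MS / 2 polls.
         */
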
+
 static int _dpu_encoder_phys_cmd_wait_for_ctl_start(
                struct dpu_encoder_phys *phys_enc)
 {
@@ -621,20 +685,15 @@ static int dpu_encoder_phys_cmd_wait_for_tx_complete(
 static int dpu_encoder_phys_cmd_wait_for_commit_done(
                struct dpu_encoder_phys *phys_enc)
 {
-       int rc = 0;
        struct dpu_encoder_phys_cmd *cmd_enc;
 
        cmd_enc = to_dpu_encoder_phys_cmd(phys_enc);
 
        /* only required for master controller */
-       if (dpu_encoder_phys_cmd_is_master(phys_enc))
-               rc = _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc);
-
-       /* required for both controllers */
-       if (!rc && cmd_enc->serialize_wait4pp)
-               dpu_encoder_phys_cmd_prepare_for_kickoff(phys_enc);
+       if (!dpu_encoder_phys_cmd_is_master(phys_enc))
+               return 0;
 
-       return rc;
+       return _dpu_encoder_phys_cmd_wait_for_ctl_start(phys_enc);
 }
 
 static int dpu_encoder_phys_cmd_wait_for_vblank(
@@ -681,6 +740,7 @@ static void dpu_encoder_phys_cmd_trigger_start(
 static void dpu_encoder_phys_cmd_init_ops(
                struct dpu_encoder_phys_ops *ops)
 {
+       ops->prepare_commit = dpu_encoder_phys_cmd_prepare_commit;
        ops->is_master = dpu_encoder_phys_cmd_is_master;
        ops->mode_set = dpu_encoder_phys_cmd_mode_set;
        ops->mode_fixup = dpu_encoder_phys_cmd_mode_fixup;
index 90393fe..189f353 100644 (file)
 
 #define VIG_MASK \
        (BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_QOS) |\
-       BIT(DPU_SSPP_CSC_10BIT) | BIT(DPU_SSPP_CDP) | BIT(DPU_SSPP_QOS_8LVL) |\
+       BIT(DPU_SSPP_CSC_10BIT) | BIT(DPU_SSPP_CDP) |\
        BIT(DPU_SSPP_TS_PREFILL) | BIT(DPU_SSPP_EXCL_RECT))
 
 #define VIG_SDM845_MASK \
-       (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3))
+       (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED3))
 
 #define VIG_SC7180_MASK \
-       (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED4))
+       (VIG_MASK | BIT(DPU_SSPP_QOS_8LVL) | BIT(DPU_SSPP_SCALER_QSEED4))
+
+#define VIG_SM8250_MASK \
+       (VIG_MASK | BIT(DPU_SSPP_SCALER_QSEED3LITE))
 
 #define DMA_SDM845_MASK \
        (BIT(DPU_SSPP_SRC) | BIT(DPU_SSPP_QOS) | BIT(DPU_SSPP_QOS_8LVL) |\
@@ -185,7 +188,7 @@ static const struct dpu_caps sm8150_dpu_caps = {
 static const struct dpu_caps sm8250_dpu_caps = {
        .max_mixer_width = DEFAULT_DPU_OUTPUT_LINE_WIDTH,
        .max_mixer_blendstages = 0xb,
-       .qseed_type = DPU_SSPP_SCALER_QSEED3, /* TODO: qseed3 lite */
+       .qseed_type = DPU_SSPP_SCALER_QSEED3LITE,
        .smart_dma_rev = DPU_SSPP_SMART_DMA_V2, /* TODO: v2.5 */
        .ubwc_version = DPU_HW_UBWC_VER_40,
        .has_src_split = true,
@@ -444,6 +447,34 @@ static const struct dpu_sspp_cfg sc7180_sspp[] = {
                sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR1),
 };
 
+static const struct dpu_sspp_sub_blks sm8250_vig_sblk_0 =
+                               _VIG_SBLK("0", 5, DPU_SSPP_SCALER_QSEED3LITE);
+static const struct dpu_sspp_sub_blks sm8250_vig_sblk_1 =
+                               _VIG_SBLK("1", 6, DPU_SSPP_SCALER_QSEED3LITE);
+static const struct dpu_sspp_sub_blks sm8250_vig_sblk_2 =
+                               _VIG_SBLK("2", 7, DPU_SSPP_SCALER_QSEED3LITE);
+static const struct dpu_sspp_sub_blks sm8250_vig_sblk_3 =
+                               _VIG_SBLK("3", 8, DPU_SSPP_SCALER_QSEED3LITE);
+
+static const struct dpu_sspp_cfg sm8250_sspp[] = {
+       SSPP_BLK("sspp_0", SSPP_VIG0, 0x4000, VIG_SM8250_MASK,
+               sm8250_vig_sblk_0, 0,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG0),
+       SSPP_BLK("sspp_1", SSPP_VIG1, 0x6000, VIG_SM8250_MASK,
+               sm8250_vig_sblk_1, 4,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG1),
+       SSPP_BLK("sspp_2", SSPP_VIG2, 0x8000, VIG_SM8250_MASK,
+               sm8250_vig_sblk_2, 8, SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG2),
+       SSPP_BLK("sspp_3", SSPP_VIG3, 0xa000, VIG_SM8250_MASK,
+               sm8250_vig_sblk_3, 12,  SSPP_TYPE_VIG, DPU_CLK_CTRL_VIG3),
+       SSPP_BLK("sspp_8", SSPP_DMA0, 0x24000,  DMA_SDM845_MASK,
+               sdm845_dma_sblk_0, 1, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA0),
+       SSPP_BLK("sspp_9", SSPP_DMA1, 0x26000,  DMA_SDM845_MASK,
+               sdm845_dma_sblk_1, 5, SSPP_TYPE_DMA, DPU_CLK_CTRL_DMA1),
+       SSPP_BLK("sspp_10", SSPP_DMA2, 0x28000,  DMA_CURSOR_SDM845_MASK,
+               sdm845_dma_sblk_2, 9, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR0),
+       SSPP_BLK("sspp_11", SSPP_DMA3, 0x2a000,  DMA_CURSOR_SDM845_MASK,
+               sdm845_dma_sblk_3, 13, SSPP_TYPE_DMA, DPU_CLK_CTRL_CURSOR1),
+};
+
 /*************************************************************
  * MIXER sub blocks config
  *************************************************************/
@@ -532,23 +563,28 @@ static const struct dpu_dspp_sub_blks sm8150_dspp_sblk = {
                .len = 0x90, .version = 0x40000},
 };
 
-#define DSPP_BLK(_name, _id, _base, _sblk) \
+#define DSPP_BLK(_name, _id, _base, _mask, _sblk) \
                {\
                .name = _name, .id = _id, \
                .base = _base, .len = 0x1800, \
-               .features = DSPP_SC7180_MASK, \
+               .features = _mask, \
                .sblk = _sblk \
                }
 
 static const struct dpu_dspp_cfg sc7180_dspp[] = {
-       DSPP_BLK("dspp_0", DSPP_0, 0x54000, &sc7180_dspp_sblk),
+       DSPP_BLK("dspp_0", DSPP_0, 0x54000, DSPP_SC7180_MASK,
+                &sc7180_dspp_sblk),
 };
 
 static const struct dpu_dspp_cfg sm8150_dspp[] = {
-       DSPP_BLK("dspp_0", DSPP_0, 0x54000, &sm8150_dspp_sblk),
-       DSPP_BLK("dspp_1", DSPP_1, 0x56000, &sm8150_dspp_sblk),
-       DSPP_BLK("dspp_2", DSPP_2, 0x58000, &sm8150_dspp_sblk),
-       DSPP_BLK("dspp_3", DSPP_3, 0x5a000, &sm8150_dspp_sblk),
+       DSPP_BLK("dspp_0", DSPP_0, 0x54000, DSPP_SC7180_MASK,
+                &sm8150_dspp_sblk),
+       DSPP_BLK("dspp_1", DSPP_1, 0x56000, DSPP_SC7180_MASK,
+                &sm8150_dspp_sblk),
+       DSPP_BLK("dspp_2", DSPP_2, 0x58000, DSPP_SC7180_MASK,
+                &sm8150_dspp_sblk),
+       DSPP_BLK("dspp_3", DSPP_3, 0x5a000, DSPP_SC7180_MASK,
+                &sm8150_dspp_sblk),
 };
 
 /*************************************************************
@@ -624,33 +660,33 @@ static const struct dpu_merge_3d_cfg sm8150_merge_3d[] = {
 /*************************************************************
  * INTF sub blocks config
  *************************************************************/
-#define INTF_BLK(_name, _id, _base, _type, _ctrl_id, _features) \
+#define INTF_BLK(_name, _id, _base, _type, _ctrl_id, _progfetch, _features) \
        {\
        .name = _name, .id = _id, \
        .base = _base, .len = 0x280, \
        .features = _features, \
        .type = _type, \
        .controller_id = _ctrl_id, \
-       .prog_fetch_lines_worst_case = 24 \
+       .prog_fetch_lines_worst_case = _progfetch \
        }
 
 static const struct dpu_intf_cfg sdm845_intf[] = {
-       INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0, INTF_SDM845_MASK),
-       INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0, INTF_SDM845_MASK),
-       INTF_BLK("intf_2", INTF_2, 0x6B000, INTF_DSI, 1, INTF_SDM845_MASK),
-       INTF_BLK("intf_3", INTF_3, 0x6B800, INTF_DP, 1, INTF_SDM845_MASK),
+       INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0, 24, INTF_SDM845_MASK),
+       INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0, 24, INTF_SDM845_MASK),
+       INTF_BLK("intf_2", INTF_2, 0x6B000, INTF_DSI, 1, 24, INTF_SDM845_MASK),
+       INTF_BLK("intf_3", INTF_3, 0x6B800, INTF_DP, 1, 24, INTF_SDM845_MASK),
 };
 
 static const struct dpu_intf_cfg sc7180_intf[] = {
-       INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0, INTF_SC7180_MASK),
-       INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0, INTF_SC7180_MASK),
+       INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0, 24, INTF_SC7180_MASK),
+       INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0, 24, INTF_SC7180_MASK),
 };
 
 static const struct dpu_intf_cfg sm8150_intf[] = {
-       INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0, INTF_SC7180_MASK),
-       INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0, INTF_SC7180_MASK),
-       INTF_BLK("intf_2", INTF_2, 0x6B000, INTF_DSI, 1, INTF_SC7180_MASK),
-       INTF_BLK("intf_3", INTF_3, 0x6B800, INTF_DP, 1, INTF_SC7180_MASK),
+       INTF_BLK("intf_0", INTF_0, 0x6A000, INTF_DP, 0, 24, INTF_SC7180_MASK),
+       INTF_BLK("intf_1", INTF_1, 0x6A800, INTF_DSI, 0, 24, INTF_SC7180_MASK),
+       INTF_BLK("intf_2", INTF_2, 0x6B000, INTF_DSI, 1, 24, INTF_SC7180_MASK),
+       INTF_BLK("intf_3", INTF_3, 0x6B800, INTF_DP, 1, 24, INTF_SC7180_MASK),
 };
 
 /*************************************************************
@@ -969,9 +1005,8 @@ static void sm8250_cfg_init(struct dpu_mdss_cfg *dpu_cfg)
                .mdp = sm8250_mdp,
                .ctl_count = ARRAY_SIZE(sm8150_ctl),
                .ctl = sm8150_ctl,
-               /* TODO: sspp qseed version differs from 845 */
-               .sspp_count = ARRAY_SIZE(sdm845_sspp),
-               .sspp = sdm845_sspp,
+               .sspp_count = ARRAY_SIZE(sm8250_sspp),
+               .sspp = sm8250_sspp,
                .mixer_count = ARRAY_SIZE(sm8150_lm),
                .mixer = sm8150_lm,
                .dspp_count = ARRAY_SIZE(sm8150_dspp),
index eaef99d..ea4647d 100644 (file)
@@ -95,6 +95,7 @@ enum {
  * @DPU_SSPP_SRC             Src and fetch part of the pipes,
  * @DPU_SSPP_SCALER_QSEED2,  QSEED2 algorithm support
  * @DPU_SSPP_SCALER_QSEED3,  QSEED3 algorithm support
+ * @DPU_SSPP_SCALER_QSEED3LITE,  QSEED3 Lite algorithm support
  * @DPU_SSPP_SCALER_QSEED4,  QSEED4 algorithm support
  * @DPU_SSPP_SCALER_RGB,     RGB Scaler, supported by RGB pipes
  * @DPU_SSPP_CSC,            Support of Color space conversion
@@ -114,6 +115,7 @@ enum {
        DPU_SSPP_SRC = 0x1,
        DPU_SSPP_SCALER_QSEED2,
        DPU_SSPP_SCALER_QSEED3,
+       DPU_SSPP_SCALER_QSEED3LITE,
        DPU_SSPP_SCALER_QSEED4,
        DPU_SSPP_SCALER_RGB,
        DPU_SSPP_CSC,
index 8981cfa..92e6f1b 100644 (file)
@@ -496,7 +496,9 @@ static void dpu_hw_ctl_intf_cfg_v1(struct dpu_hw_ctl *ctx,
 
        DPU_REG_WRITE(c, CTL_TOP, mode_sel);
        DPU_REG_WRITE(c, CTL_INTF_ACTIVE, intf_active);
-       DPU_REG_WRITE(c, CTL_MERGE_3D_ACTIVE, BIT(cfg->merge_3d - MERGE_3D_0));
+       if (cfg->merge_3d)
+               DPU_REG_WRITE(c, CTL_MERGE_3D_ACTIVE,
+                             BIT(cfg->merge_3d - MERGE_3D_0));
 }
 
 static void dpu_hw_ctl_intf_cfg(struct dpu_hw_ctl *ctx,
index bea4ab5..245a7a6 100644 (file)
@@ -23,6 +23,7 @@
 #define PP_WR_PTR_IRQ                   0x024
 #define PP_OUT_LINE_COUNT               0x028
 #define PP_LINE_COUNT                   0x02C
+#define PP_AUTOREFRESH_CONFIG           0x030
 
 #define PP_FBC_MODE                     0x034
 #define PP_FBC_BUDGET_CTL               0x038
@@ -120,6 +121,29 @@ static int dpu_hw_pp_setup_te_config(struct dpu_hw_pingpong *pp,
        return 0;
 }
 
+static void dpu_hw_pp_setup_autorefresh_config(struct dpu_hw_pingpong *pp,
+                                              u32 frame_count, bool enable)
+{
+       DPU_REG_WRITE(&pp->hw, PP_AUTOREFRESH_CONFIG,
+                     enable ? (BIT(31) | frame_count) : 0);
+}
+
+/*
+ * dpu_hw_pp_get_autorefresh_config - Get autorefresh config from HW
+ * @pp:          DPU pingpong structure
+ * @frame_count: Used to return the current frame count from hw
+ *
+ * Returns: True if autorefresh enabled, false if disabled.
+ */
+static bool dpu_hw_pp_get_autorefresh_config(struct dpu_hw_pingpong *pp,
+                                            u32 *frame_count)
+{
+       u32 val = DPU_REG_READ(&pp->hw, PP_AUTOREFRESH_CONFIG);
+       if (frame_count != NULL)
+               *frame_count = val & 0xffff;
+       return !!((val & BIT(31)) >> 31);
+}
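
How the two new ops pair up in practice, mirroring the prepare_commit path added earlier in this series (bit 31 = enable, bits 15:0 = frame count, per the helpers above):

        u32 frames;

        if (pp->ops.get_autorefresh(pp, &frames))        /* currently on? */
                pp->ops.setup_autorefresh(pp, 0, false); /* force it off */
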
+
 static int dpu_hw_pp_poll_timeout_wr_ptr(struct dpu_hw_pingpong *pp,
                u32 timeout_us)
 {
@@ -228,6 +252,8 @@ static void _setup_pingpong_ops(struct dpu_hw_pingpong *c,
        c->ops.enable_tearcheck = dpu_hw_pp_enable_te;
        c->ops.connect_external_te = dpu_hw_pp_connect_external_te;
        c->ops.get_vsync_info = dpu_hw_pp_get_vsync_info;
+       c->ops.setup_autorefresh = dpu_hw_pp_setup_autorefresh_config;
+       c->ops.get_autorefresh = dpu_hw_pp_get_autorefresh_config;
        c->ops.poll_timeout_wr_ptr = dpu_hw_pp_poll_timeout_wr_ptr;
        c->ops.get_line_count = dpu_hw_pp_get_line_count;
 
index 6902b9b..845b9ce 100644 (file)
@@ -63,6 +63,8 @@ struct dpu_hw_dither_cfg {
  *  @setup_tearcheck : program tear check values
  *  @enable_tearcheck : enables tear check
  *  @get_vsync_info : retrieves timing info of the panel
+ *  @setup_autorefresh : configure and enable the autorefresh config
+ *  @get_autorefresh : retrieve autorefresh config from hardware
  *  @setup_dither : function to program the dither hw block
  *  @get_line_count: obtain current vertical line counter
  */
@@ -95,6 +97,18 @@ struct dpu_hw_pingpong_ops {
                        struct dpu_hw_pp_vsync_info  *info);
 
        /**
+        * configure and enable the autorefresh config
+        */
+       void (*setup_autorefresh)(struct dpu_hw_pingpong *pp,
+                                 u32 frame_count, bool enable);
+
+       /**
+        * retrieve autorefresh config from hardware
+        */
+       bool (*get_autorefresh)(struct dpu_hw_pingpong *pp,
+                               u32 *frame_count);
+
+       /**
         * poll until write pointer transmission starts
         * @Return: 0 on success, -ETIMEDOUT on timeout
         */
index 2c2ca53..34d81aa 100644 (file)
@@ -673,6 +673,7 @@ static void _setup_layer_ops(struct dpu_hw_pipe *c,
                c->ops.setup_multirect = dpu_hw_sspp_setup_multirect;
 
        if (test_bit(DPU_SSPP_SCALER_QSEED3, &features) ||
+                       test_bit(DPU_SSPP_SCALER_QSEED3LITE, &features) ||
                        test_bit(DPU_SSPP_SCALER_QSEED4, &features)) {
                c->ops.setup_scaler = _dpu_hw_sspp_setup_scaler3;
                c->ops.get_scaler_ver = _dpu_hw_sspp_get_scaler3_ver;
index 85b018a..fdfd4b4 100644 (file)
@@ -28,6 +28,7 @@ struct dpu_hw_pipe;
 #define DPU_SSPP_SCALER ((1UL << DPU_SSPP_SCALER_RGB) | \
        (1UL << DPU_SSPP_SCALER_QSEED2) | \
         (1UL << DPU_SSPP_SCALER_QSEED3) | \
+        (1UL << DPU_SSPP_SCALER_QSEED3LITE) | \
          (1UL << DPU_SSPP_SCALER_QSEED4))
 
 /**
index 84e9875..f94584c 100644 (file)
@@ -59,6 +59,19 @@ static u32 dpu_hw_util_log_mask = DPU_DBG_MASK_NONE;
 #define QSEED3_SEP_LUT_SIZE \
        (QSEED3_LUT_SIZE * QSEED3_SEPARABLE_LUTS * sizeof(u32))
 
+/* DPU_SCALER_QSEED3LITE */
+#define QSEED3LITE_COEF_LUT_Y_SEP_BIT         4
+#define QSEED3LITE_COEF_LUT_UV_SEP_BIT        5
+#define QSEED3LITE_COEF_LUT_CTRL              0x4C
+#define QSEED3LITE_COEF_LUT_SWAP_BIT          0
+#define QSEED3LITE_DIR_FILTER_WEIGHT          0x60
+#define QSEED3LITE_FILTERS                 2
+#define QSEED3LITE_SEPARABLE_LUTS             10
+#define QSEED3LITE_LUT_SIZE                   33
+#define QSEED3LITE_SEP_LUT_SIZE \
+               (QSEED3LITE_LUT_SIZE * QSEED3LITE_SEPARABLE_LUTS * sizeof(u32))
+
 void dpu_reg_write(struct dpu_hw_blk_reg_map *c,
                u32 reg_off,
                u32 val,
@@ -156,6 +169,57 @@ static void _dpu_hw_setup_scaler3_lut(struct dpu_hw_blk_reg_map *c,
 
 }
 
+static void _dpu_hw_setup_scaler3lite_lut(struct dpu_hw_blk_reg_map *c,
+               struct dpu_hw_scaler3_cfg *scaler3_cfg, u32 offset)
+{
+       int j, filter;
+       int config_lut = 0x0;
+       unsigned long lut_flags;
+       u32 lut_addr, lut_offset;
+       u32 *lut[QSEED3LITE_FILTERS] = {NULL, NULL};
+       static const uint32_t off_tbl[QSEED3_FILTERS] = { 0x000, 0x200 };
+
+       DPU_REG_WRITE(c, QSEED3LITE_DIR_FILTER_WEIGHT + offset, scaler3_cfg->dir_weight);
+
+       if (!scaler3_cfg->sep_lut)
+               return;
+
+       lut_flags = (unsigned long) scaler3_cfg->lut_flag;
+       if (test_bit(QSEED3_COEF_LUT_Y_SEP_BIT, &lut_flags) &&
+               (scaler3_cfg->y_rgb_sep_lut_idx < QSEED3LITE_SEPARABLE_LUTS) &&
+               (scaler3_cfg->sep_len == QSEED3LITE_SEP_LUT_SIZE)) {
+               lut[0] = scaler3_cfg->sep_lut +
+                       scaler3_cfg->y_rgb_sep_lut_idx * QSEED3LITE_LUT_SIZE;
+               config_lut = 1;
+       }
+       if (test_bit(QSEED3_COEF_LUT_UV_SEP_BIT, &lut_flags) &&
+               (scaler3_cfg->uv_sep_lut_idx < QSEED3LITE_SEPARABLE_LUTS) &&
+               (scaler3_cfg->sep_len == QSEED3LITE_SEP_LUT_SIZE)) {
+               lut[1] = scaler3_cfg->sep_lut +
+                       scaler3_cfg->uv_sep_lut_idx * QSEED3LITE_LUT_SIZE;
+               config_lut = 1;
+       }
+
+       if (config_lut) {
+               for (filter = 0; filter < QSEED3LITE_FILTERS; filter++) {
+                       if (!lut[filter])
+                               continue;
+                       lut_offset = 0;
+                       lut_addr = QSEED3_COEF_LUT + offset + off_tbl[filter];
+                       for (j = 0; j < QSEED3LITE_LUT_SIZE; j++) {
+                               DPU_REG_WRITE(c,
+                                       lut_addr,
+                                       (lut[filter])[lut_offset++]);
+                               lut_addr += 4;
+                       }
+               }
+       }
+
+       if (test_bit(QSEED3_COEF_LUT_SWAP_BIT, &lut_flags))
+               DPU_REG_WRITE(c, QSEED3_COEF_LUT_CTRL + offset, BIT(0));
+
+}
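
The LUT size check in this helper is plain arithmetic on the new constants:

        /* QSEED3LITE_SEP_LUT_SIZE = 33 entries * 10 LUTs * sizeof(u32)
         *                         = 1320 bytes
         * Each programmed filter (lut[0] = Y/RGB, lut[1] = UV) is one
         * 33-entry LUT, written at offsets 0x000 and 0x200 respectively.
         */
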
+
 static void _dpu_hw_setup_scaler3_de(struct dpu_hw_blk_reg_map *c,
                struct dpu_hw_scaler3_de_cfg *de_cfg, u32 offset)
 {
@@ -242,9 +306,12 @@ void dpu_hw_setup_scaler3(struct dpu_hw_blk_reg_map *c,
                op_mode |= BIT(8);
        }
 
-       if (scaler3_cfg->lut_flag)
-               _dpu_hw_setup_scaler3_lut(c, scaler3_cfg,
-                                                               scaler_offset);
+       if (scaler3_cfg->lut_flag) {
+               if (scaler_version < 0x2004)
+                       _dpu_hw_setup_scaler3_lut(c, scaler3_cfg, scaler_offset);
+               else
+                       _dpu_hw_setup_scaler3lite_lut(c, scaler3_cfg, scaler_offset);
+       }
 
        if (scaler_version == 0x1002) {
                phase_init =
index 234eb7d..ff3cffd 100644 (file)
@@ -97,6 +97,7 @@ struct dpu_hw_scaler3_de_cfg {
  * @ cir_lut:      pointer to circular filter LUT
  * @ sep_lut:      pointer to separable filter LUT
  * @ de: detail enhancer configuration
+ * @ dir_weight:   Directional weight
  */
 struct dpu_hw_scaler3_cfg {
        u32 enable;
@@ -137,6 +138,8 @@ struct dpu_hw_scaler3_cfg {
         * Detail enhancer settings
         */
        struct dpu_hw_scaler3_de_cfg de;
+
+       u32 dir_weight;
 };
 
 /**
index cf867f3..b757054 100644 (file)
@@ -30,7 +30,7 @@
 #define VBIF_XIN_HALT_CTRL0            0x0200
 #define VBIF_XIN_HALT_CTRL1            0x0204
 #define VBIF_XINL_QOS_RP_REMAP_000     0x0550
-#define VBIF_XINL_QOS_LVL_REMAP_000    0x0590
+#define VBIF_XINL_QOS_LVL_REMAP_000(v) (v < DPU_HW_VER_400 ? 0x570 : 0x0590)
 
 static void dpu_hw_clear_errors(struct dpu_hw_vbif *vbif,
                u32 *pnd_errors, u32 *src_errors)
@@ -156,18 +156,19 @@ static void dpu_hw_set_qos_remap(struct dpu_hw_vbif *vbif,
                u32 xin_id, u32 level, u32 remap_level)
 {
        struct dpu_hw_blk_reg_map *c;
-       u32 reg_val, reg_val_lvl, mask, reg_high, reg_shift;
+       u32 reg_lvl, reg_val, reg_val_lvl, mask, reg_high, reg_shift;
 
        if (!vbif)
                return;
 
        c = &vbif->hw;
 
+       reg_lvl = VBIF_XINL_QOS_LVL_REMAP_000(c->hwversion);
        reg_high = ((xin_id & 0x8) >> 3) * 4 + (level * 8);
        reg_shift = (xin_id & 0x7) * 4;
 
        reg_val = DPU_REG_READ(c, VBIF_XINL_QOS_RP_REMAP_000 + reg_high);
-       reg_val_lvl = DPU_REG_READ(c, VBIF_XINL_QOS_LVL_REMAP_000 + reg_high);
+       reg_val_lvl = DPU_REG_READ(c, reg_lvl + reg_high);
 
        mask = 0x7 << reg_shift;
 
@@ -178,7 +179,7 @@ static void dpu_hw_set_qos_remap(struct dpu_hw_vbif *vbif,
        reg_val_lvl |= (remap_level << reg_shift) & mask;
 
        DPU_REG_WRITE(c, VBIF_XINL_QOS_RP_REMAP_000 + reg_high, reg_val);
-       DPU_REG_WRITE(c, VBIF_XINL_QOS_LVL_REMAP_000 + reg_high, reg_val_lvl);
+       DPU_REG_WRITE(c, reg_lvl + reg_high, reg_val_lvl);
 }
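
A worked example of the remap addressing, assuming xin_id = 9 and level = 1:

        /* reg_high  = ((9 & 0x8) >> 3) * 4 + 1 * 8 = 12
         * reg_shift = (9 & 0x7) * 4                = 4
         * so the 3-bit remap value lands in bits 6:4 of the register at
         * base + 12, where the LVL bank base is 0x570 before DPU 4.0 and
         * 0x0590 from 4.0 on.
         */
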
 
 static void dpu_hw_set_write_gather_en(struct dpu_hw_vbif *vbif, u32 xin_id)
index 374b0e8..85f2c35 100644 (file)
@@ -43,6 +43,8 @@
 #define DPU_DEBUGFS_DIR "msm_dpu"
 #define DPU_DEBUGFS_HWMASKNAME "hw_log_mask"
 
+#define MIN_IB_BW      400000000ULL /* Min ib vote 400MB */
+
 static int dpu_kms_hw_init(struct msm_kms *kms);
 static void _dpu_kms_mmu_destroy(struct dpu_kms *dpu_kms);
 
@@ -749,7 +751,7 @@ static void _dpu_kms_set_encoder_mode(struct msm_kms *kms,
        case DRM_MODE_ENCODER_TMDS:
                info.num_of_h_tiles = 1;
                break;
-       };
+       }
 
        rc = dpu_encoder_setup(encoder->dev, encoder, &info);
        if (rc)
@@ -931,6 +933,9 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
                DPU_DEBUG("REG_DMA is not defined");
        }
 
+       if (of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss"))
+               dpu_kms_parse_data_bus_icc_path(dpu_kms);
+
        pm_runtime_get_sync(&dpu_kms->pdev->dev);
 
        dpu_kms->core_rev = readl_relaxed(dpu_kms->mmio + 0x0);
@@ -1032,9 +1037,6 @@ static int dpu_kms_hw_init(struct msm_kms *kms)
 
        dpu_vbif_init_memtypes(dpu_kms);
 
-       if (of_device_is_compatible(dev->dev->of_node, "qcom,sc7180-mdss"))
-               dpu_kms_parse_data_bus_icc_path(dpu_kms);
-
        pm_runtime_put_sync(&dpu_kms->pdev->dev);
 
        return 0;
@@ -1191,10 +1193,10 @@ static int __maybe_unused dpu_runtime_resume(struct device *dev)
 
        ddev = dpu_kms->dev;
 
+       WARN_ON(!(dpu_kms->num_paths));
        /* Min vote of BW is required before turning on AXI clk */
        for (i = 0; i < dpu_kms->num_paths; i++)
-               icc_set_bw(dpu_kms->path[i], 0,
-                       dpu_kms->catalog->perf.min_dram_ib);
+               icc_set_bw(dpu_kms->path[i], 0, Bps_to_icc(MIN_IB_BW));
 
        rc = msm_dss_enable_clk(mp->clk_config, mp->num_clk, true);
        if (rc) {
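The new MIN_IB_BW define feeds the interconnect framework, whose bandwidth units are kBps; Bps_to_icc() from <linux/interconnect.h> does the bytes-per-second conversion. A hedged sketch of the minimum-vote loop, with only the instantaneous-bandwidth (peak) slot set:

    #include <linux/interconnect.h>

    #define MIN_IB_BW 400000000ULL /* 400 MB/s, in bytes per second */

    /* Vote an ib floor on every path; average bandwidth stays 0. */
    static void vote_min_bus_bandwidth(struct icc_path **paths, int num_paths)
    {
            int i;

            for (i = 0; i < num_paths; i++)
                    icc_set_bw(paths[i], 0, Bps_to_icc(MIN_IB_BW));
    }

icc_set_bw() returns an error code; the sketch ignores it, as the runtime-resume hunk above does.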
index bc0231a..f898a8f 100644 (file)
@@ -1465,6 +1465,7 @@ static int _dpu_plane_init_debugfs(struct drm_plane *plane)
                        pdpu->debugfs_root, &pdpu->debugfs_src);
 
        if (cfg->features & BIT(DPU_SSPP_SCALER_QSEED3) ||
+                       cfg->features & BIT(DPU_SSPP_SCALER_QSEED3LITE) ||
                        cfg->features & BIT(DPU_SSPP_SCALER_QSEED2) ||
                        cfg->features & BIT(DPU_SSPP_SCALER_QSEED4)) {
                dpu_debugfs_setup_regset32(&pdpu->debugfs_scaler,
index df10c1a..94ce62a 100644 (file)
@@ -177,7 +177,7 @@ static const struct mdp5_cfg_hw msm8x74v2_config = {
                        [3] = INTF_HDMI,
                },
        },
-       .max_clk = 200000000,
+       .max_clk = 320000000,
 };
 
 static const struct mdp5_cfg_hw apq8084_config = {
index 0c8f9f8..f5d71b2 100644 (file)
@@ -1180,7 +1180,7 @@ static void mdp5_crtc_pp_done_irq(struct mdp_irq *irq, uint32_t irqstatus)
        struct mdp5_crtc *mdp5_crtc = container_of(irq, struct mdp5_crtc,
                                                                pp_done);
 
-       complete(&mdp5_crtc->pp_completion);
+       complete_all(&mdp5_crtc->pp_completion);
 }
 
 static void mdp5_crtc_wait_for_pp_done(struct drm_crtc *crtc)
index 19b35ae..7c22bfe 100644 (file)
@@ -32,6 +32,8 @@ struct dp_aux_private {
        struct drm_dp_aux dp_aux;
 };
 
+#define MAX_AUX_RETRIES                        5
+
 static const char *dp_aux_get_error(u32 aux_error)
 {
        switch (aux_error) {
@@ -336,7 +338,6 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
        ssize_t ret;
        int const aux_cmd_native_max = 16;
        int const aux_cmd_i2c_max = 128;
-       int const retry_count = 5;
        struct dp_aux_private *aux = container_of(dp_aux,
                struct dp_aux_private, dp_aux);
 
@@ -380,9 +381,8 @@ static ssize_t dp_aux_transfer(struct drm_dp_aux *dp_aux,
        if (ret < 0) {
                if (aux->native) {
                        aux->retry_cnt++;
-                       if (!(aux->retry_cnt % retry_count))
+                       if (!(aux->retry_cnt % MAX_AUX_RETRIES))
                                dp_catalog_aux_update_cfg(aux->catalog);
-                       dp_catalog_aux_reset(aux->catalog);
                }
                usleep_range(400, 500); /* at least 400us to next try */
                goto unlock_exit;
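The retry change above drops the unconditional catalog reset and instead reprograms the AUX block only once every MAX_AUX_RETRIES consecutive failures. The throttling idiom in isolation; aux_reconfigure() is a hypothetical stand-in for dp_catalog_aux_update_cfg():

    #define MAX_AUX_RETRIES 5

    struct aux_state {
            unsigned int retry_cnt;
    };

    static void aux_reconfigure(void)
    {
            /* hypothetical: rewrite the AUX hardware configuration */
    }

    /* Called after each failed native AUX transfer. */
    static void aux_note_failure(struct aux_state *aux)
    {
            aux->retry_cnt++;
            if (!(aux->retry_cnt % MAX_AUX_RETRIES))
                    aux_reconfigure();
    }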
index 44f0c57..b1a9b1b 100644 (file)
@@ -190,6 +190,18 @@ int dp_catalog_aux_clear_hw_interrupts(struct dp_catalog *dp_catalog)
        return 0;
 }
 
+/**
+ * dp_catalog_aux_reset() - reset AUX controller
+ *
+ * @dp_catalog: DP catalog structure
+ *
+ * return: void
+ *
+ * This function resets the AUX controller
+ *
+ * NOTE: resetting the AUX controller will also clear any pending HPD related
+ * interrupts
+ */
 void dp_catalog_aux_reset(struct dp_catalog *dp_catalog)
 {
        u32 aux_ctrl;
@@ -483,6 +495,18 @@ int dp_catalog_ctrl_set_pattern(struct dp_catalog *dp_catalog,
        return 0;
 }
 
+/**
+ * dp_catalog_ctrl_reset() - reset DP controller
+ *
+ * @dp_catalog: DP catalog structure
+ *
+ * return: void
+ *
+ * This function resets the DP controller
+ *
+ * NOTE: resetting the DP controller will also clear any pending HPD related
+ * interrupts
+ */
 void dp_catalog_ctrl_reset(struct dp_catalog *dp_catalog)
 {
        u32 sw_reset;
index 36b39c3..1390f35 100644 (file)
@@ -631,7 +631,7 @@ static void _dp_ctrl_calc_tu(struct dp_tu_calc_input *in,
 
        tu = kzalloc(sizeof(*tu), GFP_KERNEL);
        if (!tu)
-               return
+               return;
 
        dp_panel_update_tu_timings(in, tu);
 
@@ -1158,7 +1158,7 @@ static int dp_ctrl_link_rate_down_shift(struct dp_ctrl_private *ctrl)
        default:
                ret = -EINVAL;
                break;
-       };
+       }
 
        if (!ret)
                DRM_DEBUG_DP("new rate=0x%x\n", ctrl->link->link_params.rate);
@@ -1296,7 +1296,6 @@ static int dp_ctrl_setup_main_link(struct dp_ctrl_private *ctrl,
         * transitioned to PUSH_IDLE. In order to start transmitting
         * a link training pattern, we have to first do soft reset.
         */
-       dp_catalog_ctrl_reset(ctrl->catalog);
 
        ret = dp_ctrl_link_train(ctrl, cr, training_step);
 
@@ -1365,7 +1364,7 @@ static int dp_ctrl_enable_stream_clocks(struct dp_ctrl_private *ctrl)
        return ret;
 }
 
-int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip)
+int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset)
 {
        struct dp_ctrl_private *ctrl;
        struct dp_io *dp_io;
@@ -1382,6 +1381,9 @@ int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip)
 
        ctrl->dp_ctrl.orientation = flip;
 
+       if (reset)
+               dp_catalog_ctrl_reset(ctrl->catalog);
+
        dp_catalog_ctrl_phy_reset(ctrl->catalog);
        phy_init(phy);
        dp_catalog_ctrl_enable_irq(ctrl->catalog, true);
@@ -1496,7 +1498,6 @@ static int dp_ctrl_link_maintenance(struct dp_ctrl_private *ctrl)
        int training_step = DP_TRAINING_NONE;
 
        dp_ctrl_push_idle(&ctrl->dp_ctrl);
-       dp_catalog_ctrl_reset(ctrl->catalog);
 
        ctrl->dp_ctrl.pixel_rate = ctrl->panel->dp_mode.drm_mode.clock;
 
@@ -1785,14 +1786,14 @@ int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl)
         * Set up transfer unit values and set controller state to send
         * video.
         */
+       reinit_completion(&ctrl->video_comp);
+
        dp_ctrl_configure_source_params(ctrl);
 
        dp_catalog_ctrl_config_msa(ctrl->catalog,
                ctrl->link->link_params.rate,
                ctrl->dp_ctrl.pixel_rate, dp_ctrl_use_fixed_nvid(ctrl));
 
-       reinit_completion(&ctrl->video_comp);
-
        dp_ctrl_setup_tr_unit(ctrl);
 
        dp_catalog_ctrl_state_ctrl(ctrl->catalog, DP_STATE_CTRL_SEND_VIDEO);
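Moving reinit_completion() to the top of dp_ctrl_on_stream() closes a race: if the video-ready interrupt fires between programming the source and re-arming the completion, the signal lands on the stale completion and the later wait times out. The rule of thumb is to re-initialise the completion before starting the operation that completes it; a hedged sketch, where hw_send_video() is hypothetical:

    #include <linux/completion.h>
    #include <linux/errno.h>
    #include <linux/jiffies.h>

    struct stream_ctx {
            struct completion video_comp;
    };

    static void hw_send_video(struct stream_ctx *ctx)
    {
            /* hypothetical: kick the hardware; its irq handler later
             * calls complete(&ctx->video_comp) */
    }

    static int start_stream(struct stream_ctx *ctx)
    {
            /* Arm the completion BEFORE starting the hardware so a
             * fast interrupt cannot be lost. */
            reinit_completion(&ctx->video_comp);

            hw_send_video(ctx);

            if (!wait_for_completion_timeout(&ctx->video_comp, HZ / 2))
                    return -ETIMEDOUT;
            return 0;
    }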
index f60ba93..a836bd3 100644 (file)
@@ -19,7 +19,7 @@ struct dp_ctrl {
        u32 pixel_rate;
 };
 
-int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip);
+int dp_ctrl_host_init(struct dp_ctrl *dp_ctrl, bool flip, bool reset);
 void dp_ctrl_host_deinit(struct dp_ctrl *dp_ctrl);
 int dp_ctrl_on_link(struct dp_ctrl *dp_ctrl);
 int dp_ctrl_on_stream(struct dp_ctrl *dp_ctrl);
index 3bc7ed2..5a39da6 100644 (file)
@@ -350,7 +350,7 @@ end:
        return rc;
 }
 
-static void dp_display_host_init(struct dp_display_private *dp)
+static void dp_display_host_init(struct dp_display_private *dp, int reset)
 {
        bool flip = false;
 
@@ -365,7 +365,7 @@ static void dp_display_host_init(struct dp_display_private *dp)
        dp_display_set_encoder_mode(dp);
 
        dp_power_init(dp->power, flip);
-       dp_ctrl_host_init(dp->ctrl, flip);
+       dp_ctrl_host_init(dp->ctrl, flip, reset);
        dp_aux_init(dp->aux);
        dp->core_initialized = true;
 }
@@ -403,7 +403,7 @@ static int dp_display_usbpd_configure_cb(struct device *dev)
                goto end;
        }
 
-       dp_display_host_init(dp);
+       dp_display_host_init(dp, false);
 
        /*
         * set sink to normal operation mode -- D0
@@ -651,8 +651,8 @@ static int dp_hpd_unplug_handle(struct dp_display_private *dp, u32 data)
        dp_add_event(dp, EV_DISCONNECT_PENDING_TIMEOUT, 0, DP_TIMEOUT_5_SECOND);
 
        /* signal the disconnect event early to ensure proper teardown */
-       dp_display_handle_plugged_change(g_dp_display, false);
        reinit_completion(&dp->audio_comp);
+       dp_display_handle_plugged_change(g_dp_display, false);
 
        dp_catalog_hpd_config_intr(dp->catalog, DP_DP_HPD_PLUG_INT_MASK |
                                        DP_DP_IRQ_HPD_INT_MASK, true);
@@ -700,6 +700,13 @@ static int dp_irq_hpd_handle(struct dp_display_private *dp, u32 data)
                return 0;
        }
 
+       if (state == ST_CONNECT_PENDING || state == ST_DISCONNECT_PENDING) {
+               /* wait until ST_CONNECTED */
+               dp_add_event(dp, EV_IRQ_HPD_INT, 0, 1); /* delay = 1 */
+               mutex_unlock(&dp->event_mutex);
+               return 0;
+       }
+
        ret = dp_display_usbpd_attention_cb(&dp->pdev->dev);
        if (ret == -ECONNRESET) { /* cable unplugged */
                dp->core_initialized = false;
@@ -890,6 +897,9 @@ static int dp_display_disable(struct dp_display_private *dp, u32 data)
 
        /* wait only if audio was enabled */
        if (dp_display->audio_enabled) {
+               /* signal the disconnect event */
+               reinit_completion(&dp->audio_comp);
+               dp_display_handle_plugged_change(dp_display, false);
                if (!wait_for_completion_timeout(&dp->audio_comp,
                                HZ * 5))
                        DRM_ERROR("audio comp timeout\n");
@@ -1002,7 +1012,7 @@ int dp_display_get_test_bpp(struct msm_dp *dp)
 static void dp_display_config_hpd(struct dp_display_private *dp)
 {
 
-       dp_display_host_init(dp);
+       dp_display_host_init(dp, true);
        dp_catalog_ctrl_hpd_config(dp->catalog);
 
        /* Enable interrupt first time
@@ -1256,7 +1266,7 @@ static int dp_pm_resume(struct device *dev)
        dp->hpd_state = ST_DISCONNECTED;
 
        /* turn on dp ctrl/phy */
-       dp_display_host_init(dp);
+       dp_display_host_init(dp, true);
 
        dp_catalog_ctrl_hpd_config(dp->catalog);
 
@@ -1439,7 +1449,7 @@ int msm_dp_display_enable(struct msm_dp *dp, struct drm_encoder *encoder)
        state =  dp_display->hpd_state;
 
        if (state == ST_DISPLAY_OFF)
-               dp_display_host_init(dp_display);
+               dp_display_host_init(dp_display, true);
 
        dp_display_enable(dp_display, 0);
 
index d1780bc..9cc8166 100644 (file)
@@ -409,7 +409,6 @@ int dp_panel_timing_cfg(struct dp_panel *dp_panel)
 
 int dp_panel_init_panel_info(struct dp_panel *dp_panel)
 {
-       int rc = 0;
        struct drm_display_mode *drm_mode;
 
        drm_mode = &dp_panel->dp_mode.drm_mode;
@@ -436,7 +435,7 @@ int dp_panel_init_panel_info(struct dp_panel *dp_panel)
                                        min_t(u32, dp_panel->dp_mode.bpp, 30));
        DRM_DEBUG_DP("updated bpp = %d\n", dp_panel->dp_mode.bpp);
 
-       return rc;
+       return 0;
 }
 
 struct dp_panel *dp_panel_get(struct dp_panel_in *in)
index 1afb7c5..eca86bf 100644 (file)
@@ -139,7 +139,7 @@ const struct msm_dsi_phy_cfg dsi_phy_20nm_cfgs = {
                .disable = dsi_20nm_phy_disable,
                .init = msm_dsi_phy_init_common,
        },
-       .io_start = { 0xfd998300, 0xfd9a0300 },
+       .io_start = { 0xfd998500, 0xfd9a0500 },
        .num_dsi_phy = 2,
 };
 
index a45fe95..3dc6587 100644 (file)
@@ -163,7 +163,7 @@ struct msm_dsi_pll *msm_dsi_pll_init(struct platform_device *pdev,
                break;
        case MSM_DSI_PHY_7NM:
        case MSM_DSI_PHY_7NM_V4_1:
-               pll = msm_dsi_pll_7nm_init(pdev, id);
+               pll = msm_dsi_pll_7nm_init(pdev, type, id);
                break;
        default:
                pll = ERR_PTR(-ENXIO);
index 3405982..bbecb1d 100644 (file)
@@ -117,10 +117,12 @@ msm_dsi_pll_10nm_init(struct platform_device *pdev, int id)
 }
 #endif
 #ifdef CONFIG_DRM_MSM_DSI_7NM_PHY
-struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, int id);
+struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev,
+                                       enum msm_dsi_phy_type type, int id);
 #else
 static inline struct msm_dsi_pll *
-msm_dsi_pll_7nm_init(struct platform_device *pdev, int id)
+msm_dsi_pll_7nm_init(struct platform_device *pdev,
+                                       enum msm_dsi_phy_type type, int id)
 {
        return ERR_PTR(-ENODEV);
 }
index e4e9bf0..de3b802 100644 (file)
@@ -172,9 +172,7 @@ static void dsi_pll_calc_dec_frac(struct dsi_pll_10nm *pll)
 
        multiplier = 1 << config->frac_bits;
        dec_multiple = div_u64(pll_freq * multiplier, divider);
-       div_u64_rem(dec_multiple, multiplier, &frac);
-
-       dec = div_u64(dec_multiple, multiplier);
+       dec = div_u64_rem(dec_multiple, multiplier, &frac);
 
        if (pll_freq <= 1900000000UL)
                regs->pll_prop_gain_rate = 8;
@@ -306,7 +304,8 @@ static void dsi_pll_commit(struct dsi_pll_10nm *pll)
                  reg->frac_div_start_mid);
        pll_write(base + REG_DSI_10nm_PHY_PLL_FRAC_DIV_START_HIGH_1,
                  reg->frac_div_start_high);
-       pll_write(base + REG_DSI_10nm_PHY_PLL_PLL_LOCKDET_RATE_1, 0x40);
+       pll_write(base + REG_DSI_10nm_PHY_PLL_PLL_LOCKDET_RATE_1,
+                 reg->pll_lockdet_rate);
        pll_write(base + REG_DSI_10nm_PHY_PLL_PLL_LOCK_DELAY, 0x06);
        pll_write(base + REG_DSI_10nm_PHY_PLL_CMODE, 0x10);
        pll_write(base + REG_DSI_10nm_PHY_PLL_CLOCK_INVERTERS,
@@ -345,6 +344,7 @@ static int dsi_pll_10nm_vco_set_rate(struct clk_hw *hw, unsigned long rate,
 
 static int dsi_pll_10nm_lock_status(struct dsi_pll_10nm *pll)
 {
+       struct device *dev = &pll->pdev->dev;
        int rc;
        u32 status = 0;
        u32 const delay_us = 100;
@@ -357,8 +357,8 @@ static int dsi_pll_10nm_lock_status(struct dsi_pll_10nm *pll)
                                       delay_us,
                                       timeout_us);
        if (rc)
-               pr_err("DSI PLL(%d) lock failed, status=0x%08x\n",
-                      pll->id, status);
+               DRM_DEV_ERROR(dev, "DSI PLL(%d) lock failed, status=0x%08x\n",
+                             pll->id, status);
 
        return rc;
 }
@@ -405,6 +405,7 @@ static int dsi_pll_10nm_vco_prepare(struct clk_hw *hw)
 {
        struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
        struct dsi_pll_10nm *pll_10nm = to_pll_10nm(pll);
+       struct device *dev = &pll_10nm->pdev->dev;
        int rc;
 
        dsi_pll_enable_pll_bias(pll_10nm);
@@ -413,7 +414,7 @@ static int dsi_pll_10nm_vco_prepare(struct clk_hw *hw)
 
        rc = dsi_pll_10nm_vco_set_rate(hw,pll_10nm->vco_current_rate, 0);
        if (rc) {
-               pr_err("vco_set_rate failed, rc=%d\n", rc);
+               DRM_DEV_ERROR(dev, "vco_set_rate failed, rc=%d\n", rc);
                return rc;
        }
 
@@ -430,7 +431,7 @@ static int dsi_pll_10nm_vco_prepare(struct clk_hw *hw)
        /* Check for PLL lock */
        rc = dsi_pll_10nm_lock_status(pll_10nm);
        if (rc) {
-               pr_err("PLL(%d) lock failed\n", pll_10nm->id);
+               DRM_DEV_ERROR(dev, "PLL(%d) lock failed\n", pll_10nm->id);
                goto error;
        }
 
@@ -483,6 +484,7 @@ static unsigned long dsi_pll_10nm_vco_recalc_rate(struct clk_hw *hw,
 {
        struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
        struct dsi_pll_10nm *pll_10nm = to_pll_10nm(pll);
+       struct dsi_pll_config *config = &pll_10nm->pll_configuration;
        void __iomem *base = pll_10nm->mmio;
        u64 ref_clk = pll_10nm->vco_ref_clk_rate;
        u64 vco_rate = 0x0;
@@ -503,9 +505,8 @@ static unsigned long dsi_pll_10nm_vco_recalc_rate(struct clk_hw *hw,
        /*
         * TODO:
         *      1. Assumes prescaler is disabled
-        *      2. Multiplier is 2^18. it should be 2^(num_of_frac_bits)
         */
-       multiplier = 1 << 18;
+       multiplier = 1 << config->frac_bits;
        pll_freq = dec * (ref_clk * 2);
        tmp64 = (ref_clk * 2 * frac);
        pll_freq += div_u64(tmp64, multiplier);
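The dec/frac computation above collapses two 64-bit divisions into one: div_u64_rem() from <linux/math64.h> returns the quotient and stores the remainder through its third argument. In isolation:

    #include <linux/math64.h>

    /* Split a fixed-point value into integer and fractional parts. */
    static u64 split_dec_frac(u64 dec_multiple, u32 frac_bits, u32 *frac)
    {
            u32 multiplier = 1 << frac_bits;

            /* quotient returned, remainder written to *frac */
            return div_u64_rem(dec_multiple, multiplier, frac);
    }

The matching 2^frac_bits multiplier fix in recalc_rate() is applied to the 7nm PLL further down as well.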
index 93bf142..e29b3bf 100644 (file)
@@ -325,7 +325,7 @@ static void dsi_pll_commit(struct dsi_pll_7nm *pll)
        pll_write(base + REG_DSI_7nm_PHY_PLL_FRAC_DIV_START_LOW_1, reg->frac_div_start_low);
        pll_write(base + REG_DSI_7nm_PHY_PLL_FRAC_DIV_START_MID_1, reg->frac_div_start_mid);
        pll_write(base + REG_DSI_7nm_PHY_PLL_FRAC_DIV_START_HIGH_1, reg->frac_div_start_high);
-       pll_write(base + REG_DSI_7nm_PHY_PLL_PLL_LOCKDET_RATE_1, 0x40);
+       pll_write(base + REG_DSI_7nm_PHY_PLL_PLL_LOCKDET_RATE_1, reg->pll_lockdet_rate);
        pll_write(base + REG_DSI_7nm_PHY_PLL_PLL_LOCK_DELAY, 0x06);
        pll_write(base + REG_DSI_7nm_PHY_PLL_CMODE_1, 0x10); /* TODO: 0x00 for CPHY */
        pll_write(base + REG_DSI_7nm_PHY_PLL_CLOCK_INVERTERS, reg->pll_clock_inverters);
@@ -509,6 +509,7 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw,
 {
        struct msm_dsi_pll *pll = hw_clk_to_pll(hw);
        struct dsi_pll_7nm *pll_7nm = to_pll_7nm(pll);
+       struct dsi_pll_config *config = &pll_7nm->pll_configuration;
        void __iomem *base = pll_7nm->mmio;
        u64 ref_clk = pll_7nm->vco_ref_clk_rate;
        u64 vco_rate = 0x0;
@@ -529,9 +530,8 @@ static unsigned long dsi_pll_7nm_vco_recalc_rate(struct clk_hw *hw,
        /*
         * TODO:
         *      1. Assumes prescaler is disabled
-        *      2. Multiplier is 2^18. it should be 2^(num_of_frac_bits)
         */
-       multiplier = 1 << 18;
+       multiplier = 1 << config->frac_bits;
        pll_freq = dec * (ref_clk * 2);
        tmp64 = (ref_clk * 2 * frac);
        pll_freq += div_u64(tmp64, multiplier);
@@ -852,7 +852,8 @@ err_base_clk_hw:
        return ret;
 }
 
-struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, int id)
+struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev,
+                                       enum msm_dsi_phy_type type, int id)
 {
        struct dsi_pll_7nm *pll_7nm;
        struct msm_dsi_pll *pll;
@@ -885,7 +886,7 @@ struct msm_dsi_pll *msm_dsi_pll_7nm_init(struct platform_device *pdev, int id)
        pll = &pll_7nm->base;
        pll->min_rate = 1000000000UL;
        pll->max_rate = 3500000000UL;
-       if (pll->type == MSM_DSI_PHY_7NM_V4_1) {
+       if (type == MSM_DSI_PHY_7NM_V4_1) {
                pll->min_rate = 600000000UL;
                pll->max_rate = (unsigned long)5000000000ULL;
                /* workaround for max rate overflowing on 32-bit builds: */
index 6a32676..edcacca 100644 (file)
@@ -57,10 +57,13 @@ static void vblank_put(struct msm_kms *kms, unsigned crtc_mask)
 
 static void lock_crtcs(struct msm_kms *kms, unsigned int crtc_mask)
 {
+       int crtc_index;
        struct drm_crtc *crtc;
 
-       for_each_crtc_mask(kms->dev, crtc, crtc_mask)
-               mutex_lock(&kms->commit_lock[drm_crtc_index(crtc)]);
+       for_each_crtc_mask(kms->dev, crtc, crtc_mask) {
+               crtc_index = drm_crtc_index(crtc);
+               mutex_lock_nested(&kms->commit_lock[crtc_index], crtc_index);
+       }
 }
 
 static void unlock_crtcs(struct msm_kms *kms, unsigned int crtc_mask)
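lock_crtcs() acquires several mutexes that share one lock class (one commit_lock per CRTC), which plain mutex_lock() would make lockdep report as recursive locking. mutex_lock_nested() tags each acquisition with a subclass; using the CRTC index is valid because the mask walk always takes the locks in ascending index order, so no inverted ordering can occur. A sketch:

    #include <linux/bits.h>
    #include <linux/mutex.h>

    #define MAX_CRTCS 8 /* illustrative bound; lockdep allows 8 subclasses */

    static struct mutex commit_lock[MAX_CRTCS];

    /* Take per-CRTC locks in fixed ascending order, one subclass each. */
    static void lock_crtc_range(unsigned int mask)
    {
            unsigned int i;

            for (i = 0; i < MAX_CRTCS; i++)
                    if (mask & BIT(i))
                            mutex_lock_nested(&commit_lock[i], i);
    }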
index 108c405..1969076 100644 (file)
@@ -570,6 +570,7 @@ err_free_priv:
        kfree(priv);
 err_put_drm_dev:
        drm_dev_put(ddev);
+       platform_set_drvdata(pdev, NULL);
        return ret;
 }
 
@@ -788,9 +789,10 @@ static int msm_ioctl_gem_info_iova(struct drm_device *dev,
                struct drm_file *file, struct drm_gem_object *obj,
                uint64_t *iova)
 {
+       struct msm_drm_private *priv = dev->dev_private;
        struct msm_file_private *ctx = file->driver_priv;
 
-       if (!ctx->aspace)
+       if (!priv->gpu)
                return -EINVAL;
 
        /*
@@ -1071,6 +1073,10 @@ static int __maybe_unused msm_pm_resume(struct device *dev)
 static int __maybe_unused msm_pm_prepare(struct device *dev)
 {
        struct drm_device *ddev = dev_get_drvdata(dev);
+       struct msm_drm_private *priv = ddev ? ddev->dev_private : NULL;
+
+       if (!priv || !priv->kms)
+               return 0;
 
        return drm_mode_config_helper_suspend(ddev);
 }
@@ -1078,6 +1084,10 @@ static int __maybe_unused msm_pm_prepare(struct device *dev)
 static void __maybe_unused msm_pm_complete(struct device *dev)
 {
        struct drm_device *ddev = dev_get_drvdata(dev);
+       struct msm_drm_private *priv = ddev ? ddev->dev_private : NULL;
+
+       if (!priv || !priv->kms)
+               return;
 
        drm_mode_config_helper_resume(ddev);
 }
@@ -1310,6 +1320,10 @@ static int msm_pdev_remove(struct platform_device *pdev)
 static void msm_pdev_shutdown(struct platform_device *pdev)
 {
        struct drm_device *drm = platform_get_drvdata(pdev);
+       struct msm_drm_private *priv = drm ? drm->dev_private : NULL;
+
+       if (!priv || !priv->kms)
+               return;
 
        drm_atomic_helper_shutdown(drm);
 }
index ad27036..cd59a59 100644 (file)
@@ -45,7 +45,7 @@ int msm_wait_fence(struct msm_fence_context *fctx, uint32_t fence,
        int ret;
 
        if (fence > fctx->last_fence) {
-               DRM_ERROR("%s: waiting on invalid fence: %u (of %u)\n",
+               DRM_ERROR_RATELIMITED("%s: waiting on invalid fence: %u (of %u)\n",
                                fctx->name, fence, fctx->last_fence);
                return -EINVAL;
        }
index a588b03..f091c1e 100644 (file)
@@ -987,8 +987,7 @@ void msm_gem_free_object(struct drm_gem_object *obj)
                /* Don't drop the pages for imported dmabuf, as they are not
                 * ours, just free the array we allocated:
                 */
-               if (msm_obj->pages)
-                       kvfree(msm_obj->pages);
+               kvfree(msm_obj->pages);
 
                put_iova_vmas(obj);
 
index d04c349..5480852 100644 (file)
@@ -198,6 +198,8 @@ static int submit_lookup_cmds(struct msm_gem_submit *submit,
                submit->cmd[i].idx  = submit_cmd.submit_idx;
                submit->cmd[i].nr_relocs = submit_cmd.nr_relocs;
 
+               userptr = u64_to_user_ptr(submit_cmd.relocs);
+
                sz = array_size(submit_cmd.nr_relocs,
                                sizeof(struct drm_msm_gem_submit_reloc));
                /* check for overflow: */
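Two details in the submit hunk above: the userspace pointer is derived with u64_to_user_ptr() from the 64-bit ABI field, and the copy size comes from array_size(), which saturates to SIZE_MAX on multiplication overflow so a hostile nr_relocs cannot wrap the size. The combined idiom, hedged, with an illustrative reloc layout:

    #include <linux/kernel.h>
    #include <linux/overflow.h>
    #include <linux/uaccess.h>

    struct reloc { __u64 off; __u32 idx; __u32 pad; }; /* illustrative */

    static int copy_relocs(u64 user_addr, u32 nr, struct reloc *out)
    {
            void __user *userptr = u64_to_user_ptr(user_addr);
            size_t sz = array_size(nr, sizeof(struct reloc));

            if (sz == SIZE_MAX) /* array_size() saturated: overflow */
                    return -EINVAL;

            if (copy_from_user(out, userptr, sz))
                    return -EFAULT;
            return 0;
    }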
index 196612a..1c9c0cd 100644 (file)
@@ -2693,9 +2693,20 @@ nv50_display_create(struct drm_device *dev)
        else
                nouveau_display(dev)->format_modifiers = disp50xx_modifiers;
 
-       if (disp->disp->object.oclass >= GK104_DISP) {
+       /* FIXME: 256x256 cursors are supported on Kepler; however, unlike Maxwell and later
+        * generations, Kepler requires that we use small pages (4K) for cursor scanout surfaces. The
+        * proper fix for this is to teach nouveau to migrate fbs being used for the cursor plane to
+        * small page allocations in prepare_fb(). When this is implemented, we should also force
+        * large pages (128K) for ovly fbs in order to fix Kepler ovlys.
+        * But until then, just limit cursors to 128x128 - which is small enough to avoid ever using
+        * large pages.
+        */
+       if (disp->disp->object.oclass >= GM107_DISP) {
                dev->mode_config.cursor_width = 256;
                dev->mode_config.cursor_height = 256;
+       } else if (disp->disp->object.oclass >= GK104_DISP) {
+               dev->mode_config.cursor_width = 128;
+               dev->mode_config.cursor_height = 128;
        } else {
                dev->mode_config.cursor_width = 64;
                dev->mode_config.cursor_height = 64;
index 2375711..f2720a0 100644 (file)
@@ -551,12 +551,17 @@ nouveau_bo_sync_for_device(struct nouveau_bo *nvbo)
 
        if (!ttm_dma)
                return;
+       if (!ttm_dma->pages) {
+               NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma);
+               return;
+       }
 
        /* Don't waste time looping if the object is coherent */
        if (nvbo->force_coherent)
                return;
 
-       for (i = 0; i < ttm_dma->num_pages; ++i) {
+       i = 0;
+       while (i < ttm_dma->num_pages) {
                struct page *p = ttm_dma->pages[i];
                size_t num_pages = 1;
 
@@ -582,12 +587,17 @@ nouveau_bo_sync_for_cpu(struct nouveau_bo *nvbo)
 
        if (!ttm_dma)
                return;
+       if (!ttm_dma->pages) {
+               NV_DEBUG(drm, "ttm_dma 0x%p: pages NULL\n", ttm_dma);
+               return;
+       }
 
        /* Don't waste time looping if the object is coherent */
        if (nvbo->force_coherent)
                return;
 
-       for (i = 0; i < ttm_dma->num_pages; ++i) {
+       i = 0;
+       while (i < ttm_dma->num_pages) {
                struct page *p = ttm_dma->pages[i];
                size_t num_pages = 1;
 
index 69da601..e771bd5 100644 (file)
@@ -261,6 +261,9 @@ gk104_fifo_pbdma = {
 struct nvkm_engine *
 gk104_fifo_id_engine(struct nvkm_fifo *base, int engi)
 {
+       if (engi == GK104_FIFO_ENGN_SW)
+               return nvkm_device_engine(base->engine.subdev.device, NVKM_ENGINE_SW, 0);
+
        return gk104_fifo(base)->engine[engi].engine;
 }
 
index 8e11612..b31d750 100644 (file)
@@ -2149,11 +2149,12 @@ static int dsi_vc_send_short(struct dsi_data *dsi, int vc,
                             const struct mipi_dsi_msg *msg)
 {
        struct mipi_dsi_packet pkt;
+       int ret;
        u32 r;
 
-       r = mipi_dsi_create_packet(&pkt, msg);
-       if (r < 0)
-               return r;
+       ret = mipi_dsi_create_packet(&pkt, msg);
+       if (ret < 0)
+               return ret;
 
        WARN_ON(!dsi_bus_is_locked(dsi));
 
index af381d7..5fbfb71 100644 (file)
@@ -37,6 +37,7 @@ struct dsic_panel_data {
        u32 height_mm;
        u32 max_hs_rate;
        u32 max_lp_rate;
+       bool te_support;
 };
 
 struct panel_drv_data {
@@ -334,9 +335,11 @@ static int dsicm_power_on(struct panel_drv_data *ddata)
        if (r)
                goto err;
 
-       r = mipi_dsi_dcs_set_tear_on(ddata->dsi, MIPI_DSI_DCS_TEAR_MODE_VBLANK);
-       if (r)
-               goto err;
+       if (ddata->panel_data->te_support) {
+               r = mipi_dsi_dcs_set_tear_on(ddata->dsi, MIPI_DSI_DCS_TEAR_MODE_VBLANK);
+               if (r)
+                       goto err;
+       }
 
        /* possible panel bug */
        msleep(100);
@@ -619,6 +622,7 @@ static const struct dsic_panel_data taal_data = {
        .height_mm = 0,
        .max_hs_rate = 300000000,
        .max_lp_rate = 10000000,
+       .te_support = true,
 };
 
 static const struct dsic_panel_data himalaya_data = {
@@ -629,6 +633,7 @@ static const struct dsic_panel_data himalaya_data = {
        .height_mm = 88,
        .max_hs_rate = 300000000,
        .max_lp_rate = 10000000,
+       .te_support = false,
 };
 
 static const struct dsic_panel_data droid4_data = {
@@ -639,6 +644,7 @@ static const struct dsic_panel_data droid4_data = {
        .height_mm = 89,
        .max_hs_rate = 300000000,
        .max_lp_rate = 10000000,
+       .te_support = false,
 };
 
 static const struct of_device_id dsicm_of_match[] = {
index bc36aa3..fe5ac3e 100644 (file)
@@ -265,7 +265,8 @@ static int kd35t133_probe(struct mipi_dsi_device *dsi)
        dsi->lanes = 1;
        dsi->format = MIPI_DSI_FMT_RGB888;
        dsi->mode_flags = MIPI_DSI_MODE_VIDEO | MIPI_DSI_MODE_VIDEO_BURST |
-                         MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET;
+                         MIPI_DSI_MODE_LPM | MIPI_DSI_MODE_EOT_PACKET |
+                         MIPI_DSI_CLOCK_NON_CONTINUOUS;
 
        drm_panel_init(&ctx->panel, &dsi->dev, &kd35t133_funcs,
                       DRM_MODE_CONNECTOR_DSI);
index 012bce0..10738e0 100644 (file)
@@ -328,6 +328,7 @@ static void qxl_crtc_update_monitors_config(struct drm_crtc *crtc,
 
        head.id = i;
        head.flags = 0;
+       head.surface_id = 0;
        oldcount = qdev->monitors_config->count;
        if (crtc->state->active) {
                struct drm_display_mode *mode = &crtc->mode;
index fb5f6a5..1864467 100644 (file)
@@ -141,7 +141,7 @@ static void qxl_drm_release(struct drm_device *dev)
 
        /*
         * TODO: qxl_device_fini() call should be in qxl_pci_remove(),
-        * reodering qxl_modeset_fini() + qxl_device_fini() calls is
+        * reordering qxl_modeset_fini() + qxl_device_fini() calls is
         * non-trivial though.
         */
        qxl_modeset_fini(qdev);
index 0fcfc95..b372455 100644 (file)
@@ -321,7 +321,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
                                       int type, struct qxl_release **release,
                                       struct qxl_bo **rbo)
 {
-       struct qxl_bo *bo;
+       struct qxl_bo *bo, *free_bo = NULL;
        int idr_ret;
        int ret = 0;
        union qxl_release_info *info;
@@ -347,7 +347,7 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
 
        mutex_lock(&qdev->release_mutex);
        if (qdev->current_release_bo_offset[cur_idx] + 1 >= releases_per_bo[cur_idx]) {
-               qxl_bo_unref(&qdev->current_release_bo[cur_idx]);
+               free_bo = qdev->current_release_bo[cur_idx];
                qdev->current_release_bo_offset[cur_idx] = 0;
                qdev->current_release_bo[cur_idx] = NULL;
        }
@@ -355,6 +355,10 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
                ret = qxl_release_bo_alloc(qdev, &qdev->current_release_bo[cur_idx]);
                if (ret) {
                        mutex_unlock(&qdev->release_mutex);
+                       if (free_bo) {
+                               qxl_bo_unpin(free_bo);
+                               qxl_bo_unref(&free_bo);
+                       }
                        qxl_release_free(qdev, *release);
                        return ret;
                }
@@ -370,6 +374,10 @@ int qxl_alloc_release_reserved(struct qxl_device *qdev, unsigned long size,
                *rbo = bo;
 
        mutex_unlock(&qdev->release_mutex);
+       if (free_bo) {
+               qxl_bo_unpin(free_bo);
+               qxl_bo_unref(&free_bo);
+       }
 
        ret = qxl_release_list_add(*release, bo);
        qxl_bo_unref(&bo);
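The qxl fix parks the outgoing release BO in a local (free_bo) and drops it only after release_mutex is unlocked, on both the error and success paths, instead of calling qxl_bo_unref() with the mutex held. The general shape, as a hedged sketch with hypothetical types:

    #include <linux/mutex.h>

    struct obj;
    void obj_put(struct obj *o); /* hypothetical: may sleep or re-enter */

    struct cache {
            struct mutex lock;
            struct obj *cached;
    };

    /* Swap the cached object under the lock; release it outside. */
    static void replace_cached(struct cache *c, struct obj *new_obj)
    {
            struct obj *victim;

            mutex_lock(&c->lock);
            victim = c->cached;
            c->cached = new_obj;
            mutex_unlock(&c->lock);

            if (victim)
                    obj_put(victim); /* teardown runs lock-free */
    }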
index f09989b..3effc8c 100644 (file)
@@ -574,6 +574,8 @@ struct radeon_gem {
        struct list_head        objects;
 };
 
+extern const struct drm_gem_object_funcs radeon_gem_object_funcs;
+
 int radeon_gem_init(struct radeon_device *rdev);
 void radeon_gem_fini(struct radeon_device *rdev);
 int radeon_gem_object_create(struct radeon_device *rdev, unsigned long size,
index 9418269..db14a82 100644 (file)
@@ -43,7 +43,7 @@ struct sg_table *radeon_gem_prime_get_sg_table(struct drm_gem_object *obj);
 int radeon_gem_prime_pin(struct drm_gem_object *obj);
 void radeon_gem_prime_unpin(struct drm_gem_object *obj);
 
-static const struct drm_gem_object_funcs radeon_gem_object_funcs;
+const struct drm_gem_object_funcs radeon_gem_object_funcs;
 
 static void radeon_gem_object_free(struct drm_gem_object *gobj)
 {
@@ -227,7 +227,7 @@ static int radeon_gem_handle_lockup(struct radeon_device *rdev, int r)
        return r;
 }
 
-static const struct drm_gem_object_funcs radeon_gem_object_funcs = {
+const struct drm_gem_object_funcs radeon_gem_object_funcs = {
        .free = radeon_gem_object_free,
        .open = radeon_gem_object_open,
        .close = radeon_gem_object_close,
index ab29eb9..42a8794 100644 (file)
@@ -56,6 +56,8 @@ struct drm_gem_object *radeon_gem_prime_import_sg_table(struct drm_device *dev,
        if (ret)
                return ERR_PTR(ret);
 
+       bo->tbo.base.funcs = &radeon_gem_object_funcs;
+
        mutex_lock(&rdev->gem.mutex);
        list_add_tail(&bo->list, &rdev->gem.objects);
        mutex_unlock(&rdev->gem.mutex);
index e8c66d1..78893be 100644 (file)
@@ -364,7 +364,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_bo_device *bdev, struct ttm_tt *
        if (gtt->userflags & RADEON_GEM_USERPTR_ANONONLY) {
                /* check that we only pin down anonymous memory
                   to prevent problems with writeback */
-               unsigned long end = gtt->userptr + ttm->num_pages * PAGE_SIZE;
+               unsigned long end = gtt->userptr + (u64)ttm->num_pages * PAGE_SIZE;
                struct vm_area_struct *vma;
                vma = find_vma(gtt->usermm, gtt->userptr);
                if (!vma || vma->vm_file || vma->vm_end < end)
@@ -386,7 +386,7 @@ static int radeon_ttm_tt_pin_userptr(struct ttm_bo_device *bdev, struct ttm_tt *
        } while (pinned < ttm->num_pages);
 
        r = sg_alloc_table_from_pages(ttm->sg, ttm->pages, ttm->num_pages, 0,
-                                     ttm->num_pages << PAGE_SHIFT,
+                                     (u64)ttm->num_pages << PAGE_SHIFT,
                                      GFP_KERNEL);
        if (r)
                goto release_sg;
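Both radeon hunks above fix the same 32-bit overflow: ttm->num_pages is a 32-bit count, so num_pages << PAGE_SHIFT (or num_pages * PAGE_SIZE) is evaluated in 32 bits and wraps for userptr ranges of 4 GiB and up; widening one operand first keeps the arithmetic in 64 bits. A minimal demonstration, assuming 4K pages for the example:

    #include <linux/types.h>

    #define EXAMPLE_PAGE_SHIFT 12 /* 4K pages assumed for the example */

    static u64 range_size_bytes(u32 num_pages)
    {
            /* Wrong: num_pages << EXAMPLE_PAGE_SHIFT shifts in 32 bits
             * and can wrap. Widen before shifting instead: */
            return (u64)num_pages << EXAMPLE_PAGE_SHIFT;
    }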
index ba8c603..ca37617 100644 (file)
@@ -48,21 +48,12 @@ static unsigned int rcar_du_encoder_count_ports(struct device_node *node)
 static const struct drm_encoder_funcs rcar_du_encoder_funcs = {
 };
 
-static void rcar_du_encoder_release(struct drm_device *dev, void *res)
-{
-       struct rcar_du_encoder *renc = res;
-
-       drm_encoder_cleanup(&renc->base);
-       kfree(renc);
-}
-
 int rcar_du_encoder_init(struct rcar_du_device *rcdu,
                         enum rcar_du_output output,
                         struct device_node *enc_node)
 {
        struct rcar_du_encoder *renc;
        struct drm_bridge *bridge;
-       int ret;
 
        /*
         * Locate the DRM bridge from the DT node. For the DPAD outputs, if the
@@ -101,26 +92,16 @@ int rcar_du_encoder_init(struct rcar_du_device *rcdu,
                        return -ENOLINK;
        }
 
-       renc = kzalloc(sizeof(*renc), GFP_KERNEL);
-       if (renc == NULL)
-               return -ENOMEM;
-
-       renc->output = output;
-
        dev_dbg(rcdu->dev, "initializing encoder %pOF for output %u\n",
                enc_node, output);
 
-       ret = drm_encoder_init(&rcdu->ddev, &renc->base, &rcar_du_encoder_funcs,
-                              DRM_MODE_ENCODER_NONE, NULL);
-       if (ret < 0) {
-               kfree(renc);
-               return ret;
-       }
+       renc = drmm_encoder_alloc(&rcdu->ddev, struct rcar_du_encoder, base,
+                                 &rcar_du_encoder_funcs, DRM_MODE_ENCODER_NONE,
+                                 NULL);
+       if (!renc)
+               return -ENOMEM;
 
-       ret = drmm_add_action_or_reset(&rcdu->ddev, rcar_du_encoder_release,
-                                      renc);
-       if (ret)
-               return ret;
+       renc->output = output;
 
        /*
         * Attach the bridge to the encoder. The bridge will create the
index 4a2099c..857d97c 100644 (file)
 
 #define NUM_YUV2YUV_COEFFICIENTS 12
 
+/* AFBC supports a number of configurable modes. Relevant to us are block size
+ * (16x16 or 32x8), storage modifiers (SPARSE, SPLIT), and the YUV-like
+ * colourspace transform (YTR). 16x16 SPARSE mode is always used. SPLIT mode
+ * could be enabled via the hreg_block_split register, but is not currently
+ * handled. The colourspace transform is implicitly always assumed by the
+ * decoder, so consumers must use this transform as well.
+ *
+ * Failure to match modifiers will cause errors displaying AFBC buffers
+ * produced by conformant AFBC producers, including Mesa.
+ */
 #define ROCKCHIP_AFBC_MOD \
        DRM_FORMAT_MOD_ARM_AFBC( \
                AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 | AFBC_FORMAT_MOD_SPARSE \
+                       | AFBC_FORMAT_MOD_YTR \
        )
 
 enum vop_data_format {
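As the new comment block explains, the VOP decoder always applies the AFBC colourspace transform, so the advertised modifier must include AFBC_FORMAT_MOD_YTR or producers such as Mesa will pick a non-matching encoding. The modifier is plain OR-ed feature bits wrapped in DRM_FORMAT_MOD_ARM_AFBC() from the uapi header; a sketch of matching it from a (userspace-style) consumer:

    #include <drm/drm_fourcc.h> /* uapi modifier definitions */
    #include <stdbool.h>

    static const __u64 rockchip_afbc_mod =
            DRM_FORMAT_MOD_ARM_AFBC(AFBC_FORMAT_MOD_BLOCK_SIZE_16x16 |
                                    AFBC_FORMAT_MOD_SPARSE |
                                    AFBC_FORMAT_MOD_YTR);

    /* Producers should use this layout only on an exact modifier match. */
    static bool modifier_supported(__u64 mod)
    {
            return mod == rockchip_afbc_mod;
    }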
index 0ae3a02..134986d 100644 (file)
@@ -1688,6 +1688,11 @@ static void tegra_dc_commit_state(struct tegra_dc *dc,
                        dev_err(dc->dev,
                                "failed to set clock rate to %lu Hz\n",
                                state->pclk);
+
+               err = clk_set_rate(dc->clk, state->pclk);
+               if (err < 0)
+                       dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
+                               dc->clk, state->pclk, err);
        }
 
        DRM_DEBUG_KMS("rate: %lu, div: %u\n", clk_get_rate(dc->clk),
@@ -1698,11 +1703,6 @@ static void tegra_dc_commit_state(struct tegra_dc *dc,
                value = SHIFT_CLK_DIVIDER(state->div) | PIXEL_CLK_DIVIDER_PCD1;
                tegra_dc_writel(dc, value, DC_DISP_DISP_CLOCK_CONTROL);
        }
-
-       err = clk_set_rate(dc->clk, state->pclk);
-       if (err < 0)
-               dev_err(dc->dev, "failed to set clock %pC to %lu Hz: %d\n",
-                       dc->clk, state->pclk, err);
 }
 
 static void tegra_dc_stop(struct tegra_dc *dc)
@@ -2501,22 +2501,18 @@ static int tegra_dc_couple(struct tegra_dc *dc)
         * POWER_CONTROL registers during CRTC enabling.
         */
        if (dc->soc->coupled_pm && dc->pipe == 1) {
-               u32 flags = DL_FLAG_PM_RUNTIME | DL_FLAG_AUTOREMOVE_CONSUMER;
-               struct device_link *link;
-               struct device *partner;
+               struct device *companion;
+               struct tegra_dc *parent;
 
-               partner = driver_find_device(dc->dev->driver, NULL, NULL,
-                                            tegra_dc_match_by_pipe);
-               if (!partner)
+               companion = driver_find_device(dc->dev->driver, NULL, (const void *)0,
+                                              tegra_dc_match_by_pipe);
+               if (!companion)
                        return -EPROBE_DEFER;
 
-               link = device_link_add(dc->dev, partner, flags);
-               if (!link) {
-                       dev_err(dc->dev, "failed to link controllers\n");
-                       return -EINVAL;
-               }
+               parent = dev_get_drvdata(companion);
+               dc->client.parent = &parent->client;
 
-               dev_dbg(dc->dev, "coupled to %s\n", dev_name(partner));
+               dev_dbg(dc->dev, "coupled to %s\n", dev_name(companion));
        }
 
        return 0;
index f02a035..7b88261 100644 (file)
@@ -3115,6 +3115,12 @@ static int tegra_sor_init(struct host1x_client *client)
         * kernel is possible.
         */
        if (sor->rst) {
+               err = pm_runtime_resume_and_get(sor->dev);
+               if (err < 0) {
+                       dev_err(sor->dev, "failed to get runtime PM: %d\n", err);
+                       return err;
+               }
+
                err = reset_control_acquire(sor->rst);
                if (err < 0) {
                        dev_err(sor->dev, "failed to acquire SOR reset: %d\n",
@@ -3148,6 +3154,7 @@ static int tegra_sor_init(struct host1x_client *client)
                }
 
                reset_control_release(sor->rst);
+               pm_runtime_put(sor->dev);
        }
 
        err = clk_prepare_enable(sor->clk_safe);
index 662bf3a..f519047 100644 (file)
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: GPL-2.0
-ifeq (, $(findstring -W,$(EXTRA_CFLAGS)))
+ifeq (, $(findstring -W,$(KCFLAGS)))
        ccflags-y += -Werror
 endif
 
index 33f65f4..23866a5 100644 (file)
@@ -83,6 +83,7 @@ MODULE_PARM_DESC(eco_mode, "Turn on Eco mode (less bright, more silent)");
 
 struct gm12u320_device {
        struct drm_device                dev;
+       struct device                   *dmadev;
        struct drm_simple_display_pipe   pipe;
        struct drm_connector             conn;
        unsigned char                   *cmd_buf;
@@ -601,6 +602,22 @@ static const uint64_t gm12u320_pipe_modifiers[] = {
        DRM_FORMAT_MOD_INVALID
 };
 
+/*
+ * FIXME: Dma-buf sharing requires DMA support by the importing device.
+ *        This function is a workaround to make USB devices work as well.
+ *        See todo.rst for how to fix the issue in the dma-buf framework.
+ */
+static struct drm_gem_object *gm12u320_gem_prime_import(struct drm_device *dev,
+                                                       struct dma_buf *dma_buf)
+{
+       struct gm12u320_device *gm12u320 = to_gm12u320(dev);
+
+       if (!gm12u320->dmadev)
+               return ERR_PTR(-ENODEV);
+
+       return drm_gem_prime_import_dev(dev, dma_buf, gm12u320->dmadev);
+}
+
 DEFINE_DRM_GEM_FOPS(gm12u320_fops);
 
 static const struct drm_driver gm12u320_drm_driver = {
@@ -614,6 +631,7 @@ static const struct drm_driver gm12u320_drm_driver = {
 
        .fops            = &gm12u320_fops,
        DRM_GEM_SHMEM_DRIVER_OPS,
+       .gem_prime_import = gm12u320_gem_prime_import,
 };
 
 static const struct drm_mode_config_funcs gm12u320_mode_config_funcs = {
@@ -640,15 +658,18 @@ static int gm12u320_usb_probe(struct usb_interface *interface,
                                      struct gm12u320_device, dev);
        if (IS_ERR(gm12u320))
                return PTR_ERR(gm12u320);
+       dev = &gm12u320->dev;
+
+       gm12u320->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev));
+       if (!gm12u320->dmadev)
+               drm_warn(dev, "buffer sharing not supported"); /* not an error */
 
        INIT_DELAYED_WORK(&gm12u320->fb_update.work, gm12u320_fb_update_work);
        mutex_init(&gm12u320->fb_update.lock);
 
-       dev = &gm12u320->dev;
-
        ret = drmm_mode_config_init(dev);
        if (ret)
-               return ret;
+               goto err_put_device;
 
        dev->mode_config.min_width = GM12U320_USER_WIDTH;
        dev->mode_config.max_width = GM12U320_USER_WIDTH;
@@ -658,15 +679,15 @@ static int gm12u320_usb_probe(struct usb_interface *interface,
 
        ret = gm12u320_usb_alloc(gm12u320);
        if (ret)
-               return ret;
+               goto err_put_device;
 
        ret = gm12u320_set_ecomode(gm12u320);
        if (ret)
-               return ret;
+               goto err_put_device;
 
        ret = gm12u320_conn_init(gm12u320);
        if (ret)
-               return ret;
+               goto err_put_device;
 
        ret = drm_simple_display_pipe_init(&gm12u320->dev,
                                           &gm12u320->pipe,
@@ -676,24 +697,31 @@ static int gm12u320_usb_probe(struct usb_interface *interface,
                                           gm12u320_pipe_modifiers,
                                           &gm12u320->conn);
        if (ret)
-               return ret;
+               goto err_put_device;
 
        drm_mode_config_reset(dev);
 
        usb_set_intfdata(interface, dev);
        ret = drm_dev_register(dev, 0);
        if (ret)
-               return ret;
+               goto err_put_device;
 
        drm_fbdev_generic_setup(dev, 0);
 
        return 0;
+
+err_put_device:
+       put_device(gm12u320->dmadev);
+       return ret;
 }
 
 static void gm12u320_usb_disconnect(struct usb_interface *interface)
 {
        struct drm_device *dev = usb_get_intfdata(interface);
+       struct gm12u320_device *gm12u320 = to_gm12u320(dev);
 
+       put_device(gm12u320->dmadev);
+       gm12u320->dmadev = NULL;
        drm_dev_unplug(dev);
        drm_atomic_helper_shutdown(dev);
 }
index b65f4b1..101a68d 100644 (file)
@@ -136,7 +136,8 @@ void ttm_bo_move_to_lru_tail(struct ttm_buffer_object *bo,
        struct ttm_bo_device *bdev = bo->bdev;
        struct ttm_resource_manager *man;
 
-       dma_resv_assert_held(bo->base.resv);
+       if (!bo->deleted)
+               dma_resv_assert_held(bo->base.resv);
 
        if (bo->pin_count) {
                ttm_bo_del_from_lru(bo);
@@ -508,8 +509,11 @@ static void ttm_bo_release(struct kref *kref)
                 * Make pinned bos immediately available to
                 * shrinkers, now that they are queued for
                 * destruction.
+                *
+                * FIXME: QXL is triggering this. Can be removed when the
+                * driver is fixed.
                 */
-               if (WARN_ON(bo->pin_count)) {
+               if (WARN_ON_ONCE(bo->pin_count)) {
                        bo->pin_count = 0;
                        ttm_bo_move_to_lru_tail(bo, &bo->mem, NULL);
                }
@@ -959,8 +963,10 @@ static int ttm_bo_bounce_temp_buffer(struct ttm_buffer_object *bo,
                return ret;
        /* move to the bounce domain */
        ret = ttm_bo_handle_move_mem(bo, &hop_mem, false, ctx, NULL);
-       if (ret)
+       if (ret) {
+               ttm_resource_free(bo, &hop_mem);
                return ret;
+       }
        return 0;
 }
 
@@ -991,18 +997,19 @@ static int ttm_bo_move_buffer(struct ttm_buffer_object *bo,
         * stop and the driver will be called to make
         * the second hop.
         */
-bounce:
        ret = ttm_bo_mem_space(bo, placement, &mem, ctx);
        if (ret)
                return ret;
+bounce:
        ret = ttm_bo_handle_move_mem(bo, &mem, false, ctx, &hop);
        if (ret == -EMULTIHOP) {
                ret = ttm_bo_bounce_temp_buffer(bo, &mem, ctx, &hop);
                if (ret)
-                       return ret;
+                       goto out;
                /* try and move to final place now. */
                goto bounce;
        }
+out:
        if (ret)
                ttm_resource_free(bo, &mem);
        return ret;
index 6e27cb1..4eb6efb 100644 (file)
@@ -268,13 +268,13 @@ static void ttm_pool_type_init(struct ttm_pool_type *pt, struct ttm_pool *pool,
 /* Remove a pool_type from the global shrinker list and free all pages */
 static void ttm_pool_type_fini(struct ttm_pool_type *pt)
 {
-       struct page *p, *tmp;
+       struct page *p;
 
        mutex_lock(&shrinker_lock);
        list_del(&pt->shrinker_list);
        mutex_unlock(&shrinker_lock);
 
-       list_for_each_entry_safe(p, tmp, &pt->pages, lru)
+       while ((p = ttm_pool_type_take(pt)))
                ttm_pool_free_page(pt->pool, pt->caching, pt->order, p);
 }
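The ttm_pool fix replaces list_for_each_entry_safe() with repeated ttm_pool_type_take() calls, so each page is popped under the shrinker lock before being freed; the open-coded walk could race with the shrinker removing pages from the same list. The generic drain idiom, sketched with hypothetical types:

    #include <linux/list.h>
    #include <linux/slab.h>
    #include <linux/spinlock.h>

    struct item { struct list_head link; };

    static LIST_HEAD(items);
    static DEFINE_SPINLOCK(items_lock);

    /* Pop one item under the lock, or NULL when the list is empty. */
    static struct item *take_one(void)
    {
            struct item *it;

            spin_lock(&items_lock);
            it = list_first_entry_or_null(&items, struct item, link);
            if (it)
                    list_del(&it->link);
            spin_unlock(&items_lock);
            return it;
    }

    static void drain_all(void)
    {
            struct item *it;

            /* Safe against concurrent takers: each removal is locked. */
            while ((it = take_one()))
                    kfree(it);
    }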
 
index 9269092..5703277 100644 (file)
@@ -32,6 +32,22 @@ static int udl_usb_resume(struct usb_interface *interface)
        return drm_mode_config_helper_resume(dev);
 }
 
+/*
+ * FIXME: Dma-buf sharing requires DMA support by the importing device.
+ *        This function is a workaround to make USB devices work as well.
+ *        See todo.rst for how to fix the issue in the dma-buf framework.
+ */
+static struct drm_gem_object *udl_driver_gem_prime_import(struct drm_device *dev,
+                                                         struct dma_buf *dma_buf)
+{
+       struct udl_device *udl = to_udl(dev);
+
+       if (!udl->dmadev)
+               return ERR_PTR(-ENODEV);
+
+       return drm_gem_prime_import_dev(dev, dma_buf, udl->dmadev);
+}
+
 DEFINE_DRM_GEM_FOPS(udl_driver_fops);
 
 static const struct drm_driver driver = {
@@ -40,6 +56,7 @@ static const struct drm_driver driver = {
        /* GEM hooks */
        .fops = &udl_driver_fops,
        DRM_GEM_SHMEM_DRIVER_OPS,
+       .gem_prime_import = udl_driver_gem_prime_import,
 
        .name = DRIVER_NAME,
        .desc = DRIVER_DESC,
index 875e735..cc16a13 100644 (file)
@@ -50,6 +50,7 @@ struct urb_list {
 struct udl_device {
        struct drm_device drm;
        struct device *dev;
+       struct device *dmadev;
 
        struct drm_simple_display_pipe display_pipe;
 
index 0e2a376..853f147 100644 (file)
@@ -315,6 +315,10 @@ int udl_init(struct udl_device *udl)
 
        DRM_DEBUG("\n");
 
+       udl->dmadev = usb_intf_get_dma_device(to_usb_interface(dev->dev));
+       if (!udl->dmadev)
+               drm_warn(dev, "buffer sharing not supported"); /* not an error */
+
        mutex_init(&udl->gem_lock);
 
        if (!udl_parse_vendor_descriptor(udl)) {
@@ -343,12 +347,18 @@ int udl_init(struct udl_device *udl)
 err:
        if (udl->urbs.count)
                udl_free_urb_list(dev);
+       put_device(udl->dmadev);
        DRM_ERROR("%d\n", ret);
        return ret;
 }
 
 int udl_drop_usb(struct drm_device *dev)
 {
+       struct udl_device *udl = to_udl(dev);
+
        udl_free_urb_list(dev);
+       put_device(udl->dmadev);
+       udl->dmadev = NULL;
+
        return 0;
 }
index 269390b..76657dc 100644 (file)
@@ -210,6 +210,7 @@ static u32 vc4_get_fifo_full_level(struct vc4_crtc *vc4_crtc, u32 format)
 {
        const struct vc4_crtc_data *crtc_data = vc4_crtc_to_vc4_crtc_data(vc4_crtc);
        const struct vc4_pv_data *pv_data = vc4_crtc_to_vc4_pv_data(vc4_crtc);
+       struct vc4_dev *vc4 = to_vc4_dev(vc4_crtc->base.dev);
        u32 fifo_len_bytes = pv_data->fifo_depth;
 
        /*
@@ -238,6 +239,22 @@ static u32 vc4_get_fifo_full_level(struct vc4_crtc *vc4_crtc, u32 format)
                if (crtc_data->hvs_output == 5)
                        return 32;
 
+               /*
+                * It looks like in some situations, we will overflow
+                * the PixelValve FIFO (with the bit 10 of PV stat being
+                * set) and stall the HVS / PV, eventually resulting in
+                * a page flip timeout.
+                *
+                * Displaying the video overlay during playback with
+                * Kodi on an RPi3 is a reliable way to reproduce the
+                * issue, with a failure rate of around 50%.
+                *
+                * Removing 1 from the FIFO full level, however,
+                * seems to completely remove that issue.
+                */
+               if (!vc4->hvs->hvs5)
+                       return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX - 1;
+
                return fifo_len_bytes - 3 * HVS_FIFO_LATENCY_PIX;
        }
 }
index 7322169..1e9c84c 100644 (file)
@@ -1146,7 +1146,6 @@ static void vc4_plane_atomic_async_update(struct drm_plane *plane,
        plane->state->src_y = state->src_y;
        plane->state->src_w = state->src_w;
        plane->state->src_h = state->src_h;
-       plane->state->src_h = state->src_h;
        plane->state->alpha = state->alpha;
        plane->state->pixel_blend_mode = state->pixel_blend_mode;
        plane->state->rotation = state->rotation;
index 0a900af..45c9c6a 100644 (file)
@@ -500,8 +500,6 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
        vm_fault_t ret;
        pgoff_t fault_page_size;
        bool write = vmf->flags & FAULT_FLAG_WRITE;
-       bool is_cow_mapping =
-               (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
 
        switch (pe_size) {
        case PE_SIZE_PMD:
@@ -518,7 +516,7 @@ vm_fault_t vmw_bo_vm_huge_fault(struct vm_fault *vmf,
        }
 
        /* Always do write dirty-tracking and COW on PTE level. */
-       if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping))
+       if (write && (READ_ONCE(vbo->dirty) || is_cow_mapping(vma->vm_flags)))
                return VM_FAULT_FALLBACK;
 
        ret = ttm_bo_vm_reserve(bo, vmf);
index 3c03b17..cb99758 100644 (file)
@@ -49,7 +49,7 @@ int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
        vma->vm_ops = &vmw_vm_ops;
 
        /* Use VM_PFNMAP rather than VM_MIXEDMAP if not a COW mapping */
-       if ((vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) != VM_MAYWRITE)
+       if (!is_cow_mapping(vma->vm_flags))
                vma->vm_flags = (vma->vm_flags & ~VM_MIXEDMAP) | VM_PFNMAP;
 
        return 0;
index 30d9adf..9f14d99 100644 (file)
@@ -521,7 +521,7 @@ static int xen_drm_drv_init(struct xen_drm_front_info *front_info)
        drm_dev = drm_dev_alloc(&xen_drm_driver, dev);
        if (IS_ERR(drm_dev)) {
                ret = PTR_ERR(drm_dev);
-               goto fail;
+               goto fail_dev;
        }
 
        drm_info->drm_dev = drm_dev;
@@ -551,8 +551,10 @@ fail_modeset:
        drm_kms_helper_poll_fini(drm_dev);
        drm_mode_config_cleanup(drm_dev);
        drm_dev_put(drm_dev);
-fail:
+fail_dev:
        kfree(drm_info);
+       front_info->drm_info = NULL;
+fail:
        return ret;
 }
 
index 3adacba..e5f4314 100644 (file)
@@ -16,7 +16,6 @@
 struct drm_connector;
 struct xen_drm_front_drm_info;
 
-struct xen_drm_front_drm_info;
 
 int xen_drm_front_conn_init(struct xen_drm_front_drm_info *drm_info,
                            struct drm_connector *connector);
index 347fb96..68a766f 100644 (file)
@@ -705,8 +705,9 @@ void host1x_driver_unregister(struct host1x_driver *driver)
 EXPORT_SYMBOL(host1x_driver_unregister);
 
 /**
- * host1x_client_register() - register a host1x client
+ * __host1x_client_register() - register a host1x client
  * @client: host1x client
+ * @key: lock class key for the client-specific mutex
  *
  * Registers a host1x client with each host1x controller instance. Note that
  * each client will only match their parent host1x controller and will only be
@@ -715,13 +716,14 @@ EXPORT_SYMBOL(host1x_driver_unregister);
  * device and call host1x_device_init(), which will in turn call each client's
  * &host1x_client_ops.init implementation.
  */
-int host1x_client_register(struct host1x_client *client)
+int __host1x_client_register(struct host1x_client *client,
+                            struct lock_class_key *key)
 {
        struct host1x *host1x;
        int err;
 
        INIT_LIST_HEAD(&client->list);
-       mutex_init(&client->lock);
+       __mutex_init(&client->lock, "host1x client lock", key);
        client->usecount = 0;
 
        mutex_lock(&devices_lock);
@@ -742,7 +744,7 @@ int host1x_client_register(struct host1x_client *client)
 
        return 0;
 }
-EXPORT_SYMBOL(host1x_client_register);
+EXPORT_SYMBOL(__host1x_client_register);
 
 /**
  * host1x_client_unregister() - unregister a host1x client
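The host1x rename is the standard recipe for per-call-site lock classes: the exported function takes a struct lock_class_key, and a header macro supplies a static key at each registration site, so each client's mutex gets its own lockdep class rather than the single class a shared mutex_init() would assign. The header side usually looks like this; a sketch matching the renamed __host1x_client_register():

    #include <linux/lockdep.h>

    struct host1x_client;

    int __host1x_client_register(struct host1x_client *client,
                                 struct lock_class_key *key);

    /* One static key per call site, hence one lockdep class each. */
    #define host1x_client_register(client)                          \
            ({                                                      \
                    static struct lock_class_key __key;             \
                    __host1x_client_register(client, &__key);       \
            })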
index 1df6ab5..48ad154 100644 (file)
@@ -567,12 +567,9 @@ static int cport_enable(struct gb_host_device *hd, u16 cport_id,
                              USB_DIR_OUT | USB_TYPE_VENDOR |
                              USB_RECIP_INTERFACE, cport_id, 0,
                              req, sizeof(*req), ES2_USB_CTRL_TIMEOUT);
-       if (ret != sizeof(*req)) {
+       if (ret < 0) {
                dev_err(&udev->dev, "failed to set cport flags for port %d\n",
                        cport_id);
-               if (ret >= 0)
-                       ret = -EIO;
-
                goto out;
        }
 
@@ -961,12 +958,10 @@ static int arpc_send(struct es2_ap_dev *es2, struct arpc *rpc, int timeout)
                                 0, 0,
                                 rpc->req, le16_to_cpu(rpc->req->size),
                                 ES2_USB_CTRL_TIMEOUT);
-       if (retval != le16_to_cpu(rpc->req->size)) {
+       if (retval < 0) {
                dev_err(&udev->dev,
                        "failed to send ARPC request %d: %d\n",
                        rpc->req->type, retval);
-               if (retval > 0)
-                       retval = -EIO;
                return retval;
        }
 
index 1bc9f12..616a3bd 100644 (file)
@@ -40,7 +40,7 @@ DECLARE_EVENT_CLASS(gb_message,
                __entry->result = message->header->result;
        ),
 
-       TP_printk("size=%hu operation_id=0x%04x type=0x%02x result=0x%02x",
+       TP_printk("size=%u operation_id=0x%04x type=0x%02x result=0x%02x",
                  __entry->size, __entry->operation_id,
                  __entry->type, __entry->result)
 );
@@ -317,7 +317,7 @@ DECLARE_EVENT_CLASS(gb_interface,
                __entry->mode_switch = intf->mode_switch;
        ),
 
-       TP_printk("intf_id=%hhu device_id=%hhu module_id=%hhu D=%d J=%d A=%d E=%d M=%d",
+       TP_printk("intf_id=%u device_id=%u module_id=%u D=%d J=%d A=%d E=%d M=%d",
                __entry->id, __entry->device_id, __entry->module_id,
                __entry->disconnected, __entry->ejected, __entry->active,
                __entry->enabled, __entry->mode_switch)
@@ -391,7 +391,7 @@ DECLARE_EVENT_CLASS(gb_module,
                __entry->disconnected = module->disconnected;
        ),
 
-       TP_printk("hd_bus_id=%d module_id=%hhu num_interfaces=%zu disconnected=%d",
+       TP_printk("hd_bus_id=%d module_id=%u num_interfaces=%zu disconnected=%d",
                __entry->hd_bus_id, __entry->module_id,
                __entry->num_interfaces, __entry->disconnected)
 );
index 09fa75a..786b71e 100644 (file)
@@ -853,6 +853,24 @@ config HID_PLANTRONICS
 
          Say M here if you may ever plug in a Plantronics USB audio device.
 
+config HID_PLAYSTATION
+       tristate "PlayStation HID Driver"
+       depends on HID
+       select CRC32
+       select POWER_SUPPLY
+       help
+         Provides support for Sony PS5 controllers, including their special
+         functionality such as the touchpad, lights and motion sensors.
+
+config PLAYSTATION_FF
+       bool "PlayStation force feedback support"
+       depends on HID_PLAYSTATION
+       select INPUT_FF_MEMLESS
+       help
+         Say Y here if you would like to enable force feedback support for
+         PlayStation game controllers.
+
 config HID_PRIMAX
        tristate "Primax non-fully HID-compliant devices"
        depends on HID
@@ -909,6 +927,7 @@ config HID_SONY
          * Sony PS3 Blue-ray Disk Remote Control (Bluetooth)
          * Logitech Harmony adapter for Sony Playstation 3 (Bluetooth)
          * Guitar Hero Live PS3 and Wii U guitar dongles
+         * Guitar Hero PS3 and PC guitar dongles
 
 config SONY_FF
        bool "Sony PS2/3/4 accessories force feedback support" 
index 014d21f..c4f6d5c 100644 (file)
@@ -94,6 +94,7 @@ hid-picolcd-$(CONFIG_HID_PICOLCD_CIR) += hid-picolcd_cir.o
 hid-picolcd-$(CONFIG_DEBUG_FS)         += hid-picolcd_debugfs.o
 
 obj-$(CONFIG_HID_PLANTRONICS)  += hid-plantronics.o
+obj-$(CONFIG_HID_PLAYSTATION)  += hid-playstation.o
 obj-$(CONFIG_HID_PRIMAX)       += hid-primax.o
 obj-$(CONFIG_HID_REDRAGON)     += hid-redragon.o
 obj-$(CONFIG_HID_RETRODE)      += hid-retrode.o
@@ -138,7 +139,7 @@ obj-$(CONFIG_USB_HID)               += usbhid/
 obj-$(CONFIG_USB_MOUSE)                += usbhid/
 obj-$(CONFIG_USB_KBD)          += usbhid/
 
-obj-$(CONFIG_I2C_HID)          += i2c-hid/
+obj-$(CONFIG_I2C_HID_CORE)     += i2c-hid/
 
 obj-$(CONFIG_INTEL_ISH_HID)    += intel-ish-hid/
 obj-$(INTEL_ISH_FIRMWARE_DOWNLOADER)   += intel-ish-hid/
index 3f0ed6a..ca556d3 100644 (file)
 
 #include "hid-ids.h"
 
+#define CH_WIRELESS_CTL_REPORT_ID      0x11
+
+static int ch_report_wireless(struct hid_report *report, u8 *data, int size)
+{
+       struct hid_device *hdev = report->device;
+       struct input_dev *input;
+
+       if (report->id != CH_WIRELESS_CTL_REPORT_ID || report->maxfield != 1)
+               return 0;
+
+       input = report->field[0]->hidinput->input;
+       if (!input) {
+               hid_warn(hdev, "can't find wireless radio control's input");
+               return 0;
+       }
+
+       input_report_key(input, KEY_RFKILL, 1);
+       input_sync(input);
+       input_report_key(input, KEY_RFKILL, 0);
+       input_sync(input);
+
+       return 1;
+}
+
+static int ch_raw_event(struct hid_device *hdev,
+               struct hid_report *report, u8 *data, int size)
+{
+       if (report->application == HID_GD_WIRELESS_RADIO_CTLS)
+               return ch_report_wireless(report, data, size);
+
+       return 0;
+}
+
 #define ch_map_key_clear(c)    hid_map_usage_clear(hi, usage, bit, max, \
                                        EV_KEY, (c))
 static int ch_input_mapping(struct hid_device *hdev, struct hid_input *hi,
@@ -77,10 +110,30 @@ static __u8 *ch_switch12_report_fixup(struct hid_device *hdev, __u8 *rdesc,
        return rdesc;
 }
 
+static int ch_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+       int ret;
+
+       hdev->quirks |= HID_QUIRK_INPUT_PER_APP;
+       ret = hid_parse(hdev);
+       if (ret) {
+               hid_err(hdev, "Chicony hid parse failed: %d\n", ret);
+               return ret;
+       }
+
+       ret = hid_hw_start(hdev, HID_CONNECT_DEFAULT);
+       if (ret) {
+               hid_err(hdev, "Chicony hw start failed: %d\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
 
 static const struct hid_device_id ch_devices[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_TACTICAL_PAD) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS2) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_WIRELESS3) },
        { HID_USB_DEVICE(USB_VENDOR_ID_CHICONY, USB_DEVICE_ID_CHICONY_ACER_SWITCH12) },
        { }
 };
@@ -91,6 +144,8 @@ static struct hid_driver ch_driver = {
        .id_table = ch_devices,
        .report_fixup = ch_switch12_report_fixup,
        .input_mapping = ch_input_mapping,
+       .probe = ch_probe,
+       .raw_event = ch_raw_event,
 };
 module_hid_driver(ch_driver);
 
index 56172fe..097cb1e 100644 (file)
@@ -90,7 +90,7 @@ EXPORT_SYMBOL_GPL(hid_register_report);
  * Register a new field for this report.
  */
 
-static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages, unsigned values)
+static struct hid_field *hid_register_field(struct hid_report *report, unsigned usages)
 {
        struct hid_field *field;
 
@@ -101,7 +101,7 @@ static struct hid_field *hid_register_field(struct hid_report *report, unsigned
 
        field = kzalloc((sizeof(struct hid_field) +
                         usages * sizeof(struct hid_usage) +
-                        values * sizeof(unsigned)), GFP_KERNEL);
+                        usages * sizeof(unsigned)), GFP_KERNEL);
        if (!field)
                return NULL;
 
@@ -300,7 +300,7 @@ static int hid_add_field(struct hid_parser *parser, unsigned report_type, unsign
        usages = max_t(unsigned, parser->local.usage_index,
                                 parser->global.report_count);
 
-       field = hid_register_field(report, usages, parser->global.report_count);
+       field = hid_register_field(report, usages);
        if (!field)
                return 0;
 
@@ -1307,6 +1307,9 @@ EXPORT_SYMBOL_GPL(hid_open_report);
 
 static s32 snto32(__u32 value, unsigned n)
 {
+       if (!value || !n)
+               return 0;
+
        switch (n) {
        case 8:  return ((__s8)value);
        case 16: return ((__s16)value);
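
The new guard above short-circuits the zero cases before the width switch. As background, snto32() is plain two's-complement sign extension; a sketch including a generic fallback for arbitrary widths (the fallback is an illustration, not part of this hunk):

#include <linux/types.h>

static s32 snto32_sketch(__u32 value, unsigned int n)
{
	if (!value || !n || n > 32)
		return 0;

	switch (n) {
	case 8:  return (__s8)value;
	case 16: return (__s16)value;
	case 32: return (__s32)value;
	}

	/* Generic path for 1 <= n < 32: if bit n-1 (the sign bit)
	 * is set, extend it across the upper bits. */
	return value & (1U << (n - 1)) ? value | (~0U << n) : value;
}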
index 85a054f..d931962 100644 (file)
@@ -392,30 +392,34 @@ static int hammer_input_mapping(struct hid_device *hdev, struct hid_input *hi,
        return 0;
 }
 
-static int hammer_event(struct hid_device *hid, struct hid_field *field,
-                       struct hid_usage *usage, __s32 value)
+static void hammer_folded_event(struct hid_device *hdev, bool folded)
 {
        unsigned long flags;
 
-       if (usage->hid == HID_USAGE_KBD_FOLDED) {
-               spin_lock_irqsave(&cbas_ec_lock, flags);
+       spin_lock_irqsave(&cbas_ec_lock, flags);
 
-               /*
-                * If we are getting events from Whiskers that means that it
-                * is attached to the lid.
-                */
-               cbas_ec.base_present = true;
-               cbas_ec.base_folded = value;
-               hid_dbg(hid, "%s: base: %d, folded: %d\n", __func__,
-                       cbas_ec.base_present, cbas_ec.base_folded);
-
-               if (cbas_ec.input) {
-                       input_report_switch(cbas_ec.input,
-                                           SW_TABLET_MODE, value);
-                       input_sync(cbas_ec.input);
-               }
+       /*
+        * If we are getting events from Whiskers, that means it is
+        * attached to the lid.
+        */
+       cbas_ec.base_present = true;
+       cbas_ec.base_folded = folded;
+       hid_dbg(hdev, "%s: base: %d, folded: %d\n", __func__,
+               cbas_ec.base_present, cbas_ec.base_folded);
 
-               spin_unlock_irqrestore(&cbas_ec_lock, flags);
+       if (cbas_ec.input) {
+               input_report_switch(cbas_ec.input, SW_TABLET_MODE, folded);
+               input_sync(cbas_ec.input);
+       }
+
+       spin_unlock_irqrestore(&cbas_ec_lock, flags);
+}
+
+static int hammer_event(struct hid_device *hid, struct hid_field *field,
+                       struct hid_usage *usage, __s32 value)
+{
+       if (usage->hid == HID_USAGE_KBD_FOLDED) {
+               hammer_folded_event(hid, value);
                return 1; /* We handled this event */
        }
 
@@ -457,6 +461,47 @@ static bool hammer_has_backlight_control(struct hid_device *hdev)
                                HID_GD_KEYBOARD, HID_AD_BRIGHTNESS);
 }
 
+static void hammer_get_folded_state(struct hid_device *hdev)
+{
+       struct hid_report *report;
+       char *buf;
+       int len, rlen;
+       int a;
+
+       report = hdev->report_enum[HID_INPUT_REPORT].report_id_hash[0x0];
+
+       if (!report || report->maxfield < 1)
+               return;
+
+       len = hid_report_len(report) + 1;
+
+       buf = kmalloc(len, GFP_KERNEL);
+       if (!buf)
+               return;
+
+       rlen = hid_hw_raw_request(hdev, report->id, buf, len, report->type, HID_REQ_GET_REPORT);
+
+       if (rlen != len) {
+               hid_warn(hdev, "Unable to read base folded state: %d (expected %d)\n", rlen, len);
+               goto out;
+       }
+
+       for (a = 0; a < report->maxfield; a++) {
+               struct hid_field *field = report->field[a];
+
+               if (field->usage->hid == HID_USAGE_KBD_FOLDED) {
+                       u32 value = hid_field_extract(hdev, buf+1,
+                                       field->report_offset, field->report_size);
+
+                       hammer_folded_event(hdev, value);
+                       break;
+               }
+       }
+
+out:
+       kfree(buf);
+}
+
 static int hammer_probe(struct hid_device *hdev,
                        const struct hid_device_id *id)
 {
@@ -481,6 +526,8 @@ static int hammer_probe(struct hid_device *hdev,
                error = hid_hw_open(hdev);
                if (error)
                        return error;
+
+               hammer_get_folded_state(hdev);
        }
 
        if (hammer_has_backlight_control(hdev)) {
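
One detail of hammer_get_folded_state() worth spelling out: hid_hw_raw_request() returns the report with the report ID in byte 0, while hid_field_extract() takes bit offsets relative to the payload, which is why the extraction above starts at buf+1. A minimal sketch of that step (the field layout is whatever the device reports):

#include <linux/hid.h>

static bool demo_read_folded(struct hid_device *hdev, u8 *buf,
			     const struct hid_field *field)
{
	/* buf[0] is the report ID; field offsets start after it. */
	return hid_field_extract(hdev, buf + 1, field->report_offset,
				 field->report_size);
}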
index 5ba0aa1..e42aaae 100644 (file)
@@ -40,6 +40,9 @@
 #define USB_VENDOR_ID_ACTIONSTAR       0x2101
 #define USB_DEVICE_ID_ACTIONSTAR_1011  0x1011
 
+#define USB_VENDOR_ID_ACTIVISION       0x1430
+#define USB_DEVICE_ID_ACTIVISION_GUITAR_DONGLE 0x474c
+
 #define USB_VENDOR_ID_ADS_TECH         0x06e1
 #define USB_DEVICE_ID_ADS_TECH_RADIO_SI470X    0xa155
 
 #define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE 0x1053
 #define USB_DEVICE_ID_CHICONY_PIXART_USB_OPTICAL_MOUSE2        0x0939
 #define USB_DEVICE_ID_CHICONY_WIRELESS2        0x1123
+#define USB_DEVICE_ID_CHICONY_WIRELESS3        0x1236
 #define USB_DEVICE_ID_ASUS_AK1D                0x1125
 #define USB_DEVICE_ID_CHICONY_TOSHIBA_WT10A    0x1408
 #define USB_DEVICE_ID_CHICONY_ACER_SWITCH12    0x1421
 #define USB_DEVICE_ID_TOSHIBA_CLICK_L9W        0x0401
 #define USB_DEVICE_ID_HP_X2            0x074d
 #define USB_DEVICE_ID_HP_X2_10_COVER   0x0755
+#define I2C_DEVICE_ID_HP_SPECTRE_X360_15       0x2817
 #define USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN   0x2706
 
 #define USB_VENDOR_ID_ELECOM           0x056e
 #define USB_DEVICE_ID_INNEX_GENESIS_ATARI      0x4745
 
 #define USB_VENDOR_ID_ITE               0x048d
+#define I2C_VENDOR_ID_ITE              0x103c
+#define I2C_DEVICE_ID_ITE_VOYO_WINPAD_A15      0x184f
 #define USB_DEVICE_ID_ITE_LENOVO_YOGA   0x8386
 #define USB_DEVICE_ID_ITE_LENOVO_YOGA2  0x8350
 #define I2C_DEVICE_ID_ITE_LENOVO_LEGION_Y720   0x837a
 #define USB_DEVICE_ID_SONY_PS4_CONTROLLER      0x05c4
 #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_2    0x09cc
 #define USB_DEVICE_ID_SONY_PS4_CONTROLLER_DONGLE       0x0ba0
+#define USB_DEVICE_ID_SONY_PS5_CONTROLLER      0x0ce6
 #define USB_DEVICE_ID_SONY_MOTION_CONTROLLER   0x03d5
 #define USB_DEVICE_ID_SONY_NAVIGATION_CONTROLLER       0x042f
 #define USB_DEVICE_ID_SONY_BUZZ_CONTROLLER             0x0002
 #define USB_DEVICE_ID_SONY_WIRELESS_BUZZ_CONTROLLER    0x1000
 
-#define USB_VENDOR_ID_SONY_GHLIVE                      0x12ba
+#define USB_VENDOR_ID_SONY_RHYTHM      0x12ba
 #define USB_DEVICE_ID_SONY_PS3WIIU_GHLIVE_DONGLE       0x074b
+#define USB_DEVICE_ID_SONY_PS3_GUITAR_DONGLE   0x0100
 
 #define USB_VENDOR_ID_SINO_LITE                        0x1345
 #define USB_DEVICE_ID_SINO_LITE_CONTROLLER     0x3008
index f23027d..236bccd 100644 (file)
@@ -324,6 +324,8 @@ static const struct hid_device_id hid_battery_quirks[] = {
          HID_BATTERY_QUIRK_IGNORE },
        { HID_USB_DEVICE(USB_VENDOR_ID_ELAN, USB_DEVICE_ID_ASUS_UX550_TOUCHSCREEN),
          HID_BATTERY_QUIRK_IGNORE },
+       { HID_I2C_DEVICE(USB_VENDOR_ID_ELAN, I2C_DEVICE_ID_HP_SPECTRE_X360_15),
+         HID_BATTERY_QUIRK_IGNORE },
        {}
 };
 
@@ -1854,6 +1856,16 @@ static struct hid_input *hidinput_match_application(struct hid_report *report)
        list_for_each_entry(hidinput, &hid->inputs, list) {
                if (hidinput->application == report->application)
                        return hidinput;
+
+               /*
+                * Keep SystemControl and ConsumerControl applications together
+                * with the main keyboard, if present.
+                */
+               if ((report->application == HID_GD_SYSTEM_CONTROL ||
+                    report->application == HID_CP_CONSUMER_CONTROL) &&
+                   hidinput->application == HID_GD_KEYBOARD) {
+                       return hidinput;
+               }
        }
 
        return NULL;
index 22bfbeb..14fc068 100644 (file)
@@ -23,11 +23,16 @@ static __u8 *ite_report_fixup(struct hid_device *hdev, __u8 *rdesc, unsigned int
                        hid_info(hdev, "Fixing up Acer Sw5-012 ITE keyboard report descriptor\n");
                        rdesc[163] = HID_MAIN_ITEM_RELATIVE;
                }
-               /* For Acer One S1002 keyboard-dock */
+               /* For Acer One S1002/S1003 keyboard-dock */
                if (*rsize == 188 && rdesc[185] == 0x81 && rdesc[186] == 0x02) {
-                       hid_info(hdev, "Fixing up Acer S1002 ITE keyboard report descriptor\n");
+                       hid_info(hdev, "Fixing up Acer S1002/S1003 ITE keyboard report descriptor\n");
                        rdesc[186] = HID_MAIN_ITEM_RELATIVE;
                }
+               /* For Acer Aspire Switch 10E (SW3-016) keyboard-dock */
+               if (*rsize == 210 && rdesc[184] == 0x81 && rdesc[185] == 0x02) {
+                       hid_info(hdev, "Fixing up Acer Aspire Switch 10E (SW3-016) ITE keyboard report descriptor\n");
+                       rdesc[185] = HID_MAIN_ITEM_RELATIVE;
+               }
        }
 
        return rdesc;
@@ -114,7 +119,8 @@ static const struct hid_device_id ite_devices[] = {
        /* ITE8910 USB kbd ctlr, with Synaptics touchpad connected to it. */
        { HID_DEVICE(BUS_USB, HID_GROUP_GENERIC,
                     USB_VENDOR_ID_SYNAPTICS,
-                    USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003) },
+                    USB_DEVICE_ID_SYNAPTICS_ACER_ONE_S1003),
+         .driver_data = QUIRK_TOUCHPAD_ON_OFF_REPORT },
        { }
 };
 MODULE_DEVICE_TABLE(hid, ite_devices);
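
For reference on the byte patching above: in a HID report descriptor, 0x81 introduces an Input main item with a one-byte payload, and a payload of 0x02 means Data,Variable,Absolute; overwriting that flags byte with HID_MAIN_ITEM_RELATIVE makes the parser treat the item as relative, which matches what these keyboard docks actually send. A sketch of the predicate the fixups test (the per-model offsets are the device-specific part):

/* True if rdesc[pos] starts an Input (Data,Var,Abs) main item. */
static bool is_abs_input_item(const __u8 *rdesc, unsigned int pos)
{
	return rdesc[pos] == 0x81 && rdesc[pos + 1] == 0x02;
}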
index fcaf846..bfbba0d 100644 (file)
@@ -647,7 +647,7 @@ static void lg_g15_input_close(struct input_dev *dev)
 
 static int lg_g15_register_led(struct lg_g15_data *g15, int i)
 {
-       const char * const led_names[] = {
+       static const char * const led_names[] = {
                "g15::kbd_backlight",
                "g15::lcd_backlight",
                "g15::macro_preset1",
index 45e7e0b..271bd8d 100644 (file)
@@ -980,6 +980,7 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
        case 0x07:
                device_type = "eQUAD step 4 Gaming";
                logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
+               workitem.reports_supported |= STD_KEYBOARD;
                break;
        case 0x08:
                device_type = "eQUAD step 4 for gamepads";
@@ -994,7 +995,12 @@ static void logi_hidpp_recv_queue_notif(struct hid_device *hdev,
                workitem.reports_supported |= STD_KEYBOARD;
                break;
        case 0x0d:
-               device_type = "eQUAD Lightspeed 1_1";
+               device_type = "eQUAD Lightspeed 1.1";
+               logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
+               workitem.reports_supported |= STD_KEYBOARD;
+               break;
+       case 0x0f:
+               device_type = "eQUAD Lightspeed 1.2";
                logi_hidpp_dev_conn_notif_equad(hdev, hidpp_report, &workitem);
                workitem.reports_supported |= STD_KEYBOARD;
                break;
index 7eb9a6d..d459e2d 100644 (file)
@@ -92,6 +92,8 @@ MODULE_PARM_DESC(disable_tap_to_click,
 #define HIDPP_CAPABILITY_BATTERY_MILEAGE       BIT(2)
 #define HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS  BIT(3)
 #define HIDPP_CAPABILITY_BATTERY_VOLTAGE       BIT(4)
+#define HIDPP_CAPABILITY_BATTERY_PERCENTAGE    BIT(5)
+#define HIDPP_CAPABILITY_UNIFIED_BATTERY       BIT(6)
 
 #define lg_map_key_clear(c)  hid_map_usage_clear(hi, usage, bit, max, EV_KEY, (c))
 
@@ -152,6 +154,7 @@ struct hidpp_battery {
        int voltage;
        int charge_type;
        bool online;
+       u8 supported_levels_1004;
 };
 
 /**
@@ -1171,7 +1174,7 @@ static int hidpp20_batterylevel_get_battery_info(struct hidpp_device *hidpp,
        return 0;
 }
 
-static int hidpp20_query_battery_info(struct hidpp_device *hidpp)
+static int hidpp20_query_battery_info_1000(struct hidpp_device *hidpp)
 {
        u8 feature_type;
        int ret;
@@ -1208,7 +1211,7 @@ static int hidpp20_query_battery_info(struct hidpp_device *hidpp)
        return 0;
 }
 
-static int hidpp20_battery_event(struct hidpp_device *hidpp,
+static int hidpp20_battery_event_1000(struct hidpp_device *hidpp,
                                 u8 *data, int size)
 {
        struct hidpp_report *report = (struct hidpp_report *)data;
@@ -1380,6 +1383,224 @@ static int hidpp20_battery_voltage_event(struct hidpp_device *hidpp,
        return 0;
 }
 
+/* -------------------------------------------------------------------------- */
+/* 0x1004: Unified battery                                                    */
+/* -------------------------------------------------------------------------- */
+
+#define HIDPP_PAGE_UNIFIED_BATTERY                             0x1004
+
+#define CMD_UNIFIED_BATTERY_GET_CAPABILITIES                   0x00
+#define CMD_UNIFIED_BATTERY_GET_STATUS                         0x10
+
+#define EVENT_UNIFIED_BATTERY_STATUS_EVENT                     0x00
+
+#define FLAG_UNIFIED_BATTERY_LEVEL_CRITICAL                    BIT(0)
+#define FLAG_UNIFIED_BATTERY_LEVEL_LOW                         BIT(1)
+#define FLAG_UNIFIED_BATTERY_LEVEL_GOOD                                BIT(2)
+#define FLAG_UNIFIED_BATTERY_LEVEL_FULL                                BIT(3)
+
+#define FLAG_UNIFIED_BATTERY_FLAGS_RECHARGEABLE                        BIT(0)
+#define FLAG_UNIFIED_BATTERY_FLAGS_STATE_OF_CHARGE             BIT(1)
+
+static int hidpp20_unifiedbattery_get_capabilities(struct hidpp_device *hidpp,
+                                                  u8 feature_index)
+{
+       struct hidpp_report response;
+       int ret;
+       u8 *params = (u8 *)response.fap.params;
+
+       if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS ||
+           hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE) {
+               /* we have already set the device capabilities, so let's skip */
+               return 0;
+       }
+
+       ret = hidpp_send_fap_command_sync(hidpp, feature_index,
+                                         CMD_UNIFIED_BATTERY_GET_CAPABILITIES,
+                                         NULL, 0, &response);
+       /* Ignore these intermittent errors */
+       if (ret == HIDPP_ERROR_RESOURCE_ERROR)
+               return -EIO;
+       if (ret > 0) {
+               hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n",
+                       __func__, ret);
+               return -EPROTO;
+       }
+       if (ret)
+               return ret;
+
+       /*
+        * If the device supports state of charge (battery percentage) we won't
+        * export the battery level information. There are four possible
+        * battery levels, all of them optional, so the device might not
+        * support any of them; we are better off with the battery
+        * percentage.
+        */
+       if (params[1] & FLAG_UNIFIED_BATTERY_FLAGS_STATE_OF_CHARGE) {
+               hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_PERCENTAGE;
+               hidpp->battery.supported_levels_1004 = 0;
+       } else {
+               hidpp->capabilities |= HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS;
+               hidpp->battery.supported_levels_1004 = params[0];
+       }
+
+       return 0;
+}
+
+static int hidpp20_unifiedbattery_map_status(struct hidpp_device *hidpp,
+                                            u8 charging_status,
+                                            u8 external_power_status)
+{
+       int status;
+
+       switch (charging_status) {
+       case 0: /* discharging */
+               status = POWER_SUPPLY_STATUS_DISCHARGING;
+               break;
+       case 1: /* charging */
+       case 2: /* charging slow */
+               status = POWER_SUPPLY_STATUS_CHARGING;
+               break;
+       case 3: /* complete */
+               status = POWER_SUPPLY_STATUS_FULL;
+               break;
+       case 4: /* error */
+               status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+               hid_info(hidpp->hid_dev, "%s: charging error\n",
+                        hidpp->name);
+               break;
+       default:
+               status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+               break;
+       }
+
+       return status;
+}
+
+static int hidpp20_unifiedbattery_map_level(struct hidpp_device *hidpp,
+                                           u8 battery_level)
+{
+       /* clear unsupported level bits */
+       battery_level &= hidpp->battery.supported_levels_1004;
+
+       if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_FULL)
+               return POWER_SUPPLY_CAPACITY_LEVEL_FULL;
+       else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_GOOD)
+               return POWER_SUPPLY_CAPACITY_LEVEL_NORMAL;
+       else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_LOW)
+               return POWER_SUPPLY_CAPACITY_LEVEL_LOW;
+       else if (battery_level & FLAG_UNIFIED_BATTERY_LEVEL_CRITICAL)
+               return POWER_SUPPLY_CAPACITY_LEVEL_CRITICAL;
+
+       return POWER_SUPPLY_CAPACITY_LEVEL_UNKNOWN;
+}
+
+static int hidpp20_unifiedbattery_get_status(struct hidpp_device *hidpp,
+                                            u8 feature_index,
+                                            u8 *state_of_charge,
+                                            int *status,
+                                            int *level)
+{
+       struct hidpp_report response;
+       int ret;
+       u8 *params = (u8 *)response.fap.params;
+
+       ret = hidpp_send_fap_command_sync(hidpp, feature_index,
+                                         CMD_UNIFIED_BATTERY_GET_STATUS,
+                                         NULL, 0, &response);
+       /* Ignore these intermittent errors */
+       if (ret == HIDPP_ERROR_RESOURCE_ERROR)
+               return -EIO;
+       if (ret > 0) {
+               hid_err(hidpp->hid_dev, "%s: received protocol error 0x%02x\n",
+                       __func__, ret);
+               return -EPROTO;
+       }
+       if (ret)
+               return ret;
+
+       *state_of_charge = params[0];
+       *status = hidpp20_unifiedbattery_map_status(hidpp, params[2], params[3]);
+       *level = hidpp20_unifiedbattery_map_level(hidpp, params[1]);
+
+       return 0;
+}
+
+static int hidpp20_query_battery_info_1004(struct hidpp_device *hidpp)
+{
+       u8 feature_type;
+       int ret;
+       u8 state_of_charge;
+       int status, level;
+
+       if (hidpp->battery.feature_index == 0xff) {
+               ret = hidpp_root_get_feature(hidpp,
+                                            HIDPP_PAGE_UNIFIED_BATTERY,
+                                            &hidpp->battery.feature_index,
+                                            &feature_type);
+               if (ret)
+                       return ret;
+       }
+
+       ret = hidpp20_unifiedbattery_get_capabilities(hidpp,
+                                       hidpp->battery.feature_index);
+       if (ret)
+               return ret;
+
+       ret = hidpp20_unifiedbattery_get_status(hidpp,
+                                               hidpp->battery.feature_index,
+                                               &state_of_charge,
+                                               &status,
+                                               &level);
+       if (ret)
+               return ret;
+
+       hidpp->capabilities |= HIDPP_CAPABILITY_UNIFIED_BATTERY;
+       hidpp->battery.capacity = state_of_charge;
+       hidpp->battery.status = status;
+       hidpp->battery.level = level;
+       hidpp->battery.online = true;
+
+       return 0;
+}
+
+static int hidpp20_battery_event_1004(struct hidpp_device *hidpp,
+                                u8 *data, int size)
+{
+       struct hidpp_report *report = (struct hidpp_report *)data;
+       u8 *params = (u8 *)report->fap.params;
+       int state_of_charge, status, level;
+       bool changed;
+
+       if (report->fap.feature_index != hidpp->battery.feature_index ||
+           report->fap.funcindex_clientid != EVENT_UNIFIED_BATTERY_STATUS_EVENT)
+               return 0;
+
+       state_of_charge = params[0];
+       status = hidpp20_unifiedbattery_map_status(hidpp, params[2], params[3]);
+       level = hidpp20_unifiedbattery_map_level(hidpp, params[1]);
+
+       changed = status != hidpp->battery.status ||
+                 (state_of_charge != hidpp->battery.capacity &&
+                  hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE) ||
+                 (level != hidpp->battery.level &&
+                  hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_LEVEL_STATUS);
+
+       if (changed) {
+               hidpp->battery.capacity = state_of_charge;
+               hidpp->battery.status = status;
+               hidpp->battery.level = level;
+               if (hidpp->battery.ps)
+                       power_supply_changed(hidpp->battery.ps);
+       }
+
+       return 0;
+}
+
+/* -------------------------------------------------------------------------- */
+/* Battery feature helpers                                                    */
+/* -------------------------------------------------------------------------- */
+
 static enum power_supply_property hidpp_battery_props[] = {
        POWER_SUPPLY_PROP_ONLINE,
        POWER_SUPPLY_PROP_STATUS,
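
To make the 0x1004 decoding concrete: GET_STATUS returns params[0] = state of charge in percent, params[1] = level flags, params[2] = charging status and params[3] = external power, exactly as consumed above. A sketch with a hypothetical payload:

static void demo_decode_1004(struct hidpp_device *hidpp)
{
	/* Hypothetical GET_STATUS payload: 55%, level "good",
	 * charging, external power present. */
	u8 params[4] = { 55, FLAG_UNIFIED_BATTERY_LEVEL_GOOD, 1, 1 };
	int status, level;

	status = hidpp20_unifiedbattery_map_status(hidpp, params[2],
						   params[3]);
	level = hidpp20_unifiedbattery_map_level(hidpp, params[1]);

	/* status == POWER_SUPPLY_STATUS_CHARGING and, provided
	 * LEVEL_GOOD is in battery.supported_levels_1004,
	 * level == POWER_SUPPLY_CAPACITY_LEVEL_NORMAL. */
	pr_debug("status=%d level=%d\n", status, level);
}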
@@ -3307,7 +3528,10 @@ static int hidpp_raw_hidpp_event(struct hidpp_device *hidpp, u8 *data,
        }
 
        if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) {
-               ret = hidpp20_battery_event(hidpp, data, size);
+               ret = hidpp20_battery_event_1000(hidpp, data, size);
+               if (ret != 0)
+                       return ret;
+               ret = hidpp20_battery_event_1004(hidpp, data, size);
                if (ret != 0)
                        return ret;
                ret = hidpp_solar_battery_event(hidpp, data, size);
@@ -3443,9 +3667,14 @@ static int hidpp_initialize_battery(struct hidpp_device *hidpp)
                if (hidpp->quirks & HIDPP_QUIRK_CLASS_K750)
                        ret = hidpp_solar_request_battery_event(hidpp);
                else {
-                       ret = hidpp20_query_battery_voltage_info(hidpp);
+                       /*
+                        * We only support one battery feature at a time, so
+                        * check the ones that report a battery level first
+                        * and leave voltage for last.
+                        */
+                       ret = hidpp20_query_battery_info_1000(hidpp);
+                       if (ret)
+                               ret = hidpp20_query_battery_info_1004(hidpp);
                        if (ret)
-                               ret = hidpp20_query_battery_info(hidpp);
+                               ret = hidpp20_query_battery_voltage_info(hidpp);
                }
 
                if (ret)
@@ -3473,7 +3702,8 @@ static int hidpp_initialize_battery(struct hidpp_device *hidpp)
 
        num_battery_props = ARRAY_SIZE(hidpp_battery_props) - 3;
 
-       if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_MILEAGE)
+       if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_MILEAGE ||
+           hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_PERCENTAGE)
                battery_props[num_battery_props++] =
                                POWER_SUPPLY_PROP_CAPACITY;
 
@@ -3650,8 +3880,10 @@ static void hidpp_connect_event(struct hidpp_device *hidpp)
        } else if (hidpp->capabilities & HIDPP_CAPABILITY_HIDPP20_BATTERY) {
                if (hidpp->capabilities & HIDPP_CAPABILITY_BATTERY_VOLTAGE)
                        hidpp20_query_battery_voltage_info(hidpp);
+               else if (hidpp->capabilities & HIDPP_CAPABILITY_UNIFIED_BATTERY)
+                       hidpp20_query_battery_info_1004(hidpp);
                else
-                       hidpp20_query_battery_info(hidpp);
+                       hidpp20_query_battery_info_1000(hidpp);
        }
        if (hidpp->battery.ps)
                power_supply_changed(hidpp->battery.ps);
index 8429ebe..9d9f3e1 100644 (file)
@@ -1747,6 +1747,13 @@ static int mt_probe(struct hid_device *hdev, const struct hid_device_id *id)
 }
 
 #ifdef CONFIG_PM
+static int mt_suspend(struct hid_device *hdev, pm_message_t state)
+{
+       /* High latency is desirable for power savings during S3/S0ix */
+       mt_set_modes(hdev, HID_LATENCY_HIGH, true, true);
+       return 0;
+}
+
 static int mt_reset_resume(struct hid_device *hdev)
 {
        mt_release_contacts(hdev);
@@ -1762,6 +1769,8 @@ static int mt_resume(struct hid_device *hdev)
 
        hid_hw_idle(hdev, 0, 0, HID_REQ_SET_IDLE);
 
+       mt_set_modes(hdev, HID_LATENCY_NORMAL, true, true);
+
        return 0;
 }
 #endif
@@ -2155,6 +2164,7 @@ static struct hid_driver mt_driver = {
        .event = mt_event,
        .report = mt_report,
 #ifdef CONFIG_PM
+       .suspend = mt_suspend,
        .reset_resume = mt_reset_resume,
        .resume = mt_resume,
 #endif
diff --git a/drivers/hid/hid-playstation.c b/drivers/hid/hid-playstation.c
new file mode 100644 (file)
index 0000000..ab7c82c
--- /dev/null
@@ -0,0 +1,1351 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ *  HID driver for Sony DualSense(TM) controller.
+ *
+ *  Copyright (c) 2020 Sony Interactive Entertainment
+ */
+
+#include <linux/bits.h>
+#include <linux/crc32.h>
+#include <linux/device.h>
+#include <linux/hid.h>
+#include <linux/idr.h>
+#include <linux/input/mt.h>
+#include <linux/module.h>
+
+#include <asm/unaligned.h>
+
+#include "hid-ids.h"
+
+/* List of connected playstation devices. */
+static DEFINE_MUTEX(ps_devices_lock);
+static LIST_HEAD(ps_devices_list);
+
+static DEFINE_IDA(ps_player_id_allocator);
+
+#define HID_PLAYSTATION_VERSION_PATCH 0x8000
+
+/* Base class for playstation devices. */
+struct ps_device {
+       struct list_head list;
+       struct hid_device *hdev;
+       spinlock_t lock;
+
+       uint32_t player_id;
+
+       struct power_supply_desc battery_desc;
+       struct power_supply *battery;
+       uint8_t battery_capacity;
+       int battery_status;
+
+       uint8_t mac_address[6]; /* Note: stored in little endian order. */
+       uint32_t hw_version;
+       uint32_t fw_version;
+
+       int (*parse_report)(struct ps_device *dev, struct hid_report *report, u8 *data, int size);
+};
+
+/* Calibration data for playstation motion sensors. */
+struct ps_calibration_data {
+       int abs_code;
+       short bias;
+       int sens_numer;
+       int sens_denom;
+};
+
+/* Seed values for DualShock4 / DualSense CRC32 for different report types. */
+#define PS_INPUT_CRC32_SEED    0xA1
+#define PS_OUTPUT_CRC32_SEED   0xA2
+#define PS_FEATURE_CRC32_SEED  0xA3
+
+#define DS_INPUT_REPORT_USB                    0x01
+#define DS_INPUT_REPORT_USB_SIZE               64
+#define DS_INPUT_REPORT_BT                     0x31
+#define DS_INPUT_REPORT_BT_SIZE                        78
+#define DS_OUTPUT_REPORT_USB                   0x02
+#define DS_OUTPUT_REPORT_USB_SIZE              63
+#define DS_OUTPUT_REPORT_BT                    0x31
+#define DS_OUTPUT_REPORT_BT_SIZE               78
+
+#define DS_FEATURE_REPORT_CALIBRATION          0x05
+#define DS_FEATURE_REPORT_CALIBRATION_SIZE     41
+#define DS_FEATURE_REPORT_PAIRING_INFO         0x09
+#define DS_FEATURE_REPORT_PAIRING_INFO_SIZE    20
+#define DS_FEATURE_REPORT_FIRMWARE_INFO                0x20
+#define DS_FEATURE_REPORT_FIRMWARE_INFO_SIZE   64
+
+/* Button masks for DualSense input report. */
+#define DS_BUTTONS0_HAT_SWITCH GENMASK(3, 0)
+#define DS_BUTTONS0_SQUARE     BIT(4)
+#define DS_BUTTONS0_CROSS      BIT(5)
+#define DS_BUTTONS0_CIRCLE     BIT(6)
+#define DS_BUTTONS0_TRIANGLE   BIT(7)
+#define DS_BUTTONS1_L1         BIT(0)
+#define DS_BUTTONS1_R1         BIT(1)
+#define DS_BUTTONS1_L2         BIT(2)
+#define DS_BUTTONS1_R2         BIT(3)
+#define DS_BUTTONS1_CREATE     BIT(4)
+#define DS_BUTTONS1_OPTIONS    BIT(5)
+#define DS_BUTTONS1_L3         BIT(6)
+#define DS_BUTTONS1_R3         BIT(7)
+#define DS_BUTTONS2_PS_HOME    BIT(0)
+#define DS_BUTTONS2_TOUCHPAD   BIT(1)
+#define DS_BUTTONS2_MIC_MUTE   BIT(2)
+
+/* Status field of DualSense input report. */
+#define DS_STATUS_BATTERY_CAPACITY     GENMASK(3, 0)
+#define DS_STATUS_CHARGING             GENMASK(7, 4)
+#define DS_STATUS_CHARGING_SHIFT       4
+
+/*
+ * Status of a DualSense touch point contact.
+ * Contact IDs with the highest bit set are 'inactive',
+ * and any associated data is then invalid.
+ */
+#define DS_TOUCH_POINT_INACTIVE BIT(7)
+
+/* Magic value required in the tag field of Bluetooth output reports. */
+#define DS_OUTPUT_TAG 0x10
+/* Flags for DualSense output report. */
+#define DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION BIT(0)
+#define DS_OUTPUT_VALID_FLAG0_HAPTICS_SELECT BIT(1)
+#define DS_OUTPUT_VALID_FLAG1_MIC_MUTE_LED_CONTROL_ENABLE BIT(0)
+#define DS_OUTPUT_VALID_FLAG1_POWER_SAVE_CONTROL_ENABLE BIT(1)
+#define DS_OUTPUT_VALID_FLAG1_LIGHTBAR_CONTROL_ENABLE BIT(2)
+#define DS_OUTPUT_VALID_FLAG1_RELEASE_LEDS BIT(3)
+#define DS_OUTPUT_VALID_FLAG1_PLAYER_INDICATOR_CONTROL_ENABLE BIT(4)
+#define DS_OUTPUT_VALID_FLAG2_LIGHTBAR_SETUP_CONTROL_ENABLE BIT(1)
+#define DS_OUTPUT_POWER_SAVE_CONTROL_MIC_MUTE BIT(4)
+#define DS_OUTPUT_LIGHTBAR_SETUP_LIGHT_OUT BIT(1)
+
+/* DualSense hardware limits */
+#define DS_ACC_RES_PER_G       8192
+#define DS_ACC_RANGE           (4*DS_ACC_RES_PER_G)
+#define DS_GYRO_RES_PER_DEG_S  1024
+#define DS_GYRO_RANGE          (2048*DS_GYRO_RES_PER_DEG_S)
+#define DS_TOUCHPAD_WIDTH      1920
+#define DS_TOUCHPAD_HEIGHT     1080
+
+struct dualsense {
+       struct ps_device base;
+       struct input_dev *gamepad;
+       struct input_dev *sensors;
+       struct input_dev *touchpad;
+
+       /* Calibration data for accelerometer and gyroscope. */
+       struct ps_calibration_data accel_calib_data[3];
+       struct ps_calibration_data gyro_calib_data[3];
+
+       /* Timestamp for sensor data */
+       bool sensor_timestamp_initialized;
+       uint32_t prev_sensor_timestamp;
+       uint32_t sensor_timestamp_us;
+
+       /* Compatible rumble state */
+       bool update_rumble;
+       uint8_t motor_left;
+       uint8_t motor_right;
+
+       /* RGB lightbar */
+       bool update_lightbar;
+       uint8_t lightbar_red;
+       uint8_t lightbar_green;
+       uint8_t lightbar_blue;
+
+       /* Microphone */
+       bool update_mic_mute;
+       bool mic_muted;
+       bool last_btn_mic_state;
+
+       /* Player leds */
+       bool update_player_leds;
+       uint8_t player_leds_state;
+       struct led_classdev player_leds[5];
+
+       struct work_struct output_worker;
+       void *output_report_dmabuf;
+       uint8_t output_seq; /* Sequence number for output report. */
+};
+
+struct dualsense_touch_point {
+       uint8_t contact;
+       uint8_t x_lo;
+       uint8_t x_hi:4, y_lo:4;
+       uint8_t y_hi;
+} __packed;
+static_assert(sizeof(struct dualsense_touch_point) == 4);
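
The packed layout above splits each 12-bit touchpad coordinate across bytes; a sketch of how a point would presumably be unpacked (an illustration of the bitfield layout, not code from this patch):

static void demo_unpack_point(const struct dualsense_touch_point *p,
			      unsigned int *x, unsigned int *y,
			      bool *active)
{
	/* Bit 7 of the contact byte marks the slot inactive. */
	*active = !(p->contact & DS_TOUCH_POINT_INACTIVE);
	*x = ((unsigned int)p->x_hi << 8) | p->x_lo;
	*y = ((unsigned int)p->y_hi << 4) | p->y_lo;
}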
+
+/* Main DualSense input report excluding any BT/USB specific headers. */
+struct dualsense_input_report {
+       uint8_t x, y;
+       uint8_t rx, ry;
+       uint8_t z, rz;
+       uint8_t seq_number;
+       uint8_t buttons[4];
+       uint8_t reserved[4];
+
+       /* Motion sensors */
+       __le16 gyro[3]; /* x, y, z */
+       __le16 accel[3]; /* x, y, z */
+       __le32 sensor_timestamp;
+       uint8_t reserved2;
+
+       /* Touchpad */
+       struct dualsense_touch_point points[2];
+
+       uint8_t reserved3[12];
+       uint8_t status;
+       uint8_t reserved4[10];
+} __packed;
+/* The common input report size equals the USB report size minus 1 byte for the report ID. */
+static_assert(sizeof(struct dualsense_input_report) == DS_INPUT_REPORT_USB_SIZE - 1);
+
+/* Common data between DualSense BT/USB main output report. */
+struct dualsense_output_report_common {
+       uint8_t valid_flag0;
+       uint8_t valid_flag1;
+
+       /* For DualShock 4 compatibility mode. */
+       uint8_t motor_right;
+       uint8_t motor_left;
+
+       /* Audio controls */
+       uint8_t reserved[4];
+       uint8_t mute_button_led;
+
+       uint8_t power_save_control;
+       uint8_t reserved2[28];
+
+       /* LEDs and lightbar */
+       uint8_t valid_flag2;
+       uint8_t reserved3[2];
+       uint8_t lightbar_setup;
+       uint8_t led_brightness;
+       uint8_t player_leds;
+       uint8_t lightbar_red;
+       uint8_t lightbar_green;
+       uint8_t lightbar_blue;
+} __packed;
+static_assert(sizeof(struct dualsense_output_report_common) == 47);
+
+struct dualsense_output_report_bt {
+       uint8_t report_id; /* 0x31 */
+       uint8_t seq_tag;
+       uint8_t tag;
+       struct dualsense_output_report_common common;
+       uint8_t reserved[24];
+       __le32 crc32;
+} __packed;
+static_assert(sizeof(struct dualsense_output_report_bt) == DS_OUTPUT_REPORT_BT_SIZE);
+
+struct dualsense_output_report_usb {
+       uint8_t report_id; /* 0x02 */
+       struct dualsense_output_report_common common;
+       uint8_t reserved[15];
+} __packed;
+static_assert(sizeof(struct dualsense_output_report_usb) == DS_OUTPUT_REPORT_USB_SIZE);
+
+/*
+ * The DualSense has a main output report used to control most features. It is
+ * largely the same between Bluetooth and USB except for different headers and CRC.
+ * This structure hides the differences between the two to simplify sending output reports.
+ */
+struct dualsense_output_report {
+       uint8_t *data; /* Start of data */
+       uint8_t len; /* Size of output report */
+
+       /* Points to the Bluetooth data payload for a Bluetooth report, else NULL. */
+       struct dualsense_output_report_bt *bt;
+       /* Points to USB data payload in case for a USB report else NULL. */
+       struct dualsense_output_report_usb *usb;
+       /* Points to common section of report, so past any headers. */
+       struct dualsense_output_report_common *common;
+};
+
+/*
+ * Common gamepad buttons across DualShock 3 / 4 and DualSense.
+ * Note: for devices with a touchpad, the touchpad button is not
+ * included as it is part of the touchpad input device.
+ */
+static const int ps_gamepad_buttons[] = {
+       BTN_WEST, /* Square */
+       BTN_NORTH, /* Triangle */
+       BTN_EAST, /* Circle */
+       BTN_SOUTH, /* Cross */
+       BTN_TL, /* L1 */
+       BTN_TR, /* R1 */
+       BTN_TL2, /* L2 */
+       BTN_TR2, /* R2 */
+       BTN_SELECT, /* Create (PS5) / Share (PS4) */
+       BTN_START, /* Option */
+       BTN_THUMBL, /* L3 */
+       BTN_THUMBR, /* R3 */
+       BTN_MODE, /* PS Home */
+};
+
+static const struct { int x; int y; } ps_gamepad_hat_mapping[] = {
+       {0, -1}, {1, -1}, {1, 0}, {1, 1}, {0, 1}, {-1, 1}, {-1, 0}, {-1, -1},
+       {0, 0},
+};
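
The table above is indexed by the 4-bit hat value in buttons[0]: 0..7 run clockwise starting at north, and the last entry is centered/released. A sketch of the lookup as a report parser would presumably perform it, clamping out-of-range values to the centered entry:

static void demo_report_hat(struct input_dev *gamepad, u8 buttons0)
{
	unsigned int value = buttons0 & DS_BUTTONS0_HAT_SWITCH;

	if (value >= ARRAY_SIZE(ps_gamepad_hat_mapping))
		value = 8; /* centered */

	input_report_abs(gamepad, ABS_HAT0X, ps_gamepad_hat_mapping[value].x);
	input_report_abs(gamepad, ABS_HAT0Y, ps_gamepad_hat_mapping[value].y);
}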
+
+/*
+ * Add a new ps_device to ps_devices if it doesn't exist.
+ * Return error on duplicate device, which can happen if the same
+ * device is connected using both Bluetooth and USB.
+ */
+static int ps_devices_list_add(struct ps_device *dev)
+{
+       struct ps_device *entry;
+
+       mutex_lock(&ps_devices_lock);
+       list_for_each_entry(entry, &ps_devices_list, list) {
+               if (!memcmp(entry->mac_address, dev->mac_address, sizeof(dev->mac_address))) {
+                       hid_err(dev->hdev, "Duplicate device found for MAC address %pMR.\n",
+                                       dev->mac_address);
+                       mutex_unlock(&ps_devices_lock);
+                       return -EEXIST;
+               }
+       }
+
+       list_add_tail(&dev->list, &ps_devices_list);
+       mutex_unlock(&ps_devices_lock);
+       return 0;
+}
+
+static int ps_devices_list_remove(struct ps_device *dev)
+{
+       mutex_lock(&ps_devices_lock);
+       list_del(&dev->list);
+       mutex_unlock(&ps_devices_lock);
+       return 0;
+}
+
+static int ps_device_set_player_id(struct ps_device *dev)
+{
+       int ret = ida_alloc(&ps_player_id_allocator, GFP_KERNEL);
+
+       if (ret < 0)
+               return ret;
+
+       dev->player_id = ret;
+       return 0;
+}
+
+static void ps_device_release_player_id(struct ps_device *dev)
+{
+       ida_free(&ps_player_id_allocator, dev->player_id);
+
+       dev->player_id = U32_MAX;
+}
+
+static struct input_dev *ps_allocate_input_dev(struct hid_device *hdev, const char *name_suffix)
+{
+       struct input_dev *input_dev;
+
+       input_dev = devm_input_allocate_device(&hdev->dev);
+       if (!input_dev)
+               return ERR_PTR(-ENOMEM);
+
+       input_dev->id.bustype = hdev->bus;
+       input_dev->id.vendor = hdev->vendor;
+       input_dev->id.product = hdev->product;
+       input_dev->id.version = hdev->version;
+       input_dev->uniq = hdev->uniq;
+
+       if (name_suffix) {
+               input_dev->name = devm_kasprintf(&hdev->dev, GFP_KERNEL, "%s %s", hdev->name,
+                               name_suffix);
+               if (!input_dev->name)
+                       return ERR_PTR(-ENOMEM);
+       } else {
+               input_dev->name = hdev->name;
+       }
+
+       input_set_drvdata(input_dev, hdev);
+
+       return input_dev;
+}
+
+static enum power_supply_property ps_power_supply_props[] = {
+       POWER_SUPPLY_PROP_STATUS,
+       POWER_SUPPLY_PROP_PRESENT,
+       POWER_SUPPLY_PROP_CAPACITY,
+       POWER_SUPPLY_PROP_SCOPE,
+};
+
+static int ps_battery_get_property(struct power_supply *psy,
+               enum power_supply_property psp,
+               union power_supply_propval *val)
+{
+       struct ps_device *dev = power_supply_get_drvdata(psy);
+       uint8_t battery_capacity;
+       int battery_status;
+       unsigned long flags;
+       int ret = 0;
+
+       spin_lock_irqsave(&dev->lock, flags);
+       battery_capacity = dev->battery_capacity;
+       battery_status = dev->battery_status;
+       spin_unlock_irqrestore(&dev->lock, flags);
+
+       switch (psp) {
+       case POWER_SUPPLY_PROP_STATUS:
+               val->intval = battery_status;
+               break;
+       case POWER_SUPPLY_PROP_PRESENT:
+               val->intval = 1;
+               break;
+       case POWER_SUPPLY_PROP_CAPACITY:
+               val->intval = battery_capacity;
+               break;
+       case POWER_SUPPLY_PROP_SCOPE:
+               val->intval = POWER_SUPPLY_SCOPE_DEVICE;
+               break;
+       default:
+               ret = -EINVAL;
+               break;
+       }
+
+       return ret;
+}
+
+static int ps_device_register_battery(struct ps_device *dev)
+{
+       struct power_supply *battery;
+       struct power_supply_config battery_cfg = { .drv_data = dev };
+       int ret;
+
+       dev->battery_desc.type = POWER_SUPPLY_TYPE_BATTERY;
+       dev->battery_desc.properties = ps_power_supply_props;
+       dev->battery_desc.num_properties = ARRAY_SIZE(ps_power_supply_props);
+       dev->battery_desc.get_property = ps_battery_get_property;
+       dev->battery_desc.name = devm_kasprintf(&dev->hdev->dev, GFP_KERNEL,
+                       "ps-controller-battery-%pMR", dev->mac_address);
+       if (!dev->battery_desc.name)
+               return -ENOMEM;
+
+       battery = devm_power_supply_register(&dev->hdev->dev, &dev->battery_desc, &battery_cfg);
+       if (IS_ERR(battery)) {
+               ret = PTR_ERR(battery);
+               hid_err(dev->hdev, "Unable to register battery device: %d\n", ret);
+               return ret;
+       }
+       dev->battery = battery;
+
+       ret = power_supply_powers(dev->battery, &dev->hdev->dev);
+       if (ret) {
+               hid_err(dev->hdev, "Unable to activate battery device: %d\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+
+/* Compute crc32 of HID data and compare against expected CRC. */
+static bool ps_check_crc32(uint8_t seed, uint8_t *data, size_t len, uint32_t report_crc)
+{
+       uint32_t crc;
+
+       crc = crc32_le(0xFFFFFFFF, &seed, 1);
+       crc = ~crc32_le(crc, data, len);
+
+       return crc == report_crc;
+}
+
+static struct input_dev *ps_gamepad_create(struct hid_device *hdev,
+               int (*play_effect)(struct input_dev *, void *, struct ff_effect *))
+{
+       struct input_dev *gamepad;
+       unsigned int i;
+       int ret;
+
+       gamepad = ps_allocate_input_dev(hdev, NULL);
+       if (IS_ERR(gamepad))
+               return ERR_CAST(gamepad);
+
+       input_set_abs_params(gamepad, ABS_X, 0, 255, 0, 0);
+       input_set_abs_params(gamepad, ABS_Y, 0, 255, 0, 0);
+       input_set_abs_params(gamepad, ABS_Z, 0, 255, 0, 0);
+       input_set_abs_params(gamepad, ABS_RX, 0, 255, 0, 0);
+       input_set_abs_params(gamepad, ABS_RY, 0, 255, 0, 0);
+       input_set_abs_params(gamepad, ABS_RZ, 0, 255, 0, 0);
+
+       input_set_abs_params(gamepad, ABS_HAT0X, -1, 1, 0, 0);
+       input_set_abs_params(gamepad, ABS_HAT0Y, -1, 1, 0, 0);
+
+       for (i = 0; i < ARRAY_SIZE(ps_gamepad_buttons); i++)
+               input_set_capability(gamepad, EV_KEY, ps_gamepad_buttons[i]);
+
+#if IS_ENABLED(CONFIG_PLAYSTATION_FF)
+       if (play_effect) {
+               input_set_capability(gamepad, EV_FF, FF_RUMBLE);
+               input_ff_create_memless(gamepad, NULL, play_effect);
+       }
+#endif
+
+       ret = input_register_device(gamepad);
+       if (ret)
+               return ERR_PTR(ret);
+
+       return gamepad;
+}
+
+static int ps_get_report(struct hid_device *hdev, uint8_t report_id, uint8_t *buf, size_t size)
+{
+       int ret;
+
+       ret = hid_hw_raw_request(hdev, report_id, buf, size, HID_FEATURE_REPORT,
+                                HID_REQ_GET_REPORT);
+       if (ret < 0) {
+               hid_err(hdev, "Failed to retrieve feature with reportID %d: %d\n", report_id, ret);
+               return ret;
+       }
+
+       if (ret != size) {
+               hid_err(hdev, "Invalid byte count transferred, expected %zu got %d\n", size, ret);
+               return -EINVAL;
+       }
+
+       if (buf[0] != report_id) {
+               hid_err(hdev, "Invalid reportID received, expected %d got %d\n", report_id, buf[0]);
+               return -EINVAL;
+       }
+
+       if (hdev->bus == BUS_BLUETOOTH) {
+               /* Last 4 bytes contains crc32. */
+               uint8_t crc_offset = size - 4;
+               uint32_t report_crc = get_unaligned_le32(&buf[crc_offset]);
+
+               if (!ps_check_crc32(PS_FEATURE_CRC32_SEED, buf, crc_offset, report_crc)) {
+                       hid_err(hdev, "CRC check failed for reportID=%d\n", report_id);
+                       return -EILSEQ;
+               }
+       }
+
+       return 0;
+}
+
+static struct input_dev *ps_sensors_create(struct hid_device *hdev, int accel_range, int accel_res,
+               int gyro_range, int gyro_res)
+{
+       struct input_dev *sensors;
+       int ret;
+
+       sensors = ps_allocate_input_dev(hdev, "Motion Sensors");
+       if (IS_ERR(sensors))
+               return ERR_CAST(sensors);
+
+       __set_bit(INPUT_PROP_ACCELEROMETER, sensors->propbit);
+       __set_bit(EV_MSC, sensors->evbit);
+       __set_bit(MSC_TIMESTAMP, sensors->mscbit);
+
+       /* Accelerometer */
+       input_set_abs_params(sensors, ABS_X, -accel_range, accel_range, 16, 0);
+       input_set_abs_params(sensors, ABS_Y, -accel_range, accel_range, 16, 0);
+       input_set_abs_params(sensors, ABS_Z, -accel_range, accel_range, 16, 0);
+       input_abs_set_res(sensors, ABS_X, accel_res);
+       input_abs_set_res(sensors, ABS_Y, accel_res);
+       input_abs_set_res(sensors, ABS_Z, accel_res);
+
+       /* Gyroscope */
+       input_set_abs_params(sensors, ABS_RX, -gyro_range, gyro_range, 16, 0);
+       input_set_abs_params(sensors, ABS_RY, -gyro_range, gyro_range, 16, 0);
+       input_set_abs_params(sensors, ABS_RZ, -gyro_range, gyro_range, 16, 0);
+       input_abs_set_res(sensors, ABS_RX, gyro_res);
+       input_abs_set_res(sensors, ABS_RY, gyro_res);
+       input_abs_set_res(sensors, ABS_RZ, gyro_res);
+
+       ret = input_register_device(sensors);
+       if (ret)
+               return ERR_PTR(ret);
+
+       return sensors;
+}
+
+static struct input_dev *ps_touchpad_create(struct hid_device *hdev, int width, int height,
+               unsigned int num_contacts)
+{
+       struct input_dev *touchpad;
+       int ret;
+
+       touchpad = ps_allocate_input_dev(hdev, "Touchpad");
+       if (IS_ERR(touchpad))
+               return ERR_CAST(touchpad);
+
+       /* Map button underneath touchpad to BTN_LEFT. */
+       input_set_capability(touchpad, EV_KEY, BTN_LEFT);
+       __set_bit(INPUT_PROP_BUTTONPAD, touchpad->propbit);
+
+       input_set_abs_params(touchpad, ABS_MT_POSITION_X, 0, width - 1, 0, 0);
+       input_set_abs_params(touchpad, ABS_MT_POSITION_Y, 0, height - 1, 0, 0);
+
+       ret = input_mt_init_slots(touchpad, num_contacts, INPUT_MT_POINTER);
+       if (ret)
+               return ERR_PTR(ret);
+
+       ret = input_register_device(touchpad);
+       if (ret)
+               return ERR_PTR(ret);
+
+       return touchpad;
+}
+
+static ssize_t firmware_version_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct hid_device *hdev = to_hid_device(dev);
+       struct ps_device *ps_dev = hid_get_drvdata(hdev);
+
+       return sysfs_emit(buf, "0x%08x\n", ps_dev->fw_version);
+}
+
+static DEVICE_ATTR_RO(firmware_version);
+
+static ssize_t hardware_version_show(struct device *dev,
+                               struct device_attribute *attr, char *buf)
+{
+       struct hid_device *hdev = to_hid_device(dev);
+       struct ps_device *ps_dev = hid_get_drvdata(hdev);
+
+       return sysfs_emit(buf, "0x%08x\n", ps_dev->hw_version);
+}
+
+static DEVICE_ATTR_RO(hardware_version);
+
+static struct attribute *ps_device_attributes[] = {
+       &dev_attr_firmware_version.attr,
+       &dev_attr_hardware_version.attr,
+       NULL
+};
+
+static const struct attribute_group ps_device_attribute_group = {
+       .attrs = ps_device_attributes,
+};
+
+static int dualsense_get_calibration_data(struct dualsense *ds)
+{
+       short gyro_pitch_bias, gyro_pitch_plus, gyro_pitch_minus;
+       short gyro_yaw_bias, gyro_yaw_plus, gyro_yaw_minus;
+       short gyro_roll_bias, gyro_roll_plus, gyro_roll_minus;
+       short gyro_speed_plus, gyro_speed_minus;
+       short acc_x_plus, acc_x_minus;
+       short acc_y_plus, acc_y_minus;
+       short acc_z_plus, acc_z_minus;
+       int speed_2x;
+       int range_2g;
+       int ret = 0;
+       uint8_t *buf;
+
+       buf = kzalloc(DS_FEATURE_REPORT_CALIBRATION_SIZE, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = ps_get_report(ds->base.hdev, DS_FEATURE_REPORT_CALIBRATION, buf,
+                       DS_FEATURE_REPORT_CALIBRATION_SIZE);
+       if (ret) {
+               hid_err(ds->base.hdev, "Failed to retrieve DualSense calibration info: %d\n", ret);
+               goto err_free;
+       }
+
+       gyro_pitch_bias  = get_unaligned_le16(&buf[1]);
+       gyro_yaw_bias    = get_unaligned_le16(&buf[3]);
+       gyro_roll_bias   = get_unaligned_le16(&buf[5]);
+       gyro_pitch_plus  = get_unaligned_le16(&buf[7]);
+       gyro_pitch_minus = get_unaligned_le16(&buf[9]);
+       gyro_yaw_plus    = get_unaligned_le16(&buf[11]);
+       gyro_yaw_minus   = get_unaligned_le16(&buf[13]);
+       gyro_roll_plus   = get_unaligned_le16(&buf[15]);
+       gyro_roll_minus  = get_unaligned_le16(&buf[17]);
+       gyro_speed_plus  = get_unaligned_le16(&buf[19]);
+       gyro_speed_minus = get_unaligned_le16(&buf[21]);
+       acc_x_plus       = get_unaligned_le16(&buf[23]);
+       acc_x_minus      = get_unaligned_le16(&buf[25]);
+       acc_y_plus       = get_unaligned_le16(&buf[27]);
+       acc_y_minus      = get_unaligned_le16(&buf[29]);
+       acc_z_plus       = get_unaligned_le16(&buf[31]);
+       acc_z_minus      = get_unaligned_le16(&buf[33]);
+
+       /*
+        * Set gyroscope calibration and normalization parameters.
+        * Data values will be normalized to 1/DS_GYRO_RES_PER_DEG_S degree/s.
+        */
+       speed_2x = (gyro_speed_plus + gyro_speed_minus);
+       ds->gyro_calib_data[0].abs_code = ABS_RX;
+       ds->gyro_calib_data[0].bias = gyro_pitch_bias;
+       ds->gyro_calib_data[0].sens_numer = speed_2x*DS_GYRO_RES_PER_DEG_S;
+       ds->gyro_calib_data[0].sens_denom = gyro_pitch_plus - gyro_pitch_minus;
+
+       ds->gyro_calib_data[1].abs_code = ABS_RY;
+       ds->gyro_calib_data[1].bias = gyro_yaw_bias;
+       ds->gyro_calib_data[1].sens_numer = speed_2x*DS_GYRO_RES_PER_DEG_S;
+       ds->gyro_calib_data[1].sens_denom = gyro_yaw_plus - gyro_yaw_minus;
+
+       ds->gyro_calib_data[2].abs_code = ABS_RZ;
+       ds->gyro_calib_data[2].bias = gyro_roll_bias;
+       ds->gyro_calib_data[2].sens_numer = speed_2x*DS_GYRO_RES_PER_DEG_S;
+       ds->gyro_calib_data[2].sens_denom = gyro_roll_plus - gyro_roll_minus;
+
+       /*
+        * Set accelerometer calibration and normalization parameters.
+        * Data values will be normalized to 1/DS_ACC_RES_PER_G g.
+        */
+       range_2g = acc_x_plus - acc_x_minus;
+       ds->accel_calib_data[0].abs_code = ABS_X;
+       ds->accel_calib_data[0].bias = acc_x_plus - range_2g / 2;
+       ds->accel_calib_data[0].sens_numer = 2*DS_ACC_RES_PER_G;
+       ds->accel_calib_data[0].sens_denom = range_2g;
+
+       range_2g = acc_y_plus - acc_y_minus;
+       ds->accel_calib_data[1].abs_code = ABS_Y;
+       ds->accel_calib_data[1].bias = acc_y_plus - range_2g / 2;
+       ds->accel_calib_data[1].sens_numer = 2*DS_ACC_RES_PER_G;
+       ds->accel_calib_data[1].sens_denom = range_2g;
+
+       range_2g = acc_z_plus - acc_z_minus;
+       ds->accel_calib_data[2].abs_code = ABS_Z;
+       ds->accel_calib_data[2].bias = acc_z_plus - range_2g / 2;
+       ds->accel_calib_data[2].sens_numer = 2*DS_ACC_RES_PER_G;
+       ds->accel_calib_data[2].sens_denom = range_2g;
+
+err_free:
+       kfree(buf);
+       return ret;
+}
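
For context on how these bias/numer/denom triples are consumed: the input path presumably applies a linear fit to each raw sample so gyro values land in 1/DS_GYRO_RES_PER_DEG_S deg/s and accelerometer values in 1/DS_ACC_RES_PER_G g. A sketch using mult_frac() from linux/kernel.h, which orders the multiply and divide to avoid intermediate overflow:

static int demo_calibrate(const struct ps_calibration_data *calib,
			  short raw)
{
	/* calibrated = (raw - bias) * sens_numer / sens_denom */
	return mult_frac(calib->sens_numer, raw - calib->bias,
			 calib->sens_denom);
}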
+
+static int dualsense_get_firmware_info(struct dualsense *ds)
+{
+       uint8_t *buf;
+       int ret;
+
+       buf = kzalloc(DS_FEATURE_REPORT_FIRMWARE_INFO_SIZE, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = ps_get_report(ds->base.hdev, DS_FEATURE_REPORT_FIRMWARE_INFO, buf,
+                       DS_FEATURE_REPORT_FIRMWARE_INFO_SIZE);
+       if (ret) {
+               hid_err(ds->base.hdev, "Failed to retrieve DualSense firmware info: %d\n", ret);
+               goto err_free;
+       }
+
+       ds->base.hw_version = get_unaligned_le32(&buf[24]);
+       ds->base.fw_version = get_unaligned_le32(&buf[28]);
+
+err_free:
+       kfree(buf);
+       return ret;
+}
+
+static int dualsense_get_mac_address(struct dualsense *ds)
+{
+       uint8_t *buf;
+       int ret = 0;
+
+       buf = kzalloc(DS_FEATURE_REPORT_PAIRING_INFO_SIZE, GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       ret = ps_get_report(ds->base.hdev, DS_FEATURE_REPORT_PAIRING_INFO, buf,
+                       DS_FEATURE_REPORT_PAIRING_INFO_SIZE);
+       if (ret) {
+               hid_err(ds->base.hdev, "Failed to retrieve DualSense pairing info: %d\n", ret);
+               goto err_free;
+       }
+
+       memcpy(ds->base.mac_address, &buf[1], sizeof(ds->base.mac_address));
+
+err_free:
+       kfree(buf);
+       return ret;
+}
+
+static void dualsense_init_output_report(struct dualsense *ds, struct dualsense_output_report *rp,
+               void *buf)
+{
+       struct hid_device *hdev = ds->base.hdev;
+
+       if (hdev->bus == BUS_BLUETOOTH) {
+               struct dualsense_output_report_bt *bt = buf;
+
+               memset(bt, 0, sizeof(*bt));
+               bt->report_id = DS_OUTPUT_REPORT_BT;
+               bt->tag = DS_OUTPUT_TAG; /* Tag must be set. Exact meaning is unclear. */
+
+               /*
+                * The highest 4 bits are a sequence number, which needs to
+                * be incremented with every report. The lowest 4 bits are a
+                * tag and can be zero for now.
+                */
+               bt->seq_tag = (ds->output_seq << 4) | 0x0;
+               if (++ds->output_seq == 16)
+                       ds->output_seq = 0;
+
+               rp->data = buf;
+               rp->len = sizeof(*bt);
+               rp->bt = bt;
+               rp->usb = NULL;
+               rp->common = &bt->common;
+       } else { /* USB */
+               struct dualsense_output_report_usb *usb = buf;
+
+               memset(usb, 0, sizeof(*usb));
+               usb->report_id = DS_OUTPUT_REPORT_USB;
+
+               rp->data = buf;
+               rp->len = sizeof(*usb);
+               rp->bt = NULL;
+               rp->usb = usb;
+               rp->common = &usb->common;
+       }
+}
+
+/*
+ * Helper function to send DualSense output reports. Applies a CRC at the end of a report
+ * for Bluetooth reports.
+ */
+static void dualsense_send_output_report(struct dualsense *ds,
+               struct dualsense_output_report *report)
+{
+       struct hid_device *hdev = ds->base.hdev;
+
+       /* Bluetooth packets need to be signed with a CRC in the last 4 bytes. */
+       if (report->bt) {
+               uint32_t crc;
+               uint8_t seed = PS_OUTPUT_CRC32_SEED;
+
+               crc = crc32_le(0xFFFFFFFF, &seed, 1);
+               crc = ~crc32_le(crc, report->data, report->len - 4);
+
+               report->bt->crc32 = cpu_to_le32(crc);
+       }
+
+       hid_hw_output_report(hdev, report->data, report->len);
+}
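The Bluetooth CRC above is equivalent to a standard CRC-32 computed over a one-byte seed followed by the report body (everything except the trailing 4 CRC bytes). A hedged userspace sketch using zlib; the 0xa2 seed is assumed here, since the driver's PS_OUTPUT_CRC32_SEED constant is defined outside this hunk. Build with -lz:

	#include <stdio.h>
	#include <stdint.h>
	#include <zlib.h>

	/* Standard CRC-32 over (seed byte || report body), excluding the CRC field. */
	static uint32_t ds_report_crc(uint8_t seed, const uint8_t *report, size_t len)
	{
		uLong crc = crc32(0L, Z_NULL, 0);

		crc = crc32(crc, &seed, 1);
		crc = crc32(crc, report, len - 4);	/* skip trailing CRC bytes */
		return (uint32_t)crc;
	}

	int main(void)
	{
		uint8_t report[16] = { 0x31, 0x02 };	/* fabricated example bytes */
		uint32_t crc = ds_report_crc(0xa2 /* assumed seed */, report,
					     sizeof(report));

		printf("crc32 = 0x%08x\n", crc);
		return 0;
	}

This works because kernel ~crc32_le(0xFFFFFFFF, buf, len) equals the pre/post-inverted CRC-32 that zlib's crc32() computes, and both chain across calls.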
+
+static void dualsense_output_worker(struct work_struct *work)
+{
+       struct dualsense *ds = container_of(work, struct dualsense, output_worker);
+       struct dualsense_output_report report;
+       struct dualsense_output_report_common *common;
+       unsigned long flags;
+
+       dualsense_init_output_report(ds, &report, ds->output_report_dmabuf);
+       common = report.common;
+
+       spin_lock_irqsave(&ds->base.lock, flags);
+
+       if (ds->update_rumble) {
+               /* Select classic rumble style haptics and enable it. */
+               common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_HAPTICS_SELECT;
+               common->valid_flag0 |= DS_OUTPUT_VALID_FLAG0_COMPATIBLE_VIBRATION;
+               common->motor_left = ds->motor_left;
+               common->motor_right = ds->motor_right;
+               ds->update_rumble = false;
+       }
+
+       if (ds->update_lightbar) {
+               common->valid_flag1 |= DS_OUTPUT_VALID_FLAG1_LIGHTBAR_CONTROL_ENABLE;
+               common->lightbar_red = ds->lightbar_red;
+               common->lightbar_green = ds->lightbar_green;
+               common->lightbar_blue = ds->lightbar_blue;
+
+               ds->update_lightbar = false;
+       }
+
+       if (ds->update_player_leds) {
+               common->valid_flag1 |= DS_OUTPUT_VALID_FLAG1_PLAYER_INDICATOR_CONTROL_ENABLE;
+               common->player_leds = ds->player_leds_state;
+
+               ds->update_player_leds = false;
+       }
+
+       if (ds->update_mic_mute) {
+               common->valid_flag1 |= DS_OUTPUT_VALID_FLAG1_MIC_MUTE_LED_CONTROL_ENABLE;
+               common->mute_button_led = ds->mic_muted;
+
+               if (ds->mic_muted) {
+                       /* Disable microphone */
+                       common->valid_flag1 |= DS_OUTPUT_VALID_FLAG1_POWER_SAVE_CONTROL_ENABLE;
+                       common->power_save_control |= DS_OUTPUT_POWER_SAVE_CONTROL_MIC_MUTE;
+               } else {
+                       /* Enable microphone */
+                       common->valid_flag1 |= DS_OUTPUT_VALID_FLAG1_POWER_SAVE_CONTROL_ENABLE;
+                       common->power_save_control &= ~DS_OUTPUT_POWER_SAVE_CONTROL_MIC_MUTE;
+               }
+
+               ds->update_mic_mute = false;
+       }
+
+       spin_unlock_irqrestore(&ds->base.lock, flags);
+
+       dualsense_send_output_report(ds, &report);
+}
+
+static int dualsense_parse_report(struct ps_device *ps_dev, struct hid_report *report,
+               u8 *data, int size)
+{
+       struct hid_device *hdev = ps_dev->hdev;
+       struct dualsense *ds = container_of(ps_dev, struct dualsense, base);
+       struct dualsense_input_report *ds_report;
+       uint8_t battery_data, battery_capacity, charging_status, value;
+       int battery_status;
+       uint32_t sensor_timestamp;
+       bool btn_mic_state;
+       unsigned long flags;
+       int i;
+
+       /*
+        * Over USB the DualSense sends the full HID report with report ID 1.
+        * Over Bluetooth, report ID 1 is a minimal report, while the full
+        * report is sent with report ID 49.
+        */
+       if (hdev->bus == BUS_USB && report->id == DS_INPUT_REPORT_USB &&
+                       size == DS_INPUT_REPORT_USB_SIZE) {
+               ds_report = (struct dualsense_input_report *)&data[1];
+       } else if (hdev->bus == BUS_BLUETOOTH && report->id == DS_INPUT_REPORT_BT &&
+                       size == DS_INPUT_REPORT_BT_SIZE) {
+               /* Last 4 bytes of input report contain crc32 */
+               uint32_t report_crc = get_unaligned_le32(&data[size - 4]);
+
+               if (!ps_check_crc32(PS_INPUT_CRC32_SEED, data, size - 4, report_crc)) {
+                       hid_err(hdev, "DualSense input CRC's check failed\n");
+                       return -EILSEQ;
+               }
+
+               ds_report = (struct dualsense_input_report *)&data[2];
+       } else {
+               hid_err(hdev, "Unhandled reportID=%d\n", report->id);
+               return -1;
+       }
+
+       input_report_abs(ds->gamepad, ABS_X,  ds_report->x);
+       input_report_abs(ds->gamepad, ABS_Y,  ds_report->y);
+       input_report_abs(ds->gamepad, ABS_RX, ds_report->rx);
+       input_report_abs(ds->gamepad, ABS_RY, ds_report->ry);
+       input_report_abs(ds->gamepad, ABS_Z,  ds_report->z);
+       input_report_abs(ds->gamepad, ABS_RZ, ds_report->rz);
+
+       value = ds_report->buttons[0] & DS_BUTTONS0_HAT_SWITCH;
+       if (value >= ARRAY_SIZE(ps_gamepad_hat_mapping))
+               value = 8; /* center */
+       input_report_abs(ds->gamepad, ABS_HAT0X, ps_gamepad_hat_mapping[value].x);
+       input_report_abs(ds->gamepad, ABS_HAT0Y, ps_gamepad_hat_mapping[value].y);
+
+       input_report_key(ds->gamepad, BTN_WEST,   ds_report->buttons[0] & DS_BUTTONS0_SQUARE);
+       input_report_key(ds->gamepad, BTN_SOUTH,  ds_report->buttons[0] & DS_BUTTONS0_CROSS);
+       input_report_key(ds->gamepad, BTN_EAST,   ds_report->buttons[0] & DS_BUTTONS0_CIRCLE);
+       input_report_key(ds->gamepad, BTN_NORTH,  ds_report->buttons[0] & DS_BUTTONS0_TRIANGLE);
+       input_report_key(ds->gamepad, BTN_TL,     ds_report->buttons[1] & DS_BUTTONS1_L1);
+       input_report_key(ds->gamepad, BTN_TR,     ds_report->buttons[1] & DS_BUTTONS1_R1);
+       input_report_key(ds->gamepad, BTN_TL2,    ds_report->buttons[1] & DS_BUTTONS1_L2);
+       input_report_key(ds->gamepad, BTN_TR2,    ds_report->buttons[1] & DS_BUTTONS1_R2);
+       input_report_key(ds->gamepad, BTN_SELECT, ds_report->buttons[1] & DS_BUTTONS1_CREATE);
+       input_report_key(ds->gamepad, BTN_START,  ds_report->buttons[1] & DS_BUTTONS1_OPTIONS);
+       input_report_key(ds->gamepad, BTN_THUMBL, ds_report->buttons[1] & DS_BUTTONS1_L3);
+       input_report_key(ds->gamepad, BTN_THUMBR, ds_report->buttons[1] & DS_BUTTONS1_R3);
+       input_report_key(ds->gamepad, BTN_MODE,   ds_report->buttons[2] & DS_BUTTONS2_PS_HOME);
+       input_sync(ds->gamepad);
+
+       /*
+        * The DualSense has an internal microphone, which can be muted through a mute button
+        * on the device. The driver is expected to read the button state and program the device
+        * to mute/unmute audio at the hardware level.
+        */
+       btn_mic_state = !!(ds_report->buttons[2] & DS_BUTTONS2_MIC_MUTE);
+       if (btn_mic_state && !ds->last_btn_mic_state) {
+               spin_lock_irqsave(&ps_dev->lock, flags);
+               ds->update_mic_mute = true;
+               ds->mic_muted = !ds->mic_muted; /* toggle */
+               spin_unlock_irqrestore(&ps_dev->lock, flags);
+
+               /* Schedule updating of microphone state at hardware level. */
+               schedule_work(&ds->output_worker);
+       }
+       ds->last_btn_mic_state = btn_mic_state;
+
+       /* Parse and calibrate gyroscope data. */
+       for (i = 0; i < ARRAY_SIZE(ds_report->gyro); i++) {
+               int raw_data = (short)le16_to_cpu(ds_report->gyro[i]);
+               int calib_data = mult_frac(ds->gyro_calib_data[i].sens_numer,
+                                          raw_data - ds->gyro_calib_data[i].bias,
+                                          ds->gyro_calib_data[i].sens_denom);
+
+               input_report_abs(ds->sensors, ds->gyro_calib_data[i].abs_code, calib_data);
+       }
+
+       /* Parse and calibrate accelerometer data. */
+       for (i = 0; i < ARRAY_SIZE(ds_report->accel); i++) {
+               int raw_data = (short)le16_to_cpu(ds_report->accel[i]);
+               int calib_data = mult_frac(ds->accel_calib_data[i].sens_numer,
+                                          raw_data - ds->accel_calib_data[i].bias,
+                                          ds->accel_calib_data[i].sens_denom);
+
+               input_report_abs(ds->sensors, ds->accel_calib_data[i].abs_code, calib_data);
+       }
+
+       /* Convert timestamp (in 0.33 us units) to microseconds. */
+       sensor_timestamp = le32_to_cpu(ds_report->sensor_timestamp);
+       if (!ds->sensor_timestamp_initialized) {
+               ds->sensor_timestamp_us = DIV_ROUND_CLOSEST(sensor_timestamp, 3);
+               ds->sensor_timestamp_initialized = true;
+       } else {
+               uint32_t delta;
+
+               if (ds->prev_sensor_timestamp > sensor_timestamp)
+                       delta = (U32_MAX - ds->prev_sensor_timestamp + sensor_timestamp + 1);
+               else
+                       delta = sensor_timestamp - ds->prev_sensor_timestamp;
+               ds->sensor_timestamp_us += DIV_ROUND_CLOSEST(delta, 3);
+       }
+       ds->prev_sensor_timestamp = sensor_timestamp;
+       input_event(ds->sensors, EV_MSC, MSC_TIMESTAMP, ds->sensor_timestamp_us);
+       input_sync(ds->sensors);
+
+       for (i = 0; i < ARRAY_SIZE(ds_report->points); i++) {
+               struct dualsense_touch_point *point = &ds_report->points[i];
+               bool active = !(point->contact & DS_TOUCH_POINT_INACTIVE);
+
+               input_mt_slot(ds->touchpad, i);
+               input_mt_report_slot_state(ds->touchpad, MT_TOOL_FINGER, active);
+
+               if (active) {
+                       int x = (point->x_hi << 8) | point->x_lo;
+                       int y = (point->y_hi << 4) | point->y_lo;
+
+                       input_report_abs(ds->touchpad, ABS_MT_POSITION_X, x);
+                       input_report_abs(ds->touchpad, ABS_MT_POSITION_Y, y);
+               }
+       }
+       input_mt_sync_frame(ds->touchpad);
+       input_report_key(ds->touchpad, BTN_LEFT, ds_report->buttons[2] & DS_BUTTONS2_TOUCHPAD);
+       input_sync(ds->touchpad);
+
+       battery_data = ds_report->status & DS_STATUS_BATTERY_CAPACITY;
+       charging_status = (ds_report->status & DS_STATUS_CHARGING) >> DS_STATUS_CHARGING_SHIFT;
+
+       switch (charging_status) {
+       case 0x0:
+               /*
+                * Each unit of battery data corresponds to 10%
+                * 0 = 0-9%, 1 = 10-19%, ..., 10 = 100%
+                */
+               battery_capacity = min(battery_data * 10 + 5, 100);
+               battery_status = POWER_SUPPLY_STATUS_DISCHARGING;
+               break;
+       case 0x1:
+               battery_capacity = min(battery_data * 10 + 5, 100);
+               battery_status = POWER_SUPPLY_STATUS_CHARGING;
+               break;
+       case 0x2:
+               battery_capacity = 100;
+               battery_status = POWER_SUPPLY_STATUS_FULL;
+               break;
+       case 0xa: /* voltage or temperature out of range */
+       case 0xb: /* temperature error */
+               battery_capacity = 0;
+               battery_status = POWER_SUPPLY_STATUS_NOT_CHARGING;
+               break;
+       case 0xf: /* charging error */
+       default:
+               battery_capacity = 0;
+               battery_status = POWER_SUPPLY_STATUS_UNKNOWN;
+       }
+
+       spin_lock_irqsave(&ps_dev->lock, flags);
+       ps_dev->battery_capacity = battery_capacity;
+       ps_dev->battery_status = battery_status;
+       spin_unlock_irqrestore(&ps_dev->lock, flags);
+
+       return 0;
+}
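The wraparound handling in the timestamp path above can be checked in isolation. A small sketch, assuming only that the sensor timestamp is an unsigned 32-bit tick counter at 3 ticks per microsecond:

	#include <stdio.h>
	#include <stdint.h>

	/* Wraparound-safe delta between two 32-bit tick counts. */
	static uint32_t ticks_delta(uint32_t prev, uint32_t cur)
	{
		if (prev > cur)	/* the 32-bit counter wrapped */
			return (UINT32_MAX - prev) + cur + 1;
		return cur - prev;
	}

	int main(void)
	{
		uint32_t prev = 0xFFFFFFF0u, cur = 0x00000010u;
		uint32_t delta = ticks_delta(prev, cur);

		/* Mirrors DIV_ROUND_CLOSEST(delta, 3) in the driver. */
		printf("delta = %u ticks (~%u us)\n", delta, (delta + 1) / 3);
		return 0;
	}

Here the counter wraps between samples; the delta comes out as 32 ticks (about 11 us) rather than a huge bogus value.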
+
+static int dualsense_play_effect(struct input_dev *dev, void *data, struct ff_effect *effect)
+{
+       struct hid_device *hdev = input_get_drvdata(dev);
+       struct dualsense *ds = hid_get_drvdata(hdev);
+       unsigned long flags;
+
+       if (effect->type != FF_RUMBLE)
+               return 0;
+
+       spin_lock_irqsave(&ds->base.lock, flags);
+       ds->update_rumble = true;
+       ds->motor_left = effect->u.rumble.strong_magnitude / 256;
+       ds->motor_right = effect->u.rumble.weak_magnitude / 256;
+       spin_unlock_irqrestore(&ds->base.lock, flags);
+
+       schedule_work(&ds->output_worker);
+       return 0;
+}
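A note on the scaling above: ff-core rumble magnitudes are 16-bit, while the DualSense motor values are 8-bit, hence the division by 256. A trivial sketch:

	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint16_t strong = 0xFFFF, weak = 0x8000;	/* 16-bit ff magnitudes */

		/* 8-bit motor values, as in dualsense_play_effect() above. */
		printf("motor_left=%u motor_right=%u\n", strong / 256, weak / 256);
		/* -> motor_left=255 motor_right=128 */
		return 0;
	}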
+
+static int dualsense_reset_leds(struct dualsense *ds)
+{
+       struct dualsense_output_report report;
+       uint8_t *buf;
+
+       buf = kzalloc(sizeof(struct dualsense_output_report_bt), GFP_KERNEL);
+       if (!buf)
+               return -ENOMEM;
+
+       dualsense_init_output_report(ds, &report, buf);
+       /*
+        * On Bluetooth the DualSense outputs an animation on the lightbar
+        * during startup and maintains a color afterwards. We need to explicitly
+        * reconfigure the lightbar before we can do any programming later on.
+        * In USB the lightbar is not on by default, but redoing the setup there
+        * doesn't hurt.
+        */
+       report.common->valid_flag2 = DS_OUTPUT_VALID_FLAG2_LIGHTBAR_SETUP_CONTROL_ENABLE;
+       report.common->lightbar_setup = DS_OUTPUT_LIGHTBAR_SETUP_LIGHT_OUT; /* Fade light out. */
+       dualsense_send_output_report(ds, &report);
+
+       kfree(buf);
+       return 0;
+}
+
+static void dualsense_set_lightbar(struct dualsense *ds, uint8_t red, uint8_t green, uint8_t blue)
+{
+       ds->update_lightbar = true;
+       ds->lightbar_red = red;
+       ds->lightbar_green = green;
+       ds->lightbar_blue = blue;
+
+       schedule_work(&ds->output_worker);
+}
+
+static void dualsense_set_player_leds(struct dualsense *ds)
+{
+       /*
+        * The DualSense controller has a row of 5 LEDs used for player IDs.
+        * Behavior on the PlayStation 5 console is to center the player ID
+        * across the LEDs, so e.g. player 1 would be "--x--" with x being 'on'.
+        * Follow a similar mapping here.
+        */
+       static const int player_ids[5] = {
+               BIT(2),
+               BIT(3) | BIT(1),
+               BIT(4) | BIT(2) | BIT(0),
+               BIT(4) | BIT(3) | BIT(1) | BIT(0),
+               BIT(4) | BIT(3) | BIT(2) | BIT(1) | BIT(0)
+       };
+
+       uint8_t player_id = ds->base.player_id % ARRAY_SIZE(player_ids);
+
+       ds->update_player_leds = true;
+       ds->player_leds_state = player_ids[player_id];
+       schedule_work(&ds->output_worker);
+}
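To visualize the centered mapping, a small standalone sketch that prints the same patterns the comment above describes:

	#include <stdio.h>

	#define BIT(n) (1u << (n))

	/* Same centered mapping as player_ids[] above. */
	static const unsigned int player_ids[5] = {
		BIT(2),
		BIT(3) | BIT(1),
		BIT(4) | BIT(2) | BIT(0),
		BIT(4) | BIT(3) | BIT(1) | BIT(0),
		BIT(4) | BIT(3) | BIT(2) | BIT(1) | BIT(0),
	};

	int main(void)
	{
		for (int id = 0; id < 5; id++) {
			printf("player %d: ", id + 1);
			for (int led = 4; led >= 0; led--)
				putchar(player_ids[id] & BIT(led) ? 'x' : '-');
			putchar('\n');
		}
		return 0;
	}

Output runs from "--x--" for player 1 up to "xxxxx" for player 5.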
+
+static struct ps_device *dualsense_create(struct hid_device *hdev)
+{
+       struct dualsense *ds;
+       struct ps_device *ps_dev;
+       uint8_t max_output_report_size;
+       int ret;
+
+       ds = devm_kzalloc(&hdev->dev, sizeof(*ds), GFP_KERNEL);
+       if (!ds)
+               return ERR_PTR(-ENOMEM);
+
+       /*
+        * Patch the HID version to allow userspace to distinguish between
+        * the hid-generic and hid-playstation axis and button mappings.
+        */
+       hdev->version |= HID_PLAYSTATION_VERSION_PATCH;
+
+       ps_dev = &ds->base;
+       ps_dev->hdev = hdev;
+       spin_lock_init(&ps_dev->lock);
+       ps_dev->battery_capacity = 100; /* initial value until parse_report. */
+       ps_dev->battery_status = POWER_SUPPLY_STATUS_UNKNOWN;
+       ps_dev->parse_report = dualsense_parse_report;
+       INIT_WORK(&ds->output_worker, dualsense_output_worker);
+       hid_set_drvdata(hdev, ds);
+
+       max_output_report_size = sizeof(struct dualsense_output_report_bt);
+       ds->output_report_dmabuf = devm_kzalloc(&hdev->dev, max_output_report_size, GFP_KERNEL);
+       if (!ds->output_report_dmabuf)
+               return ERR_PTR(-ENOMEM);
+
+       ret = dualsense_get_mac_address(ds);
+       if (ret) {
+               hid_err(hdev, "Failed to get MAC address from DualSense\n");
+               return ERR_PTR(ret);
+       }
+       snprintf(hdev->uniq, sizeof(hdev->uniq), "%pMR", ds->base.mac_address);
+
+       ret = dualsense_get_firmware_info(ds);
+       if (ret) {
+               hid_err(hdev, "Failed to get firmware info from DualSense\n");
+               return ERR_PTR(ret);
+       }
+
+       ret = ps_devices_list_add(ps_dev);
+       if (ret)
+               return ERR_PTR(ret);
+
+       ret = dualsense_get_calibration_data(ds);
+       if (ret) {
+               hid_err(hdev, "Failed to get calibration data from DualSense\n");
+               goto err;
+       }
+
+       ds->gamepad = ps_gamepad_create(hdev, dualsense_play_effect);
+       if (IS_ERR(ds->gamepad)) {
+               ret = PTR_ERR(ds->gamepad);
+               goto err;
+       }
+
+       ds->sensors = ps_sensors_create(hdev, DS_ACC_RANGE, DS_ACC_RES_PER_G,
+                       DS_GYRO_RANGE, DS_GYRO_RES_PER_DEG_S);
+       if (IS_ERR(ds->sensors)) {
+               ret = PTR_ERR(ds->sensors);
+               goto err;
+       }
+
+       ds->touchpad = ps_touchpad_create(hdev, DS_TOUCHPAD_WIDTH, DS_TOUCHPAD_HEIGHT, 2);
+       if (IS_ERR(ds->touchpad)) {
+               ret = PTR_ERR(ds->touchpad);
+               goto err;
+       }
+
+       ret = ps_device_register_battery(ps_dev);
+       if (ret)
+               goto err;
+
+       /*
+        * The hardware may have control over the LEDs (e.g. in Bluetooth on startup).
+        * Reset the LEDs (lightbar, mute, player LEDs), so we can control them
+        * from software.
+        */
+       ret = dualsense_reset_leds(ds);
+       if (ret)
+               goto err;
+
+       dualsense_set_lightbar(ds, 0, 0, 128); /* blue */
+
+       ret = ps_device_set_player_id(ps_dev);
+       if (ret) {
+               hid_err(hdev, "Failed to assign player id for DualSense: %d\n", ret);
+               goto err;
+       }
+
+       /* Set player LEDs to our player id. */
+       dualsense_set_player_leds(ds);
+
+       /*
+        * Reporting the hardware and firmware versions is important, as
+        * frequent firmware updates can change device behavior.
+        */
+       hid_info(hdev, "Registered DualSense controller hw_version=0x%08x fw_version=0x%08x\n",
+                       ds->base.hw_version, ds->base.fw_version);
+
+       return &ds->base;
+
+err:
+       ps_devices_list_remove(ps_dev);
+       return ERR_PTR(ret);
+}
+
+static int ps_raw_event(struct hid_device *hdev, struct hid_report *report,
+               u8 *data, int size)
+{
+       struct ps_device *dev = hid_get_drvdata(hdev);
+
+       if (dev && dev->parse_report)
+               return dev->parse_report(dev, report, data, size);
+
+       return 0;
+}
+
+static int ps_probe(struct hid_device *hdev, const struct hid_device_id *id)
+{
+       struct ps_device *dev;
+       int ret;
+
+       ret = hid_parse(hdev);
+       if (ret) {
+               hid_err(hdev, "Parse failed\n");
+               return ret;
+       }
+
+       ret = hid_hw_start(hdev, HID_CONNECT_HIDRAW);
+       if (ret) {
+               hid_err(hdev, "Failed to start HID device\n");
+               return ret;
+       }
+
+       ret = hid_hw_open(hdev);
+       if (ret) {
+               hid_err(hdev, "Failed to open HID device\n");
+               goto err_stop;
+       }
+
+       if (hdev->product == USB_DEVICE_ID_SONY_PS5_CONTROLLER) {
+               dev = dualsense_create(hdev);
+               if (IS_ERR(dev)) {
+                       hid_err(hdev, "Failed to create dualsense.\n");
+                       ret = PTR_ERR(dev);
+                       goto err_close;
+               }
+       }
+
+       ret = devm_device_add_group(&hdev->dev, &ps_device_attribute_group);
+       if (ret) {
+               hid_err(hdev, "Failed to register sysfs nodes.\n");
+               goto err_close;
+       }
+
+       return ret;
+
+err_close:
+       hid_hw_close(hdev);
+err_stop:
+       hid_hw_stop(hdev);
+       return ret;
+}
+
+static void ps_remove(struct hid_device *hdev)
+{
+       struct ps_device *dev = hid_get_drvdata(hdev);
+
+       ps_devices_list_remove(dev);
+       ps_device_release_player_id(dev);
+
+       hid_hw_close(hdev);
+       hid_hw_stop(hdev);
+}
+
+static const struct hid_device_id ps_devices[] = {
+       { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_SONY, USB_DEVICE_ID_SONY_PS5_CONTROLLER) },
+       { }
+};
+MODULE_DEVICE_TABLE(hid, ps_devices);
+
+static struct hid_driver ps_driver = {
+       .name           = "playstation",
+       .id_table       = ps_devices,
+       .probe          = ps_probe,
+       .remove         = ps_remove,
+       .raw_event      = ps_raw_event,
+};
+
+static int __init ps_init(void)
+{
+       return hid_register_driver(&ps_driver);
+}
+
+static void __exit ps_exit(void)
+{
+       hid_unregister_driver(&ps_driver);
+       ida_destroy(&ps_player_id_allocator);
+}
+
+module_init(ps_init);
+module_exit(ps_exit);
+
+MODULE_AUTHOR("Sony Interactive Entertainment");
+MODULE_DESCRIPTION("HID Driver for PlayStation peripherals.");
+MODULE_LICENSE("GPL");
index d9ca874..1a9daf0 100644 (file)
@@ -180,7 +180,6 @@ static const struct hid_device_id hid_quirks[] = {
        { HID_USB_DEVICE(USB_VENDOR_ID_TOUCHPACK, USB_DEVICE_ID_TOUCHPACK_RTS), HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN_8882), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_TPV, USB_DEVICE_ID_TPV_OPTICAL_TOUCHSCREEN_8883), HID_QUIRK_NOGET },
-       { HID_USB_DEVICE(USB_VENDOR_ID_TRUST, USB_DEVICE_ID_TRUST_PANORA_TABLET), HID_QUIRK_MULTI_INPUT | HID_QUIRK_HIDINPUT_FORCE },
        { HID_USB_DEVICE(USB_VENDOR_ID_TURBOX, USB_DEVICE_ID_TURBOX_KEYBOARD), HID_QUIRK_NOGET },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_KNA5), HID_QUIRK_MULTI_INPUT },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC, USB_DEVICE_ID_UCLOGIC_TABLET_TWA60), HID_QUIRK_MULTI_INPUT },
@@ -1029,7 +1028,7 @@ static DEFINE_MUTEX(dquirks_lock);
 /* Runtime ("dynamic") quirks manipulation functions */
 
 /**
- * hid_exists_dquirk: find any dynamic quirks for a HID device
+ * hid_exists_dquirk - find any dynamic quirks for a HID device
  * @hdev: the HID device to match
  *
  * Description:
@@ -1037,7 +1036,7 @@ static DEFINE_MUTEX(dquirks_lock);
  *         the pointer to the relevant struct hid_device_id if found.
  *         Must be called with a read lock held on dquirks_lock.
  *
- * Returns: NULL if no quirk found, struct hid_device_id * if found.
+ * Return: NULL if no quirk found, struct hid_device_id * if found.
  */
 static struct hid_device_id *hid_exists_dquirk(const struct hid_device *hdev)
 {
@@ -1061,7 +1060,7 @@ static struct hid_device_id *hid_exists_dquirk(const struct hid_device *hdev)
 
 
 /**
- * hid_modify_dquirk: add/replace a HID quirk
+ * hid_modify_dquirk - add/replace a HID quirk
  * @id: the HID device to match
  * @quirks: the unsigned long quirks value to add/replace
  *
@@ -1070,7 +1069,7 @@ static struct hid_device_id *hid_exists_dquirk(const struct hid_device *hdev)
  *         quirks value with what was provided.  Otherwise, add the quirk
  *         to the dynamic quirks list.
  *
- * Returns: 0 OK, -error on failure.
+ * Return: 0 OK, -error on failure.
  */
 static int hid_modify_dquirk(const struct hid_device_id *id,
                             const unsigned long quirks)
@@ -1122,7 +1121,7 @@ static int hid_modify_dquirk(const struct hid_device_id *id,
 }
 
 /**
- * hid_remove_all_dquirks: remove all runtime HID quirks from memory
+ * hid_remove_all_dquirks - remove all runtime HID quirks from memory
  * @bus: bus to match against. Use HID_BUS_ANY if all need to be removed.
  *
  * Description:
@@ -1146,7 +1145,10 @@ static void hid_remove_all_dquirks(__u16 bus)
 }
 
 /**
- * hid_quirks_init: apply HID quirks specified at module load time
+ * hid_quirks_init - apply HID quirks specified at module load time
+ * @quirks_param: array of quirks strings (vendor:product:quirks)
+ * @bus: bus type
+ * @count: number of quirks to check
  */
 int hid_quirks_init(char **quirks_param, __u16 bus, int count)
 {
@@ -1177,7 +1179,7 @@ int hid_quirks_init(char **quirks_param, __u16 bus, int count)
 EXPORT_SYMBOL_GPL(hid_quirks_init);
 
 /**
- * hid_quirks_exit: release memory associated with dynamic_quirks
+ * hid_quirks_exit - release memory associated with dynamic_quirks
  * @bus: a bus to match against
  *
  * Description:
@@ -1194,14 +1196,14 @@ void hid_quirks_exit(__u16 bus)
 EXPORT_SYMBOL_GPL(hid_quirks_exit);
 
 /**
- * hid_gets_squirk: return any static quirks for a HID device
+ * hid_gets_squirk - return any static quirks for a HID device
  * @hdev: the HID device to match
  *
  * Description:
  *     Given a HID device, return a pointer to the quirked hid_device_id entry
  *     associated with that device.
  *
- * Returns: the quirks.
+ * Return: the quirks.
  */
 static unsigned long hid_gets_squirk(const struct hid_device *hdev)
 {
@@ -1225,13 +1227,13 @@ static unsigned long hid_gets_squirk(const struct hid_device *hdev)
 }
 
 /**
- * hid_lookup_quirk: return any quirks associated with a HID device
+ * hid_lookup_quirk - return any quirks associated with a HID device
  * @hdev: the HID device to look for
  *
  * Description:
  *     Given a HID device, return any quirks associated with that device.
  *
- * Returns: an unsigned long quirks value.
+ * Return: an unsigned long quirks value.
  */
 unsigned long hid_lookup_quirk(const struct hid_device *hdev)
 {
index ffcd444..4556d2a 100644 (file)
@@ -42,7 +42,7 @@ static ssize_t arvo_sysfs_show_mode_key(struct device *dev,
        if (retval)
                return retval;
 
-       return snprintf(buf, PAGE_SIZE, "%d\n", temp_buf.state);
+       return sysfs_emit(buf, "%d\n", temp_buf.state);
 }
 
 static ssize_t arvo_sysfs_set_mode_key(struct device *dev,
@@ -92,7 +92,7 @@ static ssize_t arvo_sysfs_show_key_mask(struct device *dev,
        if (retval)
                return retval;
 
-       return snprintf(buf, PAGE_SIZE, "%d\n", temp_buf.key_mask);
+       return sysfs_emit(buf, "%d\n", temp_buf.key_mask);
 }
 
 static ssize_t arvo_sysfs_set_key_mask(struct device *dev,
@@ -146,7 +146,7 @@ static ssize_t arvo_sysfs_show_actual_profile(struct device *dev,
        struct arvo_device *arvo =
                        hid_get_drvdata(dev_get_drvdata(dev->parent->parent));
 
-       return snprintf(buf, PAGE_SIZE, "%d\n", arvo->actual_profile);
+       return sysfs_emit(buf, "%d\n", arvo->actual_profile);
 }
 
 static ssize_t arvo_sysfs_set_actual_profile(struct device *dev,
index e3a557d..8319b0c 100644 (file)
@@ -12,6 +12,7 @@
  *  Copyright (c) 2014-2016 Frank Praznik <frank.praznik@gmail.com>
  *  Copyright (c) 2018 Todd Kelner
  *  Copyright (c) 2020 Pascal Giard <pascal.giard@etsmtl.ca>
+ *  Copyright (c) 2020 Sanjay Govind <sanjay.govind9@gmail.com>
  */
 
 /*
@@ -59,7 +60,8 @@
 #define NSG_MR5U_REMOTE_BT        BIT(14)
 #define NSG_MR7U_REMOTE_BT        BIT(15)
 #define SHANWAN_GAMEPAD           BIT(16)
-#define GHL_GUITAR_PS3WIIU        BIT(17)
+#define GH_GUITAR_CONTROLLER      BIT(17)
+#define GHL_GUITAR_PS3WIIU        BIT(18)
 
 #define SIXAXIS_CONTROLLER (SIXAXIS_CONTROLLER_USB | SIXAXIS_CONTROLLER_BT)
 #define MOTION_CONTROLLER (MOTION_CONTROLLER_USB | MOTION_CONTROLLER_BT)
@@ -84,7 +86,7 @@
 #define NSG_MRXU_MAX_Y 1868
 
 #define GHL_GUITAR_POKE_INTERVAL 10 /* In seconds */
-#define GHL_GUITAR_TILT_USAGE 44
+#define GUITAR_TILT_USAGE 44
 
 /* Magic value and data taken from GHLtarUtility:
  * https://github.com/ghlre/GHLtarUtility/blob/master/PS3Guitar.cs
@@ -692,7 +694,7 @@ static int guitar_mapping(struct hid_device *hdev, struct hid_input *hi,
        if ((usage->hid & HID_USAGE_PAGE) == HID_UP_MSVENDOR) {
                unsigned int abs = usage->hid & HID_USAGE;
 
-               if (abs == GHL_GUITAR_TILT_USAGE) {
+               if (abs == GUITAR_TILT_USAGE) {
                        hid_map_usage_clear(hi, usage, bit, max, EV_ABS, ABS_RY);
                        return 1;
                }
@@ -1481,7 +1483,7 @@ static int sony_mapping(struct hid_device *hdev, struct hid_input *hi,
        if (sc->quirks & DUALSHOCK4_CONTROLLER)
                return ds4_mapping(hdev, hi, field, usage, bit, max);
 
-       if (sc->quirks & GHL_GUITAR_PS3WIIU)
+       if (sc->quirks & GH_GUITAR_CONTROLLER)
                return guitar_mapping(hdev, hi, field, usage, bit, max);
 
        /* Let hid-core decide for the others */
@@ -3167,8 +3169,14 @@ static const struct hid_device_id sony_devices[] = {
        { HID_BLUETOOTH_DEVICE(USB_VENDOR_ID_SMK, USB_DEVICE_ID_SMK_NSG_MR7U_REMOTE),
                .driver_data = NSG_MR7U_REMOTE_BT },
        /* Guitar Hero Live PS3 and Wii U guitar dongles */
-       { HID_USB_DEVICE(USB_VENDOR_ID_SONY_GHLIVE, USB_DEVICE_ID_SONY_PS3WIIU_GHLIVE_DONGLE),
-               .driver_data = GHL_GUITAR_PS3WIIU},
+       { HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3WIIU_GHLIVE_DONGLE),
+               .driver_data = GHL_GUITAR_PS3WIIU | GH_GUITAR_CONTROLLER },
+       /* Guitar Hero PC Guitar Dongle */
+       { HID_USB_DEVICE(USB_VENDOR_ID_ACTIVISION, USB_DEVICE_ID_ACTIVISION_GUITAR_DONGLE),
+               .driver_data = GH_GUITAR_CONTROLLER },
+       /* Guitar Hero PS3 World Tour Guitar Dongle */
+       { HID_USB_DEVICE(USB_VENDOR_ID_SONY_RHYTHM, USB_DEVICE_ID_SONY_PS3_GUITAR_DONGLE),
+               .driver_data = GH_GUITAR_CONTROLLER },
        { }
 };
 MODULE_DEVICE_TABLE(hid, sony_devices);
index 8e9c9e6..6a9865d 100644 (file)
@@ -371,6 +371,8 @@ static const struct hid_device_id uclogic_devices[] = {
                                USB_DEVICE_ID_HUION_TABLET) },
        { HID_USB_DEVICE(USB_VENDOR_ID_HUION,
                                USB_DEVICE_ID_HUION_HS64) },
+       { HID_USB_DEVICE(USB_VENDOR_ID_TRUST,
+                               USB_DEVICE_ID_TRUST_PANORA_TABLET) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
                                USB_DEVICE_ID_HUION_TABLET) },
        { HID_USB_DEVICE(USB_VENDOR_ID_UCLOGIC,
index 56406ce..6af25c3 100644 (file)
@@ -1045,6 +1045,8 @@ int uclogic_params_init(struct uclogic_params *params,
                        uclogic_params_init_with_pen_unused(&p);
                }
                break;
+       case VID_PID(USB_VENDOR_ID_TRUST,
+                    USB_DEVICE_ID_TRUST_PANORA_TABLET):
        case VID_PID(USB_VENDOR_ID_UGEE,
                     USB_DEVICE_ID_UGEE_TABLET_G5):
                /* Ignore non-pen interfaces */
index c4e5dfe..a16c6a6 100644 (file)
@@ -2,18 +2,55 @@
 menu "I2C HID support"
        depends on I2C
 
-config I2C_HID
-       tristate "HID over I2C transport layer"
+config I2C_HID_ACPI
+       tristate "HID over I2C transport layer ACPI driver"
        default n
-       depends on I2C && INPUT
-       select HID
+       depends on I2C && INPUT && ACPI
+       help
+         Say Y here if you use a keyboard, a touchpad, a touchscreen, or any
+         other HID-based device that is connected to your computer via I2C.
+         This driver supports ACPI-based systems.
+
+         If unsure, say N.
+
+         This support is also available as a module.  If so, the module
+         will be called i2c-hid-acpi.  It will also build/depend on the
+         module i2c-hid.
+
+config I2C_HID_OF
+       tristate "HID over I2C transport layer Open Firmware driver"
+       default n
+       depends on I2C && INPUT && OF
        help
          Say Y here if you use a keyboard, a touchpad, a touchscreen, or any
          other HID-based device that is connected to your computer via I2C.
+         This driver supports Open Firmware (Device Tree)-based systems.
 
          If unsure, say N.
 
          This support is also available as a module.  If so, the module
-         will be called i2c-hid.
+         will be called i2c-hid-of.  It will also build/depend on the
+         module i2c-hid.
+
+config I2C_HID_OF_GOODIX
+       tristate "Driver for Goodix hid-i2c based devices on OF systems"
+       default n
+       depends on I2C && INPUT && OF
+       help
+         Say Y here if you want support for Goodix i2c devices that use
+         the i2c-hid protocol on Open Firmware (Device Tree)-based
+         systems.
+
+         If unsure, say N.
+
+         This support is also available as a module.  If so, the module
+         will be called i2c-hid-of-goodix.  It will also build/depend on
+         the module i2c-hid.
 
 endmenu
+
+config I2C_HID_CORE
+       tristate
+       default y if I2C_HID_ACPI=y || I2C_HID_OF=y || I2C_HID_OF_GOODIX=y
+       default m if I2C_HID_ACPI=m || I2C_HID_OF=m || I2C_HID_OF_GOODIX=m
+       select HID
index 681b389..302545a 100644 (file)
@@ -3,7 +3,11 @@
 # Makefile for the I2C input drivers
 #
 
-obj-$(CONFIG_I2C_HID)                          += i2c-hid.o
+obj-$(CONFIG_I2C_HID_CORE)                     += i2c-hid.o
 
 i2c-hid-objs                                   =  i2c-hid-core.o
 i2c-hid-$(CONFIG_DMI)                          += i2c-hid-dmi-quirks.o
+
+obj-$(CONFIG_I2C_HID_ACPI)                     += i2c-hid-acpi.o
+obj-$(CONFIG_I2C_HID_OF)                       += i2c-hid-of.o
+obj-$(CONFIG_I2C_HID_OF_GOODIX)                        += i2c-hid-of-goodix.o
diff --git a/drivers/hid/i2c-hid/i2c-hid-acpi.c b/drivers/hid/i2c-hid/i2c-hid-acpi.c
new file mode 100644 (file)
index 0000000..bb8c00e
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * HID over I2C ACPI Subclass
+ *
+ * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
+ * Copyright (c) 2012 Red Hat, Inc
+ *
+ * This code was forked out of the core code, which was partly based on
+ * "USB HID support for Linux":
+ *
+ *  Copyright (c) 1999 Andreas Gal
+ *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ *  Copyright (c) 2007-2008 Oliver Neukum
+ *  Copyright (c) 2006-2010 Jiri Kosina
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#include <linux/acpi.h>
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pm.h>
+
+#include "i2c-hid.h"
+
+struct i2c_hid_acpi {
+       struct i2chid_ops ops;
+       struct i2c_client *client;
+};
+
+static const struct acpi_device_id i2c_hid_acpi_blacklist[] = {
+       /*
+        * The CHPN0001 ACPI device, which is used to describe the Chipone
+        * ICN8505 controller, has a _CID of PNP0C50 but is not HID compatible.
+        */
+       {"CHPN0001", 0 },
+       { },
+};
+
+static int i2c_hid_acpi_get_descriptor(struct i2c_client *client)
+{
+       static guid_t i2c_hid_guid =
+               GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
+                         0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
+       union acpi_object *obj;
+       struct acpi_device *adev;
+       acpi_handle handle;
+       u16 hid_descriptor_address;
+
+       handle = ACPI_HANDLE(&client->dev);
+       if (!handle || acpi_bus_get_device(handle, &adev)) {
+               dev_err(&client->dev, "Error could not get ACPI device\n");
+               return -ENODEV;
+       }
+
+       if (acpi_match_device_ids(adev, i2c_hid_acpi_blacklist) == 0)
+               return -ENODEV;
+
+       obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL,
+                                     ACPI_TYPE_INTEGER);
+       if (!obj) {
+               dev_err(&client->dev, "Error _DSM call to get HID descriptor address failed\n");
+               return -ENODEV;
+       }
+
+       hid_descriptor_address = obj->integer.value;
+       ACPI_FREE(obj);
+
+       return hid_descriptor_address;
+}
+
+static void i2c_hid_acpi_shutdown_tail(struct i2chid_ops *ops)
+{
+       struct i2c_hid_acpi *ihid_acpi =
+               container_of(ops, struct i2c_hid_acpi, ops);
+       struct device *dev = &ihid_acpi->client->dev;
+       acpi_device_set_power(ACPI_COMPANION(dev), ACPI_STATE_D3_COLD);
+}
+
+static int i2c_hid_acpi_probe(struct i2c_client *client,
+                             const struct i2c_device_id *dev_id)
+{
+       struct device *dev = &client->dev;
+       struct i2c_hid_acpi *ihid_acpi;
+       struct acpi_device *adev;
+       u16 hid_descriptor_address;
+       int ret;
+
+       ihid_acpi = devm_kzalloc(&client->dev, sizeof(*ihid_acpi), GFP_KERNEL);
+       if (!ihid_acpi)
+               return -ENOMEM;
+
+       ihid_acpi->client = client;
+       ihid_acpi->ops.shutdown_tail = i2c_hid_acpi_shutdown_tail;
+
+       ret = i2c_hid_acpi_get_descriptor(client);
+       if (ret < 0)
+               return ret;
+       hid_descriptor_address = ret;
+
+       adev = ACPI_COMPANION(dev);
+       if (adev)
+               acpi_device_fix_up_power(adev);
+
+       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
+               device_set_wakeup_capable(dev, true);
+               device_set_wakeup_enable(dev, false);
+       }
+
+       return i2c_hid_core_probe(client, &ihid_acpi->ops,
+                                 hid_descriptor_address);
+}
+
+static const struct acpi_device_id i2c_hid_acpi_match[] = {
+       {"ACPI0C50", 0 },
+       {"PNP0C50", 0 },
+       { },
+};
+MODULE_DEVICE_TABLE(acpi, i2c_hid_acpi_match);
+
+static struct i2c_driver i2c_hid_acpi_driver = {
+       .driver = {
+               .name   = "i2c_hid_acpi",
+               .pm     = &i2c_hid_core_pm,
+               .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+               .acpi_match_table = ACPI_PTR(i2c_hid_acpi_match),
+       },
+
+       .probe          = i2c_hid_acpi_probe,
+       .remove         = i2c_hid_core_remove,
+       .shutdown       = i2c_hid_core_shutdown,
+};
+
+module_i2c_driver(i2c_hid_acpi_driver);
+
+MODULE_DESCRIPTION("HID over I2C ACPI driver");
+MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>");
+MODULE_LICENSE("GPL");
index bfe716d..9993133 100644 (file)
 #include <linux/kernel.h>
 #include <linux/hid.h>
 #include <linux/mutex.h>
-#include <linux/acpi.h>
-#include <linux/of.h>
-#include <linux/regulator/consumer.h>
-
-#include <linux/platform_data/i2c-hid.h>
 
 #include "../hid-ids.h"
 #include "i2c-hid.h"
@@ -156,10 +151,10 @@ struct i2c_hid {
 
        wait_queue_head_t       wait;           /* For waiting the interrupt */
 
-       struct i2c_hid_platform_data pdata;
-
        bool                    irq_wake_enabled;
        struct mutex            reset_lock;
+
+       struct i2chid_ops       *ops;
 };
 
 static const struct i2c_hid_quirks {
@@ -171,6 +166,8 @@ static const struct i2c_hid_quirks {
                I2C_HID_QUIRK_SET_PWR_WAKEUP_DEV },
        { I2C_VENDOR_ID_HANTICK, I2C_PRODUCT_ID_HANTICK_5288,
                I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
+       { I2C_VENDOR_ID_ITE, I2C_DEVICE_ID_ITE_VOYO_WINPAD_A15,
+               I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
        { I2C_VENDOR_ID_RAYDIUM, I2C_PRODUCT_ID_RAYDIUM_3118,
                I2C_HID_QUIRK_NO_IRQ_AFTER_RESET },
        { USB_VENDOR_ID_ELAN, HID_ANY_ID,
@@ -884,144 +881,36 @@ static int i2c_hid_fetch_hid_descriptor(struct i2c_hid *ihid)
        return 0;
 }
 
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id i2c_hid_acpi_blacklist[] = {
-       /*
-        * The CHPN0001 ACPI device, which is used to describe the Chipone
-        * ICN8505 controller, has a _CID of PNP0C50 but is not HID compatible.
-        */
-       {"CHPN0001", 0 },
-       { },
-};
-
-static int i2c_hid_acpi_pdata(struct i2c_client *client,
-               struct i2c_hid_platform_data *pdata)
-{
-       static guid_t i2c_hid_guid =
-               GUID_INIT(0x3CDFF6F7, 0x4267, 0x4555,
-                         0xAD, 0x05, 0xB3, 0x0A, 0x3D, 0x89, 0x38, 0xDE);
-       union acpi_object *obj;
-       struct acpi_device *adev;
-       acpi_handle handle;
-
-       handle = ACPI_HANDLE(&client->dev);
-       if (!handle || acpi_bus_get_device(handle, &adev)) {
-               dev_err(&client->dev, "Error could not get ACPI device\n");
-               return -ENODEV;
-       }
-
-       if (acpi_match_device_ids(adev, i2c_hid_acpi_blacklist) == 0)
-               return -ENODEV;
-
-       obj = acpi_evaluate_dsm_typed(handle, &i2c_hid_guid, 1, 1, NULL,
-                                     ACPI_TYPE_INTEGER);
-       if (!obj) {
-               dev_err(&client->dev, "Error _DSM call to get HID descriptor address failed\n");
-               return -ENODEV;
-       }
-
-       pdata->hid_descriptor_address = obj->integer.value;
-       ACPI_FREE(obj);
-
-       return 0;
-}
-
-static void i2c_hid_acpi_fix_up_power(struct device *dev)
-{
-       struct acpi_device *adev;
-
-       adev = ACPI_COMPANION(dev);
-       if (adev)
-               acpi_device_fix_up_power(adev);
-}
-
-static void i2c_hid_acpi_enable_wakeup(struct device *dev)
-{
-       if (acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0) {
-               device_set_wakeup_capable(dev, true);
-               device_set_wakeup_enable(dev, false);
-       }
-}
-
-static void i2c_hid_acpi_shutdown(struct device *dev)
+static int i2c_hid_core_power_up(struct i2c_hid *ihid)
 {
-       acpi_device_set_power(ACPI_COMPANION(dev), ACPI_STATE_D3_COLD);
-}
+       if (!ihid->ops->power_up)
+               return 0;
 
-static const struct acpi_device_id i2c_hid_acpi_match[] = {
-       {"ACPI0C50", 0 },
-       {"PNP0C50", 0 },
-       { },
-};
-MODULE_DEVICE_TABLE(acpi, i2c_hid_acpi_match);
-#else
-static inline int i2c_hid_acpi_pdata(struct i2c_client *client,
-               struct i2c_hid_platform_data *pdata)
-{
-       return -ENODEV;
+       return ihid->ops->power_up(ihid->ops);
 }
 
-static inline void i2c_hid_acpi_fix_up_power(struct device *dev) {}
-
-static inline void i2c_hid_acpi_enable_wakeup(struct device *dev) {}
-
-static inline void i2c_hid_acpi_shutdown(struct device *dev) {}
-#endif
-
-#ifdef CONFIG_OF
-static int i2c_hid_of_probe(struct i2c_client *client,
-               struct i2c_hid_platform_data *pdata)
+static void i2c_hid_core_power_down(struct i2c_hid *ihid)
 {
-       struct device *dev = &client->dev;
-       u32 val;
-       int ret;
-
-       ret = of_property_read_u32(dev->of_node, "hid-descr-addr", &val);
-       if (ret) {
-               dev_err(&client->dev, "HID register address not provided\n");
-               return -ENODEV;
-       }
-       if (val >> 16) {
-               dev_err(&client->dev, "Bad HID register address: 0x%08x\n",
-                       val);
-               return -EINVAL;
-       }
-       pdata->hid_descriptor_address = val;
-
-       return 0;
-}
+       if (!ihid->ops->power_down)
+               return;
 
-static const struct of_device_id i2c_hid_of_match[] = {
-       { .compatible = "hid-over-i2c" },
-       {},
-};
-MODULE_DEVICE_TABLE(of, i2c_hid_of_match);
-#else
-static inline int i2c_hid_of_probe(struct i2c_client *client,
-               struct i2c_hid_platform_data *pdata)
-{
-       return -ENODEV;
+       ihid->ops->power_down(ihid->ops);
 }
-#endif
 
-static void i2c_hid_fwnode_probe(struct i2c_client *client,
-                                struct i2c_hid_platform_data *pdata)
+static void i2c_hid_core_shutdown_tail(struct i2c_hid *ihid)
 {
-       u32 val;
+       if (!ihid->ops->shutdown_tail)
+               return;
 
-       if (!device_property_read_u32(&client->dev, "post-power-on-delay-ms",
-                                     &val))
-               pdata->post_power_delay_ms = val;
+       ihid->ops->shutdown_tail(ihid->ops);
 }
 
-static int i2c_hid_probe(struct i2c_client *client,
-                        const struct i2c_device_id *dev_id)
+int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops,
+                      u16 hid_descriptor_address)
 {
        int ret;
        struct i2c_hid *ihid;
        struct hid_device *hid;
-       __u16 hidRegister;
-       struct i2c_hid_platform_data *platform_data = client->dev.platform_data;
 
        dbg_hid("HID probe called for i2c 0x%02x\n", client->addr);
 
@@ -1042,44 +931,17 @@ static int i2c_hid_probe(struct i2c_client *client,
        if (!ihid)
                return -ENOMEM;
 
-       if (client->dev.of_node) {
-               ret = i2c_hid_of_probe(client, &ihid->pdata);
-               if (ret)
-                       return ret;
-       } else if (!platform_data) {
-               ret = i2c_hid_acpi_pdata(client, &ihid->pdata);
-               if (ret)
-                       return ret;
-       } else {
-               ihid->pdata = *platform_data;
-       }
-
-       /* Parse platform agnostic common properties from ACPI / device tree */
-       i2c_hid_fwnode_probe(client, &ihid->pdata);
+       ihid->ops = ops;
 
-       ihid->pdata.supplies[0].supply = "vdd";
-       ihid->pdata.supplies[1].supply = "vddl";
-
-       ret = devm_regulator_bulk_get(&client->dev,
-                                     ARRAY_SIZE(ihid->pdata.supplies),
-                                     ihid->pdata.supplies);
+       ret = i2c_hid_core_power_up(ihid);
        if (ret)
                return ret;
 
-       ret = regulator_bulk_enable(ARRAY_SIZE(ihid->pdata.supplies),
-                                   ihid->pdata.supplies);
-       if (ret < 0)
-               return ret;
-
-       if (ihid->pdata.post_power_delay_ms)
-               msleep(ihid->pdata.post_power_delay_ms);
-
        i2c_set_clientdata(client, ihid);
 
        ihid->client = client;
 
-       hidRegister = ihid->pdata.hid_descriptor_address;
-       ihid->wHIDDescRegister = cpu_to_le16(hidRegister);
+       ihid->wHIDDescRegister = cpu_to_le16(hid_descriptor_address);
 
        init_waitqueue_head(&ihid->wait);
        mutex_init(&ihid->reset_lock);
@@ -1089,11 +951,7 @@ static int i2c_hid_probe(struct i2c_client *client,
         * real computation later. */
        ret = i2c_hid_alloc_buffers(ihid, HID_MIN_BUFFER_SIZE);
        if (ret < 0)
-               goto err_regulator;
-
-       i2c_hid_acpi_fix_up_power(&client->dev);
-
-       i2c_hid_acpi_enable_wakeup(&client->dev);
+               goto err_powered;
 
        device_enable_async_suspend(&client->dev);
 
@@ -1102,19 +960,19 @@ static int i2c_hid_probe(struct i2c_client *client,
        if (ret < 0) {
                dev_dbg(&client->dev, "nothing at this address: %d\n", ret);
                ret = -ENXIO;
-               goto err_regulator;
+               goto err_powered;
        }
 
        ret = i2c_hid_fetch_hid_descriptor(ihid);
        if (ret < 0) {
                dev_err(&client->dev,
                        "Failed to fetch the HID Descriptor\n");
-               goto err_regulator;
+               goto err_powered;
        }
 
        ret = i2c_hid_init_irq(client);
        if (ret < 0)
-               goto err_regulator;
+               goto err_powered;
 
        hid = hid_allocate_device();
        if (IS_ERR(hid)) {
@@ -1153,14 +1011,14 @@ err_mem_free:
 err_irq:
        free_irq(client->irq, ihid);
 
-err_regulator:
-       regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
-                              ihid->pdata.supplies);
+err_powered:
+       i2c_hid_core_power_down(ihid);
        i2c_hid_free_buffers(ihid);
        return ret;
 }
+EXPORT_SYMBOL_GPL(i2c_hid_core_probe);
 
-static int i2c_hid_remove(struct i2c_client *client)
+int i2c_hid_core_remove(struct i2c_client *client)
 {
        struct i2c_hid *ihid = i2c_get_clientdata(client);
        struct hid_device *hid;
@@ -1173,24 +1031,25 @@ static int i2c_hid_remove(struct i2c_client *client)
        if (ihid->bufsize)
                i2c_hid_free_buffers(ihid);
 
-       regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
-                              ihid->pdata.supplies);
+       i2c_hid_core_power_down(ihid);
 
        return 0;
 }
+EXPORT_SYMBOL_GPL(i2c_hid_core_remove);
 
-static void i2c_hid_shutdown(struct i2c_client *client)
+void i2c_hid_core_shutdown(struct i2c_client *client)
 {
        struct i2c_hid *ihid = i2c_get_clientdata(client);
 
        i2c_hid_set_power(client, I2C_HID_PWR_SLEEP);
        free_irq(client->irq, ihid);
 
-       i2c_hid_acpi_shutdown(&client->dev);
+       i2c_hid_core_shutdown_tail(ihid);
 }
+EXPORT_SYMBOL_GPL(i2c_hid_core_shutdown);
 
 #ifdef CONFIG_PM_SLEEP
-static int i2c_hid_suspend(struct device *dev)
+static int i2c_hid_core_suspend(struct device *dev)
 {
        struct i2c_client *client = to_i2c_client(dev);
        struct i2c_hid *ihid = i2c_get_clientdata(client);
@@ -1217,14 +1076,13 @@ static int i2c_hid_suspend(struct device *dev)
                        hid_warn(hid, "Failed to enable irq wake: %d\n",
                                wake_status);
        } else {
-               regulator_bulk_disable(ARRAY_SIZE(ihid->pdata.supplies),
-                                      ihid->pdata.supplies);
+               i2c_hid_core_power_down(ihid);
        }
 
        return 0;
 }
 
-static int i2c_hid_resume(struct device *dev)
+static int i2c_hid_core_resume(struct device *dev)
 {
        int ret;
        struct i2c_client *client = to_i2c_client(dev);
@@ -1233,13 +1091,7 @@ static int i2c_hid_resume(struct device *dev)
        int wake_status;
 
        if (!device_may_wakeup(&client->dev)) {
-               ret = regulator_bulk_enable(ARRAY_SIZE(ihid->pdata.supplies),
-                                           ihid->pdata.supplies);
-               if (ret)
-                       hid_warn(hid, "Failed to enable supplies: %d\n", ret);
-
-               if (ihid->pdata.post_power_delay_ms)
-                       msleep(ihid->pdata.post_power_delay_ms);
+               i2c_hid_core_power_up(ihid);
        } else if (ihid->irq_wake_enabled) {
                wake_status = disable_irq_wake(client->irq);
                if (!wake_status)
@@ -1276,34 +1128,10 @@ static int i2c_hid_resume(struct device *dev)
 }
 #endif
 
-static const struct dev_pm_ops i2c_hid_pm = {
-       SET_SYSTEM_SLEEP_PM_OPS(i2c_hid_suspend, i2c_hid_resume)
+const struct dev_pm_ops i2c_hid_core_pm = {
+       SET_SYSTEM_SLEEP_PM_OPS(i2c_hid_core_suspend, i2c_hid_core_resume)
 };
-
-static const struct i2c_device_id i2c_hid_id_table[] = {
-       { "hid", 0 },
-       { "hid-over-i2c", 0 },
-       { },
-};
-MODULE_DEVICE_TABLE(i2c, i2c_hid_id_table);
-
-
-static struct i2c_driver i2c_hid_driver = {
-       .driver = {
-               .name   = "i2c_hid",
-               .pm     = &i2c_hid_pm,
-               .probe_type = PROBE_PREFER_ASYNCHRONOUS,
-               .acpi_match_table = ACPI_PTR(i2c_hid_acpi_match),
-               .of_match_table = of_match_ptr(i2c_hid_of_match),
-       },
-
-       .probe          = i2c_hid_probe,
-       .remove         = i2c_hid_remove,
-       .shutdown       = i2c_hid_shutdown,
-       .id_table       = i2c_hid_id_table,
-};
-
-module_i2c_driver(i2c_hid_driver);
+EXPORT_SYMBOL_GPL(i2c_hid_core_pm);
 
 MODULE_DESCRIPTION("HID over I2C core driver");
 MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>");
diff --git a/drivers/hid/i2c-hid/i2c-hid-of-goodix.c b/drivers/hid/i2c-hid/i2c-hid-of-goodix.c
new file mode 100644 (file)
index 0000000..ee02259
--- /dev/null
@@ -0,0 +1,116 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Driver for Goodix touchscreens that use the i2c-hid protocol.
+ *
+ * Copyright 2020 Google LLC
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/gpio/consumer.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pm.h>
+#include <linux/regulator/consumer.h>
+
+#include "i2c-hid.h"
+
+struct goodix_i2c_hid_timing_data {
+       unsigned int post_gpio_reset_delay_ms;
+       unsigned int post_power_delay_ms;
+};
+
+struct i2c_hid_of_goodix {
+       struct i2chid_ops ops;
+
+       struct regulator *vdd;
+       struct gpio_desc *reset_gpio;
+       const struct goodix_i2c_hid_timing_data *timings;
+};
+
+static int goodix_i2c_hid_power_up(struct i2chid_ops *ops)
+{
+       struct i2c_hid_of_goodix *ihid_goodix =
+               container_of(ops, struct i2c_hid_of_goodix, ops);
+       int ret;
+
+       ret = regulator_enable(ihid_goodix->vdd);
+       if (ret)
+               return ret;
+
+       if (ihid_goodix->timings->post_power_delay_ms)
+               msleep(ihid_goodix->timings->post_power_delay_ms);
+
+       gpiod_set_value_cansleep(ihid_goodix->reset_gpio, 0);
+       if (ihid_goodix->timings->post_gpio_reset_delay_ms)
+               msleep(ihid_goodix->timings->post_gpio_reset_delay_ms);
+
+       return 0;
+}
+
+static void goodix_i2c_hid_power_down(struct i2chid_ops *ops)
+{
+       struct i2c_hid_of_goodix *ihid_goodix =
+               container_of(ops, struct i2c_hid_of_goodix, ops);
+
+       gpiod_set_value_cansleep(ihid_goodix->reset_gpio, 1);
+       regulator_disable(ihid_goodix->vdd);
+}
+
+static int i2c_hid_of_goodix_probe(struct i2c_client *client,
+                                  const struct i2c_device_id *id)
+{
+       struct i2c_hid_of_goodix *ihid_goodix;
+
+       ihid_goodix = devm_kzalloc(&client->dev, sizeof(*ihid_goodix),
+                                  GFP_KERNEL);
+       if (!ihid_goodix)
+               return -ENOMEM;
+
+       ihid_goodix->ops.power_up = goodix_i2c_hid_power_up;
+       ihid_goodix->ops.power_down = goodix_i2c_hid_power_down;
+
+       /* Start out with reset asserted */
+       ihid_goodix->reset_gpio =
+               devm_gpiod_get_optional(&client->dev, "reset", GPIOD_OUT_HIGH);
+       if (IS_ERR(ihid_goodix->reset_gpio))
+               return PTR_ERR(ihid_goodix->reset_gpio);
+
+       ihid_goodix->vdd = devm_regulator_get(&client->dev, "vdd");
+       if (IS_ERR(ihid_goodix->vdd))
+               return PTR_ERR(ihid_goodix->vdd);
+
+       ihid_goodix->timings = device_get_match_data(&client->dev);
+
+       return i2c_hid_core_probe(client, &ihid_goodix->ops, 0x0001);
+}
+
+static const struct goodix_i2c_hid_timing_data goodix_gt7375p_timing_data = {
+       .post_power_delay_ms = 10,
+       .post_gpio_reset_delay_ms = 180,
+};
+
+static const struct of_device_id goodix_i2c_hid_of_match[] = {
+       { .compatible = "goodix,gt7375p", .data = &goodix_gt7375p_timing_data },
+       { }
+};
+MODULE_DEVICE_TABLE(of, goodix_i2c_hid_of_match);
+
+static struct i2c_driver goodix_i2c_hid_ts_driver = {
+       .driver = {
+               .name   = "i2c_hid_of_goodix",
+               .pm     = &i2c_hid_core_pm,
+               .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+               .of_match_table = of_match_ptr(goodix_i2c_hid_of_match),
+       },
+       .probe          = i2c_hid_of_goodix_probe,
+       .remove         = i2c_hid_core_remove,
+       .shutdown       = i2c_hid_core_shutdown,
+};
+module_i2c_driver(goodix_i2c_hid_ts_driver);
+
+MODULE_AUTHOR("Douglas Anderson <dianders@chromium.org>");
+MODULE_DESCRIPTION("Goodix i2c-hid touchscreen driver");
+MODULE_LICENSE("GPL v2");
diff --git a/drivers/hid/i2c-hid/i2c-hid-of.c b/drivers/hid/i2c-hid/i2c-hid-of.c
new file mode 100644 (file)
index 0000000..4bf7cea
--- /dev/null
@@ -0,0 +1,143 @@
+/*
+ * HID over I2C Open Firmware Subclass
+ *
+ * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
+ * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
+ * Copyright (c) 2012 Red Hat, Inc
+ *
+ * This code was forked out of the core code, which was partly based on
+ * "USB HID support for Linux":
+ *
+ *  Copyright (c) 1999 Andreas Gal
+ *  Copyright (c) 2000-2005 Vojtech Pavlik <vojtech@suse.cz>
+ *  Copyright (c) 2005 Michael Haboustak <mike-@cinci.rr.com> for Concept2, Inc
+ *  Copyright (c) 2007-2008 Oliver Neukum
+ *  Copyright (c) 2006-2010 Jiri Kosina
+ *
+ * This file is subject to the terms and conditions of the GNU General Public
+ * License.  See the file COPYING in the main directory of this archive for
+ * more details.
+ */
+
+#include <linux/delay.h>
+#include <linux/device.h>
+#include <linux/i2c.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/pm.h>
+#include <linux/regulator/consumer.h>
+
+#include "i2c-hid.h"
+
+struct i2c_hid_of {
+       struct i2chid_ops ops;
+
+       struct i2c_client *client;
+       struct regulator_bulk_data supplies[2];
+       int post_power_delay_ms;
+};
+
+static int i2c_hid_of_power_up(struct i2chid_ops *ops)
+{
+       struct i2c_hid_of *ihid_of = container_of(ops, struct i2c_hid_of, ops);
+       struct device *dev = &ihid_of->client->dev;
+       int ret;
+
+       ret = regulator_bulk_enable(ARRAY_SIZE(ihid_of->supplies),
+                                   ihid_of->supplies);
+       if (ret) {
+               dev_warn(dev, "Failed to enable supplies: %d\n", ret);
+               return ret;
+       }
+
+       if (ihid_of->post_power_delay_ms)
+               msleep(ihid_of->post_power_delay_ms);
+
+       return 0;
+}
+
+static void i2c_hid_of_power_down(struct i2chid_ops *ops)
+{
+       struct i2c_hid_of *ihid_of = container_of(ops, struct i2c_hid_of, ops);
+
+       regulator_bulk_disable(ARRAY_SIZE(ihid_of->supplies),
+                              ihid_of->supplies);
+}
+
+static int i2c_hid_of_probe(struct i2c_client *client,
+                           const struct i2c_device_id *dev_id)
+{
+       struct device *dev = &client->dev;
+       struct i2c_hid_of *ihid_of;
+       u16 hid_descriptor_address;
+       int ret;
+       u32 val;
+
+       ihid_of = devm_kzalloc(&client->dev, sizeof(*ihid_of), GFP_KERNEL);
+       if (!ihid_of)
+               return -ENOMEM;
+
+       ihid_of->ops.power_up = i2c_hid_of_power_up;
+       ihid_of->ops.power_down = i2c_hid_of_power_down;
+
+       ret = of_property_read_u32(dev->of_node, "hid-descr-addr", &val);
+       if (ret) {
+               dev_err(&client->dev, "HID register address not provided\n");
+               return -ENODEV;
+       }
+       if (val >> 16) {
+               dev_err(&client->dev, "Bad HID register address: 0x%08x\n",
+                       val);
+               return -EINVAL;
+       }
+       hid_descriptor_address = val;
+
+       if (!device_property_read_u32(&client->dev, "post-power-on-delay-ms",
+                                     &val))
+               ihid_of->post_power_delay_ms = val;
+
+       ihid_of->supplies[0].supply = "vdd";
+       ihid_of->supplies[1].supply = "vddl";
+       ret = devm_regulator_bulk_get(&client->dev,
+                                     ARRAY_SIZE(ihid_of->supplies),
+                                     ihid_of->supplies);
+       if (ret)
+               return ret;
+
+       return i2c_hid_core_probe(client, &ihid_of->ops,
+                                 hid_descriptor_address);
+}
+
+static const struct of_device_id i2c_hid_of_match[] = {
+       { .compatible = "hid-over-i2c" },
+       {},
+};
+MODULE_DEVICE_TABLE(of, i2c_hid_of_match);
+
+static const struct i2c_device_id i2c_hid_of_id_table[] = {
+       { "hid", 0 },
+       { "hid-over-i2c", 0 },
+       { },
+};
+MODULE_DEVICE_TABLE(i2c, i2c_hid_of_id_table);
+
+static struct i2c_driver i2c_hid_of_driver = {
+       .driver = {
+               .name   = "i2c_hid_of",
+               .pm     = &i2c_hid_core_pm,
+               .probe_type = PROBE_PREFER_ASYNCHRONOUS,
+               .of_match_table = of_match_ptr(i2c_hid_of_match),
+       },
+
+       .probe          = i2c_hid_of_probe,
+       .remove         = i2c_hid_core_remove,
+       .shutdown       = i2c_hid_core_shutdown,
+       .id_table       = i2c_hid_of_id_table,
+};
+
+module_i2c_driver(i2c_hid_of_driver);
+
+MODULE_DESCRIPTION("HID over I2C OF driver");
+MODULE_AUTHOR("Benjamin Tissoires <benjamin.tissoires@gmail.com>");
+MODULE_LICENSE("GPL");
index a8c19ae..05a7827 100644 (file)
@@ -3,6 +3,7 @@
 #ifndef I2C_HID_H
 #define I2C_HID_H
 
+#include <linux/i2c.h>
 
 #ifdef CONFIG_DMI
 struct i2c_hid_desc *i2c_hid_get_dmi_i2c_hid_desc_override(uint8_t *i2c_name);
@@ -17,4 +18,25 @@ static inline char *i2c_hid_get_dmi_hid_report_desc_override(uint8_t *i2c_name,
 { return NULL; }
 #endif
 
+/**
+ * struct i2chid_ops - Ops provided to the core.
+ *
+ * @power_up: do sequencing to power up the device.
+ * @power_down: do sequencing to power down the device.
+ * @shutdown_tail: called at the end of shutdown.
+ */
+struct i2chid_ops {
+       int (*power_up)(struct i2chid_ops *ops);
+       void (*power_down)(struct i2chid_ops *ops);
+       void (*shutdown_tail)(struct i2chid_ops *ops);
+};
+
+int i2c_hid_core_probe(struct i2c_client *client, struct i2chid_ops *ops,
+                      u16 hid_descriptor_address);
+int i2c_hid_core_remove(struct i2c_client *client);
+
+void i2c_hid_core_shutdown(struct i2c_client *client);
+
+extern const struct dev_pm_ops i2c_hid_core_pm;
+
 #endif
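
The i2c-hid core itself is outside this hunk; as an assumption about how it consumes these ops, each hook is presumably invoked only when the subclass provided it, along these lines:

/* Sketch of assumed core-side usage, not the core's actual code */
static int i2c_hid_core_power_up_sketch(struct i2chid_ops *ops)
{
	if (!ops || !ops->power_up)
		return 0;

	return ops->power_up(ops);
}
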
index 1fb294c..21b87e4 100644 (file)
@@ -27,6 +27,7 @@
 #define CMP_H_DEVICE_ID                0x06FC
 #define EHL_Ax_DEVICE_ID       0x4BB3
 #define TGL_LP_DEVICE_ID       0xA0FC
+#define TGL_H_DEVICE_ID                0x43FC
 
 #define        REVISION_ID_CHT_A0      0x6
 #define        REVISION_ID_CHT_Ax_SI   0x0
@@ -81,5 +82,6 @@ struct ishtp_device *ish_dev_init(struct pci_dev *pdev);
 int ish_hw_start(struct ishtp_device *dev);
 void ish_device_disable(struct ishtp_device *dev);
 int ish_disable_dma(struct ishtp_device *dev);
+void ish_set_host_ready(struct ishtp_device *dev);
 
 #endif /* _ISHTP_HW_ISH_H_ */
index a45ac7f..47bbeb8 100644 (file)
@@ -193,6 +193,33 @@ static void ish_clr_host_rdy(struct ishtp_device *dev)
        ish_reg_write(dev, IPC_REG_HOST_COMM, host_status);
 }
 
+static bool ish_chk_host_rdy(struct ishtp_device *dev)
+{
+       uint32_t host_status = ish_reg_read(dev, IPC_REG_HOST_COMM);
+
+       return (host_status & IPC_HOSTCOMM_READY_BIT);
+}
+
+/**
+ * ish_set_host_ready() - reconfigure IPC host registers
+ * @dev: ishtp device pointer
+ *
+ * Set the host to the ready state. This is needed in cases where the
+ * firmware is still running but the IPC has been powered down, such
+ * as the OOB case.
+ */
+void ish_set_host_ready(struct ishtp_device *dev)
+{
+       if (ish_chk_host_rdy(dev))
+               return;
+
+       ish_set_host_rdy(dev);
+       set_host_ready(dev);
+}
+
 /**
  * _ishtp_read_hdr() - Read message header
  * @dev: ISHTP device pointer
index c6d48a8..06081cf 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (c) 2014-2016, Intel Corporation.
  */
 
+#include <linux/acpi.h>
 #include <linux/module.h>
 #include <linux/moduleparam.h>
 #include <linux/kernel.h>
@@ -37,6 +38,7 @@ static const struct pci_device_id ish_pci_tbl[] = {
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, CMP_H_DEVICE_ID)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, EHL_Ax_DEVICE_ID)},
        {PCI_DEVICE(PCI_VENDOR_ID_INTEL, TGL_LP_DEVICE_ID)},
+       {PCI_DEVICE(PCI_VENDOR_ID_INTEL, TGL_H_DEVICE_ID)},
        {0, }
 };
 MODULE_DEVICE_TABLE(pci, ish_pci_tbl);
@@ -111,6 +113,42 @@ static inline bool ish_should_leave_d0i3(struct pci_dev *pdev)
        return !pm_resume_via_firmware() || pdev->device == CHV_DEVICE_ID;
 }
 
+static int enable_gpe(struct device *dev)
+{
+#ifdef CONFIG_ACPI
+       acpi_status acpi_sts;
+       struct acpi_device *adev;
+       struct acpi_device_wakeup *wakeup;
+
+       adev = ACPI_COMPANION(dev);
+       if (!adev) {
+               dev_err(dev, "get acpi handle failed\n");
+               return -ENODEV;
+       }
+       wakeup = &adev->wakeup;
+
+       acpi_sts = acpi_enable_gpe(wakeup->gpe_device, wakeup->gpe_number);
+       if (ACPI_FAILURE(acpi_sts)) {
+               dev_err(dev, "enable ose_gpe failed\n");
+               return -EIO;
+       }
+
+       return 0;
+#else
+       return -ENODEV;
+#endif
+}
+
+static void enable_pme_wake(struct pci_dev *pdev)
+{
+       if ((pci_pme_capable(pdev, PCI_D0) ||
+            pci_pme_capable(pdev, PCI_D3hot) ||
+            pci_pme_capable(pdev, PCI_D3cold)) && !enable_gpe(&pdev->dev)) {
+               pci_pme_active(pdev, true);
+               dev_dbg(&pdev->dev, "ish ipc driver pme wake enabled\n");
+       }
+}
+
 /**
  * ish_probe() - PCI driver probe callback
  * @pdev:      pci device
@@ -179,6 +217,10 @@ static int ish_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        init_waitqueue_head(&ishtp->suspend_wait);
        init_waitqueue_head(&ishtp->resume_wait);
 
+       /* Enable PME for EHL */
+       if (pdev->device == EHL_Ax_DEVICE_ID)
+               enable_pme_wake(pdev);
+
        ret = ish_init(ishtp);
        if (ret)
                return ret;
@@ -218,11 +260,15 @@ static void __maybe_unused ish_resume_handler(struct work_struct *work)
 {
        struct pci_dev *pdev = to_pci_dev(ish_resume_device);
        struct ishtp_device *dev = pci_get_drvdata(pdev);
+       uint32_t fwsts = dev->ops->get_fw_status(dev);
        int ret;
 
-       if (ish_should_leave_d0i3(pdev) && !dev->suspend_flag) {
+       if (ish_should_leave_d0i3(pdev) && !dev->suspend_flag
+                       && IPC_IS_ISH_ILUP(fwsts)) {
                disable_irq_wake(pdev->irq);
 
+               ish_set_host_ready(dev);
+
                ishtp_send_resume(dev);
 
                /* Waiting to get resume response */
@@ -317,6 +363,13 @@ static int __maybe_unused ish_resume(struct device *device)
        struct pci_dev *pdev = to_pci_dev(device);
        struct ishtp_device *dev = pci_get_drvdata(pdev);
 
+       /* Finish the power flow for EHL */
+       if (dev->pdev->device == EHL_Ax_DEVICE_ID) {
+               pci_set_power_state(pdev, PCI_D0);
+               enable_pme_wake(pdev);
+               dev_dbg(dev->devc, "set power state to D0 for ehl\n");
+       }
+
        ish_resume_device = device;
        dev->resume_flag = 1;
 
index aa9e488..8328ef1 100644 (file)
@@ -1825,7 +1825,7 @@ static ssize_t wacom_show_speed(struct device *dev,
        struct hid_device *hdev = to_hid_device(dev);
        struct wacom *wacom = hid_get_drvdata(hdev);
 
-       return snprintf(buf, PAGE_SIZE, "%i\n", wacom->wacom_wac.bt_high_speed);
+       return sysfs_emit(buf, "%i\n", wacom->wacom_wac.bt_high_speed);
 }
 
 static ssize_t wacom_store_speed(struct device *dev,
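
sysfs_emit() is the preferred helper for sysfs show() callbacks: it knows the buffer sysfs passes in is a single page and enforces the PAGE_SIZE bound (and page alignment) that an open-coded snprintf() merely assumes. A minimal sketch of the pattern:

static ssize_t example_show(struct device *dev,
			    struct device_attribute *attr, char *buf)
{
	/* sysfs_emit() bounds the output to PAGE_SIZE for us */
	return sysfs_emit(buf, "%i\n", 1);
}
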
index 1bd0eb7..44d715c 100644 (file)
@@ -2600,7 +2600,12 @@ static void wacom_wac_finger_event(struct hid_device *hdev,
                wacom_wac->is_invalid_bt_frame = !value;
                return;
        case HID_DG_CONTACTMAX:
-               features->touch_max = value;
+               if (!features->touch_max) {
+                       features->touch_max = value;
+               } else {
+                       hid_warn(hdev, "%s: ignoring attempt to overwrite non-zero touch_max "
+                                "%d -> %d\n", __func__, features->touch_max, value);
+               }
                return;
        }
 
index 8c47182..2f776d7 100644 (file)
@@ -726,7 +726,7 @@ static void hv_mem_hot_add(unsigned long start, unsigned long size,
 
                nid = memory_add_physaddr_to_nid(PFN_PHYS(start_pfn));
                ret = add_memory(nid, PFN_PHYS((start_pfn)),
-                               (HA_CHUNK << PAGE_SHIFT), MEMHP_MERGE_RESOURCE);
+                               (HA_CHUNK << PAGE_SHIFT), MHP_MERGE_RESOURCE);
 
                if (ret) {
                        pr_err("hot_add memory failed error is %d\n", ret);
index 3b05560..33eeff9 100644 (file)
@@ -2,11 +2,12 @@
 /*
  * OMAP hardware spinlock driver
  *
- * Copyright (C) 2010-2015 Texas Instruments Incorporated - http://www.ti.com
+ * Copyright (C) 2010-2021 Texas Instruments Incorporated - https://www.ti.com
  *
  * Contact: Simon Que <sque@ti.com>
  *          Hari Kanigeri <h-kanigeri2@ti.com>
  *          Ohad Ben-Cohen <ohad@wizery.com>
+ *          Suman Anna <s-anna@ti.com>
  */
 
 #include <linux/kernel.h>
@@ -164,6 +165,7 @@ static int omap_hwspinlock_remove(struct platform_device *pdev)
 
 static const struct of_device_id omap_hwspinlock_of_match[] = {
        { .compatible = "ti,omap4-hwspinlock", },
+       { .compatible = "ti,am64-hwspinlock", },
        { .compatible = "ti,am654-hwspinlock", },
        { /* end */ },
 };
index 8e19e8c..e0740c6 100644 (file)
@@ -401,8 +401,9 @@ static const struct attribute_group *catu_groups[] = {
 
 static inline int catu_wait_for_ready(struct catu_drvdata *drvdata)
 {
-       return coresight_timeout(drvdata->base,
-                                CATU_STATUS, CATU_STATUS_READY, 1);
+       struct csdev_access *csa = &drvdata->csdev->access;
+
+       return coresight_timeout(csa, CATU_STATUS, CATU_STATUS_READY, 1);
 }
 
 static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
@@ -411,6 +412,7 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
        u32 control, mode;
        struct etr_buf *etr_buf = data;
        struct device *dev = &drvdata->csdev->dev;
+       struct coresight_device *csdev = drvdata->csdev;
 
        if (catu_wait_for_ready(drvdata))
                dev_warn(dev, "Timeout while waiting for READY\n");
@@ -421,7 +423,7 @@ static int catu_enable_hw(struct catu_drvdata *drvdata, void *data)
                return -EBUSY;
        }
 
-       rc = coresight_claim_device_unlocked(drvdata->base);
+       rc = coresight_claim_device_unlocked(csdev);
        if (rc)
                return rc;
 
@@ -465,9 +467,10 @@ static int catu_disable_hw(struct catu_drvdata *drvdata)
 {
        int rc = 0;
        struct device *dev = &drvdata->csdev->dev;
+       struct coresight_device *csdev = drvdata->csdev;
 
        catu_write_control(drvdata, 0);
-       coresight_disclaim_device_unlocked(drvdata->base);
+       coresight_disclaim_device_unlocked(csdev);
        if (catu_wait_for_ready(drvdata)) {
                dev_info(dev, "Timeout while waiting for READY\n");
                rc = -EAGAIN;
@@ -551,6 +554,7 @@ static int catu_probe(struct amba_device *adev, const struct amba_id *id)
        dev->platform_data = pdata;
 
        drvdata->base = base;
+       catu_desc.access = CSDEV_ACCESS_IOMEM(base);
        catu_desc.pdata = pdata;
        catu_desc.dev = dev;
        catu_desc.groups = catu_groups;
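
CSDEV_ACCESS_IOMEM() is defined in a header outside this hunk. Assuming it follows the shape implied by the sysreg initializer in the etm4x changes below, it is a compound-literal initializer that marks the device as MMIO-accessed, roughly:

/* Assumed definition, for illustration only */
#define CSDEV_ACCESS_IOMEM(_addr)		\
	((struct csdev_access) {		\
		.io_mem	= true,			\
		.base	= (_addr),		\
	})
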
index 4ba801d..0062c89 100644 (file)
@@ -145,30 +145,32 @@ static int coresight_find_link_outport(struct coresight_device *csdev,
        return -ENODEV;
 }
 
-static inline u32 coresight_read_claim_tags(void __iomem *base)
+static inline u32 coresight_read_claim_tags(struct coresight_device *csdev)
 {
-       return readl_relaxed(base + CORESIGHT_CLAIMCLR);
+       return csdev_access_relaxed_read32(&csdev->access, CORESIGHT_CLAIMCLR);
 }
 
-static inline bool coresight_is_claimed_self_hosted(void __iomem *base)
+static inline bool coresight_is_claimed_self_hosted(struct coresight_device *csdev)
 {
-       return coresight_read_claim_tags(base) == CORESIGHT_CLAIM_SELF_HOSTED;
+       return coresight_read_claim_tags(csdev) == CORESIGHT_CLAIM_SELF_HOSTED;
 }
 
-static inline bool coresight_is_claimed_any(void __iomem *base)
+static inline bool coresight_is_claimed_any(struct coresight_device *csdev)
 {
-       return coresight_read_claim_tags(base) != 0;
+       return coresight_read_claim_tags(csdev) != 0;
 }
 
-static inline void coresight_set_claim_tags(void __iomem *base)
+static inline void coresight_set_claim_tags(struct coresight_device *csdev)
 {
-       writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMSET);
+       csdev_access_relaxed_write32(&csdev->access, CORESIGHT_CLAIM_SELF_HOSTED,
+                                    CORESIGHT_CLAIMSET);
        isb();
 }
 
-static inline void coresight_clear_claim_tags(void __iomem *base)
+static inline void coresight_clear_claim_tags(struct coresight_device *csdev)
 {
-       writel_relaxed(CORESIGHT_CLAIM_SELF_HOSTED, base + CORESIGHT_CLAIMCLR);
+       csdev_access_relaxed_write32(&csdev->access, CORESIGHT_CLAIM_SELF_HOSTED,
+                                    CORESIGHT_CLAIMCLR);
        isb();
 }
 
@@ -182,27 +184,33 @@ static inline void coresight_clear_claim_tags(void __iomem *base)
  * Called with CS_UNLOCKed for the component.
  * Returns : 0 on success
  */
-int coresight_claim_device_unlocked(void __iomem *base)
+int coresight_claim_device_unlocked(struct coresight_device *csdev)
 {
-       if (coresight_is_claimed_any(base))
+       if (WARN_ON(!csdev))
+               return -EINVAL;
+
+       if (coresight_is_claimed_any(csdev))
                return -EBUSY;
 
-       coresight_set_claim_tags(base);
-       if (coresight_is_claimed_self_hosted(base))
+       coresight_set_claim_tags(csdev);
+       if (coresight_is_claimed_self_hosted(csdev))
                return 0;
        /* There was a race setting the tags, clean up and fail */
-       coresight_clear_claim_tags(base);
+       coresight_clear_claim_tags(csdev);
        return -EBUSY;
 }
 EXPORT_SYMBOL_GPL(coresight_claim_device_unlocked);
 
-int coresight_claim_device(void __iomem *base)
+int coresight_claim_device(struct coresight_device *csdev)
 {
        int rc;
 
-       CS_UNLOCK(base);
-       rc = coresight_claim_device_unlocked(base);
-       CS_LOCK(base);
+       if (WARN_ON(!csdev))
+               return -EINVAL;
+
+       CS_UNLOCK(csdev->access.base);
+       rc = coresight_claim_device_unlocked(csdev);
+       CS_LOCK(csdev->access.base);
 
        return rc;
 }
@@ -212,11 +220,14 @@ EXPORT_SYMBOL_GPL(coresight_claim_device);
  * coresight_disclaim_device_unlocked : Clear the claim tags for the device.
  * Called with CS_UNLOCKed for the component.
  */
-void coresight_disclaim_device_unlocked(void __iomem *base)
+void coresight_disclaim_device_unlocked(struct coresight_device *csdev)
 {
 
-       if (coresight_is_claimed_self_hosted(base))
-               coresight_clear_claim_tags(base);
+       if (WARN_ON(!csdev))
+               return;
+
+       if (coresight_is_claimed_self_hosted(csdev))
+               coresight_clear_claim_tags(csdev);
        else
                /*
                 * The external agent may have not honoured our claim
@@ -227,11 +238,14 @@ void coresight_disclaim_device_unlocked(void __iomem *base)
 }
 EXPORT_SYMBOL_GPL(coresight_disclaim_device_unlocked);
 
-void coresight_disclaim_device(void __iomem *base)
+void coresight_disclaim_device(struct coresight_device *csdev)
 {
-       CS_UNLOCK(base);
-       coresight_disclaim_device_unlocked(base);
-       CS_LOCK(base);
+       if (WARN_ON(!csdev))
+               return;
+
+       CS_UNLOCK(csdev->access.base);
+       coresight_disclaim_device_unlocked(csdev);
+       CS_LOCK(csdev->access.base);
 }
 EXPORT_SYMBOL_GPL(coresight_disclaim_device);
 
@@ -1418,23 +1432,24 @@ static void coresight_remove_conns(struct coresight_device *csdev)
 }
 
 /**
- * coresight_timeout - loop until a bit has changed to a specific state.
- * @addr: base address of the area of interest.
- * @offset: address of a register, starting from @addr.
+ * coresight_timeout - loop until a register bit has changed to a
+ *                     specific state.
+ * @csa: coresight device access for the device
+ * @offset: Offset of the register from the base of the device.
  * @position: the position of the bit of interest.
  * @value: the value the bit should have.
  *
  * Return: 0 as soon as the bit has taken the desired state or -EAGAIN if
  * TIMEOUT_US has elapsed, whichever happens first.
  */
-
-int coresight_timeout(void __iomem *addr, u32 offset, int position, int value)
+int coresight_timeout(struct csdev_access *csa, u32 offset,
+                     int position, int value)
 {
        int i;
        u32 val;
 
        for (i = TIMEOUT_US; i > 0; i--) {
-               val = __raw_readl(addr + offset);
+               val = csdev_access_read32(csa, offset);
                /* waiting on the bit to go from 0 to 1 */
                if (value) {
                        if (val & BIT(position))
@@ -1458,6 +1473,48 @@ int coresight_timeout(void __iomem *addr, u32 offset, int position, int value)
 }
 EXPORT_SYMBOL_GPL(coresight_timeout);
 
+u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset)
+{
+       return csdev_access_relaxed_read32(&csdev->access, offset);
+}
+
+u32 coresight_read32(struct coresight_device *csdev, u32 offset)
+{
+       return csdev_access_read32(&csdev->access, offset);
+}
+
+void coresight_relaxed_write32(struct coresight_device *csdev,
+                              u32 val, u32 offset)
+{
+       csdev_access_relaxed_write32(&csdev->access, val, offset);
+}
+
+void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset)
+{
+       csdev_access_write32(&csdev->access, val, offset);
+}
+
+u64 coresight_relaxed_read64(struct coresight_device *csdev, u32 offset)
+{
+       return csdev_access_relaxed_read64(&csdev->access, offset);
+}
+
+u64 coresight_read64(struct coresight_device *csdev, u32 offset)
+{
+       return csdev_access_read64(&csdev->access, offset);
+}
+
+void coresight_relaxed_write64(struct coresight_device *csdev,
+                              u64 val, u32 offset)
+{
+       csdev_access_relaxed_write64(&csdev->access, val, offset);
+}
+
+void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset)
+{
+       csdev_access_write64(&csdev->access, val, offset);
+}
+
 /*
  * coresight_release_platform_data: Release references to the devices connected
  * to the output port of this device.
@@ -1522,6 +1579,7 @@ struct coresight_device *coresight_register(struct coresight_desc *desc)
        csdev->type = desc->type;
        csdev->subtype = desc->subtype;
        csdev->ops = desc->ops;
+       csdev->access = desc->access;
        csdev->orphan = false;
 
        csdev->dev.type = &coresight_dev_type[desc->type];
index 30e4880..e2a3620 100644 (file)
@@ -102,7 +102,7 @@ static int cti_enable_hw(struct cti_drvdata *drvdata)
                goto cti_state_unchanged;
 
        /* claim the device */
-       rc = coresight_claim_device(drvdata->base);
+       rc = coresight_claim_device(drvdata->csdev);
        if (rc)
                goto cti_err_not_enabled;
 
@@ -136,7 +136,7 @@ static void cti_cpuhp_enable_hw(struct cti_drvdata *drvdata)
                goto cti_hp_not_enabled;
 
        /* try to claim the device */
-       if (coresight_claim_device(drvdata->base))
+       if (coresight_claim_device(drvdata->csdev))
                goto cti_hp_not_enabled;
 
        cti_write_all_hw_regs(drvdata);
@@ -154,6 +154,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata)
 {
        struct cti_config *config = &drvdata->config;
        struct device *dev = &drvdata->csdev->dev;
+       struct coresight_device *csdev = drvdata->csdev;
 
        spin_lock(&drvdata->spinlock);
 
@@ -171,7 +172,7 @@ static int cti_disable_hw(struct cti_drvdata *drvdata)
        writel_relaxed(0, drvdata->base + CTICONTROL);
        config->hw_enabled = false;
 
-       coresight_disclaim_device_unlocked(drvdata->base);
+       coresight_disclaim_device_unlocked(csdev);
        CS_LOCK(drvdata->base);
        spin_unlock(&drvdata->spinlock);
        pm_runtime_put(dev);
@@ -655,6 +656,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
                             void *v)
 {
        struct cti_drvdata *drvdata;
+       struct coresight_device *csdev;
        unsigned int cpu = smp_processor_id();
        int notify_res = NOTIFY_OK;
 
@@ -662,6 +664,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
                return NOTIFY_OK;
 
        drvdata = cti_cpu_drvdata[cpu];
+       csdev = drvdata->csdev;
 
        if (WARN_ON_ONCE(drvdata->ctidev.cpu != cpu))
                return NOTIFY_BAD;
@@ -673,13 +676,13 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
                /* CTI regs all static - we have a copy & nothing to save */
                drvdata->config.hw_powered = false;
                if (drvdata->config.hw_enabled)
-                       coresight_disclaim_device(drvdata->base);
+                       coresight_disclaim_device(csdev);
                break;
 
        case CPU_PM_ENTER_FAILED:
                drvdata->config.hw_powered = true;
                if (drvdata->config.hw_enabled) {
-                       if (coresight_claim_device(drvdata->base))
+                       if (coresight_claim_device(csdev))
                                drvdata->config.hw_enabled = false;
                }
                break;
@@ -692,7 +695,7 @@ static int cti_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
                /* check enable reference count to enable HW */
                if (atomic_read(&drvdata->config.enable_req_count)) {
                        /* check we can claim the device as we re-power */
-                       if (coresight_claim_device(drvdata->base))
+                       if (coresight_claim_device(csdev))
                                goto cti_notify_exit;
 
                        drvdata->config.hw_enabled = true;
@@ -736,7 +739,7 @@ static int cti_dying_cpu(unsigned int cpu)
        spin_lock(&drvdata->spinlock);
        drvdata->config.hw_powered = false;
        if (drvdata->config.hw_enabled)
-               coresight_disclaim_device(drvdata->base);
+               coresight_disclaim_device(drvdata->csdev);
        spin_unlock(&drvdata->spinlock);
        return 0;
 }
@@ -868,6 +871,7 @@ static int cti_probe(struct amba_device *adev, const struct amba_id *id)
                return PTR_ERR(base);
 
        drvdata->base = base;
+       cti_desc.access = CSDEV_ACCESS_IOMEM(base);
 
        dev_set_drvdata(dev, drvdata);
 
index 98f830c..ccef04f 100644 (file)
@@ -343,7 +343,6 @@ static int cti_plat_create_connection(struct device *dev,
 {
        struct cti_trig_con *tc = NULL;
        int cpuid = -1, err = 0;
-       struct fwnode_handle *cs_fwnode = NULL;
        struct coresight_device *csdev = NULL;
        const char *assoc_name = "unknown";
        char cpu_name_str[16];
@@ -397,8 +396,9 @@ static int cti_plat_create_connection(struct device *dev,
                assoc_name = cpu_name_str;
        } else {
                /* associated device ? */
-               cs_fwnode = fwnode_find_reference(fwnode,
-                                                 CTI_DT_CSDEV_ASSOC, 0);
+               struct fwnode_handle *cs_fwnode = fwnode_find_reference(fwnode,
+                                                                       CTI_DT_CSDEV_ASSOC,
+                                                                       0);
                if (!IS_ERR(cs_fwnode)) {
                        assoc_name = cti_plat_get_csdev_or_node_name(cs_fwnode,
                                                                     &csdev);
index 51c801c..f775cbe 100644 (file)
@@ -132,7 +132,7 @@ static void __etb_enable_hw(struct etb_drvdata *drvdata)
 
 static int etb_enable_hw(struct etb_drvdata *drvdata)
 {
-       int rc = coresight_claim_device(drvdata->base);
+       int rc = coresight_claim_device(drvdata->csdev);
 
        if (rc)
                return rc;
@@ -252,6 +252,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
 {
        u32 ffcr;
        struct device *dev = &drvdata->csdev->dev;
+       struct csdev_access *csa = &drvdata->csdev->access;
 
        CS_UNLOCK(drvdata->base);
 
@@ -263,7 +264,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
        ffcr |= ETB_FFCR_FON_MAN;
        writel_relaxed(ffcr, drvdata->base + ETB_FFCR);
 
-       if (coresight_timeout(drvdata->base, ETB_FFCR, ETB_FFCR_BIT, 0)) {
+       if (coresight_timeout(csa, ETB_FFCR, ETB_FFCR_BIT, 0)) {
                dev_err(dev,
                "timeout while waiting for completion of Manual Flush\n");
        }
@@ -271,7 +272,7 @@ static void __etb_disable_hw(struct etb_drvdata *drvdata)
        /* disable trace capture */
        writel_relaxed(0x0, drvdata->base + ETB_CTL_REG);
 
-       if (coresight_timeout(drvdata->base, ETB_FFSR, ETB_FFSR_BIT, 1)) {
+       if (coresight_timeout(csa, ETB_FFSR, ETB_FFSR_BIT, 1)) {
                dev_err(dev,
                        "timeout while waiting for Formatter to Stop\n");
        }
@@ -344,7 +345,7 @@ static void etb_disable_hw(struct etb_drvdata *drvdata)
 {
        __etb_disable_hw(drvdata);
        etb_dump_hw(drvdata);
-       coresight_disclaim_device(drvdata->base);
+       coresight_disclaim_device(drvdata->csdev);
 }
 
 static int etb_disable(struct coresight_device *csdev)
@@ -757,6 +758,7 @@ static int etb_probe(struct amba_device *adev, const struct amba_id *id)
                return PTR_ERR(base);
 
        drvdata->base = base;
+       desc.access = CSDEV_ACCESS_IOMEM(base);
 
        spin_lock_init(&drvdata->spinlock);
 
index bdc34ca..0f603b4 100644 (file)
@@ -27,17 +27,45 @@ static bool etm_perf_up;
 static DEFINE_PER_CPU(struct perf_output_handle, ctx_handle);
 static DEFINE_PER_CPU(struct coresight_device *, csdev_src);
 
-/* ETMv3.5/PTM's ETMCR is 'config' */
+/*
+ * The PMU formats were originally for ETMv3.5/PTM's ETMCR 'config';
+ * now they are taken as general formats and apply to all ETMs.
+ */
 PMU_FORMAT_ATTR(cycacc,                "config:" __stringify(ETM_OPT_CYCACC));
-PMU_FORMAT_ATTR(contextid,     "config:" __stringify(ETM_OPT_CTXTID));
+/* contextid1 enables tracing CONTEXTIDR_EL1 for ETMv4 */
+PMU_FORMAT_ATTR(contextid1,    "config:" __stringify(ETM_OPT_CTXTID));
+/* contextid2 enables tracing CONTEXTIDR_EL2 for ETMv4 */
+PMU_FORMAT_ATTR(contextid2,    "config:" __stringify(ETM_OPT_CTXTID2));
 PMU_FORMAT_ATTR(timestamp,     "config:" __stringify(ETM_OPT_TS));
 PMU_FORMAT_ATTR(retstack,      "config:" __stringify(ETM_OPT_RETSTK));
 /* Sink ID - same for all ETMs */
 PMU_FORMAT_ATTR(sinkid,                "config2:0-31");
 
+/*
+ * contextid always traces the "PID".  The PID is in CONTEXTIDR_EL1
+ * when the kernel is running at EL1; when the kernel is at EL2,
+ * the PID is in CONTEXTIDR_EL2.
+ */
+static ssize_t format_attr_contextid_show(struct device *dev,
+                                         struct device_attribute *attr,
+                                         char *page)
+{
+       int pid_fmt = ETM_OPT_CTXTID;
+
+#if defined(CONFIG_CORESIGHT_SOURCE_ETM4X)
+       pid_fmt = is_kernel_in_hyp_mode() ? ETM_OPT_CTXTID2 : ETM_OPT_CTXTID;
+#endif
+       return sprintf(page, "config:%d\n", pid_fmt);
+}
+
+static struct device_attribute format_attr_contextid =
+       __ATTR(contextid, 0444, format_attr_contextid_show, NULL);
+
 static struct attribute *etm_config_formats_attr[] = {
        &format_attr_cycacc.attr,
        &format_attr_contextid.attr,
+       &format_attr_contextid1.attr,
+       &format_attr_contextid2.attr,
        &format_attr_timestamp.attr,
        &format_attr_retstack.attr,
        &format_attr_sinkid.attr,
index 683a69e..cf64ce7 100644 (file)
@@ -358,10 +358,11 @@ static int etm_enable_hw(struct etm_drvdata *drvdata)
        int i, rc;
        u32 etmcr;
        struct etm_config *config = &drvdata->config;
+       struct coresight_device *csdev = drvdata->csdev;
 
        CS_UNLOCK(drvdata->base);
 
-       rc = coresight_claim_device_unlocked(drvdata->base);
+       rc = coresight_claim_device_unlocked(csdev);
        if (rc)
                goto done;
 
@@ -566,6 +567,7 @@ static void etm_disable_hw(void *info)
        int i;
        struct etm_drvdata *drvdata = info;
        struct etm_config *config = &drvdata->config;
+       struct coresight_device *csdev = drvdata->csdev;
 
        CS_UNLOCK(drvdata->base);
        etm_set_prog(drvdata);
@@ -577,7 +579,7 @@ static void etm_disable_hw(void *info)
                config->cntr_val[i] = etm_readl(drvdata, ETMCNTVRn(i));
 
        etm_set_pwrdwn(drvdata);
-       coresight_disclaim_device_unlocked(drvdata->base);
+       coresight_disclaim_device_unlocked(csdev);
 
        CS_LOCK(drvdata->base);
 
@@ -602,7 +604,7 @@ static void etm_disable_perf(struct coresight_device *csdev)
         * power down the tracer.
         */
        etm_set_pwrdwn(drvdata);
-       coresight_disclaim_device_unlocked(drvdata->base);
+       coresight_disclaim_device_unlocked(csdev);
 
        CS_LOCK(drvdata->base);
 }
@@ -839,6 +841,7 @@ static int etm_probe(struct amba_device *adev, const struct amba_id *id)
                return PTR_ERR(base);
 
        drvdata->base = base;
+       desc.access = CSDEV_ACCESS_IOMEM(base);
 
        spin_lock_init(&drvdata->spinlock);
 
index 82787cb..15016f7 100644 (file)
@@ -27,6 +27,7 @@
 #include <linux/seq_file.h>
 #include <linux/uaccess.h>
 #include <linux/perf_event.h>
+#include <linux/platform_device.h>
 #include <linux/pm_runtime.h>
 #include <linux/property.h>
 
@@ -59,32 +60,99 @@ static u64 etm4_get_access_type(struct etmv4_config *config);
 
 static enum cpuhp_state hp_online;
 
-static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+struct etm4_init_arg {
+       unsigned int            pid;
+       struct etmv4_drvdata    *drvdata;
+       struct csdev_access     *csa;
+};
+
+/*
+ * Check if TRCSSPCICRn(i) is implemented for a given instance.
+ *
+ * TRCSSPCICR<n> is present only if all of the following are true:
+ *             TRCIDR4.NUMSSCC > n.
+ *             TRCIDR4.NUMPC > 0b0000.
+ *             TRCSSCSR<n>.PC == 0b1.
+ */
+static inline bool etm4x_sspcicrn_present(struct etmv4_drvdata *drvdata, int n)
+{
+       return (n < drvdata->nr_ss_cmp) &&
+              drvdata->nr_pe &&
+              (drvdata->config.ss_status[n] & TRCSSCSRn_PC);
+}
+
+u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit)
+{
+       u64 res = 0;
+
+       switch (offset) {
+       ETM4x_READ_SYSREG_CASES(res)
+       default:
+               pr_warn_ratelimited("etm4x: trying to read unsupported register @%x\n",
+                        offset);
+       }
+
+       if (!_relaxed)
+               __iormb(res);   /* Imitate the !relaxed I/O helpers */
+
+       return res;
+}
+
+void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit)
+{
+       if (!_relaxed)
+               __iowmb();      /* Imitate the !relaxed I/O helpers */
+       if (!_64bit)
+               val &= GENMASK(31, 0);
+
+       switch (offset) {
+       ETM4x_WRITE_SYSREG_CASES(val)
+       default:
+               pr_warn_ratelimited("etm4x: trying to write to unsupported register @%x\n",
+                       offset);
+       }
+}
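
/*
 * Sketch: ETM4x_READ_SYSREG_CASES()/ETM4x_WRITE_SYSREG_CASES() are
 * generated in a header outside this hunk. The assumption is that each
 * entry expands to one switch case per ETM register, mapping the
 * memory-map offset onto the matching system-register access; the
 * SYS_TRCPRGCTLR encoding name below is hypothetical.
 */
static u64 etm4x_sysreg_read_sketch(u32 offset)
{
	u64 res = 0;

	switch (offset) {
	case TRCPRGCTLR:	/* one such case per ETM4x register */
		res = read_sysreg_s(SYS_TRCPRGCTLR);
		break;
	default:
		break;
	}

	return res;
}
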
+
+static void etm4_os_unlock_csa(struct etmv4_drvdata *drvdata, struct csdev_access *csa)
 {
        /* Writing 0 to TRCOSLAR unlocks the trace registers */
-       writel_relaxed(0x0, drvdata->base + TRCOSLAR);
+       etm4x_relaxed_write32(csa, 0x0, TRCOSLAR);
        drvdata->os_unlock = true;
        isb();
 }
 
+static void etm4_os_unlock(struct etmv4_drvdata *drvdata)
+{
+       if (!WARN_ON(!drvdata->csdev))
+               etm4_os_unlock_csa(drvdata, &drvdata->csdev->access);
+}
+
 static void etm4_os_lock(struct etmv4_drvdata *drvdata)
 {
+       if (WARN_ON(!drvdata->csdev))
+               return;
+
        /* Writing 0x1 to TRCOSLAR locks the trace registers */
-       writel_relaxed(0x1, drvdata->base + TRCOSLAR);
+       etm4x_relaxed_write32(&drvdata->csdev->access, 0x1, TRCOSLAR);
        drvdata->os_unlock = false;
        isb();
 }
 
-static bool etm4_arch_supported(u8 arch)
+static void etm4_cs_lock(struct etmv4_drvdata *drvdata,
+                        struct csdev_access *csa)
 {
-       /* Mask out the minor version number */
-       switch (arch & 0xf0) {
-       case ETM_ARCH_V4:
-               break;
-       default:
-               return false;
-       }
-       return true;
+       /* Software Lock is only accessible via memory mapped interface */
+       if (csa->io_mem)
+               CS_LOCK(csa->base);
+}
+
+static void etm4_cs_unlock(struct etmv4_drvdata *drvdata,
+                          struct csdev_access *csa)
+{
+       if (csa->io_mem)
+               CS_UNLOCK(csa->base);
 }
 
 static int etm4_cpu_id(struct coresight_device *csdev)
@@ -201,57 +269,64 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
 {
        int i, rc;
        struct etmv4_config *config = &drvdata->config;
-       struct device *etm_dev = &drvdata->csdev->dev;
+       struct coresight_device *csdev = drvdata->csdev;
+       struct device *etm_dev = &csdev->dev;
+       struct csdev_access *csa = &csdev->access;
 
-       CS_UNLOCK(drvdata->base);
+       etm4_cs_unlock(drvdata, csa);
        etm4_enable_arch_specific(drvdata);
 
        etm4_os_unlock(drvdata);
 
-       rc = coresight_claim_device_unlocked(drvdata->base);
+       rc = coresight_claim_device_unlocked(csdev);
        if (rc)
                goto done;
 
        /* Disable the trace unit before programming trace registers */
-       writel_relaxed(0, drvdata->base + TRCPRGCTLR);
+       etm4x_relaxed_write32(csa, 0, TRCPRGCTLR);
+
+       /*
+        * If we use system instructions, we need to synchronize the
+        * write to the TRCPRGCTLR, before accessing the TRCSTATR.
+        * See ARM IHI0064F, section
+        * "4.3.7 Synchronization of register updates"
+        */
+       if (!csa->io_mem)
+               isb();
 
        /* wait for TRCSTATR.IDLE to go up */
-       if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
+       if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1))
                dev_err(etm_dev,
                        "timeout while waiting for Idle Trace Status\n");
        if (drvdata->nr_pe)
-               writel_relaxed(config->pe_sel, drvdata->base + TRCPROCSELR);
-       writel_relaxed(config->cfg, drvdata->base + TRCCONFIGR);
+               etm4x_relaxed_write32(csa, config->pe_sel, TRCPROCSELR);
+       etm4x_relaxed_write32(csa, config->cfg, TRCCONFIGR);
        /* nothing specific implemented */
-       writel_relaxed(0x0, drvdata->base + TRCAUXCTLR);
-       writel_relaxed(config->eventctrl0, drvdata->base + TRCEVENTCTL0R);
-       writel_relaxed(config->eventctrl1, drvdata->base + TRCEVENTCTL1R);
-       writel_relaxed(config->stall_ctrl, drvdata->base + TRCSTALLCTLR);
-       writel_relaxed(config->ts_ctrl, drvdata->base + TRCTSCTLR);
-       writel_relaxed(config->syncfreq, drvdata->base + TRCSYNCPR);
-       writel_relaxed(config->ccctlr, drvdata->base + TRCCCCTLR);
-       writel_relaxed(config->bb_ctrl, drvdata->base + TRCBBCTLR);
-       writel_relaxed(drvdata->trcid, drvdata->base + TRCTRACEIDR);
-       writel_relaxed(config->vinst_ctrl, drvdata->base + TRCVICTLR);
-       writel_relaxed(config->viiectlr, drvdata->base + TRCVIIECTLR);
-       writel_relaxed(config->vissctlr,
-                      drvdata->base + TRCVISSCTLR);
+       etm4x_relaxed_write32(csa, 0x0, TRCAUXCTLR);
+       etm4x_relaxed_write32(csa, config->eventctrl0, TRCEVENTCTL0R);
+       etm4x_relaxed_write32(csa, config->eventctrl1, TRCEVENTCTL1R);
+       if (drvdata->stallctl)
+               etm4x_relaxed_write32(csa, config->stall_ctrl, TRCSTALLCTLR);
+       etm4x_relaxed_write32(csa, config->ts_ctrl, TRCTSCTLR);
+       etm4x_relaxed_write32(csa, config->syncfreq, TRCSYNCPR);
+       etm4x_relaxed_write32(csa, config->ccctlr, TRCCCCTLR);
+       etm4x_relaxed_write32(csa, config->bb_ctrl, TRCBBCTLR);
+       etm4x_relaxed_write32(csa, drvdata->trcid, TRCTRACEIDR);
+       etm4x_relaxed_write32(csa, config->vinst_ctrl, TRCVICTLR);
+       etm4x_relaxed_write32(csa, config->viiectlr, TRCVIIECTLR);
+       etm4x_relaxed_write32(csa, config->vissctlr, TRCVISSCTLR);
        if (drvdata->nr_pe_cmp)
-               writel_relaxed(config->vipcssctlr,
-                              drvdata->base + TRCVIPCSSCTLR);
+               etm4x_relaxed_write32(csa, config->vipcssctlr, TRCVIPCSSCTLR);
        for (i = 0; i < drvdata->nrseqstate - 1; i++)
-               writel_relaxed(config->seq_ctrl[i],
-                              drvdata->base + TRCSEQEVRn(i));
-       writel_relaxed(config->seq_rst, drvdata->base + TRCSEQRSTEVR);
-       writel_relaxed(config->seq_state, drvdata->base + TRCSEQSTR);
-       writel_relaxed(config->ext_inp, drvdata->base + TRCEXTINSELR);
+               etm4x_relaxed_write32(csa, config->seq_ctrl[i], TRCSEQEVRn(i));
+       etm4x_relaxed_write32(csa, config->seq_rst, TRCSEQRSTEVR);
+       etm4x_relaxed_write32(csa, config->seq_state, TRCSEQSTR);
+       etm4x_relaxed_write32(csa, config->ext_inp, TRCEXTINSELR);
        for (i = 0; i < drvdata->nr_cntr; i++) {
-               writel_relaxed(config->cntrldvr[i],
-                              drvdata->base + TRCCNTRLDVRn(i));
-               writel_relaxed(config->cntr_ctrl[i],
-                              drvdata->base + TRCCNTCTLRn(i));
-               writel_relaxed(config->cntr_val[i],
-                              drvdata->base + TRCCNTVRn(i));
+               etm4x_relaxed_write32(csa, config->cntrldvr[i], TRCCNTRLDVRn(i));
+               etm4x_relaxed_write32(csa, config->cntr_ctrl[i], TRCCNTCTLRn(i));
+               etm4x_relaxed_write32(csa, config->cntr_val[i], TRCCNTVRn(i));
        }
 
        /*
@@ -259,54 +334,52 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
         * such start at 2.
         */
        for (i = 2; i < drvdata->nr_resource * 2; i++)
-               writel_relaxed(config->res_ctrl[i],
-                              drvdata->base + TRCRSCTLRn(i));
+               etm4x_relaxed_write32(csa, config->res_ctrl[i], TRCRSCTLRn(i));
 
        for (i = 0; i < drvdata->nr_ss_cmp; i++) {
                /* always clear status bit on restart if using single-shot */
                if (config->ss_ctrl[i] || config->ss_pe_cmp[i])
                        config->ss_status[i] &= ~BIT(31);
-               writel_relaxed(config->ss_ctrl[i],
-                              drvdata->base + TRCSSCCRn(i));
-               writel_relaxed(config->ss_status[i],
-                              drvdata->base + TRCSSCSRn(i));
-               writel_relaxed(config->ss_pe_cmp[i],
-                              drvdata->base + TRCSSPCICRn(i));
+               etm4x_relaxed_write32(csa, config->ss_ctrl[i], TRCSSCCRn(i));
+               etm4x_relaxed_write32(csa, config->ss_status[i], TRCSSCSRn(i));
+               if (etm4x_sspcicrn_present(drvdata, i))
+                       etm4x_relaxed_write32(csa, config->ss_pe_cmp[i], TRCSSPCICRn(i));
        }
        for (i = 0; i < drvdata->nr_addr_cmp; i++) {
-               writeq_relaxed(config->addr_val[i],
-                              drvdata->base + TRCACVRn(i));
-               writeq_relaxed(config->addr_acc[i],
-                              drvdata->base + TRCACATRn(i));
+               etm4x_relaxed_write64(csa, config->addr_val[i], TRCACVRn(i));
+               etm4x_relaxed_write64(csa, config->addr_acc[i], TRCACATRn(i));
        }
        for (i = 0; i < drvdata->numcidc; i++)
-               writeq_relaxed(config->ctxid_pid[i],
-                              drvdata->base + TRCCIDCVRn(i));
-       writel_relaxed(config->ctxid_mask0, drvdata->base + TRCCIDCCTLR0);
+               etm4x_relaxed_write64(csa, config->ctxid_pid[i], TRCCIDCVRn(i));
+       etm4x_relaxed_write32(csa, config->ctxid_mask0, TRCCIDCCTLR0);
        if (drvdata->numcidc > 4)
-               writel_relaxed(config->ctxid_mask1, drvdata->base + TRCCIDCCTLR1);
+               etm4x_relaxed_write32(csa, config->ctxid_mask1, TRCCIDCCTLR1);
 
        for (i = 0; i < drvdata->numvmidc; i++)
-               writeq_relaxed(config->vmid_val[i],
-                              drvdata->base + TRCVMIDCVRn(i));
-       writel_relaxed(config->vmid_mask0, drvdata->base + TRCVMIDCCTLR0);
+               etm4x_relaxed_write64(csa, config->vmid_val[i], TRCVMIDCVRn(i));
+       etm4x_relaxed_write32(csa, config->vmid_mask0, TRCVMIDCCTLR0);
        if (drvdata->numvmidc > 4)
-               writel_relaxed(config->vmid_mask1, drvdata->base + TRCVMIDCCTLR1);
+               etm4x_relaxed_write32(csa, config->vmid_mask1, TRCVMIDCCTLR1);
 
        if (!drvdata->skip_power_up) {
+               u32 trcpdcr = etm4x_relaxed_read32(csa, TRCPDCR);
+
                /*
                 * Request to keep the trace unit powered and also
                 * emulation of powerdown
                 */
-               writel_relaxed(readl_relaxed(drvdata->base + TRCPDCR) |
-                              TRCPDCR_PU, drvdata->base + TRCPDCR);
+               etm4x_relaxed_write32(csa, trcpdcr | TRCPDCR_PU, TRCPDCR);
        }
 
        /* Enable the trace unit */
-       writel_relaxed(1, drvdata->base + TRCPRGCTLR);
+       etm4x_relaxed_write32(csa, 1, TRCPRGCTLR);
+
+       /* Synchronize the register updates for sysreg access */
+       if (!csa->io_mem)
+               isb();
 
        /* wait for TRCSTATR.IDLE to go back down to '0' */
-       if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
+       if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 0))
                dev_err(etm_dev,
                        "timeout while waiting for Idle Trace Status\n");
 
@@ -318,7 +391,7 @@ static int etm4_enable_hw(struct etmv4_drvdata *drvdata)
        isb();
 
 done:
-       CS_LOCK(drvdata->base);
+       etm4_cs_lock(drvdata, csa);
 
        dev_dbg(etm_dev, "cpu: %d enable smp call done: %d\n",
                drvdata->cpu, rc);
@@ -477,6 +550,19 @@ static int etm4_parse_event_config(struct etmv4_drvdata *drvdata,
                /* bit[6], Context ID tracing bit */
                config->cfg |= BIT(ETM4_CFG_BIT_CTXTID);
 
+       /*
+        * If ETM_OPT_CTXTID2 is set in the perf config, the user asks to
+        * trace the VMID in order to record CONTEXTIDR_EL2.  Do not enable
+        * VMID tracing if the kernel is not running at EL2.
+        */
+       if (attr->config & BIT(ETM_OPT_CTXTID2)) {
+               if (!is_kernel_in_hyp_mode()) {
+                       ret = -EINVAL;
+                       goto out;
+               }
+               config->cfg |= BIT(ETM4_CFG_BIT_VMID) | BIT(ETM4_CFG_BIT_VMID_OPT);
+       }
+
        /* return stack - enable if selected and supported */
        if ((attr->config & BIT(ETM_OPT_RETSTK)) && drvdata->retstack)
                /* bit[12], Return stack enable bit */
@@ -570,20 +656,22 @@ static void etm4_disable_hw(void *info)
        u32 control;
        struct etmv4_drvdata *drvdata = info;
        struct etmv4_config *config = &drvdata->config;
-       struct device *etm_dev = &drvdata->csdev->dev;
+       struct coresight_device *csdev = drvdata->csdev;
+       struct device *etm_dev = &csdev->dev;
+       struct csdev_access *csa = &csdev->access;
        int i;
 
-       CS_UNLOCK(drvdata->base);
+       etm4_cs_unlock(drvdata, csa);
        etm4_disable_arch_specific(drvdata);
 
        if (!drvdata->skip_power_up) {
                /* power can be removed from the trace unit now */
-               control = readl_relaxed(drvdata->base + TRCPDCR);
+               control = etm4x_relaxed_read32(csa, TRCPDCR);
                control &= ~TRCPDCR_PU;
-               writel_relaxed(control, drvdata->base + TRCPDCR);
+               etm4x_relaxed_write32(csa, control, TRCPDCR);
        }
 
-       control = readl_relaxed(drvdata->base + TRCPRGCTLR);
+       control = etm4x_relaxed_read32(csa, TRCPRGCTLR);
 
        /* EN, bit[0] Trace unit enable bit */
        control &= ~0x1;
@@ -595,29 +683,27 @@ static void etm4_disable_hw(void *info)
         */
        dsb(sy);
        isb();
-       writel_relaxed(control, drvdata->base + TRCPRGCTLR);
+       etm4x_relaxed_write32(csa, control, TRCPRGCTLR);
 
        /* wait for TRCSTATR.PMSTABLE to go to '1' */
-       if (coresight_timeout(drvdata->base, TRCSTATR,
-                             TRCSTATR_PMSTABLE_BIT, 1))
+       if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1))
                dev_err(etm_dev,
                        "timeout while waiting for PM stable Trace Status\n");
 
        /* read the status of the single shot comparators */
        for (i = 0; i < drvdata->nr_ss_cmp; i++) {
                config->ss_status[i] =
-                       readl_relaxed(drvdata->base + TRCSSCSRn(i));
+                       etm4x_relaxed_read32(csa, TRCSSCSRn(i));
        }
 
        /* read back the current counter values */
        for (i = 0; i < drvdata->nr_cntr; i++) {
                config->cntr_val[i] =
-                       readl_relaxed(drvdata->base + TRCCNTVRn(i));
+                       etm4x_relaxed_read32(csa, TRCCNTVRn(i));
        }
 
-       coresight_disclaim_device_unlocked(drvdata->base);
-
-       CS_LOCK(drvdata->base);
+       coresight_disclaim_device_unlocked(csdev);
+       etm4_cs_lock(drvdata, csa);
 
        dev_dbg(&drvdata->csdev->dev,
                "cpu: %d disable smp call done\n", drvdata->cpu);
@@ -641,7 +727,7 @@ static int etm4_disable_perf(struct coresight_device *csdev,
         * scheduled again.  Configuration of the start/stop logic happens in
         * function etm4_set_event_filters().
         */
-       control = readl_relaxed(drvdata->base + TRCVICTLR);
+       control = etm4x_relaxed_read32(&csdev->access, TRCVICTLR);
        /* TRCVICTLR::SSSTATUS, bit[9] */
        filters->ssstatus = (control & BIT(9));
 
@@ -712,24 +798,136 @@ static const struct coresight_ops etm4_cs_ops = {
        .source_ops     = &etm4_source_ops,
 };
 
+static inline bool cpu_supports_sysreg_trace(void)
+{
+       u64 dfr0 = read_sysreg_s(SYS_ID_AA64DFR0_EL1);
+
+       return ((dfr0 >> ID_AA64DFR0_TRACEVER_SHIFT) & 0xfUL) > 0;
+}
+
+static bool etm4_init_sysreg_access(struct etmv4_drvdata *drvdata,
+                                   struct csdev_access *csa)
+{
+       u32 devarch;
+
+       if (!cpu_supports_sysreg_trace())
+               return false;
+
+       /*
+        * ETMs implementing sysreg access must implement TRCDEVARCH.
+        */
+       devarch = read_etm4x_sysreg_const_offset(TRCDEVARCH);
+       if ((devarch & ETM_DEVARCH_ID_MASK) != ETM_DEVARCH_ETMv4x_ARCH)
+               return false;
+       *csa = (struct csdev_access) {
+               .io_mem = false,
+               .read   = etm4x_sysreg_read,
+               .write  = etm4x_sysreg_write,
+       };
+
+       drvdata->arch = etm_devarch_to_arch(devarch);
+       return true;
+}
+
+static bool etm4_init_iomem_access(struct etmv4_drvdata *drvdata,
+                                  struct csdev_access *csa)
+{
+       u32 devarch = readl_relaxed(drvdata->base + TRCDEVARCH);
+       u32 idr1 = readl_relaxed(drvdata->base + TRCIDR1);
+
+       /*
+        * All ETMs must implement TRCDEVARCH to indicate that
+        * the component is an ETMv4. To support any broken
+        * implementations we fall back to a TRCIDR1 check, which
+        * is not really reliable.
+        */
+       if ((devarch & ETM_DEVARCH_ID_MASK) == ETM_DEVARCH_ETMv4x_ARCH) {
+               drvdata->arch = etm_devarch_to_arch(devarch);
+       } else {
+               pr_warn("CPU%d: ETM4x incompatible TRCDEVARCH: %x, falling back to TRCIDR1\n",
+                       smp_processor_id(), devarch);
+
+               if (ETM_TRCIDR1_ARCH_MAJOR(idr1) != ETM_TRCIDR1_ARCH_ETMv4)
+                       return false;
+               drvdata->arch = etm_trcidr_to_arch(idr1);
+       }
+
+       *csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+       return true;
+}
+
+static bool etm4_init_csdev_access(struct etmv4_drvdata *drvdata,
+                                  struct csdev_access *csa)
+{
+       /*
+        * Always choose memory-mapped I/O when a memory map is
+        * available, to prevent sysreg access on broken
+        * systems.
+        */
+       if (drvdata->base)
+               return etm4_init_iomem_access(drvdata, csa);
+
+       if (etm4_init_sysreg_access(drvdata, csa))
+               return true;
+
+       return false;
+}
+
+static void cpu_enable_tracing(void)
+{
+       u64 dfr0 = read_sysreg(id_aa64dfr0_el1);
+       u64 trfcr;
+
+       if (!cpuid_feature_extract_unsigned_field(dfr0, ID_AA64DFR0_TRACE_FILT_SHIFT))
+               return;
+
+       /*
+        * If the CPU supports v8.4 SelfHosted Tracing, enable
+        * tracing at the kernel EL and EL0, forcing the use of
+        * virtual time as the timestamp.
+        */
+       trfcr = (TRFCR_ELx_TS_VIRTUAL |
+                TRFCR_ELx_ExTRE |
+                TRFCR_ELx_E0TRE);
+
+       /* If we are running at EL2, allow tracing the CONTEXTIDR_EL2. */
+       if (is_kernel_in_hyp_mode())
+               trfcr |= TRFCR_EL2_CX;
+
+       write_sysreg_s(trfcr, SYS_TRFCR_EL1);
+}
+
 static void etm4_init_arch_data(void *info)
 {
        u32 etmidr0;
-       u32 etmidr1;
        u32 etmidr2;
        u32 etmidr3;
        u32 etmidr4;
        u32 etmidr5;
-       struct etmv4_drvdata *drvdata = info;
+       struct etm4_init_arg *init_arg = info;
+       struct etmv4_drvdata *drvdata;
+       struct csdev_access *csa;
        int i;
 
+       drvdata = init_arg->drvdata;
+       csa = init_arg->csa;
+
+       /*
+        * If we are unable to detect the access mechanism,
+        * or unable to detect the trace unit type, fail
+        * early.
+        */
+       if (!etm4_init_csdev_access(drvdata, csa))
+               return;
+
        /* Make sure all registers are accessible */
-       etm4_os_unlock(drvdata);
+       etm4_os_unlock_csa(drvdata, csa);
+       etm4_cs_unlock(drvdata, csa);
 
-       CS_UNLOCK(drvdata->base);
+       etm4_check_arch_features(drvdata, init_arg->pid);
 
        /* find all capabilities of the tracing unit */
-       etmidr0 = readl_relaxed(drvdata->base + TRCIDR0);
+       etmidr0 = etm4x_relaxed_read32(csa, TRCIDR0);
 
        /* INSTP0, bits[2:1] P0 tracing support field */
        if (BMVAL(etmidr0, 1, 1) && BMVAL(etmidr0, 2, 2))
@@ -768,17 +966,8 @@ static void etm4_init_arch_data(void *info)
        /* TSSIZE, bits[28:24] Global timestamp size field */
        drvdata->ts_size = BMVAL(etmidr0, 24, 28);
 
-       /* base architecture of trace unit */
-       etmidr1 = readl_relaxed(drvdata->base + TRCIDR1);
-       /*
-        * TRCARCHMIN, bits[7:4] architecture the minor version number
-        * TRCARCHMAJ, bits[11:8] architecture major versin number
-        */
-       drvdata->arch = BMVAL(etmidr1, 4, 11);
-       drvdata->config.arch = drvdata->arch;
-
        /* maximum size of resources */
-       etmidr2 = readl_relaxed(drvdata->base + TRCIDR2);
+       etmidr2 = etm4x_relaxed_read32(csa, TRCIDR2);
        /* CIDSIZE, bits[9:5] Indicates the Context ID size */
        drvdata->ctxid_size = BMVAL(etmidr2, 5, 9);
        /* VMIDSIZE, bits[14:10] Indicates the VMID size */
@@ -786,11 +975,12 @@ static void etm4_init_arch_data(void *info)
        /* CCSIZE, bits[28:25] size of the cycle counter in bits minus 12 */
        drvdata->ccsize = BMVAL(etmidr2, 25, 28);
 
-       etmidr3 = readl_relaxed(drvdata->base + TRCIDR3);
+       etmidr3 = etm4x_relaxed_read32(csa, TRCIDR3);
        /* CCITMIN, bits[11:0] minimum threshold value that can be programmed */
        drvdata->ccitmin = BMVAL(etmidr3, 0, 11);
        /* EXLEVEL_S, bits[19:16] Secure state instruction tracing */
        drvdata->s_ex_level = BMVAL(etmidr3, 16, 19);
+       drvdata->config.s_ex_level = drvdata->s_ex_level;
        /* EXLEVEL_NS, bits[23:20] Non-secure state instruction tracing */
        drvdata->ns_ex_level = BMVAL(etmidr3, 20, 23);
 
@@ -836,7 +1026,7 @@ static void etm4_init_arch_data(void *info)
                drvdata->nooverflow = false;
 
        /* number of resources trace unit supports */
-       etmidr4 = readl_relaxed(drvdata->base + TRCIDR4);
+       etmidr4 = etm4x_relaxed_read32(csa, TRCIDR4);
        /* NUMACPAIRS, bits[0:3] number of addr comparator pairs for tracing */
        drvdata->nr_addr_cmp = BMVAL(etmidr4, 0, 3);
        /* NUMPC, bits[15:12] number of PE comparator inputs for tracing */
@@ -852,7 +1042,7 @@ static void etm4_init_arch_data(void *info)
         * Otherwise for values 0x1 and above the number is N + 1 as per v4.2.
         */
        drvdata->nr_resource = BMVAL(etmidr4, 16, 19);
-       if ((drvdata->arch < ETM4X_ARCH_4V3) || (drvdata->nr_resource > 0))
+       if ((drvdata->arch < ETM_ARCH_V4_3) || (drvdata->nr_resource > 0))
                drvdata->nr_resource += 1;
        /*
         * NUMSSCC, bits[23:20] the number of single-shot
@@ -862,14 +1052,14 @@ static void etm4_init_arch_data(void *info)
        drvdata->nr_ss_cmp = BMVAL(etmidr4, 20, 23);
        for (i = 0; i < drvdata->nr_ss_cmp; i++) {
                drvdata->config.ss_status[i] =
-                       readl_relaxed(drvdata->base + TRCSSCSRn(i));
+                       etm4x_relaxed_read32(csa, TRCSSCSRn(i));
        }
        /* NUMCIDC, bits[27:24] number of Context ID comparators for tracing */
        drvdata->numcidc = BMVAL(etmidr4, 24, 27);
        /* NUMVMIDC, bits[31:28] number of VMID comparators for tracing */
        drvdata->numvmidc = BMVAL(etmidr4, 28, 31);
 
-       etmidr5 = readl_relaxed(drvdata->base + TRCIDR5);
+       etmidr5 = etm4x_relaxed_read32(csa, TRCIDR5);
        /* NUMEXTIN, bits[8:0] number of external inputs implemented */
        drvdata->nr_ext_inp = BMVAL(etmidr5, 0, 8);
        /* TRACEIDSIZE, bits[21:16] indicates the trace ID width */
@@ -891,23 +1081,20 @@ static void etm4_init_arch_data(void *info)
        drvdata->nrseqstate = BMVAL(etmidr5, 25, 27);
        /* NUMCNTR, bits[30:28] number of counters available for tracing */
        drvdata->nr_cntr = BMVAL(etmidr5, 28, 30);
-       CS_LOCK(drvdata->base);
+       etm4_cs_lock(drvdata, csa);
+       cpu_enable_tracing();
+}
+
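
For readers tracking the field extractions above: BMVAL() is the CoreSight helper for pulling a bit-field out of a register value. A rough sketch of its definition (it lives in coresight-priv.h; exact parenthesization may differ):

/* Extract bits [msb:lsb] of val, right-justified. */
#define BMVAL(val, lsb, msb)	((val & GENMASK(msb, lsb)) >> lsb)

/* e.g. drvdata->ccitmin below is TRCIDR3 bits [11:0]: */
u32 ccitmin = BMVAL(etmidr3, 0, 11);
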
+static inline u32 etm4_get_victlr_access_type(struct etmv4_config *config)
+{
+       return etm4_get_access_type(config) << TRCVICTLR_EXLEVEL_SHIFT;
 }
 
 /* Set ELx trace filter access in the TRCVICTLR register */
 static void etm4_set_victlr_access(struct etmv4_config *config)
 {
-       u64 access_type;
-
-       config->vinst_ctrl &= ~(ETM_EXLEVEL_S_VICTLR_MASK | ETM_EXLEVEL_NS_VICTLR_MASK);
-
-       /*
-        * TRCVICTLR::EXLEVEL_NS:EXLEVELS: Set kernel / user filtering
-        * bits in vinst_ctrl, same bit pattern as TRCACATRn values returned by
-        * etm4_get_access_type() but with a relative shift in this register.
-        */
-       access_type = etm4_get_access_type(config) << ETM_EXLEVEL_LSHIFT_TRCVICTLR;
-       config->vinst_ctrl |= (u32)access_type;
+       config->vinst_ctrl &= ~TRCVICTLR_EXLEVEL_MASK;
+       config->vinst_ctrl |= etm4_get_victlr_access_type(config);
 }
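
A sketch of the layout this relies on (shift values inferred from the sysfs accessors and register comments elsewhere in this patch, so treat them as assumptions): etm4_get_access_type() builds the EXLEVEL bits in a register-independent layout, and each consumer shifts them into its own field.

/*
 * Generic EXLEVEL mask:  EXLEVEL_S in bits[3:0], EXLEVEL_NS in bits[7:4].
 * << TRCVICTLR_EXLEVEL_SHIFT (16) -> TRCVICTLR  bits[19:16] / [23:20]
 * << TRCACATR_EXLEVEL_SHIFT  (8)  -> TRCACATRn  bits[11:8]  / [15:12]
 */
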
 
 static void etm4_set_default_config(struct etmv4_config *config)
@@ -937,12 +1124,9 @@ static u64 etm4_get_ns_access_type(struct etmv4_config *config)
        u64 access_type = 0;
 
        /*
-        * EXLEVEL_NS, bits[15:12]
-        * The Exception levels are:
-        *   Bit[12] Exception level 0 - Application
-        *   Bit[13] Exception level 1 - OS
-        *   Bit[14] Exception level 2 - Hypervisor
-        *   Bit[15] Never implemented
+        * EXLEVEL_NS, for Non-secure Exception levels.
+        * The mask here is a generic value and must be
+        * shifted to the corresponding register field before use.
         */
        if (!is_kernel_in_hyp_mode()) {
                /* Stay away from hypervisor mode for non-VHE */
@@ -959,27 +1143,26 @@ static u64 etm4_get_ns_access_type(struct etmv4_config *config)
        return access_type;
 }
 
+/*
+ * Construct the exception level masks for a given config.
+ * The result must be shifted to the corresponding register field
+ * before use.
+ */
 static u64 etm4_get_access_type(struct etmv4_config *config)
 {
-       u64 access_type = etm4_get_ns_access_type(config);
-       u64 s_hyp = (config->arch & 0x0f) >= 0x4 ? ETM_EXLEVEL_S_HYP : 0;
-
-       /*
-        * EXLEVEL_S, bits[11:8], don't trace anything happening
-        * in secure state.
-        */
-       access_type |= (ETM_EXLEVEL_S_APP       |
-                       ETM_EXLEVEL_S_OS        |
-                       s_hyp                   |
-                       ETM_EXLEVEL_S_MON);
+       /* All Secure exception levels are excluded from the trace */
+       return etm4_get_ns_access_type(config) | (u64)config->s_ex_level;
+}
 
-       return access_type;
+static u64 etm4_get_comparator_access_type(struct etmv4_config *config)
+{
+       return etm4_get_access_type(config) << TRCACATR_EXLEVEL_SHIFT;
 }
 
 static void etm4_set_comparator_filter(struct etmv4_config *config,
                                       u64 start, u64 stop, int comparator)
 {
-       u64 access_type = etm4_get_access_type(config);
+       u64 access_type = etm4_get_comparator_access_type(config);
 
        /* First half of default address comparator */
        config->addr_val[comparator] = start;
@@ -1014,7 +1197,7 @@ static void etm4_set_start_stop_filter(struct etmv4_config *config,
                                       enum etm_addr_type type)
 {
        int shift;
-       u64 access_type = etm4_get_access_type(config);
+       u64 access_type = etm4_get_comparator_access_type(config);
 
        /* Configure the comparator */
        config->addr_val[comparator] = address;
@@ -1255,7 +1438,15 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 {
        int i, ret = 0;
        struct etmv4_save_state *state;
-       struct device *etm_dev = &drvdata->csdev->dev;
+       struct coresight_device *csdev = drvdata->csdev;
+       struct csdev_access *csa;
+       struct device *etm_dev;
+
+       if (WARN_ON(!csdev))
+               return -ENODEV;
+
+       etm_dev = &csdev->dev;
+       csa = &csdev->access;
 
        /*
         * As recommended by 3.4.1 ("The procedure when powering down the PE")
@@ -1264,14 +1455,12 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
        dsb(sy);
        isb();
 
-       CS_UNLOCK(drvdata->base);
-
+       etm4_cs_unlock(drvdata, csa);
        /* Lock the OS lock to disable trace and external debugger access */
        etm4_os_lock(drvdata);
 
        /* wait for TRCSTATR.PMSTABLE to go up */
-       if (coresight_timeout(drvdata->base, TRCSTATR,
-                             TRCSTATR_PMSTABLE_BIT, 1)) {
+       if (coresight_timeout(csa, TRCSTATR, TRCSTATR_PMSTABLE_BIT, 1)) {
                dev_err(etm_dev,
                        "timeout while waiting for PM Stable Status\n");
                etm4_os_unlock(drvdata);
@@ -1281,55 +1470,57 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
 
        state = drvdata->save_state;
 
-       state->trcprgctlr = readl(drvdata->base + TRCPRGCTLR);
+       state->trcprgctlr = etm4x_read32(csa, TRCPRGCTLR);
        if (drvdata->nr_pe)
-               state->trcprocselr = readl(drvdata->base + TRCPROCSELR);
-       state->trcconfigr = readl(drvdata->base + TRCCONFIGR);
-       state->trcauxctlr = readl(drvdata->base + TRCAUXCTLR);
-       state->trceventctl0r = readl(drvdata->base + TRCEVENTCTL0R);
-       state->trceventctl1r = readl(drvdata->base + TRCEVENTCTL1R);
-       state->trcstallctlr = readl(drvdata->base + TRCSTALLCTLR);
-       state->trctsctlr = readl(drvdata->base + TRCTSCTLR);
-       state->trcsyncpr = readl(drvdata->base + TRCSYNCPR);
-       state->trcccctlr = readl(drvdata->base + TRCCCCTLR);
-       state->trcbbctlr = readl(drvdata->base + TRCBBCTLR);
-       state->trctraceidr = readl(drvdata->base + TRCTRACEIDR);
-       state->trcqctlr = readl(drvdata->base + TRCQCTLR);
-
-       state->trcvictlr = readl(drvdata->base + TRCVICTLR);
-       state->trcviiectlr = readl(drvdata->base + TRCVIIECTLR);
-       state->trcvissctlr = readl(drvdata->base + TRCVISSCTLR);
+               state->trcprocselr = etm4x_read32(csa, TRCPROCSELR);
+       state->trcconfigr = etm4x_read32(csa, TRCCONFIGR);
+       state->trcauxctlr = etm4x_read32(csa, TRCAUXCTLR);
+       state->trceventctl0r = etm4x_read32(csa, TRCEVENTCTL0R);
+       state->trceventctl1r = etm4x_read32(csa, TRCEVENTCTL1R);
+       if (drvdata->stallctl)
+               state->trcstallctlr = etm4x_read32(csa, TRCSTALLCTLR);
+       state->trctsctlr = etm4x_read32(csa, TRCTSCTLR);
+       state->trcsyncpr = etm4x_read32(csa, TRCSYNCPR);
+       state->trcccctlr = etm4x_read32(csa, TRCCCCTLR);
+       state->trcbbctlr = etm4x_read32(csa, TRCBBCTLR);
+       state->trctraceidr = etm4x_read32(csa, TRCTRACEIDR);
+       state->trcqctlr = etm4x_read32(csa, TRCQCTLR);
+
+       state->trcvictlr = etm4x_read32(csa, TRCVICTLR);
+       state->trcviiectlr = etm4x_read32(csa, TRCVIIECTLR);
+       state->trcvissctlr = etm4x_read32(csa, TRCVISSCTLR);
        if (drvdata->nr_pe_cmp)
-               state->trcvipcssctlr = readl(drvdata->base + TRCVIPCSSCTLR);
-       state->trcvdctlr = readl(drvdata->base + TRCVDCTLR);
-       state->trcvdsacctlr = readl(drvdata->base + TRCVDSACCTLR);
-       state->trcvdarcctlr = readl(drvdata->base + TRCVDARCCTLR);
+               state->trcvipcssctlr = etm4x_read32(csa, TRCVIPCSSCTLR);
+       state->trcvdctlr = etm4x_read32(csa, TRCVDCTLR);
+       state->trcvdsacctlr = etm4x_read32(csa, TRCVDSACCTLR);
+       state->trcvdarcctlr = etm4x_read32(csa, TRCVDARCCTLR);
 
        for (i = 0; i < drvdata->nrseqstate - 1; i++)
-               state->trcseqevr[i] = readl(drvdata->base + TRCSEQEVRn(i));
+               state->trcseqevr[i] = etm4x_read32(csa, TRCSEQEVRn(i));
 
-       state->trcseqrstevr = readl(drvdata->base + TRCSEQRSTEVR);
-       state->trcseqstr = readl(drvdata->base + TRCSEQSTR);
-       state->trcextinselr = readl(drvdata->base + TRCEXTINSELR);
+       state->trcseqrstevr = etm4x_read32(csa, TRCSEQRSTEVR);
+       state->trcseqstr = etm4x_read32(csa, TRCSEQSTR);
+       state->trcextinselr = etm4x_read32(csa, TRCEXTINSELR);
 
        for (i = 0; i < drvdata->nr_cntr; i++) {
-               state->trccntrldvr[i] = readl(drvdata->base + TRCCNTRLDVRn(i));
-               state->trccntctlr[i] = readl(drvdata->base + TRCCNTCTLRn(i));
-               state->trccntvr[i] = readl(drvdata->base + TRCCNTVRn(i));
+               state->trccntrldvr[i] = etm4x_read32(csa, TRCCNTRLDVRn(i));
+               state->trccntctlr[i] = etm4x_read32(csa, TRCCNTCTLRn(i));
+               state->trccntvr[i] = etm4x_read32(csa, TRCCNTVRn(i));
        }
 
        for (i = 0; i < drvdata->nr_resource * 2; i++)
-               state->trcrsctlr[i] = readl(drvdata->base + TRCRSCTLRn(i));
+               state->trcrsctlr[i] = etm4x_read32(csa, TRCRSCTLRn(i));
 
        for (i = 0; i < drvdata->nr_ss_cmp; i++) {
-               state->trcssccr[i] = readl(drvdata->base + TRCSSCCRn(i));
-               state->trcsscsr[i] = readl(drvdata->base + TRCSSCSRn(i));
-               state->trcsspcicr[i] = readl(drvdata->base + TRCSSPCICRn(i));
+               state->trcssccr[i] = etm4x_read32(csa, TRCSSCCRn(i));
+               state->trcsscsr[i] = etm4x_read32(csa, TRCSSCSRn(i));
+               if (etm4x_sspcicrn_present(drvdata, i))
+                       state->trcsspcicr[i] = etm4x_read32(csa, TRCSSPCICRn(i));
        }
 
        for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-               state->trcacvr[i] = readq(drvdata->base + TRCACVRn(i));
-               state->trcacatr[i] = readq(drvdata->base + TRCACATRn(i));
+               state->trcacvr[i] = etm4x_read64(csa, TRCACVRn(i));
+               state->trcacatr[i] = etm4x_read64(csa, TRCACATRn(i));
        }
 
        /*
@@ -1340,25 +1531,26 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
         */
 
        for (i = 0; i < drvdata->numcidc; i++)
-               state->trccidcvr[i] = readq(drvdata->base + TRCCIDCVRn(i));
+               state->trccidcvr[i] = etm4x_read64(csa, TRCCIDCVRn(i));
 
        for (i = 0; i < drvdata->numvmidc; i++)
-               state->trcvmidcvr[i] = readq(drvdata->base + TRCVMIDCVRn(i));
+               state->trcvmidcvr[i] = etm4x_read64(csa, TRCVMIDCVRn(i));
 
-       state->trccidcctlr0 = readl(drvdata->base + TRCCIDCCTLR0);
+       state->trccidcctlr0 = etm4x_read32(csa, TRCCIDCCTLR0);
        if (drvdata->numcidc > 4)
-               state->trccidcctlr1 = readl(drvdata->base + TRCCIDCCTLR1);
+               state->trccidcctlr1 = etm4x_read32(csa, TRCCIDCCTLR1);
 
-       state->trcvmidcctlr0 = readl(drvdata->base + TRCVMIDCCTLR0);
+       state->trcvmidcctlr0 = etm4x_read32(csa, TRCVMIDCCTLR0);
        if (drvdata->numvmidc > 4)
-               state->trcvmidcctlr1 = readl(drvdata->base + TRCVMIDCCTLR1);
+               state->trcvmidcctlr1 = etm4x_read32(csa, TRCVMIDCCTLR1);
 
-       state->trcclaimset = readl(drvdata->base + TRCCLAIMCLR);
+       state->trcclaimset = etm4x_read32(csa, TRCCLAIMCLR);
 
-       state->trcpdcr = readl(drvdata->base + TRCPDCR);
+       if (!drvdata->skip_power_up)
+               state->trcpdcr = etm4x_read32(csa, TRCPDCR);
 
        /* wait for TRCSTATR.IDLE to go up */
-       if (coresight_timeout(drvdata->base, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) {
+       if (coresight_timeout(csa, TRCSTATR, TRCSTATR_IDLE_BIT, 1)) {
                dev_err(etm_dev,
                        "timeout while waiting for Idle Trace Status\n");
                etm4_os_unlock(drvdata);
@@ -1373,11 +1565,11 @@ static int etm4_cpu_save(struct etmv4_drvdata *drvdata)
         * potentially save power on systems that respect the TRCPDCR_PU
         * despite requesting software to save/restore state.
         */
-       writel_relaxed((state->trcpdcr & ~TRCPDCR_PU),
-                       drvdata->base + TRCPDCR);
-
+       if (!drvdata->skip_power_up)
+               etm4x_relaxed_write32(csa, (state->trcpdcr & ~TRCPDCR_PU),
+                                     TRCPDCR);
 out:
-       CS_LOCK(drvdata->base);
+       etm4_cs_lock(drvdata, csa);
        return ret;
 }
 
@@ -1385,91 +1577,83 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
 {
        int i;
        struct etmv4_save_state *state = drvdata->save_state;
+       struct csdev_access tmp_csa = CSDEV_ACCESS_IOMEM(drvdata->base);
+       struct csdev_access *csa = &tmp_csa;
 
-       CS_UNLOCK(drvdata->base);
-
-       writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+       etm4_cs_unlock(drvdata, csa);
+       etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
 
-       writel_relaxed(state->trcprgctlr, drvdata->base + TRCPRGCTLR);
+       etm4x_relaxed_write32(csa, state->trcprgctlr, TRCPRGCTLR);
        if (drvdata->nr_pe)
-               writel_relaxed(state->trcprocselr, drvdata->base + TRCPROCSELR);
-       writel_relaxed(state->trcconfigr, drvdata->base + TRCCONFIGR);
-       writel_relaxed(state->trcauxctlr, drvdata->base + TRCAUXCTLR);
-       writel_relaxed(state->trceventctl0r, drvdata->base + TRCEVENTCTL0R);
-       writel_relaxed(state->trceventctl1r, drvdata->base + TRCEVENTCTL1R);
-       writel_relaxed(state->trcstallctlr, drvdata->base + TRCSTALLCTLR);
-       writel_relaxed(state->trctsctlr, drvdata->base + TRCTSCTLR);
-       writel_relaxed(state->trcsyncpr, drvdata->base + TRCSYNCPR);
-       writel_relaxed(state->trcccctlr, drvdata->base + TRCCCCTLR);
-       writel_relaxed(state->trcbbctlr, drvdata->base + TRCBBCTLR);
-       writel_relaxed(state->trctraceidr, drvdata->base + TRCTRACEIDR);
-       writel_relaxed(state->trcqctlr, drvdata->base + TRCQCTLR);
-
-       writel_relaxed(state->trcvictlr, drvdata->base + TRCVICTLR);
-       writel_relaxed(state->trcviiectlr, drvdata->base + TRCVIIECTLR);
-       writel_relaxed(state->trcvissctlr, drvdata->base + TRCVISSCTLR);
+               etm4x_relaxed_write32(csa, state->trcprocselr, TRCPROCSELR);
+       etm4x_relaxed_write32(csa, state->trcconfigr, TRCCONFIGR);
+       etm4x_relaxed_write32(csa, state->trcauxctlr, TRCAUXCTLR);
+       etm4x_relaxed_write32(csa, state->trceventctl0r, TRCEVENTCTL0R);
+       etm4x_relaxed_write32(csa, state->trceventctl1r, TRCEVENTCTL1R);
+       if (drvdata->stallctl)
+               etm4x_relaxed_write32(csa, state->trcstallctlr, TRCSTALLCTLR);
+       etm4x_relaxed_write32(csa, state->trctsctlr, TRCTSCTLR);
+       etm4x_relaxed_write32(csa, state->trcsyncpr, TRCSYNCPR);
+       etm4x_relaxed_write32(csa, state->trcccctlr, TRCCCCTLR);
+       etm4x_relaxed_write32(csa, state->trcbbctlr, TRCBBCTLR);
+       etm4x_relaxed_write32(csa, state->trctraceidr, TRCTRACEIDR);
+       etm4x_relaxed_write32(csa, state->trcqctlr, TRCQCTLR);
+
+       etm4x_relaxed_write32(csa, state->trcvictlr, TRCVICTLR);
+       etm4x_relaxed_write32(csa, state->trcviiectlr, TRCVIIECTLR);
+       etm4x_relaxed_write32(csa, state->trcvissctlr, TRCVISSCTLR);
        if (drvdata->nr_pe_cmp)
-               writel_relaxed(state->trcvipcssctlr, drvdata->base + TRCVIPCSSCTLR);
-       writel_relaxed(state->trcvdctlr, drvdata->base + TRCVDCTLR);
-       writel_relaxed(state->trcvdsacctlr, drvdata->base + TRCVDSACCTLR);
-       writel_relaxed(state->trcvdarcctlr, drvdata->base + TRCVDARCCTLR);
+               etm4x_relaxed_write32(csa, state->trcvipcssctlr, TRCVIPCSSCTLR);
+       etm4x_relaxed_write32(csa, state->trcvdctlr, TRCVDCTLR);
+       etm4x_relaxed_write32(csa, state->trcvdsacctlr, TRCVDSACCTLR);
+       etm4x_relaxed_write32(csa, state->trcvdarcctlr, TRCVDARCCTLR);
 
        for (i = 0; i < drvdata->nrseqstate - 1; i++)
-               writel_relaxed(state->trcseqevr[i],
-                              drvdata->base + TRCSEQEVRn(i));
+               etm4x_relaxed_write32(csa, state->trcseqevr[i], TRCSEQEVRn(i));
 
-       writel_relaxed(state->trcseqrstevr, drvdata->base + TRCSEQRSTEVR);
-       writel_relaxed(state->trcseqstr, drvdata->base + TRCSEQSTR);
-       writel_relaxed(state->trcextinselr, drvdata->base + TRCEXTINSELR);
+       etm4x_relaxed_write32(csa, state->trcseqrstevr, TRCSEQRSTEVR);
+       etm4x_relaxed_write32(csa, state->trcseqstr, TRCSEQSTR);
+       etm4x_relaxed_write32(csa, state->trcextinselr, TRCEXTINSELR);
 
        for (i = 0; i < drvdata->nr_cntr; i++) {
-               writel_relaxed(state->trccntrldvr[i],
-                              drvdata->base + TRCCNTRLDVRn(i));
-               writel_relaxed(state->trccntctlr[i],
-                              drvdata->base + TRCCNTCTLRn(i));
-               writel_relaxed(state->trccntvr[i],
-                              drvdata->base + TRCCNTVRn(i));
+               etm4x_relaxed_write32(csa, state->trccntrldvr[i], TRCCNTRLDVRn(i));
+               etm4x_relaxed_write32(csa, state->trccntctlr[i], TRCCNTCTLRn(i));
+               etm4x_relaxed_write32(csa, state->trccntvr[i], TRCCNTVRn(i));
        }
 
        for (i = 0; i < drvdata->nr_resource * 2; i++)
-               writel_relaxed(state->trcrsctlr[i],
-                              drvdata->base + TRCRSCTLRn(i));
+               etm4x_relaxed_write32(csa, state->trcrsctlr[i], TRCRSCTLRn(i));
 
        for (i = 0; i < drvdata->nr_ss_cmp; i++) {
-               writel_relaxed(state->trcssccr[i],
-                              drvdata->base + TRCSSCCRn(i));
-               writel_relaxed(state->trcsscsr[i],
-                              drvdata->base + TRCSSCSRn(i));
-               writel_relaxed(state->trcsspcicr[i],
-                              drvdata->base + TRCSSPCICRn(i));
+               etm4x_relaxed_write32(csa, state->trcssccr[i], TRCSSCCRn(i));
+               etm4x_relaxed_write32(csa, state->trcsscsr[i], TRCSSCSRn(i));
+               if (etm4x_sspcicrn_present(drvdata, i))
+                       etm4x_relaxed_write32(csa, state->trcsspcicr[i], TRCSSPCICRn(i));
        }
 
        for (i = 0; i < drvdata->nr_addr_cmp * 2; i++) {
-               writeq_relaxed(state->trcacvr[i],
-                              drvdata->base + TRCACVRn(i));
-               writeq_relaxed(state->trcacatr[i],
-                              drvdata->base + TRCACATRn(i));
+               etm4x_relaxed_write64(csa, state->trcacvr[i], TRCACVRn(i));
+               etm4x_relaxed_write64(csa, state->trcacatr[i], TRCACATRn(i));
        }
 
        for (i = 0; i < drvdata->numcidc; i++)
-               writeq_relaxed(state->trccidcvr[i],
-                              drvdata->base + TRCCIDCVRn(i));
+               etm4x_relaxed_write64(csa, state->trccidcvr[i], TRCCIDCVRn(i));
 
        for (i = 0; i < drvdata->numvmidc; i++)
-               writeq_relaxed(state->trcvmidcvr[i],
-                              drvdata->base + TRCVMIDCVRn(i));
+               etm4x_relaxed_write64(csa, state->trcvmidcvr[i], TRCVMIDCVRn(i));
 
-       writel_relaxed(state->trccidcctlr0, drvdata->base + TRCCIDCCTLR0);
+       etm4x_relaxed_write32(csa, state->trccidcctlr0, TRCCIDCCTLR0);
        if (drvdata->numcidc > 4)
-               writel_relaxed(state->trccidcctlr1, drvdata->base + TRCCIDCCTLR1);
+               etm4x_relaxed_write32(csa, state->trccidcctlr1, TRCCIDCCTLR1);
 
-       writel_relaxed(state->trcvmidcctlr0, drvdata->base + TRCVMIDCCTLR0);
+       etm4x_relaxed_write32(csa, state->trcvmidcctlr0, TRCVMIDCCTLR0);
        if (drvdata->numvmidc > 4)
-               writel_relaxed(state->trcvmidcctlr1, drvdata->base + TRCVMIDCCTLR1);
+               etm4x_relaxed_write32(csa, state->trcvmidcctlr1, TRCVMIDCCTLR1);
 
-       writel_relaxed(state->trcclaimset, drvdata->base + TRCCLAIMSET);
+       etm4x_relaxed_write32(csa, state->trcclaimset, TRCCLAIMSET);
 
-       writel_relaxed(state->trcpdcr, drvdata->base + TRCPDCR);
+       if (!drvdata->skip_power_up)
+               etm4x_relaxed_write32(csa, state->trcpdcr, TRCPDCR);
 
        drvdata->state_needs_restore = false;
 
@@ -1482,7 +1666,7 @@ static void etm4_cpu_restore(struct etmv4_drvdata *drvdata)
 
        /* Unlock the OS lock to re-enable trace and external debug access */
        etm4_os_unlock(drvdata);
-       CS_LOCK(drvdata->base);
+       etm4_cs_lock(drvdata, csa);
 }
 
 static int etm4_cpu_pm_notify(struct notifier_block *nb, unsigned long cmd,
@@ -1569,15 +1753,13 @@ static void etm4_pm_clear(void)
        }
 }
 
-static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
+static int etm4_probe(struct device *dev, void __iomem *base, u32 etm_pid)
 {
        int ret;
-       void __iomem *base;
-       struct device *dev = &adev->dev;
        struct coresight_platform_data *pdata = NULL;
        struct etmv4_drvdata *drvdata;
-       struct resource *res = &adev->res;
        struct coresight_desc desc = { 0 };
+       struct etm4_init_arg init_arg = { 0 };
 
        drvdata = devm_kzalloc(dev, sizeof(*drvdata), GFP_KERNEL);
        if (!drvdata)
@@ -1596,14 +1778,6 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
                        return -ENOMEM;
        }
 
-       if (fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
-               drvdata->skip_power_up = true;
-
-       /* Validity for the resource is already checked by the AMBA core */
-       base = devm_ioremap_resource(dev, res);
-       if (IS_ERR(base))
-               return PTR_ERR(base);
-
        drvdata->base = base;
 
        spin_lock_init(&drvdata->spinlock);
@@ -1616,13 +1790,22 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
        if (!desc.name)
                return -ENOMEM;
 
+       init_arg.drvdata = drvdata;
+       init_arg.csa = &desc.access;
+       init_arg.pid = etm_pid;
+
        if (smp_call_function_single(drvdata->cpu,
-                               etm4_init_arch_data,  drvdata, 1))
+                               etm4_init_arch_data,  &init_arg, 1))
                dev_err(dev, "ETM arch init failed\n");
 
-       if (etm4_arch_supported(drvdata->arch) == false)
+       if (!drvdata->arch)
                return -EINVAL;
 
+       /* TRCPDCR is not accessible with system instructions. */
+       if (!desc.access.io_mem ||
+           fwnode_property_present(dev_fwnode(dev), "qcom,skip-power-up"))
+               drvdata->skip_power_up = true;
+
        etm4_init_trace_id(drvdata);
        etm4_set_default(&drvdata->config);
 
@@ -1630,7 +1813,7 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
        if (IS_ERR(pdata))
                return PTR_ERR(pdata);
 
-       adev->dev.platform_data = pdata;
+       dev->platform_data = pdata;
 
        desc.type = CORESIGHT_DEV_TYPE_SOURCE;
        desc.subtype.source_subtype = CORESIGHT_DEV_SUBTYPE_SOURCE_PROC;
@@ -1650,25 +1833,61 @@ static int etm4_probe(struct amba_device *adev, const struct amba_id *id)
 
        etmdrvdata[drvdata->cpu] = drvdata;
 
-       pm_runtime_put(&adev->dev);
        dev_info(&drvdata->csdev->dev, "CPU%d: ETM v%d.%d initialized\n",
-                drvdata->cpu, drvdata->arch >> 4, drvdata->arch & 0xf);
+                drvdata->cpu, ETM_ARCH_MAJOR_VERSION(drvdata->arch),
+                ETM_ARCH_MINOR_VERSION(drvdata->arch));
 
        if (boot_enable) {
                coresight_enable(drvdata->csdev);
                drvdata->boot_enable = true;
        }
 
-       etm4_check_arch_features(drvdata, id->id);
-
        return 0;
 }
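
The cross-call argument bundle used above is new with this refactor; a sketch of its shape, with the fields inferred from the assignments in etm4_probe():

struct etm4_init_arg {
	struct etmv4_drvdata	*drvdata;
	struct csdev_access	*csa;
	u32			pid;
};

etm4_init_arch_data() then runs on drvdata->cpu and fills in both the architecture data and the access method (MMIO vs. system instructions) for this ETM.
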
 
+static int etm4_probe_amba(struct amba_device *adev, const struct amba_id *id)
+{
+       void __iomem *base;
+       struct device *dev = &adev->dev;
+       struct resource *res = &adev->res;
+       int ret;
+
+       /* Validity for the resource is already checked by the AMBA core */
+       base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(base))
+               return PTR_ERR(base);
+
+       ret = etm4_probe(dev, base, id->id);
+       if (!ret)
+               pm_runtime_put(&adev->dev);
+
+       return ret;
+}
+
+static int etm4_probe_platform_dev(struct platform_device *pdev)
+{
+       int ret;
+
+       pm_runtime_get_noresume(&pdev->dev);
+       pm_runtime_set_active(&pdev->dev);
+       pm_runtime_enable(&pdev->dev);
+
+       /*
+        * System register based devices can match the HW by
+        * reading the appropriate registers directly, so the
+        * PID can be skipped (0 is passed here).
+        */
+       ret = etm4_probe(&pdev->dev, NULL, 0);
+
+       pm_runtime_put(&pdev->dev);
+       return ret;
+}
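
A note on the runtime-PM pairing in the two probe paths, as read from the code above and the AMBA core's behaviour:

/*
 * AMBA path:     the bus core holds a runtime-PM reference across probe;
 *                etm4_probe_amba() drops it with pm_runtime_put() only
 *                when etm4_probe() succeeds.
 * Platform path: etm4_probe_platform_dev() takes its own noresume
 *                reference, marks the device active, enables runtime PM,
 *                then drops the reference once etm4_probe() returns.
 */
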
+
 static struct amba_cs_uci_id uci_id_etm4[] = {
        {
                /*  ETMv4 UCI data */
-               .devarch        = 0x47704a13,
-               .devarch_mask   = 0xfff0ffff,
+               .devarch        = ETM_DEVARCH_ETMv4x_ARCH,
+               .devarch_mask   = ETM_DEVARCH_ID_MASK,
                .devtype        = 0x00000013,
        }
 };
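
As a cross-check of the symbolic replacement (using the TRCDEVARCH field definitions added at the bottom of this patch), the named constants decode back to the raw value they replace:

/*
 * ETM_DEVARCH_ARCHITECT_ARM = (0x4 << 28) | (0b0111011 << 21) = 0x47600000
 *  | ETM_DEVARCH_PRESENT      (BIT(20))                       = 0x47700000
 *  | ETMv4 ARCHID             (0x4a13)                        = 0x47704a13
 * matching the old hard-coded .devarch value above.
 */
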
@@ -1680,15 +1899,12 @@ static void clear_etmdrvdata(void *info)
        etmdrvdata[cpu] = NULL;
 }
 
-static void etm4_remove(struct amba_device *adev)
+static int __exit etm4_remove_dev(struct etmv4_drvdata *drvdata)
 {
-       struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev);
-
        etm_perf_symlink(drvdata->csdev, false);
-
        /*
-        * Taking hotplug lock here to avoid racing between etm4_remove and
-        * CPU hotplug call backs.
+        * Taking hotplug lock here to avoid racing between etm4_remove_dev()
+        * and CPU hotplug callbacks.
         */
        cpus_read_lock();
        /*
@@ -1703,6 +1919,27 @@ static void etm4_remove(struct amba_device *adev)
        cpus_read_unlock();
 
        coresight_unregister(drvdata->csdev);
+
+       return 0;
+}
+
+static void __exit etm4_remove_amba(struct amba_device *adev)
+{
+       struct etmv4_drvdata *drvdata = dev_get_drvdata(&adev->dev);
+
+       if (drvdata)
+               etm4_remove_dev(drvdata);
+}
+
+static int __exit etm4_remove_platform_dev(struct platform_device *pdev)
+{
+       int ret = 0;
+       struct etmv4_drvdata *drvdata = dev_get_drvdata(&pdev->dev);
+
+       if (drvdata)
+               ret = etm4_remove_dev(drvdata);
+       pm_runtime_disable(&pdev->dev);
+       return ret;
 }
 
 static const struct amba_id etm4_ids[] = {
@@ -1711,6 +1948,8 @@ static const struct amba_id etm4_ids[] = {
        CS_AMBA_ID(0x000bb95a),                 /* Cortex-A72 */
        CS_AMBA_ID(0x000bb959),                 /* Cortex-A73 */
        CS_AMBA_UCI_ID(0x000bb9da, uci_id_etm4),/* Cortex-A35 */
+       CS_AMBA_UCI_ID(0x000bbd05, uci_id_etm4),/* Cortex-A55 */
+       CS_AMBA_UCI_ID(0x000bbd0a, uci_id_etm4),/* Cortex-A75 */
        CS_AMBA_UCI_ID(0x000bbd0c, uci_id_etm4),/* Neoverse N1 */
        CS_AMBA_UCI_ID(0x000f0205, uci_id_etm4),/* Qualcomm Kryo */
        CS_AMBA_UCI_ID(0x000f0211, uci_id_etm4),/* Qualcomm Kryo */
@@ -1726,17 +1965,32 @@ static const struct amba_id etm4_ids[] = {
 
 MODULE_DEVICE_TABLE(amba, etm4_ids);
 
-static struct amba_driver etm4x_driver = {
+static struct amba_driver etm4x_amba_driver = {
        .drv = {
                .name   = "coresight-etm4x",
                .owner  = THIS_MODULE,
                .suppress_bind_attrs = true,
        },
-       .probe          = etm4_probe,
-       .remove         = etm4_remove,
+       .probe          = etm4_probe_amba,
+       .remove         = etm4_remove_amba,
        .id_table       = etm4_ids,
 };
 
+static const struct of_device_id etm4_sysreg_match[] = {
+       { .compatible   = "arm,coresight-etm4x-sysreg" },
+       {}
+};
+
+static struct platform_driver etm4_platform_driver = {
+       .probe          = etm4_probe_platform_dev,
+       .remove         = etm4_remove_platform_dev,
+       .driver                 = {
+               .name                   = "coresight-etm4x",
+               .of_match_table         = etm4_sysreg_match,
+               .suppress_bind_attrs    = true,
+       },
+};
+
 static int __init etm4x_init(void)
 {
        int ret;
@@ -1747,18 +2001,28 @@ static int __init etm4x_init(void)
        if (ret)
                return ret;
 
-       ret = amba_driver_register(&etm4x_driver);
+       ret = amba_driver_register(&etm4x_amba_driver);
        if (ret) {
-               pr_err("Error registering etm4x driver\n");
-               etm4_pm_clear();
+               pr_err("Error registering etm4x AMBA driver\n");
+               goto clear_pm;
        }
 
+       ret = platform_driver_register(&etm4_platform_driver);
+       if (!ret)
+               return 0;
+
+       pr_err("Error registering etm4x platform driver\n");
+       amba_driver_unregister(&etm4x_amba_driver);
+
+clear_pm:
+       etm4_pm_clear();
        return ret;
 }
 
 static void __exit etm4x_exit(void)
 {
-       amba_driver_unregister(&etm4x_driver);
+       amba_driver_unregister(&etm4x_amba_driver);
+       platform_driver_unregister(&etm4_platform_driver);
        etm4_pm_clear();
 }
 
index 989ce7b..0995a10 100644 (file)
@@ -389,7 +389,7 @@ static ssize_t mode_store(struct device *dev,
                config->eventctrl1 &= ~BIT(12);
 
        /* bit[8], Instruction stall bit */
-       if (config->mode & ETM_MODE_ISTALL_EN)
+       if ((config->mode & ETM_MODE_ISTALL_EN) && drvdata->stallctl)
                config->stall_ctrl |= BIT(8);
        else
                config->stall_ctrl &= ~BIT(8);
@@ -743,7 +743,7 @@ static ssize_t s_exlevel_vinst_show(struct device *dev,
        struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
        struct etmv4_config *config = &drvdata->config;
 
-       val = (config->vinst_ctrl & ETM_EXLEVEL_S_VICTLR_MASK) >> 16;
+       val = (config->vinst_ctrl & TRCVICTLR_EXLEVEL_S_MASK) >> TRCVICTLR_EXLEVEL_S_SHIFT;
        return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
 }
 
@@ -760,10 +760,10 @@ static ssize_t s_exlevel_vinst_store(struct device *dev,
 
        spin_lock(&drvdata->spinlock);
        /* clear all EXLEVEL_S bits  */
-       config->vinst_ctrl &= ~(ETM_EXLEVEL_S_VICTLR_MASK);
+       config->vinst_ctrl &= ~(TRCVICTLR_EXLEVEL_S_MASK);
        /* enable instruction tracing for corresponding exception level */
        val &= drvdata->s_ex_level;
-       config->vinst_ctrl |= (val << 16);
+       config->vinst_ctrl |= (val << TRCVICTLR_EXLEVEL_S_SHIFT);
        spin_unlock(&drvdata->spinlock);
        return size;
 }
@@ -778,7 +778,7 @@ static ssize_t ns_exlevel_vinst_show(struct device *dev,
        struct etmv4_config *config = &drvdata->config;
 
        /* EXLEVEL_NS, bits[23:20] */
-       val = (config->vinst_ctrl & ETM_EXLEVEL_NS_VICTLR_MASK) >> 20;
+       val = (config->vinst_ctrl & TRCVICTLR_EXLEVEL_NS_MASK) >> TRCVICTLR_EXLEVEL_NS_SHIFT;
        return scnprintf(buf, PAGE_SIZE, "%#lx\n", val);
 }
 
@@ -795,10 +795,10 @@ static ssize_t ns_exlevel_vinst_store(struct device *dev,
 
        spin_lock(&drvdata->spinlock);
        /* clear EXLEVEL_NS bits  */
-       config->vinst_ctrl &= ~(ETM_EXLEVEL_NS_VICTLR_MASK);
+       config->vinst_ctrl &= ~(TRCVICTLR_EXLEVEL_NS_MASK);
        /* enable instruction tracing for corresponding exception level */
        val &= drvdata->ns_ex_level;
-       config->vinst_ctrl |= (val << 20);
+       config->vinst_ctrl |= (val << TRCVICTLR_EXLEVEL_NS_SHIFT);
        spin_unlock(&drvdata->spinlock);
        return size;
 }
@@ -2319,7 +2319,8 @@ static struct attribute *coresight_etmv4_attrs[] = {
 };
 
 struct etmv4_reg {
-       void __iomem *addr;
+       struct coresight_device *csdev;
+       u32 offset;
        u32 data;
 };
 
@@ -2327,15 +2328,16 @@ static void do_smp_cross_read(void *data)
 {
        struct etmv4_reg *reg = data;
 
-       reg->data = readl_relaxed(reg->addr);
+       reg->data = etm4x_relaxed_read32(&reg->csdev->access, reg->offset);
 }
 
-static u32 etmv4_cross_read(const struct device *dev, u32 offset)
+static u32 etmv4_cross_read(const struct etmv4_drvdata *drvdata, u32 offset)
 {
-       struct etmv4_drvdata *drvdata = dev_get_drvdata(dev);
        struct etmv4_reg reg;
 
-       reg.addr = drvdata->base + offset;
+       reg.offset = offset;
+       reg.csdev = drvdata->csdev;
+
        /*
         * smp cross call ensures the CPU will be powered up before
         * accessing the ETMv4 trace core registers
@@ -2344,72 +2346,120 @@ static u32 etmv4_cross_read(const struct device *dev, u32 offset)
        return reg.data;
 }
 
-#define coresight_etm4x_reg(name, offset)                      \
-       coresight_simple_reg32(struct etmv4_drvdata, name, offset)
+static inline u32 coresight_etm4x_attr_to_offset(struct device_attribute *attr)
+{
+       struct dev_ext_attribute *eattr;
+
+       eattr = container_of(attr, struct dev_ext_attribute, attr);
+       return (u32)(unsigned long)eattr->var;
+}
+
+static ssize_t coresight_etm4x_reg_show(struct device *dev,
+                                       struct device_attribute *d_attr,
+                                       char *buf)
+{
+       u32 val, offset;
+       struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
 
-#define coresight_etm4x_cross_read(name, offset)                       \
-       coresight_simple_func(struct etmv4_drvdata, etmv4_cross_read,   \
-                             name, offset)
+       offset = coresight_etm4x_attr_to_offset(d_attr);
 
-coresight_etm4x_reg(trcpdcr, TRCPDCR);
-coresight_etm4x_reg(trcpdsr, TRCPDSR);
-coresight_etm4x_reg(trclsr, TRCLSR);
-coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS);
-coresight_etm4x_reg(trcdevid, TRCDEVID);
-coresight_etm4x_reg(trcdevtype, TRCDEVTYPE);
-coresight_etm4x_reg(trcpidr0, TRCPIDR0);
-coresight_etm4x_reg(trcpidr1, TRCPIDR1);
-coresight_etm4x_reg(trcpidr2, TRCPIDR2);
-coresight_etm4x_reg(trcpidr3, TRCPIDR3);
-coresight_etm4x_cross_read(trcoslsr, TRCOSLSR);
-coresight_etm4x_cross_read(trcconfig, TRCCONFIGR);
-coresight_etm4x_cross_read(trctraceid, TRCTRACEIDR);
+       pm_runtime_get_sync(dev->parent);
+       val = etmv4_cross_read(drvdata, offset);
+       pm_runtime_put_sync(dev->parent);
+
+       return scnprintf(buf, PAGE_SIZE, "0x%x\n", val);
+}
+
+static inline bool
+etm4x_register_implemented(struct etmv4_drvdata *drvdata, u32 offset)
+{
+       switch (offset) {
+       ETM4x_SYSREG_LIST_CASES
+               /*
+                * Registers accessible via system instructions are always
+                * implemented.
+                */
+               return true;
+       ETM4x_MMAP_LIST_CASES
+               /*
+                * Registers accessible only via the memory-mapped
+                * interface must not be accessed via system
+                * instructions. We cannot use drvdata->csdev here,
+                * since this function is called during device
+                * creation, via coresight_register(), before the
+                * csdev is initialized. So rely on drvdata->base to
+                * detect a memory-mapped access.
+                */
+               return !!drvdata->base;
+       }
+
+       return false;
+}
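
The two case-list macros used in this switch are defined in coresight-etm4x.h later in this patch; via CASE_NOP they expand to plain fall-through case labels, so the switch roughly reads:

	switch (offset) {
	case TRCPRGCTLR:	/* ... every sysreg-accessible register ... */
	case TRCDEVID:
		return true;
	case TRCDEVTYPE:	/* ... every memory-mapped-only register ... */
	case TRCPIDR3:
		return !!drvdata->base;
	}
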
+
+/*
+ * Hide the ETM4x registers that may not be available on the
+ * hardware: certain management registers cannot be accessed via
+ * system instructions, so make their sysfs attributes invisible
+ * on such systems.
+ */
+static umode_t
+coresight_etm4x_attr_reg_implemented(struct kobject *kobj,
+                                    struct attribute *attr, int unused)
+{
+       struct device *dev = kobj_to_dev(kobj);
+       struct etmv4_drvdata *drvdata = dev_get_drvdata(dev->parent);
+       struct device_attribute *d_attr;
+       u32 offset;
+
+       d_attr = container_of(attr, struct device_attribute, attr);
+       offset = coresight_etm4x_attr_to_offset(d_attr);
+
+       if (etm4x_register_implemented(drvdata, offset))
+               return attr->mode;
+       return 0;
+}
+
+#define coresight_etm4x_reg(name, offset)                              \
+       &((struct dev_ext_attribute[]) {                                \
+          {                                                            \
+               __ATTR(name, 0444, coresight_etm4x_reg_show, NULL),     \
+               (void *)(unsigned long)offset                           \
+          }                                                            \
+       })[0].attr.attr
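
For one concrete attribute, coresight_etm4x_reg(trcpdcr, TRCPDCR) expands (roughly) to the address of the struct attribute embedded in an anonymous dev_ext_attribute, with the register offset carried through the ->var pointer:

static struct dev_ext_attribute trcpdcr_attr = {
	__ATTR(trcpdcr, 0444, coresight_etm4x_reg_show, NULL),
	(void *)(unsigned long)TRCPDCR,
};

coresight_etm4x_attr_to_offset() later recovers TRCPDCR from the device_attribute with container_of().
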
 
 static struct attribute *coresight_etmv4_mgmt_attrs[] = {
-       &dev_attr_trcoslsr.attr,
-       &dev_attr_trcpdcr.attr,
-       &dev_attr_trcpdsr.attr,
-       &dev_attr_trclsr.attr,
-       &dev_attr_trcconfig.attr,
-       &dev_attr_trctraceid.attr,
-       &dev_attr_trcauthstatus.attr,
-       &dev_attr_trcdevid.attr,
-       &dev_attr_trcdevtype.attr,
-       &dev_attr_trcpidr0.attr,
-       &dev_attr_trcpidr1.attr,
-       &dev_attr_trcpidr2.attr,
-       &dev_attr_trcpidr3.attr,
+       coresight_etm4x_reg(trcpdcr, TRCPDCR),
+       coresight_etm4x_reg(trcpdsr, TRCPDSR),
+       coresight_etm4x_reg(trclsr, TRCLSR),
+       coresight_etm4x_reg(trcauthstatus, TRCAUTHSTATUS),
+       coresight_etm4x_reg(trcdevid, TRCDEVID),
+       coresight_etm4x_reg(trcdevtype, TRCDEVTYPE),
+       coresight_etm4x_reg(trcpidr0, TRCPIDR0),
+       coresight_etm4x_reg(trcpidr1, TRCPIDR1),
+       coresight_etm4x_reg(trcpidr2, TRCPIDR2),
+       coresight_etm4x_reg(trcpidr3, TRCPIDR3),
+       coresight_etm4x_reg(trcoslsr, TRCOSLSR),
+       coresight_etm4x_reg(trcconfig, TRCCONFIGR),
+       coresight_etm4x_reg(trctraceid, TRCTRACEIDR),
+       coresight_etm4x_reg(trcdevarch, TRCDEVARCH),
        NULL,
 };
 
-coresight_etm4x_cross_read(trcidr0, TRCIDR0);
-coresight_etm4x_cross_read(trcidr1, TRCIDR1);
-coresight_etm4x_cross_read(trcidr2, TRCIDR2);
-coresight_etm4x_cross_read(trcidr3, TRCIDR3);
-coresight_etm4x_cross_read(trcidr4, TRCIDR4);
-coresight_etm4x_cross_read(trcidr5, TRCIDR5);
-/* trcidr[6,7] are reserved */
-coresight_etm4x_cross_read(trcidr8, TRCIDR8);
-coresight_etm4x_cross_read(trcidr9, TRCIDR9);
-coresight_etm4x_cross_read(trcidr10, TRCIDR10);
-coresight_etm4x_cross_read(trcidr11, TRCIDR11);
-coresight_etm4x_cross_read(trcidr12, TRCIDR12);
-coresight_etm4x_cross_read(trcidr13, TRCIDR13);
-
 static struct attribute *coresight_etmv4_trcidr_attrs[] = {
-       &dev_attr_trcidr0.attr,
-       &dev_attr_trcidr1.attr,
-       &dev_attr_trcidr2.attr,
-       &dev_attr_trcidr3.attr,
-       &dev_attr_trcidr4.attr,
-       &dev_attr_trcidr5.attr,
+       coresight_etm4x_reg(trcidr0, TRCIDR0),
+       coresight_etm4x_reg(trcidr1, TRCIDR1),
+       coresight_etm4x_reg(trcidr2, TRCIDR2),
+       coresight_etm4x_reg(trcidr3, TRCIDR3),
+       coresight_etm4x_reg(trcidr4, TRCIDR4),
+       coresight_etm4x_reg(trcidr5, TRCIDR5),
        /* trcidr[6,7] are reserved */
-       &dev_attr_trcidr8.attr,
-       &dev_attr_trcidr9.attr,
-       &dev_attr_trcidr10.attr,
-       &dev_attr_trcidr11.attr,
-       &dev_attr_trcidr12.attr,
-       &dev_attr_trcidr13.attr,
+       coresight_etm4x_reg(trcidr8, TRCIDR8),
+       coresight_etm4x_reg(trcidr9, TRCIDR9),
+       coresight_etm4x_reg(trcidr10, TRCIDR10),
+       coresight_etm4x_reg(trcidr11, TRCIDR11),
+       coresight_etm4x_reg(trcidr12, TRCIDR12),
+       coresight_etm4x_reg(trcidr13, TRCIDR13),
        NULL,
 };
 
@@ -2418,6 +2468,7 @@ static const struct attribute_group coresight_etmv4_group = {
 };
 
 static const struct attribute_group coresight_etmv4_mgmt_group = {
+       .is_visible = coresight_etm4x_attr_reg_implemented,
        .attrs = coresight_etmv4_mgmt_attrs,
        .name = "mgmt",
 };
index 3dd3e06..0af6057 100644 (file)
 #define TRCVDSACCTLR                   0x0A4
 #define TRCVDARCCTLR                   0x0A8
 /* Derived resources registers */
-#define TRCSEQEVRn(n)                  (0x100 + (n * 4))
+#define TRCSEQEVRn(n)                  (0x100 + (n * 4)) /* n = 0-2 */
 #define TRCSEQRSTEVR                   0x118
 #define TRCSEQSTR                      0x11C
 #define TRCEXTINSELR                   0x120
-#define TRCCNTRLDVRn(n)                        (0x140 + (n * 4))
-#define TRCCNTCTLRn(n)                 (0x150 + (n * 4))
-#define TRCCNTVRn(n)                   (0x160 + (n * 4))
+#define TRCCNTRLDVRn(n)                        (0x140 + (n * 4)) /* n = 0-3 */
+#define TRCCNTCTLRn(n)                 (0x150 + (n * 4)) /* n = 0-3 */
+#define TRCCNTVRn(n)                   (0x160 + (n * 4)) /* n = 0-3 */
 /* ID registers */
 #define TRCIDR8                                0x180
 #define TRCIDR9                                0x184
@@ -60,7 +60,7 @@
 #define TRCIDR12                       0x190
 #define TRCIDR13                       0x194
 #define TRCIMSPEC0                     0x1C0
-#define TRCIMSPECn(n)                  (0x1C0 + (n * 4))
+#define TRCIMSPECn(n)                  (0x1C0 + (n * 4)) /* n = 1-7 */
 #define TRCIDR0                                0x1E0
 #define TRCIDR1                                0x1E4
 #define TRCIDR2                                0x1E8
 #define TRCIDR5                                0x1F4
 #define TRCIDR6                                0x1F8
 #define TRCIDR7                                0x1FC
-/* Resource selection registers */
+/*
+ * Resource selection registers, n = 2-31.
+ * First pair (regs 0, 1) is always present and is reserved.
+ */
 #define TRCRSCTLRn(n)                  (0x200 + (n * 4))
-/* Single-shot comparator registers */
+/* Single-shot comparator registers, n = 0-7 */
 #define TRCSSCCRn(n)                   (0x280 + (n * 4))
 #define TRCSSCSRn(n)                   (0x2A0 + (n * 4))
 #define TRCSSPCICRn(n)                 (0x2C0 + (n * 4))
 #define TRCPDCR                                0x310
 #define TRCPDSR                                0x314
 /* Trace registers (0x318-0xEFC) */
-/* Comparator registers */
+/* Address Comparator registers n = 0-15 */
 #define TRCACVRn(n)                    (0x400 + (n * 8))
 #define TRCACATRn(n)                   (0x480 + (n * 8))
+/* Data Value Comparator Value registers, n = 0-7 */
 #define TRCDVCVRn(n)                   (0x500 + (n * 16))
 #define TRCDVCMRn(n)                   (0x580 + (n * 16))
+/* ContextID/Virtual ContextID comparators, n = 0-7 */
 #define TRCCIDCVRn(n)                  (0x600 + (n * 8))
 #define TRCVMIDCVRn(n)                 (0x640 + (n * 8))
 #define TRCCIDCCTLR0                   0x680
 #define TRCCIDR2                       0xFF8
 #define TRCCIDR3                       0xFFC
 
+/*
+ * System instructions to access ETM registers.
+ * See ETMv4.4 spec ARM IHI0064F section 4.3.6 System instructions
+ */
+#define ETM4x_OFFSET_TO_REG(x)         ((x) >> 2)
+
+#define ETM4x_CRn(n)                   (((n) >> 7) & 0x7)
+#define ETM4x_Op2(n)                   (((n) >> 4) & 0x7)
+#define ETM4x_CRm(n)                   ((n) & 0xf)
+
+#include <asm/sysreg.h>
+#define ETM4x_REG_NUM_TO_SYSREG(n)                             \
+       sys_reg(2, 1, ETM4x_CRn(n), ETM4x_CRm(n), ETM4x_Op2(n))
+
+#define READ_ETM4x_REG(reg)                                    \
+       read_sysreg_s(ETM4x_REG_NUM_TO_SYSREG((reg)))
+#define WRITE_ETM4x_REG(val, reg)                              \
+       write_sysreg_s(val, ETM4x_REG_NUM_TO_SYSREG((reg)))
+
+#define read_etm4x_sysreg_const_offset(offset)                 \
+       READ_ETM4x_REG(ETM4x_OFFSET_TO_REG(offset))
+
+#define write_etm4x_sysreg_const_offset(val, offset)           \
+       WRITE_ETM4x_REG(val, ETM4x_OFFSET_TO_REG(offset))
+
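
A worked example of the offset-to-encoding mapping above:

/*
 * TRCTRACEIDR sits at MMIO offset 0x040:
 *   ETM4x_OFFSET_TO_REG(0x040) = 0x10
 *   ETM4x_CRn(0x10) = 0, ETM4x_Op2(0x10) = 1, ETM4x_CRm(0x10) = 0
 * => sys_reg(2, 1, 0, 0, 1), i.e. op0=2, op1=1, CRn=0, CRm=0, op2=1,
 * the system-instruction encoding of TRCTRACEIDR.
 */
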
+#define CASE_READ(res, x)                                      \
+       case (x): { (res) = read_etm4x_sysreg_const_offset((x)); break; }
+
+#define CASE_WRITE(val, x)                                     \
+       case (x): { write_etm4x_sysreg_const_offset((val), (x)); break; }
+
+#define CASE_NOP(__unused, x)                                  \
+       case (x):       /* fall through */
+
+/* List of registers accessible via System instructions */
+#define ETM_SYSREG_LIST(op, val)               \
+       CASE_##op((val), TRCPRGCTLR)            \
+       CASE_##op((val), TRCPROCSELR)           \
+       CASE_##op((val), TRCSTATR)              \
+       CASE_##op((val), TRCCONFIGR)            \
+       CASE_##op((val), TRCAUXCTLR)            \
+       CASE_##op((val), TRCEVENTCTL0R)         \
+       CASE_##op((val), TRCEVENTCTL1R)         \
+       CASE_##op((val), TRCSTALLCTLR)          \
+       CASE_##op((val), TRCTSCTLR)             \
+       CASE_##op((val), TRCSYNCPR)             \
+       CASE_##op((val), TRCCCCTLR)             \
+       CASE_##op((val), TRCBBCTLR)             \
+       CASE_##op((val), TRCTRACEIDR)           \
+       CASE_##op((val), TRCQCTLR)              \
+       CASE_##op((val), TRCVICTLR)             \
+       CASE_##op((val), TRCVIIECTLR)           \
+       CASE_##op((val), TRCVISSCTLR)           \
+       CASE_##op((val), TRCVIPCSSCTLR)         \
+       CASE_##op((val), TRCVDCTLR)             \
+       CASE_##op((val), TRCVDSACCTLR)          \
+       CASE_##op((val), TRCVDARCCTLR)          \
+       CASE_##op((val), TRCSEQEVRn(0))         \
+       CASE_##op((val), TRCSEQEVRn(1))         \
+       CASE_##op((val), TRCSEQEVRn(2))         \
+       CASE_##op((val), TRCSEQRSTEVR)          \
+       CASE_##op((val), TRCSEQSTR)             \
+       CASE_##op((val), TRCEXTINSELR)          \
+       CASE_##op((val), TRCCNTRLDVRn(0))       \
+       CASE_##op((val), TRCCNTRLDVRn(1))       \
+       CASE_##op((val), TRCCNTRLDVRn(2))       \
+       CASE_##op((val), TRCCNTRLDVRn(3))       \
+       CASE_##op((val), TRCCNTCTLRn(0))        \
+       CASE_##op((val), TRCCNTCTLRn(1))        \
+       CASE_##op((val), TRCCNTCTLRn(2))        \
+       CASE_##op((val), TRCCNTCTLRn(3))        \
+       CASE_##op((val), TRCCNTVRn(0))          \
+       CASE_##op((val), TRCCNTVRn(1))          \
+       CASE_##op((val), TRCCNTVRn(2))          \
+       CASE_##op((val), TRCCNTVRn(3))          \
+       CASE_##op((val), TRCIDR8)               \
+       CASE_##op((val), TRCIDR9)               \
+       CASE_##op((val), TRCIDR10)              \
+       CASE_##op((val), TRCIDR11)              \
+       CASE_##op((val), TRCIDR12)              \
+       CASE_##op((val), TRCIDR13)              \
+       CASE_##op((val), TRCIMSPECn(0))         \
+       CASE_##op((val), TRCIMSPECn(1))         \
+       CASE_##op((val), TRCIMSPECn(2))         \
+       CASE_##op((val), TRCIMSPECn(3))         \
+       CASE_##op((val), TRCIMSPECn(4))         \
+       CASE_##op((val), TRCIMSPECn(5))         \
+       CASE_##op((val), TRCIMSPECn(6))         \
+       CASE_##op((val), TRCIMSPECn(7))         \
+       CASE_##op((val), TRCIDR0)               \
+       CASE_##op((val), TRCIDR1)               \
+       CASE_##op((val), TRCIDR2)               \
+       CASE_##op((val), TRCIDR3)               \
+       CASE_##op((val), TRCIDR4)               \
+       CASE_##op((val), TRCIDR5)               \
+       CASE_##op((val), TRCIDR6)               \
+       CASE_##op((val), TRCIDR7)               \
+       CASE_##op((val), TRCRSCTLRn(2))         \
+       CASE_##op((val), TRCRSCTLRn(3))         \
+       CASE_##op((val), TRCRSCTLRn(4))         \
+       CASE_##op((val), TRCRSCTLRn(5))         \
+       CASE_##op((val), TRCRSCTLRn(6))         \
+       CASE_##op((val), TRCRSCTLRn(7))         \
+       CASE_##op((val), TRCRSCTLRn(8))         \
+       CASE_##op((val), TRCRSCTLRn(9))         \
+       CASE_##op((val), TRCRSCTLRn(10))        \
+       CASE_##op((val), TRCRSCTLRn(11))        \
+       CASE_##op((val), TRCRSCTLRn(12))        \
+       CASE_##op((val), TRCRSCTLRn(13))        \
+       CASE_##op((val), TRCRSCTLRn(14))        \
+       CASE_##op((val), TRCRSCTLRn(15))        \
+       CASE_##op((val), TRCRSCTLRn(16))        \
+       CASE_##op((val), TRCRSCTLRn(17))        \
+       CASE_##op((val), TRCRSCTLRn(18))        \
+       CASE_##op((val), TRCRSCTLRn(19))        \
+       CASE_##op((val), TRCRSCTLRn(20))        \
+       CASE_##op((val), TRCRSCTLRn(21))        \
+       CASE_##op((val), TRCRSCTLRn(22))        \
+       CASE_##op((val), TRCRSCTLRn(23))        \
+       CASE_##op((val), TRCRSCTLRn(24))        \
+       CASE_##op((val), TRCRSCTLRn(25))        \
+       CASE_##op((val), TRCRSCTLRn(26))        \
+       CASE_##op((val), TRCRSCTLRn(27))        \
+       CASE_##op((val), TRCRSCTLRn(28))        \
+       CASE_##op((val), TRCRSCTLRn(29))        \
+       CASE_##op((val), TRCRSCTLRn(30))        \
+       CASE_##op((val), TRCRSCTLRn(31))        \
+       CASE_##op((val), TRCSSCCRn(0))          \
+       CASE_##op((val), TRCSSCCRn(1))          \
+       CASE_##op((val), TRCSSCCRn(2))          \
+       CASE_##op((val), TRCSSCCRn(3))          \
+       CASE_##op((val), TRCSSCCRn(4))          \
+       CASE_##op((val), TRCSSCCRn(5))          \
+       CASE_##op((val), TRCSSCCRn(6))          \
+       CASE_##op((val), TRCSSCCRn(7))          \
+       CASE_##op((val), TRCSSCSRn(0))          \
+       CASE_##op((val), TRCSSCSRn(1))          \
+       CASE_##op((val), TRCSSCSRn(2))          \
+       CASE_##op((val), TRCSSCSRn(3))          \
+       CASE_##op((val), TRCSSCSRn(4))          \
+       CASE_##op((val), TRCSSCSRn(5))          \
+       CASE_##op((val), TRCSSCSRn(6))          \
+       CASE_##op((val), TRCSSCSRn(7))          \
+       CASE_##op((val), TRCSSPCICRn(0))        \
+       CASE_##op((val), TRCSSPCICRn(1))        \
+       CASE_##op((val), TRCSSPCICRn(2))        \
+       CASE_##op((val), TRCSSPCICRn(3))        \
+       CASE_##op((val), TRCSSPCICRn(4))        \
+       CASE_##op((val), TRCSSPCICRn(5))        \
+       CASE_##op((val), TRCSSPCICRn(6))        \
+       CASE_##op((val), TRCSSPCICRn(7))        \
+       CASE_##op((val), TRCOSLAR)              \
+       CASE_##op((val), TRCOSLSR)              \
+       CASE_##op((val), TRCACVRn(0))           \
+       CASE_##op((val), TRCACVRn(1))           \
+       CASE_##op((val), TRCACVRn(2))           \
+       CASE_##op((val), TRCACVRn(3))           \
+       CASE_##op((val), TRCACVRn(4))           \
+       CASE_##op((val), TRCACVRn(5))           \
+       CASE_##op((val), TRCACVRn(6))           \
+       CASE_##op((val), TRCACVRn(7))           \
+       CASE_##op((val), TRCACVRn(8))           \
+       CASE_##op((val), TRCACVRn(9))           \
+       CASE_##op((val), TRCACVRn(10))          \
+       CASE_##op((val), TRCACVRn(11))          \
+       CASE_##op((val), TRCACVRn(12))          \
+       CASE_##op((val), TRCACVRn(13))          \
+       CASE_##op((val), TRCACVRn(14))          \
+       CASE_##op((val), TRCACVRn(15))          \
+       CASE_##op((val), TRCACATRn(0))          \
+       CASE_##op((val), TRCACATRn(1))          \
+       CASE_##op((val), TRCACATRn(2))          \
+       CASE_##op((val), TRCACATRn(3))          \
+       CASE_##op((val), TRCACATRn(4))          \
+       CASE_##op((val), TRCACATRn(5))          \
+       CASE_##op((val), TRCACATRn(6))          \
+       CASE_##op((val), TRCACATRn(7))          \
+       CASE_##op((val), TRCACATRn(8))          \
+       CASE_##op((val), TRCACATRn(9))          \
+       CASE_##op((val), TRCACATRn(10))         \
+       CASE_##op((val), TRCACATRn(11))         \
+       CASE_##op((val), TRCACATRn(12))         \
+       CASE_##op((val), TRCACATRn(13))         \
+       CASE_##op((val), TRCACATRn(14))         \
+       CASE_##op((val), TRCACATRn(15))         \
+       CASE_##op((val), TRCDVCVRn(0))          \
+       CASE_##op((val), TRCDVCVRn(1))          \
+       CASE_##op((val), TRCDVCVRn(2))          \
+       CASE_##op((val), TRCDVCVRn(3))          \
+       CASE_##op((val), TRCDVCVRn(4))          \
+       CASE_##op((val), TRCDVCVRn(5))          \
+       CASE_##op((val), TRCDVCVRn(6))          \
+       CASE_##op((val), TRCDVCVRn(7))          \
+       CASE_##op((val), TRCDVCMRn(0))          \
+       CASE_##op((val), TRCDVCMRn(1))          \
+       CASE_##op((val), TRCDVCMRn(2))          \
+       CASE_##op((val), TRCDVCMRn(3))          \
+       CASE_##op((val), TRCDVCMRn(4))          \
+       CASE_##op((val), TRCDVCMRn(5))          \
+       CASE_##op((val), TRCDVCMRn(6))          \
+       CASE_##op((val), TRCDVCMRn(7))          \
+       CASE_##op((val), TRCCIDCVRn(0))         \
+       CASE_##op((val), TRCCIDCVRn(1))         \
+       CASE_##op((val), TRCCIDCVRn(2))         \
+       CASE_##op((val), TRCCIDCVRn(3))         \
+       CASE_##op((val), TRCCIDCVRn(4))         \
+       CASE_##op((val), TRCCIDCVRn(5))         \
+       CASE_##op((val), TRCCIDCVRn(6))         \
+       CASE_##op((val), TRCCIDCVRn(7))         \
+       CASE_##op((val), TRCVMIDCVRn(0))        \
+       CASE_##op((val), TRCVMIDCVRn(1))        \
+       CASE_##op((val), TRCVMIDCVRn(2))        \
+       CASE_##op((val), TRCVMIDCVRn(3))        \
+       CASE_##op((val), TRCVMIDCVRn(4))        \
+       CASE_##op((val), TRCVMIDCVRn(5))        \
+       CASE_##op((val), TRCVMIDCVRn(6))        \
+       CASE_##op((val), TRCVMIDCVRn(7))        \
+       CASE_##op((val), TRCCIDCCTLR0)          \
+       CASE_##op((val), TRCCIDCCTLR1)          \
+       CASE_##op((val), TRCVMIDCCTLR0)         \
+       CASE_##op((val), TRCVMIDCCTLR1)         \
+       CASE_##op((val), TRCCLAIMSET)           \
+       CASE_##op((val), TRCCLAIMCLR)           \
+       CASE_##op((val), TRCAUTHSTATUS)         \
+       CASE_##op((val), TRCDEVARCH)            \
+       CASE_##op((val), TRCDEVID)
+
+/* List of registers only accessible via memory-mapped interface */
+#define ETM_MMAP_LIST(op, val)                 \
+       CASE_##op((val), TRCDEVTYPE)            \
+       CASE_##op((val), TRCPDCR)               \
+       CASE_##op((val), TRCPDSR)               \
+       CASE_##op((val), TRCDEVAFF0)            \
+       CASE_##op((val), TRCDEVAFF1)            \
+       CASE_##op((val), TRCLAR)                \
+       CASE_##op((val), TRCLSR)                \
+       CASE_##op((val), TRCITCTRL)             \
+       CASE_##op((val), TRCPIDR4)              \
+       CASE_##op((val), TRCPIDR0)              \
+       CASE_##op((val), TRCPIDR1)              \
+       CASE_##op((val), TRCPIDR2)              \
+       CASE_##op((val), TRCPIDR3)
+
+#define ETM4x_READ_SYSREG_CASES(res)   ETM_SYSREG_LIST(READ, (res))
+#define ETM4x_WRITE_SYSREG_CASES(val)  ETM_SYSREG_LIST(WRITE, (val))
+
+#define ETM4x_SYSREG_LIST_CASES                ETM_SYSREG_LIST(NOP, __unused)
+#define ETM4x_MMAP_LIST_CASES          ETM_MMAP_LIST(NOP, __unused)
+
+#define read_etm4x_sysreg_offset(offset, _64bit)                               \
+       ({                                                                      \
+               u64 __val;                                                      \
+                                                                               \
+               if (__builtin_constant_p((offset)))                             \
+                       __val = read_etm4x_sysreg_const_offset((offset));       \
+               else                                                            \
+                       __val = etm4x_sysreg_read((offset), true, (_64bit));    \
+               __val;                                                          \
+        })
+
+#define write_etm4x_sysreg_offset(val, offset, _64bit)                 \
+       do {                                                            \
+               if (__builtin_constant_p((offset)))                     \
+                       write_etm4x_sysreg_const_offset((val),          \
+                                                       (offset));      \
+               else                                                    \
+                       etm4x_sysreg_write((val), (offset), true,       \
+                                          (_64bit));                   \
+       } while (0)
+
+
+#define etm4x_relaxed_read32(csa, offset)                              \
+       ((u32)((csa)->io_mem ?                                          \
+                readl_relaxed((csa)->base + (offset)) :                \
+                read_etm4x_sysreg_offset((offset), false)))
+
+#define etm4x_relaxed_read64(csa, offset)                              \
+       ((u64)((csa)->io_mem ?                                          \
+                readq_relaxed((csa)->base + (offset)) :                \
+                read_etm4x_sysreg_offset((offset), true)))
+
+#define etm4x_read32(csa, offset)                                      \
+       ({                                                              \
+               u32 __val = etm4x_relaxed_read32((csa), (offset));      \
+               __iormb(__val);                                         \
+               __val;                                                  \
+        })
+
+#define etm4x_read64(csa, offset)                                      \
+       ({                                                              \
+               u64 __val = etm4x_relaxed_read64((csa), (offset));      \
+               __iormb(__val);                                         \
+               __val;                                                  \
+        })
+
+#define etm4x_relaxed_write32(csa, val, offset)                                \
+       do {                                                            \
+               if ((csa)->io_mem)                                      \
+                       writel_relaxed((val), (csa)->base + (offset));  \
+               else                                                    \
+                       write_etm4x_sysreg_offset((val), (offset),      \
+                                                 false);               \
+       } while (0)
+
+#define etm4x_relaxed_write64(csa, val, offset)                                \
+       do {                                                            \
+               if ((csa)->io_mem)                                      \
+                       writeq_relaxed((val), (csa)->base + (offset));  \
+               else                                                    \
+                       write_etm4x_sysreg_offset((val), (offset),      \
+                                                 true);                \
+       } while (0)
+
+#define etm4x_write32(csa, val, offset)                                        \
+       do {                                                            \
+               __iowmb();                                              \
+               etm4x_relaxed_write32((csa), (val), (offset));          \
+       } while (0)
+
+#define etm4x_write64(csa, val, offset)                                        \
+       do {                                                            \
+               __iowmb();                                              \
+               etm4x_relaxed_write64((csa), (val), (offset));          \
+       } while (0)
+
+
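For illustration only, not part of the patch: a minimal sketch of how these
dual-mode accessors are meant to be consumed. TRCSTATR is assumed to be the
architected status-register offset defined elsewhere in this header.

/* Works whether this ETM is reached via MMIO (csa->io_mem) or via
 * system instructions; the macro picks the right path at runtime. */
static bool etm4x_is_idle(struct csdev_access *csa)
{
	return etm4x_read32(csa, TRCSTATR) & BIT(TRCSTATR_IDLE_BIT);
}
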
 /* ETMv4 resources */
 #define ETM_MAX_NR_PE                  8
 #define ETMv4_MAX_CNTR                 4
 #define ETM_MAX_RES_SEL                        32
 #define ETM_MAX_SS_CMP                 8
 
-#define ETM_ARCH_V4                    0x40
 #define ETMv4_SYNC_MASK                        0x1F
 #define ETM_CYC_THRESHOLD_MASK         0xFFF
 #define ETM_CYC_THRESHOLD_DEFAULT       0x100
                                         ETM_MODE_EXCL_KERN | \
                                         ETM_MODE_EXCL_USER)
 
+/*
+ * TRCDEVARCH Bit field definitions
+ * Bits[31:21] - ARCHITECT = Always Arm Ltd.
+ *                * Bits[31:28] = 0x4
+ *                * Bits[27:21] = 0b0111011
+ * Bit[20]     - PRESENT,  Indicates the presence of this register.
+ *
+ * Bits[19:16] - REVISION, Revision of the architecture.
+ *
+ * Bits[15:0]  - ARCHID, Identifies this component as an ETM
+ *                * Bits[15:12] - architecture version of ETM
+ *                *             = 4 for ETMv4
+ *                * Bits[11:0] = 0xA13, architecture part number for ETM.
+ */
+#define ETM_DEVARCH_ARCHITECT_MASK             GENMASK(31, 21)
+#define ETM_DEVARCH_ARCHITECT_ARM              ((0x4 << 28) | (0b0111011 << 21))
+#define ETM_DEVARCH_PRESENT                    BIT(20)
+#define ETM_DEVARCH_REVISION_SHIFT             16
+#define ETM_DEVARCH_REVISION_MASK              GENMASK(19, 16)
+#define ETM_DEVARCH_REVISION(x)                        \
+       (((x) & ETM_DEVARCH_REVISION_MASK) >> ETM_DEVARCH_REVISION_SHIFT)
+#define ETM_DEVARCH_ARCHID_MASK                        GENMASK(15, 0)
+#define ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT      12
+#define ETM_DEVARCH_ARCHID_ARCH_VER_MASK       GENMASK(15, 12)
+#define ETM_DEVARCH_ARCHID_ARCH_VER(x)         \
+       (((x) & ETM_DEVARCH_ARCHID_ARCH_VER_MASK) >> ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT)
+
+#define ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(ver)                  \
+       (((ver) << ETM_DEVARCH_ARCHID_ARCH_VER_SHIFT) & ETM_DEVARCH_ARCHID_ARCH_VER_MASK)
+
+#define ETM_DEVARCH_ARCHID_ARCH_PART(x)                ((x) & 0xfffUL)
+
+#define ETM_DEVARCH_MAKE_ARCHID(major)                 \
+       ((ETM_DEVARCH_MAKE_ARCHID_ARCH_VER(major)) | ETM_DEVARCH_ARCHID_ARCH_PART(0xA13))
+
+#define ETM_DEVARCH_ARCHID_ETMv4x              ETM_DEVARCH_MAKE_ARCHID(0x4)
+
+#define ETM_DEVARCH_ID_MASK                                            \
+       (ETM_DEVARCH_ARCHITECT_MASK | ETM_DEVARCH_ARCHID_MASK | ETM_DEVARCH_PRESENT)
+#define ETM_DEVARCH_ETMv4x_ARCH                                                \
+       (ETM_DEVARCH_ARCHITECT_ARM | ETM_DEVARCH_ARCHID_ETMv4x | ETM_DEVARCH_PRESENT)
+
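As a worked illustration of the masks above (the register value is a made-up
example, not from the patch): an Arm ETMv4.2 would report
TRCDEVARCH = 0x47724a13, i.e. ARCHITECT = Arm, PRESENT = 1, REVISION = 2,
ARCHID = 0x4a13, and recognising it reduces to one masked compare.

u32 devarch = 0x47724a13;	/* hypothetical ETMv4.2 */

if ((devarch & ETM_DEVARCH_ID_MASK) == ETM_DEVARCH_ETMv4x_ARCH)
	rev = ETM_DEVARCH_REVISION(devarch);	/* == 2 */
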
 #define TRCSTATR_IDLE_BIT              0
 #define TRCSTATR_PMSTABLE_BIT          1
 #define ETM_DEFAULT_ADDR_COMP          0
 
+#define TRCSSCSRn_PC                   BIT(3)
+
 /* PowerDown Control Register bits */
 #define TRCPDCR_PU                     BIT(3)
 
-/* secure state access levels - TRCACATRn */
-#define ETM_EXLEVEL_S_APP              BIT(8)
-#define ETM_EXLEVEL_S_OS               BIT(9)
-#define ETM_EXLEVEL_S_HYP              BIT(10)
-#define ETM_EXLEVEL_S_MON              BIT(11)
-/* non-secure state access levels - TRCACATRn */
-#define ETM_EXLEVEL_NS_APP             BIT(12)
-#define ETM_EXLEVEL_NS_OS              BIT(13)
-#define ETM_EXLEVEL_NS_HYP             BIT(14)
-#define ETM_EXLEVEL_NS_NA              BIT(15)
+/*
+ * Exception level masks for Secure and Non-Secure ELs.
+ * The ETM defines bits for EL control in several registers
+ * (e.g. TRCVICTLR, TRCACATRn). The Secure and Non-Secure ELs
+ * always appear together, and Non-secure EL3 is never
+ * implemented. We use the following generic bits, which can be
+ * shifted into the appropriate field of each register.
+ */
+#define ETM_EXLEVEL_S_APP              BIT(0)  /* Secure EL0           */
+#define ETM_EXLEVEL_S_OS               BIT(1)  /* Secure EL1           */
+#define ETM_EXLEVEL_S_HYP              BIT(2)  /* Secure EL2           */
+#define ETM_EXLEVEL_S_MON              BIT(3)  /* Secure EL3/Monitor   */
+#define ETM_EXLEVEL_NS_APP             BIT(4)  /* NonSecure EL0        */
+#define ETM_EXLEVEL_NS_OS              BIT(5)  /* NonSecure EL1        */
+#define ETM_EXLEVEL_NS_HYP             BIT(6)  /* NonSecure EL2        */
+
+#define ETM_EXLEVEL_MASK               (GENMASK(6, 0))
+#define ETM_EXLEVEL_S_MASK             (GENMASK(3, 0))
+#define ETM_EXLEVEL_NS_MASK            (GENMASK(6, 4))
 
-/* access level control in TRCVICTLR - same bits as TRCACATRn but shifted */
-#define ETM_EXLEVEL_LSHIFT_TRCVICTLR   8
+/* access level controls in TRCACATRn */
+#define TRCACATR_EXLEVEL_SHIFT         8
+
+/* access level control in TRCVICTLR */
+#define TRCVICTLR_EXLEVEL_SHIFT                16
+#define TRCVICTLR_EXLEVEL_S_SHIFT      16
+#define TRCVICTLR_EXLEVEL_NS_SHIFT     20
 
 /* secure / non secure masks - TRCVICTLR, IDR3 */
-#define ETM_EXLEVEL_S_VICTLR_MASK      GENMASK(19, 16)
-/* NS MON (EL3) mode never implemented */
-#define ETM_EXLEVEL_NS_VICTLR_MASK     GENMASK(22, 20)
+#define TRCVICTLR_EXLEVEL_MASK         (ETM_EXLEVEL_MASK << TRCVICTLR_EXLEVEL_SHIFT)
+#define TRCVICTLR_EXLEVEL_S_MASK       (ETM_EXLEVEL_S_MASK << TRCVICTLR_EXLEVEL_SHIFT)
+#define TRCVICTLR_EXLEVEL_NS_MASK      (ETM_EXLEVEL_NS_MASK << TRCVICTLR_EXLEVEL_SHIFT)
+
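A short sketch, not part of the patch, of how the generic EL bits compose
with the per-register shifts: the same bit pattern serves TRCVICTLR and
TRCACATRn, only the shift differs.

u32 elbits = ETM_EXLEVEL_S_APP | ETM_EXLEVEL_NS_APP;

u32 victlr_field = elbits << TRCVICTLR_EXLEVEL_SHIFT;	/* lands in [22:16] */
u32 acatr_field  = elbits << TRCACATR_EXLEVEL_SHIFT;	/* lands in [14:8]  */
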
+#define ETM_TRCIDR1_ARCH_MAJOR_SHIFT   8
+#define ETM_TRCIDR1_ARCH_MAJOR_MASK    (0xfU << ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MAJOR(x)      \
+       (((x) & ETM_TRCIDR1_ARCH_MAJOR_MASK) >> ETM_TRCIDR1_ARCH_MAJOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR_SHIFT   4
+#define ETM_TRCIDR1_ARCH_MINOR_MASK    (0xfU << ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_MINOR(x)      \
+       (((x) & ETM_TRCIDR1_ARCH_MINOR_MASK) >> ETM_TRCIDR1_ARCH_MINOR_SHIFT)
+#define ETM_TRCIDR1_ARCH_SHIFT         ETM_TRCIDR1_ARCH_MINOR_SHIFT
+#define ETM_TRCIDR1_ARCH_MASK          \
+       (ETM_TRCIDR1_ARCH_MAJOR_MASK | ETM_TRCIDR1_ARCH_MINOR_MASK)
 
+#define ETM_TRCIDR1_ARCH_ETMv4         0x4
+
+/*
+ * Driver representation of the ETM architecture.
+ * The version of an ETM component can be detected from:
+ *
+ * TRCDEVARCH  - CoreSight architected register
+ *                - Bits[15:12] - Major version
+ *                - Bits[19:16] - Minor version
+ * TRCIDR1     - ETM architected register
+ *                - Bits[11:8] - Major version
+ *                - Bits[7:4]  - Minor version
+ * We must rely on TRCDEVARCH for the version information.
+ * However, we don't want to break support for potential older
+ * implementations that might not implement it, so for
+ * memory-mapped components we fall back to TRCIDR1 when
+ * TRCDEVARCH is not implemented.
+ * To make version-based decisions easier, the driver keeps an
+ * internal representation of the version, as follows:
+ *
+ * ETM_ARCH_VERSION[7:0], where:
+ *      Bits[7:4] - Major version
+ *      Bits[3:0] - Minor version
+ */
+#define ETM_ARCH_VERSION(major, minor)         \
+       ((((major) & 0xfU) << 4) | (((minor) & 0xfU)))
+#define ETM_ARCH_MAJOR_VERSION(arch)   (((arch) >> 4) & 0xfU)
+#define ETM_ARCH_MINOR_VERSION(arch)   ((arch) & 0xfU)
+
+#define ETM_ARCH_V4    ETM_ARCH_VERSION(4, 0)
 /* Interpretation of resource numbers change at ETM v4.3 architecture */
-#define ETM4X_ARCH_4V3 0x43
+#define ETM_ARCH_V4_3  ETM_ARCH_VERSION(4, 3)
+
+static inline u8 etm_devarch_to_arch(u32 devarch)
+{
+       return ETM_ARCH_VERSION(ETM_DEVARCH_ARCHID_ARCH_VER(devarch),
+                               ETM_DEVARCH_REVISION(devarch));
+}
+
+static inline u8 etm_trcidr_to_arch(u32 trcidr1)
+{
+       return ETM_ARCH_VERSION(ETM_TRCIDR1_ARCH_MAJOR(trcidr1),
+                               ETM_TRCIDR1_ARCH_MINOR(trcidr1));
+}
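A hedged sketch of the fallback policy described above; the variable names
and the PRESENT-bit test are assumptions for illustration.

/* Prefer TRCDEVARCH; fall back to TRCIDR1 only for memory-mapped
 * parts that predate TRCDEVARCH. */
u8 arch = (devarch & ETM_DEVARCH_PRESENT) ?
	  etm_devarch_to_arch(devarch) : etm_trcidr_to_arch(trcidr1);

if (arch >= ETM_ARCH_V4_3)
	;	/* use the v4.3+ resource-number interpretation */
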
 
 enum etm_impdef_type {
        ETM4_IMPDEF_HISI_CORE_COMMIT,
@@ -256,7 +702,7 @@ enum etm_impdef_type {
  * @vmid_mask0:        VM ID comparator mask for comparator 0-3.
  * @vmid_mask1:        VM ID comparator mask for comparator 4-7.
  * @ext_inp:   External input selection.
- * @arch:      ETM architecture version (for arch dependent config).
+ * @s_ex_level: Secure ELs where tracing is supported.
  */
 struct etmv4_config {
        u32                             mode;
@@ -300,7 +746,7 @@ struct etmv4_config {
        u32                             vmid_mask0;
        u32                             vmid_mask1;
        u32                             ext_inp;
-       u8                              arch;
+       u8                              s_ex_level;
 };
 
 /**
@@ -369,7 +815,7 @@ struct etmv4_save_state {
  * @spinlock:   Serializes access to this tracer; only one user at a time.
  * @mode:      This tracer's mode, i.e. sysFS, Perf or disabled.
  * @cpu:        The cpu this component is affined to.
- * @arch:       ETM version number.
+ * @arch:       ETM architecture version.
  * @nr_pe:     The number of processing entity available for tracing.
  * @nr_pe_cmp: The number of processing entity comparator inputs that are
  *             available for tracing.
@@ -491,4 +937,7 @@ enum etm_addr_ctxtype {
 
 extern const struct attribute_group *coresight_etmv4_groups[];
 void etm4_config_trace_mode(struct etmv4_config *config);
+
+u64 etm4x_sysreg_read(u32 offset, bool _relaxed, bool _64bit);
+void etm4x_sysreg_write(u64 val, u32 offset, bool _relaxed, bool _64bit);
 #endif
index 01f8f92..b363dd6 100644 (file)
@@ -52,13 +52,14 @@ static int dynamic_funnel_enable_hw(struct funnel_drvdata *drvdata, int port)
 {
        u32 functl;
        int rc = 0;
+       struct coresight_device *csdev = drvdata->csdev;
 
        CS_UNLOCK(drvdata->base);
 
        functl = readl_relaxed(drvdata->base + FUNNEL_FUNCTL);
        /* Claim the device only when we enable the first slave */
        if (!(functl & FUNNEL_ENSx_MASK)) {
-               rc = coresight_claim_device_unlocked(drvdata->base);
+               rc = coresight_claim_device_unlocked(csdev);
                if (rc)
                        goto done;
        }
@@ -101,6 +102,7 @@ static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata,
                                      int inport)
 {
        u32 functl;
+       struct coresight_device *csdev = drvdata->csdev;
 
        CS_UNLOCK(drvdata->base);
 
@@ -110,7 +112,7 @@ static void dynamic_funnel_disable_hw(struct funnel_drvdata *drvdata,
 
        /* Disclaim the device if none of the slaves are now active */
        if (!(functl & FUNNEL_ENSx_MASK))
-               coresight_disclaim_device_unlocked(drvdata->base);
+               coresight_disclaim_device_unlocked(csdev);
 
        CS_LOCK(drvdata->base);
 }
@@ -242,6 +244,7 @@ static int funnel_probe(struct device *dev, struct resource *res)
                }
                drvdata->base = base;
                desc.groups = coresight_funnel_groups;
+               desc.access = CSDEV_ACCESS_IOMEM(base);
        }
 
        dev_set_drvdata(dev, drvdata);
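
The funnel hunks show the conversion repeated across this series: the
claim/disclaim helpers now take the coresight_device rather than a raw
__iomem base, so the same calls serve system-register-only devices. In
sketch form (drvdata layout as in the hunk above):

rc = coresight_claim_device_unlocked(drvdata->base);	/* old: MMIO only */
rc = coresight_claim_device_unlocked(drvdata->csdev);	/* new: any access mode */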
index 34fc2f6..b86acbc 100644 (file)
@@ -45,12 +45,14 @@ struct replicator_drvdata {
 
 static void dynamic_replicator_reset(struct replicator_drvdata *drvdata)
 {
+       struct coresight_device *csdev = drvdata->csdev;
+
        CS_UNLOCK(drvdata->base);
 
-       if (!coresight_claim_device_unlocked(drvdata->base)) {
+       if (!coresight_claim_device_unlocked(csdev)) {
                writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER0);
                writel_relaxed(0xff, drvdata->base + REPLICATOR_IDFILTER1);
-               coresight_disclaim_device_unlocked(drvdata->base);
+               coresight_disclaim_device_unlocked(csdev);
        }
 
        CS_LOCK(drvdata->base);
@@ -70,6 +72,7 @@ static int dynamic_replicator_enable(struct replicator_drvdata *drvdata,
 {
        int rc = 0;
        u32 id0val, id1val;
+       struct coresight_device *csdev = drvdata->csdev;
 
        CS_UNLOCK(drvdata->base);
 
@@ -84,7 +87,7 @@ static int dynamic_replicator_enable(struct replicator_drvdata *drvdata,
                id0val = id1val = 0xff;
 
        if (id0val == 0xff && id1val == 0xff)
-               rc = coresight_claim_device_unlocked(drvdata->base);
+               rc = coresight_claim_device_unlocked(csdev);
 
        if (!rc) {
                switch (outport) {
@@ -140,6 +143,7 @@ static void dynamic_replicator_disable(struct replicator_drvdata *drvdata,
                                       int inport, int outport)
 {
        u32 reg;
+       struct coresight_device *csdev = drvdata->csdev;
 
        switch (outport) {
        case 0:
@@ -160,7 +164,7 @@ static void dynamic_replicator_disable(struct replicator_drvdata *drvdata,
 
        if ((readl_relaxed(drvdata->base + REPLICATOR_IDFILTER0) == 0xff) &&
            (readl_relaxed(drvdata->base + REPLICATOR_IDFILTER1) == 0xff))
-               coresight_disclaim_device_unlocked(drvdata->base);
+               coresight_disclaim_device_unlocked(csdev);
        CS_LOCK(drvdata->base);
 }
 
@@ -254,6 +258,7 @@ static int replicator_probe(struct device *dev, struct resource *res)
                }
                drvdata->base = base;
                desc.groups = replicator_groups;
+               desc.access = CSDEV_ACCESS_IOMEM(base);
        }
 
        if (fwnode_property_present(dev_fwnode(dev),
index 423df0d..58062a5 100644 (file)
@@ -258,6 +258,7 @@ static void stm_disable(struct coresight_device *csdev,
                        struct perf_event *event)
 {
        struct stm_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
+       struct csdev_access *csa = &csdev->access;
 
        /*
         * For as long as the tracer isn't disabled another entity can't
@@ -270,7 +271,7 @@ static void stm_disable(struct coresight_device *csdev,
                spin_unlock(&drvdata->spinlock);
 
                /* Wait until the engine has completely stopped */
-               coresight_timeout(drvdata->base, STMTCSR, STMTCSR_BUSY_BIT, 0);
+               coresight_timeout(csa, STMTCSR, STMTCSR_BUSY_BIT, 0);
 
                pm_runtime_put(csdev->dev.parent);
 
@@ -884,6 +885,7 @@ static int stm_probe(struct amba_device *adev, const struct amba_id *id)
        if (IS_ERR(base))
                return PTR_ERR(base);
        drvdata->base = base;
+       desc.access = CSDEV_ACCESS_IOMEM(base);
 
        ret = stm_get_stimulus_area(dev, &ch_res);
        if (ret)
index e29b391..74c6323 100644 (file)
@@ -33,16 +33,20 @@ DEFINE_CORESIGHT_DEVLIST(etr_devs, "tmc_etr");
 
 void tmc_wait_for_tmcready(struct tmc_drvdata *drvdata)
 {
+       struct coresight_device *csdev = drvdata->csdev;
+       struct csdev_access *csa = &csdev->access;
+
        /* Ensure formatter, unformatter and hardware fifo are empty */
-       if (coresight_timeout(drvdata->base,
-                             TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
-               dev_err(&drvdata->csdev->dev,
+       if (coresight_timeout(csa, TMC_STS, TMC_STS_TMCREADY_BIT, 1)) {
+               dev_err(&csdev->dev,
                        "timeout while waiting for TMC to be Ready\n");
        }
 }
 
 void tmc_flush_and_stop(struct tmc_drvdata *drvdata)
 {
+       struct coresight_device *csdev = drvdata->csdev;
+       struct csdev_access *csa = &csdev->access;
        u32 ffcr;
 
        ffcr = readl_relaxed(drvdata->base + TMC_FFCR);
@@ -51,9 +55,8 @@ void tmc_flush_and_stop(struct tmc_drvdata *drvdata)
        ffcr |= BIT(TMC_FFCR_FLUSHMAN_BIT);
        writel_relaxed(ffcr, drvdata->base + TMC_FFCR);
        /* Ensure flush completes */
-       if (coresight_timeout(drvdata->base,
-                             TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
-               dev_err(&drvdata->csdev->dev,
+       if (coresight_timeout(csa, TMC_FFCR, TMC_FFCR_FLUSHMAN_BIT, 0)) {
+               dev_err(&csdev->dev,
                "timeout while waiting for completion of Manual Flush\n");
        }
 
@@ -456,6 +459,7 @@ static int tmc_probe(struct amba_device *adev, const struct amba_id *id)
        }
 
        drvdata->base = base;
+       desc.access = CSDEV_ACCESS_IOMEM(base);
 
        spin_lock_init(&drvdata->spinlock);
 
index 989d965..45b85ed 100644 (file)
@@ -37,7 +37,7 @@ static void __tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 
 static int tmc_etb_enable_hw(struct tmc_drvdata *drvdata)
 {
-       int rc = coresight_claim_device(drvdata->base);
+       int rc = coresight_claim_device(drvdata->csdev);
 
        if (rc)
                return rc;
@@ -88,7 +88,7 @@ static void __tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 static void tmc_etb_disable_hw(struct tmc_drvdata *drvdata)
 {
        __tmc_etb_disable_hw(drvdata);
-       coresight_disclaim_device(drvdata->base);
+       coresight_disclaim_device(drvdata->csdev);
 }
 
 static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
@@ -109,7 +109,7 @@ static void __tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 
 static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 {
-       int rc = coresight_claim_device(drvdata->base);
+       int rc = coresight_claim_device(drvdata->csdev);
 
        if (rc)
                return rc;
@@ -120,11 +120,13 @@ static int tmc_etf_enable_hw(struct tmc_drvdata *drvdata)
 
 static void tmc_etf_disable_hw(struct tmc_drvdata *drvdata)
 {
+       struct coresight_device *csdev = drvdata->csdev;
+
        CS_UNLOCK(drvdata->base);
 
        tmc_flush_and_stop(drvdata);
        tmc_disable_hw(drvdata);
-       coresight_disclaim_device_unlocked(drvdata->base);
+       coresight_disclaim_device_unlocked(csdev);
        CS_LOCK(drvdata->base);
 }
 
index bf5230e..acdb59e 100644 (file)
@@ -1040,7 +1040,7 @@ static int tmc_etr_enable_hw(struct tmc_drvdata *drvdata,
        rc = tmc_etr_enable_catu(drvdata, etr_buf);
        if (rc)
                return rc;
-       rc = coresight_claim_device(drvdata->base);
+       rc = coresight_claim_device(drvdata->csdev);
        if (!rc) {
                drvdata->etr_buf = etr_buf;
                __tmc_etr_enable_hw(drvdata);
@@ -1134,7 +1134,7 @@ void tmc_etr_disable_hw(struct tmc_drvdata *drvdata)
        __tmc_etr_disable_hw(drvdata);
        /* Disable CATU device if this ETR is connected to one */
        tmc_etr_disable_catu(drvdata);
-       coresight_disclaim_device(drvdata->base);
+       coresight_disclaim_device(drvdata->csdev);
        /* Reset the ETR buf used by hardware */
        drvdata->etr_buf = NULL;
 }
index f77c4b0..34d37ab 100644 (file)
@@ -60,49 +60,45 @@ struct tpiu_drvdata {
        struct coresight_device *csdev;
 };
 
-static void tpiu_enable_hw(struct tpiu_drvdata *drvdata)
+static void tpiu_enable_hw(struct csdev_access *csa)
 {
-       CS_UNLOCK(drvdata->base);
+       CS_UNLOCK(csa->base);
 
        /* TODO: fill this up */
 
-       CS_LOCK(drvdata->base);
+       CS_LOCK(csa->base);
 }
 
 static int tpiu_enable(struct coresight_device *csdev, u32 mode, void *__unused)
 {
-       struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-
-       tpiu_enable_hw(drvdata);
+       tpiu_enable_hw(&csdev->access);
        atomic_inc(csdev->refcnt);
        dev_dbg(&csdev->dev, "TPIU enabled\n");
        return 0;
 }
 
-static void tpiu_disable_hw(struct tpiu_drvdata *drvdata)
+static void tpiu_disable_hw(struct csdev_access *csa)
 {
-       CS_UNLOCK(drvdata->base);
+       CS_UNLOCK(csa->base);
 
        /* Clear formatter and stop on flush */
-       writel_relaxed(FFCR_STOP_FI, drvdata->base + TPIU_FFCR);
+       csdev_access_relaxed_write32(csa, FFCR_STOP_FI, TPIU_FFCR);
        /* Generate manual flush */
-       writel_relaxed(FFCR_STOP_FI | FFCR_FON_MAN, drvdata->base + TPIU_FFCR);
+       csdev_access_relaxed_write32(csa, FFCR_STOP_FI | FFCR_FON_MAN, TPIU_FFCR);
        /* Wait for flush to complete */
-       coresight_timeout(drvdata->base, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
+       coresight_timeout(csa, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
        /* Wait for formatter to stop */
-       coresight_timeout(drvdata->base, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
+       coresight_timeout(csa, TPIU_FFSR, FFSR_FT_STOPPED_BIT, 1);
 
-       CS_LOCK(drvdata->base);
+       CS_LOCK(csa->base);
 }
 
 static int tpiu_disable(struct coresight_device *csdev)
 {
-       struct tpiu_drvdata *drvdata = dev_get_drvdata(csdev->dev.parent);
-
        if (atomic_dec_return(csdev->refcnt))
                return -EBUSY;
 
-       tpiu_disable_hw(drvdata);
+       tpiu_disable_hw(&csdev->access);
 
        dev_dbg(&csdev->dev, "TPIU disabled\n");
        return 0;
@@ -149,9 +145,10 @@ static int tpiu_probe(struct amba_device *adev, const struct amba_id *id)
                return PTR_ERR(base);
 
        drvdata->base = base;
+       desc.access = CSDEV_ACCESS_IOMEM(base);
 
        /* Disable tpiu to support older devices */
-       tpiu_disable_hw(drvdata);
+       tpiu_disable_hw(&desc.access);
 
        pdata = coresight_get_platform_data(dev);
        if (IS_ERR(pdata))
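
tpiu_disable_hw() above is the template for the new abstraction: hardware
helpers take a struct csdev_access and use the csdev_access_*() wrappers plus
the retargeted coresight_timeout(), so they no longer need the driver's
drvdata at all. A minimal sketch reusing the constants from the hunk:

static void tpiu_flush(struct csdev_access *csa)
{
	/* request a manual flush, then poll until it completes */
	csdev_access_relaxed_write32(csa, FFCR_STOP_FI | FFCR_FON_MAN, TPIU_FFCR);
	coresight_timeout(csa, TPIU_FFCR, FFCR_FON_MAN_BIT, 0);
}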
index d4e0a0f..ba766d2 100644 (file)
@@ -316,7 +316,7 @@ static int brcmstb_send_i2c_cmd(struct brcmstb_i2c_dev *dev,
                goto cmd_out;
        }
 
-       if ((CMD_RD || CMD_WR) &&
+       if ((cmd == CMD_RD || cmd == CMD_WR) &&
            bsc_readl(dev, iic_enable) & BSC_IIC_EN_NOACK_MASK) {
                rc = -EREMOTEIO;
                dev_dbg(dev->device, "controller received NOACK intr for %s\n",
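
The brcmstb change fixes a constant-condition bug: CMD_RD and CMD_WR are
nonzero constants, so the old test folded to true at compile time and the
NOACK check ran for every command type.

if ((CMD_RD || CMD_WR) && ...)			/* always (1 && ...) */
if ((cmd == CMD_RD || cmd == CMD_WR) && ...)	/* checks the actual command */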
index 85307cf..5392b82 100644 (file)
@@ -38,6 +38,8 @@
 #define DW_IC_CON_TX_EMPTY_CTRL                        BIT(8)
 #define DW_IC_CON_RX_FIFO_FULL_HLD_CTRL                BIT(9)
 
+#define DW_IC_DATA_CMD_DAT                     GENMASK(7, 0)
+
 /*
  * Registers offset
  */
index d6425ad..dd27b9d 100644 (file)
@@ -432,7 +432,7 @@ i2c_dw_read(struct dw_i2c_dev *dev)
                        regmap_read(dev->map, DW_IC_DATA_CMD, &tmp);
                        /* Ensure length byte is a valid value */
                        if (flags & I2C_M_RECV_LEN &&
-                           tmp <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
+                           (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0) {
                                len = i2c_dw_recv_len(dev, tmp);
                        }
                        *buf++ = tmp;
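
DW_IC_DATA_CMD carries status flags above bit 7 (reportedly a first-data-byte
indication on newer DesignWare IP; worth checking against the databook), so a
raw register value could spuriously fail the SMBus block-length check.
Masking isolates the payload byte before the comparison:

len_ok = (tmp & DW_IC_DATA_CMD_DAT) <= I2C_SMBUS_BLOCK_MAX && tmp > 0;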
index 20a9881..5ac30d9 100644 (file)
@@ -606,6 +606,7 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
        u32 i2c_ctl;
        u32 int_en = 0;
        u32 i2c_auto_conf = 0;
+       u32 i2c_addr = 0;
        u32 fifo_ctl;
        unsigned long flags;
        unsigned short trig_lvl;
@@ -640,7 +641,12 @@ static void exynos5_i2c_message_start(struct exynos5_i2c *i2c, int stop)
                int_en |= HSI2C_INT_TX_ALMOSTEMPTY_EN;
        }
 
-       writel(HSI2C_SLV_ADDR_MAS(i2c->msg->addr), i2c->regs + HSI2C_ADDR);
+       i2c_addr = HSI2C_SLV_ADDR_MAS(i2c->msg->addr);
+
+       if (i2c->op_clock >= I2C_MAX_FAST_MODE_PLUS_FREQ)
+               i2c_addr |= HSI2C_MASTER_ID(MASTER_ID(i2c->adap.nr));
+
+       writel(i2c_addr, i2c->regs + HSI2C_ADDR);
 
        writel(fifo_ctl, i2c->regs + HSI2C_FIFO_CTL);
        writel(i2c_ctl, i2c->regs + HSI2C_CTL);
index c3f5847..214b4c9 100644 (file)
@@ -375,32 +375,6 @@ static void geni_i2c_tx_msg_cleanup(struct geni_i2c_dev *gi2c,
        }
 }
 
-static void geni_i2c_stop_xfer(struct geni_i2c_dev *gi2c)
-{
-       int ret;
-       u32 geni_status;
-       struct i2c_msg *cur;
-
-       /* Resume device, as runtime suspend can happen anytime during transfer */
-       ret = pm_runtime_get_sync(gi2c->se.dev);
-       if (ret < 0) {
-               dev_err(gi2c->se.dev, "Failed to resume device: %d\n", ret);
-               return;
-       }
-
-       geni_status = readl_relaxed(gi2c->se.base + SE_GENI_STATUS);
-       if (geni_status & M_GENI_CMD_ACTIVE) {
-               cur = gi2c->cur;
-               geni_i2c_abort_xfer(gi2c);
-               if (cur->flags & I2C_M_RD)
-                       geni_i2c_rx_msg_cleanup(gi2c, cur);
-               else
-                       geni_i2c_tx_msg_cleanup(gi2c, cur);
-       }
-
-       pm_runtime_put_sync_suspend(gi2c->se.dev);
-}
-
 static int geni_i2c_rx_one_msg(struct geni_i2c_dev *gi2c, struct i2c_msg *msg,
                                u32 m_param)
 {
@@ -676,13 +650,6 @@ static int geni_i2c_remove(struct platform_device *pdev)
        return 0;
 }
 
-static void  geni_i2c_shutdown(struct platform_device *pdev)
-{
-       struct geni_i2c_dev *gi2c = platform_get_drvdata(pdev);
-
-       geni_i2c_stop_xfer(gi2c);
-}
-
 static int __maybe_unused geni_i2c_runtime_suspend(struct device *dev)
 {
        int ret;
@@ -747,7 +714,6 @@ MODULE_DEVICE_TABLE(of, geni_i2c_dt_match);
 static struct platform_driver geni_i2c_driver = {
        .probe  = geni_i2c_probe,
        .remove = geni_i2c_remove,
-       .shutdown = geni_i2c_shutdown,
        .driver = {
                .name = "geni_i2c",
                .pm = &geni_i2c_pm_ops,
index 77af4c1..bb86d84 100644 (file)
@@ -164,6 +164,7 @@ static int __init falconide_init(struct platform_device *pdev)
        if (rc)
                goto err_free;
 
+       platform_set_drvdata(pdev, host);
        return 0;
 err_free:
        ide_host_free(host);
@@ -174,7 +175,7 @@ err:
 
 static int falconide_remove(struct platform_device *pdev)
 {
-       struct ide_host *host = dev_get_drvdata(&pdev->dev);
+       struct ide_host *host = platform_get_drvdata(pdev);
 
        ide_host_remove(host);
 
index bf7d22f..e0667c4 100644 (file)
@@ -266,6 +266,8 @@ config ADI_AXI_ADC
        select IIO_BUFFER
        select IIO_BUFFER_HW_CONSUMER
        select IIO_BUFFER_DMAENGINE
+       depends on HAS_IOMEM
+       depends on OF
        help
          Say yes here to build support for Analog Devices Generic
          AXI ADC IP core. The IP core is used for interfacing with
@@ -923,6 +925,7 @@ config STM32_ADC_CORE
        depends on ARCH_STM32 || COMPILE_TEST
        depends on OF
        depends on REGULATOR
+       depends on HAS_IOMEM
        select IIO_BUFFER
        select MFD_STM32_TIMERS
        select IIO_STM32_TIMER_TRIGGER
index 6f9a3e2..7b5212b 100644 (file)
@@ -918,7 +918,7 @@ static int ab8500_gpadc_read_raw(struct iio_dev *indio_dev,
                        return processed;
 
                /* Return millivolt or milliamps or millicentigrades */
-               *val = processed * 1000;
+               *val = processed;
                return IIO_VAL_INT;
        }
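
Per the comment retained in the hunk, 'processed' is already in milli-units
(mV, mA, m°C); the removed multiplication inflated every reading a
thousandfold. E.g. for a 1.5 V input:

*val = processed;	/* 1500, i.e. 1500 mV -- not 1500000 */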
 
index 5d597e5..1b4b320 100644 (file)
@@ -91,7 +91,7 @@ static int ad7949_spi_read_channel(struct ad7949_adc_chip *ad7949_adc, int *val,
        int ret;
        int i;
        int bits_per_word = ad7949_adc->resolution;
-       int mask = GENMASK(ad7949_adc->resolution, 0);
+       int mask = GENMASK(ad7949_adc->resolution - 1, 0);
        struct spi_message msg;
        struct spi_transfer tx[] = {
                {
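
GENMASK(h, l) spans bits h..l inclusive, so an n-bit sample needs
GENMASK(n - 1, 0); the old GENMASK(resolution, 0) built an (n + 1)-bit mask
that could let a stale bit above the sample through. For the 14-bit AD7949:

int mask = GENMASK(13, 0);	/* 0x3fff: exactly 14 bits */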
index 05ff948..07b1a99 100644 (file)
@@ -597,7 +597,7 @@ static const struct vadc_channels vadc_chans[] = {
        VADC_CHAN_NO_SCALE(P_MUX16_1_3, 1)
 
        VADC_CHAN_NO_SCALE(LR_MUX1_BAT_THERM, 0)
-       VADC_CHAN_NO_SCALE(LR_MUX2_BAT_ID, 0)
+       VADC_CHAN_VOLT(LR_MUX2_BAT_ID, 0, SCALE_DEFAULT)
        VADC_CHAN_NO_SCALE(LR_MUX3_XO_THERM, 0)
        VADC_CHAN_NO_SCALE(LR_MUX4_AMUX_THM1, 0)
        VADC_CHAN_NO_SCALE(LR_MUX5_AMUX_THM2, 0)
index dfa31a2..ac90be0 100644 (file)
@@ -551,6 +551,8 @@ static irqreturn_t mpu3050_trigger_handler(int irq, void *p)
                                               MPU3050_FIFO_R,
                                               &fifo_values[offset],
                                               toread);
+                       if (ret)
+                               goto out_trigger_unlock;
 
                        dev_dbg(mpu3050->dev,
                                "%04x %04x %04x %04x %04x\n",
index 52f6051..d627054 100644 (file)
 struct hid_humidity_state {
        struct hid_sensor_common common_attributes;
        struct hid_sensor_hub_attribute_info humidity_attr;
-       s32 humidity_data;
+       struct {
+               s32 humidity_data;
+               u64 timestamp __aligned(8);
+       } scan;
        int scale_pre_decml;
        int scale_post_decml;
        int scale_precision;
@@ -125,9 +128,8 @@ static int humidity_proc_event(struct hid_sensor_hub_device *hsdev,
        struct hid_humidity_state *humid_st = iio_priv(indio_dev);
 
        if (atomic_read(&humid_st->common_attributes.data_ready))
-               iio_push_to_buffers_with_timestamp(indio_dev,
-                                       &humid_st->humidity_data,
-                                       iio_get_time_ns(indio_dev));
+               iio_push_to_buffers_with_timestamp(indio_dev, &humid_st->scan,
+                                                  iio_get_time_ns(indio_dev));
 
        return 0;
 }
@@ -142,7 +144,7 @@ static int humidity_capture_sample(struct hid_sensor_hub_device *hsdev,
 
        switch (usage_id) {
        case HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY:
-               humid_st->humidity_data = *(s32 *)raw_data;
+               humid_st->scan.humidity_data = *(s32 *)raw_data;
 
                return 0;
        default:
index 54af2ed..785a4ce 100644 (file)
@@ -462,8 +462,7 @@ static int adis16400_initial_setup(struct iio_dev *indio_dev)
                if (ret)
                        goto err_ret;
 
-               ret = sscanf(indio_dev->name, "adis%u\n", &device_id);
-               if (ret != 1) {
+               if (sscanf(indio_dev->name, "adis%u\n", &device_id) != 1) {
                        ret = -EINVAL;
                        goto err_ret;
                }
index 330cf35..e9e00ce 100644 (file)
@@ -23,6 +23,9 @@ struct prox_state {
        struct hid_sensor_common common_attributes;
        struct hid_sensor_hub_attribute_info prox_attr;
        u32 human_presence;
+       int scale_pre_decml;
+       int scale_post_decml;
+       int scale_precision;
 };
 
 /* Channel definitions */
@@ -93,8 +96,9 @@ static int prox_read_raw(struct iio_dev *indio_dev,
                ret_type = IIO_VAL_INT;
                break;
        case IIO_CHAN_INFO_SCALE:
-               *val = prox_state->prox_attr.units;
-               ret_type = IIO_VAL_INT;
+               *val = prox_state->scale_pre_decml;
+               *val2 = prox_state->scale_post_decml;
+               ret_type = prox_state->scale_precision;
                break;
        case IIO_CHAN_INFO_OFFSET:
                *val = hid_sensor_convert_exponent(
@@ -234,6 +238,11 @@ static int prox_parse_report(struct platform_device *pdev,
                        HID_USAGE_SENSOR_HUMAN_PRESENCE,
                        &st->common_attributes.sensitivity);
 
+       st->scale_precision = hid_sensor_format_scale(
+                               hsdev->usage,
+                               &st->prox_attr,
+                               &st->scale_pre_decml, &st->scale_post_decml);
+
        return ret;
 }
 
index 81688f1..da9a247 100644 (file)
 struct temperature_state {
        struct hid_sensor_common common_attributes;
        struct hid_sensor_hub_attribute_info temperature_attr;
-       s32 temperature_data;
+       struct {
+               s32 temperature_data;
+               u64 timestamp __aligned(8);
+       } scan;
        int scale_pre_decml;
        int scale_post_decml;
        int scale_precision;
@@ -32,7 +35,7 @@ static const struct iio_chan_spec temperature_channels[] = {
                        BIT(IIO_CHAN_INFO_SAMP_FREQ) |
                        BIT(IIO_CHAN_INFO_HYSTERESIS),
        },
-       IIO_CHAN_SOFT_TIMESTAMP(3),
+       IIO_CHAN_SOFT_TIMESTAMP(1),
 };
 
 /* Adjust channel real bits based on report descriptor */
@@ -123,9 +126,8 @@ static int temperature_proc_event(struct hid_sensor_hub_device *hsdev,
        struct temperature_state *temp_st = iio_priv(indio_dev);
 
        if (atomic_read(&temp_st->common_attributes.data_ready))
-               iio_push_to_buffers_with_timestamp(indio_dev,
-                               &temp_st->temperature_data,
-                               iio_get_time_ns(indio_dev));
+               iio_push_to_buffers_with_timestamp(indio_dev, &temp_st->scan,
+                                                  iio_get_time_ns(indio_dev));
 
        return 0;
 }
@@ -140,7 +142,7 @@ static int temperature_capture_sample(struct hid_sensor_hub_device *hsdev,
 
        switch (usage_id) {
        case HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE:
-               temp_st->temperature_data = *(s32 *)raw_data;
+               temp_st->scan.temperature_data = *(s32 *)raw_data;
                return 0;
        default:
                return -EINVAL;
index 0abce00..65e3e7d 100644 (file)
@@ -76,7 +76,9 @@ static struct workqueue_struct *addr_wq;
 
 static const struct nla_policy ib_nl_addr_policy[LS_NLA_TYPE_MAX] = {
        [LS_NLA_TYPE_DGID] = {.type = NLA_BINARY,
-               .len = sizeof(struct rdma_nla_ls_gid)},
+               .len = sizeof(struct rdma_nla_ls_gid),
+               .validation_type = NLA_VALIDATE_MIN,
+               .min = sizeof(struct rdma_nla_ls_gid)},
 };
 
 static inline bool ib_nl_is_good_ip_resp(const struct nlmsghdr *nlh)
index be996db..3d194bb 100644 (file)
@@ -3651,6 +3651,7 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
                                   struct ib_cm_sidr_rep_param *param)
 {
        struct ib_mad_send_buf *msg;
+       unsigned long flags;
        int ret;
 
        lockdep_assert_held(&cm_id_priv->lock);
@@ -3676,12 +3677,12 @@ static int cm_send_sidr_rep_locked(struct cm_id_private *cm_id_priv,
                return ret;
        }
        cm_id_priv->id.state = IB_CM_IDLE;
-       spin_lock_irq(&cm.lock);
+       spin_lock_irqsave(&cm.lock, flags);
        if (!RB_EMPTY_NODE(&cm_id_priv->sidr_id_node)) {
                rb_erase(&cm_id_priv->sidr_id_node, &cm.remote_sidr_table);
                RB_CLEAR_NODE(&cm_id_priv->sidr_id_node);
        }
-       spin_unlock_irq(&cm.lock);
+       spin_unlock_irqrestore(&cm.lock, flags);
        return 0;
 }
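
The irqsave conversion matters because cm_send_sidr_rep_locked() runs with
cm_id_priv->lock held by callers that took it with spin_lock_irqsave(); a
nested spin_unlock_irq() would unconditionally re-enable interrupts and
corrupt the caller's IRQ state. The save/restore pair preserves it:

unsigned long flags;

spin_lock_irqsave(&cm.lock, flags);	/* remembers the current IRQ state */
...
spin_unlock_irqrestore(&cm.lock, flags);	/* restores it, never force-enables */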
 
index e47c594..ff047eb 100644 (file)
@@ -91,7 +91,7 @@ void uapi_compute_bundle_size(struct uverbs_api_ioctl_method *method_elm,
 }
 
 /**
- * uverbs_alloc() - Quickly allocate memory for use with a bundle
+ * _uverbs_alloc() - Quickly allocate memory for use with a bundle
  * @bundle: The bundle
  * @size: Number of bytes to allocate
  * @flags: Allocator flags
index 8769e7a..e42c812 100644 (file)
@@ -3610,13 +3610,14 @@ int c4iw_destroy_listen(struct iw_cm_id *cm_id)
            ep->com.local_addr.ss_family == AF_INET) {
                err = cxgb4_remove_server_filter(
                        ep->com.dev->rdev.lldi.ports[0], ep->stid,
-                       ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+                       ep->com.dev->rdev.lldi.rxq_ids[0], false);
        } else {
                struct sockaddr_in6 *sin6;
                c4iw_init_wr_wait(ep->com.wr_waitp);
                err = cxgb4_remove_server(
                                ep->com.dev->rdev.lldi.ports[0], ep->stid,
-                               ep->com.dev->rdev.lldi.rxq_ids[0], 0);
+                               ep->com.dev->rdev.lldi.rxq_ids[0],
+                               ep->com.local_addr.ss_family == AF_INET6);
                if (err)
                        goto done;
                err = c4iw_wait_for_reply(&ep->com.dev->rdev, ep->com.wr_waitp,
index 2a91b8d..04b1e8f 100644 (file)
@@ -632,22 +632,11 @@ static void _dev_comp_vect_cpu_mask_clean_up(struct hfi1_devdata *dd,
  */
 int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
 {
-       int node = pcibus_to_node(dd->pcidev->bus);
        struct hfi1_affinity_node *entry;
        const struct cpumask *local_mask;
        int curr_cpu, possible, i, ret;
        bool new_entry = false;
 
-       /*
-        * If the BIOS does not have the NUMA node information set, select
-        * NUMA 0 so we get consistent performance.
-        */
-       if (node < 0) {
-               dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
-               node = 0;
-       }
-       dd->node = node;
-
        local_mask = cpumask_of_node(dd->node);
        if (cpumask_first(local_mask) >= nr_cpu_ids)
                local_mask = topology_core_cpumask(0);
@@ -660,7 +649,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
         * create an entry in the global affinity structure and initialize it.
         */
        if (!entry) {
-               entry = node_affinity_allocate(node);
+               entry = node_affinity_allocate(dd->node);
                if (!entry) {
                        dd_dev_err(dd,
                                   "Unable to allocate global affinity node\n");
@@ -751,6 +740,7 @@ int hfi1_dev_affinity_init(struct hfi1_devdata *dd)
        if (new_entry)
                node_affinity_add_tail(entry);
 
+       dd->affinity_entry = entry;
        mutex_unlock(&node_affinity.lock);
 
        return 0;
@@ -766,10 +756,9 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
 {
        struct hfi1_affinity_node *entry;
 
-       if (dd->node < 0)
-               return;
-
        mutex_lock(&node_affinity.lock);
+       if (!dd->affinity_entry)
+               goto unlock;
        entry = node_affinity_lookup(dd->node);
        if (!entry)
                goto unlock;
@@ -780,8 +769,8 @@ void hfi1_dev_affinity_clean_up(struct hfi1_devdata *dd)
         */
        _dev_comp_vect_cpu_mask_clean_up(dd, entry);
 unlock:
+       dd->affinity_entry = NULL;
        mutex_unlock(&node_affinity.lock);
-       dd->node = NUMA_NO_NODE;
 }
 
 /*
index e09e824..2a9a040 100644 (file)
@@ -1409,6 +1409,7 @@ struct hfi1_devdata {
        spinlock_t irq_src_lock;
        int vnic_num_vports;
        struct net_device *dummy_netdev;
+       struct hfi1_affinity_node *affinity_entry;
 
        /* Keeps track of IPoIB RSM rule users */
        atomic_t ipoib_rsm_usr_num;
index cb7ad12..786c631 100644 (file)
@@ -1277,7 +1277,6 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
        dd->pport = (struct hfi1_pportdata *)(dd + 1);
        dd->pcidev = pdev;
        pci_set_drvdata(pdev, dd);
-       dd->node = NUMA_NO_NODE;
 
        ret = xa_alloc_irq(&hfi1_dev_table, &dd->unit, dd, xa_limit_32b,
                        GFP_KERNEL);
@@ -1287,6 +1286,15 @@ static struct hfi1_devdata *hfi1_alloc_devdata(struct pci_dev *pdev,
                goto bail;
        }
        rvt_set_ibdev_name(&dd->verbs_dev.rdi, "%s_%d", class_name(), dd->unit);
+       /*
+        * If the BIOS does not have the NUMA node information set, select
+        * NUMA 0 so we get consistent performance.
+        */
+       dd->node = pcibus_to_node(pdev->bus);
+       if (dd->node == NUMA_NO_NODE) {
+               dd_dev_err(dd, "Invalid PCI NUMA node. Performance may be affected\n");
+               dd->node = 0;
+       }
 
        /*
         * Initialize all locks for the device. This needs to be as early as
index 1fb6e1a..1bcab99 100644 (file)
@@ -173,8 +173,7 @@ u32 hfi1_num_netdev_contexts(struct hfi1_devdata *dd, u32 available_contexts,
                return 0;
        }
 
-       cpumask_and(node_cpu_mask, cpu_mask,
-                   cpumask_of_node(pcibus_to_node(dd->pcidev->bus)));
+       cpumask_and(node_cpu_mask, cpu_mask, cpumask_of_node(dd->node));
 
        available_cpus = cpumask_weight(node_cpu_mask);
 
index c3934ab..ce26f97 100644 (file)
@@ -1194,8 +1194,10 @@ static void hns_roce_cmq_init_regs(struct hns_roce_dev *hr_dev, bool ring_type)
                           upper_32_bits(dma));
                roce_write(hr_dev, ROCEE_TX_CMQ_DEPTH_REG,
                           (u32)ring->desc_num >> HNS_ROCE_CMQ_DESC_NUM_S);
-               roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
+
+               /* Make sure to write tail first and then head */
                roce_write(hr_dev, ROCEE_TX_CMQ_TAIL_REG, 0);
+               roce_write(hr_dev, ROCEE_TX_CMQ_HEAD_REG, 0);
        } else {
                roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_L_REG, (u32)dma);
                roce_write(hr_dev, ROCEE_RX_CMQ_BASEADDR_H_REG,
index ebc2a43..07b8350 100644 (file)
@@ -1116,7 +1116,7 @@ static void devx_obj_build_destroy_cmd(void *in, void *out, void *din,
        case MLX5_CMD_OP_CREATE_MKEY:
                MLX5_SET(destroy_mkey_in, din, opcode,
                         MLX5_CMD_OP_DESTROY_MKEY);
-               MLX5_SET(destroy_mkey_in, in, mkey_index, *obj_id);
+               MLX5_SET(destroy_mkey_in, din, mkey_index, *obj_id);
                break;
        case MLX5_CMD_OP_CREATE_CQ:
                MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
@@ -2073,8 +2073,10 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_SUBSCRIBE_EVENT)(
 
                num_alloc_xa_entries++;
                event_sub = kzalloc(sizeof(*event_sub), GFP_KERNEL);
-               if (!event_sub)
+               if (!event_sub) {
+                       err = -ENOMEM;
                        goto err;
+               }
 
                list_add_tail(&event_sub->event_list, &sub_list);
                uverbs_uobject_get(&ev_file->uobj);
index 3746981..b103555 100644 (file)
@@ -1082,7 +1082,7 @@ end:
        return ret ? ret : npages;
 }
 
-/**
+/*
  * Parse a series of data segments for page fault handling.
  *
  * @dev:  Pointer to mlx5 IB device
index ec4b3f6..f5a52a6 100644 (file)
@@ -1078,7 +1078,7 @@ static int _create_kernel_qp(struct mlx5_ib_dev *dev,
 
        qpc = MLX5_ADDR_OF(create_qp_in, *in, qpc);
        MLX5_SET(qpc, qpc, uar_page, uar_index);
-       MLX5_SET(qpc, qpc, ts_format, MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT);
+       MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
        MLX5_SET(qpc, qpc, log_page_size, qp->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
 
        /* Set "fast registration enabled" for all kernel QPs */
@@ -1188,7 +1188,8 @@ static int get_rq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
                }
                return MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING;
        }
-       return MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT;
+       return fr_supported ? MLX5_RQC_TIMESTAMP_FORMAT_FREE_RUNNING :
+                             MLX5_RQC_TIMESTAMP_FORMAT_DEFAULT;
 }
 
 static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
@@ -1206,7 +1207,8 @@ static int get_sq_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq)
                }
                return MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING;
        }
-       return MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT;
+       return fr_supported ? MLX5_SQC_TIMESTAMP_FORMAT_FREE_RUNNING :
+                             MLX5_SQC_TIMESTAMP_FORMAT_DEFAULT;
 }
 
 static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
@@ -1217,7 +1219,8 @@ static int get_qp_ts_format(struct mlx5_ib_dev *dev, struct mlx5_ib_cq *send_cq,
                        MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING ||
                MLX5_CAP_ROCE(dev->mdev, qp_ts_format) ==
                        MLX5_QP_TIMESTAMP_FORMAT_CAP_FREE_RUNNING_AND_REAL_TIME;
-       int ts_format = MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
+       int ts_format = fr_supported ? MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING :
+                                      MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
 
        if (recv_cq &&
            recv_cq->create_flags & IB_UVERBS_CQ_FLAGS_TIMESTAMP_COMPLETION)
@@ -1930,6 +1933,7 @@ static int create_xrc_tgt_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
        if (qp->flags & IB_QP_CREATE_MANAGED_RECV)
                MLX5_SET(qpc, qpc, cd_slave_receive, 1);
 
+       MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(dev->mdev));
        MLX5_SET(qpc, qpc, rq_type, MLX5_SRQ_RQ);
        MLX5_SET(qpc, qpc, no_sq, 1);
        MLX5_SET(qpc, qpc, cqn_rcv, to_mcq(devr->c0)->mcq.cqn);
@@ -4873,6 +4877,7 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
        struct mlx5_ib_dev *dev;
        int has_net_offloads;
        __be64 *rq_pas0;
+       int ts_format;
        void *in;
        void *rqc;
        void *wq;
@@ -4881,6 +4886,10 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 
        dev = to_mdev(pd->device);
 
+       ts_format = get_rq_ts_format(dev, to_mcq(init_attr->cq));
+       if (ts_format < 0)
+               return ts_format;
+
        inlen = MLX5_ST_SZ_BYTES(create_rq_in) + sizeof(u64) * rwq->rq_num_pas;
        in = kvzalloc(inlen, GFP_KERNEL);
        if (!in)
@@ -4890,6 +4899,7 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
        rqc = MLX5_ADDR_OF(create_rq_in, in, ctx);
        MLX5_SET(rqc,  rqc, mem_rq_type,
                 MLX5_RQC_MEM_RQ_TYPE_MEMORY_RQ_INLINE);
+       MLX5_SET(rqc, rqc, ts_format, ts_format);
        MLX5_SET(rqc, rqc, user_index, rwq->user_index);
        MLX5_SET(rqc,  rqc, cqn, to_mcq(init_attr->cq)->mcq.cqn);
        MLX5_SET(rqc,  rqc, state, MLX5_RQC_STATE_RST);
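
All the mlx5 timestamp hunks apply one rule, visible in the diff itself: when
the device supports only the free-running timestamp format, say so explicitly
in the QP/SQ/RQ context instead of leaving the DEFAULT sentinel. The
selection collapses to:

ts_format = fr_supported ? MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING :
			   MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;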
index 0eb6a7a..9ea5422 100644 (file)
@@ -1244,7 +1244,8 @@ static int qedr_check_qp_attrs(struct ib_pd *ibpd, struct qedr_dev *dev,
         * TGT QP isn't associated with RQ/SQ
         */
        if ((attrs->qp_type != IB_QPT_GSI) && (dev->gsi_qp_created) &&
-           (attrs->qp_type != IB_QPT_XRC_TGT)) {
+           (attrs->qp_type != IB_QPT_XRC_TGT) &&
+           (attrs->qp_type != IB_QPT_XRC_INI)) {
                struct qedr_cq *send_cq = get_qedr_cq(attrs->send_cq);
                struct qedr_cq *recv_cq = get_qedr_cq(attrs->recv_cq);
 
index 4521490..06b8dc5 100644 (file)
@@ -4,6 +4,7 @@ config RDMA_RXE
        depends on INET && PCI && INFINIBAND
        depends on INFINIBAND_VIRT_DMA
        select NET_UDP_TUNNEL
+       select CRYPTO
        select CRYPTO_CRC32
        help
        This driver implements the InfiniBand RDMA transport over
index a8ac791..17a361b 100644 (file)
@@ -547,6 +547,7 @@ int rxe_completer(void *arg)
        struct sk_buff *skb = NULL;
        struct rxe_pkt_info *pkt = NULL;
        enum comp_state state;
+       int ret = 0;
 
        rxe_add_ref(qp);
 
@@ -554,7 +555,8 @@ int rxe_completer(void *arg)
            qp->req.state == QP_STATE_RESET) {
                rxe_drain_resp_pkts(qp, qp->valid &&
                                    qp->req.state == QP_STATE_ERROR);
-               goto exit;
+               ret = -EAGAIN;
+               goto done;
        }
 
        if (qp->comp.timeout) {
@@ -564,8 +566,10 @@ int rxe_completer(void *arg)
                qp->comp.timeout_retry = 0;
        }
 
-       if (qp->req.need_retry)
-               goto exit;
+       if (qp->req.need_retry) {
+               ret = -EAGAIN;
+               goto done;
+       }
 
        state = COMPST_GET_ACK;
 
@@ -636,8 +640,6 @@ int rxe_completer(void *arg)
                        break;
 
                case COMPST_DONE:
-                       if (pkt)
-                               free_pkt(pkt);
                        goto done;
 
                case COMPST_EXIT:
@@ -660,7 +662,8 @@ int rxe_completer(void *arg)
                            qp->qp_timeout_jiffies)
                                mod_timer(&qp->retrans_timer,
                                          jiffies + qp->qp_timeout_jiffies);
-                       goto exit;
+                       ret = -EAGAIN;
+                       goto done;
 
                case COMPST_ERROR_RETRY:
                        /* we come here if the retry timer fired and we did
@@ -672,18 +675,18 @@ int rxe_completer(void *arg)
                         */
 
                        /* there is nothing to retry in this case */
-                       if (!wqe || (wqe->state == wqe_state_posted))
-                               goto exit;
+                       if (!wqe || (wqe->state == wqe_state_posted)) {
+                               pr_warn("Retry attempted without a valid wqe\n");
+                               ret = -EAGAIN;
+                               goto done;
+                       }
 
                        /* if we've started a retry, don't start another
                         * retry sequence, unless this is a timeout.
                         */
                        if (qp->comp.started_retry &&
-                           !qp->comp.timeout_retry) {
-                               if (pkt)
-                                       free_pkt(pkt);
+                           !qp->comp.timeout_retry)
                                goto done;
-                       }
 
                        if (qp->comp.retry_cnt > 0) {
                                if (qp->comp.retry_cnt != 7)
@@ -704,8 +707,6 @@ int rxe_completer(void *arg)
                                        qp->comp.started_retry = 1;
                                        rxe_run_task(&qp->req.task, 0);
                                }
-                               if (pkt)
-                                       free_pkt(pkt);
                                goto done;
 
                        } else {
@@ -726,8 +727,8 @@ int rxe_completer(void *arg)
                                mod_timer(&qp->rnr_nak_timer,
                                          jiffies + rnrnak_jiffies(aeth_syn(pkt)
                                                & ~AETH_TYPE_MASK));
-                               free_pkt(pkt);
-                               goto exit;
+                               ret = -EAGAIN;
+                               goto done;
                        } else {
                                rxe_counter_inc(rxe,
                                                RXE_CNT_RNR_RETRY_EXCEEDED);
@@ -740,25 +741,15 @@ int rxe_completer(void *arg)
                        WARN_ON_ONCE(wqe->status == IB_WC_SUCCESS);
                        do_complete(qp, wqe);
                        rxe_qp_error(qp);
-                       if (pkt)
-                               free_pkt(pkt);
-                       goto exit;
+                       ret = -EAGAIN;
+                       goto done;
                }
        }
 
-exit:
-       /* we come here if we are done with processing and want the task to
-        * exit from the loop calling us
-        */
-       WARN_ON_ONCE(skb);
-       rxe_drop_ref(qp);
-       return -EAGAIN;
-
 done:
-       /* we come here if we have processed a packet we want the task to call
-        * us again to see if there is anything else to do
-        */
-       WARN_ON_ONCE(skb);
+       if (pkt)
+               free_pkt(pkt);
        rxe_drop_ref(qp);
-       return 0;
+
+       return ret;
 }
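
The rxe_completer() rework collapses the duplicated exit/done epilogues into
a single tail: every path funnels through done:, which frees any in-flight
packet exactly once, and ret distinguishes "run me again" (0) from "task
should leave the loop" (-EAGAIN). The resulting shape, in sketch form:

int ret = 0;
...
if (must_exit) {		/* illustrative condition */
	ret = -EAGAIN;
	goto done;
}
...
done:
	if (pkt)
		free_pkt(pkt);
	rxe_drop_ref(qp);
	return ret;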
index 0701bd1..0166272 100644 (file)
@@ -407,14 +407,22 @@ int rxe_send(struct rxe_pkt_info *pkt, struct sk_buff *skb)
        return 0;
 }
 
+/* fix up a send packet to match the packets
+ * received from UDP before looping them back
+ */
 void rxe_loopback(struct sk_buff *skb)
 {
+       struct rxe_pkt_info *pkt = SKB_TO_PKT(skb);
+
        if (skb->protocol == htons(ETH_P_IP))
                skb_pull(skb, sizeof(struct iphdr));
        else
                skb_pull(skb, sizeof(struct ipv6hdr));
 
-       rxe_rcv(skb);
+       if (WARN_ON(!ib_device_try_get(&pkt->rxe->ib_dev)))
+               kfree_skb(skb);
+       else
+               rxe_rcv(skb);
 }
 
 struct sk_buff *rxe_init_packet(struct rxe_dev *rxe, struct rxe_av *av,
index 45d2f71..7a49e27 100644 (file)
@@ -237,8 +237,6 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
        struct rxe_mc_elem *mce;
        struct rxe_qp *qp;
        union ib_gid dgid;
-       struct sk_buff *per_qp_skb;
-       struct rxe_pkt_info *per_qp_pkt;
        int err;
 
        if (skb->protocol == htons(ETH_P_IP))
@@ -250,10 +248,15 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
        /* lookup mcast group corresponding to mgid, takes a ref */
        mcg = rxe_pool_get_key(&rxe->mc_grp_pool, &dgid);
        if (!mcg)
-               goto err1;      /* mcast group not registered */
+               goto drop;      /* mcast group not registered */
 
        spin_lock_bh(&mcg->mcg_lock);
 
+       /* This is an unreliable datagram service, so we tolerate
+        * a failure to deliver a multicast packet to any single
+        * QP and simply move on to the rest of the QPs on the
+        * list.
+        */
        list_for_each_entry(mce, &mcg->qp_list, qp_list) {
                qp = mce->qp;
 
@@ -266,39 +269,47 @@ static void rxe_rcv_mcast_pkt(struct rxe_dev *rxe, struct sk_buff *skb)
                if (err)
                        continue;
 
-               /* for all but the last qp create a new clone of the
-                * skb and pass to the qp. If an error occurs in the
-                * checks for the last qp in the list we need to
-                * free the skb since it hasn't been passed on to
-                * rxe_rcv_pkt() which would free it later.
+               /* for all but the last QP create a new clone of the
+                * skb and pass to the QP. Pass the original skb to
+                * the last QP in the list.
                 */
                if (mce->qp_list.next != &mcg->qp_list) {
-                       per_qp_skb = skb_clone(skb, GFP_ATOMIC);
-                       if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
-                               kfree_skb(per_qp_skb);
+                       struct sk_buff *cskb;
+                       struct rxe_pkt_info *cpkt;
+
+                       cskb = skb_clone(skb, GFP_ATOMIC);
+                       if (unlikely(!cskb))
                                continue;
+
+                       if (WARN_ON(!ib_device_try_get(&rxe->ib_dev))) {
+                               kfree_skb(cskb);
+                               break;
                        }
+
+                       cpkt = SKB_TO_PKT(cskb);
+                       cpkt->qp = qp;
+                       rxe_add_ref(qp);
+                       rxe_rcv_pkt(cpkt, cskb);
                } else {
-                       per_qp_skb = skb;
-                       /* show we have consumed the skb */
-                       skb = NULL;
+                       pkt->qp = qp;
+                       rxe_add_ref(qp);
+                       rxe_rcv_pkt(pkt, skb);
+                       skb = NULL;     /* mark consumed */
                }
-
-               if (unlikely(!per_qp_skb))
-                       continue;
-
-               per_qp_pkt = SKB_TO_PKT(per_qp_skb);
-               per_qp_pkt->qp = qp;
-               rxe_add_ref(qp);
-               rxe_rcv_pkt(per_qp_pkt, per_qp_skb);
        }
 
        spin_unlock_bh(&mcg->mcg_lock);
 
        rxe_drop_ref(mcg);      /* drop ref from rxe_pool_get_key. */
 
-err1:
-       /* free skb if not consumed */
+       if (likely(!skb))
+               return;
+
+       /* This only occurs if one of the checks fails on the last
+        * QP in the list above
+        */
+
+drop:
        kfree_skb(skb);
        ib_device_put(&rxe->ib_dev);
 }
index 0a08b4b..6734329 100644 (file)
@@ -2720,8 +2720,8 @@ void rtrs_clt_close(struct rtrs_clt *clt)
 
        /* Now it is safe to iterate over all paths without locks */
        list_for_each_entry_safe(sess, tmp, &clt->paths_list, s.entry) {
-               rtrs_clt_destroy_sess_files(sess, NULL);
                rtrs_clt_close_conns(sess, true);
+               rtrs_clt_destroy_sess_files(sess, NULL);
                kobject_put(&sess->kobj);
        }
        free_clt(clt);
index a2b5fbb..da8963a 100644 (file)
@@ -26,7 +26,6 @@
 
 MODULE_AUTHOR("Vojtech Pavlik <vojtech@ucw.cz>");
 MODULE_DESCRIPTION("Joystick device interfaces");
-MODULE_SUPPORTED_DEVICE("input/js");
 MODULE_LICENSE("GPL");
 
 #define JOYDEV_MINOR_BASE      0
@@ -456,7 +455,7 @@ static int joydev_handle_JSIOCSAXMAP(struct joydev *joydev,
        if (IS_ERR(abspam))
                return PTR_ERR(abspam);
 
-       for (i = 0; i < joydev->nabs; i++) {
+       for (i = 0; i < len && i < joydev->nabs; i++) {
                if (abspam[i] > ABS_MAX) {
                        retval = -EINVAL;
                        goto out;
@@ -480,6 +479,9 @@ static int joydev_handle_JSIOCSBTNMAP(struct joydev *joydev,
        int i;
        int retval = 0;
 
+       if (len % sizeof(*keypam))
+               return -EINVAL;
+
        len = min(len, sizeof(joydev->keypam));
 
        /* Validate the map. */
@@ -487,7 +489,7 @@ static int joydev_handle_JSIOCSBTNMAP(struct joydev *joydev,
        if (IS_ERR(keypam))
                return PTR_ERR(keypam);
 
-       for (i = 0; i < joydev->nkey; i++) {
+       for (i = 0; i < (len / 2) && i < joydev->nkey; i++) {
                if (keypam[i] > KEY_MAX || keypam[i] < BTN_MISC) {
                        retval = -EINVAL;
                        goto out;
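
Both joydev hunks harden an ioctl against a short or ragged user buffer: the byte count must be a whole number of map entries, and the validation loop is bounded by the caller-supplied length as well as the device's entry count. A self-contained sketch of that check with hypothetical names (set_btn_map() is not the driver's function):

#include <errno.h>
#include <stdint.h>
#include <string.h>

#define KEY_MAX		0x2ff
#define BTN_MISC	0x100

static int set_btn_map(uint16_t *dst, size_t dst_entries,
		       const uint16_t *src, size_t len_bytes)
{
	size_t n, i;

	if (len_bytes % sizeof(*src))
		return -EINVAL;		/* ragged buffer: partial entry */

	n = len_bytes / sizeof(*src);
	if (n > dst_entries)
		n = dst_entries;	/* clamp, like min() in the driver */

	for (i = 0; i < n; i++) {	/* bounded by what was supplied */
		if (src[i] > KEY_MAX || src[i] < BTN_MISC)
			return -EINVAL;	/* not a button code */
		dst[i] = src[i];
	}
	return 0;
}

int main(void)
{
	uint16_t map[4] = { 0 };
	const uint16_t good[] = { 0x120, 0x121 };	/* BTN_TRIGGER, BTN_THUMB */

	return set_btn_map(map, 4, good, sizeof(good)) ? 1 : 0;
}
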
index b080f0c..5e38899 100644 (file)
@@ -382,4 +382,11 @@ config JOYSTICK_FSIA6B
          To compile this driver as a module, choose M here: the
          module will be called fsia6b.
 
+config JOYSTICK_N64
+       bool "N64 controller"
+       depends on MACH_NINTENDO64
+       help
+         Say Y here if you want to enable support for the four
+         built-in controller ports on the Nintendo 64 console.
+
 endif
index 58232b3..31d720c 100644 (file)
@@ -24,6 +24,7 @@ obj-$(CONFIG_JOYSTICK_INTERACT)               += interact.o
 obj-$(CONFIG_JOYSTICK_JOYDUMP)         += joydump.o
 obj-$(CONFIG_JOYSTICK_MAGELLAN)                += magellan.o
 obj-$(CONFIG_JOYSTICK_MAPLE)           += maplecontrol.o
+obj-$(CONFIG_JOYSTICK_N64)             += n64joy.o
 obj-$(CONFIG_JOYSTICK_PSXPAD_SPI)      += psxpad-spi.o
 obj-$(CONFIG_JOYSTICK_PXRC)            += pxrc.o
 obj-$(CONFIG_JOYSTICK_SIDEWINDER)      += sidewinder.o
@@ -37,4 +38,3 @@ obj-$(CONFIG_JOYSTICK_WARRIOR)                += warrior.o
 obj-$(CONFIG_JOYSTICK_WALKERA0701)     += walkera0701.o
 obj-$(CONFIG_JOYSTICK_XPAD)            += xpad.o
 obj-$(CONFIG_JOYSTICK_ZHENHUA)         += zhenhua.o
-
diff --git a/drivers/input/joystick/n64joy.c b/drivers/input/joystick/n64joy.c
new file mode 100644 (file)
index 0000000..8bcc529
--- /dev/null
@@ -0,0 +1,345 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Support for the four N64 controllers.
+ *
+ * Copyright (c) 2021 Lauri Kasanen
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/input.h>
+#include <linux/limits.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+#include <linux/timer.h>
+
+MODULE_AUTHOR("Lauri Kasanen <cand@gmx.com>");
+MODULE_DESCRIPTION("Driver for N64 controllers");
+MODULE_LICENSE("GPL");
+
+#define PIF_RAM 0x1fc007c0
+
+#define SI_DRAM_REG 0
+#define SI_READ_REG 1
+#define SI_WRITE_REG 4
+#define SI_STATUS_REG 6
+
+#define SI_STATUS_DMA_BUSY  BIT(0)
+#define SI_STATUS_IO_BUSY   BIT(1)
+
+#define N64_CONTROLLER_ID 0x0500
+
+#define MAX_CONTROLLERS 4
+
+static const char *n64joy_phys[MAX_CONTROLLERS] = {
+       "n64joy/port0",
+       "n64joy/port1",
+       "n64joy/port2",
+       "n64joy/port3",
+};
+
+struct n64joy_priv {
+       u64 si_buf[8] ____cacheline_aligned;
+       struct timer_list timer;
+       struct mutex n64joy_mutex;
+       struct input_dev *n64joy_dev[MAX_CONTROLLERS];
+       u32 __iomem *reg_base;
+       u8 n64joy_opened;
+};
+
+struct joydata {
+       unsigned int: 16; /* unused */
+       unsigned int err: 2;
+       unsigned int: 14; /* unused */
+
+       union {
+               u32 data;
+
+               struct {
+                       unsigned int a: 1;
+                       unsigned int b: 1;
+                       unsigned int z: 1;
+                       unsigned int start: 1;
+                       unsigned int up: 1;
+                       unsigned int down: 1;
+                       unsigned int left: 1;
+                       unsigned int right: 1;
+                       unsigned int: 2; /* unused */
+                       unsigned int l: 1;
+                       unsigned int r: 1;
+                       unsigned int c_up: 1;
+                       unsigned int c_down: 1;
+                       unsigned int c_left: 1;
+                       unsigned int c_right: 1;
+                       signed int x: 8;
+                       signed int y: 8;
+               };
+       };
+};
+
+static void n64joy_write_reg(u32 __iomem *reg_base, const u8 reg, const u32 value)
+{
+       writel(value, reg_base + reg);
+}
+
+static u32 n64joy_read_reg(u32 __iomem *reg_base, const u8 reg)
+{
+       return readl(reg_base + reg);
+}
+
+static void n64joy_wait_si_dma(u32 __iomem *reg_base)
+{
+       while (n64joy_read_reg(reg_base, SI_STATUS_REG) &
+              (SI_STATUS_DMA_BUSY | SI_STATUS_IO_BUSY))
+               cpu_relax();
+}
+
+static void n64joy_exec_pif(struct n64joy_priv *priv, const u64 in[8])
+{
+       unsigned long flags;
+
+       dma_cache_wback_inv((unsigned long) in, 8 * 8);
+       dma_cache_inv((unsigned long) priv->si_buf, 8 * 8);
+
+       local_irq_save(flags);
+
+       n64joy_wait_si_dma(priv->reg_base);
+
+       barrier();
+       n64joy_write_reg(priv->reg_base, SI_DRAM_REG, virt_to_phys(in));
+       barrier();
+       n64joy_write_reg(priv->reg_base, SI_WRITE_REG, PIF_RAM);
+       barrier();
+
+       n64joy_wait_si_dma(priv->reg_base);
+
+       barrier();
+       n64joy_write_reg(priv->reg_base, SI_DRAM_REG, virt_to_phys(priv->si_buf));
+       barrier();
+       n64joy_write_reg(priv->reg_base, SI_READ_REG, PIF_RAM);
+       barrier();
+
+       n64joy_wait_si_dma(priv->reg_base);
+
+       local_irq_restore(flags);
+}
+
+static const u64 polldata[] ____cacheline_aligned = {
+       0xff010401ffffffff,
+       0xff010401ffffffff,
+       0xff010401ffffffff,
+       0xff010401ffffffff,
+       0xfe00000000000000,
+       0,
+       0,
+       1
+};
+
+static void n64joy_poll(struct timer_list *t)
+{
+       const struct joydata *data;
+       struct n64joy_priv *priv = container_of(t, struct n64joy_priv, timer);
+       struct input_dev *dev;
+       u32 i;
+
+       n64joy_exec_pif(priv, polldata);
+
+       data = (struct joydata *) priv->si_buf;
+
+       for (i = 0; i < MAX_CONTROLLERS; i++) {
+               if (!priv->n64joy_dev[i])
+                       continue;
+
+               dev = priv->n64joy_dev[i];
+
+               /* d-pad */
+               input_report_key(dev, BTN_DPAD_UP, data[i].up);
+               input_report_key(dev, BTN_DPAD_DOWN, data[i].down);
+               input_report_key(dev, BTN_DPAD_LEFT, data[i].left);
+               input_report_key(dev, BTN_DPAD_RIGHT, data[i].right);
+
+               /* c buttons */
+               input_report_key(dev, BTN_FORWARD, data[i].c_up);
+               input_report_key(dev, BTN_BACK, data[i].c_down);
+               input_report_key(dev, BTN_LEFT, data[i].c_left);
+               input_report_key(dev, BTN_RIGHT, data[i].c_right);
+
+               /* matching buttons */
+               input_report_key(dev, BTN_START, data[i].start);
+               input_report_key(dev, BTN_Z, data[i].z);
+
+               /* remaining ones: a, b, l, r */
+               input_report_key(dev, BTN_0, data[i].a);
+               input_report_key(dev, BTN_1, data[i].b);
+               input_report_key(dev, BTN_2, data[i].l);
+               input_report_key(dev, BTN_3, data[i].r);
+
+               input_report_abs(dev, ABS_X, data[i].x);
+               input_report_abs(dev, ABS_Y, data[i].y);
+
+               input_sync(dev);
+       }
+
+       mod_timer(&priv->timer, jiffies + msecs_to_jiffies(16));
+}
+
+static int n64joy_open(struct input_dev *dev)
+{
+       struct n64joy_priv *priv = input_get_drvdata(dev);
+       int err;
+
+       err = mutex_lock_interruptible(&priv->n64joy_mutex);
+       if (err)
+               return err;
+
+       if (!priv->n64joy_opened) {
+               /*
+                * We could use the vblank irq, but it's not important if
+                * the poll point slightly changes.
+                */
+               timer_setup(&priv->timer, n64joy_poll, 0);
+               mod_timer(&priv->timer, jiffies + msecs_to_jiffies(16));
+       }
+
+       priv->n64joy_opened++;
+
+       mutex_unlock(&priv->n64joy_mutex);
+       return err;
+}
+
+static void n64joy_close(struct input_dev *dev)
+{
+       struct n64joy_priv *priv = input_get_drvdata(dev);
+
+       mutex_lock(&priv->n64joy_mutex);
+       if (!--priv->n64joy_opened)
+               del_timer_sync(&priv->timer);
+       mutex_unlock(&priv->n64joy_mutex);
+}
+
+static const u64 __initconst scandata[] ____cacheline_aligned = {
+       0xff010300ffffffff,
+       0xff010300ffffffff,
+       0xff010300ffffffff,
+       0xff010300ffffffff,
+       0xfe00000000000000,
+       0,
+       0,
+       1
+};
+
+/*
+ * The target device is embedded and RAM-constrained. We save RAM
+ * by initializing in __init code that gets dropped late in boot.
+ * For the same reason there is no module or unloading support.
+ */
+static int __init n64joy_probe(struct platform_device *pdev)
+{
+       const struct joydata *data;
+       struct n64joy_priv *priv;
+       struct input_dev *dev;
+       int err = 0;
+       u32 i, j, found = 0;
+
+       priv = kzalloc(sizeof(struct n64joy_priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       mutex_init(&priv->n64joy_mutex);
+
+       priv->reg_base = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(priv->reg_base)) {
+               err = PTR_ERR(priv->reg_base);
+               goto fail;
+       }
+
+       /* The controllers are not hotpluggable, so we can scan in init */
+       n64joy_exec_pif(priv, scandata);
+
+       data = (struct joydata *) priv->si_buf;
+
+       for (i = 0; i < MAX_CONTROLLERS; i++) {
+               if (!data[i].err && data[i].data >> 16 == N64_CONTROLLER_ID) {
+                       found++;
+
+                       dev = priv->n64joy_dev[i] = input_allocate_device();
+                       if (!priv->n64joy_dev[i]) {
+                               err = -ENOMEM;
+                               goto fail;
+                       }
+
+                       input_set_drvdata(dev, priv);
+
+                       dev->name = "N64 controller";
+                       dev->phys = n64joy_phys[i];
+                       dev->id.bustype = BUS_HOST;
+                       dev->id.vendor = 0;
+                       dev->id.product = data[i].data >> 16;
+                       dev->id.version = 0;
+                       dev->dev.parent = &pdev->dev;
+
+                       dev->open = n64joy_open;
+                       dev->close = n64joy_close;
+
+                       /* d-pad */
+                       input_set_capability(dev, EV_KEY, BTN_DPAD_UP);
+                       input_set_capability(dev, EV_KEY, BTN_DPAD_DOWN);
+                       input_set_capability(dev, EV_KEY, BTN_DPAD_LEFT);
+                       input_set_capability(dev, EV_KEY, BTN_DPAD_RIGHT);
+                       /* c buttons */
+                       input_set_capability(dev, EV_KEY, BTN_LEFT);
+                       input_set_capability(dev, EV_KEY, BTN_RIGHT);
+                       input_set_capability(dev, EV_KEY, BTN_FORWARD);
+                       input_set_capability(dev, EV_KEY, BTN_BACK);
+                       /* matching buttons */
+                       input_set_capability(dev, EV_KEY, BTN_START);
+                       input_set_capability(dev, EV_KEY, BTN_Z);
+                       /* remaining ones: a, b, l, r */
+                       input_set_capability(dev, EV_KEY, BTN_0);
+                       input_set_capability(dev, EV_KEY, BTN_1);
+                       input_set_capability(dev, EV_KEY, BTN_2);
+                       input_set_capability(dev, EV_KEY, BTN_3);
+
+                       for (j = 0; j < 2; j++)
+                               input_set_abs_params(dev, ABS_X + j,
+                                                    S8_MIN, S8_MAX, 0, 0);
+
+                       err = input_register_device(dev);
+                       if (err) {
+                               input_free_device(dev);
+                               goto fail;
+                       }
+               }
+       }
+
+       pr_info("%u controller(s) connected\n", found);
+
+       if (!found)
+               return -ENODEV;
+
+       return 0;
+fail:
+       for (i = 0; i < MAX_CONTROLLERS; i++) {
+               if (!priv->n64joy_dev[i])
+                       continue;
+               input_unregister_device(priv->n64joy_dev[i]);
+       }
+       return err;
+}
+
+static struct platform_driver n64joy_driver = {
+       .driver = {
+               .name = "n64joy",
+       },
+};
+
+static int __init n64joy_init(void)
+{
+       return platform_driver_probe(&n64joy_driver, n64joy_probe);
+}
+
+module_init(n64joy_init);
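
The struct joydata above relies on the compiler packing bitfields from the most significant bit downward, which holds on big-endian MIPS (the N64's CPU) but is implementation-defined in general. Below is a hedged, endian-explicit decode of one 32-bit controller word under that layout assumption; the sample value and all names are made up for illustration:

#include <stdint.h>
#include <stdio.h>

struct n64_buttons {
	unsigned int a, b, z, start, d_up, d_down, d_left, d_right;
	unsigned int l, r, c_up, c_down, c_left, c_right;
	int8_t x, y;
};

/* Mirrors the bitfield layout of struct joydata's data word,
 * assuming MSB-first allocation as on big-endian MIPS.
 */
static struct n64_buttons n64_decode(uint32_t w)
{
	struct n64_buttons s = {
		.a       = (w >> 31) & 1,
		.b       = (w >> 30) & 1,
		.z       = (w >> 29) & 1,
		.start   = (w >> 28) & 1,
		.d_up    = (w >> 27) & 1,
		.d_down  = (w >> 26) & 1,
		.d_left  = (w >> 25) & 1,
		.d_right = (w >> 24) & 1,
		/* bits 23-22 unused */
		.l       = (w >> 21) & 1,
		.r       = (w >> 20) & 1,
		.c_up    = (w >> 19) & 1,
		.c_down  = (w >> 18) & 1,
		.c_left  = (w >> 17) & 1,
		.c_right = (w >> 16) & 1,
		.x       = (int8_t)((w >> 8) & 0xff),
		.y       = (int8_t)(w & 0xff),
	};
	return s;
}

int main(void)
{
	/* hypothetical sample: A and Start held, stick pushed right */
	struct n64_buttons s = n64_decode(0x90004000);

	printf("a=%u start=%u x=%d y=%d\n", s.a, s.start, s.x, s.y);
	return 0;
}
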
index 8cc8ca4..9f0d07d 100644 (file)
@@ -305,6 +305,7 @@ static const struct xpad_device {
        { 0x1bad, 0xfd00, "Razer Onza TE", 0, XTYPE_XBOX360 },
        { 0x1bad, 0xfd01, "Razer Onza", 0, XTYPE_XBOX360 },
        { 0x20d6, 0x2001, "BDA Xbox Series X Wired Controller", 0, XTYPE_XBOXONE },
+       { 0x20d6, 0x2009, "PowerA Enhanced Wired Controller for Xbox Series X|S", 0, XTYPE_XBOXONE },
        { 0x20d6, 0x281f, "PowerA Wired Controller For Xbox 360", 0, XTYPE_XBOX360 },
        { 0x2e24, 0x0652, "Hyperkin Duke X-Box One pad", 0, XTYPE_XBOXONE },
        { 0x24c6, 0x5000, "Razer Atrox Arcade Stick", MAP_TRIGGERS_TO_BUTTONS, XTYPE_XBOX360 },
index 2b321c1..32d1580 100644 (file)
@@ -446,7 +446,7 @@ config KEYBOARD_MPR121
 
 config KEYBOARD_SNVS_PWRKEY
        tristate "IMX SNVS Power Key Driver"
-       depends on ARCH_MXC || COMPILE_TEST
+       depends on ARCH_MXC || (COMPILE_TEST && HAS_IOMEM)
        depends on OF
        help
          This is the snvs powerkey driver for the Freescale i.MX application
@@ -685,7 +685,7 @@ config KEYBOARD_OMAP
 
 config KEYBOARD_OMAP4
        tristate "TI OMAP4+ keypad support"
-       depends on OF || ARCH_OMAP2PLUS
+       depends on (OF && HAS_IOMEM) || ARCH_OMAP2PLUS
        select INPUT_MATRIXKMAP
        help
          Say Y here if you want to use the OMAP4+ keypad.
@@ -773,7 +773,7 @@ config KEYBOARD_CAP11XX
 
 config KEYBOARD_BCM
        tristate "Broadcom keypad driver"
-       depends on OF && HAVE_CLK
+       depends on OF && HAVE_CLK && HAS_IOMEM
        select INPUT_MATRIXKMAP
        default ARCH_BCM_CYGNUS
        help
index d222231..eda1b23 100644 (file)
@@ -48,6 +48,7 @@
 #include <linux/efi.h>
 #include <linux/input.h>
 #include <linux/input/mt.h>
+#include <linux/ktime.h>
 #include <linux/leds.h>
 #include <linux/module.h>
 #include <linux/spinlock.h>
@@ -409,7 +410,7 @@ struct applespi_data {
        unsigned int                    cmd_msg_cntr;
        /* lock to protect the above parameters and flags below */
        spinlock_t                      cmd_msg_lock;
-       bool                            cmd_msg_queued;
+       ktime_t                         cmd_msg_queued;
        enum applespi_evt_type          cmd_evt_type;
 
        struct led_classdev             backlight_info;
@@ -729,7 +730,7 @@ static void applespi_msg_complete(struct applespi_data *applespi,
                wake_up_all(&applespi->drain_complete);
 
        if (is_write_msg) {
-               applespi->cmd_msg_queued = false;
+               applespi->cmd_msg_queued = 0;
                applespi_send_cmd_msg(applespi);
        }
 
@@ -748,6 +749,8 @@ static void applespi_async_write_complete(void *context)
                                         applespi->tx_status,
                                         APPLESPI_STATUS_SIZE);
 
+       udelay(SPI_RW_CHG_DELAY_US);
+
        if (!applespi_check_write_status(applespi, applespi->wr_m.status)) {
                /*
                 * If we got an error, we presumably won't get the expected
@@ -771,8 +774,16 @@ static int applespi_send_cmd_msg(struct applespi_data *applespi)
                return 0;
 
        /* check whether send is in progress */
-       if (applespi->cmd_msg_queued)
-               return 0;
+       if (applespi->cmd_msg_queued) {
+               if (ktime_ms_delta(ktime_get(), applespi->cmd_msg_queued) < 1000)
+                       return 0;
+
+               dev_warn(&applespi->spi->dev, "Command %d timed out\n",
+                        applespi->cmd_evt_type);
+
+               applespi->cmd_msg_queued = 0;
+               applespi->write_active = false;
+       }
 
        /* set up packet */
        memset(packet, 0, APPLESPI_PACKET_SIZE);
@@ -869,7 +880,7 @@ static int applespi_send_cmd_msg(struct applespi_data *applespi)
                return sts;
        }
 
-       applespi->cmd_msg_queued = true;
+       applespi->cmd_msg_queued = ktime_get_coarse();
        applespi->write_active = true;
 
        return 0;
@@ -1921,7 +1932,7 @@ static int __maybe_unused applespi_resume(struct device *dev)
        applespi->drain = false;
        applespi->have_cl_led_on = false;
        applespi->have_bl_level = 0;
-       applespi->cmd_msg_queued = false;
+       applespi->cmd_msg_queued = 0;
        applespi->read_active = false;
        applespi->write_active = false;
 
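
The applespi change replaces a boolean in-flight flag with the enqueue timestamp, so a completion that never arrives can be detected and the slot reclaimed after one second. A userspace sketch of the pattern; CLOCK_MONOTONIC stands in for the kernel's ktime_get_coarse(), and try_send() is a hypothetical name:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

static int64_t now_ms(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return (int64_t)ts.tv_sec * 1000 + ts.tv_nsec / 1000000;
}

struct cmd_state {
	int64_t queued_ms;	/* 0: nothing in flight */
};

static int try_send(struct cmd_state *st)
{
	if (st->queued_ms) {
		if (now_ms() - st->queued_ms < 1000)
			return 0;	/* still in flight, back off */
		fprintf(stderr, "command timed out, reclaiming slot\n");
		st->queued_ms = 0;	/* drop the stuck command */
	}
	/* ... build and queue the new message here ... */
	st->queued_ms = now_ms();
	return 1;
}

int main(void)
{
	struct cmd_state st = { 0 };

	try_send(&st);	/* queues a command */
	try_send(&st);	/* suppressed: previous one still in flight */
	return 0;
}
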
index b379ed7..38457d9 100644 (file)
@@ -27,6 +27,8 @@
 
 #include <asm/unaligned.h>
 
+#define MAX_NUM_TOP_ROW_KEYS   15
+
 /**
  * struct cros_ec_keyb - Structure representing EC keyboard device
  *
@@ -42,6 +44,9 @@
  * @idev: The input device for the matrix keys.
  * @bs_idev: The input device for non-matrix buttons and switches (or NULL).
  * @notifier: interrupt event notifier for transport devices
+ * @function_row_physmap: An array of the encoded rows/columns for the top
+ *                        row function keys, in an order from left to right
+ * @num_function_row_keys: The number of top row keys in a custom keyboard
  */
 struct cros_ec_keyb {
        unsigned int rows;
@@ -58,6 +63,9 @@ struct cros_ec_keyb {
        struct input_dev *idev;
        struct input_dev *bs_idev;
        struct notifier_block notifier;
+
+       u16 function_row_physmap[MAX_NUM_TOP_ROW_KEYS];
+       size_t num_function_row_keys;
 };
 
 /**
@@ -527,6 +535,11 @@ static int cros_ec_keyb_register_matrix(struct cros_ec_keyb *ckdev)
        struct input_dev *idev;
        const char *phys;
        int err;
+       struct property *prop;
+       const __be32 *p;
+       u16 *physmap;
+       u32 key_pos;
+       int row, col;
 
        err = matrix_keypad_parse_properties(dev, &ckdev->rows, &ckdev->cols);
        if (err)
@@ -578,6 +591,21 @@ static int cros_ec_keyb_register_matrix(struct cros_ec_keyb *ckdev)
        ckdev->idev = idev;
        cros_ec_keyb_compute_valid_keys(ckdev);
 
+       physmap = ckdev->function_row_physmap;
+       of_property_for_each_u32(dev->of_node, "function-row-physmap",
+                                prop, p, key_pos) {
+               if (ckdev->num_function_row_keys == MAX_NUM_TOP_ROW_KEYS) {
+                       dev_warn(dev, "Only support up to %d top row keys\n",
+                                MAX_NUM_TOP_ROW_KEYS);
+                       break;
+               }
+               row = KEY_ROW(key_pos);
+               col = KEY_COL(key_pos);
+               *physmap = MATRIX_SCAN_CODE(row, col, ckdev->row_shift);
+               physmap++;
+               ckdev->num_function_row_keys++;
+       }
+
        err = input_register_device(ckdev->idev);
        if (err) {
                dev_err(dev, "cannot register input device\n");
@@ -587,6 +615,50 @@ static int cros_ec_keyb_register_matrix(struct cros_ec_keyb *ckdev)
        return 0;
 }
 
+static ssize_t function_row_physmap_show(struct device *dev,
+                                        struct device_attribute *attr,
+                                        char *buf)
+{
+       ssize_t size = 0;
+       int i;
+       struct cros_ec_keyb *ckdev = dev_get_drvdata(dev);
+       u16 *physmap = ckdev->function_row_physmap;
+
+       for (i = 0; i < ckdev->num_function_row_keys; i++)
+               size += scnprintf(buf + size, PAGE_SIZE - size,
+                                 "%s%02X", size ? " " : "", physmap[i]);
+       if (size)
+               size += scnprintf(buf + size, PAGE_SIZE - size, "\n");
+
+       return size;
+}
+
+static DEVICE_ATTR_RO(function_row_physmap);
+
+static struct attribute *cros_ec_keyb_attrs[] = {
+       &dev_attr_function_row_physmap.attr,
+       NULL,
+};
+
+static umode_t cros_ec_keyb_attr_is_visible(struct kobject *kobj,
+                                           struct attribute *attr,
+                                           int n)
+{
+       struct device *dev = container_of(kobj, struct device, kobj);
+       struct cros_ec_keyb *ckdev = dev_get_drvdata(dev);
+
+       if (attr == &dev_attr_function_row_physmap.attr &&
+           !ckdev->num_function_row_keys)
+               return 0;
+
+       return attr->mode;
+}
+
+static const struct attribute_group cros_ec_keyb_attr_group = {
+       .is_visible = cros_ec_keyb_attr_is_visible,
+       .attrs = cros_ec_keyb_attrs,
+};
+
 static int cros_ec_keyb_probe(struct platform_device *pdev)
 {
        struct cros_ec_device *ec = dev_get_drvdata(pdev->dev.parent);
@@ -617,6 +690,12 @@ static int cros_ec_keyb_probe(struct platform_device *pdev)
                return err;
        }
 
+       err = devm_device_add_group(dev, &cros_ec_keyb_attr_group);
+       if (err) {
+               dev_err(dev, "failed to create attributes. err=%d\n", err);
+               return err;
+       }
+
        ckdev->notifier.notifier_call = cros_ec_keyb_work;
        err = blocking_notifier_chain_register(&ckdev->ec->event_notifier,
                                               &ckdev->notifier);
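
function_row_physmap_show() emits the stored scan codes as space-separated hex, with a trailing newline only when the list is non-empty. An illustrative userspace re-implementation; snprintf() stands in for the kernel's scnprintf(), which differs only in its return value on truncation:

#include <stdint.h>
#include <stdio.h>

#define BUF_SIZE 4096	/* stands in for PAGE_SIZE */

static int physmap_show(char *buf, const uint16_t *map, int n)
{
	int size = 0;

	for (int i = 0; i < n; i++)
		size += snprintf(buf + size, BUF_SIZE - size,
				 "%s%02X", size ? " " : "", map[i]);
	if (size)
		size += snprintf(buf + size, BUF_SIZE - size, "\n");
	return size;
}

int main(void)
{
	char buf[BUF_SIZE];
	const uint16_t map[] = { 0x00, 0x41, 0x22, 0x63 };	/* sample codes */

	physmap_show(buf, map, 4);
	fputs(buf, stdout);	/* prints: 00 41 22 63 */
	return 0;
}
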
index b17ac2a..43375b3 100644 (file)
@@ -60,6 +60,8 @@
        ((((dbms) * 1000) / ((1 << ((ptv) + 1)) * (1000000 / 32768))) - 1)
 #define OMAP4_VAL_DEBOUNCINGTIME_16MS                                  \
        OMAP4_KEYPAD_DEBOUNCINGTIME_MS(16, OMAP4_KEYPAD_PTV_DIV_128)
+#define OMAP4_KEYPAD_AUTOIDLE_MS       50      /* Approximate measured time */
+#define OMAP4_KEYPAD_IDLE_CHECK_MS     (OMAP4_KEYPAD_AUTOIDLE_MS / 2)
 
 enum {
        KBD_REVISION_OMAP4 = 0,
@@ -71,6 +73,7 @@ struct omap4_keypad {
 
        void __iomem *base;
        unsigned int irq;
+       struct mutex lock;              /* for key scan */
 
        unsigned int rows;
        unsigned int cols;
@@ -78,7 +81,7 @@ struct omap4_keypad {
        u32 irqreg_offset;
        unsigned int row_shift;
        bool no_autorepeat;
-       unsigned char key_state[8];
+       u64 keys;
        unsigned short *keymap;
 };
 
@@ -107,6 +110,55 @@ static void kbd_write_irqreg(struct omap4_keypad *keypad_data,
                     keypad_data->base + keypad_data->irqreg_offset + offset);
 }
 
+static int omap4_keypad_report_keys(struct omap4_keypad *keypad_data,
+                                   u64 keys, bool down)
+{
+       struct input_dev *input_dev = keypad_data->input;
+       unsigned int col, row, code;
+       DECLARE_BITMAP(mask, 64);
+       unsigned long bit;
+       int events = 0;
+
+       bitmap_from_u64(mask, keys);
+
+       for_each_set_bit(bit, mask, keypad_data->rows * BITS_PER_BYTE) {
+               row = bit / BITS_PER_BYTE;
+               col = bit % BITS_PER_BYTE;
+               code = MATRIX_SCAN_CODE(row, col, keypad_data->row_shift);
+
+               input_event(input_dev, EV_MSC, MSC_SCAN, code);
+               input_report_key(input_dev, keypad_data->keymap[code], down);
+
+               events++;
+       }
+
+       if (events)
+               input_sync(input_dev);
+
+       return events;
+}
+
+static void omap4_keypad_scan_keys(struct omap4_keypad *keypad_data, u64 keys)
+{
+       u64 changed;
+
+       mutex_lock(&keypad_data->lock);
+
+       changed = keys ^ keypad_data->keys;
+
+       /*
+        * Report key up events separately and first. This matters in case we
+        * lost a key-up interrupt and are just now catching up.
+        */
+       omap4_keypad_report_keys(keypad_data, changed & ~keys, false);
+
+       /* Report key down events */
+       omap4_keypad_report_keys(keypad_data, changed & keys, true);
+
+       keypad_data->keys = keys;
+
+       mutex_unlock(&keypad_data->lock);
+}
 
 /* Interrupt handlers */
 static irqreturn_t omap4_keypad_irq_handler(int irq, void *dev_id)
@@ -122,48 +174,44 @@ static irqreturn_t omap4_keypad_irq_handler(int irq, void *dev_id)
 static irqreturn_t omap4_keypad_irq_thread_fn(int irq, void *dev_id)
 {
        struct omap4_keypad *keypad_data = dev_id;
-       struct input_dev *input_dev = keypad_data->input;
-       unsigned char key_state[ARRAY_SIZE(keypad_data->key_state)];
-       unsigned int col, row, code, changed;
-       u32 *new_state = (u32 *) key_state;
-
-       *new_state = kbd_readl(keypad_data, OMAP4_KBD_FULLCODE31_0);
-       *(new_state + 1) = kbd_readl(keypad_data, OMAP4_KBD_FULLCODE63_32);
-
-       for (row = 0; row < keypad_data->rows; row++) {
-               changed = key_state[row] ^ keypad_data->key_state[row];
-               if (!changed)
-                       continue;
-
-               for (col = 0; col < keypad_data->cols; col++) {
-                       if (changed & (1 << col)) {
-                               code = MATRIX_SCAN_CODE(row, col,
-                                               keypad_data->row_shift);
-                               input_event(input_dev, EV_MSC, MSC_SCAN, code);
-                               input_report_key(input_dev,
-                                                keypad_data->keymap[code],
-                                                key_state[row] & (1 << col));
-                       }
-               }
+       struct device *dev = keypad_data->input->dev.parent;
+       u32 low, high;
+       int error;
+       u64 keys;
+
+       error = pm_runtime_get_sync(dev);
+       if (error < 0) {
+               pm_runtime_put_noidle(dev);
+               return IRQ_NONE;
        }
 
-       input_sync(input_dev);
+       low = kbd_readl(keypad_data, OMAP4_KBD_FULLCODE31_0);
+       high = kbd_readl(keypad_data, OMAP4_KBD_FULLCODE63_32);
+       keys = low | (u64)high << 32;
 
-       memcpy(keypad_data->key_state, key_state,
-               sizeof(keypad_data->key_state));
+       omap4_keypad_scan_keys(keypad_data, keys);
 
        /* clear pending interrupts */
        kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS,
                         kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS));
 
+       pm_runtime_mark_last_busy(dev);
+       pm_runtime_put_autosuspend(dev);
+
        return IRQ_HANDLED;
 }
 
 static int omap4_keypad_open(struct input_dev *input)
 {
        struct omap4_keypad *keypad_data = input_get_drvdata(input);
+       struct device *dev = input->dev.parent;
+       int error;
 
-       pm_runtime_get_sync(input->dev.parent);
+       error = pm_runtime_get_sync(dev);
+       if (error < 0) {
+               pm_runtime_put_noidle(dev);
+               return error;
+       }
 
        disable_irq(keypad_data->irq);
 
@@ -176,13 +224,15 @@ static int omap4_keypad_open(struct input_dev *input)
        kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS,
                         kbd_read_irqreg(keypad_data, OMAP4_KBD_IRQSTATUS));
        kbd_write_irqreg(keypad_data, OMAP4_KBD_IRQENABLE,
-                       OMAP4_DEF_IRQENABLE_EVENTEN |
-                               OMAP4_DEF_IRQENABLE_LONGKEY);
+                       OMAP4_DEF_IRQENABLE_EVENTEN);
        kbd_writel(keypad_data, OMAP4_KBD_WAKEUPENABLE,
-                       OMAP4_DEF_WUP_EVENT_ENA | OMAP4_DEF_WUP_LONG_KEY_ENA);
+                       OMAP4_DEF_WUP_EVENT_ENA);
 
        enable_irq(keypad_data->irq);
 
+       pm_runtime_mark_last_busy(dev);
+       pm_runtime_put_autosuspend(dev);
+
        return 0;
 }
 
@@ -200,14 +250,20 @@ static void omap4_keypad_stop(struct omap4_keypad *keypad_data)
 
 static void omap4_keypad_close(struct input_dev *input)
 {
-       struct omap4_keypad *keypad_data;
+       struct omap4_keypad *keypad_data = input_get_drvdata(input);
+       struct device *dev = input->dev.parent;
+       int error;
+
+       error = pm_runtime_get_sync(dev);
+       if (error < 0)
+               pm_runtime_put_noidle(dev);
 
-       keypad_data = input_get_drvdata(input);
        disable_irq(keypad_data->irq);
        omap4_keypad_stop(keypad_data);
        enable_irq(keypad_data->irq);
 
-       pm_runtime_put_sync(input->dev.parent);
+       pm_runtime_mark_last_busy(dev);
+       pm_runtime_put_autosuspend(dev);
 }
 
 static int omap4_keypad_parse_dt(struct device *dev,
@@ -252,8 +308,41 @@ static int omap4_keypad_check_revision(struct device *dev,
        return 0;
 }
 
+/*
+ * Errata ID i689 "1.32 Keyboard Key Up Event Can Be Missed".
+ * The interrupt for a key-up event may never arrive. We must release
+ * any stuck keys after the keyboard hardware has auto-idled.
+ */
+static int __maybe_unused omap4_keypad_runtime_suspend(struct device *dev)
+{
+       struct platform_device *pdev = to_platform_device(dev);
+       struct omap4_keypad *keypad_data = platform_get_drvdata(pdev);
+       u32 active;
+
+       active = kbd_readl(keypad_data, OMAP4_KBD_STATEMACHINE);
+       if (active) {
+               pm_runtime_mark_last_busy(dev);
+               return -EBUSY;
+       }
+
+       omap4_keypad_scan_keys(keypad_data, 0);
+
+       return 0;
+}
+
+static const struct dev_pm_ops omap4_keypad_pm_ops = {
+       SET_RUNTIME_PM_OPS(omap4_keypad_runtime_suspend, NULL, NULL)
+};
+
+static void omap4_disable_pm(void *d)
+{
+       pm_runtime_dont_use_autosuspend(d);
+       pm_runtime_disable(d);
+}
+
 static int omap4_keypad_probe(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
        struct omap4_keypad *keypad_data;
        struct input_dev *input_dev;
        struct resource *res;
@@ -271,63 +360,62 @@ static int omap4_keypad_probe(struct platform_device *pdev)
        if (irq < 0)
                return irq;
 
-       keypad_data = kzalloc(sizeof(struct omap4_keypad), GFP_KERNEL);
+       keypad_data = devm_kzalloc(dev, sizeof(*keypad_data), GFP_KERNEL);
        if (!keypad_data) {
-               dev_err(&pdev->dev, "keypad_data memory allocation failed\n");
+               dev_err(dev, "keypad_data memory allocation failed\n");
                return -ENOMEM;
        }
 
        keypad_data->irq = irq;
+       mutex_init(&keypad_data->lock);
+       platform_set_drvdata(pdev, keypad_data);
 
-       error = omap4_keypad_parse_dt(&pdev->dev, keypad_data);
+       error = omap4_keypad_parse_dt(dev, keypad_data);
        if (error)
-               goto err_free_keypad;
+               return error;
 
-       res = request_mem_region(res->start, resource_size(res), pdev->name);
-       if (!res) {
-               dev_err(&pdev->dev, "can't request mem region\n");
-               error = -EBUSY;
-               goto err_free_keypad;
-       }
+       keypad_data->base = devm_ioremap_resource(dev, res);
+       if (IS_ERR(keypad_data->base))
+               return PTR_ERR(keypad_data->base);
 
-       keypad_data->base = ioremap(res->start, resource_size(res));
-       if (!keypad_data->base) {
-               dev_err(&pdev->dev, "can't ioremap mem resource\n");
-               error = -ENOMEM;
-               goto err_release_mem;
-       }
+       pm_runtime_use_autosuspend(dev);
+       pm_runtime_set_autosuspend_delay(dev, OMAP4_KEYPAD_IDLE_CHECK_MS);
+       pm_runtime_enable(dev);
 
-       pm_runtime_enable(&pdev->dev);
+       error = devm_add_action_or_reset(dev, omap4_disable_pm, dev);
+       if (error) {
+               dev_err(dev, "unable to register cleanup action\n");
+               return error;
+       }
 
        /*
         * Enable clocks for the keypad module so that we can read
         * revision register.
         */
-       error = pm_runtime_get_sync(&pdev->dev);
+       error = pm_runtime_get_sync(dev);
        if (error) {
-               dev_err(&pdev->dev, "pm_runtime_get_sync() failed\n");
-               pm_runtime_put_noidle(&pdev->dev);
-       } else {
-               error = omap4_keypad_check_revision(&pdev->dev,
-                                                   keypad_data);
-               if (!error) {
-                       /* Ensure device does not raise interrupts */
-                       omap4_keypad_stop(keypad_data);
-               }
-               pm_runtime_put_sync(&pdev->dev);
+               dev_err(dev, "pm_runtime_get_sync() failed\n");
+               pm_runtime_put_noidle(dev);
+               return error;
+       }
+
+       error = omap4_keypad_check_revision(dev, keypad_data);
+       if (!error) {
+               /* Ensure device does not raise interrupts */
+               omap4_keypad_stop(keypad_data);
        }
+
+       pm_runtime_mark_last_busy(dev);
+       pm_runtime_put_autosuspend(dev);
        if (error)
-               goto err_pm_disable;
+               return error;
 
        /* input device allocation */
-       keypad_data->input = input_dev = input_allocate_device();
-       if (!input_dev) {
-               error = -ENOMEM;
-               goto err_pm_disable;
-       }
+       keypad_data->input = input_dev = devm_input_allocate_device(dev);
+       if (!input_dev)
+               return -ENOMEM;
 
        input_dev->name = pdev->name;
-       input_dev->dev.parent = &pdev->dev;
        input_dev->id.bustype = BUS_HOST;
        input_dev->id.vendor = 0x0001;
        input_dev->id.product = 0x0001;
@@ -344,84 +432,51 @@ static int omap4_keypad_probe(struct platform_device *pdev)
 
        keypad_data->row_shift = get_count_order(keypad_data->cols);
        max_keys = keypad_data->rows << keypad_data->row_shift;
-       keypad_data->keymap = kcalloc(max_keys,
-                                     sizeof(keypad_data->keymap[0]),
-                                     GFP_KERNEL);
+       keypad_data->keymap = devm_kcalloc(dev,
+                                          max_keys,
+                                          sizeof(keypad_data->keymap[0]),
+                                          GFP_KERNEL);
        if (!keypad_data->keymap) {
-               dev_err(&pdev->dev, "Not enough memory for keymap\n");
-               error = -ENOMEM;
-               goto err_free_input;
+               dev_err(dev, "Not enough memory for keymap\n");
+               return -ENOMEM;
        }
 
        error = matrix_keypad_build_keymap(NULL, NULL,
                                           keypad_data->rows, keypad_data->cols,
                                           keypad_data->keymap, input_dev);
        if (error) {
-               dev_err(&pdev->dev, "failed to build keymap\n");
-               goto err_free_keymap;
+               dev_err(dev, "failed to build keymap\n");
+               return error;
        }
 
-       error = request_threaded_irq(keypad_data->irq, omap4_keypad_irq_handler,
-                                    omap4_keypad_irq_thread_fn, IRQF_ONESHOT,
-                                    "omap4-keypad", keypad_data);
+       error = devm_request_threaded_irq(dev, keypad_data->irq,
+                                         omap4_keypad_irq_handler,
+                                         omap4_keypad_irq_thread_fn,
+                                         IRQF_ONESHOT,
+                                         "omap4-keypad", keypad_data);
        if (error) {
-               dev_err(&pdev->dev, "failed to register interrupt\n");
-               goto err_free_keymap;
+               dev_err(dev, "failed to register interrupt\n");
+               return error;
        }
 
        error = input_register_device(keypad_data->input);
-       if (error < 0) {
-               dev_err(&pdev->dev, "failed to register input device\n");
-               goto err_free_irq;
+       if (error) {
+               dev_err(dev, "failed to register input device\n");
+               return error;
        }
 
-       device_init_wakeup(&pdev->dev, true);
-       error = dev_pm_set_wake_irq(&pdev->dev, keypad_data->irq);
+       device_init_wakeup(dev, true);
+       error = dev_pm_set_wake_irq(dev, keypad_data->irq);
        if (error)
-               dev_warn(&pdev->dev,
-                        "failed to set up wakeup irq: %d\n", error);
-
-       platform_set_drvdata(pdev, keypad_data);
+               dev_warn(dev, "failed to set up wakeup irq: %d\n", error);
 
        return 0;
-
-err_free_irq:
-       free_irq(keypad_data->irq, keypad_data);
-err_free_keymap:
-       kfree(keypad_data->keymap);
-err_free_input:
-       input_free_device(input_dev);
-err_pm_disable:
-       pm_runtime_disable(&pdev->dev);
-       iounmap(keypad_data->base);
-err_release_mem:
-       release_mem_region(res->start, resource_size(res));
-err_free_keypad:
-       kfree(keypad_data);
-       return error;
 }
 
 static int omap4_keypad_remove(struct platform_device *pdev)
 {
-       struct omap4_keypad *keypad_data = platform_get_drvdata(pdev);
-       struct resource *res;
-
        dev_pm_clear_wake_irq(&pdev->dev);
 
-       free_irq(keypad_data->irq, keypad_data);
-
-       pm_runtime_disable(&pdev->dev);
-
-       input_unregister_device(keypad_data->input);
-
-       iounmap(keypad_data->base);
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       release_mem_region(res->start, resource_size(res));
-
-       kfree(keypad_data->keymap);
-       kfree(keypad_data);
-
        return 0;
 }
 
@@ -437,6 +492,7 @@ static struct platform_driver omap4_keypad_driver = {
        .driver         = {
                .name   = "omap4-keypad",
                .of_match_table = omap_keypad_dt_match,
+               .pm = &omap4_keypad_pm_ops,
        },
 };
 module_platform_driver(omap4_keypad_driver);
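
The reworked omap4-keypad scan keeps the whole key matrix in one u64 snapshot and derives events by XOR: bits set in the old state but not the new are releases, reported first so that a belatedly noticed missed key-up (errata i689) cannot shadow a fresh press. A minimal sketch of that diffing, with printf standing in for input event reporting:

#include <stdint.h>
#include <stdio.h>

static void report(uint64_t bits, int down)
{
	for (int bit = 0; bit < 64; bit++)
		if (bits & (1ULL << bit))
			printf("key %d %s\n", bit, down ? "down" : "up");
}

static uint64_t scan(uint64_t old, uint64_t new)
{
	uint64_t changed = old ^ new;

	report(changed & ~new, 0);	/* releases first */
	report(changed & new, 1);	/* then presses */
	return new;
}

int main(void)
{
	uint64_t state = 0;

	state = scan(state, 0x5);	/* keys 0 and 2 pressed */
	state = scan(state, 0x4);	/* key 0 released */
	return 0;
}
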
index 37568b0..b08610d 100644 (file)
@@ -863,6 +863,7 @@ static void da7280_parse_properties(struct device *dev,
                gpi_str3[7] = '0' + i;
                haptics->gpi_ctl[i].polarity = 0;
                error = device_property_read_string(dev, gpi_str3, &str);
+               if (!error)
                        haptics->gpi_ctl[i].polarity =
                                da7280_haptic_of_gpi_pol_str(dev, str);
        }
@@ -1299,11 +1300,13 @@ static int __maybe_unused da7280_resume(struct device *dev)
        return retval;
 }
 
+#ifdef CONFIG_OF
 static const struct of_device_id da7280_of_match[] = {
        { .compatible = "dlg,da7280", },
        { }
 };
 MODULE_DEVICE_TABLE(of, da7280_of_match);
+#endif
 
 static const struct i2c_device_id da7280_i2c_id[] = {
        { "da7280", },
index b067bfd..4a6b33b 100644 (file)
@@ -986,7 +986,7 @@ static void alps_get_finger_coordinate_v7(struct input_mt_pos *mt,
        case V7_PACKET_ID_TWO:
                mt[1].x &= ~0x000F;
                mt[1].y |= 0x000F;
-               /* Detect false-postive touches where x & y report max value */
+               /* Detect false-positive touches where x & y report max value */
                if (mt[1].y == 0x7ff && mt[1].x == 0xff0) {
                        mt[1].x = 0;
                        /* y gets set to 0 at the end of this function */
index 8fb7b43..ffad142 100644 (file)
@@ -1106,8 +1106,11 @@ static void synaptics_process_packet(struct psmouse *psmouse)
                                        num_fingers = hw.w + 2;
                                break;
                        case 2:
-                               if (SYN_MODEL_PEN(info->model_id))
-                                       ;   /* Nothing, treat a pen as a single finger */
+                               /*
+                                * SYN_MODEL_PEN(info->model_id): even if
+                                * the device supports pen, we treat it as
+                                * a single finger.
+                                */
                                break;
                        case 4 ... 15:
                                if (SYN_CAP_PALMDETECT(info->capabilities))
index 0754744..f39b7b3 100644 (file)
@@ -255,7 +255,7 @@ config SERIO_ARC_PS2
 
 config SERIO_APBPS2
        tristate "GRLIB APBPS2 PS/2 keyboard/mouse controller"
-       depends on OF
+       depends on OF && HAS_IOMEM
        help
          Say Y here if you want support for GRLIB APBPS2 peripherals used
          to connect to PS/2 keyboard and/or mouse.
index c74b020..9119e12 100644 (file)
@@ -588,6 +588,11 @@ static const struct dmi_system_id i8042_dmi_noselftest_table[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
                        DMI_MATCH(DMI_CHASSIS_TYPE, "10"), /* Notebook */
                },
+       }, {
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."),
+                       DMI_MATCH(DMI_CHASSIS_TYPE, "31"), /* Convertible Notebook */
+               },
        },
        { }
 };
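
For reference, dmi_system_id semantics make the DMI_MATCH() fields inside one .matches block AND together, while separate table entries OR, which is why the convertible chassis type needs its own entry above. An illustrative stand-alone model of that matching; the kernel's dmi_check_system() does the real work:

#include <stdbool.h>
#include <stdio.h>
#include <string.h>

struct dmi_entry {
	const char *vendor;
	const char *chassis;
};

/* Fields within one entry must all match (AND). */
static bool dmi_matches(const struct dmi_entry *want,
			const char *vendor, const char *chassis)
{
	return !strcmp(want->vendor, vendor) &&
	       !strcmp(want->chassis, chassis);
}

int main(void)
{
	/* Separate entries are alternatives (OR), as in the table above. */
	const struct dmi_entry table[] = {
		{ "ASUSTeK COMPUTER INC.", "10" },	/* Notebook */
		{ "ASUSTeK COMPUTER INC.", "31" },	/* Convertible Notebook */
	};
	const char *vendor = "ASUSTeK COMPUTER INC.", *chassis = "31";

	for (int i = 0; i < 2; i++) {
		if (dmi_matches(&table[i], vendor, chassis)) {
			puts("noselftest quirk applies");
			return 0;
		}
	}
	return 1;
}
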
index e08b0ef..fcb1b64 100644 (file)
@@ -1036,9 +1036,9 @@ static ssize_t show_tabletSize(struct device *dev, struct device_attribute *attr
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%dx%d\n",
-                       input_abs_get_max(aiptek->inputdev, ABS_X) + 1,
-                       input_abs_get_max(aiptek->inputdev, ABS_Y) + 1);
+       return sysfs_emit(buf, "%dx%d\n",
+                         input_abs_get_max(aiptek->inputdev, ABS_X) + 1,
+                         input_abs_get_max(aiptek->inputdev, ABS_Y) + 1);
 }
 
 /* These structs define the sysfs files, param #1 is the name of the
@@ -1064,9 +1064,8 @@ static ssize_t show_tabletPointerMode(struct device *dev, struct device_attribut
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(pointer_mode_map,
-                                       aiptek->curSetting.pointerMode));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(pointer_mode_map,
+                                                     aiptek->curSetting.pointerMode));
 }
 
 static ssize_t
@@ -1101,9 +1100,8 @@ static ssize_t show_tabletCoordinateMode(struct device *dev, struct device_attri
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(coordinate_mode_map,
-                                       aiptek->curSetting.coordinateMode));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(coordinate_mode_map,
+                                                     aiptek->curSetting.coordinateMode));
 }
 
 static ssize_t
@@ -1143,9 +1141,8 @@ static ssize_t show_tabletToolMode(struct device *dev, struct device_attribute *
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(tool_mode_map,
-                                       aiptek->curSetting.toolMode));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(tool_mode_map,
+                                                     aiptek->curSetting.toolMode));
 }
 
 static ssize_t
@@ -1174,10 +1171,9 @@ static ssize_t show_tabletXtilt(struct device *dev, struct device_attribute *att
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
        if (aiptek->curSetting.xTilt == AIPTEK_TILT_DISABLE) {
-               return snprintf(buf, PAGE_SIZE, "disable\n");
+               return sysfs_emit(buf, "disable\n");
        } else {
-               return snprintf(buf, PAGE_SIZE, "%d\n",
-                               aiptek->curSetting.xTilt);
+               return sysfs_emit(buf, "%d\n", aiptek->curSetting.xTilt);
        }
 }
 
@@ -1216,10 +1212,9 @@ static ssize_t show_tabletYtilt(struct device *dev, struct device_attribute *att
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
        if (aiptek->curSetting.yTilt == AIPTEK_TILT_DISABLE) {
-               return snprintf(buf, PAGE_SIZE, "disable\n");
+               return sysfs_emit(buf, "disable\n");
        } else {
-               return snprintf(buf, PAGE_SIZE, "%d\n",
-                               aiptek->curSetting.yTilt);
+               return sysfs_emit(buf, "%d\n", aiptek->curSetting.yTilt);
        }
 }
 
@@ -1257,7 +1252,7 @@ static ssize_t show_tabletJitterDelay(struct device *dev, struct device_attribut
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%d\n", aiptek->curSetting.jitterDelay);
+       return sysfs_emit(buf, "%d\n", aiptek->curSetting.jitterDelay);
 }
 
 static ssize_t
@@ -1286,8 +1281,7 @@ static ssize_t show_tabletProgrammableDelay(struct device *dev, struct device_at
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%d\n",
-                       aiptek->curSetting.programmableDelay);
+       return sysfs_emit(buf, "%d\n", aiptek->curSetting.programmableDelay);
 }
 
 static ssize_t
@@ -1316,7 +1310,7 @@ static ssize_t show_tabletEventsReceived(struct device *dev, struct device_attri
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%ld\n", aiptek->eventCount);
+       return sysfs_emit(buf, "%ld\n", aiptek->eventCount);
 }
 
 static DEVICE_ATTR(event_count, S_IRUGO, show_tabletEventsReceived, NULL);
@@ -1355,7 +1349,7 @@ static ssize_t show_tabletDiagnosticMessage(struct device *dev, struct device_at
        default:
                return 0;
        }
-       return snprintf(buf, PAGE_SIZE, retMsg);
+       return sysfs_emit(buf, "%s", retMsg);
 }
 
 static DEVICE_ATTR(diagnostic, S_IRUGO, show_tabletDiagnosticMessage, NULL);
@@ -1375,9 +1369,8 @@ static ssize_t show_tabletStylusUpper(struct device *dev, struct device_attribut
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(stylus_button_map,
-                                       aiptek->curSetting.stylusButtonUpper));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(stylus_button_map,
+                                                     aiptek->curSetting.stylusButtonUpper));
 }
 
 static ssize_t
@@ -1406,9 +1399,8 @@ static ssize_t show_tabletStylusLower(struct device *dev, struct device_attribut
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(stylus_button_map,
-                                       aiptek->curSetting.stylusButtonLower));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(stylus_button_map,
+                                                     aiptek->curSetting.stylusButtonLower));
 }
 
 static ssize_t
@@ -1444,9 +1436,8 @@ static ssize_t show_tabletMouseLeft(struct device *dev, struct device_attribute
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(mouse_button_map,
-                                       aiptek->curSetting.mouseButtonLeft));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map,
+                                                     aiptek->curSetting.mouseButtonLeft));
 }
 
 static ssize_t
@@ -1474,9 +1465,8 @@ static ssize_t show_tabletMouseMiddle(struct device *dev, struct device_attribut
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(mouse_button_map,
-                                       aiptek->curSetting.mouseButtonMiddle));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map,
+                                                     aiptek->curSetting.mouseButtonMiddle));
 }
 
 static ssize_t
@@ -1504,9 +1494,8 @@ static ssize_t show_tabletMouseRight(struct device *dev, struct device_attribute
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%s\n",
-                       map_val_to_str(mouse_button_map,
-                                       aiptek->curSetting.mouseButtonRight));
+       return sysfs_emit(buf, "%s\n", map_val_to_str(mouse_button_map,
+                                                     aiptek->curSetting.mouseButtonRight));
 }
 
 static ssize_t
@@ -1535,10 +1524,9 @@ static ssize_t show_tabletWheel(struct device *dev, struct device_attribute *att
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
        if (aiptek->curSetting.wheel == AIPTEK_WHEEL_DISABLE) {
-               return snprintf(buf, PAGE_SIZE, "disable\n");
+               return sysfs_emit(buf, "disable\n");
        } else {
-               return snprintf(buf, PAGE_SIZE, "%d\n",
-                               aiptek->curSetting.wheel);
+               return sysfs_emit(buf, "%d\n", aiptek->curSetting.wheel);
        }
 }
 
@@ -1568,8 +1556,7 @@ static ssize_t show_tabletExecute(struct device *dev, struct device_attribute *a
        /* There is nothing useful to display, so a one-line manual
         * is in order...
         */
-       return snprintf(buf, PAGE_SIZE,
-                       "Write anything to this file to program your tablet.\n");
+       return sysfs_emit(buf, "Write anything to this file to program your tablet.\n");
 }
 
 static ssize_t
@@ -1600,7 +1587,7 @@ static ssize_t show_tabletODMCode(struct device *dev, struct device_attribute *a
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "0x%04x\n", aiptek->features.odmCode);
+       return sysfs_emit(buf, "0x%04x\n", aiptek->features.odmCode);
 }
 
 static DEVICE_ATTR(odm_code, S_IRUGO, show_tabletODMCode, NULL);
@@ -1613,7 +1600,7 @@ static ssize_t show_tabletModelCode(struct device *dev, struct device_attribute
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "0x%04x\n", aiptek->features.modelCode);
+       return sysfs_emit(buf, "0x%04x\n", aiptek->features.modelCode);
 }
 
 static DEVICE_ATTR(model_code, S_IRUGO, show_tabletModelCode, NULL);
@@ -1626,8 +1613,7 @@ static ssize_t show_firmwareCode(struct device *dev, struct device_attribute *at
 {
        struct aiptek *aiptek = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%04x\n",
-                       aiptek->features.firmwareCode);
+       return sysfs_emit(buf, "%04x\n", aiptek->features.firmwareCode);
 }
 
 static DEVICE_ATTR(firmware_code, S_IRUGO, show_firmwareCode, NULL);
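
sysfs_emit() is the page-bounded replacement for snprintf(buf, PAGE_SIZE, ...) used throughout the aiptek conversion above. A userspace stand-in showing the contract, including why a variable string should be printed through "%s" rather than passed as the format (a variable format trips -Wformat-security):

#include <stdarg.h>
#include <stdio.h>

#define BUF_SIZE 4096	/* sysfs buffers are one page */

/* Sketch of the sysfs_emit() contract: format into a page-sized
 * buffer and never report more bytes than actually fit.
 */
static int sysfs_emit_sketch(char *buf, const char *fmt, ...)
{
	va_list args;
	int len;

	va_start(args, fmt);
	len = vsnprintf(buf, BUF_SIZE, fmt, args);
	va_end(args);

	return len < BUF_SIZE ? len : BUF_SIZE - 1;
}

int main(void)
{
	char page[BUF_SIZE];
	const char *msg = "no tablet connected\n";

	sysfs_emit_sketch(page, "%s", msg);	/* not (page, msg) */
	fputs(page, stdout);
	return 0;
}
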
index cc18f54..529614d 100644 (file)
@@ -608,7 +608,7 @@ config TOUCHSCREEN_MTOUCH
 
 config TOUCHSCREEN_IMX6UL_TSC
        tristate "Freescale i.MX6UL touchscreen controller"
-       depends on (OF && GPIOLIB) || COMPILE_TEST
+       depends on ((OF && GPIOLIB) || COMPILE_TEST) && HAS_IOMEM
        help
          Say Y here if you have a Freescale i.MX6UL, and want to
          use the internal touchscreen controller.
index a703870..f113a27 100644 (file)
 
 struct ads7846_buf {
        u8 cmd;
-       /*
-        * This union is a temporary hack. The driver does an in-place
-        * endianness conversion. This will be cleaned up in the next
-        * patch.
-        */
-       union {
-               __be16 data_be16;
-               u16 data;
-       };
+       __be16 data;
 } __packed;
 
-
-struct ts_event {
-       bool ignore;
-       struct ads7846_buf x;
-       struct ads7846_buf y;
-       struct ads7846_buf z1;
-       struct ads7846_buf z2;
+struct ads7846_buf_layout {
+       unsigned int offset;
+       unsigned int count;
+       unsigned int skip;
 };
 
 /*
@@ -90,12 +79,18 @@ struct ts_event {
  * systems where main memory is not DMA-coherent (most non-x86 boards).
  */
 struct ads7846_packet {
-       struct ts_event tc;
-       struct ads7846_buf read_x_cmd;
-       struct ads7846_buf read_y_cmd;
-       struct ads7846_buf read_z1_cmd;
-       struct ads7846_buf read_z2_cmd;
+       unsigned int count;
+       unsigned int count_skip;
+       unsigned int cmds;
+       unsigned int last_cmd_idx;
+       struct ads7846_buf_layout l[5];
+       struct ads7846_buf *rx;
+       struct ads7846_buf *tx;
+
        struct ads7846_buf pwrdown_cmd;
+
+       bool ignore;
+       u16 x, y, z1, z2;
 };
 
 struct ads7846 {
@@ -194,7 +189,6 @@ struct ads7846 {
 #define        READ_Y(vref)    (READ_12BIT_DFR(y,  1, vref))
 #define        READ_Z1(vref)   (READ_12BIT_DFR(z1, 1, vref))
 #define        READ_Z2(vref)   (READ_12BIT_DFR(z2, 1, vref))
-
 #define        READ_X(vref)    (READ_12BIT_DFR(x,  1, vref))
 #define        PWRDOWN         (READ_12BIT_DFR(y,  0, 0))      /* LAST */
 
@@ -207,6 +201,21 @@ struct ads7846 {
 #define        REF_ON  (READ_12BIT_DFR(x, 1, 1))
 #define        REF_OFF (READ_12BIT_DFR(y, 0, 0))
 
+/* Order commands optimally to reduce Vref switching and
+ * settling time:
+ * Measure:  X; Vref: X+, X-; IN: Y+
+ * Measure:  Y; Vref: Y+, Y-; IN: X+
+ * Measure: Z1; Vref: Y+, X-; IN: X+
+ * Measure: Z2; Vref: Y+, X-; IN: Y-
+ */
+enum ads7846_cmds {
+       ADS7846_X,
+       ADS7846_Y,
+       ADS7846_Z1,
+       ADS7846_Z2,
+       ADS7846_PWDOWN,
+};
+
 static int get_pendown_state(struct ads7846 *ts)
 {
        if (ts->get_pendown_state)
@@ -689,26 +698,109 @@ static int ads7846_no_filter(void *ads, int data_idx, int *val)
        return ADS7846_FILTER_OK;
 }
 
-static int ads7846_get_value(struct ads7846 *ts, struct spi_message *m)
+static int ads7846_get_value(struct ads7846_buf *buf)
 {
        int value;
-       struct spi_transfer *t =
-               list_entry(m->transfers.prev, struct spi_transfer, transfer_list);
-       struct ads7846_buf *buf = t->rx_buf;
 
-       value = be16_to_cpup(&buf->data_be16);
+       value = be16_to_cpup(&buf->data);
 
        /* enforce ADC output is 12 bits width */
        return (value >> 3) & 0xfff;
 }
 
-static void ads7846_update_value(struct spi_message *m, int val)
+static void ads7846_set_cmd_val(struct ads7846 *ts, enum ads7846_cmds cmd_idx,
+                               u16 val)
+{
+       struct ads7846_packet *packet = ts->packet;
+
+       switch (cmd_idx) {
+       case ADS7846_Y:
+               packet->y = val;
+               break;
+       case ADS7846_X:
+               packet->x = val;
+               break;
+       case ADS7846_Z1:
+               packet->z1 = val;
+               break;
+       case ADS7846_Z2:
+               packet->z2 = val;
+               break;
+       default:
+               WARN_ON_ONCE(1);
+       }
+}
+
+static u8 ads7846_get_cmd(enum ads7846_cmds cmd_idx, int vref)
+{
+       switch (cmd_idx) {
+       case ADS7846_Y:
+               return READ_Y(vref);
+       case ADS7846_X:
+               return READ_X(vref);
+
+       /* 7846 specific commands  */
+       case ADS7846_Z1:
+               return READ_Z1(vref);
+       case ADS7846_Z2:
+               return READ_Z2(vref);
+       case ADS7846_PWDOWN:
+               return PWRDOWN;
+       default:
+               WARN_ON_ONCE(1);
+       }
+
+       return 0;
+}
+
+static bool ads7846_cmd_need_settle(enum ads7846_cmds cmd_idx)
+{
+       switch (cmd_idx) {
+       case ADS7846_X:
+       case ADS7846_Y:
+       case ADS7846_Z1:
+       case ADS7846_Z2:
+               return true;
+       case ADS7846_PWDOWN:
+               return false;
+       default:
+               WARN_ON_ONCE(1);
+       }
+
+       return false;
+}
+
+static int ads7846_filter(struct ads7846 *ts)
 {
-       struct spi_transfer *t =
-               list_entry(m->transfers.prev, struct spi_transfer, transfer_list);
-       struct ads7846_buf *buf = t->rx_buf;
+       struct ads7846_packet *packet = ts->packet;
+       int action;
+       int val;
+       unsigned int cmd_idx, b;
 
-       buf->data = val;
+       packet->ignore = false;
+       for (cmd_idx = packet->last_cmd_idx; cmd_idx < packet->cmds - 1; cmd_idx++) {
+               struct ads7846_buf_layout *l = &packet->l[cmd_idx];
+
+               packet->last_cmd_idx = cmd_idx;
+
+               for (b = l->skip; b < l->count; b++) {
+                       val = ads7846_get_value(&packet->rx[l->offset + b]);
+
+                       action = ts->filter(ts->filter_data, cmd_idx, &val);
+                       if (action == ADS7846_FILTER_REPEAT) {
+                               if (b == l->count - 1)
+                                       return -EAGAIN;
+                       } else if (action == ADS7846_FILTER_OK) {
+                               ads7846_set_cmd_val(ts, cmd_idx, val);
+                               break;
+                       } else {
+                               packet->ignore = true;
+                               return 0;
+                       }
+               }
+       }
+
+       return 0;
 }
 
 static void ads7846_read_state(struct ads7846 *ts)
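
[The filter loop above is resumable: if the filter still returns ADS7846_FILTER_REPEAT on the last buffered sample of a command, -EAGAIN tells the caller to re-run the SPI transfer, and the next pass resumes from packet->last_cmd_idx. A minimal user-space sketch of that contract follows; the toy filter and all names are illustrative only, and the IGNORE case is omitted for brevity.]

#include <errno.h>
#include <stdio.h>

#define FILTER_OK     0
#define FILTER_REPEAT 1

#define NCMDS    4 /* x, y, z1, z2; the power-down frame is not filtered */
#define NSAMPLES 3 /* buffered samples per command */

static int toy_filter(int cmd, int val)
{
	static int calls;

	(void)cmd; (void)val;
	/* demand a repeat for the whole first buffer, then accept */
	return calls++ < NSAMPLES ? FILTER_REPEAT : FILTER_OK;
}

static int filter_pass(int *last_cmd_idx, int samples[NCMDS][NSAMPLES])
{
	for (int cmd = *last_cmd_idx; cmd < NCMDS; cmd++) {
		*last_cmd_idx = cmd;
		for (int b = 0; b < NSAMPLES; b++) {
			int action = toy_filter(cmd, samples[cmd][b]);

			if (action == FILTER_REPEAT) {
				if (b == NSAMPLES - 1)
					return -EAGAIN;
			} else {
				break; /* FILTER_OK: value accepted */
			}
		}
	}
	return 0;
}

int main(void)
{
	int samples[NCMDS][NSAMPLES] = { { 0 } };
	int last = 0, redo = 0;

	while (filter_pass(&last, samples) == -EAGAIN)
		redo++; /* the driver re-runs spi_sync() here */

	printf("accepted after %d repeated transfer(s), last cmd %d\n",
	       redo, last);
	return 0;
}
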
@@ -716,52 +808,26 @@ static void ads7846_read_state(struct ads7846 *ts)
        struct ads7846_packet *packet = ts->packet;
        struct spi_message *m;
        int msg_idx = 0;
-       int val;
-       int action;
        int error;
 
-       while (msg_idx < ts->msg_count) {
+       packet->last_cmd_idx = 0;
 
+       while (true) {
                ts->wait_for_sync();
 
                m = &ts->msg[msg_idx];
                error = spi_sync(ts->spi, m);
                if (error) {
                        dev_err(&ts->spi->dev, "spi_sync --> %d\n", error);
-                       packet->tc.ignore = true;
+                       packet->ignore = true;
                        return;
                }
 
-               /*
-                * Last message is power down request, no need to convert
-                * or filter the value.
-                */
-               if (msg_idx < ts->msg_count - 1) {
-
-                       val = ads7846_get_value(ts, m);
-
-                       action = ts->filter(ts->filter_data, msg_idx, &val);
-                       switch (action) {
-                       case ADS7846_FILTER_REPEAT:
-                               continue;
-
-                       case ADS7846_FILTER_IGNORE:
-                               packet->tc.ignore = true;
-                               msg_idx = ts->msg_count - 1;
-                               continue;
-
-                       case ADS7846_FILTER_OK:
-                               ads7846_update_value(m, val);
-                               packet->tc.ignore = false;
-                               msg_idx++;
-                               break;
+               error = ads7846_filter(ts);
+               if (error)
+                       continue;
 
-                       default:
-                               BUG();
-                       }
-               } else {
-                       msg_idx++;
-               }
+               return;
        }
 }
 
@@ -771,19 +837,14 @@ static void ads7846_report_state(struct ads7846 *ts)
        unsigned int Rt;
        u16 x, y, z1, z2;
 
-       /*
-        * ads7846_get_value() does in-place conversion (including byte swap)
-        * from on-the-wire format as part of debouncing to get stable
-        * readings.
-        */
-       x = packet->tc.x.data;
-       y = packet->tc.y.data;
+       x = packet->x;
+       y = packet->y;
        if (ts->model == 7845) {
                z1 = 0;
                z2 = 0;
        } else {
-               z1 = packet->tc.z1.data;
-               z2 = packet->tc.z2.data;
+               z1 = packet->z1;
+               z2 = packet->z2;
        }
 
        /* range filtering */
@@ -816,9 +877,9 @@ static void ads7846_report_state(struct ads7846 *ts)
         * the maximum. Don't report it to user space; repeat the
         * measurement at least once more.
         */
-       if (packet->tc.ignore || Rt > ts->pressure_max) {
+       if (packet->ignore || Rt > ts->pressure_max) {
                dev_vdbg(&ts->spi->dev, "ignored %d pressure %d\n",
-                        packet->tc.ignore, Rt);
+                        packet->ignore, Rt);
                return;
        }
 
@@ -979,13 +1040,59 @@ static int ads7846_setup_pendown(struct spi_device *spi,
  * Set up the transfers to read touchscreen state; this assumes we
  * use formula #2 for pressure, not #3.
  */
-static void ads7846_setup_spi_msg(struct ads7846 *ts,
+static int ads7846_setup_spi_msg(struct ads7846 *ts,
                                  const struct ads7846_platform_data *pdata)
 {
        struct spi_message *m = &ts->msg[0];
        struct spi_transfer *x = ts->xfer;
        struct ads7846_packet *packet = ts->packet;
        int vref = pdata->keep_vref_on;
+       unsigned int count, offset = 0;
+       unsigned int cmd_idx, b;
+       unsigned long time;
+       size_t size = 0;
+
+       /* time per bit */
+       time = NSEC_PER_SEC / ts->spi->max_speed_hz;
+
+       count = pdata->settle_delay_usecs * NSEC_PER_USEC / time;
+       packet->count_skip = DIV_ROUND_UP(count, 24);
+
+       if (ts->debounce_max && ts->debounce_rep)
+               /* ads7846_debounce_filter() performs ts->debounce_rep + 2
+                * reads, so buffer enough samples for the normal case. */
+               packet->count = ts->debounce_rep + 2;
+       else
+               packet->count = 1;
+
+       if (ts->model == 7846)
+               packet->cmds = 5; /* x, y, z1, z2, pwdown */
+       else
+               packet->cmds = 3; /* x, y, pwdown */
+
+       for (cmd_idx = 0; cmd_idx < packet->cmds; cmd_idx++) {
+               struct ads7846_buf_layout *l = &packet->l[cmd_idx];
+               unsigned int max_count;
+
+               if (ads7846_cmd_need_settle(cmd_idx))
+                       max_count = packet->count + packet->count_skip;
+               else
+                       max_count = packet->count;
+
+               l->offset = offset;
+               offset += max_count;
+               l->count = max_count;
+               l->skip = packet->count_skip;
+               size += sizeof(*packet->tx) * max_count;
+       }
+
+       packet->tx = devm_kzalloc(&ts->spi->dev, size, GFP_KERNEL);
+       if (!packet->tx)
+               return -ENOMEM;
+
+       packet->rx = devm_kzalloc(&ts->spi->dev, size, GFP_KERNEL);
+       if (!packet->rx)
+               return -ENOMEM;
 
        if (ts->model == 7873) {
                /*
@@ -1001,117 +1108,20 @@ static void ads7846_setup_spi_msg(struct ads7846 *ts,
        spi_message_init(m);
        m->context = ts;
 
-       packet->read_y_cmd.cmd = READ_Y(vref);
-       x->tx_buf = &packet->read_y_cmd;
-       x->rx_buf = &packet->tc.y;
-       x->len = 3;
-       spi_message_add_tail(x, m);
+       for (cmd_idx = 0; cmd_idx < packet->cmds; cmd_idx++) {
+               struct ads7846_buf_layout *l = &packet->l[cmd_idx];
+               u8 cmd = ads7846_get_cmd(cmd_idx, vref);
 
-       /*
-        * The first sample after switching drivers can be low quality;
-        * optionally discard it, using a second one after the signals
-        * have had enough time to stabilize.
-        */
-       if (pdata->settle_delay_usecs) {
-               x->delay.value = pdata->settle_delay_usecs;
-               x->delay.unit = SPI_DELAY_UNIT_USECS;
-               x++;
-
-               x->tx_buf = &packet->read_y_cmd;
-               x->rx_buf = &packet->tc.y;
-               x->len = 3;
-               spi_message_add_tail(x, m);
+               for (b = 0; b < l->count; b++)
+                       packet->tx[l->offset + b].cmd = cmd;
        }
 
-       ts->msg_count++;
-       m++;
-       spi_message_init(m);
-       m->context = ts;
-
-       /* turn y- off, x+ on, then leave in lowpower */
-       x++;
-       packet->read_x_cmd.cmd = READ_X(vref);
-       x->tx_buf = &packet->read_x_cmd;
-       x->rx_buf = &packet->tc.x;
-       x->len = 3;
+       x->tx_buf = packet->tx;
+       x->rx_buf = packet->rx;
+       x->len = size;
        spi_message_add_tail(x, m);
 
-       /* ... maybe discard first sample ... */
-       if (pdata->settle_delay_usecs) {
-               x->delay.value = pdata->settle_delay_usecs;
-               x->delay.unit = SPI_DELAY_UNIT_USECS;
-
-               x++;
-               x->tx_buf = &packet->read_x_cmd;
-               x->rx_buf = &packet->tc.x;
-               x->len = 3;
-               spi_message_add_tail(x, m);
-       }
-
-       /* turn y+ off, x- on; we'll use formula #2 */
-       if (ts->model == 7846) {
-               ts->msg_count++;
-               m++;
-               spi_message_init(m);
-               m->context = ts;
-
-               x++;
-               packet->read_z1_cmd.cmd = READ_Z1(vref);
-               x->tx_buf = &packet->read_z1_cmd;
-               x->rx_buf = &packet->tc.z1;
-               x->len = 3;
-               spi_message_add_tail(x, m);
-
-               /* ... maybe discard first sample ... */
-               if (pdata->settle_delay_usecs) {
-                       x->delay.value = pdata->settle_delay_usecs;
-                       x->delay.unit = SPI_DELAY_UNIT_USECS;
-
-                       x++;
-                       x->tx_buf = &packet->read_z1_cmd;
-                       x->rx_buf = &packet->tc.z1;
-                       x->len = 3;
-                       spi_message_add_tail(x, m);
-               }
-
-               ts->msg_count++;
-               m++;
-               spi_message_init(m);
-               m->context = ts;
-
-               x++;
-               packet->read_z2_cmd.cmd = READ_Z2(vref);
-               x->tx_buf = &packet->read_z2_cmd;
-               x->rx_buf = &packet->tc.z2;
-               x->len = 3;
-               spi_message_add_tail(x, m);
-
-               /* ... maybe discard first sample ... */
-               if (pdata->settle_delay_usecs) {
-                       x->delay.value = pdata->settle_delay_usecs;
-                       x->delay.unit = SPI_DELAY_UNIT_USECS;
-
-                       x++;
-                       x->tx_buf = &packet->read_z2_cmd;
-                       x->rx_buf = &packet->tc.z2;
-                       x->len = 3;
-                       spi_message_add_tail(x, m);
-               }
-       }
-
-       /* power down */
-       ts->msg_count++;
-       m++;
-       spi_message_init(m);
-       m->context = ts;
-
-       x++;
-       packet->pwrdown_cmd.cmd = PWRDOWN;
-       x->tx_buf = &packet->pwrdown_cmd;
-       x->len = 3;
-
-       CS_CHANGE(*x);
-       spi_message_add_tail(x, m);
+       return 0;
 }
 
 #ifdef CONFIG_OF
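
[The setup above packs every command's samples into a single full-duplex transfer and derives how many leading samples to discard from the settle delay: at the configured SPI clock, the delay is converted to bits and rounded up to whole 24-bit command frames. A standalone sketch of that layout math, with made-up clock and delay values; the 3-byte frame size assumes the command byte plus 16-bit reply buffer used by the driver.]

#include <stdio.h>

#define NSEC_PER_SEC  1000000000UL
#define NSEC_PER_USEC 1000UL
#define DIV_ROUND_UP(n, d) (((n) + (d) - 1) / (d))

int main(void)
{
	unsigned long max_speed_hz = 2000000; /* made-up SPI clock */
	unsigned int settle_usecs = 100;      /* made-up settle delay */
	unsigned int debounce_rep = 1;        /* -> count = rep + 2 = 3 */
	unsigned int cmds = 5;                /* x, y, z1, z2, pwdown */

	unsigned long time = NSEC_PER_SEC / max_speed_hz;        /* ns per bit */
	unsigned int bits = settle_usecs * NSEC_PER_USEC / time; /* settle bits */
	unsigned int skip = DIV_ROUND_UP(bits, 24); /* whole frames to drop */
	unsigned int count = debounce_rep + 2;
	unsigned int offset = 0;

	for (unsigned int i = 0; i < cmds; i++) {
		/* the final power-down command needs no settling samples */
		unsigned int max = (i < cmds - 1) ? count + skip : count;

		printf("cmd %u: offset=%u count=%u skip=%u\n",
		       i, offset, max, skip);
		offset += max;
	}
	printf("total: %u frames, %u bytes\n", offset, offset * 3);
	return 0;
}
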
index d51cb91..4c2b579 100644 (file)
@@ -56,6 +56,7 @@
 #define QUEUE_HEADER_SINGLE    0x62
 #define QUEUE_HEADER_NORMAL    0X63
 #define QUEUE_HEADER_WAIT      0x64
+#define QUEUE_HEADER_NORMAL2   0x66
 
 /* Command header definition */
 #define CMD_HEADER_WRITE       0x54
@@ -69,6 +70,7 @@
 #define CMD_HEADER_REK         0x66
 
 /* FW position data */
+#define PACKET_SIZE_OLD                40
 #define PACKET_SIZE            55
 #define MAX_CONTACT_NUM                10
 #define FW_POS_HEADER          0
@@ -90,6 +92,8 @@
 /* FW read command, 0x53 0x?? 0x0, 0x01 */
 #define E_ELAN_INFO_FW_VER     0x00
 #define E_ELAN_INFO_BC_VER     0x10
+#define E_ELAN_INFO_X_RES      0x60
+#define E_ELAN_INFO_Y_RES      0x63
 #define E_ELAN_INFO_REK                0xD0
 #define E_ELAN_INFO_TEST_VER   0xE0
 #define E_ELAN_INFO_FW_ID      0xF0
 #define ELAN_POWERON_DELAY_USEC        500
 #define ELAN_RESET_DELAY_MSEC  20
 
+enum elants_chip_id {
+       EKTH3500,
+       EKTF3624,
+};
+
 enum elants_state {
        ELAN_STATE_NORMAL,
        ELAN_WAIT_QUEUE_HEADER,
@@ -143,9 +152,12 @@ struct elants_data {
        unsigned int y_res;
        unsigned int x_max;
        unsigned int y_max;
+       unsigned int phy_x;
+       unsigned int phy_y;
        struct touchscreen_properties prop;
 
        enum elants_state state;
+       enum elants_chip_id chip_id;
        enum elants_iap_mode iap_mode;
 
        /* Guards against concurrent access to the device via sysfs */
@@ -433,7 +445,51 @@ static int elants_i2c_query_bc_version(struct elants_data *ts)
        return 0;
 }
 
-static int elants_i2c_query_ts_info(struct elants_data *ts)
+static int elants_i2c_query_ts_info_ektf(struct elants_data *ts)
+{
+       struct i2c_client *client = ts->client;
+       int error;
+       u8 resp[4];
+       u16 phy_x, phy_y;
+       const u8 get_xres_cmd[] = {
+               CMD_HEADER_READ, E_ELAN_INFO_X_RES, 0x00, 0x00
+       };
+       const u8 get_yres_cmd[] = {
+               CMD_HEADER_READ, E_ELAN_INFO_Y_RES, 0x00, 0x00
+       };
+
+       /* Get X/Y size in mm */
+       error = elants_i2c_execute_command(client, get_xres_cmd,
+                                          sizeof(get_xres_cmd),
+                                          resp, sizeof(resp), 1,
+                                          "get X size");
+       if (error)
+               return error;
+
+       phy_x = resp[2] | ((resp[3] & 0xF0) << 4);
+
+       error = elants_i2c_execute_command(client, get_yres_cmd,
+                                          sizeof(get_yres_cmd),
+                                          resp, sizeof(resp), 1,
+                                          "get Y size");
+       if (error)
+               return error;
+
+       phy_y = resp[2] | ((resp[3] & 0xF0) << 4);
+
+       dev_dbg(&client->dev, "phy_x=%d, phy_y=%d\n", phy_x, phy_y);
+
+       ts->phy_x = phy_x;
+       ts->phy_y = phy_y;
+
+       /* eKTF devices don't report the maximum size; use default values */
+       ts->x_max = 2240 - 1;
+       ts->y_max = 1408 - 1;
+
+       return 0;
+}
+
+static int elants_i2c_query_ts_info_ekth(struct elants_data *ts)
 {
        struct i2c_client *client = ts->client;
        int error;
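
[For reference, the X/Y size replies decoded above pack a 12-bit millimetre value as a full low byte plus the high nibble of the following byte. A tiny sketch with a made-up reply:]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* hypothetical 4-byte reply; bytes 2 and 3 carry the size */
	uint8_t resp[4] = { 0x52, 0x00, 0x1A, 0x10 };
	uint16_t phy = resp[2] | ((resp[3] & 0xF0) << 4);

	printf("size: 0x%03X = %u mm\n", phy, phy); /* 0x11A = 282 mm */
	return 0;
}
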
@@ -508,6 +564,8 @@ static int elants_i2c_query_ts_info(struct elants_data *ts)
                ts->x_res = DIV_ROUND_CLOSEST(ts->x_max, phy_x);
                ts->y_max = ELAN_TS_RESOLUTION(cols, osr);
                ts->y_res = DIV_ROUND_CLOSEST(ts->y_max, phy_y);
+               ts->phy_x = phy_x;
+               ts->phy_y = phy_y;
        }
 
        return 0;
@@ -587,8 +645,19 @@ static int elants_i2c_initialize(struct elants_data *ts)
                error = elants_i2c_query_fw_version(ts);
        if (!error)
                error = elants_i2c_query_test_version(ts);
-       if (!error)
-               error = elants_i2c_query_ts_info(ts);
+
+       switch (ts->chip_id) {
+       case EKTH3500:
+               if (!error)
+                       error = elants_i2c_query_ts_info_ekth(ts);
+               break;
+       case EKTF3624:
+               if (!error)
+                       error = elants_i2c_query_ts_info_ektf(ts);
+               break;
+       default:
+               BUG();
+       }
 
        if (error)
                ts->iap_mode = ELAN_IAP_RECOVERY;
@@ -853,7 +922,8 @@ out:
  * Event reporting.
  */
 
-static void elants_i2c_mt_event(struct elants_data *ts, u8 *buf)
+static void elants_i2c_mt_event(struct elants_data *ts, u8 *buf,
+                               size_t packet_size)
 {
        struct input_dev *input = ts->input;
        unsigned int n_fingers;
@@ -880,8 +950,24 @@ static void elants_i2c_mt_event(struct elants_data *ts, u8 *buf)
                        pos = &buf[FW_POS_XY + i * 3];
                        x = (((u16)pos[0] & 0xf0) << 4) | pos[1];
                        y = (((u16)pos[0] & 0x0f) << 8) | pos[2];
-                       p = buf[FW_POS_PRESSURE + i];
-                       w = buf[FW_POS_WIDTH + i];
+
+                       /*
+                        * eKTF3624 may use the "old" touch-report format,
+                        * depending on the device and TS firmware version.
+                        * For example, ASUS Transformer devices use the "old"
+                        * format, while the ASUS Nexus 7 uses the "new" format.
+                        */
+                       if (packet_size == PACKET_SIZE_OLD &&
+                           ts->chip_id == EKTF3624) {
+                               w = buf[FW_POS_WIDTH + i / 2];
+                               w >>= 4 * (~i & 1);
+                               w |= w << 4;
+                               w |= !w;
+                               p = w;
+                       } else {
+                               p = buf[FW_POS_PRESSURE + i];
+                               w = buf[FW_POS_WIDTH + i];
+                       }
 
                        dev_dbg(&ts->client->dev, "i=%d x=%d y=%d p=%d w=%d\n",
                                i, x, y, p, w);
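
[In the old-format branch above, two 4-bit widths share one byte; the nibble is selected by finger index, widened to 8 bits by duplication, and clamped so a touching finger never reports zero. A standalone sketch; the packed byte is made up:]

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint8_t buf[1] = { 0x0C }; /* made-up widths for fingers 0 and 1 */

	for (int i = 0; i < 2; i++) {
		uint8_t w = buf[i / 2];

		w >>= 4 * (~i & 1); /* even finger: take the high nibble */
		w |= w << 4;        /* widen the 4-bit value to 8 bits */
		w |= !w;            /* never report a zero width */
		printf("finger %d: w = 0x%02X\n", i, w);
	}
	return 0; /* prints 0x01 (zero clamped) and 0xCC */
}
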
@@ -913,7 +999,8 @@ static u8 elants_i2c_calculate_checksum(u8 *buf)
        return checksum;
 }
 
-static void elants_i2c_event(struct elants_data *ts, u8 *buf)
+static void elants_i2c_event(struct elants_data *ts, u8 *buf,
+                            size_t packet_size)
 {
        u8 checksum = elants_i2c_calculate_checksum(buf);
 
@@ -927,7 +1014,7 @@ static void elants_i2c_event(struct elants_data *ts, u8 *buf)
                         "%s: unknown packet type: %02x\n",
                         __func__, buf[FW_POS_HEADER]);
        else
-               elants_i2c_mt_event(ts, buf);
+               elants_i2c_mt_event(ts, buf, packet_size);
 }
 
 static irqreturn_t elants_i2c_irq(int irq, void *_dev)
@@ -970,7 +1057,6 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev)
                switch (ts->buf[FW_HDR_TYPE]) {
                case CMD_HEADER_HELLO:
                case CMD_HEADER_RESP:
-               case CMD_HEADER_REK:
                        break;
 
                case QUEUE_HEADER_WAIT:
@@ -985,9 +1071,24 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev)
                        break;
 
                case QUEUE_HEADER_SINGLE:
-                       elants_i2c_event(ts, &ts->buf[HEADER_SIZE]);
+                       elants_i2c_event(ts, &ts->buf[HEADER_SIZE],
+                                        ts->buf[FW_HDR_LENGTH]);
                        break;
 
+               case QUEUE_HEADER_NORMAL2: /* CMD_HEADER_REK */
+                       /*
+                        * Depending on the firmware version, eKTF3624
+                        * touchscreens may use either of two opcodes for
+                        * touch events: 0x63 (NORMAL) or 0x66 (NORMAL2).
+                        * 0x63 is used by older firmware and differs from
+                        * 0x66 in that the touch pressure value needs to be
+                        * adjusted.  The 0x66 opcode of newer firmware is
+                        * equivalent to the 0x63 of the eKTH3500.
+                        */
+                       if (ts->chip_id != EKTF3624)
+                               break;
+
+                       fallthrough;
+
                case QUEUE_HEADER_NORMAL:
                        report_count = ts->buf[FW_HDR_COUNT];
                        if (report_count == 0 || report_count > 3) {
@@ -998,7 +1099,12 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev)
                        }
 
                        report_len = ts->buf[FW_HDR_LENGTH] / report_count;
-                       if (report_len != PACKET_SIZE) {
+
+                       if (report_len == PACKET_SIZE_OLD &&
+                           ts->chip_id == EKTF3624) {
+                               dev_dbg_once(&client->dev,
+                                            "using old report format\n");
+                       } else if (report_len != PACKET_SIZE) {
                                dev_err(&client->dev,
                                        "mismatching report length: %*ph\n",
                                        HEADER_SIZE, ts->buf);
@@ -1007,8 +1113,8 @@ static irqreturn_t elants_i2c_irq(int irq, void *_dev)
 
                        for (i = 0; i < report_count; i++) {
                                u8 *buf = ts->buf + HEADER_SIZE +
-                                                       i * PACKET_SIZE;
-                               elants_i2c_event(ts, buf);
+                                                       i * report_len;
+                               elants_i2c_event(ts, buf, report_len);
                        }
                        break;
 
@@ -1250,6 +1356,7 @@ static int elants_i2c_probe(struct i2c_client *client,
        init_completion(&ts->cmd_done);
 
        ts->client = client;
+       ts->chip_id = (enum elants_chip_id)id->driver_data;
        i2c_set_clientdata(client, ts);
 
        ts->vcc33 = devm_regulator_get(&client->dev, "vcc33");
@@ -1331,13 +1438,20 @@ static int elants_i2c_probe(struct i2c_client *client,
        input_set_abs_params(ts->input, ABS_MT_PRESSURE, 0, 255, 0, 0);
        input_set_abs_params(ts->input, ABS_MT_TOOL_TYPE,
                             0, MT_TOOL_PALM, 0, 0);
+
+       touchscreen_parse_properties(ts->input, true, &ts->prop);
+
+       if (ts->chip_id == EKTF3624) {
+               /* calculate resolution from size */
+               ts->x_res = DIV_ROUND_CLOSEST(ts->prop.max_x, ts->phy_x);
+               ts->y_res = DIV_ROUND_CLOSEST(ts->prop.max_y, ts->phy_y);
+       }
+
        input_abs_set_res(ts->input, ABS_MT_POSITION_X, ts->x_res);
        input_abs_set_res(ts->input, ABS_MT_POSITION_Y, ts->y_res);
        if (ts->major_res > 0)
                input_abs_set_res(ts->input, ABS_MT_TOUCH_MAJOR, ts->major_res);
 
-       touchscreen_parse_properties(ts->input, true, &ts->prop);
-
        error = input_mt_init_slots(ts->input, MAX_CONTACT_NUM,
                                    INPUT_MT_DIRECT | INPUT_MT_DROP_UNUSED);
        if (error) {
@@ -1466,14 +1580,16 @@ static SIMPLE_DEV_PM_OPS(elants_i2c_pm_ops,
                         elants_i2c_suspend, elants_i2c_resume);
 
 static const struct i2c_device_id elants_i2c_id[] = {
-       { DEVICE_NAME, 0 },
+       { DEVICE_NAME, EKTH3500 },
+       { "ekth3500", EKTH3500 },
+       { "ektf3624", EKTF3624 },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, elants_i2c_id);
 
 #ifdef CONFIG_ACPI
 static const struct acpi_device_id elants_acpi_id[] = {
-       { "ELAN0001", 0 },
+       { "ELAN0001", EKTH3500 },
        { }
 };
 MODULE_DEVICE_TABLE(acpi, elants_acpi_id);
@@ -1482,6 +1598,7 @@ MODULE_DEVICE_TABLE(acpi, elants_acpi_id);
 #ifdef CONFIG_OF
 static const struct of_device_id elants_of_match[] = {
        { .compatible = "elan,ekth3500" },
+       { .compatible = "elan,ektf3624" },
        { /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, elants_of_match);
index e0bacd3..9617323 100644 (file)
@@ -341,8 +341,10 @@ static int elo_connect(struct serio *serio, struct serio_driver *drv)
        switch (elo->id) {
 
        case 0: /* 10-byte protocol */
-               if (elo_setup_10(elo))
+               if (elo_setup_10(elo)) {
+                       err = -EIO;
                        goto fail3;
+               }
 
                break;
 
index 4fd21bc..54f3003 100644 (file)
@@ -2,8 +2,7 @@
 /*
  * Azoteq IQS550/572/525 Trackpad/Touchscreen Controller
  *
- * Copyright (C) 2018
- * Author: Jeff LaBundy <jeff@labundy.com>
+ * Copyright (C) 2018 Jeff LaBundy <jeff@labundy.com>
  *
  * These devices require firmware exported from a PC-based configuration tool
  * made available by the vendor. Firmware files may be pushed to the device's
@@ -12,6 +11,7 @@
  * Link to PC-based configuration tool and data sheet: http://www.azoteq.com/
  */
 
+#include <linux/bits.h>
 #include <linux/delay.h>
 #include <linux/device.h>
 #include <linux/err.h>
@@ -30,9 +30,9 @@
 
 #define IQS5XX_FW_FILE_LEN     64
 #define IQS5XX_NUM_RETRIES     10
-#define IQS5XX_NUM_POINTS      256
 #define IQS5XX_NUM_CONTACTS    5
 #define IQS5XX_WR_BYTES_MAX    2
+#define IQS5XX_XY_RES_MAX      0xFFFE
 
 #define IQS5XX_PROD_NUM_IQS550 40
 #define IQS5XX_PROD_NUM_IQS572 58
 #define IQS5XX_PROJ_NUM_B000   15
 #define IQS5XX_MAJOR_VER_MIN   2
 
-#define IQS5XX_RESUME          0x00
-#define IQS5XX_SUSPEND         0x01
+#define IQS5XX_SHOW_RESET      BIT(7)
+#define IQS5XX_ACK_RESET       BIT(7)
 
-#define IQS5XX_SW_INPUT_EVENT  0x10
-#define IQS5XX_SETUP_COMPLETE  0x40
-#define IQS5XX_EVENT_MODE      0x01
-#define IQS5XX_TP_EVENT                0x04
+#define IQS5XX_SUSPEND         BIT(0)
+#define IQS5XX_RESUME          0
 
-#define IQS5XX_FLIP_X          0x01
-#define IQS5XX_FLIP_Y          0x02
-#define IQS5XX_SWITCH_XY_AXIS  0x04
+#define IQS5XX_SETUP_COMPLETE  BIT(6)
+#define IQS5XX_WDT             BIT(5)
+#define IQS5XX_ALP_REATI       BIT(3)
+#define IQS5XX_REATI           BIT(2)
+
+#define IQS5XX_TP_EVENT                BIT(2)
+#define IQS5XX_EVENT_MODE      BIT(0)
 
 #define IQS5XX_PROD_NUM                0x0000
-#define IQS5XX_ABS_X           0x0016
-#define IQS5XX_ABS_Y           0x0018
+#define IQS5XX_SYS_INFO0       0x000F
+#define IQS5XX_SYS_INFO1       0x0010
 #define IQS5XX_SYS_CTRL0       0x0431
 #define IQS5XX_SYS_CTRL1       0x0432
 #define IQS5XX_SYS_CFG0                0x058E
 #define IQS5XX_SYS_CFG1                0x058F
-#define IQS5XX_TOTAL_RX                0x063D
-#define IQS5XX_TOTAL_TX                0x063E
-#define IQS5XX_XY_CFG0         0x0669
 #define IQS5XX_X_RES           0x066E
 #define IQS5XX_Y_RES           0x0670
 #define IQS5XX_CHKSM           0x83C0
@@ -99,6 +98,7 @@ struct iqs5xx_private {
        struct i2c_client *client;
        struct input_dev *input;
        struct gpio_desc *reset_gpio;
+       struct touchscreen_properties prop;
        struct mutex lock;
        u8 bl_status;
 };
@@ -126,6 +126,14 @@ struct iqs5xx_touch_data {
        u8 area;
 } __packed;
 
+struct iqs5xx_status {
+       u8 sys_info[2];
+       u8 num_active;
+       __be16 rel_x;
+       __be16 rel_y;
+       struct iqs5xx_touch_data touch_data[IQS5XX_NUM_CONTACTS];
+} __packed;
+
 static int iqs5xx_read_burst(struct i2c_client *client,
                             u16 reg, void *val, u16 len)
 {
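
[The new struct lets the interrupt handler fetch the whole event snapshot in one burst read, so its layout must match the register map byte for byte. A compile-and-run sketch of that size bookkeeping; the abs_x/abs_y/strength fields are inferred from their uses in the handler further down, and uint16_t stands in for __be16 since only the size matters here:]

#include <stdint.h>
#include <stdio.h>

#define IQS5XX_NUM_CONTACTS 5

struct iqs5xx_touch_data {
	uint16_t abs_x;    /* __be16 in the driver */
	uint16_t abs_y;    /* __be16 in the driver */
	uint16_t strength; /* __be16 in the driver */
	uint8_t  area;
} __attribute__((packed));

struct iqs5xx_status {
	uint8_t  sys_info[2];
	uint8_t  num_active;
	uint16_t rel_x;    /* __be16 in the driver */
	uint16_t rel_y;    /* __be16 in the driver */
	struct iqs5xx_touch_data touch_data[IQS5XX_NUM_CONTACTS];
} __attribute__((packed));

int main(void)
{
	/* 2 + 1 + 2 + 2 + 5 * 7 = 42 bytes; one burst read covers it all */
	printf("touch_data: %zu bytes, status: %zu bytes\n",
	       sizeof(struct iqs5xx_touch_data),
	       sizeof(struct iqs5xx_status));
	return 0;
}
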
@@ -182,11 +190,6 @@ static int iqs5xx_read_word(struct i2c_client *client, u16 reg, u16 *val)
        return 0;
 }
 
-static int iqs5xx_read_byte(struct i2c_client *client, u16 reg, u8 *val)
-{
-       return iqs5xx_read_burst(client, reg, val, sizeof(*val));
-}
-
 static int iqs5xx_write_burst(struct i2c_client *client,
                              u16 reg, const void *val, u16 len)
 {
@@ -337,11 +340,16 @@ static int iqs5xx_bl_open(struct i2c_client *client)
         */
        for (i = 0; i < IQS5XX_BL_ATTEMPTS; i++) {
                iqs5xx_reset(client);
+               usleep_range(350, 400);
 
                for (j = 0; j < IQS5XX_NUM_RETRIES; j++) {
                        error = iqs5xx_bl_cmd(client, IQS5XX_BL_CMD_VER, 0);
-                       if (!error || error == -EINVAL)
-                               return error;
+                       if (!error)
+                               usleep_range(10000, 10100);
+                       else if (error != -EINVAL)
+                               continue;
+
+                       return error;
                }
        }
 
@@ -481,12 +489,10 @@ static void iqs5xx_close(struct input_dev *input)
 static int iqs5xx_axis_init(struct i2c_client *client)
 {
        struct iqs5xx_private *iqs5xx = i2c_get_clientdata(client);
-       struct touchscreen_properties prop;
+       struct touchscreen_properties *prop = &iqs5xx->prop;
        struct input_dev *input;
+       u16 max_x, max_y;
        int error;
-       u16 max_x, max_x_hw;
-       u16 max_y, max_y_hw;
-       u8 val;
 
        if (!iqs5xx->input) {
                input = devm_input_allocate_device(&client->dev);
@@ -506,89 +512,39 @@ static int iqs5xx_axis_init(struct i2c_client *client)
                iqs5xx->input = input;
        }
 
-       touchscreen_parse_properties(iqs5xx->input, true, &prop);
-
-       error = iqs5xx_read_byte(client, IQS5XX_TOTAL_RX, &val);
-       if (error)
-               return error;
-       max_x_hw = (val - 1) * IQS5XX_NUM_POINTS;
-
-       error = iqs5xx_read_byte(client, IQS5XX_TOTAL_TX, &val);
+       error = iqs5xx_read_word(client, IQS5XX_X_RES, &max_x);
        if (error)
                return error;
-       max_y_hw = (val - 1) * IQS5XX_NUM_POINTS;
 
-       error = iqs5xx_read_byte(client, IQS5XX_XY_CFG0, &val);
+       error = iqs5xx_read_word(client, IQS5XX_Y_RES, &max_y);
        if (error)
                return error;
 
-       if (val & IQS5XX_SWITCH_XY_AXIS)
-               swap(max_x_hw, max_y_hw);
+       input_abs_set_max(iqs5xx->input, ABS_MT_POSITION_X, max_x);
+       input_abs_set_max(iqs5xx->input, ABS_MT_POSITION_Y, max_y);
 
-       if (prop.swap_x_y)
-               val ^= IQS5XX_SWITCH_XY_AXIS;
-
-       if (prop.invert_x)
-               val ^= prop.swap_x_y ? IQS5XX_FLIP_Y : IQS5XX_FLIP_X;
-
-       if (prop.invert_y)
-               val ^= prop.swap_x_y ? IQS5XX_FLIP_X : IQS5XX_FLIP_Y;
-
-       error = iqs5xx_write_byte(client, IQS5XX_XY_CFG0, val);
-       if (error)
-               return error;
+       touchscreen_parse_properties(iqs5xx->input, true, prop);
 
-       if (prop.max_x > max_x_hw) {
+       if (prop->max_x > IQS5XX_XY_RES_MAX) {
                dev_err(&client->dev, "Invalid maximum x-coordinate: %u > %u\n",
-                       prop.max_x, max_x_hw);
+                       prop->max_x, IQS5XX_XY_RES_MAX);
                return -EINVAL;
-       } else if (prop.max_x == 0) {
-               error = iqs5xx_read_word(client, IQS5XX_X_RES, &max_x);
+       } else if (prop->max_x != max_x) {
+               error = iqs5xx_write_word(client, IQS5XX_X_RES, prop->max_x);
                if (error)
                        return error;
-
-               input_abs_set_max(iqs5xx->input,
-                                 prop.swap_x_y ? ABS_MT_POSITION_Y :
-                                                 ABS_MT_POSITION_X,
-                                 max_x);
-       } else {
-               max_x = (u16)prop.max_x;
        }
 
-       if (prop.max_y > max_y_hw) {
+       if (prop->max_y > IQS5XX_XY_RES_MAX) {
                dev_err(&client->dev, "Invalid maximum y-coordinate: %u > %u\n",
-                       prop.max_y, max_y_hw);
+                       prop->max_y, IQS5XX_XY_RES_MAX);
                return -EINVAL;
-       } else if (prop.max_y == 0) {
-               error = iqs5xx_read_word(client, IQS5XX_Y_RES, &max_y);
+       } else if (prop->max_y != max_y) {
+               error = iqs5xx_write_word(client, IQS5XX_Y_RES, prop->max_y);
                if (error)
                        return error;
-
-               input_abs_set_max(iqs5xx->input,
-                                 prop.swap_x_y ? ABS_MT_POSITION_X :
-                                                 ABS_MT_POSITION_Y,
-                                 max_y);
-       } else {
-               max_y = (u16)prop.max_y;
        }
 
-       /*
-        * Write horizontal and vertical resolution to the device in case its
-        * original defaults were overridden or swapped as per the properties
-        * specified in the device tree.
-        */
-       error = iqs5xx_write_word(client,
-                                 prop.swap_x_y ? IQS5XX_Y_RES : IQS5XX_X_RES,
-                                 max_x);
-       if (error)
-               return error;
-
-       error = iqs5xx_write_word(client,
-                                 prop.swap_x_y ? IQS5XX_X_RES : IQS5XX_Y_RES,
-                                 max_y);
-       if (error)
-               return error;
-
        error = input_mt_init_slots(iqs5xx->input, IQS5XX_NUM_CONTACTS,
                                    INPUT_MT_DIRECT);
        if (error)
@@ -603,7 +559,6 @@ static int iqs5xx_dev_init(struct i2c_client *client)
        struct iqs5xx_private *iqs5xx = i2c_get_clientdata(client);
        struct iqs5xx_dev_id_info *dev_id_info;
        int error;
-       u8 val;
        u8 buf[sizeof(*dev_id_info) + 1];
 
        error = iqs5xx_read_burst(client, IQS5XX_PROD_NUM,
@@ -666,18 +621,18 @@ static int iqs5xx_dev_init(struct i2c_client *client)
        if (error)
                return error;
 
-       error = iqs5xx_read_byte(client, IQS5XX_SYS_CFG0, &val);
+       error = iqs5xx_write_byte(client, IQS5XX_SYS_CTRL0, IQS5XX_ACK_RESET);
        if (error)
                return error;
 
-       val |= IQS5XX_SETUP_COMPLETE;
-       val &= ~IQS5XX_SW_INPUT_EVENT;
-       error = iqs5xx_write_byte(client, IQS5XX_SYS_CFG0, val);
+       error = iqs5xx_write_byte(client, IQS5XX_SYS_CFG0,
+                                 IQS5XX_SETUP_COMPLETE | IQS5XX_WDT |
+                                 IQS5XX_ALP_REATI | IQS5XX_REATI);
        if (error)
                return error;
 
-       val = IQS5XX_TP_EVENT | IQS5XX_EVENT_MODE;
-       error = iqs5xx_write_byte(client, IQS5XX_SYS_CFG1, val);
+       error = iqs5xx_write_byte(client, IQS5XX_SYS_CFG1,
+                                 IQS5XX_TP_EVENT | IQS5XX_EVENT_MODE);
        if (error)
                return error;
 
@@ -688,13 +643,12 @@ static int iqs5xx_dev_init(struct i2c_client *client)
        iqs5xx->bl_status = dev_id_info->bl_status;
 
        /*
-        * Closure of the first communication window that appears following the
-        * release of reset appears to kick off an initialization period during
-        * which further communication is met with clock stretching. The return
-        * from this function is delayed so that further communication attempts
-        * avoid this period.
+        * The following delay allows ATI to complete before the open and close
+        * callbacks are free to elicit I2C communication. Any attempts to read
+        * from or write to the device during this time may face extended clock
+        * stretching and prompt the I2C controller to report an error.
         */
-       msleep(100);
+       msleep(250);
 
        return 0;
 }
@@ -702,7 +656,7 @@ static int iqs5xx_dev_init(struct i2c_client *client)
 static irqreturn_t iqs5xx_irq(int irq, void *data)
 {
        struct iqs5xx_private *iqs5xx = data;
-       struct iqs5xx_touch_data touch_data[IQS5XX_NUM_CONTACTS];
+       struct iqs5xx_status status;
        struct i2c_client *client = iqs5xx->client;
        struct input_dev *input = iqs5xx->input;
        int error, i;
@@ -715,21 +669,35 @@ static irqreturn_t iqs5xx_irq(int irq, void *data)
        if (iqs5xx->bl_status == IQS5XX_BL_STATUS_RESET)
                return IRQ_NONE;
 
-       error = iqs5xx_read_burst(client, IQS5XX_ABS_X,
-                                 touch_data, sizeof(touch_data));
+       error = iqs5xx_read_burst(client, IQS5XX_SYS_INFO0,
+                                 &status, sizeof(status));
        if (error)
                return IRQ_NONE;
 
-       for (i = 0; i < ARRAY_SIZE(touch_data); i++) {
-               u16 pressure = be16_to_cpu(touch_data[i].strength);
+       if (status.sys_info[0] & IQS5XX_SHOW_RESET) {
+               dev_err(&client->dev, "Unexpected device reset\n");
+
+               error = iqs5xx_dev_init(client);
+               if (error) {
+                       dev_err(&client->dev,
+                               "Failed to re-initialize device: %d\n", error);
+                       return IRQ_NONE;
+               }
+
+               return IRQ_HANDLED;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(status.touch_data); i++) {
+               struct iqs5xx_touch_data *touch_data = &status.touch_data[i];
+               u16 pressure = be16_to_cpu(touch_data->strength);
 
                input_mt_slot(input, i);
                if (input_mt_report_slot_state(input, MT_TOOL_FINGER,
                                               pressure != 0)) {
-                       input_report_abs(input, ABS_MT_POSITION_X,
-                                        be16_to_cpu(touch_data[i].abs_x));
-                       input_report_abs(input, ABS_MT_POSITION_Y,
-                                        be16_to_cpu(touch_data[i].abs_y));
+                       touchscreen_report_pos(iqs5xx->input, &iqs5xx->prop,
+                                              be16_to_cpu(touch_data->abs_x),
+                                              be16_to_cpu(touch_data->abs_y),
+                                              true);
                        input_report_abs(input, ABS_MT_PRESSURE, pressure);
                }
        }
@@ -884,7 +852,7 @@ static int iqs5xx_fw_file_parse(struct i2c_client *client,
 static int iqs5xx_fw_file_write(struct i2c_client *client, const char *fw_file)
 {
        struct iqs5xx_private *iqs5xx = i2c_get_clientdata(client);
-       int error;
+       int error, error_bl = 0;
        u8 *pmap;
 
        if (iqs5xx->bl_status == IQS5XX_BL_STATUS_NONE)
@@ -938,6 +906,7 @@ err_reset:
                usleep_range(10000, 10100);
        }
 
+       error_bl = error;
        error = iqs5xx_dev_init(client);
        if (!error && iqs5xx->bl_status == IQS5XX_BL_STATUS_RESET)
                error = -EINVAL;
@@ -949,11 +918,15 @@ err_reset:
 err_kfree:
        kfree(pmap);
 
+       if (error_bl)
+               return error_bl;
+
        return error;
 }
 
-static ssize_t fw_file_store(struct device *dev, struct device_attribute *attr,
-                               const char *buf, size_t count)
+static ssize_t fw_file_store(struct device *dev,
+                            struct device_attribute *attr, const char *buf,
+                            size_t count)
 {
        struct iqs5xx_private *iqs5xx = dev_get_drvdata(dev);
        struct i2c_client *client = iqs5xx->client;
@@ -1012,7 +985,7 @@ static int __maybe_unused iqs5xx_suspend(struct device *dev)
        struct input_dev *input = iqs5xx->input;
        int error = 0;
 
-       if (!input)
+       if (!input || device_may_wakeup(dev))
                return error;
 
        mutex_lock(&input->mutex);
@@ -1031,7 +1004,7 @@ static int __maybe_unused iqs5xx_resume(struct device *dev)
        struct input_dev *input = iqs5xx->input;
        int error = 0;
 
-       if (!input)
+       if (!input || device_may_wakeup(dev))
                return error;
 
        mutex_lock(&input->mutex);
index c005004..225796a 100644 (file)
@@ -465,13 +465,13 @@ static void mip4_report_keys(struct mip4_ts *ts, u8 *packet)
 static void mip4_report_touch(struct mip4_ts *ts, u8 *packet)
 {
        int id;
-       bool hover;
-       bool palm;
+       bool __always_unused hover;
+       bool __always_unused palm;
        bool state;
        u16 x, y;
-       u8 pressure_stage = 0;
+       u8 __always_unused pressure_stage = 0;
        u8 pressure;
-       u8 size;
+       u8 __always_unused size;
        u8 touch_major;
        u8 touch_minor;
 
index 603a948..4d2d22a 100644 (file)
@@ -445,6 +445,7 @@ static int raydium_i2c_write_object(struct i2c_client *client,
                                    enum raydium_bl_ack state)
 {
        int error;
+       static const u8 cmd[] = { 0xFF, 0x39 };
 
        error = raydium_i2c_send(client, RM_CMD_BOOT_WRT, data, len);
        if (error) {
@@ -453,7 +454,7 @@ static int raydium_i2c_write_object(struct i2c_client *client,
                return error;
        }
 
-       error = raydium_i2c_send(client, RM_CMD_BOOT_ACK, NULL, 0);
+       error = raydium_i2c_send(client, RM_CMD_BOOT_ACK, cmd, sizeof(cmd));
        if (error) {
                dev_err(&client->dev, "Ack obj command failed: %d\n", error);
                return error;
index b4e7bcb..6abae66 100644 (file)
@@ -94,8 +94,13 @@ static int st1232_ts_wait_ready(struct st1232_ts_data *ts)
 
        for (retries = 10; retries; retries--) {
                error = st1232_ts_read_data(ts, REG_STATUS, 1);
-               if (!error && ts->read_buf[0] == (STATUS_NORMAL | ERROR_NONE))
-                       return 0;
+               if (!error) {
+                       switch (ts->read_buf[0]) {
+                       case STATUS_NORMAL | ERROR_NONE:
+                       case STATUS_IDLE | ERROR_NONE:
+                               return 0;
+                       }
+               }
 
                usleep_range(1000, 2000);
        }
index cd74772..25c45c3 100644 (file)
@@ -52,6 +52,7 @@
  * @idev: registered input device
  * @work: a work item used to scan the device
  * @dev: a pointer back to the MFD cell struct device*
+ * @prop: Touchscreen properties
  * @ave_ctrl: Sample average control
  * (0 -> 1 sample, 1 -> 2 samples, 2 -> 4 samples, 3 -> 8 samples)
  * @touch_det_delay: Touch detect interrupt delay
index 620cdd7..12f2562 100644 (file)
@@ -787,6 +787,7 @@ static int sur40_probe(struct usb_interface *interface,
                dev_err(&interface->dev,
                        "Unable to register video controls.");
                v4l2_ctrl_handler_free(&sur40->hdl);
+               error = sur40->hdl.error;
                goto err_unreg_v4l2;
        }
 
index 7314545..1da23e5 100644 (file)
@@ -94,9 +94,7 @@ static void surface3_spi_report_touch(struct surface3_ts_data *ts_data,
 
 static void surface3_spi_process_touch(struct surface3_ts_data *ts_data, u8 *data)
 {
-       u16 timestamp;
        unsigned int i;
-       timestamp = get_unaligned_le16(&data[15]);
 
        for (i = 0; i < 13; i++) {
                struct surface3_ts_data_finger *finger;
index 397cb1d..c847453 100644 (file)
@@ -1044,6 +1044,7 @@ static void nexio_exit(struct usbtouch_usb *usbtouch)
 
 static int nexio_read_data(struct usbtouch_usb *usbtouch, unsigned char *pkt)
 {
+       struct device *dev = &usbtouch->interface->dev;
        struct nexio_touch_packet *packet = (void *) pkt;
        struct nexio_priv *priv = usbtouch->priv;
        unsigned int data_len = be16_to_cpu(packet->data_len);
@@ -1062,6 +1063,8 @@ static int nexio_read_data(struct usbtouch_usb *usbtouch, unsigned char *pkt)
 
        /* send ACK */
        ret = usb_submit_urb(priv->ack, GFP_ATOMIC);
+       if (ret)
+               dev_warn(dev, "Failed to submit ACK URB: %d\n", ret);
 
        if (!usbtouch->type->max_xc) {
                usbtouch->type->max_xc = 2 * x_len;
index a3e3adb..3b636be 100644 (file)
@@ -161,7 +161,7 @@ static int zinitix_read_data(struct i2c_client *client,
 
        ret = i2c_master_recv(client, (u8 *)values, length);
        if (ret != length)
-               return ret < 0 ? ret : -EIO; ;
+               return ret < 0 ? ret : -EIO;
 
        return 0;
 }
@@ -190,7 +190,7 @@ static int zinitix_write_cmd(struct i2c_client *client, u16 reg)
        return 0;
 }
 
-static bool zinitix_init_touch(struct bt541_ts_data *bt541)
+static int zinitix_init_touch(struct bt541_ts_data *bt541)
 {
        struct i2c_client *client = bt541->client;
        int i;
index 73e2c8d..448cc53 100644 (file)
@@ -53,7 +53,7 @@ void icc_bulk_put(int num_paths, struct icc_bulk_data *paths)
 EXPORT_SYMBOL_GPL(icc_bulk_put);
 
 /**
- * icc_bulk_set() - set bandwidth to a set of paths
+ * icc_bulk_set_bw() - set bandwidth to a set of paths
  * @num_paths: the number of icc_bulk_data
  * @paths: the icc_bulk_data table containing the paths and bandwidth
  *
index 5ad519c..8a1e70e 100644 (file)
@@ -942,6 +942,8 @@ int icc_link_destroy(struct icc_node *src, struct icc_node *dst)
                       GFP_KERNEL);
        if (new)
                src->links = new;
+       else
+               ret = -ENOMEM;
 
 out:
        mutex_unlock(&icc_lock);
index b3fb5b0..ca52647 100644 (file)
@@ -17,6 +17,15 @@ config INTERCONNECT_QCOM_MSM8916
          This is a driver for the Qualcomm Network-on-Chip on msm8916-based
          platforms.
 
+config INTERCONNECT_QCOM_MSM8939
+       tristate "Qualcomm MSM8939 interconnect driver"
+       depends on INTERCONNECT_QCOM
+       depends on QCOM_SMD_RPM
+       select INTERCONNECT_QCOM_SMD_RPM
+       help
+         This is a driver for the Qualcomm Network-on-Chip on msm8939-based
+         platforms.
+
 config INTERCONNECT_QCOM_MSM8974
        tristate "Qualcomm MSM8974 interconnect driver"
        depends on INTERCONNECT_QCOM
@@ -74,6 +83,15 @@ config INTERCONNECT_QCOM_SDM845
          This is a driver for the Qualcomm Network-on-Chip on sdm845-based
          platforms.
 
+config INTERCONNECT_QCOM_SDX55
+       tristate "Qualcomm SDX55 interconnect driver"
+       depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
+       select INTERCONNECT_QCOM_RPMH
+       select INTERCONNECT_QCOM_BCM_VOTER
+       help
+         This is a driver for the Qualcomm Network-on-Chip on sdx55-based
+         platforms.
+
 config INTERCONNECT_QCOM_SM8150
        tristate "Qualcomm SM8150 interconnect driver"
        depends on INTERCONNECT_QCOM_RPMH_POSSIBLE
index cf628f7..c6a735d 100644 (file)
@@ -2,24 +2,28 @@
 
 icc-bcm-voter-objs                     := bcm-voter.o
 qnoc-msm8916-objs                      := msm8916.o
+qnoc-msm8939-objs                      := msm8939.o
 qnoc-msm8974-objs                      := msm8974.o
 icc-osm-l3-objs                                := osm-l3.o
 qnoc-qcs404-objs                       := qcs404.o
 icc-rpmh-obj                           := icc-rpmh.o
 qnoc-sc7180-objs                       := sc7180.o
 qnoc-sdm845-objs                       := sdm845.o
+qnoc-sdx55-objs                                := sdx55.o
 qnoc-sm8150-objs                       := sm8150.o
 qnoc-sm8250-objs                       := sm8250.o
-icc-smd-rpm-objs                       := smd-rpm.o
+icc-smd-rpm-objs                       := smd-rpm.o icc-rpm.o
 
 obj-$(CONFIG_INTERCONNECT_QCOM_BCM_VOTER) += icc-bcm-voter.o
 obj-$(CONFIG_INTERCONNECT_QCOM_MSM8916) += qnoc-msm8916.o
+obj-$(CONFIG_INTERCONNECT_QCOM_MSM8939) += qnoc-msm8939.o
 obj-$(CONFIG_INTERCONNECT_QCOM_MSM8974) += qnoc-msm8974.o
 obj-$(CONFIG_INTERCONNECT_QCOM_OSM_L3) += icc-osm-l3.o
 obj-$(CONFIG_INTERCONNECT_QCOM_QCS404) += qnoc-qcs404.o
 obj-$(CONFIG_INTERCONNECT_QCOM_RPMH) += icc-rpmh.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SC7180) += qnoc-sc7180.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SDM845) += qnoc-sdm845.o
+obj-$(CONFIG_INTERCONNECT_QCOM_SDX55) += qnoc-sdx55.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SM8150) += qnoc-sm8150.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SM8250) += qnoc-sm8250.o
 obj-$(CONFIG_INTERCONNECT_QCOM_SMD_RPM) += icc-smd-rpm.o
diff --git a/drivers/interconnect/qcom/icc-rpm.c b/drivers/interconnect/qcom/icc-rpm.c
new file mode 100644 (file)
index 0000000..cc60954
--- /dev/null
@@ -0,0 +1,191 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Linaro Ltd
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/of_device.h>
+#include <linux/of_platform.h>
+#include <linux/platform_device.h>
+#include <linux/slab.h>
+
+#include "smd-rpm.h"
+#include "icc-rpm.h"
+
+static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
+{
+       struct qcom_icc_provider *qp;
+       struct qcom_icc_node *qn;
+       struct icc_provider *provider;
+       struct icc_node *n;
+       u64 sum_bw;
+       u64 max_peak_bw;
+       u64 rate;
+       u32 agg_avg = 0;
+       u32 agg_peak = 0;
+       int ret, i;
+
+       qn = src->data;
+       provider = src->provider;
+       qp = to_qcom_provider(provider);
+
+       list_for_each_entry(n, &provider->nodes, node_list)
+               provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
+                                   &agg_avg, &agg_peak);
+
+       sum_bw = icc_units_to_bps(agg_avg);
+       max_peak_bw = icc_units_to_bps(agg_peak);
+
+       /* send bandwidth request message to the RPM processor */
+       if (qn->mas_rpm_id != -1) {
+               ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
+                                           RPM_BUS_MASTER_REQ,
+                                           qn->mas_rpm_id,
+                                           sum_bw);
+               if (ret) {
+                       pr_err("qcom_icc_rpm_smd_send mas %d error %d\n",
+                              qn->mas_rpm_id, ret);
+                       return ret;
+               }
+       }
+
+       if (qn->slv_rpm_id != -1) {
+               ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
+                                           RPM_BUS_SLAVE_REQ,
+                                           qn->slv_rpm_id,
+                                           sum_bw);
+               if (ret) {
+                       pr_err("qcom_icc_rpm_smd_send slv error %d\n",
+                              ret);
+                       return ret;
+               }
+       }
+
+       rate = max(sum_bw, max_peak_bw);
+
+       do_div(rate, qn->buswidth);
+
+       if (qn->rate == rate)
+               return 0;
+
+       for (i = 0; i < qp->num_clks; i++) {
+               ret = clk_set_rate(qp->bus_clks[i].clk, rate);
+               if (ret) {
+                       pr_err("%s clk_set_rate error: %d\n",
+                              qp->bus_clks[i].id, ret);
+                       return ret;
+               }
+       }
+
+       qn->rate = rate;
+
+       return 0;
+}
+
+int qnoc_probe(struct platform_device *pdev, size_t cd_size, int cd_num,
+              const struct clk_bulk_data *cd)
+{
+       struct device *dev = &pdev->dev;
+       const struct qcom_icc_desc *desc;
+       struct icc_onecell_data *data;
+       struct icc_provider *provider;
+       struct qcom_icc_node **qnodes;
+       struct qcom_icc_provider *qp;
+       struct icc_node *node;
+       size_t num_nodes, i;
+       int ret;
+
+       /* wait for the RPM proxy */
+       if (!qcom_icc_rpm_smd_available())
+               return -EPROBE_DEFER;
+
+       desc = of_device_get_match_data(dev);
+       if (!desc)
+               return -EINVAL;
+
+       qnodes = desc->nodes;
+       num_nodes = desc->num_nodes;
+
+       qp = devm_kzalloc(dev, sizeof(*qp), GFP_KERNEL);
+       if (!qp)
+               return -ENOMEM;
+
+       data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
+                           GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       qp->bus_clks = devm_kmemdup(dev, cd, cd_size,
+                                   GFP_KERNEL);
+       if (!qp->bus_clks)
+               return -ENOMEM;
+
+       qp->num_clks = cd_num;
+       ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
+       if (ret)
+               return ret;
+
+       ret = clk_bulk_prepare_enable(qp->num_clks, qp->bus_clks);
+       if (ret)
+               return ret;
+
+       provider = &qp->provider;
+       INIT_LIST_HEAD(&provider->nodes);
+       provider->dev = dev;
+       provider->set = qcom_icc_set;
+       provider->aggregate = icc_std_aggregate;
+       provider->xlate = of_icc_xlate_onecell;
+       provider->data = data;
+
+       ret = icc_provider_add(provider);
+       if (ret) {
+               dev_err(dev, "error adding interconnect provider: %d\n", ret);
+               clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
+               return ret;
+       }
+
+       for (i = 0; i < num_nodes; i++) {
+               size_t j;
+
+               node = icc_node_create(qnodes[i]->id);
+               if (IS_ERR(node)) {
+                       ret = PTR_ERR(node);
+                       goto err;
+               }
+
+               node->name = qnodes[i]->name;
+               node->data = qnodes[i];
+               icc_node_add(node, provider);
+
+               for (j = 0; j < qnodes[i]->num_links; j++)
+                       icc_link_create(node, qnodes[i]->links[j]);
+
+               data->nodes[i] = node;
+       }
+       data->num_nodes = num_nodes;
+
+       platform_set_drvdata(pdev, qp);
+
+       return 0;
+err:
+       icc_nodes_remove(provider);
+       clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
+       icc_provider_del(provider);
+
+       return ret;
+}
+EXPORT_SYMBOL(qnoc_probe);
+
+int qnoc_remove(struct platform_device *pdev)
+{
+       struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+
+       icc_nodes_remove(&qp->provider);
+       clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
+       return icc_provider_del(&qp->provider);
+}
+EXPORT_SYMBOL(qnoc_remove);
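
[The rate derivation in qcom_icc_set() above takes the larger of the aggregated average and peak bandwidth and divides it by the node's bus width to obtain a clock rate. A standalone sketch with made-up numbers; icc_units_to_bps() is assumed to scale the kB/s units by 1000, as defined in interconnect-provider.h:]

#include <stdint.h>
#include <stdio.h>

#define icc_units_to_bps(bw) ((bw) * 1000ULL) /* assumed: kB/s -> bytes/s */

int main(void)
{
	uint32_t agg_avg = 800000;   /* aggregated average bw, made up */
	uint32_t agg_peak = 1200000; /* aggregated peak bw, made up */
	uint16_t buswidth = 8;       /* bytes transferred per bus clock */

	uint64_t sum_bw = icc_units_to_bps(agg_avg);
	uint64_t max_peak_bw = icc_units_to_bps(agg_peak);
	uint64_t rate = sum_bw > max_peak_bw ? sum_bw : max_peak_bw;

	rate /= buswidth; /* bytes/s over bytes/clock -> Hz */
	printf("bus clock: %llu Hz\n", (unsigned long long)rate);
	return 0; /* 1.2e9 bytes/s over 8 bytes/clock -> 150000000 Hz */
}
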
diff --git a/drivers/interconnect/qcom/icc-rpm.h b/drivers/interconnect/qcom/icc-rpm.h
new file mode 100644 (file)
index 0000000..79a6f68
--- /dev/null
@@ -0,0 +1,73 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Linaro Ltd
+ */
+
+#ifndef __DRIVERS_INTERCONNECT_QCOM_ICC_RPM_H
+#define __DRIVERS_INTERCONNECT_QCOM_ICC_RPM_H
+
+#define RPM_BUS_MASTER_REQ     0x73616d62
+#define RPM_BUS_SLAVE_REQ      0x766c7362
+
+#define QCOM_MAX_LINKS 12
+
+#define to_qcom_provider(_provider) \
+       container_of(_provider, struct qcom_icc_provider, provider)
+
+/**
+ * struct qcom_icc_provider - Qualcomm specific interconnect provider
+ * @provider: generic interconnect provider
+ * @bus_clks: the clk_bulk_data table of bus clocks
+ * @num_clks: the total number of clk_bulk_data entries
+ */
+struct qcom_icc_provider {
+       struct icc_provider provider;
+       struct clk_bulk_data *bus_clks;
+       int num_clks;
+};
+
+/**
+ * struct qcom_icc_node - Qualcomm specific interconnect nodes
+ * @name: the node name used in debugfs
+ * @id: a unique node identifier
+ * @links: an array of nodes where we can go next while traversing
+ * @num_links: the total number of @links
+ * @buswidth: width of the interconnect between a node and the bus (bytes)
+ * @mas_rpm_id:        RPM ID for devices that are bus masters
+ * @slv_rpm_id:        RPM ID for devices that are bus slaves
+ * @rate: current bus clock rate in Hz
+ */
+struct qcom_icc_node {
+       unsigned char *name;
+       u16 id;
+       u16 links[QCOM_MAX_LINKS];
+       u16 num_links;
+       u16 buswidth;
+       int mas_rpm_id;
+       int slv_rpm_id;
+       u64 rate;
+};
+
+struct qcom_icc_desc {
+       struct qcom_icc_node **nodes;
+       size_t num_nodes;
+};
+
+#define DEFINE_QNODE(_name, _id, _buswidth, _mas_rpm_id, _slv_rpm_id,  \
+                    ...)                                               \
+               static struct qcom_icc_node _name = {                   \
+               .name = #_name,                                         \
+               .id = _id,                                              \
+               .buswidth = _buswidth,                                  \
+               .mas_rpm_id = _mas_rpm_id,                              \
+               .slv_rpm_id = _slv_rpm_id,                              \
+               .num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),      \
+               .links = { __VA_ARGS__ },                               \
+       }
+
+int qnoc_probe(struct platform_device *pdev, size_t cd_size, int cd_num,
+              const struct clk_bulk_data *cd);
+int qnoc_remove(struct platform_device *pdev);
+
+#endif
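
[DEFINE_QNODE() counts its variadic link list at compile time by building a compound literal from __VA_ARGS__ and taking ARRAY_SIZE() of it. A minimal demonstration of the trick:]

#include <stdio.h>

#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0]))
#define COUNT_LINKS(...) ARRAY_SIZE(((int[]){ __VA_ARGS__ }))

int main(void)
{
	/* the compound literal exists only so sizeof can count its items */
	printf("%zu links\n", COUNT_LINKS(10, 11, 12)); /* prints 3 links */
	return 0;
}
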
index e8371d4..fc3689c 100644 (file)
@@ -15,9 +15,7 @@
 #include <dt-bindings/interconnect/qcom,msm8916.h>
 
 #include "smd-rpm.h"
-
-#define RPM_BUS_MASTER_REQ      0x73616d62
-#define RPM_BUS_SLAVE_REQ       0x766c7362
+#include "icc-rpm.h"
 
 enum {
        MSM8916_BIMC_SNOC_MAS = 1,
@@ -107,67 +105,11 @@ enum {
        MSM8916_SNOC_PNOC_SLV,
 };
 
-#define to_msm8916_provider(_provider) \
-       container_of(_provider, struct msm8916_icc_provider, provider)
-
 static const struct clk_bulk_data msm8916_bus_clocks[] = {
        { .id = "bus" },
        { .id = "bus_a" },
 };
 
-/**
- * struct msm8916_icc_provider - Qualcomm specific interconnect provider
- * @provider: generic interconnect provider
- * @bus_clks: the clk_bulk_data table of bus clocks
- * @num_clks: the total number of clk_bulk_data entries
- */
-struct msm8916_icc_provider {
-       struct icc_provider provider;
-       struct clk_bulk_data *bus_clks;
-       int num_clks;
-};
-
-#define MSM8916_MAX_LINKS      8
-
-/**
- * struct msm8916_icc_node - Qualcomm specific interconnect nodes
- * @name: the node name used in debugfs
- * @id: a unique node identifier
- * @links: an array of nodes where we can go next while traversing
- * @num_links: the total number of @links
- * @buswidth: width of the interconnect between a node and the bus (bytes)
- * @mas_rpm_id:        RPM ID for devices that are bus masters
- * @slv_rpm_id:        RPM ID for devices that are bus slaves
- * @rate: current bus clock rate in Hz
- */
-struct msm8916_icc_node {
-       unsigned char *name;
-       u16 id;
-       u16 links[MSM8916_MAX_LINKS];
-       u16 num_links;
-       u16 buswidth;
-       int mas_rpm_id;
-       int slv_rpm_id;
-       u64 rate;
-};
-
-struct msm8916_icc_desc {
-       struct msm8916_icc_node **nodes;
-       size_t num_nodes;
-};
-
-#define DEFINE_QNODE(_name, _id, _buswidth, _mas_rpm_id, _slv_rpm_id,  \
-                                       ...)                            \
-               static struct msm8916_icc_node _name = {                \
-               .name = #_name,                                         \
-               .id = _id,                                              \
-               .buswidth = _buswidth,                                  \
-               .mas_rpm_id = _mas_rpm_id,                              \
-               .slv_rpm_id = _slv_rpm_id,                              \
-               .num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),      \
-               .links = { __VA_ARGS__ },                               \
-       }
-
 DEFINE_QNODE(bimc_snoc_mas, MSM8916_BIMC_SNOC_MAS, 8, -1, -1, MSM8916_BIMC_SNOC_SLV);
 DEFINE_QNODE(bimc_snoc_slv, MSM8916_BIMC_SNOC_SLV, 8, -1, -1, MSM8916_SNOC_INT_0, MSM8916_SNOC_INT_1);
 DEFINE_QNODE(mas_apss, MSM8916_MASTER_AMPSS_M0, 8, -1, -1, MSM8916_SLAVE_EBI_CH0, MSM8916_BIMC_SNOC_MAS, MSM8916_SLAVE_AMPSS_L2);
@@ -254,7 +196,7 @@ DEFINE_QNODE(snoc_int_bimc, MSM8916_SNOC_INT_BIMC, 8, 101, 132, MSM8916_SNOC_BIM
 DEFINE_QNODE(snoc_pcnoc_mas, MSM8916_SNOC_PNOC_MAS, 8, -1, -1, MSM8916_SNOC_PNOC_SLV);
 DEFINE_QNODE(snoc_pcnoc_slv, MSM8916_SNOC_PNOC_SLV, 8, -1, -1, MSM8916_PNOC_INT_0);
 
-static struct msm8916_icc_node *msm8916_snoc_nodes[] = {
+static struct qcom_icc_node *msm8916_snoc_nodes[] = {
        [BIMC_SNOC_SLV] = &bimc_snoc_slv,
        [MASTER_JPEG] = &mas_jpeg,
        [MASTER_MDP_PORT0] = &mas_mdp,
@@ -283,12 +225,12 @@ static struct msm8916_icc_node *msm8916_snoc_nodes[] = {
        [SNOC_QDSS_INT] = &qdss_int,
 };
 
-static struct msm8916_icc_desc msm8916_snoc = {
+static struct qcom_icc_desc msm8916_snoc = {
        .nodes = msm8916_snoc_nodes,
        .num_nodes = ARRAY_SIZE(msm8916_snoc_nodes),
 };
 
-static struct msm8916_icc_node *msm8916_bimc_nodes[] = {
+static struct qcom_icc_node *msm8916_bimc_nodes[] = {
        [BIMC_SNOC_MAS] = &bimc_snoc_mas,
        [MASTER_AMPSS_M0] = &mas_apss,
        [MASTER_GRAPHICS_3D] = &mas_gfx,
@@ -300,12 +242,12 @@ static struct msm8916_icc_node *msm8916_bimc_nodes[] = {
        [SNOC_BIMC_1_SLV] = &snoc_bimc_1_slv,
 };
 
-static struct msm8916_icc_desc msm8916_bimc = {
+static struct qcom_icc_desc msm8916_bimc = {
        .nodes = msm8916_bimc_nodes,
        .num_nodes = ARRAY_SIZE(msm8916_bimc_nodes),
 };
 
-static struct msm8916_icc_node *msm8916_pcnoc_nodes[] = {
+static struct qcom_icc_node *msm8916_pcnoc_nodes[] = {
        [MASTER_BLSP_1] = &mas_blsp_1,
        [MASTER_DEHR] = &mas_dehr,
        [MASTER_LPASS] = &mas_audio,
@@ -358,178 +300,15 @@ static struct msm8916_icc_node *msm8916_pcnoc_nodes[] = {
        [SNOC_PCNOC_SLV] = &snoc_pcnoc_slv,
 };
 
-static struct msm8916_icc_desc msm8916_pcnoc = {
+static struct qcom_icc_desc msm8916_pcnoc = {
        .nodes = msm8916_pcnoc_nodes,
        .num_nodes = ARRAY_SIZE(msm8916_pcnoc_nodes),
 };
 
-static int msm8916_icc_set(struct icc_node *src, struct icc_node *dst)
-{
-       struct msm8916_icc_provider *qp;
-       struct msm8916_icc_node *qn;
-       u64 sum_bw, max_peak_bw, rate;
-       u32 agg_avg = 0, agg_peak = 0;
-       struct icc_provider *provider;
-       struct icc_node *n;
-       int ret, i;
-
-       qn = src->data;
-       provider = src->provider;
-       qp = to_msm8916_provider(provider);
-
-       list_for_each_entry(n, &provider->nodes, node_list)
-               provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
-                                   &agg_avg, &agg_peak);
-
-       sum_bw = icc_units_to_bps(agg_avg);
-       max_peak_bw = icc_units_to_bps(agg_peak);
-
-       /* send bandwidth request message to the RPM processor */
-       if (qn->mas_rpm_id != -1) {
-               ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-                                           RPM_BUS_MASTER_REQ,
-                                           qn->mas_rpm_id,
-                                           sum_bw);
-               if (ret) {
-                       pr_err("qcom_icc_rpm_smd_send mas %d error %d\n",
-                              qn->mas_rpm_id, ret);
-                       return ret;
-               }
-       }
-
-       if (qn->slv_rpm_id != -1) {
-               ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-                                           RPM_BUS_SLAVE_REQ,
-                                           qn->slv_rpm_id,
-                                           sum_bw);
-               if (ret) {
-                       pr_err("qcom_icc_rpm_smd_send slv error %d\n",
-                              ret);
-                       return ret;
-               }
-       }
-
-       rate = max(sum_bw, max_peak_bw);
-
-       do_div(rate, qn->buswidth);
-
-       if (qn->rate == rate)
-               return 0;
-
-       for (i = 0; i < qp->num_clks; i++) {
-               ret = clk_set_rate(qp->bus_clks[i].clk, rate);
-               if (ret) {
-                       pr_err("%s clk_set_rate error: %d\n",
-                              qp->bus_clks[i].id, ret);
-                       return ret;
-               }
-       }
-
-       qn->rate = rate;
-
-       return 0;
-}
-
 static int msm8916_qnoc_probe(struct platform_device *pdev)
 {
-       const struct msm8916_icc_desc *desc;
-       struct msm8916_icc_node **qnodes;
-       struct msm8916_icc_provider *qp;
-       struct device *dev = &pdev->dev;
-       struct icc_onecell_data *data;
-       struct icc_provider *provider;
-       struct icc_node *node;
-       size_t num_nodes, i;
-       int ret;
-
-       /* wait for the RPM proxy */
-       if (!qcom_icc_rpm_smd_available())
-               return -EPROBE_DEFER;
-
-       desc = of_device_get_match_data(dev);
-       if (!desc)
-               return -EINVAL;
-
-       qnodes = desc->nodes;
-       num_nodes = desc->num_nodes;
-
-       qp = devm_kzalloc(dev, sizeof(*qp), GFP_KERNEL);
-       if (!qp)
-               return -ENOMEM;
-
-       data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
-                           GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       qp->bus_clks = devm_kmemdup(dev, msm8916_bus_clocks,
-                                   sizeof(msm8916_bus_clocks), GFP_KERNEL);
-       if (!qp->bus_clks)
-               return -ENOMEM;
-
-       qp->num_clks = ARRAY_SIZE(msm8916_bus_clocks);
-       ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
-       if (ret)
-               return ret;
-
-       ret = clk_bulk_prepare_enable(qp->num_clks, qp->bus_clks);
-       if (ret)
-               return ret;
-
-       provider = &qp->provider;
-       INIT_LIST_HEAD(&provider->nodes);
-       provider->dev = dev;
-       provider->set = msm8916_icc_set;
-       provider->aggregate = icc_std_aggregate;
-       provider->xlate = of_icc_xlate_onecell;
-       provider->data = data;
-
-       ret = icc_provider_add(provider);
-       if (ret) {
-               dev_err(dev, "error adding interconnect provider: %d\n", ret);
-               clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-               return ret;
-       }
-
-       for (i = 0; i < num_nodes; i++) {
-               size_t j;
-
-               node = icc_node_create(qnodes[i]->id);
-               if (IS_ERR(node)) {
-                       ret = PTR_ERR(node);
-                       goto err;
-               }
-
-               node->name = qnodes[i]->name;
-               node->data = qnodes[i];
-               icc_node_add(node, provider);
-
-               for (j = 0; j < qnodes[i]->num_links; j++)
-                       icc_link_create(node, qnodes[i]->links[j]);
-
-               data->nodes[i] = node;
-       }
-       data->num_nodes = num_nodes;
-
-       platform_set_drvdata(pdev, qp);
-
-       return 0;
-
-err:
-       icc_nodes_remove(provider);
-       icc_provider_del(provider);
-       clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-
-       return ret;
-}
-
-static int msm8916_qnoc_remove(struct platform_device *pdev)
-{
-       struct msm8916_icc_provider *qp = platform_get_drvdata(pdev);
-
-       icc_nodes_remove(&qp->provider);
-       clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-       return icc_provider_del(&qp->provider);
+       return qnoc_probe(pdev, sizeof(msm8916_bus_clocks),
+                         ARRAY_SIZE(msm8916_bus_clocks), msm8916_bus_clocks);
 }
 
 static const struct of_device_id msm8916_noc_of_match[] = {
@@ -542,7 +321,7 @@ MODULE_DEVICE_TABLE(of, msm8916_noc_of_match);
 
 static struct platform_driver msm8916_noc_driver = {
        .probe = msm8916_qnoc_probe,
-       .remove = msm8916_qnoc_remove,
+       .remove = qnoc_remove,
        .driver = {
                .name = "qnoc-msm8916",
                .of_match_table = msm8916_noc_of_match,
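
For readers tracing the conversion: a definition such as the bimc_snoc_mas line above expands through the shared DEFINE_QNODE() macro to roughly this static initializer, with num_links computed from the variadic link list:

static struct qcom_icc_node bimc_snoc_mas = {
	.name = "bimc_snoc_mas",
	.id = MSM8916_BIMC_SNOC_MAS,
	.buswidth = 8,
	.mas_rpm_id = -1,
	.slv_rpm_id = -1,
	.num_links = 1,
	.links = { MSM8916_BIMC_SNOC_SLV },
};
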
diff --git a/drivers/interconnect/qcom/msm8939.c b/drivers/interconnect/qcom/msm8939.c
new file mode 100644 (file)
index 0000000..20f31a1
--- /dev/null
@@ -0,0 +1,355 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2020 Linaro Ltd
+ * Author: Jun Nie <jun.nie@linaro.org>
+ * Based on the msm8916 interconnect driver by Georgi Djakov.
+ */
+
+#include <linux/clk.h>
+#include <linux/device.h>
+#include <linux/interconnect-provider.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/of_device.h>
+
+#include <dt-bindings/interconnect/qcom,msm8939.h>
+
+#include "smd-rpm.h"
+#include "icc-rpm.h"
+
+enum {
+       MSM8939_BIMC_SNOC_MAS = 1,
+       MSM8939_BIMC_SNOC_SLV,
+       MSM8939_MASTER_AMPSS_M0,
+       MSM8939_MASTER_LPASS,
+       MSM8939_MASTER_BLSP_1,
+       MSM8939_MASTER_DEHR,
+       MSM8939_MASTER_GRAPHICS_3D,
+       MSM8939_MASTER_JPEG,
+       MSM8939_MASTER_MDP_PORT0,
+       MSM8939_MASTER_MDP_PORT1,
+       MSM8939_MASTER_CPP,
+       MSM8939_MASTER_CRYPTO_CORE0,
+       MSM8939_MASTER_SDCC_1,
+       MSM8939_MASTER_SDCC_2,
+       MSM8939_MASTER_QDSS_BAM,
+       MSM8939_MASTER_QDSS_ETR,
+       MSM8939_MASTER_SNOC_CFG,
+       MSM8939_MASTER_SPDM,
+       MSM8939_MASTER_TCU0,
+       MSM8939_MASTER_USB_HS1,
+       MSM8939_MASTER_USB_HS2,
+       MSM8939_MASTER_VFE,
+       MSM8939_MASTER_VIDEO_P0,
+       MSM8939_SNOC_MM_INT_0,
+       MSM8939_SNOC_MM_INT_1,
+       MSM8939_SNOC_MM_INT_2,
+       MSM8939_PNOC_INT_0,
+       MSM8939_PNOC_INT_1,
+       MSM8939_PNOC_MAS_0,
+       MSM8939_PNOC_MAS_1,
+       MSM8939_PNOC_SLV_0,
+       MSM8939_PNOC_SLV_1,
+       MSM8939_PNOC_SLV_2,
+       MSM8939_PNOC_SLV_3,
+       MSM8939_PNOC_SLV_4,
+       MSM8939_PNOC_SLV_8,
+       MSM8939_PNOC_SLV_9,
+       MSM8939_PNOC_SNOC_MAS,
+       MSM8939_PNOC_SNOC_SLV,
+       MSM8939_SNOC_QDSS_INT,
+       MSM8939_SLAVE_AMPSS_L2,
+       MSM8939_SLAVE_APSS,
+       MSM8939_SLAVE_LPASS,
+       MSM8939_SLAVE_BIMC_CFG,
+       MSM8939_SLAVE_BLSP_1,
+       MSM8939_SLAVE_BOOT_ROM,
+       MSM8939_SLAVE_CAMERA_CFG,
+       MSM8939_SLAVE_CATS_128,
+       MSM8939_SLAVE_OCMEM_64,
+       MSM8939_SLAVE_CLK_CTL,
+       MSM8939_SLAVE_CRYPTO_0_CFG,
+       MSM8939_SLAVE_DEHR_CFG,
+       MSM8939_SLAVE_DISPLAY_CFG,
+       MSM8939_SLAVE_EBI_CH0,
+       MSM8939_SLAVE_GRAPHICS_3D_CFG,
+       MSM8939_SLAVE_IMEM_CFG,
+       MSM8939_SLAVE_IMEM,
+       MSM8939_SLAVE_MPM,
+       MSM8939_SLAVE_MSG_RAM,
+       MSM8939_SLAVE_MSS,
+       MSM8939_SLAVE_PDM,
+       MSM8939_SLAVE_PMIC_ARB,
+       MSM8939_SLAVE_PNOC_CFG,
+       MSM8939_SLAVE_PRNG,
+       MSM8939_SLAVE_QDSS_CFG,
+       MSM8939_SLAVE_QDSS_STM,
+       MSM8939_SLAVE_RBCPR_CFG,
+       MSM8939_SLAVE_SDCC_1,
+       MSM8939_SLAVE_SDCC_2,
+       MSM8939_SLAVE_SECURITY,
+       MSM8939_SLAVE_SNOC_CFG,
+       MSM8939_SLAVE_SPDM,
+       MSM8939_SLAVE_SRVC_SNOC,
+       MSM8939_SLAVE_TCSR,
+       MSM8939_SLAVE_TLMM,
+       MSM8939_SLAVE_USB_HS1,
+       MSM8939_SLAVE_USB_HS2,
+       MSM8939_SLAVE_VENUS_CFG,
+       MSM8939_SNOC_BIMC_0_MAS,
+       MSM8939_SNOC_BIMC_0_SLV,
+       MSM8939_SNOC_BIMC_1_MAS,
+       MSM8939_SNOC_BIMC_1_SLV,
+       MSM8939_SNOC_BIMC_2_MAS,
+       MSM8939_SNOC_BIMC_2_SLV,
+       MSM8939_SNOC_INT_0,
+       MSM8939_SNOC_INT_1,
+       MSM8939_SNOC_INT_BIMC,
+       MSM8939_SNOC_PNOC_MAS,
+       MSM8939_SNOC_PNOC_SLV,
+};
+
+static const struct clk_bulk_data msm8939_bus_clocks[] = {
+       { .id = "bus" },
+       { .id = "bus_a" },
+};
+
+DEFINE_QNODE(bimc_snoc_mas, MSM8939_BIMC_SNOC_MAS, 8, -1, -1, MSM8939_BIMC_SNOC_SLV);
+DEFINE_QNODE(bimc_snoc_slv, MSM8939_BIMC_SNOC_SLV, 16, -1, 2, MSM8939_SNOC_INT_0, MSM8939_SNOC_INT_1);
+DEFINE_QNODE(mas_apss, MSM8939_MASTER_AMPSS_M0, 16, -1, -1, MSM8939_SLAVE_EBI_CH0, MSM8939_BIMC_SNOC_MAS, MSM8939_SLAVE_AMPSS_L2);
+DEFINE_QNODE(mas_audio, MSM8939_MASTER_LPASS, 4, -1, -1, MSM8939_PNOC_MAS_0);
+DEFINE_QNODE(mas_blsp_1, MSM8939_MASTER_BLSP_1, 4, -1, -1, MSM8939_PNOC_MAS_1);
+DEFINE_QNODE(mas_dehr, MSM8939_MASTER_DEHR, 4, -1, -1, MSM8939_PNOC_MAS_0);
+DEFINE_QNODE(mas_gfx, MSM8939_MASTER_GRAPHICS_3D, 16, -1, -1, MSM8939_SLAVE_EBI_CH0, MSM8939_BIMC_SNOC_MAS, MSM8939_SLAVE_AMPSS_L2);
+DEFINE_QNODE(mas_jpeg, MSM8939_MASTER_JPEG, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_mdp0, MSM8939_MASTER_MDP_PORT0, 16, -1, -1, MSM8939_SNOC_MM_INT_1, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_mdp1, MSM8939_MASTER_MDP_PORT1, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_cpp, MSM8939_MASTER_CPP, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_pcnoc_crypto_0, MSM8939_MASTER_CRYPTO_CORE0, 8, -1, -1, MSM8939_PNOC_INT_1);
+DEFINE_QNODE(mas_pcnoc_sdcc_1, MSM8939_MASTER_SDCC_1, 8, -1, -1, MSM8939_PNOC_INT_1);
+DEFINE_QNODE(mas_pcnoc_sdcc_2, MSM8939_MASTER_SDCC_2, 8, -1, -1, MSM8939_PNOC_INT_1);
+DEFINE_QNODE(mas_qdss_bam, MSM8939_MASTER_QDSS_BAM, 8, -1, -1, MSM8939_SNOC_QDSS_INT);
+DEFINE_QNODE(mas_qdss_etr, MSM8939_MASTER_QDSS_ETR, 8, -1, -1, MSM8939_SNOC_QDSS_INT);
+DEFINE_QNODE(mas_snoc_cfg, MSM8939_MASTER_SNOC_CFG, 4, -1, -1, MSM8939_SLAVE_SRVC_SNOC);
+DEFINE_QNODE(mas_spdm, MSM8939_MASTER_SPDM, 4, -1, -1, MSM8939_PNOC_MAS_0);
+DEFINE_QNODE(mas_tcu0, MSM8939_MASTER_TCU0, 16, -1, -1, MSM8939_SLAVE_EBI_CH0, MSM8939_BIMC_SNOC_MAS, MSM8939_SLAVE_AMPSS_L2);
+DEFINE_QNODE(mas_usb_hs1, MSM8939_MASTER_USB_HS1, 4, -1, -1, MSM8939_PNOC_MAS_1);
+DEFINE_QNODE(mas_usb_hs2, MSM8939_MASTER_USB_HS2, 4, -1, -1, MSM8939_PNOC_MAS_1);
+DEFINE_QNODE(mas_vfe, MSM8939_MASTER_VFE, 16, -1, -1, MSM8939_SNOC_MM_INT_1, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mas_video, MSM8939_MASTER_VIDEO_P0, 16, -1, -1, MSM8939_SNOC_MM_INT_0, MSM8939_SNOC_MM_INT_2);
+DEFINE_QNODE(mm_int_0, MSM8939_SNOC_MM_INT_0, 16, -1, -1, MSM8939_SNOC_BIMC_2_MAS);
+DEFINE_QNODE(mm_int_1, MSM8939_SNOC_MM_INT_1, 16, -1, -1, MSM8939_SNOC_BIMC_1_MAS);
+DEFINE_QNODE(mm_int_2, MSM8939_SNOC_MM_INT_2, 16, -1, -1, MSM8939_SNOC_INT_0);
+DEFINE_QNODE(pcnoc_int_0, MSM8939_PNOC_INT_0, 8, -1, -1, MSM8939_PNOC_SNOC_MAS, MSM8939_PNOC_SLV_0, MSM8939_PNOC_SLV_1, MSM8939_PNOC_SLV_2, MSM8939_PNOC_SLV_3, MSM8939_PNOC_SLV_4, MSM8939_PNOC_SLV_8, MSM8939_PNOC_SLV_9);
+DEFINE_QNODE(pcnoc_int_1, MSM8939_PNOC_INT_1, 8, -1, -1, MSM8939_PNOC_SNOC_MAS);
+DEFINE_QNODE(pcnoc_m_0, MSM8939_PNOC_MAS_0, 8, -1, -1, MSM8939_PNOC_INT_0);
+DEFINE_QNODE(pcnoc_m_1, MSM8939_PNOC_MAS_1, 8, -1, -1, MSM8939_PNOC_SNOC_MAS);
+DEFINE_QNODE(pcnoc_s_0, MSM8939_PNOC_SLV_0, 4, -1, -1, MSM8939_SLAVE_CLK_CTL, MSM8939_SLAVE_TLMM, MSM8939_SLAVE_TCSR, MSM8939_SLAVE_SECURITY, MSM8939_SLAVE_MSS);
+DEFINE_QNODE(pcnoc_s_1, MSM8939_PNOC_SLV_1, 4, -1, -1, MSM8939_SLAVE_IMEM_CFG, MSM8939_SLAVE_CRYPTO_0_CFG, MSM8939_SLAVE_MSG_RAM, MSM8939_SLAVE_PDM, MSM8939_SLAVE_PRNG);
+DEFINE_QNODE(pcnoc_s_2, MSM8939_PNOC_SLV_2, 4, -1, -1, MSM8939_SLAVE_SPDM, MSM8939_SLAVE_BOOT_ROM, MSM8939_SLAVE_BIMC_CFG, MSM8939_SLAVE_PNOC_CFG, MSM8939_SLAVE_PMIC_ARB);
+DEFINE_QNODE(pcnoc_s_3, MSM8939_PNOC_SLV_3, 4, -1, -1, MSM8939_SLAVE_MPM, MSM8939_SLAVE_SNOC_CFG, MSM8939_SLAVE_RBCPR_CFG, MSM8939_SLAVE_QDSS_CFG, MSM8939_SLAVE_DEHR_CFG);
+DEFINE_QNODE(pcnoc_s_4, MSM8939_PNOC_SLV_4, 4, -1, -1, MSM8939_SLAVE_VENUS_CFG, MSM8939_SLAVE_CAMERA_CFG, MSM8939_SLAVE_DISPLAY_CFG);
+DEFINE_QNODE(pcnoc_s_8, MSM8939_PNOC_SLV_8, 4, -1, -1, MSM8939_SLAVE_USB_HS1, MSM8939_SLAVE_SDCC_1, MSM8939_SLAVE_BLSP_1);
+DEFINE_QNODE(pcnoc_s_9, MSM8939_PNOC_SLV_9, 4, -1, -1, MSM8939_SLAVE_SDCC_2, MSM8939_SLAVE_LPASS, MSM8939_SLAVE_USB_HS2);
+DEFINE_QNODE(pcnoc_snoc_mas, MSM8939_PNOC_SNOC_MAS, 8, 29, -1, MSM8939_PNOC_SNOC_SLV);
+DEFINE_QNODE(pcnoc_snoc_slv, MSM8939_PNOC_SNOC_SLV, 8, -1, 45, MSM8939_SNOC_INT_0, MSM8939_SNOC_INT_BIMC, MSM8939_SNOC_INT_1);
+DEFINE_QNODE(qdss_int, MSM8939_SNOC_QDSS_INT, 8, -1, -1, MSM8939_SNOC_INT_0, MSM8939_SNOC_INT_BIMC);
+DEFINE_QNODE(slv_apps_l2, MSM8939_SLAVE_AMPSS_L2, 16, -1, -1, 0);
+DEFINE_QNODE(slv_apss, MSM8939_SLAVE_APSS, 4, -1, -1, 0);
+DEFINE_QNODE(slv_audio, MSM8939_SLAVE_LPASS, 4, -1, -1, 0);
+DEFINE_QNODE(slv_bimc_cfg, MSM8939_SLAVE_BIMC_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_blsp_1, MSM8939_SLAVE_BLSP_1, 4, -1, -1, 0);
+DEFINE_QNODE(slv_boot_rom, MSM8939_SLAVE_BOOT_ROM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_camera_cfg, MSM8939_SLAVE_CAMERA_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_cats_0, MSM8939_SLAVE_CATS_128, 16, -1, -1, 0);
+DEFINE_QNODE(slv_cats_1, MSM8939_SLAVE_OCMEM_64, 8, -1, -1, 0);
+DEFINE_QNODE(slv_clk_ctl, MSM8939_SLAVE_CLK_CTL, 4, -1, -1, 0);
+DEFINE_QNODE(slv_crypto_0_cfg, MSM8939_SLAVE_CRYPTO_0_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_dehr_cfg, MSM8939_SLAVE_DEHR_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_display_cfg, MSM8939_SLAVE_DISPLAY_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_ebi_ch0, MSM8939_SLAVE_EBI_CH0, 16, -1, 0, 0);
+DEFINE_QNODE(slv_gfx_cfg, MSM8939_SLAVE_GRAPHICS_3D_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_imem_cfg, MSM8939_SLAVE_IMEM_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_imem, MSM8939_SLAVE_IMEM, 8, -1, 26, 0);
+DEFINE_QNODE(slv_mpm, MSM8939_SLAVE_MPM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_msg_ram, MSM8939_SLAVE_MSG_RAM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_mss, MSM8939_SLAVE_MSS, 4, -1, -1, 0);
+DEFINE_QNODE(slv_pdm, MSM8939_SLAVE_PDM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_pmic_arb, MSM8939_SLAVE_PMIC_ARB, 4, -1, -1, 0);
+DEFINE_QNODE(slv_pcnoc_cfg, MSM8939_SLAVE_PNOC_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_prng, MSM8939_SLAVE_PRNG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_qdss_cfg, MSM8939_SLAVE_QDSS_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_qdss_stm, MSM8939_SLAVE_QDSS_STM, 4, -1, 30, 0);
+DEFINE_QNODE(slv_rbcpr_cfg, MSM8939_SLAVE_RBCPR_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_sdcc_1, MSM8939_SLAVE_SDCC_1, 4, -1, -1, 0);
+DEFINE_QNODE(slv_sdcc_2, MSM8939_SLAVE_SDCC_2, 4, -1, -1, 0);
+DEFINE_QNODE(slv_security, MSM8939_SLAVE_SECURITY, 4, -1, -1, 0);
+DEFINE_QNODE(slv_snoc_cfg, MSM8939_SLAVE_SNOC_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(slv_spdm, MSM8939_SLAVE_SPDM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_srvc_snoc, MSM8939_SLAVE_SRVC_SNOC, 8, -1, -1, 0);
+DEFINE_QNODE(slv_tcsr, MSM8939_SLAVE_TCSR, 4, -1, -1, 0);
+DEFINE_QNODE(slv_tlmm, MSM8939_SLAVE_TLMM, 4, -1, -1, 0);
+DEFINE_QNODE(slv_usb_hs1, MSM8939_SLAVE_USB_HS1, 4, -1, -1, 0);
+DEFINE_QNODE(slv_usb_hs2, MSM8939_SLAVE_USB_HS2, 4, -1, -1, 0);
+DEFINE_QNODE(slv_venus_cfg, MSM8939_SLAVE_VENUS_CFG, 4, -1, -1, 0);
+DEFINE_QNODE(snoc_bimc_0_mas, MSM8939_SNOC_BIMC_0_MAS, 16, -1, -1, MSM8939_SNOC_BIMC_0_SLV);
+DEFINE_QNODE(snoc_bimc_0_slv, MSM8939_SNOC_BIMC_0_SLV, 16, -1, -1, MSM8939_SLAVE_EBI_CH0);
+DEFINE_QNODE(snoc_bimc_1_mas, MSM8939_SNOC_BIMC_1_MAS, 16, 76, -1, MSM8939_SNOC_BIMC_1_SLV);
+DEFINE_QNODE(snoc_bimc_1_slv, MSM8939_SNOC_BIMC_1_SLV, 16, -1, 104, MSM8939_SLAVE_EBI_CH0);
+DEFINE_QNODE(snoc_bimc_2_mas, MSM8939_SNOC_BIMC_2_MAS, 16, -1, -1, MSM8939_SNOC_BIMC_2_SLV);
+DEFINE_QNODE(snoc_bimc_2_slv, MSM8939_SNOC_BIMC_2_SLV, 16, -1, -1, MSM8939_SLAVE_EBI_CH0);
+DEFINE_QNODE(snoc_int_0, MSM8939_SNOC_INT_0, 8, 99, 130, MSM8939_SLAVE_QDSS_STM, MSM8939_SLAVE_IMEM, MSM8939_SNOC_PNOC_MAS);
+DEFINE_QNODE(snoc_int_1, MSM8939_SNOC_INT_1, 8, -1, -1, MSM8939_SLAVE_APSS, MSM8939_SLAVE_CATS_128, MSM8939_SLAVE_OCMEM_64);
+DEFINE_QNODE(snoc_int_bimc, MSM8939_SNOC_INT_BIMC, 8, 101, 132, MSM8939_SNOC_BIMC_1_MAS);
+DEFINE_QNODE(snoc_pcnoc_mas, MSM8939_SNOC_PNOC_MAS, 8, -1, -1, MSM8939_SNOC_PNOC_SLV);
+DEFINE_QNODE(snoc_pcnoc_slv, MSM8939_SNOC_PNOC_SLV, 8, -1, -1, MSM8939_PNOC_INT_0);
+
+static struct qcom_icc_node *msm8939_snoc_nodes[] = {
+       [BIMC_SNOC_SLV] = &bimc_snoc_slv,
+       [MASTER_QDSS_BAM] = &mas_qdss_bam,
+       [MASTER_QDSS_ETR] = &mas_qdss_etr,
+       [MASTER_SNOC_CFG] = &mas_snoc_cfg,
+       [PCNOC_SNOC_SLV] = &pcnoc_snoc_slv,
+       [SLAVE_APSS] = &slv_apss,
+       [SLAVE_CATS_128] = &slv_cats_0,
+       [SLAVE_OCMEM_64] = &slv_cats_1,
+       [SLAVE_IMEM] = &slv_imem,
+       [SLAVE_QDSS_STM] = &slv_qdss_stm,
+       [SLAVE_SRVC_SNOC] = &slv_srvc_snoc,
+       [SNOC_BIMC_0_MAS] = &snoc_bimc_0_mas,
+       [SNOC_BIMC_1_MAS] = &snoc_bimc_1_mas,
+       [SNOC_BIMC_2_MAS] = &snoc_bimc_2_mas,
+       [SNOC_INT_0] = &snoc_int_0,
+       [SNOC_INT_1] = &snoc_int_1,
+       [SNOC_INT_BIMC] = &snoc_int_bimc,
+       [SNOC_PCNOC_MAS] = &snoc_pcnoc_mas,
+       [SNOC_QDSS_INT] = &qdss_int,
+};
+
+static struct qcom_icc_desc msm8939_snoc = {
+       .nodes = msm8939_snoc_nodes,
+       .num_nodes = ARRAY_SIZE(msm8939_snoc_nodes),
+};
+
+static struct qcom_icc_node *msm8939_snoc_mm_nodes[] = {
+       [MASTER_VIDEO_P0] = &mas_video,
+       [MASTER_JPEG] = &mas_jpeg,
+       [MASTER_VFE] = &mas_vfe,
+       [MASTER_MDP_PORT0] = &mas_mdp0,
+       [MASTER_MDP_PORT1] = &mas_mdp1,
+       [MASTER_CPP] = &mas_cpp,
+       [SNOC_MM_INT_0] = &mm_int_0,
+       [SNOC_MM_INT_1] = &mm_int_1,
+       [SNOC_MM_INT_2] = &mm_int_2,
+};
+
+static struct qcom_icc_desc msm8939_snoc_mm = {
+       .nodes = msm8939_snoc_mm_nodes,
+       .num_nodes = ARRAY_SIZE(msm8939_snoc_mm_nodes),
+};
+
+static struct qcom_icc_node *msm8939_bimc_nodes[] = {
+       [BIMC_SNOC_MAS] = &bimc_snoc_mas,
+       [MASTER_AMPSS_M0] = &mas_apss,
+       [MASTER_GRAPHICS_3D] = &mas_gfx,
+       [MASTER_TCU0] = &mas_tcu0,
+       [SLAVE_AMPSS_L2] = &slv_apps_l2,
+       [SLAVE_EBI_CH0] = &slv_ebi_ch0,
+       [SNOC_BIMC_0_SLV] = &snoc_bimc_0_slv,
+       [SNOC_BIMC_1_SLV] = &snoc_bimc_1_slv,
+       [SNOC_BIMC_2_SLV] = &snoc_bimc_2_slv,
+};
+
+static struct qcom_icc_desc msm8939_bimc = {
+       .nodes = msm8939_bimc_nodes,
+       .num_nodes = ARRAY_SIZE(msm8939_bimc_nodes),
+};
+
+static struct qcom_icc_node *msm8939_pcnoc_nodes[] = {
+       [MASTER_BLSP_1] = &mas_blsp_1,
+       [MASTER_DEHR] = &mas_dehr,
+       [MASTER_LPASS] = &mas_audio,
+       [MASTER_CRYPTO_CORE0] = &mas_pcnoc_crypto_0,
+       [MASTER_SDCC_1] = &mas_pcnoc_sdcc_1,
+       [MASTER_SDCC_2] = &mas_pcnoc_sdcc_2,
+       [MASTER_SPDM] = &mas_spdm,
+       [MASTER_USB_HS1] = &mas_usb_hs1,
+       [MASTER_USB_HS2] = &mas_usb_hs2,
+       [PCNOC_INT_0] = &pcnoc_int_0,
+       [PCNOC_INT_1] = &pcnoc_int_1,
+       [PCNOC_MAS_0] = &pcnoc_m_0,
+       [PCNOC_MAS_1] = &pcnoc_m_1,
+       [PCNOC_SLV_0] = &pcnoc_s_0,
+       [PCNOC_SLV_1] = &pcnoc_s_1,
+       [PCNOC_SLV_2] = &pcnoc_s_2,
+       [PCNOC_SLV_3] = &pcnoc_s_3,
+       [PCNOC_SLV_4] = &pcnoc_s_4,
+       [PCNOC_SLV_8] = &pcnoc_s_8,
+       [PCNOC_SLV_9] = &pcnoc_s_9,
+       [PCNOC_SNOC_MAS] = &pcnoc_snoc_mas,
+       [SLAVE_BIMC_CFG] = &slv_bimc_cfg,
+       [SLAVE_BLSP_1] = &slv_blsp_1,
+       [SLAVE_BOOT_ROM] = &slv_boot_rom,
+       [SLAVE_CAMERA_CFG] = &slv_camera_cfg,
+       [SLAVE_CLK_CTL] = &slv_clk_ctl,
+       [SLAVE_CRYPTO_0_CFG] = &slv_crypto_0_cfg,
+       [SLAVE_DEHR_CFG] = &slv_dehr_cfg,
+       [SLAVE_DISPLAY_CFG] = &slv_display_cfg,
+       [SLAVE_GRAPHICS_3D_CFG] = &slv_gfx_cfg,
+       [SLAVE_IMEM_CFG] = &slv_imem_cfg,
+       [SLAVE_LPASS] = &slv_audio,
+       [SLAVE_MPM] = &slv_mpm,
+       [SLAVE_MSG_RAM] = &slv_msg_ram,
+       [SLAVE_MSS] = &slv_mss,
+       [SLAVE_PDM] = &slv_pdm,
+       [SLAVE_PMIC_ARB] = &slv_pmic_arb,
+       [SLAVE_PCNOC_CFG] = &slv_pcnoc_cfg,
+       [SLAVE_PRNG] = &slv_prng,
+       [SLAVE_QDSS_CFG] = &slv_qdss_cfg,
+       [SLAVE_RBCPR_CFG] = &slv_rbcpr_cfg,
+       [SLAVE_SDCC_1] = &slv_sdcc_1,
+       [SLAVE_SDCC_2] = &slv_sdcc_2,
+       [SLAVE_SECURITY] = &slv_security,
+       [SLAVE_SNOC_CFG] = &slv_snoc_cfg,
+       [SLAVE_SPDM] = &slv_spdm,
+       [SLAVE_TCSR] = &slv_tcsr,
+       [SLAVE_TLMM] = &slv_tlmm,
+       [SLAVE_USB_HS1] = &slv_usb_hs1,
+       [SLAVE_USB_HS2] = &slv_usb_hs2,
+       [SLAVE_VENUS_CFG] = &slv_venus_cfg,
+       [SNOC_PCNOC_SLV] = &snoc_pcnoc_slv,
+};
+
+static struct qcom_icc_desc msm8939_pcnoc = {
+       .nodes = msm8939_pcnoc_nodes,
+       .num_nodes = ARRAY_SIZE(msm8939_pcnoc_nodes),
+};
+
+static int msm8939_qnoc_probe(struct platform_device *pdev)
+{
+       return qnoc_probe(pdev, sizeof(msm8939_bus_clocks),
+                         ARRAY_SIZE(msm8939_bus_clocks), msm8939_bus_clocks);
+}
+
+static const struct of_device_id msm8939_noc_of_match[] = {
+       { .compatible = "qcom,msm8939-bimc", .data = &msm8939_bimc },
+       { .compatible = "qcom,msm8939-pcnoc", .data = &msm8939_pcnoc },
+       { .compatible = "qcom,msm8939-snoc", .data = &msm8939_snoc },
+       { .compatible = "qcom,msm8939-snoc-mm", .data = &msm8939_snoc_mm },
+       { }
+};
+MODULE_DEVICE_TABLE(of, msm8939_noc_of_match);
+
+static struct platform_driver msm8939_noc_driver = {
+       .probe = msm8939_qnoc_probe,
+       .remove = qnoc_remove,
+       .driver = {
+               .name = "qnoc-msm8939",
+               .of_match_table = msm8939_noc_of_match,
+       },
+};
+module_platform_driver(msm8939_noc_driver);
+MODULE_AUTHOR("Jun Nie <jun.nie@linaro.org>");
+MODULE_DESCRIPTION("Qualcomm MSM8939 NoC driver");
+MODULE_LICENSE("GPL v2");
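
For context, nothing in this file is called by consumers directly; peripheral drivers vote bandwidth on paths through these NoCs via the generic interconnect API. A hedged sketch of that consumer side (the path name and bandwidth figures are arbitrary, not taken from this patch):

#include <linux/device.h>
#include <linux/err.h>
#include <linux/interconnect.h>

static int example_consumer_probe(struct device *dev)
{
	struct icc_path *path;
	int ret;

	/* "memory" is an illustrative interconnect-names entry from DT */
	path = of_icc_get(dev, "memory");
	if (IS_ERR(path))
		return PTR_ERR(path);

	/* average and peak bandwidth votes, in kBps; values are arbitrary */
	ret = icc_set_bw(path, 100000, 200000);
	if (ret)
		icc_put(path);

	return ret;
}
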
index 9820709..36a7e30 100644 (file)
@@ -9,15 +9,12 @@
 #include <linux/interconnect-provider.h>
 #include <linux/io.h>
 #include <linux/module.h>
-#include <linux/of_device.h>
-#include <linux/of_platform.h>
 #include <linux/platform_device.h>
-#include <linux/slab.h>
+#include <linux/of_device.h>
 
-#include "smd-rpm.h"
 
-#define RPM_BUS_MASTER_REQ     0x73616d62
-#define RPM_BUS_SLAVE_REQ      0x766c7362
+#include "smd-rpm.h"
+#include "icc-rpm.h"
 
 enum {
        QCS404_MASTER_AMPSS_M0 = 1,
@@ -95,67 +92,11 @@ enum {
        QCS404_SLAVE_LPASS,
 };
 
-#define to_qcom_provider(_provider) \
-       container_of(_provider, struct qcom_icc_provider, provider)
-
-static const struct clk_bulk_data bus_clocks[] = {
+static const struct clk_bulk_data qcs404_bus_clocks[] = {
        { .id = "bus" },
        { .id = "bus_a" },
 };
 
-/**
- * struct qcom_icc_provider - Qualcomm specific interconnect provider
- * @provider: generic interconnect provider
- * @bus_clks: the clk_bulk_data table of bus clocks
- * @num_clks: the total number of clk_bulk_data entries
- */
-struct qcom_icc_provider {
-       struct icc_provider provider;
-       struct clk_bulk_data *bus_clks;
-       int num_clks;
-};
-
-#define QCS404_MAX_LINKS       12
-
-/**
- * struct qcom_icc_node - Qualcomm specific interconnect nodes
- * @name: the node name used in debugfs
- * @id: a unique node identifier
- * @links: an array of nodes where we can go next while traversing
- * @num_links: the total number of @links
- * @buswidth: width of the interconnect between a node and the bus (bytes)
- * @mas_rpm_id:        RPM id for devices that are bus masters
- * @slv_rpm_id:        RPM id for devices that are bus slaves
- * @rate: current bus clock rate in Hz
- */
-struct qcom_icc_node {
-       unsigned char *name;
-       u16 id;
-       u16 links[QCS404_MAX_LINKS];
-       u16 num_links;
-       u16 buswidth;
-       int mas_rpm_id;
-       int slv_rpm_id;
-       u64 rate;
-};
-
-struct qcom_icc_desc {
-       struct qcom_icc_node **nodes;
-       size_t num_nodes;
-};
-
-#define DEFINE_QNODE(_name, _id, _buswidth, _mas_rpm_id, _slv_rpm_id,  \
-                    ...)                                               \
-               static struct qcom_icc_node _name = {                   \
-               .name = #_name,                                         \
-               .id = _id,                                              \
-               .buswidth = _buswidth,                                  \
-               .mas_rpm_id = _mas_rpm_id,                              \
-               .slv_rpm_id = _slv_rpm_id,                              \
-               .num_links = ARRAY_SIZE(((int[]){ __VA_ARGS__ })),      \
-               .links = { __VA_ARGS__ },                               \
-       }
-
 DEFINE_QNODE(mas_apps_proc, QCS404_MASTER_AMPSS_M0, 8, 0, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
 DEFINE_QNODE(mas_oxili, QCS404_MASTER_GRAPHICS_3D, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
 DEFINE_QNODE(mas_mdp, QCS404_MASTER_MDP_PORT0, 8, -1, -1, QCS404_SLAVE_EBI_CH0, QCS404_BIMC_SNOC_SLV);
@@ -327,178 +268,11 @@ static struct qcom_icc_desc qcs404_snoc = {
        .num_nodes = ARRAY_SIZE(qcs404_snoc_nodes),
 };
 
-static int qcom_icc_set(struct icc_node *src, struct icc_node *dst)
-{
-       struct qcom_icc_provider *qp;
-       struct qcom_icc_node *qn;
-       struct icc_provider *provider;
-       struct icc_node *n;
-       u64 sum_bw;
-       u64 max_peak_bw;
-       u64 rate;
-       u32 agg_avg = 0;
-       u32 agg_peak = 0;
-       int ret, i;
-
-       qn = src->data;
-       provider = src->provider;
-       qp = to_qcom_provider(provider);
-
-       list_for_each_entry(n, &provider->nodes, node_list)
-               provider->aggregate(n, 0, n->avg_bw, n->peak_bw,
-                                   &agg_avg, &agg_peak);
-
-       sum_bw = icc_units_to_bps(agg_avg);
-       max_peak_bw = icc_units_to_bps(agg_peak);
-
-       /* send bandwidth request message to the RPM processor */
-       if (qn->mas_rpm_id != -1) {
-               ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-                                           RPM_BUS_MASTER_REQ,
-                                           qn->mas_rpm_id,
-                                           sum_bw);
-               if (ret) {
-                       pr_err("qcom_icc_rpm_smd_send mas %d error %d\n",
-                              qn->mas_rpm_id, ret);
-                       return ret;
-               }
-       }
-
-       if (qn->slv_rpm_id != -1) {
-               ret = qcom_icc_rpm_smd_send(QCOM_SMD_RPM_ACTIVE_STATE,
-                                           RPM_BUS_SLAVE_REQ,
-                                           qn->slv_rpm_id,
-                                           sum_bw);
-               if (ret) {
-                       pr_err("qcom_icc_rpm_smd_send slv error %d\n",
-                              ret);
-                       return ret;
-               }
-       }
-
-       rate = max(sum_bw, max_peak_bw);
-
-       do_div(rate, qn->buswidth);
-
-       if (qn->rate == rate)
-               return 0;
-
-       for (i = 0; i < qp->num_clks; i++) {
-               ret = clk_set_rate(qp->bus_clks[i].clk, rate);
-               if (ret) {
-                       pr_err("%s clk_set_rate error: %d\n",
-                              qp->bus_clks[i].id, ret);
-                       return ret;
-               }
-       }
-
-       qn->rate = rate;
 
-       return 0;
-}
-
-static int qnoc_probe(struct platform_device *pdev)
+static int qcs404_qnoc_probe(struct platform_device *pdev)
 {
-       struct device *dev = &pdev->dev;
-       const struct qcom_icc_desc *desc;
-       struct icc_onecell_data *data;
-       struct icc_provider *provider;
-       struct qcom_icc_node **qnodes;
-       struct qcom_icc_provider *qp;
-       struct icc_node *node;
-       size_t num_nodes, i;
-       int ret;
-
-       /* wait for the RPM proxy */
-       if (!qcom_icc_rpm_smd_available())
-               return -EPROBE_DEFER;
-
-       desc = of_device_get_match_data(dev);
-       if (!desc)
-               return -EINVAL;
-
-       qnodes = desc->nodes;
-       num_nodes = desc->num_nodes;
-
-       qp = devm_kzalloc(dev, sizeof(*qp), GFP_KERNEL);
-       if (!qp)
-               return -ENOMEM;
-
-       data = devm_kzalloc(dev, struct_size(data, nodes, num_nodes),
-                           GFP_KERNEL);
-       if (!data)
-               return -ENOMEM;
-
-       qp->bus_clks = devm_kmemdup(dev, bus_clocks, sizeof(bus_clocks),
-                                   GFP_KERNEL);
-       if (!qp->bus_clks)
-               return -ENOMEM;
-
-       qp->num_clks = ARRAY_SIZE(bus_clocks);
-       ret = devm_clk_bulk_get(dev, qp->num_clks, qp->bus_clks);
-       if (ret)
-               return ret;
-
-       ret = clk_bulk_prepare_enable(qp->num_clks, qp->bus_clks);
-       if (ret)
-               return ret;
-
-       provider = &qp->provider;
-       INIT_LIST_HEAD(&provider->nodes);
-       provider->dev = dev;
-       provider->set = qcom_icc_set;
-       provider->aggregate = icc_std_aggregate;
-       provider->xlate = of_icc_xlate_onecell;
-       provider->data = data;
-
-       ret = icc_provider_add(provider);
-       if (ret) {
-               dev_err(dev, "error adding interconnect provider: %d\n", ret);
-               clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-               return ret;
-       }
-
-       for (i = 0; i < num_nodes; i++) {
-               size_t j;
-
-               node = icc_node_create(qnodes[i]->id);
-               if (IS_ERR(node)) {
-                       ret = PTR_ERR(node);
-                       goto err;
-               }
-
-               node->name = qnodes[i]->name;
-               node->data = qnodes[i];
-               icc_node_add(node, provider);
-
-               dev_dbg(dev, "registered node %s\n", node->name);
-
-               /* populate links */
-               for (j = 0; j < qnodes[i]->num_links; j++)
-                       icc_link_create(node, qnodes[i]->links[j]);
-
-               data->nodes[i] = node;
-       }
-       data->num_nodes = num_nodes;
-
-       platform_set_drvdata(pdev, qp);
-
-       return 0;
-err:
-       icc_nodes_remove(provider);
-       clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-       icc_provider_del(provider);
-
-       return ret;
-}
-
-static int qnoc_remove(struct platform_device *pdev)
-{
-       struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
-
-       icc_nodes_remove(&qp->provider);
-       clk_bulk_disable_unprepare(qp->num_clks, qp->bus_clks);
-       return icc_provider_del(&qp->provider);
+       return qnoc_probe(pdev, sizeof(qcs404_bus_clocks),
+                         ARRAY_SIZE(qcs404_bus_clocks), qcs404_bus_clocks);
 }
 
 static const struct of_device_id qcs404_noc_of_match[] = {
@@ -510,7 +284,7 @@ static const struct of_device_id qcs404_noc_of_match[] = {
 MODULE_DEVICE_TABLE(of, qcs404_noc_of_match);
 
 static struct platform_driver qcs404_noc_driver = {
-       .probe = qnoc_probe,
+       .probe = qcs404_qnoc_probe,
        .remove = qnoc_remove,
        .driver = {
                .name = "qnoc-qcs404",
diff --git a/drivers/interconnect/qcom/sdx55.c b/drivers/interconnect/qcom/sdx55.c
new file mode 100644 (file)
index 0000000..a5a122e
--- /dev/null
@@ -0,0 +1,356 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Qualcomm SDX55 interconnect driver
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+ *
+ * Copyright (c) 2021, Linaro Ltd.
+ *
+ */
+
+#include <linux/device.h>
+#include <linux/interconnect.h>
+#include <linux/interconnect-provider.h>
+#include <linux/module.h>
+#include <linux/of_platform.h>
+#include <dt-bindings/interconnect/qcom,sdx55.h>
+
+#include "bcm-voter.h"
+#include "icc-rpmh.h"
+#include "sdx55.h"
+
+DEFINE_QNODE(ipa_core_master, SDX55_MASTER_IPA_CORE, 1, 8, SDX55_SLAVE_IPA_CORE);
+DEFINE_QNODE(llcc_mc, SDX55_MASTER_LLCC, 4, 4, SDX55_SLAVE_EBI_CH0);
+DEFINE_QNODE(acm_tcu, SDX55_MASTER_TCU_0, 1, 8, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC);
+DEFINE_QNODE(qnm_snoc_gc, SDX55_MASTER_SNOC_GC_MEM_NOC, 1, 8, SDX55_SLAVE_LLCC);
+DEFINE_QNODE(xm_apps_rdwr, SDX55_MASTER_AMPSS_M0, 1, 16, SDX55_SLAVE_LLCC, SDX55_SLAVE_MEM_NOC_SNOC, SDX55_SLAVE_MEM_NOC_PCIE_SNOC);
+DEFINE_QNODE(qhm_audio, SDX55_MASTER_AUDIO, 1, 4, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(qhm_blsp1, SDX55_MASTER_BLSP_1, 1, 4, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(qhm_qdss_bam, SDX55_MASTER_QDSS_BAM, 1, 4, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_TLMM, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qhm_qpic, SDX55_MASTER_QPIC, 1, 4, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO);
+DEFINE_QNODE(qhm_snoc_cfg, SDX55_MASTER_SNOC_CFG, 1, 4, SDX55_SLAVE_SERVICE_SNOC);
+DEFINE_QNODE(qhm_spmi_fetcher1, SDX55_MASTER_SPMI_FETCHER, 1, 4, SDX55_SLAVE_AOSS, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP);
+DEFINE_QNODE(qnm_aggre_noc, SDX55_MASTER_ANOC_SNOC, 1, 8, SDX55_SLAVE_PCIE_0, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_TLMM, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QDSS_STM, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_USB3, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_APPSS, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qnm_ipa, SDX55_MASTER_IPA, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_QDSS_STM, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_TLMM, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qnm_memnoc, SDX55_MASTER_MEM_NOC_SNOC, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_TLMM, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QDSS_STM, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_APPSS,  SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(qnm_memnoc_pcie, SDX55_MASTER_MEM_NOC_PCIE_SNOC, 1, 8, SDX55_SLAVE_PCIE_0);
+DEFINE_QNODE(qxm_crypto, SDX55_MASTER_CRYPTO_CORE_0, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP);
+DEFINE_QNODE(xm_emac, SDX55_MASTER_EMAC, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(xm_ipa2pcie_slv, SDX55_MASTER_IPA_PCIE, 1, 8, SDX55_SLAVE_PCIE_0);
+DEFINE_QNODE(xm_pcie, SDX55_MASTER_PCIE, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(xm_qdss_etr, SDX55_MASTER_QDSS_ETR, 1, 8, SDX55_SLAVE_SNOC_CFG, SDX55_SLAVE_EMAC_CFG, SDX55_SLAVE_USB3, SDX55_SLAVE_AOSS, SDX55_SLAVE_SPMI_FETCHER, SDX55_SLAVE_QDSS_CFG, SDX55_SLAVE_PDM, SDX55_SLAVE_SNOC_MEM_NOC_GC, SDX55_SLAVE_TCSR, SDX55_SLAVE_CNOC_DDRSS, SDX55_SLAVE_SPMI_VGI_COEX, SDX55_SLAVE_QPIC, SDX55_SLAVE_OCIMEM, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_USB3_PHY_CFG, SDX55_SLAVE_AOP, SDX55_SLAVE_BLSP_1, SDX55_SLAVE_SDCC_1, SDX55_SLAVE_CNOC_MSS, SDX55_SLAVE_PCIE_PARF, SDX55_SLAVE_ECC_CFG, SDX55_SLAVE_AUDIO, SDX55_SLAVE_AOSS, SDX55_SLAVE_PRNG, SDX55_SLAVE_CRYPTO_0_CFG, SDX55_SLAVE_TCU, SDX55_SLAVE_CLK_CTL, SDX55_SLAVE_IMEM_CFG);
+DEFINE_QNODE(xm_sdc1, SDX55_MASTER_SDCC_1, 1, 8, SDX55_SLAVE_AOSS, SDX55_SLAVE_IPA_CFG, SDX55_SLAVE_ANOC_SNOC, SDX55_SLAVE_AOP, SDX55_SLAVE_AUDIO);
+DEFINE_QNODE(xm_usb3, SDX55_MASTER_USB3, 1, 8, SDX55_SLAVE_ANOC_SNOC);
+DEFINE_QNODE(ipa_core_slave, SDX55_SLAVE_IPA_CORE, 1, 8);
+DEFINE_QNODE(ebi, SDX55_SLAVE_EBI_CH0, 1, 4);
+DEFINE_QNODE(qns_llcc, SDX55_SLAVE_LLCC, 1, 16, SDX55_SLAVE_EBI_CH0);
+DEFINE_QNODE(qns_memnoc_snoc, SDX55_SLAVE_MEM_NOC_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_SNOC);
+DEFINE_QNODE(qns_sys_pcie, SDX55_SLAVE_MEM_NOC_PCIE_SNOC, 1, 8, SDX55_MASTER_MEM_NOC_PCIE_SNOC);
+DEFINE_QNODE(qhs_aop, SDX55_SLAVE_AOP, 1, 4);
+DEFINE_QNODE(qhs_aoss, SDX55_SLAVE_AOSS, 1, 4);
+DEFINE_QNODE(qhs_apss, SDX55_SLAVE_APPSS, 1, 4);
+DEFINE_QNODE(qhs_audio, SDX55_SLAVE_AUDIO, 1, 4);
+DEFINE_QNODE(qhs_blsp1, SDX55_SLAVE_BLSP_1, 1, 4);
+DEFINE_QNODE(qhs_clk_ctl, SDX55_SLAVE_CLK_CTL, 1, 4);
+DEFINE_QNODE(qhs_crypto0_cfg, SDX55_SLAVE_CRYPTO_0_CFG, 1, 4);
+DEFINE_QNODE(qhs_ddrss_cfg, SDX55_SLAVE_CNOC_DDRSS, 1, 4);
+DEFINE_QNODE(qhs_ecc_cfg, SDX55_SLAVE_ECC_CFG, 1, 4);
+DEFINE_QNODE(qhs_emac_cfg, SDX55_SLAVE_EMAC_CFG, 1, 4);
+DEFINE_QNODE(qhs_imem_cfg, SDX55_SLAVE_IMEM_CFG, 1, 4);
+DEFINE_QNODE(qhs_ipa, SDX55_SLAVE_IPA_CFG, 1, 4);
+DEFINE_QNODE(qhs_mss_cfg, SDX55_SLAVE_CNOC_MSS, 1, 4);
+DEFINE_QNODE(qhs_pcie_parf, SDX55_SLAVE_PCIE_PARF, 1, 4);
+DEFINE_QNODE(qhs_pdm, SDX55_SLAVE_PDM, 1, 4);
+DEFINE_QNODE(qhs_prng, SDX55_SLAVE_PRNG, 1, 4);
+DEFINE_QNODE(qhs_qdss_cfg, SDX55_SLAVE_QDSS_CFG, 1, 4);
+DEFINE_QNODE(qhs_qpic, SDX55_SLAVE_QPIC, 1, 4);
+DEFINE_QNODE(qhs_sdc1, SDX55_SLAVE_SDCC_1, 1, 4);
+DEFINE_QNODE(qhs_snoc_cfg, SDX55_SLAVE_SNOC_CFG, 1, 4, SDX55_MASTER_SNOC_CFG);
+DEFINE_QNODE(qhs_spmi_fetcher, SDX55_SLAVE_SPMI_FETCHER, 1, 4);
+DEFINE_QNODE(qhs_spmi_vgi_coex, SDX55_SLAVE_SPMI_VGI_COEX, 1, 4);
+DEFINE_QNODE(qhs_tcsr, SDX55_SLAVE_TCSR, 1, 4);
+DEFINE_QNODE(qhs_tlmm, SDX55_SLAVE_TLMM, 1, 4);
+DEFINE_QNODE(qhs_usb3, SDX55_SLAVE_USB3, 1, 4);
+DEFINE_QNODE(qhs_usb3_phy, SDX55_SLAVE_USB3_PHY_CFG, 1, 4);
+DEFINE_QNODE(qns_aggre_noc, SDX55_SLAVE_ANOC_SNOC, 1, 8, SDX55_MASTER_ANOC_SNOC);
+DEFINE_QNODE(qns_snoc_memnoc, SDX55_SLAVE_SNOC_MEM_NOC_GC, 1, 8, SDX55_MASTER_SNOC_GC_MEM_NOC);
+DEFINE_QNODE(qxs_imem, SDX55_SLAVE_OCIMEM, 1, 8);
+DEFINE_QNODE(srvc_snoc, SDX55_SLAVE_SERVICE_SNOC, 1, 4);
+DEFINE_QNODE(xs_pcie, SDX55_SLAVE_PCIE_0, 1, 8);
+DEFINE_QNODE(xs_qdss_stm, SDX55_SLAVE_QDSS_STM, 1, 4);
+DEFINE_QNODE(xs_sys_tcu_cfg, SDX55_SLAVE_TCU, 1, 8);
+
+DEFINE_QBCM(bcm_mc0, "MC0", true, &ebi);
+DEFINE_QBCM(bcm_sh0, "SH0", true, &qns_llcc);
+DEFINE_QBCM(bcm_ce0, "CE0", false, &qxm_crypto);
+DEFINE_QBCM(bcm_ip0, "IP0", false, &ipa_core_slave);
+DEFINE_QBCM(bcm_pn0, "PN0", false, &qhm_snoc_cfg);
+DEFINE_QBCM(bcm_sh3, "SH3", false, &xm_apps_rdwr);
+DEFINE_QBCM(bcm_sh4, "SH4", false, &qns_memnoc_snoc, &qns_sys_pcie);
+DEFINE_QBCM(bcm_sn0, "SN0", true, &qns_snoc_memnoc);
+DEFINE_QBCM(bcm_sn1, "SN1", false, &qxs_imem);
+DEFINE_QBCM(bcm_pn1, "PN1", false, &xm_sdc1);
+DEFINE_QBCM(bcm_pn2, "PN2", false, &qhm_audio, &qhm_spmi_fetcher1);
+DEFINE_QBCM(bcm_sn3, "SN3", false, &xs_qdss_stm);
+DEFINE_QBCM(bcm_pn3, "PN3", false, &qhm_blsp1, &qhm_qpic);
+DEFINE_QBCM(bcm_sn4, "SN4", false, &xs_sys_tcu_cfg);
+DEFINE_QBCM(bcm_pn5, "PN5", false, &qxm_crypto);
+DEFINE_QBCM(bcm_sn6, "SN6", false, &xs_pcie);
+DEFINE_QBCM(bcm_sn7, "SN7", false, &qnm_aggre_noc, &xm_emac, &xm_usb3,
+           &qns_aggre_noc);
+DEFINE_QBCM(bcm_sn8, "SN8", false, &qhm_qdss_bam, &xm_qdss_etr);
+DEFINE_QBCM(bcm_sn9, "SN9", false, &qnm_memnoc);
+DEFINE_QBCM(bcm_sn10, "SN10", false, &qnm_memnoc_pcie);
+DEFINE_QBCM(bcm_sn11, "SN11", false, &qnm_ipa, &xm_ipa2pcie_slv);
+
+static struct qcom_icc_bcm *mc_virt_bcms[] = {
+       &bcm_mc0,
+};
+
+static struct qcom_icc_node *mc_virt_nodes[] = {
+       [MASTER_LLCC] = &llcc_mc,
+       [SLAVE_EBI_CH0] = &ebi,
+};
+
+static const struct qcom_icc_desc sdx55_mc_virt = {
+       .nodes = mc_virt_nodes,
+       .num_nodes = ARRAY_SIZE(mc_virt_nodes),
+       .bcms = mc_virt_bcms,
+       .num_bcms = ARRAY_SIZE(mc_virt_bcms),
+};
+
+static struct qcom_icc_bcm *mem_noc_bcms[] = {
+       &bcm_sh0,
+       &bcm_sh3,
+       &bcm_sh4,
+};
+
+static struct qcom_icc_node *mem_noc_nodes[] = {
+       [MASTER_TCU_0] = &acm_tcu,
+       [MASTER_SNOC_GC_MEM_NOC] = &qnm_snoc_gc,
+       [MASTER_AMPSS_M0] = &xm_apps_rdwr,
+       [SLAVE_LLCC] = &qns_llcc,
+       [SLAVE_MEM_NOC_SNOC] = &qns_memnoc_snoc,
+       [SLAVE_MEM_NOC_PCIE_SNOC] = &qns_sys_pcie,
+};
+
+static const struct qcom_icc_desc sdx55_mem_noc = {
+       .nodes = mem_noc_nodes,
+       .num_nodes = ARRAY_SIZE(mem_noc_nodes),
+       .bcms = mem_noc_bcms,
+       .num_bcms = ARRAY_SIZE(mem_noc_bcms),
+};
+
+static struct qcom_icc_bcm *system_noc_bcms[] = {
+       &bcm_ce0,
+       &bcm_pn0,
+       &bcm_pn1,
+       &bcm_pn2,
+       &bcm_pn3,
+       &bcm_pn5,
+       &bcm_sn0,
+       &bcm_sn1,
+       &bcm_sn3,
+       &bcm_sn4,
+       &bcm_sn6,
+       &bcm_sn7,
+       &bcm_sn8,
+       &bcm_sn9,
+       &bcm_sn10,
+       &bcm_sn11,
+};
+
+static struct qcom_icc_node *system_noc_nodes[] = {
+       [MASTER_AUDIO] = &qhm_audio,
+       [MASTER_BLSP_1] = &qhm_blsp1,
+       [MASTER_QDSS_BAM] = &qhm_qdss_bam,
+       [MASTER_QPIC] = &qhm_qpic,
+       [MASTER_SNOC_CFG] = &qhm_snoc_cfg,
+       [MASTER_SPMI_FETCHER] = &qhm_spmi_fetcher1,
+       [MASTER_ANOC_SNOC] = &qnm_aggre_noc,
+       [MASTER_IPA] = &qnm_ipa,
+       [MASTER_MEM_NOC_SNOC] = &qnm_memnoc,
+       [MASTER_MEM_NOC_PCIE_SNOC] = &qnm_memnoc_pcie,
+       [MASTER_CRYPTO_CORE_0] = &qxm_crypto,
+       [MASTER_EMAC] = &xm_emac,
+       [MASTER_IPA_PCIE] = &xm_ipa2pcie_slv,
+       [MASTER_PCIE] = &xm_pcie,
+       [MASTER_QDSS_ETR] = &xm_qdss_etr,
+       [MASTER_SDCC_1] = &xm_sdc1,
+       [MASTER_USB3] = &xm_usb3,
+       [SLAVE_AOP] = &qhs_aop,
+       [SLAVE_AOSS] = &qhs_aoss,
+       [SLAVE_APPSS] = &qhs_apss,
+       [SLAVE_AUDIO] = &qhs_audio,
+       [SLAVE_BLSP_1] = &qhs_blsp1,
+       [SLAVE_CLK_CTL] = &qhs_clk_ctl,
+       [SLAVE_CRYPTO_0_CFG] = &qhs_crypto0_cfg,
+       [SLAVE_CNOC_DDRSS] = &qhs_ddrss_cfg,
+       [SLAVE_ECC_CFG] = &qhs_ecc_cfg,
+       [SLAVE_EMAC_CFG] = &qhs_emac_cfg,
+       [SLAVE_IMEM_CFG] = &qhs_imem_cfg,
+       [SLAVE_IPA_CFG] = &qhs_ipa,
+       [SLAVE_CNOC_MSS] = &qhs_mss_cfg,
+       [SLAVE_PCIE_PARF] = &qhs_pcie_parf,
+       [SLAVE_PDM] = &qhs_pdm,
+       [SLAVE_PRNG] = &qhs_prng,
+       [SLAVE_QDSS_CFG] = &qhs_qdss_cfg,
+       [SLAVE_QPIC] = &qhs_qpic,
+       [SLAVE_SDCC_1] = &qhs_sdc1,
+       [SLAVE_SNOC_CFG] = &qhs_snoc_cfg,
+       [SLAVE_SPMI_FETCHER] = &qhs_spmi_fetcher,
+       [SLAVE_SPMI_VGI_COEX] = &qhs_spmi_vgi_coex,
+       [SLAVE_TCSR] = &qhs_tcsr,
+       [SLAVE_TLMM] = &qhs_tlmm,
+       [SLAVE_USB3] = &qhs_usb3,
+       [SLAVE_USB3_PHY_CFG] = &qhs_usb3_phy,
+       [SLAVE_ANOC_SNOC] = &qns_aggre_noc,
+       [SLAVE_SNOC_MEM_NOC_GC] = &qns_snoc_memnoc,
+       [SLAVE_OCIMEM] = &qxs_imem,
+       [SLAVE_SERVICE_SNOC] = &srvc_snoc,
+       [SLAVE_PCIE_0] = &xs_pcie,
+       [SLAVE_QDSS_STM] = &xs_qdss_stm,
+       [SLAVE_TCU] = &xs_sys_tcu_cfg,
+};
+
+static const struct qcom_icc_desc sdx55_system_noc = {
+       .nodes = system_noc_nodes,
+       .num_nodes = ARRAY_SIZE(system_noc_nodes),
+       .bcms = system_noc_bcms,
+       .num_bcms = ARRAY_SIZE(system_noc_bcms),
+};
+
+static struct qcom_icc_bcm *ipa_virt_bcms[] = {
+       &bcm_ip0,
+};
+
+static struct qcom_icc_node *ipa_virt_nodes[] = {
+       [MASTER_IPA_CORE] = &ipa_core_master,
+       [SLAVE_IPA_CORE] = &ipa_core_slave,
+};
+
+static const struct qcom_icc_desc sdx55_ipa_virt = {
+       .nodes = ipa_virt_nodes,
+       .num_nodes = ARRAY_SIZE(ipa_virt_nodes),
+       .bcms = ipa_virt_bcms,
+       .num_bcms = ARRAY_SIZE(ipa_virt_bcms),
+};
+
+static int qnoc_probe(struct platform_device *pdev)
+{
+       const struct qcom_icc_desc *desc;
+       struct icc_onecell_data *data;
+       struct icc_provider *provider;
+       struct qcom_icc_node **qnodes;
+       struct qcom_icc_provider *qp;
+       struct icc_node *node;
+       size_t num_nodes, i;
+       int ret;
+
+       desc = device_get_match_data(&pdev->dev);
+       if (!desc)
+               return -EINVAL;
+
+       qnodes = desc->nodes;
+       num_nodes = desc->num_nodes;
+
+       qp = devm_kzalloc(&pdev->dev, sizeof(*qp), GFP_KERNEL);
+       if (!qp)
+               return -ENOMEM;
+
+       data = devm_kcalloc(&pdev->dev, num_nodes, sizeof(*node), GFP_KERNEL);
+       if (!data)
+               return -ENOMEM;
+
+       provider = &qp->provider;
+       provider->dev = &pdev->dev;
+       provider->set = qcom_icc_set;
+       provider->pre_aggregate = qcom_icc_pre_aggregate;
+       provider->aggregate = qcom_icc_aggregate;
+       provider->xlate = of_icc_xlate_onecell;
+       INIT_LIST_HEAD(&provider->nodes);
+       provider->data = data;
+
+       qp->dev = &pdev->dev;
+       qp->bcms = desc->bcms;
+       qp->num_bcms = desc->num_bcms;
+
+       qp->voter = of_bcm_voter_get(qp->dev, NULL);
+       if (IS_ERR(qp->voter))
+               return PTR_ERR(qp->voter);
+
+       ret = icc_provider_add(provider);
+       if (ret) {
+               dev_err(&pdev->dev, "error adding interconnect provider\n");
+               return ret;
+       }
+
+       for (i = 0; i < qp->num_bcms; i++)
+               qcom_icc_bcm_init(qp->bcms[i], &pdev->dev);
+
+       for (i = 0; i < num_nodes; i++) {
+               size_t j;
+
+               if (!qnodes[i])
+                       continue;
+
+               node = icc_node_create(qnodes[i]->id);
+               if (IS_ERR(node)) {
+                       ret = PTR_ERR(node);
+                       goto err;
+               }
+
+               node->name = qnodes[i]->name;
+               node->data = qnodes[i];
+               icc_node_add(node, provider);
+
+               for (j = 0; j < qnodes[i]->num_links; j++)
+                       icc_link_create(node, qnodes[i]->links[j]);
+
+               data->nodes[i] = node;
+       }
+       data->num_nodes = num_nodes;
+
+       platform_set_drvdata(pdev, qp);
+
+       return 0;
+err:
+       icc_nodes_remove(provider);
+       icc_provider_del(provider);
+       return ret;
+}
+
+static int qnoc_remove(struct platform_device *pdev)
+{
+       struct qcom_icc_provider *qp = platform_get_drvdata(pdev);
+
+       icc_nodes_remove(&qp->provider);
+       return icc_provider_del(&qp->provider);
+}
+
+static const struct of_device_id qnoc_of_match[] = {
+       { .compatible = "qcom,sdx55-mc-virt",
+         .data = &sdx55_mc_virt},
+       { .compatible = "qcom,sdx55-mem-noc",
+         .data = &sdx55_mem_noc},
+       { .compatible = "qcom,sdx55-system-noc",
+         .data = &sdx55_system_noc},
+       { .compatible = "qcom,sdx55-ipa-virt",
+         .data = &sdx55_ipa_virt},
+       { }
+};
+MODULE_DEVICE_TABLE(of, qnoc_of_match);
+
+static struct platform_driver qnoc_driver = {
+       .probe = qnoc_probe,
+       .remove = qnoc_remove,
+       .driver = {
+               .name = "qnoc-sdx55",
+               .of_match_table = qnoc_of_match,
+               .sync_state = icc_sync_state,
+       },
+};
+module_platform_driver(qnoc_driver);
+
+MODULE_DESCRIPTION("Qualcomm SDX55 NoC driver");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_LICENSE("GPL v2");
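
Unlike the RPM-based drivers above, sdx55 builds on the RPMh helpers in icc-rpmh.h: DEFINE_QNODE() there takes a channel count and bus width instead of RPM ids, and DEFINE_QBCM() groups nodes under a named Bus Clock Manager whose votes the BCM voter aggregates. At the time of this commit the BCM macro expands approximately to the following (shown for orientation, not defined by this patch):

#define DEFINE_QBCM(_name, _bcmname, _keepalive, ...)			\
	static struct qcom_icc_bcm _name = {				\
		.name = _bcmname,					\
		.keepalive = _keepalive,				\
		.num_nodes = ARRAY_SIZE(((struct qcom_icc_node *[]){	\
					 __VA_ARGS__ })),		\
		.nodes = { __VA_ARGS__ },				\
	}
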
diff --git a/drivers/interconnect/qcom/sdx55.h b/drivers/interconnect/qcom/sdx55.h
new file mode 100644 (file)
index 0000000..deff8af
--- /dev/null
@@ -0,0 +1,70 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (c) 2021, Linaro Ltd.
+ */
+
+#ifndef __DRIVERS_INTERCONNECT_QCOM_SDX55_H
+#define __DRIVERS_INTERCONNECT_QCOM_SDX55_H
+
+#define SDX55_MASTER_IPA_CORE                  0
+#define SDX55_MASTER_LLCC                      1
+#define SDX55_MASTER_TCU_0                     2
+#define SDX55_MASTER_SNOC_GC_MEM_NOC           3
+#define SDX55_MASTER_AMPSS_M0                  4
+#define SDX55_MASTER_AUDIO                     5
+#define SDX55_MASTER_BLSP_1                    6
+#define SDX55_MASTER_QDSS_BAM                  7
+#define SDX55_MASTER_QPIC                      8
+#define SDX55_MASTER_SNOC_CFG                  9
+#define SDX55_MASTER_SPMI_FETCHER              10
+#define SDX55_MASTER_ANOC_SNOC                 11
+#define SDX55_MASTER_IPA                       12
+#define SDX55_MASTER_MEM_NOC_SNOC              13
+#define SDX55_MASTER_MEM_NOC_PCIE_SNOC         14
+#define SDX55_MASTER_CRYPTO_CORE_0             15
+#define SDX55_MASTER_EMAC                      16
+#define SDX55_MASTER_IPA_PCIE                  17
+#define SDX55_MASTER_PCIE                      18
+#define SDX55_MASTER_QDSS_ETR                  19
+#define SDX55_MASTER_SDCC_1                    20
+#define SDX55_MASTER_USB3                      21
+#define SDX55_SLAVE_IPA_CORE                   22
+#define SDX55_SLAVE_EBI_CH0                    23
+#define SDX55_SLAVE_LLCC                       24
+#define SDX55_SLAVE_MEM_NOC_SNOC               25
+#define SDX55_SLAVE_MEM_NOC_PCIE_SNOC          26
+#define SDX55_SLAVE_ANOC_SNOC                  27
+#define SDX55_SLAVE_SNOC_CFG                   28
+#define SDX55_SLAVE_EMAC_CFG                   29
+#define SDX55_SLAVE_USB3                       30
+#define SDX55_SLAVE_TLMM                       31
+#define SDX55_SLAVE_SPMI_FETCHER               32
+#define SDX55_SLAVE_QDSS_CFG                   33
+#define SDX55_SLAVE_PDM                                34
+#define SDX55_SLAVE_SNOC_MEM_NOC_GC            35
+#define SDX55_SLAVE_TCSR                       36
+#define SDX55_SLAVE_CNOC_DDRSS                 37
+#define SDX55_SLAVE_SPMI_VGI_COEX              38
+#define SDX55_SLAVE_QPIC                       39
+#define SDX55_SLAVE_OCIMEM                     40
+#define SDX55_SLAVE_IPA_CFG                    41
+#define SDX55_SLAVE_USB3_PHY_CFG               42
+#define SDX55_SLAVE_AOP                                43
+#define SDX55_SLAVE_BLSP_1                     44
+#define SDX55_SLAVE_SDCC_1                     45
+#define SDX55_SLAVE_CNOC_MSS                   46
+#define SDX55_SLAVE_PCIE_PARF                  47
+#define SDX55_SLAVE_ECC_CFG                    48
+#define SDX55_SLAVE_AUDIO                      49
+#define SDX55_SLAVE_AOSS                       51
+#define SDX55_SLAVE_PRNG                       52
+#define SDX55_SLAVE_CRYPTO_0_CFG               53
+#define SDX55_SLAVE_TCU                                54
+#define SDX55_SLAVE_CLK_CTL                    55
+#define SDX55_SLAVE_IMEM_CFG                   56
+#define SDX55_SLAVE_SERVICE_SNOC               57
+#define SDX55_SLAVE_PCIE_0                     58
+#define SDX55_SLAVE_QDSS_STM                   59
+#define SDX55_SLAVE_APPSS                      60
+
+#endif
index 9126efc..321f590 100644 (file)
@@ -2714,7 +2714,6 @@ static int __init early_amd_iommu_init(void)
        struct acpi_table_header *ivrs_base;
        int i, remap_cache_sz, ret;
        acpi_status status;
-       u32 pci_id;
 
        if (!amd_iommu_detected)
                return -ENODEV;
@@ -2804,16 +2803,6 @@ static int __init early_amd_iommu_init(void)
        if (ret)
                goto out;
 
-       /* Disable IOMMU if there's Stoney Ridge graphics */
-       for (i = 0; i < 32; i++) {
-               pci_id = read_pci_config(0, i, 0, 0);
-               if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
-                       pr_info("Disable IOMMU on Stoney Ridge\n");
-                       amd_iommu_disabled = true;
-                       break;
-               }
-       }
-
        /* Disable any previously enabled IOMMUs */
        if (!is_kdump_kernel() || amd_iommu_disabled)
                disable_iommus();
@@ -2880,6 +2869,7 @@ static bool detect_ivrs(void)
 {
        struct acpi_table_header *ivrs_base;
        acpi_status status;
+       int i;
 
        status = acpi_get_table("IVRS", 0, &ivrs_base);
        if (status == AE_NOT_FOUND)
@@ -2892,6 +2882,17 @@ static bool detect_ivrs(void)
 
        acpi_put_table(ivrs_base);
 
+       /* Don't use the IOMMU if Stoney Ridge graphics is present */
+       for (i = 0; i < 32; i++) {
+               u32 pci_id;
+
+               pci_id = read_pci_config(0, i, 0, 0);
+               if ((pci_id & 0xffff) == 0x1002 && (pci_id >> 16) == 0x98e4) {
+                       pr_info("Disable IOMMU on Stoney Ridge\n");
+                       return false;
+               }
+       }
+
        /* Make sure ACS will be enabled during PCI probe */
        pci_request_acs();
 
@@ -2918,12 +2919,12 @@ static int __init state_next(void)
                }
                break;
        case IOMMU_IVRS_DETECTED:
-               ret = early_amd_iommu_init();
-               init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
-               if (init_state == IOMMU_ACPI_FINISHED && amd_iommu_disabled) {
-                       pr_info("AMD IOMMU disabled\n");
+               if (amd_iommu_disabled) {
                        init_state = IOMMU_CMDLINE_DISABLED;
                        ret = -EINVAL;
+               } else {
+                       ret = early_amd_iommu_init();
+                       init_state = ret ? IOMMU_INIT_ERROR : IOMMU_ACPI_FINISHED;
                }
                break;
        case IOMMU_ACPI_FINISHED:
@@ -3001,8 +3002,11 @@ int __init amd_iommu_prepare(void)
        amd_iommu_irq_remap = true;
 
        ret = iommu_go_to_state(IOMMU_ACPI_FINISHED);
-       if (ret)
+       if (ret) {
+               amd_iommu_irq_remap = false;
                return ret;
+       }
+
        return amd_iommu_irq_remap ? 0 : -ENODEV;
 }
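
The Stoney Ridge quirk above scans bus 0 with read_pci_config(), whose return value packs the vendor ID into the low 16 bits and the device ID into the high 16 bits of config dword 0. A minimal sketch of that decode (the wrapper itself is hypothetical):

    static bool example_is_stoney_gfx(u8 slot)
    {
            u32 pci_id = read_pci_config(0, slot, 0, 0);
            u16 vendor = pci_id & 0xffff;   /* 0x1002 is AMD */
            u16 device = pci_id >> 16;      /* 0x98e4 is the Stoney Ridge GPU */

            return vendor == 0x1002 && device == 0x98e4;
    }

Moving the check into detect_ivrs() lets the init state machine bail out before early_amd_iommu_init() allocates anything.
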
 
index 1c4961e..bb0ee5c 100644 (file)
@@ -182,6 +182,10 @@ static bool increase_address_space(struct protection_domain *domain,
        bool ret = true;
        u64 *pte;
 
+       pte = (void *)get_zeroed_page(gfp);
+       if (!pte)
+               return false;
+
        spin_lock_irqsave(&domain->lock, flags);
 
        if (address <= PM_LEVEL_SIZE(domain->iop.mode))
@@ -191,10 +195,6 @@ static bool increase_address_space(struct protection_domain *domain,
        if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL))
                goto out;
 
-       pte = (void *)get_zeroed_page(gfp);
-       if (!pte)
-               goto out;
-
        *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root));
 
        domain->iop.root  = pte;
@@ -208,10 +208,12 @@ static bool increase_address_space(struct protection_domain *domain,
         */
        amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode);
 
+       pte = NULL;
        ret = true;
 
 out:
        spin_unlock_irqrestore(&domain->lock, flags);
+       free_page((unsigned long)pte);
 
        return ret;
 }
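
The reordering above exists because get_zeroed_page(gfp) may sleep for GFP_KERNEL-style flags, which is not allowed inside spin_lock_irqsave(). The allocation therefore moves before the lock, and the page is either handed over or freed afterwards. A generic sketch of the pattern, with hypothetical still_needed()/install() helpers:

    u64 *pte = (void *)get_zeroed_page(gfp);    /* may sleep: stay unlocked */
    unsigned long flags;

    if (!pte)
            return false;

    spin_lock_irqsave(&domain->lock, flags);
    if (still_needed(domain)) {
            install(domain, pte);
            pte = NULL;             /* ownership transferred under the lock */
    }
    spin_unlock_irqrestore(&domain->lock, flags);
    free_page((unsigned long)pte);  /* no-op when pte was consumed */

free_page(0) is safe, which is why the unconditional call after the unlock works.
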
index f659395..af765c8 100644 (file)
@@ -311,6 +311,11 @@ static void iommu_dma_flush_iotlb_all(struct iova_domain *iovad)
        domain->ops->flush_iotlb_all(domain);
 }
 
+static bool dev_is_untrusted(struct device *dev)
+{
+       return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
+}
+
 /**
  * iommu_dma_init_domain - Initialise a DMA mapping domain
  * @domain: IOMMU domain previously prepared by iommu_get_dma_cookie()
@@ -365,8 +370,9 @@ static int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base,
 
        init_iova_domain(iovad, 1UL << order, base_pfn);
 
-       if (!cookie->fq_domain && !iommu_domain_get_attr(domain,
-                       DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) && attr) {
+       if (!cookie->fq_domain && (!dev || !dev_is_untrusted(dev)) &&
+           !iommu_domain_get_attr(domain, DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE, &attr) &&
+           attr) {
                if (init_iova_flush_queue(iovad, iommu_dma_flush_iotlb_all,
                                          iommu_dma_entry_dtor))
                        pr_warn("iova flush queue initialization failed\n");
@@ -508,11 +514,6 @@ static void __iommu_dma_unmap_swiotlb(struct device *dev, dma_addr_t dma_addr,
                                iova_align(iovad, size), dir, attrs);
 }
 
-static bool dev_is_untrusted(struct device *dev)
-{
-       return dev_is_pci(dev) && to_pci_dev(dev)->untrusted;
-}
-
 static dma_addr_t __iommu_dma_map(struct device *dev, phys_addr_t phys,
                size_t size, int prot, u64 dma_mask)
 {
@@ -1187,34 +1188,6 @@ static void *iommu_dma_alloc(struct device *dev, size_t size,
        return cpu_addr;
 }
 
-#ifdef CONFIG_DMA_REMAP
-static void *iommu_dma_alloc_noncoherent(struct device *dev, size_t size,
-               dma_addr_t *handle, enum dma_data_direction dir, gfp_t gfp)
-{
-       if (!gfpflags_allow_blocking(gfp)) {
-               struct page *page;
-
-               page = dma_common_alloc_pages(dev, size, handle, dir, gfp);
-               if (!page)
-                       return NULL;
-               return page_address(page);
-       }
-
-       return iommu_dma_alloc_remap(dev, size, handle, gfp | __GFP_ZERO,
-                                    PAGE_KERNEL, 0);
-}
-
-static void iommu_dma_free_noncoherent(struct device *dev, size_t size,
-               void *cpu_addr, dma_addr_t handle, enum dma_data_direction dir)
-{
-       __iommu_dma_unmap(dev, handle, size);
-       __iommu_dma_free(dev, size, cpu_addr);
-}
-#else
-#define iommu_dma_alloc_noncoherent            NULL
-#define iommu_dma_free_noncoherent             NULL
-#endif /* CONFIG_DMA_REMAP */
-
 static int iommu_dma_mmap(struct device *dev, struct vm_area_struct *vma,
                void *cpu_addr, dma_addr_t dma_addr, size_t size,
                unsigned long attrs)
@@ -1285,8 +1258,6 @@ static const struct dma_map_ops iommu_dma_ops = {
        .free                   = iommu_dma_free,
        .alloc_pages            = dma_common_alloc_pages,
        .free_pages             = dma_common_free_pages,
-       .alloc_noncoherent      = iommu_dma_alloc_noncoherent,
-       .free_noncoherent       = iommu_dma_free_noncoherent,
        .mmap                   = iommu_dma_mmap,
        .get_sgtable            = iommu_dma_get_sgtable,
        .map_page               = iommu_dma_map_page,
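
Two things happen in this file: dev_is_untrusted() moves above iommu_dma_init_domain() so that untrusted PCI devices (externally facing ports, e.g. Thunderbolt) never get a deferred-invalidation flush queue, and the noncoherent alloc/free ops are dropped since the DMA core can now build dma_alloc_noncoherent() on top of .alloc_pages. A sketch of the trust check that gates the flush queue:

    static bool example_wants_flush_queue(struct device *dev)
    {
            /* untrusted devices need their mappings torn down synchronously */
            if (dev_is_pci(dev) && to_pci_dev(dev)->untrusted)
                    return false;
            return true;
    }
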
index 97dfcff..444c0be 100644 (file)
@@ -30,8 +30,8 @@
 #define VCMD_VRSP_IP                   0x1
 #define VCMD_VRSP_SC(e)                        (((e) >> 1) & 0x3)
 #define VCMD_VRSP_SC_SUCCESS           0
-#define VCMD_VRSP_SC_NO_PASID_AVAIL    1
-#define VCMD_VRSP_SC_INVALID_PASID     1
+#define VCMD_VRSP_SC_NO_PASID_AVAIL    2
+#define VCMD_VRSP_SC_INVALID_PASID     2
 #define VCMD_VRSP_RESULT_PASID(e)      (((e) >> 8) & 0xfffff)
 #define VCMD_CMD_OPERAND(e)            ((e) << 8)
 /*
index 4a3f095..602aab9 100644 (file)
@@ -798,10 +798,69 @@ static phys_addr_t tegra_smmu_iova_to_phys(struct iommu_domain *domain,
        return SMMU_PFN_PHYS(pfn) + SMMU_OFFSET_IN_PAGE(iova);
 }
 
+static struct tegra_smmu *tegra_smmu_find(struct device_node *np)
+{
+       struct platform_device *pdev;
+       struct tegra_mc *mc;
+
+       pdev = of_find_device_by_node(np);
+       if (!pdev)
+               return NULL;
+
+       mc = platform_get_drvdata(pdev);
+       if (!mc)
+               return NULL;
+
+       return mc->smmu;
+}
+
+static int tegra_smmu_configure(struct tegra_smmu *smmu, struct device *dev,
+                               struct of_phandle_args *args)
+{
+       const struct iommu_ops *ops = smmu->iommu.ops;
+       int err;
+
+       err = iommu_fwspec_init(dev, &dev->of_node->fwnode, ops);
+       if (err < 0) {
+               dev_err(dev, "failed to initialize fwspec: %d\n", err);
+               return err;
+       }
+
+       err = ops->of_xlate(dev, args);
+       if (err < 0) {
+               dev_err(dev, "failed to parse SW group ID: %d\n", err);
+               iommu_fwspec_free(dev);
+               return err;
+       }
+
+       return 0;
+}
+
 static struct iommu_device *tegra_smmu_probe_device(struct device *dev)
 {
-       struct tegra_smmu *smmu = dev_iommu_priv_get(dev);
+       struct device_node *np = dev->of_node;
+       struct tegra_smmu *smmu = NULL;
+       struct of_phandle_args args;
+       unsigned int index = 0;
+       int err;
+
+       while (of_parse_phandle_with_args(np, "iommus", "#iommu-cells", index,
+                                         &args) == 0) {
+               smmu = tegra_smmu_find(args.np);
+               if (smmu) {
+                       err = tegra_smmu_configure(smmu, dev, &args);
 
+                       if (err < 0) {
+                               of_node_put(args.np);
+                               return ERR_PTR(err);
+                       }
+               }
+
+               of_node_put(args.np);
+               index++;
+       }
+
+       smmu = dev_iommu_priv_get(dev);
        if (!smmu)
                return ERR_PTR(-ENODEV);
 
@@ -1028,6 +1087,16 @@ struct tegra_smmu *tegra_smmu_probe(struct device *dev,
        if (!smmu)
                return ERR_PTR(-ENOMEM);
 
+       /*
+        * This is a bit of a hack. Ideally we'd want to simply return this
+        * value. However the IOMMU registration process will attempt to add
+        * all devices to the IOMMU when bus_set_iommu() is called. In order
+        * not to rely on global variables to track the IOMMU instance, we
+        * set it here so that it can be looked up from the .probe_device()
+        * callback via the IOMMU device's .drvdata field.
+        */
+       mc->smmu = smmu;
+
        size = BITS_TO_LONGS(soc->num_asids) * sizeof(long);
 
        smmu->asids = devm_kzalloc(dev, size, GFP_KERNEL);
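
tegra_smmu_probe_device() now resolves its controller by walking the device's "iommus" phandles itself. The general shape of such a walk, with a hypothetical consume_iommu() standing in for the driver's configure step:

    struct of_phandle_args args;
    unsigned int index = 0;

    while (!of_parse_phandle_with_args(np, "iommus", "#iommu-cells",
                                       index, &args)) {
            consume_iommu(args.np, &args);
            of_node_put(args.np);   /* the walk takes a reference per hit */
            index++;
    }

The mc->smmu assignment in probe is what makes tegra_smmu_find() work before bus_set_iommu() finishes wiring everything up.
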
index 9267a85..7de9605 100644 (file)
@@ -64,9 +64,6 @@ static int ipack_bus_probe(struct device *device)
        struct ipack_device *dev = to_ipack_dev(device);
        struct ipack_driver *drv = to_ipack_driver(device->driver);
 
-       if (!drv->ops->probe)
-               return -EINVAL;
-
        return drv->ops->probe(dev);
 }
 
@@ -75,10 +72,9 @@ static int ipack_bus_remove(struct device *device)
        struct ipack_device *dev = to_ipack_dev(device);
        struct ipack_driver *drv = to_ipack_driver(device->driver);
 
-       if (!drv->ops->remove)
-               return -EINVAL;
+       if (drv->ops->remove)
+               drv->ops->remove(dev);
 
-       drv->ops->remove(dev);
        return 0;
 }
 
@@ -252,6 +248,9 @@ EXPORT_SYMBOL_GPL(ipack_bus_unregister);
 int ipack_driver_register(struct ipack_driver *edrv, struct module *owner,
                          const char *name)
 {
+       if (!edrv->ops->probe)
+               return -EINVAL;
+
        edrv->driver.owner = owner;
        edrv->driver.name = name;
        edrv->driver.bus = &ipack_bus_type;
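
The ipack change validates the mandatory probe callback once, at driver registration, and treats remove as optional at the call site. The same pattern in miniature, with hypothetical types:

    int example_driver_register(struct example_driver *drv)
    {
            if (!drv->ops->probe)                   /* mandatory op */
                    return -EINVAL;
            return driver_register(&drv->driver);
    }

    static int example_bus_remove(struct example_device *dev)
    {
            if (dev->drv->ops->remove)              /* optional op */
                    dev->drv->ops->remove(dev);
            return 0;
    }
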
index e74fa20..15536e3 100644 (file)
@@ -8,7 +8,6 @@ config IRQCHIP
 config ARM_GIC
        bool
        select IRQ_DOMAIN_HIERARCHY
-       select GENERIC_IRQ_MULTI_HANDLER
        select GENERIC_IRQ_EFFECTIVE_AFF_MASK
 
 config ARM_GIC_PM
@@ -33,7 +32,6 @@ config GIC_NON_BANKED
 
 config ARM_GIC_V3
        bool
-       select GENERIC_IRQ_MULTI_HANDLER
        select IRQ_DOMAIN_HIERARCHY
        select PARTITION_PERCPU
        select GENERIC_IRQ_EFFECTIVE_AFF_MASK
@@ -64,7 +62,6 @@ config ARM_NVIC
 config ARM_VIC
        bool
        select IRQ_DOMAIN
-       select GENERIC_IRQ_MULTI_HANDLER
 
 config ARM_VIC_NR
        int
@@ -99,14 +96,12 @@ config ATMEL_AIC_IRQ
        bool
        select GENERIC_IRQ_CHIP
        select IRQ_DOMAIN
-       select GENERIC_IRQ_MULTI_HANDLER
        select SPARSE_IRQ
 
 config ATMEL_AIC5_IRQ
        bool
        select GENERIC_IRQ_CHIP
        select IRQ_DOMAIN
-       select GENERIC_IRQ_MULTI_HANDLER
        select SPARSE_IRQ
 
 config I8259
@@ -153,7 +148,6 @@ config DW_APB_ICTL
 config FARADAY_FTINTC010
        bool
        select IRQ_DOMAIN
-       select GENERIC_IRQ_MULTI_HANDLER
        select SPARSE_IRQ
 
 config HISILICON_IRQ_MBIGEN
@@ -169,7 +163,6 @@ config IMGPDC_IRQ
 config IXP4XX_IRQ
        bool
        select IRQ_DOMAIN
-       select GENERIC_IRQ_MULTI_HANDLER
        select SPARSE_IRQ
 
 config MADERA_IRQ
@@ -186,7 +179,6 @@ config CLPS711X_IRQCHIP
        bool
        depends on ARCH_CLPS711X
        select IRQ_DOMAIN
-       select GENERIC_IRQ_MULTI_HANDLER
        select SPARSE_IRQ
        default y
 
@@ -205,7 +197,6 @@ config OMAP_IRQCHIP
 config ORION_IRQCHIP
        bool
        select IRQ_DOMAIN
-       select GENERIC_IRQ_MULTI_HANDLER
 
 config PIC32_EVIC
        bool
index 7a7222d..b938d1d 100644 (file)
@@ -179,5 +179,6 @@ err_free_tcu:
 }
 IRQCHIP_DECLARE(jz4740_tcu_irq, "ingenic,jz4740-tcu", ingenic_tcu_irq_init);
 IRQCHIP_DECLARE(jz4725b_tcu_irq, "ingenic,jz4725b-tcu", ingenic_tcu_irq_init);
+IRQCHIP_DECLARE(jz4760_tcu_irq, "ingenic,jz4760-tcu", ingenic_tcu_irq_init);
 IRQCHIP_DECLARE(jz4770_tcu_irq, "ingenic,jz4770-tcu", ingenic_tcu_irq_init);
 IRQCHIP_DECLARE(x1000_tcu_irq, "ingenic,x1000-tcu", ingenic_tcu_irq_init);
index b61a890..ea36bb0 100644 (file)
@@ -155,6 +155,7 @@ static int __init intc_2chip_of_init(struct device_node *node,
 {
        return ingenic_intc_of_init(node, 2);
 }
+IRQCHIP_DECLARE(jz4760_intc, "ingenic,jz4760-intc", intc_2chip_of_init);
 IRQCHIP_DECLARE(jz4770_intc, "ingenic,jz4770-intc", intc_2chip_of_init);
 IRQCHIP_DECLARE(jz4775_intc, "ingenic,jz4775-intc", intc_2chip_of_init);
 IRQCHIP_DECLARE(jz4780_intc, "ingenic,jz4780-intc", intc_2chip_of_init);
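
Each IRQCHIP_DECLARE() line binds an init function to one compatible string, so JZ4760 support is a single new entry per block. The macro's usage contract, sketched with a made-up compatible:

    static int __init my_intc_init(struct device_node *node,
                                   struct device_node *parent)
    {
            /* ioremap registers, create the irq domain, etc. */
            return 0;
    }
    IRQCHIP_DECLARE(my_intc, "vendor,my-intc", my_intc_init);
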
index 7168778..cb0afe8 100644 (file)
@@ -721,7 +721,7 @@ u16 capi20_put_message(struct capi20_appl *ap, struct sk_buff *skb)
  * Return value: CAPI result code
  */
 
-u16 capi20_get_manufacturer(u32 contr, u8 *buf)
+u16 capi20_get_manufacturer(u32 contr, u8 buf[CAPI_MANUFACTURER_LEN])
 {
        struct capi_ctr *ctr;
        u16 ret;
@@ -787,7 +787,7 @@ u16 capi20_get_version(u32 contr, struct capi_version *verp)
  * Return value: CAPI result code
  */
 
-u16 capi20_get_serial(u32 contr, u8 *serial)
+u16 capi20_get_serial(u32 contr, u8 serial[CAPI_SERIAL_LEN])
 {
        struct capi_ctr *ctr;
        u16 ret;
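
Replacing u8 *buf with u8 buf[CAPI_MANUFACTURER_LEN] does not change the ABI (array parameters still decay to pointers), but it documents the minimum buffer size in the prototype and gives tools such as GCC's -Warray-parameter something concrete to check redeclarations and callers against. Sketch:

    #define EXAMPLE_LEN 64  /* stand-in for CAPI_MANUFACTURER_LEN */

    /* same calling convention as "u8 *buf", but the expected size is visible */
    u16 example_get_manufacturer(u32 contr, u8 buf[EXAMPLE_LEN]);
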
index ec47508..39f841b 100644 (file)
@@ -694,7 +694,7 @@ isac_release(struct isac_hw *isac)
 {
        if (isac->type & IPAC_TYPE_ISACX)
                WriteISAC(isac, ISACX_MASK, 0xff);
-       else
+       else if (isac->type != 0)
                WriteISAC(isac, ISAC_MASK, 0xff);
        if (isac->dch.timer.function != NULL) {
                del_timer(&isac->dch.timer);
index 6c1d8b6..b6742b4 100644 (file)
@@ -934,4 +934,7 @@ source "drivers/leds/flash/Kconfig"
 comment "LED Triggers"
 source "drivers/leds/trigger/Kconfig"
 
+comment "LED Blink"
+source "drivers/leds/blink/Kconfig"
+
 endif # NEW_LEDS
index 156c0b4..2a698df 100644 (file)
@@ -108,3 +108,6 @@ obj-$(CONFIG_LEDS_CLASS_FLASH)              += flash/
 
 # LED Triggers
 obj-$(CONFIG_LEDS_TRIGGERS)            += trigger/
+
+# LED Blink
+obj-$(CONFIG_LEDS_BLINK)                += blink/
diff --git a/drivers/leds/blink/Kconfig b/drivers/leds/blink/Kconfig
new file mode 100644 (file)
index 0000000..265b534
--- /dev/null
@@ -0,0 +1,20 @@
+menuconfig LEDS_BLINK
+       bool "LED Blink support"
+       depends on LEDS_CLASS
+       help
+         This option enables blink support for the LED class.
+         If unsure, say Y.
+
+if LEDS_BLINK
+
+config LEDS_BLINK_LGM
+       tristate "LED support for Intel LGM SoC series"
+       depends on LEDS_CLASS
+       depends on MFD_SYSCON
+       depends on OF
+       help
+         The parallel-to-serial converter, also called the SSO controller,
+         can drive external shift registers for LED outputs.
+         This enables LED support for the Serial Shift Output (SSO)
+         controller.
+
+endif # LEDS_BLINK
diff --git a/drivers/leds/blink/Makefile b/drivers/leds/blink/Makefile
new file mode 100644 (file)
index 0000000..2fa6c7b
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_LEDS_BLINK_LGM)   += leds-lgm-sso.o
diff --git a/drivers/leds/blink/leds-lgm-sso.c b/drivers/leds/blink/leds-lgm-sso.c
new file mode 100644 (file)
index 0000000..7d5c9ca
--- /dev/null
@@ -0,0 +1,888 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Intel Lightning Mountain SoC LED Serial Shift Output Controller driver
+ *
+ * Copyright (c) 2020 Intel Corporation.
+ */
+
+#include <linux/bitfield.h>
+#include <linux/clk.h>
+#include <linux/gpio.h>
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/leds.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/property.h>
+#include <linux/regmap.h>
+#include <linux/sizes.h>
+#include <linux/uaccess.h>
+
+#define SSO_DEV_NAME                   "lgm-sso"
+
+#define LED_BLINK_H8_0                 0x0
+#define LED_BLINK_H8_1                 0x4
+#define GET_FREQ_OFFSET(pin, src)      (((pin) * 6) + ((src) * 2))
+#define GET_SRC_OFFSET(pin)            (((pin) * 6) + 4)
+
+#define DUTY_CYCLE(x)                  (0x8 + ((x) * 4))
+#define SSO_CON0                       0x2B0
+#define SSO_CON0_RZFL                  BIT(26)
+#define SSO_CON0_BLINK_R               BIT(30)
+#define SSO_CON0_SWU                   BIT(31)
+
+#define SSO_CON1                       0x2B4
+#define SSO_CON1_FCDSC                 GENMASK(21, 20) /* Fixed Divider Shift Clock */
+#define SSO_CON1_FPID                  GENMASK(24, 23)
+#define SSO_CON1_GPTD                  GENMASK(26, 25)
+#define SSO_CON1_US                    GENMASK(31, 30)
+
+#define SSO_CPU                        0x2B8
+#define SSO_CON2                       0x2C4
+#define SSO_CON3                       0x2C8
+
+/* Driver MACRO */
+#define MAX_PIN_NUM_PER_BANK           SZ_32
+#define MAX_GROUP_NUM                  SZ_4
+#define PINS_PER_GROUP                 SZ_8
+#define FPID_FREQ_RANK_MAX             SZ_4
+#define SSO_LED_MAX_NUM                SZ_32
+#define MAX_FREQ_RANK                  10
+#define DEF_GPTC_CLK_RATE              200000000
+#define SSO_DEF_BRIGHTNESS             LED_HALF
+#define DATA_CLK_EDGE                  0 /* 0-rising, 1-falling */
+
+static const u32 freq_div_tbl[] = {4000, 2000, 1000, 800};
+static const int freq_tbl[] = {2, 4, 8, 10, 50000, 100000, 200000, 250000};
+static const int shift_clk_freq_tbl[] = {25000000, 12500000, 6250000, 3125000};
+
+/*
+ * Update Source to update the SOUTs
+ * SW - Software has to update the SWU bit
+ * GPTC - General Purpose timer is used as clock source
+ * FPID - Divided FSC clock (FPID) is used as clock source
+ */
+enum {
+       US_SW = 0,
+       US_GPTC = 1,
+       US_FPID = 2
+};
+
+enum {
+       MAX_FPID_FREQ_RANK = 5, /* 1 to 4 */
+       MAX_GPTC_FREQ_RANK = 9, /* 5 to 8 */
+       MAX_GPTC_HS_FREQ_RANK = 10, /* 9 to 10 */
+};
+
+enum {
+       LED_GRP0_PIN_MAX = 24,
+       LED_GRP1_PIN_MAX = 29,
+       LED_GRP2_PIN_MAX = 32,
+};
+
+enum {
+       LED_GRP0_0_23,
+       LED_GRP1_24_28,
+       LED_GRP2_29_31,
+       LED_GROUP_MAX,
+};
+
+enum {
+       CLK_SRC_FPID = 0,
+       CLK_SRC_GPTC = 1,
+       CLK_SRC_GPTC_HS = 2,
+};
+
+struct sso_led_priv;
+
+struct sso_led_desc {
+       const char *name;
+       const char *default_trigger;
+       unsigned int brightness;
+       unsigned int blink_rate;
+       unsigned int retain_state_suspended:1;
+       unsigned int retain_state_shutdown:1;
+       unsigned int panic_indicator:1;
+       unsigned int hw_blink:1;
+       unsigned int hw_trig:1;
+       unsigned int blinking:1;
+       int freq_idx;
+       u32 pin;
+};
+
+struct sso_led {
+       struct list_head list;
+       struct led_classdev cdev;
+       struct gpio_desc *gpiod;
+       struct sso_led_desc desc;
+       struct sso_led_priv *priv;
+};
+
+struct sso_gpio {
+       struct gpio_chip chip;
+       int shift_clk_freq;
+       int edge;
+       int freq;
+       u32 pins;
+       u32 alloc_bitmap;
+};
+
+struct sso_led_priv {
+       struct regmap *mmap;
+       struct device *dev;
+       struct platform_device *pdev;
+       struct clk *gclk;
+       struct clk *fpid_clk;
+       u32 fpid_clkrate;
+       u32 gptc_clkrate;
+       u32 freq[MAX_FREQ_RANK];
+       struct list_head led_list;
+       struct sso_gpio gpio;
+};
+
+static int sso_get_blink_rate_idx(struct sso_led_priv *priv, u32 rate)
+{
+       int i;
+
+       for (i = 0; i < MAX_FREQ_RANK; i++) {
+               if (rate <= priv->freq[i])
+                       return i;
+       }
+
+       return -1;
+}
+
+static unsigned int sso_led_pin_to_group(u32 pin)
+{
+       if (pin < LED_GRP0_PIN_MAX)
+               return LED_GRP0_0_23;
+       else if (pin < LED_GRP1_PIN_MAX)
+               return LED_GRP1_24_28;
+       else
+               return LED_GRP2_29_31;
+}
+
+static u32 sso_led_get_freq_src(int freq_idx)
+{
+       if (freq_idx < MAX_FPID_FREQ_RANK)
+               return CLK_SRC_FPID;
+       else if (freq_idx < MAX_GPTC_FREQ_RANK)
+               return CLK_SRC_GPTC;
+       else
+               return CLK_SRC_GPTC_HS;
+}
+
+static u32 sso_led_pin_blink_off(u32 pin, unsigned int group)
+{
+       if (group == LED_GRP2_29_31)
+               return pin - LED_GRP1_PIN_MAX;
+       else if (group == LED_GRP1_24_28)
+               return pin - LED_GRP0_PIN_MAX;
+       else    /* led 0 - 23 in led 32 location */
+               return SSO_LED_MAX_NUM - LED_GRP1_PIN_MAX;
+}
+
+static struct sso_led *cdev_to_sso_led_data(struct led_classdev *led_cdev)
+{
+       return container_of(led_cdev, struct sso_led, cdev);
+}
+
+static void sso_led_freq_set(struct sso_led_priv *priv, u32 pin, int freq_idx)
+{
+       u32 reg, off, freq_src, val_freq;
+       u32 low, high, val;
+       unsigned int group;
+
+       if (!freq_idx)
+               return;
+
+       group = sso_led_pin_to_group(pin);
+       freq_src = sso_led_get_freq_src(freq_idx);
+       off = sso_led_pin_blink_off(pin, group);
+
+       if (group == LED_GRP0_0_23)
+               return;
+       else if (group == LED_GRP1_24_28)
+               reg = LED_BLINK_H8_0;
+       else
+               reg = LED_BLINK_H8_1;
+
+       if (freq_src == CLK_SRC_FPID)
+               val_freq = freq_idx - 1;
+       else if (freq_src == CLK_SRC_GPTC)
+               val_freq = freq_idx - MAX_FPID_FREQ_RANK;
+
+       /* set blink rate idx */
+       if (freq_src != CLK_SRC_GPTC_HS) {
+               low = GET_FREQ_OFFSET(off, freq_src);
+               high = low + 2;
+               val = val_freq << high;
+               regmap_update_bits(priv->mmap, reg, GENMASK(high, low), val);
+       }
+
+       /* select clock source */
+       low = GET_SRC_OFFSET(off);
+       high = low + 2;
+       val = freq_src << high;
+       regmap_update_bits(priv->mmap, reg, GENMASK(high, low), val);
+}
+
+static void sso_led_brightness_set(struct led_classdev *led_cdev,
+                                  enum led_brightness brightness)
+{
+       struct sso_led_priv *priv;
+       struct sso_led_desc *desc;
+       struct sso_led *led;
+       int val;
+
+       led = cdev_to_sso_led_data(led_cdev);
+       priv = led->priv;
+       desc = &led->desc;
+
+       desc->brightness = brightness;
+       regmap_write(priv->mmap, DUTY_CYCLE(desc->pin), brightness);
+
+       if (brightness == LED_OFF)
+               val = 0;
+       else
+               val = 1;
+
+       /* HW blink off */
+       if (desc->hw_blink && !val && desc->blinking) {
+               desc->blinking = 0;
+               regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin), 0);
+       } else if (desc->hw_blink && val && !desc->blinking) {
+               desc->blinking = 1;
+               regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin),
+                                  1 << desc->pin);
+       }
+
+       if (!desc->hw_trig && led->gpiod)
+               gpiod_set_value(led->gpiod, val);
+}
+
+static enum led_brightness sso_led_brightness_get(struct led_classdev *led_cdev)
+{
+       struct sso_led *led = cdev_to_sso_led_data(led_cdev);
+
+       return (enum led_brightness)led->desc.brightness;
+}
+
+static int
+delay_to_freq_idx(struct sso_led *led, unsigned long *delay_on,
+                 unsigned long *delay_off)
+{
+       struct sso_led_priv *priv = led->priv;
+       unsigned long delay;
+       int freq_idx;
+       u32 freq;
+
+       if (!*delay_on && !*delay_off) {
+               /* priv->freq[0] is 0, so fall back to the lowest real rate */
+               *delay_on = *delay_off = (1000 / priv->freq[1]) / 2;
+               return 1;
+       }
+
+       delay = *delay_on + *delay_off;
+       freq = 1000 / delay;
+
+       freq_idx = sso_get_blink_rate_idx(priv, freq);
+       if (freq_idx == -1)
+               freq_idx = MAX_FREQ_RANK - 1;
+
+       delay = 1000 / priv->freq[freq_idx];
+       *delay_on = *delay_off = delay / 2;
+
+       if (!*delay_on)
+               *delay_on = *delay_off = 1;
+
+       return freq_idx;
+}
+
+static int
+sso_led_blink_set(struct led_classdev *led_cdev, unsigned long *delay_on,
+                 unsigned long *delay_off)
+{
+       struct sso_led_priv *priv;
+       struct sso_led *led;
+       int freq_idx;
+
+       led = cdev_to_sso_led_data(led_cdev);
+       priv = led->priv;
+       freq_idx = delay_to_freq_idx(led, delay_on, delay_off);
+
+       sso_led_freq_set(priv, led->desc.pin, freq_idx);
+       regmap_update_bits(priv->mmap, SSO_CON2, BIT(led->desc.pin),
+                          1 << led->desc.pin);
+       led->desc.freq_idx = freq_idx;
+       led->desc.blink_rate = priv->freq[freq_idx];
+       led->desc.blinking = 1;
+
+       return 1;
+}
+
+static void sso_led_hw_cfg(struct sso_led_priv *priv, struct sso_led *led)
+{
+       struct sso_led_desc *desc = &led->desc;
+
+       /* set freq */
+       if (desc->hw_blink) {
+               sso_led_freq_set(priv, desc->pin, desc->freq_idx);
+               regmap_update_bits(priv->mmap, SSO_CON2, BIT(desc->pin),
+                                  1 << desc->pin);
+       }
+
+       if (desc->hw_trig)
+               regmap_update_bits(priv->mmap, SSO_CON3, BIT(desc->pin),
+                                  1 << desc->pin);
+
+       /* set brightness */
+       regmap_write(priv->mmap, DUTY_CYCLE(desc->pin), desc->brightness);
+
+       /* enable output */
+       if (!desc->hw_trig && desc->brightness)
+               gpiod_set_value(led->gpiod, 1);
+}
+
+static int sso_create_led(struct sso_led_priv *priv, struct sso_led *led,
+                         struct fwnode_handle *child)
+{
+       struct sso_led_desc *desc = &led->desc;
+       struct led_init_data init_data;
+       int err;
+
+       init_data.fwnode = child;
+       init_data.devicename = SSO_DEV_NAME;
+       init_data.default_label = ":";
+
+       led->cdev.default_trigger = desc->default_trigger;
+       led->cdev.brightness_set = sso_led_brightness_set;
+       led->cdev.brightness_get = sso_led_brightness_get;
+       led->cdev.brightness = desc->brightness;
+       led->cdev.max_brightness = LED_FULL;
+
+       if (desc->retain_state_shutdown)
+               led->cdev.flags |= LED_RETAIN_AT_SHUTDOWN;
+       if (desc->retain_state_suspended)
+               led->cdev.flags |= LED_CORE_SUSPENDRESUME;
+       if (desc->panic_indicator)
+               led->cdev.flags |= LED_PANIC_INDICATOR;
+
+       if (desc->hw_blink)
+               led->cdev.blink_set = sso_led_blink_set;
+
+       sso_led_hw_cfg(priv, led);
+
+       err = devm_led_classdev_register_ext(priv->dev, &led->cdev, &init_data);
+       if (err)
+               return err;
+
+       list_add(&led->list, &priv->led_list);
+
+       return 0;
+}
+
+static void sso_init_freq(struct sso_led_priv *priv)
+{
+       int i;
+
+       priv->freq[0] = 0;
+       for (i = 1; i < MAX_FREQ_RANK; i++) {
+               if (i < MAX_FPID_FREQ_RANK) {
+                       priv->freq[i] = priv->fpid_clkrate / freq_div_tbl[i - 1];
+               } else if (i < MAX_GPTC_FREQ_RANK) {
+                       priv->freq[i] = priv->gptc_clkrate /
+                               freq_div_tbl[i - MAX_FPID_FREQ_RANK];
+               } else if (i < MAX_GPTC_HS_FREQ_RANK) {
+                       priv->freq[i] = priv->gptc_clkrate;
+               }
+       }
+}
+
+static int sso_gpio_request(struct gpio_chip *chip, unsigned int offset)
+{
+       struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+       if (priv->gpio.alloc_bitmap & BIT(offset))
+               return -EINVAL;
+
+       priv->gpio.alloc_bitmap |= BIT(offset);
+       regmap_write(priv->mmap, DUTY_CYCLE(offset), 0xFF);
+
+       return 0;
+}
+
+static void sso_gpio_free(struct gpio_chip *chip, unsigned int offset)
+{
+       struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+       priv->gpio.alloc_bitmap &= ~BIT(offset);
+       regmap_write(priv->mmap, DUTY_CYCLE(offset), 0x0);
+}
+
+static int sso_gpio_get_dir(struct gpio_chip *chip, unsigned int offset)
+{
+       return GPIOF_DIR_OUT;
+}
+
+static int
+sso_gpio_dir_out(struct gpio_chip *chip, unsigned int offset, int value)
+{
+       struct sso_led_priv *priv = gpiochip_get_data(chip);
+       bool bit = !!value;
+
+       regmap_update_bits(priv->mmap, SSO_CPU, BIT(offset), bit << offset);
+       if (!priv->gpio.freq)
+               regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_SWU,
+                                  SSO_CON0_SWU);
+
+       return 0;
+}
+
+static int sso_gpio_get(struct gpio_chip *chip, unsigned int offset)
+{
+       struct sso_led_priv *priv = gpiochip_get_data(chip);
+       u32 reg_val;
+
+       regmap_read(priv->mmap, SSO_CPU, &reg_val);
+
+       return !!(reg_val & BIT(offset));
+}
+
+static void sso_gpio_set(struct gpio_chip *chip, unsigned int offset, int value)
+{
+       struct sso_led_priv *priv = gpiochip_get_data(chip);
+
+       regmap_update_bits(priv->mmap, SSO_CPU, BIT(offset), value << offset);
+       if (!priv->gpio.freq)
+               regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_SWU,
+                                  SSO_CON0_SWU);
+}
+
+static int sso_gpio_gc_init(struct device *dev, struct sso_led_priv *priv)
+{
+       struct gpio_chip *gc = &priv->gpio.chip;
+
+       gc->request             = sso_gpio_request;
+       gc->free                = sso_gpio_free;
+       gc->get_direction       = sso_gpio_get_dir;
+       gc->direction_output    = sso_gpio_dir_out;
+       gc->get                 = sso_gpio_get;
+       gc->set                 = sso_gpio_set;
+
+       gc->label               = "lgm-sso";
+       gc->base                = -1;
+       /* To exclude pins from control, use "gpio-reserved-ranges" */
+       gc->ngpio               = priv->gpio.pins;
+       gc->parent              = dev;
+       gc->owner               = THIS_MODULE;
+       gc->of_node             = dev->of_node;
+
+       return devm_gpiochip_add_data(dev, gc, priv);
+}
+
+static int sso_gpio_get_freq_idx(int freq)
+{
+       int idx;
+
+       for (idx = 0; idx < ARRAY_SIZE(freq_tbl); idx++) {
+               if (freq <= freq_tbl[idx])
+                       return idx;
+       }
+
+       return -1;
+}
+
+static void sso_register_shift_clk(struct sso_led_priv *priv)
+{
+       int idx, size = ARRAY_SIZE(shift_clk_freq_tbl);
+       u32 val = 0;
+
+       for (idx = 0; idx < size; idx++) {
+               if (shift_clk_freq_tbl[idx] <= priv->gpio.shift_clk_freq) {
+                       val = idx;
+                       break;
+               }
+       }
+
+       if (idx == size)
+               dev_warn(priv->dev, "%s: Invalid freq %d\n",
+                        __func__, priv->gpio.shift_clk_freq);
+
+       regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_FCDSC,
+                          FIELD_PREP(SSO_CON1_FCDSC, val));
+}
+
+static int sso_gpio_freq_set(struct sso_led_priv *priv)
+{
+       int freq_idx;
+       u32 val;
+
+       freq_idx = sso_gpio_get_freq_idx(priv->gpio.freq);
+       if (freq_idx == -1)
+               freq_idx = ARRAY_SIZE(freq_tbl) - 1;
+
+       val = freq_idx % FPID_FREQ_RANK_MAX;
+
+       if (!priv->gpio.freq) {
+               regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R, 0);
+               regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+                                  FIELD_PREP(SSO_CON1_US, US_SW));
+       } else if (freq_idx < FPID_FREQ_RANK_MAX) {
+               regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R,
+                                  SSO_CON0_BLINK_R);
+               regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+                                  FIELD_PREP(SSO_CON1_US, US_FPID));
+               regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_FPID,
+                                  FIELD_PREP(SSO_CON1_FPID, val));
+       } else {
+               regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_BLINK_R,
+                                  SSO_CON0_BLINK_R);
+               regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_US,
+                                  FIELD_PREP(SSO_CON1_US, US_GPTC));
+               regmap_update_bits(priv->mmap, SSO_CON1, SSO_CON1_GPTD,
+                                  FIELD_PREP(SSO_CON1_GPTD, val));
+       }
+
+       return 0;
+}
+
+static int sso_gpio_hw_init(struct sso_led_priv *priv)
+{
+       u32 activate;
+       int i, err;
+
+       /* Clear all duty cycles */
+       for (i = 0; i < priv->gpio.pins; i++) {
+               err = regmap_write(priv->mmap, DUTY_CYCLE(i), 0);
+               if (err)
+                       return err;
+       }
+
+       /* 4 groups for total 32 pins */
+       for (i = 1; i <= MAX_GROUP_NUM; i++) {
+               activate = !!(i * PINS_PER_GROUP <= priv->gpio.pins ||
+                             priv->gpio.pins > (i - 1) * PINS_PER_GROUP);
+               err = regmap_update_bits(priv->mmap, SSO_CON1, BIT(i - 1),
+                                        activate << (i - 1));
+               if (err)
+                       return err;
+       }
+
+       /* no HW-controlled pins by default */
+       err = regmap_write(priv->mmap, SSO_CON3, 0);
+       if (err)
+               return err;
+
+       /* no blinking on any pin by default */
+       err = regmap_write(priv->mmap, SSO_CON2, 0);
+       if (err)
+               return err;
+
+       /* OUTPUT 0 by default */
+       err = regmap_write(priv->mmap, SSO_CPU, 0);
+       if (err)
+               return err;
+
+       /* update edge */
+       err = regmap_update_bits(priv->mmap, SSO_CON0, SSO_CON0_RZFL,
+                                FIELD_PREP(SSO_CON0_RZFL, priv->gpio.edge));
+       if (err)
+               return err;
+
+       /* Set GPIO update rate */
+       sso_gpio_freq_set(priv);
+
+       /* Register shift clock */
+       sso_register_shift_clk(priv);
+
+       return 0;
+}
+
+static void sso_led_shutdown(struct sso_led *led)
+{
+       struct sso_led_priv *priv = led->priv;
+
+       /* unregister led */
+       devm_led_classdev_unregister(priv->dev, &led->cdev);
+
+       /* clear HW control bit */
+       if (led->desc.hw_trig)
+               regmap_update_bits(priv->mmap, SSO_CON3, BIT(led->desc.pin), 0);
+
+       if (led->gpiod)
+               devm_gpiod_put(priv->dev, led->gpiod);
+
+       led->priv = NULL;
+}
+
+static int
+__sso_led_dt_parse(struct sso_led_priv *priv, struct fwnode_handle *fw_ssoled)
+{
+       struct fwnode_handle *fwnode_child;
+       struct device *dev = priv->dev;
+       struct sso_led_desc *desc;
+       struct sso_led *led;
+       struct list_head *p;
+       const char *tmp;
+       u32 prop;
+       int ret;
+
+       fwnode_for_each_child_node(fw_ssoled, fwnode_child) {
+               led = devm_kzalloc(dev, sizeof(*led), GFP_KERNEL);
+               if (!led)
+                       return -ENOMEM;
+
+               INIT_LIST_HEAD(&led->list);
+               led->priv = priv;
+               desc = &led->desc;
+
+               led->gpiod = devm_fwnode_get_gpiod_from_child(dev, NULL,
+                                                             fwnode_child,
+                                                             GPIOD_ASIS, NULL);
+               if (IS_ERR(led->gpiod)) {
+                       dev_err(dev, "led: get gpio fail!\n");
+                       goto __dt_err;
+               }
+
+               fwnode_property_read_string(fwnode_child,
+                                           "linux,default-trigger",
+                                           &desc->default_trigger);
+
+               if (fwnode_property_present(fwnode_child,
+                                           "retain-state-suspended"))
+                       desc->retain_state_suspended = 1;
+
+               if (fwnode_property_present(fwnode_child,
+                                           "retain-state-shutdown"))
+                       desc->retain_state_shutdown = 1;
+
+               if (fwnode_property_present(fwnode_child, "panic-indicator"))
+                       desc->panic_indicator = 1;
+
+               ret = fwnode_property_read_u32(fwnode_child, "reg", &prop);
+               if (ret != 0 || prop >= SSO_LED_MAX_NUM) {
+                       dev_err(dev, "invalid LED pin:%u\n", prop);
+                       goto __dt_err;
+               }
+               desc->pin = prop;
+
+               if (fwnode_property_present(fwnode_child, "intel,sso-hw-blink"))
+                       desc->hw_blink = 1;
+
+               desc->hw_trig = fwnode_property_read_bool(fwnode_child,
+                                                         "intel,sso-hw-trigger");
+               if (desc->hw_trig) {
+                       desc->default_trigger = NULL;
+                       desc->retain_state_shutdown = 0;
+                       desc->retain_state_suspended = 0;
+                       desc->panic_indicator = 0;
+                       desc->hw_blink = 0;
+               }
+
+               if (fwnode_property_read_u32(fwnode_child,
+                                            "intel,sso-blink-rate-hz", &prop)) {
+                       /* default first freq rate */
+                       desc->freq_idx = 0;
+                       desc->blink_rate = priv->freq[desc->freq_idx];
+               } else {
+                       desc->freq_idx = sso_get_blink_rate_idx(priv, prop);
+                       if (desc->freq_idx == -1)
+                               desc->freq_idx = MAX_FREQ_RANK - 1;
+
+                       desc->blink_rate = priv->freq[desc->freq_idx];
+               }
+
+               if (!fwnode_property_read_string(fwnode_child, "default-state", &tmp)) {
+                       if (!strcmp(tmp, "on"))
+                               desc->brightness = LED_FULL;
+               }
+
+               if (sso_create_led(priv, led, fwnode_child))
+                       goto __dt_err;
+       }
+       fwnode_handle_put(fw_ssoled);
+
+       return 0;
+__dt_err:
+       fwnode_handle_put(fw_ssoled);
+       /* unregister leds */
+       list_for_each(p, &priv->led_list) {
+               led = list_entry(p, struct sso_led, list);
+               sso_led_shutdown(led);
+       }
+
+       return -EINVAL;
+}
+
+static int sso_led_dt_parse(struct sso_led_priv *priv)
+{
+       struct fwnode_handle *fwnode = dev_fwnode(priv->dev);
+       struct fwnode_handle *fw_ssoled;
+       struct device *dev = priv->dev;
+       int count;
+       int ret;
+
+       count = device_get_child_node_count(dev);
+       if (!count)
+               return 0;
+
+       fw_ssoled = fwnode_get_named_child_node(fwnode, "ssoled");
+       if (fw_ssoled) {
+               ret = __sso_led_dt_parse(priv, fw_ssoled);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static int sso_probe_gpios(struct sso_led_priv *priv)
+{
+       struct device *dev = priv->dev;
+       int ret;
+
+       if (device_property_read_u32(dev, "ngpios", &priv->gpio.pins))
+               priv->gpio.pins = MAX_PIN_NUM_PER_BANK;
+
+       if (priv->gpio.pins > MAX_PIN_NUM_PER_BANK)
+               return -EINVAL;
+
+       if (device_property_read_u32(dev, "intel,sso-update-rate-hz",
+                                    &priv->gpio.freq))
+               priv->gpio.freq = 0;
+
+       priv->gpio.edge = DATA_CLK_EDGE;
+       priv->gpio.shift_clk_freq = -1;
+
+       ret = sso_gpio_hw_init(priv);
+       if (ret)
+               return ret;
+
+       return sso_gpio_gc_init(dev, priv);
+}
+
+static void sso_clk_disable(void *data)
+{
+       struct sso_led_priv *priv = data;
+
+       clk_disable_unprepare(priv->fpid_clk);
+       clk_disable_unprepare(priv->gclk);
+}
+
+static int intel_sso_led_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct sso_led_priv *priv;
+       int ret;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+
+       priv->pdev = pdev;
+       priv->dev = dev;
+
+       /* gate clock */
+       priv->gclk = devm_clk_get(dev, "sso");
+       if (IS_ERR(priv->gclk)) {
+               dev_err(dev, "get sso gate clock failed!\n");
+               return PTR_ERR(priv->gclk);
+       }
+
+       ret = clk_prepare_enable(priv->gclk);
+       if (ret) {
+               dev_err(dev, "Failed to prepate/enable sso gate clock!\n");
+               return ret;
+       }
+
+       priv->fpid_clk = devm_clk_get(dev, "fpid");
+       if (IS_ERR(priv->fpid_clk)) {
+               dev_err(dev, "Failed to get fpid clock!\n");
+               return PTR_ERR(priv->fpid_clk);
+       }
+
+       ret = clk_prepare_enable(priv->fpid_clk);
+       if (ret) {
+               dev_err(dev, "Failed to prepare/enable fpid clock!\n");
+               return ret;
+       }
+       priv->fpid_clkrate = clk_get_rate(priv->fpid_clk);
+
+       ret = devm_add_action_or_reset(dev, sso_clk_disable, priv);
+       if (ret) {
+               dev_err(dev, "Failed to devm_add_action_or_reset, %d\n", ret);
+               return ret;
+       }
+
+       priv->mmap = syscon_node_to_regmap(dev->of_node);
+       if (IS_ERR(priv->mmap)) {
+               dev_err(dev, "Failed to map iomem!\n");
+               return PTR_ERR(priv->mmap);
+       }
+
+       ret = sso_probe_gpios(priv);
+       if (ret) {
+               regmap_exit(priv->mmap);
+               return ret;
+       }
+
+       INIT_LIST_HEAD(&priv->led_list);
+
+       platform_set_drvdata(pdev, priv);
+       sso_init_freq(priv);
+
+       priv->gptc_clkrate = DEF_GPTC_CLK_RATE;
+
+       ret = sso_led_dt_parse(priv);
+       if (ret) {
+               regmap_exit(priv->mmap);
+               return ret;
+       }
+       dev_info(priv->dev, "sso LED init success!\n");
+
+       return 0;
+}
+
+static int intel_sso_led_remove(struct platform_device *pdev)
+{
+       struct sso_led_priv *priv;
+       struct list_head *pos, *n;
+       struct sso_led *led;
+
+       priv = platform_get_drvdata(pdev);
+
+       list_for_each_safe(pos, n, &priv->led_list) {
+               list_del(pos);
+               led = list_entry(pos, struct sso_led, list);
+               sso_led_shutdown(led);
+       }
+
+       clk_disable_unprepare(priv->fpid_clk);
+       clk_disable_unprepare(priv->gclk);
+       regmap_exit(priv->mmap);
+
+       return 0;
+}
+
+static const struct of_device_id of_sso_led_match[] = {
+       { .compatible = "intel,lgm-ssoled" },
+       {}
+};
+
+MODULE_DEVICE_TABLE(of, of_sso_led_match);
+
+static struct platform_driver intel_sso_led_driver = {
+       .probe          = intel_sso_led_probe,
+       .remove         = intel_sso_led_remove,
+       .driver         = {
+                       .name = "lgm-ssoled",
+                       .of_match_table = of_match_ptr(of_sso_led_match),
+       },
+};
+
+module_platform_driver(intel_sso_led_driver);
+
+MODULE_DESCRIPTION("Intel SSO LED/GPIO driver");
+MODULE_LICENSE("GPL v2");
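
For reference, the led_classdev contract that leds-lgm-sso.c fills in: brightness_set pushes a 0..max_brightness value, and blink_set may round the requested delays to what the hardware supports, writing the rounded values back; returning 0 tells the LED core the hardware has taken over the blinking. A minimal sketch, with illustrative names only:

    static void my_brightness_set(struct led_classdev *cdev,
                                  enum led_brightness b)
    {
            /* push b (0..max_brightness) to the hardware */
    }

    static int my_blink_set(struct led_classdev *cdev, unsigned long *delay_on,
                            unsigned long *delay_off)
    {
            /* round the delays to a supported rate, write them back */
            return 0;       /* 0: hardware now blinks autonomously */
    }

    static struct led_classdev my_led = {
            .name           = "example:green:status",
            .max_brightness = LED_FULL,
            .brightness_set = my_brightness_set,
            .blink_set      = my_blink_set,
    };
    /* register with devm_led_classdev_register(dev, &my_led) in probe */
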
index 131ca83..2e495ff 100644 (file)
@@ -145,8 +145,7 @@ static void led_remove_brightness_hw_changed(struct led_classdev *led_cdev)
        device_remove_file(led_cdev->dev, &dev_attr_brightness_hw_changed);
 }
 
-void led_classdev_notify_brightness_hw_changed(struct led_classdev *led_cdev,
-                                              enum led_brightness brightness)
+void led_classdev_notify_brightness_hw_changed(struct led_classdev *led_cdev, unsigned int brightness)
 {
        if (WARN_ON(!led_cdev->brightness_hw_changed_kn))
                return;
index c4e780b..8eb8054 100644 (file)
@@ -39,8 +39,7 @@ const char * const led_colors[LED_COLOR_ID_MAX] = {
 };
 EXPORT_SYMBOL_GPL(led_colors);
 
-static int __led_set_brightness(struct led_classdev *led_cdev,
-                               enum led_brightness value)
+static int __led_set_brightness(struct led_classdev *led_cdev, unsigned int value)
 {
        if (!led_cdev->brightness_set)
                return -ENOTSUPP;
@@ -50,8 +49,7 @@ static int __led_set_brightness(struct led_classdev *led_cdev,
        return 0;
 }
 
-static int __led_set_brightness_blocking(struct led_classdev *led_cdev,
-                                        enum led_brightness value)
+static int __led_set_brightness_blocking(struct led_classdev *led_cdev, unsigned int value)
 {
        if (!led_cdev->brightness_set_blocking)
                return -ENOTSUPP;
@@ -240,8 +238,7 @@ void led_stop_software_blink(struct led_classdev *led_cdev)
 }
 EXPORT_SYMBOL_GPL(led_stop_software_blink);
 
-void led_set_brightness(struct led_classdev *led_cdev,
-                       enum led_brightness brightness)
+void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness)
 {
        /*
         * If software blink is active, delay brightness setting
@@ -253,7 +250,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
                 * work queue task to avoid problems in case we are called
                 * from hard irq context.
                 */
-               if (brightness == LED_OFF) {
+               if (!brightness) {
                        set_bit(LED_BLINK_DISABLE, &led_cdev->work_flags);
                        schedule_work(&led_cdev->set_brightness_work);
                } else {
@@ -268,8 +265,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
 }
 EXPORT_SYMBOL_GPL(led_set_brightness);
 
-void led_set_brightness_nopm(struct led_classdev *led_cdev,
-                             enum led_brightness value)
+void led_set_brightness_nopm(struct led_classdev *led_cdev, unsigned int value)
 {
        /* Use brightness_set op if available, it is guaranteed not to sleep */
        if (!__led_set_brightness(led_cdev, value))
@@ -281,8 +277,7 @@ void led_set_brightness_nopm(struct led_classdev *led_cdev,
 }
 EXPORT_SYMBOL_GPL(led_set_brightness_nopm);
 
-void led_set_brightness_nosleep(struct led_classdev *led_cdev,
-                               enum led_brightness value)
+void led_set_brightness_nosleep(struct led_classdev *led_cdev, unsigned int value)
 {
        led_cdev->brightness = min(value, led_cdev->max_brightness);
 
@@ -293,8 +288,7 @@ void led_set_brightness_nosleep(struct led_classdev *led_cdev,
 }
 EXPORT_SYMBOL_GPL(led_set_brightness_nosleep);
 
-int led_set_brightness_sync(struct led_classdev *led_cdev,
-                           enum led_brightness value)
+int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value)
 {
        if (led_cdev->blink_delay_on || led_cdev->blink_delay_off)
                return -EBUSY;
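
Widening these parameters from enum led_brightness to unsigned int reflects that brightness is just an integer in 0..max_brightness; the enum only names a few conventional points (LED_OFF, LED_HALF, LED_FULL) and max_brightness is driver-defined. The clamp that makes any value safe, as the nosleep path already does:

    static unsigned int example_clamp(struct led_classdev *cdev,
                                      unsigned int value)
    {
            return value < cdev->max_brightness ? value : cdev->max_brightness;
    }
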
index 7fd557a..c409b80 100644 (file)
@@ -83,6 +83,7 @@ static const struct apu_led_profile apu1_led_profile[] = {
 };
 
 static const struct dmi_system_id apu_led_dmi_table[] __initconst = {
+       /* PC Engines APU with factory bios "SageBios_PCEngines_APU-45" */
        {
                .ident = "apu",
                .matches = {
@@ -90,6 +91,14 @@ static const struct dmi_system_id apu_led_dmi_table[] __initconst = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "APU")
                }
        },
+       /* PC Engines APU with "Mainline" bios >= 4.6.8 */
+       {
+               .ident = "apu",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "PC Engines"),
+                       DMI_MATCH(DMI_PRODUCT_NAME, "apu1")
+               }
+       },
        {}
 };
 MODULE_DEVICE_TABLE(dmi, apu_led_dmi_table);
@@ -173,7 +182,7 @@ static int __init apu_led_init(void)
        int err;
 
        if (!(dmi_match(DMI_SYS_VENDOR, "PC Engines") &&
-             dmi_match(DMI_PRODUCT_NAME, "APU"))) {
+             (dmi_match(DMI_PRODUCT_NAME, "APU") || dmi_match(DMI_PRODUCT_NAME, "apu1")))) {
                pr_err("No PC Engines APUv1 board detected. For APUv2,3 support, enable CONFIG_PCENGINES_APU2\n");
                return -ENODEV;
        }
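
The second dmi_system_id entry covers boards whose mainline coreboot reports the product name as "apu1" rather than "APU". The general allow-list pattern, with hypothetical identifiers:

    static const struct dmi_system_id example_ids[] __initconst = {
            {
                    .ident = "board-a",
                    .matches = {
                            DMI_MATCH(DMI_SYS_VENDOR, "Vendor"),
                            DMI_MATCH(DMI_PRODUCT_NAME, "NAME-A"),
                    },
            },
            {
                    .ident = "board-b",
                    .matches = {
                            DMI_MATCH(DMI_SYS_VENDOR, "Vendor"),
                            DMI_MATCH(DMI_PRODUCT_NAME, "name-b"),
                    },
            },
            { }     /* terminator */
    };
    /* dmi_check_system(example_ids) returns the number of matching entries */
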
index e11fe17..b4e1fdf 100644 (file)
@@ -192,13 +192,13 @@ static int store_color_common(struct device *dev, const char *buf, int color)
        return 0;
 }
 
-static ssize_t show_red(struct device *dev, struct device_attribute *attr,
+static ssize_t red_show(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        return show_color_common(dev, buf, RED);
 }
 
-static ssize_t store_red(struct device *dev, struct device_attribute *attr,
+static ssize_t red_store(struct device *dev, struct device_attribute *attr,
                         const char *buf, size_t count)
 {
        int ret;
@@ -209,15 +209,15 @@ static ssize_t store_red(struct device *dev, struct device_attribute *attr,
        return count;
 }
 
-static DEVICE_ATTR(red, S_IRUGO | S_IWUSR, show_red, store_red);
+static DEVICE_ATTR_RW(red);
 
-static ssize_t show_green(struct device *dev, struct device_attribute *attr,
+static ssize_t green_show(struct device *dev, struct device_attribute *attr,
                          char *buf)
 {
        return show_color_common(dev, buf, GREEN);
 }
 
-static ssize_t store_green(struct device *dev, struct device_attribute *attr,
+static ssize_t green_store(struct device *dev, struct device_attribute *attr,
                           const char *buf, size_t count)
 {
 
@@ -229,15 +229,15 @@ static ssize_t store_green(struct device *dev, struct device_attribute *attr,
        return count;
 }
 
-static DEVICE_ATTR(green, S_IRUGO | S_IWUSR, show_green, store_green);
+static DEVICE_ATTR_RW(green);
 
-static ssize_t show_blue(struct device *dev, struct device_attribute *attr,
+static ssize_t blue_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
 {
        return show_color_common(dev, buf, BLUE);
 }
 
-static ssize_t store_blue(struct device *dev, struct device_attribute *attr,
+static ssize_t blue_store(struct device *dev, struct device_attribute *attr,
                          const char *buf, size_t count)
 {
        int ret;
@@ -248,16 +248,16 @@ static ssize_t store_blue(struct device *dev, struct device_attribute *attr,
        return count;
 }
 
-static DEVICE_ATTR(blue, S_IRUGO | S_IWUSR, show_blue, store_blue);
+static DEVICE_ATTR_RW(blue);
 
-static ssize_t show_test(struct device *dev, struct device_attribute *attr,
+static ssize_t test_show(struct device *dev, struct device_attribute *attr,
                         char *buf)
 {
        return scnprintf(buf, PAGE_SIZE,
                         "#Write into test to start test sequence!#\n");
 }
 
-static ssize_t store_test(struct device *dev, struct device_attribute *attr,
+static ssize_t test_store(struct device *dev, struct device_attribute *attr,
                          const char *buf, size_t count)
 {
 
@@ -273,7 +273,7 @@ static ssize_t store_test(struct device *dev, struct device_attribute *attr,
        return count;
 }
 
-static DEVICE_ATTR(test, S_IRUGO | S_IWUSR, show_test, store_test);
+static DEVICE_ATTR_RW(test);
 
 /* TODO: HSB, fade, timeadj, script ... */
 
index 93f5b1b..b5d5e22 100644 (file)
@@ -96,7 +96,8 @@ static int create_gpio_led(const struct gpio_led *template,
        } else {
                state = (template->default_state == LEDS_GPIO_DEFSTATE_ON);
        }
-       led_dat->cdev.brightness = state ? LED_FULL : LED_OFF;
+       led_dat->cdev.brightness = state;
+       led_dat->cdev.max_brightness = 1;
        if (!template->retain_state_suspended)
                led_dat->cdev.flags |= LED_CORE_SUSPENDRESUME;
        if (template->panic_indicator)
index 2f8362f..2db455e 100644 (file)
@@ -346,8 +346,8 @@ static void lm3530_brightness_set(struct led_classdev *led_cdev,
        }
 }
 
-static ssize_t lm3530_mode_get(struct device *dev,
-               struct device_attribute *attr, char *buf)
+static ssize_t mode_show(struct device *dev,
+                        struct device_attribute *attr, char *buf)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct lm3530_data *drvdata;
@@ -365,8 +365,8 @@ static ssize_t lm3530_mode_get(struct device *dev,
        return len;
 }
 
-static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute
-                                  *attr, const char *buf, size_t size)
+static ssize_t mode_store(struct device *dev, struct device_attribute
+                         *attr, const char *buf, size_t size)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct lm3530_data *drvdata;
@@ -397,7 +397,7 @@ static ssize_t lm3530_mode_set(struct device *dev, struct device_attribute
 
        return sizeof(drvdata->mode);
 }
-static DEVICE_ATTR(mode, 0644, lm3530_mode_get, lm3530_mode_set);
+static DEVICE_ATTR_RW(mode);
 
 static struct attribute *lm3530_attrs[] = {
        &dev_attr_mode.attr,
index 9dd2058..43d5970 100644 (file)
@@ -608,7 +608,7 @@ static struct attribute *lm3533_led_attributes[] = {
 static umode_t lm3533_led_attr_is_visible(struct kobject *kobj,
                                             struct attribute *attr, int n)
 {
-       struct device *dev = container_of(kobj, struct device, kobj);
+       struct device *dev = kobj_to_dev(kobj);
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct lm3533_led *led = to_lm3533_led(led_cdev);
        umode_t mode = attr->mode;
index 1505521..2d3e118 100644 (file)
@@ -349,9 +349,9 @@ static int lm355x_indicator_brightness_set(struct led_classdev *cdev,
 }
 
 /* indicator pattern only for lm3556*/
-static ssize_t lm3556_indicator_pattern_store(struct device *dev,
-                                             struct device_attribute *attr,
-                                             const char *buf, size_t size)
+static ssize_t pattern_store(struct device *dev,
+                            struct device_attribute *attr,
+                            const char *buf, size_t size)
 {
        ssize_t ret;
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -381,7 +381,7 @@ out:
        return ret;
 }
 
-static DEVICE_ATTR(pattern, S_IWUSR, NULL, lm3556_indicator_pattern_store);
+static DEVICE_ATTR_WO(pattern);
 
 static struct attribute *lm355x_indicator_attrs[] = {
        &dev_attr_pattern.attr,
index 62c1487..8007b82 100644 (file)
@@ -165,9 +165,9 @@ static int lm3642_control(struct lm3642_chip_data *chip,
 /* torch */
 
 /* torch pin config for lm3642 */
-static ssize_t lm3642_torch_pin_store(struct device *dev,
-                                     struct device_attribute *attr,
-                                     const char *buf, size_t size)
+static ssize_t torch_pin_store(struct device *dev,
+                              struct device_attribute *attr,
+                              const char *buf, size_t size)
 {
        ssize_t ret;
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -193,7 +193,7 @@ static ssize_t lm3642_torch_pin_store(struct device *dev,
        return size;
 }
 
-static DEVICE_ATTR(torch_pin, S_IWUSR, NULL, lm3642_torch_pin_store);
+static DEVICE_ATTR_WO(torch_pin);
 
 static int lm3642_torch_brightness_set(struct led_classdev *cdev,
                                        enum led_brightness brightness)
@@ -212,9 +212,9 @@ static int lm3642_torch_brightness_set(struct led_classdev *cdev,
 /* flash */
 
 /* strobe pin config for lm3642*/
-static ssize_t lm3642_strobe_pin_store(struct device *dev,
-                                      struct device_attribute *attr,
-                                      const char *buf, size_t size)
+static ssize_t strobe_pin_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t size)
 {
        ssize_t ret;
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
@@ -240,7 +240,7 @@ static ssize_t lm3642_strobe_pin_store(struct device *dev,
        return size;
 }
 
-static DEVICE_ATTR(strobe_pin, S_IWUSR, NULL, lm3642_strobe_pin_store);
+static DEVICE_ATTR_WO(strobe_pin);
 
 static int lm3642_strobe_brightness_set(struct led_classdev *cdev,
                                         enum led_brightness brightness)
index f13117e..0623061 100644 (file)
@@ -6,10 +6,9 @@
 #include <linux/i2c.h>
 #include <linux/init.h>
 #include <linux/leds.h>
+#include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/mutex.h>
-#include <linux/of.h>
-#include <linux/of_gpio.h>
 #include <linux/regmap.h>
 #include <linux/regulator/consumer.h>
 #include <linux/slab.h>
@@ -322,7 +321,7 @@ static int lp50xx_brightness_set(struct led_classdev *cdev,
 
        ret = regmap_write(led->priv->regmap, reg_val, brightness);
        if (ret) {
-               dev_err(&led->priv->client->dev,
+               dev_err(led->priv->dev,
                        "Cannot write brightness value %d\n", ret);
                goto out;
        }
@@ -338,7 +337,7 @@ static int lp50xx_brightness_set(struct led_classdev *cdev,
                ret = regmap_write(led->priv->regmap, reg_val,
                                   mc_dev->subled_info[i].intensity);
                if (ret) {
-                       dev_err(&led->priv->client->dev,
+                       dev_err(led->priv->dev,
                                "Cannot write intensity value %d\n", ret);
                        goto out;
                }
@@ -360,8 +359,8 @@ static int lp50xx_set_banks(struct lp50xx *priv, u32 led_banks[])
                        bank_enable_mask |= (1 << led_banks[i]);
        }
 
-       led_config_lo = (u8)(bank_enable_mask & 0xff);
-       led_config_hi = (u8)(bank_enable_mask >> 8) & 0xff;
+       led_config_lo = bank_enable_mask;
+       led_config_hi = bank_enable_mask >> 8;
 
        ret = regmap_write(priv->regmap, LP50XX_LED_CFG0, led_config_lo);
        if (ret)
@@ -382,11 +381,9 @@ static int lp50xx_enable_disable(struct lp50xx *priv, int enable_disable)
 {
        int ret;
 
-       if (priv->enable_gpio) {
-               ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
-               if (ret)
-                       return ret;
-       }
+       ret = gpiod_direction_output(priv->enable_gpio, enable_disable);
+       if (ret)
+               return ret;
 
        if (enable_disable)
                return regmap_write(priv->regmap, LP50XX_DEV_CFG0, LP50XX_CHIP_EN);
@@ -404,7 +401,7 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
 
        if (num_leds > 1) {
                if (num_leds > priv->chip_info->max_modules) {
-                       dev_err(&priv->client->dev, "reg property is invalid\n");
+                       dev_err(priv->dev, "reg property is invalid\n");
                        return -EINVAL;
                }
 
@@ -412,13 +409,13 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
 
                ret = fwnode_property_read_u32_array(child, "reg", led_banks, num_leds);
                if (ret) {
-                       dev_err(&priv->client->dev, "reg property is missing\n");
+                       dev_err(priv->dev, "reg property is missing\n");
                        return ret;
                }
 
                ret = lp50xx_set_banks(priv, led_banks);
                if (ret) {
-                       dev_err(&priv->client->dev, "Cannot setup banked LEDs\n");
+                       dev_err(priv->dev, "Cannot setup banked LEDs\n");
                        return ret;
                }
 
@@ -426,12 +423,12 @@ static int lp50xx_probe_leds(struct fwnode_handle *child, struct lp50xx *priv,
        } else {
                ret = fwnode_property_read_u32(child, "reg", &led_number);
                if (ret) {
-                       dev_err(&priv->client->dev, "led reg property missing\n");
+                       dev_err(priv->dev, "led reg property missing\n");
                        return ret;
                }
 
                if (led_number > priv->chip_info->num_leds) {
-                       dev_err(&priv->client->dev, "led-sources property is invalid\n");
+                       dev_err(priv->dev, "led-sources property is invalid\n");
                        return -EINVAL;
                }
 
@@ -455,12 +452,9 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
        int i = 0;
 
        priv->enable_gpio = devm_gpiod_get_optional(priv->dev, "enable", GPIOD_OUT_LOW);
-       if (IS_ERR(priv->enable_gpio)) {
-               ret = PTR_ERR(priv->enable_gpio);
-               dev_err(&priv->client->dev, "Failed to get enable gpio: %d\n",
-                       ret);
-               return ret;
-       }
+       if (IS_ERR(priv->enable_gpio))
+               return dev_err_probe(priv->dev, PTR_ERR(priv->enable_gpio),
+                                    "Failed to get enable GPIO\n");
 
        priv->regulator = devm_regulator_get(priv->dev, "vled");
        if (IS_ERR(priv->regulator))
@@ -470,7 +464,7 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
                led = &priv->leds[i];
                ret = fwnode_property_count_u32(child, "reg");
                if (ret < 0) {
-                       dev_err(&priv->client->dev, "reg property is invalid\n");
+                       dev_err(priv->dev, "reg property is invalid\n");
                        goto child_out;
                }
 
@@ -510,12 +504,11 @@ static int lp50xx_probe_dt(struct lp50xx *priv)
                led_cdev = &led->mc_cdev.led_cdev;
                led_cdev->brightness_set_blocking = lp50xx_brightness_set;
 
-               ret = devm_led_classdev_multicolor_register_ext(&priv->client->dev,
+               ret = devm_led_classdev_multicolor_register_ext(priv->dev,
                                                       &led->mc_cdev,
                                                       &init_data);
                if (ret) {
-                       dev_err(&priv->client->dev, "led register err: %d\n",
-                               ret);
+                       dev_err(priv->dev, "led register err: %d\n", ret);
                        goto child_out;
                }
                i++;
@@ -529,8 +522,7 @@ child_out:
        return ret;
 }
 
-static int lp50xx_probe(struct i2c_client *client,
-                       const struct i2c_device_id *id)
+static int lp50xx_probe(struct i2c_client *client)
 {
        struct lp50xx *led;
        int count;
@@ -550,7 +542,7 @@ static int lp50xx_probe(struct i2c_client *client,
        mutex_init(&led->lock);
        led->client = client;
        led->dev = &client->dev;
-       led->chip_info = &lp50xx_chip_info_tbl[id->driver_data];
+       led->chip_info = device_get_match_data(&client->dev);
        i2c_set_clientdata(client, led);
        led->regmap = devm_regmap_init_i2c(client,
                                        led->chip_info->lp50xx_regmap_config);
@@ -579,15 +571,14 @@ static int lp50xx_remove(struct i2c_client *client)
 
        ret = lp50xx_enable_disable(led, 0);
        if (ret) {
-               dev_err(&led->client->dev, "Failed to disable chip\n");
+               dev_err(led->dev, "Failed to disable chip\n");
                return ret;
        }
 
        if (led->regulator) {
                ret = regulator_disable(led->regulator);
                if (ret)
-                       dev_err(&led->client->dev,
-                               "Failed to disable regulator\n");
+                       dev_err(led->dev, "Failed to disable regulator\n");
        }
 
        mutex_destroy(&led->lock);
@@ -596,24 +587,24 @@ static int lp50xx_remove(struct i2c_client *client)
 }
 
 static const struct i2c_device_id lp50xx_id[] = {
-       { "lp5009", LP5009 },
-       { "lp5012", LP5012 },
-       { "lp5018", LP5018 },
-       { "lp5024", LP5024 },
-       { "lp5030", LP5030 },
-       { "lp5036", LP5036 },
+       { "lp5009", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5009] },
+       { "lp5012", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5012] },
+       { "lp5018", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5018] },
+       { "lp5024", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5024] },
+       { "lp5030", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5030] },
+       { "lp5036", (kernel_ulong_t)&lp50xx_chip_info_tbl[LP5036] },
        { }
 };
 MODULE_DEVICE_TABLE(i2c, lp50xx_id);
 
 static const struct of_device_id of_lp50xx_leds_match[] = {
-       { .compatible = "ti,lp5009", .data = (void *)LP5009 },
-       { .compatible = "ti,lp5012", .data = (void *)LP5012 },
-       { .compatible = "ti,lp5018", .data = (void *)LP5018 },
-       { .compatible = "ti,lp5024", .data = (void *)LP5024 },
-       { .compatible = "ti,lp5030", .data = (void *)LP5030 },
-       { .compatible = "ti,lp5036", .data = (void *)LP5036 },
-       {},
+       { .compatible = "ti,lp5009", .data = &lp50xx_chip_info_tbl[LP5009] },
+       { .compatible = "ti,lp5012", .data = &lp50xx_chip_info_tbl[LP5012] },
+       { .compatible = "ti,lp5018", .data = &lp50xx_chip_info_tbl[LP5018] },
+       { .compatible = "ti,lp5024", .data = &lp50xx_chip_info_tbl[LP5024] },
+       { .compatible = "ti,lp5030", .data = &lp50xx_chip_info_tbl[LP5030] },
+       { .compatible = "ti,lp5036", .data = &lp50xx_chip_info_tbl[LP5036] },
+       {}
 };
 MODULE_DEVICE_TABLE(of, of_lp50xx_leds_match);
 
@@ -622,7 +613,7 @@ static struct i2c_driver lp50xx_driver = {
                .name   = "lp50xx",
                .of_match_table = of_lp50xx_leds_match,
        },
-       .probe          = lp50xx_probe,
+       .probe_new      = lp50xx_probe,
        .remove         = lp50xx_remove,
        .id_table       = lp50xx_id,
 };
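
Two idioms are worth noting in the lp50xx conversion above. Error paths in
probe now use dev_err_probe(), which logs the failure (staying silent for
-EPROBE_DEFER) and returns the error code in one step. And with the switch to
.probe_new, the per-chip data comes from device_get_match_data() instead of
the i2c_device_id argument; the id table now stores the same struct pointers
cast to kernel_ulong_t so both enumeration paths describe the same data. A
condensed sketch of the pattern, with hypothetical names and assuming
DT-based enumeration:

struct chip_info {
        int num_leds;
};

static const struct chip_info chipa_info = { .num_leds = 9 };

static const struct of_device_id my_of_match[] = {
        { .compatible = "vendor,chipa", .data = &chipa_info },
        { }
};

static int my_probe(struct i2c_client *client)
{
        struct device *dev = &client->dev;
        const struct chip_info *info;
        struct gpio_desc *gpio;

        info = device_get_match_data(dev);
        if (!info)
                return -ENODEV;

        gpio = devm_gpiod_get_optional(dev, "enable", GPIOD_OUT_LOW);
        if (IS_ERR(gpio))
                return dev_err_probe(dev, PTR_ERR(gpio),
                                     "Failed to get enable GPIO\n");

        return 0;
}
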
index 512a11d..c0bddb3 100644 (file)
@@ -160,8 +160,8 @@ static void max8997_led_brightness_set(struct led_classdev *led_cdev,
        }
 }
 
-static ssize_t max8997_led_show_mode(struct device *dev,
-                               struct device_attribute *attr, char *buf)
+static ssize_t mode_show(struct device *dev,
+                        struct device_attribute *attr, char *buf)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct max8997_led *led =
@@ -193,9 +193,9 @@ static ssize_t max8997_led_show_mode(struct device *dev,
        return ret;
 }
 
-static ssize_t max8997_led_store_mode(struct device *dev,
-                               struct device_attribute *attr,
-                               const char *buf, size_t size)
+static ssize_t mode_store(struct device *dev,
+                         struct device_attribute *attr,
+                         const char *buf, size_t size)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct max8997_led *led =
@@ -222,7 +222,7 @@ static ssize_t max8997_led_store_mode(struct device *dev,
        return size;
 }
 
-static DEVICE_ATTR(mode, 0644, max8997_led_show_mode, max8997_led_store_mode);
+static DEVICE_ATTR_RW(mode);
 
 static struct attribute *max8997_attrs[] = {
        &dev_attr_mode.attr,
index 68fbf0b..77213b7 100644 (file)
@@ -204,9 +204,9 @@ static void netxbig_led_set(struct led_classdev *led_cdev,
        spin_unlock_irqrestore(&led_dat->lock, flags);
 }
 
-static ssize_t netxbig_led_sata_store(struct device *dev,
-                                     struct device_attribute *attr,
-                                     const char *buff, size_t count)
+static ssize_t sata_store(struct device *dev,
+                         struct device_attribute *attr,
+                         const char *buff, size_t count)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct netxbig_led_data *led_dat =
@@ -255,8 +255,8 @@ exit_unlock:
        return ret;
 }
 
-static ssize_t netxbig_led_sata_show(struct device *dev,
-                                    struct device_attribute *attr, char *buf)
+static ssize_t sata_show(struct device *dev,
+                        struct device_attribute *attr, char *buf)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct netxbig_led_data *led_dat =
@@ -265,7 +265,7 @@ static ssize_t netxbig_led_sata_show(struct device *dev,
        return sprintf(buf, "%d\n", led_dat->sata);
 }
 
-static DEVICE_ATTR(sata, 0644, netxbig_led_sata_show, netxbig_led_sata_store);
+static DEVICE_ATTR_RW(sata);
 
 static struct attribute *netxbig_led_attrs[] = {
        &dev_attr_sata.attr,
index 245de44..fcaa347 100644 (file)
@@ -441,8 +441,8 @@ static void set_power_light_amber_noblink(void)
        nasgpio_led_set_brightness(&amber->led_cdev, LED_FULL);
 }
 
-static ssize_t nas_led_blink_show(struct device *dev,
-                                 struct device_attribute *attr, char *buf)
+static ssize_t blink_show(struct device *dev,
+                         struct device_attribute *attr, char *buf)
 {
        struct led_classdev *led = dev_get_drvdata(dev);
        int blinking = 0;
@@ -451,9 +451,9 @@ static ssize_t nas_led_blink_show(struct device *dev,
        return sprintf(buf, "%u\n", blinking);
 }
 
-static ssize_t nas_led_blink_store(struct device *dev,
-                                  struct device_attribute *attr,
-                                  const char *buf, size_t size)
+static ssize_t blink_store(struct device *dev,
+                          struct device_attribute *attr,
+                          const char *buf, size_t size)
 {
        int ret;
        struct led_classdev *led = dev_get_drvdata(dev);
@@ -468,7 +468,7 @@ static ssize_t nas_led_blink_store(struct device *dev,
        return size;
 }
 
-static DEVICE_ATTR(blink, 0644, nas_led_blink_show, nas_led_blink_store);
+static DEVICE_ATTR_RW(blink);
 
 static struct attribute *nasgpio_led_attrs[] = {
        &dev_attr_blink.attr,
@@ -478,7 +478,6 @@ ATTRIBUTE_GROUPS(nasgpio_led);
 
 static int register_nasgpio_led(int led_nr)
 {
-       int ret;
        struct nasgpio_led *nas_led = &nasgpio_leds[led_nr];
        struct led_classdev *led = get_classdev_for_led_nr(led_nr);
 
@@ -489,11 +488,8 @@ static int register_nasgpio_led(int led_nr)
        led->brightness_set = nasgpio_led_set_brightness;
        led->blink_set = nasgpio_led_set_blink;
        led->groups = nasgpio_led_groups;
-       ret = led_classdev_register(&nas_gpio_pci_dev->dev, led);
-       if (ret)
-               return ret;
 
-       return 0;
+       return led_classdev_register(&nas_gpio_pci_dev->dev, led);
 }
 
 static void unregister_nasgpio_led(int led_nr)
index 67f4235..c48b805 100644 (file)
@@ -155,8 +155,8 @@ static const char * const led_src_texts[] = {
        "soft",
 };
 
-static ssize_t wm831x_status_src_show(struct device *dev,
-                                     struct device_attribute *attr, char *buf)
+static ssize_t src_show(struct device *dev,
+                       struct device_attribute *attr, char *buf)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct wm831x_status *led = to_wm831x_status(led_cdev);
@@ -178,9 +178,9 @@ static ssize_t wm831x_status_src_show(struct device *dev,
        return ret;
 }
 
-static ssize_t wm831x_status_src_store(struct device *dev,
-                                      struct device_attribute *attr,
-                                      const char *buf, size_t size)
+static ssize_t src_store(struct device *dev,
+                        struct device_attribute *attr,
+                        const char *buf, size_t size)
 {
        struct led_classdev *led_cdev = dev_get_drvdata(dev);
        struct wm831x_status *led = to_wm831x_status(led_cdev);
@@ -197,7 +197,7 @@ static ssize_t wm831x_status_src_store(struct device *dev,
        return size;
 }
 
-static DEVICE_ATTR(src, 0644, wm831x_status_src_show, wm831x_status_src_store);
+static DEVICE_ATTR_RW(src);
 
 static struct attribute *wm831x_status_attrs[] = {
        &dev_attr_src.attr,
index 2d9eb48..345062c 100644 (file)
@@ -19,10 +19,8 @@ static inline int led_get_brightness(struct led_classdev *led_cdev)
 
 void led_init_core(struct led_classdev *led_cdev);
 void led_stop_software_blink(struct led_classdev *led_cdev);
-void led_set_brightness_nopm(struct led_classdev *led_cdev,
-                               enum led_brightness value);
-void led_set_brightness_nosleep(struct led_classdev *led_cdev,
-                               enum led_brightness value);
+void led_set_brightness_nopm(struct led_classdev *led_cdev, unsigned int value);
+void led_set_brightness_nosleep(struct led_classdev *led_cdev, unsigned int value);
 ssize_t led_trigger_read(struct file *filp, struct kobject *kobj,
                        struct bin_attribute *attr, char *buf,
                        loff_t pos, size_t count);
index d2ab6ab..f62db7e 100644 (file)
@@ -51,10 +51,8 @@ static ssize_t ttyname_store(struct device *dev,
 
        if (size) {
                ttyname = kmemdup_nul(buf, size, GFP_KERNEL);
-               if (!ttyname) {
-                       ret = -ENOMEM;
-                       goto out_unlock;
-               }
+               if (!ttyname)
+                       return -ENOMEM;
        } else {
                ttyname = NULL;
        }
@@ -69,7 +67,6 @@ static ssize_t ttyname_store(struct device *dev,
 
        trigger_data->ttyname = ttyname;
 
-out_unlock:
        mutex_unlock(&trigger_data->mutex);
 
        if (ttyname && !running)
@@ -125,12 +122,12 @@ static void ledtrig_tty_work(struct work_struct *work)
 
        if (icount.rx != trigger_data->rx ||
            icount.tx != trigger_data->tx) {
-               led_set_brightness(trigger_data->led_cdev, LED_ON);
+               led_set_brightness_sync(trigger_data->led_cdev, LED_ON);
 
                trigger_data->rx = icount.rx;
                trigger_data->tx = icount.tx;
        } else {
-               led_set_brightness(trigger_data->led_cdev, LED_OFF);
+               led_set_brightness_sync(trigger_data->led_cdev, LED_OFF);
        }
 
 out:
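
A note on the ledtrig_tty_work() change above: led_set_brightness_sync() sets
the brightness immediately and may sleep, which is safe here because the
trigger runs from a workqueue, whereas plain led_set_brightness() defers
blocking updates. The -ENOMEM path in ttyname_store() can now return directly
(implying the allocation happens before the mutex is taken), removing the
need for the out_unlock label.
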
index 6cf1991..d997f8e 100644 (file)
@@ -238,19 +238,19 @@ struct mhuv2_mbox_chan_priv {
 };
 
 /* Macro for reading a bitfield within a physically mapped packed struct */
-#define readl_relaxed_bitfield(_regptr, _field)                                \
+#define readl_relaxed_bitfield(_regptr, _type, _field)                 \
        ({                                                              \
                u32 _regval;                                            \
                _regval = readl_relaxed((_regptr));                     \
-               (*(typeof((_regptr)))(&_regval))._field;                \
+               (*(_type *)(&_regval))._field;                          \
        })
 
 /* Macro for writing a bitfield within a physically mapped packed struct */
-#define writel_relaxed_bitfield(_value, _regptr, _field)               \
+#define writel_relaxed_bitfield(_value, _regptr, _type, _field)                \
        ({                                                              \
                u32 _regval;                                            \
                _regval = readl_relaxed(_regptr);                       \
-               (*(typeof(_regptr))(&_regval))._field = _value;         \
+               (*(_type *)(&_regval))._field = _value;                 \
                writel_relaxed(_regval, _regptr);                       \
        })
 
@@ -496,7 +496,7 @@ static const struct mhuv2_protocol_ops mhuv2_data_transfer_ops = {
 
 /* Interrupt handlers */
 
-static struct mbox_chan *get_irq_chan_comb(struct mhuv2 *mhu, u32 *reg)
+static struct mbox_chan *get_irq_chan_comb(struct mhuv2 *mhu, u32 __iomem *reg)
 {
        struct mbox_chan *chans = mhu->mbox.chans;
        int channel = 0, i, offset = 0, windows, protocol, ch_wn;
@@ -699,7 +699,9 @@ static irqreturn_t mhuv2_receiver_interrupt(int irq, void *arg)
                ret = IRQ_HANDLED;
        }
 
-       kfree(data);
+       if (!IS_ERR(data))
+               kfree(data);
+
        return ret;
 }
 
@@ -969,8 +971,8 @@ static int mhuv2_tx_init(struct amba_device *adev, struct mhuv2 *mhu,
        mhu->mbox.ops = &mhuv2_sender_ops;
        mhu->send = reg;
 
-       mhu->windows = readl_relaxed_bitfield(&mhu->send->mhu_cfg, num_ch);
-       mhu->minor = readl_relaxed_bitfield(&mhu->send->aidr, arch_minor_rev);
+       mhu->windows = readl_relaxed_bitfield(&mhu->send->mhu_cfg, struct mhu_cfg_t, num_ch);
+       mhu->minor = readl_relaxed_bitfield(&mhu->send->aidr, struct aidr_t, arch_minor_rev);
 
        spin_lock_init(&mhu->doorbell_pending_lock);
 
@@ -990,7 +992,7 @@ static int mhuv2_tx_init(struct amba_device *adev, struct mhuv2 *mhu,
                        mhu->mbox.txdone_poll = false;
                        mhu->irq = adev->irq[0];
 
-                       writel_relaxed_bitfield(1, &mhu->send->int_en, chcomb);
+                       writel_relaxed_bitfield(1, &mhu->send->int_en, struct int_en_t, chcomb);
 
                        /* Disable all channel interrupts */
                        for (i = 0; i < mhu->windows; i++)
@@ -1023,8 +1025,8 @@ static int mhuv2_rx_init(struct amba_device *adev, struct mhuv2 *mhu,
        mhu->mbox.ops = &mhuv2_receiver_ops;
        mhu->recv = reg;
 
-       mhu->windows = readl_relaxed_bitfield(&mhu->recv->mhu_cfg, num_ch);
-       mhu->minor = readl_relaxed_bitfield(&mhu->recv->aidr, arch_minor_rev);
+       mhu->windows = readl_relaxed_bitfield(&mhu->recv->mhu_cfg, struct mhu_cfg_t, num_ch);
+       mhu->minor = readl_relaxed_bitfield(&mhu->recv->aidr, struct aidr_t, arch_minor_rev);
 
        mhu->irq = adev->irq[0];
        if (!mhu->irq) {
@@ -1045,7 +1047,7 @@ static int mhuv2_rx_init(struct amba_device *adev, struct mhuv2 *mhu,
                writel_relaxed(0xFFFFFFFF, &mhu->recv->ch_wn[i].mask_set);
 
        if (mhu->minor)
-               writel_relaxed_bitfield(1, &mhu->recv->int_en, chcomb);
+               writel_relaxed_bitfield(1, &mhu->recv->int_en, struct int_en_t, chcomb);
 
        return 0;
 }
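
Two notes on the arm_mhuv2 hunks above. The bitfield macros now take the
register struct type as an explicit parameter instead of deriving it with
typeof() from the register pointer, presumably because the pointer carries
the __iomem address space and typeof()-based casts of the local u32 copy
upset sparse. Usage, with a hypothetical register layout:

struct ctrl_reg {
        u32 enable      : 1;
        u32 mode        : 3;
        u32 reserved    : 28;
};

/* ctrl is a u32 __iomem * pointing at the mapped register */
u32 mode = readl_relaxed_bitfield(ctrl, struct ctrl_reg, mode);
writel_relaxed_bitfield(1, ctrl, struct ctrl_reg, enable);

Separately, mhuv2_receiver_interrupt() now checks IS_ERR(data) before
kfree(): the protocol's read_data() op returns an ERR_PTR-encoded error on
failure, and kfree() must only be handed NULL or a real allocation.
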
index 93fe08a..7295e38 100644 (file)
@@ -3,7 +3,7 @@
  * OMAP mailbox driver
  *
  * Copyright (C) 2006-2009 Nokia Corporation. All rights reserved.
- * Copyright (C) 2013-2019 Texas Instruments Incorporated - https://www.ti.com
+ * Copyright (C) 2013-2021 Texas Instruments Incorporated - https://www.ti.com
  *
  * Contact: Hiroshi DOYU <Hiroshi.DOYU@nokia.com>
  *          Suman Anna <s-anna@ti.com>
@@ -664,6 +664,10 @@ static const struct of_device_id omap_mailbox_of_match[] = {
                .data           = &omap4_data,
        },
        {
+               .compatible     = "ti,am64-mailbox",
+               .data           = &omap4_data,
+       },
+       {
                /* end */
        },
 };
index 077e5c6..f25324d 100644 (file)
@@ -61,11 +61,15 @@ static const struct qcom_apcs_ipc_data apps_shared_apcs_data = {
        .offset = 12, .clk_name = NULL
 };
 
+static const struct qcom_apcs_ipc_data sdx55_apcs_data = {
+       .offset = 0x1008, .clk_name = "qcom-sdx55-acps-clk"
+};
+
 static const struct regmap_config apcs_regmap_config = {
        .reg_bits = 32,
        .reg_stride = 4,
        .val_bits = 32,
-       .max_register = 0xFFC,
+       .max_register = 0x1008,
        .fast_io = true,
 };
 
@@ -159,9 +163,11 @@ static const struct of_device_id qcom_apcs_ipc_of_match[] = {
        { .compatible = "qcom,msm8998-apcs-hmss-global", .data = &msm8998_apcs_data },
        { .compatible = "qcom,qcs404-apcs-apps-global", .data = &msm8916_apcs_data },
        { .compatible = "qcom,sc7180-apss-shared", .data = &apps_shared_apcs_data },
+       { .compatible = "qcom,sc8180x-apss-shared", .data = &apps_shared_apcs_data },
        { .compatible = "qcom,sdm660-apcs-hmss-global", .data = &sdm660_apcs_data },
        { .compatible = "qcom,sdm845-apss-shared", .data = &apps_shared_apcs_data },
        { .compatible = "qcom,sm8150-apss-shared", .data = &apps_shared_apcs_data },
+       { .compatible = "qcom,sdx55-apcs-gcc", .data = &sdx55_apcs_data },
        {}
 };
 MODULE_DEVICE_TABLE(of, qcom_apcs_ipc_of_match);
index f6fab24..4c32530 100644 (file)
@@ -35,7 +35,7 @@
 #define SPRD_MBOX_IRQ_CLR                      BIT(0)
 
 /* Bit and mask definition for outbox's SPRD_MBOX_FIFO_STS register */
-#define SPRD_OUTBOX_FIFO_FULL                  BIT(0)
+#define SPRD_OUTBOX_FIFO_FULL                  BIT(2)
 #define SPRD_OUTBOX_FIFO_WR_SHIFT              16
 #define SPRD_OUTBOX_FIFO_RD_SHIFT              24
 #define SPRD_OUTBOX_FIFO_POS_MASK              GENMASK(7, 0)
index e07091d..acd0675 100644 (file)
@@ -98,7 +98,9 @@ struct tegra_hsp {
        unsigned int num_ss;
        unsigned int num_db;
        unsigned int num_si;
+
        spinlock_t lock;
+       struct lock_class_key lock_key;
 
        struct list_head doorbells;
        struct tegra_hsp_mailbox *mailboxes;
@@ -775,6 +777,18 @@ static int tegra_hsp_probe(struct platform_device *pdev)
                        return err;
        }
 
+       lockdep_register_key(&hsp->lock_key);
+       lockdep_set_class(&hsp->lock, &hsp->lock_key);
+
+       return 0;
+}
+
+static int tegra_hsp_remove(struct platform_device *pdev)
+{
+       struct tegra_hsp *hsp = platform_get_drvdata(pdev);
+
+       lockdep_unregister_key(&hsp->lock_key);
+
        return 0;
 }
 
@@ -834,6 +848,7 @@ static struct platform_driver tegra_hsp_driver = {
                .pm = &tegra_hsp_pm_ops,
        },
        .probe = tegra_hsp_probe,
+       .remove = tegra_hsp_remove,
 };
 
 static int __init tegra_hsp_init(void)
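
The tegra-hsp change registers a dynamic lockdep key per probed instance and
binds the doorbell spinlock to it, so that lockdep can tell locks of
different HSP blocks apart instead of lumping them into one static class. A
dynamically registered key must be unregistered before the memory holding it
is freed, which is why the patch also grows a remove() callback. The pattern
in isolation, with a hypothetical context struct:

struct my_ctx {
        spinlock_t lock;
        struct lock_class_key key;      /* one lock class per instance */
};

static void my_ctx_init(struct my_ctx *ctx)
{
        spin_lock_init(&ctx->lock);
        lockdep_register_key(&ctx->key);
        lockdep_set_class(&ctx->lock, &ctx->key);
}

static void my_ctx_destroy(struct my_ctx *ctx)
{
        /* Must run before the structure containing the key is freed. */
        lockdep_unregister_key(&ctx->key);
}
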
index 71691f3..03e1fe4 100644 (file)
@@ -965,7 +965,7 @@ static int bcache_device_init(struct bcache_device *d, unsigned int block_size,
        q->limits.max_hw_sectors        = UINT_MAX;
        q->limits.max_sectors           = UINT_MAX;
        q->limits.max_segment_size      = UINT_MAX;
-       q->limits.max_segments          = BIO_MAX_PAGES;
+       q->limits.max_segments          = BIO_MAX_VECS;
        blk_queue_max_discard_sectors(q, UINT_MAX);
        q->limits.discard_granularity   = 512;
        q->limits.io_min                = block_size;
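
The BIO_MAX_PAGES -> BIO_MAX_VECS rename seen here (and in the dm-*, raid5
and md hunks below) reflects what the constant actually bounds: the number of
bio vectors in a single bio, not pages. Several call sites also switch to the
bio_max_segs() helper, which, as of this series, is essentially:

/* Clamp a computed segment count to the per-bio vector limit. */
static inline unsigned int bio_max_segs(unsigned int nr_segs)
{
        return min(nr_segs, BIO_MAX_VECS);
}
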
index fce4cbf..50f3e67 100644 (file)
@@ -1526,6 +1526,10 @@ EXPORT_SYMBOL_GPL(dm_bufio_get_block_size);
 sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
 {
        sector_t s = i_size_read(c->bdev->bd_inode) >> SECTOR_SHIFT;
+       if (s >= c->start)
+               s -= c->start;
+       else
+               s = 0;
        if (likely(c->sectors_per_block_bits >= 0))
                s >>= c->sectors_per_block_bits;
        else
index 11c105e..b0ab080 100644 (file)
@@ -229,7 +229,7 @@ static DEFINE_SPINLOCK(dm_crypt_clients_lock);
 static unsigned dm_crypt_clients_n = 0;
 static volatile unsigned long dm_crypt_pages_per_client;
 #define DM_CRYPT_MEMORY_PERCENT                        2
-#define DM_CRYPT_MIN_PAGES_PER_CLIENT          (BIO_MAX_PAGES * 16)
+#define DM_CRYPT_MIN_PAGES_PER_CLIENT          (BIO_MAX_VECS * 16)
 
 static void clone_init(struct dm_crypt_io *, struct bio *);
 static void kcryptd_queue_crypt(struct dm_crypt_io *io);
@@ -3246,7 +3246,7 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
                ALIGN(sizeof(struct dm_crypt_io) + cc->dmreq_start + additional_req_size,
                      ARCH_KMALLOC_MINALIGN);
 
-       ret = mempool_init(&cc->page_pool, BIO_MAX_PAGES, crypt_page_alloc, crypt_page_free, cc);
+       ret = mempool_init(&cc->page_pool, BIO_MAX_VECS, crypt_page_alloc, crypt_page_free, cc);
        if (ret) {
                ti->error = "Cannot allocate page mempool";
                goto bad;
@@ -3373,9 +3373,9 @@ static int crypt_map(struct dm_target *ti, struct bio *bio)
        /*
         * Check if bio is too large, split as needed.
         */
-       if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) &&
+       if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_VECS << PAGE_SHIFT)) &&
            (bio_data_dir(bio) == WRITE || cc->on_disk_tag_size))
-               dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT));
+               dm_accept_partial_bio(bio, ((BIO_MAX_VECS << PAGE_SHIFT) >> SECTOR_SHIFT));
 
        /*
         * Ensure that bio is a multiple of internal sector encryption size
index 4312007..2d3cda0 100644 (file)
@@ -341,8 +341,8 @@ static void do_region(int op, int op_flags, unsigned region,
                        num_bvecs = 1;
                        break;
                default:
-                       num_bvecs = min_t(int, BIO_MAX_PAGES,
-                                         dm_sector_div_up(remaining, (PAGE_SIZE >> SECTOR_SHIFT)));
+                       num_bvecs = bio_max_segs(dm_sector_div_up(remaining,
+                                               (PAGE_SIZE >> SECTOR_SHIFT)));
                }
 
                bio = bio_alloc_bioset(GFP_NOIO, num_bvecs, &io->client->bios);
index 5e306bb..1ca65b4 100644 (file)
@@ -529,7 +529,7 @@ static int list_devices(struct file *filp, struct dm_ioctl *param, size_t param_
         * Grab our output buffer.
         */
        nl = orig_nl = get_result_buffer(param, param_size, &len);
-       if (len < needed) {
+       if (len < needed || len < sizeof(nl->dev)) {
                param->flags |= DM_BUFFER_FULL_FLAG;
                goto out;
        }
index e3d35c6..5788265 100644 (file)
@@ -264,15 +264,14 @@ static int write_inline_data(struct log_writes_c *lc, void *entry,
                             size_t entrylen, void *data, size_t datalen,
                             sector_t sector)
 {
-       int num_pages, bio_pages, pg_datalen, pg_sectorlen, i;
+       int bio_pages, pg_datalen, pg_sectorlen, i;
        struct page *page;
        struct bio *bio;
        size_t ret;
        void *ptr;
 
        while (datalen) {
-               num_pages = ALIGN(datalen, PAGE_SIZE) >> PAGE_SHIFT;
-               bio_pages = min(num_pages, BIO_MAX_PAGES);
+               bio_pages = bio_max_segs(DIV_ROUND_UP(datalen, PAGE_SIZE));
 
                atomic_inc(&lc->io_blocks);
 
@@ -364,7 +363,7 @@ static int log_one_block(struct log_writes_c *lc,
                goto out;
 
        atomic_inc(&lc->io_blocks);
-       bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES));
+       bio = bio_alloc(GFP_KERNEL, bio_max_segs(block->vec_cnt));
        if (!bio) {
                DMERR("Couldn't alloc log bio");
                goto error;
@@ -386,7 +385,8 @@ static int log_one_block(struct log_writes_c *lc,
                if (ret != block->vecs[i].bv_len) {
                        atomic_inc(&lc->io_blocks);
                        submit_bio(bio);
-                       bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES));
+                       bio = bio_alloc(GFP_KERNEL,
+                                       bio_max_segs(block->vec_cnt - i));
                        if (!bio) {
                                DMERR("Couldn't alloc log bio");
                                goto error;
index 95391f7..e5f0f17 100644 (file)
@@ -1594,6 +1594,13 @@ static int device_not_zoned_model(struct dm_target *ti, struct dm_dev *dev,
        return blk_queue_zoned_model(q) != *zoned_model;
 }
 
+/*
+ * Check the device zoned model based on the target feature flag. If the target
+ * has the DM_TARGET_ZONED_HM feature flag set, host-managed zoned devices are
+ * also accepted but all devices must have the same zoned model. If the target
+ * has the DM_TARGET_MIXED_ZONED_MODEL feature set, the devices can have any
+ * zoned model with all zoned devices having the same zone size.
+ */
 static bool dm_table_supports_zoned_model(struct dm_table *t,
                                          enum blk_zoned_model zoned_model)
 {
@@ -1603,13 +1610,15 @@ static bool dm_table_supports_zoned_model(struct dm_table *t,
        for (i = 0; i < dm_table_get_num_targets(t); i++) {
                ti = dm_table_get_target(t, i);
 
-               if (zoned_model == BLK_ZONED_HM &&
-                   !dm_target_supports_zoned_hm(ti->type))
-                       return false;
-
-               if (!ti->type->iterate_devices ||
-                   ti->type->iterate_devices(ti, device_not_zoned_model, &zoned_model))
-                       return false;
+               if (dm_target_supports_zoned_hm(ti->type)) {
+                       if (!ti->type->iterate_devices ||
+                           ti->type->iterate_devices(ti, device_not_zoned_model,
+                                                     &zoned_model))
+                               return false;
+               } else if (!dm_target_supports_mixed_zoned_model(ti->type)) {
+                       if (zoned_model == BLK_ZONED_HM)
+                               return false;
+               }
        }
 
        return true;
@@ -1621,9 +1630,17 @@ static int device_not_matches_zone_sectors(struct dm_target *ti, struct dm_dev *
        struct request_queue *q = bdev_get_queue(dev->bdev);
        unsigned int *zone_sectors = data;
 
+       if (!blk_queue_is_zoned(q))
+               return 0;
+
        return blk_queue_zone_sectors(q) != *zone_sectors;
 }
 
+/*
+ * Check consistency of zoned model and zone sectors across all targets. For
+ * zone sectors, if the destination device is a zoned block device, it shall
+ * have the specified zone_sectors.
+ */
 static int validate_hardware_zoned_model(struct dm_table *table,
                                         enum blk_zoned_model zoned_model,
                                         unsigned int zone_sectors)
@@ -1642,7 +1659,7 @@ static int validate_hardware_zoned_model(struct dm_table *table,
                return -EINVAL;
 
        if (dm_table_any_dev_attr(table, device_not_matches_zone_sectors, &zone_sectors)) {
-               DMERR("%s: zone sectors is not consistent across all devices",
+               DMERR("%s: zone sectors is not consistent across all zoned devices",
                      dm_device_name(table->md));
                return -EINVAL;
        }
index fb41b4f..66f4c63 100644 (file)
@@ -61,19 +61,18 @@ static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
 static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
                           unsigned *offset, struct dm_buffer **buf)
 {
-       u64 position, block;
+       u64 position, block, rem;
        u8 *res;
 
        position = (index + rsb) * v->fec->roots;
-       block = position >> v->data_dev_block_bits;
-       *offset = (unsigned)(position - (block << v->data_dev_block_bits));
+       block = div64_u64_rem(position, v->fec->roots << SECTOR_SHIFT, &rem);
+       *offset = (unsigned)rem;
 
-       res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
+       res = dm_bufio_read(v->fec->bufio, block, buf);
        if (IS_ERR(res)) {
                DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
                      v->data_dev->name, (unsigned long long)rsb,
-                     (unsigned long long)(v->fec->start + block),
-                     PTR_ERR(res));
+                     (unsigned long long)block, PTR_ERR(res));
                *buf = NULL;
        }
 
@@ -155,7 +154,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
 
                /* read the next block when we run out of parity bytes */
                offset += v->fec->roots;
-               if (offset >= 1 << v->data_dev_block_bits) {
+               if (offset >= v->fec->roots << SECTOR_SHIFT) {
                        dm_bufio_release(buf);
 
                        par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
@@ -674,7 +673,7 @@ int verity_fec_ctr(struct dm_verity *v)
 {
        struct dm_verity_fec *f = v->fec;
        struct dm_target *ti = v->ti;
-       u64 hash_blocks;
+       u64 hash_blocks, fec_blocks;
        int ret;
 
        if (!verity_fec_is_enabled(v)) {
@@ -744,15 +743,17 @@ int verity_fec_ctr(struct dm_verity *v)
        }
 
        f->bufio = dm_bufio_client_create(f->dev->bdev,
-                                         1 << v->data_dev_block_bits,
+                                         f->roots << SECTOR_SHIFT,
                                          1, 0, NULL, NULL);
        if (IS_ERR(f->bufio)) {
                ti->error = "Cannot initialize FEC bufio client";
                return PTR_ERR(f->bufio);
        }
 
-       if (dm_bufio_get_device_size(f->bufio) <
-           ((f->start + f->rounds * f->roots) >> v->data_dev_block_bits)) {
+       dm_bufio_set_sector_offset(f->bufio, f->start << (v->data_dev_block_bits - SECTOR_SHIFT));
+
+       fec_blocks = div64_u64(f->rounds * f->roots, v->fec->roots << SECTOR_SHIFT);
+       if (dm_bufio_get_device_size(f->bufio) < fec_blocks) {
                ti->error = "FEC device is too small";
                return -E2BIG;
        }
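
For intuition on the dm-verity FEC rework above: the bufio client for the
parity device is now created with a block size of f->roots << SECTOR_SHIFT
bytes rather than the data block size, and fec_read_parity() locates a parity
byte by division rather than by shifting. With a hypothetical f->roots = 2,
the bufio block size is 2 * 512 = 1024 bytes, so parity byte position 5000
lands in bufio block 5000 / 1024 = 4 at offset 5000 % 1024 = 904, exactly the
quotient and remainder that div64_u64_rem() produces above.
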
index 6b8e5bd..808a98e 100644 (file)
@@ -34,7 +34,7 @@
 #define DM_VERITY_OPT_IGN_ZEROES       "ignore_zero_blocks"
 #define DM_VERITY_OPT_AT_MOST_ONCE     "check_at_most_once"
 
-#define DM_VERITY_OPTS_MAX             (2 + DM_VERITY_OPTS_FEC + \
+#define DM_VERITY_OPTS_MAX             (3 + DM_VERITY_OPTS_FEC + \
                                         DM_VERITY_ROOT_HASH_VERIFICATION_OPTS)
 
 static unsigned dm_verity_prefetch_cluster = DM_VERITY_DEFAULT_PREFETCH_SIZE;
index 844c4be..4f72b6f 100644 (file)
@@ -1892,10 +1892,10 @@ restart:
                        list_add(&g->lru, &wbl.list);
                        wbl.size++;
                        g->write_in_progress = true;
-                       g->wc_list_contiguous = BIO_MAX_PAGES;
+                       g->wc_list_contiguous = BIO_MAX_VECS;
                        f = g;
                        e->wc_list_contiguous++;
-                       if (unlikely(e->wc_list_contiguous == BIO_MAX_PAGES)) {
+                       if (unlikely(e->wc_list_contiguous == BIO_MAX_VECS)) {
                                if (unlikely(wc->writeback_all)) {
                                        next_node = rb_next(&f->rb_node);
                                        if (likely(next_node))
index 697f9de..7e88df6 100644 (file)
@@ -1143,7 +1143,7 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv,
 static struct target_type dmz_type = {
        .name            = "zoned",
        .version         = {2, 0, 0},
-       .features        = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM,
+       .features        = DM_TARGET_SINGLETON | DM_TARGET_MIXED_ZONED_MODEL,
        .module          = THIS_MODULE,
        .ctr             = dmz_ctr,
        .dtr             = dmz_dtr,
index 50b693d..3f3be94 100644 (file)
@@ -2036,7 +2036,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
        if (size != dm_get_size(md))
                memset(&md->geometry, 0, sizeof(md->geometry));
 
-       set_capacity_and_notify(md->disk, size);
+       if (!get_capacity(md->disk))
+               set_capacity(md->disk, size);
+       else
+               set_capacity_and_notify(md->disk, size);
 
        dm_table_event_callback(t, event_callback, md);
 
index 4337ae0..0b5dcaa 100644 (file)
@@ -735,7 +735,7 @@ static void r5l_submit_current_io(struct r5l_log *log)
 
 static struct bio *r5l_bio_alloc(struct r5l_log *log)
 {
-       struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES, &log->bs);
+       struct bio *bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS, &log->bs);
 
        bio_set_op_attrs(bio, REQ_OP_WRITE, 0);
        bio_set_dev(bio, log->rdev->bdev);
@@ -1634,7 +1634,7 @@ static int r5l_recovery_allocate_ra_pool(struct r5l_log *log,
 {
        struct page *page;
 
-       ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_PAGES, &log->bs);
+       ctx->ra_bio = bio_alloc_bioset(GFP_KERNEL, BIO_MAX_VECS, &log->bs);
        if (!ctx->ra_bio)
                return -ENOMEM;
 
index e8c118e..3ddc2aa 100644 (file)
@@ -496,7 +496,7 @@ static void ppl_submit_iounit(struct ppl_io_unit *io)
                if (!bio_add_page(bio, sh->ppl_page, PAGE_SIZE, 0)) {
                        struct bio *prev = bio;
 
-                       bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_PAGES,
+                       bio = bio_alloc_bioset(GFP_NOIO, BIO_MAX_VECS,
                                               &ppl_conf->bs);
                        bio->bi_opf = prev->bi_opf;
                        bio->bi_write_hint = prev->bi_write_hint;
index 8a85852..5f6e97a 100644 (file)
@@ -430,4 +430,3 @@ MODULE_AUTHOR("Andreas Monitzer <andy@monitzer.com>");
 MODULE_AUTHOR("Ben Backx <ben@bbackx.com>");
 MODULE_DESCRIPTION("FireDTV DVB Driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("FireDTV DVB");
index 692b95a..9a82e68 100644 (file)
@@ -41,7 +41,6 @@ MODULE_PARM_DESC(debug,
 
 MODULE_AUTHOR("Andy Walls");
 MODULE_DESCRIPTION("CX23418 ALSA Interface");
-MODULE_SUPPORTED_DEVICE("CX23418 MPEG2 encoder");
 MODULE_LICENSE("GPL");
 
 MODULE_VERSION(CX18_VERSION);
index 95aed00..f2440eb 100644 (file)
@@ -232,7 +232,6 @@ MODULE_PARM_DESC(cx18_first_minor,
 
 MODULE_AUTHOR("Hans Verkuil");
 MODULE_DESCRIPTION("CX23418 driver");
-MODULE_SUPPORTED_DEVICE("CX23418 MPEG2 encoder");
 MODULE_LICENSE("GPL");
 
 MODULE_VERSION(CX18_VERSION);
index 608fbaf..8797d85 100644 (file)
@@ -104,7 +104,6 @@ MODULE_PARM_DESC(index, "Index value for cx25821 capture interface(s).");
 MODULE_DESCRIPTION("ALSA driver module for cx25821 based capture cards");
 MODULE_AUTHOR("Hiep Huynh");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Conexant,25821}");  /* "{{Conexant,23881}," */
 
 static unsigned int debug;
 module_param(debug, int, 0644);
index 95e0cbb..c83814c 100644 (file)
@@ -98,7 +98,6 @@ MODULE_AUTHOR("Mauro Carvalho Chehab <mchehab@kernel.org>");
 MODULE_LICENSE("GPL v2");
 MODULE_VERSION(CX88_VERSION);
 
-MODULE_SUPPORTED_DEVICE("{{Conexant,23881},{{Conexant,23882},{{Conexant,23883}");
 static unsigned int debug;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "enable debug messages");
index 39029b8..4cefdb2 100644 (file)
@@ -38,7 +38,6 @@ MODULE_PARM_DESC(index,
 
 MODULE_AUTHOR("Andy Walls");
 MODULE_DESCRIPTION("CX23415/CX23416 ALSA Interface");
-MODULE_SUPPORTED_DEVICE("CX23415/CX23416 MPEG2 encoder");
 MODULE_LICENSE("GPL");
 
 MODULE_VERSION(IVTV_VERSION);
index 6e448cb..942b8c2 100644 (file)
@@ -275,9 +275,6 @@ MODULE_PARM_DESC(ivtv_first_minor, "Set device node number assigned to first car
 
 MODULE_AUTHOR("Kevin Thayer, Chris Kennedy, Hans Verkuil");
 MODULE_DESCRIPTION("CX23415/CX23416 driver");
-MODULE_SUPPORTED_DEVICE
-    ("CX23415/CX23416 MPEG2 encoder (WinTV PVR-150/250/350/500,\n"
-               "\t\t\tYuan MPG series and similar)");
 MODULE_LICENSE("GPL");
 
 MODULE_VERSION(IVTV_VERSION);
index 336df65..524912f 100644 (file)
@@ -1269,6 +1269,5 @@ late_initcall_sync(sta2x11_vip_init_module);
 MODULE_DESCRIPTION("STA2X11 Video Input Port driver");
 MODULE_AUTHOR("Wind River");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("sta2x11 video input");
 MODULE_VERSION(DRV_VERSION);
 MODULE_DEVICE_TABLE(pci, sta2x11_vip_pci_tbl);
index 0514be6..e392b3e 100644 (file)
@@ -1363,4 +1363,3 @@ module_platform_driver(atmel_isi_driver);
 MODULE_AUTHOR("Josh Wu <josh.wu@atmel.com>");
 MODULE_DESCRIPTION("The V4L2 driver for Atmel Linux");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("video");
index 0b78fec..61d9885 100644 (file)
@@ -330,4 +330,3 @@ module_platform_driver(atmel_isc_driver);
 MODULE_AUTHOR("Songjun Wu");
 MODULE_DESCRIPTION("The V4L2 driver for Atmel-ISC");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("video");
index 9c94a8b..baac86f 100644 (file)
 MODULE_AUTHOR("Jonathan Corbet <corbet@lwn.net>");
 MODULE_DESCRIPTION("Marvell 88ALP01 CMOS Camera Controller driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("Video");
-
-
-
 
 struct cafe_camera {
        int registered;                 /* Fully initialized? */
index aa5f457..a60c302 100644 (file)
@@ -1288,7 +1288,6 @@ static void rkisp1_params_config_parameter(struct rkisp1_params *params)
        memset(hst.hist_weight, 0x01, sizeof(hst.hist_weight));
        rkisp1_hst_config(params, &hst);
        rkisp1_param_set_bits(params, RKISP1_CIF_ISP_HIST_PROP,
-                             ~RKISP1_CIF_ISP_HIST_PROP_MODE_MASK |
                              rkisp1_hst_params_default_config.mode);
 
        /* set the  range */
index bbcc225..d9b4ad0 100644 (file)
@@ -2149,4 +2149,3 @@ MODULE_AUTHOR("Yannick Fertre <yannick.fertre@st.com>");
 MODULE_AUTHOR("Hugues Fruchet <hugues.fruchet@st.com>");
 MODULE_DESCRIPTION("STMicroelectronics STM32 Digital Camera Memory Interface driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("video");
index 86d5e3f..06f74d4 100644 (file)
@@ -245,7 +245,7 @@ static int vsp1_du_pipeline_setup_brx(struct vsp1_device *vsp1,
                brx = &vsp1->bru->entity;
        else if (pipe->brx && !drm_pipe->force_brx_release)
                brx = pipe->brx;
-       else if (!vsp1->bru->entity.pipe)
+       else if (vsp1_feature(vsp1, VSP1_HAS_BRU) && !vsp1->bru->entity.pipe)
                brx = &vsp1->bru->entity;
        else
                brx = &vsp1->brs->entity;
@@ -462,9 +462,9 @@ static int vsp1_du_pipeline_setup_inputs(struct vsp1_device *vsp1,
         * make sure it is present in the pipeline's list of entities if it
         * wasn't already.
         */
-       if (!use_uif) {
+       if (drm_pipe->uif && !use_uif) {
                drm_pipe->uif->pipe = NULL;
-       } else if (!drm_pipe->uif->pipe) {
+       } else if (drm_pipe->uif && !drm_pipe->uif->pipe) {
                drm_pipe->uif->pipe = pipe;
                list_add_tail(&drm_pipe->uif->list_pipe, &pipe->entities);
        }
index 5bb2932..ff6a8fc 100644 (file)
@@ -5,6 +5,7 @@ obj-y += keymaps/
 obj-$(CONFIG_RC_CORE) += rc-core.o
 rc-core-y := rc-main.o rc-ir-raw.o
 rc-core-$(CONFIG_LIRC) += lirc_dev.o
+rc-core-$(CONFIG_MEDIA_CEC_RC) += keymaps/rc-cec.o
 rc-core-$(CONFIG_BPF_LIRC_MODE2) += bpf-lirc.o
 obj-$(CONFIG_IR_NEC_DECODER) += ir-nec-decoder.o
 obj-$(CONFIG_IR_RC5_DECODER) += ir-rc5-decoder.o
index b252a1d..cc6662e 100644 (file)
@@ -21,7 +21,6 @@ obj-$(CONFIG_RC_MAP) += rc-adstech-dvb-t-pci.o \
                        rc-behold.o \
                        rc-behold-columbus.o \
                        rc-budget-ci-old.o \
-                       rc-cec.o \
                        rc-cinergy-1400.o \
                        rc-cinergy.o \
                        rc-d680-dmb.o \
index 3e3bd11..068e22a 100644 (file)
@@ -1,6 +1,16 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 /* Keytable for the CEC remote control
  *
+ * This keymap is unusual in that it can't be built as a module,
+ * instead it is registered directly in rc-main.c if CONFIG_MEDIA_CEC_RC
+ * is set. This is because it can be called from drm_dp_cec_set_edid() via
+ * cec_register_adapter() in an asynchronous context, and it is not
+ * allowed to use request_module() to load rc-cec.ko in that case.
+ *
+ * Since this keymap is only used if CONFIG_MEDIA_CEC_RC is set, we
+ * just compile this keymap into the rc-core module and never as a
+ * separate module.
+ *
  * Copyright (c) 2015 by Kamil Debski
  */
 
@@ -152,7 +162,7 @@ static struct rc_map_table cec[] = {
        /* 0x77-0xff: Reserved */
 };
 
-static struct rc_map_list cec_map = {
+struct rc_map_list cec_map = {
        .map = {
                .scan           = cec,
                .size           = ARRAY_SIZE(cec),
@@ -160,19 +170,3 @@ static struct rc_map_list cec_map = {
                .name           = RC_MAP_CEC,
        }
 };
-
-static int __init init_rc_map_cec(void)
-{
-       return rc_map_register(&cec_map);
-}
-
-static void __exit exit_rc_map_cec(void)
-{
-       rc_map_unregister(&cec_map);
-}
-
-module_init(init_rc_map_cec);
-module_exit(exit_rc_map_cec);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Kamil Debski");
index 1fd62c1..8e88dc8 100644 (file)
@@ -2069,6 +2069,9 @@ static int __init rc_core_init(void)
 
        led_trigger_register_simple("rc-feedback", &led_feedback);
        rc_map_register(&empty_map);
+#ifdef CONFIG_MEDIA_CEC_RC
+       rc_map_register(&cec_map);
+#endif
 
        return 0;
 }
@@ -2078,6 +2081,9 @@ static void __exit rc_core_exit(void)
        lirc_dev_exit();
        class_unregister(&rc_class);
        led_trigger_unregister_simple(led_feedback);
+#ifdef CONFIG_MEDIA_CEC_RC
+       rc_map_unregister(&cec_map);
+#endif
        rc_map_unregister(&empty_map);
 }
 
index e488e78..69d5c62 100644 (file)
@@ -56,7 +56,6 @@ MODULE_PARM_DESC(flicker_mode, "Flicker frequency (0 (disabled), " __stringify(5
 
 MODULE_AUTHOR("Steve Miller (STMicroelectronics) <steve.miller@st.com>");
 MODULE_DESCRIPTION("V4L-driver for STMicroelectronics CPiA2 based cameras");
-MODULE_SUPPORTED_DEVICE("video");
 MODULE_LICENSE("GPL");
 MODULE_VERSION(CPIA_VERSION);
 
index 3a2df36..a19a467 100644 (file)
@@ -51,7 +51,6 @@ MODULE_PARM_DESC(index, "Index value for tm6000x capture interface(s).");
 MODULE_DESCRIPTION("ALSA driver module for tm5600/tm6000/tm6010 based TV cards");
 MODULE_AUTHOR("Mauro Carvalho Chehab");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{Trident,tm5600},{{Trident,tm6000},{{Trident,tm6010}");
 static unsigned int debug;
 module_param(debug, int, 0644);
 MODULE_PARM_DESC(debug, "enable debug messages");
index 293a460..4990fa8 100644 (file)
@@ -23,8 +23,6 @@ MODULE_DESCRIPTION("DVB driver extension module for tm5600/6000/6010 based TV ca
 MODULE_AUTHOR("Mauro Carvalho Chehab");
 MODULE_LICENSE("GPL");
 
-MODULE_SUPPORTED_DEVICE("{{Trident, tm5600},{{Trident, tm6000},{{Trident, tm6010}");
-
 static int debug;
 
 module_param(debug, int, 0644);
index b57e94f..333bd30 100644 (file)
@@ -371,7 +371,7 @@ void usbtv_audio_free(struct usbtv *usbtv)
        cancel_work_sync(&usbtv->snd_trigger);
 
        if (usbtv->snd && usbtv->udev) {
-               snd_card_free(usbtv->snd);
+               snd_card_free_when_closed(usbtv->snd);
                usbtv->snd = NULL;
        }
 }
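
On the usbtv change above: snd_card_free() waits until every user of the card
has closed it, which is problematic when called from the USB disconnect path;
snd_card_free_when_closed() returns immediately and defers the final teardown
until the last file handle goes away.
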
index 7d9d33d..72c0df1 100644 (file)
@@ -137,6 +137,15 @@ config TI_EMIF_SRAM
          sequence so this driver provides several relocatable PM functions
          for the SoC PM code to use.
 
+config FPGA_DFL_EMIF
+       tristate "FPGA DFL EMIF Driver"
+       depends on FPGA_DFL && HAS_IOMEM
+       help
+         This driver is for the EMIF private feature implemented under the
+         FPGA Device Feature List (DFL) framework. It is used to expose
+         memory interface status information as well as memory clearing
+         control.
+
 config MVEBU_DEVBUS
        bool "Marvell EBU Device Bus Controller"
        default y if PLAT_ORION
index e71cf7b..bc7663e 100644 (file)
@@ -28,6 +28,8 @@ obj-$(CONFIG_STM32_FMC2_EBI)  += stm32-fmc2-ebi.o
 obj-$(CONFIG_SAMSUNG_MC)       += samsung/
 obj-$(CONFIG_TEGRA_MC)         += tegra/
 obj-$(CONFIG_TI_EMIF_SRAM)     += ti-emif-sram.o
+obj-$(CONFIG_FPGA_DFL_EMIF)    += dfl-emif.o
+
 ti-emif-sram-objs              := ti-emif-pm.o ti-emif-sram-pm.o
 
 AFLAGS_ti-emif-sram-pm.o       :=-Wa,-march=armv7-a
diff --git a/drivers/memory/dfl-emif.c b/drivers/memory/dfl-emif.c
new file mode 100644 (file)
index 0000000..3f71981
--- /dev/null
@@ -0,0 +1,207 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * DFL device driver for EMIF private feature
+ *
+ * Copyright (C) 2020 Intel Corporation, Inc.
+ *
+ */
+#include <linux/bitfield.h>
+#include <linux/dfl.h>
+#include <linux/errno.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/io-64-nonatomic-lo-hi.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#define FME_FEATURE_ID_EMIF            0x9
+
+#define EMIF_STAT                      0x8
+#define EMIF_STAT_INIT_DONE_SFT                0
+#define EMIF_STAT_CALC_FAIL_SFT                8
+#define EMIF_STAT_CLEAR_BUSY_SFT       16
+#define EMIF_CTRL                      0x10
+#define EMIF_CTRL_CLEAR_EN_SFT         0
+#define EMIF_CTRL_CLEAR_EN_MSK         GENMASK_ULL(3, 0)
+
+#define EMIF_POLL_INVL                 10000 /* us */
+#define EMIF_POLL_TIMEOUT              5000000 /* us */
+
+struct dfl_emif {
+       struct device *dev;
+       void __iomem *base;
+       spinlock_t lock;        /* Serialises access to EMIF_CTRL reg */
+};
+
+struct emif_attr {
+       struct device_attribute attr;
+       u32 shift;
+       u32 index;
+};
+
+#define to_emif_attr(dev_attr) \
+       container_of(dev_attr, struct emif_attr, attr)
+
+static ssize_t emif_state_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       struct emif_attr *eattr = to_emif_attr(attr);
+       struct dfl_emif *de = dev_get_drvdata(dev);
+       u64 val;
+
+       val = readq(de->base + EMIF_STAT);
+
+       return sysfs_emit(buf, "%u\n",
+                         !!(val & BIT_ULL(eattr->shift + eattr->index)));
+}
+
+static ssize_t emif_clear_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       struct emif_attr *eattr = to_emif_attr(attr);
+       struct dfl_emif *de = dev_get_drvdata(dev);
+       u64 clear_busy_msk, clear_en_msk, val;
+       void __iomem *base = de->base;
+
+       if (!sysfs_streq(buf, "1"))
+               return -EINVAL;
+
+       clear_busy_msk = BIT_ULL(EMIF_STAT_CLEAR_BUSY_SFT + eattr->index);
+       clear_en_msk = BIT_ULL(EMIF_CTRL_CLEAR_EN_SFT + eattr->index);
+
+       spin_lock(&de->lock);
+       /* The CLEAR_EN field is WO, but other fields are RW */
+       val = readq(base + EMIF_CTRL);
+       val &= ~EMIF_CTRL_CLEAR_EN_MSK;
+       val |= clear_en_msk;
+       writeq(val, base + EMIF_CTRL);
+       spin_unlock(&de->lock);
+
+       if (readq_poll_timeout(base + EMIF_STAT, val,
+                              !(val & clear_busy_msk),
+                              EMIF_POLL_INVL, EMIF_POLL_TIMEOUT)) {
+               dev_err(de->dev, "timeout, failed to clear\n");
+               return -ETIMEDOUT;
+       }
+
+       return count;
+}
+
+#define emif_state_attr(_name, _shift, _index)                         \
+       static struct emif_attr emif_attr_##inf##_index##_##_name =     \
+               { .attr = __ATTR(inf##_index##_##_name, 0444,           \
+                                emif_state_show, NULL),                \
+                 .shift = (_shift), .index = (_index) }
+
+#define emif_clear_attr(_index)                                                \
+       static struct emif_attr emif_attr_##inf##_index##_clear =       \
+               { .attr = __ATTR(inf##_index##_clear, 0200,             \
+                                NULL, emif_clear_store),               \
+                 .index = (_index) }
+
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 0);
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 1);
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 2);
+emif_state_attr(init_done, EMIF_STAT_INIT_DONE_SFT, 3);
+
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 0);
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 1);
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 2);
+emif_state_attr(cal_fail, EMIF_STAT_CALC_FAIL_SFT, 3);
+
+emif_clear_attr(0);
+emif_clear_attr(1);
+emif_clear_attr(2);
+emif_clear_attr(3);
+
+static struct attribute *dfl_emif_attrs[] = {
+       &emif_attr_inf0_init_done.attr.attr,
+       &emif_attr_inf0_cal_fail.attr.attr,
+       &emif_attr_inf0_clear.attr.attr,
+
+       &emif_attr_inf1_init_done.attr.attr,
+       &emif_attr_inf1_cal_fail.attr.attr,
+       &emif_attr_inf1_clear.attr.attr,
+
+       &emif_attr_inf2_init_done.attr.attr,
+       &emif_attr_inf2_cal_fail.attr.attr,
+       &emif_attr_inf2_clear.attr.attr,
+
+       &emif_attr_inf3_init_done.attr.attr,
+       &emif_attr_inf3_cal_fail.attr.attr,
+       &emif_attr_inf3_clear.attr.attr,
+
+       NULL,
+};
+
+static umode_t dfl_emif_visible(struct kobject *kobj,
+                               struct attribute *attr, int n)
+{
+       struct dfl_emif *de = dev_get_drvdata(kobj_to_dev(kobj));
+       struct emif_attr *eattr = container_of(attr, struct emif_attr,
+                                              attr.attr);
+       u64 val;
+
+       /*
+        * This device supports up to 4 memory interfaces, but not all
+        * interfaces are used on every platform. The read-out value of the
+        * CLEAR_EN field (which is a bitmap) tells how many interfaces are
+        * available.
+        */
+       val = FIELD_GET(EMIF_CTRL_CLEAR_EN_MSK, readq(de->base + EMIF_CTRL));
+
+       return (val & BIT_ULL(eattr->index)) ? attr->mode : 0;
+}
+
+static const struct attribute_group dfl_emif_group = {
+       .is_visible = dfl_emif_visible,
+       .attrs = dfl_emif_attrs,
+};
+
+static const struct attribute_group *dfl_emif_groups[] = {
+       &dfl_emif_group,
+       NULL,
+};
+
+static int dfl_emif_probe(struct dfl_device *ddev)
+{
+       struct device *dev = &ddev->dev;
+       struct dfl_emif *de;
+
+       de = devm_kzalloc(dev, sizeof(*de), GFP_KERNEL);
+       if (!de)
+               return -ENOMEM;
+
+       de->base = devm_ioremap_resource(dev, &ddev->mmio_res);
+       if (IS_ERR(de->base))
+               return PTR_ERR(de->base);
+
+       de->dev = dev;
+       spin_lock_init(&de->lock);
+       dev_set_drvdata(dev, de);
+
+       return 0;
+}
+
+static const struct dfl_device_id dfl_emif_ids[] = {
+       { FME_ID, FME_FEATURE_ID_EMIF },
+       { }
+};
+MODULE_DEVICE_TABLE(dfl, dfl_emif_ids);
+
+static struct dfl_driver dfl_emif_driver = {
+       .drv    = {
+               .name       = "dfl-emif",
+               .dev_groups = dfl_emif_groups,
+       },
+       .id_table = dfl_emif_ids,
+       .probe   = dfl_emif_probe,
+};
+module_dfl_driver(dfl_emif_driver);
+
+MODULE_DESCRIPTION("DFL EMIF driver");
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL v2");
index 40c723e..b74efa4 100644 (file)
@@ -659,15 +659,6 @@ config MFD_INTEL_LPSS_PCI
          I2C, SPI and HS-UART starting from Intel Sunrisepoint (Intel Skylake
          PCH) in PCI mode.
 
-config MFD_INTEL_MSIC
-       bool "Intel MSIC"
-       depends on INTEL_SCU
-       select MFD_CORE
-       help
-         Select this option to enable access to Intel MSIC (Avatele
-         Passage) chip. This chip embeds audio, battery, GPIO, etc.
-         devices used in Intel Medfield platforms.
-
 config MFD_INTEL_PMC_BXT
        tristate "Intel PMC Driver for Broxton"
        depends on X86
index 0255434..834f546 100644 (file)
@@ -214,7 +214,6 @@ obj-$(CONFIG_MFD_ATMEL_SMC) += atmel-smc.o
 obj-$(CONFIG_MFD_INTEL_LPSS)   += intel-lpss.o
 obj-$(CONFIG_MFD_INTEL_LPSS_PCI)       += intel-lpss-pci.o
 obj-$(CONFIG_MFD_INTEL_LPSS_ACPI)      += intel-lpss-acpi.o
-obj-$(CONFIG_MFD_INTEL_MSIC)   += intel_msic.o
 obj-$(CONFIG_MFD_INTEL_PMC_BXT)        += intel_pmc_bxt.o
 obj-$(CONFIG_MFD_INTEL_PMT)    += intel_pmt.o
 obj-$(CONFIG_MFD_PALMAS)       += palmas.o
diff --git a/drivers/mfd/intel_msic.c b/drivers/mfd/intel_msic.c
deleted file mode 100644 (file)
index daa772f..0000000
+++ /dev/null
@@ -1,425 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- * Driver for Intel MSIC
- *
- * Copyright (C) 2011, Intel Corporation
- * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
- */
-
-#include <linux/err.h>
-#include <linux/gpio.h>
-#include <linux/io.h>
-#include <linux/init.h>
-#include <linux/mfd/core.h>
-#include <linux/mfd/intel_msic.h>
-#include <linux/platform_device.h>
-#include <linux/slab.h>
-
-#include <asm/intel_scu_ipc.h>
-
-#define MSIC_VENDOR(id)                ((id >> 6) & 3)
-#define MSIC_VERSION(id)       (id & 0x3f)
-#define MSIC_MAJOR(id)         ('A' + ((id >> 3) & 7))
-#define MSIC_MINOR(id)         (id & 7)
-
-/*
- * MSIC interrupt tree is readable from SRAM at INTEL_MSIC_IRQ_PHYS_BASE.
- * Since IRQ block starts from address 0x002 we need to subtract that from
- * the actual IRQ status register address.
- */
-#define MSIC_IRQ_STATUS(x)     (INTEL_MSIC_IRQ_PHYS_BASE + ((x) - 2))
-#define MSIC_IRQ_STATUS_ACCDET MSIC_IRQ_STATUS(INTEL_MSIC_ACCDET)
-
-/*
- * The SCU hardware has limitation of 16 bytes per read/write buffer on
- * Medfield.
- */
-#define SCU_IPC_RWBUF_LIMIT    16
-
-/**
- * struct intel_msic - an MSIC MFD instance
- * @pdev: pointer to the platform device
- * @vendor: vendor ID
- * @version: chip version
- * @irq_base: base address of the mapped MSIC SRAM interrupt tree
- */
-struct intel_msic {
-       struct platform_device          *pdev;
-       unsigned                        vendor;
-       unsigned                        version;
-       void __iomem                    *irq_base;
-};
-
-static const struct resource msic_touch_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-static const struct resource msic_adc_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-static const struct resource msic_battery_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-static const struct resource msic_gpio_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-static const struct resource msic_audio_resources[] = {
-       DEFINE_RES_IRQ_NAMED(0, "IRQ"),
-       /*
-        * We will pass IRQ_BASE to the driver now but this can be removed
-        * when/if the driver starts to use intel_msic_irq_read().
-        */
-       DEFINE_RES_MEM_NAMED(MSIC_IRQ_STATUS_ACCDET, 1, "IRQ_BASE"),
-};
-
-static const struct resource msic_hdmi_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-static const struct resource msic_thermal_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-static const struct resource msic_power_btn_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-static const struct resource msic_ocd_resources[] = {
-       DEFINE_RES_IRQ(0),
-};
-
-/*
- * Devices that are part of the MSIC and are available via firmware
- * populated SFI DEVS table.
- */
-static struct mfd_cell msic_devs[] = {
-       [INTEL_MSIC_BLOCK_TOUCH]        = {
-               .name                   = "msic_touch",
-               .num_resources          = ARRAY_SIZE(msic_touch_resources),
-               .resources              = msic_touch_resources,
-       },
-       [INTEL_MSIC_BLOCK_ADC]          = {
-               .name                   = "msic_adc",
-               .num_resources          = ARRAY_SIZE(msic_adc_resources),
-               .resources              = msic_adc_resources,
-       },
-       [INTEL_MSIC_BLOCK_BATTERY]      = {
-               .name                   = "msic_battery",
-               .num_resources          = ARRAY_SIZE(msic_battery_resources),
-               .resources              = msic_battery_resources,
-       },
-       [INTEL_MSIC_BLOCK_GPIO]         = {
-               .name                   = "msic_gpio",
-               .num_resources          = ARRAY_SIZE(msic_gpio_resources),
-               .resources              = msic_gpio_resources,
-       },
-       [INTEL_MSIC_BLOCK_AUDIO]        = {
-               .name                   = "msic_audio",
-               .num_resources          = ARRAY_SIZE(msic_audio_resources),
-               .resources              = msic_audio_resources,
-       },
-       [INTEL_MSIC_BLOCK_HDMI]         = {
-               .name                   = "msic_hdmi",
-               .num_resources          = ARRAY_SIZE(msic_hdmi_resources),
-               .resources              = msic_hdmi_resources,
-       },
-       [INTEL_MSIC_BLOCK_THERMAL]      = {
-               .name                   = "msic_thermal",
-               .num_resources          = ARRAY_SIZE(msic_thermal_resources),
-               .resources              = msic_thermal_resources,
-       },
-       [INTEL_MSIC_BLOCK_POWER_BTN]    = {
-               .name                   = "msic_power_btn",
-               .num_resources          = ARRAY_SIZE(msic_power_btn_resources),
-               .resources              = msic_power_btn_resources,
-       },
-       [INTEL_MSIC_BLOCK_OCD]          = {
-               .name                   = "msic_ocd",
-               .num_resources          = ARRAY_SIZE(msic_ocd_resources),
-               .resources              = msic_ocd_resources,
-       },
-};
-
-/*
- * Other MSIC related devices which are not directly available via SFI DEVS
- * table. These can be pseudo devices, regulators etc. which are needed for
- * different purposes.
- *
- * These devices appear only after the MSIC driver itself is initialized so
- * we can guarantee that the SCU IPC interface is ready.
- */
-static const struct mfd_cell msic_other_devs[] = {
-       /* Audio codec in the MSIC */
-       {
-               .id                     = -1,
-               .name                   = "sn95031",
-       },
-};
-
-/**
- * intel_msic_reg_read - read a single MSIC register
- * @reg: register to read
- * @val: register value is placed here
- *
- * Read a single register from MSIC. Returns %0 on success and negative
- * errno in case of failure.
- *
- * Function may sleep.
- */
-int intel_msic_reg_read(unsigned short reg, u8 *val)
-{
-       return intel_scu_ipc_ioread8(reg, val);
-}
-EXPORT_SYMBOL_GPL(intel_msic_reg_read);
-
-/**
- * intel_msic_reg_write - write a single MSIC register
- * @reg: register to write
- * @val: value to write to that register
- *
- * Write a single MSIC register. Returns 0 on success and negative
- * errno in case of failure.
- *
- * Function may sleep.
- */
-int intel_msic_reg_write(unsigned short reg, u8 val)
-{
-       return intel_scu_ipc_iowrite8(reg, val);
-}
-EXPORT_SYMBOL_GPL(intel_msic_reg_write);
-
-/**
- * intel_msic_reg_update - update a single MSIC register
- * @reg: register to update
- * @val: value to write to the register
- * @mask: specifies which of the bits are updated (%0 = don't update,
- *        %1 = update)
- *
- * Perform an update to a register @reg. @mask is used to specify which
- * bits are updated. Returns %0 in case of success and negative errno in
- * case of failure.
- *
- * Function may sleep.
- */
-int intel_msic_reg_update(unsigned short reg, u8 val, u8 mask)
-{
-       return intel_scu_ipc_update_register(reg, val, mask);
-}
-EXPORT_SYMBOL_GPL(intel_msic_reg_update);
-
-/**
- * intel_msic_bulk_read - read an array of registers
- * @reg: array of register addresses to read
- * @buf: array where the read values are placed
- * @count: number of registers to read
- *
- * Function reads @count registers from the MSIC using addresses passed in
- * @reg. Read values are placed in @buf. Reads are performed atomically
- * wrt. MSIC.
- *
- * Returns %0 in case of success and negative errno in case of failure.
- *
- * Function may sleep.
- */
-int intel_msic_bulk_read(unsigned short *reg, u8 *buf, size_t count)
-{
-       if (WARN_ON(count > SCU_IPC_RWBUF_LIMIT))
-               return -EINVAL;
-
-       return intel_scu_ipc_readv(reg, buf, count);
-}
-EXPORT_SYMBOL_GPL(intel_msic_bulk_read);
-
-/**
- * intel_msic_bulk_write - write an array of values to the MSIC registers
- * @reg: array of registers to write
- * @buf: values to write to each register
- * @count: number of registers to write
- *
- * Function writes @count registers in @buf to MSIC. Writes are performed
- * atomically wrt MSIC. Returns %0 in case of success and negative errno in
- * case of failure.
- *
- * Function may sleep.
- */
-int intel_msic_bulk_write(unsigned short *reg, u8 *buf, size_t count)
-{
-       if (WARN_ON(count > SCU_IPC_RWBUF_LIMIT))
-               return -EINVAL;
-
-       return intel_scu_ipc_writev(reg, buf, count);
-}
-EXPORT_SYMBOL_GPL(intel_msic_bulk_write);
-
-/**
- * intel_msic_irq_read - read a register from an MSIC interrupt tree
- * @msic: MSIC instance
- * @reg: interrupt register (between %INTEL_MSIC_IRQLVL1 and
- *      %INTEL_MSIC_RESETIRQ2)
- * @val: value of the register is placed here
- *
- * This function can be used by an MSIC subdevice interrupt handler to read
- * a register value from the MSIC interrupt tree. In this way subdevice
- * drivers don't have to map in the interrupt tree themselves but can just
- * call this function instead.
- *
- * Function doesn't sleep and is callable from interrupt context.
- *
- * Returns %-EINVAL if @reg is outside of the allowed register region.
- */
-int intel_msic_irq_read(struct intel_msic *msic, unsigned short reg, u8 *val)
-{
-       if (WARN_ON(reg < INTEL_MSIC_IRQLVL1 || reg > INTEL_MSIC_RESETIRQ2))
-               return -EINVAL;
-
-       *val = readb(msic->irq_base + (reg - INTEL_MSIC_IRQLVL1));
-       return 0;
-}
-EXPORT_SYMBOL_GPL(intel_msic_irq_read);
-
-static int intel_msic_init_devices(struct intel_msic *msic)
-{
-       struct platform_device *pdev = msic->pdev;
-       struct intel_msic_platform_data *pdata = dev_get_platdata(&pdev->dev);
-       int ret, i;
-
-       if (pdata->gpio) {
-               struct mfd_cell *cell = &msic_devs[INTEL_MSIC_BLOCK_GPIO];
-
-               cell->platform_data = pdata->gpio;
-               cell->pdata_size = sizeof(*pdata->gpio);
-       }
-
-       if (pdata->ocd) {
-               unsigned gpio = pdata->ocd->gpio;
-
-               ret = devm_gpio_request_one(&pdev->dev, gpio,
-                                       GPIOF_IN, "ocd_gpio");
-               if (ret) {
-                       dev_err(&pdev->dev, "failed to register OCD GPIO\n");
-                       return ret;
-               }
-
-               ret = gpio_to_irq(gpio);
-               if (ret < 0) {
-                       dev_err(&pdev->dev, "no IRQ number for OCD GPIO\n");
-                       return ret;
-               }
-
-               /* Update the IRQ number for the OCD */
-               pdata->irq[INTEL_MSIC_BLOCK_OCD] = ret;
-       }
-
-       for (i = 0; i < ARRAY_SIZE(msic_devs); i++) {
-               if (!pdata->irq[i])
-                       continue;
-
-               ret = mfd_add_devices(&pdev->dev, -1, &msic_devs[i], 1, NULL,
-                                     pdata->irq[i], NULL);
-               if (ret)
-                       goto fail;
-       }
-
-       ret = mfd_add_devices(&pdev->dev, 0, msic_other_devs,
-                             ARRAY_SIZE(msic_other_devs), NULL, 0, NULL);
-       if (ret)
-               goto fail;
-
-       return 0;
-
-fail:
-       mfd_remove_devices(&pdev->dev);
-
-       return ret;
-}
-
-static void intel_msic_remove_devices(struct intel_msic *msic)
-{
-       struct platform_device *pdev = msic->pdev;
-
-       mfd_remove_devices(&pdev->dev);
-}
-
-static int intel_msic_probe(struct platform_device *pdev)
-{
-       struct intel_msic_platform_data *pdata = dev_get_platdata(&pdev->dev);
-       struct intel_msic *msic;
-       struct resource *res;
-       u8 id0, id1;
-       int ret;
-
-       if (!pdata) {
-               dev_err(&pdev->dev, "no platform data passed\n");
-               return -EINVAL;
-       }
-
-       /* First validate that we have an MSIC in place */
-       ret = intel_scu_ipc_ioread8(INTEL_MSIC_ID0, &id0);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to identify the MSIC chip (ID0)\n");
-               return -ENXIO;
-       }
-
-       ret = intel_scu_ipc_ioread8(INTEL_MSIC_ID1, &id1);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to identify the MSIC chip (ID1)\n");
-               return -ENXIO;
-       }
-
-       if (MSIC_VENDOR(id0) != MSIC_VENDOR(id1)) {
-               dev_err(&pdev->dev, "invalid vendor ID: %x, %x\n", id0, id1);
-               return -ENXIO;
-       }
-
-       msic = devm_kzalloc(&pdev->dev, sizeof(*msic), GFP_KERNEL);
-       if (!msic)
-               return -ENOMEM;
-
-       msic->vendor = MSIC_VENDOR(id0);
-       msic->version = MSIC_VERSION(id0);
-       msic->pdev = pdev;
-
-       /*
-        * Map in the MSIC interrupt tree area in SRAM. This is exposed to
-        * the clients via intel_msic_irq_read().
-        */
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-       msic->irq_base = devm_ioremap_resource(&pdev->dev, res);
-       if (IS_ERR(msic->irq_base))
-               return PTR_ERR(msic->irq_base);
-
-       platform_set_drvdata(pdev, msic);
-
-       ret = intel_msic_init_devices(msic);
-       if (ret) {
-               dev_err(&pdev->dev, "failed to initialize MSIC devices\n");
-               return ret;
-       }
-
-       dev_info(&pdev->dev, "Intel MSIC version %c%d (vendor %#x)\n",
-                MSIC_MAJOR(msic->version), MSIC_MINOR(msic->version),
-                msic->vendor);
-
-       return 0;
-}
-
-static int intel_msic_remove(struct platform_device *pdev)
-{
-       struct intel_msic *msic = platform_get_drvdata(pdev);
-
-       intel_msic_remove_devices(msic);
-
-       return 0;
-}
-
-static struct platform_driver intel_msic_driver = {
-       .probe          = intel_msic_probe,
-       .remove         = intel_msic_remove,
-       .driver         = {
-               .name   = "intel_msic",
-       },
-};
-builtin_platform_driver(intel_msic_driver);
index fe8ca94..b67cb0a 100644 (file)
@@ -72,7 +72,8 @@ static const struct dmi_system_id dmi_platform_info[] = {
        {}
 };
 
-static const struct resource intel_quark_i2c_res[] = {
+/* This is used as a placeholder and will be modified at run-time */
+static struct resource intel_quark_i2c_res[] = {
        [INTEL_QUARK_IORES_MEM] = {
                .flags = IORESOURCE_MEM,
        },
@@ -85,7 +86,8 @@ static struct mfd_cell_acpi_match intel_quark_acpi_match_i2c = {
        .adr = MFD_ACPI_MATCH_I2C,
 };
 
-static const struct resource intel_quark_gpio_res[] = {
+/* This is used as a placeholder and will be modified at run-time */
+static struct resource intel_quark_gpio_res[] = {
        [INTEL_QUARK_IORES_MEM] = {
                .flags = IORESOURCE_MEM,
        },
index fafa8b0..f532c59 100644 (file)
@@ -50,14 +50,6 @@ config AD525X_DPOT_SPI
          To compile this driver as a module, choose M here: the
          module will be called ad525x_dpot-spi.
 
-config ATMEL_TCLIB
-       bool "Atmel AT32/AT91 Timer/Counter Library"
-       depends on ARCH_AT91
-       help
-         Select this if you want a library to allocate the Timer/Counter
-         blocks found on many Atmel processors.  This facilitates using
-         these blocks by different drivers despite processor differences.
-
 config DUMMY_IRQ
        tristate "Dummy IRQ handler"
        help
@@ -112,19 +104,6 @@ config PHANTOM
          If you choose to build module, its name will be phantom. If unsure,
          say N here.
 
-config INTEL_MID_PTI
-       tristate "Parallel Trace Interface for MIPI P1149.7 cJTAG standard"
-       depends on PCI && TTY && (X86_INTEL_MID || COMPILE_TEST)
-       help
-         The PTI (Parallel Trace Interface) driver directs
-         trace data routed from various parts in the system out
-         through an Intel Penwell PTI port and out of the mobile
-         device for analysis with a debugging tool (Lauterbach or Fido).
-
-         You should select this driver if the target kernel is meant for
-         an Intel Atom (non-netbook) mobile device containing a MIPI
-         P1149.7 standard implementation.
-
 config TIFM_CORE
        tristate "TI Flash Media interface support"
        depends on PCI
@@ -478,6 +457,7 @@ source "drivers/misc/genwqe/Kconfig"
 source "drivers/misc/echo/Kconfig"
 source "drivers/misc/cxl/Kconfig"
 source "drivers/misc/ocxl/Kconfig"
+source "drivers/misc/bcm-vk/Kconfig"
 source "drivers/misc/cardreader/Kconfig"
 source "drivers/misc/habanalabs/Kconfig"
 source "drivers/misc/uacce/Kconfig"
index d23231e..99b6f15 100644 (file)
@@ -8,9 +8,7 @@ obj-$(CONFIG_IBMVMC)            += ibmvmc.o
 obj-$(CONFIG_AD525X_DPOT)      += ad525x_dpot.o
 obj-$(CONFIG_AD525X_DPOT_I2C)  += ad525x_dpot-i2c.o
 obj-$(CONFIG_AD525X_DPOT_SPI)  += ad525x_dpot-spi.o
-obj-$(CONFIG_INTEL_MID_PTI)    += pti.o
 obj-$(CONFIG_ATMEL_SSC)                += atmel-ssc.o
-obj-$(CONFIG_ATMEL_TCLIB)      += atmel_tclib.o
 obj-$(CONFIG_DUMMY_IRQ)                += dummy-irq.o
 obj-$(CONFIG_ICS932S401)       += ics932s401.o
 obj-$(CONFIG_LKDTM)            += lkdtm/
@@ -51,6 +49,7 @@ obj-$(CONFIG_ECHO)            += echo/
 obj-$(CONFIG_CXL_BASE)         += cxl/
 obj-$(CONFIG_PCI_ENDPOINT_TEST)        += pci_endpoint_test.o
 obj-$(CONFIG_OCXL)             += ocxl/
+obj-$(CONFIG_BCM_VK)           += bcm-vk/
 obj-y                          += cardreader/
 obj-$(CONFIG_PVPANIC)          += pvpanic.o
 obj-$(CONFIG_HABANA_AI)                += habanalabs/
diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c
deleted file mode 100644 (file)
index 7de7840..0000000
+++ /dev/null
@@ -1,200 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/init.h>
-#include <linux/io.h>
-#include <linux/ioport.h>
-#include <linux/kernel.h>
-#include <linux/platform_device.h>
-#include <linux/module.h>
-#include <linux/slab.h>
-#include <linux/export.h>
-#include <linux/of.h>
-#include <soc/at91/atmel_tcb.h>
-
-/*
- * This is a thin library to solve the problem of how to portably allocate
- * one of the TC blocks.  For simplicity, it doesn't currently expect to
- * share individual timers between different drivers.
- */
-
-#if defined(CONFIG_AVR32)
-/* AVR32 has these divide PBB */
-const u8 atmel_tc_divisors[5] = { 0, 4, 8, 16, 32, };
-EXPORT_SYMBOL(atmel_tc_divisors);
-
-#elif defined(CONFIG_ARCH_AT91)
-/* AT91 has these divide MCK */
-const u8 atmel_tc_divisors[5] = { 2, 8, 32, 128, 0, };
-EXPORT_SYMBOL(atmel_tc_divisors);
-
-#endif
-
-static DEFINE_SPINLOCK(tc_list_lock);
-static LIST_HEAD(tc_list);
-
-/**
- * atmel_tc_alloc - allocate a specified TC block
- * @block: which block to allocate
- *
- * Caller allocates a block.  If it is available, a pointer to a
- * pre-initialized struct atmel_tc is returned. The caller can access
- * the registers directly through the "regs" field.
- */
-struct atmel_tc *atmel_tc_alloc(unsigned block)
-{
-       struct atmel_tc         *tc;
-       struct platform_device  *pdev = NULL;
-
-       spin_lock(&tc_list_lock);
-       list_for_each_entry(tc, &tc_list, node) {
-               if (tc->allocated)
-                       continue;
-
-               if ((tc->pdev->dev.of_node && tc->id == block) ||
-                   (tc->pdev->id == block)) {
-                       pdev = tc->pdev;
-                       tc->allocated = true;
-                       break;
-               }
-       }
-       spin_unlock(&tc_list_lock);
-
-       return pdev ? tc : NULL;
-}
-EXPORT_SYMBOL_GPL(atmel_tc_alloc);
-
-/**
- * atmel_tc_free - release a specified TC block
- * @tc: Timer/counter block that was returned by atmel_tc_alloc()
- *
- * This reverses the effect of atmel_tc_alloc(), invalidating the resource
- * returned by that routine and making the TC available to other drivers.
- */
-void atmel_tc_free(struct atmel_tc *tc)
-{
-       spin_lock(&tc_list_lock);
-       if (tc->allocated)
-               tc->allocated = false;
-       spin_unlock(&tc_list_lock);
-}
-EXPORT_SYMBOL_GPL(atmel_tc_free);
-
-#if defined(CONFIG_OF)
-static struct atmel_tcb_config tcb_rm9200_config = {
-       .counter_width = 16,
-};
-
-static struct atmel_tcb_config tcb_sam9x5_config = {
-       .counter_width = 32,
-};
-
-static const struct of_device_id atmel_tcb_dt_ids[] = {
-       {
-               .compatible = "atmel,at91rm9200-tcb",
-               .data = &tcb_rm9200_config,
-       }, {
-               .compatible = "atmel,at91sam9x5-tcb",
-               .data = &tcb_sam9x5_config,
-       }, {
-               /* sentinel */
-       }
-};
-
-MODULE_DEVICE_TABLE(of, atmel_tcb_dt_ids);
-#endif
-
-static int __init tc_probe(struct platform_device *pdev)
-{
-       struct atmel_tc *tc;
-       struct clk      *clk;
-       int             irq;
-       unsigned int    i;
-
-       if (of_get_child_count(pdev->dev.of_node))
-               return -EBUSY;
-
-       irq = platform_get_irq(pdev, 0);
-       if (irq < 0)
-               return -EINVAL;
-
-       tc = devm_kzalloc(&pdev->dev, sizeof(struct atmel_tc), GFP_KERNEL);
-       if (!tc)
-               return -ENOMEM;
-
-       tc->pdev = pdev;
-
-       clk = devm_clk_get(&pdev->dev, "t0_clk");
-       if (IS_ERR(clk))
-               return PTR_ERR(clk);
-
-       tc->slow_clk = devm_clk_get(&pdev->dev, "slow_clk");
-       if (IS_ERR(tc->slow_clk))
-               return PTR_ERR(tc->slow_clk);
-
-       tc->regs = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(tc->regs))
-               return PTR_ERR(tc->regs);
-
-       /* Now take SoC information if available */
-       if (pdev->dev.of_node) {
-               const struct of_device_id *match;
-               match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node);
-               if (match)
-                       tc->tcb_config = match->data;
-
-               tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb");
-       } else {
-               tc->id = pdev->id;
-       }
-
-       tc->clk[0] = clk;
-       tc->clk[1] = devm_clk_get(&pdev->dev, "t1_clk");
-       if (IS_ERR(tc->clk[1]))
-               tc->clk[1] = clk;
-       tc->clk[2] = devm_clk_get(&pdev->dev, "t2_clk");
-       if (IS_ERR(tc->clk[2]))
-               tc->clk[2] = clk;
-
-       tc->irq[0] = irq;
-       tc->irq[1] = platform_get_irq(pdev, 1);
-       if (tc->irq[1] < 0)
-               tc->irq[1] = irq;
-       tc->irq[2] = platform_get_irq(pdev, 2);
-       if (tc->irq[2] < 0)
-               tc->irq[2] = irq;
-
-       for (i = 0; i < 3; i++)
-               writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
-
-       spin_lock(&tc_list_lock);
-       list_add_tail(&tc->node, &tc_list);
-       spin_unlock(&tc_list_lock);
-
-       platform_set_drvdata(pdev, tc);
-
-       return 0;
-}
-
-static void tc_shutdown(struct platform_device *pdev)
-{
-       int i;
-       struct atmel_tc *tc = platform_get_drvdata(pdev);
-
-       for (i = 0; i < 3; i++)
-               writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));
-}
-
-static struct platform_driver tc_driver = {
-       .driver = {
-               .name   = "atmel_tcb",
-               .of_match_table = of_match_ptr(atmel_tcb_dt_ids),
-       },
-       .shutdown = tc_shutdown,
-};
-
-static int __init tc_init(void)
-{
-       return platform_driver_probe(&tc_driver, tc_probe);
-}
-arch_initcall(tc_init);
diff --git a/drivers/misc/bcm-vk/Kconfig b/drivers/misc/bcm-vk/Kconfig
new file mode 100644 (file)
index 0000000..68a9727
--- /dev/null
@@ -0,0 +1,29 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Broadcom VK device
+#
+config BCM_VK
+       tristate "Support for Broadcom VK Accelerators"
+       depends on PCI_MSI
+       help
+         Select this option to enable support for Broadcom
+         VK Accelerators.  VK performs multiple specific offload
+         processing tasks in parallel, assisting in operations such
+         as video transcoding, compression, and crypto.
+         This driver enables userspace programs to access these
+         accelerators via /dev/bcm-vk.N devices.
+
+         If unsure, say N.
+
+config BCM_VK_TTY
+       bool "Enable tty ports on a Broadcom VK Accelerator device"
+       depends on TTY
+       depends on BCM_VK
+       help
+         Select this option to enable tty support to allow console
+         access to Broadcom VK Accelerator cards from host.
+
+         Device nodes will be in the form /dev/bcm-vk.x_ttyVKy, where:
+         x is the instance of the VK card and
+         y is the tty device number on the VK card.
diff --git a/drivers/misc/bcm-vk/Makefile b/drivers/misc/bcm-vk/Makefile
new file mode 100644 (file)
index 0000000..1df2ebe
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Makefile for Broadcom VK driver
+#
+
+obj-$(CONFIG_BCM_VK) += bcm_vk.o
+bcm_vk-objs := \
+       bcm_vk_dev.o \
+       bcm_vk_msg.o \
+       bcm_vk_sg.o
+
+bcm_vk-$(CONFIG_BCM_VK_TTY) += bcm_vk_tty.o
diff --git a/drivers/misc/bcm-vk/bcm_vk.h b/drivers/misc/bcm-vk/bcm_vk.h
new file mode 100644 (file)
index 0000000..a1338f3
--- /dev/null
@@ -0,0 +1,549 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef BCM_VK_H
+#define BCM_VK_H
+
+#include <linux/atomic.h>
+#include <linux/firmware.h>
+#include <linux/irq.h>
+#include <linux/kref.h>
+#include <linux/miscdevice.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/poll.h>
+#include <linux/sched/signal.h>
+#include <linux/tty.h>
+#include <linux/uaccess.h>
+#include <uapi/linux/misc/bcm_vk.h>
+
+#include "bcm_vk_msg.h"
+
+#define DRV_MODULE_NAME                "bcm-vk"
+
+/*
+ * Load Image is completed in two stages:
+ *
+ * 1) When the VK device boot-up, M7 CPU runs and executes the BootROM.
+ * The Secure Boot Loader (SBL) as part of the BootROM will run
+ * to open up ITCM for host to push BOOT1 image.
+ * SBL will authenticate the image before jumping to BOOT1 image.
+ *
+ * 2) Because BOOT1 image is a secured image, we also called it the
+ * Secure Boot Image (SBI). At second stage, SBI will initialize DDR
+ * and wait for host to push BOOT2 image to DDR.
+ * SBI will authenticate the image before jumping to BOOT2 image.
+ *
+ */
+/* Location of registers of interest in BAR0 */
+
+/* Request register for Secure Boot Loader (SBL) download */
+#define BAR_CODEPUSH_SBL               0x400
+/* Start of ITCM */
+#define CODEPUSH_BOOT1_ENTRY           0x00400000
+#define CODEPUSH_MASK                  0xfffff000
+#define CODEPUSH_BOOTSTART             BIT(0)
+
+/* Boot Status register */
+#define BAR_BOOT_STATUS                        0x404
+
+#define SRAM_OPEN                      BIT(16)
+#define DDR_OPEN                       BIT(17)
+
+/* Firmware loader progress status definitions */
+#define FW_LOADER_ACK_SEND_MORE_DATA   BIT(18)
+#define FW_LOADER_ACK_IN_PROGRESS      BIT(19)
+#define FW_LOADER_ACK_RCVD_ALL_DATA    BIT(20)
+
+/* Boot1/2 is running in standalone mode */
+#define BOOT_STDALONE_RUNNING          BIT(21)
+
+/* definitions for boot status register */
+#define BOOT_STATE_MASK                        (0xffffffff & \
+                                        ~(FW_LOADER_ACK_SEND_MORE_DATA | \
+                                          FW_LOADER_ACK_IN_PROGRESS | \
+                                          BOOT_STDALONE_RUNNING))
+
+#define BOOT_ERR_SHIFT                 4
+#define BOOT_ERR_MASK                  (0xf << BOOT_ERR_SHIFT)
+#define BOOT_PROG_MASK                 0xf
+
+#define BROM_STATUS_NOT_RUN            0x2
+#define BROM_NOT_RUN                   (SRAM_OPEN | BROM_STATUS_NOT_RUN)
+#define BROM_STATUS_COMPLETE           0x6
+#define BROM_RUNNING                   (SRAM_OPEN | BROM_STATUS_COMPLETE)
+#define BOOT1_STATUS_COMPLETE          0x6
+#define BOOT1_RUNNING                  (DDR_OPEN | BOOT1_STATUS_COMPLETE)
+#define BOOT2_STATUS_COMPLETE          0x6
+#define BOOT2_RUNNING                  (FW_LOADER_ACK_RCVD_ALL_DATA | \
+                                        BOOT2_STATUS_COMPLETE)
+
+/* Boot request for Secure Boot Image (SBI) */
+#define BAR_CODEPUSH_SBI               0x408
+/* 64M mapped to BAR2 */
+#define CODEPUSH_BOOT2_ENTRY           0x60000000
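Putting the two boot stages together, a minimal host-side sketch of the sequence (illustrative only: it borrows the vkwrite32() helper defined further down in this header, assumes CODEPUSH_BOOTSTART is or'd in for both stages, and omits the DMA handshake, timeouts and error handling the real bcm_vk_dev.c performs):

	/* sketch only: the real sequencing lives in bcm_vk_dev.c */
	static void vk_boot_sketch(struct bcm_vk *vk)
	{
		/* stage 1: hand BOOT1's ITCM entry point to the SBL and start it */
		vkwrite32(vk, CODEPUSH_BOOT1_ENTRY | CODEPUSH_BOOTSTART,
			  BAR_0, BAR_CODEPUSH_SBL);
		/* ...poll BAR_BOOT_STATUS until it reports BOOT1_RUNNING... */

		/* stage 2: BOOT1 opened DDR; hand BOOT2's entry point to the SBI */
		vkwrite32(vk, CODEPUSH_BOOT2_ENTRY | CODEPUSH_BOOTSTART,
			  BAR_0, BAR_CODEPUSH_SBI);
		/* ...poll BAR_BOOT_STATUS until it reports BOOT2_RUNNING... */
	}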
+
+#define BAR_CARD_STATUS                        0x410
+/* CARD_STATUS definitions */
+#define CARD_STATUS_TTYVK0_READY       BIT(0)
+#define CARD_STATUS_TTYVK1_READY       BIT(1)
+
+#define BAR_BOOT1_STDALONE_PROGRESS    0x420
+#define BOOT1_STDALONE_SUCCESS         (BIT(13) | BIT(14))
+#define BOOT1_STDALONE_PROGRESS_MASK   BOOT1_STDALONE_SUCCESS
+
+#define BAR_METADATA_VERSION           0x440
+#define BAR_OS_UPTIME                  0x444
+#define BAR_CHIP_ID                    0x448
+#define MAJOR_SOC_REV(_chip_id)                (((_chip_id) >> 20) & 0xf)
+
+#define BAR_CARD_TEMPERATURE           0x45c
+/* defines for all temperature sensors */
+#define BCM_VK_TEMP_FIELD_MASK         0xff
+#define BCM_VK_CPU_TEMP_SHIFT          0
+#define BCM_VK_DDR0_TEMP_SHIFT         8
+#define BCM_VK_DDR1_TEMP_SHIFT         16
+
+#define BAR_CARD_VOLTAGE               0x460
+/* defines for voltage rail conversion */
+#define BCM_VK_VOLT_RAIL_MASK          0xffff
+#define BCM_VK_3P3_VOLT_REG_SHIFT      16
+
+#define BAR_CARD_ERR_LOG               0x464
+/* Error log register bit definition - register for error alerts */
+#define ERR_LOG_UECC                   BIT(0)
+#define ERR_LOG_SSIM_BUSY              BIT(1)
+#define ERR_LOG_AFBC_BUSY              BIT(2)
+#define ERR_LOG_HIGH_TEMP_ERR          BIT(3)
+#define ERR_LOG_WDOG_TIMEOUT           BIT(4)
+#define ERR_LOG_SYS_FAULT              BIT(5)
+#define ERR_LOG_RAMDUMP                        BIT(6)
+#define ERR_LOG_COP_WDOG_TIMEOUT       BIT(7)
+/* warnings */
+#define ERR_LOG_MEM_ALLOC_FAIL         BIT(8)
+#define ERR_LOG_LOW_TEMP_WARN          BIT(9)
+#define ERR_LOG_ECC                    BIT(10)
+#define ERR_LOG_IPC_DWN                        BIT(11)
+
+/* Alert bit definitions detected on the host */
+#define ERR_LOG_HOST_INTF_V_FAIL       BIT(13)
+#define ERR_LOG_HOST_HB_FAIL           BIT(14)
+#define ERR_LOG_HOST_PCIE_DWN          BIT(15)
+
+#define BAR_CARD_ERR_MEM               0x468
+/* defines for mem err, all fields have same width */
+#define BCM_VK_MEM_ERR_FIELD_MASK      0xff
+#define BCM_VK_ECC_MEM_ERR_SHIFT       0
+#define BCM_VK_UECC_MEM_ERR_SHIFT      8
+/* event-count thresholds above which logs start to come out */
+#define BCM_VK_ECC_THRESHOLD           10
+#define BCM_VK_UECC_THRESHOLD          1
+
+#define BAR_CARD_PWR_AND_THRE          0x46c
+/* defines for power and temp threshold, all fields have same width */
+#define BCM_VK_PWR_AND_THRE_FIELD_MASK 0xff
+#define BCM_VK_LOW_TEMP_THRE_SHIFT     0
+#define BCM_VK_HIGH_TEMP_THRE_SHIFT    8
+#define BCM_VK_PWR_STATE_SHIFT         16
+
+#define BAR_CARD_STATIC_INFO           0x470
+
+#define BAR_INTF_VER                   0x47c
+#define BAR_INTF_VER_MAJOR_SHIFT       16
+#define BAR_INTF_VER_MASK              0xffff
+/*
+ * major and minor semantic version numbers supported
+ * Please update as required on interface changes
+ */
+#define SEMANTIC_MAJOR                 1
+#define SEMANTIC_MINOR                 0
+
+/*
+ * first doorbell reg, i.e. for queue = 0.  Only the first is needed, as
+ * the queue number is used to derive the others
+ */
+#define VK_BAR0_REGSEG_DB_BASE         0x484
+#define VK_BAR0_REGSEG_DB_REG_GAP      8 /*
+                                          * DB register gap,
+                                          * DB1 at 0x48c and DB2 at 0x494
+                                          */
+
+/* reset register and specific values */
+#define VK_BAR0_RESET_DB_NUM           3
+#define VK_BAR0_RESET_DB_SOFT          0xffffffff
+#define VK_BAR0_RESET_DB_HARD          0xfffffffd
+#define VK_BAR0_RESET_RAMPDUMP         0xa0000000
+
+#define VK_BAR0_Q_DB_BASE(q_num)       (VK_BAR0_REGSEG_DB_BASE + \
+                                        ((q_num) * VK_BAR0_REGSEG_DB_REG_GAP))
+#define VK_BAR0_RESET_DB_BASE          (VK_BAR0_REGSEG_DB_BASE + \
+                                        (VK_BAR0_RESET_DB_NUM * VK_BAR0_REGSEG_DB_REG_GAP))
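To make the derived addresses concrete: with the 8-byte gap, VK_BAR0_Q_DB_BASE(1) = 0x48c and VK_BAR0_Q_DB_BASE(2) = 0x494 (matching the comment above), so the reset doorbell, slot 3, lands at 0x484 + 3 * 8 = 0x49c.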
+
+#define BAR_BOOTSRC_SELECT             0xc78
+/* BOOTSRC definitions */
+#define BOOTSRC_SOFT_ENABLE            BIT(14)
+
+/* Card OS Firmware version size */
+#define BAR_FIRMWARE_TAG_SIZE          50
+#define FIRMWARE_STATUS_PRE_INIT_DONE  0x1f
+
+/* VK MSG_ID defines */
+#define VK_MSG_ID_BITMAP_SIZE          4096
+#define VK_MSG_ID_BITMAP_MASK          (VK_MSG_ID_BITMAP_SIZE - 1)
+#define VK_MSG_ID_OVERFLOW             0xffff
+
+/*
+ * BAR1
+ */
+
+/* BAR1 message q definition */
+
+/* indicate if msgq ctrl in BAR1 is populated */
+#define VK_BAR1_MSGQ_DEF_RDY           0x60c0
+/* ready marker value for the above location, normal boot2 */
+#define VK_BAR1_MSGQ_RDY_MARKER                0xbeefcafe
+/* ready marker value for the above location, diagnostics boot2 */
+#define VK_BAR1_DIAG_RDY_MARKER                0xdeadcafe
+/* number of msgqs in BAR1 */
+#define VK_BAR1_MSGQ_NR                        0x60c4
+/* BAR1 queue control structure offset */
+#define VK_BAR1_MSGQ_CTRL_OFF          0x60c8
+
+/* BAR1 ucode and boot1 version tag */
+#define VK_BAR1_UCODE_VER_TAG          0x6170
+#define VK_BAR1_BOOT1_VER_TAG          0x61b0
+#define VK_BAR1_VER_TAG_SIZE           64
+
+/* Holds the DMA buffer address allocated for the boot2 download */
+#define VK_BAR1_DMA_BUF_OFF_HI         0x61e0
+#define VK_BAR1_DMA_BUF_OFF_LO         (VK_BAR1_DMA_BUF_OFF_HI + 4)
+#define VK_BAR1_DMA_BUF_SZ             (VK_BAR1_DMA_BUF_OFF_HI + 8)
+
+/* Scratch memory allocated on host for VK */
+#define VK_BAR1_SCRATCH_OFF_HI         0x61f0
+#define VK_BAR1_SCRATCH_OFF_LO         (VK_BAR1_SCRATCH_OFF_HI + 4)
+#define VK_BAR1_SCRATCH_SZ_ADDR                (VK_BAR1_SCRATCH_OFF_HI + 8)
+#define VK_BAR1_SCRATCH_DEF_NR_PAGES   32
+
+/* BAR1 DAUTH info */
+#define VK_BAR1_DAUTH_BASE_ADDR                0x6200
+#define VK_BAR1_DAUTH_STORE_SIZE       0x48
+#define VK_BAR1_DAUTH_VALID_SIZE       0x8
+#define VK_BAR1_DAUTH_MAX              4
+#define VK_BAR1_DAUTH_STORE_ADDR(x) \
+               (VK_BAR1_DAUTH_BASE_ADDR + \
+                (x) * (VK_BAR1_DAUTH_STORE_SIZE + VK_BAR1_DAUTH_VALID_SIZE))
+#define VK_BAR1_DAUTH_VALID_ADDR(x) \
+               (VK_BAR1_DAUTH_STORE_ADDR(x) + VK_BAR1_DAUTH_STORE_SIZE)
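As a concrete check of the layout these macros encode: each DAUTH slot spans store + valid = 0x48 + 0x8 = 0x50 bytes, so VK_BAR1_DAUTH_STORE_ADDR(1) = 0x6200 + 0x50 = 0x6250 and VK_BAR1_DAUTH_VALID_ADDR(1) = 0x6250 + 0x48 = 0x6298.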
+
+/* BAR1 SOTP AUTH and REVID info */
+#define VK_BAR1_SOTP_REVID_BASE_ADDR   0x6340
+#define VK_BAR1_SOTP_REVID_SIZE                0x10
+#define VK_BAR1_SOTP_REVID_MAX         2
+#define VK_BAR1_SOTP_REVID_ADDR(x) \
+               (VK_BAR1_SOTP_REVID_BASE_ADDR + (x) * VK_BAR1_SOTP_REVID_SIZE)
+
+/* VK device supports a maximum of 3 bars */
+#define MAX_BAR        3
+
+/* default number of msg blk for inband SGL */
+#define BCM_VK_DEF_IB_SGL_BLK_LEN       16
+#define BCM_VK_IB_SGL_BLK_MAX           24
+
+enum pci_barno {
+       BAR_0 = 0,
+       BAR_1,
+       BAR_2
+};
+
+#ifdef CONFIG_BCM_VK_TTY
+#define BCM_VK_NUM_TTY 2
+#else
+#define BCM_VK_NUM_TTY 0
+#endif
+
+struct bcm_vk_tty {
+       struct tty_port port;
+       u32 to_offset;  /* bar offset to use */
+       u32 to_size;    /* to VK buffer size */
+       u32 wr;         /* write offset shadow */
+       u32 from_offset;        /* bar offset to use */
+       u32 from_size;  /* from VK buffer size */
+       u32 rd;         /* read offset shadow */
+       pid_t pid;
+       bool irq_enabled;
+       bool is_opened;         /* tracks tty open/close */
+};
+
+/* VK device max power state, supports 3, full, reduced and low */
+#define MAX_OPP 3
+#define MAX_CARD_INFO_TAG_SIZE 64
+
+struct bcm_vk_card_info {
+       u32 version;
+       char os_tag[MAX_CARD_INFO_TAG_SIZE];
+       char cmpt_tag[MAX_CARD_INFO_TAG_SIZE];
+       u32 cpu_freq_mhz;
+       u32 cpu_scale[MAX_OPP];
+       u32 ddr_freq_mhz;
+       u32 ddr_size_MB;
+       u32 video_core_freq_mhz;
+};
+
+/* DAUTH related info */
+struct bcm_vk_dauth_key {
+       char store[VK_BAR1_DAUTH_STORE_SIZE];
+       char valid[VK_BAR1_DAUTH_VALID_SIZE];
+};
+
+struct bcm_vk_dauth_info {
+       struct bcm_vk_dauth_key keys[VK_BAR1_DAUTH_MAX];
+};
+
+/*
+ * Control structure for log messages from the card.  This
+ * buffer holds the logmsg stream that comes from the vk card.
+ */
+struct bcm_vk_peer_log {
+       u32 rd_idx;
+       u32 wr_idx;
+       u32 buf_size;
+       u32 mask;
+       char data[];    /* flexible array member holding the log bytes */
+};
+
+/* max buf size allowed */
+#define BCM_VK_PEER_LOG_BUF_MAX SZ_16K
+/* max size per line of peer log */
+#define BCM_VK_PEER_LOG_LINE_MAX  256
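The peer log is a power-of-two ring: both indices wrap through the mask. A reader consumes bytes roughly like this (sketch mirroring bcm_vk_dump_peer_log() in bcm_vk_dev.c below; vk, log and data_offset as used there):

	c = vkread8(vk, BAR_2, data_offset + log.rd_idx);
	log.rd_idx = (log.rd_idx + 1) & log.mask;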
+
+/*
+ * single entry for processing type + utilization
+ */
+#define BCM_VK_PROC_TYPE_TAG_LEN 8
+struct bcm_vk_proc_mon_entry_t {
+       char tag[BCM_VK_PROC_TYPE_TAG_LEN];
+       u32 used;
+       u32 max; /**< max capacity */
+};
+
+/**
+ * Structure for run time utilization
+ */
+#define BCM_VK_PROC_MON_MAX 8 /* max entries supported */
+struct bcm_vk_proc_mon_info {
+       u32 num; /**< no of entries */
+       u32 entry_size; /**< per entry size */
+       struct bcm_vk_proc_mon_entry_t entries[BCM_VK_PROC_MON_MAX];
+};
+
+struct bcm_vk_hb_ctrl {
+       struct timer_list timer;
+       u32 last_uptime;
+       u32 lost_cnt;
+};
+
+struct bcm_vk_alert {
+       u16 flags;
+       u16 notfs;
+};
+
+/* some alert counters that the driver keeps track of */
+struct bcm_vk_alert_cnts {
+       u16 ecc;
+       u16 uecc;
+};
+
+struct bcm_vk {
+       struct pci_dev *pdev;
+       void __iomem *bar[MAX_BAR];
+       int num_irqs;
+
+       struct bcm_vk_card_info card_info;
+       struct bcm_vk_proc_mon_info proc_mon_info;
+       struct bcm_vk_dauth_info dauth_info;
+
+       /* mutex to protect the ioctls */
+       struct mutex mutex;
+       struct miscdevice miscdev;
+       int devid; /* dev id allocated */
+
+#ifdef CONFIG_BCM_VK_TTY
+       struct tty_driver *tty_drv;
+       struct timer_list serial_timer;
+       struct bcm_vk_tty tty[BCM_VK_NUM_TTY];
+       struct workqueue_struct *tty_wq_thread;
+       struct work_struct tty_wq_work;
+#endif
+
+       /* Reference-counting to handle file operations */
+       struct kref kref;
+
+       spinlock_t msg_id_lock; /* Spinlock for msg_id */
+       u16 msg_id;
+       DECLARE_BITMAP(bmap, VK_MSG_ID_BITMAP_SIZE);
+       spinlock_t ctx_lock; /* Spinlock for component context */
+       struct bcm_vk_ctx ctx[VK_CMPT_CTX_MAX];
+       struct bcm_vk_ht_entry pid_ht[VK_PID_HT_SZ];
+       pid_t reset_pid; /* process that issued the reset */
+
+       atomic_t msgq_inited; /* indicate if info has been synced with vk */
+       struct bcm_vk_msg_chan to_v_msg_chan;
+       struct bcm_vk_msg_chan to_h_msg_chan;
+
+       struct workqueue_struct *wq_thread;
+       struct work_struct wq_work; /* work queue for deferred job */
+       unsigned long wq_offload[1]; /* various flags on wq requested */
+       void *tdma_vaddr; /* test dma segment virtual addr */
+       dma_addr_t tdma_addr; /* test dma segment bus addr */
+
+       struct notifier_block panic_nb;
+       u32 ib_sgl_size; /* size allocated for inband sgl insertion */
+
+       /* heart beat mechanism control structure */
+       struct bcm_vk_hb_ctrl hb_ctrl;
+       /* house-keeping variable of error logs */
+       spinlock_t host_alert_lock; /* protection to access host_alert struct */
+       struct bcm_vk_alert host_alert;
+       struct bcm_vk_alert peer_alert; /* bits set by the card */
+       struct bcm_vk_alert_cnts alert_cnts;
+
+       /* offset of the peer log control in BAR2 */
+       u32 peerlog_off;
+       struct bcm_vk_peer_log peerlog_info; /* record of peer log info */
+       /* offset of processing monitoring info in BAR2 */
+       u32 proc_mon_off;
+};
+
+/* wq offload work items bits definitions */
+enum bcm_vk_wq_offload_flags {
+       BCM_VK_WQ_DWNLD_PEND = 0,
+       BCM_VK_WQ_DWNLD_AUTO = 1,
+       BCM_VK_WQ_NOTF_PEND  = 2,
+};
+
+/* a macro to get an individual field with mask and shift */
+#define BCM_VK_EXTRACT_FIELD(_field, _reg, _mask, _shift) \
+               (_field = (((_reg) >> (_shift)) & (_mask)))
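Usage sketch, as bcm_vk_log_notf() in bcm_vk_dev.c below does to pull the UECC error count out of BAR_CARD_ERR_MEM:

	reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM);
	BCM_VK_EXTRACT_FIELD(uecc_mem_err, reg, BCM_VK_MEM_ERR_FIELD_MASK,
			     BCM_VK_UECC_MEM_ERR_SHIFT);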
+
+struct bcm_vk_entry {
+       const u32 mask;
+       const u32 exp_val;
+       const char *str;
+};
+
+/* alerts that could be generated from peer */
+#define BCM_VK_PEER_ERR_NUM 12
+extern struct bcm_vk_entry const bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM];
+/* alerts detected by the host */
+#define BCM_VK_HOST_ERR_NUM 3
+extern struct bcm_vk_entry const bcm_vk_host_err[BCM_VK_HOST_ERR_NUM];
+
+/*
+ * check on read whether the PCIe interface is down.  Use it only when
+ * it is certain that val should never legitimately be all ones.
+ */
+#define BCM_VK_INTF_IS_DOWN(val) ((val) == 0xffffffff)
+
+static inline u32 vkread32(struct bcm_vk *vk, enum pci_barno bar, u64 offset)
+{
+       return readl(vk->bar[bar] + offset);
+}
+
+static inline void vkwrite32(struct bcm_vk *vk,
+                            u32 value,
+                            enum pci_barno bar,
+                            u64 offset)
+{
+       writel(value, vk->bar[bar] + offset);
+}
+
+static inline u8 vkread8(struct bcm_vk *vk, enum pci_barno bar, u64 offset)
+{
+       return readb(vk->bar[bar] + offset);
+}
+
+static inline void vkwrite8(struct bcm_vk *vk,
+                           u8 value,
+                           enum pci_barno bar,
+                           u64 offset)
+{
+       writeb(value, vk->bar[bar] + offset);
+}
+
+static inline bool bcm_vk_msgq_marker_valid(struct bcm_vk *vk)
+{
+       u32 rdy_marker = 0;
+       u32 fw_status;
+
+       fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
+
+       if ((fw_status & VK_FWSTS_READY) == VK_FWSTS_READY)
+               rdy_marker = vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
+
+       return (rdy_marker == VK_BAR1_MSGQ_RDY_MARKER);
+}
+
+int bcm_vk_open(struct inode *inode, struct file *p_file);
+ssize_t bcm_vk_read(struct file *p_file, char __user *buf, size_t count,
+                   loff_t *f_pos);
+ssize_t bcm_vk_write(struct file *p_file, const char __user *buf,
+                    size_t count, loff_t *f_pos);
+__poll_t bcm_vk_poll(struct file *p_file, struct poll_table_struct *wait);
+int bcm_vk_release(struct inode *inode, struct file *p_file);
+void bcm_vk_release_data(struct kref *kref);
+irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id);
+irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id);
+irqreturn_t bcm_vk_tty_irqhandler(int irq, void *dev_id);
+int bcm_vk_msg_init(struct bcm_vk *vk);
+void bcm_vk_msg_remove(struct bcm_vk *vk);
+void bcm_vk_drain_msg_on_reset(struct bcm_vk *vk);
+int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync);
+void bcm_vk_blk_drv_access(struct bcm_vk *vk);
+s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk);
+int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, u32 shut_type,
+                            const pid_t pid, const u32 q_num);
+void bcm_to_v_q_doorbell(struct bcm_vk *vk, u32 q_num, u32 db_val);
+int bcm_vk_auto_load_all_images(struct bcm_vk *vk);
+void bcm_vk_hb_init(struct bcm_vk *vk);
+void bcm_vk_hb_deinit(struct bcm_vk *vk);
+void bcm_vk_handle_notf(struct bcm_vk *vk);
+bool bcm_vk_drv_access_ok(struct bcm_vk *vk);
+void bcm_vk_set_host_alert(struct bcm_vk *vk, u32 bit_mask);
+
+#ifdef CONFIG_BCM_VK_TTY
+int bcm_vk_tty_init(struct bcm_vk *vk, char *name);
+void bcm_vk_tty_exit(struct bcm_vk *vk);
+void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk);
+void bcm_vk_tty_wq_exit(struct bcm_vk *vk);
+
+static inline void bcm_vk_tty_set_irq_enabled(struct bcm_vk *vk, int index)
+{
+       vk->tty[index].irq_enabled = true;
+}
+#else
+static inline int bcm_vk_tty_init(struct bcm_vk *vk, char *name)
+{
+       return 0;
+}
+
+static inline void bcm_vk_tty_exit(struct bcm_vk *vk)
+{
+}
+
+static inline void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk)
+{
+}
+
+static inline void bcm_vk_tty_wq_exit(struct bcm_vk *vk)
+{
+}
+
+static inline void bcm_vk_tty_set_irq_enabled(struct bcm_vk *vk, int index)
+{
+}
+#endif /* CONFIG_BCM_VK_TTY */
+
+#endif
diff --git a/drivers/misc/bcm-vk/bcm_vk_dev.c b/drivers/misc/bcm-vk/bcm_vk_dev.c
new file mode 100644 (file)
index 0000000..6bfea32
--- /dev/null
@@ -0,0 +1,1652 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#include <linux/delay.h>
+#include <linux/dma-mapping.h>
+#include <linux/firmware.h>
+#include <linux/fs.h>
+#include <linux/idr.h>
+#include <linux/interrupt.h>
+#include <linux/kref.h>
+#include <linux/module.h>
+#include <linux/mutex.h>
+#include <linux/pci.h>
+#include <linux/pci_regs.h>
+#include <uapi/linux/misc/bcm_vk.h>
+
+#include "bcm_vk.h"
+
+#define PCI_DEVICE_ID_VALKYRIE 0x5e87
+#define PCI_DEVICE_ID_VIPER    0x5e88
+
+static DEFINE_IDA(bcm_vk_ida);
+
+enum soc_idx {
+       VALKYRIE_A0 = 0,
+       VALKYRIE_B0,
+       VIPER,
+       VK_IDX_INVALID
+};
+
+enum img_idx {
+       IMG_PRI = 0,
+       IMG_SEC,
+       IMG_PER_TYPE_MAX
+};
+
+struct load_image_entry {
+       const u32 image_type;
+       const char *image_name[IMG_PER_TYPE_MAX];
+};
+
+#define NUM_BOOT_STAGES 2
+/* default firmware image names */
+static const struct load_image_entry image_tab[][NUM_BOOT_STAGES] = {
+       [VALKYRIE_A0] = {
+               {VK_IMAGE_TYPE_BOOT1, {"vk_a0-boot1.bin", "vk-boot1.bin"}},
+               {VK_IMAGE_TYPE_BOOT2, {"vk_a0-boot2.bin", "vk-boot2.bin"}}
+       },
+       [VALKYRIE_B0] = {
+               {VK_IMAGE_TYPE_BOOT1, {"vk_b0-boot1.bin", "vk-boot1.bin"}},
+               {VK_IMAGE_TYPE_BOOT2, {"vk_b0-boot2.bin", "vk-boot2.bin"}}
+       },
+
+       [VIPER] = {
+               {VK_IMAGE_TYPE_BOOT1, {"vp-boot1.bin", ""}},
+               {VK_IMAGE_TYPE_BOOT2, {"vp-boot2.bin", ""}}
+       },
+};
+
+/* Location of memory base addresses of interest in BAR1 */
+/* Load Boot1 to start of ITCM */
+#define BAR1_CODEPUSH_BASE_BOOT1       0x100000
+
+/* Allow minimum 1s for Load Image timeout responses */
+#define LOAD_IMAGE_TIMEOUT_MS          (1 * MSEC_PER_SEC)
+
+/* Image startup timeouts */
+#define BOOT1_STARTUP_TIMEOUT_MS       (5 * MSEC_PER_SEC)
+#define BOOT2_STARTUP_TIMEOUT_MS       (10 * MSEC_PER_SEC)
+
+/* 1ms wait for checking the transfer complete status */
+#define TXFR_COMPLETE_TIMEOUT_MS       1
+
+/* MSIX usages */
+#define VK_MSIX_MSGQ_MAX               3
+#define VK_MSIX_NOTF_MAX               1
+#define VK_MSIX_TTY_MAX                        BCM_VK_NUM_TTY
+#define VK_MSIX_IRQ_MAX                        (VK_MSIX_MSGQ_MAX + VK_MSIX_NOTF_MAX + \
+                                        VK_MSIX_TTY_MAX)
+#define VK_MSIX_IRQ_MIN_REQ             (VK_MSIX_MSGQ_MAX + VK_MSIX_NOTF_MAX)
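Spelled out: with CONFIG_BCM_VK_TTY enabled (BCM_VK_NUM_TTY = 2), VK_MSIX_IRQ_MAX = 3 (msgq) + 1 (notf) + 2 (tty) = 6 vectors, while VK_MSIX_IRQ_MIN_REQ = 3 + 1 = 4; the tty vectors are the optional extras.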
+
+/* Number of bits set in DMA mask */
+#define BCM_VK_DMA_BITS                        64
+
+/* Ucode boot wait time */
+#define BCM_VK_UCODE_BOOT_US            (100 * USEC_PER_MSEC)
+/* 50% margin */
+#define BCM_VK_UCODE_BOOT_MAX_US        ((BCM_VK_UCODE_BOOT_US * 3) >> 1)
+
+/* deinit time for the card os after receiving doorbell */
+#define BCM_VK_DEINIT_TIME_MS          (2 * MSEC_PER_SEC)
+
+/*
+ * module parameters
+ */
+static bool auto_load = true;
+module_param(auto_load, bool, 0444);
+MODULE_PARM_DESC(auto_load,
+                "Load images automatically at PCIe probe time.\n");
+static uint nr_scratch_pages = VK_BAR1_SCRATCH_DEF_NR_PAGES;
+module_param(nr_scratch_pages, uint, 0444);
+MODULE_PARM_DESC(nr_scratch_pages,
+                "Number of pre allocated DMAable coherent pages.\n");
+static uint nr_ib_sgl_blk = BCM_VK_DEF_IB_SGL_BLK_LEN;
+module_param(nr_ib_sgl_blk, uint, 0444);
+MODULE_PARM_DESC(nr_ib_sgl_blk,
+                "Number of in-band msg blks for short SGL.\n");
+
+/*
+ * alerts that could be generated from peer
+ */
+const struct bcm_vk_entry bcm_vk_peer_err[BCM_VK_PEER_ERR_NUM] = {
+       {ERR_LOG_UECC, ERR_LOG_UECC, "uecc"},
+       {ERR_LOG_SSIM_BUSY, ERR_LOG_SSIM_BUSY, "ssim_busy"},
+       {ERR_LOG_AFBC_BUSY, ERR_LOG_AFBC_BUSY, "afbc_busy"},
+       {ERR_LOG_HIGH_TEMP_ERR, ERR_LOG_HIGH_TEMP_ERR, "high_temp"},
+       {ERR_LOG_WDOG_TIMEOUT, ERR_LOG_WDOG_TIMEOUT, "wdog_timeout"},
+       {ERR_LOG_SYS_FAULT, ERR_LOG_SYS_FAULT, "sys_fault"},
+       {ERR_LOG_RAMDUMP, ERR_LOG_RAMDUMP, "ramdump"},
+       {ERR_LOG_COP_WDOG_TIMEOUT, ERR_LOG_COP_WDOG_TIMEOUT,
+        "cop_wdog_timeout"},
+       {ERR_LOG_MEM_ALLOC_FAIL, ERR_LOG_MEM_ALLOC_FAIL, "malloc_fail warn"},
+       {ERR_LOG_LOW_TEMP_WARN, ERR_LOG_LOW_TEMP_WARN, "low_temp warn"},
+       {ERR_LOG_ECC, ERR_LOG_ECC, "ecc"},
+       {ERR_LOG_IPC_DWN, ERR_LOG_IPC_DWN, "ipc_down"},
+};
+
+/* alerts detected by the host */
+const struct bcm_vk_entry bcm_vk_host_err[BCM_VK_HOST_ERR_NUM] = {
+       {ERR_LOG_HOST_PCIE_DWN, ERR_LOG_HOST_PCIE_DWN, "PCIe_down"},
+       {ERR_LOG_HOST_HB_FAIL, ERR_LOG_HOST_HB_FAIL, "hb_fail"},
+       {ERR_LOG_HOST_INTF_V_FAIL, ERR_LOG_HOST_INTF_V_FAIL, "intf_ver_fail"},
+};
+
+irqreturn_t bcm_vk_notf_irqhandler(int irq, void *dev_id)
+{
+       struct bcm_vk *vk = dev_id;
+
+       if (!bcm_vk_drv_access_ok(vk)) {
+               dev_err(&vk->pdev->dev,
+                       "Interrupt %d received when msgq not inited\n", irq);
+               goto skip_schedule_work;
+       }
+
+       /* if notification is not pending, set bit and schedule work */
+       if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload) == 0)
+               queue_work(vk->wq_thread, &vk->wq_work);
+
+skip_schedule_work:
+       return IRQ_HANDLED;
+}
+
+static int bcm_vk_intf_ver_chk(struct bcm_vk *vk)
+{
+       struct device *dev = &vk->pdev->dev;
+       u32 reg;
+       u16 major, minor;
+       int ret = 0;
+
+       /* read interface register */
+       reg = vkread32(vk, BAR_0, BAR_INTF_VER);
+       major = (reg >> BAR_INTF_VER_MAJOR_SHIFT) & BAR_INTF_VER_MASK;
+       minor = reg & BAR_INTF_VER_MASK;
+
+       /*
+        * if the major number is 0, this is a pre-release interface and is
+        * allowed to continue; otherwise check the versions for compatibility
+        */
+       if (!major) {
+               dev_warn(dev, "Pre-release major.minor=%d.%d - drv %d.%d\n",
+                        major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR);
+       } else if (major != SEMANTIC_MAJOR) {
+               dev_err(dev,
+                       "Intf major.minor=%d.%d rejected - drv %d.%d\n",
+                       major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR);
+               bcm_vk_set_host_alert(vk, ERR_LOG_HOST_INTF_V_FAIL);
+               ret = -EPFNOSUPPORT;
+       } else {
+               dev_dbg(dev,
+                       "Intf major.minor=%d.%d passed - drv %d.%d\n",
+                       major, minor, SEMANTIC_MAJOR, SEMANTIC_MINOR);
+       }
+       return ret;
+}
+
+static void bcm_vk_log_notf(struct bcm_vk *vk,
+                           struct bcm_vk_alert *alert,
+                           struct bcm_vk_entry const *entry_tab,
+                           const u32 table_size)
+{
+       u32 i;
+       u32 masked_val, latched_val;
+       struct bcm_vk_entry const *entry;
+       u32 reg;
+       u16 ecc_mem_err, uecc_mem_err;
+       struct device *dev = &vk->pdev->dev;
+
+       for (i = 0; i < table_size; i++) {
+               entry = &entry_tab[i];
+               masked_val = entry->mask & alert->notfs;
+               latched_val = entry->mask & alert->flags;
+
+               if (masked_val == ERR_LOG_UECC) {
+                       /*
+                        * if the value differs from the stored count and is
+                        * above the threshold, log it.
+                        */
+                       reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM);
+                       BCM_VK_EXTRACT_FIELD(uecc_mem_err, reg,
+                                            BCM_VK_MEM_ERR_FIELD_MASK,
+                                            BCM_VK_UECC_MEM_ERR_SHIFT);
+                       if ((uecc_mem_err != vk->alert_cnts.uecc) &&
+                           (uecc_mem_err >= BCM_VK_UECC_THRESHOLD))
+                               dev_info(dev,
+                                        "ALERT! %s.%d uecc RAISED - ErrCnt %d\n",
+                                        DRV_MODULE_NAME, vk->devid,
+                                        uecc_mem_err);
+                       vk->alert_cnts.uecc = uecc_mem_err;
+               } else if (masked_val == ERR_LOG_ECC) {
+                       reg = vkread32(vk, BAR_0, BAR_CARD_ERR_MEM);
+                       BCM_VK_EXTRACT_FIELD(ecc_mem_err, reg,
+                                            BCM_VK_MEM_ERR_FIELD_MASK,
+                                            BCM_VK_ECC_MEM_ERR_SHIFT);
+                       if ((ecc_mem_err != vk->alert_cnts.ecc) &&
+                           (ecc_mem_err >= BCM_VK_ECC_THRESHOLD))
+                               dev_info(dev, "ALERT! %s.%d ecc RAISED - ErrCnt %d\n",
+                                        DRV_MODULE_NAME, vk->devid,
+                                        ecc_mem_err);
+                       vk->alert_cnts.ecc = ecc_mem_err;
+               } else if (masked_val != latched_val) {
+                       /* print a log as info */
+                       dev_info(dev, "ALERT! %s.%d %s %s\n",
+                                DRV_MODULE_NAME, vk->devid, entry->str,
+                                masked_val ? "RAISED" : "CLEARED");
+               }
+       }
+}
+
+static void bcm_vk_dump_peer_log(struct bcm_vk *vk)
+{
+       struct bcm_vk_peer_log log;
+       struct bcm_vk_peer_log *log_info = &vk->peerlog_info;
+       char loc_buf[BCM_VK_PEER_LOG_LINE_MAX];
+       int cnt;
+       struct device *dev = &vk->pdev->dev;
+       unsigned int data_offset;
+
+       memcpy_fromio(&log, vk->bar[BAR_2] + vk->peerlog_off, sizeof(log));
+
+       dev_dbg(dev, "Peer PANIC: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n",
+               log.buf_size, log.mask, log.rd_idx, log.wr_idx);
+
+       if (!log_info->buf_size) {
+               dev_err(dev, "Peer log dump disabled - skipped!\n");
+               return;
+       }
+
+       /* perform range checking for rd/wr idx */
+       if ((log.rd_idx > log_info->mask) ||
+           (log.wr_idx > log_info->mask) ||
+           (log.buf_size != log_info->buf_size) ||
+           (log.mask != log_info->mask)) {
+               dev_err(dev,
+                       "Corrupted Ptrs: Size 0x%x(0x%x) Mask 0x%x(0x%x) [Rd Wr] = [%d %d], skip log dump.\n",
+                       log_info->buf_size, log.buf_size,
+                       log_info->mask, log.mask,
+                       log.rd_idx, log.wr_idx);
+               return;
+       }
+
+       cnt = 0;
+       data_offset = vk->peerlog_off + sizeof(struct bcm_vk_peer_log);
+       loc_buf[BCM_VK_PEER_LOG_LINE_MAX - 1] = '\0';
+       while (log.rd_idx != log.wr_idx) {
+               loc_buf[cnt] = vkread8(vk, BAR_2, data_offset + log.rd_idx);
+
+               if ((loc_buf[cnt] == '\0') ||
+                   (cnt == (BCM_VK_PEER_LOG_LINE_MAX - 1))) {
+                       dev_err(dev, "%s", loc_buf);
+                       cnt = 0;
+               } else {
+                       cnt++;
+               }
+               log.rd_idx = (log.rd_idx + 1) & log.mask;
+       }
+       /* update rd idx at the end */
+       vkwrite32(vk, log.rd_idx, BAR_2,
+                 vk->peerlog_off + offsetof(struct bcm_vk_peer_log, rd_idx));
+}
+
+void bcm_vk_handle_notf(struct bcm_vk *vk)
+{
+       u32 reg;
+       struct bcm_vk_alert alert;
+       bool intf_down;
+       unsigned long flags;
+
+       /* handle peer alerts and then locally detected ones */
+       reg = vkread32(vk, BAR_0, BAR_CARD_ERR_LOG);
+       intf_down = BCM_VK_INTF_IS_DOWN(reg);
+       if (!intf_down) {
+               vk->peer_alert.notfs = reg;
+               bcm_vk_log_notf(vk, &vk->peer_alert, bcm_vk_peer_err,
+                               ARRAY_SIZE(bcm_vk_peer_err));
+               vk->peer_alert.flags = vk->peer_alert.notfs;
+       } else {
+               /* turn off access */
+               bcm_vk_blk_drv_access(vk);
+       }
+
+       /* take a copy of the alert under the lock, then release the lock */
+       spin_lock_irqsave(&vk->host_alert_lock, flags);
+       if (intf_down)
+               vk->host_alert.notfs |= ERR_LOG_HOST_PCIE_DWN;
+
+       alert = vk->host_alert;
+       vk->host_alert.flags = vk->host_alert.notfs;
+       spin_unlock_irqrestore(&vk->host_alert_lock, flags);
+
+       /* call display with copy */
+       bcm_vk_log_notf(vk, &alert, bcm_vk_host_err,
+                       ARRAY_SIZE(bcm_vk_host_err));
+
+       /*
+        * If it is a sys fault or heartbeat timeout, extract the log
+        * messages from the card so that we know what the last fault was
+        */
+       if (!intf_down &&
+           ((vk->host_alert.flags & ERR_LOG_HOST_HB_FAIL) ||
+            (vk->peer_alert.flags & ERR_LOG_SYS_FAULT)))
+               bcm_vk_dump_peer_log(vk);
+}
+
+static inline int bcm_vk_wait(struct bcm_vk *vk, enum pci_barno bar,
+                             u64 offset, u32 mask, u32 value,
+                             unsigned long timeout_ms)
+{
+       struct device *dev = &vk->pdev->dev;
+       unsigned long start_time;
+       unsigned long timeout;
+       u32 rd_val, boot_status;
+
+       start_time = jiffies;
+       timeout = start_time + msecs_to_jiffies(timeout_ms);
+
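+       /* poll until (reg & mask) == value; bail out on boot error or timeout */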
+       do {
+               rd_val = vkread32(vk, bar, offset);
+               dev_dbg(dev, "BAR%d Offset=0x%llx: 0x%x\n",
+                       bar, offset, rd_val);
+
+               /* check for any boot err condition */
+               boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+               if (boot_status & BOOT_ERR_MASK) {
+                       dev_err(dev, "Boot Err 0x%x, progress 0x%x after %d ms\n",
+                               (boot_status & BOOT_ERR_MASK) >> BOOT_ERR_SHIFT,
+                               boot_status & BOOT_PROG_MASK,
+                               jiffies_to_msecs(jiffies - start_time));
+                       return -EFAULT;
+               }
+
+               if (time_after(jiffies, timeout))
+                       return -ETIMEDOUT;
+
+               cpu_relax();
+               cond_resched();
+       } while ((rd_val & mask) != value);
+
+       return 0;
+}
+
+static void bcm_vk_get_card_info(struct bcm_vk *vk)
+{
+       struct device *dev = &vk->pdev->dev;
+       u32 offset;
+       int i;
+       u8 *dst;
+       struct bcm_vk_card_info *info = &vk->card_info;
+
+       /* first read the offset from spare register */
+       offset = vkread32(vk, BAR_0, BAR_CARD_STATIC_INFO);
+       offset &= (pci_resource_len(vk->pdev, BAR_2 * 2) - 1);
+
+       /* based on the offset, read the info into the card info structure */
+       dst = (u8 *)info;
+       for (i = 0; i < sizeof(*info); i++)
+               *dst++ = vkread8(vk, BAR_2, offset++);
+
+#define CARD_INFO_LOG_FMT "version   : %x\n" \
+                         "os_tag    : %s\n" \
+                         "cmpt_tag  : %s\n" \
+                         "cpu_freq  : %d MHz\n" \
+                         "cpu_scale : %d full, %d lowest\n" \
+                         "ddr_freq  : %d MHz\n" \
+                         "ddr_size  : %d MB\n" \
+                         "video_freq: %d MHz\n"
+       dev_dbg(dev, CARD_INFO_LOG_FMT, info->version, info->os_tag,
+               info->cmpt_tag, info->cpu_freq_mhz, info->cpu_scale[0],
+               info->cpu_scale[MAX_OPP - 1], info->ddr_freq_mhz,
+               info->ddr_size_MB, info->video_core_freq_mhz);
+
+       /*
+        * Get the peer log pointer; only the offset is needed.  Also keep
+        * a record of the log buffer information, which is used for sanity
+        * checking before a dump in case the BAR2 memory gets corrupted.
+        */
+       vk->peerlog_off = offset;
+       memcpy_fromio(&vk->peerlog_info, vk->bar[BAR_2] + vk->peerlog_off,
+                     sizeof(vk->peerlog_info));
+
+       /*
+        * Do range checking; if out of bounds, the record is zeroed, which
+        * guarantees that nothing will be dumped.  In other words, the peer
+        * log dump is disabled.  The mask check also enforces that the
+        * buffer size is a power of two, which the ring arithmetic relies
+        * on.
+        */
+       if ((vk->peerlog_info.buf_size > BCM_VK_PEER_LOG_BUF_MAX) ||
+           (vk->peerlog_info.mask != (vk->peerlog_info.buf_size - 1)) ||
+           (vk->peerlog_info.rd_idx > vk->peerlog_info.mask) ||
+           (vk->peerlog_info.wr_idx > vk->peerlog_info.mask)) {
+               dev_err(dev, "Peer log disabled - range error: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n",
+                       vk->peerlog_info.buf_size,
+                       vk->peerlog_info.mask,
+                       vk->peerlog_info.rd_idx,
+                       vk->peerlog_info.wr_idx);
+               memset(&vk->peerlog_info, 0, sizeof(vk->peerlog_info));
+       } else {
+               dev_dbg(dev, "Peer log: Size 0x%x(0x%x), [Rd Wr] = [%d %d]\n",
+                       vk->peerlog_info.buf_size,
+                       vk->peerlog_info.mask,
+                       vk->peerlog_info.rd_idx,
+                       vk->peerlog_info.wr_idx);
+       }
+}
+
+static void bcm_vk_get_proc_mon_info(struct bcm_vk *vk)
+{
+       struct device *dev = &vk->pdev->dev;
+       struct bcm_vk_proc_mon_info *mon = &vk->proc_mon_info;
+       u32 num, entry_size, offset, buf_size;
+       u8 *dst;
+
+       /* calculate offset which is based on peerlog offset */
+       buf_size = vkread32(vk, BAR_2,
+                           vk->peerlog_off
+                           + offsetof(struct bcm_vk_peer_log, buf_size));
+       offset = vk->peerlog_off + sizeof(struct bcm_vk_peer_log)
+                + buf_size;
+
+       /* first read the number of entries and the entry size */
+       num = vkread32(vk, BAR_2, offset);
+       entry_size = vkread32(vk, BAR_2, offset + sizeof(num));
+
+       /* check for max allowed */
+       if (num > BCM_VK_PROC_MON_MAX) {
+               dev_err(dev, "Processing monitoring entry %d exceeds max %d\n",
+                       num, BCM_VK_PROC_MON_MAX);
+               return;
+       }
+       mon->num = num;
+       mon->entry_size = entry_size;
+
+       vk->proc_mon_off = offset;
+
+       /* read it once to capture the static info */
+       dst = (u8 *)&mon->entries[0];
+       offset += sizeof(num) + sizeof(entry_size);
+       memcpy_fromio(dst, vk->bar[BAR_2] + offset, num * entry_size);
+}
+
+static int bcm_vk_sync_card_info(struct bcm_vk *vk)
+{
+       u32 rdy_marker = vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
+
+       /* check for marker, but allow diags mode to skip sync */
+       if (!bcm_vk_msgq_marker_valid(vk))
+               return (rdy_marker == VK_BAR1_DIAG_RDY_MARKER ? 0 : -EINVAL);
+
+       /*
+        * Write down the scratch address used for DMA.  For the signed
+        * part, BAR1 is accessible only after boot2 has come up.
+        */
+       if (vk->tdma_addr) {
+               vkwrite32(vk, (u64)vk->tdma_addr >> 32, BAR_1,
+                         VK_BAR1_SCRATCH_OFF_HI);
+               vkwrite32(vk, (u32)vk->tdma_addr, BAR_1,
+                         VK_BAR1_SCRATCH_OFF_LO);
+               vkwrite32(vk, nr_scratch_pages * PAGE_SIZE, BAR_1,
+                         VK_BAR1_SCRATCH_SZ_ADDR);
+       }
+
+       /* get static card info, only need to read once */
+       bcm_vk_get_card_info(vk);
+
+       /* get the proc mon info once */
+       bcm_vk_get_proc_mon_info(vk);
+
+       return 0;
+}
+
+void bcm_vk_blk_drv_access(struct bcm_vk *vk)
+{
+       int i;
+
+       /*
+        * kill all the apps except for the process that is resetting.
+        * If not called during reset, reset_pid will be 0, and all will be
+        * killed.
+        */
+       spin_lock(&vk->ctx_lock);
+
+       /* set msgq_inited to 0 so that all rd/wr will be blocked */
+       atomic_set(&vk->msgq_inited, 0);
+
+       for (i = 0; i < VK_PID_HT_SZ; i++) {
+               struct bcm_vk_ctx *ctx;
+
+               list_for_each_entry(ctx, &vk->pid_ht[i].head, node) {
+                       if (ctx->pid != vk->reset_pid) {
+                               dev_dbg(&vk->pdev->dev,
+                                       "Send kill signal to pid %d\n",
+                                       ctx->pid);
+                               kill_pid(find_vpid(ctx->pid), SIGKILL, 1);
+                       }
+               }
+       }
+       bcm_vk_tty_terminate_tty_user(vk);
+       spin_unlock(&vk->ctx_lock);
+}
+
+static void bcm_vk_buf_notify(struct bcm_vk *vk, void *bufp,
+                             dma_addr_t host_buf_addr, u32 buf_size)
+{
+       /* update the dma address to the card */
+       vkwrite32(vk, (u64)host_buf_addr >> 32, BAR_1,
+                 VK_BAR1_DMA_BUF_OFF_HI);
+       vkwrite32(vk, (u32)host_buf_addr, BAR_1,
+                 VK_BAR1_DMA_BUF_OFF_LO);
+       vkwrite32(vk, buf_size, BAR_1, VK_BAR1_DMA_BUF_SZ);
+}
+
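+/*
+ * Load one boot image to the card.  BOOT1 is copied into BAR1 SRAM space
+ * once SRAM_OPEN is signaled; BOOT2 is streamed through a DMA buffer in
+ * chunks of max_buf, each chunk signaled via the codepush register until
+ * the loader acks that all data has been received.
+ */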
+static int bcm_vk_load_image_by_type(struct bcm_vk *vk, u32 load_type,
+                                    const char *filename)
+{
+       struct device *dev = &vk->pdev->dev;
+       const struct firmware *fw = NULL;
+       void *bufp = NULL;
+       size_t max_buf, offset;
+       int ret;
+       u64 offset_codepush;
+       u32 codepush;
+       u32 value;
+       dma_addr_t boot_dma_addr;
+       bool is_stdalone;
+
+       if (load_type == VK_IMAGE_TYPE_BOOT1) {
+               /*
+                * After POR, enable VK soft BOOTSRC so the bootrom does not
+                * clear the pushed image (the TCM memories).
+                */
+               value = vkread32(vk, BAR_0, BAR_BOOTSRC_SELECT);
+               value |= BOOTSRC_SOFT_ENABLE;
+               vkwrite32(vk, value, BAR_0, BAR_BOOTSRC_SELECT);
+
+               codepush = CODEPUSH_BOOTSTART + CODEPUSH_BOOT1_ENTRY;
+               offset_codepush = BAR_CODEPUSH_SBL;
+
+               /* Write a 1 to request SRAM open bit */
+               vkwrite32(vk, CODEPUSH_BOOTSTART, BAR_0, offset_codepush);
+
+               /* Wait for VK to respond */
+               ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, SRAM_OPEN,
+                                 SRAM_OPEN, LOAD_IMAGE_TIMEOUT_MS);
+               if (ret < 0) {
+                       dev_err(dev, "boot1 wait SRAM err - ret(%d)\n", ret);
+                       goto err_buf_out;
+               }
+
+               max_buf = SZ_256K;
+               bufp = dma_alloc_coherent(dev,
+                                         max_buf,
+                                         &boot_dma_addr, GFP_KERNEL);
+               if (!bufp) {
+                       dev_err(dev, "Error allocating 0x%zx\n", max_buf);
+                       ret = -ENOMEM;
+                       goto err_buf_out;
+               }
+       } else if (load_type == VK_IMAGE_TYPE_BOOT2) {
+               codepush = CODEPUSH_BOOT2_ENTRY;
+               offset_codepush = BAR_CODEPUSH_SBI;
+
+               /* Wait for VK to respond */
+               ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS, DDR_OPEN,
+                                 DDR_OPEN, LOAD_IMAGE_TIMEOUT_MS);
+               if (ret < 0) {
+                       dev_err(dev, "boot2 wait DDR open error - ret(%d)\n",
+                               ret);
+                       goto err_buf_out;
+               }
+
+               max_buf = SZ_4M;
+               bufp = dma_alloc_coherent(dev,
+                                         max_buf,
+                                         &boot_dma_addr, GFP_KERNEL);
+               if (!bufp) {
+                       dev_err(dev, "Error allocating 0x%zx\n", max_buf);
+                       ret = -ENOMEM;
+                       goto err_buf_out;
+               }
+
+               bcm_vk_buf_notify(vk, bufp, boot_dma_addr, max_buf);
+       } else {
+               dev_err(dev, "Error invalid image type 0x%x\n", load_type);
+               ret = -EINVAL;
+               goto err_buf_out;
+       }
+
+       offset = 0;
+       ret = request_partial_firmware_into_buf(&fw, filename, dev,
+                                               bufp, max_buf, offset);
+       if (ret) {
+               dev_err(dev, "Error %d requesting firmware file: %s\n",
+                       ret, filename);
+               goto err_firmware_out;
+       }
+       dev_dbg(dev, "size=0x%zx\n", fw->size);
+       if (load_type == VK_IMAGE_TYPE_BOOT1)
+               memcpy_toio(vk->bar[BAR_1] + BAR1_CODEPUSH_BASE_BOOT1,
+                           bufp,
+                           fw->size);
+
+       dev_dbg(dev, "Signaling 0x%x to 0x%llx\n", codepush, offset_codepush);
+       vkwrite32(vk, codepush, BAR_0, offset_codepush);
+
+       if (load_type == VK_IMAGE_TYPE_BOOT1) {
+               u32 boot_status;
+
+               /* wait until done */
+               ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS,
+                                 BOOT1_RUNNING,
+                                 BOOT1_RUNNING,
+                                 BOOT1_STARTUP_TIMEOUT_MS);
+
+               boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+               is_stdalone = !BCM_VK_INTF_IS_DOWN(boot_status) &&
+                             (boot_status & BOOT_STDALONE_RUNNING);
+               if (ret && !is_stdalone) {
+                       dev_err(dev,
+                               "Timeout %ld ms waiting for boot1 to come up - ret(%d)\n",
+                               BOOT1_STARTUP_TIMEOUT_MS, ret);
+                       goto err_firmware_out;
+               } else if (is_stdalone) {
+                       u32 reg;
+
+                       reg = vkread32(vk, BAR_0, BAR_BOOT1_STDALONE_PROGRESS);
+                       if ((reg & BOOT1_STDALONE_PROGRESS_MASK) ==
+                                    BOOT1_STDALONE_SUCCESS) {
+                               dev_info(dev, "Boot1 standalone success\n");
+                               ret = 0;
+                       } else {
+                               dev_err(dev, "Timeout %ld ms - Boot1 standalone failure\n",
+                                       BOOT1_STARTUP_TIMEOUT_MS);
+                               ret = -EINVAL;
+                               goto err_firmware_out;
+                       }
+               }
+       } else if (load_type == VK_IMAGE_TYPE_BOOT2) {
+               unsigned long timeout;
+
+               timeout = jiffies + msecs_to_jiffies(LOAD_IMAGE_TIMEOUT_MS);
+
+               /* send the image to VK in chunks of at most max_buf each */
+               do {
+                       /*
+                        * Check for an ack from the card.  When the ack is
+                        * received, the card has received all the data;
+                        * exit the loop.
+                        */
+                       ret = bcm_vk_wait(vk, BAR_0, BAR_BOOT_STATUS,
+                                         FW_LOADER_ACK_RCVD_ALL_DATA,
+                                         FW_LOADER_ACK_RCVD_ALL_DATA,
+                                         TXFR_COMPLETE_TIMEOUT_MS);
+                       if (ret == 0) {
+                               dev_dbg(dev, "Exit boot2 download\n");
+                               break;
+                       } else if (ret == -EFAULT) {
+                               dev_err(dev, "Error detected during ACK waiting");
+                               goto err_firmware_out;
+                       }
+
+                       /* exit the loop if there is no response from the card */
+                       if (time_after(jiffies, timeout)) {
+                               dev_err(dev, "Error. No reply from card\n");
+                               ret = -ETIMEDOUT;
+                               goto err_firmware_out;
+                       }
+
+                       /* Wait for VK to open BAR space to copy new data */
+                       ret = bcm_vk_wait(vk, BAR_0, offset_codepush,
+                                         codepush, 0,
+                                         TXFR_COMPLETE_TIMEOUT_MS);
+                       if (ret == 0) {
+                               offset += max_buf;
+                               ret = request_partial_firmware_into_buf
+                                               (&fw,
+                                                filename,
+                                                dev, bufp,
+                                                max_buf,
+                                                offset);
+                               if (ret) {
+                                       dev_err(dev,
+                                               "Error %d requesting firmware file: %s offset: 0x%zx\n",
+                                               ret, filename, offset);
+                                       goto err_firmware_out;
+                               }
+                               dev_dbg(dev, "size=0x%zx\n", fw->size);
+                               dev_dbg(dev, "Signaling 0x%x to 0x%llx\n",
+                                       codepush, offset_codepush);
+                               vkwrite32(vk, codepush, BAR_0, offset_codepush);
+                               /* reload timeout after every codepush */
+                               timeout = jiffies +
+                                   msecs_to_jiffies(LOAD_IMAGE_TIMEOUT_MS);
+                       } else if (ret == -EFAULT) {
+                               dev_err(dev, "Error detected waiting for transfer\n");
+                               goto err_firmware_out;
+                       }
+               } while (1);
+
+               /* wait for fw status bits to indicate app ready */
+               ret = bcm_vk_wait(vk, BAR_0, VK_BAR_FWSTS,
+                                 VK_FWSTS_READY,
+                                 VK_FWSTS_READY,
+                                 BOOT2_STARTUP_TIMEOUT_MS);
+               if (ret < 0) {
+                       dev_err(dev, "Boot2 not ready - ret(%d)\n", ret);
+                       goto err_firmware_out;
+               }
+
+               is_stdalone = vkread32(vk, BAR_0, BAR_BOOT_STATUS) &
+                             BOOT_STDALONE_RUNNING;
+               if (!is_stdalone) {
+                       ret = bcm_vk_intf_ver_chk(vk);
+                       if (ret) {
+                               dev_err(dev, "failure in intf version check\n");
+                               goto err_firmware_out;
+                       }
+
+                       /*
+                        * Next, initialize the message Q since we are
+                        * loading boot2.  Do a forced sync.
+                        */
+                       ret = bcm_vk_sync_msgq(vk, true);
+                       if (ret) {
+                               dev_err(dev, "Boot2 Error reading comm msg Q info\n");
+                               ret = -EIO;
+                               goto err_firmware_out;
+                       }
+
+                       /* sync & channel other info */
+                       ret = bcm_vk_sync_card_info(vk);
+                       if (ret) {
+                               dev_err(dev, "Syncing Card Info failure\n");
+                               goto err_firmware_out;
+                       }
+               }
+       }
+
+err_firmware_out:
+       release_firmware(fw);
+
+err_buf_out:
+       if (bufp)
+               dma_free_coherent(dev, max_buf, bufp, boot_dma_addr);
+
+       return ret;
+}
+
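+/*
+ * Determine which image the card expects next: SRAM_OPEN means the ROM is
+ * waiting for boot1, BOOT1_RUNNING means boot2 can be pushed next, and
+ * anything else yields 0 (no loadable image).
+ */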
+static u32 bcm_vk_next_boot_image(struct bcm_vk *vk)
+{
+       u32 boot_status;
+       u32 fw_status;
+       u32 load_type = 0;  /* default for unknown */
+
+       boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+       fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
+
+       if (!BCM_VK_INTF_IS_DOWN(boot_status) && (boot_status & SRAM_OPEN))
+               load_type = VK_IMAGE_TYPE_BOOT1;
+       else if (boot_status == BOOT1_RUNNING)
+               load_type = VK_IMAGE_TYPE_BOOT2;
+
+       /* Log status so that we know different stages */
+       dev_info(&vk->pdev->dev,
+                "boot-status value for next image: 0x%x : fw-status 0x%x\n",
+                boot_status, fw_status);
+
+       return load_type;
+}
+
+static enum soc_idx get_soc_idx(struct bcm_vk *vk)
+{
+       struct pci_dev *pdev = vk->pdev;
+       enum soc_idx idx = VK_IDX_INVALID;
+       u32 rev;
+       static enum soc_idx const vk_soc_tab[] = { VALKYRIE_A0, VALKYRIE_B0 };
+
+       switch (pdev->device) {
+       case PCI_DEVICE_ID_VALKYRIE:
+               /* get the chip id to decide sub-class */
+               rev = MAJOR_SOC_REV(vkread32(vk, BAR_0, BAR_CHIP_ID));
+               if (rev < ARRAY_SIZE(vk_soc_tab)) {
+                       idx = vk_soc_tab[rev];
+               } else {
+                       /* Default to A0 firmware for all other chip revs */
+                       idx = VALKYRIE_A0;
+                       dev_warn(&pdev->dev,
+                                "Rev %d not in image lookup table, default to idx=%d\n",
+                                rev, idx);
+               }
+               break;
+
+       case PCI_DEVICE_ID_VIPER:
+               idx = VIPER;
+               break;
+
+       default:
+               dev_err(&pdev->dev, "no images for 0x%x\n", pdev->device);
+       }
+       return idx;
+}
+
+static const char *get_load_fw_name(struct bcm_vk *vk,
+                                   const struct load_image_entry *entry)
+{
+       const struct firmware *fw;
+       struct device *dev = &vk->pdev->dev;
+       int ret;
+       unsigned long dummy;
+       int i;
+
+       for (i = 0; i < IMG_PER_TYPE_MAX; i++) {
+               fw = NULL;
+               ret = request_partial_firmware_into_buf(&fw,
+                                                       entry->image_name[i],
+                                                       dev, &dummy,
+                                                       sizeof(dummy),
+                                                       0);
+               release_firmware(fw);
+               if (!ret)
+                       return entry->image_name[i];
+       }
+       return NULL;
+}
+
+int bcm_vk_auto_load_all_images(struct bcm_vk *vk)
+{
+       int i, ret = -1;
+       enum soc_idx idx;
+       struct device *dev = &vk->pdev->dev;
+       u32 curr_type;
+       const char *curr_name;
+
+       idx = get_soc_idx(vk);
+       if (idx == VK_IDX_INVALID)
+               goto auto_load_all_exit;
+
+       /* log a message to know the relative loading order */
+       dev_dbg(dev, "Load All for device %d\n", vk->devid);
+
+       for (i = 0; i < NUM_BOOT_STAGES; i++) {
+               curr_type = image_tab[idx][i].image_type;
+               if (bcm_vk_next_boot_image(vk) == curr_type) {
+                       curr_name = get_load_fw_name(vk, &image_tab[idx][i]);
+                       if (!curr_name) {
+                               dev_err(dev, "No suitable firmware exists for type %d",
+                                       curr_type);
+                               ret = -ENOENT;
+                               goto auto_load_all_exit;
+                       }
+                       ret = bcm_vk_load_image_by_type(vk, curr_type,
+                                                       curr_name);
+                       dev_info(dev, "Auto load %s, ret %d\n",
+                                curr_name, ret);
+
+                       if (ret) {
+                               dev_err(dev, "Error loading default %s\n",
+                                       curr_name);
+                               goto auto_load_all_exit;
+                       }
+               }
+       }
+
+auto_load_all_exit:
+       return ret;
+}
+
+static int bcm_vk_trigger_autoload(struct bcm_vk *vk)
+{
+       if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0)
+               return -EPERM;
+
+       set_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload);
+       queue_work(vk->wq_thread, &vk->wq_work);
+
+       return 0;
+}
+
+/*
+ * deferred work queue for draining and auto download.
+ */
+static void bcm_vk_wq_handler(struct work_struct *work)
+{
+       struct bcm_vk *vk = container_of(work, struct bcm_vk, wq_work);
+       struct device *dev = &vk->pdev->dev;
+       s32 ret;
+
+       /* check wq offload bit map to perform various operations */
+       if (test_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload)) {
+               /* clear the notification bit right away */
+               clear_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload);
+               bcm_vk_handle_notf(vk);
+       }
+       if (test_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload)) {
+               bcm_vk_auto_load_all_images(vk);
+
+               /*
+                * at the end of the operation, clear the AUTO and
+                * pending bits
+                */
+               clear_bit(BCM_VK_WQ_DWNLD_AUTO, vk->wq_offload);
+               clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
+       }
+
+       /* next, try to drain */
+       ret = bcm_to_h_msg_dequeue(vk);
+
+       if (ret == 0)
+               dev_dbg(dev, "Spurious trigger for workqueue\n");
+       else if (ret < 0)
+               bcm_vk_blk_drv_access(vk);
+}
+
+static long bcm_vk_load_image(struct bcm_vk *vk,
+                             const struct vk_image __user *arg)
+{
+       struct device *dev = &vk->pdev->dev;
+       const char *image_name;
+       struct vk_image image;
+       u32 next_loadable;
+       enum soc_idx idx;
+       int image_idx;
+       int ret = -EPERM;
+
+       if (copy_from_user(&image, arg, sizeof(image)))
+               return -EACCES;
+
+       if ((image.type != VK_IMAGE_TYPE_BOOT1) &&
+           (image.type != VK_IMAGE_TYPE_BOOT2)) {
+               dev_err(dev, "invalid image.type %u\n", image.type);
+               return ret;
+       }
+
+       next_loadable = bcm_vk_next_boot_image(vk);
+       if (next_loadable != image.type) {
+               dev_err(dev, "Next expected image %u, Loading %u\n",
+                       next_loadable, image.type);
+               return ret;
+       }
+
+       /*
+        * Check if a download is already pending.  For now this can only
+        * happen when the driver is being loaded, or if someone has issued
+        * another download command from another shell.
+        */
+       if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0) {
+               dev_err(dev, "Download operation already pending.\n");
+               return ret;
+       }
+
+       image_name = image.filename;
+       if (image_name[0] == '\0') {
+               /* Use default image name if NULL */
+               idx = get_soc_idx(vk);
+               if (idx == VK_IDX_INVALID)
+                       goto err_idx;
+
+               /* Image idx starts with boot1 */
+               image_idx = image.type - VK_IMAGE_TYPE_BOOT1;
+               image_name = get_load_fw_name(vk, &image_tab[idx][image_idx]);
+               if (!image_name) {
+                       dev_err(dev, "No suitable image found for type %d",
+                               image.type);
+                       ret = -ENOENT;
+                       goto err_idx;
+               }
+       } else {
+               /* Ensure filename is NULL terminated */
+               image.filename[sizeof(image.filename) - 1] = '\0';
+       }
+       ret = bcm_vk_load_image_by_type(vk, image.type, image_name);
+       dev_info(dev, "Load %s, ret %d\n", image_name, ret);
+err_idx:
+       clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
+
+       return ret;
+}
+
+static int bcm_vk_reset_successful(struct bcm_vk *vk)
+{
+       struct device *dev = &vk->pdev->dev;
+       u32 fw_status, reset_reason;
+       int ret = -EAGAIN;
+
+       /*
+        * Reset could be triggered while the card is in several states:
+        *   i)   in bootROM
+        *   ii)  after boot1
+        *   iii) boot2 running
+        *
+        * i) & ii) - no status bits will be updated.  If vkboot1
+        * runs automatically after reset, it will set the reason
+        * to unknown.
+        * iii) - reboot reason matches + deinit done.
+        */
+       fw_status = vkread32(vk, BAR_0, VK_BAR_FWSTS);
+       /* immediate exit if interface goes down */
+       if (BCM_VK_INTF_IS_DOWN(fw_status)) {
+               dev_err(dev, "PCIe Intf Down!\n");
+               goto reset_exit;
+       }
+
+       reset_reason = (fw_status & VK_FWSTS_RESET_REASON_MASK);
+       if ((reset_reason == VK_FWSTS_RESET_MBOX_DB) ||
+           (reset_reason == VK_FWSTS_RESET_UNKNOWN))
+               ret = 0;
+
+       /*
+        * if some of the deinit bits are set but the done bit is not,
+        * this is a failure when triggered while boot2 is running
+        */
+       if ((fw_status & VK_FWSTS_DEINIT_TRIGGERED) &&
+           !(fw_status & VK_FWSTS_RESET_DONE))
+               ret = -EAGAIN;
+
+reset_exit:
+       dev_dbg(dev, "FW status = 0x%x ret %d\n", fw_status, ret);
+
+       return ret;
+}
+
+static void bcm_to_v_reset_doorbell(struct bcm_vk *vk, u32 db_val)
+{
+       vkwrite32(vk, db_val, BAR_0, VK_BAR0_RESET_DB_BASE);
+}
+
+static int bcm_vk_trigger_reset(struct bcm_vk *vk)
+{
+       u32 i;
+       u32 value, boot_status;
+       bool is_stdalone, is_boot2;
+       static const u32 bar0_reg_clr_list[] = { BAR_OS_UPTIME,
+                                                BAR_INTF_VER,
+                                                BAR_CARD_VOLTAGE,
+                                                BAR_CARD_TEMPERATURE,
+                                                BAR_CARD_PWR_AND_THRE };
+
+       /* clean up before pressing the doorbell */
+       bcm_vk_drain_msg_on_reset(vk);
+       vkwrite32(vk, 0, BAR_1, VK_BAR1_MSGQ_DEF_RDY);
+       /* make tag '\0' terminated */
+       vkwrite32(vk, 0, BAR_1, VK_BAR1_BOOT1_VER_TAG);
+
+       for (i = 0; i < VK_BAR1_DAUTH_MAX; i++) {
+               vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_STORE_ADDR(i));
+               vkwrite32(vk, 0, BAR_1, VK_BAR1_DAUTH_VALID_ADDR(i));
+       }
+       for (i = 0; i < VK_BAR1_SOTP_REVID_MAX; i++)
+               vkwrite32(vk, 0, BAR_1, VK_BAR1_SOTP_REVID_ADDR(i));
+
+       memset(&vk->card_info, 0, sizeof(vk->card_info));
+       memset(&vk->peerlog_info, 0, sizeof(vk->peerlog_info));
+       memset(&vk->proc_mon_info, 0, sizeof(vk->proc_mon_info));
+       memset(&vk->alert_cnts, 0, sizeof(vk->alert_cnts));
+
+       /*
+        * When a boot request fails, CODE_PUSH_OFFSET stays persistent,
+        * allowing us to debug the failure.  When we call reset, we should
+        * clear CODE_PUSH_OFFSET so the ROM does not execute the boot again
+        * (and fail again) and instead waits for a new codepush.  Also, if
+        * the previous boot encountered an error, the entry values need to
+        * be cleared.
+        */
+       boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+       if (boot_status & BOOT_ERR_MASK) {
+               dev_info(&vk->pdev->dev,
+                        "Card in boot error 0x%x, clear CODEPUSH val\n",
+                        boot_status);
+               value = 0;
+       } else {
+               value = vkread32(vk, BAR_0, BAR_CODEPUSH_SBL);
+               value &= CODEPUSH_MASK;
+       }
+       vkwrite32(vk, value, BAR_0, BAR_CODEPUSH_SBL);
+
+       /* special reset handling */
+       is_stdalone = boot_status & BOOT_STDALONE_RUNNING;
+       is_boot2 = (boot_status & BOOT_STATE_MASK) == BOOT2_RUNNING;
+       if (vk->peer_alert.flags & ERR_LOG_RAMDUMP) {
+               /*
+                * if the card is in ramdump mode, it has hit an error.  Don't
+                * reset the reboot reason as it contains valid info that is
+                * important; simply use the special reset.
+                */
+               vkwrite32(vk, VK_BAR0_RESET_RAMPDUMP, BAR_0, VK_BAR_FWSTS);
+               return VK_BAR0_RESET_RAMPDUMP;
+       } else if (is_stdalone && !is_boot2) {
+               dev_info(&vk->pdev->dev, "Hard reset on Standalone mode");
+               bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_HARD);
+               return VK_BAR0_RESET_DB_HARD;
+       }
+
+       /* reset fw_status with proper reason, and press db */
+       vkwrite32(vk, VK_FWSTS_RESET_MBOX_DB, BAR_0, VK_BAR_FWSTS);
+       bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_SOFT);
+
+       /* clear other necessary registers and alert records */
+       for (i = 0; i < ARRAY_SIZE(bar0_reg_clr_list); i++)
+               vkwrite32(vk, 0, BAR_0, bar0_reg_clr_list[i]);
+       memset(&vk->host_alert, 0, sizeof(vk->host_alert));
+       memset(&vk->peer_alert, 0, sizeof(vk->peer_alert));
+       /* clear 4096 bits of bitmap */
+       bitmap_clear(vk->bmap, 0, VK_MSG_ID_BITMAP_SIZE);
+
+       return 0;
+}
+
+static long bcm_vk_reset(struct bcm_vk *vk, struct vk_reset __user *arg)
+{
+       struct device *dev = &vk->pdev->dev;
+       struct vk_reset reset;
+       int ret = 0;
+       u32 ramdump_reset;
+       int special_reset;
+
+       if (copy_from_user(&reset, arg, sizeof(struct vk_reset)))
+               return -EFAULT;
+
+       /* check if any download is in-progress, if so return error */
+       if (test_and_set_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload) != 0) {
+               dev_err(dev, "Download operation pending - skip reset.\n");
+               return -EPERM;
+       }
+
+       ramdump_reset = vk->peer_alert.flags & ERR_LOG_RAMDUMP;
+       dev_info(dev, "Issue Reset %s\n",
+                ramdump_reset ? "in ramdump mode" : "");
+
+       /*
+        * The following is the reset sequence:
+        * - send a card-level graceful shutdown
+        * - wait enough time for VK to handle its business, stopping DMA etc.
+        * - kill host apps
+        * - trigger interrupt with DB
+        */
+       bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_GRACEFUL, 0, 0);
+
+       spin_lock(&vk->ctx_lock);
+       if (!vk->reset_pid) {
+               vk->reset_pid = task_pid_nr(current);
+       } else {
+               dev_err(dev, "Reset already launched by process pid %d\n",
+                       vk->reset_pid);
+               ret = -EACCES;
+       }
+       spin_unlock(&vk->ctx_lock);
+       if (ret)
+               goto err_exit;
+
+       bcm_vk_blk_drv_access(vk);
+       special_reset = bcm_vk_trigger_reset(vk);
+
+       /*
+        * Wait enough time for the card OS to deinit
+        * and populate the reset reason.
+        */
+       msleep(BCM_VK_DEINIT_TIME_MS);
+
+       if (special_reset) {
+               /* if it is special ramdump reset, return the type to user */
+               reset.arg2 = special_reset;
+               if (copy_to_user(arg, &reset, sizeof(reset)))
+                       ret = -EFAULT;
+       } else {
+               ret = bcm_vk_reset_successful(vk);
+       }
+
+err_exit:
+       clear_bit(BCM_VK_WQ_DWNLD_PEND, vk->wq_offload);
+       return ret;
+}
+
+static int bcm_vk_mmap(struct file *file, struct vm_area_struct *vma)
+{
+       struct bcm_vk_ctx *ctx = file->private_data;
+       struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+       unsigned long pg_size;
+
+       /* only BAR2 can be mmap'ed; it is bar number 4 due to 64-bit BARs */
+#define VK_MMAPABLE_BAR 4
+
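+       /* reject mappings that would extend past the end of the BAR */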
+       pg_size = ((pci_resource_len(vk->pdev, VK_MMAPABLE_BAR) - 1)
+                   >> PAGE_SHIFT) + 1;
+       if (vma->vm_pgoff + vma_pages(vma) > pg_size)
+               return -EINVAL;
+
+       vma->vm_pgoff += (pci_resource_start(vk->pdev, VK_MMAPABLE_BAR)
+                         >> PAGE_SHIFT);
+       vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot);
+
+       return io_remap_pfn_range(vma, vma->vm_start, vma->vm_pgoff,
+                                 vma->vm_end - vma->vm_start,
+                                 vma->vm_page_prot);
+}
+
+static long bcm_vk_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+       long ret = -EINVAL;
+       struct bcm_vk_ctx *ctx = file->private_data;
+       struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+       void __user *argp = (void __user *)arg;
+
+       dev_dbg(&vk->pdev->dev,
+               "ioctl, cmd=0x%02x, arg=0x%02lx\n",
+               cmd, arg);
+
+       mutex_lock(&vk->mutex);
+
+       switch (cmd) {
+       case VK_IOCTL_LOAD_IMAGE:
+               ret = bcm_vk_load_image(vk, argp);
+               break;
+
+       case VK_IOCTL_RESET:
+               ret = bcm_vk_reset(vk, argp);
+               break;
+
+       default:
+               break;
+       }
+
+       mutex_unlock(&vk->mutex);
+
+       return ret;
+}
+
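+/*
+ * Hypothetical userspace usage sketch (illustration only, not part of the
+ * driver; the device node name is an assumption based on DRV_MODULE_NAME):
+ *
+ *   int fd = open("/dev/bcm_vk.0", O_RDWR);
+ *   struct vk_image img = { .type = VK_IMAGE_TYPE_BOOT1, .filename = "" };
+ *   ioctl(fd, VK_IOCTL_LOAD_IMAGE, &img);   (empty name => default image)
+ */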
+static const struct file_operations bcm_vk_fops = {
+       .owner = THIS_MODULE,
+       .open = bcm_vk_open,
+       .read = bcm_vk_read,
+       .write = bcm_vk_write,
+       .poll = bcm_vk_poll,
+       .release = bcm_vk_release,
+       .mmap = bcm_vk_mmap,
+       .unlocked_ioctl = bcm_vk_ioctl,
+};
+
+static int bcm_vk_on_panic(struct notifier_block *nb,
+                          unsigned long e, void *p)
+{
+       struct bcm_vk *vk = container_of(nb, struct bcm_vk, panic_nb);
+
+       bcm_to_v_reset_doorbell(vk, VK_BAR0_RESET_DB_HARD);
+
+       return 0;
+}
+
+static int bcm_vk_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
+{
+       int err;
+       int i;
+       int id;
+       int irq;
+       char name[20];
+       struct bcm_vk *vk;
+       struct device *dev = &pdev->dev;
+       struct miscdevice *misc_device;
+       u32 boot_status;
+
+       /* allocate vk structure which is tied to kref for freeing */
+       vk = kzalloc(sizeof(*vk), GFP_KERNEL);
+       if (!vk)
+               return -ENOMEM;
+
+       kref_init(&vk->kref);
+       if (nr_ib_sgl_blk > BCM_VK_IB_SGL_BLK_MAX) {
+               dev_warn(dev, "Inband SGL blk %d limited to max %d\n",
+                        nr_ib_sgl_blk, BCM_VK_IB_SGL_BLK_MAX);
+               nr_ib_sgl_blk = BCM_VK_IB_SGL_BLK_MAX;
+       }
+       vk->ib_sgl_size = nr_ib_sgl_blk * VK_MSGQ_BLK_SIZE;
+       mutex_init(&vk->mutex);
+
+       err = pci_enable_device(pdev);
+       if (err) {
+               dev_err(dev, "Cannot enable PCI device\n");
+               goto err_free_exit;
+       }
+       vk->pdev = pci_dev_get(pdev);
+
+       err = pci_request_regions(pdev, DRV_MODULE_NAME);
+       if (err) {
+               dev_err(dev, "Cannot obtain PCI resources\n");
+               goto err_disable_pdev;
+       }
+
+       /* make sure DMA is good */
+       err = dma_set_mask_and_coherent(&pdev->dev,
+                                       DMA_BIT_MASK(BCM_VK_DMA_BITS));
+       if (err) {
+               dev_err(dev, "failed to set DMA mask\n");
+               goto err_disable_pdev;
+       }
+
+       /* The tdma is a scratch area for DMA testing. */
+       if (nr_scratch_pages) {
+               vk->tdma_vaddr = dma_alloc_coherent
+                                       (dev,
+                                        nr_scratch_pages * PAGE_SIZE,
+                                        &vk->tdma_addr, GFP_KERNEL);
+               if (!vk->tdma_vaddr) {
+                       err = -ENOMEM;
+                       goto err_disable_pdev;
+               }
+       }
+
+       pci_set_master(pdev);
+       pci_set_drvdata(pdev, vk);
+
+       irq = pci_alloc_irq_vectors(pdev,
+                                   1,
+                                   VK_MSIX_IRQ_MAX,
+                                   PCI_IRQ_MSI | PCI_IRQ_MSIX);
+
+       if (irq < VK_MSIX_IRQ_MIN_REQ) {
+               dev_err(dev, "failed to get min %d MSIX interrupts, irq(%d)\n",
+                       VK_MSIX_IRQ_MIN_REQ, irq);
+               err = (irq >= 0) ? -EINVAL : irq;
+               goto err_disable_pdev;
+       }
+
+       if (irq != VK_MSIX_IRQ_MAX)
+               dev_warn(dev, "Number of IRQs %d allocated - requested(%d).\n",
+                        irq, VK_MSIX_IRQ_MAX);
+
+       for (i = 0; i < MAX_BAR; i++) {
+               /* multiply by 2 for 64-bit BAR mapping */
+               vk->bar[i] = pci_ioremap_bar(pdev, i * 2);
+               if (!vk->bar[i]) {
+                       dev_err(dev, "failed to remap BAR%d\n", i);
+                       err = -ENOMEM;
+                       goto err_iounmap;
+               }
+       }
+
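+       /*
+        * MSIX vector layout: the first VK_MSIX_MSGQ_MAX vectors serve the
+        * message queues, the next one carries notifications from the card,
+        * and any remaining vectors (up to VK_MSIX_TTY_MAX) serve the TTYs.
+        */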
+       for (vk->num_irqs = 0;
+            vk->num_irqs < VK_MSIX_MSGQ_MAX;
+            vk->num_irqs++) {
+               err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
+                                      bcm_vk_msgq_irqhandler,
+                                      IRQF_SHARED, DRV_MODULE_NAME, vk);
+               if (err) {
+                       dev_err(dev, "failed to request msgq IRQ %d for MSIX %d\n",
+                               pdev->irq + vk->num_irqs, vk->num_irqs + 1);
+                       goto err_irq;
+               }
+       }
+       /* one irq for notification from VK */
+       err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
+                              bcm_vk_notf_irqhandler,
+                              IRQF_SHARED, DRV_MODULE_NAME, vk);
+       if (err) {
+               dev_err(dev, "failed to request notf IRQ %d for MSIX %d\n",
+                       pdev->irq + vk->num_irqs, vk->num_irqs + 1);
+               goto err_irq;
+       }
+       vk->num_irqs++;
+
+       for (i = 0;
+            (i < VK_MSIX_TTY_MAX) && (vk->num_irqs < irq);
+            i++, vk->num_irqs++) {
+               err = devm_request_irq(dev, pci_irq_vector(pdev, vk->num_irqs),
+                                      bcm_vk_tty_irqhandler,
+                                      IRQF_SHARED, DRV_MODULE_NAME, vk);
+               if (err) {
+                       dev_err(dev, "failed request tty IRQ %d for MSIX %d\n",
+                               pdev->irq + vk->num_irqs, vk->num_irqs + 1);
+                       goto err_irq;
+               }
+               bcm_vk_tty_set_irq_enabled(vk, i);
+       }
+
+       id = ida_simple_get(&bcm_vk_ida, 0, 0, GFP_KERNEL);
+       if (id < 0) {
+               err = id;
+               dev_err(dev, "unable to get id\n");
+               goto err_irq;
+       }
+
+       vk->devid = id;
+       snprintf(name, sizeof(name), DRV_MODULE_NAME ".%d", id);
+       misc_device = &vk->miscdev;
+       misc_device->minor = MISC_DYNAMIC_MINOR;
+       misc_device->name = kstrdup(name, GFP_KERNEL);
+       if (!misc_device->name) {
+               err = -ENOMEM;
+               goto err_ida_remove;
+       }
+       misc_device->fops = &bcm_vk_fops;
+
+       err = misc_register(misc_device);
+       if (err) {
+               dev_err(dev, "failed to register device\n");
+               goto err_kfree_name;
+       }
+
+       INIT_WORK(&vk->wq_work, bcm_vk_wq_handler);
+
+       /* create dedicated workqueue */
+       vk->wq_thread = create_singlethread_workqueue(name);
+       if (!vk->wq_thread) {
+               dev_err(dev, "Fail to create workqueue thread\n");
+               err = -ENOMEM;
+               goto err_misc_deregister;
+       }
+
+       err = bcm_vk_msg_init(vk);
+       if (err) {
+               dev_err(dev, "failed to init msg queue info\n");
+               goto err_destroy_workqueue;
+       }
+
+       /* sync other info */
+       bcm_vk_sync_card_info(vk);
+
+       /* register for panic notifier */
+       vk->panic_nb.notifier_call = bcm_vk_on_panic;
+       err = atomic_notifier_chain_register(&panic_notifier_list,
+                                            &vk->panic_nb);
+       if (err) {
+               dev_err(dev, "Fail to register panic notifier\n");
+               goto err_destroy_workqueue;
+       }
+
+       snprintf(name, sizeof(name), KBUILD_MODNAME ".%d_ttyVK", id);
+       err = bcm_vk_tty_init(vk, name);
+       if (err)
+               goto err_unregister_panic_notifier;
+
+       /*
+        * Let's trigger an auto download.  We don't want to do it serially
+        * here because probe is not supposed to block for a long time.
+        */
+       boot_status = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+       if (auto_load) {
+               if ((boot_status & BOOT_STATE_MASK) == BROM_RUNNING) {
+                       err = bcm_vk_trigger_autoload(vk);
+                       if (err)
+                               goto err_bcm_vk_tty_exit;
+               } else {
+                       dev_err(dev,
+                               "Auto-load skipped - BROM not in proper state (0x%x)\n",
+                               boot_status);
+               }
+       }
+
+       /* enable hb */
+       bcm_vk_hb_init(vk);
+
+       dev_dbg(dev, "BCM-VK:%u created\n", id);
+
+       return 0;
+
+err_bcm_vk_tty_exit:
+       bcm_vk_tty_exit(vk);
+
+err_unregister_panic_notifier:
+       atomic_notifier_chain_unregister(&panic_notifier_list,
+                                        &vk->panic_nb);
+
+err_destroy_workqueue:
+       destroy_workqueue(vk->wq_thread);
+
+err_misc_deregister:
+       misc_deregister(misc_device);
+
+err_kfree_name:
+       kfree(misc_device->name);
+       misc_device->name = NULL;
+
+err_ida_remove:
+       ida_simple_remove(&bcm_vk_ida, id);
+
+err_irq:
+       for (i = 0; i < vk->num_irqs; i++)
+               devm_free_irq(dev, pci_irq_vector(pdev, i), vk);
+
+       pci_disable_msix(pdev);
+       pci_disable_msi(pdev);
+
+err_iounmap:
+       for (i = 0; i < MAX_BAR; i++) {
+               if (vk->bar[i])
+                       pci_iounmap(pdev, vk->bar[i]);
+       }
+       pci_release_regions(pdev);
+
+err_disable_pdev:
+       if (vk->tdma_vaddr)
+               dma_free_coherent(&pdev->dev, nr_scratch_pages * PAGE_SIZE,
+                                 vk->tdma_vaddr, vk->tdma_addr);
+
+       pci_free_irq_vectors(pdev);
+       pci_disable_device(pdev);
+       pci_dev_put(pdev);
+
+err_free_exit:
+       kfree(vk);
+
+       return err;
+}
+
+void bcm_vk_release_data(struct kref *kref)
+{
+       struct bcm_vk *vk = container_of(kref, struct bcm_vk, kref);
+       struct pci_dev *pdev = vk->pdev;
+
+       dev_dbg(&pdev->dev, "BCM-VK:%d release data 0x%p\n", vk->devid, vk);
+       pci_dev_put(pdev);
+       kfree(vk);
+}
+
+static void bcm_vk_remove(struct pci_dev *pdev)
+{
+       int i;
+       struct bcm_vk *vk = pci_get_drvdata(pdev);
+       struct miscdevice *misc_device = &vk->miscdev;
+
+       bcm_vk_hb_deinit(vk);
+
+       /*
+        * Trigger a reset of the card and wait enough time for the UCODE to
+        * rerun, which re-initializes the card into its default state.
+        * This ensures that when the driver is re-enumerated it starts from
+        * a completely clean state.
+        */
+       bcm_vk_trigger_reset(vk);
+       usleep_range(BCM_VK_UCODE_BOOT_US, BCM_VK_UCODE_BOOT_MAX_US);
+
+       /* unregister panic notifier */
+       atomic_notifier_chain_unregister(&panic_notifier_list,
+                                        &vk->panic_nb);
+
+       bcm_vk_msg_remove(vk);
+       bcm_vk_tty_exit(vk);
+
+       if (vk->tdma_vaddr)
+               dma_free_coherent(&pdev->dev, nr_scratch_pages * PAGE_SIZE,
+                                 vk->tdma_vaddr, vk->tdma_addr);
+
+       /* remove only if name is set, which means the misc dev was registered */
+       if (misc_device->name) {
+               misc_deregister(misc_device);
+               kfree(misc_device->name);
+               ida_simple_remove(&bcm_vk_ida, vk->devid);
+       }
+       for (i = 0; i < vk->num_irqs; i++)
+               devm_free_irq(&pdev->dev, pci_irq_vector(pdev, i), vk);
+
+       pci_disable_msix(pdev);
+       pci_disable_msi(pdev);
+
+       cancel_work_sync(&vk->wq_work);
+       destroy_workqueue(vk->wq_thread);
+       bcm_vk_tty_wq_exit(vk);
+
+       for (i = 0; i < MAX_BAR; i++) {
+               if (vk->bar[i])
+                       pci_iounmap(pdev, vk->bar[i]);
+       }
+
+       dev_dbg(&pdev->dev, "BCM-VK:%d released\n", vk->devid);
+
+       pci_release_regions(pdev);
+       pci_free_irq_vectors(pdev);
+       pci_disable_device(pdev);
+
+       kref_put(&vk->kref, bcm_vk_release_data);
+}
+
+static void bcm_vk_shutdown(struct pci_dev *pdev)
+{
+       struct bcm_vk *vk = pci_get_drvdata(pdev);
+       u32 reg, boot_stat;
+
+       reg = vkread32(vk, BAR_0, BAR_BOOT_STATUS);
+       boot_stat = reg & BOOT_STATE_MASK;
+
+       if (boot_stat == BOOT1_RUNNING) {
+               /* simply trigger a reset interrupt to park it */
+               bcm_vk_trigger_reset(vk);
+       } else if (boot_stat == BROM_NOT_RUN) {
+               int err;
+               u16 lnksta;
+
+               /*
+                * The boot status only reflects the boot condition since the
+                * last reset.  As the ucode runs only once to configure PCIe,
+                * if multiple resets happen we lose track of whether the
+                * ucode has run or not.  Here, read the current link speed
+                * and use it to sync up the boot status properly, so that on
+                * reboot-back-up it has the proper state to start autoload.
+                */
+               err = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
+               if (!err &&
+                   (lnksta & PCI_EXP_LNKSTA_CLS) != PCI_EXP_LNKSTA_CLS_2_5GB) {
+                       reg |= BROM_STATUS_COMPLETE;
+                       vkwrite32(vk, reg, BAR_0, BAR_BOOT_STATUS);
+               }
+       }
+}
+
+static const struct pci_device_id bcm_vk_ids[] = {
+       { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VALKYRIE), },
+       { PCI_DEVICE(PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_VIPER), },
+       { }
+};
+MODULE_DEVICE_TABLE(pci, bcm_vk_ids);
+
+static struct pci_driver pci_driver = {
+       .name     = DRV_MODULE_NAME,
+       .id_table = bcm_vk_ids,
+       .probe    = bcm_vk_probe,
+       .remove   = bcm_vk_remove,
+       .shutdown = bcm_vk_shutdown,
+};
+module_pci_driver(pci_driver);
+
+MODULE_DESCRIPTION("Broadcom VK Host Driver");
+MODULE_AUTHOR("Scott Branden <scott.branden@broadcom.com>");
+MODULE_LICENSE("GPL v2");
+MODULE_VERSION("1.0");
diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.c b/drivers/misc/bcm-vk/bcm_vk_msg.c
new file mode 100644 (file)
index 0000000..f40cf08
--- /dev/null
+++ b/drivers/misc/bcm-vk/bcm_vk_msg.c
@@ -0,0 +1,1357 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/hash.h>
+#include <linux/interrupt.h>
+#include <linux/list.h>
+#include <linux/module.h>
+#include <linux/poll.h>
+#include <linux/sizes.h>
+#include <linux/spinlock.h>
+#include <linux/timer.h>
+
+#include "bcm_vk.h"
+#include "bcm_vk_msg.h"
+#include "bcm_vk_sg.h"
+
+/* functions to manipulate the transport id in msg block */
+#define BCM_VK_MSG_Q_SHIFT      4
+#define BCM_VK_MSG_Q_MASK       0xF
+#define BCM_VK_MSG_ID_MASK      0xFFF
+
+#define BCM_VK_DMA_DRAIN_MAX_MS          2000
+
+/* number x q_size will be the max number of msg processed per loop */
+#define BCM_VK_MSG_PROC_MAX_LOOP 2
+
+/* module parameter */
+static bool hb_mon = true;
+module_param(hb_mon, bool, 0444);
+MODULE_PARM_DESC(hb_mon, "Monitoring heartbeat continuously.\n");
+static int batch_log = 1;
+module_param(batch_log, int, 0444);
+MODULE_PARM_DESC(batch_log, "Max num of logs per batch operation.\n");
+
+static bool hb_mon_is_on(void)
+{
+       return hb_mon;
+}
+
+static u32 get_q_num(const struct vk_msg_blk *msg)
+{
+       u32 q_num = msg->trans_id & BCM_VK_MSG_Q_MASK;
+
+       if (q_num >= VK_MSGQ_PER_CHAN_MAX)
+               q_num = VK_MSGQ_NUM_DEFAULT;
+       return q_num;
+}
+
+static void set_q_num(struct vk_msg_blk *msg, u32 q_num)
+{
+       u32 trans_q;
+
+       if (q_num >= VK_MSGQ_PER_CHAN_MAX)
+               trans_q = VK_MSGQ_NUM_DEFAULT;
+       else
+               trans_q = q_num;
+
+       msg->trans_id = (msg->trans_id & ~BCM_VK_MSG_Q_MASK) | trans_q;
+}
+
+static u32 get_msg_id(const struct vk_msg_blk *msg)
+{
+       return ((msg->trans_id >> BCM_VK_MSG_Q_SHIFT) & BCM_VK_MSG_ID_MASK);
+}
+
+static void set_msg_id(struct vk_msg_blk *msg, u32 val)
+{
+       msg->trans_id = (val << BCM_VK_MSG_Q_SHIFT) | get_q_num(msg);
+}
+
+static u32 msgq_inc(const struct bcm_vk_sync_qinfo *qinfo, u32 idx, u32 inc)
+{
+       return ((idx + inc) & qinfo->q_mask);
+}
+
+static
+struct vk_msg_blk __iomem *msgq_blk_addr(const struct bcm_vk_sync_qinfo *qinfo,
+                                        u32 idx)
+{
+       return qinfo->q_start + (VK_MSGQ_BLK_SIZE * idx);
+}
+
+static u32 msgq_occupied(const struct bcm_vk_msgq __iomem *msgq,
+                        const struct bcm_vk_sync_qinfo *qinfo)
+{
+       u32 wr_idx, rd_idx;
+
+       wr_idx = readl_relaxed(&msgq->wr_idx);
+       rd_idx = readl_relaxed(&msgq->rd_idx);
+
+       return ((wr_idx - rd_idx) & qinfo->q_mask);
+}
+
+static
+u32 msgq_avail_space(const struct bcm_vk_msgq __iomem *msgq,
+                    const struct bcm_vk_sync_qinfo *qinfo)
+{
+       return (qinfo->q_size - msgq_occupied(msgq, qinfo) - 1);
+}
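+
+/*
+ * Note on the ring arithmetic above: occupied = (wr - rd) & mask relies on
+ * q_size being a power of two, and one slot is always kept empty so a full
+ * queue can be distinguished from an empty one, hence the "- 1" in the
+ * available-space calculation.
+ */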
+
+/* number of retries when enqueue message fails before returning EAGAIN */
+#define BCM_VK_H2VK_ENQ_RETRY 10
+#define BCM_VK_H2VK_ENQ_RETRY_DELAY_MS 50
+
+bool bcm_vk_drv_access_ok(struct bcm_vk *vk)
+{
+       return (!!atomic_read(&vk->msgq_inited));
+}
+
+void bcm_vk_set_host_alert(struct bcm_vk *vk, u32 bit_mask)
+{
+       struct bcm_vk_alert *alert = &vk->host_alert;
+       unsigned long flags;
+
+       /* use irqsave version as this may be called inside a timer interrupt */
+       spin_lock_irqsave(&vk->host_alert_lock, flags);
+       alert->notfs |= bit_mask;
+       spin_unlock_irqrestore(&vk->host_alert_lock, flags);
+
+       if (test_and_set_bit(BCM_VK_WQ_NOTF_PEND, vk->wq_offload) == 0)
+               queue_work(vk->wq_thread, &vk->wq_work);
+}
+
+/*
+ * Heartbeat related defines
+ * The heartbeat from the host is a last resort.  If a stuck condition
+ * happens on the card, the firmware is supposed to detect it.  Therefore,
+ * the heartbeat value used by the driver is more relaxed and needs to be
+ * bigger than the watchdog timeout on the card.  The watchdog timeout on
+ * the card is 20s, with a jitter of 2s => 22s.  We use a value of 27s here.
+ */
+#define BCM_VK_HB_TIMER_S 3
+#define BCM_VK_HB_TIMER_VALUE (BCM_VK_HB_TIMER_S * HZ)
+#define BCM_VK_HB_LOST_MAX (27 / BCM_VK_HB_TIMER_S)
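+/* i.e. 27 s budget / 3 s poll period => tolerate up to 9 missed polls */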
+
+static void bcm_vk_hb_poll(struct timer_list *t)
+{
+       u32 uptime_s;
+       struct bcm_vk_hb_ctrl *hb = container_of(t, struct bcm_vk_hb_ctrl,
+                                                timer);
+       struct bcm_vk *vk = container_of(hb, struct bcm_vk, hb_ctrl);
+
+       if (bcm_vk_drv_access_ok(vk) && hb_mon_is_on()) {
+               /* read uptime from register and compare */
+               uptime_s = vkread32(vk, BAR_0, BAR_OS_UPTIME);
+
+               if (uptime_s == hb->last_uptime)
+                       hb->lost_cnt++;
+               else /* reset to avoid accumulation */
+                       hb->lost_cnt = 0;
+
+               dev_dbg(&vk->pdev->dev, "Last uptime %d current %d, lost %d\n",
+                       hb->last_uptime, uptime_s, hb->lost_cnt);
+
+               /*
+                * if the interface goes down without any activity, a value
+                * of 0xFFFFFFFF will be continuously read, and the detection
+                * will eventually happen.
+                */
+               hb->last_uptime = uptime_s;
+       } else {
+               /* reset heart beat lost cnt */
+               hb->lost_cnt = 0;
+       }
+
+       /* next, check if heartbeat exceeds limit */
+       if (hb->lost_cnt > BCM_VK_HB_LOST_MAX) {
+               dev_err(&vk->pdev->dev, "Heartbeat Misses %d times, %d s!\n",
+                       BCM_VK_HB_LOST_MAX,
+                       BCM_VK_HB_LOST_MAX * BCM_VK_HB_TIMER_S);
+
+               bcm_vk_blk_drv_access(vk);
+               bcm_vk_set_host_alert(vk, ERR_LOG_HOST_HB_FAIL);
+       }
+       /* re-arm timer */
+       mod_timer(&hb->timer, jiffies + BCM_VK_HB_TIMER_VALUE);
+}
+
+void bcm_vk_hb_init(struct bcm_vk *vk)
+{
+       struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl;
+
+       timer_setup(&hb->timer, bcm_vk_hb_poll, 0);
+       mod_timer(&hb->timer, jiffies + BCM_VK_HB_TIMER_VALUE);
+}
+
+void bcm_vk_hb_deinit(struct bcm_vk *vk)
+{
+       struct bcm_vk_hb_ctrl *hb = &vk->hb_ctrl;
+
+       del_timer(&hb->timer);
+}
+
+static void bcm_vk_msgid_bitmap_clear(struct bcm_vk *vk,
+                                     unsigned int start,
+                                     unsigned int nbits)
+{
+       spin_lock(&vk->msg_id_lock);
+       bitmap_clear(vk->bmap, start, nbits);
+       spin_unlock(&vk->msg_id_lock);
+}
+
+/*
+ * allocate a ctx per file struct
+ */
+static struct bcm_vk_ctx *bcm_vk_get_ctx(struct bcm_vk *vk, const pid_t pid)
+{
+       u32 i;
+       struct bcm_vk_ctx *ctx = NULL;
+       u32 hash_idx = hash_32(pid, VK_PID_HT_SHIFT_BIT);
+
+       spin_lock(&vk->ctx_lock);
+
+       /* check if it is in reset, if so, don't allow */
+       if (vk->reset_pid) {
+               dev_err(&vk->pdev->dev,
+                       "No context allowed during reset by pid %d\n",
+                       vk->reset_pid);
+
+               goto in_reset_exit;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) {
+               if (!vk->ctx[i].in_use) {
+                       vk->ctx[i].in_use = true;
+                       ctx = &vk->ctx[i];
+                       break;
+               }
+       }
+
+       if (!ctx) {
+               dev_err(&vk->pdev->dev, "All context in use\n");
+
+               goto all_in_use_exit;
+       }
+
+       /* set the pid and insert it to hash table */
+       ctx->pid = pid;
+       ctx->hash_idx = hash_idx;
+       list_add_tail(&ctx->node, &vk->pid_ht[hash_idx].head);
+
+       /* increase kref */
+       kref_get(&vk->kref);
+
+       /* clear counter */
+       atomic_set(&ctx->pend_cnt, 0);
+       atomic_set(&ctx->dma_cnt, 0);
+       init_waitqueue_head(&ctx->rd_wq);
+
+all_in_use_exit:
+in_reset_exit:
+       spin_unlock(&vk->ctx_lock);
+
+       return ctx;
+}
+
+static u16 bcm_vk_get_msg_id(struct bcm_vk *vk)
+{
+       u16 rc = VK_MSG_ID_OVERFLOW;
+       u16 test_bit_count = 0;
+
+       spin_lock(&vk->msg_id_lock);
+       while (test_bit_count < (VK_MSG_ID_BITMAP_SIZE - 1)) {
+               /*
+                * The first time through this loop, msg_id is 0, so the
+                * first id tested is 1.  VK_SIMPLEX_MSG_ID (0) is reserved
+                * for one-way host2vk communication and is skipped.
+                */
+               vk->msg_id++;
+               if (vk->msg_id == VK_MSG_ID_BITMAP_SIZE)
+                       vk->msg_id = 1;
+
+               if (test_bit(vk->msg_id, vk->bmap)) {
+                       test_bit_count++;
+                       continue;
+               }
+               rc = vk->msg_id;
+               bitmap_set(vk->bmap, vk->msg_id, 1);
+               break;
+       }
+       spin_unlock(&vk->msg_id_lock);
+
+       return rc;
+}
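+
+/*
+ * Lifecycle sketch (illustrative): a msg_id returned here stays set in
+ * vk->bmap until the response is matched in bcm_vk_dequeue_pending(),
+ * the entry is drained, or the write path fails, at which point
+ * bcm_vk_msgid_bitmap_clear() releases it for reuse.
+ */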
+
+static int bcm_vk_free_ctx(struct bcm_vk *vk, struct bcm_vk_ctx *ctx)
+{
+       u32 idx;
+       u32 hash_idx;
+       pid_t pid;
+       struct bcm_vk_ctx *entry;
+       int count = 0;
+
+       if (!ctx) {
+               dev_err(&vk->pdev->dev, "NULL context detected\n");
+               return -EINVAL;
+       }
+       idx = ctx->idx;
+       pid = ctx->pid;
+
+       spin_lock(&vk->ctx_lock);
+
+       if (!vk->ctx[idx].in_use) {
+               dev_err(&vk->pdev->dev, "context[%d] not in use!\n", idx);
+       } else {
+               vk->ctx[idx].in_use = false;
+               vk->ctx[idx].miscdev = NULL;
+
+               /* Remove it from hash list and see if it is the last one. */
+               list_del(&ctx->node);
+               hash_idx = ctx->hash_idx;
+               list_for_each_entry(entry, &vk->pid_ht[hash_idx].head, node) {
+                       if (entry->pid == pid)
+                               count++;
+               }
+       }
+
+       spin_unlock(&vk->ctx_lock);
+
+       return count;
+}
+
+static void bcm_vk_free_wkent(struct device *dev, struct bcm_vk_wkent *entry)
+{
+       int proc_cnt;
+
+       bcm_vk_sg_free(dev, entry->dma, VK_DMA_MAX_ADDRS, &proc_cnt);
+       if (proc_cnt)
+               atomic_dec(&entry->ctx->dma_cnt);
+
+       kfree(entry->to_h_msg);
+       kfree(entry);
+}
+
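+/*
+ * Drain the pending entries of a channel.  A NULL ctx means "drain
+ * everything" (the reset/remove paths); a non-NULL ctx drains only the
+ * entries belonging to that session.
+ */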
+static void bcm_vk_drain_all_pend(struct device *dev,
+                                 struct bcm_vk_msg_chan *chan,
+                                 struct bcm_vk_ctx *ctx)
+{
+       u32 num;
+       struct bcm_vk_wkent *entry, *tmp;
+       struct bcm_vk *vk;
+       struct list_head del_q;
+
+       if (ctx)
+               vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+
+       INIT_LIST_HEAD(&del_q);
+       spin_lock(&chan->pendq_lock);
+       for (num = 0; num < chan->q_nr; num++) {
+               list_for_each_entry_safe(entry, tmp, &chan->pendq[num], node) {
+                       if ((!ctx) || (entry->ctx->idx == ctx->idx)) {
+                               list_del(&entry->node);
+                               list_add_tail(&entry->node, &del_q);
+                       }
+               }
+       }
+       spin_unlock(&chan->pendq_lock);
+
+       /* batch clean up */
+       num = 0;
+       list_for_each_entry_safe(entry, tmp, &del_q, node) {
+               list_del(&entry->node);
+               num++;
+               if (ctx) {
+                       struct vk_msg_blk *msg;
+                       int bit_set;
+                       bool responded;
+                       u32 msg_id;
+
+                       /* if it is a specific ctx, log any stuck entries */
+                       msg = entry->to_v_msg;
+                       msg_id = get_msg_id(msg);
+                       bit_set = test_bit(msg_id, vk->bmap);
+                       responded = entry->to_h_msg ? true : false;
+                       if (num <= batch_log)
+                               dev_info(dev,
+                                        "Drained: fid %u size %u msg 0x%x(seq-%x) ctx 0x%x[fd-%d] args:[0x%x 0x%x] resp %s, bmap %d\n",
+                                        msg->function_id, msg->size,
+                                        msg_id, entry->seq_num,
+                                        msg->context_id, entry->ctx->idx,
+                                        msg->cmd, msg->arg,
+                                        responded ? "T" : "F", bit_set);
+                       if (responded)
+                               atomic_dec(&ctx->pend_cnt);
+                       else if (bit_set)
+                               bcm_vk_msgid_bitmap_clear(vk, msg_id, 1);
+               }
+               bcm_vk_free_wkent(dev, entry);
+       }
+       if (num && ctx)
+               dev_info(dev, "Total drained items %d [fd-%d]\n",
+                        num, ctx->idx);
+}
+
+void bcm_vk_drain_msg_on_reset(struct bcm_vk *vk)
+{
+       bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, NULL);
+       bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, NULL);
+}
+
+/*
+ * Function to sync up the messages queue info that is provided by BAR1
+ */
+int bcm_vk_sync_msgq(struct bcm_vk *vk, bool force_sync)
+{
+       struct bcm_vk_msgq __iomem *msgq;
+       struct device *dev = &vk->pdev->dev;
+       u32 msgq_off;
+       u32 num_q;
+       struct bcm_vk_msg_chan *chan_list[] = {&vk->to_v_msg_chan,
+                                              &vk->to_h_msg_chan};
+       struct bcm_vk_msg_chan *chan;
+       int i, j;
+       int ret = 0;
+
+       /*
+        * If the driver is loaded at startup before the VK OS is up, the
+        * msgq info may not be available until a later time.  In this
+        * case we skip, and this sync function is expected to be called
+        * again.
+        */
+       if (!bcm_vk_msgq_marker_valid(vk)) {
+               dev_info(dev, "BAR1 msgq marker not initialized.\n");
+               return -EAGAIN;
+       }
+
+       msgq_off = vkread32(vk, BAR_1, VK_BAR1_MSGQ_CTRL_OFF);
+
+       /* each side is always half the total */
+       num_q = vkread32(vk, BAR_1, VK_BAR1_MSGQ_NR) / 2;
+       if (!num_q || (num_q > VK_MSGQ_PER_CHAN_MAX)) {
+               dev_err(dev,
+                       "Advertised msgq %d error - max %d allowed\n",
+                       num_q, VK_MSGQ_PER_CHAN_MAX);
+               return -EINVAL;
+       }
+
+       vk->to_v_msg_chan.q_nr = num_q;
+       vk->to_h_msg_chan.q_nr = num_q;
+
+       /* first msgq location */
+       msgq = vk->bar[BAR_1] + msgq_off;
+
+       /*
+        * if this function is called when it is already inited,
+        * something is wrong
+        */
+       if (bcm_vk_drv_access_ok(vk) && !force_sync) {
+               dev_err(dev, "Msgq info already in sync\n");
+               return -EPERM;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(chan_list); i++) {
+               chan = chan_list[i];
+               memset(chan->sync_qinfo, 0, sizeof(chan->sync_qinfo));
+
+               for (j = 0; j < num_q; j++) {
+                       struct bcm_vk_sync_qinfo *qinfo;
+                       u32 msgq_start;
+                       u32 msgq_size;
+                       u32 msgq_nxt;
+                       u32 msgq_db_offset, q_db_offset;
+
+                       chan->msgq[j] = msgq;
+                       msgq_start = readl_relaxed(&msgq->start);
+                       msgq_size = readl_relaxed(&msgq->size);
+                       msgq_nxt = readl_relaxed(&msgq->nxt);
+                       msgq_db_offset = readl_relaxed(&msgq->db_offset);
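+                       /*
+                        * db_offset is self-checking: the low 16 bits hold
+                        * the doorbell offset and the high 16 bits hold the
+                        * one's complement of those bits.  E.g. an offset
+                        * of 0x0408 would be advertised as 0xfbf70408
+                        * (values illustrative).  If the halves disagree,
+                        * fall back to the default below.
+                        */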
+                       q_db_offset = (msgq_db_offset & ((1 << DB_SHIFT) - 1));
+                       if (q_db_offset == (~msgq_db_offset >> DB_SHIFT))
+                               msgq_db_offset = q_db_offset;
+                       else
+                               /* fall back to default */
+                               msgq_db_offset = VK_BAR0_Q_DB_BASE(j);
+
+                       dev_info(dev,
+                                "MsgQ[%d] type %d num %d, @ 0x%x, db_offset 0x%x rd_idx %d wr_idx %d, size %d, nxt 0x%x\n",
+                                j,
+                                readw_relaxed(&msgq->type),
+                                readw_relaxed(&msgq->num),
+                                msgq_start,
+                                msgq_db_offset,
+                                readl_relaxed(&msgq->rd_idx),
+                                readl_relaxed(&msgq->wr_idx),
+                                msgq_size,
+                                msgq_nxt);
+
+                       qinfo = &chan->sync_qinfo[j];
+                       /* formulate and record static info */
+                       qinfo->q_start = vk->bar[BAR_1] + msgq_start;
+                       qinfo->q_size = msgq_size;
+                       /* set low threshold as 50% or 1/2 */
+                       qinfo->q_low = qinfo->q_size >> 1;
+                       qinfo->q_mask = qinfo->q_size - 1;
+                       qinfo->q_db_offset = msgq_db_offset;
+
+                       msgq++;
+               }
+       }
+       atomic_set(&vk->msgq_inited, 1);
+
+       return ret;
+}
+
+static int bcm_vk_msg_chan_init(struct bcm_vk_msg_chan *chan)
+{
+       u32 i;
+
+       mutex_init(&chan->msgq_mutex);
+       spin_lock_init(&chan->pendq_lock);
+       for (i = 0; i < VK_MSGQ_MAX_NR; i++)
+               INIT_LIST_HEAD(&chan->pendq[i]);
+
+       return 0;
+}
+
+static void bcm_vk_append_pendq(struct bcm_vk_msg_chan *chan, u16 q_num,
+                               struct bcm_vk_wkent *entry)
+{
+       struct bcm_vk_ctx *ctx;
+
+       spin_lock(&chan->pendq_lock);
+       list_add_tail(&entry->node, &chan->pendq[q_num]);
+       if (entry->to_h_msg) {
+               ctx = entry->ctx;
+               atomic_inc(&ctx->pend_cnt);
+               wake_up_interruptible(&ctx->rd_wq);
+       }
+       spin_unlock(&chan->pendq_lock);
+}
+
+static u32 bcm_vk_append_ib_sgl(struct bcm_vk *vk,
+                               struct bcm_vk_wkent *entry,
+                               struct _vk_data *data,
+                               unsigned int num_planes)
+{
+       unsigned int i;
+       unsigned int item_cnt = 0;
+       struct device *dev = &vk->pdev->dev;
+       struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan;
+       struct vk_msg_blk *msg = &entry->to_v_msg[0];
+       struct bcm_vk_msgq __iomem *msgq;
+       struct bcm_vk_sync_qinfo *qinfo;
+       u32 ib_sgl_size = 0;
+       u8 *buf = (u8 *)&entry->to_v_msg[entry->to_v_blks];
+       u32 avail;
+       u32 q_num;
+
+       /* if free space is below the low watermark, skip the inband SGL */
+       q_num = get_q_num(msg);
+       msgq = chan->msgq[q_num];
+       qinfo = &chan->sync_qinfo[q_num];
+       avail = msgq_avail_space(msgq, qinfo);
+       if (avail < qinfo->q_low) {
+               dev_dbg(dev, "Skip inserting inband SGL, [0x%x/0x%x]\n",
+                       avail, qinfo->q_size);
+               return 0;
+       }
+
+       for (i = 0; i < num_planes; i++) {
+               if (data[i].address &&
+                   (ib_sgl_size + data[i].size) <= vk->ib_sgl_size) {
+                       item_cnt++;
+                       memcpy(buf, entry->dma[i].sglist, data[i].size);
+                       ib_sgl_size += data[i].size;
+                       buf += data[i].size;
+               }
+       }
+
+       dev_dbg(dev, "Num %u sgl items appended, size 0x%x, room 0x%x\n",
+               item_cnt, ib_sgl_size, vk->ib_sgl_size);
+
+       /* round the byte size up to a number of blocks */
+       ib_sgl_size = (ib_sgl_size + VK_MSGQ_BLK_SIZE - 1)
+                      >> VK_MSGQ_BLK_SZ_SHIFT;
+
+       return ib_sgl_size;
+}
+
+void bcm_to_v_q_doorbell(struct bcm_vk *vk, u32 q_num, u32 db_val)
+{
+       struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan;
+       struct bcm_vk_sync_qinfo *qinfo = &chan->sync_qinfo[q_num];
+
+       vkwrite32(vk, db_val, BAR_0, qinfo->q_db_offset);
+}
+
+static int bcm_to_v_msg_enqueue(struct bcm_vk *vk, struct bcm_vk_wkent *entry)
+{
+       static u32 seq_num;
+       struct bcm_vk_msg_chan *chan = &vk->to_v_msg_chan;
+       struct device *dev = &vk->pdev->dev;
+       struct vk_msg_blk *src = &entry->to_v_msg[0];
+
+       struct vk_msg_blk __iomem *dst;
+       struct bcm_vk_msgq __iomem *msgq;
+       struct bcm_vk_sync_qinfo *qinfo;
+       u32 q_num = get_q_num(src);
+       u32 wr_idx; /* local copy */
+       u32 i;
+       u32 avail;
+       u32 retry;
+
+       if (entry->to_v_blks != src->size + 1) {
+               dev_err(dev, "number of blks %d not matching %d MsgId[0x%x]: func %d ctx 0x%x\n",
+                       entry->to_v_blks,
+                       src->size + 1,
+                       get_msg_id(src),
+                       src->function_id,
+                       src->context_id);
+               return -EMSGSIZE;
+       }
+
+       msgq = chan->msgq[q_num];
+       qinfo = &chan->sync_qinfo[q_num];
+
+       mutex_lock(&chan->msgq_mutex);
+
+       avail = msgq_avail_space(msgq, qinfo);
+
+       /* if not enough space, return EAGAIN and let the app handle it */
+       retry = 0;
+       while ((avail < entry->to_v_blks) &&
+              (retry++ < BCM_VK_H2VK_ENQ_RETRY)) {
+               mutex_unlock(&chan->msgq_mutex);
+
+               msleep(BCM_VK_H2VK_ENQ_RETRY_DELAY_MS);
+               mutex_lock(&chan->msgq_mutex);
+               avail = msgq_avail_space(msgq, qinfo);
+       }
+       if (retry > BCM_VK_H2VK_ENQ_RETRY) {
+               mutex_unlock(&chan->msgq_mutex);
+               return -EAGAIN;
+       }
+
+       /* at this point, mutex is taken and there is enough space */
+       entry->seq_num = seq_num++; /* update debug seq number */
+       wr_idx = readl_relaxed(&msgq->wr_idx);
+
+       if (wr_idx >= qinfo->q_size) {
+               dev_crit(dev, "Invalid wr_idx 0x%x => max 0x%x!",
+                        wr_idx, qinfo->q_size);
+               bcm_vk_blk_drv_access(vk);
+               bcm_vk_set_host_alert(vk, ERR_LOG_HOST_PCIE_DWN);
+               goto idx_err;
+       }
+
+       dst = msgq_blk_addr(qinfo, wr_idx);
+       for (i = 0; i < entry->to_v_blks; i++) {
+               memcpy_toio(dst, src, sizeof(*dst));
+
+               src++;
+               wr_idx = msgq_inc(qinfo, wr_idx, 1);
+               dst = msgq_blk_addr(qinfo, wr_idx);
+       }
+
+       /* flush the write pointer */
+       writel(wr_idx, &msgq->wr_idx);
+
+       /* log new info for debugging */
+       dev_dbg(dev,
+               "MsgQ[%d] [Rd Wr] = [%d %d] blks inserted %d - Q = [u-%d a-%d]/%d\n",
+               readl_relaxed(&msgq->num),
+               readl_relaxed(&msgq->rd_idx),
+               wr_idx,
+               entry->to_v_blks,
+               msgq_occupied(msgq, qinfo),
+               msgq_avail_space(msgq, qinfo),
+               readl_relaxed(&msgq->size));
+       /*
+        * ring the doorbell for this queue.  1 is added to wr_idx so that
+        * the VK side never sees 0, distinguishing a real update from the
+        * initial value.
+        */
+       bcm_to_v_q_doorbell(vk, q_num, wr_idx + 1);
+idx_err:
+       mutex_unlock(&chan->msgq_mutex);
+       return 0;
+}
+
+int bcm_vk_send_shutdown_msg(struct bcm_vk *vk, u32 shut_type,
+                            const pid_t pid, const u32 q_num)
+{
+       int rc = 0;
+       struct bcm_vk_wkent *entry;
+       struct device *dev = &vk->pdev->dev;
+
+       /*
+        * check if the marker is still good.  The PCIe interface may have
+        * gone down, and if we ship things down based on broken values,
+        * the kernel may panic.
+        */
+       if (!bcm_vk_msgq_marker_valid(vk)) {
+               dev_info(dev, "PCIe comm chan - invalid marker (0x%x)!\n",
+                        vkread32(vk, BAR_1, VK_BAR1_MSGQ_DEF_RDY));
+               return -EINVAL;
+       }
+
+       entry = kzalloc(sizeof(*entry) +
+                       sizeof(struct vk_msg_blk), GFP_KERNEL);
+       if (!entry)
+               return -ENOMEM;
+
+       /* fill up necessary data */
+       entry->to_v_msg[0].function_id = VK_FID_SHUTDOWN;
+       set_q_num(&entry->to_v_msg[0], q_num);
+       set_msg_id(&entry->to_v_msg[0], VK_SIMPLEX_MSG_ID);
+       entry->to_v_blks = 1; /* always 1 block */
+
+       entry->to_v_msg[0].cmd = shut_type;
+       entry->to_v_msg[0].arg = pid;
+
+       rc = bcm_to_v_msg_enqueue(vk, entry);
+       if (rc)
+               dev_err(dev,
+                       "Sending shutdown message to q %d for pid %d fails.\n",
+                       get_q_num(&entry->to_v_msg[0]), pid);
+
+       kfree(entry);
+
+       return rc;
+}
+
+static int bcm_vk_handle_last_sess(struct bcm_vk *vk, const pid_t pid,
+                                  const u32 q_num)
+{
+       int rc = 0;
+       struct device *dev = &vk->pdev->dev;
+
+       /*
+        * if the message queue is not initialized, don't send anything
+        * down; and if this is the reset session, clear it.
+        */
+       if (!bcm_vk_drv_access_ok(vk)) {
+               if (vk->reset_pid == pid)
+                       vk->reset_pid = 0;
+               return -EPERM;
+       }
+
+       dev_dbg(dev, "No more sessions, shut down pid %d\n", pid);
+
+       /* only need to do it if it is not the reset process */
+       if (vk->reset_pid != pid)
+               rc = bcm_vk_send_shutdown_msg(vk, VK_SHUTDOWN_PID, pid, q_num);
+       else
+               /* put reset_pid to 0 if it is exiting last session */
+               vk->reset_pid = 0;
+
+       return rc;
+}
+
+static struct bcm_vk_wkent *bcm_vk_dequeue_pending(struct bcm_vk *vk,
+                                                  struct bcm_vk_msg_chan *chan,
+                                                  u16 q_num,
+                                                  u16 msg_id)
+{
+       bool found = false;
+       struct bcm_vk_wkent *entry;
+
+       spin_lock(&chan->pendq_lock);
+       list_for_each_entry(entry, &chan->pendq[q_num], node) {
+               if (get_msg_id(&entry->to_v_msg[0]) == msg_id) {
+                       list_del(&entry->node);
+                       found = true;
+                       bcm_vk_msgid_bitmap_clear(vk, msg_id, 1);
+                       break;
+               }
+       }
+       spin_unlock(&chan->pendq_lock);
+       return ((found) ? entry : NULL);
+}
+
+s32 bcm_to_h_msg_dequeue(struct bcm_vk *vk)
+{
+       struct device *dev = &vk->pdev->dev;
+       struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan;
+       struct vk_msg_blk *data;
+       struct vk_msg_blk __iomem *src;
+       struct vk_msg_blk *dst;
+       struct bcm_vk_msgq __iomem *msgq;
+       struct bcm_vk_sync_qinfo *qinfo;
+       struct bcm_vk_wkent *entry;
+       u32 rd_idx, wr_idx;
+       u32 q_num, msg_id, j;
+       u32 num_blks;
+       s32 total = 0;
+       int cnt = 0;
+       int msg_processed = 0;
+       int max_msg_to_process;
+       bool exit_loop;
+
+       /*
+        * drain all the messages from the queues; for each one, find its
+        * pending entry in the to_v queue based on msg_id & q_num, and
+        * move the entry to the to_h pending queue, where it waits for
+        * the user space program to extract it
+        */
+       mutex_lock(&chan->msgq_mutex);
+
+       for (q_num = 0; q_num < chan->q_nr; q_num++) {
+               msgq = chan->msgq[q_num];
+               qinfo = &chan->sync_qinfo[q_num];
+               max_msg_to_process = BCM_VK_MSG_PROC_MAX_LOOP * qinfo->q_size;
+
+               rd_idx = readl_relaxed(&msgq->rd_idx);
+               wr_idx = readl_relaxed(&msgq->wr_idx);
+               msg_processed = 0;
+               exit_loop = false;
+               while ((rd_idx != wr_idx) && !exit_loop) {
+                       u8 src_size;
+
+                       /*
+                        * Make a local copy and get pointer to src blk
+                        * The rd_idx is masked before getting the pointer to
+                        * avoid out of bound access in case the interface goes
+                        * down.  It will end up pointing to the last block in
+                        * the buffer, but subsequent src->size check would be
+                        * able to catch this.
+                        */
+                       src = msgq_blk_addr(qinfo, rd_idx & qinfo->q_mask);
+                       src_size = readb(&src->size);
+
+                       if ((rd_idx >= qinfo->q_size) ||
+                           (src_size > (qinfo->q_size - 1))) {
+                               dev_crit(dev,
+                                        "Invalid rd_idx 0x%x or size 0x%x => max 0x%x!",
+                                        rd_idx, src_size, qinfo->q_size);
+                               bcm_vk_blk_drv_access(vk);
+                               bcm_vk_set_host_alert(vk,
+                                                     ERR_LOG_HOST_PCIE_DWN);
+                               goto idx_err;
+                       }
+
+                       num_blks = src_size + 1;
+                       data = kzalloc(num_blks * VK_MSGQ_BLK_SIZE, GFP_KERNEL);
+                       if (data) {
+                               /* copy messages and linearize it */
+                               dst = data;
+                               for (j = 0; j < num_blks; j++) {
+                                       memcpy_fromio(dst, src, sizeof(*dst));
+
+                                       dst++;
+                                       rd_idx = msgq_inc(qinfo, rd_idx, 1);
+                                       src = msgq_blk_addr(qinfo, rd_idx);
+                               }
+                               total++;
+                       } else {
+                               /*
+                                * if we could not allocate memory in kernel,
+                                * that is fatal.
+                                */
+                               dev_crit(dev, "Kernel mem allocation failure.\n");
+                               total = -ENOMEM;
+                               goto idx_err;
+                       }
+
+                       /* flush rd pointer after a message is dequeued */
+                       writel(rd_idx, &msgq->rd_idx);
+
+                       /* log new info for debugging */
+                       dev_dbg(dev,
+                               "MsgQ[%d] [Rd Wr] = [%d %d] blks extracted %d - Q = [u-%d a-%d]/%d\n",
+                               readl_relaxed(&msgq->num),
+                               rd_idx,
+                               wr_idx,
+                               num_blks,
+                               msgq_occupied(msgq, qinfo),
+                               msgq_avail_space(msgq, qinfo),
+                               readl_relaxed(&msgq->size));
+
+                       /*
+                        * No need to search if it is an autonomous one-way
+                        * message from driver, as these messages do not bear
+                        * a to_v pending item. Currently, only the shutdown
+                        * message falls into this category.
+                        */
+                       if (data->function_id == VK_FID_SHUTDOWN) {
+                               kfree(data);
+                               continue;
+                       }
+
+                       msg_id = get_msg_id(data);
+                       /* lookup original message in to_v direction */
+                       entry = bcm_vk_dequeue_pending(vk,
+                                                      &vk->to_v_msg_chan,
+                                                      q_num,
+                                                      msg_id);
+
+                       /*
+                        * a response that has no matching prior send (no
+                        * pending entry) is unexpected; it is logged and
+                        * dropped below
+                        */
+                       if (entry) {
+                               entry->to_h_blks = num_blks;
+                               entry->to_h_msg = data;
+                               bcm_vk_append_pendq(&vk->to_h_msg_chan,
+                                                   q_num, entry);
+
+                       } else {
+                               if (cnt++ < batch_log)
+                                       dev_info(dev,
+                                                "Could not find MsgId[0x%x] for resp func %d bmap %d\n",
+                                                msg_id, data->function_id,
+                                                test_bit(msg_id, vk->bmap));
+                               kfree(data);
+                       }
+                       /* Fetch wr_idx to handle more back-to-back events */
+                       wr_idx = readl(&msgq->wr_idx);
+
+                       /*
+                        * cap the max processed per loop so that, even when
+                        * handling back-to-back events, we neither hold the
+                        * CPU too long nor loop forever on corrupted rd/wr
+                        * indexes.
+                        */
+                       if (++msg_processed >= max_msg_to_process) {
+                               dev_warn(dev, "Q[%d] Per loop processing exceeds %d\n",
+                                        q_num, max_msg_to_process);
+                               exit_loop = true;
+                       }
+               }
+       }
+idx_err:
+       mutex_unlock(&chan->msgq_mutex);
+       dev_dbg(dev, "total %d drained from queues\n", total);
+
+       return total;
+}
+
+/*
+ * init routine for all required data structures
+ */
+static int bcm_vk_data_init(struct bcm_vk *vk)
+{
+       int i;
+
+       spin_lock_init(&vk->ctx_lock);
+       for (i = 0; i < ARRAY_SIZE(vk->ctx); i++) {
+               vk->ctx[i].in_use = false;
+               vk->ctx[i].idx = i;     /* self identity */
+               vk->ctx[i].miscdev = NULL;
+       }
+       spin_lock_init(&vk->msg_id_lock);
+       spin_lock_init(&vk->host_alert_lock);
+       vk->msg_id = 0;
+
+       /* initialize hash table */
+       for (i = 0; i < VK_PID_HT_SZ; i++)
+               INIT_LIST_HEAD(&vk->pid_ht[i].head);
+
+       return 0;
+}
+
+irqreturn_t bcm_vk_msgq_irqhandler(int irq, void *dev_id)
+{
+       struct bcm_vk *vk = dev_id;
+
+       if (!bcm_vk_drv_access_ok(vk)) {
+               dev_err(&vk->pdev->dev,
+                       "Interrupt %d received when msgq not inited\n", irq);
+               goto skip_schedule_work;
+       }
+
+       queue_work(vk->wq_thread, &vk->wq_work);
+
+skip_schedule_work:
+       return IRQ_HANDLED;
+}
+
+int bcm_vk_open(struct inode *inode, struct file *p_file)
+{
+       struct bcm_vk_ctx *ctx;
+       struct miscdevice *miscdev = (struct miscdevice *)p_file->private_data;
+       struct bcm_vk *vk = container_of(miscdev, struct bcm_vk, miscdev);
+       struct device *dev = &vk->pdev->dev;
+       int rc = 0;
+
+       /* get a context and set it up for file */
+       ctx = bcm_vk_get_ctx(vk, task_tgid_nr(current));
+       if (!ctx) {
+               dev_err(dev, "Error allocating context\n");
+               rc = -ENOMEM;
+       } else {
+               /*
+                * set up the context and replace private data with it for
+                * other methods to use.  The context is needed because
+                * multiple sessions may open the device node, and when the
+                * upper layer queries for responses, only those tied to a
+                * specific open should be returned.  The context->idx is
+                * used for that binding.
+                */
+               ctx->miscdev = miscdev;
+               p_file->private_data = ctx;
+               dev_dbg(dev, "ctx_returned with idx %d, pid %d\n",
+                       ctx->idx, ctx->pid);
+       }
+       return rc;
+}
+
+ssize_t bcm_vk_read(struct file *p_file,
+                   char __user *buf,
+                   size_t count,
+                   loff_t *f_pos)
+{
+       ssize_t rc = -ENOMSG;
+       struct bcm_vk_ctx *ctx = p_file->private_data;
+       struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk,
+                                        miscdev);
+       struct device *dev = &vk->pdev->dev;
+       struct bcm_vk_msg_chan *chan = &vk->to_h_msg_chan;
+       struct bcm_vk_wkent *entry = NULL;
+       u32 q_num;
+       u32 rsp_length;
+       bool found = false;
+
+       if (!bcm_vk_drv_access_ok(vk))
+               return -EPERM;
+
+       dev_dbg(dev, "Buf count %zu\n", count);
+       found = false;
+
+       /*
+        * search through the pendq on the to_h chan, and return only
+        * entries that belong to the same context.  Search is always from
+        * the high to the low priority queues
+        */
+       spin_lock(&chan->pendq_lock);
+       for (q_num = 0; q_num < chan->q_nr; q_num++) {
+               list_for_each_entry(entry, &chan->pendq[q_num], node) {
+                       if (entry->ctx->idx == ctx->idx) {
+                               if (count >=
+                                   (entry->to_h_blks * VK_MSGQ_BLK_SIZE)) {
+                                       list_del(&entry->node);
+                                       atomic_dec(&ctx->pend_cnt);
+                                       found = true;
+                               } else {
+                                       /* buffer not big enough */
+                                       rc = -EMSGSIZE;
+                               }
+                               goto read_loop_exit;
+                       }
+               }
+       }
+read_loop_exit:
+       spin_unlock(&chan->pendq_lock);
+
+       if (found) {
+               /* retrieve the passed down msg_id */
+               set_msg_id(&entry->to_h_msg[0], entry->usr_msg_id);
+               rsp_length = entry->to_h_blks * VK_MSGQ_BLK_SIZE;
+               if (copy_to_user(buf, entry->to_h_msg, rsp_length) == 0)
+                       rc = rsp_length;
+
+               bcm_vk_free_wkent(dev, entry);
+       } else if (rc == -EMSGSIZE) {
+               struct vk_msg_blk tmp_msg = entry->to_h_msg[0];
+
+               /*
+                * in this case, return just the first block, so
+                * that app knows what size it is looking for.
+                */
+               set_msg_id(&tmp_msg, entry->usr_msg_id);
+               tmp_msg.size = entry->to_h_blks - 1;
+               if (copy_to_user(buf, &tmp_msg, VK_MSGQ_BLK_SIZE) != 0) {
+                       dev_err(dev, "Error return 1st block in -EMSGSIZE\n");
+                       rc = -EFAULT;
+               }
+       }
+       return rc;
+}
+
+ssize_t bcm_vk_write(struct file *p_file,
+                    const char __user *buf,
+                    size_t count,
+                    loff_t *f_pos)
+{
+       ssize_t rc;
+       struct bcm_vk_ctx *ctx = p_file->private_data;
+       struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk,
+                                        miscdev);
+       struct bcm_vk_msgq __iomem *msgq;
+       struct device *dev = &vk->pdev->dev;
+       struct bcm_vk_wkent *entry;
+       u32 sgl_extra_blks;
+       u32 q_num;
+       u32 msg_size;
+       u32 msgq_size;
+
+       if (!bcm_vk_drv_access_ok(vk))
+               return -EPERM;
+
+       dev_dbg(dev, "Msg count %zu\n", count);
+
+       /* sanity check: count must be a multiple of the basic blk size */
+       if (count & (VK_MSGQ_BLK_SIZE - 1)) {
+               dev_err(dev, "Failure with size %zu not multiple of %zu\n",
+                       count, VK_MSGQ_BLK_SIZE);
+               rc = -EINVAL;
+               goto write_err;
+       }
+
+       /* allocate the work entry + buffer for size count and inband sgl */
+       entry = kzalloc(sizeof(*entry) + count + vk->ib_sgl_size,
+                       GFP_KERNEL);
+       if (!entry) {
+               rc = -ENOMEM;
+               goto write_err;
+       }
+
+       /* now copy msg from user space, and then formulate the work entry */
+       if (copy_from_user(&entry->to_v_msg[0], buf, count)) {
+               rc = -EFAULT;
+               goto write_free_ent;
+       }
+
+       entry->to_v_blks = count >> VK_MSGQ_BLK_SZ_SHIFT;
+       entry->ctx = ctx;
+
+       /* check that the blk size does not exceed the queue space */
+       q_num = get_q_num(&entry->to_v_msg[0]);
+       msgq = vk->to_v_msg_chan.msgq[q_num];
+       msgq_size = readl_relaxed(&msgq->size);
+       if (entry->to_v_blks + (vk->ib_sgl_size >> VK_MSGQ_BLK_SZ_SHIFT)
+           > (msgq_size - 1)) {
+               dev_err(dev, "Blk size %d exceed max queue size allowed %d\n",
+                       entry->to_v_blks, msgq_size - 1);
+               rc = -EINVAL;
+               goto write_free_ent;
+       }
+
+       /* Use internal message id */
+       entry->usr_msg_id = get_msg_id(&entry->to_v_msg[0]);
+       rc = bcm_vk_get_msg_id(vk);
+       if (rc == VK_MSG_ID_OVERFLOW) {
+               dev_err(dev, "msg_id overflow\n");
+               rc = -EOVERFLOW;
+               goto write_free_ent;
+       }
+       set_msg_id(&entry->to_v_msg[0], rc);
+       ctx->q_num = q_num;
+
+       dev_dbg(dev,
+               "[Q-%d]Message ctx id %d, usr_msg_id 0x%x sent msg_id 0x%x\n",
+               ctx->q_num, ctx->idx, entry->usr_msg_id,
+               get_msg_id(&entry->to_v_msg[0]));
+
+       if (entry->to_v_msg[0].function_id == VK_FID_TRANS_BUF) {
+               /* Convert any pointers to sg list */
+               unsigned int num_planes;
+               int dir;
+               struct _vk_data *data;
+
+               /*
+                * check if we are in reset; if so, no buffer transfer is
+                * allowed and an error is returned.
+                */
+               if (vk->reset_pid) {
+                       dev_dbg(dev, "No Transfer allowed during reset, pid %d.\n",
+                               ctx->pid);
+                       rc = -EACCES;
+                       goto write_free_msgid;
+               }
+
+               num_planes = entry->to_v_msg[0].cmd & VK_CMD_PLANES_MASK;
+               if ((entry->to_v_msg[0].cmd & VK_CMD_MASK) == VK_CMD_DOWNLOAD)
+                       dir = DMA_FROM_DEVICE;
+               else
+                       dir = DMA_TO_DEVICE;
+
+               /* Calculate vk_data location */
+               /* Go to end of the message */
+               msg_size = entry->to_v_msg[0].size;
+               if (msg_size > entry->to_v_blks) {
+                       rc = -EMSGSIZE;
+                       goto write_free_msgid;
+               }
+
+               data = (struct _vk_data *)&entry->to_v_msg[msg_size + 1];
+
+               /* Now back up to the start of the pointers */
+               data -= num_planes;
+
+               /* Convert user addresses to DMA SG List */
+               rc = bcm_vk_sg_alloc(dev, entry->dma, dir, data, num_planes);
+               if (rc)
+                       goto write_free_msgid;
+
+               atomic_inc(&ctx->dma_cnt);
+               /* try to embed inband sgl */
+               sgl_extra_blks = bcm_vk_append_ib_sgl(vk, entry, data,
+                                                     num_planes);
+               entry->to_v_blks += sgl_extra_blks;
+               entry->to_v_msg[0].size += sgl_extra_blks;
+       } else if (entry->to_v_msg[0].function_id == VK_FID_INIT &&
+                  entry->to_v_msg[0].context_id == VK_NEW_CTX) {
+               /*
+                * Init happens in 2 stages, only the first stage contains the
+                * pid that needs translating.
+                */
+               pid_t org_pid, pid;
+
+               /*
+                * translate the pid into the unique host space, as the
+                * user may run sessions inside containers or process
+                * namespaces.
+                */
+#define VK_MSG_PID_MASK 0xffffff00
+#define VK_MSG_PID_SH   8
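+               /*
+                * e.g. (illustrative) pid 1234 (0x4d2) is carried in arg
+                * bits [31:8] as 0x0004d200; bits [7:0] are preserved.
+                */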
+               org_pid = (entry->to_v_msg[0].arg & VK_MSG_PID_MASK)
+                          >> VK_MSG_PID_SH;
+
+               pid = task_tgid_nr(current);
+               entry->to_v_msg[0].arg =
+                       (entry->to_v_msg[0].arg & ~VK_MSG_PID_MASK) |
+                       (pid << VK_MSG_PID_SH);
+               if (org_pid != pid)
+                       dev_dbg(dev, "In PID 0x%x(%d), converted PID 0x%x(%d)\n",
+                               org_pid, org_pid, pid, pid);
+       }
+
+       /*
+        * store work entry to pending queue until a response is received.
+        * This needs to be done before enqueuing the message
+        */
+       bcm_vk_append_pendq(&vk->to_v_msg_chan, q_num, entry);
+
+       rc = bcm_to_v_msg_enqueue(vk, entry);
+       if (rc) {
+               dev_err(dev, "Fail to enqueue msg to to_v queue\n");
+
+               /* remove message from pending list */
+               entry = bcm_vk_dequeue_pending
+                              (vk,
+                               &vk->to_v_msg_chan,
+                               q_num,
+                               get_msg_id(&entry->to_v_msg[0]));
+               goto write_free_ent;
+       }
+
+       return count;
+
+write_free_msgid:
+       bcm_vk_msgid_bitmap_clear(vk, get_msg_id(&entry->to_v_msg[0]), 1);
+write_free_ent:
+       kfree(entry);
+write_err:
+       return rc;
+}
+
+__poll_t bcm_vk_poll(struct file *p_file, struct poll_table_struct *wait)
+{
+       __poll_t ret = 0;
+       int cnt;
+       struct bcm_vk_ctx *ctx = p_file->private_data;
+       struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+       struct device *dev = &vk->pdev->dev;
+
+       poll_wait(p_file, &ctx->rd_wq, wait);
+
+       cnt = atomic_read(&ctx->pend_cnt);
+       if (cnt) {
+               ret = (__force __poll_t)(POLLIN | POLLRDNORM);
+               if (cnt < 0) {
+                       dev_err(dev, "Error cnt %d, setting back to 0", cnt);
+                       atomic_set(&ctx->pend_cnt, 0);
+               }
+       }
+
+       return ret;
+}
+
+int bcm_vk_release(struct inode *inode, struct file *p_file)
+{
+       int ret;
+       struct bcm_vk_ctx *ctx = p_file->private_data;
+       struct bcm_vk *vk = container_of(ctx->miscdev, struct bcm_vk, miscdev);
+       struct device *dev = &vk->pdev->dev;
+       pid_t pid = ctx->pid;
+       int dma_cnt;
+       unsigned long timeout, start_time;
+
+       /*
+        * if there are outstanding DMA transactions, we need to delay long
+        * enough to ensure that the card side has stopped touching the host
+        * buffer and its SGL list.  A race condition could happen if the
+        * host app is killed abruptly, e.g. kill -9, while some DMA
+        * transfers are still in flight.  Nothing can be done except delay,
+        * as the host side runs in a completely asynchronous fashion.
+        */
+       start_time = jiffies;
+       timeout = start_time + msecs_to_jiffies(BCM_VK_DMA_DRAIN_MAX_MS);
+       do {
+               if (time_after(jiffies, timeout)) {
+                       dev_warn(dev, "%d dma still pending for [fd-%d] pid %d\n",
+                                dma_cnt, ctx->idx, pid);
+                       break;
+               }
+               dma_cnt = atomic_read(&ctx->dma_cnt);
+               cpu_relax();
+               cond_resched();
+       } while (dma_cnt);
+       dev_dbg(dev, "Draining for [fd-%d] pid %d - delay %d ms\n",
+               ctx->idx, pid, jiffies_to_msecs(jiffies - start_time));
+
+       bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, ctx);
+       bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, ctx);
+
+       ret = bcm_vk_free_ctx(vk, ctx);
+       if (ret == 0)
+               ret = bcm_vk_handle_last_sess(vk, pid, ctx->q_num);
+       else
+               ret = 0;
+
+       kref_put(&vk->kref, bcm_vk_release_data);
+
+       return ret;
+}
+
+int bcm_vk_msg_init(struct bcm_vk *vk)
+{
+       struct device *dev = &vk->pdev->dev;
+       int ret;
+
+       if (bcm_vk_data_init(vk)) {
+               dev_err(dev, "Error initializing internal data structures\n");
+               return -EINVAL;
+       }
+
+       if (bcm_vk_msg_chan_init(&vk->to_v_msg_chan) ||
+           bcm_vk_msg_chan_init(&vk->to_h_msg_chan)) {
+               dev_err(dev, "Error initializing communication channel\n");
+               return -EIO;
+       }
+
+       /* read msgq info if ready */
+       ret = bcm_vk_sync_msgq(vk, false);
+       if (ret && (ret != -EAGAIN)) {
+               dev_err(dev, "Error reading comm msg Q info\n");
+               return -EIO;
+       }
+
+       return 0;
+}
+
+void bcm_vk_msg_remove(struct bcm_vk *vk)
+{
+       bcm_vk_blk_drv_access(vk);
+
+       /* drain all pending items */
+       bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_v_msg_chan, NULL);
+       bcm_vk_drain_all_pend(&vk->pdev->dev, &vk->to_h_msg_chan, NULL);
+}
+
diff --git a/drivers/misc/bcm-vk/bcm_vk_msg.h b/drivers/misc/bcm-vk/bcm_vk_msg.h
new file mode 100644 (file)
index 0000000..4eaad84
--- /dev/null
@@ -0,0 +1,163 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef BCM_VK_MSG_H
+#define BCM_VK_MSG_H
+
+#include <uapi/linux/misc/bcm_vk.h>
+#include "bcm_vk_sg.h"
+
+/* Single message queue control structure */
+struct bcm_vk_msgq {
+       u16 type;       /* queue type */
+       u16 num;        /* queue number */
+       u32 start;      /* offset in BAR1 where the queue memory starts */
+
+       u32 rd_idx; /* read idx */
+       u32 wr_idx; /* write idx */
+
+       u32 size;       /*
+                        * size, in number of 16-byte blocks, to align
+                        * with the message data structure.
+                        */
+       u32 nxt;        /*
+                        * offset to the next msg queue struct.  This
+                        * provides flexibility for alignment purposes.
+                        */
+
+/* Least significant 16 bits in below field hold doorbell register offset */
+#define DB_SHIFT 16
+
+       u32 db_offset; /* queue doorbell register offset in BAR0 */
+
+       u32 rsvd;
+};
+
+/*
+ * Structure to record static info from the msgq sync.  We keep a local
+ * copy of some of these variables for both performance and checking
+ * purposes.
+ */
+struct bcm_vk_sync_qinfo {
+       void __iomem *q_start;
+       u32 q_size;
+       u32 q_mask;
+       u32 q_low;
+       u32 q_db_offset;
+};
+
+#define VK_MSGQ_MAX_NR 4 /* Maximum number of message queues */
+
+/*
+ * message block - basic unit in the message where a message's size is always
+ *                N x sizeof(basic_block)
+ */
+struct vk_msg_blk {
+       u8 function_id;
+#define VK_FID_TRANS_BUF       5
+#define VK_FID_SHUTDOWN                8
+#define VK_FID_INIT            9
+       u8 size; /* size of the message in number of vk_msg_blk's */
+       u16 trans_id; /* transport id, queue & msg_id */
+       u32 context_id;
+#define VK_NEW_CTX             0
+       u32 cmd;
+#define VK_CMD_PLANES_MASK     0x000f /* number of planes to up/download */
+#define VK_CMD_UPLOAD          0x0400 /* memory transfer to vk */
+#define VK_CMD_DOWNLOAD                0x0500 /* memory transfer from vk */
+#define VK_CMD_MASK            0x0f00 /* command mask */
+       u32 arg;
+};
+
+/* vk_msg_blk is 16 bytes fixed */
+#define VK_MSGQ_BLK_SIZE   (sizeof(struct vk_msg_blk))
+/* shift for fast division of basic msg blk size */
+#define VK_MSGQ_BLK_SZ_SHIFT 4
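+/*
+ * The shift assumes sizeof(struct vk_msg_blk) == 16.  A compile-time
+ * guard one could add in an init path (illustrative, not part of this
+ * patch): BUILD_BUG_ON(VK_MSGQ_BLK_SIZE != (1 << VK_MSGQ_BLK_SZ_SHIFT));
+ */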
+
+/* use msg_id 0 for any simplex host2vk communication */
+#define VK_SIMPLEX_MSG_ID 0
+
+/* context per session opening of the device node */
+struct bcm_vk_ctx {
+       struct list_head node; /* use for linkage in Hash Table */
+       unsigned int idx;
+       bool in_use;
+       pid_t pid;
+       u32 hash_idx;
+       u32 q_num; /* queue number used by the stream */
+       struct miscdevice *miscdev;
+       atomic_t pend_cnt; /* number of items pending for user space to read */
+       atomic_t dma_cnt; /* any dma transaction outstanding */
+       wait_queue_head_t rd_wq;
+};
+
+/* pid hash table entry */
+struct bcm_vk_ht_entry {
+       struct list_head head;
+};
+
+#define VK_DMA_MAX_ADDRS 4 /* Max 4 DMA Addresses */
+/* structure for housekeeping a single work entry */
+struct bcm_vk_wkent {
+       struct list_head node; /* for linking purpose */
+       struct bcm_vk_ctx *ctx;
+
+       /* Store up to 4 dma pointers */
+       struct bcm_vk_dma dma[VK_DMA_MAX_ADDRS];
+
+       u32 to_h_blks; /* response */
+       struct vk_msg_blk *to_h_msg;
+
+       /*
+        * put to_v_msg at the end so that we can simply append the to_v
+        * msg to the end of the allocated block
+        */
+       u32 usr_msg_id;
+       u32 to_v_blks;
+       u32 seq_num;
+       struct vk_msg_blk to_v_msg[0];
+};
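+
+/*
+ * The to_v_msg[0] tail is sized at allocation time, e.g. (as in
+ * bcm_vk_write): kzalloc(sizeof(*entry) + count + vk->ib_sgl_size, ...).
+ */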
+
+/* queue stats counters */
+struct bcm_vk_qs_cnts {
+       u32 cnt; /* general counter, used to limit output */
+       u32 acc_sum;
+       u32 max_occ; /* max during a sampling period */
+       u32 max_abs; /* the abs max since reset */
+};
+
+/* control channel structure for either to_v or to_h communication */
+struct bcm_vk_msg_chan {
+       u32 q_nr;
+       /* Mutex to access msgq */
+       struct mutex msgq_mutex;
+       /* pointing to BAR locations */
+       struct bcm_vk_msgq __iomem *msgq[VK_MSGQ_MAX_NR];
+       /* Spinlock to access pending queue */
+       spinlock_t pendq_lock;
+       /* for temporary storing pending items, one for each queue */
+       struct list_head pendq[VK_MSGQ_MAX_NR];
+       /* static queue info from the sync */
+       struct bcm_vk_sync_qinfo sync_qinfo[VK_MSGQ_MAX_NR];
+};
+
+/* total number of message queues allowed by the driver */
+#define VK_MSGQ_PER_CHAN_MAX   3
+#define VK_MSGQ_NUM_DEFAULT    (VK_MSGQ_PER_CHAN_MAX - 1)
+
+/* total number of supported ctx, 32 ctx each for 5 components */
+#define VK_CMPT_CTX_MAX                (32 * 5)
+
+/* hash table defines to store the opened FDs */
+#define VK_PID_HT_SHIFT_BIT    7 /* 128 */
+#define VK_PID_HT_SZ           BIT(VK_PID_HT_SHIFT_BIT)
+
+/* The following are offsets of DDR info provided by the vk card */
+#define VK_BAR0_SEG_SIZE       (4 * SZ_1K) /* segment size for BAR0 */
+
+/* shutdown types supported */
+#define VK_SHUTDOWN_PID                1
+#define VK_SHUTDOWN_GRACEFUL   2
+
+#endif
diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.c b/drivers/misc/bcm-vk/bcm_vk_sg.c
new file mode 100644 (file)
index 0000000..2e9daaf
--- /dev/null
@@ -0,0 +1,275 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+#include <linux/dma-mapping.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/pgtable.h>
+#include <linux/vmalloc.h>
+
+#include <asm/page.h>
+#include <asm/unaligned.h>
+
+#include <uapi/linux/misc/bcm_vk.h>
+
+#include "bcm_vk.h"
+#include "bcm_vk_msg.h"
+#include "bcm_vk_sg.h"
+
+/*
+ * Valkyrie has a hardware limitation of 16M transfer size.
+ * So limit the SGL chunks to 16M.
+ */
+#define BCM_VK_MAX_SGL_CHUNK SZ_16M
+
+static int bcm_vk_dma_alloc(struct device *dev,
+                           struct bcm_vk_dma *dma,
+                           int dir,
+                           struct _vk_data *vkdata);
+static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma);
+
+/* Uncomment to dump SGLIST */
+/* #define BCM_VK_DUMP_SGLIST */
+
+static int bcm_vk_dma_alloc(struct device *dev,
+                           struct bcm_vk_dma *dma,
+                           int direction,
+                           struct _vk_data *vkdata)
+{
+       dma_addr_t addr, sg_addr;
+       int err;
+       int i;
+       int offset;
+       u32 size;
+       u32 remaining_size;
+       u32 transfer_size;
+       u64 data;
+       unsigned long first, last;
+       struct _vk_data *sgdata;
+
+       /* Get 64-bit user address */
+       data = get_unaligned(&vkdata->address);
+
+       /* offset into first page */
+       offset = offset_in_page(data);
+
+       /* Calculate number of pages */
+       first = (data & PAGE_MASK) >> PAGE_SHIFT;
+       last  = ((data + vkdata->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
+       dma->nr_pages = last - first + 1;
+
+       /* Allocate DMA pages */
+       dma->pages = kmalloc_array(dma->nr_pages,
+                                  sizeof(struct page *),
+                                  GFP_KERNEL);
+       if (!dma->pages)
+               return -ENOMEM;
+
+       dev_dbg(dev, "Alloc DMA Pages [0x%llx+0x%x => %d pages]\n",
+               data, vkdata->size, dma->nr_pages);
+
+       dma->direction = direction;
+
+       /* Get user pages into memory */
+       err = get_user_pages_fast(data & PAGE_MASK,
+                                 dma->nr_pages,
+                                 direction == DMA_FROM_DEVICE,
+                                 dma->pages);
+       if (err != dma->nr_pages) {
+               dma->nr_pages = (err >= 0) ? err : 0;
+               dev_err(dev, "get_user_pages_fast, err=%d [%d]\n",
+                       err, dma->nr_pages);
+               return err < 0 ? err : -EINVAL;
+       }
+
+       /* Max size of sg list is 1 entry per mapped page + fields at start */
+       dma->sglen = (dma->nr_pages * sizeof(*sgdata)) +
+                    (sizeof(u32) * SGLIST_VKDATA_START);
+
+       /* Allocate sglist */
+       dma->sglist = dma_alloc_coherent(dev,
+                                        dma->sglen,
+                                        &dma->handle,
+                                        GFP_KERNEL);
+       if (!dma->sglist)
+               return -ENOMEM;
+
+       dma->sglist[SGLIST_NUM_SG] = 0;
+       dma->sglist[SGLIST_TOTALSIZE] = vkdata->size;
+       remaining_size = vkdata->size;
+       sgdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
+
+       /* Map all pages into DMA */
+       size = min_t(size_t, PAGE_SIZE - offset, remaining_size);
+       remaining_size -= size;
+       sg_addr = dma_map_page(dev,
+                              dma->pages[0],
+                              offset,
+                              size,
+                              dma->direction);
+       transfer_size = size;
+       if (unlikely(dma_mapping_error(dev, sg_addr))) {
+               __free_page(dma->pages[0]);
+               return -EIO;
+       }
+
+       for (i = 1; i < dma->nr_pages; i++) {
+               size = min_t(size_t, PAGE_SIZE, remaining_size);
+               remaining_size -= size;
+               addr = dma_map_page(dev,
+                                   dma->pages[i],
+                                   0,
+                                   size,
+                                   dma->direction);
+               if (unlikely(dma_mapping_error(dev, addr))) {
+                       __free_page(dma->pages[i]);
+                       return -EIO;
+               }
+
+               /*
+                * Compress the SG list entry when pages are contiguous
+                * and the transfer size is less than or equal to
+                * BCM_VK_MAX_SGL_CHUNK
+                */
+               if ((addr == (sg_addr + transfer_size)) &&
+                   ((transfer_size + size) <= BCM_VK_MAX_SGL_CHUNK)) {
+                       /* pages are contiguous, add to same sg entry */
+                       transfer_size += size;
+               } else {
+                       /* pages are not contiguous, write sg entry */
+                       sgdata->size = transfer_size;
+                       put_unaligned(sg_addr, (u64 *)&sgdata->address);
+                       dma->sglist[SGLIST_NUM_SG]++;
+
+                       /* start new sg entry */
+                       sgdata++;
+                       sg_addr = addr;
+                       transfer_size = size;
+               }
+       }
+       /* Write last sg list entry */
+       sgdata->size = transfer_size;
+       put_unaligned(sg_addr, (u64 *)&sgdata->address);
+       dma->sglist[SGLIST_NUM_SG]++;
+
+       /* Update pointers and size field to point to sglist */
+       put_unaligned((u64)dma->handle, &vkdata->address);
+       vkdata->size = (dma->sglist[SGLIST_NUM_SG] * sizeof(*sgdata)) +
+                      (sizeof(u32) * SGLIST_VKDATA_START);
+
+#ifdef BCM_VK_DUMP_SGLIST
+       dev_dbg(dev,
+               "sgl 0x%llx handle 0x%llx, sglen: 0x%x sgsize: 0x%x\n",
+               (u64)dma->sglist,
+               dma->handle,
+               dma->sglen,
+               vkdata->size);
+       for (i = 0; i < vkdata->size / sizeof(u32); i++)
+               dev_dbg(dev, "i:0x%x 0x%x\n", i, dma->sglist[i]);
+#endif
+
+       return 0;
+}
+
+int bcm_vk_sg_alloc(struct device *dev,
+                   struct bcm_vk_dma *dma,
+                   int dir,
+                   struct _vk_data *vkdata,
+                   int num)
+{
+       int i;
+       int rc = -EINVAL;
+
+       /* Convert user addresses to DMA SG List */
+       for (i = 0; i < num; i++) {
+               if (vkdata[i].size && vkdata[i].address) {
+                       /*
+                        * If both size and address are non-zero
+                        * then DMA alloc.
+                        */
+                       rc = bcm_vk_dma_alloc(dev,
+                                             &dma[i],
+                                             dir,
+                                             &vkdata[i]);
+               } else if (vkdata[i].size ||
+                          vkdata[i].address) {
+                       /*
+                        * If only one of size and address is zero,
+                        * there is a problem.
+                        */
+                       dev_err(dev,
+                               "Invalid vkdata %x 0x%x 0x%llx\n",
+                               i, vkdata[i].size, vkdata[i].address);
+                       rc = -EINVAL;
+               } else {
+                       /*
+                        * If size and address are both zero
+                        * don't convert, but return success.
+                        */
+                       rc = 0;
+               }
+
+               if (rc)
+                       goto fail_alloc;
+       }
+       return rc;
+
+fail_alloc:
+       while (i > 0) {
+               i--;
+               if (dma[i].sglist)
+                       bcm_vk_dma_free(dev, &dma[i]);
+       }
+       return rc;
+}
+
+static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma)
+{
+       dma_addr_t addr;
+       int i;
+       int num_sg;
+       u32 size;
+       struct _vk_data *vkdata;
+
+       dev_dbg(dev, "free sglist=%p sglen=0x%x\n", dma->sglist, dma->sglen);
+
+       /* Unmap all pages in the sglist */
+       num_sg = dma->sglist[SGLIST_NUM_SG];
+       vkdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
+       for (i = 0; i < num_sg; i++) {
+               size = vkdata[i].size;
+               addr = get_unaligned(&vkdata[i].address);
+
+               dma_unmap_page(dev, addr, size, dma->direction);
+       }
+
+       /* Free allocated sglist */
+       dma_free_coherent(dev, dma->sglen, dma->sglist, dma->handle);
+
+       /* Release the pinned user pages */
+       for (i = 0; i < dma->nr_pages; i++)
+               put_page(dma->pages[i]);
+
+       /* Free allocated dma pages */
+       kfree(dma->pages);
+       dma->sglist = NULL;
+
+       return 0;
+}
+
+int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num,
+                  int *proc_cnt)
+{
+       int i;
+
+       *proc_cnt = 0;
+       /* Unmap and free all pages and sglists */
+       for (i = 0; i < num; i++) {
+               if (dma[i].sglist) {
+                       bcm_vk_dma_free(dev, &dma[i]);
+                       *proc_cnt += 1;
+               }
+       }
+
+       return 0;
+}
diff --git a/drivers/misc/bcm-vk/bcm_vk_sg.h b/drivers/misc/bcm-vk/bcm_vk_sg.h
new file mode 100644
index 0000000..81b3d09
--- /dev/null
@@ -0,0 +1,61 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef BCM_VK_SG_H
+#define BCM_VK_SG_H
+
+#include <linux/dma-mapping.h>
+
+struct bcm_vk_dma {
+       /* for userland buffer */
+       struct page **pages;
+       int nr_pages;
+
+       /* common */
+       dma_addr_t handle;
+       /*
+        * sglist is of the following LE format
+        * [U32] num_sg = number of sg addresses (N)
+        * [U32] totalsize = total size of data being transferred in sglist
+        * [U32] size[0] = size of data in address0
+        * [U32] addr_l[0] = lower 32-bits of address0
+        * [U32] addr_h[0] = higher 32-bits of address0
+        * ..
+        * [U32] size[N-1] = size of data in addressN-1
+        * [U32] addr_l[N-1] = lower 32-bits of addressN-1
+        * [U32] addr_h[N-1] = higher 32-bits of addressN-1
+        */
+       u32 *sglist;
+#define SGLIST_NUM_SG          0
+#define SGLIST_TOTALSIZE       1
+#define SGLIST_VKDATA_START    2
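+       /*
+        * Worked example (a sketch derived from the layout above): for
+        * N = 2 entries, sglist[SGLIST_NUM_SG] = 2, sglist[SGLIST_TOTALSIZE]
+        * holds the byte total, and entries begin at
+        * sglist[SGLIST_VKDATA_START]. The descriptor size reported back to
+        * the card is then
+        * N * sizeof(struct _vk_data) + SGLIST_VKDATA_START * sizeof(u32)
+        * = 2 * 12 + 2 * 4 = 32 bytes.
+        */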
+
+       int sglen; /* Length (bytes) of sglist */
+       int direction;
+};
+
+struct _vk_data {
+       u32 size;    /* data size in bytes */
+       u64 address; /* pointer to data */
+} __packed;
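+/*
+ * Note: with __packed, the 64-bit address above sits at offset 4 and is
+ * not naturally aligned, which is why the .c code accesses it only via
+ * put_unaligned()/get_unaligned().
+ */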
+
+/*
+ * Scatter-gather DMA buffer API.
+ *
+ * These functions provide a simple way to create a page list and a
+ * scatter-gather list from a userspace address and to map the memory
+ * for DMA operations.
+ */
+int bcm_vk_sg_alloc(struct device *dev,
+                   struct bcm_vk_dma *dma,
+                   int dir,
+                   struct _vk_data *vkdata,
+                   int num);
+
+int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num,
+                  int *proc_cnt);
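+
+/*
+ * Illustrative call sequence (a sketch; N and the buffer handling are
+ * assumptions, not taken from this patch):
+ *
+ *	struct bcm_vk_dma dma[N];
+ *	int proc_cnt;
+ *
+ *	rc = bcm_vk_sg_alloc(dev, dma, DMA_TO_DEVICE, vkdata, N);
+ *	if (!rc) {
+ *		... hand each dma[i].handle to the card and wait ...
+ *		bcm_vk_sg_free(dev, dma, N, &proc_cnt);
+ *	}
+ */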
+
+#endif
+
diff --git a/drivers/misc/bcm-vk/bcm_vk_tty.c b/drivers/misc/bcm-vk/bcm_vk_tty.c
new file mode 100644
index 0000000..4d02692
--- /dev/null
@@ -0,0 +1,339 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#include <linux/tty.h>
+#include <linux/tty_driver.h>
+#include <linux/tty_flip.h>
+
+#include "bcm_vk.h"
+
+/* TTYVK base offset is 0x300000 into BAR1 */
+#define BAR1_TTYVK_BASE_OFFSET 0x300000
+/* Each TTYVK channel (TO or FROM) is 0x100000 */
+#define BAR1_TTYVK_CHAN_OFFSET 0x100000
+/* Each TTYVK channel has TO and FROM, hence the * 2 */
+#define BAR1_TTYVK_BASE(index) (BAR1_TTYVK_BASE_OFFSET + \
+                                ((index) * BAR1_TTYVK_CHAN_OFFSET * 2))
+/* TO TTYVK channel base comes before FROM for each index */
+#define TO_TTYK_BASE(index)    BAR1_TTYVK_BASE(index)
+#define FROM_TTYK_BASE(index)  (BAR1_TTYVK_BASE(index) + \
+                                BAR1_TTYVK_CHAN_OFFSET)
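+/*
+ * Example: for index 1, BAR1_TTYVK_BASE(1) = 0x300000 + 1 * 0x100000 * 2
+ * = 0x500000, so TO_TTYK_BASE(1) = 0x500000 and FROM_TTYK_BASE(1) =
+ * 0x600000.
+ */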
+
+struct bcm_vk_tty_chan {
+       u32 reserved;
+       u32 size;
+       u32 wr;
+       u32 rd;
+       u32 *data;
+};
+
+#define VK_BAR_CHAN(v, DIR, e) ((v)->DIR##_offset \
+                                + offsetof(struct bcm_vk_tty_chan, e))
+#define VK_BAR_CHAN_SIZE(v, DIR)       VK_BAR_CHAN(v, DIR, size)
+#define VK_BAR_CHAN_WR(v, DIR)         VK_BAR_CHAN(v, DIR, wr)
+#define VK_BAR_CHAN_RD(v, DIR)         VK_BAR_CHAN(v, DIR, rd)
+#define VK_BAR_CHAN_DATA(v, DIR, off)  (VK_BAR_CHAN(v, DIR, data) + (off))
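+/*
+ * Example: VK_BAR_CHAN_WR(v, to) expands to
+ * (v)->to_offset + offsetof(struct bcm_vk_tty_chan, wr), i.e. the "wr"
+ * word of the TO channel's control block in BAR1.
+ */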
+
+#define VK_BAR0_REGSEG_TTY_DB_OFFSET   0x86c
+
+/* Poll every 1/10 of a second - temp hack until we use MSI interrupts */
+#define SERIAL_TIMER_VALUE (HZ / 10)
+
+static void bcm_vk_tty_poll(struct timer_list *t)
+{
+       struct bcm_vk *vk = from_timer(vk, t, serial_timer);
+
+       queue_work(vk->tty_wq_thread, &vk->tty_wq_work);
+       mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE);
+}
+
+irqreturn_t bcm_vk_tty_irqhandler(int irq, void *dev_id)
+{
+       struct bcm_vk *vk = dev_id;
+
+       queue_work(vk->tty_wq_thread, &vk->tty_wq_work);
+
+       return IRQ_HANDLED;
+}
+
+static void bcm_vk_tty_wq_handler(struct work_struct *work)
+{
+       struct bcm_vk *vk = container_of(work, struct bcm_vk, tty_wq_work);
+       struct bcm_vk_tty *vktty;
+       int card_status;
+       int count;
+       unsigned char c;
+       int i;
+       int wr;
+
+       card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS);
+       if (BCM_VK_INTF_IS_DOWN(card_status))
+               return;
+
+       for (i = 0; i < BCM_VK_NUM_TTY; i++) {
+               count = 0;
+               /* Check the card status to see that the tty channel is ready */
+               if ((card_status & BIT(i)) == 0)
+                       continue;
+
+               vktty = &vk->tty[i];
+
+               /* Don't increment read index if tty app is closed */
+               if (!vktty->is_opened)
+                       continue;
+
+               /* Fetch the wr offset in buffer from VK */
+               wr = vkread32(vk, BAR_1, VK_BAR_CHAN_WR(vktty, from));
+
+               /* Safe to ignore until the BAR read returns a proper size */
+               if (vktty->from_size == 0)
+                       continue;
+
+               if (wr >= vktty->from_size) {
+                       dev_err(&vk->pdev->dev,
+                               "ERROR: wq handler ttyVK%d wr:0x%x >= 0x%x\n",
+                               i, wr, vktty->from_size);
+                       /* Need to signal and close device in this case */
+                       continue;
+               }
+
+               /*
+                * Simple read of circular buffer and
+                * insert into tty flip buffer
+                */
+               while (vk->tty[i].rd != wr) {
+                       c = vkread8(vk, BAR_1,
+                                   VK_BAR_CHAN_DATA(vktty, from, vktty->rd));
+                       vktty->rd++;
+                       if (vktty->rd >= vktty->from_size)
+                               vktty->rd = 0;
+                       tty_insert_flip_char(&vktty->port, c, TTY_NORMAL);
+                       count++;
+               }
+
+               if (count) {
+                       tty_flip_buffer_push(&vktty->port);
+
+                       /* Update read offset from shadow register to card */
+                       vkwrite32(vk, vktty->rd, BAR_1,
+                                 VK_BAR_CHAN_RD(vktty, from));
+               }
+       }
+}
+
+static int bcm_vk_tty_open(struct tty_struct *tty, struct file *file)
+{
+       int card_status;
+       struct bcm_vk *vk;
+       struct bcm_vk_tty *vktty;
+       int index;
+
+       /* initialize the pointer in case something fails */
+       tty->driver_data = NULL;
+
+       vk = (struct bcm_vk *)dev_get_drvdata(tty->dev);
+       index = tty->index;
+
+       if (index >= BCM_VK_NUM_TTY)
+               return -EINVAL;
+
+       vktty = &vk->tty[index];
+
+       vktty->pid = task_pid_nr(current);
+       vktty->to_offset = TO_TTYK_BASE(index);
+       vktty->from_offset = FROM_TTYK_BASE(index);
+
+       /* Don't allow the tty device to open if the card's tty is not ready */
+       card_status = vkread32(vk, BAR_0, BAR_CARD_STATUS);
+       if (BCM_VK_INTF_IS_DOWN(card_status) || ((card_status & BIT(index)) == 0))
+               return -EBUSY;
+
+       /*
+        * Get shadow registers of the buffer sizes and the "to" write offset
+        * and "from" read offset
+        */
+       vktty->to_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, to));
+       vktty->wr = vkread32(vk, BAR_1, VK_BAR_CHAN_WR(vktty, to));
+       vktty->from_size = vkread32(vk, BAR_1, VK_BAR_CHAN_SIZE(vktty, from));
+       vktty->rd = vkread32(vk, BAR_1, VK_BAR_CHAN_RD(vktty, from));
+       vktty->is_opened = true;
+
+       if (tty->count == 1 && !vktty->irq_enabled) {
+               timer_setup(&vk->serial_timer, bcm_vk_tty_poll, 0);
+               mod_timer(&vk->serial_timer, jiffies + SERIAL_TIMER_VALUE);
+       }
+       return 0;
+}
+
+static void bcm_vk_tty_close(struct tty_struct *tty, struct file *file)
+{
+       struct bcm_vk *vk = dev_get_drvdata(tty->dev);
+
+       if (tty->index >= BCM_VK_NUM_TTY)
+               return;
+
+       vk->tty[tty->index].is_opened = false;
+
+       if (tty->count == 1)
+               del_timer_sync(&vk->serial_timer);
+}
+
+static void bcm_vk_tty_doorbell(struct bcm_vk *vk, u32 db_val)
+{
+       vkwrite32(vk, db_val, BAR_0,
+                 VK_BAR0_REGSEG_DB_BASE + VK_BAR0_REGSEG_TTY_DB_OFFSET);
+}
+
+static int bcm_vk_tty_write(struct tty_struct *tty,
+                           const unsigned char *buffer,
+                           int count)
+{
+       int index;
+       struct bcm_vk *vk;
+       struct bcm_vk_tty *vktty;
+       int i;
+
+       index = tty->index;
+       vk = dev_get_drvdata(tty->dev);
+       vktty = &vk->tty[index];
+
+       /* Simply write each byte to the circular buffer */
+       for (i = 0; i < count; i++) {
+               vkwrite8(vk, buffer[i], BAR_1,
+                        VK_BAR_CHAN_DATA(vktty, to, vktty->wr));
+               vktty->wr++;
+               if (vktty->wr >= vktty->to_size)
+                       vktty->wr = 0;
+       }
+       /* Update write offset from shadow register to card */
+       vkwrite32(vk, vktty->wr, BAR_1, VK_BAR_CHAN_WR(vktty, to));
+       bcm_vk_tty_doorbell(vk, 0);
+
+       return count;
+}
+
+static int bcm_vk_tty_write_room(struct tty_struct *tty)
+{
+       struct bcm_vk *vk = dev_get_drvdata(tty->dev);
+
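+       /*
+        * One byte is held back so that wr == rd can unambiguously mean
+        * "empty" in the circular buffer (an inference from the ring
+        * handling in this file, not stated by the patch).
+        */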
+       return vk->tty[tty->index].to_size - 1;
+}
+
+static const struct tty_operations serial_ops = {
+       .open = bcm_vk_tty_open,
+       .close = bcm_vk_tty_close,
+       .write = bcm_vk_tty_write,
+       .write_room = bcm_vk_tty_write_room,
+};
+
+int bcm_vk_tty_init(struct bcm_vk *vk, char *name)
+{
+       int i;
+       int err;
+       struct tty_driver *tty_drv;
+       struct device *dev = &vk->pdev->dev;
+
+       tty_drv = tty_alloc_driver(BCM_VK_NUM_TTY,
+                                  TTY_DRIVER_REAL_RAW |
+                                  TTY_DRIVER_DYNAMIC_DEV);
+       if (IS_ERR(tty_drv))
+               return PTR_ERR(tty_drv);
+
+       /* Save struct tty_driver for uninstalling the device */
+       vk->tty_drv = tty_drv;
+
+       /* initialize the tty driver */
+       tty_drv->driver_name = KBUILD_MODNAME;
+       tty_drv->name = kstrdup(name, GFP_KERNEL);
+       if (!tty_drv->name) {
+               err = -ENOMEM;
+               goto err_put_tty_driver;
+       }
+       tty_drv->type = TTY_DRIVER_TYPE_SERIAL;
+       tty_drv->subtype = SERIAL_TYPE_NORMAL;
+       tty_drv->init_termios = tty_std_termios;
+       tty_set_operations(tty_drv, &serial_ops);
+
+       /* register the tty driver */
+       err = tty_register_driver(tty_drv);
+       if (err) {
+               dev_err(dev, "tty_register_driver failed\n");
+               goto err_kfree_tty_name;
+       }
+
+       for (i = 0; i < BCM_VK_NUM_TTY; i++) {
+               struct device *tty_dev;
+
+               tty_port_init(&vk->tty[i].port);
+               tty_dev = tty_port_register_device(&vk->tty[i].port, tty_drv,
+                                                  i, dev);
+               if (IS_ERR(tty_dev)) {
+                       err = PTR_ERR(tty_dev);
+                       goto unwind;
+               }
+               dev_set_drvdata(tty_dev, vk);
+               vk->tty[i].is_opened = false;
+       }
+
+       INIT_WORK(&vk->tty_wq_work, bcm_vk_tty_wq_handler);
+       vk->tty_wq_thread = create_singlethread_workqueue("tty");
+       if (!vk->tty_wq_thread) {
+               dev_err(dev, "Failed to create tty workqueue thread\n");
+               err = -ENOMEM;
+               goto unwind;
+       }
+       return 0;
+
+unwind:
+       while (--i >= 0)
+               tty_port_unregister_device(&vk->tty[i].port, tty_drv, i);
+       tty_unregister_driver(tty_drv);
+
+err_kfree_tty_name:
+       kfree(tty_drv->name);
+       tty_drv->name = NULL;
+
+err_put_tty_driver:
+       put_tty_driver(tty_drv);
+
+       return err;
+}
+
+void bcm_vk_tty_exit(struct bcm_vk *vk)
+{
+       int i;
+
+       del_timer_sync(&vk->serial_timer);
+       for (i = 0; i < BCM_VK_NUM_TTY; ++i) {
+               tty_port_unregister_device(&vk->tty[i].port,
+                                          vk->tty_drv,
+                                          i);
+               tty_port_destroy(&vk->tty[i].port);
+       }
+       tty_unregister_driver(vk->tty_drv);
+
+       kfree(vk->tty_drv->name);
+       vk->tty_drv->name = NULL;
+
+       put_tty_driver(vk->tty_drv);
+}
+
+void bcm_vk_tty_terminate_tty_user(struct bcm_vk *vk)
+{
+       struct bcm_vk_tty *vktty;
+       int i;
+
+       for (i = 0; i < BCM_VK_NUM_TTY; ++i) {
+               vktty = &vk->tty[i];
+               if (vktty->pid)
+                       kill_pid(find_vpid(vktty->pid), SIGKILL, 1);
+       }
+}
+
+void bcm_vk_tty_wq_exit(struct bcm_vk *vk)
+{
+       cancel_work_sync(&vk->tty_wq_work);
+       destroy_workqueue(vk->tty_wq_thread);
+}
index 8859011..8200af2 100644
@@ -398,6 +398,11 @@ static int rts522a_extra_init_hw(struct rtsx_pcr *pcr)
 {
        rts5227_extra_init_hw(pcr);
 
+       /* Power down OCP to save power */
+       if (!pcr->card_exist)
+               rtsx_pci_write_register(pcr, FPDCTL, OC_POWER_DOWN,
+                               OC_POWER_DOWN);
+
        rtsx_pci_write_register(pcr, FUNC_FORCE_CTL, FUNC_FORCE_UPME_XMT_DBG,
                FUNC_FORCE_UPME_XMT_DBG);
        rtsx_pci_write_register(pcr, PCLK_CTL, 0x04, 0x04);
index 5a491d2..2733111 100644
@@ -59,12 +59,6 @@ static const struct pci_device_id rtsx_pci_ids[] = {
 
 MODULE_DEVICE_TABLE(pci, rtsx_pci_ids);
 
-static inline void rtsx_pci_disable_aspm(struct rtsx_pcr *pcr)
-{
-       pcie_capability_clear_and_set_word(pcr->pci, PCI_EXP_LNKCTL,
-                                          PCI_EXP_LNKCTL_ASPMC, 0);
-}
-
 static int rtsx_comm_set_ltr_latency(struct rtsx_pcr *pcr, u32 latency)
 {
        rtsx_pci_write_register(pcr, MSGTXDATA0,
@@ -1805,7 +1799,6 @@ static int rtsx_pci_runtime_resume(struct device *device)
        struct pci_dev *pcidev = to_pci_dev(device);
        struct pcr_handle *handle;
        struct rtsx_pcr *pcr;
-       int ret = 0;
 
        handle = pci_get_drvdata(pcidev);
        pcr = handle->pcr;
@@ -1830,7 +1823,7 @@ static int rtsx_pci_runtime_resume(struct device *device)
        schedule_delayed_work(&pcr->idle_work, msecs_to_jiffies(200));
 
        mutex_unlock(&pcr->pcr_mutex);
-       return ret;
+       return 0;
 }
 
 #else /* CONFIG_PM */
index d97a243..c173a5e 100644
@@ -178,7 +178,7 @@ static ssize_t perst_reloads_same_image_store(struct device *device,
        if ((rc != 1) || !(val == 1 || val == 0))
                return -EINVAL;
 
-       adapter->perst_same_image = (val == 1 ? true : false);
+       adapter->perst_same_image = (val == 1);
        return count;
 }
 
index 7c45f82..80114f4 100644
@@ -35,6 +35,10 @@ static const struct eeprom_93xx46_devtype_data atmel_at93c46d_data = {
                  EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH,
 };
 
+static const struct eeprom_93xx46_devtype_data microchip_93lc46b_data = {
+       .quirks = EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE,
+};
+
 struct eeprom_93xx46_dev {
        struct spi_device *spi;
        struct eeprom_93xx46_platform_data *pdata;
@@ -55,6 +59,11 @@ static inline bool has_quirk_instruction_length(struct eeprom_93xx46_dev *edev)
        return edev->pdata->quirks & EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH;
 }
 
+static inline bool has_quirk_extra_read_cycle(struct eeprom_93xx46_dev *edev)
+{
+       return edev->pdata->quirks & EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE;
+}
+
 static int eeprom_93xx46_read(void *priv, unsigned int off,
                              void *val, size_t count)
 {
@@ -96,6 +105,11 @@ static int eeprom_93xx46_read(void *priv, unsigned int off,
                dev_dbg(&edev->spi->dev, "read cmd 0x%x, %d Hz\n",
                        cmd_addr, edev->spi->max_speed_hz);
 
+               if (has_quirk_extra_read_cycle(edev)) {
+                       cmd_addr <<= 1;
+                       bits += 1;
+               }
+
                spi_message_init(&m);
 
                t[0].tx_buf = (char *)&cmd_addr;
@@ -363,6 +377,7 @@ static void select_deassert(void *context)
 static const struct of_device_id eeprom_93xx46_of_table[] = {
        { .compatible = "eeprom-93xx46", },
        { .compatible = "atmel,at93c46d", .data = &atmel_at93c46d_data, },
+       { .compatible = "microchip,93lc46b", .data = &microchip_93lc46b_data, },
        {}
 };
 MODULE_DEVICE_TABLE(of, eeprom_93xx46_of_table);
@@ -512,3 +527,5 @@ MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Driver for 93xx46 EEPROMs");
 MODULE_AUTHOR("Anatolij Gustschin <agust@denx.de>");
 MODULE_ALIAS("spi:93xx46");
+MODULE_ALIAS("spi:eeprom-93xx46");
+MODULE_ALIAS("spi:93lc46b");
index 70eb5ed..beda610 100644
@@ -520,12 +520,13 @@ fastrpc_map_dma_buf(struct dma_buf_attachment *attachment,
 {
        struct fastrpc_dma_buf_attachment *a = attachment->priv;
        struct sg_table *table;
+       int ret;
 
        table = &a->sgt;
 
-       if (!dma_map_sgtable(attachment->dev, table, dir, 0))
-               return ERR_PTR(-ENOMEM);
-
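+       /* dma_map_sgtable() returns 0 on success or a negative errno */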
+       ret = dma_map_sgtable(attachment->dev, table, dir, 0);
+       if (ret)
+               table = ERR_PTR(ret);
        return table;
 }
 
@@ -949,6 +950,11 @@ static int fastrpc_internal_invoke(struct fastrpc_user *fl,  u32 kernel,
        if (!fl->cctx->rpdev)
                return -EPIPE;
 
+       if (handle == FASTRPC_INIT_HANDLE && !kernel) {
+               dev_warn_ratelimited(fl->sctx->dev, "user app trying to send a kernel RPC message (%d)\n", handle);
+               return -EPERM;
+       }
+
        ctx = fastrpc_context_alloc(fl, kernel, sc, args);
        if (IS_ERR(ctx))
                return PTR_ERR(ctx);
index eccd8c7..5d8b482 100644
@@ -1,7 +1,13 @@
 # SPDX-License-Identifier: GPL-2.0-only
+
+include $(src)/common/mmu/Makefile
+habanalabs-y += $(HL_COMMON_MMU_FILES)
+
+include $(src)/common/pci/Makefile
+habanalabs-y += $(HL_COMMON_PCI_FILES)
+
 HL_COMMON_FILES := common/habanalabs_drv.o common/device.o common/context.o \
                common/asid.o common/habanalabs_ioctl.o \
                common/command_buffer.o common/hw_queue.o common/irq.o \
                common/sysfs.o common/hwmon.o common/memory.o \
-               common/command_submission.o common/mmu.o common/mmu_v1.o \
-               common/firmware_if.o common/pci.o
+               common/command_submission.o common/firmware_if.o
index a2fdf31..ede04c0 100644
@@ -50,8 +50,10 @@ unsigned long hl_asid_alloc(struct hl_device *hdev)
 
 void hl_asid_free(struct hl_device *hdev, unsigned long asid)
 {
-       if (WARN((asid == 0 || asid >= hdev->asic_prop.max_asid),
-                                               "Invalid ASID %lu", asid))
+       if (asid == HL_KERNEL_ASID_ID || asid >= hdev->asic_prop.max_asid) {
+               dev_crit(hdev->dev, "Invalid ASID %lu", asid);
                return;
+       }
+
        clear_bit(asid, hdev->asid_bitmap);
 }
index 6f6a904..d9adb9a 100644
@@ -635,10 +635,12 @@ struct hl_cb *hl_cb_kernel_create(struct hl_device *hdev, u32 cb_size,
 
        cb_handle >>= PAGE_SHIFT;
        cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr, (u32) cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!cb, "Kernel CB handle invalid 0x%x\n", (u32) cb_handle);
-       if (!cb)
+       /* hl_cb_get should never fail here */
+       if (!cb) {
+               dev_crit(hdev->dev, "Kernel CB handle invalid 0x%x\n",
+                               (u32) cb_handle);
                goto destroy_cb;
+       }
 
        return cb;
 
index b2b3d2b..7bd4a03 100644
@@ -48,8 +48,8 @@ void hl_sob_reset_error(struct kref *ref)
        struct hl_device *hdev = hw_sob->hdev;
 
        dev_crit(hdev->dev,
-                       "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
-                       hw_sob->q_idx, hw_sob->sob_id);
+               "SOB release shouldn't be called here, q_idx: %d, sob_id: %d\n",
+               hw_sob->q_idx, hw_sob->sob_id);
 }
 
 /**
@@ -149,9 +149,10 @@ void hl_fence_get(struct hl_fence *fence)
                kref_get(&fence->refcount);
 }
 
-static void hl_fence_init(struct hl_fence *fence)
+static void hl_fence_init(struct hl_fence *fence, u64 sequence)
 {
        kref_init(&fence->refcount);
+       fence->cs_sequence = sequence;
        fence->error = 0;
        fence->timestamp = ktime_set(0, 0);
        init_completion(&fence->completion);
@@ -184,6 +185,28 @@ static void cs_job_put(struct hl_cs_job *job)
        kref_put(&job->refcount, cs_job_do_release);
 }
 
+bool cs_needs_completion(struct hl_cs *cs)
+{
+       /* In case this is a staged CS, only the last CS in the sequence should
+        * get a completion; any non-staged CS always gets a completion
+        */
+       if (cs->staged_cs && !cs->staged_last)
+               return false;
+
+       return true;
+}
+
+bool cs_needs_timeout(struct hl_cs *cs)
+{
+       /* In case this is a staged CS, only the first CS in the sequence should
+        * get a timeout; any non-staged CS always gets a timeout
+        */
+       if (cs->staged_cs && !cs->staged_first)
+               return false;
+
+       return true;
+}
+
 static bool is_cb_patched(struct hl_device *hdev, struct hl_cs_job *job)
 {
        /*
@@ -225,6 +248,7 @@ static int cs_parser(struct hl_fpriv *hpriv, struct hl_cs_job *job)
        parser.queue_type = job->queue_type;
        parser.is_kernel_allocated_cb = job->is_kernel_allocated_cb;
        job->patched_cb = NULL;
+       parser.completion = cs_needs_completion(job->cs);
 
        rc = hdev->asic_funcs->cs_parser(hdev, &parser);
 
@@ -290,13 +314,153 @@ static void complete_job(struct hl_device *hdev, struct hl_cs_job *job)
 
        hl_debugfs_remove_job(hdev, job);
 
-       if (job->queue_type == QUEUE_TYPE_EXT ||
-                       job->queue_type == QUEUE_TYPE_HW)
+       /* We decrement reference only for a CS that gets completion
+        * because the reference was incremented only for this kind of CS
+        * right before it was scheduled.
+        *
+        * In staged submission, only the last CS marked as 'staged_last'
+        * gets completion, hence its release function will be called from here.
+        * As for all the remaining CS's in the staged submission which do not get
+        * completion, their CS reference will be decremented by the
+        * 'staged_last' CS during the CS release flow.
+        * All relevant PQ CI counters will be incremented during the CS release
+        * flow by calling 'hl_hw_queue_update_ci'.
+        */
+       if (cs_needs_completion(cs) &&
+               (job->queue_type == QUEUE_TYPE_EXT ||
+                       job->queue_type == QUEUE_TYPE_HW))
                cs_put(cs);
 
        cs_job_put(job);
 }
 
+/*
+ * hl_staged_cs_find_first - locate the first CS in this staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs_seq: staged submission sequence number
+ *
+ * @note: This function must be called under 'hdev->cs_mirror_lock'
+ *
+ * Find and return a CS pointer with the given sequence
+ */
+struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq)
+{
+       struct hl_cs *cs;
+
+       list_for_each_entry_reverse(cs, &hdev->cs_mirror_list, mirror_node)
+               if (cs->staged_cs && cs->staged_first &&
+                               cs->sequence == cs_seq)
+                       return cs;
+
+       return NULL;
+}
+
+/*
+ * is_staged_cs_last_exists - returns true if the 'staged_last' CS is in the list
+ *
+ * @hdev: pointer to device structure
+ * @cs: staged submission member
+ *
+ */
+bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs)
+{
+       struct hl_cs *last_entry;
+
+       last_entry = list_last_entry(&cs->staged_cs_node, struct hl_cs,
+                                                               staged_cs_node);
+
+       if (last_entry->staged_last)
+               return true;
+
+       return false;
+}
+
+/*
+ * staged_cs_get - get CS reference if this CS is a part of a staged CS
+ *
+ * @hdev: pointer to device structure
+ * @cs: current CS
+ *
+ * Increment CS reference for every CS in this staged submission except for
+ * the CS which gets completion.
+ */
+static void staged_cs_get(struct hl_device *hdev, struct hl_cs *cs)
+{
+       /* Only the last CS in this staged submission will get a completion.
+        * We must increment the reference for all other CS's in this
+        * staged submission.
+        * Once we get a completion we will release the whole staged submission.
+        */
+       if (!cs->staged_last)
+               cs_get(cs);
+}
+
+/*
+ * staged_cs_put - put a CS in case it is part of staged submission
+ *
+ * @hdev: pointer to device structure
+ * @cs: CS to put
+ *
+ * This function decrements a CS reference (for a non completion CS)
+ */
+static void staged_cs_put(struct hl_device *hdev, struct hl_cs *cs)
+{
+       /* We release all CS's in a staged submission except the last
+        * CS, whose reference we never incremented.
+        */
+       if (!cs_needs_completion(cs))
+               cs_put(cs);
+}
+
+static void cs_handle_tdr(struct hl_device *hdev, struct hl_cs *cs)
+{
+       bool next_entry_found = false;
+       struct hl_cs *next;
+
+       if (!cs_needs_timeout(cs))
+               return;
+
+       spin_lock(&hdev->cs_mirror_lock);
+
+       /* We need to handle tdr only once for the complete staged submission.
+        * Hence, we choose the CS that reaches this function first, which is
+        * the CS marked as 'staged_last'.
+        */
+       if (cs->staged_cs && cs->staged_last)
+               cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+
+       spin_unlock(&hdev->cs_mirror_lock);
+
+       /* Don't cancel TDR in case this CS timed out, because we might be
+        * running from the TDR context
+        */
+       if (cs && (cs->timedout ||
+                       hdev->timeout_jiffies == MAX_SCHEDULE_TIMEOUT))
+               return;
+
+       if (cs && cs->tdr_active)
+               cancel_delayed_work_sync(&cs->work_tdr);
+
+       spin_lock(&hdev->cs_mirror_lock);
+
+       /* queue TDR for next CS */
+       list_for_each_entry(next, &hdev->cs_mirror_list, mirror_node)
+               if (cs_needs_timeout(next)) {
+                       next_entry_found = true;
+                       break;
+               }
+
+       if (next_entry_found && !next->tdr_active) {
+               next->tdr_active = true;
+               schedule_delayed_work(&next->work_tdr,
+                                       hdev->timeout_jiffies);
+       }
+
+       spin_unlock(&hdev->cs_mirror_lock);
+}
+
 static void cs_do_release(struct kref *ref)
 {
        struct hl_cs *cs = container_of(ref, struct hl_cs, refcount);
@@ -346,36 +510,37 @@ static void cs_do_release(struct kref *ref)
 
        hdev->asic_funcs->hw_queues_unlock(hdev);
 
-       /* Need to update CI for internal queues */
-       hl_int_hw_queue_update_ci(cs);
+       /* Need to update CI for all queue jobs that does not get completion */
+       hl_hw_queue_update_ci(cs);
 
        /* remove CS from CS mirror list */
        spin_lock(&hdev->cs_mirror_lock);
        list_del_init(&cs->mirror_node);
        spin_unlock(&hdev->cs_mirror_lock);
 
-       /* Don't cancel TDR in case this CS was timedout because we might be
-        * running from the TDR context
-        */
-       if (!cs->timedout && hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) {
-               struct hl_cs *next;
-
-               if (cs->tdr_active)
-                       cancel_delayed_work_sync(&cs->work_tdr);
+       cs_handle_tdr(hdev, cs);
 
-               spin_lock(&hdev->cs_mirror_lock);
-
-               /* queue TDR for next CS */
-               next = list_first_entry_or_null(&hdev->cs_mirror_list,
-                                               struct hl_cs, mirror_node);
+       if (cs->staged_cs) {
+               /* the completion CS decrements reference for the entire
+                * staged submission
+                */
+               if (cs->staged_last) {
+                       struct hl_cs *staged_cs, *tmp;
 
-               if (next && !next->tdr_active) {
-                       next->tdr_active = true;
-                       schedule_delayed_work(&next->work_tdr,
-                                               hdev->timeout_jiffies);
+                       list_for_each_entry_safe(staged_cs, tmp,
+                                       &cs->staged_cs_node, staged_cs_node)
+                               staged_cs_put(hdev, staged_cs);
                }
 
-               spin_unlock(&hdev->cs_mirror_lock);
+               /* A staged CS will be a member in the list only after it
+                * was submitted. We used 'cs_mirror_lock' when inserting
+                * it into the list, so we will use it again when removing it
+                */
+               if (cs->submitted) {
+                       spin_lock(&hdev->cs_mirror_lock);
+                       list_del(&cs->staged_cs_node);
+                       spin_unlock(&hdev->cs_mirror_lock);
+               }
        }
 
 out:
@@ -461,7 +626,8 @@ static void cs_timedout(struct work_struct *work)
 }
 
 static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
-                       enum hl_cs_type cs_type, struct hl_cs **cs_new)
+                       enum hl_cs_type cs_type, u64 user_sequence,
+                       struct hl_cs **cs_new)
 {
        struct hl_cs_counters_atomic *cntr;
        struct hl_fence *other = NULL;
@@ -478,6 +644,9 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
                return -ENOMEM;
        }
 
+       /* increment refcnt for context */
+       hl_ctx_get(hdev, ctx);
+
        cs->ctx = ctx;
        cs->submitted = false;
        cs->completed = false;
@@ -507,6 +676,18 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
                                (hdev->asic_prop.max_pending_cs - 1)];
 
        if (other && !completion_done(&other->completion)) {
+               /* If the following statement is true, it means we have reached
+                * a point in which only part of the staged submission was
+                * submitted and we don't have enough room in the 'cs_pending'
+                * array for the rest of the submission.
+                * This causes a deadlock because this CS will never be
+                * completed as it depends on future CS's for completion.
+                */
+               if (other->cs_sequence == user_sequence)
+                       dev_crit_ratelimited(hdev->dev,
+                               "Staged CS %llu deadlock due to lack of resources",
+                               user_sequence);
+
                dev_dbg_ratelimited(hdev->dev,
                        "Rejecting CS because of too many in-flights CS\n");
                atomic64_inc(&ctx->cs_counters.max_cs_in_flight_drop_cnt);
@@ -525,7 +706,7 @@ static int allocate_cs(struct hl_device *hdev, struct hl_ctx *ctx,
        }
 
        /* init hl_fence */
-       hl_fence_init(&cs_cmpl->base_fence);
+       hl_fence_init(&cs_cmpl->base_fence, cs_cmpl->cs_seq);
 
        cs->sequence = cs_cmpl->cs_seq;
 
@@ -549,6 +730,7 @@ free_fence:
        kfree(cs_cmpl);
 free_cs:
        kfree(cs);
+       hl_ctx_put(ctx);
        return rc;
 }
 
@@ -556,6 +738,8 @@ static void cs_rollback(struct hl_device *hdev, struct hl_cs *cs)
 {
        struct hl_cs_job *job, *tmp;
 
+       staged_cs_put(hdev, cs);
+
        list_for_each_entry_safe(job, tmp, &cs->job_list, cs_node)
                complete_job(hdev, job);
 }
@@ -565,7 +749,9 @@ void hl_cs_rollback_all(struct hl_device *hdev)
        int i;
        struct hl_cs *cs, *tmp;
 
-       /* flush all completions */
+       /* flush all completions before iterating over the CS mirror list in
+        * order to avoid a race with the release functions
+        */
        for (i = 0 ; i < hdev->asic_prop.completion_queues_count ; i++)
                flush_workqueue(hdev->cq_wq[i]);
 
@@ -574,12 +760,24 @@ void hl_cs_rollback_all(struct hl_device *hdev)
                cs_get(cs);
                cs->aborted = true;
                dev_warn_ratelimited(hdev->dev, "Killing CS %d.%llu\n",
-                                       cs->ctx->asid, cs->sequence);
+                               cs->ctx->asid, cs->sequence);
                cs_rollback(hdev, cs);
                cs_put(cs);
        }
 }
 
+void hl_pending_cb_list_flush(struct hl_ctx *ctx)
+{
+       struct hl_pending_cb *pending_cb, *tmp;
+
+       list_for_each_entry_safe(pending_cb, tmp,
+                       &ctx->pending_cb_list, cb_node) {
+               list_del(&pending_cb->cb_node);
+               hl_cb_put(pending_cb->cb);
+               kfree(pending_cb);
+       }
+}
+
 static void job_wq_completion(struct work_struct *work)
 {
        struct hl_cs_job *job = container_of(work, struct hl_cs_job,
@@ -734,6 +932,12 @@ static int hl_cs_sanity_checks(struct hl_fpriv *hpriv, union hl_cs_args *args)
                return -EBUSY;
        }
 
+       if ((args->in.cs_flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+                       !hdev->supports_staged_submission) {
+               dev_err(hdev->dev, "staged submission not supported");
+               return -EPERM;
+       }
+
        cs_type_flags = args->in.cs_flags & HL_CS_FLAGS_TYPE_MASK;
 
        if (unlikely(cs_type_flags && !is_power_of_2(cs_type_flags))) {
@@ -805,10 +1009,38 @@ static int hl_cs_copy_chunk_array(struct hl_device *hdev,
        return 0;
 }
 
+static int cs_staged_submission(struct hl_device *hdev, struct hl_cs *cs,
+                               u64 sequence, u32 flags)
+{
+       if (!(flags & HL_CS_FLAGS_STAGED_SUBMISSION))
+               return 0;
+
+       cs->staged_last = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_LAST);
+       cs->staged_first = !!(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST);
+
+       if (cs->staged_first) {
+               /* Staged CS sequence is the first CS sequence */
+               INIT_LIST_HEAD(&cs->staged_cs_node);
+               cs->staged_sequence = cs->sequence;
+       } else {
+               /* User sequence will be validated in 'hl_hw_queue_schedule_cs'
+                * under the cs_mirror_lock
+                */
+               cs->staged_sequence = sequence;
+       }
+
+       /* Increment CS reference if needed */
+       staged_cs_get(hdev, cs);
+
+       cs->staged_cs = true;
+
+       return 0;
+}
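+
+/*
+ * Illustrative flag usage for a three-CS staged submission (a sketch
+ * inferred from the flags above, not an excerpt from this patch):
+ *   CS0: HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_FIRST
+ *   CS1: HL_CS_FLAGS_STAGED_SUBMISSION (and the user passes CS0's sequence)
+ *   CS2: HL_CS_FLAGS_STAGED_SUBMISSION | HL_CS_FLAGS_STAGED_SUBMISSION_LAST
+ * Only CS2 gets a completion; CS0 arms the TDR timeout.
+ */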
+
 static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
-                               u32 num_chunks, u64 *cs_seq, bool timestamp)
+                               u32 num_chunks, u64 *cs_seq, u32 flags)
 {
-       bool int_queues_only = true;
+       bool staged_mid, int_queues_only = true;
        struct hl_device *hdev = hpriv->hdev;
        struct hl_cs_chunk *cs_chunk_array;
        struct hl_cs_counters_atomic *cntr;
@@ -816,9 +1048,11 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
        struct hl_cs_job *job;
        struct hl_cs *cs;
        struct hl_cb *cb;
+       u64 user_sequence;
        int rc, i;
 
        cntr = &hdev->aggregated_cs_counters;
+       user_sequence = *cs_seq;
        *cs_seq = ULLONG_MAX;
 
        rc = hl_cs_copy_chunk_array(hdev, &cs_chunk_array, chunks, num_chunks,
@@ -826,20 +1060,26 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
        if (rc)
                goto out;
 
-       /* increment refcnt for context */
-       hl_ctx_get(hdev, hpriv->ctx);
+       if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+                       !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
+               staged_mid = true;
+       else
+               staged_mid = false;
 
-       rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT, &cs);
-       if (rc) {
-               hl_ctx_put(hpriv->ctx);
+       rc = allocate_cs(hdev, hpriv->ctx, CS_TYPE_DEFAULT,
+                       staged_mid ? user_sequence : ULLONG_MAX, &cs);
+       if (rc)
                goto free_cs_chunk_array;
-       }
 
-       cs->timestamp = !!timestamp;
+       cs->timestamp = !!(flags & HL_CS_FLAGS_TIMESTAMP);
        *cs_seq = cs->sequence;
 
        hl_debugfs_add_cs(cs);
 
+       rc = cs_staged_submission(hdev, cs, user_sequence, flags);
+       if (rc)
+               goto free_cs_object;
+
        /* Validate ALL the CS chunks before submitting the CS */
        for (i = 0 ; i < num_chunks ; i++) {
                struct hl_cs_chunk *chunk = &cs_chunk_array[i];
@@ -899,8 +1139,9 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                 * Only increment for JOB on external or H/W queues, because
                 * only for those JOBs we get completion
                 */
-               if (job->queue_type == QUEUE_TYPE_EXT ||
-                               job->queue_type == QUEUE_TYPE_HW)
+               if (cs_needs_completion(cs) &&
+                       (job->queue_type == QUEUE_TYPE_EXT ||
+                               job->queue_type == QUEUE_TYPE_HW))
                        cs_get(cs);
 
                hl_debugfs_add_job(hdev, job);
@@ -916,11 +1157,14 @@ static int cs_ioctl_default(struct hl_fpriv *hpriv, void __user *chunks,
                }
        }
 
-       if (int_queues_only) {
+       /* We allow a CS with any queue type combination as long as it does
+        * not get a completion
+        */
+       if (int_queues_only && cs_needs_completion(cs)) {
                atomic64_inc(&ctx->cs_counters.validation_drop_cnt);
                atomic64_inc(&cntr->validation_drop_cnt);
                dev_err(hdev->dev,
-                       "Reject CS %d.%llu because only internal queues jobs are present\n",
+                       "Reject CS %d.%llu since it contains only internal queue jobs and needs completion\n",
                        cs->ctx->asid, cs->sequence);
                rc = -EINVAL;
                goto free_cs_object;
@@ -954,6 +1198,129 @@ out:
        return rc;
 }
 
+static int pending_cb_create_job(struct hl_device *hdev, struct hl_ctx *ctx,
+               struct hl_cs *cs, struct hl_cb *cb, u32 size, u32 hw_queue_id)
+{
+       struct hw_queue_properties *hw_queue_prop;
+       struct hl_cs_counters_atomic *cntr;
+       struct hl_cs_job *job;
+
+       hw_queue_prop = &hdev->asic_prop.hw_queues_props[hw_queue_id];
+       cntr = &hdev->aggregated_cs_counters;
+
+       job = hl_cs_allocate_job(hdev, hw_queue_prop->type, true);
+       if (!job) {
+               atomic64_inc(&ctx->cs_counters.out_of_mem_drop_cnt);
+               atomic64_inc(&cntr->out_of_mem_drop_cnt);
+               dev_err(hdev->dev, "Failed to allocate a new job\n");
+               return -ENOMEM;
+       }
+
+       job->id = 0;
+       job->cs = cs;
+       job->user_cb = cb;
+       atomic_inc(&job->user_cb->cs_cnt);
+       job->user_cb_size = size;
+       job->hw_queue_id = hw_queue_id;
+       job->patched_cb = job->user_cb;
+       job->job_cb_size = job->user_cb_size;
+
+       /* increment refcount as for external queues we get completion */
+       cs_get(cs);
+
+       cs->jobs_in_queue_cnt[job->hw_queue_id]++;
+
+       list_add_tail(&job->cs_node, &cs->job_list);
+
+       hl_debugfs_add_job(hdev, job);
+
+       return 0;
+}
+
+static int hl_submit_pending_cb(struct hl_fpriv *hpriv)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       struct hl_ctx *ctx = hpriv->ctx;
+       struct hl_pending_cb *pending_cb, *tmp;
+       struct list_head local_cb_list;
+       struct hl_cs *cs;
+       struct hl_cb *cb;
+       u32 hw_queue_id;
+       u32 cb_size;
+       int process_list, rc = 0;
+
+       if (list_empty(&ctx->pending_cb_list))
+               return 0;
+
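+       /*
+        * The token acts as a try-lock: only the thread that swaps it from
+        * 1 to 0 proceeds; it is restored at free_pending_cb_token below.
+        */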
+       process_list = atomic_cmpxchg(&ctx->thread_pending_cb_token, 1, 0);
+
+       /* Only a single thread is allowed to process the list */
+       if (!process_list)
+               return 0;
+
+       if (list_empty(&ctx->pending_cb_list))
+               goto free_pending_cb_token;
+
+       /* move all list elements to a local list */
+       INIT_LIST_HEAD(&local_cb_list);
+       spin_lock(&ctx->pending_cb_lock);
+       list_for_each_entry_safe(pending_cb, tmp, &ctx->pending_cb_list,
+                                                               cb_node)
+               list_move_tail(&pending_cb->cb_node, &local_cb_list);
+       spin_unlock(&ctx->pending_cb_lock);
+
+       rc = allocate_cs(hdev, ctx, CS_TYPE_DEFAULT, ULLONG_MAX, &cs);
+       if (rc)
+               goto add_list_elements;
+
+       hl_debugfs_add_cs(cs);
+
+       /* Iterate through pending cb list, create jobs and add to CS */
+       list_for_each_entry(pending_cb, &local_cb_list, cb_node) {
+               cb = pending_cb->cb;
+               cb_size = pending_cb->cb_size;
+               hw_queue_id = pending_cb->hw_queue_id;
+
+               rc = pending_cb_create_job(hdev, ctx, cs, cb, cb_size,
+                                                               hw_queue_id);
+               if (rc)
+                       goto free_cs_object;
+       }
+
+       rc = hl_hw_queue_schedule_cs(cs);
+       if (rc) {
+               if (rc != -EAGAIN)
+                       dev_err(hdev->dev,
+                               "Failed to submit CS %d.%llu (%d)\n",
+                               ctx->asid, cs->sequence, rc);
+               goto free_cs_object;
+       }
+
+       /* pending cb was scheduled successfully */
+       list_for_each_entry_safe(pending_cb, tmp, &local_cb_list, cb_node) {
+               list_del(&pending_cb->cb_node);
+               kfree(pending_cb);
+       }
+
+       cs_put(cs);
+
+       goto free_pending_cb_token;
+
+free_cs_object:
+       cs_rollback(hdev, cs);
+       cs_put(cs);
+add_list_elements:
+       spin_lock(&ctx->pending_cb_lock);
+       list_for_each_entry_safe_reverse(pending_cb, tmp, &local_cb_list,
+                                                               cb_node)
+               list_move(&pending_cb->cb_node, &ctx->pending_cb_list);
+       spin_unlock(&ctx->pending_cb_lock);
+free_pending_cb_token:
+       atomic_set(&ctx->thread_pending_cb_token, 1);
+
+       return rc;
+}
+
 static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
                                u64 *cs_seq)
 {
@@ -1003,7 +1370,7 @@ static int hl_cs_ctx_switch(struct hl_fpriv *hpriv, union hl_cs_args *args,
                        rc = 0;
                } else {
                        rc = cs_ioctl_default(hpriv, chunks, num_chunks,
-                                               cs_seq, false);
+                                                               cs_seq, 0);
                }
 
                mutex_unlock(&hpriv->restore_phase_mutex);
@@ -1275,15 +1642,11 @@ static int cs_ioctl_signal_wait(struct hl_fpriv *hpriv, enum hl_cs_type cs_type,
                }
        }
 
-       /* increment refcnt for context */
-       hl_ctx_get(hdev, ctx);
-
-       rc = allocate_cs(hdev, ctx, cs_type, &cs);
+       rc = allocate_cs(hdev, ctx, cs_type, ULLONG_MAX, &cs);
        if (rc) {
                if (cs_type == CS_TYPE_WAIT ||
                        cs_type == CS_TYPE_COLLECTIVE_WAIT)
                        hl_fence_put(sig_fence);
-               hl_ctx_put(ctx);
                goto free_cs_chunk_array;
        }
 
@@ -1346,7 +1709,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
        enum hl_cs_type cs_type;
        u64 cs_seq = ULONG_MAX;
        void __user *chunks;
-       u32 num_chunks;
+       u32 num_chunks, flags;
        int rc;
 
        rc = hl_cs_sanity_checks(hpriv, args);
@@ -1357,10 +1720,20 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
        if (rc)
                goto out;
 
+       rc = hl_submit_pending_cb(hpriv);
+       if (rc)
+               goto out;
+
        cs_type = hl_cs_get_cs_type(args->in.cs_flags &
                                        ~HL_CS_FLAGS_FORCE_RESTORE);
        chunks = (void __user *) (uintptr_t) args->in.chunks_execute;
        num_chunks = args->in.num_chunks_execute;
+       flags = args->in.cs_flags;
+
+       /* In case this is a staged CS, the user should supply the CS sequence */
+       if ((flags & HL_CS_FLAGS_STAGED_SUBMISSION) &&
+                       !(flags & HL_CS_FLAGS_STAGED_SUBMISSION_FIRST))
+               cs_seq = args->in.seq;
 
        switch (cs_type) {
        case CS_TYPE_SIGNAL:
@@ -1371,7 +1744,7 @@ int hl_cs_ioctl(struct hl_fpriv *hpriv, void *data)
                break;
        default:
                rc = cs_ioctl_default(hpriv, chunks, num_chunks, &cs_seq,
-                               args->in.cs_flags & HL_CS_FLAGS_TIMESTAMP);
+                                                       args->in.cs_flags);
                break;
        }
 
index f65e655..cda871a 100644
 static void hl_ctx_fini(struct hl_ctx *ctx)
 {
        struct hl_device *hdev = ctx->hdev;
-       u64 idle_mask = 0;
+       u64 idle_mask[HL_BUSY_ENGINES_MASK_EXT_SIZE] = {0};
        int i;
 
+       /* Release all allocated pending cb's; they were never
+        * scheduled, so it is safe to release them here
+        */
+       hl_pending_cb_list_flush(ctx);
+
        /*
         * If we arrived here, there are no jobs waiting for this context
         * on its queues so we can safely remove it.
@@ -50,12 +55,15 @@ static void hl_ctx_fini(struct hl_ctx *ctx)
 
                if ((!hdev->pldm) && (hdev->pdev) &&
                                (!hdev->asic_funcs->is_device_idle(hdev,
-                                                       &idle_mask, NULL)))
+                                       idle_mask,
+                                       HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL)))
                        dev_notice(hdev->dev,
-                               "device not idle after user context is closed (0x%llx)\n",
-                               idle_mask);
+                                       "device not idle after user context is closed (0x%llx, 0x%llx)\n",
+                                               idle_mask[0], idle_mask[1]);
        } else {
                dev_dbg(hdev->dev, "closing kernel context\n");
+               hdev->asic_funcs->ctx_fini(ctx);
+               hl_vm_ctx_fini(ctx);
                hl_mmu_ctx_fini(ctx);
        }
 }
@@ -140,8 +148,11 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
        kref_init(&ctx->refcount);
 
        ctx->cs_sequence = 1;
+       INIT_LIST_HEAD(&ctx->pending_cb_list);
+       spin_lock_init(&ctx->pending_cb_lock);
        spin_lock_init(&ctx->cs_lock);
        atomic_set(&ctx->thread_ctx_switch_token, 1);
+       atomic_set(&ctx->thread_pending_cb_token, 1);
        ctx->thread_ctx_switch_wait_token = 0;
        ctx->cs_pending = kcalloc(hdev->asic_prop.max_pending_cs,
                                sizeof(struct hl_fence *),
@@ -151,11 +162,18 @@ int hl_ctx_init(struct hl_device *hdev, struct hl_ctx *ctx, bool is_kernel_ctx)
 
        if (is_kernel_ctx) {
                ctx->asid = HL_KERNEL_ASID_ID; /* Kernel driver gets ASID 0 */
-               rc = hl_mmu_ctx_init(ctx);
+               rc = hl_vm_ctx_init(ctx);
                if (rc) {
-                       dev_err(hdev->dev, "Failed to init mmu ctx module\n");
+                       dev_err(hdev->dev, "Failed to init mem ctx module\n");
+                       rc = -ENOMEM;
                        goto err_free_cs_pending;
                }
+
+               rc = hdev->asic_funcs->ctx_init(ctx);
+               if (rc) {
+                       dev_err(hdev->dev, "ctx_init failed\n");
+                       goto err_vm_ctx_fini;
+               }
        } else {
                ctx->asid = hl_asid_alloc(hdev);
                if (!ctx->asid) {
@@ -194,7 +212,8 @@ err_cb_va_pool_fini:
 err_vm_ctx_fini:
        hl_vm_ctx_fini(ctx);
 err_asid_free:
-       hl_asid_free(hdev, ctx->asid);
+       if (ctx->asid != HL_KERNEL_ASID_ID)
+               hl_asid_free(hdev, ctx->asid);
 err_free_cs_pending:
        kfree(ctx->cs_pending);
 
index cef7166..9f19bee 100644
@@ -310,8 +310,8 @@ static int mmu_show(struct seq_file *s, void *data)
        struct hl_dbg_device_entry *dev_entry = entry->dev_entry;
        struct hl_device *hdev = dev_entry->hdev;
        struct hl_ctx *ctx;
-       struct hl_mmu_hop_info hops_info;
-       u64 virt_addr = dev_entry->mmu_addr;
+       struct hl_mmu_hop_info hops_info = {0};
+       u64 virt_addr = dev_entry->mmu_addr, phys_addr;
        int i;
 
        if (!hdev->mmu_enable)
@@ -333,8 +333,19 @@ static int mmu_show(struct seq_file *s, void *data)
                return 0;
        }
 
-       seq_printf(s, "asid: %u, virt_addr: 0x%llx\n",
-                       dev_entry->mmu_asid, dev_entry->mmu_addr);
+       phys_addr = hops_info.hop_info[hops_info.used_hops - 1].hop_pte_val;
+
+       if (hops_info.scrambled_vaddr &&
+               (dev_entry->mmu_addr != hops_info.scrambled_vaddr))
+               seq_printf(s,
+                       "asid: %u, virt_addr: 0x%llx, scrambled virt_addr: 0x%llx,\nphys_addr: 0x%llx, scrambled_phys_addr: 0x%llx\n",
+                       dev_entry->mmu_asid, dev_entry->mmu_addr,
+                       hops_info.scrambled_vaddr,
+                       hops_info.unscrambled_paddr, phys_addr);
+       else
+               seq_printf(s,
+                       "asid: %u, virt_addr: 0x%llx, phys_addr: 0x%llx\n",
+                       dev_entry->mmu_asid, dev_entry->mmu_addr, phys_addr);
 
        for (i = 0 ; i < hops_info.used_hops ; i++) {
                seq_printf(s, "hop%d_addr: 0x%llx\n",
@@ -403,7 +414,7 @@ static int engines_show(struct seq_file *s, void *data)
                return 0;
        }
 
-       hdev->asic_funcs->is_device_idle(hdev, NULL, s);
+       hdev->asic_funcs->is_device_idle(hdev, NULL, 0, s);
 
        return 0;
 }
@@ -865,6 +876,17 @@ static ssize_t hl_stop_on_err_write(struct file *f, const char __user *buf,
        return count;
 }
 
+static ssize_t hl_security_violations_read(struct file *f, char __user *buf,
+                                       size_t count, loff_t *ppos)
+{
+       struct hl_dbg_device_entry *entry = file_inode(f)->i_private;
+       struct hl_device *hdev = entry->hdev;
+
+       hdev->asic_funcs->ack_protection_bits_errors(hdev);
+
+       return 0;
+}
+
 static const struct file_operations hl_data32b_fops = {
        .owner = THIS_MODULE,
        .read = hl_data_read32,
@@ -922,6 +944,11 @@ static const struct file_operations hl_stop_on_err_fops = {
        .write = hl_stop_on_err_write
 };
 
+static const struct file_operations hl_security_violations_fops = {
+       .owner = THIS_MODULE,
+       .read = hl_security_violations_read
+};
+
 static const struct hl_info_list hl_debugfs_list[] = {
        {"command_buffers", command_buffers_show, NULL},
        {"command_submission", command_submission_show, NULL},
@@ -965,7 +992,6 @@ void hl_debugfs_add_device(struct hl_device *hdev)
        struct hl_dbg_device_entry *dev_entry = &hdev->hl_debugfs;
        int count = ARRAY_SIZE(hl_debugfs_list);
        struct hl_debugfs_entry *entry;
-       struct dentry *ent;
        int i;
 
        dev_entry->hdev = hdev;
@@ -1071,14 +1097,18 @@ void hl_debugfs_add_device(struct hl_device *hdev)
                                dev_entry,
                                &hl_stop_on_err_fops);
 
-       for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
+       debugfs_create_file("dump_security_violations",
+                               0644,
+                               dev_entry->root,
+                               dev_entry,
+                               &hl_security_violations_fops);
 
-               ent = debugfs_create_file(hl_debugfs_list[i].name,
+       for (i = 0, entry = dev_entry->entry_arr ; i < count ; i++, entry++) {
+               debugfs_create_file(hl_debugfs_list[i].name,
                                        0444,
                                        dev_entry->root,
                                        entry,
                                        &hl_debugfs_fops);
-               entry->dent = ent;
                entry->info_ent = &hl_debugfs_list[i];
                entry->dev_entry = dev_entry;
        }
index 69d04ec..334009e 100644
@@ -93,12 +93,19 @@ void hl_hpriv_put(struct hl_fpriv *hpriv)
 static int hl_device_release(struct inode *inode, struct file *filp)
 {
        struct hl_fpriv *hpriv = filp->private_data;
+       struct hl_device *hdev = hpriv->hdev;
+
+       filp->private_data = NULL;
+
+       if (!hdev) {
+               pr_crit("Closing FD after device was removed. Memory leak will occur and it is advised to reboot.\n");
+               put_pid(hpriv->taskpid);
+               return 0;
+       }
 
        hl_cb_mgr_fini(hpriv->hdev, &hpriv->cb_mgr);
        hl_ctx_mgr_fini(hpriv->hdev, &hpriv->ctx_mgr);
 
-       filp->private_data = NULL;
-
        hl_hpriv_put(hpriv);
 
        return 0;
@@ -107,15 +114,20 @@ static int hl_device_release(struct inode *inode, struct file *filp)
 static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 {
        struct hl_fpriv *hpriv = filp->private_data;
-       struct hl_device *hdev;
+       struct hl_device *hdev = hpriv->hdev;
 
        filp->private_data = NULL;
 
-       hdev = hpriv->hdev;
+       if (!hdev) {
+               pr_err("Closing FD after device was removed\n");
+               goto out;
+       }
 
        mutex_lock(&hdev->fpriv_list_lock);
        list_del(&hpriv->dev_node);
        mutex_unlock(&hdev->fpriv_list_lock);
+out:
+       put_pid(hpriv->taskpid);
 
        kfree(hpriv);
 
@@ -134,14 +146,23 @@ static int hl_device_release_ctrl(struct inode *inode, struct file *filp)
 static int hl_mmap(struct file *filp, struct vm_area_struct *vma)
 {
        struct hl_fpriv *hpriv = filp->private_data;
+       struct hl_device *hdev = hpriv->hdev;
        unsigned long vm_pgoff;
 
+       if (!hdev) {
+               pr_err_ratelimited("Trying to mmap after device was removed! Please close FD\n");
+               return -ENODEV;
+       }
+
        vm_pgoff = vma->vm_pgoff;
        vma->vm_pgoff = HL_MMAP_OFFSET_VALUE_GET(vm_pgoff);
 
        switch (vm_pgoff & HL_MMAP_TYPE_MASK) {
        case HL_MMAP_TYPE_CB:
                return hl_cb_mmap(hpriv, vma);
+
+       case HL_MMAP_TYPE_BLOCK:
+               return hl_hw_block_mmap(hpriv, vma);
        }
 
        return -EINVAL;
@@ -373,7 +394,6 @@ static int device_early_init(struct hl_device *hdev)
 
        mutex_init(&hdev->send_cpu_message_lock);
        mutex_init(&hdev->debug_lock);
-       mutex_init(&hdev->mmu_cache_lock);
        INIT_LIST_HEAD(&hdev->cs_mirror_list);
        spin_lock_init(&hdev->cs_mirror_lock);
        INIT_LIST_HEAD(&hdev->fpriv_list);
@@ -414,7 +434,6 @@ static void device_early_fini(struct hl_device *hdev)
 {
        int i;
 
-       mutex_destroy(&hdev->mmu_cache_lock);
        mutex_destroy(&hdev->debug_lock);
        mutex_destroy(&hdev->send_cpu_message_lock);
 
@@ -882,6 +901,16 @@ wait_for_processes:
        return -EBUSY;
 }
 
+static void device_disable_open_processes(struct hl_device *hdev)
+{
+       struct hl_fpriv *hpriv;
+
+       mutex_lock(&hdev->fpriv_list_lock);
+       list_for_each_entry(hpriv, &hdev->fpriv_list, dev_node)
+               hpriv->hdev = NULL;
+       mutex_unlock(&hdev->fpriv_list_lock);
+}
+
 /*
  * hl_device_reset - reset the device
  *
@@ -1158,12 +1187,20 @@ kill_processes:
        atomic_set(&hdev->in_reset, 0);
        hdev->needs_reset = false;
 
-       if (hard_reset)
+       dev_notice(hdev->dev, "Successfully finished resetting the device\n");
+
+       if (hard_reset) {
                hdev->hard_reset_cnt++;
-       else
-               hdev->soft_reset_cnt++;
 
-       dev_warn(hdev->dev, "Successfully finished resetting the device\n");
+               /* After reset is done, we are ready to receive events from
+                * the F/W. We can't do it before because we will ignore events
+                * and if those events are fatal, we won't know about them and
+                * the device will be operational although it shouldn't be
+                */
+               hdev->asic_funcs->enable_events_from_fw(hdev);
+       } else {
+               hdev->soft_reset_cnt++;
+       }
 
        return 0;
 
@@ -1314,11 +1351,16 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
        hdev->compute_ctx = NULL;
 
+       hl_debugfs_add_device(hdev);
+
+       /* debugfs nodes are created in hl_ctx_init so it must be called after
+        * hl_debugfs_add_device.
+        */
        rc = hl_ctx_init(hdev, hdev->kernel_ctx, true);
        if (rc) {
                dev_err(hdev->dev, "failed to initialize kernel context\n");
                kfree(hdev->kernel_ctx);
-               goto mmu_fini;
+               goto remove_device_from_debugfs;
        }
 
        rc = hl_cb_pool_init(hdev);
@@ -1327,8 +1369,6 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
                goto release_ctx;
        }
 
-       hl_debugfs_add_device(hdev);
-
        /*
         * From this point, in case of an error, add char devices and create
         * sysfs nodes as part of the error flow, to allow debugging.
@@ -1411,12 +1451,21 @@ int hl_device_init(struct hl_device *hdev, struct class *hclass)
 
        hdev->init_done = true;
 
+       /* After initialization is done, we are ready to receive events from
+        * the F/W. We can't do it before because we will ignore events and if
+        * those events are fatal, we won't know about them and the device will
+        * be operational although it shouldn't be
+        */
+       hdev->asic_funcs->enable_events_from_fw(hdev);
+
        return 0;
 
 release_ctx:
        if (hl_ctx_put(hdev->kernel_ctx) != 1)
                dev_err(hdev->dev,
                        "kernel ctx is still alive on initialization failure\n");
+remove_device_from_debugfs:
+       hl_debugfs_remove_device(hdev);
 mmu_fini:
        hl_mmu_fini(hdev);
 eq_fini:
@@ -1482,7 +1531,8 @@ void hl_device_fini(struct hl_device *hdev)
                usleep_range(50, 200);
                rc = atomic_cmpxchg(&hdev->in_reset, 0, 1);
                if (ktime_compare(ktime_get(), timeout) > 0) {
-                       WARN(1, "Failed to remove device because reset function did not finish\n");
+                       dev_crit(hdev->dev,
+                               "Failed to remove device because reset function did not finish\n");
                        return;
                }
        }
@@ -1515,8 +1565,6 @@ void hl_device_fini(struct hl_device *hdev)
 
        device_late_fini(hdev);
 
-       hl_debugfs_remove_device(hdev);
-
        /*
         * Halt the engines and disable interrupts so we won't get any more
         * completions from H/W and we won't have any accesses from the
@@ -1536,8 +1584,10 @@ void hl_device_fini(struct hl_device *hdev)
                HL_PENDING_RESET_LONG_SEC);
 
        rc = device_kill_open_processes(hdev, HL_PENDING_RESET_LONG_SEC);
-       if (rc)
+       if (rc) {
                dev_crit(hdev->dev, "Failed to kill all open processes\n");
+               device_disable_open_processes(hdev);
+       }
 
        hl_cb_pool_fini(hdev);
 
@@ -1548,6 +1598,8 @@ void hl_device_fini(struct hl_device *hdev)
        if ((hdev->kernel_ctx) && (hl_ctx_put(hdev->kernel_ctx) != 1))
                dev_err(hdev->dev, "kernel ctx is still alive\n");
 
+       hl_debugfs_remove_device(hdev);
+
        hl_vm_fini(hdev);
 
        hl_mmu_fini(hdev);
index c9a1298..09706c5 100644 (file)
@@ -90,9 +90,10 @@ int hl_fw_send_pci_access_msg(struct hl_device *hdev, u32 opcode)
 int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
                                u16 len, u32 timeout, u64 *result)
 {
+       struct hl_hw_queue *queue = &hdev->kernel_queues[hw_queue_id];
        struct cpucp_packet *pkt;
        dma_addr_t pkt_dma_addr;
-       u32 tmp;
+       u32 tmp, expected_ack_val;
        int rc = 0;
 
        pkt = hdev->asic_funcs->cpu_accessible_dma_pool_alloc(hdev, len,
@@ -115,14 +116,23 @@ int hl_fw_send_cpu_message(struct hl_device *hdev, u32 hw_queue_id, u32 *msg,
                goto out;
        }
 
+       /* set fence to an invalid value */
+       pkt->fence = UINT_MAX;
+
        rc = hl_hw_queue_send_cb_no_cmpl(hdev, hw_queue_id, len, pkt_dma_addr);
        if (rc) {
                dev_err(hdev->dev, "Failed to send CB on CPU PQ (%d)\n", rc);
                goto out;
        }
 
+       if (hdev->asic_prop.fw_app_security_map &
+                       CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN)
+               expected_ack_val = queue->pi;
+       else
+               expected_ack_val = CPUCP_PACKET_FENCE_VAL;
+
        rc = hl_poll_timeout_memory(hdev, &pkt->fence, tmp,
-                               (tmp == CPUCP_PACKET_FENCE_VAL), 1000,
+                               (tmp == expected_ack_val), 1000,
                                timeout, true);
 
        hl_hw_queue_inc_ci_kernel(hdev, hw_queue_id);
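
When the firmware advertises CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN, it acknowledges a packet by writing the queue's producer index into the fence field rather than the legacy CPUCP_PACKET_FENCE_VAL constant, which is why the fence is first primed with UINT_MAX. A minimal sketch of the polling pattern that hl_poll_timeout_memory() wraps; poll_fence() and its timing values are illustrative, not the driver's macro:

    #include <linux/ktime.h>
    #include <linux/delay.h>
    #include <linux/errno.h>
    #include <linux/types.h>

    /* Spin on a memory word until it holds the expected ACK value or a
     * timeout elapses. Illustrative helper, not the driver's code.
     */
    static int poll_fence(u32 *fence, u32 expected, u64 timeout_us)
    {
            ktime_t deadline = ktime_add_us(ktime_get(), timeout_us);

            for (;;) {
                    if (READ_ONCE(*fence) == expected)
                            return 0;
                    if (ktime_after(ktime_get(), deadline))
                            return -ETIMEDOUT;
                    usleep_range(100, 200);
            }
    }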
@@ -279,8 +289,74 @@ int hl_fw_send_heartbeat(struct hl_device *hdev)
        return rc;
 }
 
+static int fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
+               u32 cpu_security_boot_status_reg)
+{
+       u32 err_val, security_val;
+
+       /* Some of the firmware status codes are deprecated in newer f/w
+        * versions. In those versions, the errors are reported
+        * in different registers. Therefore, we need to check those
+        * registers and print the exact errors. Moreover, there
+        * may be multiple errors, so we need to report on each error
+        * separately. Some of the error codes might indicate a state
+        * that is not an error per se, but it is an error in a production
+        * environment
+        */
+       err_val = RREG32(boot_err0_reg);
+       if (!(err_val & CPU_BOOT_ERR0_ENABLED))
+               return 0;
+
+       if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
+               dev_err(hdev->dev,
+                       "Device boot error - DRAM initialization failed\n");
+       if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
+               dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
+       if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
+               dev_err(hdev->dev,
+                       "Device boot error - Thermal Sensor initialization failed\n");
+       if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
+               dev_warn(hdev->dev,
+                       "Device boot warning - Skipped DRAM initialization\n");
+
+       if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED) {
+               if (hdev->bmc_enable)
+                       dev_warn(hdev->dev,
+                               "Device boot error - Skipped waiting for BMC\n");
+               else
+                       err_val &= ~CPU_BOOT_ERR0_BMC_WAIT_SKIPPED;
+       }
+
+       if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
+               dev_err(hdev->dev,
+                       "Device boot error - Serdes data from BMC not available\n");
+       if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
+               dev_err(hdev->dev,
+                       "Device boot error - NIC F/W initialization failed\n");
+       if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
+               dev_warn(hdev->dev,
+                       "Device boot warning - security not ready\n");
+       if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
+               dev_err(hdev->dev, "Device boot error - security failure\n");
+       if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
+               dev_err(hdev->dev, "Device boot error - eFuse failure\n");
+       if (err_val & CPU_BOOT_ERR0_PLL_FAIL)
+               dev_err(hdev->dev, "Device boot error - PLL failure\n");
+
+       security_val = RREG32(cpu_security_boot_status_reg);
+       if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
+               dev_dbg(hdev->dev, "Device security status %#x\n",
+                               security_val);
+
+       if (err_val & ~CPU_BOOT_ERR0_ENABLED)
+               return -EIO;
+
+       return 0;
+}
+
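+
+The return convention above separates "error reporting is enabled" from "an
+error actually occurred": only bits other than CPU_BOOT_ERR0_ENABLED produce
+-EIO, and the BMC-wait bit is cleared first when no BMC is expected. A few
+worked cases of that logic (bit names as in the code, combinations
+illustrative):
+
+    /* err_val == 0                                   -> 0 (reporting disabled)
+     * err_val == CPU_BOOT_ERR0_ENABLED               -> 0 (enabled, no errors)
+     * err_val == CPU_BOOT_ERR0_ENABLED |
+     *            CPU_BOOT_ERR0_PLL_FAIL              -> -EIO
+     * err_val == CPU_BOOT_ERR0_ENABLED |
+     *            CPU_BOOT_ERR0_BMC_WAIT_SKIPPED,
+     * with bmc_enable == false                       -> 0 (bit cleared above)
+     */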
 int hl_fw_cpucp_info_get(struct hl_device *hdev,
-                       u32 cpu_security_boot_status_reg)
+                       u32 cpu_security_boot_status_reg,
+                       u32 boot_err0_reg)
 {
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct cpucp_packet pkt = {};
@@ -314,6 +390,12 @@ int hl_fw_cpucp_info_get(struct hl_device *hdev,
                goto out;
        }
 
+       rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+       if (rc) {
+               dev_err(hdev->dev, "Errors in device boot\n");
+               goto out;
+       }
+
        memcpy(&prop->cpucp_info, cpucp_info_cpu_addr,
                        sizeof(prop->cpucp_info));
 
@@ -483,58 +565,6 @@ int hl_fw_cpucp_pll_info_get(struct hl_device *hdev, u16 pll_index,
        return rc;
 }
 
-static void fw_read_errors(struct hl_device *hdev, u32 boot_err0_reg,
-               u32 cpu_security_boot_status_reg)
-{
-       u32 err_val, security_val;
-
-       /* Some of the firmware status codes are deprecated in newer f/w
-        * versions. In those versions, the errors are reported
-        * in different registers. Therefore, we need to check those
-        * registers and print the exact errors. Moreover, there
-        * may be multiple errors, so we need to report on each error
-        * separately. Some of the error codes might indicate a state
-        * that is not an error per-se, but it is an error in production
-        * environment
-        */
-       err_val = RREG32(boot_err0_reg);
-       if (!(err_val & CPU_BOOT_ERR0_ENABLED))
-               return;
-
-       if (err_val & CPU_BOOT_ERR0_DRAM_INIT_FAIL)
-               dev_err(hdev->dev,
-                       "Device boot error - DRAM initialization failed\n");
-       if (err_val & CPU_BOOT_ERR0_FIT_CORRUPTED)
-               dev_err(hdev->dev, "Device boot error - FIT image corrupted\n");
-       if (err_val & CPU_BOOT_ERR0_TS_INIT_FAIL)
-               dev_err(hdev->dev,
-                       "Device boot error - Thermal Sensor initialization failed\n");
-       if (err_val & CPU_BOOT_ERR0_DRAM_SKIPPED)
-               dev_warn(hdev->dev,
-                       "Device boot warning - Skipped DRAM initialization\n");
-       if (err_val & CPU_BOOT_ERR0_BMC_WAIT_SKIPPED)
-               dev_warn(hdev->dev,
-                       "Device boot error - Skipped waiting for BMC\n");
-       if (err_val & CPU_BOOT_ERR0_NIC_DATA_NOT_RDY)
-               dev_err(hdev->dev,
-                       "Device boot error - Serdes data from BMC not available\n");
-       if (err_val & CPU_BOOT_ERR0_NIC_FW_FAIL)
-               dev_err(hdev->dev,
-                       "Device boot error - NIC F/W initialization failed\n");
-       if (err_val & CPU_BOOT_ERR0_SECURITY_NOT_RDY)
-               dev_warn(hdev->dev,
-                       "Device boot warning - security not ready\n");
-       if (err_val & CPU_BOOT_ERR0_SECURITY_FAIL)
-               dev_err(hdev->dev, "Device boot error - security failure\n");
-       if (err_val & CPU_BOOT_ERR0_EFUSE_FAIL)
-               dev_err(hdev->dev, "Device boot error - eFuse failure\n");
-
-       security_val = RREG32(cpu_security_boot_status_reg);
-       if (security_val & CPU_BOOT_DEV_STS0_ENABLED)
-               dev_dbg(hdev->dev, "Device security status %#x\n",
-                               security_val);
-}
-
 static void detect_cpu_boot_status(struct hl_device *hdev, u32 status)
 {
        /* Some of the status codes below are deprecated in newer f/w
@@ -659,6 +689,9 @@ int hl_fw_read_preboot_status(struct hl_device *hdev, u32 cpu_boot_status_reg,
                prop->fw_security_disabled = true;
        }
 
+       dev_dbg(hdev->dev, "Firmware preboot security status %#x\n",
+                       security_status);
+
        dev_dbg(hdev->dev, "Firmware preboot hard-reset is %s\n",
                        prop->hard_reset_done_by_fw ? "enabled" : "disabled");
 
@@ -753,6 +786,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
                if (prop->fw_boot_cpu_security_map &
                                CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
                        prop->hard_reset_done_by_fw = true;
+
+               dev_dbg(hdev->dev,
+                       "Firmware boot CPU security status %#x\n",
+                       prop->fw_boot_cpu_security_map);
        }
 
        dev_dbg(hdev->dev, "Firmware boot CPU hard-reset is %s\n",
@@ -826,6 +863,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
                goto out;
        }
 
+       rc = fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
+       if (rc)
+               return rc;
+
        /* Clear reset status since we need to read again from app */
        prop->hard_reset_done_by_fw = false;
 
@@ -837,6 +878,10 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
                if (prop->fw_app_security_map &
                                CPU_BOOT_DEV_STS0_FW_HARD_RST_EN)
                        prop->hard_reset_done_by_fw = true;
+
+               dev_dbg(hdev->dev,
+                       "Firmware application CPU security status %#x\n",
+                       prop->fw_app_security_map);
        }
 
        dev_dbg(hdev->dev, "Firmware application CPU hard-reset is %s\n",
@@ -844,6 +889,8 @@ int hl_fw_init_cpu(struct hl_device *hdev, u32 cpu_boot_status_reg,
 
        dev_info(hdev->dev, "Successfully loaded firmware to device\n");
 
+       return 0;
+
 out:
        fw_read_errors(hdev, boot_err0_reg, cpu_security_boot_status_reg);
 
index 41af347..4b321e4 100644 (file)
 #define HL_NAME                                "habanalabs"
 
 /* Use upper bits of mmap offset to store habana driver specific information.
- * bits[63:62] - Encode mmap type
+ * bits[63:61] - Encode mmap type
 * bits[44:0]  - mmap offset value
  *
  * NOTE: struct vm_area_struct.vm_pgoff uses offset in pages. Hence, these
  *  defines are w.r.t to PAGE_SIZE
  */
-#define HL_MMAP_TYPE_SHIFT             (62 - PAGE_SHIFT)
-#define HL_MMAP_TYPE_MASK              (0x3ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_SHIFT             (61 - PAGE_SHIFT)
+#define HL_MMAP_TYPE_MASK              (0x7ull << HL_MMAP_TYPE_SHIFT)
+#define HL_MMAP_TYPE_BLOCK             (0x4ull << HL_MMAP_TYPE_SHIFT)
 #define HL_MMAP_TYPE_CB                        (0x2ull << HL_MMAP_TYPE_SHIFT)
 
-#define HL_MMAP_OFFSET_VALUE_MASK      (0x3FFFFFFFFFFFull >> PAGE_SHIFT)
+#define HL_MMAP_OFFSET_VALUE_MASK      (0x1FFFFFFFFFFFull >> PAGE_SHIFT)
 #define HL_MMAP_OFFSET_VALUE_GET(off)  (off & HL_MMAP_OFFSET_VALUE_MASK)
 
 #define HL_PENDING_RESET_PER_SEC       10
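
With the type field in bits [63:61] of the byte offset, user space composes the offset passed to mmap() so that, after the kernel's implicit shift into vm_pgoff, the type lands exactly where HL_MMAP_TYPE_SHIFT expects it. A hypothetical user-space sketch; map_cb() and its offset argument are illustrative, not the shipped uAPI:

    #include <stdint.h>
    #include <sys/mman.h>

    /* Type bits in byte units; using the CB type (0x2) keeps the sign
     * bit of off_t clear in this illustration.
     */
    #define HL_MMAP_TYPE_CB_BYTES   (0x2ull << 61)

    static void *map_cb(int fd, uint64_t cb_byte_offset, size_t size)
    {
            uint64_t off = HL_MMAP_TYPE_CB_BYTES | cb_byte_offset;

            return mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        fd, (off_t)off);
    }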
@@ -408,6 +409,9 @@ struct hl_mmu_properties {
  * @sync_stream_first_mon: first monitor available for sync stream use
  * @first_available_user_sob: first sob available for the user
  * @first_available_user_mon: first monitor available for the user
+ * @first_available_user_msix_interrupt: first available msix interrupt
+ *                                       reserved for the user
+ * @first_available_cq: first available CQ for the user.
  * @tpc_enabled_mask: which TPCs are enabled.
  * @completion_queues_count: number of completion queues.
  * @fw_security_disabled: true if security measures are disabled in firmware,
@@ -416,6 +420,7 @@ struct hl_mmu_properties {
  *                            from BOOT_DEV_STS0
  * @dram_supports_virtual_memory: is there an MMU towards the DRAM
  * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
+ * @num_functional_hbms: number of functional HBMs in each DCORE.
  */
 struct asic_fixed_properties {
        struct hw_queue_properties      *hw_queues_props;
@@ -468,18 +473,22 @@ struct asic_fixed_properties {
        u16                             sync_stream_first_mon;
        u16                             first_available_user_sob[HL_MAX_DCORES];
        u16                             first_available_user_mon[HL_MAX_DCORES];
+       u16                             first_available_user_msix_interrupt;
+       u16                             first_available_cq[HL_MAX_DCORES];
        u8                              tpc_enabled_mask;
        u8                              completion_queues_count;
        u8                              fw_security_disabled;
        u8                              fw_security_status_valid;
        u8                              dram_supports_virtual_memory;
        u8                              hard_reset_done_by_fw;
+       u8                              num_functional_hbms;
 };
 
 /**
  * struct hl_fence - software synchronization primitive
  * @completion: fence is implemented using completion
  * @refcount: refcount for this fence
+ * @cs_sequence: sequence of the corresponding command submission
  * @error: mark this fence with error
  * @timestamp: timestamp upon completion
  *
@@ -487,6 +496,7 @@ struct asic_fixed_properties {
 struct hl_fence {
        struct completion       completion;
        struct kref             refcount;
+       u64                     cs_sequence;
        int                     error;
        ktime_t                 timestamp;
 };
@@ -846,6 +856,19 @@ enum div_select_defs {
  * @collective_wait_init_cs: Generate collective master/slave packets
  *                           and place them in the relevant cs jobs
  * @collective_wait_create_jobs: allocate collective wait cs jobs
+ * @scramble_addr: Routine to scramble the address prior to mapping it
+ *                 in the MMU.
+ * @descramble_addr: Routine to de-scramble the address prior to
+ *                   showing it to users.
+ * @ack_protection_bits_errors: ack and dump all security violations
+ * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it.
+ *                   also returns the size of the block if caller supplies
+ *                   a valid pointer for it
+ * @hw_block_mmap: mmap a HW block with a given id.
+ * @enable_events_from_fw: send interrupt to firmware to notify it that the
+ *                         driver is ready to receive asynchronous events. This
+ *                         function should be called during the first init and
+ *                         after every hard-reset of the device
  */
 struct hl_asic_funcs {
        int (*early_init)(struct hl_device *hdev);
@@ -918,8 +941,8 @@ struct hl_asic_funcs {
        void (*set_clock_gating)(struct hl_device *hdev);
        void (*disable_clock_gating)(struct hl_device *hdev);
        int (*debug_coresight)(struct hl_device *hdev, void *data);
-       bool (*is_device_idle)(struct hl_device *hdev, u64 *mask,
-                               struct seq_file *s);
+       bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr,
+                                       u8 mask_len, struct seq_file *s);
        int (*soft_reset_late_init)(struct hl_device *hdev);
        void (*hw_queues_lock)(struct hl_device *hdev);
        void (*hw_queues_unlock)(struct hl_device *hdev);
@@ -955,6 +978,14 @@ struct hl_asic_funcs {
        int (*collective_wait_create_jobs)(struct hl_device *hdev,
                        struct hl_ctx *ctx, struct hl_cs *cs, u32 wait_queue_id,
                        u32 collective_engine_id);
+       u64 (*scramble_addr)(struct hl_device *hdev, u64 addr);
+       u64 (*descramble_addr)(struct hl_device *hdev, u64 addr);
+       void (*ack_protection_bits_errors)(struct hl_device *hdev);
+       int (*get_hw_block_id)(struct hl_device *hdev, u64 block_addr,
+                               u32 *block_size, u32 *block_id);
+       int (*hw_block_mmap)(struct hl_device *hdev, struct vm_area_struct *vma,
+                       u32 block_id, u32 block_size);
+       void (*enable_events_from_fw)(struct hl_device *hdev);
 };
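
The new scramble_addr/descramble_addr hooks keep the generic MMU code ASIC-agnostic: addresses are scrambled just before they enter the page tables and de-scrambled before being shown to the user. A hedged sketch of that round trip through the ops table (the helper name is illustrative; the real entry points are the hl_mmu_scramble_addr/hl_mmu_descramble_addr wrappers declared later in this diff):

    /* Illustrative only: how the generic layer might drive the hooks. */
    static u64 scramble_round_trip(struct hl_device *hdev, u64 vaddr)
    {
            /* Scramble before the address is placed in the PGT... */
            u64 scrambled = hdev->asic_funcs->scramble_addr(hdev, vaddr);

            /* ...and de-scramble before reporting it back to the user. */
            return hdev->asic_funcs->descramble_addr(hdev, scrambled);
    }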
 
 
@@ -1012,6 +1043,20 @@ struct hl_cs_counters_atomic {
 };
 
 /**
+ * struct hl_pending_cb - pending command buffer structure
+ * @cb_node: cb node in pending cb list
+ * @cb: command buffer to send in next submission
+ * @cb_size: command buffer size
+ * @hw_queue_id: destination queue id
+ */
+struct hl_pending_cb {
+       struct list_head        cb_node;
+       struct hl_cb            *cb;
+       u32                     cb_size;
+       u32                     hw_queue_id;
+};
+
+/**
  * struct hl_ctx - user/kernel context.
  * @mem_hash: holds mapping from virtual address to virtual memory area
  *             descriptor (hl_vm_phys_pg_list or hl_userptr).
@@ -1026,6 +1071,8 @@ struct hl_cs_counters_atomic {
  * @mmu_lock: protects the MMU page tables. Any change to the PGT, modifying the
 *            MMU hash or walking the PGT requires taking this lock.
  * @debugfs_list: node in debugfs list of contexts.
+ * @pending_cb_list: list of pending command buffers waiting to be sent upon
+ *                   the next user command submission.
  * @cs_counters: context command submission counters.
  * @cb_va_pool: device VA pool for command buffers which are mapped to the
  *              device's MMU.
@@ -1034,11 +1081,17 @@ struct hl_cs_counters_atomic {
  *                     index to cs_pending array.
  * @dram_default_hops: array that holds all hops addresses needed for default
  *                     DRAM mapping.
+ * @pending_cb_lock: spinlock to protect pending cb list
  * @cs_lock: spinlock to protect cs_sequence.
  * @dram_phys_mem: amount of used physical DRAM memory by this context.
  * @thread_ctx_switch_token: token to prevent multiple threads of the same
  *                             context from running the context switch phase.
  *                             Only a single thread should run it.
+ * @thread_pending_cb_token: token to prevent multiple threads from processing
+ *                             the pending CB list. Only a single thread should
+ *                             process the list since it is protected by a
+ *                             spinlock and we don't want to halt the entire
+ *                             command submission sequence.
  * @thread_ctx_switch_wait_token: token to prevent the threads that didn't run
  *                             the context switch phase from moving to their
  *                             execution phase before the context switch phase
@@ -1057,13 +1110,16 @@ struct hl_ctx {
        struct mutex                    mem_hash_lock;
        struct mutex                    mmu_lock;
        struct list_head                debugfs_list;
+       struct list_head                pending_cb_list;
        struct hl_cs_counters_atomic    cs_counters;
        struct gen_pool                 *cb_va_pool;
        u64                             cs_sequence;
        u64                             *dram_default_hops;
+       spinlock_t                      pending_cb_lock;
        spinlock_t                      cs_lock;
        atomic64_t                      dram_phys_mem;
        atomic_t                        thread_ctx_switch_token;
+       atomic_t                        thread_pending_cb_token;
        u32                             thread_ctx_switch_wait_token;
        u32                             asid;
        u32                             handle;
@@ -1124,8 +1180,11 @@ struct hl_userptr {
  * @finish_work: workqueue object to run when CS is completed by H/W.
  * @work_tdr: delayed work node for TDR.
  * @mirror_node : node in device mirror list of command submissions.
+ * @staged_cs_node: node in the staged cs list.
  * @debugfs_list: node in debugfs list of command submissions.
  * @sequence: the sequence number of this CS.
+ * @staged_sequence: the sequence of the staged submission this CS is part of,
+ *                   relevant only if staged_cs is set.
  * @type: CS_TYPE_*.
  * @submitted: true if CS was submitted to H/W.
  * @completed: true if CS was completed by device.
@@ -1133,7 +1192,11 @@ struct hl_userptr {
  * @tdr_active: true if TDR was activated for this CS (to prevent
  *             double TDR activation).
  * @aborted: true if CS was aborted due to some device error.
- * @timestamp: true if a timestmap must be captured upon completion
+ * @timestamp: true if a timestamp must be captured upon completion.
+ * @staged_last: true if this is the last staged CS and needs completion.
+ * @staged_first: true if this is the first staged CS and the TDR timeout
+ *                should be attached to this CS.
+ * @staged_cs: true if this CS is part of a staged submission.
  */
 struct hl_cs {
        u16                     *jobs_in_queue_cnt;
@@ -1146,8 +1209,10 @@ struct hl_cs {
        struct work_struct      finish_work;
        struct delayed_work     work_tdr;
        struct list_head        mirror_node;
+       struct list_head        staged_cs_node;
        struct list_head        debugfs_list;
        u64                     sequence;
+       u64                     staged_sequence;
        enum hl_cs_type         type;
        u8                      submitted;
        u8                      completed;
@@ -1155,6 +1220,9 @@ struct hl_cs {
        u8                      tdr_active;
        u8                      aborted;
        u8                      timestamp;
+       u8                      staged_last;
+       u8                      staged_first;
+       u8                      staged_cs;
 };
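
Taken together, the three flags let several CS objects form one logical submission: every part sets staged_cs and shares staged_sequence, the first part also sets staged_first (and is the one the TDR timeout is attached to), and the last part sets staged_last (and is the one that generates the completion). A hedged sketch of the flag assignment for an n-part staged submission (allocation and submission plumbing elided):

    /* Illustrative only: mark the parts of an n-part staged submission.
     * cs[] holds already-allocated struct hl_cs objects; first_seq is
     * the sequence number of the first part.
     */
    static void mark_staged_parts(struct hl_cs *cs[], int n, u64 first_seq)
    {
            int i;

            for (i = 0; i < n; i++) {
                    cs[i]->staged_cs = 1;
                    cs[i]->staged_sequence = first_seq;
            }
            cs[0]->staged_first = 1;        /* carries the TDR timeout */
            cs[n - 1]->staged_last = 1;     /* generates the completion */
    }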
 
 /**
@@ -1225,6 +1293,7 @@ struct hl_cs_job {
  *                    MSG_PROT packets. Relevant only for GAUDI as GOYA doesn't
  *                    have streams so the engine can't be busy by another
  *                    stream.
+ * @completion: true if we need completion for this CS.
  */
 struct hl_cs_parser {
        struct hl_cb            *user_cb;
@@ -1239,6 +1308,7 @@ struct hl_cs_parser {
        u8                      job_id;
        u8                      is_kernel_allocated_cb;
        u8                      contains_dma_pkt;
+       u8                      completion;
 };
 
 /*
@@ -1395,12 +1465,10 @@ struct hl_info_list {
 
 /**
  * struct hl_debugfs_entry - debugfs dentry wrapper.
- * @dent: base debugfs entry structure.
 * @info_ent: dentry-related ops.
  * @dev_entry: ASIC specific debugfs manager.
  */
 struct hl_debugfs_entry {
-       struct dentry                   *dent;
        const struct hl_info_list       *info_ent;
        struct hl_dbg_device_entry      *dev_entry;
 };
@@ -1688,12 +1756,20 @@ struct hl_mmu_per_hop_info {
  * struct hl_mmu_hop_info - A structure describing the TLB hops and their
  * hop-entries that were created in order to translate a virtual address to a
  * physical one.
+ * @scrambled_vaddr: The value of the virtual address after scrambling. This
+ *                   address replaces the original virtual address when mapped
+ *                   in the MMU tables.
+ * @unscrambled_paddr: The un-scrambled physical address.
  * @hop_info: Array holding the per-hop information used for the translation.
  * @used_hops: The number of hops used for the translation.
+ * @range_type: virtual address range type.
  */
 struct hl_mmu_hop_info {
+       u64 scrambled_vaddr;
+       u64 unscrambled_paddr;
        struct hl_mmu_per_hop_info hop_info[MMU_ARCH_5_HOPS];
        u32 used_hops;
+       enum hl_va_range_type range_type;
 };
 
 /**
@@ -1766,7 +1842,6 @@ struct hl_mmu_funcs {
  * @asic_funcs: ASIC specific functions.
  * @asic_specific: ASIC specific information to use only from ASIC files.
  * @vm: virtual memory manager for MMU.
- * @mmu_cache_lock: protects MMU cache invalidation as it can serve one context.
  * @hwmon_dev: H/W monitor device.
  * @pm_mng_profile: current power management profile.
  * @hl_chip_info: ASIC's sensors information.
@@ -1844,6 +1919,7 @@ struct hl_mmu_funcs {
  *                          user processes
  * @device_fini_pending: true if device_fini was called and might be
  *                       waiting for the reset thread to finish
+ * @supports_staged_submission: true if staged submissions are supported
  */
 struct hl_device {
        struct pci_dev                  *pdev;
@@ -1881,7 +1957,6 @@ struct hl_device {
        const struct hl_asic_funcs      *asic_funcs;
        void                            *asic_specific;
        struct hl_vm                    vm;
-       struct mutex                    mmu_cache_lock;
        struct device                   *hwmon_dev;
        enum hl_pm_mng_profile          pm_mng_profile;
        struct hwmon_chip_info          *hl_chip_info;
@@ -1950,6 +2025,7 @@ struct hl_device {
        u8                              needs_reset;
        u8                              process_kill_trial_cnt;
        u8                              device_fini_pending;
+       u8                              supports_staged_submission;
 
        /* Parameters for bring-up */
        u64                             nic_ports_mask;
@@ -2067,7 +2143,7 @@ int hl_hw_queue_send_cb_no_cmpl(struct hl_device *hdev, u32 hw_queue_id,
 int hl_hw_queue_schedule_cs(struct hl_cs *cs);
 u32 hl_hw_queue_add_ptr(u32 ptr, u16 val);
 void hl_hw_queue_inc_ci_kernel(struct hl_device *hdev, u32 hw_queue_id);
-void hl_int_hw_queue_update_ci(struct hl_cs *cs);
+void hl_hw_queue_update_ci(struct hl_cs *cs);
 void hl_hw_queue_reset(struct hl_device *hdev, bool hard_reset);
 
 #define hl_queue_inc_ptr(p)            hl_hw_queue_add_ptr(p, 1)
@@ -2123,6 +2199,7 @@ int hl_cb_create(struct hl_device *hdev, struct hl_cb_mgr *mgr,
                        bool map_cb, u64 *handle);
 int hl_cb_destroy(struct hl_device *hdev, struct hl_cb_mgr *mgr, u64 cb_handle);
 int hl_cb_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma);
 struct hl_cb *hl_cb_get(struct hl_device *hdev,        struct hl_cb_mgr *mgr,
                        u32 handle);
 void hl_cb_put(struct hl_cb *cb);
@@ -2136,6 +2213,7 @@ int hl_cb_va_pool_init(struct hl_ctx *ctx);
 void hl_cb_va_pool_fini(struct hl_ctx *ctx);
 
 void hl_cs_rollback_all(struct hl_device *hdev);
+void hl_pending_cb_list_flush(struct hl_ctx *ctx);
 struct hl_cs_job *hl_cs_allocate_job(struct hl_device *hdev,
                enum hl_queue_type queue_type, bool is_kernel_allocated_cb);
 void hl_sob_reset_error(struct kref *ref);
@@ -2143,6 +2221,10 @@ int hl_gen_sob_mask(u16 sob_base, u8 sob_mask, u8 *mask);
 void hl_fence_put(struct hl_fence *fence);
 void hl_fence_get(struct hl_fence *fence);
 void cs_get(struct hl_cs *cs);
+bool cs_needs_completion(struct hl_cs *cs);
+bool cs_needs_timeout(struct hl_cs *cs);
+bool is_staged_cs_last_exists(struct hl_device *hdev, struct hl_cs *cs);
+struct hl_cs *hl_staged_cs_find_first(struct hl_device *hdev, u64 cs_seq);
 
 void goya_set_asic_funcs(struct hl_device *hdev);
 void gaudi_set_asic_funcs(struct hl_device *hdev);
@@ -2184,6 +2266,8 @@ void hl_mmu_v1_set_funcs(struct hl_device *hdev, struct hl_mmu_funcs *mmu);
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr);
 int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
                        struct hl_mmu_hop_info *hops);
+u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr);
+u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr);
 bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr);
 
 int hl_fw_load_fw_to_device(struct hl_device *hdev, const char *fw_name,
@@ -2201,7 +2285,8 @@ void hl_fw_cpu_accessible_dma_pool_free(struct hl_device *hdev, size_t size,
                                        void *vaddr);
 int hl_fw_send_heartbeat(struct hl_device *hdev);
 int hl_fw_cpucp_info_get(struct hl_device *hdev,
-                       u32 cpu_security_boot_status_reg);
+                       u32 cpu_security_boot_status_reg,
+                       u32 boot_err0_reg);
 int hl_fw_get_eeprom_data(struct hl_device *hdev, void *data, size_t max_size);
 int hl_fw_cpucp_pci_counters_get(struct hl_device *hdev,
                struct hl_info_pci_counters *counters);
index d25892d..083a309 100644 (file)
@@ -5,6 +5,8 @@
  * All Rights Reserved.
  */
 
+#define pr_fmt(fmt)    "habanalabs: " fmt
+
 #include <uapi/misc/habanalabs.h>
 #include "habanalabs.h"
 
@@ -57,12 +59,23 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
 
        hw_ip.device_id = hdev->asic_funcs->get_pci_id(hdev);
        hw_ip.sram_base_address = prop->sram_user_base_address;
-       hw_ip.dram_base_address = prop->dram_user_base_address;
+       hw_ip.dram_base_address =
+                       hdev->mmu_enable && prop->dram_supports_virtual_memory ?
+                       prop->dmmu.start_addr : prop->dram_user_base_address;
        hw_ip.tpc_enabled_mask = prop->tpc_enabled_mask;
        hw_ip.sram_size = prop->sram_size - sram_kmd_size;
-       hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+
+       if (hdev->mmu_enable)
+               hw_ip.dram_size =
+                       DIV_ROUND_DOWN_ULL(prop->dram_size - dram_kmd_size,
+                                               prop->dram_page_size) *
+                                                       prop->dram_page_size;
+       else
+               hw_ip.dram_size = prop->dram_size - dram_kmd_size;
+
        if (hw_ip.dram_size > PAGE_SIZE)
                hw_ip.dram_enabled = 1;
+       hw_ip.dram_page_size = prop->dram_page_size;
        hw_ip.num_of_events = prop->num_of_events;
 
        memcpy(hw_ip.cpucp_version, prop->cpucp_info.cpucp_version,
@@ -79,6 +92,8 @@ static int hw_ip_info(struct hl_device *hdev, struct hl_info_args *args)
        hw_ip.psoc_pci_pll_od = prop->psoc_pci_pll_od;
        hw_ip.psoc_pci_pll_div_factor = prop->psoc_pci_pll_div_factor;
 
+       hw_ip.first_available_interrupt_id =
+                       prop->first_available_user_msix_interrupt;
        return copy_to_user(out, &hw_ip,
                min((size_t)size, sizeof(hw_ip))) ? -EFAULT : 0;
 }
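
With the MMU enabled, the reported DRAM size is rounded down to a whole number of DRAM pages so user space never sees a partial page. A worked example with hypothetical numbers (256MB of user DRAM, 48MB pages):

    #include <linux/math.h>

    /* DIV_ROUND_DOWN_ULL(256MB, 48MB) = 5, so 5 * 48MB = 240MB is
     * reported and the 16MB tail that cannot hold a full page is
     * hidden from the user. Values are illustrative.
     */
    static u64 reported_dram_size(void)
    {
            u64 usable = 256ULL << 20;
            u64 page   = 48ULL << 20;

            return DIV_ROUND_DOWN_ULL(usable, page) * page;  /* 240MB */
    }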
@@ -132,9 +147,10 @@ static int hw_idle(struct hl_device *hdev, struct hl_info_args *args)
                return -EINVAL;
 
        hw_idle.is_idle = hdev->asic_funcs->is_device_idle(hdev,
-                                       &hw_idle.busy_engines_mask_ext, NULL);
+                                       hw_idle.busy_engines_mask_ext,
+                                       HL_BUSY_ENGINES_MASK_EXT_SIZE, NULL);
        hw_idle.busy_engines_mask =
-                       lower_32_bits(hw_idle.busy_engines_mask_ext);
+                       lower_32_bits(hw_idle.busy_engines_mask_ext[0]);
 
        return copy_to_user(out, &hw_idle,
                min((size_t) max_size, sizeof(hw_idle))) ? -EFAULT : 0;
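
is_device_idle() now fills an array of 64-bit words plus a length instead of a single u64, so ASICs with more than 64 engines can report per-engine busy state; the legacy 32-bit field is kept by taking the low word of the first entry. A sketch of scanning such a mask array (names are illustrative, not driver code):

    #include <linux/types.h>

    /* Return true if any engine in the multi-word busy mask is busy. */
    static bool any_engine_busy(const u64 *mask_arr, u8 mask_len)
    {
            u8 i;

            for (i = 0; i < mask_len; i++)
                    if (mask_arr[i])
                            return true;
            return false;
    }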
@@ -383,7 +399,8 @@ static int sync_manager_info(struct hl_fpriv *hpriv, struct hl_info_args *args)
                        prop->first_available_user_sob[args->dcore_id];
        sm_info.first_available_monitor =
                        prop->first_available_user_mon[args->dcore_id];
-
+       sm_info.first_available_cq =
+                       prop->first_available_cq[args->dcore_id];
 
        return copy_to_user(out, &sm_info, min_t(size_t, (size_t) max_size,
                        sizeof(sm_info))) ? -EFAULT : 0;
@@ -667,6 +684,11 @@ long hl_ioctl(struct file *filep, unsigned int cmd, unsigned long arg)
        const struct hl_ioctl_desc *ioctl = NULL;
        unsigned int nr = _IOC_NR(cmd);
 
+       if (!hdev) {
+               pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
+               return -ENODEV;
+       }
+
        if ((nr >= HL_COMMAND_START) && (nr < HL_COMMAND_END)) {
                ioctl = &hl_ioctls[nr];
        } else {
@@ -685,6 +707,11 @@ long hl_ioctl_control(struct file *filep, unsigned int cmd, unsigned long arg)
        const struct hl_ioctl_desc *ioctl = NULL;
        unsigned int nr = _IOC_NR(cmd);
 
+       if (!hdev) {
+               pr_err_ratelimited("Sending ioctl after device was removed! Please close FD\n");
+               return -ENODEV;
+       }
+
        if (nr == _IOC_NR(HL_IOCTL_INFO)) {
                ioctl = &hl_ioctls_control[nr];
        } else {
index 7621725..0f33518 100644 (file)
@@ -38,7 +38,7 @@ static inline int queue_free_slots(struct hl_hw_queue *q, u32 queue_len)
                return (abs(delta) - queue_len);
 }
 
-void hl_int_hw_queue_update_ci(struct hl_cs *cs)
+void hl_hw_queue_update_ci(struct hl_cs *cs)
 {
        struct hl_device *hdev = cs->ctx->hdev;
        struct hl_hw_queue *q;
@@ -53,8 +53,13 @@ void hl_int_hw_queue_update_ci(struct hl_cs *cs)
        if (!hdev->asic_prop.max_queues || q->queue_type == QUEUE_TYPE_HW)
                return;
 
+       /* We must increment CI for every queue that will never get a
+        * completion. There are 2 scenarios in which this can happen:
+        * 1. All queues of a non-completion CS will never get a completion.
+        * 2. Internal queues never get a completion.
+        */
        for (i = 0 ; i < hdev->asic_prop.max_queues ; i++, q++) {
-               if (q->queue_type == QUEUE_TYPE_INT)
+               if (!cs_needs_completion(cs) || q->queue_type == QUEUE_TYPE_INT)
                        atomic_add(cs->jobs_in_queue_cnt[i], &q->ci);
        }
 }
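
This CI bookkeeping is what keeps the queue's free-slot accounting honest: a slot is only reusable once the consumer index has passed it, and for queues that never see a completion interrupt the driver must advance CI itself. A minimal sketch of the free-running PI/CI arithmetic involved (illustrative, not the driver's queue_free_slots()):

    #include <linux/types.h>

    /* Free slots in a ring of queue_len entries, with free-running u32
     * producer (pi) and consumer (ci) indices; unsigned wraparound makes
     * pi - ci the number of in-flight entries.
     */
    static u32 ring_free_slots(u32 pi, u32 ci, u32 queue_len)
    {
            return queue_len - (pi - ci);
    }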
@@ -292,6 +297,10 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
        len = job->job_cb_size;
        ptr = cb->bus_address;
 
+       /* Skip completion flow in case this is a non completion CS */
+       if (!cs_needs_completion(job->cs))
+               goto submit_bd;
+
        cq_pkt.data = cpu_to_le32(
                        ((q->pi << CQ_ENTRY_SHADOW_INDEX_SHIFT)
                                & CQ_ENTRY_SHADOW_INDEX_MASK) |
@@ -318,6 +327,7 @@ static void ext_queue_schedule_job(struct hl_cs_job *job)
 
        cq->pi = hl_cq_inc_ptr(cq->pi);
 
+submit_bd:
        ext_and_hw_queue_submit_bd(hdev, q, ctl, len, ptr);
 }
 
@@ -525,6 +535,7 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
        struct hl_cs_job *job, *tmp;
        struct hl_hw_queue *q;
        int rc = 0, i, cq_cnt;
+       bool first_entry;
        u32 max_queues;
 
        cntr = &hdev->aggregated_cs_counters;
@@ -548,7 +559,9 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
                        switch (q->queue_type) {
                        case QUEUE_TYPE_EXT:
                                rc = ext_queue_sanity_checks(hdev, q,
-                                               cs->jobs_in_queue_cnt[i], true);
+                                               cs->jobs_in_queue_cnt[i],
+                                               cs_needs_completion(cs));
                                break;
                        case QUEUE_TYPE_INT:
                                rc = int_queue_sanity_checks(hdev, q,
@@ -583,12 +596,38 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
                hdev->asic_funcs->collective_wait_init_cs(cs);
 
        spin_lock(&hdev->cs_mirror_lock);
+
+       /* Verify staged CS exists and add to the staged list */
+       if (cs->staged_cs && !cs->staged_first) {
+               struct hl_cs *staged_cs;
+
+               staged_cs = hl_staged_cs_find_first(hdev, cs->staged_sequence);
+               if (!staged_cs) {
+                       dev_err(hdev->dev,
+                               "Cannot find staged submission sequence %llu\n",
+                               cs->staged_sequence);
+                       rc = -EINVAL;
+                       goto unlock_cs_mirror;
+               }
+
+               if (is_staged_cs_last_exists(hdev, staged_cs)) {
+                       dev_err(hdev->dev,
+                               "Staged submission sequence %llu already submitted\n",
+                               cs->staged_sequence);
+                       rc = -EINVAL;
+                       goto unlock_cs_mirror;
+               }
+
+               list_add_tail(&cs->staged_cs_node, &staged_cs->staged_cs_node);
+       }
+
        list_add_tail(&cs->mirror_node, &hdev->cs_mirror_list);
 
        /* Queue TDR if the CS is the first entry and if timeout is wanted */
+       first_entry = list_first_entry(&hdev->cs_mirror_list,
+                                       struct hl_cs, mirror_node) == cs;
        if ((hdev->timeout_jiffies != MAX_SCHEDULE_TIMEOUT) &&
-                       (list_first_entry(&hdev->cs_mirror_list,
-                                       struct hl_cs, mirror_node) == cs)) {
+                               first_entry && cs_needs_timeout(cs)) {
                cs->tdr_active = true;
                schedule_delayed_work(&cs->work_tdr, hdev->timeout_jiffies);
 
@@ -623,6 +662,8 @@ int hl_hw_queue_schedule_cs(struct hl_cs *cs)
 
        goto out;
 
+unlock_cs_mirror:
+       spin_unlock(&hdev->cs_mirror_lock);
 unroll_cq_resv:
        q = &hdev->kernel_queues[0];
        for (i = 0 ; (i < max_queues) && (cq_cnt > 0) ; i++, q++) {
index de53fb5..44a0522 100644 (file)
@@ -47,7 +47,7 @@ inline u32 hl_cq_inc_ptr(u32 ptr)
  * Increment ptr by 1. If it reaches the number of event queue
  * entries, set it to 0
  */
-inline u32 hl_eq_inc_ptr(u32 ptr)
+static inline u32 hl_eq_inc_ptr(u32 ptr)
 {
        ptr++;
        if (unlikely(ptr == HL_EQ_LENGTH))
index 245c015..1f59105 100644 (file)
@@ -14,6 +14,9 @@
 
 #define HL_MMU_DEBUG   0
 
+/* use small pages for supporting non-pow2 (32M/40M/48M) DRAM phys page sizes */
+#define DRAM_POOL_PAGE_SIZE SZ_8M
+
 /*
  * The va ranges in context object contain a list with the available chunks of
  * device virtual memory.
  */
 
 /*
- * alloc_device_memory - allocate device memory
- *
- * @ctx                 : current context
- * @args                : host parameters containing the requested size
- * @ret_handle          : result handle
+ * alloc_device_memory() - allocate device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the requested size.
+ * @ret_handle: result handle.
  *
  * This function does the following:
- * - Allocate the requested size rounded up to 'dram_page_size' pages
- * - Return unique handle
+ * - Allocate the requested size rounded up to 'dram_page_size' pages.
+ * - Return unique handle for later map/unmap/free.
  */
 static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
                                u32 *ret_handle)
@@ -55,15 +57,14 @@ static int alloc_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args,
        struct hl_vm *vm = &hdev->vm;
        struct hl_vm_phys_pg_pack *phys_pg_pack;
        u64 paddr = 0, total_size, num_pgs, i;
-       u32 num_curr_pgs, page_size, page_shift;
+       u32 num_curr_pgs, page_size;
        int handle, rc;
        bool contiguous;
 
        num_curr_pgs = 0;
        page_size = hdev->asic_prop.dram_page_size;
-       page_shift = __ffs(page_size);
-       num_pgs = (args->alloc.mem_size + (page_size - 1)) >> page_shift;
-       total_size = num_pgs << page_shift;
+       num_pgs = DIV_ROUND_UP_ULL(args->alloc.mem_size, page_size);
+       total_size = num_pgs * page_size;
 
        if (!total_size) {
                dev_err(hdev->dev, "Cannot allocate 0 bytes\n");
@@ -182,17 +183,17 @@ pages_pack_err:
        return rc;
 }
 
-/*
- * dma_map_host_va - DMA mapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @p_userptr: pointer to result userptr structure
+/**
+ * dma_map_host_va() - DMA mapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @p_userptr: pointer to result userptr structure.
  *
  * This function does the following:
- * - Allocate userptr structure
- * - Pin the given host memory using the userptr structure
- * - Perform DMA mapping to have the DMA addresses of the pages
+ * - Allocate userptr structure.
+ * - Pin the given host memory using the userptr structure.
+ * - Perform DMA mapping to have the DMA addresses of the pages.
  */
 static int dma_map_host_va(struct hl_device *hdev, u64 addr, u64 size,
                                struct hl_userptr **p_userptr)
@@ -236,14 +237,14 @@ userptr_err:
        return rc;
 }
 
-/*
- * dma_unmap_host_va - DMA unmapping of the given host virtual address.
- * @hdev: habanalabs device structure
- * @userptr: userptr to free
+/**
+ * dma_unmap_host_va() - DMA unmapping of the given host virtual address.
+ * @hdev: habanalabs device structure.
+ * @userptr: userptr to free.
  *
  * This function does the following:
- * - Unpins the physical pages
- * - Frees the userptr structure
+ * - Unpins the physical pages.
+ * - Frees the userptr structure.
  */
 static void dma_unmap_host_va(struct hl_device *hdev,
                                struct hl_userptr *userptr)
@@ -252,14 +253,13 @@ static void dma_unmap_host_va(struct hl_device *hdev,
        kfree(userptr);
 }
 
-/*
- * dram_pg_pool_do_release - free DRAM pages pool
- *
- * @ref                 : pointer to reference object
+/**
+ * dram_pg_pool_do_release() - free DRAM pages pool
+ * @ref: pointer to reference object.
  *
  * This function does the following:
- * - Frees the idr structure of physical pages handles
- * - Frees the generic pool of DRAM physical pages
+ * - Frees the idr structure of physical pages handles.
+ * - Frees the generic pool of DRAM physical pages.
  */
 static void dram_pg_pool_do_release(struct kref *ref)
 {
@@ -274,15 +274,15 @@ static void dram_pg_pool_do_release(struct kref *ref)
        gen_pool_destroy(vm->dram_pg_pool);
 }
 
-/*
- * free_phys_pg_pack - free physical page pack
- * @hdev: habanalabs device structure
- * @phys_pg_pack: physical page pack to free
+/**
+ * free_phys_pg_pack() - free physical page pack.
+ * @hdev: habanalabs device structure.
+ * @phys_pg_pack: physical page pack to free.
  *
  * This function does the following:
  * - For DRAM memory only, iterate over the pack and free each physical block
- *   structure by returning it to the general pool
- * - Free the hl_vm_phys_pg_pack structure
+ *   structure by returning it to the general pool.
+ * - Free the hl_vm_phys_pg_pack structure.
  */
 static void free_phys_pg_pack(struct hl_device *hdev,
                                struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -313,20 +313,20 @@ static void free_phys_pg_pack(struct hl_device *hdev,
        kfree(phys_pg_pack);
 }
 
-/*
- * free_device_memory - free device memory
- *
- * @ctx                  : current context
- * @handle              : handle of the memory chunk to free
+/**
+ * free_device_memory() - free device memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters containing the handle of the memory to free.
  *
  * This function does the following:
- * - Free the device memory related to the given handle
+ * - Free the device memory related to the given handle.
  */
-static int free_device_memory(struct hl_ctx *ctx, u32 handle)
+static int free_device_memory(struct hl_ctx *ctx, struct hl_mem_in *args)
 {
        struct hl_device *hdev = ctx->hdev;
        struct hl_vm *vm = &hdev->vm;
        struct hl_vm_phys_pg_pack *phys_pg_pack;
+       u32 handle = args->free.handle;
 
        spin_lock(&vm->idr_lock);
        phys_pg_pack = idr_find(&vm->phys_pg_pack_handles, handle);
@@ -361,16 +361,15 @@ static int free_device_memory(struct hl_ctx *ctx, u32 handle)
        return 0;
 }
 
-/*
- * clear_va_list_locked - free virtual addresses list
- *
- * @hdev                : habanalabs device structure
- * @va_list             : list of virtual addresses to free
+/**
+ * clear_va_list_locked() - free virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to free.
  *
  * This function does the following:
- * - Iterate over the list and free each virtual addresses block
+ * - Iterate over the list and free each virtual addresses block.
  *
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static void clear_va_list_locked(struct hl_device *hdev,
                struct list_head *va_list)
@@ -383,16 +382,15 @@ static void clear_va_list_locked(struct hl_device *hdev,
        }
 }
 
-/*
- * print_va_list_locked    - print virtual addresses list
- *
- * @hdev                : habanalabs device structure
- * @va_list             : list of virtual addresses to print
+/**
+ * print_va_list_locked() - print virtual addresses list.
+ * @hdev: habanalabs device structure.
+ * @va_list: list of virtual addresses to print.
  *
  * This function does the following:
- * - Iterate over the list and print each virtual addresses block
+ * - Iterate over the list and print each virtual addresses block.
  *
- * This function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static void print_va_list_locked(struct hl_device *hdev,
                struct list_head *va_list)
@@ -409,18 +407,17 @@ static void print_va_list_locked(struct hl_device *hdev,
 #endif
 }
 
-/*
- * merge_va_blocks_locked - merge a virtual block if possible
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @va_block            : virtual block to merge with adjacent blocks
+/**
+ * merge_va_blocks_locked() - merge a virtual block if possible.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @va_block: virtual block to merge with adjacent blocks.
  *
  * This function does the following:
  * - Merge the given blocks with the adjacent blocks if their virtual ranges
- *   create a contiguous virtual range
+ *   create a contiguous virtual range.
  *
- * This Function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static void merge_va_blocks_locked(struct hl_device *hdev,
                struct list_head *va_list, struct hl_vm_va_block *va_block)
@@ -445,19 +442,18 @@ static void merge_va_blocks_locked(struct hl_device *hdev,
        }
 }
 
-/*
- * add_va_block_locked - add a virtual block to the virtual addresses list
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @start               : start virtual address
- * @end                 : end virtual address
+/**
+ * add_va_block_locked() - add a virtual block to the virtual addresses list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @start: start virtual address.
+ * @end: end virtual address.
  *
  * This function does the following:
- * - Add the given block to the virtual blocks list and merge with other
- * blocks if a contiguous virtual block can be created
+ * - Add the given block to the virtual blocks list and merge with other blocks
+ *   if a contiguous virtual block can be created.
  *
- * This Function should be called only when va_list lock is taken
+ * This function should be called only when va_list lock is taken.
  */
 static int add_va_block_locked(struct hl_device *hdev,
                struct list_head *va_list, u64 start, u64 end)
@@ -501,16 +497,15 @@ static int add_va_block_locked(struct hl_device *hdev,
        return 0;
 }
 
-/*
- * add_va_block - wrapper for add_va_block_locked
- *
- * @hdev                : pointer to the habanalabs device structure
- * @va_list             : pointer to the virtual addresses block list
- * @start               : start virtual address
- * @end                 : end virtual address
+/**
+ * add_va_block() - wrapper for add_va_block_locked.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_list: pointer to the virtual addresses block list.
+ * @start: start virtual address.
+ * @end: end virtual address.
  *
  * This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
  */
 static inline int add_va_block(struct hl_device *hdev,
                struct hl_va_range *va_range, u64 start, u64 end)
@@ -524,8 +519,9 @@ static inline int add_va_block(struct hl_device *hdev,
        return rc;
 }
 
-/*
+/**
  * get_va_block() - get a virtual block for the given size and alignment.
+ *
  * @hdev: pointer to the habanalabs device structure.
  * @va_range: pointer to the virtual addresses range.
  * @size: requested block size.
@@ -534,33 +530,51 @@ static inline int add_va_block(struct hl_device *hdev,
  *
  * This function does the following:
  * - Iterate on the virtual block list to find a suitable virtual block for the
- *   given size and alignment.
+ *   given size, hint address and alignment.
  * - Reserve the requested block and update the list.
  * - Return the start address of the virtual block.
  */
-static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
-                       u64 size, u64 hint_addr, u32 va_block_align)
+static u64 get_va_block(struct hl_device *hdev,
+                               struct hl_va_range *va_range,
+                               u64 size, u64 hint_addr, u32 va_block_align)
 {
        struct hl_vm_va_block *va_block, *new_va_block = NULL;
-       u64 valid_start, valid_size, prev_start, prev_end, align_mask,
-               res_valid_start = 0, res_valid_size = 0;
+       u64 tmp_hint_addr, valid_start, valid_size, prev_start, prev_end,
+               align_mask, reserved_valid_start = 0, reserved_valid_size = 0;
        bool add_prev = false;
+       bool is_align_pow_2  = is_power_of_2(va_range->page_size);
+
+       if (is_align_pow_2)
+               align_mask = ~((u64)va_block_align - 1);
+       else
+               /*
+                * With a non-power-of-2 range we work only at page
+                * granularity and the start address is page aligned,
+                * so no alignment checking is needed.
+                */
+               size = DIV_ROUND_UP_ULL(size, va_range->page_size) *
+                                                       va_range->page_size;
 
-       align_mask = ~((u64)va_block_align - 1);
+       tmp_hint_addr = hint_addr;
 
-       /* check if hint_addr is aligned */
-       if (hint_addr & (va_block_align - 1))
+       /* Check if we need to ignore hint address */
+       if ((is_align_pow_2 && (hint_addr & (va_block_align - 1))) ||
+                       (!is_align_pow_2 &&
+                               do_div(tmp_hint_addr, va_range->page_size))) {
+               dev_info(hdev->dev, "Hint address 0x%llx will be ignored\n",
+                                       hint_addr);
                hint_addr = 0;
+       }
 
        mutex_lock(&va_range->lock);
 
        print_va_list_locked(hdev, &va_range->list);
 
        list_for_each_entry(va_block, &va_range->list, node) {
-               /* calc the first possible aligned addr */
+               /* Calc the first possible aligned addr */
                valid_start = va_block->start;
 
-               if (valid_start & (va_block_align - 1)) {
+               if (is_align_pow_2 && (valid_start & (va_block_align - 1))) {
                        valid_start &= align_mask;
                        valid_start += va_block_align;
                        if (valid_start > va_block->end)
@@ -568,35 +582,41 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
                }
 
                valid_size = va_block->end - valid_start;
+               if (valid_size < size)
+                       continue;
 
-               if (valid_size >= size &&
-                       (!new_va_block || valid_size < res_valid_size)) {
+               /* Pick the minimal length block which has the required size */
+               if (!new_va_block || (valid_size < reserved_valid_size)) {
                        new_va_block = va_block;
-                       res_valid_start = valid_start;
-                       res_valid_size = valid_size;
+                       reserved_valid_start = valid_start;
+                       reserved_valid_size = valid_size;
                }
 
                if (hint_addr && hint_addr >= valid_start &&
-                               ((hint_addr + size) <= va_block->end)) {
+                                       (hint_addr + size) <= va_block->end) {
                        new_va_block = va_block;
-                       res_valid_start = hint_addr;
-                       res_valid_size = valid_size;
+                       reserved_valid_start = hint_addr;
+                       reserved_valid_size = valid_size;
                        break;
                }
        }
 
        if (!new_va_block) {
                dev_err(hdev->dev, "no available va block for size %llu\n",
-                               size);
+                                                               size);
                goto out;
        }
 
-       if (res_valid_start > new_va_block->start) {
+       /*
+        * Check if there is a leftover range due to reserving the new
+        * va block; if so, return it to the main virtual addresses list.
+        */
+       if (reserved_valid_start > new_va_block->start) {
                prev_start = new_va_block->start;
-               prev_end = res_valid_start - 1;
+               prev_end = reserved_valid_start - 1;
 
-               new_va_block->start = res_valid_start;
-               new_va_block->size = res_valid_size;
+               new_va_block->start = reserved_valid_start;
+               new_va_block->size = reserved_valid_size;
 
                add_prev = true;
        }
@@ -617,7 +637,7 @@ static u64 get_va_block(struct hl_device *hdev, struct hl_va_range *va_range,
 out:
        mutex_unlock(&va_range->lock);
 
-       return res_valid_start;
+       return reserved_valid_start;
 }
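
For reference, a minimal userspace sketch (illustrative sizes, not driver
code) of the two rounding strategies get_va_block() now distinguishes: a
mask for power-of-2 alignments, versus rounding the requested size up to
whole pages when the page size is not a power of 2:

#include <stdint.h>
#include <stdio.h>

/* Power-of-2 case: round addr up to the next multiple of align. */
static uint64_t align_up_pow2(uint64_t addr, uint64_t align)
{
	uint64_t mask = ~(align - 1);

	if (addr & (align - 1))
		return (addr & mask) + align;
	return addr;
}

/* Non-power-of-2 case: only whole pages are handed out, so the
 * requested size is rounded up to a page multiple instead. */
static uint64_t round_size_up(uint64_t size, uint64_t page_size)
{
	return ((size + page_size - 1) / page_size) * page_size;
}

int main(void)
{
	printf("0x%llx\n", (unsigned long long)align_up_pow2(0x1234, 0x1000));
	printf("%llu\n", (unsigned long long)round_size_up(5000, 3000));
	return 0;
}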
 
 /*
@@ -644,9 +664,9 @@ u64 hl_reserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
 
 /**
  * hl_get_va_range_type() - get va_range type for the given address and size.
- * @address: The start address of the area we want to validate.
- * @size: The size in bytes of the area we want to validate.
- * @type: returned va_range type
+ * @ctx: pointer to the context structure.
+ * @address: the start address of the area we want to validate.
+ * @size: the size in bytes of the area we want to validate.
+ * @type: returned va_range type.
  *
  * Return: 0 if the area is inside a valid range, -EINVAL otherwise.
  */
@@ -667,16 +687,15 @@ static int hl_get_va_range_type(struct hl_ctx *ctx, u64 address, u64 size,
        return -EINVAL;
 }
 
-/*
- * hl_unreserve_va_block - wrapper for add_va_block for unreserving a va block
- *
+/**
+ * hl_unreserve_va_block() - wrapper for add_va_block to unreserve a va block.
  * @hdev: pointer to the habanalabs device structure
- * @ctx: current context
- * @start: start virtual address
- * @end: end virtual address
+ * @ctx: pointer to the context structure.
+ * @start: start virtual address.
+ * @end: end virtual address.
  *
  * This function does the following:
- * - Takes the list lock and calls add_va_block_locked
+ * - Takes the list lock and calls add_va_block_locked.
  */
 int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
                u64 start_addr, u64 size)
@@ -701,11 +720,10 @@ int hl_unreserve_va_block(struct hl_device *hdev, struct hl_ctx *ctx,
        return rc;
 }
 
-/*
- * get_sg_info - get number of pages and the DMA address from SG list
- *
- * @sg                 : the SG list
- * @dma_addr           : pointer to DMA address to return
+/**
+ * get_sg_info() - get number of pages and the DMA address from SG list.
+ * @sg: the SG list.
+ * @dma_addr: pointer to DMA address to return.
  *
  * Calculate the number of consecutive pages described by the SG list. Take the
  * offset of the address in the first page, add to it the length and round it up
@@ -719,17 +737,17 @@ static u32 get_sg_info(struct scatterlist *sg, dma_addr_t *dma_addr)
                        (PAGE_SIZE - 1)) >> PAGE_SHIFT;
 }
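
A small sketch of the page-count arithmetic in get_sg_info(), assuming a
4KB PAGE_SIZE (userspace approximation, not driver code): the in-page
offset plus the length is rounded up to the next page boundary:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)

static uint32_t nr_pages(uint64_t dma_addr, uint32_t len)
{
	return ((dma_addr & (PAGE_SIZE - 1)) + len + (PAGE_SIZE - 1))
			>> PAGE_SHIFT;
}

int main(void)
{
	/* 100 bytes starting 50 bytes before a page boundary -> 2 pages */
	printf("%u\n", nr_pages(0x1000 - 50, 100));
	return 0;
}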
 
-/*
- * init_phys_pg_pack_from_userptr - initialize physical page pack from host
- *                                  memory
- * @ctx: current context
- * @userptr: userptr to initialize from
- * @pphys_pg_pack: result pointer
+/**
+ * init_phys_pg_pack_from_userptr() - initialize physical page pack from host
+ *                                    memory
+ * @ctx: pointer to the context structure.
+ * @userptr: userptr to initialize from.
+ * @pphys_pg_pack: result pointer.
  *
  * This function does the following:
- * - Pin the physical pages related to the given virtual block
+ * - Pin the physical pages related to the given virtual block.
  * - Create a physical page pack from the physical pages related to the given
- *   virtual block
+ *   virtual block.
  */
 static int init_phys_pg_pack_from_userptr(struct hl_ctx *ctx,
                                struct hl_userptr *userptr,
@@ -821,16 +839,16 @@ page_pack_arr_mem_err:
        return rc;
 }
 
-/*
- * map_phys_pg_pack - maps the physical page pack.
- * @ctx: current context
- * @vaddr: start address of the virtual area to map from
- * @phys_pg_pack: the pack of physical pages to map to
+/**
+ * map_phys_pg_pack() - maps the physical page pack.
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to map from.
+ * @phys_pg_pack: the pack of physical pages to map to.
  *
  * This function does the following:
- * - Maps each chunk of virtual memory to matching physical chunk
- * - Stores number of successful mappings in the given argument
- * - Returns 0 on success, error code otherwise
+ * - Maps each chunk of virtual memory to its matching physical chunk.
+ * - Stores number of successful mappings in the given argument.
+ * - Returns 0 on success, error code otherwise.
  */
 static int map_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
                                struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -875,11 +893,11 @@ err:
        return rc;
 }
 
-/*
- * unmap_phys_pg_pack - unmaps the physical page pack
- * @ctx: current context
- * @vaddr: start address of the virtual area to unmap
- * @phys_pg_pack: the pack of physical pages to unmap
+/**
+ * unmap_phys_pg_pack() - unmaps the physical page pack.
+ * @ctx: pointer to the context structure.
+ * @vaddr: start address of the virtual area to unmap.
+ * @phys_pg_pack: the pack of physical pages to unmap.
  */
 static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
                                struct hl_vm_phys_pg_pack *phys_pg_pack)
@@ -913,7 +931,7 @@ static void unmap_phys_pg_pack(struct hl_ctx *ctx, u64 vaddr,
 }
 
 static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
-                               u64 *paddr)
+                                       u64 *paddr)
 {
        struct hl_device *hdev = ctx->hdev;
        struct hl_vm *vm = &hdev->vm;
@@ -936,19 +954,18 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args,
        return 0;
 }
 
-/*
- * map_device_va - map the given memory
- *
- * @ctx                 : current context
- * @args         : host parameters with handle/host virtual address
- * @device_addr         : pointer to result device virtual address
+/**
+ * map_device_va() - map the given memory.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with handle/host virtual address.
+ * @device_addr: pointer to result device virtual address.
  *
  * This function does the following:
  * - If given a physical device memory handle, map to a device virtual block
- *   and return the start address of this block
+ *   and return the start address of this block.
  * - If given a host virtual address and size, find the related physical pages,
  *   map a device virtual block to these pages and return the start address of
- *   this block
+ *   this block.
  */
 static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
                u64 *device_addr)
@@ -1034,7 +1051,7 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
 
                hint_addr = args->map_device.hint_addr;
 
-               /* DRAM VA alignment is the same as the DRAM page size */
+               /* DRAM VA alignment is the same as the MMU page size */
                va_range = ctx->va_range[HL_VA_RANGE_TYPE_DRAM];
                va_block_align = hdev->asic_prop.dmmu.page_size;
        }
@@ -1125,24 +1142,26 @@ init_page_pack_err:
        return rc;
 }
 
-/*
- * unmap_device_va      - unmap the given device virtual address
- *
- * @ctx                 : current context
- * @vaddr               : device virtual address to unmap
- * @ctx_free            : true if in context free flow, false otherwise.
+/**
+ * unmap_device_va() - unmap the given device virtual address.
+ * @ctx: pointer to the context structure.
+ * @args: host parameters with device virtual address to unmap.
+ * @ctx_free: true if in context free flow, false otherwise.
  *
  * This function does the following:
- * - Unmap the physical pages related to the given virtual address
- * - return the device virtual block to the virtual block list
+ * - Unmap the physical pages related to the given virtual address.
+ * - Return the device virtual block to the virtual block list.
  */
-static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
+static int unmap_device_va(struct hl_ctx *ctx, struct hl_mem_in *args,
+                               bool ctx_free)
 {
        struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_vm_phys_pg_pack *phys_pg_pack = NULL;
        struct hl_vm_hash_node *hnode = NULL;
        struct hl_userptr *userptr = NULL;
        struct hl_va_range *va_range;
+       u64 vaddr = args->unmap.device_virt_addr;
        enum vm_type_t *vm_type;
        bool is_userptr;
        int rc = 0;
@@ -1201,7 +1220,13 @@ static int unmap_device_va(struct hl_ctx *ctx, u64 vaddr, bool ctx_free)
                goto mapping_cnt_err;
        }
 
-       vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
+       if (!is_userptr && !is_power_of_2(phys_pg_pack->page_size))
+               vaddr = prop->dram_base_address +
+                       DIV_ROUND_DOWN_ULL(vaddr - prop->dram_base_address,
+                                               phys_pg_pack->page_size) *
+                                                       phys_pg_pack->page_size;
+       else
+               vaddr &= ~(((u64) phys_pg_pack->page_size) - 1);
 
        mutex_lock(&ctx->mmu_lock);
 
@@ -1264,12 +1289,90 @@ vm_type_err:
        return rc;
 }
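
A sketch of the non-power-of-2 round-down introduced above; the DRAM base
and the 48MB page size are assumptions for illustration:

#include <stdint.h>
#include <stdio.h>

/* Bring a VA back to the start of its (non-power-of-2) DRAM page by
 * integer division relative to the DRAM base. */
static uint64_t round_down_non_pow2(uint64_t vaddr, uint64_t dram_base,
				    uint64_t page_size)
{
	return dram_base + ((vaddr - dram_base) / page_size) * page_size;
}

int main(void)
{
	uint64_t page = 48ULL * 1024 * 1024;	/* 48MB, not a power of 2 */
	uint64_t base = 0x20000000ULL;

	printf("0x%llx\n",
	       (unsigned long long)round_down_non_pow2(base + page + 123,
						       base, page));
	return 0;
}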
 
+static int map_block(struct hl_device *hdev, u64 address, u64 *handle,
+                       u32 *size)
+{
+       u32 block_id = 0;
+       int rc;
+
+       rc = hdev->asic_funcs->get_hw_block_id(hdev, address, size, &block_id);
+
+       *handle = block_id | HL_MMAP_TYPE_BLOCK;
+       *handle <<= PAGE_SHIFT;
+
+       return rc;
+}
+
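
A sketch of the handle layout map_block() produces: the block id is tagged
and shifted by PAGE_SHIFT so it can travel in mmap()'s page-aligned offset.
The HL_MMAP_TYPE_BLOCK value below is a placeholder bit, not the real uAPI
constant:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT		12
#define HL_MMAP_TYPE_BLOCK	(1ULL << 26)	/* placeholder bit */

static uint64_t encode_handle(uint32_t block_id)
{
	return ((uint64_t)block_id | HL_MMAP_TYPE_BLOCK) << PAGE_SHIFT;
}

static uint32_t decode_block_id(uint64_t handle)
{
	return (uint32_t)((handle >> PAGE_SHIFT) & ~HL_MMAP_TYPE_BLOCK);
}

int main(void)
{
	uint64_t h = encode_handle(42);

	printf("handle 0x%llx -> block %u\n",
	       (unsigned long long)h, decode_block_id(h));
	return 0;
}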
+static void hw_block_vm_close(struct vm_area_struct *vma)
+{
+       struct hl_ctx *ctx = (struct hl_ctx *) vma->vm_private_data;
+
+       hl_ctx_put(ctx);
+       vma->vm_private_data = NULL;
+}
+
+static const struct vm_operations_struct hw_block_vm_ops = {
+       .close = hw_block_vm_close
+};
+
+/**
+ * hl_hw_block_mmap() - mmap a hw block to user.
+ * @hpriv: pointer to the private data of the fd
+ * @vma: pointer to vm_area_struct of the process
+ *
+ * The driver increments the context reference for every HW block mapped,
+ * in order to prevent the user from closing the FD without unmapping first.
+ */
+int hl_hw_block_mmap(struct hl_fpriv *hpriv, struct vm_area_struct *vma)
+{
+       struct hl_device *hdev = hpriv->hdev;
+       u32 block_id, block_size;
+       int rc;
+
+       /* We use the page offset to hold the block id and thus we need to clear
+        * it before doing the mmap itself
+        */
+       block_id = vma->vm_pgoff;
+       vma->vm_pgoff = 0;
+
+       /* Driver only allows mapping of a complete HW block */
+       block_size = vma->vm_end - vma->vm_start;
+
+#ifdef _HAS_TYPE_ARG_IN_ACCESS_OK
+       if (!access_ok(VERIFY_WRITE,
+               (void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#else
+       if (!access_ok((void __user *) (uintptr_t) vma->vm_start, block_size)) {
+#endif
+               dev_err(hdev->dev,
+                       "user pointer is invalid - 0x%lx\n",
+                       vma->vm_start);
+
+               return -EINVAL;
+       }
+
+       vma->vm_ops = &hw_block_vm_ops;
+       vma->vm_private_data = hpriv->ctx;
+
+       hl_ctx_get(hdev, hpriv->ctx);
+
+       rc = hdev->asic_funcs->hw_block_mmap(hdev, vma, block_id, block_size);
+       if (rc) {
+               hl_ctx_put(hpriv->ctx);
+               return rc;
+       }
+
+       vma->vm_pgoff = block_id;
+
+       return 0;
+}
+
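
A hedged userspace sketch of how such a handle could be consumed, assuming
an open device fd and a handle/size pair already returned by the
HL_MEM_OP_MAP_BLOCK ioctl (the exact uAPI flow is outside this hunk):

#include <stdint.h>
#include <stddef.h>
#include <sys/mman.h>

/* block_handle/block_size as returned by HL_MEM_OP_MAP_BLOCK; the handle
 * is already page-shifted, so it is a valid mmap offset. */
static void *map_hw_block(int fd, uint64_t block_handle, uint32_t block_size)
{
	void *p = mmap(NULL, block_size, PROT_READ | PROT_WRITE, MAP_SHARED,
		       fd, (off_t)block_handle);

	return (p == MAP_FAILED) ? NULL : p;
}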
 static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
 {
        struct hl_device *hdev = hpriv->hdev;
        struct hl_ctx *ctx = hpriv->ctx;
-       u64 device_addr = 0;
-       u32 handle = 0;
+       u64 block_handle, device_addr = 0;
+       u32 handle = 0, block_size;
        int rc;
 
        switch (args->in.op) {
@@ -1292,7 +1395,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
                break;
 
        case HL_MEM_OP_FREE:
-               rc = free_device_memory(ctx, args->in.free.handle);
+               rc = free_device_memory(ctx, &args->in);
                break;
 
        case HL_MEM_OP_MAP:
@@ -1301,7 +1404,7 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
                        rc = 0;
                } else {
                        rc = get_paddr_from_handle(ctx, &args->in,
-                                       &device_addr);
+                                                       &device_addr);
                }
 
                memset(args, 0, sizeof(*args));
@@ -1312,6 +1415,13 @@ static int mem_ioctl_no_mmu(struct hl_fpriv *hpriv, union hl_mem_args *args)
                rc = 0;
                break;
 
+       case HL_MEM_OP_MAP_BLOCK:
+               rc = map_block(hdev, args->in.map_block.block_addr,
+                               &block_handle, &block_size);
+               args->out.block_handle = block_handle;
+               args->out.block_size = block_size;
+               break;
+
        default:
                dev_err(hdev->dev, "Unknown opcode for memory IOCTL\n");
                rc = -ENOTTY;
@@ -1328,8 +1438,8 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
        union hl_mem_args *args = data;
        struct hl_device *hdev = hpriv->hdev;
        struct hl_ctx *ctx = hpriv->ctx;
-       u64 device_addr = 0;
-       u32 handle = 0;
+       u64 block_handle, device_addr = 0;
+       u32 handle = 0, block_size;
        int rc;
 
        if (!hl_device_operational(hdev, &status)) {
@@ -1400,7 +1510,7 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
                        goto out;
                }
 
-               rc = free_device_memory(ctx, args->in.free.handle);
+               rc = free_device_memory(ctx, &args->in);
                break;
 
        case HL_MEM_OP_MAP:
@@ -1411,8 +1521,14 @@ int hl_mem_ioctl(struct hl_fpriv *hpriv, void *data)
                break;
 
        case HL_MEM_OP_UNMAP:
-               rc = unmap_device_va(ctx, args->in.unmap.device_virt_addr,
-                                       false);
+               rc = unmap_device_va(ctx, &args->in, false);
+               break;
+
+       case HL_MEM_OP_MAP_BLOCK:
+               rc = map_block(hdev, args->in.map_block.block_addr,
+                               &block_handle, &block_size);
+               args->out.block_handle = block_handle;
+               args->out.block_size = block_size;
                break;
 
        default:
@@ -1473,16 +1589,16 @@ destroy_pages:
        return rc;
 }
 
-/*
- * hl_pin_host_memory - pins a chunk of host memory.
- * @hdev: pointer to the habanalabs device structure
- * @addr: the host virtual address of the memory area
- * @size: the size of the memory area
- * @userptr: pointer to hl_userptr structure
+/**
+ * hl_pin_host_memory() - pins a chunk of host memory.
+ * @hdev: pointer to the habanalabs device structure.
+ * @addr: the host virtual address of the memory area.
+ * @size: the size of the memory area.
+ * @userptr: pointer to hl_userptr structure.
  *
  * This function does the following:
- * - Pins the physical pages
- * - Create an SG list from those pages
+ * - Pins the physical pages.
+ * - Creates an SG list from those pages.
  */
 int hl_pin_host_memory(struct hl_device *hdev, u64 addr, u64 size,
                                        struct hl_userptr *userptr)
@@ -1571,11 +1687,10 @@ void hl_unpin_host_memory(struct hl_device *hdev, struct hl_userptr *userptr)
        kfree(userptr->sgt);
 }
 
-/*
- * hl_userptr_delete_list - clear userptr list
- *
- * @hdev                : pointer to the habanalabs device structure
- * @userptr_list        : pointer to the list to clear
+/**
+ * hl_userptr_delete_list() - clear userptr list.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to clear.
  *
  * This function does the following:
  * - Iterates over the list and unpins the host memory and frees the userptr
@@ -1594,12 +1709,11 @@ void hl_userptr_delete_list(struct hl_device *hdev,
        INIT_LIST_HEAD(userptr_list);
 }
 
-/*
- * hl_userptr_is_pinned - returns whether the given userptr is pinned
- *
- * @hdev                : pointer to the habanalabs device structure
- * @userptr_list        : pointer to the list to clear
- * @userptr             : pointer to userptr to check
+/**
+ * hl_userptr_is_pinned() - returns whether the given userptr is pinned.
+ * @hdev: pointer to the habanalabs device structure.
+ * @userptr_list: pointer to the list to search.
+ * @userptr: pointer to userptr to check.
  *
  * This function does the following:
  * - Iterates over the list and checks if the given userptr is in it, which means it is
@@ -1617,12 +1731,12 @@ bool hl_userptr_is_pinned(struct hl_device *hdev, u64 addr,
        return false;
 }
 
-/*
- * va_range_init - initialize virtual addresses range
- * @hdev: pointer to the habanalabs device structure
- * @va_range: pointer to the range to initialize
- * @start: range start address
- * @end: range end address
+/**
+ * va_range_init() - initialize virtual addresses range.
+ * @hdev: pointer to the habanalabs device structure.
+ * @va_range: pointer to the range to initialize.
+ * @start: range start address.
+ * @end: range end address.
  *
  * This function does the following:
  * - Initializes the virtual addresses list of the given range with the given
@@ -1635,15 +1749,21 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
 
        INIT_LIST_HEAD(&va_range->list);
 
-       /* PAGE_SIZE alignment */
+       /*
+        * PAGE_SIZE alignment
+        * It is the caller's responsibility to align the addresses if the
+        * page size is not a power of 2.
+        */
 
-       if (start & (PAGE_SIZE - 1)) {
-               start &= PAGE_MASK;
-               start += PAGE_SIZE;
-       }
+       if (is_power_of_2(page_size)) {
+               if (start & (PAGE_SIZE - 1)) {
+                       start &= PAGE_MASK;
+                       start += PAGE_SIZE;
+               }
 
-       if (end & (PAGE_SIZE - 1))
-               end &= PAGE_MASK;
+               if (end & (PAGE_SIZE - 1))
+                       end &= PAGE_MASK;
+       }
 
        if (start >= end) {
                dev_err(hdev->dev, "too small vm range for va list\n");
@@ -1664,13 +1784,13 @@ static int va_range_init(struct hl_device *hdev, struct hl_va_range *va_range,
        return 0;
 }
 
-/*
- * va_range_fini() - clear a virtual addresses range
- * @hdev: pointer to the habanalabs structure
- * va_range: pointer to virtual addresses range
+/**
+ * va_range_fini() - clear a virtual addresses range.
+ * @hdev: pointer to the habanalabs structure.
+ * @va_range: pointer to virtual addresses range.
  *
  * This function does the following:
- * - Frees the virtual addresses block list and its lock
+ * - Frees the virtual addresses block list and its lock.
  */
 static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
 {
@@ -1682,22 +1802,22 @@ static void va_range_fini(struct hl_device *hdev, struct hl_va_range *va_range)
        kfree(va_range);
 }
 
-/*
- * vm_ctx_init_with_ranges() - initialize virtual memory for context
- * @ctx: pointer to the habanalabs context structure
+/**
+ * vm_ctx_init_with_ranges() - initialize virtual memory for context.
+ * @ctx: pointer to the habanalabs context structure.
  * @host_range_start: host virtual addresses range start.
  * @host_range_end: host virtual addresses range end.
  * @host_huge_range_start: host virtual addresses range start for memory
- *                          allocated with huge pages.
+ *                         allocated with huge pages.
  * @host_huge_range_end: host virtual addresses range end for memory allocated
  *                        with huge pages.
  * @dram_range_start: dram virtual addresses range start.
  * @dram_range_end: dram virtual addresses range end.
  *
  * This function initializes the following:
- * - MMU for context
- * - Virtual address to area descriptor hashtable
- * - Virtual block list of available virtual memory
+ * - MMU for context.
+ * - Virtual address to area descriptor hashtable.
+ * - Virtual block list of available virtual memory.
  */
 static int vm_ctx_init_with_ranges(struct hl_ctx *ctx,
                                        u64 host_range_start,
@@ -1818,7 +1938,8 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
 
        dram_range_start = prop->dmmu.start_addr;
        dram_range_end = prop->dmmu.end_addr;
-       dram_page_size = prop->dmmu.page_size;
+       dram_page_size = prop->dram_page_size ?
+                               prop->dram_page_size : prop->dmmu.page_size;
        host_range_start = prop->pmmu.start_addr;
        host_range_end = prop->pmmu.end_addr;
        host_page_size = prop->pmmu.page_size;
@@ -1832,15 +1953,14 @@ int hl_vm_ctx_init(struct hl_ctx *ctx)
                        dram_range_start, dram_range_end, dram_page_size);
 }
 
-/*
- * hl_vm_ctx_fini       - virtual memory teardown of context
- *
- * @ctx                 : pointer to the habanalabs context structure
+/**
+ * hl_vm_ctx_fini() - virtual memory teardown of context.
+ * @ctx: pointer to the habanalabs context structure.
  *
  * This function performs teardown of the following:
- * - Virtual block list of available virtual memory
- * - Virtual address to area descriptor hashtable
- * - MMU for context
+ * - Virtual block list of available virtual memory.
+ * - Virtual address to area descriptor hashtable.
+ * - MMU for context.
  *
  * In addition this function does the following:
  * - Unmaps the existing hashtable nodes if the hashtable is not empty. The
@@ -1859,9 +1979,10 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
        struct hl_vm_phys_pg_pack *phys_pg_list;
        struct hl_vm_hash_node *hnode;
        struct hlist_node *tmp_node;
+       struct hl_mem_in args;
        int i;
 
-       if (!ctx->hdev->mmu_enable)
+       if (!hdev->mmu_enable)
                return;
 
        hl_debugfs_remove_ctx_mem_hash(hdev, ctx);
@@ -1878,13 +1999,18 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
                dev_dbg(hdev->dev,
                        "hl_mem_hash_node of vaddr 0x%llx of asid %d is still alive\n",
                        hnode->vaddr, ctx->asid);
-               unmap_device_va(ctx, hnode->vaddr, true);
+               args.unmap.device_virt_addr = hnode->vaddr;
+               unmap_device_va(ctx, &args, true);
        }
 
+       mutex_lock(&ctx->mmu_lock);
+
        /* invalidate the cache once after the unmapping loop */
        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_USERPTR);
        hdev->asic_funcs->mmu_invalidate_cache(hdev, true, VM_TYPE_PHYS_PACK);
 
+       mutex_unlock(&ctx->mmu_lock);
+
        spin_lock(&vm->idr_lock);
        idr_for_each_entry(&vm->phys_pg_pack_handles, phys_pg_list, i)
                if (phys_pg_list->asid == ctx->asid) {
@@ -1911,19 +2037,19 @@ void hl_vm_ctx_fini(struct hl_ctx *ctx)
         * because the user notifies us on allocations. If the user is no more,
         * all DRAM is available
         */
-       if (!ctx->hdev->asic_prop.dram_supports_virtual_memory)
-               atomic64_set(&ctx->hdev->dram_used_mem, 0);
+       if (ctx->asid != HL_KERNEL_ASID_ID &&
+                       !hdev->asic_prop.dram_supports_virtual_memory)
+               atomic64_set(&hdev->dram_used_mem, 0);
 }
 
-/*
- * hl_vm_init           - initialize virtual memory module
- *
- * @hdev                : pointer to the habanalabs device structure
+/**
+ * hl_vm_init() - initialize virtual memory module.
+ * @hdev: pointer to the habanalabs device structure.
  *
  * This function initializes the following:
- * - MMU module
- * - DRAM physical pages pool of 2MB
- * - Idr for device memory allocation handles
+ * - MMU module.
+ * - DRAM physical pages pool with 2MB page size.
+ * - Idr for device memory allocation handles.
  */
 int hl_vm_init(struct hl_device *hdev)
 {
@@ -1931,7 +2057,13 @@ int hl_vm_init(struct hl_device *hdev)
        struct hl_vm *vm = &hdev->vm;
        int rc;
 
-       vm->dram_pg_pool = gen_pool_create(__ffs(prop->dram_page_size), -1);
+       if (is_power_of_2(prop->dram_page_size))
+               vm->dram_pg_pool =
+                       gen_pool_create(__ffs(prop->dram_page_size), -1);
+       else
+               vm->dram_pg_pool =
+                       gen_pool_create(__ffs(DRAM_POOL_PAGE_SIZE), -1);
+
        if (!vm->dram_pg_pool) {
                dev_err(hdev->dev, "Failed to create dram page pool\n");
                return -ENOMEM;
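
A sketch of why __ffs() picks the pool granule here, and why a
non-power-of-2 DRAM page size needs the DRAM_POOL_PAGE_SIZE fallback;
__ffs is open-coded for a userspace build and the sizes are illustrative:

#include <stdint.h>
#include <stdio.h>

/* index of the lowest set bit, i.e. what __ffs() returns in the kernel */
static unsigned int lowest_set_bit(uint64_t x)
{
	unsigned int i = 0;

	while (!(x & 1)) {
		x >>= 1;
		i++;
	}
	return i;
}

int main(void)
{
	uint64_t pow2_page = 2ULL << 20;	/* 2MB, power of 2 */
	uint64_t odd_page = 48ULL << 20;	/* 48MB, not a power of 2 */

	/* 2MB -> order 21: the pool carves memory in 2MB granules */
	printf("order %u\n", lowest_set_bit(pow2_page));
	/* 48MB -> order 24: a 16MB granule, smaller than the page,
	 * which is why the driver falls back to a power-of-2
	 * DRAM_POOL_PAGE_SIZE granule instead */
	printf("order %u\n", lowest_set_bit(odd_page));
	return 0;
}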
@@ -1964,15 +2096,14 @@ pool_add_err:
        return rc;
 }
 
-/*
- * hl_vm_fini           - virtual memory module teardown
- *
- * @hdev                : pointer to the habanalabs device structure
+/**
+ * hl_vm_fini() - virtual memory module teardown.
+ * @hdev: pointer to the habanalabs device structure.
  *
  * This function performs teardown of the following:
- * - Idr for device memory allocation handles
- * - DRAM physical pages pool of 2MB
- * - MMU module
+ * - Idr for device memory allocation handles.
+ * - DRAM physical pages pool with 2MB page size.
+ * - MMU module.
  */
 void hl_vm_fini(struct hl_device *hdev)
 {
diff --git a/drivers/misc/habanalabs/common/mmu/Makefile b/drivers/misc/habanalabs/common/mmu/Makefile
new file mode 100644 (file)
index 0000000..d852c38
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+HL_COMMON_MMU_FILES := common/mmu/mmu.o common/mmu/mmu_v1.o
similarity index 75%
rename from drivers/misc/habanalabs/common/mmu.c
rename to drivers/misc/habanalabs/common/mmu/mmu.c
index 28a4638..93c9e5f 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <linux/slab.h>
 
-#include "habanalabs.h"
+#include "../habanalabs.h"
 
 bool hl_is_dram_va(struct hl_device *hdev, u64 virt_addr)
 {
@@ -166,7 +166,6 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
                mmu_prop = &prop->pmmu;
 
        pgt_residency = mmu_prop->host_resident ? MMU_HR_PGT : MMU_DR_PGT;
-
        /*
         * The H/W handles mapping of specific page sizes. Hence if the page
         * size is bigger, we break it to sub-pages and unmap them separately.
@@ -174,11 +173,21 @@ int hl_mmu_unmap_page(struct hl_ctx *ctx, u64 virt_addr, u32 page_size,
        if ((page_size % mmu_prop->page_size) == 0) {
                real_page_size = mmu_prop->page_size;
        } else {
-               dev_err(hdev->dev,
-                       "page size of %u is not %uKB aligned, can't unmap\n",
-                       page_size, mmu_prop->page_size >> 10);
+               /*
+                * The MMU page size may differ from the DRAM page size.
+                * In such a case, work with the DRAM page size and let the
+                * MMU scrambling routine handle this mismatch when
+                * calculating the address to remove from the MMU page table.
+                */
+               if (is_dram_addr && ((page_size % prop->dram_page_size) == 0)) {
+                       real_page_size = prop->dram_page_size;
+               } else {
+                       dev_err(hdev->dev,
+                               "page size of %u is not %uKB aligned, can't unmap\n",
+                               page_size, mmu_prop->page_size >> 10);
 
-               return -EFAULT;
+                       return -EFAULT;
+               }
        }
 
        npages = page_size / real_page_size;
@@ -253,6 +262,17 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
         */
        if ((page_size % mmu_prop->page_size) == 0) {
                real_page_size = mmu_prop->page_size;
+       } else if (is_dram_addr && ((page_size % prop->dram_page_size) == 0) &&
+                       (prop->dram_page_size < mmu_prop->page_size)) {
+               /*
+                * The MMU page size may differ from the DRAM page size.
+                * In such a case, work with the DRAM page size and let the
+                * MMU scrambling routine handle this mismatch when
+                * calculating the address to place in the MMU page table
+                * (in that case, also make sure that the dram_page_size is
+                * smaller than the MMU page size).
+                */
+               real_page_size = prop->dram_page_size;
        } else {
                dev_err(hdev->dev,
                        "page size of %u is not %uKB aligned, can't map\n",
@@ -261,9 +281,21 @@ int hl_mmu_map_page(struct hl_ctx *ctx, u64 virt_addr, u64 phys_addr,
                return -EFAULT;
        }
 
-       WARN_ONCE((phys_addr & (real_page_size - 1)),
-               "Mapping 0x%llx with page size of 0x%x is erroneous! Address must be divisible by page size",
-               phys_addr, real_page_size);
+       /*
+        * Verify that the phys and virt addresses are aligned with the
+        * MMU page size (in DRAM this means checking the addresses after
+        * scrambling).
+        */
+       if ((is_dram_addr &&
+                       ((hdev->asic_funcs->scramble_addr(hdev, phys_addr) &
+                               (mmu_prop->page_size - 1)) ||
+                       (hdev->asic_funcs->scramble_addr(hdev, virt_addr) &
+                               (mmu_prop->page_size - 1)))) ||
+               (!is_dram_addr && ((phys_addr & (real_page_size - 1)) ||
+                               (virt_addr & (real_page_size - 1)))))
+               dev_crit(hdev->dev,
+                       "Mapping address 0x%llx with virtual address 0x%llx and page size of 0x%x is erroneous! Addresses must be divisible by page size",
+                       phys_addr, virt_addr, real_page_size);
 
        npages = page_size / real_page_size;
        real_virt_addr = virt_addr;
@@ -444,19 +476,67 @@ void hl_mmu_swap_in(struct hl_ctx *ctx)
                hdev->mmu_func[MMU_HR_PGT].swap_in(ctx);
 }
 
+static void hl_mmu_pa_page_with_offset(struct hl_ctx *ctx, u64 virt_addr,
+                                               struct hl_mmu_hop_info *hops,
+                                               u64 *phys_addr)
+{
+       struct hl_device *hdev = ctx->hdev;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
+       u64 offset_mask, addr_mask, hop_shift, tmp_phys_addr;
+       u32 hop0_shift_off;
+       void *p;
+
+       /* last hop holds the phys address and flags */
+       if (hops->unscrambled_paddr)
+               tmp_phys_addr = hops->unscrambled_paddr;
+       else
+               tmp_phys_addr = hops->hop_info[hops->used_hops - 1].hop_pte_val;
+
+       if (hops->range_type == HL_VA_RANGE_TYPE_HOST_HUGE)
+               p = &prop->pmmu_huge;
+       else if (hops->range_type == HL_VA_RANGE_TYPE_HOST)
+               p = &prop->pmmu;
+       else /* HL_VA_RANGE_TYPE_DRAM */
+               p = &prop->dmmu;
+
+       if ((hops->range_type == HL_VA_RANGE_TYPE_DRAM) &&
+                       !is_power_of_2(prop->dram_page_size)) {
+               u32 bit;
+               u64 page_offset_mask;
+               u64 phys_addr_mask;
+
+               bit = __ffs64((u64)prop->dram_page_size);
+               page_offset_mask = ((1ull << bit) - 1);
+               phys_addr_mask = ~page_offset_mask;
+               *phys_addr = (tmp_phys_addr & phys_addr_mask) |
+                               (virt_addr & page_offset_mask);
+       } else {
+               /*
+                * find the correct hop shift field in hl_mmu_properties
+                * structure in order to determine the right masks
+                * for the page offset.
+                */
+               hop0_shift_off = offsetof(struct hl_mmu_properties, hop0_shift);
+               p = (char *)p + hop0_shift_off;
+               p = (char *)p + ((hops->used_hops - 1) * sizeof(u64));
+               hop_shift = *(u64 *)p;
+               offset_mask = (1ull << hop_shift) - 1;
+               addr_mask = ~(offset_mask);
+               *phys_addr = (tmp_phys_addr & addr_mask) |
+                               (virt_addr & offset_mask);
+       }
+}
+
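
A sketch of the non-power-of-2 branch above: only the bits below the lowest
set bit of the DRAM page size can be taken verbatim from the VA as a page
offset. The 48MB page size is an assumption for illustration; the GCC-style
__builtin_ctzll stands in for the kernel's __ffs64:

#include <stdint.h>
#include <stdio.h>

static uint64_t pa_with_offset(uint64_t pte_pa, uint64_t va,
			       uint64_t dram_page_size)
{
	unsigned int bit = __builtin_ctzll(dram_page_size);
	uint64_t page_offset_mask = (1ULL << bit) - 1;

	/* high bits from the PTE, low (offset) bits from the VA */
	return (pte_pa & ~page_offset_mask) | (va & page_offset_mask);
}

int main(void)
{
	/* 48MB page: lowest set bit is 2^24, so 24 offset bits survive */
	printf("0x%llx\n",
	       (unsigned long long)pa_with_offset(0x300000000ULL,
						  0x123456ULL,
						  48ULL << 20));
	return 0;
}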
 int hl_mmu_va_to_pa(struct hl_ctx *ctx, u64 virt_addr, u64 *phys_addr)
 {
        struct hl_mmu_hop_info hops;
-       u64 tmp_addr;
        int rc;
 
        rc = hl_mmu_get_tlb_info(ctx, virt_addr, &hops);
        if (rc)
                return rc;
 
-       /* last hop holds the phys address and flags */
-       tmp_addr = hops.hop_info[hops.used_hops - 1].hop_pte_val;
-       *phys_addr = (tmp_addr & HOP_PHYS_ADDR_MASK) | (virt_addr & FLAGS_MASK);
+       hl_mmu_pa_page_with_offset(ctx, virt_addr, &hops, phys_addr);
 
        return 0;
 }
@@ -473,6 +553,8 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
        if (!hdev->mmu_enable)
                return -EOPNOTSUPP;
 
+       hops->scrambled_vaddr = virt_addr;      /* assume no scrambling */
+
        is_dram_addr = hl_mem_area_inside_range(virt_addr, prop->dmmu.page_size,
                                                prop->dmmu.start_addr,
                                                prop->dmmu.end_addr);
@@ -491,6 +573,11 @@ int hl_mmu_get_tlb_info(struct hl_ctx *ctx, u64 virt_addr,
 
        mutex_unlock(&ctx->mmu_lock);
 
+       /* add page offset to physical address */
+       if (hops->unscrambled_paddr)
+               hl_mmu_pa_page_with_offset(ctx, virt_addr, hops,
+                                       &hops->unscrambled_paddr);
+
        return rc;
 }
 
@@ -512,3 +599,28 @@ int hl_mmu_if_set_funcs(struct hl_device *hdev)
 
        return 0;
 }
+
+/**
+ * hl_mmu_scramble_addr() - The generic mmu address scrambling routine.
+ * @hdev: pointer to device data.
+ * @addr: The address to scramble.
+ *
+ * Return: The scrambled address.
+ */
+u64 hl_mmu_scramble_addr(struct hl_device *hdev, u64 addr)
+{
+       return addr;
+}
+
+/**
+ * hl_mmu_descramble_addr() - The generic mmu address descrambling
+ * routine.
+ * @hdev: pointer to device data.
+ * @addr: The address to descramble.
+ *
+ * Return: The un-scrambled address.
+ */
+u64 hl_mmu_descramble_addr(struct hl_device *hdev, u64 addr)
+{
+       return addr;
+}
similarity index 99%
rename from drivers/misc/habanalabs/common/mmu_v1.c
rename to drivers/misc/habanalabs/common/mmu/mmu_v1.c
index 06d8a44..c5e93ff 100644 (file)
@@ -5,8 +5,8 @@
  * All Rights Reserved.
  */
 
-#include "habanalabs.h"
-#include "../include/hw_ip/mmu/mmu_general.h"
+#include "../habanalabs.h"
+#include "../../include/hw_ip/mmu/mmu_general.h"
 
 #include <linux/slab.h>
 
diff --git a/drivers/misc/habanalabs/common/pci/Makefile b/drivers/misc/habanalabs/common/pci/Makefile
new file mode 100644 (file)
index 0000000..dc922a6
--- /dev/null
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+HL_COMMON_PCI_FILES := common/pci/pci.o
similarity index 91%
rename from drivers/misc/habanalabs/common/pci.c
rename to drivers/misc/habanalabs/common/pci/pci.c
index b4725e6..b799f92 100644 (file)
@@ -5,8 +5,8 @@
  * All Rights Reserved.
  */
 
-#include "habanalabs.h"
-#include "../include/hw_ip/pci/pci_general.h"
+#include "../habanalabs.h"
+#include "../../include/hw_ip/pci/pci_general.h"
 
 #include <linux/pci.h>
 
@@ -308,40 +308,6 @@ int hl_pci_set_outbound_region(struct hl_device *hdev,
 }
 
 /**
- * hl_pci_set_dma_mask() - Set DMA masks for the device.
- * @hdev: Pointer to hl_device structure.
- *
- * This function sets the DMA masks (regular and consistent) for a specified
- * value. If it doesn't succeed, it tries to set it to a fall-back value
- *
- * Return: 0 on success, non-zero for failure.
- */
-static int hl_pci_set_dma_mask(struct hl_device *hdev)
-{
-       struct pci_dev *pdev = hdev->pdev;
-       int rc;
-
-       /* set DMA mask */
-       rc = pci_set_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to set pci dma mask to %d bits, error %d\n",
-                       hdev->dma_mask, rc);
-               return rc;
-       }
-
-       rc = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(hdev->dma_mask));
-       if (rc) {
-               dev_err(hdev->dev,
-                       "Failed to set pci consistent dma mask to %d bits, error %d\n",
-                       hdev->dma_mask, rc);
-               return rc;
-       }
-
-       return 0;
-}
-
-/**
  * hl_pci_init() - PCI initialization code.
  * @hdev: Pointer to hl_device structure.
  *
@@ -377,9 +343,14 @@ int hl_pci_init(struct hl_device *hdev)
                goto unmap_pci_bars;
        }
 
-       rc = hl_pci_set_dma_mask(hdev);
-       if (rc)
+       rc = dma_set_mask_and_coherent(&pdev->dev,
+                                       DMA_BIT_MASK(hdev->dma_mask));
+       if (rc) {
+               dev_err(hdev->dev,
+                       "Failed to set dma mask to %d bits, error %d\n",
+                       hdev->dma_mask, rc);
                goto unmap_pci_bars;
+       }
 
        return 0;
 
index b328dda..9152242 100644 (file)
@@ -225,6 +225,12 @@ gaudi_qman_arb_error_cause[GAUDI_NUM_OF_QM_ARB_ERR_CAUSE] = {
        "MSG AXI LBW returned with error"
 };
 
+enum gaudi_sm_sei_cause {
+       GAUDI_SM_SEI_SO_OVERFLOW,
+       GAUDI_SM_SEI_LBW_4B_UNALIGNED,
+       GAUDI_SM_SEI_AXI_RESPONSE_ERR
+};
+
 static enum hl_queue_type gaudi_queue_type[GAUDI_QUEUE_ID_SIZE] = {
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_0 */
        QUEUE_TYPE_EXT, /* GAUDI_QUEUE_ID_DMA_0_1 */
@@ -354,6 +360,10 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
                                        struct hl_cs_job *job);
 static int gaudi_memset_device_memory(struct hl_device *hdev, u64 addr,
                                        u32 size, u64 val);
+static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
+                                       u32 num_regs, u32 val);
+static int gaudi_schedule_register_memset(struct hl_device *hdev,
+               u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val);
 static int gaudi_run_tpc_kernel(struct hl_device *hdev, u64 tpc_kernel,
                                u32 tpc_id);
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev);
@@ -517,6 +527,11 @@ static int gaudi_get_fixed_properties(struct hl_device *hdev)
                        prop->sync_stream_first_mon +
                        (num_sync_stream_queues * HL_RSVD_MONS);
 
+       prop->first_available_user_msix_interrupt = USHRT_MAX;
+
+       for (i = 0 ; i < HL_MAX_DCORES ; i++)
+               prop->first_available_cq[i] = USHRT_MAX;
+
        /* disable fw security for now, set it in a later stage */
        prop->fw_security_disabled = true;
        prop->fw_security_status_valid = false;
@@ -913,11 +928,17 @@ static void gaudi_sob_group_hw_reset(struct kref *ref)
        struct gaudi_hw_sob_group *hw_sob_group =
                container_of(ref, struct gaudi_hw_sob_group, kref);
        struct hl_device *hdev = hw_sob_group->hdev;
-       int i;
+       u64 base_addr;
+       int rc;
 
-       for (i = 0 ; i < NUMBER_OF_SOBS_IN_GRP ; i++)
-               WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
-                               (hw_sob_group->base_sob_id + i) * 4, 0);
+       base_addr = CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+                       hw_sob_group->base_sob_id * 4;
+       rc = gaudi_schedule_register_memset(hdev, hw_sob_group->queue_id,
+                       base_addr, NUMBER_OF_SOBS_IN_GRP, 0);
+       if (rc)
+               dev_err(hdev->dev,
+                       "failed resetting sob group - sob base %u, count %u",
+                       hw_sob_group->base_sob_id, NUMBER_OF_SOBS_IN_GRP);
 
        kref_init(&hw_sob_group->kref);
 }
@@ -1008,6 +1029,8 @@ static void gaudi_collective_master_init_job(struct hl_device *hdev,
                cprop->hw_sob_group[sob_group_offset].base_sob_id;
        master_monitor = prop->collective_mstr_mon_id[0];
 
+       cprop->hw_sob_group[sob_group_offset].queue_id = queue_id;
+
        dev_dbg(hdev->dev,
                "Generate master wait CBs, sob %d (mask %#x), val:0x%x, mon %u, q %d\n",
                master_sob_base, cprop->mstr_sob_mask[0],
@@ -1248,7 +1271,7 @@ static int gaudi_collective_wait_create_jobs(struct hl_device *hdev,
        u32 queue_id, collective_queue, num_jobs;
        u32 stream, nic_queue, nic_idx = 0;
        bool skip;
-       int i, rc;
+       int i, rc = 0;
 
        /* Verify wait queue id is configured as master */
        hw_queue_prop = &hdev->asic_prop.hw_queues_props[wait_queue_id];
@@ -1359,8 +1382,6 @@ static int gaudi_late_init(struct hl_device *hdev)
                return rc;
        }
 
-       WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
-
        rc = gaudi_fetch_psoc_frequency(hdev);
        if (rc) {
                dev_err(hdev->dev, "Failed to fetch psoc frequency\n");
@@ -1607,6 +1628,7 @@ static int gaudi_sw_init(struct hl_device *hdev)
 
        hdev->supports_sync_stream = true;
        hdev->supports_coresight = true;
+       hdev->supports_staged_submission = true;
 
        return 0;
 
@@ -3438,6 +3460,12 @@ static void gaudi_set_clock_gating(struct hl_device *hdev)
                enable = !!(hdev->clock_gating_mask &
                                (BIT_ULL(gaudi_dma_assignment[i])));
 
+               /* GC sends work to the DMA engine through the Upper CP in
+                * DMA5, so we must not enable clock gating in that DMA.
+                */
+               if (i == GAUDI_HBM_DMA_4)
+                       enable = 0;
+
                qman_offset = gaudi_dma_assignment[i] * DMA_QMAN_OFFSET;
                WREG32(mmDMA0_QM_CGM_CFG1 + qman_offset,
                                enable ? QMAN_CGM1_PWR_GATE_EN : 0);
@@ -3704,6 +3732,7 @@ static int gaudi_init_cpu(struct hl_device *hdev)
 static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
 {
        struct gaudi_device *gaudi = hdev->asic_specific;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_eq *eq;
        u32 status;
        struct hl_hw_queue *cpu_pq =
@@ -3760,6 +3789,10 @@ static int gaudi_init_cpu_queues(struct hl_device *hdev, u32 cpu_timeout)
                return -EIO;
        }
 
+       /* update FW application security bits */
+       if (prop->fw_security_status_valid)
+               prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
+
        gaudi->hw_cap_initialized |= HW_CAP_CPU_Q;
        return 0;
 }
@@ -4417,9 +4450,12 @@ static void gaudi_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
        /* ring the doorbell */
        WREG32(db_reg_offset, db_value);
 
-       if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ)
+       if (hw_queue_id == GAUDI_QUEUE_ID_CPU_PQ) {
+               /* make sure device CPU will read latest data from host */
+               mb();
                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                                GAUDI_EVENT_PI_UPDATE);
+       }
 }
 
 static void gaudi_pqe_write(struct hl_device *hdev, __le64 *pqe,
@@ -4518,7 +4554,6 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
 {
        struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct gaudi_device *gaudi = hdev->asic_specific;
-       u64 idle_mask = 0;
        int rc = 0;
        u64 val = 0;
 
@@ -4531,8 +4566,8 @@ static int gaudi_scrub_device_mem(struct hl_device *hdev, u64 addr, u64 size)
                                hdev,
                                mmDMA0_CORE_STS0/* dummy */,
                                val/* dummy */,
-                               (hdev->asic_funcs->is_device_idle(hdev,
-                                               &idle_mask, NULL)),
+                               (hdev->asic_funcs->is_device_idle(hdev, NULL,
+                                               0, NULL)),
                                                1000,
                                                HBM_SCRUBBING_TIMEOUT_US);
                if (rc) {
@@ -5060,7 +5095,8 @@ static int gaudi_validate_cb(struct hl_device *hdev,
         * 1. A packet that will act as a completion packet
         * 2. A packet that will generate MSI-X interrupt
         */
-       parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
+       if (parser->completion)
+               parser->patched_cb_size += sizeof(struct packet_msg_prot) * 2;
 
        return rc;
 }
@@ -5287,8 +5323,11 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
         * 1. A packet that will act as a completion packet
         * 2. A packet that will generate MSI interrupt
         */
-       parser->patched_cb_size = parser->user_cb_size +
-                       sizeof(struct packet_msg_prot) * 2;
+       if (parser->completion)
+               parser->patched_cb_size = parser->user_cb_size +
+                               sizeof(struct packet_msg_prot) * 2;
+       else
+               parser->patched_cb_size = parser->user_cb_size;
 
        rc = hl_cb_create(hdev, &hdev->kernel_cb_mgr, hdev->kernel_ctx,
                                parser->patched_cb_size, false, false,
@@ -5304,10 +5343,10 @@ static int gaudi_parse_cb_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                       (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -5376,10 +5415,10 @@ static int gaudi_parse_cb_no_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail here */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                               (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -5579,31 +5618,206 @@ release_cb:
        return rc;
 }
 
-static void gaudi_restore_sm_registers(struct hl_device *hdev)
+static int gaudi_memset_registers(struct hl_device *hdev, u64 reg_base,
+                                       u32 num_regs, u32 val)
+{
+       struct packet_msg_long *pkt;
+       struct hl_cs_job *job;
+       u32 cb_size, ctl;
+       struct hl_cb *cb;
+       int i, rc;
+
+       cb_size = (sizeof(*pkt) * num_regs) + sizeof(struct packet_msg_prot);
+
+       if (cb_size > SZ_2M) {
+               dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
+               return -ENOMEM;
+       }
+
+       cb = hl_cb_kernel_create(hdev, cb_size, false);
+       if (!cb)
+               return -EFAULT;
+
+       pkt = cb->kernel_address;
+
+       ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
+       for (i = 0; i < num_regs ; i++, pkt++) {
+               pkt->ctl = cpu_to_le32(ctl);
+               pkt->value = cpu_to_le32(val);
+               pkt->addr = cpu_to_le64(reg_base + (i * 4));
+       }
+
+       job = hl_cs_allocate_job(hdev, QUEUE_TYPE_EXT, true);
+       if (!job) {
+               dev_err(hdev->dev, "Failed to allocate a new job\n");
+               rc = -ENOMEM;
+               goto release_cb;
+       }
+
+       job->id = 0;
+       job->user_cb = cb;
+       atomic_inc(&job->user_cb->cs_cnt);
+       job->user_cb_size = cb_size;
+       job->hw_queue_id = GAUDI_QUEUE_ID_DMA_0_0;
+       job->patched_cb = job->user_cb;
+       job->job_cb_size = cb_size;
+
+       hl_debugfs_add_job(hdev, job);
+
+       rc = gaudi_send_job_on_qman0(hdev, job);
+       hl_debugfs_remove_job(hdev, job);
+       kfree(job);
+       atomic_dec(&cb->cs_cnt);
+
+release_cb:
+       hl_cb_put(cb);
+       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
+
+       return rc;
+}
+
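
A sketch of the MSG_LONG control-word packing used above, with placeholder
field positions and opcode (the real GAUDI_PKT_* masks live in the packet
headers and are not reproduced here); FIELD_PREP is reduced to plain shifts:

#include <stdint.h>
#include <stdio.h>

#define OPCODE_SHIFT	24	/* placeholder bit positions */
#define EB_SHIFT	29
#define RB_SHIFT	30
#define MB_SHIFT	31
#define PACKET_MSG_LONG	0x3	/* placeholder opcode */

/* Build a control word that writes a value, with engine/register/message
 * barriers set, as the driver does for each MSG_LONG packet. */
static uint32_t msg_long_ctl(void)
{
	return ((uint32_t)PACKET_MSG_LONG << OPCODE_SHIFT) |
	       (1U << EB_SHIFT) | (1U << RB_SHIFT) | (1U << MB_SHIFT);
}

int main(void)
{
	printf("ctl = 0x%08x\n", msg_long_ctl());
	return 0;
}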
+static int gaudi_schedule_register_memset(struct hl_device *hdev,
+               u32 hw_queue_id, u64 reg_base, u32 num_regs, u32 val)
 {
+       struct hl_ctx *ctx = hdev->compute_ctx;
+       struct hl_pending_cb *pending_cb;
+       struct packet_msg_long *pkt;
+       u32 cb_size, ctl;
+       struct hl_cb *cb;
        int i;
 
-       for (i = 0 ; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4) {
-               WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
-               WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
-               WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
+       /* If no compute context available or context is going down
+        * memset registers directly
+        */
+       if (!ctx || kref_read(&ctx->refcount) == 0)
+               return gaudi_memset_registers(hdev, reg_base, num_regs, val);
+
+       cb_size = (sizeof(*pkt) * num_regs) +
+                       sizeof(struct packet_msg_prot) * 2;
+
+       if (cb_size > SZ_2M) {
+               dev_err(hdev->dev, "CB size must be smaller than %uMB", SZ_2M);
+               return -ENOMEM;
+       }
+
+       pending_cb = kzalloc(sizeof(*pending_cb), GFP_KERNEL);
+       if (!pending_cb)
+               return -ENOMEM;
+
+       cb = hl_cb_kernel_create(hdev, cb_size, false);
+       if (!cb) {
+               kfree(pending_cb);
+               return -EFAULT;
        }
 
-       for (i = 0 ; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4) {
-               WREG32(mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
-               WREG32(mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
-               WREG32(mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
+       pkt = cb->kernel_address;
+
+       ctl = FIELD_PREP(GAUDI_PKT_LONG_CTL_OP_MASK, 0); /* write the value */
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_LONG);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
+
+       for (i = 0; i < num_regs ; i++, pkt++) {
+               pkt->ctl = cpu_to_le32(ctl);
+               pkt->value = cpu_to_le32(val);
+               pkt->addr = cpu_to_le64(reg_base + (i * 4));
        }
 
-       i = GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4;
+       hl_cb_destroy(hdev, &hdev->kernel_cb_mgr, cb->id << PAGE_SHIFT);
 
-       for (; i < NUM_OF_SOB_IN_BLOCK << 2 ; i += 4)
-               WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + i, 0);
+       pending_cb->cb = cb;
+       pending_cb->cb_size = cb_size;
+       /* The queue ID MUST be an external queue ID. Otherwise, we will
+        * have undefined behavior
+        */
+       pending_cb->hw_queue_id = hw_queue_id;
 
-       i = GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4;
+       spin_lock(&ctx->pending_cb_lock);
+       list_add_tail(&pending_cb->cb_node, &ctx->pending_cb_list);
+       spin_unlock(&ctx->pending_cb_lock);
 
-       for (; i < NUM_OF_MONITORS_IN_BLOCK << 2 ; i += 4)
-               WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 + i, 0);
+       return 0;
+}
+
+static int gaudi_restore_sm_registers(struct hl_device *hdev)
+{
+       u64 base_addr;
+       u32 num_regs;
+       int rc;
+
+       base_addr = CFG_BASE + mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
+       num_regs = NUM_OF_SOB_IN_BLOCK;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_SOB_OBJ_0;
+       num_regs = NUM_OF_SOB_IN_BLOCK;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_SOB_OBJ_0;
+       num_regs = NUM_OF_SOB_IN_BLOCK;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       base_addr = CFG_BASE +  mmSYNC_MNGR_E_N_SYNC_MNGR_OBJS_MON_STATUS_0;
+       num_regs = NUM_OF_MONITORS_IN_BLOCK;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       base_addr = CFG_BASE +  mmSYNC_MNGR_E_S_SYNC_MNGR_OBJS_MON_STATUS_0;
+       num_regs = NUM_OF_MONITORS_IN_BLOCK;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       base_addr = CFG_BASE +  mmSYNC_MNGR_W_N_SYNC_MNGR_OBJS_MON_STATUS_0;
+       num_regs = NUM_OF_MONITORS_IN_BLOCK;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+                       (GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT * 4);
+       num_regs = NUM_OF_SOB_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_SYNC_OBJECT;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       base_addr = CFG_BASE +  mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_MON_STATUS_0 +
+                       (GAUDI_FIRST_AVAILABLE_W_S_MONITOR * 4);
+       num_regs = NUM_OF_MONITORS_IN_BLOCK - GAUDI_FIRST_AVAILABLE_W_S_MONITOR;
+       rc = gaudi_memset_registers(hdev, base_addr, num_regs, 0);
+       if (rc) {
+               dev_err(hdev->dev, "failed resetting SM registers");
+               return -ENOMEM;
+       }
+
+       return 0;
 }
 
 static void gaudi_restore_dma_registers(struct hl_device *hdev)
@@ -5660,18 +5874,23 @@ static void gaudi_restore_qm_registers(struct hl_device *hdev)
        }
 }
 
-static void gaudi_restore_user_registers(struct hl_device *hdev)
+static int gaudi_restore_user_registers(struct hl_device *hdev)
 {
-       gaudi_restore_sm_registers(hdev);
+       int rc;
+
+       rc = gaudi_restore_sm_registers(hdev);
+       if (rc)
+               return rc;
+
        gaudi_restore_dma_registers(hdev);
        gaudi_restore_qm_registers(hdev);
+
+       return 0;
 }
 
 static int gaudi_context_switch(struct hl_device *hdev, u32 asid)
 {
-       gaudi_restore_user_registers(hdev);
-
-       return 0;
+       return gaudi_restore_user_registers(hdev);
 }
 
 static int gaudi_mmu_clear_pgt_range(struct hl_device *hdev)
@@ -5730,8 +5949,6 @@ static int gaudi_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
                }
                if (hbm_bar_addr == U64_MAX)
                        rc = -EIO;
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
        } else {
                rc = -EFAULT;
        }
@@ -5777,8 +5994,6 @@ static int gaudi_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
                }
                if (hbm_bar_addr == U64_MAX)
                        rc = -EIO;
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
        } else {
                rc = -EFAULT;
        }
@@ -5828,8 +6043,6 @@ static int gaudi_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
                }
                if (hbm_bar_addr == U64_MAX)
                        rc = -EIO;
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
        } else {
                rc = -EFAULT;
        }
@@ -5878,8 +6091,6 @@ static int gaudi_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
                }
                if (hbm_bar_addr == U64_MAX)
                        rc = -EIO;
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
        } else {
                rc = -EFAULT;
        }
@@ -5924,7 +6135,7 @@ static void gaudi_mmu_prepare(struct hl_device *hdev, u32 asid)
                return;
 
        if (asid & ~DMA0_QM_GLBL_NON_SECURE_PROPS_0_ASID_MASK) {
-               WARN(1, "asid %u is too big\n", asid);
+               dev_crit(hdev->dev, "asid %u is too big\n", asid);
                return;
        }
 
@@ -6227,7 +6438,7 @@ static int gaudi_send_job_on_qman0(struct hl_device *hdev,
        else
                timeout = HL_DEVICE_TIMEOUT_USEC;
 
-       if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
+       if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
                dev_err_ratelimited(hdev->dev,
                        "Can't send driver job on QMAN0 because the device is not idle\n");
                return -EBUSY;
@@ -6658,6 +6869,34 @@ static void gaudi_handle_qman_err_generic(struct hl_device *hdev,
        }
 }
 
+static void gaudi_print_sm_sei_info(struct hl_device *hdev, u16 event_type,
+               struct hl_eq_sm_sei_data *sei_data)
+{
+       u32 index = event_type - GAUDI_EVENT_DMA_IF_SEI_0;
+
+       switch (sei_data->sei_cause) {
+       case SM_SEI_SO_OVERFLOW:
+               dev_err(hdev->dev,
+                       "SM %u SEI Error: SO %u overflow/underflow",
+                       index, le32_to_cpu(sei_data->sei_log));
+               break;
+       case SM_SEI_LBW_4B_UNALIGNED:
+               dev_err(hdev->dev,
+                       "SM %u SEI Error: Unaligned 4B LBW access, monitor agent address low - %#x",
+                       index, le32_to_cpu(sei_data->sei_log));
+               break;
+       case SM_SEI_AXI_RESPONSE_ERR:
+               dev_err(hdev->dev,
+                       "SM %u SEI Error: AXI ID %u response error",
+                       index, le32_to_cpu(sei_data->sei_log));
+               break;
+       default:
+               dev_err(hdev->dev, "Unknown SM SEI cause %u",
+                               le32_to_cpu(sei_data->sei_log));
+               break;
+       }
+}
+
 static void gaudi_handle_ecc_event(struct hl_device *hdev, u16 event_type,
                struct hl_eq_ecc_data *ecc_data)
 {
@@ -6874,7 +7113,9 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
        u32 base, val, val2, wr_par, rd_par, ca_par, derr, serr, type, ch;
        int err = 0;
 
-       if (!hdev->asic_prop.fw_security_disabled) {
+       if (hdev->asic_prop.fw_security_status_valid &&
+                       (hdev->asic_prop.fw_app_security_map &
+                               CPU_BOOT_DEV_STS0_HBM_ECC_EN)) {
                if (!hbm_ecc_data) {
                        dev_err(hdev->dev, "No FW ECC data");
                        return 0;
@@ -6896,14 +7137,24 @@ static int gaudi_hbm_read_interrupts(struct hl_device *hdev, int device,
                                le32_to_cpu(hbm_ecc_data->hbm_ecc_info));
 
                dev_err(hdev->dev,
-                       "HBM%d pc%d ECC: TYPE=%d, WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
-                       device, ch, type, wr_par, rd_par, ca_par, serr, derr);
+                       "HBM%d pc%d interrupts info: WR_PAR=%d, RD_PAR=%d, CA_PAR=%d, SERR=%d, DERR=%d\n",
+                       device, ch, wr_par, rd_par, ca_par, serr, derr);
+               dev_err(hdev->dev,
+                       "HBM%d pc%d ECC info: 1ST_ERR_ADDR=0x%x, 1ST_ERR_TYPE=%d, SEC_CONT_CNT=%u, SEC_CNT=%d, DEC_CNT=%d\n",
+                       device, ch, hbm_ecc_data->first_addr, type,
+                       hbm_ecc_data->sec_cont_cnt, hbm_ecc_data->sec_cnt,
+                       hbm_ecc_data->dec_cnt);
 
                err = 1;
 
                return 0;
        }
 
+       if (!hdev->asic_prop.fw_security_disabled) {
+               dev_info(hdev->dev, "Cannot access MC regs for ECC data while security is enabled\n");
+               return 0;
+       }
+
        base = GAUDI_HBM_CFG_BASE + device * GAUDI_HBM_CFG_OFFSET;
        for (ch = 0 ; ch < GAUDI_HBM_CHANNELS ; ch++) {
                val = RREG32_MASK(base + ch * 0x1000 + 0x06C, 0x0000FFFF);
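
gaudi_hbm_read_interrupts() now picks its ECC data source in three steps: firmware-reported EQ data when the FW advertises HBM ECC handling, no access at all when security is enabled without that capability, and direct memory-controller reads only when security is disabled. The gate condition can be read as a small predicate; a hypothetical helper, not part of this patch:

	/* hypothetical helper, equivalent to the condition used above */
	static bool gaudi_fw_handles_hbm_ecc(struct hl_device *hdev)
	{
		struct asic_fixed_properties *prop = &hdev->asic_prop;

		return prop->fw_security_status_valid &&
		       (prop->fw_app_security_map & CPU_BOOT_DEV_STS0_HBM_ECC_EN);
	}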
@@ -7153,6 +7404,7 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
                gaudi_hbm_read_interrupts(hdev,
                                gaudi_hbm_event_to_dev(event_type),
                                &eq_entry->hbm_ecc_data);
+               hl_fw_unmask_irq(hdev, event_type);
                break;
 
        case GAUDI_EVENT_TPC0_DEC:
@@ -7281,6 +7533,13 @@ static void gaudi_handle_eqe(struct hl_device *hdev,
                hl_fw_unmask_irq(hdev, event_type);
                break;
 
+       case GAUDI_EVENT_DMA_IF_SEI_0 ... GAUDI_EVENT_DMA_IF_SEI_3:
+               gaudi_print_irq_info(hdev, event_type, false);
+               gaudi_print_sm_sei_info(hdev, event_type,
+                                       &eq_entry->sm_sei_data);
+               hl_fw_unmask_irq(hdev, event_type);
+               break;
+
        case GAUDI_EVENT_FIX_POWER_ENV_S ... GAUDI_EVENT_FIX_THERMAL_ENV_E:
                gaudi_print_clk_change_info(hdev, event_type);
                hl_fw_unmask_irq(hdev, event_type);
@@ -7330,8 +7589,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
        else
                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
 
-       mutex_lock(&hdev->mmu_cache_lock);
-
        /* L0 & L1 invalidation */
        WREG32(mmSTLB_INV_PS, 3);
        WREG32(mmSTLB_CACHE_INV, gaudi->mmu_cache_inv_pi++);
@@ -7347,8 +7604,6 @@ static int gaudi_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
 
        WREG32(mmSTLB_INV_SET, 0);
 
-       mutex_unlock(&hdev->mmu_cache_lock);
-
        if (rc) {
                dev_err_ratelimited(hdev->dev,
                                        "MMU cache invalidation timeout\n");
@@ -7371,8 +7626,6 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
                hdev->hard_reset_pending)
                return 0;
 
-       mutex_lock(&hdev->mmu_cache_lock);
-
        if (hdev->pldm)
                timeout_usec = GAUDI_PLDM_MMU_TIMEOUT_USEC;
        else
@@ -7400,8 +7653,6 @@ static int gaudi_mmu_invalidate_cache_range(struct hl_device *hdev,
                1000,
                timeout_usec);
 
-       mutex_unlock(&hdev->mmu_cache_lock);
-
        if (rc) {
                dev_err_ratelimited(hdev->dev,
                                        "MMU cache invalidation timeout\n");
@@ -7463,7 +7714,7 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
        if (!(gaudi->hw_cap_initialized & HW_CAP_CPU_Q))
                return 0;
 
-       rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
+       rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
        if (rc)
                return rc;
 
@@ -7483,13 +7734,14 @@ static int gaudi_cpucp_info_get(struct hl_device *hdev)
        return 0;
 }
 
-static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
-                                       struct seq_file *s)
+static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
+                                       u8 mask_len, struct seq_file *s)
 {
        struct gaudi_device *gaudi = hdev->asic_specific;
        const char *fmt = "%-5d%-9s%#-14x%#-12x%#x\n";
        const char *mme_slave_fmt = "%-5d%-9s%-14s%-12s%#x\n";
        const char *nic_fmt = "%-5d%-9s%#-14x%#x\n";
+       unsigned long *mask = (unsigned long *)mask_arr;
        u32 qm_glbl_sts0, qm_cgm_sts, dma_core_sts0, tpc_cfg_sts, mme_arch_sts;
        bool is_idle = true, is_eng_idle, is_slave;
        u64 offset;
@@ -7515,9 +7767,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
                                IS_DMA_IDLE(dma_core_sts0);
                is_idle &= is_eng_idle;
 
-               if (mask)
-                       *mask |= ((u64) !is_eng_idle) <<
-                                       (GAUDI_ENGINE_ID_DMA_0 + dma_id);
+               if (mask && !is_eng_idle)
+                       set_bit(GAUDI_ENGINE_ID_DMA_0 + dma_id, mask);
                if (s)
                        seq_printf(s, fmt, dma_id,
                                is_eng_idle ? "Y" : "N", qm_glbl_sts0,
@@ -7538,9 +7789,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
                                IS_TPC_IDLE(tpc_cfg_sts);
                is_idle &= is_eng_idle;
 
-               if (mask)
-                       *mask |= ((u64) !is_eng_idle) <<
-                                               (GAUDI_ENGINE_ID_TPC_0 + i);
+               if (mask && !is_eng_idle)
+                       set_bit(GAUDI_ENGINE_ID_TPC_0 + i, mask);
                if (s)
                        seq_printf(s, fmt, i,
                                is_eng_idle ? "Y" : "N",
@@ -7567,9 +7817,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
 
                is_idle &= is_eng_idle;
 
-               if (mask)
-                       *mask |= ((u64) !is_eng_idle) <<
-                                               (GAUDI_ENGINE_ID_MME_0 + i);
+               if (mask && !is_eng_idle)
+                       set_bit(GAUDI_ENGINE_ID_MME_0 + i, mask);
                if (s) {
                        if (!is_slave)
                                seq_printf(s, fmt, i,
@@ -7595,9 +7844,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
                        is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
                        is_idle &= is_eng_idle;
 
-                       if (mask)
-                               *mask |= ((u64) !is_eng_idle) <<
-                                               (GAUDI_ENGINE_ID_NIC_0 + port);
+                       if (mask && !is_eng_idle)
+                               set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
                        if (s)
                                seq_printf(s, nic_fmt, port,
                                                is_eng_idle ? "Y" : "N",
@@ -7611,9 +7859,8 @@ static bool gaudi_is_device_idle(struct hl_device *hdev, u64 *mask,
                        is_eng_idle = IS_QM_IDLE(qm_glbl_sts0, qm_cgm_sts);
                        is_idle &= is_eng_idle;
 
-                       if (mask)
-                               *mask |= ((u64) !is_eng_idle) <<
-                                               (GAUDI_ENGINE_ID_NIC_0 + port);
+                       if (mask && !is_eng_idle)
+                               set_bit(GAUDI_ENGINE_ID_NIC_0 + port, mask);
                        if (s)
                                seq_printf(s, nic_fmt, port,
                                                is_eng_idle ? "Y" : "N",
@@ -7876,18 +8123,16 @@ static void gaudi_internal_cb_pool_fini(struct hl_device *hdev,
 
 static int gaudi_ctx_init(struct hl_ctx *ctx)
 {
+       if (ctx->asid == HL_KERNEL_ASID_ID)
+               return 0;
+
        gaudi_mmu_prepare(ctx->hdev, ctx->asid);
        return gaudi_internal_cb_pool_init(ctx->hdev, ctx);
 }
 
 static void gaudi_ctx_fini(struct hl_ctx *ctx)
 {
-       struct hl_device *hdev = ctx->hdev;
-
-       /* Gaudi will NEVER support more then a single compute context.
-        * Therefore, don't clear anything unless it is the compute context
-        */
-       if (hdev->compute_ctx != ctx)
+       if (ctx->asid == HL_KERNEL_ASID_ID)
                return;
 
        gaudi_internal_cb_pool_fini(ctx->hdev, ctx);
@@ -7928,10 +8173,10 @@ static u32 gaudi_gen_signal_cb(struct hl_device *hdev, void *data, u16 sob_id,
        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, sob_id * 4);
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 3); /* W_S SOB base */
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, eb);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, eb);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 
        pkt->value = cpu_to_le32(value);
        pkt->ctl = cpu_to_le32(ctl);
@@ -7948,10 +8193,10 @@ static u32 gaudi_add_mon_msg_short(struct packet_msg_short *pkt, u32 value,
 
        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, addr);
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2);  /* W_S MON base */
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 0); /* last pkt MB */
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 0); /* last pkt MB */
 
        pkt->value = cpu_to_le32(value);
        pkt->ctl = cpu_to_le32(ctl);
@@ -7997,10 +8242,10 @@ static u32 gaudi_add_arm_monitor_pkt(struct hl_device *hdev,
        ctl = FIELD_PREP(GAUDI_PKT_SHORT_CTL_ADDR_MASK, msg_addr_offset);
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OP_MASK, 0); /* write the value */
        ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_BASE_MASK, 2); /* W_S MON base */
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_MSG_SHORT);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 
        pkt->value = cpu_to_le32(value);
        pkt->ctl = cpu_to_le32(ctl);
@@ -8018,10 +8263,10 @@ static u32 gaudi_add_fence_pkt(struct packet_fence *pkt)
        cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_TARGET_VAL_MASK, 1);
        cfg |= FIELD_PREP(GAUDI_PKT_FENCE_CFG_ID_MASK, 2);
 
-       ctl = FIELD_PREP(GAUDI_PKT_FENCE_CTL_OPCODE_MASK, PACKET_FENCE);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_EB_MASK, 0);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_RB_MASK, 1);
-       ctl |= FIELD_PREP(GAUDI_PKT_SHORT_CTL_MB_MASK, 1);
+       ctl = FIELD_PREP(GAUDI_PKT_CTL_OPCODE_MASK, PACKET_FENCE);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_EB_MASK, 0);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_RB_MASK, 1);
+       ctl |= FIELD_PREP(GAUDI_PKT_CTL_MB_MASK, 1);
 
        pkt->cfg = cpu_to_le32(cfg);
        pkt->ctl = cpu_to_le32(ctl);
@@ -8217,12 +8462,16 @@ static u32 gaudi_gen_wait_cb(struct hl_device *hdev,
 static void gaudi_reset_sob(struct hl_device *hdev, void *data)
 {
        struct hl_hw_sob *hw_sob = (struct hl_hw_sob *) data;
+       int rc;
 
        dev_dbg(hdev->dev, "reset SOB, q_idx: %d, sob_id: %d\n", hw_sob->q_idx,
                hw_sob->sob_id);
 
-       WREG32(mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 + hw_sob->sob_id * 4,
-               0);
+       rc = gaudi_schedule_register_memset(hdev, hw_sob->q_idx,
+                       CFG_BASE + mmSYNC_MNGR_W_S_SYNC_MNGR_OBJS_SOB_OBJ_0 +
+                       hw_sob->sob_id * 4, 1, 0);
+       if (rc)
+               dev_err(hdev->dev, "failed resetting sob %u", hw_sob->sob_id);
 
        kref_init(&hw_sob->kref);
 }
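
Routing the SOB reset through gaudi_schedule_register_memset() on the SOB's own queue, instead of an immediate WREG32(), keeps the reset ordered with respect to jobs still in flight on that queue; this is presumably also why gaudi_hw_sob_group gains a queue_id field later in this patch. The prototype below is inferred from the call site, not quoted from the patch:

	/* assumed prototype, inferred from the call above */
	int gaudi_schedule_register_memset(struct hl_device *hdev,
					u32 hw_queue_id, u64 reg_base,
					u32 num_regs, u32 val);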
@@ -8246,6 +8495,24 @@ static u64 gaudi_get_device_time(struct hl_device *hdev)
        return device_time | RREG32(mmPSOC_TIMESTAMP_CNTCVL);
 }
 
+static int gaudi_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
+                               u32 *block_size, u32 *block_id)
+{
+       return -EPERM;
+}
+
+static int gaudi_block_mmap(struct hl_device *hdev,
+                               struct vm_area_struct *vma,
+                               u32 block_id, u32 block_size)
+{
+       return -EPERM;
+}
+
+static void gaudi_enable_events_from_fw(struct hl_device *hdev)
+{
+       WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR, GAUDI_EVENT_INTS_REGISTER);
+}
+
 static const struct hl_asic_funcs gaudi_funcs = {
        .early_init = gaudi_early_init,
        .early_fini = gaudi_early_fini,
@@ -8322,7 +8589,13 @@ static const struct hl_asic_funcs gaudi_funcs = {
        .set_dma_mask_from_fw = gaudi_set_dma_mask_from_fw,
        .get_device_time = gaudi_get_device_time,
        .collective_wait_init_cs = gaudi_collective_wait_init_cs,
-       .collective_wait_create_jobs = gaudi_collective_wait_create_jobs
+       .collective_wait_create_jobs = gaudi_collective_wait_create_jobs,
+       .scramble_addr = hl_mmu_scramble_addr,
+       .descramble_addr = hl_mmu_descramble_addr,
+       .ack_protection_bits_errors = gaudi_ack_protection_bits_errors,
+       .get_hw_block_id = gaudi_get_hw_block_id,
+       .hw_block_mmap = gaudi_block_mmap,
+       .enable_events_from_fw = gaudi_enable_events_from_fw
 };
 
 /**
index a7ab2d7..50bb4ad 100644
@@ -251,11 +251,13 @@ enum gaudi_nic_mask {
  * @hdev: habanalabs device structure.
  * @kref: refcount of this SOB group. group will reset once refcount is zero.
  * @base_sob_id: base sob id of this SOB group.
+ * @queue_id: id of the queue that waits on this sob group.
  */
 struct gaudi_hw_sob_group {
        struct hl_device        *hdev;
        struct kref             kref;
        u32                     base_sob_id;
+       u32                     queue_id;
 };
 
 #define NUM_SOB_GROUPS (HL_RSVD_SOBS * QMAN_STREAMS)
@@ -333,6 +335,7 @@ struct gaudi_device {
 };
 
 void gaudi_init_security(struct hl_device *hdev);
+void gaudi_ack_protection_bits_errors(struct hl_device *hdev);
 void gaudi_add_device_attr(struct hl_device *hdev,
                        struct attribute_group *dev_attr_grp);
 void gaudi_set_pll_profile(struct hl_device *hdev, enum hl_pll_frequency freq);
index 88a09d4..6e56fa1 100644
@@ -634,9 +634,21 @@ static int gaudi_config_etr(struct hl_device *hdev,
                WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
                WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
                WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-               /* Workaround for H3 #HW-2075 bug: use small data chunks */
-               WREG32(mmPSOC_ETR_AXICTL, (is_host ? 0 : 0x700) |
-                                       PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+               if (hdev->asic_prop.fw_security_disabled) {
+                       /* make ETR not privileged */
+                       val = FIELD_PREP(
+                                       PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
+                       /* make ETR non-secured (inverted logic) */
+                       val |= FIELD_PREP(
+                                       PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
+                       /*
+                        * Workaround for H3 #HW-2075 bug: use small data
+                        * chunks
+                        */
+                       val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK,
+                                                       is_host ? 0 : 7);
+                       WREG32(mmPSOC_ETR_AXICTL, val);
+               }
                WREG32(mmPSOC_ETR_DBALO,
                                lower_32_bits(input->buffer_address));
                WREG32(mmPSOC_ETR_DBAHI,
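
The rewrite above trades the magic 0x700 for named FIELD_PREP() fields and only touches PSOC_ETR_AXICTL when FW security is disabled (the register is inaccessible otherwise). It also appears to fix a latent bug: the old code OR'ed in PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT, i.e. the shift amount 1, which sets bit 0 rather than the intended non-secure bit 1. A quick equivalence check, as an illustrative helper using the masks added by this patch:

	#include <linux/bitfield.h>

	static u32 etr_axictl_example(void)
	{
		u32 val;

		val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);  /* 0x0 */
		val |= FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1); /* 0x2 */
		val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK, 7);   /* 0x700 */

		return val; /* 0x702 */
	}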
index e101816..7085f45 100644
@@ -13052,3 +13052,8 @@ void gaudi_init_security(struct hl_device *hdev)
 
        gaudi_init_protection_bits(hdev);
 }
+
+void gaudi_ack_protection_bits_errors(struct hl_device *hdev)
+{
+
+}
index 63679a7..ed566c5 100644
@@ -455,6 +455,11 @@ int goya_get_fixed_properties(struct hl_device *hdev)
 
        prop->max_pending_cs = GOYA_MAX_PENDING_CS;
 
+       prop->first_available_user_msix_interrupt = USHRT_MAX;
+
+       for (i = 0 ; i < HL_MAX_DCORES ; i++)
+               prop->first_available_cq[i] = USHRT_MAX;
+
        /* disable fw security for now, set it in a later stage */
        prop->fw_security_disabled = true;
        prop->fw_security_status_valid = false;
@@ -792,9 +797,6 @@ int goya_late_init(struct hl_device *hdev)
                return rc;
        }
 
-       WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
-                       GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
-
        return 0;
 }
 
@@ -1186,6 +1188,7 @@ static int goya_stop_external_queues(struct hl_device *hdev)
 int goya_init_cpu_queues(struct hl_device *hdev)
 {
        struct goya_device *goya = hdev->asic_specific;
+       struct asic_fixed_properties *prop = &hdev->asic_prop;
        struct hl_eq *eq;
        u32 status;
        struct hl_hw_queue *cpu_pq = &hdev->kernel_queues[GOYA_QUEUE_ID_CPU_PQ];
@@ -1238,6 +1241,10 @@ int goya_init_cpu_queues(struct hl_device *hdev)
                return -EIO;
        }
 
+       /* update FW application security bits */
+       if (prop->fw_security_status_valid)
+               prop->fw_app_security_map = RREG32(mmCPU_BOOT_DEV_STS0);
+
        goya->hw_cap_initialized |= HW_CAP_CPU_Q;
        return 0;
 }
@@ -2804,9 +2811,12 @@ void goya_ring_doorbell(struct hl_device *hdev, u32 hw_queue_id, u32 pi)
        /* ring the doorbell */
        WREG32(db_reg_offset, db_value);
 
-       if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ)
+       if (hw_queue_id == GOYA_QUEUE_ID_CPU_PQ) {
+               /* make sure device CPU will read latest data from host */
+               mb();
                WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
                                GOYA_ASYNC_EVENT_ID_PI_UPDATE);
+       }
 }
 
 void goya_pqe_write(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd)
@@ -2914,7 +2924,7 @@ static int goya_send_job_on_qman0(struct hl_device *hdev, struct hl_cs_job *job)
        else
                timeout = HL_DEVICE_TIMEOUT_USEC;
 
-       if (!hdev->asic_funcs->is_device_idle(hdev, NULL, NULL)) {
+       if (!hdev->asic_funcs->is_device_idle(hdev, NULL, 0, NULL)) {
                dev_err_ratelimited(hdev->dev,
                        "Can't send driver job on QMAN0 because the device is not idle\n");
                return -EBUSY;
@@ -3876,10 +3886,10 @@ static int goya_parse_cb_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail here */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                       (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -3948,10 +3958,10 @@ static int goya_parse_cb_no_mmu(struct hl_device *hdev,
        patched_cb_handle >>= PAGE_SHIFT;
        parser->patched_cb = hl_cb_get(hdev, &hdev->kernel_cb_mgr,
                                (u32) patched_cb_handle);
-       /* hl_cb_get should never fail here so use kernel WARN */
-       WARN(!parser->patched_cb, "DMA CB handle invalid 0x%x\n",
-                       (u32) patched_cb_handle);
+       /* hl_cb_get should never fail here */
        if (!parser->patched_cb) {
+               dev_crit(hdev->dev, "DMA CB handle invalid 0x%x\n",
+                       (u32) patched_cb_handle);
                rc = -EFAULT;
                goto out;
        }
@@ -4122,9 +4132,6 @@ static int goya_debugfs_read32(struct hl_device *hdev, u64 addr, u32 *val)
                if (ddr_bar_addr == U64_MAX)
                        rc = -EIO;
 
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *val = *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
        } else {
                rc = -EFAULT;
        }
@@ -4178,9 +4185,6 @@ static int goya_debugfs_write32(struct hl_device *hdev, u64 addr, u32 val)
                if (ddr_bar_addr == U64_MAX)
                        rc = -EIO;
 
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *(u32 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
        } else {
                rc = -EFAULT;
        }
@@ -4223,9 +4227,6 @@ static int goya_debugfs_read64(struct hl_device *hdev, u64 addr, u64 *val)
                if (ddr_bar_addr == U64_MAX)
                        rc = -EIO;
 
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *val = *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE);
-
        } else {
                rc = -EFAULT;
        }
@@ -4266,9 +4267,6 @@ static int goya_debugfs_write64(struct hl_device *hdev, u64 addr, u64 val)
                if (ddr_bar_addr == U64_MAX)
                        rc = -EIO;
 
-       } else if (addr >= HOST_PHYS_BASE && !iommu_present(&pci_bus_type)) {
-               *(u64 *) phys_to_virt(addr - HOST_PHYS_BASE) = val;
-
        } else {
                rc = -EFAULT;
        }
@@ -4877,8 +4875,6 @@ int goya_context_switch(struct hl_device *hdev, u32 asid)
 
        WREG32(mmTPC_PLL_CLK_RLX_0, 0x200020);
 
-       goya_mmu_prepare(hdev, asid);
-
        goya_clear_sm_regs(hdev);
 
        return 0;
@@ -5044,7 +5040,7 @@ static void goya_mmu_prepare(struct hl_device *hdev, u32 asid)
                return;
 
        if (asid & ~MME_QM_GLBL_SECURE_PROPS_ASID_MASK) {
-               WARN(1, "asid %u is too big\n", asid);
+               dev_crit(hdev->dev, "asid %u is too big\n", asid);
                return;
        }
 
@@ -5073,8 +5069,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
        else
                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
 
-       mutex_lock(&hdev->mmu_cache_lock);
-
        /* L0 & L1 invalidation */
        WREG32(mmSTLB_INV_ALL_START, 1);
 
@@ -5086,8 +5080,6 @@ static int goya_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard,
                1000,
                timeout_usec);
 
-       mutex_unlock(&hdev->mmu_cache_lock);
-
        if (rc) {
                dev_err_ratelimited(hdev->dev,
                                        "MMU cache invalidation timeout\n");
@@ -5117,8 +5109,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
        else
                timeout_usec = MMU_CONFIG_TIMEOUT_USEC;
 
-       mutex_lock(&hdev->mmu_cache_lock);
-
        /*
         * TODO: currently invalidate entire L0 & L1 as in regular hard
         * invalidation. Need to apply invalidation of specific cache lines with
@@ -5141,8 +5131,6 @@ static int goya_mmu_invalidate_cache_range(struct hl_device *hdev,
                1000,
                timeout_usec);
 
-       mutex_unlock(&hdev->mmu_cache_lock);
-
        if (rc) {
                dev_err_ratelimited(hdev->dev,
                                        "MMU cache invalidation timeout\n");
@@ -5172,7 +5160,7 @@ int goya_cpucp_info_get(struct hl_device *hdev)
        if (!(goya->hw_cap_initialized & HW_CAP_CPU_Q))
                return 0;
 
-       rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0);
+       rc = hl_fw_cpucp_info_get(hdev, mmCPU_BOOT_DEV_STS0, mmCPU_BOOT_ERR0);
        if (rc)
                return rc;
 
@@ -5207,11 +5195,12 @@ static void goya_disable_clock_gating(struct hl_device *hdev)
        /* clock gating not supported in Goya */
 }
 
-static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
-                               struct seq_file *s)
+static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask_arr,
+                                       u8 mask_len, struct seq_file *s)
 {
        const char *fmt = "%-5d%-9s%#-14x%#-16x%#x\n";
        const char *dma_fmt = "%-5d%-9s%#-14x%#x\n";
+       unsigned long *mask = (unsigned long *)mask_arr;
        u32 qm_glbl_sts0, cmdq_glbl_sts0, dma_core_sts0, tpc_cfg_sts,
                mme_arch_sts;
        bool is_idle = true, is_eng_idle;
@@ -5231,9 +5220,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
                                IS_DMA_IDLE(dma_core_sts0);
                is_idle &= is_eng_idle;
 
-               if (mask)
-                       *mask |= ((u64) !is_eng_idle) <<
-                                               (GOYA_ENGINE_ID_DMA_0 + i);
+               if (mask && !is_eng_idle)
+                       set_bit(GOYA_ENGINE_ID_DMA_0 + i, mask);
                if (s)
                        seq_printf(s, dma_fmt, i, is_eng_idle ? "Y" : "N",
                                        qm_glbl_sts0, dma_core_sts0);
@@ -5255,9 +5243,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
                                IS_TPC_IDLE(tpc_cfg_sts);
                is_idle &= is_eng_idle;
 
-               if (mask)
-                       *mask |= ((u64) !is_eng_idle) <<
-                                               (GOYA_ENGINE_ID_TPC_0 + i);
+               if (mask && !is_eng_idle)
+                       set_bit(GOYA_ENGINE_ID_TPC_0 + i, mask);
                if (s)
                        seq_printf(s, fmt, i, is_eng_idle ? "Y" : "N",
                                qm_glbl_sts0, cmdq_glbl_sts0, tpc_cfg_sts);
@@ -5276,8 +5263,8 @@ static bool goya_is_device_idle(struct hl_device *hdev, u64 *mask,
                        IS_MME_IDLE(mme_arch_sts);
        is_idle &= is_eng_idle;
 
-       if (mask)
-               *mask |= ((u64) !is_eng_idle) << GOYA_ENGINE_ID_MME_0;
+       if (mask && !is_eng_idle)
+               set_bit(GOYA_ENGINE_ID_MME_0, mask);
        if (s) {
                seq_printf(s, fmt, 0, is_eng_idle ? "Y" : "N", qm_glbl_sts0,
                                cmdq_glbl_sts0, mme_arch_sts);
@@ -5321,6 +5308,9 @@ static int goya_get_eeprom_data(struct hl_device *hdev, void *data,
 
 static int goya_ctx_init(struct hl_ctx *ctx)
 {
+       if (ctx->asid != HL_KERNEL_ASID_ID)
+               goya_mmu_prepare(ctx->hdev, ctx->asid);
+
        return 0;
 }
 
@@ -5399,6 +5389,24 @@ static void goya_ctx_fini(struct hl_ctx *ctx)
 
 }
 
+static int goya_get_hw_block_id(struct hl_device *hdev, u64 block_addr,
+                       u32 *block_size, u32 *block_id)
+{
+       return -EPERM;
+}
+
+static int goya_block_mmap(struct hl_device *hdev, struct vm_area_struct *vma,
+                               u32 block_id, u32 block_size)
+{
+       return -EPERM;
+}
+
+static void goya_enable_events_from_fw(struct hl_device *hdev)
+{
+       WREG32(mmGIC_DISTRIBUTOR__5_GICD_SETSPI_NSR,
+                       GOYA_ASYNC_EVENT_ID_INTS_REGISTER);
+}
+
 static const struct hl_asic_funcs goya_funcs = {
        .early_init = goya_early_init,
        .early_fini = goya_early_fini,
@@ -5475,7 +5483,13 @@ static const struct hl_asic_funcs goya_funcs = {
        .set_dma_mask_from_fw = goya_set_dma_mask_from_fw,
        .get_device_time = goya_get_device_time,
        .collective_wait_init_cs = goya_collective_wait_init_cs,
-       .collective_wait_create_jobs = goya_collective_wait_create_jobs
+       .collective_wait_create_jobs = goya_collective_wait_create_jobs,
+       .scramble_addr = hl_mmu_scramble_addr,
+       .descramble_addr = hl_mmu_descramble_addr,
+       .ack_protection_bits_errors = goya_ack_protection_bits_errors,
+       .get_hw_block_id = goya_get_hw_block_id,
+       .hw_block_mmap = goya_block_mmap,
+       .enable_events_from_fw = goya_enable_events_from_fw
 };
 
 /*
index 8b34082..23fe099 100644
@@ -173,6 +173,7 @@ void goya_init_mme_qmans(struct hl_device *hdev);
 void goya_init_tpc_qmans(struct hl_device *hdev);
 int goya_init_cpu_queues(struct hl_device *hdev);
 void goya_init_security(struct hl_device *hdev);
+void goya_ack_protection_bits_errors(struct hl_device *hdev);
 int goya_late_init(struct hl_device *hdev);
 void goya_late_fini(struct hl_device *hdev);
 
index 6fa0393..6b7445c 100644
@@ -434,8 +434,15 @@ static int goya_config_etr(struct hl_device *hdev,
                WREG32(mmPSOC_ETR_BUFWM, 0x3FFC);
                WREG32(mmPSOC_ETR_RSZ, input->buffer_size);
                WREG32(mmPSOC_ETR_MODE, input->sink_mode);
-               WREG32(mmPSOC_ETR_AXICTL,
-                               0x700 | PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT);
+               if (hdev->asic_prop.fw_security_disabled) {
+                       /* make ETR not privileged */
+                       val = FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK, 0);
+                       /* make ETR non-secured (inverted logic) */
+                       val |= FIELD_PREP(PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK, 1);
+                       /* burst size 8 */
+                       val |= FIELD_PREP(PSOC_ETR_AXICTL_WRBURSTLEN_MASK, 7);
+                       WREG32(mmPSOC_ETR_AXICTL, val);
+               }
                WREG32(mmPSOC_ETR_DBALO,
                                lower_32_bits(input->buffer_address));
                WREG32(mmPSOC_ETR_DBAHI,
index 1470183..14c3bae 100644
@@ -3120,3 +3120,8 @@ void goya_init_security(struct hl_device *hdev)
 
        goya_init_protection_bits(hdev);
 }
+
+void goya_ack_protection_bits_errors(struct hl_device *hdev)
+{
+
+}
index 00bd9b3..b77c1c1 100644
@@ -58,11 +58,25 @@ struct hl_eq_ecc_data {
        __u8 pad[7];
 };
 
+enum hl_sm_sei_cause {
+       SM_SEI_SO_OVERFLOW,
+       SM_SEI_LBW_4B_UNALIGNED,
+       SM_SEI_AXI_RESPONSE_ERR
+};
+
+struct hl_eq_sm_sei_data {
+       __le32 sei_log;
+       /* enum hl_sm_sei_cause */
+       __u8 sei_cause;
+       __u8 pad[3];
+};
+
 struct hl_eq_entry {
        struct hl_eq_header hdr;
        union {
                struct hl_eq_ecc_data ecc_data;
                struct hl_eq_hbm_ecc_data hbm_ecc_data;
+               struct hl_eq_sm_sei_data sm_sei_data;
                __le64 data[7];
        };
 };
index b637dfd..e87f5a9 100644
@@ -70,6 +70,9 @@
  *                                     checksum. Trying to program image again
  *                                     might solve this.
  *
+ * CPU_BOOT_ERR0_PLL_FAIL              PLL settings failed, meaning that one
+ *                                     of the PLLs remains in REF_CLK
+ *
  * CPU_BOOT_ERR0_ENABLED               Error registers enabled.
  *                                     This is a main indication that the
  *                                     running FW populates the error
@@ -88,6 +91,7 @@
 #define CPU_BOOT_ERR0_EFUSE_FAIL               (1 << 9)
 #define CPU_BOOT_ERR0_PRI_IMG_VER_FAIL         (1 << 10)
 #define CPU_BOOT_ERR0_SEC_IMG_VER_FAIL         (1 << 11)
+#define CPU_BOOT_ERR0_PLL_FAIL                 (1 << 12)
 #define CPU_BOOT_ERR0_ENABLED                  (1 << 31)
 
 /*
  * CPU_BOOT_DEV_STS0_PLL_INFO_EN       FW retrieval of PLL info is enabled.
  *                                     Initialized in: linux
  *
+ * CPU_BOOT_DEV_STS0_SP_SRAM_EN                SP SRAM is initialized and available
+ *                                     for use.
+ *                                     Initialized in: preboot
+ *
  * CPU_BOOT_DEV_STS0_CLK_GATE_EN       Clock Gating enabled.
  *                                     FW initialized Clock Gating.
  *                                     Initialized in: preboot
  *
+ * CPU_BOOT_DEV_STS0_HBM_ECC_EN                HBM ECC handling Enabled.
+ *                                     FW handles HBM ECC indications.
+ *                                     Initialized in: linux
+ *
+ * CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN     Packets ack value used in the armcpd
+ *                                     is set to the PI counter.
+ *                                     Initialized in: linux
+ *
  * CPU_BOOT_DEV_STS0_ENABLED           Device status register enabled.
  *                                     This is a main indication that the
  *                                     running FW populates the device status
 #define CPU_BOOT_DEV_STS0_DRAM_SCR_EN                  (1 << 9)
 #define CPU_BOOT_DEV_STS0_FW_HARD_RST_EN               (1 << 10)
 #define CPU_BOOT_DEV_STS0_PLL_INFO_EN                  (1 << 11)
+#define CPU_BOOT_DEV_STS0_SP_SRAM_EN                   (1 << 12)
 #define CPU_BOOT_DEV_STS0_CLK_GATE_EN                  (1 << 13)
+#define CPU_BOOT_DEV_STS0_HBM_ECC_EN                   (1 << 14)
+#define CPU_BOOT_DEV_STS0_PKT_PI_ACK_EN                        (1 << 15)
 #define CPU_BOOT_DEV_STS0_ENABLED                      (1 << 31)
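
Per the comment block above, CPU_BOOT_DEV_STS0_ENABLED is the master bit: the remaining capability bits are meaningful only once the running FW sets it. A hypothetical consumer, mirroring how the Goya hunk earlier in this patch caches the register (names from this series; the helper itself is not part of the patch):

	static void cache_boot_dev_status(struct hl_device *hdev)
	{
		struct asic_fixed_properties *prop = &hdev->asic_prop;
		u32 sts = RREG32(mmCPU_BOOT_DEV_STS0);

		if (!(sts & CPU_BOOT_DEV_STS0_ENABLED))
			return; /* register not populated by this FW */

		prop->fw_app_security_map = sts;
		if (sts & CPU_BOOT_DEV_STS0_HBM_ECC_EN)
			dev_dbg(hdev->dev, "FW handles HBM ECC\n");
	}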
 
 enum cpu_boot_status {
index 9ccba84..49335e8 100644
@@ -212,6 +212,10 @@ enum gaudi_async_event_id {
        GAUDI_EVENT_NIC_SEI_2 = 266,
        GAUDI_EVENT_NIC_SEI_3 = 267,
        GAUDI_EVENT_NIC_SEI_4 = 268,
+       GAUDI_EVENT_DMA_IF_SEI_0 = 277,
+       GAUDI_EVENT_DMA_IF_SEI_1 = 278,
+       GAUDI_EVENT_DMA_IF_SEI_2 = 279,
+       GAUDI_EVENT_DMA_IF_SEI_3 = 280,
        GAUDI_EVENT_PCIE_FLR = 290,
        GAUDI_EVENT_TPC0_BMON_SPMU = 300,
        GAUDI_EVENT_TPC0_KRN_ERR = 301,
index b9b90d0..b53aeda 100644
@@ -388,7 +388,10 @@ enum axi_id {
 #define RAZWI_INITIATOR_ID_X_Y_TPC6            RAZWI_INITIATOR_ID_X_Y(7, 6)
 #define RAZWI_INITIATOR_ID_X_Y_TPC7_NIC4_NIC5  RAZWI_INITIATOR_ID_X_Y(8, 6)
 
-#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT                           1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT     1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK      0x1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK      0x2
+#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK                0xF00
 
 /* STLB_CACHE_INV */
 #define STLB_CACHE_INV_PRODUCER_INDEX_SHIFT                          0
index f30f2c0..6e097ac 100644
@@ -78,6 +78,9 @@ struct packet_wreg_bulk {
        __le64 values[0]; /* data starts here */
 };
 
+#define GAUDI_PKT_LONG_CTL_OP_SHIFT            20
+#define GAUDI_PKT_LONG_CTL_OP_MASK             0x00300000
+
 struct packet_msg_long {
        __le32 value;
        __le32 ctl;
@@ -111,18 +114,6 @@ struct packet_msg_long {
 #define GAUDI_PKT_SHORT_CTL_BASE_SHIFT         22
 #define GAUDI_PKT_SHORT_CTL_BASE_MASK          0x00C00000
 
-#define GAUDI_PKT_SHORT_CTL_OPCODE_SHIFT       24
-#define GAUDI_PKT_SHORT_CTL_OPCODE_MASK                0x1F000000
-
-#define GAUDI_PKT_SHORT_CTL_EB_SHIFT           29
-#define GAUDI_PKT_SHORT_CTL_EB_MASK            0x20000000
-
-#define GAUDI_PKT_SHORT_CTL_RB_SHIFT           30
-#define GAUDI_PKT_SHORT_CTL_RB_MASK            0x40000000
-
-#define GAUDI_PKT_SHORT_CTL_MB_SHIFT           31
-#define GAUDI_PKT_SHORT_CTL_MB_MASK            0x80000000
-
 struct packet_msg_short {
        __le32 value;
        __le32 ctl;
@@ -146,18 +137,6 @@ struct packet_msg_prot {
 #define GAUDI_PKT_FENCE_CTL_PRED_SHIFT         0
 #define GAUDI_PKT_FENCE_CTL_PRED_MASK          0x0000001F
 
-#define GAUDI_PKT_FENCE_CTL_OPCODE_SHIFT       24
-#define GAUDI_PKT_FENCE_CTL_OPCODE_MASK                0x1F000000
-
-#define GAUDI_PKT_FENCE_CTL_EB_SHIFT           29
-#define GAUDI_PKT_FENCE_CTL_EB_MASK            0x20000000
-
-#define GAUDI_PKT_FENCE_CTL_RB_SHIFT           30
-#define GAUDI_PKT_FENCE_CTL_RB_MASK            0x40000000
-
-#define GAUDI_PKT_FENCE_CTL_MB_SHIFT           31
-#define GAUDI_PKT_FENCE_CTL_MB_MASK            0x80000000
-
 struct packet_fence {
        __le32 cfg;
        __le32 ctl;
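
The deletions above are possible because the OPCODE, EB, RB and MB fields occupy the same bit positions (24-28, 29, 30 and 31) in the ctl word of every Gaudi packet type, so one common set of masks replaces the per-packet copies. The shared definitions this patch switches to, with values matching the removed per-type masks:

	/* common ctl-word fields, defined once elsewhere in this patch */
	#define GAUDI_PKT_CTL_OPCODE_MASK	0x1F000000
	#define GAUDI_PKT_CTL_EB_MASK		0x20000000
	#define GAUDI_PKT_CTL_RB_MASK		0x40000000
	#define GAUDI_PKT_CTL_MB_MASK		0x80000000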
index 067489b..9ff3cb2 100644
 #define DMA_QM_3_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 #define DMA_QM_4_GLBL_CFG1_DMA_STOP_SHIFT DMA_QM_0_GLBL_CFG1_DMA_STOP_SHIFT
 
-#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT                           1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_SHIFT     1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT0_MASK      0x1
+#define PSOC_ETR_AXICTL_PROTCTRLBIT1_MASK      0x2
+#define PSOC_ETR_AXICTL_WRBURSTLEN_MASK                0xF00
 
 #endif /* ASIC_REG_GOYA_MASKS_H_ */
index 2d778d0..c0fe329 100644
@@ -2288,15 +2288,13 @@ crq_failed:
        return -EPERM;
 }
 
-static int ibmvmc_remove(struct vio_dev *vdev)
+static void ibmvmc_remove(struct vio_dev *vdev)
 {
        struct crq_server_adapter *adapter = dev_get_drvdata(&vdev->dev);
 
        dev_info(adapter->dev, "Entering remove for UA 0x%x\n",
                 vdev->unit_address);
        ibmvmc_release_crq_queue(adapter);
-
-       return 0;
 }
 
 static struct vio_device_id ibmvmc_device_table[] = {
index 6b888d0..aa12097 100644
@@ -16,6 +16,7 @@ lkdtm-$(CONFIG_PPC_BOOK3S_64) += powerpc.o
 KASAN_SANITIZE_rodata.o                := n
 KASAN_SANITIZE_stackleak.o     := n
 KCOV_INSTRUMENT_rodata.o       := n
+CFLAGS_REMOVE_rodata.o         += $(CC_FLAGS_LTO)
 
 OBJCOPYFLAGS :=
 OBJCOPYFLAGS_rodata_objcopy.o  := \
index 2907db2..935acc6 100644
@@ -44,7 +44,8 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, u8 vtag,
        bus = cl->dev;
 
        mutex_lock(&bus->device_lock);
-       if (bus->dev_state != MEI_DEV_ENABLED) {
+       if (bus->dev_state != MEI_DEV_ENABLED &&
+           bus->dev_state != MEI_DEV_POWERING_DOWN) {
                rets = -ENODEV;
                goto out;
        }
@@ -60,6 +61,13 @@ ssize_t __mei_cl_send(struct mei_cl *cl, u8 *buf, size_t length, u8 vtag,
                goto out;
        }
 
+       if (vtag) {
+               /* Check if vtag is supported by client */
+               rets = mei_cl_vt_support_check(cl);
+               if (rets)
+                       goto out;
+       }
+
        if (length > mei_cl_mtu(cl)) {
                rets = -EFBIG;
                goto out;
@@ -128,7 +136,8 @@ ssize_t __mei_cl_recv(struct mei_cl *cl, u8 *buf, size_t length, u8 *vtag,
        bus = cl->dev;
 
        mutex_lock(&bus->device_lock);
-       if (bus->dev_state != MEI_DEV_ENABLED) {
+       if (bus->dev_state != MEI_DEV_ENABLED &&
+           bus->dev_state != MEI_DEV_POWERING_DOWN) {
                rets = -ENODEV;
                goto out;
        }
@@ -878,22 +887,17 @@ static int mei_cl_device_probe(struct device *dev)
 static int mei_cl_device_remove(struct device *dev)
 {
        struct mei_cl_device *cldev = to_mei_cl_device(dev);
-       struct mei_cl_driver *cldrv;
-       int ret = 0;
+       struct mei_cl_driver *cldrv = to_mei_cl_driver(dev->driver);
 
-       if (!cldev || !dev->driver)
-               return 0;
-
-       cldrv = to_mei_cl_driver(dev->driver);
        if (cldrv->remove)
-               ret = cldrv->remove(cldev);
+               cldrv->remove(cldev);
 
        mei_cldev_unregister_callbacks(cldev);
 
        mei_cl_bus_module_put(cldev);
        module_put(THIS_MODULE);
 
-       return ret;
+       return 0;
 }
 
 static ssize_t name_show(struct device *dev, struct device_attribute *a,
index a56d413..2cc370a 100644
@@ -9,6 +9,7 @@
 #include <linux/delay.h>
 #include <linux/slab.h>
 #include <linux/pm_runtime.h>
+#include <linux/dma-mapping.h>
 
 #include <linux/mei.h>
 
@@ -990,7 +991,8 @@ int mei_cl_disconnect(struct mei_cl *cl)
                return 0;
        }
 
-       if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+       if (dev->dev_state == MEI_DEV_POWERING_DOWN ||
+           dev->dev_state == MEI_DEV_POWER_DOWN) {
                cl_dbg(dev, cl, "Device is powering down, don't bother with disconnection\n");
                mei_cl_set_disconnected(cl);
                return 0;
@@ -1737,7 +1739,7 @@ static inline u8 mei_ext_hdr_set_vtag(struct mei_ext_hdr *ext, u8 vtag)
  *
  * @cb: message callback structure
  *
- * Return: a pointer to initialized header
+ * Return: a pointer to initialized header or ERR_PTR on failure
  */
 static struct mei_msg_hdr *mei_msg_hdr_init(const struct mei_cl_cb *cb)
 {
@@ -2113,6 +2115,8 @@ void mei_cl_complete(struct mei_cl *cl, struct mei_cl_cb *cb)
        case MEI_FOP_DISCONNECT:
        case MEI_FOP_NOTIFY_STOP:
        case MEI_FOP_NOTIFY_START:
+       case MEI_FOP_DMA_MAP:
+       case MEI_FOP_DMA_UNMAP:
                if (waitqueue_active(&cl->wait))
                        wake_up(&cl->wait);
 
@@ -2139,3 +2143,283 @@ void mei_cl_all_disconnect(struct mei_device *dev)
        list_for_each_entry(cl, &dev->file_list, link)
                mei_cl_set_disconnected(cl);
 }
+
+static struct mei_cl *mei_cl_dma_map_find(struct mei_device *dev, u8 buffer_id)
+{
+       struct mei_cl *cl;
+
+       list_for_each_entry(cl, &dev->file_list, link)
+               if (cl->dma.buffer_id == buffer_id)
+                       return cl;
+       return NULL;
+}
+
+/**
+ * mei_cl_irq_dma_map - send client dma map request in irq_thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_irq_dma_map(struct mei_cl *cl, struct mei_cl_cb *cb,
+                      struct list_head *cmpl_list)
+{
+       struct mei_device *dev = cl->dev;
+       u32 msg_slots;
+       int slots;
+       int ret;
+
+       msg_slots = mei_hbm2slots(sizeof(struct hbm_client_dma_map_request));
+       slots = mei_hbuf_empty_slots(dev);
+       if (slots < 0)
+               return -EOVERFLOW;
+
+       if ((u32)slots < msg_slots)
+               return -EMSGSIZE;
+
+       ret = mei_hbm_cl_dma_map_req(dev, cl);
+       if (ret) {
+               cl->status = ret;
+               list_move_tail(&cb->list, cmpl_list);
+               return ret;
+       }
+
+       list_move_tail(&cb->list, &dev->ctrl_rd_list);
+       return 0;
+}
+
+/**
+ * mei_cl_irq_dma_unmap - send client dma unmap request in irq_thread context
+ *
+ * @cl: client
+ * @cb: callback block.
+ * @cmpl_list: complete list.
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_irq_dma_unmap(struct mei_cl *cl, struct mei_cl_cb *cb,
+                        struct list_head *cmpl_list)
+{
+       struct mei_device *dev = cl->dev;
+       u32 msg_slots;
+       int slots;
+       int ret;
+
+       msg_slots = mei_hbm2slots(sizeof(struct hbm_client_dma_unmap_request));
+       slots = mei_hbuf_empty_slots(dev);
+       if (slots < 0)
+               return -EOVERFLOW;
+
+       if ((u32)slots < msg_slots)
+               return -EMSGSIZE;
+
+       ret = mei_hbm_cl_dma_unmap_req(dev, cl);
+       if (ret) {
+               cl->status = ret;
+               list_move_tail(&cb->list, cmpl_list);
+               return ret;
+       }
+
+       list_move_tail(&cb->list, &dev->ctrl_rd_list);
+       return 0;
+}
+
+static int mei_cl_dma_alloc(struct mei_cl *cl, u8 buf_id, size_t size)
+{
+       cl->dma.vaddr = dmam_alloc_coherent(cl->dev->dev, size,
+                                           &cl->dma.daddr, GFP_KERNEL);
+       if (!cl->dma.vaddr)
+               return -ENOMEM;
+
+       cl->dma.buffer_id = buf_id;
+       cl->dma.size = size;
+
+       return 0;
+}
+
+static void mei_cl_dma_free(struct mei_cl *cl)
+{
+       cl->dma.buffer_id = 0;
+       dmam_free_coherent(cl->dev->dev,
+                          cl->dma.size, cl->dma.vaddr, cl->dma.daddr);
+       cl->dma.size = 0;
+       cl->dma.vaddr = NULL;
+       cl->dma.daddr = 0;
+}
+
+/**
+ * mei_cl_dma_alloc_and_map - send client dma map request
+ *
+ * @cl: host client
+ * @fp: pointer to file structure
+ * @buffer_id: id of the mapped buffer
+ * @size: size of the buffer
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return:
+ * * -ENODEV
+ * * -EINVAL
+ * * -EOPNOTSUPP
+ * * -EPROTO
+ * * -ENOMEM
+ */
+int mei_cl_dma_alloc_and_map(struct mei_cl *cl, const struct file *fp,
+                            u8 buffer_id, size_t size)
+{
+       struct mei_device *dev;
+       struct mei_cl_cb *cb;
+       int rets;
+
+       if (WARN_ON(!cl || !cl->dev))
+               return -ENODEV;
+
+       dev = cl->dev;
+
+       if (!dev->hbm_f_cd_supported) {
+               cl_dbg(dev, cl, "client dma is not supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       if (buffer_id == 0)
+               return -EINVAL;
+
+       if (mei_cl_is_connected(cl))
+               return -EPROTO;
+
+       if (cl->dma_mapped)
+               return -EPROTO;
+
+       if (mei_cl_dma_map_find(dev, buffer_id)) {
+               cl_dbg(dev, cl, "client dma with id %d is already allocated\n",
+                      cl->dma.buffer_id);
+               return -EPROTO;
+       }
+
+       rets = pm_runtime_get(dev->dev);
+       if (rets < 0 && rets != -EINPROGRESS) {
+               pm_runtime_put_noidle(dev->dev);
+               cl_err(dev, cl, "rpm: get failed %d\n", rets);
+               return rets;
+       }
+
+       rets = mei_cl_dma_alloc(cl, buffer_id, size);
+       if (rets) {
+               pm_runtime_put_noidle(dev->dev);
+               return rets;
+       }
+
+       cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DMA_MAP, fp);
+       if (!cb) {
+               rets = -ENOMEM;
+               goto out;
+       }
+
+       if (mei_hbuf_acquire(dev)) {
+               if (mei_hbm_cl_dma_map_req(dev, cl)) {
+                       rets = -ENODEV;
+                       goto out;
+               }
+               list_move_tail(&cb->list, &dev->ctrl_rd_list);
+       }
+
+       mutex_unlock(&dev->device_lock);
+       wait_event_timeout(cl->wait,
+                          cl->dma_mapped || cl->status,
+                          mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+       mutex_lock(&dev->device_lock);
+
+       if (!cl->dma_mapped && !cl->status)
+               cl->status = -EFAULT;
+
+       rets = cl->status;
+
+out:
+       if (rets)
+               mei_cl_dma_free(cl);
+
+       cl_dbg(dev, cl, "rpm: autosuspend\n");
+       pm_runtime_mark_last_busy(dev->dev);
+       pm_runtime_put_autosuspend(dev->dev);
+
+       mei_io_cb_free(cb);
+       return rets;
+}
+
+/**
+ * mei_cl_dma_unmap - send client dma unmap request
+ *
+ * @cl: host client
+ * @fp: pointer to file structure
+ *
+ * Locking: called under "dev->device_lock" lock
+ *
+ * Return: 0 on success and error otherwise.
+ */
+int mei_cl_dma_unmap(struct mei_cl *cl, const struct file *fp)
+{
+       struct mei_device *dev;
+       struct mei_cl_cb *cb;
+       int rets;
+
+       if (WARN_ON(!cl || !cl->dev))
+               return -ENODEV;
+
+       dev = cl->dev;
+
+       if (!dev->hbm_f_cd_supported) {
+               cl_dbg(dev, cl, "client dma is not supported\n");
+               return -EOPNOTSUPP;
+       }
+
+       /* do not allow unmap for connected client */
+       if (mei_cl_is_connected(cl))
+               return -EPROTO;
+
+       if (!cl->dma_mapped)
+               return -EPROTO;
+
+       rets = pm_runtime_get(dev->dev);
+       if (rets < 0 && rets != -EINPROGRESS) {
+               pm_runtime_put_noidle(dev->dev);
+               cl_err(dev, cl, "rpm: get failed %d\n", rets);
+               return rets;
+       }
+
+       cb = mei_cl_enqueue_ctrl_wr_cb(cl, 0, MEI_FOP_DMA_UNMAP, fp);
+       if (!cb) {
+               rets = -ENOMEM;
+               goto out;
+       }
+
+       if (mei_hbuf_acquire(dev)) {
+               if (mei_hbm_cl_dma_unmap_req(dev, cl)) {
+                       rets = -ENODEV;
+                       goto out;
+               }
+               list_move_tail(&cb->list, &dev->ctrl_rd_list);
+       }
+
+       mutex_unlock(&dev->device_lock);
+       wait_event_timeout(cl->wait,
+                          !cl->dma_mapped || cl->status,
+                          mei_secs_to_jiffies(MEI_CL_CONNECT_TIMEOUT));
+       mutex_lock(&dev->device_lock);
+
+       if (cl->dma_mapped && !cl->status)
+               cl->status = -EFAULT;
+
+       rets = cl->status;
+
+       if (!rets)
+               mei_cl_dma_free(cl);
+out:
+       cl_dbg(dev, cl, "rpm: autosuspend\n");
+       pm_runtime_mark_last_busy(dev->dev);
+       pm_runtime_put_autosuspend(dev->dev);
+
+       mei_io_cb_free(cb);
+       return rets;
+}
index 9e08a98..b12cdcd 100644
@@ -265,6 +265,14 @@ void mei_cl_notify(struct mei_cl *cl);
 
 void mei_cl_all_disconnect(struct mei_device *dev);
 
+int mei_cl_irq_dma_map(struct mei_cl *cl, struct mei_cl_cb *cb,
+                      struct list_head *cmpl_list);
+int mei_cl_irq_dma_unmap(struct mei_cl *cl, struct mei_cl_cb *cb,
+                        struct list_head *cmpl_list);
+int mei_cl_dma_alloc_and_map(struct mei_cl *cl, const struct file *fp,
+                            u8 buffer_id, size_t size);
+int mei_cl_dma_unmap(struct mei_cl *cl, const struct file *fp);
+
 #define MEI_CL_FMT "cl:host=%02d me=%02d "
 #define MEI_CL_PRM(cl) (cl)->host_client_id, mei_cl_me_id(cl)
 
index 3ab1a43..1ce61e9 100644
@@ -106,6 +106,7 @@ static int mei_dbgfs_devstate_show(struct seq_file *m, void *unused)
                seq_printf(m, "\tDR: %01d\n", dev->hbm_f_dr_supported);
                seq_printf(m, "\tVT: %01d\n", dev->hbm_f_vt_supported);
                seq_printf(m, "\tCAP: %01d\n", dev->hbm_f_cap_supported);
+               seq_printf(m, "\tCD: %01d\n", dev->hbm_f_cd_supported);
        }
 
        seq_printf(m, "pg:  %s, %s\n",
index 686e8b6..d0277c7 100644
@@ -339,7 +339,9 @@ static int mei_hbm_capabilities_req(struct mei_device *dev)
        memset(&req, 0, sizeof(req));
        req.hbm_cmd = MEI_HBM_CAPABILITIES_REQ_CMD;
        if (dev->hbm_f_vt_supported)
-               req.capability_requested[0] = HBM_CAP_VT;
+               req.capability_requested[0] |= HBM_CAP_VT;
+       if (dev->hbm_f_cd_supported)
+               req.capability_requested[0] |= HBM_CAP_CD;
 
        ret = mei_hbm_write_message(dev, &mei_hdr, &req);
        if (ret) {
@@ -593,6 +595,117 @@ static void mei_hbm_cl_notify(struct mei_device *dev,
 }
 
 /**
+ * mei_hbm_cl_dma_map_req - send client dma map request
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ *
+ * Return: 0 on success and -EIO on write failure
+ */
+int mei_hbm_cl_dma_map_req(struct mei_device *dev, struct mei_cl *cl)
+{
+       struct mei_msg_hdr mei_hdr;
+       struct hbm_client_dma_map_request req;
+       int ret;
+
+       mei_hbm_hdr(&mei_hdr, sizeof(req));
+
+       memset(&req, 0, sizeof(req));
+
+       req.hbm_cmd = MEI_HBM_CLIENT_DMA_MAP_REQ_CMD;
+       req.client_buffer_id = cl->dma.buffer_id;
+       req.address_lsb = lower_32_bits(cl->dma.daddr);
+       req.address_msb = upper_32_bits(cl->dma.daddr);
+       req.size = cl->dma.size;
+
+       ret = mei_hbm_write_message(dev, &mei_hdr, &req);
+       if (ret)
+               dev_err(dev->dev, "dma map request failed: ret = %d\n", ret);
+
+       return ret;
+}
+
+/**
+ * mei_hbm_cl_dma_unmap_req - send client dma unmap request
+ *
+ * @dev: the device structure
+ * @cl: mei host client
+ *
+ * Return: 0 on success and -EIO on write failure
+ */
+int mei_hbm_cl_dma_unmap_req(struct mei_device *dev, struct mei_cl *cl)
+{
+       struct mei_msg_hdr mei_hdr;
+       struct hbm_client_dma_unmap_request req;
+       int ret;
+
+       mei_hbm_hdr(&mei_hdr, sizeof(req));
+
+       memset(&req, 0, sizeof(req));
+
+       req.hbm_cmd = MEI_HBM_CLIENT_DMA_UNMAP_REQ_CMD;
+       req.client_buffer_id = cl->dma.buffer_id;
+
+       ret = mei_hbm_write_message(dev, &mei_hdr, &req);
+       if (ret)
+               dev_err(dev->dev, "dma unmap request failed: ret = %d\n", ret);
+
+       return ret;
+}
+
+static void mei_hbm_cl_dma_map_res(struct mei_device *dev,
+                                  struct hbm_client_dma_response *res)
+{
+       struct mei_cl *cl;
+       struct mei_cl_cb *cb, *next;
+
+       cl = NULL;
+       list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) {
+               if (cb->fop_type != MEI_FOP_DMA_MAP)
+                       continue;
+               if (!cb->cl->dma.buffer_id || cb->cl->dma_mapped)
+                       continue;
+
+               cl = cb->cl;
+               break;
+       }
+       if (!cl)
+               return;
+
+       dev_dbg(dev->dev, "cl dma map result = %d\n", res->status);
+       cl->status = res->status;
+       if (!cl->status)
+               cl->dma_mapped = 1;
+       wake_up(&cl->wait);
+}
+
+static void mei_hbm_cl_dma_unmap_res(struct mei_device *dev,
+                                    struct hbm_client_dma_response *res)
+{
+       struct mei_cl *cl;
+       struct mei_cl_cb *cb, *next;
+
+       cl = NULL;
+       list_for_each_entry_safe(cb, next, &dev->ctrl_rd_list, list) {
+               if (cb->fop_type != MEI_FOP_DMA_UNMAP)
+                       continue;
+               if (!cb->cl->dma.buffer_id || !cb->cl->dma_mapped)
+                       continue;
+
+               cl = cb->cl;
+               break;
+       }
+       if (!cl)
+               return;
+
+       dev_dbg(dev->dev, "cl dma unmap result = %d\n", res->status);
+       cl->status = res->status;
+       if (!cl->status)
+               cl->dma_mapped = 0;
+       wake_up(&cl->wait);
+}
+
+/**
  * mei_hbm_prop_req - request property for a single client
  *
  * @dev: the device structure
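Both response handlers share one matching strategy: scan dev->ctrl_rd_list for the first pending DMA map (or unmap) callback whose client is in the expected buffer state, store the firmware status on the client, and wake the waiter on cl->wait. A userspace analogue of that lookup, with a hypothetical struct standing in for mei_cl_cb:

    #include <stdio.h>

    enum fop { FOP_DMA_MAP, FOP_DMA_UNMAP };

    struct cb { enum fop type; int buffer_id; int mapped; };

    /* analogue of mei_hbm_cl_dma_map_res(): pick the first pending map
     * request whose client has a buffer id but is not yet mapped */
    static struct cb *find_map_waiter(struct cb *cbs, int n)
    {
        int i;

        for (i = 0; i < n; i++) {
            if (cbs[i].type != FOP_DMA_MAP)
                continue;
            if (!cbs[i].buffer_id || cbs[i].mapped)
                continue;
            return &cbs[i];
        }
        return NULL;
    }

    int main(void)
    {
        struct cb pending[] = {
            { FOP_DMA_UNMAP, 1, 1 },
            { FOP_DMA_MAP,   2, 0 },  /* matches */
        };
        struct cb *w = find_map_waiter(pending, 2);

        printf("matched buffer_id: %d\n", w ? w->buffer_id : -1);
        return 0;
    }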
@@ -1085,6 +1198,13 @@ static void mei_hbm_config_features(struct mei_device *dev)
            (dev->version.major_version == HBM_MAJOR_VERSION_CAP &&
             dev->version.minor_version >= HBM_MINOR_VERSION_CAP))
                dev->hbm_f_cap_supported = 1;
+
+       /* Client DMA Support */
+       dev->hbm_f_cd_supported = 0;
+       if (dev->version.major_version > HBM_MAJOR_VERSION_CD ||
+           (dev->version.major_version == HBM_MAJOR_VERSION_CD &&
+            dev->version.minor_version >= HBM_MINOR_VERSION_CD))
+               dev->hbm_f_cd_supported = 1;
 }
 
 /**
@@ -1124,6 +1244,7 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
        struct mei_hbm_cl_cmd *cl_cmd;
        struct hbm_client_connect_request *disconnect_req;
        struct hbm_flow_control *fctrl;
+       struct hbm_client_dma_response *client_dma_res;
 
        /* read the message to our buffer */
        BUG_ON(hdr->length >= sizeof(dev->rd_msg_buf));
@@ -1177,6 +1298,10 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
                if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
                    dev->hbm_state != MEI_HBM_STARTING) {
+                       if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+                               dev_dbg(dev->dev, "hbm: start: on shutdown, ignoring\n");
+                               return 0;
+                       }
                        dev_err(dev->dev, "hbm: start: state mismatch, [%d, %d]\n",
                                dev->dev_state, dev->hbm_state);
                        return -EPROTO;
@@ -1215,7 +1340,12 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
                dev->init_clients_timer = 0;
 
-               if (dev->hbm_state != MEI_HBM_CAP_SETUP) {
+               if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+                   dev->hbm_state != MEI_HBM_CAP_SETUP) {
+                       if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+                               dev_dbg(dev->dev, "hbm: capabilities response: on shutdown, ignoring\n");
+                               return 0;
+                       }
                        dev_err(dev->dev, "hbm: capabilities response: state mismatch, [%d, %d]\n",
                                dev->dev_state, dev->hbm_state);
                        return -EPROTO;
@@ -1224,6 +1354,8 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
                capability_res = (struct hbm_capability_response *)mei_msg;
                if (!(capability_res->capability_granted[0] & HBM_CAP_VT))
                        dev->hbm_f_vt_supported = 0;
+               if (!(capability_res->capability_granted[0] & HBM_CAP_CD))
+                       dev->hbm_f_cd_supported = 0;
 
                if (dev->hbm_f_dr_supported) {
                        if (mei_dmam_ring_alloc(dev))
@@ -1247,7 +1379,12 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
                dev->init_clients_timer = 0;
 
-               if (dev->hbm_state != MEI_HBM_DR_SETUP) {
+               if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
+                   dev->hbm_state != MEI_HBM_DR_SETUP) {
+                       if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+                               dev_dbg(dev->dev, "hbm: dma setup response: on shutdown, ignoring\n");
+                               return 0;
+                       }
                        dev_err(dev->dev, "hbm: dma setup response: state mismatch, [%d, %d]\n",
                                dev->dev_state, dev->hbm_state);
                        return -EPROTO;
@@ -1311,6 +1448,10 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
                if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
                    dev->hbm_state != MEI_HBM_CLIENT_PROPERTIES) {
+                       if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+                               dev_dbg(dev->dev, "hbm: properties response: on shutdown, ignoring\n");
+                               return 0;
+                       }
                        dev_err(dev->dev, "hbm: properties response: state mismatch, [%d, %d]\n",
                                dev->dev_state, dev->hbm_state);
                        return -EPROTO;
@@ -1349,6 +1490,10 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
 
                if (dev->dev_state != MEI_DEV_INIT_CLIENTS ||
                    dev->hbm_state != MEI_HBM_ENUM_CLIENTS) {
+                       if (dev->dev_state == MEI_DEV_POWER_DOWN) {
+                               dev_dbg(dev->dev, "hbm: enumeration response: on shutdown, ignoring\n");
+                               return 0;
+                       }
                        dev_err(dev->dev, "hbm: enumeration response: state mismatch, [%d, %d]\n",
                                dev->dev_state, dev->hbm_state);
                        return -EPROTO;
@@ -1373,7 +1518,7 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
                        return -EPROTO;
                }
 
-               dev->dev_state = MEI_DEV_POWER_DOWN;
+               mei_set_devstate(dev, MEI_DEV_POWER_DOWN);
                dev_info(dev->dev, "hbm: stop response: resetting.\n");
                /* force the reset */
                return -EPROTO;
@@ -1426,6 +1571,18 @@ int mei_hbm_dispatch(struct mei_device *dev, struct mei_msg_hdr *hdr)
                mei_hbm_cl_notify(dev, cl_cmd);
                break;
 
+       case MEI_HBM_CLIENT_DMA_MAP_RES_CMD:
+               dev_dbg(dev->dev, "hbm: client dma map response: message received.\n");
+               client_dma_res = (struct hbm_client_dma_response *)mei_msg;
+               mei_hbm_cl_dma_map_res(dev, client_dma_res);
+               break;
+
+       case MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD:
+               dev_dbg(dev->dev, "hbm: client dma unmap response: message received.\n");
+               client_dma_res = (struct hbm_client_dma_response *)mei_msg;
+               mei_hbm_cl_dma_unmap_res(dev, client_dma_res);
+               break;
+
        default:
                WARN(1, "hbm: wrong command %d\n", mei_msg->hbm_cmd);
                return -EPROTO;
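The two new dispatch cases lean on the HBM convention that the first byte of every bus message is the command opcode, so the receive buffer can be reinterpreted as the matching response structure. A self-contained sketch of that step, using the 0x94/0x95 opcodes and the two-byte hbm_client_dma_response layout added to hw.h below:

    #include <stdio.h>
    #include <stdint.h>
    #include <string.h>

    #define MEI_HBM_CLIENT_DMA_MAP_RES_CMD   0x94
    #define MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD 0x95

    struct hbm_client_dma_response {
        uint8_t hbm_cmd;
        uint8_t status;
    } __attribute__((packed));

    static void dispatch(const uint8_t *msg, size_t len)
    {
        struct hbm_client_dma_response res;

        if (len < sizeof(res))
            return;
        memcpy(&res, msg, sizeof(res));

        switch (res.hbm_cmd) {
        case MEI_HBM_CLIENT_DMA_MAP_RES_CMD:
            printf("dma map result = %u\n", res.status);
            break;
        case MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD:
            printf("dma unmap result = %u\n", res.status);
            break;
        }
    }

    int main(void)
    {
        uint8_t wire[] = { 0x94, 0x00 };  /* hypothetical: successful map */

        dispatch(wire, sizeof(wire));
        return 0;
    }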
index 4d95e38..cd5b08c 100644 (file)
@@ -10,6 +10,7 @@
 struct mei_device;
 struct mei_msg_hdr;
 struct mei_cl;
+struct mei_dma_data;
 
 /**
  * enum mei_hbm_state - host bus message protocol state
@@ -51,6 +52,7 @@ int mei_hbm_pg(struct mei_device *dev, u8 pg_cmd);
 void mei_hbm_pg_resume(struct mei_device *dev);
 int mei_hbm_cl_notify_req(struct mei_device *dev,
                          struct mei_cl *cl, u8 request);
-
+int mei_hbm_cl_dma_map_req(struct mei_device *dev, struct mei_cl *cl);
+int mei_hbm_cl_dma_unmap_req(struct mei_device *dev, struct mei_cl *cl);
 #endif /* _MEI_HBM_H_ */
 
index 3506a35..ec2a4fc 100644 (file)
@@ -844,16 +844,19 @@ enable_err_exit:
        return ret;
 }
 
-static int mei_hdcp_remove(struct mei_cl_device *cldev)
+static void mei_hdcp_remove(struct mei_cl_device *cldev)
 {
        struct i915_hdcp_comp_master *comp_master =
                                                mei_cldev_get_drvdata(cldev);
+       int ret;
 
        component_master_del(&cldev->dev, &mei_component_master_ops);
        kfree(comp_master);
        mei_cldev_set_drvdata(cldev, NULL);
 
-       return mei_cldev_disable(cldev);
+       ret = mei_cldev_disable(cldev);
+       if (ret)
+               dev_warn(&cldev->dev, "mei_cldev_disable() failed\n");
 }
 
 #define MEI_UUID_HDCP GUID_INIT(0xB638AB7E, 0x94E2, 0x4EA2, 0xA5, \
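Since the bus remove callback now returns void, a failure of mei_cldev_disable() can no longer be propagated and is reported in place instead. A minimal analogue of the resulting pattern (hypothetical names, not the driver's code):

    #include <stdio.h>

    static int disable_device(void)
    {
        return -1;  /* pretend the disable path failed */
    }

    /* remove callbacks return void, so errors end the line here */
    static void my_remove(void)
    {
        int ret = disable_device();

        if (ret)
            fprintf(stderr, "disable failed: %d\n", ret);
    }

    int main(void)
    {
        my_remove();
        return 0;
    }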
index 9cf8d8f..14be76d 100644 (file)
 #define MEI_DEV_ID_MCC        0x4B70  /* Mule Creek Canyon (EHL) */
 #define MEI_DEV_ID_MCC_4      0x4B75  /* Mule Creek Canyon 4 (EHL) */
 
+#define MEI_DEV_ID_EBG        0x1BE0  /* Emmitsburg WS */
+
+#define MEI_DEV_ID_ADP_S      0x7AE8  /* Alder Lake Point S */
+#define MEI_DEV_ID_ADP_LP     0x7A60  /* Alder Lake Point LP */
+
 /*
  * MEI HW Section
  */
index df2fb95..b106065 100644 (file)
 #define HBM_MINOR_VERSION_CAP              2
 #define HBM_MAJOR_VERSION_CAP              2
 
+/*
+ * MEI version with client DMA support
+ */
+#define HBM_MINOR_VERSION_CD               2
+#define HBM_MAJOR_VERSION_CD               2
+
 /* Host bus message command opcode */
 #define MEI_HBM_CMD_OP_MSK                  0x7f
 /* Host bus message command RESPONSE */
 #define MEI_HBM_CAPABILITIES_REQ_CMD        0x13
 #define MEI_HBM_CAPABILITIES_RES_CMD        0x93
 
+#define MEI_HBM_CLIENT_DMA_MAP_REQ_CMD      0x14
+#define MEI_HBM_CLIENT_DMA_MAP_RES_CMD      0x94
+
+#define MEI_HBM_CLIENT_DMA_UNMAP_REQ_CMD    0x15
+#define MEI_HBM_CLIENT_DMA_UNMAP_RES_CMD    0x95
+
 /*
  * MEI Stop Reason
  * used by hbm_host_stop_request.reason
@@ -648,6 +660,8 @@ struct hbm_dma_ring_ctrl {
 
 /* virtual tag supported */
 #define HBM_CAP_VT BIT(0)
+/* client dma supported */
+#define HBM_CAP_CD BIT(2)
 
 /**
  * struct hbm_capability_request - capability request from host to fw
@@ -671,4 +685,51 @@ struct hbm_capability_response {
        u8 capability_granted[3];
 } __packed;
 
+/**
+ * struct hbm_client_dma_map_request - client dma map request from host to fw
+ *
+ * @hbm_cmd: bus message command header
+ * @client_buffer_id: client buffer id
+ * @reserved: reserved
+ * @address_lsb: DMA address LSB
+ * @address_msb: DMA address MSB
+ * @size: DMA size
+ */
+struct hbm_client_dma_map_request {
+       u8 hbm_cmd;
+       u8 client_buffer_id;
+       u8 reserved[2];
+       u32 address_lsb;
+       u32 address_msb;
+       u32 size;
+} __packed;
+
+/**
+ * struct hbm_client_dma_unmap_request
+ *    client dma unmap request from the host to the firmware
+ *
+ * @hbm_cmd: bus message command header
+ * @status: unmap status
+ * @client_buffer_id: client buffer id
+ * @reserved: reserved
+ */
+struct hbm_client_dma_unmap_request {
+       u8 hbm_cmd;
+       u8 status;
+       u8 client_buffer_id;
+       u8 reserved;
+} __packed;
+
+/**
+ * struct hbm_client_dma_response
+ *   client dma unmap response from the firmware to the host
+ *
+ * @hbm_cmd: bus message command header
+ * @status: command status
+ */
+struct hbm_client_dma_response {
+       u8 hbm_cmd;
+       u8 status;
+} __packed;
+
 #endif
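The map request is a fixed 16-byte wire frame: the 64-bit DMA address travels as two 32-bit halves, which is what lower_32_bits()/upper_32_bits() produce in mei_hbm_cl_dma_map_req() above. A standalone sketch of filling the frame (address and size values are hypothetical):

    #include <stdio.h>
    #include <stdint.h>

    struct hbm_client_dma_map_request {
        uint8_t  hbm_cmd;
        uint8_t  client_buffer_id;
        uint8_t  reserved[2];
        uint32_t address_lsb;
        uint32_t address_msb;
        uint32_t size;
    } __attribute__((packed));

    int main(void)
    {
        uint64_t daddr = 0x0000001234abcd00ull;  /* hypothetical DMA address */
        struct hbm_client_dma_map_request req = {
            .hbm_cmd          = 0x14,  /* MEI_HBM_CLIENT_DMA_MAP_REQ_CMD */
            .client_buffer_id = 1,
            .address_lsb      = (uint32_t)(daddr & 0xffffffffu), /* lower_32_bits() */
            .address_msb      = (uint32_t)(daddr >> 32),         /* upper_32_bits() */
            .size             = 4096,
        };

        /* packed layout: 1 + 1 + 2 + 4 + 4 + 4 = 16 bytes on the wire */
        printf("frame size: %zu\n", sizeof(req));
        return 0;
    }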
index bcee777..5c8cb67 100644 (file)
@@ -303,9 +303,12 @@ void mei_stop(struct mei_device *dev)
        dev_dbg(dev->dev, "stopping the device.\n");
 
        mutex_lock(&dev->device_lock);
-       mei_set_devstate(dev, MEI_DEV_POWER_DOWN);
+       mei_set_devstate(dev, MEI_DEV_POWERING_DOWN);
        mutex_unlock(&dev->device_lock);
        mei_cl_bus_remove_devices(dev);
+       mutex_lock(&dev->device_lock);
+       mei_set_devstate(dev, MEI_DEV_POWER_DOWN);
+       mutex_unlock(&dev->device_lock);
 
        mei_cancel_work(dev);
 
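mei_stop() now passes through the intermediate MEI_DEV_POWERING_DOWN state while the bus clients are being removed, so a remove callback can still exchange HBM messages (for instance a client DMA unmap), and only then settles in MEI_DEV_POWER_DOWN, at which point the dispatch paths above start ignoring late responses. The ordering, reduced to a compilable sketch:

    #include <stdio.h>

    /* condensed subset of the states in mei_dev.h */
    enum devstate { ENABLED, POWERING_DOWN, POWER_DOWN };

    int main(void)
    {
        enum devstate st = ENABLED;

        st = POWERING_DOWN;  /* clients removed here; HBM still usable */
        st = POWER_DOWN;     /* late HBM responses are now ignored */

        printf("final state: %d\n", st);
        return 0;
    }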
index 326955b..a98f6b8 100644 (file)
@@ -295,12 +295,17 @@ static inline bool hdr_is_fixed(struct mei_msg_hdr *mei_hdr)
 static inline int hdr_is_valid(u32 msg_hdr)
 {
        struct mei_msg_hdr *mei_hdr;
+       u32 expected_len = 0;
 
        mei_hdr = (struct mei_msg_hdr *)&msg_hdr;
        if (!msg_hdr || mei_hdr->reserved)
                return -EBADMSG;
 
-       if (mei_hdr->dma_ring && mei_hdr->length != MEI_SLOT_SIZE)
+       if (mei_hdr->dma_ring)
+               expected_len += MEI_SLOT_SIZE;
+       if (mei_hdr->extended)
+               expected_len += MEI_SLOT_SIZE;
+       if (mei_hdr->length < expected_len)
                return -EBADMSG;
 
        return 0;
@@ -324,6 +329,8 @@ int mei_irq_read_handler(struct mei_device *dev,
        struct mei_cl *cl;
        int ret;
        u32 ext_meta_hdr_u32;
+       u32 hdr_size_left;
+       u32 hdr_size_ext;
        int i;
        int ext_hdr_end;
 
@@ -353,6 +360,7 @@ int mei_irq_read_handler(struct mei_device *dev,
        }
 
        ext_hdr_end = 1;
+       hdr_size_left = mei_hdr->length;
 
        if (mei_hdr->extended) {
                if (!dev->rd_msg_hdr[1]) {
@@ -363,8 +371,21 @@ int mei_irq_read_handler(struct mei_device *dev,
                        dev_dbg(dev->dev, "extended header is %08x\n",
                                ext_meta_hdr_u32);
                }
-               meta_hdr = ((struct mei_ext_meta_hdr *)
-                               dev->rd_msg_hdr + 1);
+               meta_hdr = ((struct mei_ext_meta_hdr *)dev->rd_msg_hdr + 1);
+               if (check_add_overflow((u32)sizeof(*meta_hdr),
+                                      mei_slots2data(meta_hdr->size),
+                                      &hdr_size_ext)) {
+                       dev_err(dev->dev, "extended message size too big %d\n",
+                               meta_hdr->size);
+                       return -EBADMSG;
+               }
+               if (hdr_size_left < hdr_size_ext) {
+                       dev_err(dev->dev, "corrupted message header len %d\n",
+                               mei_hdr->length);
+                       return -EBADMSG;
+               }
+               hdr_size_left -= hdr_size_ext;
+
                ext_hdr_end = meta_hdr->size + 2;
                for (i = dev->rd_msg_hdr_count; i < ext_hdr_end; i++) {
                        dev->rd_msg_hdr[i] = mei_read_hdr(dev);
@@ -376,6 +397,12 @@ int mei_irq_read_handler(struct mei_device *dev,
        }
 
        if (mei_hdr->dma_ring) {
+               if (hdr_size_left != sizeof(dev->rd_msg_hdr[ext_hdr_end])) {
+                       dev_err(dev->dev, "corrupted message header len %d\n",
+                               mei_hdr->length);
+                       return -EBADMSG;
+               }
+
                dev->rd_msg_hdr[ext_hdr_end] = mei_read_hdr(dev);
                dev->rd_msg_hdr_count++;
                (*slots)--;
@@ -520,6 +547,16 @@ int mei_irq_write_handler(struct mei_device *dev, struct list_head *cmpl_list)
                        if (ret)
                                return ret;
                        break;
+               case MEI_FOP_DMA_MAP:
+                       ret = mei_cl_irq_dma_map(cl, cb, cmpl_list);
+                       if (ret)
+                               return ret;
+                       break;
+               case MEI_FOP_DMA_UNMAP:
+                       ret = mei_cl_irq_dma_unmap(cl, cb, cmpl_list);
+                       if (ret)
+                               return ret;
+                       break;
                default:
                        BUG();
                }
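The extended-header validation sizes the header with check_add_overflow(), which on current compilers comes down to __builtin_add_overflow(): store the sum and report whether it wrapped. A standalone demonstration with a deliberately oversized, hypothetical payload:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t meta_bytes = UINT32_MAX - 3;  /* hypothetical, near the u32 limit */
        uint32_t hdr_size_ext;

        if (__builtin_add_overflow(8u /* stand-in for sizeof(*meta_hdr) */,
                                   meta_bytes, &hdr_size_ext)) {
            fprintf(stderr, "extended message size too big\n");
            return 1;
        }
        printf("ext header bytes: %u\n", hdr_size_ext);
        return 0;
    }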
index 9f66820..28937b6 100644 (file)
@@ -1026,7 +1026,7 @@ static ssize_t tx_queue_limit_show(struct device *device,
        size = dev->tx_queue_limit;
        mutex_unlock(&dev->device_lock);
 
-       return snprintf(buf, PAGE_SIZE, "%u\n", size);
+       return sysfs_emit(buf, "%u\n", size);
 }
 
 static ssize_t tx_queue_limit_store(struct device *device,
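sysfs_emit() replaces snprintf() in show() callbacks: the PAGE_SIZE bound is implied, the core additionally checks that the buffer really is the page sysfs handed in, and the return value is the byte count written. A rough userspace analogue of the bound it enforces (a sketch, not the kernel implementation):

    #include <stdarg.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* sketch of the sysfs_emit() contract: format into one page at
     * most and report how many bytes landed in the buffer */
    static int my_sysfs_emit(char *buf, const char *fmt, ...)
    {
        va_list args;
        int len;

        va_start(args, fmt);
        len = vsnprintf(buf, PAGE_SIZE, fmt, args);
        va_end(args);

        return len < PAGE_SIZE ? len : PAGE_SIZE - 1;
    }

    int main(void)
    {
        char page[PAGE_SIZE];
        int n = my_sysfs_emit(page, "%u\n", 8u);

        printf("%d bytes: %s", n, page);
        return 0;
    }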
index 8c395bf..b7b6ef3 100644 (file)
@@ -57,6 +57,7 @@ enum mei_dev_state {
        MEI_DEV_ENABLED,
        MEI_DEV_RESETTING,
        MEI_DEV_DISABLED,
+       MEI_DEV_POWERING_DOWN,
        MEI_DEV_POWER_DOWN,
        MEI_DEV_POWER_UP
 };
@@ -78,6 +79,8 @@ enum mei_file_transaction_states {
  * @MEI_FOP_DISCONNECT_RSP: disconnect response
  * @MEI_FOP_NOTIFY_START:   start notification
  * @MEI_FOP_NOTIFY_STOP:    stop notification
+ * @MEI_FOP_DMA_MAP:   request client dma map
+ * @MEI_FOP_DMA_UNMAP: request client dma unmap
  */
 enum mei_cb_file_ops {
        MEI_FOP_READ = 0,
@@ -87,6 +90,8 @@ enum mei_cb_file_ops {
        MEI_FOP_DISCONNECT_RSP,
        MEI_FOP_NOTIFY_START,
        MEI_FOP_NOTIFY_STOP,
+       MEI_FOP_DMA_MAP,
+       MEI_FOP_DMA_UNMAP,
 };
 
 /**
@@ -112,6 +117,13 @@ struct mei_msg_data {
        unsigned char *data;
 };
 
+struct mei_dma_data {
+       u8 buffer_id;
+       void *vaddr;
+       dma_addr_t daddr;
+       size_t size;
+};
+
 /**
  * struct mei_dma_dscr - dma address descriptor
  *
@@ -235,6 +247,8 @@ struct mei_cl_vtag {
  * @rd_pending: pending read credits
  * @rd_completed_lock: protects rd_completed queue
  * @rd_completed: completed read
+ * @dma: dma settings
+ * @dma_mapped: dma buffer is currently mapped.
  *
  * @cldev: device on the mei client bus
  */
@@ -262,6 +276,8 @@ struct mei_cl {
        struct list_head rd_pending;
        spinlock_t rd_completed_lock; /* protects rd_completed queue */
        struct list_head rd_completed;
+       struct mei_dma_data dma;
+       u8 dma_mapped;
 
        struct mei_cl_device *cldev;
 };
@@ -450,6 +466,7 @@ struct mei_fw_version {
  * @hbm_f_dr_supported  : hbm feature dma ring supported
  * @hbm_f_vt_supported  : hbm feature vtag supported
  * @hbm_f_cap_supported : hbm feature capabilities message supported
+ * @hbm_f_cd_supported  : hbm feature client dma supported
  *
  * @fw_ver : FW versions
  *
@@ -537,6 +554,7 @@ struct mei_device {
        unsigned int hbm_f_dr_supported:1;
        unsigned int hbm_f_vt_supported:1;
        unsigned int hbm_f_cap_supported:1;
+       unsigned int hbm_f_cd_supported:1;
 
        struct mei_fw_version fw_ver[MEI_MAX_FW_VER_BLOCKS];
 
index 1de9ef7..a7e1796 100644 (file)
@@ -107,6 +107,11 @@ static const struct pci_device_id mei_me_pci_tbl[] = {
 
        {MEI_PCI_DEVICE(MEI_DEV_ID_CDF, MEI_ME_PCH8_CFG)},
 
+       {MEI_PCI_DEVICE(MEI_DEV_ID_EBG, MEI_ME_PCH15_SPS_CFG)},
+
+       {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_S, MEI_ME_PCH15_CFG)},
+       {MEI_PCI_DEVICE(MEI_DEV_ID_ADP_LP, MEI_ME_PCH15_CFG)},
+
        /* required last entry */
        {0, }
 };
index eff481c..1b2868c 100644 (file)
@@ -68,7 +68,6 @@
 #define PCI_ENDPOINT_TEST_FLAGS                        0x2c
 #define FLAG_USE_DMA                           BIT(0)
 
-#define PCI_DEVICE_ID_TI_J721E                 0xb00d
 #define PCI_DEVICE_ID_TI_AM654                 0xb00c
 #define PCI_DEVICE_ID_LS1088A                  0x80c0
 
diff --git a/drivers/misc/pti.c b/drivers/misc/pti.c
deleted file mode 100644 (file)
index 7236ae5..0000000
+++ /dev/null
@@ -1,978 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- *  pti.c - PTI driver for cJTAG data extration
- *
- *  Copyright (C) Intel 2010
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The PTI (Parallel Trace Interface) driver directs trace data routed from
- * various parts in the system out through the Intel Penwell PTI port and
- * out of the mobile device for analysis with a debugging tool
- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
- * compact JTAG, standard.
- */
-
-#include <linux/init.h>
-#include <linux/sched.h>
-#include <linux/interrupt.h>
-#include <linux/console.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/tty.h>
-#include <linux/tty_driver.h>
-#include <linux/pci.h>
-#include <linux/mutex.h>
-#include <linux/miscdevice.h>
-#include <linux/intel-pti.h>
-#include <linux/slab.h>
-#include <linux/uaccess.h>
-
-#define DRIVERNAME             "pti"
-#define PCINAME                        "pciPTI"
-#define TTYNAME                        "ttyPTI"
-#define CHARNAME               "pti"
-#define PTITTY_MINOR_START     0
-#define PTITTY_MINOR_NUM       2
-#define MAX_APP_IDS            16   /* 128 channel ids / u8 bit size */
-#define MAX_OS_IDS             16   /* 128 channel ids / u8 bit size */
-#define MAX_MODEM_IDS          16   /* 128 channel ids / u8 bit size */
-#define MODEM_BASE_ID          71   /* modem master ID address    */
-#define CONTROL_ID             72   /* control master ID address  */
-#define CONSOLE_ID             73   /* console master ID address  */
-#define OS_BASE_ID             74   /* base OS master ID address  */
-#define APP_BASE_ID            80   /* base App master ID address */
-#define CONTROL_FRAME_LEN      32   /* PTI control frame maximum size */
-#define USER_COPY_SIZE         8192 /* 8Kb buffer for user space copy */
-#define APERTURE_14            0x3800000 /* offset to first OS write addr */
-#define APERTURE_LEN           0x400000  /* address length */
-
-struct pti_tty {
-       struct pti_masterchannel *mc;
-};
-
-struct pti_dev {
-       struct tty_port port[PTITTY_MINOR_NUM];
-       unsigned long pti_addr;
-       unsigned long aperture_base;
-       void __iomem *pti_ioaddr;
-       u8 ia_app[MAX_APP_IDS];
-       u8 ia_os[MAX_OS_IDS];
-       u8 ia_modem[MAX_MODEM_IDS];
-};
-
-/*
- * This protects access to ia_app, ia_os, and ia_modem,
- * which keeps track of channels allocated in
- * an aperture write id.
- */
-static DEFINE_MUTEX(alloclock);
-
-static const struct pci_device_id pci_ids[] = {
-               {PCI_DEVICE(PCI_VENDOR_ID_INTEL, 0x82B)},
-               {0}
-};
-
-static struct tty_driver *pti_tty_driver;
-static struct pti_dev *drv_data;
-
-static unsigned int pti_console_channel;
-static unsigned int pti_control_channel;
-
-/**
- *  pti_write_to_aperture()- The private write function to PTI HW.
- *
- *  @mc: The 'aperture'. It's part of a write address that holds
- *       a master and channel ID.
- *  @buf: Data being written to the HW that will ultimately be seen
- *        in a debugging tool (Fido, Lauterbach).
- *  @len: Size of buffer.
- *
- *  Since each aperture is specified by a unique
- *  master/channel ID, no two processes will be writing
- *  to the same aperture at the same time so no lock is required. The
- *  PTI-Output agent will send these out in the order that they arrived, and
- *  thus, it will intermix these messages. The debug tool can then later
- *  regroup the appropriate message segments together reconstituting each
- *  message.
- */
-static void pti_write_to_aperture(struct pti_masterchannel *mc,
-                                 u8 *buf,
-                                 int len)
-{
-       int dwordcnt;
-       int final;
-       int i;
-       u32 ptiword;
-       u32 __iomem *aperture;
-       u8 *p = buf;
-
-       /*
-        * calculate the aperture offset from the base using the master and
-        * channel id's.
-        */
-       aperture = drv_data->pti_ioaddr + (mc->master << 15)
-               + (mc->channel << 8);
-
-       dwordcnt = len >> 2;
-       final = len - (dwordcnt << 2);      /* final = trailing bytes    */
-       if (final == 0 && dwordcnt != 0) {  /* always need a final dword */
-               final += 4;
-               dwordcnt--;
-       }
-
-       for (i = 0; i < dwordcnt; i++) {
-               ptiword = be32_to_cpu(*(u32 *)p);
-               p += 4;
-               iowrite32(ptiword, aperture);
-       }
-
-       aperture += PTI_LASTDWORD_DTS;  /* adding DTS signals that is EOM */
-
-       ptiword = 0;
-       for (i = 0; i < final; i++)
-               ptiword |= *p++ << (24-(8*i));
-
-       iowrite32(ptiword, aperture);
-       return;
-}
-
-/**
- *  pti_control_frame_built_and_sent()- control frame build and send function.
- *
- *  @mc:          The master / channel structure on which the function
- *                built a control frame.
- *  @thread_name: The thread name associated with the master / channel or
- *                'NULL' if using the 'current' global variable.
- *
- *  To be able to post process the PTI contents on host side, a control frame
- *  is added before sending any PTI content. So the host side knows on
- *  each PTI frame the name of the thread using a dedicated master / channel.
- *  The thread name is retrieved from 'current' global variable if 'thread_name'
- *  is 'NULL', else it is retrieved from 'thread_name' parameter.
- *  This function builds this frame and sends it to a master ID CONTROL_ID.
- *  The overhead is only 32 bytes since the driver only writes to HW
- *  in 32 byte chunks.
- */
-static void pti_control_frame_built_and_sent(struct pti_masterchannel *mc,
-                                            const char *thread_name)
-{
-       /*
-        * Since we access the comm member in current's task_struct, we only
-        * need to be as large as what 'comm' in that structure is.
-        */
-       char comm[TASK_COMM_LEN];
-       struct pti_masterchannel mccontrol = {.master = CONTROL_ID,
-                                             .channel = 0};
-       const char *thread_name_p;
-       const char *control_format = "%3d %3d %s";
-       u8 control_frame[CONTROL_FRAME_LEN];
-
-       if (!thread_name) {
-               if (!in_interrupt())
-                       get_task_comm(comm, current);
-               else
-                       strncpy(comm, "Interrupt", TASK_COMM_LEN);
-
-               /* Absolutely ensure our buffer is zero terminated. */
-               comm[TASK_COMM_LEN-1] = 0;
-               thread_name_p = comm;
-       } else {
-               thread_name_p = thread_name;
-       }
-
-       mccontrol.channel = pti_control_channel;
-       pti_control_channel = (pti_control_channel + 1) & 0x7f;
-
-       snprintf(control_frame, CONTROL_FRAME_LEN, control_format, mc->master,
-               mc->channel, thread_name_p);
-       pti_write_to_aperture(&mccontrol, control_frame, strlen(control_frame));
-}
-
-/**
- *  pti_write_full_frame_to_aperture()- high level function to
- *                                     write to PTI.
- *
- *  @mc:  The 'aperture'. It's part of a write address that holds
- *        a master and channel ID.
- *  @buf: Data being written to the HW that will ultimately be seen
- *        in a debugging tool (Fido, Lauterbach).
- *  @len: Size of buffer.
- *
- *  All threads sending data (either console, user space application, ...)
- *  are calling the high level function to write to PTI meaning that it is
- *  possible to add a control frame before sending the content.
- */
-static void pti_write_full_frame_to_aperture(struct pti_masterchannel *mc,
-                                               const unsigned char *buf,
-                                               int len)
-{
-       pti_control_frame_built_and_sent(mc, NULL);
-       pti_write_to_aperture(mc, (u8 *)buf, len);
-}
-
-/**
- * get_id()- Allocate a master and channel ID.
- *
- * @id_array:    an array of bits representing what channel
- *               id's are allocated for writing.
- * @max_ids:     The max amount of available write IDs to use.
- * @base_id:     The starting SW channel ID, based on the Intel
- *               PTI arch.
- * @thread_name: The thread name associated with the master / channel or
- *               'NULL' if using the 'current' global variable.
- *
- * Returns:
- *     pti_masterchannel struct with master, channel ID address
- *     0 for error
- *
- * Each bit in the arrays ia_app and ia_os correspond to a master and
- * channel id. The bit is one if the id is taken and 0 if free. For
- * every master there are 128 channel id's.
- */
-static struct pti_masterchannel *get_id(u8 *id_array,
-                                       int max_ids,
-                                       int base_id,
-                                       const char *thread_name)
-{
-       struct pti_masterchannel *mc;
-       int i, j, mask;
-
-       mc = kmalloc(sizeof(struct pti_masterchannel), GFP_KERNEL);
-       if (mc == NULL)
-               return NULL;
-
-       /* look for a byte with a free bit */
-       for (i = 0; i < max_ids; i++)
-               if (id_array[i] != 0xff)
-                       break;
-       if (i == max_ids) {
-               kfree(mc);
-               return NULL;
-       }
-       /* find the bit in the 128 possible channel opportunities */
-       mask = 0x80;
-       for (j = 0; j < 8; j++) {
-               if ((id_array[i] & mask) == 0)
-                       break;
-               mask >>= 1;
-       }
-
-       /* grab it */
-       id_array[i] |= mask;
-       mc->master  = base_id;
-       mc->channel = ((i & 0xf)<<3) + j;
-       /* write new master Id / channel Id allocation to channel control */
-       pti_control_frame_built_and_sent(mc, thread_name);
-       return mc;
-}
-
-/*
- * The following three functions:
- * pti_request_mastercahannel(), mipi_release_masterchannel()
- * and pti_writedata() are an API for other kernel drivers to
- * access PTI.
- */
-
-/**
- * pti_request_masterchannel()- Kernel API function used to allocate
- *                             a master, channel ID address
- *                             to write to PTI HW.
- *
- * @type:        0- request Application  master, channel aperture ID
- *                  write address.
- *               1- request OS master, channel aperture ID write
- *                  address.
- *               2- request Modem master, channel aperture ID
- *                  write address.
- *               Other values, error.
- * @thread_name: The thread name associated with the master / channel or
- *               'NULL' if using the 'current' global variable.
- *
- * Returns:
- *     pti_masterchannel struct
- *     0 for error
- */
-struct pti_masterchannel *pti_request_masterchannel(u8 type,
-                                                   const char *thread_name)
-{
-       struct pti_masterchannel *mc;
-
-       mutex_lock(&alloclock);
-
-       switch (type) {
-
-       case 0:
-               mc = get_id(drv_data->ia_app, MAX_APP_IDS,
-                           APP_BASE_ID, thread_name);
-               break;
-
-       case 1:
-               mc = get_id(drv_data->ia_os, MAX_OS_IDS,
-                           OS_BASE_ID, thread_name);
-               break;
-
-       case 2:
-               mc = get_id(drv_data->ia_modem, MAX_MODEM_IDS,
-                           MODEM_BASE_ID, thread_name);
-               break;
-       default:
-               mc = NULL;
-       }
-
-       mutex_unlock(&alloclock);
-       return mc;
-}
-EXPORT_SYMBOL_GPL(pti_request_masterchannel);
-
-/**
- * pti_release_masterchannel()- Kernel API function used to release
- *                             a master, channel ID address
- *                             used to write to PTI HW.
- *
- * @mc: master, channel apeture ID address to be released.  This
- *      will de-allocate the structure via kfree().
- */
-void pti_release_masterchannel(struct pti_masterchannel *mc)
-{
-       u8 master, channel, i;
-
-       mutex_lock(&alloclock);
-
-       if (mc) {
-               master = mc->master;
-               channel = mc->channel;
-
-               if (master == APP_BASE_ID) {
-                       i = channel >> 3;
-                       drv_data->ia_app[i] &=  ~(0x80>>(channel & 0x7));
-               } else if (master == OS_BASE_ID) {
-                       i = channel >> 3;
-                       drv_data->ia_os[i] &= ~(0x80>>(channel & 0x7));
-               } else {
-                       i = channel >> 3;
-                       drv_data->ia_modem[i] &= ~(0x80>>(channel & 0x7));
-               }
-
-               kfree(mc);
-       }
-
-       mutex_unlock(&alloclock);
-}
-EXPORT_SYMBOL_GPL(pti_release_masterchannel);
-
-/**
- * pti_writedata()- Kernel API function used to write trace
- *                  debugging data to PTI HW.
- *
- * @mc:    Master, channel aperture ID address to write to.
- *         Null value will return with no write occurring.
- * @buf:   Trace debuging data to write to the PTI HW.
- *         Null value will return with no write occurring.
- * @count: Size of buf. Value of 0 or a negative number will
- *         return with no write occuring.
- */
-void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count)
-{
-       /*
-        * since this function is exported, this is treated like an
-        * API function, thus, all parameters should
-        * be checked for validity.
-        */
-       if ((mc != NULL) && (buf != NULL) && (count > 0))
-               pti_write_to_aperture(mc, buf, count);
-       return;
-}
-EXPORT_SYMBOL_GPL(pti_writedata);
-
-/*
- * for the tty_driver_*() basic function descriptions, see tty_driver.h.
- * Specific header comments made for PTI-related specifics.
- */
-
-/**
- * pti_tty_driver_open()- Open an Application master, channel aperture
- * ID to the PTI device via tty device.
- *
- * @tty: tty interface.
- * @filp: filp interface pased to tty_port_open() call.
- *
- * Returns:
- *     int, 0 for success
- *     otherwise, fail value
- *
- * The main purpose of using the tty device interface is for
- * each tty port to have a unique PTI write aperture.  In an
- * example use case, ttyPTI0 gets syslogd and an APP aperture
- * ID and ttyPTI1 is where the n_tracesink ldisc hooks to route
- * modem messages into PTI.  Modem trace data does not have to
- * go to ttyPTI1, but ttyPTI0 and ttyPTI1 do need to be distinct
- * master IDs.  These messages go through the PTI HW and out of
- * the handheld platform and to the Fido/Lauterbach device.
- */
-static int pti_tty_driver_open(struct tty_struct *tty, struct file *filp)
-{
-       /*
-        * we actually want to allocate a new channel per open, per
-        * system arch.  HW gives more than plenty channels for a single
-        * system task to have its own channel to write trace data. This
-        * also removes a locking requirement for the actual write
-        * procedure.
-        */
-       return tty_port_open(tty->port, tty, filp);
-}
-
-/**
- * pti_tty_driver_close()- close tty device and release Application
- * master, channel aperture ID to the PTI device via tty device.
- *
- * @tty: tty interface.
- * @filp: filp interface pased to tty_port_close() call.
- *
- * The main purpose of using the tty device interface is to route
- * syslog daemon messages to the PTI HW and out of the handheld platform
- * and to the Fido/Lauterbach device.
- */
-static void pti_tty_driver_close(struct tty_struct *tty, struct file *filp)
-{
-       tty_port_close(tty->port, tty, filp);
-}
-
-/**
- * pti_tty_install()- Used to set up specific master-channels
- *                   to tty ports for organizational purposes when
- *                   tracing viewed from debuging tools.
- *
- * @driver: tty driver information.
- * @tty: tty struct containing pti information.
- *
- * Returns:
- *     0 for success
- *     otherwise, error
- */
-static int pti_tty_install(struct tty_driver *driver, struct tty_struct *tty)
-{
-       int idx = tty->index;
-       struct pti_tty *pti_tty_data;
-       int ret = tty_standard_install(driver, tty);
-
-       if (ret == 0) {
-               pti_tty_data = kmalloc(sizeof(struct pti_tty), GFP_KERNEL);
-               if (pti_tty_data == NULL)
-                       return -ENOMEM;
-
-               if (idx == PTITTY_MINOR_START)
-                       pti_tty_data->mc = pti_request_masterchannel(0, NULL);
-               else
-                       pti_tty_data->mc = pti_request_masterchannel(2, NULL);
-
-               if (pti_tty_data->mc == NULL) {
-                       kfree(pti_tty_data);
-                       return -ENXIO;
-               }
-               tty->driver_data = pti_tty_data;
-       }
-
-       return ret;
-}
-
-/**
- * pti_tty_cleanup()- Used to de-allocate master-channel resources
- *                   tied to tty's of this driver.
- *
- * @tty: tty struct containing pti information.
- */
-static void pti_tty_cleanup(struct tty_struct *tty)
-{
-       struct pti_tty *pti_tty_data = tty->driver_data;
-       if (pti_tty_data == NULL)
-               return;
-       pti_release_masterchannel(pti_tty_data->mc);
-       kfree(pti_tty_data);
-       tty->driver_data = NULL;
-}
-
-/**
- * pti_tty_driver_write()-  Write trace debugging data through the char
- * interface to the PTI HW.  Part of the misc device implementation.
- *
- * @tty: tty struct containing pti information.
- * @buf: trace data to be written.
- * @len:  # of byte to write.
- *
- * Returns:
- *     int, # of bytes written
- *     otherwise, error
- */
-static int pti_tty_driver_write(struct tty_struct *tty,
-       const unsigned char *buf, int len)
-{
-       struct pti_tty *pti_tty_data = tty->driver_data;
-       if ((pti_tty_data != NULL) && (pti_tty_data->mc != NULL)) {
-               pti_write_to_aperture(pti_tty_data->mc, (u8 *)buf, len);
-               return len;
-       }
-       /*
-        * we can't write to the pti hardware if the private driver_data
-        * and the mc address is not there.
-        */
-       else
-               return -EFAULT;
-}
-
-/**
- * pti_tty_write_room()- Always returns 2048.
- *
- * @tty: contains tty info of the pti driver.
- */
-static int pti_tty_write_room(struct tty_struct *tty)
-{
-       return 2048;
-}
-
-/**
- * pti_char_open()- Open an Application master, channel aperture
- * ID to the PTI device. Part of the misc device implementation.
- *
- * @inode: not used.
- * @filp:  Output- will have a masterchannel struct set containing
- *                 the allocated application PTI aperture write address.
- *
- * Returns:
- *     int, 0 for success
- *     otherwise, a fail value
- */
-static int pti_char_open(struct inode *inode, struct file *filp)
-{
-       struct pti_masterchannel *mc;
-
-       /*
-        * We really do want to fail immediately if
-        * pti_request_masterchannel() fails,
-        * before assigning the value to filp->private_data.
-        * Slightly easier to debug if this driver needs debugging.
-        */
-       mc = pti_request_masterchannel(0, NULL);
-       if (mc == NULL)
-               return -ENOMEM;
-       filp->private_data = mc;
-       return 0;
-}
-
-/**
- * pti_char_release()-  Close a char channel to the PTI device. Part
- * of the misc device implementation.
- *
- * @inode: Not used in this implementaiton.
- * @filp:  Contains private_data that contains the master, channel
- *         ID to be released by the PTI device.
- *
- * Returns:
- *     always 0
- */
-static int pti_char_release(struct inode *inode, struct file *filp)
-{
-       pti_release_masterchannel(filp->private_data);
-       filp->private_data = NULL;
-       return 0;
-}
-
-/**
- * pti_char_write()-  Write trace debugging data through the char
- * interface to the PTI HW.  Part of the misc device implementation.
- *
- * @filp:  Contains private data which is used to obtain
- *         master, channel write ID.
- * @data:  trace data to be written.
- * @len:   # of byte to write.
- * @ppose: Not used in this function implementation.
- *
- * Returns:
- *     int, # of bytes written
- *     otherwise, error value
- *
- * Notes: From side discussions with Alan Cox and experimenting
- * with PTI debug HW like Nokia's Fido box and Lauterbach
- * devices, 8192 byte write buffer used by USER_COPY_SIZE was
- * deemed an appropriate size for this type of usage with
- * debugging HW.
- */
-static ssize_t pti_char_write(struct file *filp, const char __user *data,
-                             size_t len, loff_t *ppose)
-{
-       struct pti_masterchannel *mc;
-       void *kbuf;
-       const char __user *tmp;
-       size_t size = USER_COPY_SIZE;
-       size_t n = 0;
-
-       tmp = data;
-       mc = filp->private_data;
-
-       kbuf = kmalloc(size, GFP_KERNEL);
-       if (kbuf == NULL)  {
-               pr_err("%s(%d): buf allocation failed\n",
-                       __func__, __LINE__);
-               return -ENOMEM;
-       }
-
-       do {
-               if (len - n > USER_COPY_SIZE)
-                       size = USER_COPY_SIZE;
-               else
-                       size = len - n;
-
-               if (copy_from_user(kbuf, tmp, size)) {
-                       kfree(kbuf);
-                       return n ? n : -EFAULT;
-               }
-
-               pti_write_to_aperture(mc, kbuf, size);
-               n  += size;
-               tmp += size;
-
-       } while (len > n);
-
-       kfree(kbuf);
-       return len;
-}
-
-static const struct tty_operations pti_tty_driver_ops = {
-       .open           = pti_tty_driver_open,
-       .close          = pti_tty_driver_close,
-       .write          = pti_tty_driver_write,
-       .write_room     = pti_tty_write_room,
-       .install        = pti_tty_install,
-       .cleanup        = pti_tty_cleanup
-};
-
-static const struct file_operations pti_char_driver_ops = {
-       .owner          = THIS_MODULE,
-       .write          = pti_char_write,
-       .open           = pti_char_open,
-       .release        = pti_char_release,
-};
-
-static struct miscdevice pti_char_driver = {
-       .minor          = MISC_DYNAMIC_MINOR,
-       .name           = CHARNAME,
-       .fops           = &pti_char_driver_ops
-};
-
-/**
- * pti_console_write()-  Write to the console that has been acquired.
- *
- * @c:   Not used in this implementaiton.
- * @buf: Data to be written.
- * @len: Length of buf.
- */
-static void pti_console_write(struct console *c, const char *buf, unsigned len)
-{
-       static struct pti_masterchannel mc = {.master  = CONSOLE_ID,
-                                             .channel = 0};
-
-       mc.channel = pti_console_channel;
-       pti_console_channel = (pti_console_channel + 1) & 0x7f;
-
-       pti_write_full_frame_to_aperture(&mc, buf, len);
-}
-
-/**
- * pti_console_device()-  Return the driver tty structure and set the
- *                       associated index implementation.
- *
- * @c:     Console device of the driver.
- * @index: index associated with c.
- *
- * Returns:
- *     always value of pti_tty_driver structure when this function
- *     is called.
- */
-static struct tty_driver *pti_console_device(struct console *c, int *index)
-{
-       *index = c->index;
-       return pti_tty_driver;
-}
-
-/**
- * pti_console_setup()-  Initialize console variables used by the driver.
- *
- * @c:     Not used.
- * @opts:  Not used.
- *
- * Returns:
- *     always 0.
- */
-static int pti_console_setup(struct console *c, char *opts)
-{
-       pti_console_channel = 0;
-       pti_control_channel = 0;
-       return 0;
-}
-
-/*
- * pti_console struct, used to capture OS printk()'s and shift
- * out to the PTI device for debugging.  This cannot be
- * enabled upon boot because of the possibility of eating
- * any serial console printk's (race condition discovered).
- * The console should be enabled upon when the tty port is
- * used for the first time.  Since the primary purpose for
- * the tty port is to hook up syslog to it, the tty port
- * will be open for a really long time.
- */
-static struct console pti_console = {
-       .name           = TTYNAME,
-       .write          = pti_console_write,
-       .device         = pti_console_device,
-       .setup          = pti_console_setup,
-       .flags          = CON_PRINTBUFFER,
-       .index          = 0,
-};
-
-/**
- * pti_port_activate()- Used to start/initialize any items upon
- * first opening of tty_port().
- *
- * @port: The tty port number of the PTI device.
- * @tty:  The tty struct associated with this device.
- *
- * Returns:
- *     always returns 0
- *
- * Notes: The primary purpose of the PTI tty port 0 is to hook
- * the syslog daemon to it; thus this port will be open for a
- * very long time.
- */
-static int pti_port_activate(struct tty_port *port, struct tty_struct *tty)
-{
-       if (port->tty->index == PTITTY_MINOR_START)
-               console_start(&pti_console);
-       return 0;
-}
-
-/**
- * pti_port_shutdown()- Used to stop/shutdown any items upon the
- * last tty port close.
- *
- * @port: The tty port number of the PTI device.
- *
- * Notes: The primary purpose of the PTI tty port 0 is to hook
- * the syslog daemon to it; thus this port will be open for a
- * very long time.
- */
-static void pti_port_shutdown(struct tty_port *port)
-{
-       if (port->tty->index == PTITTY_MINOR_START)
-               console_stop(&pti_console);
-}
-
-static const struct tty_port_operations tty_port_ops = {
-       .activate = pti_port_activate,
-       .shutdown = pti_port_shutdown,
-};
-
-/*
- * Note the _probe() call sets everything up and ties the char and tty
- * to successfully detecting the PTI device on the pci bus.
- */
-
-/**
- * pti_pci_probe()- Used to detect pti on the pci bus and set
- *                 things up in the driver.
- *
- * @pdev: pci_dev struct values for pti.
- * @ent:  pci_device_id struct for pti driver.
- *
- * Returns:
- *     0 for success
- *     otherwise, error
- */
-static int pti_pci_probe(struct pci_dev *pdev,
-               const struct pci_device_id *ent)
-{
-       unsigned int a;
-       int retval;
-       int pci_bar = 1;
-
-       dev_dbg(&pdev->dev, "%s %s(%d): PTI PCI ID %04x:%04x\n", __FILE__,
-                       __func__, __LINE__, pdev->vendor, pdev->device);
-
-       retval = misc_register(&pti_char_driver);
-       if (retval) {
-               pr_err("%s(%d): CHAR registration failed of pti driver\n",
-                       __func__, __LINE__);
-               pr_err("%s(%d): Error value returned: %d\n",
-                       __func__, __LINE__, retval);
-               goto err;
-       }
-
-       retval = pci_enable_device(pdev);
-       if (retval != 0) {
-               dev_err(&pdev->dev,
-                       "%s: pci_enable_device() returned error %d\n",
-                       __func__, retval);
-               goto err_unreg_misc;
-       }
-
-       drv_data = kzalloc(sizeof(*drv_data), GFP_KERNEL);
-       if (drv_data == NULL) {
-               retval = -ENOMEM;
-               dev_err(&pdev->dev,
-                       "%s(%d): kmalloc() returned NULL memory.\n",
-                       __func__, __LINE__);
-               goto err_disable_pci;
-       }
-       drv_data->pti_addr = pci_resource_start(pdev, pci_bar);
-
-       retval = pci_request_region(pdev, pci_bar, dev_name(&pdev->dev));
-       if (retval != 0) {
-               dev_err(&pdev->dev,
-                       "%s(%d): pci_request_region() returned error %d\n",
-                       __func__, __LINE__, retval);
-               goto err_free_dd;
-       }
-       drv_data->aperture_base = drv_data->pti_addr+APERTURE_14;
-       drv_data->pti_ioaddr =
-               ioremap((u32)drv_data->aperture_base,
-               APERTURE_LEN);
-       if (!drv_data->pti_ioaddr) {
-               retval = -ENOMEM;
-               goto err_rel_reg;
-       }
-
-       pci_set_drvdata(pdev, drv_data);
-
-       for (a = 0; a < PTITTY_MINOR_NUM; a++) {
-               struct tty_port *port = &drv_data->port[a];
-               tty_port_init(port);
-               port->ops = &tty_port_ops;
-
-               tty_port_register_device(port, pti_tty_driver, a, &pdev->dev);
-       }
-
-       register_console(&pti_console);
-
-       return 0;
-err_rel_reg:
-       pci_release_region(pdev, pci_bar);
-err_free_dd:
-       kfree(drv_data);
-err_disable_pci:
-       pci_disable_device(pdev);
-err_unreg_misc:
-       misc_deregister(&pti_char_driver);
-err:
-       return retval;
-}
-
-/**
- * pti_pci_remove()- Driver exit method to remove PTI from
- *                PCI bus.
- * @pdev: variable containing pci info of PTI.
- */
-static void pti_pci_remove(struct pci_dev *pdev)
-{
-       struct pti_dev *drv_data = pci_get_drvdata(pdev);
-       unsigned int a;
-
-       unregister_console(&pti_console);
-
-       for (a = 0; a < PTITTY_MINOR_NUM; a++) {
-               tty_unregister_device(pti_tty_driver, a);
-               tty_port_destroy(&drv_data->port[a]);
-       }
-
-       iounmap(drv_data->pti_ioaddr);
-       kfree(drv_data);
-       pci_release_region(pdev, 1);
-       pci_disable_device(pdev);
-
-       misc_deregister(&pti_char_driver);
-}
-
-static struct pci_driver pti_pci_driver = {
-       .name           = PCINAME,
-       .id_table       = pci_ids,
-       .probe          = pti_pci_probe,
-       .remove         = pti_pci_remove,
-};
-
-/**
- * pti_init()- Overall entry/init call to the pti driver.
- *             It starts the registration process with the kernel.
- *
- * Returns:
- *     int __init, 0 for success
- *     otherwise value is an error
- *
- */
-static int __init pti_init(void)
-{
-       int retval;
-
-       /* First register module as tty device */
-
-       pti_tty_driver = alloc_tty_driver(PTITTY_MINOR_NUM);
-       if (pti_tty_driver == NULL) {
-               pr_err("%s(%d): Memory allocation failed for ptiTTY driver\n",
-                       __func__, __LINE__);
-               return -ENOMEM;
-       }
-
-       pti_tty_driver->driver_name             = DRIVERNAME;
-       pti_tty_driver->name                    = TTYNAME;
-       pti_tty_driver->major                   = 0;
-       pti_tty_driver->minor_start             = PTITTY_MINOR_START;
-       pti_tty_driver->type                    = TTY_DRIVER_TYPE_SYSTEM;
-       pti_tty_driver->subtype                 = SYSTEM_TYPE_SYSCONS;
-       pti_tty_driver->flags                   = TTY_DRIVER_REAL_RAW |
-                                                 TTY_DRIVER_DYNAMIC_DEV;
-       pti_tty_driver->init_termios            = tty_std_termios;
-
-       tty_set_operations(pti_tty_driver, &pti_tty_driver_ops);
-
-       retval = tty_register_driver(pti_tty_driver);
-       if (retval) {
-               pr_err("%s(%d): TTY registration failed of pti driver\n",
-                       __func__, __LINE__);
-               pr_err("%s(%d): Error value returned: %d\n",
-                       __func__, __LINE__, retval);
-
-               goto put_tty;
-       }
-
-       retval = pci_register_driver(&pti_pci_driver);
-       if (retval) {
-               pr_err("%s(%d): PCI registration failed of pti driver\n",
-                       __func__, __LINE__);
-               pr_err("%s(%d): Error value returned: %d\n",
-                       __func__, __LINE__, retval);
-               goto unreg_tty;
-       }
-
-       return 0;
-unreg_tty:
-       tty_unregister_driver(pti_tty_driver);
-put_tty:
-       put_tty_driver(pti_tty_driver);
-       pti_tty_driver = NULL;
-       return retval;
-}
-
-/**
- * pti_exit()- Unregisters this module as a tty and pci driver.
- */
-static void __exit pti_exit(void)
-{
-       tty_unregister_driver(pti_tty_driver);
-       pci_unregister_driver(&pti_pci_driver);
-       put_tty_driver(pti_tty_driver);
-}
-
-module_init(pti_init);
-module_exit(pti_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Ken Mills, Jay Freyensee");
-MODULE_DESCRIPTION("PTI Driver");
-
index 41cab29..f1655f5 100644 (file)
 #include <uapi/misc/pvpanic.h>
 
 static void __iomem *base;
+static unsigned int capability = PVPANIC_PANICKED | PVPANIC_CRASH_LOADED;
+static unsigned int events;
+
+static ssize_t capability_show(struct device *dev,
+                              struct device_attribute *attr, char *buf)
+{
+       return sysfs_emit(buf, "%x\n", capability);
+}
+static DEVICE_ATTR_RO(capability);
+
+static ssize_t events_show(struct device *dev,
+                           struct device_attribute *attr, char *buf)
+{
+       return sysfs_emit(buf, "%x\n", events);
+}
+
+static ssize_t events_store(struct device *dev, struct device_attribute *attr,
+                           const char *buf, size_t count)
+{
+       unsigned int tmp;
+       int err;
+
+       err = kstrtouint(buf, 16, &tmp);
+       if (err)
+               return err;
+
+       if ((tmp & capability) != tmp)
+               return -EINVAL;
+
+       events = tmp;
+
+       return count;
+}
+static DEVICE_ATTR_RW(events);
+
+static struct attribute *pvpanic_dev_attrs[] = {
+       &dev_attr_capability.attr,
+       &dev_attr_events.attr,
+       NULL
+};
+ATTRIBUTE_GROUPS(pvpanic_dev);
 
 MODULE_AUTHOR("Hu Tao <hutao@cn.fujitsu.com>");
 MODULE_DESCRIPTION("pvpanic device driver");
@@ -27,7 +68,8 @@ MODULE_LICENSE("GPL");
 static void
 pvpanic_send_event(unsigned int event)
 {
-       iowrite8(event, base);
+       if (event & capability & events)
+               iowrite8(event, base);
 }
 
 static int
@@ -73,8 +115,13 @@ static int pvpanic_mmio_probe(struct platform_device *pdev)
                return -EINVAL;
        }
 
-       atomic_notifier_chain_register(&panic_notifier_list,
-                                      &pvpanic_panic_nb);
+       /* initialize capability by RDPT */
+       capability &= ioread8(base);
+       events = capability;
+
+       if (capability)
+               atomic_notifier_chain_register(&panic_notifier_list,
+                                              &pvpanic_panic_nb);
 
        return 0;
 }
@@ -82,8 +129,9 @@ static int pvpanic_mmio_probe(struct platform_device *pdev)
 static int pvpanic_mmio_remove(struct platform_device *pdev)
 {
 
-       atomic_notifier_chain_unregister(&panic_notifier_list,
-                                        &pvpanic_panic_nb);
+       if (capability)
+               atomic_notifier_chain_unregister(&panic_notifier_list,
+                                                &pvpanic_panic_nb);
 
        return 0;
 }
@@ -92,6 +140,7 @@ static const struct of_device_id pvpanic_mmio_match[] = {
        { .compatible = "qemu,pvpanic-mmio", },
        {}
 };
+MODULE_DEVICE_TABLE(of, pvpanic_mmio_match);
 
 static const struct acpi_device_id pvpanic_device_ids[] = {
        { "QEMU0001", 0 },
@@ -104,6 +153,7 @@ static struct platform_driver pvpanic_mmio_driver = {
                .name = "pvpanic-mmio",
                .of_match_table = pvpanic_mmio_match,
                .acpi_match_table = pvpanic_device_ids,
+               .dev_groups = pvpanic_dev_groups,
        },
        .probe = pvpanic_mmio_probe,
        .remove = pvpanic_mmio_remove,
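The new attributes gate delivery twice: capability holds what the device advertised (probe ANDs the driver's mask with the byte read back from the register), and events is a writable sub-mask of it, so pvpanic_send_event() only forwards bits present in both. Userspace can narrow the mask through sysfs; a small sketch (the sysfs path is hypothetical and varies with the parent device):

    #include <stdio.h>

    int main(void)
    {
        const char *path = "/sys/devices/platform/pvpanic-mmio/events";
        FILE *f = fopen(path, "w");

        if (!f) {
            perror("fopen");
            return 1;
        }
        /* keep only PVPANIC_PANICKED (bit 0); bits outside the
         * advertised capability mask are rejected with -EINVAL */
        fprintf(f, "1\n");
        fclose(f);
        return 0;
    }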
index 23837d0..2508f83 100644 (file)
@@ -208,7 +208,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
        } else {
                dst = (void *)((u64)skb->data & ~(L1_CACHE_BYTES - 1));
                dev_dbg(xpnet, "transferring buffer to the skb->data area;\n\t"
-                       "xp_remote_memcpy(0x%p, 0x%p, %hu)\n", dst,
+                       "xp_remote_memcpy(0x%p, 0x%p, %u)\n", dst,
                                          (void *)msg->buf_pa, msg->size);
 
                ret = xp_remote_memcpy(xp_pa(dst), msg->buf_pa, msg->size);
@@ -218,7 +218,7 @@ xpnet_receive(short partid, int channel, struct xpnet_message *msg)
                         * !!! appears in_use and we can't just call
                         * !!! dev_kfree_skb.
                         */
-                       dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%hx) "
+                       dev_err(xpnet, "xp_remote_memcpy(0x%p, 0x%p, 0x%x) "
                                "returned error=0x%x\n", dst,
                                (void *)msg->buf_pa, msg->size, ret);
 
index c490658..880c33a 100644 (file)
@@ -237,7 +237,9 @@ static struct qp_list qp_guest_endpoints = {
 #define QPE_NUM_PAGES(_QPE) ((u32) \
                             (DIV_ROUND_UP(_QPE.produce_size, PAGE_SIZE) + \
                              DIV_ROUND_UP(_QPE.consume_size, PAGE_SIZE) + 2))
-
+#define QP_SIZES_ARE_VALID(_prod_qsize, _cons_qsize) \
+       ((_prod_qsize) + (_cons_qsize) >= max(_prod_qsize, _cons_qsize) && \
+        (_prod_qsize) + (_cons_qsize) <= VMCI_MAX_GUEST_QP_MEMORY)
 
 /*
  * Frees kernel VA space for a given queue and its queue header, and
@@ -528,7 +530,7 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size)
        u64 num_pages;
        const size_t queue_size = sizeof(*queue) + sizeof(*(queue->kernel_if));
 
-       if (size > SIZE_MAX - PAGE_SIZE)
+       if (size > min_t(size_t, VMCI_MAX_GUEST_QP_MEMORY, SIZE_MAX - PAGE_SIZE))
                return NULL;
        num_pages = DIV_ROUND_UP(size, PAGE_SIZE) + 1;
        if (num_pages > (SIZE_MAX - queue_size) /
@@ -537,6 +539,9 @@ static struct vmci_queue *qp_host_alloc_queue(u64 size)
 
        queue_page_size = num_pages * sizeof(*queue->kernel_if->u.h.page);
 
+       if (queue_size + queue_page_size > KMALLOC_MAX_SIZE)
+               return NULL;
+
        queue = kzalloc(queue_size + queue_page_size, GFP_KERNEL);
        if (queue) {
                queue->q_header = NULL;
@@ -630,7 +635,7 @@ static void qp_release_pages(struct page **pages,
 
        for (i = 0; i < num_pages; i++) {
                if (dirty)
-                       set_page_dirty(pages[i]);
+                       set_page_dirty_lock(pages[i]);
 
                put_page(pages[i]);
                pages[i] = NULL;
@@ -1207,7 +1212,7 @@ static int qp_alloc_guest_work(struct vmci_handle *handle,
        } else {
                result = qp_alloc_hypercall(queue_pair_entry);
                if (result < VMCI_SUCCESS) {
-                       pr_warn("qp_alloc_hypercall result = %d\n", result);
+                       pr_devel("qp_alloc_hypercall result = %d\n", result);
                        goto error;
                }
        }
@@ -1929,6 +1934,9 @@ int vmci_qp_broker_alloc(struct vmci_handle handle,
                         struct vmci_qp_page_store *page_store,
                         struct vmci_ctx *context)
 {
+       if (!QP_SIZES_ARE_VALID(produce_size, consume_size))
+               return VMCI_ERROR_NO_RESOURCES;
+
        return qp_broker_alloc(handle, peer, flags, priv_flags,
                               produce_size, consume_size,
                               page_store, context, NULL, NULL, NULL, NULL);
@@ -2685,8 +2693,7 @@ int vmci_qpair_alloc(struct vmci_qp **qpair,
         * used by the device is NO_RESOURCES, so use that here too.
         */
 
-       if (produce_qsize + consume_qsize < max(produce_qsize, consume_qsize) ||
-           produce_qsize + consume_qsize > VMCI_MAX_GUEST_QP_MEMORY)
+       if (!QP_SIZES_ARE_VALID(produce_qsize, consume_qsize))
                return VMCI_ERROR_NO_RESOURCES;
 
        retval = vmci_route(&src, &dst, false, &route);
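
The new QP_SIZES_ARE_VALID() macro folds the two-part check, no unsigned wraparound and within VMCI_MAX_GUEST_QP_MEMORY, into one place. A hedged userspace illustration of why "sum >= max(a, b)" detects wraparound; uint64_t stands in for the kernel's u64 and the bound below is illustrative, not the driver's constant.

#include <stdint.h>
#include <stdio.h>

#define MAX_QP_MEMORY (960 * 1024) /* illustrative bound */

static int qp_sizes_are_valid(uint64_t prod, uint64_t cons)
{
	uint64_t sum = prod + cons; /* wraps modulo 2^64 on overflow */
	uint64_t hi = prod > cons ? prod : cons;

	/* a wrapped sum is strictly smaller than either operand */
	return sum >= hi && sum <= MAX_QP_MEMORY;
}

int main(void)
{
	printf("%d\n", qp_sizes_are_valid(4096, 4096));       /* 1 */
	printf("%d\n", qp_sizes_are_valid(UINT64_MAX, 4096)); /* 0: wrapped */
	return 0;
}
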
index 00017fc..c4e6e92 100644 (file)
@@ -104,7 +104,7 @@ struct vmci_qp_dtch_info {
 struct vmci_qp_page_store {
        /* Reference to pages backing the queue pair. */
        u64 pages;
-       /* Length of pageList/virtual addres range (in pages). */
+       /* Length of pageList/virtual address range (in pages). */
        u32 len;
 };
 
index c2e70b7..4383c26 100644 (file)
@@ -399,11 +399,6 @@ void mmc_remove_card(struct mmc_card *card)
        mmc_remove_card_debugfs(card);
 #endif
 
-       if (host->cqe_enabled) {
-               host->cqe_ops->cqe_disable(host);
-               host->cqe_enabled = false;
-       }
-
        if (mmc_card_present(card)) {
                if (mmc_host_is_spi(card->host)) {
                        pr_info("%s: SPI card removed\n",
@@ -416,6 +411,10 @@ void mmc_remove_card(struct mmc_card *card)
                of_node_put(card->dev.of_node);
        }
 
+       if (host->cqe_enabled) {
+               host->cqe_ops->cqe_disable(host);
+               host->cqe_enabled = false;
+       }
+
        put_device(&card->dev);
 }
-
index 0d80b72..8741271 100644 (file)
@@ -423,10 +423,6 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
 
                /* EXT_CSD value is in units of 10ms, but we store in ms */
                card->ext_csd.part_time = 10 * ext_csd[EXT_CSD_PART_SWITCH_TIME];
-               /* Some eMMC set the value too low so set a minimum */
-               if (card->ext_csd.part_time &&
-                   card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME)
-                       card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME;
 
                /* Sleep / awake timeout in 100ns units */
                if (sa_shift > 0 && sa_shift <= 0x17)
@@ -616,6 +612,17 @@ static int mmc_decode_ext_csd(struct mmc_card *card, u8 *ext_csd)
                card->ext_csd.data_sector_size = 512;
        }
 
+       /*
+        * GENERIC_CMD6_TIME is to be used "unless a specific timeout is defined
+        * when accessing a specific field", so use it here if there is no
+        * PARTITION_SWITCH_TIME.
+        */
+       if (!card->ext_csd.part_time)
+               card->ext_csd.part_time = card->ext_csd.generic_cmd6_time;
+       /* Some eMMC set the value too low so set a minimum */
+       if (card->ext_csd.part_time < MMC_MIN_PART_SWITCH_TIME)
+               card->ext_csd.part_time = MMC_MIN_PART_SWITCH_TIME;
+
        /* eMMC v5 or later */
        if (card->ext_csd.rev >= 7) {
                memcpy(card->ext_csd.fwrev, &ext_csd[EXT_CSD_FIRMWARE_VERSION],
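
The reordered ext_csd logic is "fall back, then clamp": take PARTITION_SWITCH_TIME when present, GENERIC_CMD6_TIME otherwise, and only then enforce the floor. A hedged sketch as a pure function; the 300 ms floor is illustrative, not necessarily the header's value.

#include <stdint.h>

#define MIN_PART_SWITCH_TIME_MS 300 /* illustrative floor */

/* ext_csd_units: PARTITION_SWITCH_TIME in 10 ms units, 0 if unspecified */
static uint32_t part_time_ms(uint32_t ext_csd_units, uint32_t generic_cmd6_ms)
{
	uint32_t ms = 10 * ext_csd_units;

	if (!ms)                          /* spec-mandated fallback */
		ms = generic_cmd6_ms;
	if (ms < MIN_PART_SWITCH_TIME_MS) /* some eMMC report too low */
		ms = MIN_PART_SWITCH_TIME_MS;
	return ms;
}
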
index 17dbc81..984d350 100644 (file)
@@ -1242,7 +1242,11 @@ mmci_start_command(struct mmci_host *host, struct mmc_command *cmd, u32 c)
                if (!cmd->busy_timeout)
                        cmd->busy_timeout = 10 * MSEC_PER_SEC;
 
-               clks = (unsigned long long)cmd->busy_timeout * host->cclk;
+               if (cmd->busy_timeout > host->mmc->max_busy_timeout)
+                       clks = (unsigned long long)host->mmc->max_busy_timeout * host->cclk;
+               else
+                       clks = (unsigned long long)cmd->busy_timeout * host->cclk;
+
                do_div(clks, MSEC_PER_SEC);
                writel_relaxed(clks, host->base + MMCIDATATIMER);
        }
@@ -2151,6 +2155,10 @@ static int mmci_probe(struct amba_device *dev,
                mmc->caps |= MMC_CAP_WAIT_WHILE_BUSY;
        }
 
+       /* Variants with a mandatory busy timeout in HW need R1B responses. */
+       if (variant->busy_timeout)
+               mmc->caps |= MMC_CAP_NEED_RSP_BUSY;
+
        /* Prepare a CMD12 - needed to clear the DPSM on some variants. */
        host->stop_abort.opcode = MMC_STOP_TRANSMISSION;
        host->stop_abort.arg = 0;
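
Two related changes above: clamp the requested busy timeout to the host's max_busy_timeout before converting it to clock cycles, and advertise MMC_CAP_NEED_RSP_BUSY on variants with a hardware busy timer. The conversion, as a hedged standalone helper with invented names:

#include <stdint.h>

static uint64_t busy_timeout_clks(uint32_t timeout_ms, uint32_t max_ms,
				  uint32_t cclk_hz)
{
	if (timeout_ms > max_ms)
		timeout_ms = max_ms; /* never program more than the HW timer can count */

	/* widen first so ms * Hz cannot overflow 32 bits */
	return (uint64_t)timeout_ms * cclk_hz / 1000;
}
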
index 646823d..2d73407 100644 (file)
@@ -20,7 +20,6 @@
 #include <linux/slab.h>
 #include <linux/scatterlist.h>
 #include <linux/sizes.h>
-#include <linux/swiotlb.h>
 #include <linux/regulator/consumer.h>
 #include <linux/pm_runtime.h>
 #include <linux/of.h>
@@ -4582,12 +4581,8 @@ int sdhci_setup_host(struct sdhci_host *host)
                mmc->max_segs = SDHCI_MAX_SEGS;
        } else if (host->flags & SDHCI_USE_SDMA) {
                mmc->max_segs = 1;
-               if (swiotlb_max_segment()) {
-                       unsigned int max_req_size = (1 << IO_TLB_SHIFT) *
-                                               IO_TLB_SEGSIZE;
-                       mmc->max_req_size = min(mmc->max_req_size,
-                                               max_req_size);
-               }
+               mmc->max_req_size = min_t(size_t, mmc->max_req_size,
+                                         dma_max_mapping_size(mmc_dev(mmc)));
        } else { /* PIO */
                mmc->max_segs = SDHCI_MAX_SEGS;
        }
index 353ab27..e4412c7 100644 (file)
@@ -379,7 +379,7 @@ static struct attribute *channel_attrs[] = {
        NULL,
 };
 
-static struct attribute_group channel_attr_group = {
+static const struct attribute_group channel_attr_group = {
        .attrs = channel_attrs,
        .is_visible = channel_attr_is_visible,
 };
@@ -436,7 +436,7 @@ static struct attribute *interface_attrs[] = {
        NULL,
 };
 
-static struct attribute_group interface_attr_group = {
+static const struct attribute_group interface_attr_group = {
        .attrs = interface_attrs,
 };
 
@@ -718,7 +718,7 @@ static struct attribute *mc_attrs[] = {
        NULL,
 };
 
-static struct attribute_group mc_attr_group = {
+static const struct attribute_group mc_attr_group = {
        .attrs = mc_attrs,
 };
 
index eb72582..f9cfb08 100644 (file)
@@ -32,7 +32,6 @@
 
 MODULE_AUTHOR("Eric Brower <ebrower@usa.net>");
 MODULE_DESCRIPTION("User-programmable flash device on Sun Microsystems boardsets");
-MODULE_SUPPORTED_DEVICE(DRIVER_NAME);
 MODULE_LICENSE("GPL");
 MODULE_VERSION("2.1");
 
index 45d12b0..bcd31f4 100644 (file)
@@ -88,13 +88,13 @@ config WIREGUARD
        select CRYPTO_CURVE25519_X86 if X86 && 64BIT
        select ARM_CRYPTO if ARM
        select ARM64_CRYPTO if ARM64
-       select CRYPTO_CHACHA20_NEON if (ARM || ARM64) && KERNEL_MODE_NEON
+       select CRYPTO_CHACHA20_NEON if ARM || (ARM64 && KERNEL_MODE_NEON)
        select CRYPTO_POLY1305_NEON if ARM64 && KERNEL_MODE_NEON
        select CRYPTO_POLY1305_ARM if ARM
        select CRYPTO_BLAKE2S_ARM if ARM
        select CRYPTO_CURVE25519_NEON if ARM && KERNEL_MODE_NEON
        select CRYPTO_CHACHA_MIPS if CPU_MIPS32_R2
-       select CRYPTO_POLY1305_MIPS if CPU_MIPS32 || (CPU_MIPS64 && 64BIT)
+       select CRYPTO_POLY1305_MIPS if MIPS
        help
          WireGuard is a secure, fast, and easy to use replacement for IPSec
          that uses modern cryptography and clever networking tricks. It's
index 8bdc44b..3c8f665 100644 (file)
@@ -127,6 +127,8 @@ static int com20020pci_probe(struct pci_dev *pdev,
        int i, ioaddr, ret;
        struct resource *r;
 
+       ret = 0;
+
        if (pci_enable_device(pdev))
                return -EIO;
 
@@ -139,6 +141,8 @@ static int com20020pci_probe(struct pci_dev *pdev,
        priv->ci = ci;
        mm = &ci->misc_map;
 
+       pci_set_drvdata(pdev, priv);
+
        INIT_LIST_HEAD(&priv->list_dev);
 
        if (mm->size) {
@@ -161,7 +165,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
                dev = alloc_arcdev(device);
                if (!dev) {
                        ret = -ENOMEM;
-                       goto out_port;
+                       break;
                }
                dev->dev_port = i;
 
@@ -178,7 +182,7 @@ static int com20020pci_probe(struct pci_dev *pdev,
                        pr_err("IO region %xh-%xh already allocated\n",
                               ioaddr, ioaddr + cm->size - 1);
                        ret = -EBUSY;
-                       goto out_port;
+                       goto err_free_arcdev;
                }
 
                /* Dummy access after Reset
@@ -216,18 +220,18 @@ static int com20020pci_probe(struct pci_dev *pdev,
                if (arcnet_inb(ioaddr, COM20020_REG_R_STATUS) == 0xFF) {
                        pr_err("IO address %Xh is empty!\n", ioaddr);
                        ret = -EIO;
-                       goto out_port;
+                       goto err_free_arcdev;
                }
                if (com20020_check(dev)) {
                        ret = -EIO;
-                       goto out_port;
+                       goto err_free_arcdev;
                }
 
                card = devm_kzalloc(&pdev->dev, sizeof(struct com20020_dev),
                                    GFP_KERNEL);
                if (!card) {
                        ret = -ENOMEM;
-                       goto out_port;
+                       goto err_free_arcdev;
                }
 
                card->index = i;
@@ -253,29 +257,29 @@ static int com20020pci_probe(struct pci_dev *pdev,
 
                ret = devm_led_classdev_register(&pdev->dev, &card->tx_led);
                if (ret)
-                       goto out_port;
+                       goto err_free_arcdev;
 
                ret = devm_led_classdev_register(&pdev->dev, &card->recon_led);
                if (ret)
-                       goto out_port;
+                       goto err_free_arcdev;
 
                dev_set_drvdata(&dev->dev, card);
 
                ret = com20020_found(dev, IRQF_SHARED);
                if (ret)
-                       goto out_port;
+                       goto err_free_arcdev;
 
                devm_arcnet_led_init(dev, dev->dev_id, i);
 
                list_add(&card->list, &priv->list_dev);
-       }
+               continue;
 
-       pci_set_drvdata(pdev, priv);
-
-       return 0;
-
-out_port:
-       com20020pci_remove(pdev);
+err_free_arcdev:
+               free_arcdev(dev);
+               break;
+       }
+       if (ret)
+               com20020pci_remove(pdev);
        return ret;
 }
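
The error handling above switches from a single goto out of the loop to per-iteration cleanup: the freshly allocated arcdev that is not yet on priv->list_dev is freed in place, then the loop breaks and the common remove path unwinds everything that was tracked. A generic hedged sketch of that shape; every name below is hypothetical.

#include <errno.h>

struct item;
struct item *item_alloc(void);
int item_register(struct item *it); /* tracks it on a list on success */
void item_free(struct item *it);
void remove_all_registered(void);

int probe_items(int n)
{
	int i, ret = 0;

	for (i = 0; i < n; i++) {
		struct item *it = item_alloc();

		if (!it) {
			ret = -ENOMEM;
			break;
		}
		ret = item_register(it);
		if (ret) {
			item_free(it); /* untracked: must be freed here */
			break;
		}
	}
	if (ret)
		remove_all_registered(); /* unwinds only tracked items */
	return ret;
}
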
 
index ef474ba..6958830 100644 (file)
@@ -212,18 +212,6 @@ static const struct can_bittiming_const c_can_bittiming_const = {
        .brp_inc = 1,
 };
 
-static inline void c_can_pm_runtime_enable(const struct c_can_priv *priv)
-{
-       if (priv->device)
-               pm_runtime_enable(priv->device);
-}
-
-static inline void c_can_pm_runtime_disable(const struct c_can_priv *priv)
-{
-       if (priv->device)
-               pm_runtime_disable(priv->device);
-}
-
 static inline void c_can_pm_runtime_get_sync(const struct c_can_priv *priv)
 {
        if (priv->device)
@@ -1335,7 +1323,6 @@ static const struct net_device_ops c_can_netdev_ops = {
 
 int register_c_can_dev(struct net_device *dev)
 {
-       struct c_can_priv *priv = netdev_priv(dev);
        int err;
 
        /* Deactivate pins to prevent DRA7 DCAN IP from being
@@ -1345,28 +1332,19 @@ int register_c_can_dev(struct net_device *dev)
         */
        pinctrl_pm_select_sleep_state(dev->dev.parent);
 
-       c_can_pm_runtime_enable(priv);
-
        dev->flags |= IFF_ECHO; /* we support local echo */
        dev->netdev_ops = &c_can_netdev_ops;
 
        err = register_candev(dev);
-       if (err)
-               c_can_pm_runtime_disable(priv);
-       else
+       if (!err)
                devm_can_led_init(dev);
-
        return err;
 }
 EXPORT_SYMBOL_GPL(register_c_can_dev);
 
 void unregister_c_can_dev(struct net_device *dev)
 {
-       struct c_can_priv *priv = netdev_priv(dev);
-
        unregister_candev(dev);
-
-       c_can_pm_runtime_disable(priv);
 }
 EXPORT_SYMBOL_GPL(unregister_c_can_dev);
 
index 406b484..7efb60b 100644 (file)
@@ -239,12 +239,13 @@ static void c_can_pci_remove(struct pci_dev *pdev)
 {
        struct net_device *dev = pci_get_drvdata(pdev);
        struct c_can_priv *priv = netdev_priv(dev);
+       void __iomem *addr = priv->base;
 
        unregister_c_can_dev(dev);
 
        free_c_can_dev(dev);
 
-       pci_iounmap(pdev, priv->base);
+       pci_iounmap(pdev, addr);
        pci_disable_msi(pdev);
        pci_clear_master(pdev);
        pci_release_regions(pdev);
index 05f425c..47b251b 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/list.h>
 #include <linux/io.h>
 #include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
 #include <linux/clk.h>
 #include <linux/of.h>
 #include <linux/of_device.h>
@@ -386,6 +387,7 @@ static int c_can_plat_probe(struct platform_device *pdev)
        platform_set_drvdata(pdev, dev);
        SET_NETDEV_DEV(dev, &pdev->dev);
 
+       pm_runtime_enable(priv->device);
        ret = register_c_can_dev(dev);
        if (ret) {
                dev_err(&pdev->dev, "registering %s failed (err=%d)\n",
@@ -398,6 +400,7 @@ static int c_can_plat_probe(struct platform_device *pdev)
        return 0;
 
 exit_free_device:
+       pm_runtime_disable(priv->device);
        free_c_can_dev(dev);
 exit:
        dev_err(&pdev->dev, "probe failed\n");
@@ -408,9 +411,10 @@ exit:
 static int c_can_plat_remove(struct platform_device *pdev)
 {
        struct net_device *dev = platform_get_drvdata(pdev);
+       struct c_can_priv *priv = netdev_priv(dev);
 
        unregister_c_can_dev(dev);
-
+       pm_runtime_disable(priv->device);
        free_c_can_dev(dev);
 
        return 0;
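
The pm_runtime calls move out of the shared register/unregister helpers and into the platform driver, so enable and disable live in the same probe/remove pair. A hedged sketch of the balanced shape; the demo_* helpers are invented.

#include <linux/platform_device.h>
#include <linux/pm_runtime.h>

int demo_register(struct platform_device *pdev);    /* hypothetical */
void demo_unregister(struct platform_device *pdev); /* hypothetical */

static int demo_probe(struct platform_device *pdev)
{
	int ret;

	pm_runtime_enable(&pdev->dev); /* owned by the code that owns remove() */
	ret = demo_register(pdev);
	if (ret)
		pm_runtime_disable(&pdev->dev); /* unwind on failure */
	return ret;
}

static int demo_remove(struct platform_device *pdev)
{
	demo_unregister(pdev);
	pm_runtime_disable(&pdev->dev); /* balances probe() */
	return 0;
}
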
index d9281ae..311d856 100644 (file)
@@ -239,6 +239,7 @@ void can_setup(struct net_device *dev)
 struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
                                    unsigned int txqs, unsigned int rxqs)
 {
+       struct can_ml_priv *can_ml;
        struct net_device *dev;
        struct can_priv *priv;
        int size;
@@ -270,7 +271,8 @@ struct net_device *alloc_candev_mqs(int sizeof_priv, unsigned int echo_skb_max,
        priv = netdev_priv(dev);
        priv->dev = dev;
 
-       dev->ml_priv = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN);
+       can_ml = (void *)priv + ALIGN(sizeof_priv, NETDEV_ALIGN);
+       can_set_ml_priv(dev, can_ml);
 
        if (echo_skb_max) {
                priv->echo_skb_max = echo_skb_max;
index 867f6be..f5d79e6 100644 (file)
@@ -355,6 +355,7 @@ static void can_dellink(struct net_device *dev, struct list_head *head)
 
 struct rtnl_link_ops can_link_ops __read_mostly = {
        .kind           = "can",
+       .netns_refund   = true,
        .maxtype        = IFLA_CAN_MAX,
        .policy         = can_policy,
        .setup          = can_setup,
index 971ada3..57f3635 100644 (file)
@@ -697,11 +697,17 @@ static int flexcan_chip_disable(struct flexcan_priv *priv)
 static int flexcan_chip_freeze(struct flexcan_priv *priv)
 {
        struct flexcan_regs __iomem *regs = priv->regs;
-       unsigned int timeout = 1000 * 1000 * 10 / priv->can.bittiming.bitrate;
+       unsigned int timeout;
+       u32 bitrate = priv->can.bittiming.bitrate;
        u32 reg;
 
+       if (bitrate)
+               timeout = 1000 * 1000 * 10 / bitrate;
+       else
+               timeout = FLEXCAN_TIMEOUT_US / 10;
+
        reg = priv->read(&regs->mcr);
-       reg |= FLEXCAN_MCR_HALT;
+       reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT;
        priv->write(reg, &regs->mcr);
 
        while (timeout-- && !(priv->read(&regs->mcr) & FLEXCAN_MCR_FRZ_ACK))
@@ -1480,10 +1486,13 @@ static int flexcan_chip_start(struct net_device *dev)
 
        flexcan_set_bittiming(dev);
 
+       /* set freeze, halt */
+       err = flexcan_chip_freeze(priv);
+       if (err)
+               goto out_chip_disable;
+
        /* MCR
         *
-        * enable freeze
-        * halt now
         * only supervisor access
         * enable warning int
         * enable individual RX masking
@@ -1492,9 +1501,8 @@ static int flexcan_chip_start(struct net_device *dev)
         */
        reg_mcr = priv->read(&regs->mcr);
        reg_mcr &= ~FLEXCAN_MCR_MAXMB(0xff);
-       reg_mcr |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT | FLEXCAN_MCR_SUPV |
-               FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ | FLEXCAN_MCR_IDAM_C |
-               FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
+       reg_mcr |= FLEXCAN_MCR_SUPV | FLEXCAN_MCR_WRN_EN | FLEXCAN_MCR_IRMQ |
+               FLEXCAN_MCR_IDAM_C | FLEXCAN_MCR_MAXMB(priv->tx_mb_idx);
 
        /* MCR
         *
@@ -1865,10 +1873,14 @@ static int register_flexcandev(struct net_device *dev)
        if (err)
                goto out_chip_disable;
 
-       /* set freeze, halt and activate FIFO, restrict register access */
+       /* set freeze, halt */
+       err = flexcan_chip_freeze(priv);
+       if (err)
+               goto out_chip_disable;
+
+       /* activate FIFO, restrict register access */
        reg = priv->read(&regs->mcr);
-       reg |= FLEXCAN_MCR_FRZ | FLEXCAN_MCR_HALT |
-               FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV;
+       reg |= FLEXCAN_MCR_FEN | FLEXCAN_MCR_SUPV;
        priv->write(reg, &regs->mcr);
 
        /* Currently we only support newer versions of this core
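
flexcan_chip_freeze() previously divided by priv->can.bittiming.bitrate, which is still zero when freeze is requested before a bitrate has been configured; the guard above substitutes FLEXCAN_TIMEOUT_US / 10 in that case. A hedged sketch of the computation; the fallback constant below is illustrative.

#include <stdint.h>

#define TIMEOUT_US_FALLBACK 250 /* illustrative, not the driver's constant */

static uint32_t freeze_poll_budget(uint32_t bitrate)
{
	if (bitrate)
		return 1000 * 1000 * 10 / bitrate; /* roughly 10 bit times */
	return TIMEOUT_US_FALLBACK / 10;           /* bitrate not set yet */
}
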
index 37e0501..74d9899 100644 (file)
@@ -57,6 +57,7 @@ MODULE_DESCRIPTION("CAN driver for Kvaser CAN/PCIe devices");
 #define KVASER_PCIEFD_KCAN_STAT_REG 0x418
 #define KVASER_PCIEFD_KCAN_MODE_REG 0x41c
 #define KVASER_PCIEFD_KCAN_BTRN_REG 0x420
+#define KVASER_PCIEFD_KCAN_BUS_LOAD_REG 0x424
 #define KVASER_PCIEFD_KCAN_BTRD_REG 0x428
 #define KVASER_PCIEFD_KCAN_PWM_REG 0x430
 /* Loopback control register */
@@ -949,6 +950,9 @@ static int kvaser_pciefd_setup_can_ctrls(struct kvaser_pciefd *pcie)
                timer_setup(&can->bec_poll_timer, kvaser_pciefd_bec_poll_timer,
                            0);
 
+               /* Disable Bus load reporting */
+               iowrite32(0, can->reg_base + KVASER_PCIEFD_KCAN_BUS_LOAD_REG);
+
                tx_npackets = ioread32(can->reg_base +
                                       KVASER_PCIEFD_KCAN_TX_NPACKETS_REG);
                if (((tx_npackets >> KVASER_PCIEFD_KCAN_TX_NPACKETS_MAX_SHIFT) &
index 3752520..0c8d36b 100644 (file)
@@ -501,9 +501,6 @@ static int m_can_do_rx_poll(struct net_device *dev, int quota)
        }
 
        while ((rxfs & RXFS_FFL_MASK) && (quota > 0)) {
-               if (rxfs & RXFS_RFL)
-                       netdev_warn(dev, "Rx FIFO 0 Message Lost\n");
-
                m_can_read_fifo(dev, rxfs);
 
                quota--;
@@ -876,7 +873,7 @@ static int m_can_rx_peripheral(struct net_device *dev)
 {
        struct m_can_classdev *cdev = netdev_priv(dev);
 
-       m_can_rx_handler(dev, 1);
+       m_can_rx_handler(dev, M_CAN_NAPI_WEIGHT);
 
        m_can_enable_all_interrupts(cdev);
 
index b7caec7..4147cec 100644 (file)
@@ -237,14 +237,14 @@ static int tcan4x5x_init(struct m_can_classdev *cdev)
        if (ret)
                return ret;
 
+       /* Zero out the MCAN buffers */
+       m_can_init_ram(cdev);
+
        ret = regmap_update_bits(tcan4x5x->regmap, TCAN4X5X_CONFIG,
                                 TCAN4X5X_MODE_SEL_MASK, TCAN4X5X_MODE_NORMAL);
        if (ret)
                return ret;
 
-       /* Zero out the MCAN buffers */
-       m_can_init_ram(cdev);
-
        return ret;
 }
 
index 0df1cdf..1df3c4b 100644 (file)
@@ -21,7 +21,6 @@
 
 MODULE_AUTHOR("Stephane Grosjean <s.grosjean@peak-system.com>");
 MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCIe/M.2 FD family cards");
-MODULE_SUPPORTED_DEVICE("PEAK PCAN PCIe/M.2 FD CAN cards");
 MODULE_LICENSE("GPL v2");
 
 #define PCIEFD_DRV_NAME                "peak_pciefd"
index 6f88c99..4ab9175 100644 (file)
@@ -21,7 +21,6 @@
 
 MODULE_AUTHOR("Sebastian Haas <haas@ems-wuenche.com>");
 MODULE_DESCRIPTION("Socket-CAN driver for EMS CPC-PCI/PCIe/104P CAN cards");
-MODULE_SUPPORTED_DEVICE("EMS CPC-PCI/PCIe/104P CAN card");
 MODULE_LICENSE("GPL v2");
 
 #define EMS_PCI_V1_MAX_CHAN 2
index 770304e..e21b169 100644 (file)
@@ -21,7 +21,6 @@
 
 MODULE_AUTHOR("Markus Plessing <plessing@ems-wuensche.com>");
 MODULE_DESCRIPTION("Socket-CAN driver for EMS CPC-CARD cards");
-MODULE_SUPPORTED_DEVICE("EMS CPC-CARD CAN card");
 MODULE_LICENSE("GPL v2");
 
 #define EMS_PCMCIA_MAX_CHAN 2
index 0ea6b71..95fe9ee 100644 (file)
@@ -33,7 +33,6 @@
 
 MODULE_AUTHOR("Per Dalen <per.dalen@cnw.se>");
 MODULE_DESCRIPTION("Socket-CAN driver for KVASER PCAN PCI cards");
-MODULE_SUPPORTED_DEVICE("KVASER PCAN PCI CAN card");
 MODULE_LICENSE("GPL v2");
 
 #define MAX_NO_OF_CHANNELS        4 /* max no of channels on a single card */
index 4713921..84eac8c 100644 (file)
@@ -24,8 +24,6 @@
 
 MODULE_AUTHOR("Stephane Grosjean <s.grosjean@peak-system.com>");
 MODULE_DESCRIPTION("Socket-CAN driver for PEAK PCAN PCI family cards");
-MODULE_SUPPORTED_DEVICE("PEAK PCAN PCI/PCIe/PCIeC miniPCI CAN cards");
-MODULE_SUPPORTED_DEVICE("PEAK PCAN miniPCIe/cPCI PC/104+ PCI/104e CAN Cards");
 MODULE_LICENSE("GPL v2");
 
 #define DRV_NAME  "peak_pci"
index cf951a7..131a084 100644 (file)
@@ -22,7 +22,6 @@
 MODULE_AUTHOR("Stephane Grosjean <s.grosjean@peak-system.com>");
 MODULE_DESCRIPTION("CAN driver for PEAK-System PCAN-PC Cards");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("PEAK PCAN-PC Card");
 
 /* PEAK-System PCMCIA driver name */
 #define PCC_NAME               "peak_pcmcia"
index 8567958..5de1ebb 100644 (file)
 MODULE_AUTHOR("Pavel Cheblakov <P.B.Cheblakov@inp.nsk.su>");
 MODULE_DESCRIPTION("Socket-CAN driver for PLX90xx PCI-bridge cards with "
                   "the SJA1000 chips");
-MODULE_SUPPORTED_DEVICE("Adlink PCI-7841/cPCI-7841, "
-                       "Adlink PCI-7841/cPCI-7841 SE, "
-                       "Marathon CAN-bus-PCI, "
-                       "Marathon CAN-bus-PCIe, "
-                       "TEWS TECHNOLOGIES TPMC810, "
-                       "esd CAN-PCI/CPCI/PCI104/200, "
-                       "esd CAN-PCI/PMC/266, "
-                       "esd CAN-PCIe/2000, "
-                       "Connect Tech Inc. CANpro/104-Plus Opto (CRG001), "
-                       "IXXAT PC-I 04/PCI, "
-                       "ELCUS CAN-200-PCI, "
-                       "ASEM DUAL CAN-RAW")
 MODULE_LICENSE("GPL v2");
 
 #define PLX_PCI_MAX_CHAN 2
index a1bd1be..30c8d53 100644 (file)
@@ -516,6 +516,7 @@ static struct slcan *slc_alloc(void)
        int i;
        char name[IFNAMSIZ];
        struct net_device *dev = NULL;
+       struct can_ml_priv *can_ml;
        struct slcan       *sl;
        int size;
 
@@ -538,7 +539,8 @@ static struct slcan *slc_alloc(void)
 
        dev->base_addr  = i;
        sl = netdev_priv(dev);
-       dev->ml_priv = (void *)sl + ALIGN(sizeof(*sl), NETDEV_ALIGN);
+       can_ml = (void *)sl + ALIGN(sizeof(*sl), NETDEV_ALIGN);
+       can_set_ml_priv(dev, can_ml);
 
        /* Initialize channel control data */
        sl->magic = SLCAN_MAGIC;
index 3c5b929..799e9d5 100644 (file)
@@ -335,8 +335,6 @@ static void mcp251xfd_ring_init(struct mcp251xfd_priv *priv)
        u8 len;
        int i, j;
 
-       netdev_reset_queue(priv->ndev);
-
        /* TEF */
        tef_ring = priv->tef;
        tef_ring->head = 0;
@@ -1249,8 +1247,7 @@ mcp251xfd_handle_tefif_recover(const struct mcp251xfd_priv *priv, const u32 seq)
 
 static int
 mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
-                          const struct mcp251xfd_hw_tef_obj *hw_tef_obj,
-                          unsigned int *frame_len_ptr)
+                          const struct mcp251xfd_hw_tef_obj *hw_tef_obj)
 {
        struct net_device_stats *stats = &priv->ndev->stats;
        u32 seq, seq_masked, tef_tail_masked;
@@ -1272,8 +1269,7 @@ mcp251xfd_handle_tefif_one(struct mcp251xfd_priv *priv,
        stats->tx_bytes +=
                can_rx_offload_get_echo_skb(&priv->offload,
                                            mcp251xfd_get_tef_tail(priv),
-                                           hw_tef_obj->ts,
-                                           frame_len_ptr);
+                                           hw_tef_obj->ts, NULL);
        stats->tx_packets++;
        priv->tef->tail++;
 
@@ -1331,7 +1327,6 @@ mcp251xfd_tef_obj_read(const struct mcp251xfd_priv *priv,
 static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
 {
        struct mcp251xfd_hw_tef_obj hw_tef_obj[MCP251XFD_TX_OBJ_NUM_MAX];
-       unsigned int total_frame_len = 0;
        u8 tef_tail, len, l;
        int err, i;
 
@@ -1353,9 +1348,7 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
        }
 
        for (i = 0; i < len; i++) {
-               unsigned int frame_len;
-
-               err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i], &frame_len);
+               err = mcp251xfd_handle_tefif_one(priv, &hw_tef_obj[i]);
                /* -EAGAIN means the Sequence Number in the TEF
                 * doesn't match our tef_tail. This can happen if we
                 * read the TEF objects too early. Leave loop let the
@@ -1365,8 +1358,6 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
                        goto out_netif_wake_queue;
                if (err)
                        return err;
-
-               total_frame_len += frame_len;
        }
 
  out_netif_wake_queue:
@@ -1397,7 +1388,6 @@ static int mcp251xfd_handle_tefif(struct mcp251xfd_priv *priv)
                        return err;
 
                tx_ring->tail += len;
-               netdev_completed_queue(priv->ndev, len, total_frame_len);
 
                err = mcp251xfd_check_tef_tail(priv);
                if (err)
@@ -2443,7 +2433,6 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
        struct mcp251xfd_priv *priv = netdev_priv(ndev);
        struct mcp251xfd_tx_ring *tx_ring = priv->tx;
        struct mcp251xfd_tx_obj *tx_obj;
-       unsigned int frame_len;
        u8 tx_head;
        int err;
 
@@ -2462,9 +2451,7 @@ static netdev_tx_t mcp251xfd_start_xmit(struct sk_buff *skb,
        if (mcp251xfd_get_tx_free(tx_ring) == 0)
                netif_stop_queue(ndev);
 
-       frame_len = can_skb_get_frame_len(skb);
-       can_put_echo_skb(skb, ndev, tx_head, frame_len);
-       netdev_sent_queue(priv->ndev, frame_len);
+       can_put_echo_skb(skb, ndev, tx_head, 0);
 
        err = mcp251xfd_tx_obj_write(priv, tx_obj);
        if (err)
index c1e5d5b..538f4d9 100644 (file)
@@ -73,6 +73,7 @@ config CAN_KVASER_USB
            - Kvaser Memorator Pro 5xHS
            - Kvaser USBcan Light 4xHS
            - Kvaser USBcan Pro 2xHS v2
+           - Kvaser USBcan Pro 4xHS
            - Kvaser USBcan Pro 5xHS
            - Kvaser U100
            - Kvaser U100P
index 2b7efd2..4e97da8 100644 (file)
@@ -86,8 +86,9 @@
 #define USB_U100_PRODUCT_ID                    273
 #define USB_U100P_PRODUCT_ID                   274
 #define USB_U100S_PRODUCT_ID                   275
+#define USB_USBCAN_PRO_4HS_PRODUCT_ID          276
 #define USB_HYDRA_PRODUCT_ID_END \
-       USB_U100S_PRODUCT_ID
+       USB_USBCAN_PRO_4HS_PRODUCT_ID
 
 static inline bool kvaser_is_leaf(const struct usb_device_id *id)
 {
@@ -193,6 +194,7 @@ static const struct usb_device_id kvaser_usb_table[] = {
        { USB_DEVICE(KVASER_VENDOR_ID, USB_U100_PRODUCT_ID) },
        { USB_DEVICE(KVASER_VENDOR_ID, USB_U100P_PRODUCT_ID) },
        { USB_DEVICE(KVASER_VENDOR_ID, USB_U100S_PRODUCT_ID) },
+       { USB_DEVICE(KVASER_VENDOR_ID, USB_USBCAN_PRO_4HS_PRODUCT_ID) },
        { }
 };
 MODULE_DEVICE_TABLE(usb, kvaser_usb_table);
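
Supporting the new 4xHS device is purely ID plumbing: a product-ID define, bumping the hydra end marker, and a usb_device_id table entry. A hedged generic sketch of such a table; the vendor ID value here is illustrative only.

#include <linux/module.h>
#include <linux/usb.h>

#define DEMO_VENDOR_ID		0x0bfd /* illustrative */
#define DEMO_NEW_PRODUCT_ID	276    /* the ID added by the hunk above */

static const struct usb_device_id demo_usb_table[] = {
	{ USB_DEVICE(DEMO_VENDOR_ID, DEMO_NEW_PRODUCT_ID) },
	{ } /* terminating entry */
};
MODULE_DEVICE_TABLE(usb, demo_usb_table);
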
index e6c1e5d..e393e84 100644 (file)
@@ -18,8 +18,6 @@
 
 #include "pcan_usb_core.h"
 
-MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB adapter");
-
 /* PCAN-USB Endpoints */
 #define PCAN_USB_EP_CMDOUT             1
 #define PCAN_USB_EP_CMDIN              (PCAN_USB_EP_CMDOUT | USB_DIR_IN)
index f347ecc..bae0785 100644 (file)
@@ -16,9 +16,6 @@
 #include "pcan_usb_core.h"
 #include "pcan_usb_pro.h"
 
-MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB FD adapter");
-MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB Pro FD adapter");
-
 #define PCAN_USBPROFD_CHANNEL_COUNT    2
 #define PCAN_USBFD_CHANNEL_COUNT       1
 
index 275087c..18fa180 100644 (file)
@@ -17,8 +17,6 @@
 #include "pcan_usb_core.h"
 #include "pcan_usb_pro.h"
 
-MODULE_SUPPORTED_DEVICE("PEAK-System PCAN-USB Pro adapter");
-
 #define PCAN_USBPRO_CHANNEL_COUNT      2
 
 /* PCAN-USB Pro adapter internal clock (MHz) */
index 39ca14b..067705e 100644 (file)
@@ -153,7 +153,7 @@ static void vcan_setup(struct net_device *dev)
        dev->addr_len           = 0;
        dev->tx_queue_len       = 0;
        dev->flags              = IFF_NOARP;
-       dev->ml_priv            = netdev_priv(dev);
+       can_set_ml_priv(dev, netdev_priv(dev));
 
        /* set flags according to driver capabilities */
        if (echo)
index f9a524c..8861a7d 100644 (file)
@@ -141,6 +141,8 @@ static const struct net_device_ops vxcan_netdev_ops = {
 
 static void vxcan_setup(struct net_device *dev)
 {
+       struct can_ml_priv *can_ml;
+
        dev->type               = ARPHRD_CAN;
        dev->mtu                = CANFD_MTU;
        dev->hard_header_len    = 0;
@@ -149,7 +151,9 @@ static void vxcan_setup(struct net_device *dev)
        dev->flags              = (IFF_NOARP|IFF_ECHO);
        dev->netdev_ops         = &vxcan_netdev_ops;
        dev->needs_free_netdev  = true;
-       dev->ml_priv            = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN);
+
+       can_ml = netdev_priv(dev) + ALIGN(sizeof(struct vxcan_priv), NETDEV_ALIGN);
+       can_set_ml_priv(dev, can_ml);
 }
 
 /* forward declaration for rtnl_create_link() */
index ae86ded..eb44372 100644 (file)
@@ -543,6 +543,19 @@ static void b53_port_set_mcast_flood(struct b53_device *dev, int port,
        b53_write16(dev, B53_CTRL_PAGE, B53_IPMC_FLOOD_MASK, mc);
 }
 
+static void b53_port_set_learning(struct b53_device *dev, int port,
+                                 bool learning)
+{
+       u16 reg;
+
+       b53_read16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, &reg);
+       if (learning)
+               reg &= ~BIT(port);
+       else
+               reg |= BIT(port);
+       b53_write16(dev, B53_CTRL_PAGE, B53_DIS_LEARNING, reg);
+}
+
 int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
 {
        struct b53_device *dev = ds->priv;
@@ -557,6 +570,7 @@ int b53_enable_port(struct dsa_switch *ds, int port, struct phy_device *phy)
 
        b53_port_set_ucast_flood(dev, port, true);
        b53_port_set_mcast_flood(dev, port, true);
+       b53_port_set_learning(dev, port, false);
 
        if (dev->ops->irq_enable)
                ret = dev->ops->irq_enable(dev, port);
@@ -691,6 +705,7 @@ static void b53_enable_cpu_port(struct b53_device *dev, int port)
 
        b53_port_set_ucast_flood(dev, port, true);
        b53_port_set_mcast_flood(dev, port, true);
+       b53_port_set_learning(dev, port, false);
 }
 
 static void b53_enable_mib(struct b53_device *dev)
@@ -1090,13 +1105,6 @@ static int b53_setup(struct dsa_switch *ds)
                        b53_disable_port(ds, port);
        }
 
-       /* Let DSA handle the case were multiple bridges span the same switch
-        * device and different VLAN awareness settings are requested, which
-        * would be breaking filtering semantics for any of the other bridge
-        * devices. (not hardware supported)
-        */
-       ds->vlan_filtering_is_global = true;
-
        return b53_setup_devlink_resources(ds);
 }
 
@@ -1953,19 +1961,20 @@ void b53_br_fast_age(struct dsa_switch *ds, int port)
 }
 EXPORT_SYMBOL(b53_br_fast_age);
 
-static int b53_br_flags_pre(struct dsa_switch *ds, int port,
-                           struct switchdev_brport_flags flags,
-                           struct netlink_ext_ack *extack)
+int b53_br_flags_pre(struct dsa_switch *ds, int port,
+                    struct switchdev_brport_flags flags,
+                    struct netlink_ext_ack *extack)
 {
-       if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD))
+       if (flags.mask & ~(BR_FLOOD | BR_MCAST_FLOOD | BR_LEARNING))
                return -EINVAL;
 
        return 0;
 }
+EXPORT_SYMBOL(b53_br_flags_pre);
 
-static int b53_br_flags(struct dsa_switch *ds, int port,
-                       struct switchdev_brport_flags flags,
-                       struct netlink_ext_ack *extack)
+int b53_br_flags(struct dsa_switch *ds, int port,
+                struct switchdev_brport_flags flags,
+                struct netlink_ext_ack *extack)
 {
        if (flags.mask & BR_FLOOD)
                b53_port_set_ucast_flood(ds->priv, port,
@@ -1973,17 +1982,22 @@ static int b53_br_flags(struct dsa_switch *ds, int port,
        if (flags.mask & BR_MCAST_FLOOD)
                b53_port_set_mcast_flood(ds->priv, port,
                                         !!(flags.val & BR_MCAST_FLOOD));
+       if (flags.mask & BR_LEARNING)
+               b53_port_set_learning(ds->priv, port,
+                                     !!(flags.val & BR_LEARNING));
 
        return 0;
 }
+EXPORT_SYMBOL(b53_br_flags);
 
-static int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
-                          struct netlink_ext_ack *extack)
+int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
+                   struct netlink_ext_ack *extack)
 {
        b53_port_set_mcast_flood(ds->priv, port, mrouter);
 
        return 0;
 }
+EXPORT_SYMBOL(b53_set_mrouter);
 
 static bool b53_possible_cpu_port(struct dsa_switch *ds, int port)
 {
@@ -2643,6 +2657,13 @@ struct b53_device *b53_switch_alloc(struct device *base,
        ds->ops = &b53_switch_ops;
        ds->untag_bridge_pvid = true;
        dev->vlan_enabled = true;
+       /* Let DSA handle the case where multiple bridges span the same switch
+        * device and different VLAN awareness settings are requested, which
+        * would be breaking filtering semantics for any of the other bridge
+        * devices. (not hardware supported)
+        */
+       ds->vlan_filtering_is_global = true;
+
        mutex_init(&dev->reg_mutex);
        mutex_init(&dev->stats_mutex);
 
index faf983f..8419bb7 100644 (file)
@@ -326,6 +326,14 @@ int b53_br_join(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_leave(struct dsa_switch *ds, int port, struct net_device *bridge);
 void b53_br_set_stp_state(struct dsa_switch *ds, int port, u8 state);
 void b53_br_fast_age(struct dsa_switch *ds, int port);
+int b53_br_flags_pre(struct dsa_switch *ds, int port,
+                    struct switchdev_brport_flags flags,
+                    struct netlink_ext_ack *extack);
+int b53_br_flags(struct dsa_switch *ds, int port,
+                struct switchdev_brport_flags flags,
+                struct netlink_ext_ack *extack);
+int b53_set_mrouter(struct dsa_switch *ds, int port, bool mrouter,
+                   struct netlink_ext_ack *extack);
 int b53_setup_devlink_resources(struct dsa_switch *ds);
 void b53_port_event(struct dsa_switch *ds, int port);
 void b53_phylink_validate(struct dsa_switch *ds, int port,
index c90985c..b2c539a 100644 (file)
 #define B53_UC_FLOOD_MASK              0x32
 #define B53_MC_FLOOD_MASK              0x34
 #define B53_IPMC_FLOOD_MASK            0x36
+#define B53_DIS_LEARNING               0x3c
 
 /*
  * Override Ports 0-7 State on devices with xMII interfaces (8 bit)
index 1857aa9..ba5d546 100644 (file)
@@ -114,7 +114,10 @@ static void bcm_sf2_imp_setup(struct dsa_switch *ds, int port)
                /* Force link status for IMP port */
                reg = core_readl(priv, offset);
                reg |= (MII_SW_OR | LINK_STS);
-               reg &= ~GMII_SPEED_UP_2G;
+               if (priv->type == BCM4908_DEVICE_ID)
+                       reg |= GMII_SPEED_UP_2G;
+               else
+                       reg &= ~GMII_SPEED_UP_2G;
                core_writel(priv, reg, offset);
 
                /* Enable Broadcast, Multicast, Unicast forwarding to IMP port */
@@ -223,23 +226,10 @@ static int bcm_sf2_port_setup(struct dsa_switch *ds, int port,
        reg &= ~P_TXQ_PSM_VDD(port);
        core_writel(priv, reg, CORE_MEM_PSM_VDD_CTRL);
 
-       /* Enable learning */
-       reg = core_readl(priv, CORE_DIS_LEARN);
-       reg &= ~BIT(port);
-       core_writel(priv, reg, CORE_DIS_LEARN);
-
        /* Enable Broadcom tags for that port if requested */
-       if (priv->brcm_tag_mask & BIT(port)) {
+       if (priv->brcm_tag_mask & BIT(port))
                b53_brcm_hdr_setup(ds, port);
 
-               /* Disable learning on ASP port */
-               if (port == 7) {
-                       reg = core_readl(priv, CORE_DIS_LEARN);
-                       reg |= BIT(port);
-                       core_writel(priv, reg, CORE_DIS_LEARN);
-               }
-       }
-
        /* Configure Traffic Class to QoS mapping, allow each priority to map
         * to a different queue number
         */
@@ -419,7 +409,7 @@ static int bcm_sf2_sw_rst(struct bcm_sf2_priv *priv)
        /* The watchdog reset does not work on 7278, we need to hit the
         * "external" reset line through the reset controller.
         */
-       if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev)) {
+       if (priv->type == BCM7278_DEVICE_ID) {
                ret = reset_control_assert(priv->rcdev);
                if (ret)
                        return ret;
@@ -598,8 +588,10 @@ static u32 bcm_sf2_sw_get_phy_flags(struct dsa_switch *ds, int port)
         * in bits 15:8 and the patch level in bits 7:0 which is exactly what
         * the REG_PHY_REVISION register layout is.
         */
-
-       return priv->hw_params.gphy_rev;
+       if (priv->int_phy_mask & BIT(port))
+               return priv->hw_params.gphy_rev;
+       else
+               return 0;
 }
 
 static void bcm_sf2_sw_validate(struct dsa_switch *ds, int port,
@@ -1117,7 +1109,10 @@ static const struct dsa_switch_ops bcm_sf2_ops = {
        .set_mac_eee            = b53_set_mac_eee,
        .port_bridge_join       = b53_br_join,
        .port_bridge_leave      = b53_br_leave,
+       .port_pre_bridge_flags  = b53_br_flags_pre,
+       .port_bridge_flags      = b53_br_flags,
        .port_stp_state_set     = b53_br_set_stp_state,
+       .port_set_mrouter       = b53_set_mrouter,
        .port_fast_age          = b53_br_fast_age,
        .port_vlan_filtering    = b53_vlan_filtering,
        .port_vlan_add          = b53_vlan_add,
@@ -1275,7 +1270,7 @@ static int bcm_sf2_sw_probe(struct platform_device *pdev)
 
        priv->rcdev = devm_reset_control_get_optional_exclusive(&pdev->dev,
                                                                "switch");
-       if (PTR_ERR(priv->rcdev) == -EPROBE_DEFER)
+       if (IS_ERR(priv->rcdev))
                return PTR_ERR(priv->rcdev);
 
        /* Auto-detection using standard registers will not work, so
@@ -1436,7 +1431,7 @@ static int bcm_sf2_sw_remove(struct platform_device *pdev)
        bcm_sf2_mdio_unregister(priv);
        clk_disable_unprepare(priv->clk_mdiv);
        clk_disable_unprepare(priv->clk);
-       if (priv->type == BCM7278_DEVICE_ID && !IS_ERR(priv->rcdev))
+       if (priv->type == BCM7278_DEVICE_ID)
                reset_control_assert(priv->rcdev);
 
        return 0;
index c17de2b..9871d7c 100644 (file)
@@ -436,34 +436,32 @@ mt7530_pad_clk_setup(struct dsa_switch *ds, phy_interface_t interface)
                             TD_DM_DRVP(8) | TD_DM_DRVN(8));
 
        /* Setup core clock for MT7530 */
-       if (!trgint) {
-               /* Disable MT7530 core clock */
-               core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
-
-               /* Disable PLL, since phy_device has not yet been created
-                * provided for phy_[read,write]_mmd_indirect is called, we
-                * provide our own core_write_mmd_indirect to complete this
-                * function.
-                */
-               core_write_mmd_indirect(priv,
-                                       CORE_GSWPLL_GRP1,
-                                       MDIO_MMD_VEND2,
-                                       0);
-
-               /* Set core clock into 500Mhz */
-               core_write(priv, CORE_GSWPLL_GRP2,
-                          RG_GSWPLL_POSDIV_500M(1) |
-                          RG_GSWPLL_FBKDIV_500M(25));
+       /* Disable MT7530 core clock */
+       core_clear(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
 
-               /* Enable PLL */
-               core_write(priv, CORE_GSWPLL_GRP1,
-                          RG_GSWPLL_EN_PRE |
-                          RG_GSWPLL_POSDIV_200M(2) |
-                          RG_GSWPLL_FBKDIV_200M(32));
-
-               /* Enable MT7530 core clock */
-               core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
-       }
+       /* Disable PLL, since phy_device has not yet been created
+        * provided for phy_[read,write]_mmd_indirect is called, we
+        * provide our own core_write_mmd_indirect to complete this
+        * function.
+        */
+       core_write_mmd_indirect(priv,
+                               CORE_GSWPLL_GRP1,
+                               MDIO_MMD_VEND2,
+                               0);
+
+       /* Set core clock into 500Mhz */
+       core_write(priv, CORE_GSWPLL_GRP2,
+                  RG_GSWPLL_POSDIV_500M(1) |
+                  RG_GSWPLL_FBKDIV_500M(25));
+
+       /* Enable PLL */
+       core_write(priv, CORE_GSWPLL_GRP1,
+                  RG_GSWPLL_EN_PRE |
+                  RG_GSWPLL_POSDIV_200M(2) |
+                  RG_GSWPLL_FBKDIV_200M(32));
+
+       /* Enable MT7530 core clock */
+       core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
 
        /* Setup the MT7530 TRGMII Tx Clock */
        core_set(priv, CORE_TRGMII_GSW_CLK_CG, REG_GSWCK_EN);
@@ -1624,6 +1622,7 @@ mtk_get_tag_protocol(struct dsa_switch *ds, int port,
        }
 }
 
+#ifdef CONFIG_GPIOLIB
 static inline u32
 mt7530_gpio_to_bit(unsigned int offset)
 {
@@ -1726,6 +1725,7 @@ mt7530_setup_gpio(struct mt7530_priv *priv)
 
        return devm_gpiochip_add_data(dev, gc, priv);
 }
+#endif /* CONFIG_GPIOLIB */
 
 static int
 mt7530_setup(struct dsa_switch *ds)
@@ -1868,11 +1868,13 @@ mt7530_setup(struct dsa_switch *ds)
                }
        }
 
+#ifdef CONFIG_GPIOLIB
        if (of_property_read_bool(priv->dev->of_node, "gpio-controller")) {
                ret = mt7530_setup_gpio(priv);
                if (ret)
                        return ret;
        }
+#endif /* CONFIG_GPIOLIB */
 
        mt7530_setup_port5(ds, interface);
 
index 7692338..51ea104 100644 (file)
@@ -1922,7 +1922,7 @@ out_unlock_ptp:
                                speed = SPEED_1000;
                        else if (bmcr & BMCR_SPEED100)
                                speed = SPEED_100;
-                       else if (bmcr & BMCR_SPEED10)
+                       else
                                speed = SPEED_10;
 
                        sja1105_sgmii_pcs_force_speed(priv, speed);
@@ -3369,14 +3369,14 @@ static int sja1105_port_ucast_bcast_flood(struct sja1105_private *priv, int to,
                if (flags.val & BR_FLOOD)
                        priv->ucast_egress_floods |= BIT(to);
                else
-                       priv->ucast_egress_floods |= BIT(to);
+                       priv->ucast_egress_floods &= ~BIT(to);
        }
 
        if (flags.mask & BR_BCAST_FLOOD) {
                if (flags.val & BR_BCAST_FLOOD)
                        priv->bcast_egress_floods |= BIT(to);
                else
-                       priv->bcast_egress_floods |= BIT(to);
+                       priv->bcast_egress_floods &= ~BIT(to);
        }
 
        return sja1105_manage_flood_domains(priv);
index 139b7b4..a8efb7f 100644 (file)
@@ -85,7 +85,7 @@ u32 sja1105_crc32(const void *buf, size_t len)
        /* seed */
        crc = ~0;
        for (i = 0; i < len; i += 4) {
-               sja1105_unpack((void *)buf + i, &word, 31, 0, 4);
+               sja1105_unpack(buf + i, &word, 31, 0, 4);
                crc = crc32_le(crc, (u8 *)&word, 4);
        }
        return ~crc;
index f025f96..fde6e99 100644 (file)
@@ -528,7 +528,10 @@ static int xrs700x_hsr_join(struct dsa_switch *ds, int port,
                return -EOPNOTSUPP;
 
        dsa_hsr_foreach_port(dp, ds, hsr) {
-               partner = dp;
+               if (dp->index != port) {
+                       partner = dp;
+                       break;
+               }
        }
 
        /* We can't enable redundancy on the switch until both
@@ -582,7 +585,10 @@ static int xrs700x_hsr_leave(struct dsa_switch *ds, int port,
        unsigned int val;
 
        dsa_hsr_foreach_port(dp, ds, hsr) {
-               partner = dp;
+               if (dp->index != port) {
+                       partner = dp;
+                       break;
+               }
        }
 
        if (!partner)
index dd5c8a9..a60ce90 100644 (file)
 #define AG71XX_REG_RX_SM       0x01b0
 #define AG71XX_REG_TX_SM       0x01b4
 
-#define ETH_SWITCH_HEADER_LEN  2
-
 #define AG71XX_DEFAULT_MSG_ENABLE      \
        (NETIF_MSG_DRV                  \
        | NETIF_MSG_PROBE               \
@@ -933,7 +931,7 @@ static void ag71xx_hw_setup(struct ag71xx *ag)
 
 static unsigned int ag71xx_max_frame_len(unsigned int mtu)
 {
-       return ETH_SWITCH_HEADER_LEN + ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
+       return ETH_HLEN + VLAN_HLEN + mtu + ETH_FCS_LEN;
 }
 
 static void ag71xx_hw_set_macaddr(struct ag71xx *ag, unsigned char *mac)
index 9b7f1af..9e02f88 100644 (file)
@@ -1894,13 +1894,16 @@ static int alx_resume(struct device *dev)
 
        if (!netif_running(alx->dev))
                return 0;
-       netif_device_attach(alx->dev);
 
        rtnl_lock();
        err = __alx_open(alx, true);
        rtnl_unlock();
+       if (err)
+               return err;
 
-       return err;
+       netif_device_attach(alx->dev);
+
+       return 0;
 }
 
 static SIMPLE_DEV_PM_OPS(alx_pm_ops, alx_suspend, alx_resume);
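
The resume fix is an ordering change: attach the netdev only after __alx_open() succeeds, so a failed resume leaves the device detached instead of half-alive. The shape, hedged, with invented demo_* helpers:

struct device;
int demo_reopen(struct device *dev);  /* hypothetical reinit step */
void demo_attach(struct device *dev); /* hypothetical attach stand-in */

static int demo_resume(struct device *dev)
{
	int err;

	err = demo_reopen(dev);
	if (err)
		return err;   /* stay detached on failure */

	demo_attach(dev);     /* attach only once fully reinitialized */
	return 0;
}
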
index f8a168b..cb88ffb 100644 (file)
@@ -54,7 +54,7 @@ config B44_PCI
 config BCM4908_ENET
        tristate "Broadcom BCM4908 internal mac support"
        depends on ARCH_BCM4908 || COMPILE_TEST
-       default y
+       default y if ARCH_BCM4908
        help
          This driver supports Ethernet controller integrated into Broadcom
          BCM4908 family SoCs.
index 9be33dc..98cf82d 100644 (file)
@@ -570,6 +570,7 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 
                if (len < ETH_ZLEN ||
                    (ctl & (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) != (DMA_CTL_STATUS_SOP | DMA_CTL_STATUS_EOP)) {
+                       kfree_skb(slot.skb);
                        enet->netdev->stats.rx_dropped++;
                        break;
                }
@@ -582,6 +583,8 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
 
                enet->netdev->stats.rx_packets++;
                enet->netdev->stats.rx_bytes += len;
+
+               handled++;
        }
 
        if (handled < weight) {
@@ -589,6 +592,9 @@ static int bcm4908_enet_poll(struct napi_struct *napi, int weight)
                bcm4908_enet_intrs_on(enet);
        }
 
+       /* Hardware could disable the ring if it runs out of descriptors */
+       bcm4908_enet_dma_rx_ring_enable(enet, &enet->rx_ring);
+
        return handled;
 }
 
index fd87672..977f097 100644 (file)
@@ -1192,7 +1192,6 @@ static int bcm_enet_stop(struct net_device *dev)
        kdev = &priv->pdev->dev;
 
        netif_stop_queue(dev);
-       netdev_reset_queue(dev);
        napi_disable(&priv->napi);
        if (priv->has_phy)
                phy_stop(dev->phydev);
@@ -1231,6 +1230,9 @@ static int bcm_enet_stop(struct net_device *dev)
        if (priv->has_phy)
                phy_disconnect(dev->phydev);
 
+       /* reset BQL after forced tx reclaim to prevent kernel panic */
+       netdev_reset_queue(dev);
+
        return 0;
 }
 
@@ -2343,7 +2345,6 @@ static int bcm_enetsw_stop(struct net_device *dev)
 
        del_timer_sync(&priv->swphy_poll);
        netif_stop_queue(dev);
-       netdev_reset_queue(dev);
        napi_disable(&priv->napi);
        del_timer_sync(&priv->rx_timeout);
 
@@ -2371,6 +2372,9 @@ static int bcm_enetsw_stop(struct net_device *dev)
                free_irq(priv->irq_tx, dev);
        free_irq(priv->irq_rx, dev);
 
+       /* reset BQL after forced tx reclaim to prevent kernel panic */
+       netdev_reset_queue(dev);
+
        return 0;
 }
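
Both stop paths now delay netdev_reset_queue() until after the forced tx reclaim, because resetting BQL while completions for in-flight buffers are still being accounted can trip the consistency check in the dynamic-queue-limits code. A hedged ordering sketch; the demo_* helpers are invented.

#include <linux/netdevice.h>

void demo_disable_dma(struct net_device *dev);    /* hypothetical */
void demo_reclaim_all_tx(struct net_device *dev); /* hypothetical */

static int demo_stop(struct net_device *dev)
{
	netif_stop_queue(dev);
	demo_disable_dma(dev);
	demo_reclaim_all_tx(dev); /* force-complete every in-flight buffer */

	netdev_reset_queue(dev);  /* only now is zeroing BQL state safe */
	return 0;
}
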
 
index a680fd9..b53a0d8 100644 (file)
@@ -8556,10 +8556,18 @@ static void bnxt_setup_inta(struct bnxt *bp)
        bp->irq_tbl[0].handler = bnxt_inta;
 }
 
+static int bnxt_init_int_mode(struct bnxt *bp);
+
 static int bnxt_setup_int_mode(struct bnxt *bp)
 {
        int rc;
 
+       if (!bp->irq_tbl) {
+               rc = bnxt_init_int_mode(bp);
+               if (rc || !bp->irq_tbl)
+                       return rc ?: -ENODEV;
+       }
+
        if (bp->flags & BNXT_FLAG_USING_MSIX)
                bnxt_setup_msix(bp);
        else
@@ -8744,7 +8752,7 @@ static int bnxt_init_inta(struct bnxt *bp)
 
 static int bnxt_init_int_mode(struct bnxt *bp)
 {
-       int rc = 0;
+       int rc = -ENODEV;
 
        if (bp->flags & BNXT_FLAG_MSIX_CAP)
                rc = bnxt_init_msix(bp);
@@ -9514,7 +9522,8 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 {
        struct hwrm_func_drv_if_change_output *resp = bp->hwrm_cmd_resp_addr;
        struct hwrm_func_drv_if_change_input req = {0};
-       bool resc_reinit = false, fw_reset = false;
+       bool fw_reset = !bp->irq_tbl;
+       bool resc_reinit = false;
        int rc, retry = 0;
        u32 flags = 0;
 
@@ -9557,6 +9566,7 @@ static int bnxt_hwrm_if_change(struct bnxt *bp, bool up)
 
        if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state) && !fw_reset) {
                netdev_err(bp->dev, "RESET_DONE not set during FW reset.\n");
+               set_bit(BNXT_STATE_ABORT_ERR, &bp->state);
                return -ENODEV;
        }
        if (resc_reinit || fw_reset) {
@@ -9890,6 +9900,9 @@ static int bnxt_reinit_after_abort(struct bnxt *bp)
        if (test_bit(BNXT_STATE_IN_FW_RESET, &bp->state))
                return -EBUSY;
 
+       if (bp->dev->reg_state == NETREG_UNREGISTERED)
+               return -ENODEV;
+
        rc = bnxt_fw_init_one(bp);
        if (!rc) {
                bnxt_clear_int_mode(bp);
index 472bf8f..15362d0 100644 (file)
@@ -3954,6 +3954,13 @@ static int macb_init(struct platform_device *pdev)
        return 0;
 }
 
+static const struct macb_usrio_config macb_default_usrio = {
+       .mii = MACB_BIT(MII),
+       .rmii = MACB_BIT(RMII),
+       .rgmii = GEM_BIT(RGMII),
+       .refclk = MACB_BIT(CLKEN),
+};
+
 #if defined(CONFIG_OF)
 /* 1518 rounded up */
 #define AT91ETHER_MAX_RBUFF_SZ 0x600
@@ -4439,13 +4446,6 @@ static int fu540_c000_init(struct platform_device *pdev)
        return macb_init(pdev);
 }
 
-static const struct macb_usrio_config macb_default_usrio = {
-       .mii = MACB_BIT(MII),
-       .rmii = MACB_BIT(RMII),
-       .rgmii = GEM_BIT(RGMII),
-       .refclk = MACB_BIT(CLKEN),
-};
-
 static const struct macb_usrio_config sama7g5_usrio = {
        .mii = 0,
        .rmii = 1,
@@ -4594,6 +4594,7 @@ static const struct macb_config default_gem_config = {
        .dma_burst_length = 16,
        .clk_init = macb_clk_init,
        .init = macb_init,
+       .usrio = &macb_default_usrio,
        .jumbo_max_len = 10240,
 };
 
index 46a809f..1115b8f 100644 (file)
@@ -672,7 +672,7 @@ static int chcr_ktls_cpl_act_open_rpl(struct adapter *adap,
        if (tx_info->pending_close) {
                spin_unlock(&tx_info->lock);
                if (!status) {
-                       /* it's a late success, tcb status is establised,
+                       /* it's a late success, tcb status is established,
                         * mark it close.
                         */
                        chcr_ktls_mark_tcb_close(tx_info);
@@ -722,7 +722,7 @@ static int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input)
                kvfree(tx_info);
                return 0;
        }
-       tx_info->open_state = false;
+       tx_info->open_state = CH_KTLS_OPEN_SUCCESS;
        spin_unlock(&tx_info->lock);
 
        complete(&tx_info->completion);
@@ -930,7 +930,7 @@ chcr_ktls_get_tx_flits(u32 nr_frags, unsigned int key_ctx_len)
 }
 
 /*
- * chcr_ktls_check_tcp_options: To check if there is any TCP option availbale
+ * chcr_ktls_check_tcp_options: To check if there is any TCP option available
  * other than timestamp.
  * @skb - skb contains partial record..
  * return: 1 / 0
@@ -1115,7 +1115,7 @@ static int chcr_ktls_xmit_wr_complete(struct sk_buff *skb,
        }
 
        if (unlikely(credits < ETHTXQ_STOP_THRES)) {
-               /* Credits are below the threshold vaues, stop the queue after
+               /* Credits are below the threshold values, stop the queue after
                 * injecting the Work Request for this packet.
                 */
                chcr_eth_txq_stop(q);
@@ -2006,7 +2006,7 @@ static int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev)
 
        /* TCP segments can be received either complete or partial.
         * chcr_end_part_handler will handle cases if complete record or end
-        * part of the record is received. Incase of partial end part of record,
+        * part of the record is received. In case of partial end part of record,
         * we will send the complete record again.
         */
 
index 3fdc70d..252adfa 100644 (file)
@@ -133,6 +133,8 @@ struct board_info {
        u32             wake_state;
 
        int             ip_summed;
+
+       struct regulator *power_supply;
 };
 
 /* debug code */
@@ -1449,7 +1451,7 @@ dm9000_probe(struct platform_device *pdev)
                if (ret) {
                        dev_err(dev, "failed to request reset gpio %d: %d\n",
                                reset_gpios, ret);
-                       return -ENODEV;
+                       goto out_regulator_disable;
                }
 
                /* According to manual PWRST# Low Period Min 1ms */
@@ -1461,8 +1463,10 @@ dm9000_probe(struct platform_device *pdev)
 
        if (!pdata) {
                pdata = dm9000_parse_dt(&pdev->dev);
-               if (IS_ERR(pdata))
-                       return PTR_ERR(pdata);
+               if (IS_ERR(pdata)) {
+                       ret = PTR_ERR(pdata);
+                       goto out_regulator_disable;
+               }
        }
 
        /* Init network device */
@@ -1479,6 +1483,8 @@ dm9000_probe(struct platform_device *pdev)
 
        db->dev = &pdev->dev;
        db->ndev = ndev;
+       if (!IS_ERR(power))
+               db->power_supply = power;
 
        spin_lock_init(&db->lock);
        mutex_init(&db->addr_lock);
@@ -1501,7 +1507,7 @@ dm9000_probe(struct platform_device *pdev)
                goto out;
        }
 
-       db->irq_wake = platform_get_irq(pdev, 1);
+       db->irq_wake = platform_get_irq_optional(pdev, 1);
        if (db->irq_wake >= 0) {
                dev_dbg(db->dev, "wakeup irq %d\n", db->irq_wake);
 
@@ -1703,6 +1709,10 @@ out:
        dm9000_release_board(pdev, db);
        free_netdev(ndev);
 
+out_regulator_disable:
+       if (!IS_ERR(power))
+               regulator_disable(power);
+
        return ret;
 }
 
@@ -1760,10 +1770,13 @@ static int
 dm9000_drv_remove(struct platform_device *pdev)
 {
        struct net_device *ndev = platform_get_drvdata(pdev);
+       struct board_info *dm = to_dm9000_board(ndev);
 
        unregister_netdev(ndev);
-       dm9000_release_board(pdev, netdev_priv(ndev));
+       dm9000_release_board(pdev, dm);
        free_netdev(ndev);              /* free device structure */
+       if (dm->power_supply)
+               regulator_disable(dm->power_supply);
 
        dev_dbg(&pdev->dev, "released and freed device\n");
        return 0;
index 88bfe21..04421ae 100644 (file)
@@ -1337,6 +1337,7 @@ static int ftgmac100_poll(struct napi_struct *napi, int budget)
         */
        if (unlikely(priv->need_mac_restart)) {
                ftgmac100_start_hw(priv);
+               priv->need_mac_restart = false;
 
                /* Re-enable "bad" interrupts */
                iowrite32(FTGMAC100_INT_BAD,
index ccfe52a..720dc99 100644 (file)
@@ -2670,7 +2670,6 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
        u32 hash;
        u64 ns;
 
-       np = container_of(&portal, struct dpaa_napi_portal, p);
        dpaa_fq = container_of(fq, struct dpaa_fq, fq_base);
        fd_status = be32_to_cpu(fd->status);
        fd_format = qm_fd_get_format(fd);
@@ -2685,6 +2684,7 @@ static enum qman_cb_dqrr_result rx_default_dqrr(struct qman_portal *portal,
 
        percpu_priv = this_cpu_ptr(priv->percpu_priv);
        percpu_stats = &percpu_priv->stats;
+       np = &percpu_priv->np;
 
        if (unlikely(dpaa_eth_napi_schedule(percpu_priv, portal, sched_napi)))
                return qman_cb_dqrr_stop;
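
The removed line applied container_of() to &portal, the address of the function's own argument slot, which does not live inside any dpaa_napi_portal; the fix derives np from the per-CPU private data instead. A toy userspace sketch (simplified container_of, invented struct names) of why only a pointer to the embedded member itself may be passed:

    #include <stdio.h>
    #include <stddef.h>

    #define container_of(ptr, type, member) \
            ((type *)((char *)(ptr) - offsetof(type, member)))

    struct portal { int id; };

    struct napi_portal {
            int cpu;
            struct portal p;        /* embedded member */
    };

    int main(void)
    {
            struct napi_portal np = { .cpu = 3, .p = { .id = 7 } };
            struct portal *arg = &np.p;     /* what a callback receives */

            /* Correct: arg points at np.p, so we recover &np. */
            struct napi_portal *ok = container_of(arg, struct napi_portal, p);

            /* Wrong (as in the removed line): &arg is the address of the
             * local pointer variable, which is not inside any struct
             * napi_portal, so the subtraction lands on garbage:
             *
             *     container_of(&arg, struct napi_portal, p);
             */

            printf("cpu=%d id=%d\n", ok->cpu, ok->p.id);
            return 0;
    }
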
index c78d122..0947132 100644 (file)
@@ -281,6 +281,8 @@ static int enetc_poll(struct napi_struct *napi, int budget)
        int work_done;
        int i;
 
+       enetc_lock_mdio();
+
        for (i = 0; i < v->count_tx_rings; i++)
                if (!enetc_clean_tx_ring(&v->tx_ring[i], budget))
                        complete = false;
@@ -291,8 +293,10 @@ static int enetc_poll(struct napi_struct *napi, int budget)
        if (work_done)
                v->rx_napi_work = true;
 
-       if (!complete)
+       if (!complete) {
+               enetc_unlock_mdio();
                return budget;
+       }
 
        napi_complete_done(napi, work_done);
 
@@ -301,8 +305,6 @@ static int enetc_poll(struct napi_struct *napi, int budget)
 
        v->rx_napi_work = false;
 
-       enetc_lock_mdio();
-
        /* enable interrupts */
        enetc_wr_reg_hot(v->rbier, ENETC_RBIER_RXTIE);
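
Across this and the following hunks, the MDIO lock moves from wrapping individual register accesses inside the TX/RX cleaning loops to a single lock/unlock pair around the whole NAPI cycle, including the early budget-exhausted return. A sketch of that shape, with hypothetical lock and ring helpers:

    #include <stdio.h>

    /* Stand-ins for the MDIO lock and ring cleanup. */
    static void lock(void)                { puts("mdio lock");   }
    static void unlock(void)              { puts("mdio unlock"); }
    static int  clean_rings(int budget)   { return budget - 1;   }
    static void reenable_interrupts(void) { puts("irq on");      }

    /* One lock/unlock pair per poll cycle; the early exit for an
     * exhausted budget must release the lock as well. */
    static int example_poll(int budget)
    {
            int work_done;

            lock();
            work_done = clean_rings(budget);  /* many register accesses */
            if (work_done == budget) {
                    unlock();                 /* early exit: still unlock */
                    return budget;
            }
            reenable_interrupts();
            unlock();
            return work_done;
    }

    int main(void)
    {
            printf("work done: %d\n", example_poll(64));
            return 0;
    }
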
 
@@ -327,8 +329,8 @@ static void enetc_get_tx_tstamp(struct enetc_hw *hw, union enetc_tx_bd *txbd,
 {
        u32 lo, hi, tstamp_lo;
 
-       lo = enetc_rd(hw, ENETC_SICTR0);
-       hi = enetc_rd(hw, ENETC_SICTR1);
+       lo = enetc_rd_hot(hw, ENETC_SICTR0);
+       hi = enetc_rd_hot(hw, ENETC_SICTR1);
        tstamp_lo = le32_to_cpu(txbd->wb.tstamp);
        if (lo <= tstamp_lo)
                hi -= 1;
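
The two reads switch to the _hot accessors; the surrounding logic reconstructs a 64-bit timestamp from the captured 32-bit low word plus a current hi/lo counter reading, decrementing hi when the counter wrapped after capture. A self-contained sketch of that reconstruction, assuming less than one low-word wrap elapsed since capture:

    #include <stdint.h>
    #include <stdio.h>

    static uint64_t reconstruct_tstamp(uint32_t cur_hi, uint32_t cur_lo,
                                       uint32_t captured_lo)
    {
            uint32_t hi = cur_hi;

            /* If the current low word is not past the captured one, the
             * counter wrapped after capture, so the captured value
             * belongs to the previous high-word period. */
            if (cur_lo <= captured_lo)
                    hi -= 1;

            return ((uint64_t)hi << 32) | captured_lo;
    }

    int main(void)
    {
            /* Captured near 0x1_FFFF_FFF0, counter read back after the
             * wrap at 0x2_0000_0010: result should be 0x1_FFFF_FFF0. */
            printf("0x%llx\n", (unsigned long long)
                   reconstruct_tstamp(0x2, 0x10, 0xFFFFFFF0));
            return 0;
    }
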
@@ -342,6 +344,12 @@ static void enetc_tstamp_tx(struct sk_buff *skb, u64 tstamp)
        if (skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS) {
                memset(&shhwtstamps, 0, sizeof(shhwtstamps));
                shhwtstamps.hwtstamp = ns_to_ktime(tstamp);
+               /* Ensure skb_mstamp_ns, which might have been populated with
+                * the txtime, is not mistaken for a software timestamp,
+                * because this will prevent the dispatch of our hardware
+                * timestamp to the socket.
+                */
+               skb->tstamp = ktime_set(0, 0);
                skb_tstamp_tx(skb, &shhwtstamps);
        }
 }
@@ -358,9 +366,7 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
        i = tx_ring->next_to_clean;
        tx_swbd = &tx_ring->tx_swbd[i];
 
-       enetc_lock_mdio();
        bds_to_clean = enetc_bd_ready_count(tx_ring, i);
-       enetc_unlock_mdio();
 
        do_tstamp = false;
 
@@ -403,8 +409,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
                        tx_swbd = tx_ring->tx_swbd;
                }
 
-               enetc_lock_mdio();
-
                /* BD iteration loop end */
                if (is_eof) {
                        tx_frm_cnt++;
@@ -415,8 +419,6 @@ static bool enetc_clean_tx_ring(struct enetc_bdr *tx_ring, int napi_budget)
 
                if (unlikely(!bds_to_clean))
                        bds_to_clean = enetc_bd_ready_count(tx_ring, i);
-
-               enetc_unlock_mdio();
        }
 
        tx_ring->next_to_clean = i;
@@ -527,9 +529,8 @@ static void enetc_get_rx_tstamp(struct net_device *ndev,
 static void enetc_get_offloads(struct enetc_bdr *rx_ring,
                               union enetc_rx_bd *rxbd, struct sk_buff *skb)
 {
-#ifdef CONFIG_FSL_ENETC_PTP_CLOCK
        struct enetc_ndev_priv *priv = netdev_priv(rx_ring->ndev);
-#endif
+
        /* TODO: hashing */
        if (rx_ring->ndev->features & NETIF_F_RXCSUM) {
                u16 inet_csum = le16_to_cpu(rxbd->r.inet_csum);
@@ -538,12 +539,31 @@ static void enetc_get_offloads(struct enetc_bdr *rx_ring,
                skb->ip_summed = CHECKSUM_COMPLETE;
        }
 
-       /* copy VLAN to skb, if one is extracted, for now we assume it's a
-        * standard TPID, but HW also supports custom values
-        */
-       if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN)
-               __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-                                      le16_to_cpu(rxbd->r.vlan_opt));
+       if (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_VLAN) {
+               __be16 tpid = 0;
+
+               switch (le16_to_cpu(rxbd->r.flags) & ENETC_RXBD_FLAG_TPID) {
+               case 0:
+                       tpid = htons(ETH_P_8021Q);
+                       break;
+               case 1:
+                       tpid = htons(ETH_P_8021AD);
+                       break;
+               case 2:
+                       tpid = htons(enetc_port_rd(&priv->si->hw,
+                                                  ENETC_PCVLANR1));
+                       break;
+               case 3:
+                       tpid = htons(enetc_port_rd(&priv->si->hw,
+                                                  ENETC_PCVLANR2));
+                       break;
+               default:
+                       break;
+               }
+
+               __vlan_hwaccel_put_tag(skb, tpid, le16_to_cpu(rxbd->r.vlan_opt));
+       }
+
 #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
        if (priv->active_offloads & ENETC_F_RX_TSTAMP)
                enetc_get_rx_tstamp(rx_ring->ndev, rxbd, skb);
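
The new switch maps the 2-bit TPID index carried in the RX BD flags to an ethertype, with indexes 2 and 3 read from the port's custom-TPID registers. A compact userspace rendering of the same mapping (port_custom_tpid() is a hypothetical stand-in for the ENETC_PCVLANR1/2 reads):

    #include <stdint.h>
    #include <stdio.h>

    #define ETH_P_8021Q  0x8100
    #define ETH_P_8021AD 0x88A8

    /* Stand-in for the two custom-TPID port registers; the values here
     * are made up for illustration. */
    static uint16_t port_custom_tpid(int n) { return n == 1 ? 0x9100 : 0x9200; }

    static uint16_t rxbd_tpid(uint16_t flags)
    {
            switch (flags & 0x3) {          /* ENETC_RXBD_FLAG_TPID, bits 1:0 */
            case 0:  return ETH_P_8021Q;    /* standard C-tag */
            case 1:  return ETH_P_8021AD;   /* standard S-tag */
            case 2:  return port_custom_tpid(1);
            default: return port_custom_tpid(2);
            }
    }

    int main(void)
    {
            for (int i = 0; i < 4; i++)
                    printf("index %d -> TPID 0x%04x\n", i, rxbd_tpid(i));
            return 0;
    }
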
@@ -660,8 +680,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
                u32 bd_status;
                u16 size;
 
-               enetc_lock_mdio();
-
                if (cleaned_cnt >= ENETC_RXBD_BUNDLE) {
                        int count = enetc_refill_rx_ring(rx_ring, cleaned_cnt);
 
@@ -672,19 +690,15 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
                rxbd = enetc_rxbd(rx_ring, i);
                bd_status = le32_to_cpu(rxbd->r.lstatus);
-               if (!bd_status) {
-                       enetc_unlock_mdio();
+               if (!bd_status)
                        break;
-               }
 
                enetc_wr_reg_hot(rx_ring->idr, BIT(rx_ring->index));
                dma_rmb(); /* for reading other rxbd fields */
                size = le16_to_cpu(rxbd->r.buf_len);
                skb = enetc_map_rx_buff_to_skb(rx_ring, i, size);
-               if (!skb) {
-                       enetc_unlock_mdio();
+               if (!skb)
                        break;
-               }
 
                enetc_get_offloads(rx_ring, rxbd, skb);
 
@@ -696,7 +710,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
                if (unlikely(bd_status &
                             ENETC_RXBD_LSTATUS(ENETC_RXBD_ERR_MASK))) {
-                       enetc_unlock_mdio();
                        dev_kfree_skb(skb);
                        while (!(bd_status & ENETC_RXBD_LSTATUS_F)) {
                                dma_rmb();
@@ -736,8 +749,6 @@ static int enetc_clean_rx_ring(struct enetc_bdr *rx_ring,
 
                enetc_process_skb(rx_ring, skb);
 
-               enetc_unlock_mdio();
-
                napi_gro_receive(napi, skb);
 
                rx_frm_cnt++;
@@ -984,7 +995,7 @@ static void enetc_free_rxtx_rings(struct enetc_ndev_priv *priv)
                enetc_free_tx_ring(priv->tx_ring[i]);
 }
 
-static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 {
        int size = cbdr->bd_count * sizeof(struct enetc_cbd);
 
@@ -1005,7 +1016,7 @@ static int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
        return 0;
 }
 
-static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
+void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
 {
        int size = cbdr->bd_count * sizeof(struct enetc_cbd);
 
@@ -1013,7 +1024,7 @@ static void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr)
        cbdr->bd_base = NULL;
 }
 
-static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
+void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
 {
        /* set CBDR cache attributes */
        enetc_wr(hw, ENETC_SICAR2,
@@ -1033,7 +1044,7 @@ static void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr)
        cbdr->cir = hw->reg + ENETC_SICBDRCIR;
 }
 
-static void enetc_clear_cbdr(struct enetc_hw *hw)
+void enetc_clear_cbdr(struct enetc_hw *hw)
 {
        enetc_wr(hw, ENETC_SICBDRMR, 0);
 }
@@ -1058,13 +1069,12 @@ static int enetc_setup_default_rss_table(struct enetc_si *si, int num_groups)
        return 0;
 }
 
-static int enetc_configure_si(struct enetc_ndev_priv *priv)
+int enetc_configure_si(struct enetc_ndev_priv *priv)
 {
        struct enetc_si *si = priv->si;
        struct enetc_hw *hw = &si->hw;
        int err;
 
-       enetc_setup_cbdr(hw, &si->cbd_ring);
        /* set SI cache attributes */
        enetc_wr(hw, ENETC_SICAR0,
                 ENETC_SICAR_RD_COHERENT | ENETC_SICAR_WR_COHERENT);
@@ -1112,6 +1122,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
        if (err)
                return err;
 
+       enetc_setup_cbdr(&si->hw, &si->cbd_ring);
+
        priv->cls_rules = kcalloc(si->num_fs_entries, sizeof(*priv->cls_rules),
                                  GFP_KERNEL);
        if (!priv->cls_rules) {
@@ -1119,14 +1131,8 @@ int enetc_alloc_si_resources(struct enetc_ndev_priv *priv)
                goto err_alloc_cls;
        }
 
-       err = enetc_configure_si(priv);
-       if (err)
-               goto err_config_si;
-
        return 0;
 
-err_config_si:
-       kfree(priv->cls_rules);
 err_alloc_cls:
        enetc_clear_cbdr(&si->hw);
        enetc_free_cbdr(priv->dev, &si->cbd_ring);
@@ -1212,7 +1218,8 @@ static void enetc_setup_rxbdr(struct enetc_hw *hw, struct enetc_bdr *rx_ring)
        rx_ring->idr = hw->reg + ENETC_SIRXIDR;
 
        enetc_refill_rx_ring(rx_ring, enetc_bd_unused(rx_ring));
-       enetc_wr(hw, ENETC_SIRXIDR, rx_ring->next_to_use);
+       /* update ENETC's consumer index */
+       enetc_rxbdr_wr(hw, idx, ENETC_RBCIR, rx_ring->next_to_use);
 
        /* enable ring */
        enetc_rxbdr_wr(hw, idx, ENETC_RBMR, rbmr);
index 8532d23..8b380fc 100644 (file)
@@ -292,6 +292,7 @@ void enetc_get_si_caps(struct enetc_si *si);
 void enetc_init_si_rings_params(struct enetc_ndev_priv *priv);
 int enetc_alloc_si_resources(struct enetc_ndev_priv *priv);
 void enetc_free_si_resources(struct enetc_ndev_priv *priv);
+int enetc_configure_si(struct enetc_ndev_priv *priv);
 
 int enetc_open(struct net_device *ndev);
 int enetc_close(struct net_device *ndev);
@@ -309,6 +310,10 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 void enetc_set_ethtool_ops(struct net_device *ndev);
 
 /* control buffer descriptor ring (CBDR) */
+int enetc_alloc_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
+void enetc_free_cbdr(struct device *dev, struct enetc_cbdr *cbdr);
+void enetc_setup_cbdr(struct enetc_hw *hw, struct enetc_cbdr *cbdr);
+void enetc_clear_cbdr(struct enetc_hw *hw);
 int enetc_set_mac_flt_entry(struct enetc_si *si, int index,
                            char *mac_addr, int si_map);
 int enetc_clear_mac_flt_entry(struct enetc_si *si, int index);
index c71fe8d..00938f7 100644 (file)
@@ -172,6 +172,8 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PSIPMAR0(n)      (0x0100 + (n) * 0x8) /* n = SI index */
 #define ENETC_PSIPMAR1(n)      (0x0104 + (n) * 0x8)
 #define ENETC_PVCLCTR          0x0208
+#define ENETC_PCVLANR1         0x0210
+#define ENETC_PCVLANR2         0x0214
 #define ENETC_VLAN_TYPE_C      BIT(0)
 #define ENETC_VLAN_TYPE_S      BIT(1)
 #define ENETC_PVCLCTR_OVTPIDL(bmp)     ((bmp) & 0xff) /* VLAN_TYPE */
@@ -232,14 +234,23 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM0_MAXFRM       0x8014
 #define ENETC_SET_TX_MTU(val)  ((val) << 16)
 #define ENETC_SET_MAXFRM(val)  ((val) & 0xffff)
+#define ENETC_PM0_RX_FIFO      0x801c
+#define ENETC_PM0_RX_FIFO_VAL  1
 
 #define ENETC_PM_IMDIO_BASE    0x8030
 
 #define ENETC_PM0_IF_MODE      0x8300
-#define ENETC_PMO_IFM_RG       BIT(2)
+#define ENETC_PM0_IFM_RG       BIT(2)
 #define ENETC_PM0_IFM_RLP      (BIT(5) | BIT(11))
-#define ENETC_PM0_IFM_RGAUTO   (BIT(15) | ENETC_PMO_IFM_RG | BIT(1))
-#define ENETC_PM0_IFM_XGMII    BIT(12)
+#define ENETC_PM0_IFM_EN_AUTO  BIT(15)
+#define ENETC_PM0_IFM_SSP_MASK GENMASK(14, 13)
+#define ENETC_PM0_IFM_SSP_1000 (2 << 13)
+#define ENETC_PM0_IFM_SSP_100  (0 << 13)
+#define ENETC_PM0_IFM_SSP_10   (1 << 13)
+#define ENETC_PM0_IFM_FULL_DPX BIT(12)
+#define ENETC_PM0_IFM_IFMODE_MASK GENMASK(1, 0)
+#define ENETC_PM0_IFM_IFMODE_XGMII 0
+#define ENETC_PM0_IFM_IFMODE_GMII 2
 #define ENETC_PSIDCAPR         0x1b08
 #define ENETC_PSIDCAPR_MSK     GENMASK(15, 0)
 #define ENETC_PSFCAPR          0x1b18
@@ -453,6 +464,8 @@ static inline u64 _enetc_rd_reg64_wa(void __iomem *reg)
 #define enetc_wr_reg(reg, val)         _enetc_wr_reg_wa((reg), (val))
 #define enetc_rd(hw, off)              enetc_rd_reg((hw)->reg + (off))
 #define enetc_wr(hw, off, val)         enetc_wr_reg((hw)->reg + (off), val)
+#define enetc_rd_hot(hw, off)          enetc_rd_reg_hot((hw)->reg + (off))
+#define enetc_wr_hot(hw, off, val)     enetc_wr_reg_hot((hw)->reg + (off), val)
 #define enetc_rd64(hw, off)            _enetc_rd_reg64_wa((hw)->reg + (off))
 /* port register accessors - PF only */
 #define enetc_port_rd(hw, off)         enetc_rd_reg((hw)->port + (off))
@@ -568,6 +581,7 @@ union enetc_rx_bd {
 #define ENETC_RXBD_LSTATUS(flags)      ((flags) << 16)
 #define ENETC_RXBD_FLAG_VLAN   BIT(9)
 #define ENETC_RXBD_FLAG_TSTMP  BIT(10)
+#define ENETC_RXBD_FLAG_TPID   GENMASK(1, 0)
 
 #define ENETC_MAC_ADDR_FILT_CNT        8 /* # of supported entries per port */
 #define EMETC_MAC_ADDR_FILT_RES        3 /* # of reserved entries at the beginning */
index 515c5b2..224fc37 100644 (file)
@@ -190,7 +190,6 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
 {
        struct enetc_ndev_priv *priv = netdev_priv(ndev);
        struct enetc_pf *pf = enetc_si_priv(priv->si);
-       char vlan_promisc_simap = pf->vlan_promisc_simap;
        struct enetc_hw *hw = &priv->si->hw;
        bool uprom = false, mprom = false;
        struct enetc_mac_filter *filter;
@@ -203,16 +202,12 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
                psipmr = ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0);
                uprom = true;
                mprom = true;
-               /* Enable VLAN promiscuous mode for SI0 (PF) */
-               vlan_promisc_simap |= BIT(0);
        } else if (ndev->flags & IFF_ALLMULTI) {
                /* enable multicast promisc mode for SI0 (PF) */
                psipmr = ENETC_PSIPMR_SET_MP(0);
                mprom = true;
        }
 
-       enetc_set_vlan_promisc(&pf->si->hw, vlan_promisc_simap);
-
        /* first 2 filter entries belong to PF */
        if (!uprom) {
                /* Update unicast filters */
@@ -320,7 +315,7 @@ static void enetc_set_loopback(struct net_device *ndev, bool en)
        u32 reg;
 
        reg = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
-       if (reg & ENETC_PMO_IFM_RG) {
+       if (reg & ENETC_PM0_IFM_RG) {
                /* RGMII mode */
                reg = (reg & ~ENETC_PM0_IFM_RLP) |
                      (en ? ENETC_PM0_IFM_RLP : 0);
@@ -495,17 +490,30 @@ static void enetc_configure_port_mac(struct enetc_hw *hw)
 
        enetc_port_wr(hw, ENETC_PM1_CMD_CFG, ENETC_PM0_CMD_PHY_TX_EN |
                      ENETC_PM0_CMD_TXP | ENETC_PM0_PROMISC);
+
+       /* On LS1028A, the MAC RX FIFO defaults to 2, which is too high
+        * and may lead to RX lock-up under traffic. Set it to 1 instead,
+        * as recommended by the hardware team.
+        */
+       enetc_port_wr(hw, ENETC_PM0_RX_FIFO, ENETC_PM0_RX_FIFO_VAL);
 }
 
 static void enetc_mac_config(struct enetc_hw *hw, phy_interface_t phy_mode)
 {
-       /* set auto-speed for RGMII */
-       if (enetc_port_rd(hw, ENETC_PM0_IF_MODE) & ENETC_PMO_IFM_RG ||
-           phy_interface_mode_is_rgmii(phy_mode))
-               enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_RGAUTO);
+       u32 val;
+
+       if (phy_interface_mode_is_rgmii(phy_mode)) {
+               val = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
+               val &= ~ENETC_PM0_IFM_EN_AUTO;
+               val &= ENETC_PM0_IFM_IFMODE_MASK;
+               val |= ENETC_PM0_IFM_IFMODE_GMII | ENETC_PM0_IFM_RG;
+               enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
+       }
 
-       if (phy_mode == PHY_INTERFACE_MODE_USXGMII)
-               enetc_port_wr(hw, ENETC_PM0_IF_MODE, ENETC_PM0_IFM_XGMII);
+       if (phy_mode == PHY_INTERFACE_MODE_USXGMII) {
+               val = ENETC_PM0_IFM_FULL_DPX | ENETC_PM0_IFM_IFMODE_XGMII;
+               enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
+       }
 }
 
 static void enetc_mac_enable(struct enetc_hw *hw, bool en)
@@ -937,6 +945,34 @@ static void enetc_pl_mac_config(struct phylink_config *config,
                phylink_set_pcs(priv->phylink, &pf->pcs->pcs);
 }
 
+static void enetc_force_rgmii_mac(struct enetc_hw *hw, int speed, int duplex)
+{
+       u32 old_val, val;
+
+       old_val = val = enetc_port_rd(hw, ENETC_PM0_IF_MODE);
+
+       if (speed == SPEED_1000) {
+               val &= ~ENETC_PM0_IFM_SSP_MASK;
+               val |= ENETC_PM0_IFM_SSP_1000;
+       } else if (speed == SPEED_100) {
+               val &= ~ENETC_PM0_IFM_SSP_MASK;
+               val |= ENETC_PM0_IFM_SSP_100;
+       } else if (speed == SPEED_10) {
+               val &= ~ENETC_PM0_IFM_SSP_MASK;
+               val |= ENETC_PM0_IFM_SSP_10;
+       }
+
+       if (duplex == DUPLEX_FULL)
+               val |= ENETC_PM0_IFM_FULL_DPX;
+       else
+               val &= ~ENETC_PM0_IFM_FULL_DPX;
+
+       if (val == old_val)
+               return;
+
+       enetc_port_wr(hw, ENETC_PM0_IF_MODE, val);
+}
+
 static void enetc_pl_mac_link_up(struct phylink_config *config,
                                 struct phy_device *phy, unsigned int mode,
                                 phy_interface_t interface, int speed,
@@ -949,6 +985,10 @@ static void enetc_pl_mac_link_up(struct phylink_config *config,
        if (priv->active_offloads & ENETC_F_QBV)
                enetc_sched_speed_set(priv, speed);
 
+       if (!phylink_autoneg_inband(mode) &&
+           phy_interface_mode_is_rgmii(interface))
+               enetc_force_rgmii_mac(&pf->si->hw, speed, duplex);
+
        enetc_mac_enable(&pf->si->hw, true);
 }
 
@@ -1041,6 +1081,26 @@ static int enetc_init_port_rss_memory(struct enetc_si *si)
        return err;
 }
 
+static void enetc_init_unused_port(struct enetc_si *si)
+{
+       struct device *dev = &si->pdev->dev;
+       struct enetc_hw *hw = &si->hw;
+       int err;
+
+       si->cbd_ring.bd_count = ENETC_CBDR_DEFAULT_SIZE;
+       err = enetc_alloc_cbdr(dev, &si->cbd_ring);
+       if (err)
+               return;
+
+       enetc_setup_cbdr(hw, &si->cbd_ring);
+
+       enetc_init_port_rfs_memory(si);
+       enetc_init_port_rss_memory(si);
+
+       enetc_clear_cbdr(hw);
+       enetc_free_cbdr(dev, &si->cbd_ring);
+}
+
 static int enetc_pf_probe(struct pci_dev *pdev,
                          const struct pci_device_id *ent)
 {
@@ -1051,11 +1111,6 @@ static int enetc_pf_probe(struct pci_dev *pdev,
        struct enetc_pf *pf;
        int err;
 
-       if (node && !of_device_is_available(node)) {
-               dev_info(&pdev->dev, "device is disabled, skipping\n");
-               return -ENODEV;
-       }
-
        err = enetc_pci_probe(pdev, KBUILD_MODNAME, sizeof(*pf));
        if (err) {
                dev_err(&pdev->dev, "PCI probing failed\n");
@@ -1069,6 +1124,13 @@ static int enetc_pf_probe(struct pci_dev *pdev,
                goto err_map_pf_space;
        }
 
+       if (node && !of_device_is_available(node)) {
+               enetc_init_unused_port(si);
+               dev_info(&pdev->dev, "device is disabled, skipping\n");
+               err = -ENODEV;
+               goto err_device_disabled;
+       }
+
        pf = enetc_si_priv(si);
        pf->si = si;
        pf->total_vfs = pci_sriov_get_totalvfs(pdev);
@@ -1108,6 +1170,12 @@ static int enetc_pf_probe(struct pci_dev *pdev,
                goto err_init_port_rss;
        }
 
+       err = enetc_configure_si(priv);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to configure SI\n");
+               goto err_config_si;
+       }
+
        err = enetc_alloc_msix(priv);
        if (err) {
                dev_err(&pdev->dev, "MSIX alloc failed\n");
@@ -1136,6 +1204,7 @@ err_phylink_create:
        enetc_mdiobus_destroy(pf);
 err_mdiobus_create:
        enetc_free_msix(priv);
+err_config_si:
 err_init_port_rss:
 err_init_port_rfs:
 err_alloc_msix:
@@ -1144,6 +1213,7 @@ err_alloc_si_res:
        si->ndev = NULL;
        free_netdev(ndev);
 err_alloc_netdev:
+err_device_disabled:
 err_map_pf_space:
        enetc_pci_remove(pdev);
 
index 39c1a09..9b755a8 100644 (file)
@@ -171,6 +171,12 @@ static int enetc_vf_probe(struct pci_dev *pdev,
                goto err_alloc_si_res;
        }
 
+       err = enetc_configure_si(priv);
+       if (err) {
+               dev_err(&pdev->dev, "Failed to configure SI\n");
+               goto err_config_si;
+       }
+
        err = enetc_alloc_msix(priv);
        if (err) {
                dev_err(&pdev->dev, "MSIX alloc failed\n");
@@ -187,6 +193,7 @@ static int enetc_vf_probe(struct pci_dev *pdev,
 
 err_reg_netdev:
        enetc_free_msix(priv);
+err_config_si:
 err_alloc_msix:
        enetc_free_si_resources(priv);
 err_alloc_si_res:
index 2e344aa..1753807 100644 (file)
@@ -377,9 +377,16 @@ static int fec_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
        u64 ns;
        unsigned long flags;
 
+       mutex_lock(&adapter->ptp_clk_mutex);
+       /* Check the ptp clock */
+       if (!adapter->ptp_clk_on) {
+               mutex_unlock(&adapter->ptp_clk_mutex);
+               return -EINVAL;
+       }
        spin_lock_irqsave(&adapter->tmreg_lock, flags);
        ns = timecounter_read(&adapter->tc);
        spin_unlock_irqrestore(&adapter->tmreg_lock, flags);
+       mutex_unlock(&adapter->ptp_clk_mutex);
 
        *ts = ns_to_timespec64(ns);
 
index 541de32..1cf8ef7 100644 (file)
@@ -2390,6 +2390,10 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus,
                if (lstatus & BD_LFLAG(RXBD_LAST))
                        size -= skb->len;
 
+               WARN(size < 0, "gianfar: rx fragment size underflow");
+               if (size < 0)
+                       return false;
+
                skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page,
                                rxb->page_offset + RXBUF_ALIGNMENT,
                                size, GFAR_RXB_TRUESIZE);
@@ -2552,6 +2556,17 @@ static int gfar_clean_rx_ring(struct gfar_priv_rx_q *rx_queue,
                if (lstatus & BD_LFLAG(RXBD_EMPTY))
                        break;
 
+               /* lost RXBD_LAST descriptor due to overrun */
+               if (skb &&
+                   (lstatus & BD_LFLAG(RXBD_FIRST))) {
+                       /* discard faulty buffer */
+                       dev_kfree_skb(skb);
+                       skb = NULL;
+                       rx_queue->stats.rx_dropped++;
+
+                       /* can continue normally */
+               }
+
                /* order rx buffer descriptor reads */
                rmb();
 
index 5d7824d..c66a7a5 100644 (file)
@@ -1663,8 +1663,10 @@ static int hns_nic_clear_all_rx_fetch(struct net_device *ndev)
                        for (j = 0; j < fetch_num; j++) {
                                /* alloc one skb and init */
                                skb = hns_assemble_skb(ndev);
-                               if (!skb)
+                               if (!skb) {
+                                       ret = -ENOMEM;
                                        goto out;
+                               }
                                rd = &tx_ring_data(priv, skb->queue_mapping);
                                hns_nic_net_xmit_hw(ndev, skb, rd);
 
index ff52a65..057dda7 100644 (file)
@@ -1053,16 +1053,16 @@ struct hclge_fd_tcam_config_3_cmd {
 #define HCLGE_FD_AD_DROP_B             0
 #define HCLGE_FD_AD_DIRECT_QID_B       1
 #define HCLGE_FD_AD_QID_S              2
-#define HCLGE_FD_AD_QID_M              GENMASK(12, 2)
+#define HCLGE_FD_AD_QID_M              GENMASK(11, 2)
 #define HCLGE_FD_AD_USE_COUNTER_B      12
 #define HCLGE_FD_AD_COUNTER_NUM_S      13
 #define HCLGE_FD_AD_COUNTER_NUM_M      GENMASK(20, 13)
 #define HCLGE_FD_AD_NXT_STEP_B         20
 #define HCLGE_FD_AD_NXT_KEY_S          21
-#define HCLGE_FD_AD_NXT_KEY_M          GENMASK(26, 21)
+#define HCLGE_FD_AD_NXT_KEY_M          GENMASK(25, 21)
 #define HCLGE_FD_AD_WR_RULE_ID_B       0
 #define HCLGE_FD_AD_RULE_ID_S          1
-#define HCLGE_FD_AD_RULE_ID_M          GENMASK(13, 1)
+#define HCLGE_FD_AD_RULE_ID_M          GENMASK(12, 1)
 #define HCLGE_FD_AD_TC_OVRD_B          16
 #define HCLGE_FD_AD_TC_SIZE_S          17
 #define HCLGE_FD_AD_TC_SIZE_M          GENMASK(20, 17)
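
Each corrected mask now ends one bit below the start of the next field: with HCLGE_FD_AD_QID_S at 2 and HCLGE_FD_AD_USE_COUNTER_B at 12, the queue ID can only occupy bits 11..2, hence GENMASK(11, 2) rather than GENMASK(12, 2). A quick standalone check:

    #include <stdio.h>
    #include <stdint.h>

    #define GENMASK(h, l) (((~0u) << (l)) & (~0u >> (31 - (h))))

    int main(void)
    {
            /* QID starts at bit 2; the USE_COUNTER flag lives at bit 12,
             * so the QID mask must stop at bit 11. */
            uint32_t qid_m = GENMASK(11, 2);
            uint32_t counter_b = 1u << 12;

            printf("qid mask = 0x%08x\n", qid_m);        /* 0x00000ffc */
            printf("overlaps counter bit? %s\n",
                   (qid_m & counter_b) ? "yes" : "no");  /* "no" */
            return 0;
    }
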
index 34b744d..e3f81c7 100644 (file)
@@ -5245,9 +5245,9 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y,
        case BIT(INNER_SRC_MAC):
                for (i = 0; i < ETH_ALEN; i++) {
                        calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-                              rule->tuples.src_mac[i]);
+                              rule->tuples_mask.src_mac[i]);
                        calc_y(key_y[ETH_ALEN - 1 - i], rule->tuples.src_mac[i],
-                              rule->tuples.src_mac[i]);
+                              rule->tuples_mask.src_mac[i]);
                }
 
                return true;
@@ -6330,8 +6330,7 @@ static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
                fs->h_ext.vlan_tci = cpu_to_be16(rule->tuples.vlan_tag1);
                fs->m_ext.vlan_tci =
                                rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
-                               cpu_to_be16(VLAN_VID_MASK) :
-                               cpu_to_be16(rule->tuples_mask.vlan_tag1);
+                               0 : cpu_to_be16(rule->tuples_mask.vlan_tag1);
        }
 
        if (fs->flow_type & FLOW_MAC_EXT) {
index c3ec9ce..7fea9ae 100644 (file)
@@ -1758,7 +1758,7 @@ static int ibmveth_probe(struct vio_dev *dev, const struct vio_device_id *id)
        return 0;
 }
 
-static int ibmveth_remove(struct vio_dev *dev)
+static void ibmveth_remove(struct vio_dev *dev)
 {
        struct net_device *netdev = dev_get_drvdata(&dev->dev);
        struct ibmveth_adapter *adapter = netdev_priv(netdev);
@@ -1771,8 +1771,6 @@ static int ibmveth_remove(struct vio_dev *dev)
 
        free_netdev(netdev);
        dev_set_drvdata(&dev->dev, NULL);
-
-       return 0;
 }
 
 static struct attribute veth_active_attr;
index 1c0e4be..9c6438d 100644 (file)
@@ -78,7 +78,6 @@ MODULE_LICENSE("GPL");
 MODULE_VERSION(IBMVNIC_DRIVER_VERSION);
 
 static int ibmvnic_version = IBMVNIC_INITIAL_VERSION;
-static int ibmvnic_remove(struct vio_dev *);
 static void release_sub_crqs(struct ibmvnic_adapter *, bool);
 static int ibmvnic_reset_crq(struct ibmvnic_adapter *);
 static int ibmvnic_send_crq_init(struct ibmvnic_adapter *);
@@ -1172,12 +1171,25 @@ static int ibmvnic_open(struct net_device *netdev)
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
        int rc;
 
-       /* If device failover is pending, just set device state and return.
-        * Device operation will be handled by reset routine.
+       ASSERT_RTNL();
+
+       /* If device failover is pending or we are about to reset, just set
+        * device state and return. Device operation will be handled by reset
+        * routine.
+        *
+        * It should be safe to overwrite the adapter->state here. Since
+        * we hold the rtnl, either the reset has not actually started or
+        * the rtnl got dropped during the set_link_state() in do_reset().
+        * In the former case, no one else is changing the state (again we
+        * have the rtnl) and in the latter case, do_reset() will detect and
+        * honor our setting below.
         */
-       if (adapter->failover_pending) {
+       if (adapter->failover_pending || (test_bit(0, &adapter->resetting))) {
+               netdev_dbg(netdev, "[S:%d FOP:%d] Resetting, deferring open\n",
+                          adapter->state, adapter->failover_pending);
                adapter->state = VNIC_OPEN;
-               return 0;
+               rc = 0;
+               goto out;
        }
 
        if (adapter->state != VNIC_CLOSED) {
@@ -1196,10 +1208,12 @@ static int ibmvnic_open(struct net_device *netdev)
        rc = __ibmvnic_open(netdev);
 
 out:
-       /* If open fails due to a pending failover, set device state and
-        * return. Device operation will be handled by reset routine.
+       /* If open failed and there is a pending failover or in-progress reset,
+        * set device state and return. Device operation will be handled by
+        * reset routine. See also comments above regarding rtnl.
         */
-       if (rc && adapter->failover_pending) {
+       if (rc &&
+           (adapter->failover_pending || (test_bit(0, &adapter->resetting)))) {
                adapter->state = VNIC_OPEN;
                rc = 0;
        }
@@ -1891,10 +1905,9 @@ static int ibmvnic_set_mac(struct net_device *netdev, void *p)
        if (!is_valid_ether_addr(addr->sa_data))
                return -EADDRNOTAVAIL;
 
-       if (adapter->state != VNIC_PROBED) {
-               ether_addr_copy(adapter->mac_addr, addr->sa_data);
+       ether_addr_copy(adapter->mac_addr, addr->sa_data);
+       if (adapter->state != VNIC_PROBED)
                rc = __ibmvnic_set_mac(netdev, addr->sa_data);
-       }
 
        return rc;
 }
@@ -1928,6 +1941,14 @@ static int do_reset(struct ibmvnic_adapter *adapter,
        if (rwi->reset_reason == VNIC_RESET_FAILOVER)
                adapter->failover_pending = false;
 
+       /* read the state and check (again) after getting rtnl */
+       reset_state = adapter->state;
+
+       if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) {
+               rc = -EBUSY;
+               goto out;
+       }
+
        netif_carrier_off(netdev);
 
        old_num_rx_queues = adapter->req_rx_queues;
@@ -1958,11 +1979,27 @@ static int do_reset(struct ibmvnic_adapter *adapter,
                        if (rc)
                                goto out;
 
+                       if (adapter->state == VNIC_OPEN) {
+                               /* When we dropped rtnl, ibmvnic_open() got
+                                * it and noticed that we are resetting and
+                                * set the adapter state to OPEN. Update our
+                                * new "target" state, and resume the reset
+                                * from VNIC_CLOSING state.
+                                */
+                               netdev_dbg(netdev,
+                                          "Open changed state from %d, updating.\n",
+                                          reset_state);
+                               reset_state = VNIC_OPEN;
+                               adapter->state = VNIC_CLOSING;
+                       }
+
                        if (adapter->state != VNIC_CLOSING) {
+                               /* If someone else changed the adapter state
+                                * when we dropped the rtnl, fail the reset
+                                */
                                rc = -1;
                                goto out;
                        }
-
                        adapter->state = VNIC_CLOSED;
                }
        }
@@ -2106,6 +2143,14 @@ static int do_hard_reset(struct ibmvnic_adapter *adapter,
        netdev_dbg(adapter->netdev, "Hard resetting driver (%d)\n",
                   rwi->reset_reason);
 
+       /* read the state and check (again) after getting rtnl */
+       reset_state = adapter->state;
+
+       if (reset_state == VNIC_REMOVING || reset_state == VNIC_REMOVED) {
+               rc = -EBUSY;
+               goto out;
+       }
+
        netif_carrier_off(netdev);
        adapter->reset_reason = rwi->reset_reason;
 
@@ -5172,16 +5217,14 @@ static int ibmvnic_reset_init(struct ibmvnic_adapter *adapter, bool reset)
 {
        struct device *dev = &adapter->vdev->dev;
        unsigned long timeout = msecs_to_jiffies(20000);
-       u64 old_num_rx_queues, old_num_tx_queues;
+       u64 old_num_rx_queues = adapter->req_rx_queues;
+       u64 old_num_tx_queues = adapter->req_tx_queues;
        int rc;
 
        adapter->from_passive_init = false;
 
-       if (reset) {
-               old_num_rx_queues = adapter->req_rx_queues;
-               old_num_tx_queues = adapter->req_tx_queues;
+       if (reset)
                reinit_completion(&adapter->init_done);
-       }
 
        adapter->init_done_rc = 0;
        rc = ibmvnic_send_crq_init(adapter);
@@ -5349,7 +5392,7 @@ ibmvnic_init_fail:
        return rc;
 }
 
-static int ibmvnic_remove(struct vio_dev *dev)
+static void ibmvnic_remove(struct vio_dev *dev)
 {
        struct net_device *netdev = dev_get_drvdata(&dev->dev);
        struct ibmvnic_adapter *adapter = netdev_priv(netdev);
@@ -5364,9 +5407,9 @@ static int ibmvnic_remove(struct vio_dev *dev)
         * after setting state, so __ibmvnic_reset() which is called
         * from the flush_work() below, can make progress.
         */
-       spin_lock_irqsave(&adapter->rwi_lock, flags);
+       spin_lock(&adapter->rwi_lock);
        adapter->state = VNIC_REMOVING;
-       spin_unlock_irqrestore(&adapter->rwi_lock, flags);
+       spin_unlock(&adapter->rwi_lock);
 
        spin_unlock_irqrestore(&adapter->state_lock, flags);
 
@@ -5390,8 +5433,6 @@ static int ibmvnic_remove(struct vio_dev *dev)
        device_remove_file(&dev->dev, &dev_attr_failover);
        free_netdev(netdev);
        dev_set_drvdata(&dev->dev, NULL);
-
-       return 0;
 }
 
 static ssize_t failover_store(struct device *dev, struct device_attribute *attr,
index 88faf05..0b1e890 100644 (file)
@@ -899,6 +899,8 @@ static s32 e1000_set_d0_lplu_state_82571(struct e1000_hw *hw, bool active)
        } else {
                data &= ~IGP02E1000_PM_D0_LPLU;
                ret_val = e1e_wphy(hw, IGP02E1000_PHY_POWER_MGMT, data);
+               if (ret_val)
+                       return ret_val;
                /* LPLU and SmartSpeed are mutually exclusive.  LPLU is used
                 * during Dx states where the power conservation is most
                 * important.  During driver activity we should enable
index 69a2329..db79c4e 100644 (file)
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright(c) 1999 - 2018 Intel Corporation. */
 
-#ifndef _E1000_HW_H_
-#define _E1000_HW_H_
+#ifndef _E1000E_HW_H_
+#define _E1000E_HW_H_
 
 #include "regs.h"
 #include "defines.h"
@@ -714,4 +714,4 @@ struct e1000_hw {
 #include "80003es2lan.h"
 #include "ich8lan.h"
 
-#endif
+#endif /* _E1000E_HW_H_ */
index e9b82c2..a094800 100644 (file)
@@ -5974,15 +5974,19 @@ static void e1000_reset_task(struct work_struct *work)
        struct e1000_adapter *adapter;
        adapter = container_of(work, struct e1000_adapter, reset_task);
 
+       rtnl_lock();
        /* don't run the task if already down */
-       if (test_bit(__E1000_DOWN, &adapter->state))
+       if (test_bit(__E1000_DOWN, &adapter->state)) {
+               rtnl_unlock();
                return;
+       }
 
        if (!(adapter->flags & FLAG_RESTART_NOW)) {
                e1000e_dump(adapter);
                e_err("Reset adapter unexpectedly\n");
        }
        e1000e_reinit_locked(adapter);
+       rtnl_unlock();
 }
 
 /**
index a8a2b5f..c70dec6 100644 (file)
@@ -5083,7 +5083,7 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
        enum i40e_admin_queue_err adq_err;
        struct i40e_vsi *vsi = np->vsi;
        struct i40e_pf *pf = vsi->back;
-       bool is_reset_needed;
+       u32 reset_needed = 0;
        i40e_status status;
        u32 i, j;
 
@@ -5128,9 +5128,11 @@ static int i40e_set_priv_flags(struct net_device *dev, u32 flags)
 flags_complete:
        changed_flags = orig_flags ^ new_flags;
 
-       is_reset_needed = !!(changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
-               I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED |
-               I40E_FLAG_DISABLE_FW_LLDP));
+       if (changed_flags & I40E_FLAG_DISABLE_FW_LLDP)
+               reset_needed = I40E_PF_RESET_AND_REBUILD_FLAG;
+       if (changed_flags & (I40E_FLAG_VEB_STATS_ENABLED |
+           I40E_FLAG_LEGACY_RX | I40E_FLAG_SOURCE_PRUNING_DISABLED))
+               reset_needed = BIT(__I40E_PF_RESET_REQUESTED);
 
        /* Before we finalize any flag changes, we need to perform some
         * checks to ensure that the changes are supported and safe.
@@ -5252,7 +5254,7 @@ flags_complete:
                                case I40E_AQ_RC_EEXIST:
                                        dev_warn(&pf->pdev->dev,
                                                 "FW LLDP agent is already running\n");
-                                       is_reset_needed = false;
+                                       reset_needed = 0;
                                        break;
                                case I40E_AQ_RC_EPERM:
                                        dev_warn(&pf->pdev->dev,
@@ -5281,8 +5283,8 @@ flags_complete:
        /* Issue reset to cause things to take effect, as additional bits
         * are added we will need to create a mask of bits requiring reset
         */
-       if (is_reset_needed)
-               i40e_do_reset(pf, BIT(__I40E_PF_RESET_REQUESTED), true);
+       if (reset_needed)
+               i40e_do_reset(pf, reset_needed, true);
 
        return 0;
 }
index 8bb8eb6..17f3b80 100644 (file)
@@ -2616,7 +2616,7 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
                return;
        if (!test_and_clear_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state))
                return;
-       if (test_and_set_bit(__I40E_VF_DISABLE, pf->state)) {
+       if (test_bit(__I40E_VF_DISABLE, pf->state)) {
                set_bit(__I40E_MACVLAN_SYNC_PENDING, pf->state);
                return;
        }
@@ -2634,7 +2634,6 @@ static void i40e_sync_filters_subtask(struct i40e_pf *pf)
                        }
                }
        }
-       clear_bit(__I40E_VF_DISABLE, pf->state);
 }
 
 /**
@@ -3260,6 +3259,17 @@ static int i40e_configure_tx_ring(struct i40e_ring *ring)
 }
 
 /**
+ * i40e_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
+{
+       return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
+}
+
+/**
  * i40e_configure_rx_ring - Configure a receive ring context
  * @ring: The Rx ring to configure
  *
@@ -3370,6 +3380,8 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
        else
                set_ring_build_skb_enabled(ring);
 
+       ring->rx_offset = i40e_rx_offset(ring);
+
        /* cache tail for quicker writes, and clear the reg before use */
        ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
        writel(0, ring->tail);
@@ -5937,7 +5949,7 @@ static int i40e_add_channel(struct i40e_pf *pf, u16 uplink_seid,
        ch->enabled_tc = !i40e_is_channel_macvlan(ch) && enabled_tc;
        ch->seid = ctxt.seid;
        ch->vsi_number = ctxt.vsi_number;
-       ch->stat_counter_idx = cpu_to_le16(ctxt.info.stat_counter_idx);
+       ch->stat_counter_idx = le16_to_cpu(ctxt.info.stat_counter_idx);
 
        /* copy just the sections touched not the entire info
         * since not all sections are valid as returned by
@@ -7977,8 +7989,8 @@ static inline void
 i40e_set_cld_element(struct i40e_cloud_filter *filter,
                     struct i40e_aqc_cloud_filters_element_data *cld)
 {
-       int i, j;
        u32 ipa;
+       int i;
 
        memset(cld, 0, sizeof(*cld));
        ether_addr_copy(cld->outer_mac, filter->dst_mac);
@@ -7989,14 +8001,14 @@ i40e_set_cld_element(struct i40e_cloud_filter *filter,
 
        if (filter->n_proto == ETH_P_IPV6) {
 #define IPV6_MAX_INDEX (ARRAY_SIZE(filter->dst_ipv6) - 1)
-               for (i = 0, j = 0; i < ARRAY_SIZE(filter->dst_ipv6);
-                    i++, j += 2) {
+               for (i = 0; i < ARRAY_SIZE(filter->dst_ipv6); i++) {
                        ipa = be32_to_cpu(filter->dst_ipv6[IPV6_MAX_INDEX - i]);
-                       ipa = cpu_to_le32(ipa);
-                       memcpy(&cld->ipaddr.raw_v6.data[j], &ipa, sizeof(ipa));
+
+                       *(__le32 *)&cld->ipaddr.raw_v6.data[i * 2] = cpu_to_le32(ipa);
                }
        } else {
                ipa = be32_to_cpu(filter->dst_ipv4);
+
                memcpy(&cld->ipaddr.v4.data, &ipa, sizeof(ipa));
        }
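
The fixed loop converts each big-endian IPv6 word to CPU order and stores it as a typed little-endian word, reversing the word order for the firmware layout, instead of byte-swapping into a temporary and memcpy'ing it. A userspace sketch of the same repacking, with htole32()/ntohl() standing in for cpu_to_le32()/be32_to_cpu():

    #include <stdint.h>
    #include <stdio.h>
    #include <endian.h>      /* htole32() */
    #include <arpa/inet.h>   /* htonl(), ntohl() */

    int main(void)
    {
            /* An IPv6 address as four network-order (big-endian) words. */
            uint32_t be[4] = { htonl(0x20010db8), htonl(0), htonl(0),
                               htonl(0x00000001) };
            uint32_t le[4];

            /* Reverse the word order and flip each word to little
             * endian, mirroring the fixed loop (dst_ipv6[MAX - i]). */
            for (int i = 0; i < 4; i++)
                    le[i] = htole32(ntohl(be[3 - i]));

            for (int i = 0; i < 4; i++)
                    printf("word %d = 0x%08x\n", i, le[i]);
            return 0;
    }
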
 
@@ -8044,6 +8056,8 @@ int i40e_add_del_cloud_filter(struct i40e_vsi *vsi,
        if (filter->flags >= ARRAY_SIZE(flag_table))
                return I40E_ERR_CONFIG;
 
+       memset(&cld_filter, 0, sizeof(cld_filter));
+
        /* copy element needed to add cloud filter from filter */
        i40e_set_cld_element(filter, &cld_filter);
 
@@ -8107,10 +8121,13 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
                return -EOPNOTSUPP;
 
        /* adding filter using src_port/src_ip is not supported at this stage */
-       if (filter->src_port || filter->src_ipv4 ||
+       if (filter->src_port ||
+           (filter->src_ipv4 && filter->n_proto != ETH_P_IPV6) ||
            !ipv6_addr_any(&filter->ip.v6.src_ip6))
                return -EOPNOTSUPP;
 
+       memset(&cld_filter, 0, sizeof(cld_filter));
+
        /* copy element needed to add cloud filter from filter */
        i40e_set_cld_element(filter, &cld_filter.element);
 
@@ -8134,7 +8151,7 @@ int i40e_add_del_cloud_filter_big_buf(struct i40e_vsi *vsi,
                        cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_MAC_VLAN_PORT);
                }
 
-       } else if (filter->dst_ipv4 ||
+       } else if ((filter->dst_ipv4 && filter->n_proto != ETH_P_IPV6) ||
                   !ipv6_addr_any(&filter->ip.v6.dst_ip6)) {
                cld_filter.element.flags =
                                cpu_to_le16(I40E_AQC_ADD_CLOUD_FILTER_IP_PORT);
@@ -8928,11 +8945,6 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
                dev_dbg(&pf->pdev->dev, "PFR requested\n");
                i40e_handle_reset_warning(pf, lock_acquired);
 
-               dev_info(&pf->pdev->dev,
-                        pf->flags & I40E_FLAG_DISABLE_FW_LLDP ?
-                        "FW LLDP is disabled\n" :
-                        "FW LLDP is enabled\n");
-
        } else if (reset_flags & I40E_PF_RESET_AND_REBUILD_FLAG) {
                /* Request a PF Reset
                 *
@@ -8940,6 +8952,10 @@ void i40e_do_reset(struct i40e_pf *pf, u32 reset_flags, bool lock_acquired)
                 */
                i40e_prep_for_reset(pf);
                i40e_reset_and_rebuild(pf, true, lock_acquired);
+               dev_info(&pf->pdev->dev,
+                        pf->flags & I40E_FLAG_DISABLE_FW_LLDP ?
+                        "FW LLDP is disabled\n" :
+                        "FW LLDP is enabled\n");
 
        } else if (reset_flags & BIT_ULL(__I40E_REINIT_REQUESTED)) {
                int v;
@@ -10462,7 +10478,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
        int old_recovery_mode_bit = test_bit(__I40E_RECOVERY_MODE, pf->state);
        struct i40e_vsi *vsi = pf->vsi[pf->lan_vsi];
        struct i40e_hw *hw = &pf->hw;
-       u8 set_fc_aq_fail = 0;
        i40e_status ret;
        u32 val;
        int v;
@@ -10605,13 +10620,6 @@ static void i40e_rebuild(struct i40e_pf *pf, bool reinit, bool lock_acquired)
                         i40e_stat_str(&pf->hw, ret),
                         i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
 
-       /* make sure our flow control settings are restored */
-       ret = i40e_set_fc(&pf->hw, &set_fc_aq_fail, true);
-       if (ret)
-               dev_dbg(&pf->pdev->dev, "setting flow control: ret = %s last_status = %s\n",
-                       i40e_stat_str(&pf->hw, ret),
-                       i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
-
        /* Rebuild the VSIs and VEBs that existed before reset.
         * They are still in our local switch element arrays, so only
         * need to rebuild the switch model in the HW.
@@ -12191,6 +12199,8 @@ i40e_status i40e_set_partition_bw_setting(struct i40e_pf *pf)
        struct i40e_aqc_configure_partition_bw_data bw_data;
        i40e_status status;
 
+       memset(&bw_data, 0, sizeof(bw_data));
+
        /* Set the valid bit for this PF */
        bw_data.pf_valid_bits = cpu_to_le16(BIT(pf->hw.pf_id));
        bw_data.max_bw[pf->hw.pf_id] = pf->max_bw & I40E_ALT_BW_VALUE_MASK;
@@ -15198,7 +15208,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        int err;
        u32 val;
        u32 i;
-       u8 set_fc_aq_fail;
 
        err = pci_enable_device_mem(pdev);
        if (err)
@@ -15537,24 +15546,6 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        }
        INIT_LIST_HEAD(&pf->vsi[pf->lan_vsi]->ch_list);
 
-       /* Make sure flow control is set according to current settings */
-       err = i40e_set_fc(hw, &set_fc_aq_fail, true);
-       if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_GET)
-               dev_dbg(&pf->pdev->dev,
-                       "Set fc with err %s aq_err %s on get_phy_cap\n",
-                       i40e_stat_str(hw, err),
-                       i40e_aq_str(hw, hw->aq.asq_last_status));
-       if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_SET)
-               dev_dbg(&pf->pdev->dev,
-                       "Set fc with err %s aq_err %s on set_phy_config\n",
-                       i40e_stat_str(hw, err),
-                       i40e_aq_str(hw, hw->aq.asq_last_status));
-       if (set_fc_aq_fail & I40E_SET_FC_AQ_FAIL_UPDATE)
-               dev_dbg(&pf->pdev->dev,
-                       "Set fc with err %s aq_err %s on get_link_info\n",
-                       i40e_stat_str(hw, err),
-                       i40e_aq_str(hw, hw->aq.asq_last_status));
-
        /* if FDIR VSI was set up, start it now */
        for (i = 0; i < pf->num_alloc_vsi; i++) {
                if (pf->vsi[i] && pf->vsi[i]->type == I40E_VSI_FDIR) {
@@ -15611,6 +15602,8 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                if (err) {
                        dev_info(&pdev->dev,
                                 "setup of misc vector failed: %d\n", err);
+                       i40e_cloud_filter_exit(pf);
+                       i40e_fdir_teardown(pf);
                        goto err_vsis;
                }
        }
index f6f1af9..5747a99 100644 (file)
@@ -1570,17 +1570,6 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 }
 
 /**
- * i40e_rx_offset - Return expected offset into page to access data
- * @rx_ring: Ring we are requesting offset of
- *
- * Returns the offset value for ring into the data buffer.
- */
-static unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
-{
-       return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
-}
-
-/**
  * i40e_setup_rx_descriptors - Allocate Rx descriptors
  * @rx_ring: Rx descriptor ring (for a specific queue) to setup
  *
@@ -1608,7 +1597,6 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
        rx_ring->next_to_alloc = 0;
        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
-       rx_ring->rx_offset = i40e_rx_offset(rx_ring);
 
        /* XDP RX-queue info only needed for RX rings exposed to XDP */
        if (rx_ring->vsi->type == I40E_VSI_MAIN) {
@@ -1948,7 +1936,7 @@ void i40e_process_skb_fields(struct i40e_ring *rx_ring,
        skb_record_rx_queue(skb, rx_ring->queue_index);
 
        if (qword & BIT(I40E_RX_DESC_STATUS_L2TAG1P_SHIFT)) {
-               u16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1;
+               __le16 vlan_tag = rx_desc->wb.qword0.lo_dword.l2tag1;
 
                __vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
                                       le16_to_cpu(vlan_tag));
@@ -3223,13 +3211,16 @@ static int i40e_tx_enable_csum(struct sk_buff *skb, u32 *tx_flags,
 
                        l4_proto = ip.v4->protocol;
                } else if (*tx_flags & I40E_TX_FLAGS_IPV6) {
+                       int ret;
+
                        tunnel |= I40E_TX_CTX_EXT_IP_IPV6;
 
                        exthdr = ip.hdr + sizeof(*ip.v6);
                        l4_proto = ip.v6->nexthdr;
-                       if (l4.hdr != exthdr)
-                               ipv6_skip_exthdr(skb, exthdr - skb->data,
-                                                &l4_proto, &frag_off);
+                       ret = ipv6_skip_exthdr(skb, exthdr - skb->data,
+                                              &l4_proto, &frag_off);
+                       if (ret < 0)
+                               return -1;
                }
 
                /* define outer transport */
index 4f11f7b..fc32c50 100644 (file)
@@ -453,7 +453,7 @@ static void i40e_set_rs_bit(struct i40e_ring *xdp_ring)
        struct i40e_tx_desc *tx_desc;
 
        tx_desc = I40E_TX_DESC(xdp_ring, ntu);
-       tx_desc->cmd_type_offset_bsz |= (I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT);
+       tx_desc->cmd_type_offset_bsz |= cpu_to_le64(I40E_TX_DESC_CMD_RS << I40E_TXD_QW1_CMD_SHIFT);
 }
 
 /**
index 0a867d6..dc5b3c0 100644 (file)
@@ -1776,7 +1776,8 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter)
                goto err_alloc;
        }
 
-       if (iavf_process_config(adapter))
+       err = iavf_process_config(adapter);
+       if (err)
                goto err_alloc;
        adapter->current_op = VIRTCHNL_OP_UNKNOWN;
 
index dae8280..3577064 100644 (file)
@@ -454,9 +454,7 @@ struct ice_pf {
        struct ice_hw_port_stats stats_prev;
        struct ice_hw hw;
        u8 stat_prev_loaded:1; /* has previous stats been loaded */
-#ifdef CONFIG_DCB
        u16 dcbx_cap;
-#endif /* CONFIG_DCB */
        u32 tx_timeout_count;
        unsigned long tx_timeout_last_recovery;
        u32 tx_timeout_recovery_level;
index 3124a3b..1148d76 100644 (file)
@@ -275,6 +275,22 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
 }
 
 /**
+ * ice_rx_offset - Return expected offset into page to access data
+ * @rx_ring: Ring we are requesting offset of
+ *
+ * Returns the offset value for ring into the data buffer.
+ */
+static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
+{
+       if (ice_ring_uses_build_skb(rx_ring))
+               return ICE_SKB_PAD;
+       else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
+               return XDP_PACKET_HEADROOM;
+
+       return 0;
+}
+
+/**
  * ice_setup_rx_ctx - Configure a receive ring context
  * @ring: The Rx ring to configure
  *
@@ -413,11 +429,15 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
        else
                ice_set_ring_build_skb_ena(ring);
 
+       ring->rx_offset = ice_rx_offset(ring);
+
        /* init queue specific tail register */
        ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
        writel(0, ring->tail);
 
        if (ring->xsk_pool) {
+               bool ok;
+
                if (!xsk_buff_can_alloc(ring->xsk_pool, num_bufs)) {
                        dev_warn(dev, "XSK buffer pool does not provide enough addresses to fill %d buffers on Rx ring %d\n",
                                 num_bufs, ring->q_index);
@@ -426,8 +446,8 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
                        return 0;
                }
 
-               err = ice_alloc_rx_bufs_zc(ring, num_bufs);
-               if (err)
+               ok = ice_alloc_rx_bufs_zc(ring, num_bufs);
+               if (!ok)
                        dev_info(dev, "Failed to allocate some buffers on XSK buffer pool enabled Rx ring %d (pf_q %d)\n",
                                 ring->q_index, pf_q);
                return 0;
index fcfefad..468a63f 100644 (file)
@@ -134,7 +134,7 @@ ice_dcbnl_getnumtcs(struct net_device *dev, int __always_unused tcid, u8 *num)
        if (!test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags))
                return -EINVAL;
 
-       *num = IEEE_8021QAZ_MAX_TCS;
+       *num = pf->hw.func_caps.common_cap.maxtc;
        return 0;
 }
 
@@ -159,6 +159,10 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode)
        struct ice_pf *pf = ice_netdev_to_pf(netdev);
        struct ice_qos_cfg *qos_cfg;
 
+       /* if FW LLDP agent is running, DCBNL not allowed to change mode */
+       if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+               return ICE_DCB_NO_HW_CHG;
+
        /* No support for LLD_MANAGED modes or CEE+IEEE */
        if ((mode & DCB_CAP_DCBX_LLD_MANAGED) ||
            ((mode & DCB_CAP_DCBX_VER_IEEE) && (mode & DCB_CAP_DCBX_VER_CEE)) ||
index 5636c9b..2dcfa68 100644 (file)
@@ -8,6 +8,7 @@
 #include "ice_fltr.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
+#include <net/dcbnl.h>
 
 struct ice_stats {
        char stat_string[ETH_GSTRING_LEN];
@@ -1238,6 +1239,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
                        status = ice_init_pf_dcb(pf, true);
                        if (status)
                                dev_warn(dev, "Fail to init DCB\n");
+
+                       pf->dcbx_cap &= ~DCB_CAP_DCBX_LLD_MANAGED;
+                       pf->dcbx_cap |= DCB_CAP_DCBX_HOST;
                } else {
                        enum ice_status status;
                        bool dcbx_agent_status;
@@ -1280,6 +1284,9 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
                        if (status)
                                dev_dbg(dev, "Fail to enable MIB change events\n");
 
+                       pf->dcbx_cap &= ~DCB_CAP_DCBX_HOST;
+                       pf->dcbx_cap |= DCB_CAP_DCBX_LLD_MANAGED;
+
                        ice_nway_reset(netdev);
                }
        }
@@ -3322,6 +3329,18 @@ ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
 }
 
 /**
+ * ice_get_valid_rss_size - return valid number of RSS queues
+ * @hw: pointer to the HW structure
+ * @new_size: requested RSS queues
+ */
+static int ice_get_valid_rss_size(struct ice_hw *hw, int new_size)
+{
+       struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
+
+       return min_t(int, new_size, BIT(caps->rss_table_entry_width));
+}
+
+/**
  * ice_vsi_set_dflt_rss_lut - set default RSS LUT with requested RSS size
  * @vsi: VSI to reconfigure RSS LUT on
  * @req_rss_size: requested range of queue numbers for hashing
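
ice_get_valid_rss_size() clamps the requested queue count to what the RSS lookup table can address, i.e. 2^rss_table_entry_width entries. A tiny standalone version:

    #include <stdio.h>

    static int valid_rss_size(int requested, int table_entry_width)
    {
            /* The LUT entry is table_entry_width bits wide, so it can
             * index at most 2^width queues. */
            int hw_max = 1 << table_entry_width;

            return requested < hw_max ? requested : hw_max;
    }

    int main(void)
    {
            printf("%d\n", valid_rss_size(64, 7));   /* 64: fits     */
            printf("%d\n", valid_rss_size(300, 7));  /* 128: clamped */
            return 0;
    }
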
@@ -3348,14 +3367,10 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
                return -ENOMEM;
 
        /* set RSS LUT parameters */
-       if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+       if (!test_bit(ICE_FLAG_RSS_ENA, pf->flags))
                vsi->rss_size = 1;
-       } else {
-               struct ice_hw_common_caps *caps = &hw->func_caps.common_cap;
-
-               vsi->rss_size = min_t(int, req_rss_size,
-                                     BIT(caps->rss_table_entry_width));
-       }
+       else
+               vsi->rss_size = ice_get_valid_rss_size(hw, req_rss_size);
 
        /* create/set RSS LUT */
        ice_fill_rss_lut(lut, vsi->rss_table_size, vsi->rss_size);
@@ -3434,9 +3449,12 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
 
        ice_vsi_recfg_qs(vsi, new_rx, new_tx);
 
-       if (new_rx && !netif_is_rxfh_configured(dev))
+       if (!netif_is_rxfh_configured(dev))
                return ice_vsi_set_dflt_rss_lut(vsi, new_rx);
 
+       /* Update rss_size due to change in Rx queues */
+       vsi->rss_size = ice_get_valid_rss_size(&pf->hw, new_rx);
+
        return 0;
 }
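ice_get_valid_rss_size, factored out above and reused at the end of ice_set_channels, clamps the requested queue count to what the RSS LUT can address. A standalone sketch of the same arithmetic (the entry width of 7 is an illustrative value, not read from the hardware caps):

#include <stdio.h>

static int get_valid_rss_size(int rss_table_entry_width, int new_size)
{
        int max = 1 << rss_table_entry_width; /* LUT-addressable queues */

        return new_size < max ? new_size : max;
}

int main(void)
{
        printf("%d\n", get_valid_rss_size(7, 64));  /* 64: fits as-is */
        printf("%d\n", get_valid_rss_size(7, 200)); /* 128: clamped */
        return 0;
}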
 
index b7dc25d..b91dcfd 100644 (file)
@@ -444,22 +444,6 @@ void ice_free_rx_ring(struct ice_ring *rx_ring)
 }
 
 /**
- * ice_rx_offset - Return expected offset into page to access data
- * @rx_ring: Ring we are requesting offset of
- *
- * Returns the offset value for ring into the data buffer.
- */
-static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
-{
-       if (ice_ring_uses_build_skb(rx_ring))
-               return ICE_SKB_PAD;
-       else if (ice_is_xdp_ena_vsi(rx_ring->vsi))
-               return XDP_PACKET_HEADROOM;
-
-       return 0;
-}
-
-/**
  * ice_setup_rx_ring - Allocate the Rx descriptors
  * @rx_ring: the Rx ring to set up
  *
@@ -493,7 +477,6 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring)
 
        rx_ring->next_to_use = 0;
        rx_ring->next_to_clean = 0;
-       rx_ring->rx_offset = ice_rx_offset(rx_ring);
 
        if (ice_is_xdp_ena_vsi(rx_ring->vsi))
                WRITE_ONCE(rx_ring->xdp_prog, rx_ring->vsi->xdp_prog);
index bf5fd81..1f38a8d 100644 (file)
@@ -1919,6 +1919,29 @@ static int ice_vc_get_ver_msg(struct ice_vf *vf, u8 *msg)
 }
 
 /**
+ * ice_vc_get_max_frame_size - get max frame size allowed for VF
+ * @vf: VF used to determine max frame size
+ *
+ * Max frame size is determined based on the current port's max frame size and
+ * whether a port VLAN is configured on this VF. The VF is not aware whether
+ * it's in a port VLAN, so the PF needs to account for this both when checking
+ * the max frame size and when reporting it to the VF.
+ */
+static u16 ice_vc_get_max_frame_size(struct ice_vf *vf)
+{
+       struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+       struct ice_port_info *pi = vsi->port_info;
+       u16 max_frame_size;
+
+       max_frame_size = pi->phy.link_info.max_frame_size;
+
+       if (vf->port_vlan_info)
+               max_frame_size -= VLAN_HLEN;
+
+       return max_frame_size;
+}
+
+/**
  * ice_vc_get_vf_res_msg
  * @vf: pointer to the VF info
  * @msg: pointer to the msg buffer
@@ -2000,6 +2023,7 @@ static int ice_vc_get_vf_res_msg(struct ice_vf *vf, u8 *msg)
        vfres->max_vectors = pf->num_msix_per_vf;
        vfres->rss_key_size = ICE_VSIQF_HKEY_ARRAY_SIZE;
        vfres->rss_lut_size = ICE_VSIQF_HLUT_ARRAY_SIZE;
+       vfres->max_mtu = ice_vc_get_max_frame_size(vf);
 
        vfres->vsi_res[0].vsi_id = vf->lan_vsi_num;
        vfres->vsi_res[0].vsi_type = VIRTCHNL_VSI_SRIOV;
@@ -2420,7 +2444,7 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
        }
 
        if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
-               bool set_dflt_vsi = !!(info->flags & FLAG_VF_UNICAST_PROMISC);
+               bool set_dflt_vsi = alluni || allmulti;
 
                if (set_dflt_vsi && !ice_is_dflt_vsi_in_use(pf->first_sw))
                        /* only attempt to set the default forwarding VSI if
@@ -2998,6 +3022,8 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
 
                /* copy Rx queue info from VF into VSI */
                if (qpi->rxq.ring_len > 0) {
+                       u16 max_frame_size = ice_vc_get_max_frame_size(vf);
+
                        num_rxq++;
                        vsi->rx_rings[i]->dma = qpi->rxq.dma_ring_addr;
                        vsi->rx_rings[i]->count = qpi->rxq.ring_len;
@@ -3010,7 +3036,7 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
                        }
                        vsi->rx_buf_len = qpi->rxq.databuffer_size;
                        vsi->rx_rings[i]->rx_buf_len = vsi->rx_buf_len;
-                       if (qpi->rxq.max_pkt_size >= (16 * 1024) ||
+                       if (qpi->rxq.max_pkt_size > max_frame_size ||
                            qpi->rxq.max_pkt_size < 64) {
                                v_ret = VIRTCHNL_STATUS_ERR_PARAM;
                                goto error_param;
@@ -3018,6 +3044,11 @@ static int ice_vc_cfg_qs_msg(struct ice_vf *vf, u8 *msg)
                }
 
                vsi->max_frame = qpi->rxq.max_pkt_size;
+               /* add space for the port VLAN since the VF driver is not
+                * expected to account for it in the MTU calculation
+                */
+               if (vf->port_vlan_info)
+                       vsi->max_frame += VLAN_HLEN;
        }
 
        /* VF can request to configure less than allocated queues or default
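Both port-VLAN hunks above apply the same accounting in opposite directions: the size reported to the VF shrinks by VLAN_HLEN because the PF inserts the tag behind the VF's back, while the VSI's max_frame grows by VLAN_HLEN to make room for it. A userspace sketch (VLAN_HLEN is 4 as in <linux/if_vlan.h>; the 9728-byte port maximum is only an example value):

#include <stdio.h>

#define VLAN_HLEN 4

static unsigned int vf_max_frame(unsigned int port_max, int has_port_vlan)
{
        /* advertised to the VF: leave room for the hidden tag */
        return has_port_vlan ? port_max - VLAN_HLEN : port_max;
}

static unsigned int vsi_max_frame(unsigned int vf_req, int has_port_vlan)
{
        /* programmed on the VSI: grow to fit the inserted tag */
        return has_port_vlan ? vf_req + VLAN_HLEN : vf_req;
}

int main(void)
{
        printf("%u\n", vf_max_frame(9728, 1));  /* 9724 */
        printf("%u\n", vsi_max_frame(9724, 1)); /* 9728 */
        return 0;
}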
index 83f3c95..9f94d91 100644 (file)
@@ -358,18 +358,18 @@ xsk_pool_if_up:
  * This function allocates a number of Rx buffers from the fill ring
  * or the internal recycle mechanism and places them on the Rx ring.
  *
- * Returns false if all allocations were successful, true if any fail.
+ * Returns true if all allocations were successful, false if any fail.
  */
 bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
 {
        union ice_32b_rx_flex_desc *rx_desc;
        u16 ntu = rx_ring->next_to_use;
        struct ice_rx_buf *rx_buf;
-       bool ret = false;
+       bool ok = true;
        dma_addr_t dma;
 
        if (!count)
-               return false;
+               return true;
 
        rx_desc = ICE_RX_DESC(rx_ring, ntu);
        rx_buf = &rx_ring->rx_buf[ntu];
@@ -377,7 +377,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
        do {
                rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool);
                if (!rx_buf->xdp) {
-                       ret = true;
+                       ok = false;
                        break;
                }
 
@@ -402,7 +402,7 @@ bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
                ice_release_rx_desc(rx_ring, ntu);
        }
 
-       return ret;
+       return ok;
 }
 
 /**
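The inversion above makes ice_alloc_rx_bufs_zc report success instead of failure, with an empty request counting as success. A sketch of the resulting convention, with malloc() standing in for xsk_buff_alloc():

#include <stdbool.h>
#include <stdlib.h>

static bool alloc_rx_bufs(void **bufs, unsigned int count)
{
        bool ok = true;
        unsigned int i;

        if (!count)
                return true; /* nothing requested, nothing failed */

        for (i = 0; i < count; i++) {
                bufs[i] = malloc(2048); /* stand-in for xsk_buff_alloc() */
                if (!bufs[i]) {
                        ok = false; /* ring only partially refilled */
                        break;
                }
        }

        return ok;
}

int main(void)
{
        void *bufs[4];

        return alloc_rx_bufs(bufs, 4) ? 0 : 1;
}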
index 5d87957..44111f6 100644 (file)
@@ -1,8 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright(c) 2007 - 2018 Intel Corporation. */
 
-#ifndef _E1000_HW_H_
-#define _E1000_HW_H_
+#ifndef _E1000_IGB_HW_H_
+#define _E1000_IGB_HW_H_
 
 #include <linux/types.h>
 #include <linux/delay.h>
@@ -551,4 +551,4 @@ s32 igb_write_pcie_cap_reg(struct e1000_hw *hw, u32 reg, u16 *value);
 
 void igb_read_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
 void igb_write_pci_cfg(struct e1000_hw *hw, u32 reg, u16 *value);
-#endif /* _E1000_HW_H_ */
+#endif /* _E1000_IGB_HW_H_ */
index aaa954a..7bda8c5 100644 (file)
@@ -748,8 +748,8 @@ void igb_ptp_suspend(struct igb_adapter *adapter);
 void igb_ptp_rx_hang(struct igb_adapter *adapter);
 void igb_ptp_tx_hang(struct igb_adapter *adapter);
 void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb);
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
-                        struct sk_buff *skb);
+int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+                       struct sk_buff *skb);
 int igb_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
 int igb_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
 void igb_set_flag_queue_pairs(struct igb_adapter *, const u32);
index 878b31d..a45cd2b 100644 (file)
@@ -8214,7 +8214,8 @@ static void igb_reuse_rx_page(struct igb_ring *rx_ring,
        new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
 }
 
-static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer)
+static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer,
+                                 int rx_buf_pgcnt)
 {
        unsigned int pagecnt_bias = rx_buffer->pagecnt_bias;
        struct page *page = rx_buffer->page;
@@ -8225,7 +8226,7 @@ static bool igb_can_reuse_rx_page(struct igb_rx_buffer *rx_buffer)
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely((page_ref_count(page) - pagecnt_bias) > 1))
+       if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1))
                return false;
 #else
 #define IGB_LAST_OFFSET \
@@ -8301,9 +8302,10 @@ static struct sk_buff *igb_construct_skb(struct igb_ring *rx_ring,
                return NULL;
 
        if (unlikely(igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP))) {
-               igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb);
-               xdp->data += IGB_TS_HDR_LEN;
-               size -= IGB_TS_HDR_LEN;
+               if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, xdp->data, skb)) {
+                       xdp->data += IGB_TS_HDR_LEN;
+                       size -= IGB_TS_HDR_LEN;
+               }
        }
 
        /* Determine available headroom for copy */
@@ -8364,8 +8366,8 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring,
 
        /* pull timestamp out of packet data */
        if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) {
-               igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb);
-               __skb_pull(skb, IGB_TS_HDR_LEN);
+               if (!igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb))
+                       __skb_pull(skb, IGB_TS_HDR_LEN);
        }
 
        /* update buffer offset */
@@ -8614,11 +8616,17 @@ static unsigned int igb_rx_offset(struct igb_ring *rx_ring)
 }
 
 static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring,
-                                              const unsigned int size)
+                                              const unsigned int size, int *rx_buf_pgcnt)
 {
        struct igb_rx_buffer *rx_buffer;
 
        rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
+       *rx_buf_pgcnt =
+#if (PAGE_SIZE < 8192)
+               page_count(rx_buffer->page);
+#else
+               0;
+#endif
        prefetchw(rx_buffer->page);
 
        /* we are reusing so sync this buffer for CPU use */
@@ -8634,9 +8642,9 @@ static struct igb_rx_buffer *igb_get_rx_buffer(struct igb_ring *rx_ring,
 }
 
 static void igb_put_rx_buffer(struct igb_ring *rx_ring,
-                             struct igb_rx_buffer *rx_buffer)
+                             struct igb_rx_buffer *rx_buffer, int rx_buf_pgcnt)
 {
-       if (igb_can_reuse_rx_page(rx_buffer)) {
+       if (igb_can_reuse_rx_page(rx_buffer, rx_buf_pgcnt)) {
                /* hand second half of page back to the ring */
                igb_reuse_rx_page(rx_ring, rx_buffer);
        } else {
@@ -8664,6 +8672,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
        unsigned int xdp_xmit = 0;
        struct xdp_buff xdp;
        u32 frame_sz = 0;
+       int rx_buf_pgcnt;
 
        /* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
 #if (PAGE_SIZE < 8192)
@@ -8693,7 +8702,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
                 */
                dma_rmb();
 
-               rx_buffer = igb_get_rx_buffer(rx_ring, size);
+               rx_buffer = igb_get_rx_buffer(rx_ring, size, &rx_buf_pgcnt);
 
                /* retrieve a buffer from the ring */
                if (!skb) {
@@ -8736,7 +8745,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
                        break;
                }
 
-               igb_put_rx_buffer(rx_ring, rx_buffer);
+               igb_put_rx_buffer(rx_ring, rx_buffer, rx_buf_pgcnt);
                cleaned_count++;
 
                /* fetch next buffer in frame if non-eop */
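The new rx_buf_pgcnt parameter threads a page_count() snapshot, taken when the buffer is fetched, through to the reuse decision, so the test compares against a stable value instead of re-reading a refcount that the stack may change concurrently. The heuristic itself is simple arithmetic:

#include <stdbool.h>
#include <stdio.h>

/* Reuse is safe only when the snapshot shows no owner besides the
 * driver itself, whose references are tracked in pagecnt_bias.
 */
static bool can_reuse_rx_page(int pgcnt_snapshot, int pagecnt_bias)
{
        return (pgcnt_snapshot - pagecnt_bias) <= 1;
}

int main(void)
{
        printf("%d\n", can_reuse_rx_page(2, 1)); /* 1: driver-only */
        printf("%d\n", can_reuse_rx_page(3, 1)); /* 0: an skb still holds it */
        return 0;
}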
index 7cc5428..86a5762 100644 (file)
@@ -856,6 +856,9 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
        dev_kfree_skb_any(skb);
 }
 
+#define IGB_RET_PTP_DISABLED 1
+#define IGB_RET_PTP_INVALID 2
+
 /**
  * igb_ptp_rx_pktstamp - retrieve Rx per packet timestamp
  * @q_vector: Pointer to interrupt specific structure
@@ -864,19 +867,29 @@ static void igb_ptp_tx_hwtstamp(struct igb_adapter *adapter)
  *
  * This function is meant to retrieve a timestamp from the first buffer of an
  * incoming frame.  The value is stored in little endian format starting on
- * byte 8.
+ * byte 8.
+ *
+ * Returns: 0 on success, nonzero on failure
  **/
-void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
-                        struct sk_buff *skb)
+int igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
+                       struct sk_buff *skb)
 {
-       __le64 *regval = (__le64 *)va;
        struct igb_adapter *adapter = q_vector->adapter;
+       __le64 *regval = (__le64 *)va;
        int adjust = 0;
 
+       if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
+               return IGB_RET_PTP_DISABLED;
+
        /* The timestamp is recorded in little endian format.
         * DWORD: 0        1        2        3
         * Field: Reserved Reserved SYSTIML  SYSTIMH
         */
+
+       /* check reserved dwords are zero, be/le doesn't matter for zero */
+       if (regval[0])
+               return IGB_RET_PTP_INVALID;
+
        igb_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
                                   le64_to_cpu(regval[1]));
 
@@ -896,6 +909,8 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
        }
        skb_hwtstamps(skb)->hwtstamp =
                ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
+
+       return 0;
 }
 
 /**
@@ -906,13 +921,15 @@ void igb_ptp_rx_pktstamp(struct igb_q_vector *q_vector, void *va,
  * This function is meant to retrieve a timestamp from the internal registers
  * of the adapter and store it in the skb.
  **/
-void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector,
-                        struct sk_buff *skb)
+void igb_ptp_rx_rgtstamp(struct igb_q_vector *q_vector, struct sk_buff *skb)
 {
        struct igb_adapter *adapter = q_vector->adapter;
        struct e1000_hw *hw = &adapter->hw;
-       u64 regval;
        int adjust = 0;
+       u64 regval;
+
+       if (!(adapter->ptp_flags & IGB_PTP_ENABLED))
+               return;
 
        /* If this bit is set, then the RX registers contain the time stamp. No
         * other packet will be time stamped until we read these registers, so
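The new checks make igb_ptp_rx_pktstamp fail-closed: the callers changed earlier strip IGB_TS_HDR_LEN from the packet only when a timestamp was really present. A userspace sketch of the header validation (a little endian host is assumed, so le64_to_cpu is elided; the return value 2 mirrors IGB_RET_PTP_INVALID):

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* First 8 bytes are reserved and must be zero; SYSTIM follows. */
static int parse_rx_pktstamp(const uint8_t *va, uint64_t *systim)
{
        uint64_t reserved, regval;

        memcpy(&reserved, va, sizeof(reserved));
        if (reserved)
                return 2; /* invalid header, not a timestamp */

        memcpy(&regval, va + 8, sizeof(regval));
        *systim = regval;
        return 0;
}

int main(void)
{
        uint8_t hdr[16] = { 0 };
        uint64_t ts;

        hdr[8] = 42; /* SYSTIM low byte */
        printf("%d %llu\n", parse_rx_pktstamp(hdr, &ts),
               (unsigned long long)ts); /* 0 42 */
        return 0;
}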
index 5d2809d..1b08a7d 100644 (file)
@@ -547,7 +547,7 @@ void igc_ptp_init(struct igc_adapter *adapter);
 void igc_ptp_reset(struct igc_adapter *adapter);
 void igc_ptp_suspend(struct igc_adapter *adapter);
 void igc_ptp_stop(struct igc_adapter *adapter);
-void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va,
+void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va,
                         struct sk_buff *skb);
 int igc_ptp_set_ts_config(struct net_device *netdev, struct ifreq *ifr);
 int igc_ptp_get_ts_config(struct net_device *netdev, struct ifreq *ifr);
index 824a6c4..8722294 100644 (file)
@@ -1711,6 +1711,9 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
                                                     Autoneg);
        }
 
+       /* Set pause flow control settings */
+       ethtool_link_ksettings_add_link_mode(cmd, supported, Pause);
+
        switch (hw->fc.requested_mode) {
        case igc_fc_full:
                ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
@@ -1725,9 +1728,7 @@ static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
                                                     Asym_Pause);
                break;
        default:
-               ethtool_link_ksettings_add_link_mode(cmd, advertising, Pause);
-               ethtool_link_ksettings_add_link_mode(cmd, advertising,
-                                                    Asym_Pause);
+               break;
        }
 
        status = pm_runtime_suspended(&adapter->pdev->dev) ?
index 7ac9597..4d989eb 100644 (file)
@@ -3831,10 +3831,19 @@ static void igc_reset_task(struct work_struct *work)
 
        adapter = container_of(work, struct igc_adapter, reset_task);
 
+       rtnl_lock();
+       /* If we're already down or resetting, just bail */
+       if (test_bit(__IGC_DOWN, &adapter->state) ||
+           test_bit(__IGC_RESETTING, &adapter->state)) {
+               rtnl_unlock();
+               return;
+       }
+
        igc_rings_dump(adapter);
        igc_regs_dump(adapter);
        netdev_err(adapter->netdev, "Reset adapter\n");
        igc_reinit_locked(adapter);
+       rtnl_unlock();
 }
 
 /**
index ac0b9c8..545f4d0 100644 (file)
@@ -152,46 +152,54 @@ static void igc_ptp_systim_to_hwtstamp(struct igc_adapter *adapter,
 }
 
 /**
- * igc_ptp_rx_pktstamp - retrieve Rx per packet timestamp
+ * igc_ptp_rx_pktstamp - Retrieve timestamp from Rx packet buffer
  * @q_vector: Pointer to interrupt specific structure
  * @va: Pointer to address containing Rx buffer
  * @skb: Buffer containing timestamp and packet
  *
- * This function is meant to retrieve the first timestamp from the
- * first buffer of an incoming frame. The value is stored in little
- * endian format starting on byte 0. There's a second timestamp
- * starting on byte 8.
- **/
-void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, void *va,
+ * This function retrieves the timestamp saved at the beginning of the packet
+ * buffer. While two timestamps are available, one in timer0 reference and the
+ * other in timer1 reference, this function considers only the timestamp in
+ * timer0 reference.
+ */
+void igc_ptp_rx_pktstamp(struct igc_q_vector *q_vector, __le32 *va,
                         struct sk_buff *skb)
 {
        struct igc_adapter *adapter = q_vector->adapter;
-       __le64 *regval = (__le64 *)va;
-       int adjust = 0;
-
-       /* The timestamp is recorded in little endian format.
-        * DWORD: | 0          | 1           | 2          | 3
-        * Field: | Timer0 Low | Timer0 High | Timer1 Low | Timer1 High
+       u64 regval;
+       int adjust;
+
+       /* Timestamps are saved in little endian at the beginning of the packet
+        * buffer following the layout:
+        *
+        * DWORD: | 0              | 1              | 2              | 3              |
+        * Field: | Timer1 SYSTIML | Timer1 SYSTIMH | Timer0 SYSTIML | Timer0 SYSTIMH |
+        *
+        * SYSTIML holds the nanoseconds part while SYSTIMH holds the seconds
+        * part of the timestamp.
         */
-       igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb),
-                                  le64_to_cpu(regval[0]));
-
-       /* adjust timestamp for the RX latency based on link speed */
-       if (adapter->hw.mac.type == igc_i225) {
-               switch (adapter->link_speed) {
-               case SPEED_10:
-                       adjust = IGC_I225_RX_LATENCY_10;
-                       break;
-               case SPEED_100:
-                       adjust = IGC_I225_RX_LATENCY_100;
-                       break;
-               case SPEED_1000:
-                       adjust = IGC_I225_RX_LATENCY_1000;
-                       break;
-               case SPEED_2500:
-                       adjust = IGC_I225_RX_LATENCY_2500;
-                       break;
-               }
+       regval = le32_to_cpu(va[2]);
+       regval |= (u64)le32_to_cpu(va[3]) << 32;
+       igc_ptp_systim_to_hwtstamp(adapter, skb_hwtstamps(skb), regval);
+
+       /* Adjust timestamp for the RX latency based on link speed */
+       switch (adapter->link_speed) {
+       case SPEED_10:
+               adjust = IGC_I225_RX_LATENCY_10;
+               break;
+       case SPEED_100:
+               adjust = IGC_I225_RX_LATENCY_100;
+               break;
+       case SPEED_1000:
+               adjust = IGC_I225_RX_LATENCY_1000;
+               break;
+       case SPEED_2500:
+               adjust = IGC_I225_RX_LATENCY_2500;
+               break;
+       default:
+               adjust = 0;
+               netdev_warn_once(adapter->netdev, "Imprecise timestamp\n");
+               break;
        }
        skb_hwtstamps(skb)->hwtstamp =
                ktime_sub_ns(skb_hwtstamps(skb)->hwtstamp, adjust);
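The rewritten comment pins down the buffer layout, and the code now assembles the 64-bit timer0 value from dwords 2 and 3 directly. A sketch of that assembly (little endian host assumed, so le32_to_cpu is elided):

#include <stdint.h>
#include <stdio.h>

/* DWORDs 0-1 hold timer1, DWORDs 2-3 hold timer0 (SYSTIML, SYSTIMH). */
static uint64_t timer0_systim(const uint32_t *va)
{
        return (uint64_t)va[2] | ((uint64_t)va[3] << 32);
}

int main(void)
{
        uint32_t buf[4] = { 0, 0, 500000000u, 7 }; /* timer0 = 7 s + 0.5 s */

        printf("%llu\n", (unsigned long long)timer0_systim(buf));
        /* (7 << 32) + 500000000 = 30564771072 */
        return 0;
}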
index eca7352..54d4726 100644 (file)
@@ -575,6 +575,11 @@ static int ixgbe_ipsec_add_sa(struct xfrm_state *xs)
                return -EINVAL;
        }
 
+       if (xs->props.mode != XFRM_MODE_TRANSPORT) {
+               netdev_err(dev, "Unsupported mode for ipsec offload\n");
+               return -EINVAL;
+       }
+
        if (ixgbe_ipsec_check_mgmt_ip(xs)) {
                netdev_err(dev, "IPsec IP addr clash with mgmt filters\n");
                return -EINVAL;
index fae8420..03d9aad 100644 (file)
@@ -4118,6 +4118,8 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
 #endif
        }
 
+       ring->rx_offset = ixgbe_rx_offset(ring);
+
        if (ring->xsk_pool && hw->mac.type != ixgbe_mac_82599EB) {
                u32 xsk_buf_len = xsk_pool_get_rx_frame_size(ring->xsk_pool);
 
@@ -6578,7 +6580,6 @@ int ixgbe_setup_rx_resources(struct ixgbe_adapter *adapter,
 
        rx_ring->next_to_clean = 0;
        rx_ring->next_to_use = 0;
-       rx_ring->rx_offset = ixgbe_rx_offset(rx_ring);
 
        /* XDP RX-queue info */
        if (xdp_rxq_info_reg(&rx_ring->xdp_rxq, adapter->netdev,
@@ -9565,8 +9566,10 @@ static int ixgbe_configure_clsu32(struct ixgbe_adapter *adapter,
        ixgbe_atr_compute_perfect_hash_82599(&input->filter, mask);
        err = ixgbe_fdir_write_perfect_filter_82599(hw, &input->filter,
                                                    input->sw_idx, queue);
-       if (!err)
-               ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
+       if (err)
+               goto err_out_w_lock;
+
+       ixgbe_update_ethtool_fdir_entry(adapter, input, input->sw_idx);
        spin_unlock(&adapter->fdir_perfect_lock);
 
        if ((uhtid != 0x800) && (adapter->jump_tables[uhtid]))
index 5170dd9..caaea2c 100644 (file)
@@ -272,6 +272,11 @@ static int ixgbevf_ipsec_add_sa(struct xfrm_state *xs)
                return -EINVAL;
        }
 
+       if (xs->props.mode != XFRM_MODE_TRANSPORT) {
+               netdev_err(dev, "Unsupported mode for ipsec offload\n");
+               return -EINVAL;
+       }
+
        if (xs->xso.flags & XFRM_OFFLOAD_INBOUND) {
                struct rx_sa rsa;
 
index 7fe15a3..fe0989c 100644 (file)
@@ -6,7 +6,7 @@
 config NET_VENDOR_MARVELL
        bool "Marvell devices"
        default y
-       depends on PCI || CPU_PXA168 || MV64X60 || PPC32 || PLAT_ORION || INET || COMPILE_TEST
+       depends on PCI || CPU_PXA168 || PPC32 || PLAT_ORION || INET || COMPILE_TEST
        help
          If you have a network (Ethernet) card belonging to this class, say Y.
 
@@ -19,7 +19,7 @@ if NET_VENDOR_MARVELL
 
 config MV643XX_ETH
        tristate "Marvell Discovery (643XX) and Orion ethernet support"
-       depends on MV64X60 || PPC32 || PLAT_ORION || COMPILE_TEST
+       depends on PPC32 || PLAT_ORION || COMPILE_TEST
        depends on INET
        select PHYLIB
        select MVMDIO
index 90e6111..3bfb659 100644 (file)
@@ -2684,7 +2684,7 @@ static const struct of_device_id mv643xx_eth_shared_ids[] = {
 MODULE_DEVICE_TABLE(of, mv643xx_eth_shared_ids);
 #endif
 
-#if defined(CONFIG_OF_IRQ) && !defined(CONFIG_MV64X60)
+#ifdef CONFIG_OF_IRQ
 #define mv643xx_eth_property(_np, _name, _v)                           \
        do {                                                            \
                u32 tmp;                                                \
index 0507369..1767c60 100644 (file)
@@ -4699,9 +4699,10 @@ static void mvpp2_irqs_deinit(struct mvpp2_port *port)
        }
 }
 
-static bool mvpp22_rss_is_supported(void)
+static bool mvpp22_rss_is_supported(struct mvpp2_port *port)
 {
-       return queue_mode == MVPP2_QDIST_MULTI_MODE;
+       return (queue_mode == MVPP2_QDIST_MULTI_MODE) &&
+               !(port->flags & MVPP2_F_LOOPBACK);
 }
 
 static int mvpp2_open(struct net_device *dev)
@@ -5513,7 +5514,7 @@ static int mvpp2_ethtool_get_rxnfc(struct net_device *dev,
        struct mvpp2_port *port = netdev_priv(dev);
        int ret = 0, i, loc = 0;
 
-       if (!mvpp22_rss_is_supported())
+       if (!mvpp22_rss_is_supported(port))
                return -EOPNOTSUPP;
 
        switch (info->cmd) {
@@ -5548,7 +5549,7 @@ static int mvpp2_ethtool_set_rxnfc(struct net_device *dev,
        struct mvpp2_port *port = netdev_priv(dev);
        int ret = 0;
 
-       if (!mvpp22_rss_is_supported())
+       if (!mvpp22_rss_is_supported(port))
                return -EOPNOTSUPP;
 
        switch (info->cmd) {
@@ -5569,7 +5570,9 @@ static int mvpp2_ethtool_set_rxnfc(struct net_device *dev,
 
 static u32 mvpp2_ethtool_get_rxfh_indir_size(struct net_device *dev)
 {
-       return mvpp22_rss_is_supported() ? MVPP22_RSS_TABLE_ENTRIES : 0;
+       struct mvpp2_port *port = netdev_priv(dev);
+
+       return mvpp22_rss_is_supported(port) ? MVPP22_RSS_TABLE_ENTRIES : 0;
 }
 
 static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
@@ -5578,7 +5581,7 @@ static int mvpp2_ethtool_get_rxfh(struct net_device *dev, u32 *indir, u8 *key,
        struct mvpp2_port *port = netdev_priv(dev);
        int ret = 0;
 
-       if (!mvpp22_rss_is_supported())
+       if (!mvpp22_rss_is_supported(port))
                return -EOPNOTSUPP;
 
        if (indir)
@@ -5596,7 +5599,7 @@ static int mvpp2_ethtool_set_rxfh(struct net_device *dev, const u32 *indir,
        struct mvpp2_port *port = netdev_priv(dev);
        int ret = 0;
 
-       if (!mvpp22_rss_is_supported())
+       if (!mvpp22_rss_is_supported(port))
                return -EOPNOTSUPP;
 
        if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
@@ -5617,7 +5620,7 @@ static int mvpp2_ethtool_get_rxfh_context(struct net_device *dev, u32 *indir,
        struct mvpp2_port *port = netdev_priv(dev);
        int ret = 0;
 
-       if (!mvpp22_rss_is_supported())
+       if (!mvpp22_rss_is_supported(port))
                return -EOPNOTSUPP;
        if (rss_context >= MVPP22_N_RSS_TABLES)
                return -EINVAL;
@@ -5639,7 +5642,7 @@ static int mvpp2_ethtool_set_rxfh_context(struct net_device *dev,
        struct mvpp2_port *port = netdev_priv(dev);
        int ret;
 
-       if (!mvpp22_rss_is_supported())
+       if (!mvpp22_rss_is_supported(port))
                return -EOPNOTSUPP;
 
        if (hfunc != ETH_RSS_HASH_NO_CHANGE && hfunc != ETH_RSS_HASH_CRC32)
@@ -5956,7 +5959,7 @@ static int mvpp2_port_init(struct mvpp2_port *port)
        mvpp2_cls_oversize_rxq_set(port);
        mvpp2_cls_port_config(port);
 
-       if (mvpp22_rss_is_supported())
+       if (mvpp22_rss_is_supported(port))
                mvpp22_port_rss_init(port);
 
        /* Provide an initial Rx packet size */
@@ -6861,7 +6864,7 @@ static int mvpp2_port_probe(struct platform_device *pdev,
        dev->hw_features |= features | NETIF_F_RXCSUM | NETIF_F_GRO |
                            NETIF_F_HW_VLAN_CTAG_FILTER;
 
-       if (mvpp22_rss_is_supported()) {
+       if (mvpp22_rss_is_supported(port)) {
                dev->hw_features |= NETIF_F_RXHASH;
                dev->features |= NETIF_F_NTUPLE;
        }
index 9caa375..68deae5 100644 (file)
@@ -56,7 +56,9 @@ static bool is_dev_rpm(void *cgxd)
 
 bool is_lmac_valid(struct cgx *cgx, int lmac_id)
 {
-       return cgx && test_bit(lmac_id, &cgx->lmac_bmap);
+       if (!cgx || lmac_id < 0 || lmac_id >= MAX_LMAC_PER_CGX)
+               return false;
+       return test_bit(lmac_id, &cgx->lmac_bmap);
 }
 
 struct mac_ops *get_mac_ops(void *cgxd)
index b192692..5c372d2 100644 (file)
@@ -13499,8 +13499,6 @@ static struct npc_mcam_kex npc_mkex_default = {
                        [NPC_LT_LC_IP] = {
                                /* SIP+DIP: 8 bytes, KW2[63:0] */
                                KEX_LD_CFG(0x07, 0xc, 0x1, 0x0, 0x10),
-                               /* TOS: 1 byte, KW1[63:56] */
-                               KEX_LD_CFG(0x0, 0x1, 0x1, 0x0, 0xf),
                        },
                        /* Layer C: IPv6 */
                        [NPC_LT_LC_IP6] = {
index d9a1a71..ab24a5e 100644 (file)
@@ -2462,8 +2462,10 @@ static void rvu_unregister_interrupts(struct rvu *rvu)
                    INTR_MASK(rvu->hw->total_pfs) & ~1ULL);
 
        for (irq = 0; irq < rvu->num_vec; irq++) {
-               if (rvu->irq_allocated[irq])
+               if (rvu->irq_allocated[irq]) {
                        free_irq(pci_irq_vector(rvu->pdev, irq), rvu);
+                       rvu->irq_allocated[irq] = false;
+               }
        }
 
        pci_free_irq_vectors(rvu->pdev);
@@ -2975,8 +2977,8 @@ static void rvu_remove(struct pci_dev *pdev)
        struct rvu *rvu = pci_get_drvdata(pdev);
 
        rvu_dbg_exit(rvu);
-       rvu_unregister_interrupts(rvu);
        rvu_unregister_dl(rvu);
+       rvu_unregister_interrupts(rvu);
        rvu_flr_wq_destroy(rvu);
        rvu_cgx_exit(rvu);
        rvu_fwdata_exit(rvu);
index fa6e46e..76f3992 100644 (file)
@@ -678,6 +678,7 @@ void npc_read_mcam_entry(struct rvu *rvu, struct npc_mcam *mcam,
                         u8 *intf, u8 *ena);
 bool is_mac_feature_supported(struct rvu *rvu, int pf, int feature);
 u32  rvu_cgx_get_fifolen(struct rvu *rvu);
+void *rvu_first_cgx_pdata(struct rvu *rvu);
 
 /* CPT APIs */
 int rvu_cpt_lf_teardown(struct rvu *rvu, u16 pcifunc, int lf, int slot);
index e668e48..6e2bf4f 100644 (file)
@@ -89,6 +89,21 @@ void *rvu_cgx_pdata(u8 cgx_id, struct rvu *rvu)
        return rvu->cgx_idmap[cgx_id];
 }
 
+/* Return the first enabled CGX instance; if none are enabled, return NULL */
+void *rvu_first_cgx_pdata(struct rvu *rvu)
+{
+       int first_enabled_cgx = 0;
+       void *cgxd = NULL;
+
+       for (; first_enabled_cgx < rvu->cgx_cnt_max; first_enabled_cgx++) {
+               cgxd = rvu_cgx_pdata(first_enabled_cgx, rvu);
+               if (cgxd)
+                       break;
+       }
+
+       return cgxd;
+}
+
 /* Based on P2X connectivity find mapped NIX block for a PF */
 static void rvu_map_cgx_nix_block(struct rvu *rvu, int pf,
                                  int cgx_id, int lmac_id)
@@ -711,10 +726,9 @@ int rvu_mbox_handler_cgx_features_get(struct rvu *rvu,
 u32 rvu_cgx_get_fifolen(struct rvu *rvu)
 {
        struct mac_ops *mac_ops;
-       int rvu_def_cgx_id = 0;
        u32 fifo_len;
 
-       mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu));
+       mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu));
        fifo_len = mac_ops ? mac_ops->fifo_len : 0;
 
        return fifo_len;
index 094124b..de3968d 100644 (file)
@@ -234,12 +234,14 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
                                          char __user *buffer,
                                          size_t count, loff_t *ppos)
 {
-       int index, off = 0, flag = 0, go_back = 0, off_prev;
+       int index, off = 0, flag = 0, go_back = 0, len = 0;
        struct rvu *rvu = filp->private_data;
        int lf, pf, vf, pcifunc;
        struct rvu_block block;
        int bytes_not_copied;
+       int lf_str_size = 12;
        int buf_size = 2048;
+       char *lfs;
        char *buf;
 
        /* don't allow partial reads */
@@ -249,12 +251,20 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
        buf = kzalloc(buf_size, GFP_KERNEL);
        if (!buf)
                return -ENOSPC;
-       off +=  scnprintf(&buf[off], buf_size - 1 - off, "\npcifunc\t\t");
+
+       lfs = kzalloc(lf_str_size, GFP_KERNEL);
+       if (!lfs) {
+               kfree(buf);
+               return -ENOMEM;
+       }
+       off +=  scnprintf(&buf[off], buf_size - 1 - off, "%-*s", lf_str_size,
+                         "pcifunc");
        for (index = 0; index < BLK_COUNT; index++)
-               if (strlen(rvu->hw->block[index].name))
-                       off +=  scnprintf(&buf[off], buf_size - 1 - off,
-                                         "%*s\t", (index - 1) * 2,
-                                         rvu->hw->block[index].name);
+               if (strlen(rvu->hw->block[index].name)) {
+                       off += scnprintf(&buf[off], buf_size - 1 - off,
+                                        "%-*s", lf_str_size,
+                                        rvu->hw->block[index].name);
+               }
        off += scnprintf(&buf[off], buf_size - 1 - off, "\n");
        for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
                for (vf = 0; vf <= rvu->hw->total_vfs; vf++) {
@@ -263,14 +273,15 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
                                continue;
 
                        if (vf) {
+                               sprintf(lfs, "PF%d:VF%d", pf, vf - 1);
                                go_back = scnprintf(&buf[off],
                                                    buf_size - 1 - off,
-                                                   "PF%d:VF%d\t\t", pf,
-                                                   vf - 1);
+                                                   "%-*s", lf_str_size, lfs);
                        } else {
+                               sprintf(lfs, "PF%d", pf);
                                go_back = scnprintf(&buf[off],
                                                    buf_size - 1 - off,
-                                                   "PF%d\t\t", pf);
+                                                   "%-*s", lf_str_size, lfs);
                        }
 
                        off += go_back;
@@ -278,20 +289,22 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
                                block = rvu->hw->block[index];
                                if (!strlen(block.name))
                                        continue;
-                               off_prev = off;
+                               len = 0;
+                               lfs[len] = '\0';
                                for (lf = 0; lf < block.lf.max; lf++) {
                                        if (block.fn_map[lf] != pcifunc)
                                                continue;
                                        flag = 1;
-                                       off += scnprintf(&buf[off], buf_size - 1
-                                                       - off, "%3d,", lf);
+                                       len += sprintf(&lfs[len], "%d,", lf);
                                }
-                               if (flag && off_prev != off)
-                                       off--;
-                               else
-                                       go_back++;
+
+                               if (flag)
+                                       len--;
+                               lfs[len] = '\0';
                                off += scnprintf(&buf[off], buf_size - 1 - off,
-                                               "\t");
+                                                "%-*s", lf_str_size, lfs);
+                               if (!strlen(lfs))
+                                       go_back += lf_str_size;
                        }
                        if (!flag)
                                off -= go_back;
@@ -303,6 +316,7 @@ static ssize_t rvu_dbg_rsrc_attach_status(struct file *filp,
        }
 
        bytes_not_copied = copy_to_user(buffer, buf, off);
+       kfree(lfs);
        kfree(buf);
 
        if (bytes_not_copied)
@@ -319,7 +333,6 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused)
        struct rvu *rvu = filp->private;
        struct pci_dev *pdev = NULL;
        struct mac_ops *mac_ops;
-       int rvu_def_cgx_id = 0;
        char cgx[10], lmac[10];
        struct rvu_pfvf *pfvf;
        int pf, domain, blkid;
@@ -327,7 +340,10 @@ static int rvu_dbg_rvu_pf_cgx_map_display(struct seq_file *filp, void *unused)
        u16 pcifunc;
 
        domain = 2;
-       mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu));
+       mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu));
+       /* There may be no CGX devices at all */
+       if (!mac_ops)
+               return 0;
        seq_printf(filp, "PCI dev\t\tRVU PF Func\tNIX block\t%s\tLMAC\n",
                   mac_ops->name);
        for (pf = 0; pf < rvu->hw->total_pfs; pf++) {
@@ -473,7 +489,7 @@ static ssize_t rvu_dbg_qsize_write(struct file *filp,
        u16 pcifunc;
        int ret, lf;
 
-       cmd_buf = memdup_user(buffer, count);
+       cmd_buf = memdup_user(buffer, count + 1);
        if (IS_ERR(cmd_buf))
                return -ENOMEM;
 
@@ -1818,7 +1834,6 @@ static void rvu_dbg_cgx_init(struct rvu *rvu)
 {
        struct mac_ops *mac_ops;
        unsigned long lmac_bmap;
-       int rvu_def_cgx_id = 0;
        int i, lmac_id;
        char dname[20];
        void *cgx;
@@ -1826,7 +1841,7 @@ static void rvu_dbg_cgx_init(struct rvu *rvu)
        if (!cgx_get_cgxcnt_max())
                return;
 
-       mac_ops = get_mac_ops(rvu_cgx_pdata(rvu_def_cgx_id, rvu));
+       mac_ops = get_mac_ops(rvu_first_cgx_pdata(rvu));
        if (!mac_ops)
                return;
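The debugfs rewrite replaces tab padding with fixed-width "%-*s" fields: long entries such as "PF10:VF100" used to overflow a tab stop and skew every column after them, while a left-justified fixed-width field stays aligned regardless of content length. The difference is easy to reproduce:

#include <stdio.h>

int main(void)
{
        const int lf_str_size = 12; /* matches the value chosen above */

        printf("%-*s%-*s\n", lf_str_size, "pcifunc", lf_str_size, "NPA");
        printf("%-*s%-*s\n", lf_str_size, "PF1:VF0", lf_str_size, "0,1");
        printf("%-*s%-*s\n", lf_str_size, "PF10:VF100", lf_str_size, "2,3");
        return 0;
}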
 
index d300019..3d068b7 100644 (file)
@@ -2629,7 +2629,7 @@ static int set_flowkey_fields(struct nix_rx_flowkey_alg *alg, u32 flow_cfg)
        struct nix_rx_flowkey_alg *field;
        struct nix_rx_flowkey_alg tmp;
        u32 key_type, valid_key;
-       int l4_key_offset;
+       int l4_key_offset = 0;
 
        if (!alg)
                return -EINVAL;
index 04bb080..0bd49c7 100644 (file)
@@ -2490,10 +2490,10 @@ int rvu_mbox_handler_npc_mcam_free_counter(struct rvu *rvu,
                index = find_next_bit(mcam->bmap, mcam->bmap_entries, entry);
                if (index >= mcam->bmap_entries)
                        break;
+               entry = index + 1;
                if (mcam->entry2cntr_map[index] != req->cntr)
                        continue;
 
-               entry = index + 1;
                npc_unmap_mcam_entry_and_cntr(rvu, mcam, blkaddr,
                                              index, req->cntr);
        }
index 0dbbf38..dc17784 100644 (file)
@@ -257,17 +257,19 @@ int otx2_get_flow(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc,
 int otx2_get_all_flows(struct otx2_nic *pfvf, struct ethtool_rxnfc *nfc,
                       u32 *rule_locs)
 {
+       u32 rule_cnt = nfc->rule_cnt;
        u32 location = 0;
        int idx = 0;
        int err = 0;
 
        nfc->data = pfvf->flow_cfg->ntuple_max_flows;
-       while ((!err || err == -ENOENT) && idx < nfc->rule_cnt) {
+       while ((!err || err == -ENOENT) && idx < rule_cnt) {
                err = otx2_get_flow(pfvf, nfc, location);
                if (!err)
                        rule_locs[idx++] = location;
                location++;
        }
+       nfc->rule_cnt = rule_cnt;
 
        return err;
 }
index 53ab181..2fd3d23 100644 (file)
@@ -1672,6 +1672,7 @@ int otx2_stop(struct net_device *netdev)
        struct otx2_nic *pf = netdev_priv(netdev);
        struct otx2_cq_poll *cq_poll = NULL;
        struct otx2_qset *qset = &pf->qset;
+       struct otx2_rss_info *rss;
        int qidx, vec, wrk;
 
        netif_carrier_off(netdev);
@@ -1684,6 +1685,10 @@ int otx2_stop(struct net_device *netdev)
        /* First stop packet Rx/Tx */
        otx2_rxtx_enable(pf, false);
 
+       /* Clear RSS enable flag */
+       rss = &pf->hw.rss_info;
+       rss->enable = false;
+
        /* Cleanup Queue IRQ */
        vec = pci_irq_vector(pf->pdev,
                             pf->hw.nix_msixoff + NIX_LF_QINT_VEC_START);
index d1e4d42..3712e17 100644 (file)
@@ -1544,8 +1544,8 @@ static int pxa168_eth_remove(struct platform_device *pdev)
        clk_disable_unprepare(pep->clk);
        mdiobus_unregister(pep->smi_bus);
        mdiobus_free(pep->smi_bus);
-       unregister_netdev(dev);
        cancel_work_sync(&pep->tx_timeout_task);
+       unregister_netdev(dev);
        free_netdev(dev);
        return 0;
 }
index ebe1406..dbec8e1 100644 (file)
@@ -4806,12 +4806,11 @@ static struct net_device *sky2_init_netdev(struct sky2_hw *hw, unsigned port,
        if (!is_valid_ether_addr(dev->dev_addr)) {
                struct sockaddr sa = { AF_UNSPEC };
 
-               netdev_warn(dev,
-                           "Invalid MAC address, defaulting to random\n");
+               dev_warn(&hw->pdev->dev, "Invalid MAC address, defaulting to random\n");
                eth_hw_addr_random(dev);
                memcpy(sa.sa_data, dev->dev_addr, ETH_ALEN);
                if (sky2_set_mac_address(dev, &sa))
-                       netdev_warn(dev, "Failed to set MAC address.\n");
+                       dev_warn(&hw->pdev->dev, "Failed to set MAC address.\n");
        }
 
        return dev;
index a8641a4..96d2891 100644 (file)
@@ -1225,8 +1225,6 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv)
                goto push_new_skb;
        }
 
-       desc_data.dma_addr = new_dma_addr;
-
        /* We can't fail anymore at this point: it's safe to unmap the skb. */
        mtk_star_dma_unmap_rx(priv, &desc_data);
 
@@ -1236,6 +1234,9 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv)
        desc_data.skb->dev = ndev;
        netif_receive_skb(desc_data.skb);
 
+       /* update dma_addr for new skb */
+       desc_data.dma_addr = new_dma_addr;
+
 push_new_skb:
        desc_data.len = skb_tailroom(new_skb);
        desc_data.skb = new_skb;
index 23849f2..1434df6 100644 (file)
@@ -47,7 +47,7 @@
 #define EN_ETHTOOL_SHORT_MASK cpu_to_be16(0xffff)
 #define EN_ETHTOOL_WORD_MASK  cpu_to_be32(0xffffffff)
 
-static int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
+int mlx4_en_moderation_update(struct mlx4_en_priv *priv)
 {
        int i, t;
        int err = 0;
index 51b9700..5d0c9c6 100644 (file)
@@ -3554,6 +3554,8 @@ int mlx4_en_reset_config(struct net_device *dev,
                        en_err(priv, "Failed starting port\n");
        }
 
+       if (!err)
+               err = mlx4_en_moderation_update(priv);
 out:
        mutex_unlock(&mdev->state_lock);
        kfree(tmp);
index e8ed231..f3d1a20 100644 (file)
@@ -775,6 +775,7 @@ void mlx4_en_ptp_overflow_check(struct mlx4_en_dev *mdev);
 #define DEV_FEATURE_CHANGED(dev, new_features, feature) \
        ((dev->features & feature) ^ (new_features & feature))
 
+int mlx4_en_moderation_update(struct mlx4_en_priv *priv);
 int mlx4_en_reset_config(struct net_device *dev,
                         struct hwtstamp_config ts_config,
                         netdev_features_t new_features);
index 394f43a..a99e71b 100644 (file)
@@ -4986,6 +4986,7 @@ static int mlx4_do_mirror_rule(struct mlx4_dev *dev, struct res_fs_rule *fs_rule
 
        if (!fs_rule->mirr_mbox) {
                mlx4_err(dev, "rule mirroring mailbox is null\n");
+               mlx4_free_cmd_mailbox(dev, mailbox);
                return -EINVAL;
        }
        memcpy(mailbox->buf, fs_rule->mirr_mbox, fs_rule->mirr_mbox_size);
index 7435fe6..304b296 100644 (file)
@@ -92,14 +92,15 @@ struct page_pool;
                                    MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE               BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
 
-#define MLX5_MTT_OCTW(npages) (ALIGN(npages, 8) / 2)
+#define MLX5_ALIGN_MTTS(mtts)          (ALIGN(mtts, 8))
+#define MLX5_ALIGNED_MTTS_OCTW(mtts)   ((mtts) / 2)
+#define MLX5_MTT_OCTW(mtts)            (MLX5_ALIGNED_MTTS_OCTW(MLX5_ALIGN_MTTS(mtts)))
 /* Add another page to MLX5E_REQUIRED_WQE_MTTS as a buffer between
  * WQEs, This page will absorb write overflow by the hardware, when
  * receiving packets larger than MTU. These oversize packets are
  * dropped by the driver at a later stage.
  */
-#define MLX5E_REQUIRED_WQE_MTTS                (ALIGN(MLX5_MPWRQ_PAGES_PER_WQE + 1, 8))
-#define MLX5E_LOG_ALIGNED_MPWQE_PPW    (ilog2(MLX5E_REQUIRED_WQE_MTTS))
+#define MLX5E_REQUIRED_WQE_MTTS                (MLX5_ALIGN_MTTS(MLX5_MPWRQ_PAGES_PER_WQE + 1))
 #define MLX5E_REQUIRED_MTTS(wqes)      (wqes * MLX5E_REQUIRED_WQE_MTTS)
 #define MLX5E_MAX_RQ_NUM_MTTS  \
        ((1 << 16) * 2) /* So that MLX5_MTT_OCTW(num_mtts) fits into u16 */
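Splitting MLX5_MTT_OCTW into an alignment step and an octword conversion lets callers that already hold an aligned MTT count (such as the UMR WQE path changed later in this series) convert without re-aligning. The arithmetic, checked in isolation:

#include <stdio.h>

#define ALIGN_MTTS(mtts)        (((mtts) + 7) & ~7u)  /* round up to 8 */
#define ALIGNED_MTTS_OCTW(mtts) ((mtts) / 2)          /* 2 MTTs per octword */
#define MTT_OCTW(mtts)          ALIGNED_MTTS_OCTW(ALIGN_MTTS(mtts))

int main(void)
{
        printf("%u\n", ALIGN_MTTS(9u)); /* 16 */
        printf("%u\n", MTT_OCTW(9u));   /* 8 */
        return 0;
}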
index f3f6eb0..b2cd298 100644 (file)
@@ -1181,7 +1181,8 @@ int mlx5_tc_ct_add_no_trk_match(struct mlx5_flow_spec *spec)
 
        mlx5e_tc_match_to_reg_get_match(spec, CTSTATE_TO_REG,
                                        &ctstate, &ctstate_mask);
-       if (ctstate_mask)
+
+       if ((ctstate & ctstate_mask) == MLX5_CT_STATE_TRK_BIT)
                return -EOPNOTSUPP;
 
        ctstate_mask |= MLX5_CT_STATE_TRK_BIT;
index f8075a6..172e047 100644 (file)
@@ -685,14 +685,14 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
        u16 vport_num;
        int err = 0;
 
-       if (flow_attr->ip_version == 4) {
+       if (flow_attr->tun_ip_version == 4) {
                /* Addresses are swapped for decap */
                attr.fl.fl4.saddr = esw_attr->rx_tun_attr->dst_ip.v4;
                attr.fl.fl4.daddr = esw_attr->rx_tun_attr->src_ip.v4;
                err = mlx5e_route_lookup_ipv4_get(priv, priv->netdev, &attr);
        }
 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
-       else if (flow_attr->ip_version == 6) {
+       else if (flow_attr->tun_ip_version == 6) {
                /* Addresses are swapped for decap */
                attr.fl.fl6.saddr = esw_attr->rx_tun_attr->dst_ip.v6;
                attr.fl.fl6.daddr = esw_attr->rx_tun_attr->src_ip.v6;
@@ -718,10 +718,10 @@ int mlx5e_tc_tun_route_lookup(struct mlx5e_priv *priv,
        esw_attr->rx_tun_attr->decap_vport = vport_num;
 
 out:
-       if (flow_attr->ip_version == 4)
+       if (flow_attr->tun_ip_version == 4)
                mlx5e_route_lookup_ipv4_put(&attr);
 #if IS_ENABLED(CONFIG_INET) && IS_ENABLED(CONFIG_IPV6)
-       else if (flow_attr->ip_version == 6)
+       else if (flow_attr->tun_ip_version == 6)
                mlx5e_route_lookup_ipv6_put(&attr);
 #endif
        return err;
index 6a11633..7f7b0f6 100644 (file)
@@ -89,6 +89,7 @@ int mlx5e_tc_set_attr_rx_tun(struct mlx5e_tc_flow *flow,
         * required to establish routing.
         */
        flow_flag_set(flow, TUN_RX);
+       flow->attr->tun_ip_version = ip_version;
        return 0;
 }
 
@@ -1091,7 +1092,7 @@ int mlx5e_attach_decap_route(struct mlx5e_priv *priv,
        if (err || !esw_attr->rx_tun_attr->decap_vport)
                goto out;
 
-       key.ip_version = attr->ip_version;
+       key.ip_version = attr->tun_ip_version;
        if (key.ip_version == 4)
                key.endpoint_ip.v4 = esw_attr->rx_tun_attr->dst_ip.v4;
        else
index e472ed0..7ed3f9f 100644 (file)
@@ -227,6 +227,10 @@ static int mlx5e_tc_tun_parse_geneve_options(struct mlx5e_priv *priv,
        option_key = (struct geneve_opt *)&enc_opts.key->data[0];
        option_mask = (struct geneve_opt *)&enc_opts.mask->data[0];
 
+       if (option_mask->opt_class == 0 && option_mask->type == 0 &&
+           !memchr_inv(option_mask->opt_data, 0, option_mask->length * 4))
+               return 0;
+
        if (option_key->length > max_tlv_option_data_len) {
                NL_SET_ERR_MSG_MOD(extack,
                                   "Matching on GENEVE options: unsupported option len");
index abdf721..f5f2a8f 100644 (file)
@@ -1887,6 +1887,7 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
 {
        struct mlx5e_priv *priv = netdev_priv(netdev);
        struct mlx5_core_dev *mdev = priv->mdev;
+       int err;
 
        if (!MLX5_CAP_GEN(mdev, cqe_compression))
                return -EOPNOTSUPP;
@@ -1896,7 +1897,10 @@ static int set_pflag_rx_cqe_compress(struct net_device *netdev,
                return -EINVAL;
        }
 
-       mlx5e_modify_rx_cqe_compression_locked(priv, enable);
+       err = mlx5e_modify_rx_cqe_compression_locked(priv, enable);
+       if (err)
+               return err;
+
        priv->channels.params.rx_cqe_compress_def = enable;
 
        return 0;
@@ -2014,8 +2018,13 @@ static int set_pflag_tx_port_ts(struct net_device *netdev, bool enable)
         */
 
        if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
+               struct mlx5e_params old_params;
+
+               old_params = priv->channels.params;
                priv->channels.params = new_channels.params;
                err = mlx5e_num_channels_changed(priv);
+               if (err)
+                       priv->channels.params = old_params;
                goto out;
        }
 
index ec2fcb2..158f947 100644 (file)
@@ -334,9 +334,9 @@ static int mlx5e_create_rq_umr_mkey(struct mlx5_core_dev *mdev, struct mlx5e_rq
                                     rq->wqe_overflow.addr);
 }
 
-static inline u64 mlx5e_get_mpwqe_offset(struct mlx5e_rq *rq, u16 wqe_ix)
+static u64 mlx5e_get_mpwqe_offset(u16 wqe_ix)
 {
-       return (wqe_ix << MLX5E_LOG_ALIGNED_MPWQE_PPW) << PAGE_SHIFT;
+       return MLX5E_REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT;
 }
 
 static void mlx5e_init_frags_partition(struct mlx5e_rq *rq)
@@ -577,7 +577,7 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
                                mlx5_wq_ll_get_wqe(&rq->mpwqe.wq, i);
                        u32 byte_count =
                                rq->mpwqe.num_strides << rq->mpwqe.log_stride_sz;
-                       u64 dma_offset = mlx5e_get_mpwqe_offset(rq, i);
+                       u64 dma_offset = mlx5e_get_mpwqe_offset(i);
 
                        wqe->data[0].addr = cpu_to_be64(dma_offset + rq->buff.headroom);
                        wqe->data[0].byte_count = cpu_to_be32(byte_count);
@@ -2368,8 +2368,9 @@ static u8 mlx5e_build_icosq_log_wq_sz(struct mlx5e_params *params,
 {
        switch (params->rq_wq_type) {
        case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
-               return order_base_2(MLX5E_UMR_WQEBBS) +
-                       mlx5e_get_rq_log_wq_sz(rqp->rqc);
+               return max_t(u8, MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE,
+                            order_base_2(MLX5E_UMR_WQEBBS) +
+                            mlx5e_get_rq_log_wq_sz(rqp->rqc));
        default: /* MLX5_WQ_TYPE_CYCLIC */
                return MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE;
        }
@@ -2502,8 +2503,10 @@ void mlx5e_close_channels(struct mlx5e_channels *chs)
 {
        int i;
 
-       if (chs->port_ptp)
+       if (chs->port_ptp) {
                mlx5e_port_ptp_close(chs->port_ptp);
+               chs->port_ptp = NULL;
+       }
 
        for (i = 0; i < chs->num; i++)
                mlx5e_close_channel(chs->c[i]);
@@ -3815,6 +3818,15 @@ void mlx5e_fold_sw_stats64(struct mlx5e_priv *priv, struct rtnl_link_stats64 *s)
                        s->tx_dropped    += sq_stats->dropped;
                }
        }
+       if (priv->port_ptp_opened) {
+               for (i = 0; i < priv->max_opened_tc; i++) {
+                       struct mlx5e_sq_stats *sq_stats = &priv->port_ptp_stats.sq[i];
+
+                       s->tx_packets    += sq_stats->packets;
+                       s->tx_bytes      += sq_stats->bytes;
+                       s->tx_dropped    += sq_stats->dropped;
+               }
+       }
 }
 
 void
@@ -3834,10 +3846,17 @@ mlx5e_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats)
        }
 
        if (mlx5e_is_uplink_rep(priv)) {
+               struct mlx5e_vport_stats *vstats = &priv->stats.vport;
+
                stats->rx_packets = PPORT_802_3_GET(pstats, a_frames_received_ok);
                stats->rx_bytes   = PPORT_802_3_GET(pstats, a_octets_received_ok);
                stats->tx_packets = PPORT_802_3_GET(pstats, a_frames_transmitted_ok);
                stats->tx_bytes   = PPORT_802_3_GET(pstats, a_octets_transmitted_ok);
+
+               /* vport multicast also counts packets that are dropped due to steering
+                * or to Rx out-of-buffer conditions
+                */
+               stats->multicast = VPORT_COUNTER_GET(vstats, received_eth_multicast.packets);
        } else {
                mlx5e_fold_sw_stats64(priv, stats);
        }
@@ -4683,8 +4702,10 @@ static int mlx5e_xdp_set(struct net_device *netdev, struct bpf_prog *prog)
                struct mlx5e_channel *c = priv->channels.c[i];
 
                mlx5e_rq_replace_xdp_prog(&c->rq, prog);
-               if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state))
+               if (test_bit(MLX5E_CHANNEL_STATE_XSK, c->state)) {
+                       bpf_prog_inc(prog);
                        mlx5e_rq_replace_xdp_prog(&c->xskrq, prog);
+               }
        }
 
 unlock:
@@ -4958,6 +4979,11 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16
                                     priv->max_nch);
        params->num_tc       = 1;
 
+       /* Set an initial non-zero value, so that mlx5e_select_queue won't
+        * divide by zero if called before first activating channels.
+        */
+       priv->num_tc_x_num_ch = params->num_channels * params->num_tc;
+
        /* SQ */
        params->log_sq_size = is_kdump_kernel() ?
                MLX5E_PARAMS_MINIMUM_LOG_SQ_SIZE :
@@ -5474,8 +5500,6 @@ int mlx5e_priv_init(struct mlx5e_priv *priv,
                    struct net_device *netdev,
                    struct mlx5_core_dev *mdev)
 {
-       memset(priv, 0, sizeof(*priv));
-
        /* priv init */
        priv->mdev        = mdev;
        priv->netdev      = netdev;
@@ -5508,12 +5532,18 @@ void mlx5e_priv_cleanup(struct mlx5e_priv *priv)
 {
        int i;
 
+       /* bail if the profile change failed and the rollback also failed */
+       if (!priv->mdev)
+               return;
+
        destroy_workqueue(priv->wq);
        free_cpumask_var(priv->scratchpad.cpumask);
 
        for (i = 0; i < priv->htb.max_qos_sqs; i++)
                kfree(priv->htb.qos_sq_stats[i]);
        kvfree(priv->htb.qos_sq_stats);
+
+       memset(priv, 0, sizeof(*priv));
 }
 
 struct net_device *
@@ -5630,11 +5660,10 @@ void mlx5e_detach_netdev(struct mlx5e_priv *priv)
 }
 
 static int
-mlx5e_netdev_attach_profile(struct mlx5e_priv *priv,
+mlx5e_netdev_attach_profile(struct net_device *netdev, struct mlx5_core_dev *mdev,
                            const struct mlx5e_profile *new_profile, void *new_ppriv)
 {
-       struct net_device *netdev = priv->netdev;
-       struct mlx5_core_dev *mdev = priv->mdev;
+       struct mlx5e_priv *priv = netdev_priv(netdev);
        int err;
 
        err = mlx5e_priv_init(priv, netdev, mdev);
@@ -5647,10 +5676,16 @@ mlx5e_netdev_attach_profile(struct mlx5e_priv *priv,
        priv->ppriv = new_ppriv;
        err = new_profile->init(priv->mdev, priv->netdev);
        if (err)
-               return err;
+               goto priv_cleanup;
        err = mlx5e_attach_netdev(priv);
        if (err)
-               new_profile->cleanup(priv);
+               goto profile_cleanup;
+       return err;
+
+profile_cleanup:
+       new_profile->cleanup(priv);
+priv_cleanup:
+       mlx5e_priv_cleanup(priv);
        return err;
 }
 
@@ -5659,13 +5694,14 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
 {
        unsigned int new_max_nch = mlx5e_calc_max_nch(priv, new_profile);
        const struct mlx5e_profile *orig_profile = priv->profile;
+       struct net_device *netdev = priv->netdev;
+       struct mlx5_core_dev *mdev = priv->mdev;
        void *orig_ppriv = priv->ppriv;
        int err, rollback_err;
 
        /* sanity */
        if (new_max_nch != priv->max_nch) {
-               netdev_warn(priv->netdev,
-                           "%s: Replacing profile with different max channels\n",
+               netdev_warn(netdev, "%s: Replacing profile with different max channels\n",
                            __func__);
                return -EINVAL;
        }
@@ -5675,22 +5711,19 @@ int mlx5e_netdev_change_profile(struct mlx5e_priv *priv,
        priv->profile->cleanup(priv);
        mlx5e_priv_cleanup(priv);
 
-       err = mlx5e_netdev_attach_profile(priv, new_profile, new_ppriv);
+       err = mlx5e_netdev_attach_profile(netdev, mdev, new_profile, new_ppriv);
        if (err) { /* roll back to original profile */
-               netdev_warn(priv->netdev, "%s: new profile init failed, %d\n",
-                           __func__, err);
+               netdev_warn(netdev, "%s: new profile init failed, %d\n", __func__, err);
                goto rollback;
        }
 
        return 0;
 
 rollback:
-       rollback_err = mlx5e_netdev_attach_profile(priv, orig_profile, orig_ppriv);
-       if (rollback_err) {
-               netdev_err(priv->netdev,
-                          "%s: failed to rollback to orig profile, %d\n",
+       rollback_err = mlx5e_netdev_attach_profile(netdev, mdev, orig_profile, orig_ppriv);
+       if (rollback_err)
+               netdev_err(netdev, "%s: failed to rollback to orig profile, %d\n",
                           __func__, rollback_err);
-       }
        return err;
 }
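Among the changes above, mlx5e_get_mpwqe_offset now derives the WQE offset from the MTT count macros instead of a pre-computed log. A sketch of the resulting computation, using 4 KiB pages and 8 pages per WQE as illustrative values:

#include <stdio.h>

#define PAGE_SHIFT          12
#define MPWRQ_PAGES_PER_WQE 8
#define ALIGN_MTTS(m)       (((m) + 7) & ~7u)
#define REQUIRED_WQE_MTTS   ALIGN_MTTS(MPWRQ_PAGES_PER_WQE + 1)
#define REQUIRED_MTTS(wqes) ((wqes) * REQUIRED_WQE_MTTS)

/* Each WQE reserves pages-per-WQE plus one overflow page, rounded up
 * to a multiple of 8; the byte offset of WQE i is that count times i,
 * shifted by the page size.
 */
static unsigned long long mpwqe_offset(unsigned int wqe_ix)
{
        return (unsigned long long)REQUIRED_MTTS(wqe_ix) << PAGE_SHIFT;
}

int main(void)
{
        printf("%llu\n", mpwqe_offset(0)); /* 0 */
        printf("%llu\n", mpwqe_offset(1)); /* 16 * 4096 = 65536 */
        return 0;
}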
 
index 1b6ad94..249d890 100644 (file)
@@ -500,7 +500,6 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        struct mlx5e_icosq *sq = rq->icosq;
        struct mlx5_wq_cyc *wq = &sq->wq;
        struct mlx5e_umr_wqe *umr_wqe;
-       u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
        u16 pi;
        int err;
        int i;
@@ -531,7 +530,8 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
        umr_wqe->ctrl.opmod_idx_opcode =
                cpu_to_be32((sq->pc << MLX5_WQE_CTRL_WQE_INDEX_SHIFT) |
                            MLX5_OPCODE_UMR);
-       umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
+       umr_wqe->uctrl.xlt_offset =
+               cpu_to_be16(MLX5_ALIGNED_MTTS_OCTW(MLX5E_REQUIRED_MTTS(ix)));
 
        sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
                .wqe_type   = MLX5E_ICOSQ_WQE_UMR_RX,
index 0da69b9..df2a0af 100644 (file)
@@ -2296,6 +2296,16 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
                        *match_level = MLX5_MATCH_L4;
        }
 
+       /* Currently supported only for MPLS over UDP */
+       if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS) &&
+           !netif_is_bareudp(filter_dev)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "Matching on MPLS is supported only for MPLS over UDP");
+               netdev_err(priv->netdev,
+                          "Matching on MPLS is supported only for MPLS over UDP\n");
+               return -EOPNOTSUPP;
+       }
+
        return 0;
 }
 
@@ -2899,6 +2909,37 @@ static int is_action_keys_supported(const struct flow_action_entry *act,
        return 0;
 }
 
+static bool modify_tuple_supported(bool modify_tuple, bool ct_clear,
+                                  bool ct_flow, struct netlink_ext_ack *extack,
+                                  struct mlx5e_priv *priv,
+                                  struct mlx5_flow_spec *spec)
+{
+       if (!modify_tuple || ct_clear)
+               return true;
+
+       if (ct_flow) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "can't offload tuple modification with non-clear ct()");
+               netdev_info(priv->netdev,
+                           "can't offload tuple modification with non-clear ct()");
+               return false;
+       }
+
+       /* Add ct_state=-trk match so it will be offloaded for non ct flows
+        * (or after clear action), as otherwise, since the tuple is changed,
+        * we can't restore ct state
+        */
+       if (mlx5_tc_ct_add_no_trk_match(spec)) {
+               NL_SET_ERR_MSG_MOD(extack,
+                                  "can't offload tuple modification with ct matches and no ct(clear) action");
+               netdev_info(priv->netdev,
+                           "can't offload tuple modification with ct matches and no ct(clear) action");
+               return false;
+       }
+
+       return true;
+}
+
 static bool modify_header_match_supported(struct mlx5e_priv *priv,
                                          struct mlx5_flow_spec *spec,
                                          struct flow_action *flow_action,
@@ -2937,18 +2978,9 @@ static bool modify_header_match_supported(struct mlx5e_priv *priv,
                        return err;
        }
 
-       /* Add ct_state=-trk match so it will be offloaded for non ct flows
-        * (or after clear action), as otherwise, since the tuple is changed,
-        *  we can't restore ct state
-        */
-       if (!ct_clear && modify_tuple &&
-           mlx5_tc_ct_add_no_trk_match(spec)) {
-               NL_SET_ERR_MSG_MOD(extack,
-                                  "can't offload tuple modify header with ct matches");
-               netdev_info(priv->netdev,
-                           "can't offload tuple modify header with ct matches");
+       if (!modify_tuple_supported(modify_tuple, ct_clear, ct_flow, extack,
+                                   priv, spec))
                return false;
-       }
 
        ip_proto = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ip_protocol);
        if (modify_ip_header && ip_proto != IPPROTO_TCP &&
@@ -4445,7 +4477,8 @@ static int apply_police_params(struct mlx5e_priv *priv, u64 rate,
         */
        if (rate) {
                rate = (rate * BITS_PER_BYTE) + 500000;
-               rate_mbps = max_t(u64, do_div(rate, 1000000), 1);
+               do_div(rate, 1000000);
+               rate_mbps = max_t(u32, rate, 1);
        }
 
        err = mlx5_esw_modify_vport_rate(esw, vport_num, rate_mbps);
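
For context on the apply_police_params() fix: the kernel's do_div(n, base) divides n in place and evaluates to the *remainder*, so the old `max_t(u64, do_div(rate, 1000000), 1)` stored rate % 1000000 rather than the Mbps quotient. A userspace sketch of the corrected arithmetic, with a stand-in do_div (GCC statement expression, as in the kernel) that mimics that contract:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in with the kernel contract: divide n in place, return the
     * remainder -- the return value the buggy code mistook for a quotient. */
    #define do_div(n, base) ({ uint32_t __rem = (n) % (base); (n) /= (base); __rem; })

    int main(void)
    {
            uint64_t rate = 123456789ULL;          /* bytes per second */
            uint32_t rate_mbps;

            rate = rate * 8 + 500000;              /* to bits/s, rounded to Mbps */
            do_div(rate, 1000000);                 /* rate now holds the quotient */
            rate_mbps = rate ? (uint32_t)rate : 1; /* clamp to at least 1 Mbps */
            printf("%u Mbps\n", rate_mbps);        /* prints "988 Mbps" */
            return 0;
    }
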
index 89003ae..25c0917 100644 (file)
@@ -79,6 +79,7 @@ struct mlx5_flow_attr {
        u8 inner_match_level;
        u8 outer_match_level;
        u8 ip_version;
+       u8 tun_ip_version;
        u32 flags;
        union {
                struct mlx5_esw_flow_attr esw_attr[0];
index 94cb021..8694b83 100644 (file)
@@ -551,7 +551,8 @@ esw_setup_dests(struct mlx5_flow_destination *dest,
 
        if (!mlx5_eswitch_termtbl_required(esw, attr, flow_act, spec) &&
            MLX5_CAP_GEN(esw_attr->in_mdev, reg_c_preserve) &&
-           mlx5_eswitch_vport_match_metadata_enabled(esw))
+           mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+           MLX5_CAP_ESW_FLOWTABLE_FDB(esw->dev, ignore_flow_level))
                attr->flags |= MLX5_ESW_ATTR_FLAG_SRC_REWRITE;
 
        if (attr->dest_ft) {
index 80da50e..bd66ab2 100644 (file)
@@ -575,6 +575,7 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
        MLX5_SET(qpc, qpc, log_sq_size, ilog2(conn->qp.sq.size));
        MLX5_SET(qpc, qpc, cqn_snd, conn->cq.mcq.cqn);
        MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
+       MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
        MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
index 1eeca45..6f7cef4 100644 (file)
@@ -233,6 +233,7 @@ int mlx5i_create_underlay_qp(struct mlx5e_priv *priv)
        }
 
        qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
+       MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(priv->mdev));
        MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
        MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
        MLX5_SET(qpc, qpc, ulp_stateless_offload_mode,
@@ -694,6 +695,7 @@ static int mlx5i_check_required_hca_cap(struct mlx5_core_dev *mdev)
 static void mlx5_rdma_netdev_free(struct net_device *netdev)
 {
        struct mlx5e_priv *priv = mlx5i_epriv(netdev);
+       struct mlx5_core_dev *mdev = priv->mdev;
        struct mlx5i_priv *ipriv = priv->ppriv;
        const struct mlx5e_profile *profile = priv->profile;
 
@@ -702,7 +704,7 @@ static void mlx5_rdma_netdev_free(struct net_device *netdev)
 
        if (!ipriv->sub_interface) {
                mlx5i_pkey_qpn_ht_cleanup(netdev);
-               mlx5e_destroy_mdev_resources(priv->mdev);
+               mlx5e_destroy_mdev_resources(mdev);
        }
 }
 
index b0e129d..1e7f26b 100644 (file)
@@ -495,15 +495,15 @@ static int mlx5_perout_configure(struct ptp_clock_info *ptp,
                return -EINVAL;
 
        field_select = MLX5_MTPPS_FS_ENABLE;
+       pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index);
+       if (pin < 0)
+               return -EBUSY;
+
        if (on) {
                bool rt_mode = mlx5_real_time_mode(mdev);
                u32 nsec;
                s64 sec;
 
-               pin = ptp_find_pin(clock->ptp, PTP_PF_PEROUT, rq->perout.index);
-               if (pin < 0)
-                       return -EBUSY;
-
                pin_mode = MLX5_PIN_MODE_OUT;
                pattern = MLX5_OUT_PATTERN_PERIODIC;
                ts.tv_sec = rq->perout.period.sec;
index 2f2c352..c568896 100644 (file)
@@ -237,8 +237,8 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
        remaining_size = max_t(int, 0, driver_ver_sz - strlen(string));
 
        snprintf(string + strlen(string), remaining_size, "%u.%u.%u",
-                (u8)((LINUX_VERSION_CODE >> 16) & 0xff), (u8)((LINUX_VERSION_CODE >> 8) & 0xff),
-                (u16)(LINUX_VERSION_CODE & 0xffff));
+               LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
+               LINUX_VERSION_SUBLEVEL);
 
        /*Send the command*/
        MLX5_SET(set_driver_version_in, in, opcode,
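
The replaced format arguments unpacked LINUX_VERSION_CODE by hand, and the `& 0xffff` mask for the third field folded the patchlevel byte into the sublevel; the dedicated LINUX_VERSION_MAJOR/PATCHLEVEL/SUBLEVEL macros avoid both the masking bug and any sublevel-overflow ambiguity. A sketch of the packing (values illustrative, not the real <linux/version.h>):

    #include <stdio.h>

    /* Illustrative stand-ins for the generated <linux/version.h> values. */
    #define LINUX_VERSION_MAJOR       5
    #define LINUX_VERSION_PATCHLEVEL  11
    #define LINUX_VERSION_SUBLEVEL    3
    #define LINUX_VERSION_CODE        ((LINUX_VERSION_MAJOR << 16) + \
                                       (LINUX_VERSION_PATCHLEVEL << 8) + \
                                       LINUX_VERSION_SUBLEVEL)

    int main(void)
    {
            /* Old unpacking: the 0xffff mask keeps the patchlevel byte too. */
            printf("%d.%d.%d\n", (LINUX_VERSION_CODE >> 16) & 0xff,
                   (LINUX_VERSION_CODE >> 8) & 0xff,
                   LINUX_VERSION_CODE & 0xffff);    /* prints 5.11.2819 */
            /* New: use the components directly. */
            printf("%d.%d.%d\n", LINUX_VERSION_MAJOR, LINUX_VERSION_PATCHLEVEL,
                   LINUX_VERSION_SUBLEVEL);         /* prints 5.11.3 */
            return 0;
    }
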
index b265f27..90b524c 100644 (file)
@@ -181,15 +181,13 @@ static int mlx5_sf_dev_vhca_arm_all(struct mlx5_sf_dev_table *table)
        u16 max_functions;
        u16 function_id;
        int err = 0;
-       bool ecpu;
        int i;
 
        max_functions = mlx5_sf_max_functions(dev);
        function_id = MLX5_CAP_GEN(dev, sf_base_id);
-       ecpu = mlx5_read_embedded_cpu(dev);
        /* Arm the vhca context as the vhca event notifier */
        for (i = 0; i < max_functions; i++) {
-               err = mlx5_vhca_event_arm(dev, function_id, ecpu);
+               err = mlx5_vhca_event_arm(dev, function_id);
                if (err)
                        return err;
 
index 58b6be0..a5a0f60 100644 (file)
@@ -6,7 +6,7 @@
 #include "sf.h"
 #include "mlx5_ifc_vhca_event.h"
 #include "vhca_event.h"
-#include "ecpf.h"
+#include "mlx5_core.h"
 
 struct mlx5_sf_hw {
        u32 usr_sfnum;
@@ -18,7 +18,6 @@ struct mlx5_sf_hw_table {
        struct mlx5_core_dev *dev;
        struct mlx5_sf_hw *sfs;
        int max_local_functions;
-       u8 ecpu: 1;
        struct mutex table_lock; /* Serializes sf deletion and vhca state change handler. */
        struct notifier_block vhca_nb;
 };
@@ -64,7 +63,7 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
        }
        if (sw_id == -ENOSPC) {
                err = -ENOSPC;
-               goto err;
+               goto exist_err;
        }
 
        hw_fn_id = mlx5_sf_sw_to_hw_id(table->dev, sw_id);
@@ -72,7 +71,7 @@ int mlx5_sf_hw_table_sf_alloc(struct mlx5_core_dev *dev, u32 usr_sfnum)
        if (err)
                goto err;
 
-       err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, table->ecpu, usr_sfnum);
+       err = mlx5_modify_vhca_sw_id(dev, hw_fn_id, usr_sfnum);
        if (err)
                goto vhca_err;
 
@@ -118,7 +117,7 @@ void mlx5_sf_hw_table_sf_deferred_free(struct mlx5_core_dev *dev, u16 id)
 
        hw_fn_id = mlx5_sf_sw_to_hw_id(dev, id);
        mutex_lock(&table->table_lock);
-       err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, table->ecpu, out, sizeof(out));
+       err = mlx5_cmd_query_vhca_state(dev, hw_fn_id, out, sizeof(out));
        if (err)
                goto err;
        state = MLX5_GET(query_vhca_state_out, out, vhca_state_context.vhca_state);
@@ -164,7 +163,6 @@ int mlx5_sf_hw_table_init(struct mlx5_core_dev *dev)
        table->dev = dev;
        table->sfs = sfs;
        table->max_local_functions = max_functions;
-       table->ecpu = mlx5_read_embedded_cpu(dev);
        dev->priv.sf_hw_table = table;
        mlx5_core_dbg(dev, "SF HW table: max sfs = %d\n", max_functions);
        return 0;
index 1daf5a1..4fc8701 100644 (file)
@@ -20,7 +20,7 @@ struct mlx5_ifc_vhca_state_context_bits {
 
        u8         sw_function_id[0x20];
 
-       u8         reserved_at_40[0x80];
+       u8         reserved_at_40[0x40];
 };
 
 struct mlx5_ifc_query_vhca_state_out_bits {
index af2f2dd..28b14b0 100644 (file)
@@ -19,52 +19,51 @@ struct mlx5_vhca_event_work {
        struct mlx5_vhca_state_event event;
 };
 
-int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id,
-                             bool ecpu, u32 *out, u32 outlen)
+int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id, u32 *out, u32 outlen)
 {
        u32 in[MLX5_ST_SZ_DW(query_vhca_state_in)] = {};
 
        MLX5_SET(query_vhca_state_in, in, opcode, MLX5_CMD_OP_QUERY_VHCA_STATE);
        MLX5_SET(query_vhca_state_in, in, function_id, function_id);
-       MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, ecpu);
+       MLX5_SET(query_vhca_state_in, in, embedded_cpu_function, 0);
 
        return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
 }
 
 static int mlx5_cmd_modify_vhca_state(struct mlx5_core_dev *dev, u16 function_id,
-                                     bool ecpu, u32 *in, u32 inlen)
+                                     u32 *in, u32 inlen)
 {
        u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {};
 
        MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE);
        MLX5_SET(modify_vhca_state_in, in, function_id, function_id);
-       MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, ecpu);
+       MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0);
 
        return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 }
 
-int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, bool ecpu, u32 sw_fn_id)
+int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id)
 {
        u32 out[MLX5_ST_SZ_DW(modify_vhca_state_out)] = {};
        u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {};
 
        MLX5_SET(modify_vhca_state_in, in, opcode, MLX5_CMD_OP_MODIFY_VHCA_STATE);
        MLX5_SET(modify_vhca_state_in, in, function_id, function_id);
-       MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, ecpu);
+       MLX5_SET(modify_vhca_state_in, in, embedded_cpu_function, 0);
        MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.sw_function_id, 1);
        MLX5_SET(modify_vhca_state_in, in, vhca_state_context.sw_function_id, sw_fn_id);
 
        return mlx5_cmd_exec_inout(dev, modify_vhca_state, in, out);
 }
 
-int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id, bool ecpu)
+int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id)
 {
        u32 in[MLX5_ST_SZ_DW(modify_vhca_state_in)] = {};
 
        MLX5_SET(modify_vhca_state_in, in, vhca_state_context.arm_change_event, 1);
        MLX5_SET(modify_vhca_state_in, in, vhca_state_field_select.arm_change_event, 1);
 
-       return mlx5_cmd_modify_vhca_state(dev, function_id, ecpu, in, sizeof(in));
+       return mlx5_cmd_modify_vhca_state(dev, function_id, in, sizeof(in));
 }
 
 static void
@@ -73,7 +72,7 @@ mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event *
        u32 out[MLX5_ST_SZ_DW(query_vhca_state_out)] = {};
        int err;
 
-       err = mlx5_cmd_query_vhca_state(dev, event->function_id, event->ecpu, out, sizeof(out));
+       err = mlx5_cmd_query_vhca_state(dev, event->function_id, out, sizeof(out));
        if (err)
                return;
 
@@ -82,7 +81,7 @@ mlx5_vhca_event_notify(struct mlx5_core_dev *dev, struct mlx5_vhca_state_event *
        event->new_vhca_state = MLX5_GET(query_vhca_state_out, out,
                                         vhca_state_context.vhca_state);
 
-       mlx5_vhca_event_arm(dev, event->function_id, event->ecpu);
+       mlx5_vhca_event_arm(dev, event->function_id);
 
        blocking_notifier_call_chain(&dev->priv.vhca_state_notifier->n_head, 0, event);
 }
@@ -94,6 +93,7 @@ static void mlx5_vhca_state_work_handler(struct work_struct *_work)
        struct mlx5_core_dev *dev = notifier->dev;
 
        mlx5_vhca_event_notify(dev, &work->event);
+       kfree(work);
 }
 
 static int
@@ -110,7 +110,6 @@ mlx5_vhca_state_change_notifier(struct notifier_block *nb, unsigned long type, v
        INIT_WORK(&work->work, &mlx5_vhca_state_work_handler);
        work->notifier = notifier;
        work->event.function_id = be16_to_cpu(eqe->data.vhca_state.function_id);
-       work->event.ecpu = be16_to_cpu(eqe->data.vhca_state.ec_function);
        mlx5_events_work_enqueue(notifier->dev, &work->work);
        return NOTIFY_OK;
 }
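
The kfree(work) added to mlx5_vhca_state_work_handler() plugs a leak: each event queues a freshly allocated work item (the allocation sits just above this hunk in the full source, an assumption here since the excerpt omits it), and nothing else holds a reference once the handler has run, so the handler frees its own container. A minimal sketch of that self-freeing work-item pattern:

    #include <linux/slab.h>
    #include <linux/workqueue.h>

    /* Hypothetical container: one heap allocation per queued event. */
    struct event_work {
            struct work_struct work;
            u16 function_id;
    };

    static void event_work_handler(struct work_struct *_work)
    {
            struct event_work *w = container_of(_work, struct event_work, work);

            /* ... consume w->function_id ... */

            kfree(w);       /* last reference: the handler frees its container */
    }

    static int queue_event(struct workqueue_struct *wq, u16 function_id)
    {
            struct event_work *w = kzalloc(sizeof(*w), GFP_ATOMIC);

            if (!w)
                    return -ENOMEM;
            INIT_WORK(&w->work, event_work_handler);
            w->function_id = function_id;
            queue_work(wq, &w->work);
            return 0;
    }
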
index 1fe1ec6..013cdfe 100644 (file)
@@ -10,7 +10,6 @@ struct mlx5_vhca_state_event {
        u16 function_id;
        u16 sw_function_id;
        u8 new_vhca_state;
-       bool ecpu;
 };
 
 static inline bool mlx5_vhca_event_supported(const struct mlx5_core_dev *dev)
@@ -25,10 +24,10 @@ void mlx5_vhca_event_start(struct mlx5_core_dev *dev);
 void mlx5_vhca_event_stop(struct mlx5_core_dev *dev);
 int mlx5_vhca_event_notifier_register(struct mlx5_core_dev *dev, struct notifier_block *nb);
 void mlx5_vhca_event_notifier_unregister(struct mlx5_core_dev *dev, struct notifier_block *nb);
-int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, bool ecpu, u32 sw_fn_id);
-int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id, bool ecpu);
+int mlx5_modify_vhca_sw_id(struct mlx5_core_dev *dev, u16 function_id, u32 sw_fn_id);
+int mlx5_vhca_event_arm(struct mlx5_core_dev *dev, u16 function_id);
 int mlx5_cmd_query_vhca_state(struct mlx5_core_dev *dev, u16 function_id,
-                             bool ecpu, u32 *out, u32 outlen);
+                             u32 *out, u32 outlen);
 #else
 
 static inline void mlx5_vhca_state_cap_handle(struct mlx5_core_dev *dev, void *set_hca_cap)
index 83c4c87..8a6a56f 100644 (file)
@@ -169,6 +169,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
        MLX5_SET(qpc, qpc, log_rq_size, ilog2(dr_qp->rq.wqe_cnt));
        MLX5_SET(qpc, qpc, rq_type, MLX5_NON_ZERO_RQ);
        MLX5_SET(qpc, qpc, log_sq_size, ilog2(dr_qp->sq.wqe_cnt));
+       MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
        MLX5_SET64(qpc, qpc, dbr_addr, dr_qp->wq_ctrl.db.dma);
        if (MLX5_CAP_GEN(mdev, cqe_version) == 1)
                MLX5_SET(qpc, qpc, user_index, 0xFFFFFF);
index 4088d6e..9143ec3 100644 (file)
@@ -264,8 +264,8 @@ static void dr_ste_v1_set_miss_addr(u8 *hw_ste_p, u64 miss_addr)
 static u64 dr_ste_v1_get_miss_addr(u8 *hw_ste_p)
 {
        u64 index =
-               (MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) |
-                MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32) << 26);
+               ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_31_6) |
+                ((u64)MLX5_GET(ste_match_bwc_v1, hw_ste_p, miss_address_39_32)) << 26);
 
        return index << 6;
 }
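
In dr_ste_v1_get_miss_addr() both MLX5_GET() results are 32-bit, so `miss_address_39_32 << 26` discarded any bits that land above bit 31 before the value ever reached the u64; casting each operand first keeps the full 40-bit address. A standalone sketch of the failure mode:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t lo = 0x03ffffff;       /* miss_address_31_6, 26 bits */
            uint32_t hi = 0xff;             /* miss_address_39_32, 8 bits */

            /* Buggy: the shift happens in 32 bits, bits 32..39 are lost. */
            uint64_t bad  = (lo | hi << 26);
            /* Fixed: widen before shifting so the high byte survives. */
            uint64_t good = ((uint64_t)lo | ((uint64_t)hi) << 26);

            printf("bad=0x%llx good=0x%llx\n",
                   (unsigned long long)(bad << 6),
                   (unsigned long long)(good << 6));
            return 0;
    }
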
index 16e2df6..c4adc7f 100644 (file)
@@ -4430,6 +4430,7 @@ MLXSW_ITEM32(reg, ptys, ext_eth_proto_cap, 0x08, 0, 32);
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4          BIT(20)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4          BIT(21)
 #define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4          BIT(22)
+#define MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4      BIT(23)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_CR            BIT(27)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_KR            BIT(28)
 #define MLXSW_REG_PTYS_ETH_SPEED_25GBASE_SR            BIT(29)
index bd7f873..0bd6416 100644 (file)
@@ -1169,6 +1169,11 @@ static const struct mlxsw_sp1_port_link_mode mlxsw_sp1_port_link_mode[] = {
                .mask_ethtool   = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
                .speed          = SPEED_100000,
        },
+       {
+               .mask           = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
+               .mask_ethtool   = ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT,
+               .speed          = SPEED_100000,
+       },
 };
 
 #define MLXSW_SP1_PORT_LINK_MODE_LEN ARRAY_SIZE(mlxsw_sp1_port_link_mode)
index 9ce9084..eda99d8 100644 (file)
@@ -5951,6 +5951,10 @@ mlxsw_sp_router_fib4_replace(struct mlxsw_sp *mlxsw_sp,
        if (mlxsw_sp->router->aborted)
                return 0;
 
+       if (fen_info->fi->nh &&
+           !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, fen_info->fi->nh->id))
+               return 0;
+
        fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, fen_info->tb_id,
                                         &fen_info->dst, sizeof(fen_info->dst),
                                         fen_info->dst_len,
@@ -6601,6 +6605,9 @@ static int mlxsw_sp_router_fib6_replace(struct mlxsw_sp *mlxsw_sp,
        if (mlxsw_sp_fib6_rt_should_ignore(rt))
                return 0;
 
+       if (rt->nh && !mlxsw_sp_nexthop_obj_group_lookup(mlxsw_sp, rt->nh->id))
+               return 0;
+
        fib_node = mlxsw_sp_fib_node_get(mlxsw_sp, rt->fib6_table->tb6_id,
                                         &rt->fib6_dst.addr,
                                         sizeof(rt->fib6_dst.addr),
index 40e2e79..131b2a5 100644 (file)
@@ -613,7 +613,8 @@ static const struct mlxsw_sx_port_link_mode mlxsw_sx_port_link_mode[] = {
        {
                .mask           = MLXSW_REG_PTYS_ETH_SPEED_100GBASE_CR4 |
                                  MLXSW_REG_PTYS_ETH_SPEED_100GBASE_SR4 |
-                                 MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4,
+                                 MLXSW_REG_PTYS_ETH_SPEED_100GBASE_KR4 |
+                                 MLXSW_REG_PTYS_ETH_SPEED_100GBASE_LR4_ER4,
                .speed          = 100000,
        },
 };
index dbdfabf..1c3e204 100644 (file)
@@ -2040,7 +2040,7 @@ lan743x_rx_trim_skb(struct sk_buff *skb, int frame_length)
                dev_kfree_skb_irq(skb);
                return NULL;
        }
-       frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 2);
+       frame_length = max_t(int, 0, frame_length - RX_HEAD_PADDING - 4);
        if (skb->len > frame_length) {
                skb->tail -= skb->len - frame_length;
                skb->len = frame_length;
index c0ede0c..05cb040 100644 (file)
@@ -13,6 +13,7 @@ if NET_VENDOR_MICROSEMI
 
 # Users should depend on NET_SWITCHDEV, HAS_IOMEM
 config MSCC_OCELOT_SWITCH_LIB
+       select NET_DEVLINK
        select REGMAP_MMIO
        select PACKING
        select PHYLIB
index c3ac026..a41b458 100644 (file)
@@ -540,13 +540,14 @@ ocelot_flower_parse_key(struct ocelot *ocelot, int port, bool ingress,
                        return -EOPNOTSUPP;
                }
 
+               flow_rule_match_ipv4_addrs(rule, &match);
+
                if (filter->block_id == VCAP_IS1 && *(u32 *)&match.mask->dst) {
                        NL_SET_ERR_MSG_MOD(extack,
                                           "Key type S1_NORMAL cannot match on destination IP");
                        return -EOPNOTSUPP;
                }
 
-               flow_rule_match_ipv4_addrs(rule, &match);
                tmp = &filter->key.ipv4.sip.value.addr[0];
                memcpy(tmp, &match.key->src, 4);
 
index 5defd31..aa06fcb 100644 (file)
@@ -327,8 +327,14 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
                goto err_free_ctx_entry;
        }
 
+       /* Do not allocate a mask-id for pre_tun_rules. These flows are used to
+        * configure the pre_tun table and are never actually sent to the
+        * firmware as an add-flow message, so allocating a mask-id here would
+        * put the firmware's mask-id accounting out of sync.
+        */
        new_mask_id = 0;
-       if (!nfp_check_mask_add(app, nfp_flow->mask_data,
+       if (!nfp_flow->pre_tun_rule.dev &&
+           !nfp_check_mask_add(app, nfp_flow->mask_data,
                                nfp_flow->meta.mask_len,
                                &nfp_flow->meta.flags, &new_mask_id)) {
                NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot allocate a new mask id");
@@ -359,7 +365,8 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
                        goto err_remove_mask;
                }
 
-               if (!nfp_check_mask_remove(app, nfp_flow->mask_data,
+               if (!nfp_flow->pre_tun_rule.dev &&
+                   !nfp_check_mask_remove(app, nfp_flow->mask_data,
                                           nfp_flow->meta.mask_len,
                                           NULL, &new_mask_id)) {
                        NL_SET_ERR_MSG_MOD(extack, "invalid entry: cannot release mask id");
@@ -374,8 +381,10 @@ int nfp_compile_flow_metadata(struct nfp_app *app,
        return 0;
 
 err_remove_mask:
-       nfp_check_mask_remove(app, nfp_flow->mask_data, nfp_flow->meta.mask_len,
-                             NULL, &new_mask_id);
+       if (!nfp_flow->pre_tun_rule.dev)
+               nfp_check_mask_remove(app, nfp_flow->mask_data,
+                                     nfp_flow->meta.mask_len,
+                                     NULL, &new_mask_id);
 err_remove_rhash:
        WARN_ON_ONCE(rhashtable_remove_fast(&priv->stats_ctx_table,
                                            &ctx_entry->ht_node,
@@ -406,9 +415,10 @@ int nfp_modify_flow_metadata(struct nfp_app *app,
 
        __nfp_modify_flow_metadata(priv, nfp_flow);
 
-       nfp_check_mask_remove(app, nfp_flow->mask_data,
-                             nfp_flow->meta.mask_len, &nfp_flow->meta.flags,
-                             &new_mask_id);
+       if (!nfp_flow->pre_tun_rule.dev)
+               nfp_check_mask_remove(app, nfp_flow->mask_data,
+                                     nfp_flow->meta.mask_len, &nfp_flow->meta.flags,
+                                     &new_mask_id);
 
        /* Update flow payload with mask ids. */
        nfp_flow->unmasked_data[NFP_FL_MASK_ID_LOCATION] = new_mask_id;
index 1c59aff..d72225d 100644 (file)
@@ -1142,6 +1142,12 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
                return -EOPNOTSUPP;
        }
 
+       if (!(key_layer & NFP_FLOWER_LAYER_IPV4) &&
+           !(key_layer & NFP_FLOWER_LAYER_IPV6)) {
+               NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: match on ipv4/ipv6 eth_type must be present");
+               return -EOPNOTSUPP;
+       }
+
        /* Skip fields known to exist. */
        mask += sizeof(struct nfp_flower_meta_tci);
        ext += sizeof(struct nfp_flower_meta_tci);
@@ -1152,6 +1158,13 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
        mask += sizeof(struct nfp_flower_in_port);
        ext += sizeof(struct nfp_flower_in_port);
 
+       /* Ensure destination MAC address matches pre_tun_dev. */
+       mac = (struct nfp_flower_mac_mpls *)ext;
+       if (memcmp(&mac->mac_dst[0], flow->pre_tun_rule.dev->dev_addr, 6)) {
+               NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: dest MAC must match output dev MAC");
+               return -EOPNOTSUPP;
+       }
+
        /* Ensure destination MAC address is fully matched. */
        mac = (struct nfp_flower_mac_mpls *)mask;
        if (!is_broadcast_ether_addr(&mac->mac_dst[0])) {
@@ -1159,6 +1172,11 @@ nfp_flower_validate_pre_tun_rule(struct nfp_app *app,
                return -EOPNOTSUPP;
        }
 
+       if (mac->mpls_lse) {
+               NL_SET_ERR_MSG_MOD(extack, "unsupported pre-tunnel rule: MPLS not supported");
+               return -EOPNOTSUPP;
+       }
+
        mask += sizeof(struct nfp_flower_mac_mpls);
        ext += sizeof(struct nfp_flower_mac_mpls);
        if (key_layer & NFP_FLOWER_LAYER_IPV4 ||
index 7248d24..d19c02e 100644 (file)
@@ -16,8 +16,9 @@
 #define NFP_FL_MAX_ROUTES               32
 
 #define NFP_TUN_PRE_TUN_RULE_LIMIT     32
-#define NFP_TUN_PRE_TUN_RULE_DEL       0x1
-#define NFP_TUN_PRE_TUN_IDX_BIT                0x8
+#define NFP_TUN_PRE_TUN_RULE_DEL       BIT(0)
+#define NFP_TUN_PRE_TUN_IDX_BIT                BIT(3)
+#define NFP_TUN_PRE_TUN_IPV6_BIT       BIT(7)
 
 /**
  * struct nfp_tun_pre_run_rule - rule matched before decap
@@ -1268,6 +1269,7 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app,
 {
        struct nfp_flower_priv *app_priv = app->priv;
        struct nfp_tun_offloaded_mac *mac_entry;
+       struct nfp_flower_meta_tci *key_meta;
        struct nfp_tun_pre_tun_rule payload;
        struct net_device *internal_dev;
        int err;
@@ -1290,6 +1292,15 @@ int nfp_flower_xmit_pre_tun_flow(struct nfp_app *app,
        if (!mac_entry)
                return -ENOENT;
 
+       /* Set/clear the IPV6 bit. The cpu_to_be16() swap will lead to the
+        * MSB being set/cleared for port_idx.
+        */
+       key_meta = (struct nfp_flower_meta_tci *)flow->unmasked_data;
+       if (key_meta->nfp_flow_key_layer & NFP_FLOWER_LAYER_IPV6)
+               mac_entry->index |= NFP_TUN_PRE_TUN_IPV6_BIT;
+       else
+               mac_entry->index &= ~NFP_TUN_PRE_TUN_IPV6_BIT;
+
        payload.port_idx = cpu_to_be16(mac_entry->index);
 
        /* Copy mac id and vlan to flow - dev may not exist at delete time. */
index 162a1ff..4087311 100644 (file)
@@ -1079,15 +1079,17 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 {
        int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems;
        struct ionic_tx_stats *stats = q_to_tx_stats(q);
+       int ndescs;
        int err;
 
-       /* If TSO, need roundup(skb->len/mss) descs */
+       /* Each desc holds at most mss bytes, so reserve one desc per gso_seg */
        if (skb_is_gso(skb))
-               return (skb->len / skb_shinfo(skb)->gso_size) + 1;
+               ndescs = skb_shinfo(skb)->gso_segs;
+       else
+               ndescs = 1;
 
-       /* If non-TSO, just need 1 desc and nr_frags sg elems */
        if (skb_shinfo(skb)->nr_frags <= sg_elems)
-               return 1;
+               return ndescs;
 
        /* Too many frags, so linearize */
        err = skb_linearize(skb);
@@ -1096,8 +1098,7 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 
        stats->linearize++;
 
-       /* Need 1 desc and zero sg elems */
-       return 1;
+       return ndescs;
 }
 
 static int ionic_maybe_stop_tx(struct ionic_queue *q, int ndescs)
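
On the ionic_tx_descs_needed() change: skb->len counts protocol headers as well as payload, so `skb->len / gso_size + 1` can reserve a descriptor too many (and the old linearize path then ignored TSO entirely and returned 1). skb_shinfo(skb)->gso_segs is the stack's own payload-based segment count. A quick arithmetic sketch of the discrepancy:

    #include <stdio.h>

    int main(void)
    {
            unsigned int hdr_len = 54;              /* eth + ip + tcp headers */
            unsigned int payload = 2896, mss = 1448;
            unsigned int skb_len = hdr_len + payload;

            /* Old estimate: header bytes inflate the count. */
            unsigned int old = skb_len / mss + 1;
            /* gso_segs: payload only, rounded up -- what the fix uses. */
            unsigned int gso_segs = (payload + mss - 1) / mss;

            printf("old=%u gso_segs=%u\n", old, gso_segs);  /* old=3 gso_segs=2 */
            return 0;
    }
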
index 7760a33..7ecb3df 100644 (file)
@@ -1425,6 +1425,7 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter)
 
        if (fw_dump->tmpl_hdr == NULL || current_version > prev_version) {
                vfree(fw_dump->tmpl_hdr);
+               fw_dump->tmpl_hdr = NULL;
 
                if (qlcnic_83xx_md_check_extended_dump_capability(adapter))
                        extended = !qlcnic_83xx_extend_md_capab(adapter);
@@ -1443,6 +1444,8 @@ void qlcnic_83xx_get_minidump_template(struct qlcnic_adapter *adapter)
                        struct qlcnic_83xx_dump_template_hdr *hdr;
 
                        hdr = fw_dump->tmpl_hdr;
+                       if (!hdr)
+                               return;
                        hdr->drv_cap_mask = 0x1f;
                        fw_dump->cap_mask = 0x1f;
                        dev_info(&pdev->dev,
index 0a20dae..581a92f 100644 (file)
@@ -767,7 +767,7 @@ static void r8168fp_adjust_ocp_cmd(struct rtl8169_private *tp, u32 *cmd, int typ
        if (type == ERIAR_OOB &&
            (tp->mac_version == RTL_GIGA_MAC_VER_52 ||
             tp->mac_version == RTL_GIGA_MAC_VER_53))
-               *cmd |= 0x7f0 << 18;
+               *cmd |= 0xf70 << 18;
 }
 
 DECLARE_RTL_COND(rtl_eriar_cond)
@@ -2285,14 +2285,14 @@ static void r8168dp_hw_jumbo_disable(struct rtl8169_private *tp)
 
 static void r8168e_hw_jumbo_enable(struct rtl8169_private *tp)
 {
-       RTL_W8(tp, MaxTxPacketSize, 0x3f);
+       RTL_W8(tp, MaxTxPacketSize, 0x24);
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) | Jumbo_En0);
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) | 0x01);
 }
 
 static void r8168e_hw_jumbo_disable(struct rtl8169_private *tp)
 {
-       RTL_W8(tp, MaxTxPacketSize, 0x0c);
+       RTL_W8(tp, MaxTxPacketSize, 0x3f);
        RTL_W8(tp, Config3, RTL_R8(tp, Config3) & ~Jumbo_En0);
        RTL_W8(tp, Config4, RTL_R8(tp, Config4) & ~0x01);
 }
@@ -4646,6 +4646,9 @@ static void rtl8169_down(struct rtl8169_private *tp)
 
        rtl8169_update_counters(tp);
 
+       pci_clear_master(tp->pci_dev);
+       rtl_pci_commit(tp);
+
        rtl8169_cleanup(tp, true);
 
        rtl_prepare_power_down(tp);
@@ -4653,6 +4656,7 @@ static void rtl8169_down(struct rtl8169_private *tp)
 
 static void rtl8169_up(struct rtl8169_private *tp)
 {
+       pci_set_master(tp->pci_dev);
        phy_resume(tp->phydev);
        rtl8169_init_phy(tp);
        napi_enable(&tp->napi);
@@ -5307,8 +5311,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
        rtl_hw_reset(tp);
 
-       pci_set_master(pdev);
-
        rc = rtl_alloc_irq(tp);
        if (rc < 0) {
                dev_err(&pdev->dev, "Can't allocate interrupt\n");
index 590b088..f029c7c 100644 (file)
@@ -560,6 +560,8 @@ static struct sh_eth_cpu_data r7s72100_data = {
                          EESR_TDE,
        .fdr_value      = 0x0000070f,
 
+       .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+
        .no_psr         = 1,
        .apr            = 1,
        .mpr            = 1,
@@ -780,6 +782,8 @@ static struct sh_eth_cpu_data r7s9210_data = {
 
        .fdr_value      = 0x0000070f,
 
+       .trscer_err_mask = DESC_I_RINT8 | DESC_I_RINT5,
+
        .apr            = 1,
        .mpr            = 1,
        .tpauser        = 1,
@@ -1089,6 +1093,9 @@ static struct sh_eth_cpu_data sh771x_data = {
                          EESIPR_CEEFIP | EESIPR_CELFIP |
                          EESIPR_RRFIP | EESIPR_RTLFIP | EESIPR_RTSFIP |
                          EESIPR_PREIP | EESIPR_CERFIP,
+
+       .trscer_err_mask = DESC_I_RINT8,
+
        .tsu            = 1,
        .dual_port      = 1,
 };
index 3c53051..200785e 100644 (file)
@@ -1715,14 +1715,17 @@ static int netsec_netdev_init(struct net_device *ndev)
                goto err1;
 
        /* set phy power down */
-       data = netsec_phy_read(priv->mii_bus, priv->phy_addr, MII_BMCR) |
-               BMCR_PDOWN;
-       netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, data);
+       data = netsec_phy_read(priv->mii_bus, priv->phy_addr, MII_BMCR);
+       netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR,
+                        data | BMCR_PDOWN);
 
        ret = netsec_reset_hardware(priv, true);
        if (ret)
                goto err2;
 
+       /* Restore phy power state */
+       netsec_phy_write(priv->mii_bus, priv->phy_addr, MII_BMCR, data);
+
        spin_lock_init(&priv->desc_ring[NETSEC_RING_TX].lock);
        spin_lock_init(&priv->desc_ring[NETSEC_RING_RX].lock);
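
The netsec change above stops assuming the phy was powered up before init: it saves BMCR, forces BMCR_PDOWN around the hardware reset, then writes the saved value back. A hedged sketch of the save/restore idiom using the generic mdiobus accessors (hypothetical helper, not netsec's code):

    #include <linux/mii.h>
    #include <linux/phy.h>

    /* Sketch: remember BMCR before forcing power-down so the reset path
     * can put the phy back exactly as it found it. */
    static int reset_with_phy_state_saved(struct mii_bus *bus, int addr,
                                          int (*do_reset)(void *), void *priv)
    {
            int bmcr, ret;

            bmcr = mdiobus_read(bus, addr, MII_BMCR);
            if (bmcr < 0)
                    return bmcr;

            mdiobus_write(bus, addr, MII_BMCR, bmcr | BMCR_PDOWN);
            ret = do_reset(priv);
            if (ret)
                    return ret;

            /* Restore the phy's previous power state. */
            mdiobus_write(bus, addr, MII_BMCR, bmcr);
            return 0;
    }
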
 
index 751dfde..0b64f77 100644 (file)
@@ -233,6 +233,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
 static int intel_mgbe_common_data(struct pci_dev *pdev,
                                  struct plat_stmmacenet_data *plat)
 {
+       char clk_name[20];
        int ret;
        int i;
 
@@ -301,8 +302,10 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
        plat->eee_usecs_rate = plat->clk_ptp_rate;
 
        /* Set system clock */
+       sprintf(clk_name, "%s-%s", "stmmac", pci_name(pdev));
+
        plat->stmmac_clk = clk_register_fixed_rate(&pdev->dev,
-                                                  "stmmac-clk", NULL, 0,
+                                                  clk_name, NULL, 0,
                                                   plat->clk_ptp_rate);
 
        if (IS_ERR(plat->stmmac_clk)) {
@@ -446,8 +449,8 @@ static int tgl_common_data(struct pci_dev *pdev,
        return intel_mgbe_common_data(pdev, plat);
 }
 
-static int tgl_sgmii_data(struct pci_dev *pdev,
-                         struct plat_stmmacenet_data *plat)
+static int tgl_sgmii_phy0_data(struct pci_dev *pdev,
+                              struct plat_stmmacenet_data *plat)
 {
        plat->bus_id = 1;
        plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
@@ -456,12 +459,26 @@ static int tgl_sgmii_data(struct pci_dev *pdev,
        return tgl_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info tgl_sgmii1g_info = {
-       .setup = tgl_sgmii_data,
+static struct stmmac_pci_info tgl_sgmii1g_phy0_info = {
+       .setup = tgl_sgmii_phy0_data,
 };
 
-static int adls_sgmii_data(struct pci_dev *pdev,
-                          struct plat_stmmacenet_data *plat)
+static int tgl_sgmii_phy1_data(struct pci_dev *pdev,
+                              struct plat_stmmacenet_data *plat)
+{
+       plat->bus_id = 2;
+       plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+       plat->serdes_powerup = intel_serdes_powerup;
+       plat->serdes_powerdown = intel_serdes_powerdown;
+       return tgl_common_data(pdev, plat);
+}
+
+static struct stmmac_pci_info tgl_sgmii1g_phy1_info = {
+       .setup = tgl_sgmii_phy1_data,
+};
+
+static int adls_sgmii_phy0_data(struct pci_dev *pdev,
+                               struct plat_stmmacenet_data *plat)
 {
        plat->bus_id = 1;
        plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
@@ -471,10 +488,24 @@ static int adls_sgmii_data(struct pci_dev *pdev,
        return tgl_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info adls_sgmii1g_info = {
-       .setup = adls_sgmii_data,
+static struct stmmac_pci_info adls_sgmii1g_phy0_info = {
+       .setup = adls_sgmii_phy0_data,
 };
 
+static int adls_sgmii_phy1_data(struct pci_dev *pdev,
+                               struct plat_stmmacenet_data *plat)
+{
+       plat->bus_id = 2;
+       plat->phy_interface = PHY_INTERFACE_MODE_SGMII;
+
+       /* SerDes power up and power down are done in BIOS for ADL */
+
+       return tgl_common_data(pdev, plat);
+}
+
+static struct stmmac_pci_info adls_sgmii1g_phy1_info = {
+       .setup = adls_sgmii_phy1_data,
+};
 static const struct stmmac_pci_func_data galileo_stmmac_func_data[] = {
        {
                .func = 6,
@@ -756,11 +787,11 @@ static const struct pci_device_id intel_eth_pci_id_table[] = {
        { PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G_ID, &ehl_pse1_rgmii1g_info) },
        { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) },
        { PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) },
-       { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) },
-       { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_info) },
-       { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_info) },
-       { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_info) },
-       { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_info) },
+       { PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_phy0_info) },
+       { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_0_ID, &tgl_sgmii1g_phy0_info) },
+       { PCI_DEVICE_DATA(INTEL, TGLH_SGMII1G_1_ID, &tgl_sgmii1g_phy1_info) },
+       { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_0_ID, &adls_sgmii1g_phy0_info) },
+       { PCI_DEVICE_DATA(INTEL, ADLS_SGMII1G_1_ID, &adls_sgmii1g_phy1_info) },
        {}
 };
 MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table);
index 6b75cf2..e62efd1 100644 (file)
@@ -1214,6 +1214,8 @@ static int sun8i_dwmac_probe(struct platform_device *pdev)
        plat_dat->init = sun8i_dwmac_init;
        plat_dat->exit = sun8i_dwmac_exit;
        plat_dat->setup = sun8i_dwmac_setup;
+       plat_dat->tx_fifo_size = 4096;
+       plat_dat->rx_fifo_size = 16384;
 
        ret = sun8i_dwmac_set_syscon(&pdev->dev, plat_dat);
        if (ret)
index b7a0c57..d23be45 100644 (file)
@@ -218,6 +218,7 @@ static int visconti_eth_dwmac_probe(struct platform_device *pdev)
                goto remove_config;
        }
 
+       spin_lock_init(&dwmac->lock);
        dwmac->reg = stmmac_res.addr;
        plat_dat->bsp_priv = dwmac;
        plat_dat->fix_mac_speed = visconti_eth_fix_mac_speed;
index c6540b0..cbf4429 100644 (file)
@@ -402,19 +402,53 @@ static void dwmac4_rd_set_tx_ic(struct dma_desc *p)
        p->des2 |= cpu_to_le32(TDES2_INTERRUPT_ON_COMPLETION);
 }
 
-static void dwmac4_display_ring(void *head, unsigned int size, bool rx)
+static void dwmac4_display_ring(void *head, unsigned int size, bool rx,
+                               dma_addr_t dma_rx_phy, unsigned int desc_size)
 {
-       struct dma_desc *p = (struct dma_desc *)head;
+       dma_addr_t dma_addr;
        int i;
 
        pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
 
-       for (i = 0; i < size; i++) {
-               pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-                       i, (unsigned int)virt_to_phys(p),
-                       le32_to_cpu(p->des0), le32_to_cpu(p->des1),
-                       le32_to_cpu(p->des2), le32_to_cpu(p->des3));
-               p++;
+       if (desc_size == sizeof(struct dma_desc)) {
+               struct dma_desc *p = (struct dma_desc *)head;
+
+               for (i = 0; i < size; i++) {
+                       dma_addr = dma_rx_phy + i * sizeof(*p);
+                       pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+                               i, &dma_addr,
+                               le32_to_cpu(p->des0), le32_to_cpu(p->des1),
+                               le32_to_cpu(p->des2), le32_to_cpu(p->des3));
+                       p++;
+               }
+       } else if (desc_size == sizeof(struct dma_extended_desc)) {
+               struct dma_extended_desc *extp = (struct dma_extended_desc *)head;
+
+               for (i = 0; i < size; i++) {
+                       dma_addr = dma_rx_phy + i * sizeof(*extp);
+                       pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+                               i, &dma_addr,
+                               le32_to_cpu(extp->basic.des0), le32_to_cpu(extp->basic.des1),
+                               le32_to_cpu(extp->basic.des2), le32_to_cpu(extp->basic.des3),
+                               le32_to_cpu(extp->des4), le32_to_cpu(extp->des5),
+                               le32_to_cpu(extp->des6), le32_to_cpu(extp->des7));
+                       extp++;
+               }
+       } else if (desc_size == sizeof(struct dma_edesc)) {
+               struct dma_edesc *ep = (struct dma_edesc *)head;
+
+               for (i = 0; i < size; i++) {
+                       dma_addr = dma_rx_phy + i * sizeof(*ep);
+                       pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x 0x%x\n",
+                               i, &dma_addr,
+                               le32_to_cpu(ep->des4), le32_to_cpu(ep->des5),
+                               le32_to_cpu(ep->des6), le32_to_cpu(ep->des7),
+                               le32_to_cpu(ep->basic.des0), le32_to_cpu(ep->basic.des1),
+                               le32_to_cpu(ep->basic.des2), le32_to_cpu(ep->basic.des3));
+                       ep++;
+               }
+       } else {
+               pr_err("unsupported descriptor!");
        }
 }
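
The display-ring rework above also fixes the address being printed: these descriptors come from dma_alloc_coherent(), and virt_to_phys() on such a pointer is not guaranteed to match what the device sees (it is wrong on remapped/IOMMU setups), so the dump now derives each entry's bus address from the DMA handle and prints it with %pad. A hedged sketch of that addressing rule for a plain descriptor ring:

    #include <linux/dma-mapping.h>
    #include <linux/printk.h>

    struct demo_desc { __le32 des0, des1, des2, des3; };

    /* Sketch: dma_handle is what dma_alloc_coherent() returned for the
     * ring; entry addresses are offsets from it, never virt_to_phys(). */
    static void demo_dump_ring(const struct demo_desc *ring,
                               dma_addr_t dma_handle, unsigned int count)
    {
            unsigned int i;

            for (i = 0; i < count; i++) {
                    dma_addr_t entry = dma_handle + i * sizeof(*ring);

                    pr_info("%03u [%pad]: 0x%x 0x%x 0x%x 0x%x\n", i, &entry,
                            le32_to_cpu(ring[i].des0), le32_to_cpu(ring[i].des1),
                            le32_to_cpu(ring[i].des2), le32_to_cpu(ring[i].des3));
            }
    }
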
 
@@ -499,10 +533,15 @@ static void dwmac4_get_rx_header_len(struct dma_desc *p, unsigned int *len)
        *len = le32_to_cpu(p->des2) & RDES2_HL;
 }
 
-static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
+static void dwmac4_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool buf2_valid)
 {
        p->des2 = cpu_to_le32(lower_32_bits(addr));
-       p->des3 = cpu_to_le32(upper_32_bits(addr) | RDES3_BUFFER2_VALID_ADDR);
+       p->des3 = cpu_to_le32(upper_32_bits(addr));
+
+       if (buf2_valid)
+               p->des3 |= cpu_to_le32(RDES3_BUFFER2_VALID_ADDR);
+       else
+               p->des3 &= cpu_to_le32(~RDES3_BUFFER2_VALID_ADDR);
 }
 
 static void dwmac4_set_tbs(struct dma_edesc *p, u32 sec, u32 nsec)
index bb29bfc..62aa0e9 100644 (file)
@@ -124,6 +124,23 @@ static void dwmac4_dma_init_channel(void __iomem *ioaddr,
               ioaddr + DMA_CHAN_INTR_ENA(chan));
 }
 
+static void dwmac410_dma_init_channel(void __iomem *ioaddr,
+                                     struct stmmac_dma_cfg *dma_cfg, u32 chan)
+{
+       u32 value;
+
+       /* common channel control register config */
+       value = readl(ioaddr + DMA_CHAN_CONTROL(chan));
+       if (dma_cfg->pblx8)
+               value = value | DMA_BUS_MODE_PBL;
+
+       writel(value, ioaddr + DMA_CHAN_CONTROL(chan));
+
+       /* Mask interrupts by writing to CSR7 */
+       writel(DMA_CHAN_INTR_DEFAULT_MASK_4_10,
+              ioaddr + DMA_CHAN_INTR_ENA(chan));
+}
+
 static void dwmac4_dma_init(void __iomem *ioaddr,
                            struct stmmac_dma_cfg *dma_cfg, int atds)
 {
@@ -523,7 +540,7 @@ const struct stmmac_dma_ops dwmac4_dma_ops = {
 const struct stmmac_dma_ops dwmac410_dma_ops = {
        .reset = dwmac4_dma_reset,
        .init = dwmac4_dma_init,
-       .init_chan = dwmac4_dma_init_channel,
+       .init_chan = dwmac410_dma_init_channel,
        .init_rx_chan = dwmac4_dma_init_rx_chan,
        .init_tx_chan = dwmac4_dma_init_tx_chan,
        .axi = dwmac4_dma_axi,
index 0b4ee2d..71e5075 100644 (file)
@@ -53,10 +53,6 @@ void dwmac4_dma_stop_tx(void __iomem *ioaddr, u32 chan)
 
        value &= ~DMA_CONTROL_ST;
        writel(value, ioaddr + DMA_CHAN_TX_CONTROL(chan));
-
-       value = readl(ioaddr + GMAC_CONFIG);
-       value &= ~GMAC_CONFIG_TE;
-       writel(value, ioaddr + GMAC_CONFIG);
 }
 
 void dwmac4_dma_start_rx(void __iomem *ioaddr, u32 chan)
index 0aaf19a..ccfb010 100644 (file)
@@ -292,7 +292,7 @@ static void dwxgmac2_get_rx_header_len(struct dma_desc *p, unsigned int *len)
                *len = le32_to_cpu(p->des2) & XGMAC_RDES2_HL;
 }
 
-static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr)
+static void dwxgmac2_set_sec_addr(struct dma_desc *p, dma_addr_t addr, bool is_valid)
 {
        p->des2 = cpu_to_le32(lower_32_bits(addr));
        p->des3 = cpu_to_le32(upper_32_bits(addr));
index d02cec2..6650edf 100644 (file)
@@ -417,19 +417,22 @@ static int enh_desc_get_rx_timestamp_status(void *desc, void *next_desc,
        }
 }
 
-static void enh_desc_display_ring(void *head, unsigned int size, bool rx)
+static void enh_desc_display_ring(void *head, unsigned int size, bool rx,
+                                 dma_addr_t dma_rx_phy, unsigned int desc_size)
 {
        struct dma_extended_desc *ep = (struct dma_extended_desc *)head;
+       dma_addr_t dma_addr;
        int i;
 
        pr_info("Extended %s descriptor ring:\n", rx ? "RX" : "TX");
 
        for (i = 0; i < size; i++) {
                u64 x;
+               dma_addr = dma_rx_phy + i * sizeof(*ep);
 
                x = *(u64 *)ep;
-               pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-                       i, (unsigned int)virt_to_phys(ep),
+               pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+                       i, &dma_addr,
                        (unsigned int)x, (unsigned int)(x >> 32),
                        ep->basic.des2, ep->basic.des3);
                ep++;
index b40b2e0..979ac9f 100644 (file)
@@ -78,7 +78,8 @@ struct stmmac_desc_ops {
        /* get rx timestamp status */
        int (*get_rx_timestamp_status)(void *desc, void *next_desc, u32 ats);
        /* Display ring */
-       void (*display_ring)(void *head, unsigned int size, bool rx);
+       void (*display_ring)(void *head, unsigned int size, bool rx,
+                            dma_addr_t dma_rx_phy, unsigned int desc_size);
        /* set MSS via context descriptor */
        void (*set_mss)(struct dma_desc *p, unsigned int mss);
        /* get descriptor skbuff address */
@@ -91,7 +92,7 @@ struct stmmac_desc_ops {
        int (*get_rx_hash)(struct dma_desc *p, u32 *hash,
                           enum pkt_hash_types *type);
        void (*get_rx_header_len)(struct dma_desc *p, unsigned int *len);
-       void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr);
+       void (*set_sec_addr)(struct dma_desc *p, dma_addr_t addr, bool buf2_valid);
        void (*set_sarc)(struct dma_desc *p, u32 sarc_type);
        void (*set_vlan_tag)(struct dma_desc *p, u16 tag, u16 inner_tag,
                             u32 inner_type);
index f083360..98ef43f 100644 (file)
@@ -269,19 +269,22 @@ static int ndesc_get_rx_timestamp_status(void *desc, void *next_desc, u32 ats)
                return 1;
 }
 
-static void ndesc_display_ring(void *head, unsigned int size, bool rx)
+static void ndesc_display_ring(void *head, unsigned int size, bool rx,
+                              dma_addr_t dma_rx_phy, unsigned int desc_size)
 {
        struct dma_desc *p = (struct dma_desc *)head;
+       dma_addr_t dma_addr;
        int i;
 
        pr_info("%s descriptor ring:\n", rx ? "RX" : "TX");
 
        for (i = 0; i < size; i++) {
                u64 x;
+               dma_addr = dma_rx_phy + i * sizeof(*p);
 
                x = *(u64 *)p;
-               pr_info("%03d [0x%x]: 0x%x 0x%x 0x%x 0x%x",
-                       i, (unsigned int)virt_to_phys(p),
+               pr_info("%03d [%pad]: 0x%x 0x%x 0x%x 0x%x",
+                       i, &dma_addr,
                        (unsigned int)x, (unsigned int)(x >> 32),
                        p->des2, p->des3);
                p++;
index 26b971c..208cae3 100644 (file)
@@ -1133,6 +1133,7 @@ static int stmmac_phy_setup(struct stmmac_priv *priv)
 static void stmmac_display_rx_rings(struct stmmac_priv *priv)
 {
        u32 rx_cnt = priv->plat->rx_queues_to_use;
+       unsigned int desc_size;
        void *head_rx;
        u32 queue;
 
@@ -1142,19 +1143,24 @@ static void stmmac_display_rx_rings(struct stmmac_priv *priv)
 
                pr_info("\tRX Queue %u rings\n", queue);
 
-               if (priv->extend_desc)
+               if (priv->extend_desc) {
                        head_rx = (void *)rx_q->dma_erx;
-               else
+                       desc_size = sizeof(struct dma_extended_desc);
+               } else {
                        head_rx = (void *)rx_q->dma_rx;
+                       desc_size = sizeof(struct dma_desc);
+               }
 
                /* Display RX ring */
-               stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true);
+               stmmac_display_ring(priv, head_rx, priv->dma_rx_size, true,
+                                   rx_q->dma_rx_phy, desc_size);
        }
 }
 
 static void stmmac_display_tx_rings(struct stmmac_priv *priv)
 {
        u32 tx_cnt = priv->plat->tx_queues_to_use;
+       unsigned int desc_size;
        void *head_tx;
        u32 queue;
 
@@ -1164,14 +1170,19 @@ static void stmmac_display_tx_rings(struct stmmac_priv *priv)
 
                pr_info("\tTX Queue %d rings\n", queue);
 
-               if (priv->extend_desc)
+               if (priv->extend_desc) {
                        head_tx = (void *)tx_q->dma_etx;
-               else if (tx_q->tbs & STMMAC_TBS_AVAIL)
+                       desc_size = sizeof(struct dma_extended_desc);
+               } else if (tx_q->tbs & STMMAC_TBS_AVAIL) {
                        head_tx = (void *)tx_q->dma_entx;
-               else
+                       desc_size = sizeof(struct dma_edesc);
+               } else {
                        head_tx = (void *)tx_q->dma_tx;
+                       desc_size = sizeof(struct dma_desc);
+               }
 
-               stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false);
+               stmmac_display_ring(priv, head_tx, priv->dma_tx_size, false,
+                                   tx_q->dma_tx_phy, desc_size);
        }
 }
 
@@ -1303,9 +1314,10 @@ static int stmmac_init_rx_buffers(struct stmmac_priv *priv, struct dma_desc *p,
                        return -ENOMEM;
 
                buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
-               stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+               stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true);
        } else {
                buf->sec_page = NULL;
+               stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
        }
 
        buf->addr = page_pool_get_dma_addr(buf->page);
@@ -1368,6 +1380,88 @@ static void stmmac_free_tx_buffer(struct stmmac_priv *priv, u32 queue, int i)
 }
 
 /**
+ * stmmac_reinit_rx_buffers - reinit the RX descriptor buffers.
+ * @priv: driver private structure
+ * Description: this function is called to re-allocate the receive buffers,
+ * perform the DMA mapping and init the descriptors.
+ */
+static void stmmac_reinit_rx_buffers(struct stmmac_priv *priv)
+{
+       u32 rx_count = priv->plat->rx_queues_to_use;
+       u32 queue;
+       int i;
+
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               for (i = 0; i < priv->dma_rx_size; i++) {
+                       struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
+
+                       if (buf->page) {
+                               page_pool_recycle_direct(rx_q->page_pool, buf->page);
+                               buf->page = NULL;
+                       }
+
+                       if (priv->sph && buf->sec_page) {
+                               page_pool_recycle_direct(rx_q->page_pool, buf->sec_page);
+                               buf->sec_page = NULL;
+                       }
+               }
+       }
+
+       for (queue = 0; queue < rx_count; queue++) {
+               struct stmmac_rx_queue *rx_q = &priv->rx_queue[queue];
+
+               for (i = 0; i < priv->dma_rx_size; i++) {
+                       struct stmmac_rx_buffer *buf = &rx_q->buf_pool[i];
+                       struct dma_desc *p;
+
+                       if (priv->extend_desc)
+                               p = &((rx_q->dma_erx + i)->basic);
+                       else
+                               p = rx_q->dma_rx + i;
+
+                       if (!buf->page) {
+                               buf->page = page_pool_dev_alloc_pages(rx_q->page_pool);
+                               if (!buf->page)
+                                       goto err_reinit_rx_buffers;
+
+                               buf->addr = page_pool_get_dma_addr(buf->page);
+                       }
+
+                       if (priv->sph && !buf->sec_page) {
+                               buf->sec_page = page_pool_dev_alloc_pages(rx_q->page_pool);
+                               if (!buf->sec_page)
+                                       goto err_reinit_rx_buffers;
+
+                               buf->sec_addr = page_pool_get_dma_addr(buf->sec_page);
+                       }
+
+                       stmmac_set_desc_addr(priv, p, buf->addr);
+                       if (priv->sph)
+                               stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true);
+                       else
+                               stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
+                       if (priv->dma_buf_sz == BUF_SIZE_16KiB)
+                               stmmac_init_desc3(priv, p);
+               }
+       }
+
+       return;
+
+err_reinit_rx_buffers:
+       do {
+               while (--i >= 0)
+                       stmmac_free_rx_buffer(priv, queue, i);
+
+               if (queue == 0)
+                       break;
+
+               i = priv->dma_rx_size;
+       } while (queue-- > 0);
+}
+
+/**
  * init_dma_rx_desc_rings - init the RX descriptor rings
  * @dev: net device structure
  * @flags: gfp flag.
@@ -3648,7 +3742,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv, u32 queue)
                                           DMA_FROM_DEVICE);
 
                stmmac_set_desc_addr(priv, p, buf->addr);
-               stmmac_set_desc_sec_addr(priv, p, buf->sec_addr);
+               if (priv->sph)
+                       stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, true);
+               else
+                       stmmac_set_desc_sec_addr(priv, p, buf->sec_addr, false);
                stmmac_refill_desc3(priv, rx_q, p);
 
                rx_q->rx_count_frames++;
@@ -3736,18 +3833,23 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit, u32 queue)
        unsigned int count = 0, error = 0, len = 0;
        int status = 0, coe = priv->hw->rx_csum;
        unsigned int next_entry = rx_q->cur_rx;
+       unsigned int desc_size;
        struct sk_buff *skb = NULL;
 
        if (netif_msg_rx_status(priv)) {
                void *rx_head;
 
                netdev_dbg(priv->dev, "%s: descriptor ring:\n", __func__);
-               if (priv->extend_desc)
+               if (priv->extend_desc) {
                        rx_head = (void *)rx_q->dma_erx;
-               else
+                       desc_size = sizeof(struct dma_extended_desc);
+               } else {
                        rx_head = (void *)rx_q->dma_rx;
+                       desc_size = sizeof(struct dma_desc);
+               }
 
-               stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true);
+               stmmac_display_ring(priv, rx_head, priv->dma_rx_size, true,
+                                   rx_q->dma_rx_phy, desc_size);
        }
        while (count < limit) {
                unsigned int buf1_len = 0, buf2_len = 0;
@@ -4315,24 +4417,27 @@ static int stmmac_set_mac_address(struct net_device *ndev, void *addr)
 static struct dentry *stmmac_fs_dir;
 
 static void sysfs_display_ring(void *head, int size, int extend_desc,
-                              struct seq_file *seq)
+                              struct seq_file *seq, dma_addr_t dma_phy_addr)
 {
        int i;
        struct dma_extended_desc *ep = (struct dma_extended_desc *)head;
        struct dma_desc *p = (struct dma_desc *)head;
+       dma_addr_t dma_addr;
 
        for (i = 0; i < size; i++) {
                if (extend_desc) {
-                       seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-                                  i, (unsigned int)virt_to_phys(ep),
+                       dma_addr = dma_phy_addr + i * sizeof(*ep);
+                       seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+                                  i, &dma_addr,
                                   le32_to_cpu(ep->basic.des0),
                                   le32_to_cpu(ep->basic.des1),
                                   le32_to_cpu(ep->basic.des2),
                                   le32_to_cpu(ep->basic.des3));
                        ep++;
                } else {
-                       seq_printf(seq, "%d [0x%x]: 0x%x 0x%x 0x%x 0x%x\n",
-                                  i, (unsigned int)virt_to_phys(p),
+                       dma_addr = dma_phy_addr + i * sizeof(*p);
+                       seq_printf(seq, "%d [%pad]: 0x%x 0x%x 0x%x 0x%x\n",
+                                  i, &dma_addr,
                                   le32_to_cpu(p->des0), le32_to_cpu(p->des1),
                                   le32_to_cpu(p->des2), le32_to_cpu(p->des3));
                        p++;
@@ -4360,11 +4465,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v)
                if (priv->extend_desc) {
                        seq_printf(seq, "Extended descriptor ring:\n");
                        sysfs_display_ring((void *)rx_q->dma_erx,
-                                          priv->dma_rx_size, 1, seq);
+                                          priv->dma_rx_size, 1, seq, rx_q->dma_rx_phy);
                } else {
                        seq_printf(seq, "Descriptor ring:\n");
                        sysfs_display_ring((void *)rx_q->dma_rx,
-                                          priv->dma_rx_size, 0, seq);
+                                          priv->dma_rx_size, 0, seq, rx_q->dma_rx_phy);
                }
        }
 
@@ -4376,11 +4481,11 @@ static int stmmac_rings_status_show(struct seq_file *seq, void *v)
                if (priv->extend_desc) {
                        seq_printf(seq, "Extended descriptor ring:\n");
                        sysfs_display_ring((void *)tx_q->dma_etx,
-                                          priv->dma_tx_size, 1, seq);
+                                          priv->dma_tx_size, 1, seq, tx_q->dma_tx_phy);
                } else if (!(tx_q->tbs & STMMAC_TBS_AVAIL)) {
                        seq_printf(seq, "Descriptor ring:\n");
                        sysfs_display_ring((void *)tx_q->dma_tx,
-                                          priv->dma_tx_size, 0, seq);
+                                          priv->dma_tx_size, 0, seq, tx_q->dma_tx_phy);
                }
        }
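
The ring dumps above now derive each descriptor's bus address from the ring's base DMA handle instead of calling virt_to_phys() on a coherent-DMA virtual address, which is not guaranteed to map back to the device-visible address and truncates through the (unsigned int) cast on 64-bit. The arithmetic is simply base + index * descriptor size; a minimal sketch with a fake base handle:

#include <stdint.h>
#include <stdio.h>

typedef uint64_t dma_addr_t; /* stand-in for the kernel type */

struct dma_desc { uint32_t des0, des1, des2, des3; };

int main(void)
{
	dma_addr_t dma_rx_phy = 0x7f000000; /* hypothetical base handle */
	int i;

	for (i = 0; i < 4; i++) {
		/* same computation as the sysfs/debugfs dump above */
		dma_addr_t dma_addr = dma_rx_phy + i * sizeof(struct dma_desc);

		printf("%d [0x%llx]\n", i, (unsigned long long)dma_addr);
	}
	return 0;
}
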
 
@@ -5144,13 +5249,16 @@ int stmmac_dvr_remove(struct device *dev)
        netdev_info(priv->dev, "%s: removing driver", __func__);
 
        stmmac_stop_all_dma(priv);
+       stmmac_mac_set(priv, priv->ioaddr, false);
+       netif_carrier_off(ndev);
+       unregister_netdev(ndev);
 
+       /* Serdes power down needs to happen after the VLAN filter is
+        * deleted, which is triggered by unregister_netdev().
+        */
        if (priv->plat->serdes_powerdown)
                priv->plat->serdes_powerdown(ndev, priv->plat->bsp_priv);
 
-       stmmac_mac_set(priv, priv->ioaddr, false);
-       netif_carrier_off(ndev);
-       unregister_netdev(ndev);
 #ifdef CONFIG_DEBUG_FS
        stmmac_exit_fs(ndev);
 #endif
@@ -5257,6 +5365,8 @@ static void stmmac_reset_queues_param(struct stmmac_priv *priv)
                tx_q->cur_tx = 0;
                tx_q->dirty_tx = 0;
                tx_q->mss = 0;
+
+               netdev_tx_reset_queue(netdev_get_tx_queue(priv->dev, queue));
        }
 }
 
@@ -5318,7 +5428,7 @@ int stmmac_resume(struct device *dev)
        mutex_lock(&priv->lock);
 
        stmmac_reset_queues_param(priv);
-
+       stmmac_reinit_rx_buffers(priv);
        stmmac_free_tx_skbufs(priv);
        stmmac_clear_descriptors(priv);
 
index 5698554..44bb133 100644 (file)
@@ -316,6 +316,32 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
        if (!priv->dma_cap.av)
                return -EOPNOTSUPP;
 
+       /* Port Transmit Rate and Speed Divider */
+       switch (priv->speed) {
+       case SPEED_10000:
+               ptr = 32;
+               speed_div = 10000000;
+               break;
+       case SPEED_5000:
+               ptr = 32;
+               speed_div = 5000000;
+               break;
+       case SPEED_2500:
+               ptr = 8;
+               speed_div = 2500000;
+               break;
+       case SPEED_1000:
+               ptr = 8;
+               speed_div = 1000000;
+               break;
+       case SPEED_100:
+               ptr = 4;
+               speed_div = 100000;
+               break;
+       default:
+               return -EOPNOTSUPP;
+       }
+
        mode_to_use = priv->plat->tx_queues_cfg[queue].mode_to_use;
        if (mode_to_use == MTL_QUEUE_DCB && qopt->enable) {
                ret = stmmac_dma_qmode(priv, priv->ioaddr, queue, MTL_QUEUE_AVB);
@@ -332,10 +358,6 @@ static int tc_setup_cbs(struct stmmac_priv *priv,
                priv->plat->tx_queues_cfg[queue].mode_to_use = MTL_QUEUE_DCB;
        }
 
-       /* Port Transmit Rate and Speed Divider */
-       ptr = (priv->speed == SPEED_100) ? 4 : 8;
-       speed_div = (priv->speed == SPEED_100) ? 100000 : 1000000;
-
        /* Final adjustments for HW */
        value = div_s64(qopt->idleslope * 1024ll * ptr, speed_div);
        priv->plat->tx_queues_cfg[queue].idle_slope = value & GENMASK(31, 0);
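
With the switch added above, the idle slope programmed into hardware is idleslope * 1024 * ptr / speed_div, where ptr and speed_div now track the negotiated link speed instead of being hard-wired for 100M/1G. A small sketch of the mapping and the resulting computation (SPEED_* values follow the ethtool convention, and idleslope is in kilobits per second as with tc-cbs):

#include <stdint.h>
#include <stdio.h>

/* Values mirror include/uapi/linux/ethtool.h. */
enum { SPEED_100 = 100, SPEED_1000 = 1000, SPEED_2500 = 2500,
       SPEED_5000 = 5000, SPEED_10000 = 10000 };

/* Returns the idle_slope value for the given link speed, or -1 if
 * the speed is unsupported, mirroring the switch in tc_setup_cbs().
 */
static int64_t cbs_idle_slope(int speed, int64_t idleslope)
{
	int ptr;
	int64_t speed_div;

	switch (speed) {
	case SPEED_10000: ptr = 32; speed_div = 10000000; break;
	case SPEED_5000:  ptr = 32; speed_div = 5000000;  break;
	case SPEED_2500:  ptr = 8;  speed_div = 2500000;  break;
	case SPEED_1000:  ptr = 8;  speed_div = 1000000;  break;
	case SPEED_100:   ptr = 4;  speed_div = 100000;   break;
	default:          return -1;
	}
	return idleslope * 1024 * ptr / speed_div;
}

int main(void)
{
	/* e.g. idleslope 20000 on a 1G link: 20000*1024*8/1000000 = 163 */
	printf("%lld\n", (long long)cbs_idle_slope(SPEED_1000, 20000));
	return 0;
}
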
index 68695d4..707ccdd 100644 (file)
@@ -3931,8 +3931,6 @@ static void niu_xmac_interrupt(struct niu *np)
                mp->rx_mcasts += RXMAC_MC_FRM_CNT_COUNT;
        if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP)
                mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT;
-       if (val & XRXMAC_STATUS_RXBCAST_CNT_EXP)
-               mp->rx_bcasts += RXMAC_BC_FRM_CNT_COUNT;
        if (val & XRXMAC_STATUS_RXHIST1_CNT_EXP)
                mp->rx_hist_cnt1 += RXMAC_HIST_CNT1_COUNT;
        if (val & XRXMAC_STATUS_RXHIST2_CNT_EXP)
index b8f4f41..d054c6e 100644 (file)
@@ -2044,6 +2044,7 @@ bdx_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                /*bdx_hw_reset(priv); */
                if (bdx_read_mac(priv)) {
                        pr_err("load MAC address failed\n");
+                       err = -EFAULT;
                        goto err_out_iomap;
                }
                SET_NETDEV_DEV(ndev, &pdev->dev);
index 3a8775e..5d677db 100644 (file)
@@ -1880,7 +1880,7 @@ static int axienet_probe(struct platform_device *pdev)
        if (IS_ERR(lp->regs)) {
                dev_err(&pdev->dev, "could not map Axi Ethernet regs.\n");
                ret = PTR_ERR(lp->regs);
-               goto free_netdev;
+               goto cleanup_clk;
        }
        lp->regs_start = ethres->start;
 
@@ -1958,18 +1958,18 @@ static int axienet_probe(struct platform_device *pdev)
                        break;
                default:
                        ret = -EINVAL;
-                       goto free_netdev;
+                       goto cleanup_clk;
                }
        } else {
                ret = of_get_phy_mode(pdev->dev.of_node, &lp->phy_mode);
                if (ret)
-                       goto free_netdev;
+                       goto cleanup_clk;
        }
        if (lp->switch_x_sgmii && lp->phy_mode != PHY_INTERFACE_MODE_SGMII &&
            lp->phy_mode != PHY_INTERFACE_MODE_1000BASEX) {
                dev_err(&pdev->dev, "xlnx,switch-x-sgmii only supported with SGMII or 1000BaseX\n");
                ret = -EINVAL;
-               goto free_netdev;
+               goto cleanup_clk;
        }
 
        /* Find the DMA node, map the DMA registers, and decode the DMA IRQs */
@@ -1982,7 +1982,7 @@ static int axienet_probe(struct platform_device *pdev)
                        dev_err(&pdev->dev,
                                "unable to get DMA resource\n");
                        of_node_put(np);
-                       goto free_netdev;
+                       goto cleanup_clk;
                }
                lp->dma_regs = devm_ioremap_resource(&pdev->dev,
                                                     &dmares);
@@ -2002,12 +2002,12 @@ static int axienet_probe(struct platform_device *pdev)
        if (IS_ERR(lp->dma_regs)) {
                dev_err(&pdev->dev, "could not map DMA regs\n");
                ret = PTR_ERR(lp->dma_regs);
-               goto free_netdev;
+               goto cleanup_clk;
        }
        if ((lp->rx_irq <= 0) || (lp->tx_irq <= 0)) {
                dev_err(&pdev->dev, "could not determine irqs\n");
                ret = -ENOMEM;
-               goto free_netdev;
+               goto cleanup_clk;
        }
 
        /* Autodetect the need for 64-bit DMA pointers.
@@ -2037,7 +2037,7 @@ static int axienet_probe(struct platform_device *pdev)
        ret = dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(addr_width));
        if (ret) {
                dev_err(&pdev->dev, "No suitable DMA available\n");
-               goto free_netdev;
+               goto cleanup_clk;
        }
 
        /* Check for Ethernet core IRQ (optional) */
@@ -2068,12 +2068,12 @@ static int axienet_probe(struct platform_device *pdev)
                if (!lp->phy_node) {
                        dev_err(&pdev->dev, "phy-handle required for 1000BaseX/SGMII\n");
                        ret = -EINVAL;
-                       goto free_netdev;
+                       goto cleanup_mdio;
                }
                lp->pcs_phy = of_mdio_find_device(lp->phy_node);
                if (!lp->pcs_phy) {
                        ret = -EPROBE_DEFER;
-                       goto free_netdev;
+                       goto cleanup_mdio;
                }
                lp->phylink_config.pcs_poll = true;
        }
@@ -2087,17 +2087,30 @@ static int axienet_probe(struct platform_device *pdev)
        if (IS_ERR(lp->phylink)) {
                ret = PTR_ERR(lp->phylink);
                dev_err(&pdev->dev, "phylink_create error (%i)\n", ret);
-               goto free_netdev;
+               goto cleanup_mdio;
        }
 
        ret = register_netdev(lp->ndev);
        if (ret) {
                dev_err(lp->dev, "register_netdev() error (%i)\n", ret);
-               goto free_netdev;
+               goto cleanup_phylink;
        }
 
        return 0;
 
+cleanup_phylink:
+       phylink_destroy(lp->phylink);
+
+cleanup_mdio:
+       if (lp->pcs_phy)
+               put_device(&lp->pcs_phy->dev);
+       if (lp->mii_bus)
+               axienet_mdio_teardown(lp);
+       of_node_put(lp->phy_node);
+
+cleanup_clk:
+       clk_disable_unprepare(lp->clk);
+
 free_netdev:
        free_netdev(ndev);
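
The probe path above is the classic layered-goto cleanup: each label undoes exactly one acquisition, later failures jump to earlier labels, and the labels are stacked so control falls through the remaining teardown steps in reverse order of setup. A minimal model of the shape, with hypothetical acquire()/release() pairs standing in for the clk, mdio, phylink and netdev steps:

#include <stdio.h>

static int acquire(const char *what) { printf("acquire %s\n", what); return 0; }
static void release(const char *what) { printf("release %s\n", what); }

static int probe(void)
{
	int ret;

	ret = acquire("clk");
	if (ret)
		goto out;
	ret = acquire("mdio");
	if (ret)
		goto cleanup_clk;
	ret = acquire("phylink");
	if (ret)
		goto cleanup_mdio;
	ret = acquire("netdev");
	if (ret)
		goto cleanup_phylink;
	return 0;

cleanup_phylink:		/* each label undoes one step, then falls through */
	release("phylink");
cleanup_mdio:
	release("mdio");
cleanup_clk:
	release("clk");
out:
	return ret;
}

int main(void) { return probe(); }
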
 
index 9a70f05..39c00f0 100644 (file)
@@ -543,7 +543,6 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev,
        if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) &&
            mtu < ntohs(iph->tot_len)) {
                netdev_dbg(dev, "packet too big, fragmentation needed\n");
-               memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
                icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
                              htonl(mtu));
                goto err_rt;
index 71d6629..9f5b561 100644 (file)
@@ -171,11 +171,6 @@ static void sp_encaps(struct sixpack *sp, unsigned char *icp, int len)
                goto out_drop;
        }
 
-       if (len > sp->mtu) {    /* sp->mtu = AX25_MTU = max. PACLEN = 256 */
-               msg = "oversized transmit packet!";
-               goto out_drop;
-       }
-
        if (p[0] > 5) {
                msg = "invalid KISS command";
                goto out_drop;
index 36eeb80..4690c6a 100644 (file)
@@ -2167,7 +2167,6 @@ static void __exit scc_cleanup_driver(void)
 
 MODULE_AUTHOR("Joerg Reuter <jreuter@yaina.de>");
 MODULE_DESCRIPTION("AX.25 Device Driver for Z8530 based HDLC cards");
-MODULE_SUPPORTED_DEVICE("Z8530 based SCC cards for Amateur Radio");
 MODULE_LICENSE("GPL");
 module_init(scc_init_driver);
 module_exit(scc_cleanup_driver);
index e1a497d..59ac04a 100644 (file)
@@ -229,7 +229,7 @@ int netvsc_send(struct net_device *net,
                bool xdp_tx);
 void netvsc_linkstatus_callback(struct net_device *net,
                                struct rndis_message *resp,
-                               void *data);
+                               void *data, u32 data_buflen);
 int netvsc_recv_callback(struct net_device *net,
                         struct netvsc_device *nvdev,
                         struct netvsc_channel *nvchan);
index 8176fa0..15f262b 100644 (file)
@@ -744,7 +744,7 @@ static netdev_tx_t netvsc_start_xmit(struct sk_buff *skb,
  */
 void netvsc_linkstatus_callback(struct net_device *net,
                                struct rndis_message *resp,
-                               void *data)
+                               void *data, u32 data_buflen)
 {
        struct rndis_indicate_status *indicate = &resp->msg.indicate_status;
        struct net_device_context *ndev_ctx = netdev_priv(net);
@@ -765,11 +765,16 @@ void netvsc_linkstatus_callback(struct net_device *net,
        if (indicate->status == RNDIS_STATUS_LINK_SPEED_CHANGE) {
                u32 speed;
 
-               /* Validate status_buf_offset */
+               /* Validate status_buf_offset and status_buflen.
+                *
+                * Certain (pre-Fe) implementations of Hyper-V's vSwitch didn't account
+                * for the status buffer field in resp->msg_len; perform the validation
+                * using data_buflen (>= resp->msg_len).
+                */
                if (indicate->status_buflen < sizeof(speed) ||
                    indicate->status_buf_offset < sizeof(*indicate) ||
-                   resp->msg_len - RNDIS_HEADER_SIZE < indicate->status_buf_offset ||
-                   resp->msg_len - RNDIS_HEADER_SIZE - indicate->status_buf_offset
+                   data_buflen - RNDIS_HEADER_SIZE < indicate->status_buf_offset ||
+                   data_buflen - RNDIS_HEADER_SIZE - indicate->status_buf_offset
                                < indicate->status_buflen) {
                        netdev_err(net, "invalid rndis_indicate_status packet\n");
                        return;
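
The rewritten check above validates an offset/length pair pointing into a message buffer without ever forming "offset + length", so a hostile host cannot wrap the arithmetic: every comparison subtracts only quantities already proven smaller. A standalone sketch of the pattern, assuming a buffer of 'total' bytes, a fixed header of 'hdr' bytes, and untrusted off/len fields:

#include <stdbool.h>
#include <stdint.h>

/* Return true when [off, off+len) lies inside the payload of a buffer
 * of 'total' bytes that begins with 'hdr' header bytes.  No addition
 * of untrusted values is performed, so the check cannot overflow.
 */
static bool range_ok(uint32_t total, uint32_t hdr, uint32_t off, uint32_t len)
{
	if (total < hdr)
		return false;
	if (total - hdr < off)
		return false;
	if (total - hdr - off < len)
		return false;
	return true;
}

int main(void)
{
	/* 100-byte message, 20-byte header: payload is [0, 80) */
	return range_ok(100, 20, 60, 20) && !range_ok(100, 20, 60, 21) ? 0 : 1;
}
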
index 123cc9d..c0e89e1 100644 (file)
@@ -620,7 +620,7 @@ int rndis_filter_receive(struct net_device *ndev,
 
        case RNDIS_MSG_INDICATE:
                /* notification msgs */
-               netvsc_linkstatus_callback(ndev, rndis_msg, data);
+               netvsc_linkstatus_callback(ndev, rndis_msg, data, buflen);
                break;
        default:
                netdev_err(ndev,
index 35e3585..d73b03a 100644 (file)
@@ -175,21 +175,23 @@ bool ipa_cmd_table_valid(struct ipa *ipa, const struct ipa_mem *mem,
                            : field_max(IP_FLTRT_FLAGS_NHASH_ADDR_FMASK);
        if (mem->offset > offset_max ||
            ipa->mem_offset > offset_max - mem->offset) {
-               dev_err(dev, "IPv%c %s%s table region offset too large "
-                             "(0x%04x + 0x%04x > 0x%04x)\n",
-                             ipv6 ? '6' : '4', hashed ? "hashed " : "",
-                             route ? "route" : "filter",
-                             ipa->mem_offset, mem->offset, offset_max);
+               dev_err(dev, "IPv%c %s%s table region offset too large\n",
+                       ipv6 ? '6' : '4', hashed ? "hashed " : "",
+                       route ? "route" : "filter");
+               dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
+                       ipa->mem_offset, mem->offset, offset_max);
+
                return false;
        }
 
        if (mem->offset > ipa->mem_size ||
            mem->size > ipa->mem_size - mem->offset) {
-               dev_err(dev, "IPv%c %s%s table region out of range "
-                             "(0x%04x + 0x%04x > 0x%04x)\n",
-                             ipv6 ? '6' : '4', hashed ? "hashed " : "",
-                             route ? "route" : "filter",
-                             mem->offset, mem->size, ipa->mem_size);
+               dev_err(dev, "IPv%c %s%s table region out of range\n",
+                       ipv6 ? '6' : '4', hashed ? "hashed " : "",
+                       route ? "route" : "filter");
+               dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
+                       mem->offset, mem->size, ipa->mem_size);
+
                return false;
        }
 
@@ -205,22 +207,36 @@ static bool ipa_cmd_header_valid(struct ipa *ipa)
        u32 size_max;
        u32 size;
 
+       /* In ipa_cmd_hdr_init_local_add() we record the offset and size
+        * of the header table memory area.  Make sure the offset and size
+        * fit in the fields that need to hold them, and that the entire
+        * range is within the overall IPA memory range.
+        */
        offset_max = field_max(HDR_INIT_LOCAL_FLAGS_HDR_ADDR_FMASK);
        if (mem->offset > offset_max ||
            ipa->mem_offset > offset_max - mem->offset) {
-               dev_err(dev, "header table region offset too large "
-                             "(0x%04x + 0x%04x > 0x%04x)\n",
-                             ipa->mem_offset + mem->offset, offset_max);
+               dev_err(dev, "header table region offset too large\n");
+               dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
+                       ipa->mem_offset, mem->offset, offset_max);
+
                return false;
        }
 
        size_max = field_max(HDR_INIT_LOCAL_FLAGS_TABLE_SIZE_FMASK);
        size = ipa->mem[IPA_MEM_MODEM_HEADER].size;
        size += ipa->mem[IPA_MEM_AP_HEADER].size;
-       if (mem->offset > ipa->mem_size || size > ipa->mem_size - mem->offset) {
-               dev_err(dev, "header table region out of range "
-                             "(0x%04x + 0x%04x > 0x%04x)\n",
-                             mem->offset, size, ipa->mem_size);
+
+       if (size > size_max) {
+               dev_err(dev, "header table region size too large\n");
+               dev_err(dev, "    (0x%04x > 0x%08x)\n", size, size_max);
+
+               return false;
+       }
+       if (size > ipa->mem_size || mem->offset > ipa->mem_size - size) {
+               dev_err(dev, "header table region out of range\n");
+               dev_err(dev, "    (0x%04x + 0x%04x > 0x%04x)\n",
+                       mem->offset, size, ipa->mem_size);
+
                return false;
        }
 
index 2fc6448..e594bf3 100644 (file)
@@ -249,6 +249,7 @@ static const struct qmi_msg_handler ipa_server_msg_handlers[] = {
                .decoded_size   = IPA_QMI_DRIVER_INIT_COMPLETE_REQ_SZ,
                .fn             = ipa_server_driver_init_complete,
        },
+       { },
 };
 
 /* Handle an INIT_DRIVER response message from the modem. */
@@ -269,6 +270,7 @@ static const struct qmi_msg_handler ipa_client_msg_handlers[] = {
                .decoded_size   = IPA_QMI_INIT_DRIVER_RSP_SZ,
                .fn             = ipa_client_init_driver,
        },
+       { },
 };
 
 /* Return a pointer to an init modem driver request structure, which contains
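
The empty "{ }" entries added above terminate the handler arrays: the QMI core walks the table until it reaches an entry whose fields are all zero, so a missing sentinel sends it reading past the end of the array. The idiom in isolation, with a hypothetical handler table:

#include <stdio.h>

struct msg_handler {
	unsigned int msg_id;
	void (*fn)(void);
};

static void hello(void) { printf("hello\n"); }

/* Sentinel-terminated: the all-zero entry marks the end of the table. */
static const struct msg_handler handlers[] = {
	{ .msg_id = 0x21, .fn = hello },
	{ },
};

int main(void)
{
	const struct msg_handler *h;

	for (h = handlers; h->fn; h++)	/* stop at the sentinel */
		h->fn();
	return 0;
}
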
index aec9244..659d3dc 100644 (file)
@@ -294,6 +294,7 @@ nsim_create(struct nsim_dev *nsim_dev, struct nsim_dev_port *nsim_dev_port)
        dev_net_set(dev, nsim_dev_net(nsim_dev));
        ns = netdev_priv(dev);
        ns->netdev = dev;
+       u64_stats_init(&ns->syncp);
        ns->nsim_dev = nsim_dev;
        ns->nsim_dev_port = nsim_dev_port;
        ns->nsim_bus_dev = nsim_dev->nsim_bus_dev;
index fa0be59..82fe5f4 100644 (file)
@@ -342,6 +342,10 @@ static int bcm54xx_config_init(struct phy_device *phydev)
        bcm54xx_adjust_rxrefclk(phydev);
 
        switch (BRCM_PHY_MODEL(phydev)) {
+       case PHY_ID_BCM50610:
+       case PHY_ID_BCM50610M:
+               err = bcm54xx_config_clock_delay(phydev);
+               break;
        case PHY_ID_BCM54210E:
                err = bcm54210e_config_init(phydev);
                break;
@@ -399,6 +403,11 @@ static int bcm54xx_resume(struct phy_device *phydev)
        if (ret < 0)
                return ret;
 
+       /* Upon exiting power down, the PHY remains in an internal reset state
+        * for 40us.
+        */
+       fsleep(40);
+
        return bcm54xx_config_init(phydev);
 }
 
index be1224b..f7a2ec1 100644 (file)
@@ -290,6 +290,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
 
 static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
 {
+       bool trigger_machine = false;
        int irq_status;
 
        /* The MISR1 and MISR2 registers are holding the interrupt status in
@@ -305,7 +306,7 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
                return IRQ_NONE;
        }
        if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-               goto trigger_machine;
+               trigger_machine = true;
 
        irq_status = phy_read(phydev, MII_DP83822_MISR2);
        if (irq_status < 0) {
@@ -313,11 +314,11 @@ static irqreturn_t dp83822_handle_interrupt(struct phy_device *phydev)
                return IRQ_NONE;
        }
        if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-               goto trigger_machine;
+               trigger_machine = true;
 
-       return IRQ_NONE;
+       if (!trigger_machine)
+               return IRQ_NONE;
 
-trigger_machine:
        phy_trigger_machine(phydev);
 
        return IRQ_HANDLED;
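
Instead of jumping to a shared label as soon as one status register shows an enabled source (which skipped reading the remaining registers, leaving their typically read-to-clear status unacked), the handler now reads every register and accumulates whether any source fired. A sketch of the control flow, with a hypothetical read-to-clear read_and_ack() and the enable-low/status-high byte layout assumed to match the MISR-style registers above:

#include <stdbool.h>
#include <stdio.h>

#define N_REGS 2

/* Hypothetical read-to-clear status register access. */
static int read_and_ack(int reg)
{
	static const int fake_status[N_REGS] = { 0x0000, 0x0101 };
	return fake_status[reg];
}

/* low byte = enable mask, high byte = latched status */
static bool enabled_source_fired(int status)
{
	return status & ((status & 0xff) << 8);
}

int main(void)
{
	bool trigger = false;
	int reg;

	/* Read every register so each one is acked, even after a hit. */
	for (reg = 0; reg < N_REGS; reg++)
		if (enabled_source_fired(read_and_ack(reg)))
			trigger = true;

	printf(trigger ? "IRQ_HANDLED\n" : "IRQ_NONE\n");
	return 0;
}
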
index 688fadf..7ea32fb 100644 (file)
@@ -264,6 +264,7 @@ static int dp83811_config_intr(struct phy_device *phydev)
 
 static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
 {
+       bool trigger_machine = false;
        int irq_status;
 
        /* The INT_STAT registers 1, 2 and 3 are holding the interrupt status
@@ -279,7 +280,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
                return IRQ_NONE;
        }
        if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-               goto trigger_machine;
+               trigger_machine = true;
 
        irq_status = phy_read(phydev, MII_DP83811_INT_STAT2);
        if (irq_status < 0) {
@@ -287,7 +288,7 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
                return IRQ_NONE;
        }
        if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-               goto trigger_machine;
+               trigger_machine = true;
 
        irq_status = phy_read(phydev, MII_DP83811_INT_STAT3);
        if (irq_status < 0) {
@@ -295,11 +296,11 @@ static irqreturn_t dp83811_handle_interrupt(struct phy_device *phydev)
                return IRQ_NONE;
        }
        if (irq_status & ((irq_status & GENMASK(7, 0)) << 8))
-               goto trigger_machine;
+               trigger_machine = true;
 
-       return IRQ_NONE;
+       if (!trigger_machine)
+               return IRQ_NONE;
 
-trigger_machine:
        phy_trigger_machine(phydev);
 
        return IRQ_HANDLED;
index 3e43173..a00a667 100644 (file)
@@ -239,7 +239,7 @@ static int ip101a_g_config_intr_pin(struct phy_device *phydev)
 
        oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE);
        if (oldpage < 0)
-               return oldpage;
+               goto out;
 
        /* configure the RXER/INTR_32 pin of the 32-pin IP101GR if needed: */
        switch (priv->sel_intr32) {
@@ -314,7 +314,7 @@ static int ip101a_g_read_status(struct phy_device *phydev)
 
        oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE);
        if (oldpage < 0)
-               return oldpage;
+               goto out;
 
        ret = __phy_read(phydev, IP10XX_SPEC_CTRL_STATUS);
        if (ret < 0)
@@ -349,7 +349,8 @@ out:
 static int ip101a_g_config_mdix(struct phy_device *phydev)
 {
        u16 ctrl = 0, ctrl2 = 0;
-       int oldpage, ret;
+       int oldpage;
+       int ret = 0;
 
        switch (phydev->mdix_ctrl) {
        case ETH_TP_MDI:
@@ -367,7 +368,7 @@ static int ip101a_g_config_mdix(struct phy_device *phydev)
 
        oldpage = phy_select_page(phydev, IP101G_DEFAULT_PAGE);
        if (oldpage < 0)
-               return oldpage;
+               goto out;
 
        ret = __phy_modify(phydev, IP10XX_SPEC_CTRL_STATUS,
                           IP101A_G_AUTO_MDIX_DIS, ctrl);
index 7ec6f70..a14a003 100644 (file)
@@ -1303,6 +1303,7 @@ static struct phy_driver ksphy_driver[] = {
        .driver_data    = &ksz8081_type,
        .probe          = kszphy_probe,
        .config_init    = ksz8081_config_init,
+       .soft_reset     = genphy_soft_reset,
        .config_intr    = kszphy_config_intr,
        .handle_interrupt = kszphy_handle_interrupt,
        .get_sset_count = kszphy_get_sset_count,
index 1be07e4..fc2e7cb 100644 (file)
@@ -276,14 +276,16 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 
        phydev->autoneg = autoneg;
 
-       phydev->speed = speed;
+       if (autoneg == AUTONEG_DISABLE) {
+               phydev->speed = speed;
+               phydev->duplex = duplex;
+       }
 
        linkmode_copy(phydev->advertising, advertising);
 
        linkmode_mod_bit(ETHTOOL_LINK_MODE_Autoneg_BIT,
                         phydev->advertising, autoneg == AUTONEG_ENABLE);
 
-       phydev->duplex = duplex;
        phydev->master_slave_set = cmd->base.master_slave_cfg;
        phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
 
index ce49547..cc38e32 100644 (file)
@@ -230,7 +230,6 @@ static struct phy_driver genphy_driver;
 static LIST_HEAD(phy_fixup_list);
 static DEFINE_MUTEX(phy_fixup_lock);
 
-#ifdef CONFIG_PM
 static bool mdio_bus_phy_may_suspend(struct phy_device *phydev)
 {
        struct device_driver *drv = phydev->mdio.dev.driver;
@@ -270,7 +269,7 @@ out:
        return !phydev->suspended;
 }
 
-static int mdio_bus_phy_suspend(struct device *dev)
+static __maybe_unused int mdio_bus_phy_suspend(struct device *dev)
 {
        struct phy_device *phydev = to_phy_device(dev);
 
@@ -290,7 +289,7 @@ static int mdio_bus_phy_suspend(struct device *dev)
        return phy_suspend(phydev);
 }
 
-static int mdio_bus_phy_resume(struct device *dev)
+static __maybe_unused int mdio_bus_phy_resume(struct device *dev)
 {
        struct phy_device *phydev = to_phy_device(dev);
        int ret;
@@ -316,7 +315,6 @@ no_resume:
 
 static SIMPLE_DEV_PM_OPS(mdio_bus_phy_pm_ops, mdio_bus_phy_suspend,
                         mdio_bus_phy_resume);
-#endif /* CONFIG_PM */
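
Dropping the #ifdef CONFIG_PM guard works because SIMPLE_DEV_PM_OPS() only references the callbacks when sleep support is configured in; marking them __maybe_unused silences the "defined but not used" warning in builds without it, while keeping the functions compiled and type-checked everywhere. A minimal kernel-style sketch of the idiom for a hypothetical driver:

#include <linux/device.h>
#include <linux/pm.h>

static __maybe_unused int foo_suspend(struct device *dev)
{
	/* quiesce the device */
	return 0;
}

static __maybe_unused int foo_resume(struct device *dev)
{
	/* bring the device back up */
	return 0;
}

/* Expands to an empty dev_pm_ops when sleep support is disabled, so
 * the callbacks above would otherwise be flagged as unused.
 */
static SIMPLE_DEV_PM_OPS(foo_pm_ops, foo_suspend, foo_resume);
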
 
 /**
  * phy_register_fixup - creates a new phy_fixup and adds it to the list
index 053c92e..dc2800b 100644 (file)
@@ -476,7 +476,7 @@ static void phylink_major_config(struct phylink *pl, bool restart,
                err = pl->mac_ops->mac_finish(pl->config, pl->cur_link_an_mode,
                                              state->interface);
                if (err < 0)
-                       phylink_err(pl, "mac_prepare failed: %pe\n",
+                       phylink_err(pl, "mac_finish failed: %pe\n",
                                    ERR_PTR(err));
        }
 }
index 02e6bbb..8d1f69d 100644 (file)
@@ -387,6 +387,8 @@ static int usbpn_probe(struct usb_interface *intf, const struct usb_device_id *i
 
        err = register_netdev(dev);
        if (err) {
+               /* Set disconnected flag so that disconnect() returns early. */
+               pnd->disconnected = 1;
                usb_driver_release_interface(&usbpn_driver, data_intf);
                goto out;
        }
index 4087c9e..8acf301 100644 (file)
@@ -851,17 +851,17 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
 
        /* check if we got everything */
        if (!ctx->data) {
-               dev_dbg(&intf->dev, "CDC Union missing and no IAD found\n");
+               dev_err(&intf->dev, "CDC Union missing and no IAD found\n");
                goto error;
        }
        if (cdc_ncm_comm_intf_is_mbim(intf->cur_altsetting)) {
                if (!ctx->mbim_desc) {
-                       dev_dbg(&intf->dev, "MBIM functional descriptor missing\n");
+                       dev_err(&intf->dev, "MBIM functional descriptor missing\n");
                        goto error;
                }
        } else {
                if (!ctx->ether_desc || !ctx->func_desc) {
-                       dev_dbg(&intf->dev, "NCM or ECM functional descriptors missing\n");
+                       dev_err(&intf->dev, "NCM or ECM functional descriptors missing\n");
                        goto error;
                }
        }
@@ -870,7 +870,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
        if (ctx->data != ctx->control) {
                temp = usb_driver_claim_interface(driver, ctx->data, dev);
                if (temp) {
-                       dev_dbg(&intf->dev, "failed to claim data intf\n");
+                       dev_err(&intf->dev, "failed to claim data intf\n");
                        goto error;
                }
        }
@@ -926,7 +926,7 @@ int cdc_ncm_bind_common(struct usbnet *dev, struct usb_interface *intf, u8 data_
        if (ctx->ether_desc) {
                temp = usbnet_get_ethernet_addr(dev, ctx->ether_desc->iMACAddress);
                if (temp) {
-                       dev_dbg(&intf->dev, "failed to get mac address\n");
+                       dev_err(&intf->dev, "failed to get mac address\n");
                        goto error2;
                }
                dev_info(&intf->dev, "MAC-Address: %pM\n", dev->net->dev_addr);
index 6c3d8c2..6700f19 100644 (file)
@@ -429,13 +429,6 @@ static ssize_t add_mux_store(struct device *d,  struct device_attribute *attr, c
                goto err;
        }
 
-       /* we don't want to modify a running netdev */
-       if (netif_running(dev->net)) {
-               netdev_err(dev->net, "Cannot change a running device\n");
-               ret = -EBUSY;
-               goto err;
-       }
-
        ret = qmimux_register_device(dev->net, mux_id);
        if (!ret) {
                info->flags |= QMI_WWAN_FLAG_MUX;
@@ -465,13 +458,6 @@ static ssize_t del_mux_store(struct device *d,  struct device_attribute *attr, c
        if (!rtnl_trylock())
                return restart_syscall();
 
-       /* we don't want to modify a running netdev */
-       if (netif_running(dev->net)) {
-               netdev_err(dev->net, "Cannot change a running device\n");
-               ret = -EBUSY;
-               goto err;
-       }
-
        del_dev = qmimux_find_dev(dev, mux_id);
        if (!del_dev) {
                netdev_err(dev->net, "mux_id not present\n");
@@ -1318,6 +1304,7 @@ static const struct usb_device_id products[] = {
        {QMI_FIXED_INTF(0x19d2, 0x1255, 4)},
        {QMI_FIXED_INTF(0x19d2, 0x1256, 4)},
        {QMI_FIXED_INTF(0x19d2, 0x1270, 5)},    /* ZTE MF667 */
+       {QMI_FIXED_INTF(0x19d2, 0x1275, 3)},    /* ZTE P685M */
        {QMI_FIXED_INTF(0x19d2, 0x1401, 2)},
        {QMI_FIXED_INTF(0x19d2, 0x1402, 2)},    /* ZTE MF60 */
        {QMI_FIXED_INTF(0x19d2, 0x1424, 2)},
index 2d7cc63..20fb563 100644 (file)
@@ -2632,21 +2632,24 @@ static inline u8 rtl8152_get_speed(struct r8152 *tp)
        return ocp_read_byte(tp, MCU_TYPE_PLA, PLA_PHYSTATUS);
 }
 
-static void rtl_set_eee_plus(struct r8152 *tp)
+static void rtl_eee_plus_en(struct r8152 *tp, bool enable)
 {
        u32 ocp_data;
-       u8 speed;
 
-       speed = rtl8152_get_speed(tp);
-       if (speed & _10bps) {
-               ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR);
+       ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR);
+       if (enable)
                ocp_data |= EEEP_CR_EEEP_TX;
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data);
-       } else {
-               ocp_data = ocp_read_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR);
+       else
                ocp_data &= ~EEEP_CR_EEEP_TX;
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data);
-       }
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_EEEP_CR, ocp_data);
+}
+
+static void rtl_set_eee_plus(struct r8152 *tp)
+{
+       rtl_eee_plus_en(tp, !!(rtl8152_get_speed(tp) & _10bps));
 }
 
 static void rxdy_gated_en(struct r8152 *tp, bool enable)
@@ -3018,29 +3021,6 @@ static void __rtl_set_wol(struct r8152 *tp, u32 wolopts)
                device_set_wakeup_enable(&tp->udev->dev, false);
 }
 
-static void r8153_mac_clk_spd(struct r8152 *tp, bool enable)
-{
-       /* MAC clock speed down */
-       if (enable) {
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL,
-                              ALDPS_SPDWN_RATIO);
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2,
-                              EEE_SPDWN_RATIO);
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3,
-                              PKT_AVAIL_SPDWN_EN | SUSPEND_SPDWN_EN |
-                              U1U2_SPDWN_EN | L1_SPDWN_EN);
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4,
-                              PWRSAVE_SPDWN_EN | RXDV_SPDWN_EN | TX10MIDLE_EN |
-                              TP100_SPDWN_EN | TP500_SPDWN_EN | EEE_SPDWN_EN |
-                              TP1000_SPDWN_EN);
-       } else {
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
-               ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
-       }
-}
-
 static void r8153_u1u2en(struct r8152 *tp, bool enable)
 {
        u8 u1u2[8];
@@ -3150,10 +3130,22 @@ static void r8153b_ups_flags(struct r8152 *tp)
        ocp_write_dword(tp, MCU_TYPE_USB, USB_UPS_FLAGS, ups_flags);
 }
 
-static void r8153b_green_en(struct r8152 *tp, bool enable)
+static void rtl_green_en(struct r8152 *tp, bool enable)
 {
        u16 data;
 
+       data = sram_read(tp, SRAM_GREEN_CFG);
+       if (enable)
+               data |= GREEN_ETH_EN;
+       else
+               data &= ~GREEN_ETH_EN;
+       sram_write(tp, SRAM_GREEN_CFG, data);
+
+       tp->ups_info.green = enable;
+}
+
+static void r8153b_green_en(struct r8152 *tp, bool enable)
+{
        if (enable) {
                sram_write(tp, 0x8045, 0);      /* 10M abiq&ldvbias */
                sram_write(tp, 0x804d, 0x1222); /* 100M short abiq&ldvbias */
@@ -3164,11 +3156,7 @@ static void r8153b_green_en(struct r8152 *tp, bool enable)
                sram_write(tp, 0x805d, 0x2444); /* 1000M short abiq&ldvbias */
        }
 
-       data = sram_read(tp, SRAM_GREEN_CFG);
-       data |= GREEN_ETH_EN;
-       sram_write(tp, SRAM_GREEN_CFG, data);
-
-       tp->ups_info.green = enable;
+       rtl_green_en(tp, true);
 }
 
 static u16 r8153_phy_status(struct r8152 *tp, u16 desired)
@@ -3327,11 +3315,9 @@ static void rtl8153_runtime_enable(struct r8152 *tp, bool enable)
        if (enable) {
                r8153_u1u2en(tp, false);
                r8153_u2p3en(tp, false);
-               r8153_mac_clk_spd(tp, true);
                rtl_runtime_suspend_enable(tp, true);
        } else {
                rtl_runtime_suspend_enable(tp, false);
-               r8153_mac_clk_spd(tp, false);
 
                switch (tp->version) {
                case RTL_VER_03:
@@ -3360,7 +3346,7 @@ static void rtl8153b_runtime_enable(struct r8152 *tp, bool enable)
                r8153b_ups_en(tp, false);
                r8153_queue_wake(tp, false);
                rtl_runtime_suspend_enable(tp, false);
-               if (tp->udev->speed != USB_SPEED_HIGH)
+               if (tp->udev->speed >= USB_SPEED_SUPER)
                        r8153b_u1u2en(tp, true);
        }
 }
@@ -4707,7 +4693,6 @@ static void r8153_first_init(struct r8152 *tp)
 {
        u32 ocp_data;
 
-       r8153_mac_clk_spd(tp, false);
        rxdy_gated_en(tp, true);
        r8153_teredo_off(tp);
 
@@ -4758,8 +4743,6 @@ static void r8153_enter_oob(struct r8152 *tp)
 {
        u32 ocp_data;
 
-       r8153_mac_clk_spd(tp, true);
-
        ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL);
        ocp_data &= ~NOW_IS_OOB;
        ocp_write_byte(tp, MCU_TYPE_PLA, PLA_OOB_CTRL, ocp_data);
@@ -5056,7 +5039,7 @@ static void rtl8153b_up(struct r8152 *tp)
 
        r8153_aldps_en(tp, true);
 
-       if (tp->udev->speed != USB_SPEED_HIGH)
+       if (tp->udev->speed >= USB_SPEED_SUPER)
                r8153b_u1u2en(tp, true);
 }
 
@@ -5485,10 +5468,15 @@ static void r8153_init(struct r8152 *tp)
 
        ocp_write_word(tp, MCU_TYPE_USB, USB_CONNECT_TIMER, 0x0001);
 
+       /* MAC clock speed down */
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL, 0);
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL2, 0);
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL3, 0);
+       ocp_write_word(tp, MCU_TYPE_PLA, PLA_MAC_PWR_CTRL4, 0);
+
        r8153_power_cut_en(tp, false);
        rtl_runtime_suspend_enable(tp, false);
        r8153_u1u2en(tp, true);
-       r8153_mac_clk_spd(tp, false);
        usb_enable_lpm(tp->udev);
 
        ocp_data = ocp_read_byte(tp, MCU_TYPE_PLA, PLA_CONFIG6);
@@ -5572,8 +5560,9 @@ static void r8153b_init(struct r8152 *tp)
        ocp_data |= POLL_LINK_CHG;
        ocp_write_word(tp, MCU_TYPE_PLA, PLA_EXTRA_STATUS, ocp_data);
 
-       if (tp->udev->speed != USB_SPEED_HIGH)
+       if (tp->udev->speed >= USB_SPEED_SUPER)
                r8153b_u1u2en(tp, true);
+
        usb_enable_lpm(tp->udev);
 
        /* MAC clock speed down */
@@ -5756,6 +5745,9 @@ static int rtl8152_runtime_suspend(struct r8152 *tp)
        struct net_device *netdev = tp->netdev;
        int ret = 0;
 
+       if (!tp->rtl_ops.autosuspend_en)
+               return -EBUSY;
+
        set_bit(SELECTIVE_SUSPEND, &tp->flags);
        smp_mb__after_atomic();
 
@@ -6155,6 +6147,11 @@ rtl_ethtool_get_eee(struct net_device *net, struct ethtool_eee *edata)
        struct r8152 *tp = netdev_priv(net);
        int ret;
 
+       if (!tp->rtl_ops.eee_get) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
        ret = usb_autopm_get_interface(tp->intf);
        if (ret < 0)
                goto out;
@@ -6177,6 +6174,11 @@ rtl_ethtool_set_eee(struct net_device *net, struct ethtool_eee *edata)
        struct r8152 *tp = netdev_priv(net);
        int ret;
 
+       if (!tp->rtl_ops.eee_set) {
+               ret = -EOPNOTSUPP;
+               goto out;
+       }
+
        ret = usb_autopm_get_interface(tp->intf);
        if (ret < 0)
                goto out;
@@ -6551,7 +6553,10 @@ static int rtl_ops_init(struct r8152 *tp)
                ops->in_nway            = rtl8153_in_nway;
                ops->hw_phy_cfg         = r8153_hw_phy_cfg;
                ops->autosuspend_en     = rtl8153_runtime_enable;
-               tp->rx_buf_sz           = 32 * 1024;
+               if (tp->udev->speed < USB_SPEED_SUPER)
+                       tp->rx_buf_sz   = 16 * 1024;
+               else
+                       tp->rx_buf_sz   = 32 * 1024;
                tp->eee_en              = true;
                tp->eee_adv             = MDIO_EEE_1000T | MDIO_EEE_100TX;
                break;
@@ -6576,7 +6581,7 @@ static int rtl_ops_init(struct r8152 *tp)
 
        default:
                ret = -ENODEV;
-               netif_err(tp, probe, tp->netdev, "Unknown Device\n");
+               dev_err(&tp->intf->dev, "Unknown Device\n");
                break;
        }
 
@@ -6833,7 +6838,7 @@ static int rtl8152_probe(struct usb_interface *intf,
 
        ret = register_netdev(netdev);
        if (ret != 0) {
-               netif_err(tp, probe, netdev, "couldn't register the device\n");
+               dev_err(&intf->dev, "couldn't register the device\n");
                goto out1;
        }
 
index b4c8080..f4f37ec 100644 (file)
@@ -887,7 +887,7 @@ int usbnet_open (struct net_device *net)
 
        // insist peer be connected
        if (info->check_connect && (retval = info->check_connect (dev)) < 0) {
-               netif_dbg(dev, ifup, dev->net, "can't open; %d\n", retval);
+               netif_err(dev, ifup, dev->net, "can't open; %d\n", retval);
                goto done;
        }
 
index aa1a66a..34e49c7 100644 (file)
@@ -302,8 +302,7 @@ static netdev_tx_t veth_xmit(struct sk_buff *skb, struct net_device *dev)
        if (rxq < rcv->real_num_rx_queues) {
                rq = &rcv_priv->rq[rxq];
                rcv_xdp = rcu_access_pointer(rq->xdp_prog);
-               if (rcv_xdp)
-                       skb_record_rx_queue(skb, rxq);
+               skb_record_rx_queue(skb, rxq);
        }
 
        skb_tx_timestamp(skb);
index ba8e637..82e520d 100644 (file)
@@ -729,6 +729,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
                        fallthrough;
                case XDP_ABORTED:
                        trace_xdp_exception(vi->dev, xdp_prog, act);
+                       goto err_xdp;
                case XDP_DROP:
                        goto err_xdp;
                }
index 3929e43..666dd20 100644 (file)
@@ -4721,7 +4721,6 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
        struct vxlan_net *vn = net_generic(net, vxlan_net_id);
        struct vxlan_dev *vxlan, *next;
        struct net_device *dev, *aux;
-       unsigned int h;
 
        for_each_netdev_safe(net, dev, aux)
                if (dev->rtnl_link_ops == &vxlan_link_ops)
@@ -4735,14 +4734,13 @@ static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
                        unregister_netdevice_queue(vxlan->dev, head);
        }
 
-       for (h = 0; h < PORT_HASH_SIZE; ++h)
-               WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
 }
 
 static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
 {
        struct net *net;
        LIST_HEAD(list);
+       unsigned int h;
 
        rtnl_lock();
        list_for_each_entry(net, net_list, exit_list) {
@@ -4755,6 +4753,13 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
 
        unregister_netdevice_many(&list);
        rtnl_unlock();
+
+       list_for_each_entry(net, net_list, exit_list) {
+               struct vxlan_net *vn = net_generic(net, vxlan_net_id);
+
+               for (h = 0; h < PORT_HASH_SIZE; ++h)
+                       WARN_ON_ONCE(!hlist_empty(&vn->sock_list[h]));
+       }
 }
 
 static struct pernet_operations vxlan_net_ops = {
index dca97cd..7eac6a3 100644 (file)
@@ -204,14 +204,18 @@ static int uhdlc_init(struct ucc_hdlc_private *priv)
        priv->rx_skbuff = kcalloc(priv->rx_ring_size,
                                  sizeof(*priv->rx_skbuff),
                                  GFP_KERNEL);
-       if (!priv->rx_skbuff)
+       if (!priv->rx_skbuff) {
+               ret = -ENOMEM;
                goto free_ucc_pram;
+       }
 
        priv->tx_skbuff = kcalloc(priv->tx_ring_size,
                                  sizeof(*priv->tx_skbuff),
                                  GFP_KERNEL);
-       if (!priv->tx_skbuff)
+       if (!priv->tx_skbuff) {
+               ret = -ENOMEM;
                goto free_rx_skbuff;
+       }
 
        priv->skb_curtx = 0;
        priv->skb_dirtytx = 0;
index 4aaa638..5a6a945 100644 (file)
@@ -23,6 +23,8 @@
 
 struct x25_state {
        x25_hdlc_proto settings;
+       bool up;
+       spinlock_t up_lock; /* Protects "up" */
 };
 
 static int x25_ioctl(struct net_device *dev, struct ifreq *ifr);
@@ -104,6 +106,8 @@ static void x25_data_transmit(struct net_device *dev, struct sk_buff *skb)
 
 static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev)
 {
+       hdlc_device *hdlc = dev_to_hdlc(dev);
+       struct x25_state *x25st = state(hdlc);
        int result;
 
        /* There should be a pseudo header of 1 byte added by upper layers.
@@ -114,11 +118,19 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev)
                return NETDEV_TX_OK;
        }
 
+       spin_lock_bh(&x25st->up_lock);
+       if (!x25st->up) {
+               spin_unlock_bh(&x25st->up_lock);
+               kfree_skb(skb);
+               return NETDEV_TX_OK;
+       }
+
        switch (skb->data[0]) {
        case X25_IFACE_DATA:    /* Data to be transmitted */
                skb_pull(skb, 1);
                if ((result = lapb_data_request(dev, skb)) != LAPB_OK)
                        dev_kfree_skb(skb);
+               spin_unlock_bh(&x25st->up_lock);
                return NETDEV_TX_OK;
 
        case X25_IFACE_CONNECT:
@@ -147,6 +159,7 @@ static netdev_tx_t x25_xmit(struct sk_buff *skb, struct net_device *dev)
                break;
        }
 
+       spin_unlock_bh(&x25st->up_lock);
        dev_kfree_skb(skb);
        return NETDEV_TX_OK;
 }
@@ -164,6 +177,7 @@ static int x25_open(struct net_device *dev)
                .data_transmit = x25_data_transmit,
        };
        hdlc_device *hdlc = dev_to_hdlc(dev);
+       struct x25_state *x25st = state(hdlc);
        struct lapb_parms_struct params;
        int result;
 
@@ -190,6 +204,10 @@ static int x25_open(struct net_device *dev)
        if (result != LAPB_OK)
                return -EINVAL;
 
+       spin_lock_bh(&x25st->up_lock);
+       x25st->up = true;
+       spin_unlock_bh(&x25st->up_lock);
+
        return 0;
 }
 
@@ -197,6 +215,13 @@ static int x25_open(struct net_device *dev)
 
 static void x25_close(struct net_device *dev)
 {
+       hdlc_device *hdlc = dev_to_hdlc(dev);
+       struct x25_state *x25st = state(hdlc);
+
+       spin_lock_bh(&x25st->up_lock);
+       x25st->up = false;
+       spin_unlock_bh(&x25st->up_lock);
+
        lapb_unregister(dev);
 }
 
@@ -205,15 +230,28 @@ static void x25_close(struct net_device *dev)
 static int x25_rx(struct sk_buff *skb)
 {
        struct net_device *dev = skb->dev;
+       hdlc_device *hdlc = dev_to_hdlc(dev);
+       struct x25_state *x25st = state(hdlc);
 
        if ((skb = skb_share_check(skb, GFP_ATOMIC)) == NULL) {
                dev->stats.rx_dropped++;
                return NET_RX_DROP;
        }
 
-       if (lapb_data_received(dev, skb) == LAPB_OK)
+       spin_lock_bh(&x25st->up_lock);
+       if (!x25st->up) {
+               spin_unlock_bh(&x25st->up_lock);
+               kfree_skb(skb);
+               dev->stats.rx_dropped++;
+               return NET_RX_DROP;
+       }
+
+       if (lapb_data_received(dev, skb) == LAPB_OK) {
+               spin_unlock_bh(&x25st->up_lock);
                return NET_RX_SUCCESS;
+       }
 
+       spin_unlock_bh(&x25st->up_lock);
        dev->stats.rx_errors++;
        dev_kfree_skb_any(skb);
        return NET_RX_DROP;
@@ -298,6 +336,8 @@ static int x25_ioctl(struct net_device *dev, struct ifreq *ifr)
                        return result;
 
                memcpy(&state(hdlc)->settings, &new_settings, size);
+               state(hdlc)->up = false;
+               spin_lock_init(&state(hdlc)->up_lock);
 
                /* There's no header_ops so hard_header_len should be 0. */
                dev->hard_header_len = 0;
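
The pattern threaded through this file is a teardown guard: a bool "up" plus a spinlock, taken in BH context on both transmit and receive, so that once x25_close() clears the flag no packet can reach the lapb_* layer on an unregistered device. Reduced to its skeleton (kernel-style, with a hypothetical do_io() callback; the lock is initialized with spin_lock_init() at setup, as in the ioctl above):

#include <linux/spinlock.h>
#include <linux/skbuff.h>

struct proto_state {
	bool up;
	spinlock_t up_lock;	/* protects "up" */
};

/* Data path: drop the packet unless the protocol is up. */
static int proto_xmit(struct proto_state *st, struct sk_buff *skb,
		      int (*do_io)(struct sk_buff *skb))
{
	int ret;

	spin_lock_bh(&st->up_lock);
	if (!st->up) {
		spin_unlock_bh(&st->up_lock);
		kfree_skb(skb);
		return 0;
	}
	ret = do_io(skb);
	spin_unlock_bh(&st->up_lock);
	return ret;
}

static void proto_close(struct proto_state *st)
{
	spin_lock_bh(&st->up_lock);
	/* in-flight data-path calls have drained once we hold the lock */
	st->up = false;
	spin_unlock_bh(&st->up_lock);
}
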
index 605fe55..c337249 100644 (file)
@@ -292,7 +292,6 @@ static int lapbeth_open(struct net_device *dev)
                return -ENODEV;
        }
 
-       netif_start_queue(dev);
        return 0;
 }
 
@@ -300,8 +299,6 @@ static int lapbeth_close(struct net_device *dev)
 {
        int err;
 
-       netif_stop_queue(dev);
-
        if ((err = lapb_unregister(dev)) != LAPB_OK)
                pr_err("lapb_unregister error: %d\n", err);
 
index a3ed49c..551ddaa 100644 (file)
@@ -138,7 +138,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
                else if (skb->protocol == htons(ETH_P_IPV6))
                        net_dbg_ratelimited("%s: No peer has allowed IPs matching %pI6\n",
                                            dev->name, &ipv6_hdr(skb)->daddr);
-               goto err;
+               goto err_icmp;
        }
 
        family = READ_ONCE(peer->endpoint.addr.sa_family);
@@ -157,7 +157,7 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
        } else {
                struct sk_buff *segs = skb_gso_segment(skb, 0);
 
-               if (unlikely(IS_ERR(segs))) {
+               if (IS_ERR(segs)) {
                        ret = PTR_ERR(segs);
                        goto err_peer;
                }
@@ -201,12 +201,13 @@ static netdev_tx_t wg_xmit(struct sk_buff *skb, struct net_device *dev)
 
 err_peer:
        wg_peer_put(peer);
-err:
-       ++dev->stats.tx_errors;
+err_icmp:
        if (skb->protocol == htons(ETH_P_IP))
                icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_HOST_UNREACH, 0);
        else if (skb->protocol == htons(ETH_P_IPV6))
                icmpv6_ndo_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH, 0);
+err:
+       ++dev->stats.tx_errors;
        kfree_skb(skb);
        return ret;
 }
@@ -234,8 +235,8 @@ static void wg_destruct(struct net_device *dev)
        destroy_workqueue(wg->handshake_receive_wq);
        destroy_workqueue(wg->handshake_send_wq);
        destroy_workqueue(wg->packet_crypt_wq);
-       wg_packet_queue_free(&wg->decrypt_queue, true);
-       wg_packet_queue_free(&wg->encrypt_queue, true);
+       wg_packet_queue_free(&wg->decrypt_queue);
+       wg_packet_queue_free(&wg->encrypt_queue);
        rcu_barrier(); /* Wait for all the peers to be actually freed. */
        wg_ratelimiter_uninit();
        memzero_explicit(&wg->static_identity, sizeof(wg->static_identity));
@@ -337,12 +338,12 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
                goto err_destroy_handshake_send;
 
        ret = wg_packet_queue_init(&wg->encrypt_queue, wg_packet_encrypt_worker,
-                                  true, MAX_QUEUED_PACKETS);
+                                  MAX_QUEUED_PACKETS);
        if (ret < 0)
                goto err_destroy_packet_crypt;
 
        ret = wg_packet_queue_init(&wg->decrypt_queue, wg_packet_decrypt_worker,
-                                  true, MAX_QUEUED_PACKETS);
+                                  MAX_QUEUED_PACKETS);
        if (ret < 0)
                goto err_free_encrypt_queue;
 
@@ -367,9 +368,9 @@ static int wg_newlink(struct net *src_net, struct net_device *dev,
 err_uninit_ratelimiter:
        wg_ratelimiter_uninit();
 err_free_decrypt_queue:
-       wg_packet_queue_free(&wg->decrypt_queue, true);
+       wg_packet_queue_free(&wg->decrypt_queue);
 err_free_encrypt_queue:
-       wg_packet_queue_free(&wg->encrypt_queue, true);
+       wg_packet_queue_free(&wg->encrypt_queue);
 err_destroy_packet_crypt:
        destroy_workqueue(wg->packet_crypt_wq);
 err_destroy_handshake_send:
index 4d0144e..854bc3d 100644 (file)
@@ -27,13 +27,14 @@ struct multicore_worker {
 
 struct crypt_queue {
        struct ptr_ring ring;
-       union {
-               struct {
-                       struct multicore_worker __percpu *worker;
-                       int last_cpu;
-               };
-               struct work_struct work;
-       };
+       struct multicore_worker __percpu *worker;
+       int last_cpu;
+};
+
+struct prev_queue {
+       struct sk_buff *head, *tail, *peeked;
+       struct { struct sk_buff *next, *prev; } empty; // Match first 2 members of struct sk_buff.
+       atomic_t count;
 };
 
 struct wg_device {
index b3b6370..cd5cb02 100644 (file)
@@ -32,27 +32,22 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
        peer = kzalloc(sizeof(*peer), GFP_KERNEL);
        if (unlikely(!peer))
                return ERR_PTR(ret);
-       peer->device = wg;
+       if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
+               goto err;
 
+       peer->device = wg;
        wg_noise_handshake_init(&peer->handshake, &wg->static_identity,
                                public_key, preshared_key, peer);
-       if (dst_cache_init(&peer->endpoint_cache, GFP_KERNEL))
-               goto err_1;
-       if (wg_packet_queue_init(&peer->tx_queue, wg_packet_tx_worker, false,
-                                MAX_QUEUED_PACKETS))
-               goto err_2;
-       if (wg_packet_queue_init(&peer->rx_queue, NULL, false,
-                                MAX_QUEUED_PACKETS))
-               goto err_3;
-
        peer->internal_id = atomic64_inc_return(&peer_counter);
        peer->serial_work_cpu = nr_cpumask_bits;
        wg_cookie_init(&peer->latest_cookie);
        wg_timers_init(peer);
        wg_cookie_checker_precompute_peer_keys(peer);
        spin_lock_init(&peer->keypairs.keypair_update_lock);
-       INIT_WORK(&peer->transmit_handshake_work,
-                 wg_packet_handshake_send_worker);
+       INIT_WORK(&peer->transmit_handshake_work, wg_packet_handshake_send_worker);
+       INIT_WORK(&peer->transmit_packet_work, wg_packet_tx_worker);
+       wg_prev_queue_init(&peer->tx_queue);
+       wg_prev_queue_init(&peer->rx_queue);
        rwlock_init(&peer->endpoint_lock);
        kref_init(&peer->refcount);
        skb_queue_head_init(&peer->staged_packet_queue);
@@ -68,11 +63,7 @@ struct wg_peer *wg_peer_create(struct wg_device *wg,
        pr_debug("%s: Peer %llu created\n", wg->dev->name, peer->internal_id);
        return peer;
 
-err_3:
-       wg_packet_queue_free(&peer->tx_queue, false);
-err_2:
-       dst_cache_destroy(&peer->endpoint_cache);
-err_1:
+err:
        kfree(peer);
        return ERR_PTR(ret);
 }
@@ -197,8 +188,7 @@ static void rcu_release(struct rcu_head *rcu)
        struct wg_peer *peer = container_of(rcu, struct wg_peer, rcu);
 
        dst_cache_destroy(&peer->endpoint_cache);
-       wg_packet_queue_free(&peer->rx_queue, false);
-       wg_packet_queue_free(&peer->tx_queue, false);
+       WARN_ON(wg_prev_queue_peek(&peer->tx_queue) || wg_prev_queue_peek(&peer->rx_queue));
 
        /* The final zeroing takes care of clearing any remaining handshake key
         * material and other potentially sensitive information.
index 23af409..8d53b68 100644 (file)
@@ -36,16 +36,17 @@ struct endpoint {
 
 struct wg_peer {
        struct wg_device *device;
-       struct crypt_queue tx_queue, rx_queue;
+       struct prev_queue tx_queue, rx_queue;
        struct sk_buff_head staged_packet_queue;
        int serial_work_cpu;
+       bool is_dead;
        struct noise_keypairs keypairs;
        struct endpoint endpoint;
        struct dst_cache endpoint_cache;
        rwlock_t endpoint_lock;
        struct noise_handshake handshake;
        atomic64_t last_sent_handshake;
-       struct work_struct transmit_handshake_work, clear_peer_work;
+       struct work_struct transmit_handshake_work, clear_peer_work, transmit_packet_work;
        struct cookie latest_cookie;
        struct hlist_node pubkey_hash;
        u64 rx_bytes, tx_bytes;
@@ -61,9 +62,8 @@ struct wg_peer {
        struct rcu_head rcu;
        struct list_head peer_list;
        struct list_head allowedips_list;
-       u64 internal_id;
        struct napi_struct napi;
-       bool is_dead;
+       u64 internal_id;
 };
 
 struct wg_peer *wg_peer_create(struct wg_device *wg,
index 71b8e80..48e7b98 100644 (file)
@@ -9,8 +9,7 @@ struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
 {
        int cpu;
-       struct multicore_worker __percpu *worker =
-               alloc_percpu(struct multicore_worker);
+       struct multicore_worker __percpu *worker = alloc_percpu(struct multicore_worker);
 
        if (!worker)
                return NULL;
@@ -23,7 +22,7 @@ wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr)
 }
 
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
-                        bool multicore, unsigned int len)
+                        unsigned int len)
 {
        int ret;
 
@@ -31,25 +30,78 @@ int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
        ret = ptr_ring_init(&queue->ring, len, GFP_KERNEL);
        if (ret)
                return ret;
-       if (function) {
-               if (multicore) {
-                       queue->worker = wg_packet_percpu_multicore_worker_alloc(
-                               function, queue);
-                       if (!queue->worker) {
-                               ptr_ring_cleanup(&queue->ring, NULL);
-                               return -ENOMEM;
-                       }
-               } else {
-                       INIT_WORK(&queue->work, function);
-               }
+       queue->worker = wg_packet_percpu_multicore_worker_alloc(function, queue);
+       if (!queue->worker) {
+               ptr_ring_cleanup(&queue->ring, NULL);
+               return -ENOMEM;
        }
        return 0;
 }
 
-void wg_packet_queue_free(struct crypt_queue *queue, bool multicore)
+void wg_packet_queue_free(struct crypt_queue *queue)
 {
-       if (multicore)
-               free_percpu(queue->worker);
+       free_percpu(queue->worker);
        WARN_ON(!__ptr_ring_empty(&queue->ring));
        ptr_ring_cleanup(&queue->ring, NULL);
 }
+
+#define NEXT(skb) ((skb)->prev)
+#define STUB(queue) ((struct sk_buff *)&queue->empty)
+
+void wg_prev_queue_init(struct prev_queue *queue)
+{
+       NEXT(STUB(queue)) = NULL;
+       queue->head = queue->tail = STUB(queue);
+       queue->peeked = NULL;
+       atomic_set(&queue->count, 0);
+       BUILD_BUG_ON(
+               offsetof(struct sk_buff, next) != offsetof(struct prev_queue, empty.next) -
+                                                       offsetof(struct prev_queue, empty) ||
+               offsetof(struct sk_buff, prev) != offsetof(struct prev_queue, empty.prev) -
+                                                        offsetof(struct prev_queue, empty));
+}
+
+static void __wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb)
+{
+       WRITE_ONCE(NEXT(skb), NULL);
+       WRITE_ONCE(NEXT(xchg_release(&queue->head, skb)), skb);
+}
+
+bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb)
+{
+       if (!atomic_add_unless(&queue->count, 1, MAX_QUEUED_PACKETS))
+               return false;
+       __wg_prev_queue_enqueue(queue, skb);
+       return true;
+}
+
+struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue)
+{
+       struct sk_buff *tail = queue->tail, *next = smp_load_acquire(&NEXT(tail));
+
+       if (tail == STUB(queue)) {
+               if (!next)
+                       return NULL;
+               queue->tail = next;
+               tail = next;
+               next = smp_load_acquire(&NEXT(next));
+       }
+       if (next) {
+               queue->tail = next;
+               atomic_dec(&queue->count);
+               return tail;
+       }
+       if (tail != READ_ONCE(queue->head))
+               return NULL;
+       __wg_prev_queue_enqueue(queue, STUB(queue));
+       next = smp_load_acquire(&NEXT(tail));
+       if (next) {
+               queue->tail = next;
+               atomic_dec(&queue->count);
+               return tail;
+       }
+       return NULL;
+}
+
+#undef NEXT
+#undef STUB
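
A note on the structure introduced above: wg_prev_queue is a stub-node intrusive MPSC queue in the style of Vyukov's design, reusing skb->prev as the link pointer so enqueue needs no allocation. Producers publish with a release-ordered xchg on head followed by a link store; the single consumer chases tail and re-enqueues the stub node whenever it drains past the last real element. A stand-alone sketch of the same scheme with plain nodes and compiler atomics — illustrative only, assuming nothing beyond GCC/Clang __atomic builtins, and omitting the atomic count cap that wg_prev_queue_enqueue() adds on top:

    #include <stddef.h>

    struct node { struct node *next; };

    struct mpsc {
            struct node *head;      /* producers swing this (release) */
            struct node *tail;      /* consumer-private */
            struct node stub;       /* empty <=> tail == &stub */
    };

    static void mpsc_init(struct mpsc *q)
    {
            q->stub.next = NULL;
            q->head = q->tail = &q->stub;
    }

    /* Multi-producer: wait-free, two atomic stores. */
    static void mpsc_push(struct mpsc *q, struct node *n)
    {
            struct node *prev;

            __atomic_store_n(&n->next, NULL, __ATOMIC_RELAXED);
            prev = __atomic_exchange_n(&q->head, n, __ATOMIC_RELEASE);
            __atomic_store_n(&prev->next, n, __ATOMIC_RELEASE);
    }

    /* Single consumer. */
    static struct node *mpsc_pop(struct mpsc *q)
    {
            struct node *tail = q->tail;
            struct node *next = __atomic_load_n(&tail->next, __ATOMIC_ACQUIRE);

            if (tail == &q->stub) {         /* skip over the stub */
                    if (!next)
                            return NULL;
                    q->tail = tail = next;
                    next = __atomic_load_n(&tail->next, __ATOMIC_ACQUIRE);
            }
            if (next) {
                    q->tail = next;
                    return tail;
            }
            /* tail is the last linked node; if head has moved past it,
             * a producer is mid-push, so report empty and let the
             * caller retry (the kernel version behaves the same way).
             */
            if (tail != __atomic_load_n(&q->head, __ATOMIC_RELAXED))
                    return NULL;
            mpsc_push(q, &q->stub);         /* re-seed the stub */
            next = __atomic_load_n(&tail->next, __ATOMIC_ACQUIRE);
            if (!next)
                    return NULL;
            q->tail = next;
            return tail;
    }

The NULL return in the tail != head case covers the window between a producer's xchg and its link store; the caller simply retries later, which is why the consumer side of this API is built around peek rather than a blocking pop.
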
index dfb674e..4ef2944 100644 (file)
@@ -17,12 +17,13 @@ struct wg_device;
 struct wg_peer;
 struct multicore_worker;
 struct crypt_queue;
+struct prev_queue;
 struct sk_buff;
 
 /* queueing.c APIs: */
 int wg_packet_queue_init(struct crypt_queue *queue, work_func_t function,
-                        bool multicore, unsigned int len);
-void wg_packet_queue_free(struct crypt_queue *queue, bool multicore);
+                        unsigned int len);
+void wg_packet_queue_free(struct crypt_queue *queue);
 struct multicore_worker __percpu *
 wg_packet_percpu_multicore_worker_alloc(work_func_t function, void *ptr);
 
@@ -135,8 +136,31 @@ static inline int wg_cpumask_next_online(int *next)
        return cpu;
 }
 
+void wg_prev_queue_init(struct prev_queue *queue);
+
+/* Multi producer */
+bool wg_prev_queue_enqueue(struct prev_queue *queue, struct sk_buff *skb);
+
+/* Single consumer */
+struct sk_buff *wg_prev_queue_dequeue(struct prev_queue *queue);
+
+/* Single consumer */
+static inline struct sk_buff *wg_prev_queue_peek(struct prev_queue *queue)
+{
+       if (queue->peeked)
+               return queue->peeked;
+       queue->peeked = wg_prev_queue_dequeue(queue);
+       return queue->peeked;
+}
+
+/* Single consumer */
+static inline void wg_prev_queue_drop_peeked(struct prev_queue *queue)
+{
+       queue->peeked = NULL;
+}
+
 static inline int wg_queue_enqueue_per_device_and_peer(
-       struct crypt_queue *device_queue, struct crypt_queue *peer_queue,
+       struct crypt_queue *device_queue, struct prev_queue *peer_queue,
        struct sk_buff *skb, struct workqueue_struct *wq, int *next_cpu)
 {
        int cpu;
@@ -145,8 +169,9 @@ static inline int wg_queue_enqueue_per_device_and_peer(
        /* We first queue this up for peer ingestion, but the consumer
         * will wait for the state to change to CRYPTED or DEAD before dequeueing.
         */
-       if (unlikely(ptr_ring_produce_bh(&peer_queue->ring, skb)))
+       if (unlikely(!wg_prev_queue_enqueue(peer_queue, skb)))
                return -ENOSPC;
+
        /* Then we queue it up in the device queue, which consumes the
         * packet as soon as it can.
         */
@@ -157,9 +182,7 @@ static inline int wg_queue_enqueue_per_device_and_peer(
        return 0;
 }
 
-static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
-                                            struct sk_buff *skb,
-                                            enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_tx(struct sk_buff *skb, enum packet_state state)
 {
        /* We take a reference, because as soon as we call atomic_set, the
         * peer can be freed from below us.
@@ -167,14 +190,12 @@ static inline void wg_queue_enqueue_per_peer(struct crypt_queue *queue,
        struct wg_peer *peer = wg_peer_get(PACKET_PEER(skb));
 
        atomic_set_release(&PACKET_CB(skb)->state, state);
-       queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu,
-                                              peer->internal_id),
-                     peer->device->packet_crypt_wq, &queue->work);
+       queue_work_on(wg_cpumask_choose_online(&peer->serial_work_cpu, peer->internal_id),
+                     peer->device->packet_crypt_wq, &peer->transmit_packet_work);
        wg_peer_put(peer);
 }
 
-static inline void wg_queue_enqueue_per_peer_napi(struct sk_buff *skb,
-                                                 enum packet_state state)
+static inline void wg_queue_enqueue_per_peer_rx(struct sk_buff *skb, enum packet_state state)
 {
        /* We take a reference, because as soon as we call atomic_set, the
         * peer can be freed from below us.
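
The peek/drop pair above completes the consumer side of that structure: the head is dequeued once, cached in queue->peeked, and only forgotten when the caller commits with drop_peeked(). That matters because the head packet may still be in flight on another CPU (state UNCRYPTED), in which case the workers in the hunks below leave it peeked and try again on the next pass. Continuing the stand-alone sketch from the queueing.c hunk (hypothetical mpsc_* names, not kernel API):

    /* Single-consumer peek/drop on top of mpsc_pop() above: peek
     * caches the dequeued node so the head can be examined repeatedly
     * before the consumer commits with drop_peeked().
     */
    struct mpsc_peek {
            struct mpsc q;
            struct node *peeked;
    };

    static struct node *mpsc_peek_head(struct mpsc_peek *p)
    {
            if (!p->peeked)
                    p->peeked = mpsc_pop(&p->q);
            return p->peeked;
    }

    static void mpsc_drop_peeked(struct mpsc_peek *p)
    {
            p->peeked = NULL;
    }
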
index 2c9551e..7dc84bc 100644 (file)
@@ -444,7 +444,6 @@ packet_processed:
 int wg_packet_rx_poll(struct napi_struct *napi, int budget)
 {
        struct wg_peer *peer = container_of(napi, struct wg_peer, napi);
-       struct crypt_queue *queue = &peer->rx_queue;
        struct noise_keypair *keypair;
        struct endpoint endpoint;
        enum packet_state state;
@@ -455,11 +454,10 @@ int wg_packet_rx_poll(struct napi_struct *napi, int budget)
        if (unlikely(budget <= 0))
                return 0;
 
-       while ((skb = __ptr_ring_peek(&queue->ring)) != NULL &&
+       while ((skb = wg_prev_queue_peek(&peer->rx_queue)) != NULL &&
               (state = atomic_read_acquire(&PACKET_CB(skb)->state)) !=
                       PACKET_STATE_UNCRYPTED) {
-               __ptr_ring_discard_one(&queue->ring);
-               peer = PACKET_PEER(skb);
+               wg_prev_queue_drop_peeked(&peer->rx_queue);
                keypair = PACKET_CB(skb)->keypair;
                free = true;
 
@@ -508,7 +506,7 @@ void wg_packet_decrypt_worker(struct work_struct *work)
                enum packet_state state =
                        likely(decrypt_packet(skb, PACKET_CB(skb)->keypair)) ?
                                PACKET_STATE_CRYPTED : PACKET_STATE_DEAD;
-               wg_queue_enqueue_per_peer_napi(skb, state);
+               wg_queue_enqueue_per_peer_rx(skb, state);
                if (need_resched())
                        cond_resched();
        }
@@ -531,12 +529,10 @@ static void wg_packet_consume_data(struct wg_device *wg, struct sk_buff *skb)
        if (unlikely(READ_ONCE(peer->is_dead)))
                goto err;
 
-       ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue,
-                                                  &peer->rx_queue, skb,
-                                                  wg->packet_crypt_wq,
-                                                  &wg->decrypt_queue.last_cpu);
+       ret = wg_queue_enqueue_per_device_and_peer(&wg->decrypt_queue, &peer->rx_queue, skb,
+                                                  wg->packet_crypt_wq, &wg->decrypt_queue.last_cpu);
        if (unlikely(ret == -EPIPE))
-               wg_queue_enqueue_per_peer_napi(skb, PACKET_STATE_DEAD);
+               wg_queue_enqueue_per_peer_rx(skb, PACKET_STATE_DEAD);
        if (likely(!ret || ret == -EPIPE)) {
                rcu_read_unlock_bh();
                return;
index f74b934..5368f7c 100644 (file)
@@ -239,8 +239,7 @@ void wg_packet_send_keepalive(struct wg_peer *peer)
        wg_packet_send_staged_packets(peer);
 }
 
-static void wg_packet_create_data_done(struct sk_buff *first,
-                                      struct wg_peer *peer)
+static void wg_packet_create_data_done(struct wg_peer *peer, struct sk_buff *first)
 {
        struct sk_buff *skb, *next;
        bool is_keepalive, data_sent = false;
@@ -262,22 +261,19 @@ static void wg_packet_create_data_done(struct sk_buff *first,
 
 void wg_packet_tx_worker(struct work_struct *work)
 {
-       struct crypt_queue *queue = container_of(work, struct crypt_queue,
-                                                work);
+       struct wg_peer *peer = container_of(work, struct wg_peer, transmit_packet_work);
        struct noise_keypair *keypair;
        enum packet_state state;
        struct sk_buff *first;
-       struct wg_peer *peer;
 
-       while ((first = __ptr_ring_peek(&queue->ring)) != NULL &&
+       while ((first = wg_prev_queue_peek(&peer->tx_queue)) != NULL &&
               (state = atomic_read_acquire(&PACKET_CB(first)->state)) !=
                       PACKET_STATE_UNCRYPTED) {
-               __ptr_ring_discard_one(&queue->ring);
-               peer = PACKET_PEER(first);
+               wg_prev_queue_drop_peeked(&peer->tx_queue);
                keypair = PACKET_CB(first)->keypair;
 
                if (likely(state == PACKET_STATE_CRYPTED))
-                       wg_packet_create_data_done(first, peer);
+                       wg_packet_create_data_done(peer, first);
                else
                        kfree_skb_list(first);
 
@@ -306,16 +302,14 @@ void wg_packet_encrypt_worker(struct work_struct *work)
                                break;
                        }
                }
-               wg_queue_enqueue_per_peer(&PACKET_PEER(first)->tx_queue, first,
-                                         state);
+               wg_queue_enqueue_per_peer_tx(first, state);
                if (need_resched())
                        cond_resched();
        }
 }
 
-static void wg_packet_create_data(struct sk_buff *first)
+static void wg_packet_create_data(struct wg_peer *peer, struct sk_buff *first)
 {
-       struct wg_peer *peer = PACKET_PEER(first);
        struct wg_device *wg = peer->device;
        int ret = -EINVAL;
 
@@ -323,13 +317,10 @@ static void wg_packet_create_data(struct sk_buff *first)
        if (unlikely(READ_ONCE(peer->is_dead)))
                goto err;
 
-       ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue,
-                                                  &peer->tx_queue, first,
-                                                  wg->packet_crypt_wq,
-                                                  &wg->encrypt_queue.last_cpu);
+       ret = wg_queue_enqueue_per_device_and_peer(&wg->encrypt_queue, &peer->tx_queue, first,
+                                                  wg->packet_crypt_wq, &wg->encrypt_queue.last_cpu);
        if (unlikely(ret == -EPIPE))
-               wg_queue_enqueue_per_peer(&peer->tx_queue, first,
-                                         PACKET_STATE_DEAD);
+               wg_queue_enqueue_per_peer_tx(first, PACKET_STATE_DEAD);
 err:
        rcu_read_unlock_bh();
        if (likely(!ret || ret == -EPIPE))
@@ -393,7 +384,7 @@ void wg_packet_send_staged_packets(struct wg_peer *peer)
        packets.prev->next = NULL;
        wg_peer_get(keypair->entry.peer);
        PACKET_CB(packets.next)->keypair = keypair;
-       wg_packet_create_data(packets.next);
+       wg_packet_create_data(peer, packets.next);
        return;
 
 out_invalid:
index 410b318..d9ad850 100644 (file)
@@ -53,7 +53,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
                if (unlikely(!inet_confirm_addr(sock_net(sock), NULL, 0,
                                                fl.saddr, RT_SCOPE_HOST))) {
                        endpoint->src4.s_addr = 0;
-                       *(__force __be32 *)&endpoint->src_if4 = 0;
+                       endpoint->src_if4 = 0;
                        fl.saddr = 0;
                        if (cache)
                                dst_cache_reset(cache);
@@ -63,7 +63,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
                             PTR_ERR(rt) == -EINVAL) || (!IS_ERR(rt) &&
                             rt->dst.dev->ifindex != endpoint->src_if4)))) {
                        endpoint->src4.s_addr = 0;
-                       *(__force __be32 *)&endpoint->src_if4 = 0;
+                       endpoint->src_if4 = 0;
                        fl.saddr = 0;
                        if (cache)
                                dst_cache_reset(cache);
@@ -71,7 +71,7 @@ static int send4(struct wg_device *wg, struct sk_buff *skb,
                                ip_rt_put(rt);
                        rt = ip_route_output_flow(sock_net(sock), &fl, sock);
                }
-               if (unlikely(IS_ERR(rt))) {
+               if (IS_ERR(rt)) {
                        ret = PTR_ERR(rt);
                        net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
                                            wg->dev->name, &endpoint->addr, ret);
@@ -138,7 +138,7 @@ static int send6(struct wg_device *wg, struct sk_buff *skb,
                }
                dst = ipv6_stub->ipv6_dst_lookup_flow(sock_net(sock), sock, &fl,
                                                      NULL);
-               if (unlikely(IS_ERR(dst))) {
+               if (IS_ERR(dst)) {
                        ret = PTR_ERR(dst);
                        net_dbg_ratelimited("%s: No route to %pISpfsc, error %d\n",
                                            wg->dev->name, &endpoint->addr, ret);
index c41e725..2db9c94 100644 (file)
@@ -28,7 +28,6 @@
 MODULE_AUTHOR("Michael Wu <flamingice@sourmilk.net>");
 MODULE_AUTHOR("Jouni Malinen <j@w1.fi>");
 MODULE_DESCRIPTION("Driver for IEEE 802.11b wireless cards based on ADMtek ADM8211");
-MODULE_SUPPORTED_DEVICE("ADM8211");
 MODULE_LICENSE("GPL");
 
 static unsigned int tx_ring_size __read_mostly = 16;
index b391169..faa2e67 100644 (file)
@@ -5450,8 +5450,8 @@ ath11k_mac_op_assign_vif_chanctx(struct ieee80211_hw *hw,
        }
 
        if (ab->hw_params.vdev_start_delay &&
-           (arvif->vdev_type == WMI_VDEV_TYPE_AP ||
-           arvif->vdev_type == WMI_VDEV_TYPE_MONITOR)) {
+           arvif->vdev_type != WMI_VDEV_TYPE_AP &&
+           arvif->vdev_type != WMI_VDEV_TYPE_MONITOR) {
                param.vdev_id = arvif->vdev_id;
                param.peer_type = WMI_PEER_TYPE_DEFAULT;
                param.peer_addr = ar->mac_addr;
index 1aca841..7968fe4 100644 (file)
@@ -1687,8 +1687,8 @@ static int ath11k_qmi_respond_fw_mem_request(struct ath11k_base *ab)
                        req->mem_seg[i].size = ab->qmi.target_mem[i].size;
                        req->mem_seg[i].type = ab->qmi.target_mem[i].type;
                        ath11k_dbg(ab, ATH11K_DBG_QMI,
-                                  "qmi req mem_seg[%d] 0x%llx %u %u\n", i,
-                                   ab->qmi.target_mem[i].paddr,
+                                  "qmi req mem_seg[%d] %pad %u %u\n", i,
+                                   &ab->qmi.target_mem[i].paddr,
                                    ab->qmi.target_mem[i].size,
                                    ab->qmi.target_mem[i].type);
                }
index 4c6e57f..cef17f3 100644 (file)
@@ -90,7 +90,6 @@ MODULE_PARM_DESC(no_hw_rfkill_switch, "Ignore the GPIO RFKill switch state");
 MODULE_AUTHOR("Jiri Slaby");
 MODULE_AUTHOR("Nick Kossifidis");
 MODULE_DESCRIPTION("Support for 5xxx series of Atheros 802.11 wireless LAN cards.");
-MODULE_SUPPORTED_DEVICE("Atheros 5xxx WLAN cards");
 MODULE_LICENSE("Dual BSD/GPL");
 
 static int ath5k_init(struct ieee80211_hw *hw);
index 13b4f5f..ef6f5ea 100644 (file)
@@ -177,7 +177,8 @@ struct ath_frame_info {
        s8 txq;
        u8 keyix;
        u8 rtscts_rate;
-       u8 retries : 7;
+       u8 retries : 6;
+       u8 dyn_smps : 1;
        u8 baw_tracked : 1;
        u8 tx_power;
        enum ath9k_key_type keytype:2;
index b66eeb5..5abc2a5 100644 (file)
@@ -34,7 +34,6 @@ static bool ath9k_hw_set_reset_reg(struct ath_hw *ah, u32 type);
 
 MODULE_AUTHOR("Atheros Communications");
 MODULE_DESCRIPTION("Support for Atheros 802.11n wireless LAN cards.");
-MODULE_SUPPORTED_DEVICE("Atheros 802.11n WLAN cards");
 MODULE_LICENSE("Dual BSD/GPL");
 
 static void ath9k_hw_set_clockrate(struct ath_hw *ah)
index 42a2087..01f9c26 100644 (file)
@@ -37,7 +37,6 @@ static char *dev_info = "ath9k";
 
 MODULE_AUTHOR("Atheros Communications");
 MODULE_DESCRIPTION("Support for Atheros 802.11n wireless LAN cards.");
-MODULE_SUPPORTED_DEVICE("Atheros 802.11n WLAN cards");
 MODULE_LICENSE("Dual BSD/GPL");
 
 static unsigned int ath9k_debug = ATH_DBG_DEFAULT;
index e60d473..5691bd6 100644 (file)
@@ -1271,6 +1271,11 @@ static void ath_buf_set_rate(struct ath_softc *sc, struct ath_buf *bf,
                                 is_40, is_sgi, is_sp);
                        if (rix < 8 && (tx_info->flags & IEEE80211_TX_CTL_STBC))
                                info->rates[i].RateFlags |= ATH9K_RATESERIES_STBC;
+                       if (rix >= 8 && fi->dyn_smps) {
+                               info->rates[i].RateFlags |=
+                                       ATH9K_RATESERIES_RTS_CTS;
+                               info->flags |= ATH9K_TXDESC_CTSENA;
+                       }
 
                        info->txpower[i] = ath_get_rate_txpower(sc, bf, rix,
                                                                is_40, false);
@@ -2114,6 +2119,7 @@ static void setup_frame_info(struct ieee80211_hw *hw,
                fi->keyix = an->ps_key;
        else
                fi->keyix = ATH9K_TXKEYIX_INVALID;
+       fi->dyn_smps = sta && sta->smps_mode == IEEE80211_SMPS_DYNAMIC;
        fi->keytype = keytype;
        fi->framelen = framelen;
        fi->tx_power = txpower;
index 707fe66..febce4e 100644 (file)
@@ -75,7 +75,6 @@
 MODULE_AUTHOR("Simon Kelley");
 MODULE_DESCRIPTION("Support for Atmel at76c50x 802.11 wireless ethernet cards.");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("Atmel at76c50x wireless cards");
 
 /* The name of the firmware file to be loaded
    overrides any automatic selection */
index 368eebe..453bb84 100644 (file)
@@ -57,7 +57,6 @@
 MODULE_AUTHOR("Simon Kelley");
 MODULE_DESCRIPTION("Support for Atmel at76c50x 802.11 wireless ethernet cards.");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("Atmel at76c50x PCMCIA cards");
 
 /*====================================================================*/
 
index 47f7ccb..f428dc7 100644 (file)
@@ -16,7 +16,6 @@
 MODULE_AUTHOR("Simon Kelley");
 MODULE_DESCRIPTION("Support for Atmel at76c50x 802.11 wireless ethernet cards.");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("Atmel at76c506 PCI wireless cards");
 
 static const struct pci_device_id card_ids[] = {
        { 0x1114, 0x0506, PCI_ANY_ID, PCI_ANY_ID },
index 818e523..39f3af2 100644 (file)
@@ -87,7 +87,6 @@ static int n_adapters_found;
 
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Broadcom 802.11n wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Broadcom 802.11n WLAN cards");
 MODULE_LICENSE("Dual BSD/GPL");
 /* This needs to be adjusted when brcms_firmwares changes */
 MODULE_FIRMWARE("brcm/bcm43xx-0.fw");
index 4c84c30..e87e68c 100644 (file)
@@ -12,7 +12,6 @@
 
 MODULE_AUTHOR("Broadcom Corporation");
 MODULE_DESCRIPTION("Broadcom 802.11n wireless LAN driver utilities.");
-MODULE_SUPPORTED_DEVICE("Broadcom 802.11n WLAN cards");
 MODULE_LICENSE("Dual BSD/GPL");
 
 struct sk_buff *brcmu_pkt_buf_get_skb(uint len)
index e35e138..60db38c 100644 (file)
@@ -251,7 +251,6 @@ MODULE_AUTHOR("Benjamin Reed");
 MODULE_DESCRIPTION("Support for Cisco/Aironet 802.11 wireless ethernet cards.  "
                   "Direct support for ISA/PCI/MPI cards and support for PCMCIA when used with airo_cs.");
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_SUPPORTED_DEVICE("Aironet 4500, 4800 and Cisco 340/350");
 module_param_hw_array(io, int, ioport, NULL, 0);
 module_param_hw_array(irq, int, irq, NULL, 0);
 module_param_array(rates, int, NULL, 0);
index 3718f95..fcfe4c6 100644 (file)
@@ -47,7 +47,6 @@ MODULE_DESCRIPTION("Support for Cisco/Aironet 802.11 wireless ethernet "
                   "cards.  This is the module that links the PCMCIA card "
                   "with the airo module.");
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_SUPPORTED_DEVICE("Aironet 4500, 4800 and Cisco 340 PCMCIA cards");
 
 /*====================================================================*/
 
index f2e7b73..35dffca 100644 (file)
@@ -870,7 +870,7 @@ struct iwl_fw_dbg_trigger_time_event {
  * tx_bar: tid bitmap to configure on what tid the trigger should occur
  *     when a BAR is sent (for an Rx BlockAck session).
  * frame_timeout: tid bitmap to configure on what tid the trigger should occur
- *     when a frame times out in the reodering buffer.
+ *     when a frame times out in the reordering buffer.
  */
 struct iwl_fw_dbg_trigger_ba {
        __le16 rx_ba_start;
index fd070ca..40f2109 100644 (file)
@@ -271,12 +271,12 @@ static int iwl_pnvm_get_from_efi(struct iwl_trans *trans,
        err = efivar_entry_get(pnvm_efivar, NULL, &package_size, package);
        if (err) {
                IWL_DEBUG_FW(trans,
-                            "PNVM UEFI variable not found %d (len %zd)\n",
+                            "PNVM UEFI variable not found %d (len %lu)\n",
                             err, package_size);
                goto out;
        }
 
-       IWL_DEBUG_FW(trans, "Read PNVM fro UEFI with size %zd\n", package_size);
+       IWL_DEBUG_FW(trans, "Read PNVM fro UEFI with size %lu\n", package_size);
 
        *data = kmemdup(package->data, *len, GFP_KERNEL);
        if (!*data)
index 868da7e..e6d2e09 100644 (file)
@@ -205,6 +205,8 @@ static inline void iwl_op_mode_time_point(struct iwl_op_mode *op_mode,
                                          enum iwl_fw_ini_time_point tp_id,
                                          union iwl_dbg_tlv_tp_data *tp_data)
 {
+       if (!op_mode || !op_mode->ops || !op_mode->ops->time_point)
+               return;
        op_mode->ops->time_point(op_mode, tp_id, tp_data);
 }
 
index 15e2773..5ee64f7 100644 (file)
@@ -1083,6 +1083,7 @@ static const struct dmi_system_id dmi_ppag_approved_list[] = {
                        DMI_MATCH(DMI_SYS_VENDOR, "ASUSTek COMPUTER INC."),
                },
        },
+       {}
 };
 
 static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
index 314fec4..ffaf973 100644 (file)
@@ -1106,6 +1106,8 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
                }
        }
 
+#if IS_ENABLED(CONFIG_IWLMVM)
+
        /*
         * Workaround for problematic SnJ device: sometimes when
         * certain RF modules are connected to SnJ, the device ID
@@ -1116,7 +1118,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
        if (CSR_HW_REV_TYPE(iwl_trans->hw_rev) == IWL_CFG_MAC_TYPE_SNJ)
                iwl_trans->trans_cfg = &iwl_so_trans_cfg;
 
-#if IS_ENABLED(CONFIG_IWLMVM)
        /*
         * special-case 7265D, it has the same PCI IDs.
         *
index 42426e2..2bec971 100644 (file)
@@ -1129,6 +1129,8 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
 
                iwl_pcie_rx_init_rxb_lists(rxq);
 
+               spin_unlock_bh(&rxq->lock);
+
                if (!rxq->napi.poll) {
                        int (*poll)(struct napi_struct *, int) = iwl_pcie_napi_poll;
 
@@ -1149,7 +1151,6 @@ static int _iwl_pcie_rx_init(struct iwl_trans *trans)
                        napi_enable(&rxq->napi);
                }
 
-               spin_unlock_bh(&rxq->lock);
        }
 
        /* move the pool to the default queue and allocator ownerships */
index 1a74867..ec7db2b 100644 (file)
@@ -26,7 +26,6 @@ static char *dev_info = "hostap_cs";
 MODULE_AUTHOR("Jouni Malinen");
 MODULE_DESCRIPTION("Support for Intersil Prism2-based 802.11 wireless LAN "
                   "cards (PC Card).");
-MODULE_SUPPORTED_DEVICE("Intersil Prism2-based WLAN cards (PC Card)");
 MODULE_LICENSE("GPL");
 
 
index 101887e..52d7750 100644 (file)
@@ -27,7 +27,6 @@ static char *dev_info = "hostap_pci";
 MODULE_AUTHOR("Jouni Malinen");
 MODULE_DESCRIPTION("Support for Intersil Prism2.5-based 802.11 wireless LAN "
                   "PCI cards.");
-MODULE_SUPPORTED_DEVICE("Intersil Prism2.5-based WLAN PCI cards");
 MODULE_LICENSE("GPL");
 
 
index 841cfc6..5824729 100644 (file)
@@ -30,7 +30,6 @@ static char *dev_info = "hostap_plx";
 MODULE_AUTHOR("Jouni Malinen");
 MODULE_DESCRIPTION("Support for Intersil Prism2-based 802.11 wireless LAN "
                   "cards (PLX).");
-MODULE_SUPPORTED_DEVICE("Intersil Prism2-based WLAN cards (PLX)");
 MODULE_LICENSE("GPL");
 
 
index 19098b8..2f27c43 100644 (file)
@@ -345,7 +345,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
        };
        struct ieee80211_hw *hw;
        int len, n = 0, ret = -ENOMEM;
-       struct mt76_queue_entry e;
        struct mt76_txwi_cache *t;
        struct sk_buff *iter;
        dma_addr_t addr;
@@ -387,6 +386,11 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
        }
        tx_info.nbuf = n;
 
+       if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
+               ret = -ENOMEM;
+               goto unmap;
+       }
+
        dma_sync_single_for_cpu(dev->dev, t->dma_addr, dev->drv->txwi_size,
                                DMA_TO_DEVICE);
        ret = dev->drv->tx_prepare_skb(dev, txwi, q->qid, wcid, sta, &tx_info);
@@ -395,11 +399,6 @@ mt76_dma_tx_queue_skb(struct mt76_dev *dev, struct mt76_queue *q,
        if (ret < 0)
                goto unmap;
 
-       if (q->queued + (tx_info.nbuf + 1) / 2 >= q->ndesc - 1) {
-               ret = -ENOMEM;
-               goto unmap;
-       }
-
        return mt76_dma_add_buf(dev, q, tx_info.buf, tx_info.nbuf,
                                tx_info.info, tx_info.skb, t);
 
@@ -419,9 +418,7 @@ free:
        }
 #endif
 
-       e.skb = tx_info.skb;
-       e.txwi = t;
-       dev->drv->tx_complete_skb(dev, &e);
+       dev_kfree_skb(tx_info.skb);
        mt76_put_txwi(dev, t);
        return ret;
 }
@@ -515,13 +512,13 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
 {
        struct sk_buff *skb = q->rx_head;
        struct skb_shared_info *shinfo = skb_shinfo(skb);
+       int nr_frags = shinfo->nr_frags;
 
-       if (shinfo->nr_frags < ARRAY_SIZE(shinfo->frags)) {
+       if (nr_frags < ARRAY_SIZE(shinfo->frags)) {
                struct page *page = virt_to_head_page(data);
                int offset = data - page_address(page) + q->buf_offset;
 
-               skb_add_rx_frag(skb, shinfo->nr_frags, page, offset, len,
-                               q->buf_size);
+               skb_add_rx_frag(skb, nr_frags, page, offset, len, q->buf_size);
        } else {
                skb_free_frag(data);
        }
@@ -530,7 +527,10 @@ mt76_add_fragment(struct mt76_dev *dev, struct mt76_queue *q, void *data,
                return;
 
        q->rx_head = NULL;
-       dev->drv->rx_skb(dev, q - dev->q_rx, skb);
+       if (nr_frags < ARRAY_SIZE(shinfo->frags))
+               dev->drv->rx_skb(dev, q - dev->q_rx, skb);
+       else
+               dev_kfree_skb(skb);
 }
 
 static int
index eb889f8..e5a2589 100644 (file)
@@ -967,11 +967,6 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
        }
        txp->nbuf = nbuf;
 
-       /* pass partial skb header to fw */
-       tx_info->buf[1].len = MT_CT_PARSE_LEN;
-       tx_info->buf[1].skip_unmap = true;
-       tx_info->nbuf = MT_CT_DMA_BUF_NUM;
-
        txp->flags = cpu_to_le16(MT_CT_INFO_APPLY_TXD | MT_CT_INFO_FROM_HOST);
 
        if (!key)
@@ -1009,6 +1004,11 @@ int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
                txp->rept_wds_wcid = cpu_to_le16(0x3ff);
        tx_info->skb = DMA_DUMMY_DATA;
 
+       /* pass partial skb header to fw */
+       tx_info->buf[1].len = MT_CT_PARSE_LEN;
+       tx_info->buf[1].skip_unmap = true;
+       tx_info->nbuf = MT_CT_DMA_BUF_NUM;
+
        return 0;
 }
 
index 7fb2170..bd798df 100644 (file)
@@ -543,7 +543,7 @@ mt7915_tm_set_tx_cont(struct mt7915_phy *phy, bool en)
                tx_cont->bw = CMD_CBW_20MHZ;
                break;
        default:
-               break;
+               return -EINVAL;
        }
 
        if (!en) {
@@ -591,7 +591,7 @@ mt7915_tm_set_tx_cont(struct mt7915_phy *phy, bool en)
                mode = MT_PHY_TYPE_HE_MU;
                break;
        default:
-               break;
+               return -EINVAL;
        }
 
        rateval =  mode << 6 | rate_idx;
index db125cd..b5cc72e 100644 (file)
@@ -405,10 +405,8 @@ mt7921_mcu_tx_rate_report(struct mt7921_dev *dev, struct sk_buff *skb,
        if (wlan_idx >= MT76_N_WCIDS)
                return;
        wcid = rcu_dereference(dev->mt76.wcid[wlan_idx]);
-       if (!wcid) {
-               stats->tx_rate = rate;
+       if (!wcid)
                return;
-       }
 
        msta = container_of(wcid, struct mt7921_sta, wcid);
        stats = &msta->stats;
index 8f860c1..dec6ffd 100644 (file)
@@ -1821,7 +1821,6 @@ static const struct pci_device_id rt2400pci_device_table[] = {
 MODULE_AUTHOR(DRV_PROJECT);
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION("Ralink RT2400 PCI & PCMCIA Wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Ralink RT2460 PCI & PCMCIA chipset based cards");
 MODULE_DEVICE_TABLE(pci, rt2400pci_device_table);
 MODULE_LICENSE("GPL");
 
index e940443..8faa0a8 100644 (file)
@@ -2119,7 +2119,6 @@ static const struct pci_device_id rt2500pci_device_table[] = {
 MODULE_AUTHOR(DRV_PROJECT);
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION("Ralink RT2500 PCI & PCMCIA Wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Ralink RT2560 PCI & PCMCIA chipset based cards");
 MODULE_DEVICE_TABLE(pci, rt2500pci_device_table);
 MODULE_LICENSE("GPL");
 
index fce05fc..bb5ed66 100644 (file)
@@ -1956,7 +1956,6 @@ static const struct usb_device_id rt2500usb_device_table[] = {
 MODULE_AUTHOR(DRV_PROJECT);
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION("Ralink RT2500 USB Wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Ralink RT2570 USB chipset based cards");
 MODULE_DEVICE_TABLE(usb, rt2500usb_device_table);
 MODULE_LICENSE("GPL");
 
index 9a33baa..1fde0e7 100644 (file)
@@ -439,7 +439,6 @@ static const struct pci_device_id rt2800pci_device_table[] = {
 MODULE_AUTHOR(DRV_PROJECT);
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION("Ralink RT2800 PCI & PCMCIA Wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Ralink RT2860 PCI & PCMCIA chipset based cards");
 MODULE_FIRMWARE(FIRMWARE_RT2860);
 MODULE_DEVICE_TABLE(pci, rt2800pci_device_table);
 MODULE_LICENSE("GPL");
index 36ac18c..b5c67f6 100644 (file)
@@ -1248,7 +1248,6 @@ static const struct usb_device_id rt2800usb_device_table[] = {
 MODULE_AUTHOR(DRV_PROJECT);
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION("Ralink RT2800 USB Wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Ralink RT2870 USB chipset based cards");
 MODULE_DEVICE_TABLE(usb, rt2800usb_device_table);
 MODULE_FIRMWARE(FIRMWARE_RT2870);
 MODULE_LICENSE("GPL");
index 02da5dd..82cfc2a 100644 (file)
@@ -2993,8 +2993,6 @@ static const struct pci_device_id rt61pci_device_table[] = {
 MODULE_AUTHOR(DRV_PROJECT);
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION("Ralink RT61 PCI & PCMCIA Wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Ralink RT2561, RT2561s & RT2661 "
-                       "PCI & PCMCIA chipset based cards");
 MODULE_DEVICE_TABLE(pci, rt61pci_device_table);
 MODULE_FIRMWARE(FIRMWARE_RT2561);
 MODULE_FIRMWARE(FIRMWARE_RT2561s);
index e697937..5ff2c74 100644 (file)
@@ -2513,7 +2513,6 @@ static const struct usb_device_id rt73usb_device_table[] = {
 MODULE_AUTHOR(DRV_PROJECT);
 MODULE_VERSION(DRV_VERSION);
 MODULE_DESCRIPTION("Ralink RT73 USB Wireless LAN driver.");
-MODULE_SUPPORTED_DEVICE("Ralink RT2571W & RT2671 USB chipset based cards");
 MODULE_DEVICE_TABLE(usb, rt73usb_device_table);
 MODULE_FIRMWARE(FIRMWARE_RT2571);
 MODULE_LICENSE("GPL");
index 9a3d243..d984832 100644 (file)
@@ -441,6 +441,5 @@ module_init(rsi_91x_hal_module_init);
 module_exit(rsi_91x_hal_module_exit);
 MODULE_AUTHOR("Redpine Signals Inc");
 MODULE_DESCRIPTION("Station driver for RSI 91x devices");
-MODULE_SUPPORTED_DEVICE("RSI-91x");
 MODULE_VERSION("0.1");
 MODULE_LICENSE("Dual BSD/GPL");
index 592e9da..fe0287b 100644 (file)
@@ -1571,7 +1571,6 @@ module_exit(rsi_module_exit);
 
 MODULE_AUTHOR("Redpine Signals Inc");
 MODULE_DESCRIPTION("Common SDIO layer for RSI drivers");
-MODULE_SUPPORTED_DEVICE("RSI-91x");
 MODULE_DEVICE_TABLE(sdio, rsi_dev_table);
 MODULE_FIRMWARE(FIRMWARE_RSI9113);
 MODULE_VERSION("0.1");
index a4a533c..3fbe2a3 100644 (file)
@@ -928,7 +928,6 @@ module_usb_driver(rsi_driver);
 
 MODULE_AUTHOR("Redpine Signals Inc");
 MODULE_DESCRIPTION("Common USB layer for RSI drivers");
-MODULE_SUPPORTED_DEVICE("RSI-91x");
 MODULE_DEVICE_TABLE(usb, rsi_dev_table);
 MODULE_FIRMWARE(FIRMWARE_RSI9113);
 MODULE_VERSION("0.1");
index e5c73f8..39a01c2 100644 (file)
@@ -557,8 +557,8 @@ check_frags:
        }
 
        if (skb_has_frag_list(skb) && !first_shinfo) {
-               first_shinfo = skb_shinfo(skb);
-               shinfo = skb_shinfo(skb_shinfo(skb)->frag_list);
+               first_shinfo = shinfo;
+               shinfo = skb_shinfo(shinfo->frag_list);
                nr_frags = shinfo->nr_frags;
 
                goto check_frags;
@@ -1343,11 +1343,21 @@ int xenvif_tx_action(struct xenvif_queue *queue, int budget)
                return 0;
 
        gnttab_batch_copy(queue->tx_copy_ops, nr_cops);
-       if (nr_mops != 0)
+       if (nr_mops != 0) {
                ret = gnttab_map_refs(queue->tx_map_ops,
                                      NULL,
                                      queue->pages_to_map,
                                      nr_mops);
+               if (ret) {
+                       unsigned int i;
+
+                       netdev_err(queue->vif->dev, "Map fail: nr %u ret %d\n",
+                                  nr_mops, ret);
+                       for (i = 0; i < nr_mops; ++i)
+                               WARN_ON_ONCE(queue->tx_map_ops[i].status ==
+                                            GNTST_okay);
+               }
+       }
 
        work_done = xenvif_tx_submit(queue);
 
index 5dad884..8fa7771 100644 (file)
@@ -44,15 +44,13 @@ static int microread_mei_probe(struct mei_cl_device *cldev,
        return 0;
 }
 
-static int microread_mei_remove(struct mei_cl_device *cldev)
+static void microread_mei_remove(struct mei_cl_device *cldev)
 {
        struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev);
 
        microread_remove(phy->hdev);
 
        nfc_mei_phy_free(phy);
-
-       return 0;
 }
 
 static struct mei_cl_device_id microread_mei_tbl[] = {
index 579bc59..5c10aac 100644 (file)
@@ -42,7 +42,7 @@ static int pn544_mei_probe(struct mei_cl_device *cldev,
        return 0;
 }
 
-static int pn544_mei_remove(struct mei_cl_device *cldev)
+static void pn544_mei_remove(struct mei_cl_device *cldev)
 {
        struct nfc_mei_phy *phy = mei_cldev_get_drvdata(cldev);
 
@@ -51,8 +51,6 @@ static int pn544_mei_remove(struct mei_cl_device *cldev)
        pn544_hci_remove(phy->hdev);
 
        nfc_mei_phy_free(phy);
-
-       return 0;
 }
 
 static struct mei_cl_device_id pn544_mei_tbl[] = {
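
Both NFC hunks here (and the nvdimm ones further down) are instances of the same tree-wide conversion: bus ->remove() callbacks become void because the bus core ignored their return value anyway. A minimal mei_cl_bus driver skeleton against the new signature — a sketch only; the foo_* names are hypothetical:

    #include <linux/mei_cl_bus.h>
    #include <linux/mod_devicetable.h>
    #include <linux/module.h>

    static int foo_mei_probe(struct mei_cl_device *cldev,
                             const struct mei_cl_device_id *id)
    {
            /* allocate state, mei_cldev_enable(cldev), register upper layer */
            return 0;
    }

    /* Returns void now: teardown must succeed unconditionally. */
    static void foo_mei_remove(struct mei_cl_device *cldev)
    {
            /* unregister upper layer, free state */
    }

    static struct mei_cl_device_id foo_mei_tbl[] = {
            /* { .name = "...", .uuid = ..., .version = MEI_CL_VERSION_ANY }, */
            {}
    };
    MODULE_DEVICE_TABLE(mei, foo_mei_tbl);

    static struct mei_cl_driver foo_mei_driver = {
            .id_table = foo_mei_tbl,
            .name     = "foo_mei",
            .probe    = foo_mei_probe,
            .remove   = foo_mei_remove,
    };
    module_mei_cl_driver(foo_mei_driver);
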
index e77c587..c325be5 100644 (file)
@@ -2,4 +2,5 @@
 source "drivers/ntb/hw/amd/Kconfig"
 source "drivers/ntb/hw/idt/Kconfig"
 source "drivers/ntb/hw/intel/Kconfig"
+source "drivers/ntb/hw/epf/Kconfig"
 source "drivers/ntb/hw/mscc/Kconfig"
index 4714d62..223ca59 100644 (file)
@@ -2,4 +2,5 @@
 obj-$(CONFIG_NTB_AMD)  += amd/
 obj-$(CONFIG_NTB_IDT)  += idt/
 obj-$(CONFIG_NTB_INTEL)        += intel/
+obj-$(CONFIG_NTB_EPF)  += epf/
 obj-$(CONFIG_NTB_SWITCHTEC) += mscc/
diff --git a/drivers/ntb/hw/epf/Kconfig b/drivers/ntb/hw/epf/Kconfig
new file mode 100644 (file)
index 0000000..6197d1a
--- /dev/null
@@ -0,0 +1,6 @@
+config NTB_EPF
+       tristate "Generic EPF Non-Transparent Bridge support"
+       depends on m
+       help
+         This driver supports EPF NTB on a configurable PCIe endpoint.
+         If unsure, say N.
diff --git a/drivers/ntb/hw/epf/Makefile b/drivers/ntb/hw/epf/Makefile
new file mode 100644 (file)
index 0000000..2f560a4
--- /dev/null
@@ -0,0 +1 @@
+obj-$(CONFIG_NTB_EPF) += ntb_hw_epf.o
diff --git a/drivers/ntb/hw/epf/ntb_hw_epf.c b/drivers/ntb/hw/epf/ntb_hw_epf.c
new file mode 100644 (file)
index 0000000..b019755
--- /dev/null
@@ -0,0 +1,753 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Host side endpoint driver to implement Non-Transparent Bridge functionality
+ *
+ * Copyright (C) 2020 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+#include <linux/delay.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <linux/slab.h>
+#include <linux/ntb.h>
+
+#define NTB_EPF_COMMAND                0x0
+#define CMD_CONFIGURE_DOORBELL 1
+#define CMD_TEARDOWN_DOORBELL  2
+#define CMD_CONFIGURE_MW       3
+#define CMD_TEARDOWN_MW                4
+#define CMD_LINK_UP            5
+#define CMD_LINK_DOWN          6
+
+#define NTB_EPF_ARGUMENT       0x4
+#define MSIX_ENABLE            BIT(16)
+
+#define NTB_EPF_CMD_STATUS     0x8
+#define COMMAND_STATUS_OK      1
+#define COMMAND_STATUS_ERROR   2
+
+#define NTB_EPF_LINK_STATUS    0x0A
+#define LINK_STATUS_UP         BIT(0)
+
+#define NTB_EPF_TOPOLOGY       0x0C
+#define NTB_EPF_LOWER_ADDR     0x10
+#define NTB_EPF_UPPER_ADDR     0x14
+#define NTB_EPF_LOWER_SIZE     0x18
+#define NTB_EPF_UPPER_SIZE     0x1C
+#define NTB_EPF_MW_COUNT       0x20
+#define NTB_EPF_MW1_OFFSET     0x24
+#define NTB_EPF_SPAD_OFFSET    0x28
+#define NTB_EPF_SPAD_COUNT     0x2C
+#define NTB_EPF_DB_ENTRY_SIZE  0x30
+#define NTB_EPF_DB_DATA(n)     (0x34 + (n) * 4)
+#define NTB_EPF_DB_OFFSET(n)   (0xB4 + (n) * 4)
+
+#define NTB_EPF_MIN_DB_COUNT   3
+#define NTB_EPF_MAX_DB_COUNT   31
+#define NTB_EPF_MW_OFFSET      2
+
+#define NTB_EPF_COMMAND_TIMEOUT        1000 /* 1 Sec */
+
+enum pci_barno {
+       BAR_0,
+       BAR_1,
+       BAR_2,
+       BAR_3,
+       BAR_4,
+       BAR_5,
+};
+
+struct ntb_epf_dev {
+       struct ntb_dev ntb;
+       struct device *dev;
+       /* Mutex to serialize commands sent to the NTB EPF */
+       struct mutex cmd_lock;
+
+       enum pci_barno ctrl_reg_bar;
+       enum pci_barno peer_spad_reg_bar;
+       enum pci_barno db_reg_bar;
+
+       unsigned int mw_count;
+       unsigned int spad_count;
+       unsigned int db_count;
+
+       void __iomem *ctrl_reg;
+       void __iomem *db_reg;
+       void __iomem *peer_spad_reg;
+
+       unsigned int self_spad;
+       unsigned int peer_spad;
+
+       int db_val;
+       u64 db_valid_mask;
+};
+
+#define ntb_ndev(__ntb) container_of(__ntb, struct ntb_epf_dev, ntb)
+
+struct ntb_epf_data {
+       /* BAR that contains both control region and self spad region */
+       enum pci_barno ctrl_reg_bar;
+       /* BAR that contains peer spad region */
+       enum pci_barno peer_spad_reg_bar;
+       /* BAR that contains Doorbell region and Memory window '1' */
+       enum pci_barno db_reg_bar;
+};
+
+static int ntb_epf_send_command(struct ntb_epf_dev *ndev, u32 command,
+                               u32 argument)
+{
+       ktime_t timeout;
+       bool timedout;
+       int ret = 0;
+       u32 status;
+
+       mutex_lock(&ndev->cmd_lock);
+       writel(argument, ndev->ctrl_reg + NTB_EPF_ARGUMENT);
+       writel(command, ndev->ctrl_reg + NTB_EPF_COMMAND);
+
+       timeout = ktime_add_ms(ktime_get(), NTB_EPF_COMMAND_TIMEOUT);
+       while (1) {
+               timedout = ktime_after(ktime_get(), timeout);
+               status = readw(ndev->ctrl_reg + NTB_EPF_CMD_STATUS);
+
+               if (status == COMMAND_STATUS_ERROR) {
+                       ret = -EINVAL;
+                       break;
+               }
+
+               if (status == COMMAND_STATUS_OK)
+                       break;
+
+               if (WARN_ON(timedout)) {
+                       ret = -ETIMEDOUT;
+                       break;
+               }
+
+               usleep_range(5, 10);
+       }
+
+       writew(0, ndev->ctrl_reg + NTB_EPF_CMD_STATUS);
+       mutex_unlock(&ndev->cmd_lock);
+
+       return ret;
+}
+
+static int ntb_epf_mw_to_bar(struct ntb_epf_dev *ndev, int idx)
+{
+       struct device *dev = ndev->dev;
+
+       if (idx < 0 || idx > ndev->mw_count) {
+               dev_err(dev, "Unsupported Memory Window index %d\n", idx);
+               return -EINVAL;
+       }
+
+       return idx + 2;
+}
+
+static int ntb_epf_mw_count(struct ntb_dev *ntb, int pidx)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+
+       if (pidx != NTB_DEF_PEER_IDX) {
+               dev_err(dev, "Unsupported Peer ID %d\n", pidx);
+               return -EINVAL;
+       }
+
+       return ndev->mw_count;
+}
+
+static int ntb_epf_mw_get_align(struct ntb_dev *ntb, int pidx, int idx,
+                               resource_size_t *addr_align,
+                               resource_size_t *size_align,
+                               resource_size_t *size_max)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       int bar;
+
+       if (pidx != NTB_DEF_PEER_IDX) {
+               dev_err(dev, "Unsupported Peer ID %d\n", pidx);
+               return -EINVAL;
+       }
+
+       bar = ntb_epf_mw_to_bar(ndev, idx);
+       if (bar < 0)
+               return bar;
+
+       if (addr_align)
+               *addr_align = SZ_4K;
+
+       if (size_align)
+               *size_align = 1;
+
+       if (size_max)
+               *size_max = pci_resource_len(ndev->ntb.pdev, bar);
+
+       return 0;
+}
+
+static u64 ntb_epf_link_is_up(struct ntb_dev *ntb,
+                             enum ntb_speed *speed,
+                             enum ntb_width *width)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       u32 status;
+
+       status = readw(ndev->ctrl_reg + NTB_EPF_LINK_STATUS);
+
+       return status & LINK_STATUS_UP;
+}
+
+static u32 ntb_epf_spad_read(struct ntb_dev *ntb, int idx)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count) {
+               dev_err(dev, "READ: Invalid ScratchPad Index %d\n", idx);
+               return 0;
+       }
+
+       offset = readl(ndev->ctrl_reg + NTB_EPF_SPAD_OFFSET);
+       offset += (idx << 2);
+
+       return readl(ndev->ctrl_reg + offset);
+}
+
+static int ntb_epf_spad_write(struct ntb_dev *ntb,
+                             int idx, u32 val)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       u32 offset;
+
+       if (idx < 0 || idx >= ndev->spad_count) {
+               dev_err(dev, "WRITE: Invalid ScratchPad Index %d\n", idx);
+               return -EINVAL;
+       }
+
+       offset = readl(ndev->ctrl_reg + NTB_EPF_SPAD_OFFSET);
+       offset += (idx << 2);
+       writel(val, ndev->ctrl_reg + offset);
+
+       return 0;
+}
+
+static u32 ntb_epf_peer_spad_read(struct ntb_dev *ntb, int pidx, int idx)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       u32 offset;
+
+       if (pidx != NTB_DEF_PEER_IDX) {
+               dev_err(dev, "Unsupported Peer ID %d\n", pidx);
+               return -EINVAL;
+       }
+
+       if (idx < 0 || idx >= ndev->spad_count) {
+               dev_err(dev, "WRITE: Invalid Peer ScratchPad Index %d\n", idx);
+               return -EINVAL;
+       }
+
+       offset = (idx << 2);
+       return readl(ndev->peer_spad_reg + offset);
+}
+
+static int ntb_epf_peer_spad_write(struct ntb_dev *ntb, int pidx,
+                                  int idx, u32 val)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       u32 offset;
+
+       if (pidx != NTB_DEF_PEER_IDX) {
+               dev_err(dev, "Unsupported Peer ID %d\n", pidx);
+               return -EINVAL;
+       }
+
+       if (idx < 0 || idx >= ndev->spad_count) {
+               dev_err(dev, "WRITE: Invalid Peer ScratchPad Index %d\n", idx);
+               return -EINVAL;
+       }
+
+       offset = (idx << 2);
+       writel(val, ndev->peer_spad_reg + offset);
+
+       return 0;
+}
+
+static int ntb_epf_link_enable(struct ntb_dev *ntb,
+                              enum ntb_speed max_speed,
+                              enum ntb_width max_width)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       int ret;
+
+       ret = ntb_epf_send_command(ndev, CMD_LINK_UP, 0);
+       if (ret) {
+               dev_err(dev, "Fail to enable link\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static int ntb_epf_link_disable(struct ntb_dev *ntb)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       int ret;
+
+       ret = ntb_epf_send_command(ndev, CMD_LINK_DOWN, 0);
+       if (ret) {
+               dev_err(dev, "Fail to disable link\n");
+               return ret;
+       }
+
+       return 0;
+}
+
+static irqreturn_t ntb_epf_vec_isr(int irq, void *dev)
+{
+       struct ntb_epf_dev *ndev = dev;
+       int irq_no;
+
+       irq_no = irq - pci_irq_vector(ndev->ntb.pdev, 0);
+       ndev->db_val = irq_no + 1;
+
+       if (irq_no == 0)
+               ntb_link_event(&ndev->ntb);
+       else
+               ntb_db_event(&ndev->ntb, irq_no);
+
+       return IRQ_HANDLED;
+}
+
+static int ntb_epf_init_isr(struct ntb_epf_dev *ndev, int msi_min, int msi_max)
+{
+       struct pci_dev *pdev = ndev->ntb.pdev;
+       struct device *dev = ndev->dev;
+       u32 argument = MSIX_ENABLE;
+       int irq;
+       int ret;
+       int i;
+
+       irq = pci_alloc_irq_vectors(pdev, msi_min, msi_max, PCI_IRQ_MSIX);
+       if (irq < 0) {
+               dev_dbg(dev, "Failed to get MSIX interrupts\n");
+               irq = pci_alloc_irq_vectors(pdev, msi_min, msi_max,
+                                           PCI_IRQ_MSI);
+               if (irq < 0) {
+                       dev_err(dev, "Failed to get MSI interrupts\n");
+                       return irq;
+               }
+               argument &= ~MSIX_ENABLE;
+       }
+
+       for (i = 0; i < irq; i++) {
+               ret = request_irq(pci_irq_vector(pdev, i), ntb_epf_vec_isr,
+                                 0, "ntb_epf", ndev);
+               if (ret) {
+                       dev_err(dev, "Failed to request irq\n");
+                       goto err_request_irq;
+               }
+       }
+
+       ndev->db_count = irq - 1;
+
+       ret = ntb_epf_send_command(ndev, CMD_CONFIGURE_DOORBELL,
+                                  argument | irq);
+       if (ret) {
+               dev_err(dev, "Failed to configure doorbell\n");
+               goto err_configure_db;
+       }
+
+       return 0;
+
+err_configure_db:
+       for (i = 0; i < ndev->db_count + 1; i++)
+               free_irq(pci_irq_vector(pdev, i), ndev);
+
+err_request_irq:
+       pci_free_irq_vectors(pdev);
+
+       return ret;
+}
+
+static int ntb_epf_peer_mw_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->mw_count;
+}
+
+static int ntb_epf_spad_count(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->spad_count;
+}
+
+static u64 ntb_epf_db_valid_mask(struct ntb_dev *ntb)
+{
+       return ntb_ndev(ntb)->db_valid_mask;
+}
+
+static int ntb_epf_db_set_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       return 0;
+}
+
+static int ntb_epf_mw_set_trans(struct ntb_dev *ntb, int pidx, int idx,
+                               dma_addr_t addr, resource_size_t size)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       resource_size_t mw_size;
+       int bar;
+
+       if (pidx != NTB_DEF_PEER_IDX) {
+               dev_err(dev, "Unsupported Peer ID %d\n", pidx);
+               return -EINVAL;
+       }
+
+       bar = idx + NTB_EPF_MW_OFFSET;
+
+       mw_size = pci_resource_len(ntb->pdev, bar);
+
+       if (size > mw_size) {
+               dev_err(dev, "Size:%pa is greater than the MW size %pa\n",
+                       &size, &mw_size);
+               return -EINVAL;
+       }
+
+       writel(lower_32_bits(addr), ndev->ctrl_reg + NTB_EPF_LOWER_ADDR);
+       writel(upper_32_bits(addr), ndev->ctrl_reg + NTB_EPF_UPPER_ADDR);
+       writel(lower_32_bits(size), ndev->ctrl_reg + NTB_EPF_LOWER_SIZE);
+       writel(upper_32_bits(size), ndev->ctrl_reg + NTB_EPF_UPPER_SIZE);
+       ntb_epf_send_command(ndev, CMD_CONFIGURE_MW, idx);
+
+       return 0;
+}
+
+static int ntb_epf_mw_clear_trans(struct ntb_dev *ntb, int pidx, int idx)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       struct device *dev = ndev->dev;
+       int ret;
+
+       ret = ntb_epf_send_command(ndev, CMD_TEARDOWN_MW, idx);
+       if (ret)
+               dev_err(dev, "Failed to teardown memory window\n");
+
+       return ret;
+}
+
+static int ntb_epf_peer_mw_get_addr(struct ntb_dev *ntb, int idx,
+                                   phys_addr_t *base, resource_size_t *size)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       u32 offset = 0;
+       int bar;
+
+       if (idx == 0)
+               offset = readl(ndev->ctrl_reg + NTB_EPF_MW1_OFFSET);
+
+       bar = idx + NTB_EPF_MW_OFFSET;
+
+       if (base)
+               *base = pci_resource_start(ndev->ntb.pdev, bar) + offset;
+
+       if (size)
+               *size = pci_resource_len(ndev->ntb.pdev, bar) - offset;
+
+       return 0;
+}
+
+static int ntb_epf_peer_db_set(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+       u32 interrupt_num = ffs(db_bits) + 1;
+       struct device *dev = ndev->dev;
+       u32 db_entry_size;
+       u32 db_offset;
+       u32 db_data;
+
+       if (interrupt_num > ndev->db_count) {
+               dev_err(dev, "DB interrupt %d greater than Max Supported %d\n",
+                       interrupt_num, ndev->db_count);
+               return -EINVAL;
+       }
+
+       db_entry_size = readl(ndev->ctrl_reg + NTB_EPF_DB_ENTRY_SIZE);
+
+       db_data = readl(ndev->ctrl_reg + NTB_EPF_DB_DATA(interrupt_num));
+       db_offset = readl(ndev->ctrl_reg + NTB_EPF_DB_OFFSET(interrupt_num));
+       writel(db_data, ndev->db_reg + (db_entry_size * interrupt_num) +
+              db_offset);
+
+       return 0;
+}
+
+static u64 ntb_epf_db_read(struct ntb_dev *ntb)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+
+       return ndev->db_val;
+}
+
+static int ntb_epf_db_clear_mask(struct ntb_dev *ntb, u64 db_bits)
+{
+       return 0;
+}
+
+static int ntb_epf_db_clear(struct ntb_dev *ntb, u64 db_bits)
+{
+       struct ntb_epf_dev *ndev = ntb_ndev(ntb);
+
+       ndev->db_val = 0;
+
+       return 0;
+}
+
+static const struct ntb_dev_ops ntb_epf_ops = {
+       .mw_count               = ntb_epf_mw_count,
+       .spad_count             = ntb_epf_spad_count,
+       .peer_mw_count          = ntb_epf_peer_mw_count,
+       .db_valid_mask          = ntb_epf_db_valid_mask,
+       .db_set_mask            = ntb_epf_db_set_mask,
+       .mw_set_trans           = ntb_epf_mw_set_trans,
+       .mw_clear_trans         = ntb_epf_mw_clear_trans,
+       .peer_mw_get_addr       = ntb_epf_peer_mw_get_addr,
+       .link_enable            = ntb_epf_link_enable,
+       .spad_read              = ntb_epf_spad_read,
+       .spad_write             = ntb_epf_spad_write,
+       .peer_spad_read         = ntb_epf_peer_spad_read,
+       .peer_spad_write        = ntb_epf_peer_spad_write,
+       .peer_db_set            = ntb_epf_peer_db_set,
+       .db_read                = ntb_epf_db_read,
+       .mw_get_align           = ntb_epf_mw_get_align,
+       .link_is_up             = ntb_epf_link_is_up,
+       .db_clear_mask          = ntb_epf_db_clear_mask,
+       .db_clear               = ntb_epf_db_clear,
+       .link_disable           = ntb_epf_link_disable,
+};
+
+static inline void ntb_epf_init_struct(struct ntb_epf_dev *ndev,
+                                      struct pci_dev *pdev)
+{
+       ndev->ntb.pdev = pdev;
+       ndev->ntb.topo = NTB_TOPO_NONE;
+       ndev->ntb.ops = &ntb_epf_ops;
+}
+
+static int ntb_epf_init_dev(struct ntb_epf_dev *ndev)
+{
+       struct device *dev = ndev->dev;
+       int ret;
+
+       /* One link interrupt; the remaining vectors are doorbell interrupts */
+       ret = ntb_epf_init_isr(ndev, NTB_EPF_MIN_DB_COUNT + 1,
+                              NTB_EPF_MAX_DB_COUNT + 1);
+       if (ret) {
+               dev_err(dev, "Failed to init ISR\n");
+               return ret;
+       }
+
+       ndev->db_valid_mask = BIT_ULL(ndev->db_count) - 1;
+       ndev->mw_count = readl(ndev->ctrl_reg + NTB_EPF_MW_COUNT);
+       ndev->spad_count = readl(ndev->ctrl_reg + NTB_EPF_SPAD_COUNT);
+
+       return 0;
+}
+
+static int ntb_epf_init_pci(struct ntb_epf_dev *ndev,
+                           struct pci_dev *pdev)
+{
+       struct device *dev = ndev->dev;
+       int ret;
+
+       pci_set_drvdata(pdev, ndev);
+
+       ret = pci_enable_device(pdev);
+       if (ret) {
+               dev_err(dev, "Cannot enable PCI device\n");
+               goto err_pci_enable;
+       }
+
+       ret = pci_request_regions(pdev, "ntb");
+       if (ret) {
+               dev_err(dev, "Cannot obtain PCI resources\n");
+               goto err_pci_regions;
+       }
+
+       pci_set_master(pdev);
+
+       ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64));
+       if (ret) {
+               ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+               if (ret) {
+                       dev_err(dev, "Cannot set DMA mask\n");
+                       goto err_dma_mask;
+               }
+               dev_warn(&pdev->dev, "Cannot DMA highmem\n");
+       }
+
+       ndev->ctrl_reg = pci_iomap(pdev, ndev->ctrl_reg_bar, 0);
+       if (!ndev->ctrl_reg) {
+               ret = -EIO;
+               goto err_dma_mask;
+       }
+
+       ndev->peer_spad_reg = pci_iomap(pdev, ndev->peer_spad_reg_bar, 0);
+       if (!ndev->peer_spad_reg) {
+               ret = -EIO;
+               goto err_dma_mask;
+       }
+
+       ndev->db_reg = pci_iomap(pdev, ndev->db_reg_bar, 0);
+       if (!ndev->db_reg) {
+               ret = -EIO;
+               goto err_dma_mask;
+       }
+
+       return 0;
+
+err_dma_mask:
+       pci_clear_master(pdev);
+
+err_pci_regions:
+       pci_disable_device(pdev);
+
+err_pci_enable:
+       pci_set_drvdata(pdev, NULL);
+
+       return ret;
+}
+
+static void ntb_epf_deinit_pci(struct ntb_epf_dev *ndev)
+{
+       struct pci_dev *pdev = ndev->ntb.pdev;
+
+       pci_iounmap(pdev, ndev->ctrl_reg);
+       pci_iounmap(pdev, ndev->peer_spad_reg);
+       pci_iounmap(pdev, ndev->db_reg);
+
+       pci_clear_master(pdev);
+       pci_release_regions(pdev);
+       pci_disable_device(pdev);
+       pci_set_drvdata(pdev, NULL);
+}
+
+static void ntb_epf_cleanup_isr(struct ntb_epf_dev *ndev)
+{
+       struct pci_dev *pdev = ndev->ntb.pdev;
+       int i;
+
+       ntb_epf_send_command(ndev, CMD_TEARDOWN_DOORBELL, ndev->db_count + 1);
+
+       for (i = 0; i < ndev->db_count + 1; i++)
+               free_irq(pci_irq_vector(pdev, i), ndev);
+       pci_free_irq_vectors(pdev);
+}
+
+static int ntb_epf_pci_probe(struct pci_dev *pdev,
+                            const struct pci_device_id *id)
+{
+       enum pci_barno peer_spad_reg_bar = BAR_1;
+       enum pci_barno ctrl_reg_bar = BAR_0;
+       enum pci_barno db_reg_bar = BAR_2;
+       struct device *dev = &pdev->dev;
+       struct ntb_epf_data *data;
+       struct ntb_epf_dev *ndev;
+       int ret;
+
+       if (pci_is_bridge(pdev))
+               return -ENODEV;
+
+       ndev = devm_kzalloc(dev, sizeof(*ndev), GFP_KERNEL);
+       if (!ndev)
+               return -ENOMEM;
+
+       data = (struct ntb_epf_data *)id->driver_data;
+       if (data) {
+               if (data->peer_spad_reg_bar)
+                       peer_spad_reg_bar = data->peer_spad_reg_bar;
+               if (data->ctrl_reg_bar)
+                       ctrl_reg_bar = data->ctrl_reg_bar;
+               if (data->db_reg_bar)
+                       db_reg_bar = data->db_reg_bar;
+       }
+
+       ndev->peer_spad_reg_bar = peer_spad_reg_bar;
+       ndev->ctrl_reg_bar = ctrl_reg_bar;
+       ndev->db_reg_bar = db_reg_bar;
+       ndev->dev = dev;
+
+       ntb_epf_init_struct(ndev, pdev);
+       mutex_init(&ndev->cmd_lock);
+
+       ret = ntb_epf_init_pci(ndev, pdev);
+       if (ret) {
+               dev_err(dev, "Failed to init PCI\n");
+               return ret;
+       }
+
+       ret = ntb_epf_init_dev(ndev);
+       if (ret) {
+               dev_err(dev, "Failed to init device\n");
+               goto err_init_dev;
+       }
+
+       ret = ntb_register_device(&ndev->ntb);
+       if (ret) {
+               dev_err(dev, "Failed to register NTB device\n");
+               goto err_register_dev;
+       }
+
+       return 0;
+
+err_register_dev:
+       ntb_epf_cleanup_isr(ndev);
+
+err_init_dev:
+       ntb_epf_deinit_pci(ndev);
+
+       return ret;
+}
+
+static void ntb_epf_pci_remove(struct pci_dev *pdev)
+{
+       struct ntb_epf_dev *ndev = pci_get_drvdata(pdev);
+
+       ntb_unregister_device(&ndev->ntb);
+       ntb_epf_cleanup_isr(ndev);
+       ntb_epf_deinit_pci(ndev);
+}
+
+static const struct ntb_epf_data j721e_data = {
+       .ctrl_reg_bar = BAR_0,
+       .peer_spad_reg_bar = BAR_1,
+       .db_reg_bar = BAR_2,
+};
+
+static const struct pci_device_id ntb_epf_pci_tbl[] = {
+       {
+               PCI_DEVICE(PCI_VENDOR_ID_TI, PCI_DEVICE_ID_TI_J721E),
+               .class = PCI_CLASS_MEMORY_RAM << 8, .class_mask = 0xffff00,
+               .driver_data = (kernel_ulong_t)&j721e_data,
+       },
+       { },
+};
+
+static struct pci_driver ntb_epf_pci_driver = {
+       .name           = KBUILD_MODNAME,
+       .id_table       = ntb_epf_pci_tbl,
+       .probe          = ntb_epf_pci_probe,
+       .remove         = ntb_epf_pci_remove,
+};
+module_pci_driver(ntb_epf_pci_driver);
+
+MODULE_DESCRIPTION("PCI ENDPOINT NTB HOST DRIVER");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_LICENSE("GPL v2");
index e03a1f3..7b95562 100644 (file)
@@ -310,11 +310,10 @@ static int nd_blk_probe(struct device *dev)
                return nsblk_attach_disk(nsblk);
 }
 
-static int nd_blk_remove(struct device *dev)
+static void nd_blk_remove(struct device *dev)
 {
        if (is_nd_btt(dev))
                nvdimm_namespace_detach_btt(to_nd_btt(dev));
-       return 0;
 }
 
 static struct nd_device_driver nd_blk_driver = {
index 2304c61..48f0985 100644 (file)
@@ -113,18 +113,17 @@ static int nvdimm_bus_remove(struct device *dev)
        struct nd_device_driver *nd_drv = to_nd_device_driver(dev->driver);
        struct module *provider = to_bus_provider(dev);
        struct nvdimm_bus *nvdimm_bus = walk_to_nvdimm_bus(dev);
-       int rc = 0;
 
        if (nd_drv->remove) {
                debug_nvdimm_lock(dev);
-               rc = nd_drv->remove(dev);
+               nd_drv->remove(dev);
                debug_nvdimm_unlock(dev);
        }
 
-       dev_dbg(&nvdimm_bus->dev, "%s.remove(%s) = %d\n", dev->driver->name,
-                       dev_name(dev), rc);
+       dev_dbg(&nvdimm_bus->dev, "%s.remove(%s)\n", dev->driver->name,
+                       dev_name(dev));
        module_put(provider);
-       return rc;
+       return 0;
 }
 
 static void nvdimm_bus_shutdown(struct device *dev)
@@ -427,7 +426,7 @@ static void free_badrange_list(struct list_head *badrange_list)
        list_del_init(badrange_list);
 }
 
-static int nd_bus_remove(struct device *dev)
+static void nd_bus_remove(struct device *dev)
 {
        struct nvdimm_bus *nvdimm_bus = to_nvdimm_bus(dev);
 
@@ -446,8 +445,6 @@ static int nd_bus_remove(struct device *dev)
        spin_unlock(&nvdimm_bus->badrange.lock);
 
        nvdimm_bus_destroy_ndctl(nvdimm_bus);
-
-       return 0;
 }
 
 static int nd_bus_probe(struct device *dev)
index 7d4ddc4..91d9163 100644 (file)
@@ -113,19 +113,14 @@ static int nvdimm_probe(struct device *dev)
        return rc;
 }
 
-static int nvdimm_remove(struct device *dev)
+static void nvdimm_remove(struct device *dev)
 {
        struct nvdimm_drvdata *ndd = dev_get_drvdata(dev);
 
-       if (!ndd)
-               return 0;
-
        nvdimm_bus_lock(dev);
        dev_set_drvdata(dev, NULL);
        nvdimm_bus_unlock(dev);
        put_ndd(ndd);
-
-       return 0;
 }
 
 static struct nd_device_driver nvdimm_driver = {
index 281fedb..b8a85bf 100644 (file)
@@ -563,7 +563,7 @@ static int nd_pmem_probe(struct device *dev)
        return pmem_attach_disk(dev, ndns);
 }
 
-static int nd_pmem_remove(struct device *dev)
+static void nd_pmem_remove(struct device *dev)
 {
        struct pmem_device *pmem = dev_get_drvdata(dev);
 
@@ -578,8 +578,6 @@ static int nd_pmem_remove(struct device *dev)
                pmem->bb_state = NULL;
        }
        nvdimm_flush(to_nd_region(dev->parent), NULL);
-
-       return 0;
 }
 
 static void nd_pmem_shutdown(struct device *dev)
index bfce87e..e0c3412 100644 (file)
@@ -87,7 +87,7 @@ static int child_unregister(struct device *dev, void *data)
        return 0;
 }
 
-static int nd_region_remove(struct device *dev)
+static void nd_region_remove(struct device *dev)
 {
        struct nd_region *nd_region = to_nd_region(dev);
 
@@ -108,8 +108,6 @@ static int nd_region_remove(struct device *dev)
         */
        sysfs_put(nd_region->bb_state);
        nd_region->bb_state = NULL;
-
-       return 0;
 }
 
 static int child_notify(struct device *dev, void *data)
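
The nvdimm hunks above are part of a subsystem-wide conversion: the value returned by a driver's ->remove() was never acted on, since the device is unbound regardless, so nd_device_driver callbacks now return void and nvdimm_bus_remove() unconditionally returns 0. A hedged sketch of a driver under the new contract (the example_* names are hypothetical):

static void example_remove(struct device *dev)
{
        struct example_ctx *ctx = dev_get_drvdata(dev);

        /* no error path: resources are released unconditionally */
        example_teardown(ctx);
}

static struct nd_device_driver example_driver = {
        .remove = example_remove,       /* returns void, not int */
        .drv = {
                .name = "example",
        },
};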
index e68a8c4..0896e21 100644 (file)
@@ -380,6 +380,7 @@ bool nvme_cancel_request(struct request *req, void *data, bool reserved)
                return true;
 
        nvme_req(req)->status = NVME_SC_HOST_ABORTED_CMD;
+       nvme_req(req)->flags |= NVME_REQ_CANCELLED;
        blk_mq_complete_request(req);
        return true;
 }
@@ -1225,28 +1226,12 @@ static void nvme_keep_alive_end_io(struct request *rq, blk_status_t status)
                queue_delayed_work(nvme_wq, &ctrl->ka_work, ctrl->kato * HZ);
 }
 
-static int nvme_keep_alive(struct nvme_ctrl *ctrl)
-{
-       struct request *rq;
-
-       rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
-                       BLK_MQ_REQ_RESERVED);
-       if (IS_ERR(rq))
-               return PTR_ERR(rq);
-
-       rq->timeout = ctrl->kato * HZ;
-       rq->end_io_data = ctrl;
-
-       blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
-
-       return 0;
-}
-
 static void nvme_keep_alive_work(struct work_struct *work)
 {
        struct nvme_ctrl *ctrl = container_of(to_delayed_work(work),
                        struct nvme_ctrl, ka_work);
        bool comp_seen = ctrl->comp_seen;
+       struct request *rq;
 
        if ((ctrl->ctratt & NVME_CTRL_ATTR_TBKAS) && comp_seen) {
                dev_dbg(ctrl->device,
@@ -1256,12 +1241,18 @@ static void nvme_keep_alive_work(struct work_struct *work)
                return;
        }
 
-       if (nvme_keep_alive(ctrl)) {
+       rq = nvme_alloc_request(ctrl->admin_q, &ctrl->ka_cmd,
+                               BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
+       if (IS_ERR(rq)) {
                /* allocation failure, reset the controller */
-               dev_err(ctrl->device, "keep-alive failed\n");
+               dev_err(ctrl->device, "keep-alive failed: %ld\n", PTR_ERR(rq));
                nvme_reset_ctrl(ctrl);
                return;
        }
+
+       rq->timeout = ctrl->kato * HZ;
+       rq->end_io_data = ctrl;
+       blk_execute_rq_nowait(NULL, rq, 0, nvme_keep_alive_end_io);
 }
 
 static void nvme_start_keep_alive(struct nvme_ctrl *ctrl)
@@ -1440,7 +1431,7 @@ static int nvme_identify_ns(struct nvme_ctrl *ctrl, unsigned nsid,
                goto out_free_id;
        }
 
-       error = -ENODEV;
+       error = NVME_SC_INVALID_NS | NVME_SC_DNR;
        if ((*id)->ncap == 0) /* namespace not allocated or attached */
                goto out_free_id;
 
@@ -1963,30 +1954,18 @@ static void nvme_config_discard(struct gendisk *disk, struct nvme_ns *ns)
                blk_queue_max_write_zeroes_sectors(queue, UINT_MAX);
 }
 
-static void nvme_config_write_zeroes(struct gendisk *disk, struct nvme_ns *ns)
+/*
+ * Even though the NVMe spec explicitly states that MDTS is not applicable to
+ * write-zeroes, we are cautious and limit the size to the controller's
+ * max_hw_sectors value, which is based on the MDTS field and possibly other
+ * limiting factors.
+ */
+static void nvme_config_write_zeroes(struct request_queue *q,
+               struct nvme_ctrl *ctrl)
 {
-       u64 max_blocks;
-
-       if (!(ns->ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) ||
-           (ns->ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
-               return;
-       /*
-        * Even though NVMe spec explicitly states that MDTS is not
-        * applicable to the write-zeroes:- "The restriction does not apply to
-        * commands that do not transfer data between the host and the
-        * controller (e.g., Write Uncorrectable ro Write Zeroes command).".
-        * In order to be more cautious use controller's max_hw_sectors value
-        * to configure the maximum sectors for the write-zeroes which is
-        * configured based on the controller's MDTS field in the
-        * nvme_init_identify() if available.
-        */
-       if (ns->ctrl->max_hw_sectors == UINT_MAX)
-               max_blocks = (u64)USHRT_MAX + 1;
-       else
-               max_blocks = ns->ctrl->max_hw_sectors + 1;
-
-       blk_queue_max_write_zeroes_sectors(disk->queue,
-                                          nvme_lba_to_sect(ns, max_blocks));
+       if ((ctrl->oncs & NVME_CTRL_ONCS_WRITE_ZEROES) &&
+           !(ctrl->quirks & NVME_QUIRK_DISABLE_WRITE_ZEROES))
+               blk_queue_max_write_zeroes_sectors(q, ctrl->max_hw_sectors);
 }
 
 static bool nvme_ns_ids_valid(struct nvme_ns_ids *ids)
@@ -2158,7 +2137,7 @@ static void nvme_update_disk_info(struct gendisk *disk,
        set_capacity_and_notify(disk, capacity);
 
        nvme_config_discard(disk, ns);
-       nvme_config_write_zeroes(disk, ns);
+       nvme_config_write_zeroes(disk->queue, ns->ctrl);
 
        set_disk_ro(disk, (id->nsattr & NVME_NS_ATTR_RO) ||
                test_bit(NVME_NS_FORCE_RO, &ns->flags));
@@ -4038,7 +4017,7 @@ static void nvme_ns_remove_by_nsid(struct nvme_ctrl *ctrl, u32 nsid)
 static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
 {
        struct nvme_id_ns *id;
-       int ret = -ENODEV;
+       int ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
 
        if (test_bit(NVME_NS_DEAD, &ns->flags))
                goto out;
@@ -4047,7 +4026,7 @@ static void nvme_validate_ns(struct nvme_ns *ns, struct nvme_ns_ids *ids)
        if (ret)
                goto out;
 
-       ret = -ENODEV;
+       ret = NVME_SC_INVALID_NS | NVME_SC_DNR;
        if (!nvme_ns_ids_equal(&ns->head->ids, ids)) {
                dev_err(ns->ctrl->device,
                        "identifiers changed for nsid %d\n", ns->head->ns_id);
@@ -4065,7 +4044,7 @@ out:
         *
         * TODO: we should probably schedule a delayed retry here.
         */
-       if (ret && ret != -ENOMEM && !(ret > 0 && !(ret & NVME_SC_DNR)))
+       if (ret > 0 && (ret & NVME_SC_DNR))
                nvme_ns_remove(ns);
 }
 
@@ -4095,6 +4074,12 @@ static void nvme_validate_or_alloc_ns(struct nvme_ctrl *ctrl, unsigned nsid)
                                nsid);
                        break;
                }
+               if (!nvme_multi_css(ctrl)) {
+                       dev_warn(ctrl->device,
+                               "command set not reported for nsid: %d\n",
+                               nsid);
+                       break;
+               }
                nvme_alloc_ns(ctrl, nsid, &ids);
                break;
        default:
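
Two threads run through this core.c diff: keep-alive allocation is inlined with BLK_MQ_REQ_NOWAIT so a tag shortage fails fast instead of blocking the work item, and the validate/identify paths now return NVMe status words (NVME_SC_INVALID_NS | NVME_SC_DNR) instead of -ENODEV, which collapses the final removal check to ret > 0 && (ret & NVME_SC_DNR). A hedged sketch of that status convention (the helper is illustrative, not from the patch):

static bool example_status_is_permanent(int ret)
{
        /* negative: a Linux errno (e.g. -ENOMEM), treated as transient */
        if (ret < 0)
                return false;

        /* positive: an NVMe status word; DNR means "do not retry" */
        return ret & NVME_SC_DNR;
}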
index 5dfd806..604ab0e 100644 (file)
@@ -630,7 +630,7 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
        opts->queue_size = NVMF_DEF_QUEUE_SIZE;
        opts->nr_io_queues = num_online_cpus();
        opts->reconnect_delay = NVMF_DEF_RECONNECT_DELAY;
-       opts->kato = NVME_DEFAULT_KATO;
+       opts->kato = 0;
        opts->duplicate_connect = false;
        opts->fast_io_fail_tmo = NVMF_DEF_FAIL_FAST_TMO;
        opts->hdr_digest = false;
@@ -893,6 +893,9 @@ static int nvmf_parse_options(struct nvmf_ctrl_options *opts,
                opts->nr_write_queues = 0;
                opts->nr_poll_queues = 0;
                opts->duplicate_connect = true;
+       } else {
+               if (!opts->kato)
+                       opts->kato = NVME_DEFAULT_KATO;
        }
        if (ctrl_loss_tmo < 0) {
                opts->max_reconnects = -1;
index 733010d..888b108 100644 (file)
 #define NVMF_DEF_FAIL_FAST_TMO         -1
 
 /*
+ * Reserve one command for internal usage.  This command is used for sending
+ * the connect command, as well as for the keep-alive command on the admin
+ * queue once it is live.
+ */
+#define NVMF_RESERVED_TAGS     1
+
+/*
  * Define a host as seen by the target.  We allocate one at boot, but also
 * allow overriding it when creating controllers.  This is both to provide
  * persistence of the Host NQN over multiple boots, and to allow using
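
NVMF_RESERVED_TAGS replaces the scattered literal 1 and 2 reserved-tag counts used by the FC, RDMA, TCP, and loop transports below. One tag is enough even on the admin queue because the two uses are serialized: connect needs the tag only while the queue is being established, after which the same reservation serves keep-alive. A hedged sketch of claiming a reserved tag (tag-set and queue setup elided; example_* is hypothetical):

static int example_send_internal_cmd(struct request_queue *q)
{
        struct request *rq;

        /*
         * Reserved tags are carved out of the tag set at creation time
         * (set->reserved_tags = NVMF_RESERVED_TAGS), so this allocation
         * cannot be starved by normal I/O; NOWAIT makes it fail fast
         * rather than sleep waiting for a free tag.
         */
        rq = blk_mq_alloc_request(q, REQ_OP_DRV_OUT,
                                  BLK_MQ_REQ_RESERVED | BLK_MQ_REQ_NOWAIT);
        if (IS_ERR(rq))
                return PTR_ERR(rq);

        /* ... fill in the command and execute it ... */
        blk_mq_free_request(rq);
        return 0;
}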
index 20dadd8..6ffa8de 100644 (file)
@@ -1956,7 +1956,7 @@ nvme_fc_fcpio_done(struct nvmefc_fcp_req *req)
                                sizeof(op->rsp_iu), DMA_FROM_DEVICE);
 
        if (opstate == FCPOP_STATE_ABORTED)
-               status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
+               status = cpu_to_le16(NVME_SC_HOST_ABORTED_CMD << 1);
        else if (freq->status) {
                status = cpu_to_le16(NVME_SC_HOST_PATH_ERROR << 1);
                dev_info(ctrl->ctrl.device,
@@ -2055,7 +2055,7 @@ done:
                nvme_fc_complete_rq(rq);
 
 check_error:
-       if (terminate_assoc)
+       if (terminate_assoc && ctrl->ctrl.state != NVME_CTRL_RESETTING)
                queue_work(nvme_reset_wq, &ctrl->ioerr_work);
 }
 
@@ -2443,6 +2443,7 @@ nvme_fc_terminate_exchange(struct request *req, void *data, bool reserved)
        struct nvme_fc_ctrl *ctrl = to_fc_ctrl(nctrl);
        struct nvme_fc_fcp_op *op = blk_mq_rq_to_pdu(req);
 
+       op->nreq.flags |= NVME_REQ_CANCELLED;
        __nvme_fc_abort_op(ctrl, op);
        return true;
 }
@@ -2862,7 +2863,7 @@ nvme_fc_create_io_queues(struct nvme_fc_ctrl *ctrl)
        memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
        ctrl->tag_set.ops = &nvme_fc_mq_ops;
        ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
-       ctrl->tag_set.reserved_tags = 1; /* fabric connect */
+       ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
        ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
        ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
        ctrl->tag_set.cmd_size =
@@ -3484,7 +3485,7 @@ nvme_fc_init_ctrl(struct device *dev, struct nvmf_ctrl_options *opts,
        memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
        ctrl->admin_tag_set.ops = &nvme_fc_admin_mq_ops;
        ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-       ctrl->admin_tag_set.reserved_tags = 2; /* fabric connect + Keep-Alive */
+       ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
        ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
        ctrl->admin_tag_set.cmd_size =
                struct_size((struct nvme_fcp_op_w_sgl *)NULL, priv,
index 8f9e969..0a586d7 100644 (file)
@@ -248,6 +248,7 @@ int nvme_hwmon_init(struct nvme_ctrl *ctrl)
        if (IS_ERR(hwmon)) {
                dev_warn(dev, "Failed to instantiate hwmon device\n");
                kfree(data);
+               return PTR_ERR(hwmon);
        }
        ctrl->hwmon_device = hwmon;
        return 0;
index 7b6632c..7249ae7 100644 (file)
@@ -2632,6 +2632,7 @@ static void nvme_reset_work(struct work_struct *work)
         * Don't limit the IOMMU merged segment size.
         */
        dma_set_max_seg_size(dev->dev, 0xffffffff);
+       dma_set_min_align_mask(dev->dev, NVME_CTRL_PAGE_SIZE - 1);
 
        mutex_unlock(&dev->shutdown_lock);
 
@@ -3233,7 +3234,8 @@ static const struct pci_device_id nvme_id_table[] = {
        { PCI_DEVICE(0x126f, 0x2263),   /* Silicon Motion unidentified */
                .driver_data = NVME_QUIRK_NO_NS_DESC_LIST, },
        { PCI_DEVICE(0x1bb1, 0x0100),   /* Seagate Nytro Flash Storage */
-               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
+               .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
+                               NVME_QUIRK_NO_NS_DESC_LIST, },
        { PCI_DEVICE(0x1c58, 0x0003),   /* HGST adapter */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x1c58, 0x0023),   /* WDC SN200 adapter */
@@ -3244,9 +3246,13 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY, },
        { PCI_DEVICE(0x144d, 0xa822),   /* Samsung PM1725a */
                .driver_data = NVME_QUIRK_DELAY_BEFORE_CHK_RDY |
+                               NVME_QUIRK_DISABLE_WRITE_ZEROES|
                                NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_DEVICE(0x1987, 0x5016),   /* Phison E16 */
                .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
+       { PCI_DEVICE(0x1b4b, 0x1092),   /* Lexar 256 GB SSD */
+               .driver_data = NVME_QUIRK_NO_NS_DESC_LIST |
+                               NVME_QUIRK_IGNORE_DEV_SUBNQN, },
        { PCI_DEVICE(0x1d1d, 0x1f1f),   /* LighNVM qemu device */
                .driver_data = NVME_QUIRK_LIGHTNVM, },
        { PCI_DEVICE(0x1d1d, 0x2807),   /* CNEX WL */
@@ -3264,6 +3270,8 @@ static const struct pci_device_id nvme_id_table[] = {
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
        { PCI_DEVICE(0x1d97, 0x2263),   /* SPCC */
                .driver_data = NVME_QUIRK_DISABLE_WRITE_ZEROES, },
+       { PCI_DEVICE(0x2646, 0x2262),   /* KINGSTON SKC2000 NVMe SSD */
+               .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
        { PCI_DEVICE(0x2646, 0x2263),   /* KINGSTON A2000 NVMe SSD  */
                .driver_data = NVME_QUIRK_NO_DEEPEST_PS, },
        { PCI_DEVICE(PCI_VENDOR_ID_AMAZON, 0x0061),
index 53ac4d7..be905d4 100644 (file)
@@ -736,8 +736,11 @@ static int nvme_rdma_alloc_io_queues(struct nvme_rdma_ctrl *ctrl)
                return ret;
 
        ctrl->ctrl.queue_count = nr_io_queues + 1;
-       if (ctrl->ctrl.queue_count < 2)
-               return 0;
+       if (ctrl->ctrl.queue_count < 2) {
+               dev_err(ctrl->ctrl.device,
+                       "unable to set any I/O queues\n");
+               return -ENOMEM;
+       }
 
        dev_info(ctrl->ctrl.device,
                "creating %d I/O queues.\n", nr_io_queues);
@@ -798,7 +801,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_rdma_admin_mq_ops;
                set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-               set->reserved_tags = 2; /* connect + keep-alive */
+               set->reserved_tags = NVMF_RESERVED_TAGS;
                set->numa_node = nctrl->numa_node;
                set->cmd_size = sizeof(struct nvme_rdma_request) +
                                NVME_RDMA_DATA_SGL_SIZE;
@@ -811,7 +814,7 @@ static struct blk_mq_tag_set *nvme_rdma_alloc_tagset(struct nvme_ctrl *nctrl,
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_rdma_mq_ops;
                set->queue_depth = nctrl->sqsize + 1;
-               set->reserved_tags = 1; /* fabric connect */
+               set->reserved_tags = NVMF_RESERVED_TAGS;
                set->numa_node = nctrl->numa_node;
                set->flags = BLK_MQ_F_SHOULD_MERGE;
                set->cmd_size = sizeof(struct nvme_rdma_request) +
index 69f59d2..a0f00cb 100644 (file)
@@ -287,7 +287,7 @@ static inline void nvme_tcp_queue_request(struct nvme_tcp_request *req,
         * directly, otherwise queue io_work. Also, only do that if we
         * are on the same cpu, so we don't introduce contention.
         */
-       if (queue->io_cpu == __smp_processor_id() &&
+       if (queue->io_cpu == raw_smp_processor_id() &&
            sync && empty && mutex_trylock(&queue->send_mutex)) {
                queue->more_requests = !last;
                nvme_tcp_send_all(queue);
@@ -568,6 +568,13 @@ static int nvme_tcp_setup_h2c_data_pdu(struct nvme_tcp_request *req,
        req->pdu_len = le32_to_cpu(pdu->r2t_length);
        req->pdu_sent = 0;
 
+       if (unlikely(!req->pdu_len)) {
+               dev_err(queue->ctrl->ctrl.device,
+                       "req %d r2t len is %u, probably a bug...\n",
+                       rq->tag, req->pdu_len);
+               return -EPROTO;
+       }
+
        if (unlikely(req->data_sent + req->pdu_len > req->data_len)) {
                dev_err(queue->ctrl->ctrl.device,
                        "req %d r2t len %u exceeded data len %u (%zu sent)\n",
@@ -1575,7 +1582,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_tcp_admin_mq_ops;
                set->queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-               set->reserved_tags = 2; /* connect + keep-alive */
+               set->reserved_tags = NVMF_RESERVED_TAGS;
                set->numa_node = nctrl->numa_node;
                set->flags = BLK_MQ_F_BLOCKING;
                set->cmd_size = sizeof(struct nvme_tcp_request);
@@ -1587,7 +1594,7 @@ static struct blk_mq_tag_set *nvme_tcp_alloc_tagset(struct nvme_ctrl *nctrl,
                memset(set, 0, sizeof(*set));
                set->ops = &nvme_tcp_mq_ops;
                set->queue_depth = nctrl->sqsize + 1;
-               set->reserved_tags = 1; /* fabric connect */
+               set->reserved_tags = NVMF_RESERVED_TAGS;
                set->numa_node = nctrl->numa_node;
                set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
                set->cmd_size = sizeof(struct nvme_tcp_request);
@@ -1745,8 +1752,11 @@ static int nvme_tcp_alloc_io_queues(struct nvme_ctrl *ctrl)
                return ret;
 
        ctrl->queue_count = nr_io_queues + 1;
-       if (ctrl->queue_count < 2)
-               return 0;
+       if (ctrl->queue_count < 2) {
+               dev_err(ctrl->device,
+                       "unable to set any I/O queues\n");
+               return -ENOMEM;
+       }
 
        dev_info(ctrl->device,
                "creating %d I/O queues.\n", nr_io_queues);
index c7e3ec5..bc2f344 100644 (file)
@@ -9,7 +9,13 @@
 
 int nvme_revalidate_zones(struct nvme_ns *ns)
 {
-       return blk_revalidate_disk_zones(ns->disk, NULL);
+       struct request_queue *q = ns->queue;
+       int ret;
+
+       ret = blk_revalidate_disk_zones(ns->disk, NULL);
+       if (!ret)
+               blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
+       return ret;
 }
 
 static int nvme_set_max_append(struct nvme_ctrl *ctrl)
@@ -107,7 +113,6 @@ int nvme_update_zone_info(struct nvme_ns *ns, unsigned lbaf)
        blk_queue_flag_set(QUEUE_FLAG_ZONE_RESETALL, q);
        blk_queue_max_open_zones(q, le32_to_cpu(id->mor) + 1);
        blk_queue_max_active_zones(q, le32_to_cpu(id->mar) + 1);
-       blk_queue_max_zone_append_sectors(q, ns->ctrl->max_zone_append);
 free_data:
        kfree(id);
        return status;
index bc6a774..fe6b8aa 100644 (file)
@@ -313,27 +313,40 @@ static void nvmet_execute_get_log_page(struct nvmet_req *req)
        nvmet_req_complete(req, NVME_SC_INVALID_FIELD | NVME_SC_DNR);
 }
 
-static void nvmet_id_set_model_number(struct nvme_id_ctrl *id,
-                                     struct nvmet_subsys *subsys)
+static u16 nvmet_set_model_number(struct nvmet_subsys *subsys)
 {
-       const char *model = NVMET_DEFAULT_CTRL_MODEL;
-       struct nvmet_subsys_model *subsys_model;
+       u16 status = 0;
+
+       mutex_lock(&subsys->lock);
+       if (!subsys->model_number) {
+               subsys->model_number =
+                       kstrdup(NVMET_DEFAULT_CTRL_MODEL, GFP_KERNEL);
+               if (!subsys->model_number)
+                       status = NVME_SC_INTERNAL;
+       }
+       mutex_unlock(&subsys->lock);
 
-       rcu_read_lock();
-       subsys_model = rcu_dereference(subsys->model);
-       if (subsys_model)
-               model = subsys_model->number;
-       memcpy_and_pad(id->mn, sizeof(id->mn), model, strlen(model), ' ');
-       rcu_read_unlock();
+       return status;
 }
 
 static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
 {
        struct nvmet_ctrl *ctrl = req->sq->ctrl;
+       struct nvmet_subsys *subsys = ctrl->subsys;
        struct nvme_id_ctrl *id;
        u32 cmd_capsule_size;
        u16 status = 0;
 
+       /*
+        * If there is no model number yet, set it now.  It will then remain
+        * stable for the lifetime of the subsystem.
+        */
+       if (!subsys->model_number) {
+               status = nvmet_set_model_number(subsys);
+               if (status)
+                       goto out;
+       }
+
        id = kzalloc(sizeof(*id), GFP_KERNEL);
        if (!id) {
                status = NVME_SC_INTERNAL;
@@ -347,7 +360,8 @@ static void nvmet_execute_identify_ctrl(struct nvmet_req *req)
        memset(id->sn, ' ', sizeof(id->sn));
        bin2hex(id->sn, &ctrl->subsys->serial,
                min(sizeof(ctrl->subsys->serial), sizeof(id->sn) / 2));
-       nvmet_id_set_model_number(id, ctrl->subsys);
+       memcpy_and_pad(id->mn, sizeof(id->mn), subsys->model_number,
+                      strlen(subsys->model_number), ' ');
        memcpy_and_pad(id->fr, sizeof(id->fr),
                       UTS_RELEASE, strlen(UTS_RELEASE), ' ');
 
index 635a7cb..e5dbd19 100644 (file)
@@ -1118,16 +1118,12 @@ static ssize_t nvmet_subsys_attr_model_show(struct config_item *item,
                                            char *page)
 {
        struct nvmet_subsys *subsys = to_subsys(item);
-       struct nvmet_subsys_model *subsys_model;
-       char *model = NVMET_DEFAULT_CTRL_MODEL;
        int ret;
 
-       rcu_read_lock();
-       subsys_model = rcu_dereference(subsys->model);
-       if (subsys_model)
-               model = subsys_model->number;
-       ret = snprintf(page, PAGE_SIZE, "%s\n", model);
-       rcu_read_unlock();
+       mutex_lock(&subsys->lock);
+       ret = snprintf(page, PAGE_SIZE, "%s\n", subsys->model_number ?
+                       subsys->model_number : NVMET_DEFAULT_CTRL_MODEL);
+       mutex_unlock(&subsys->lock);
 
        return ret;
 }
@@ -1138,14 +1134,17 @@ static bool nvmet_is_ascii(const char c)
        return c >= 0x20 && c <= 0x7e;
 }
 
-static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
-                                            const char *page, size_t count)
+static ssize_t nvmet_subsys_attr_model_store_locked(struct nvmet_subsys *subsys,
+               const char *page, size_t count)
 {
-       struct nvmet_subsys *subsys = to_subsys(item);
-       struct nvmet_subsys_model *new_model;
-       char *new_model_number;
        int pos = 0, len;
 
+       if (subsys->model_number) {
+               pr_err("Can't set model number. %s is already assigned\n",
+                      subsys->model_number);
+               return -EINVAL;
+       }
+
        len = strcspn(page, "\n");
        if (!len)
                return -EINVAL;
@@ -1155,28 +1154,25 @@ static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
                        return -EINVAL;
        }
 
-       new_model_number = kmemdup_nul(page, len, GFP_KERNEL);
-       if (!new_model_number)
+       subsys->model_number = kmemdup_nul(page, len, GFP_KERNEL);
+       if (!subsys->model_number)
                return -ENOMEM;
+       return count;
+}
 
-       new_model = kzalloc(sizeof(*new_model) + len + 1, GFP_KERNEL);
-       if (!new_model) {
-               kfree(new_model_number);
-               return -ENOMEM;
-       }
-       memcpy(new_model->number, new_model_number, len);
+static ssize_t nvmet_subsys_attr_model_store(struct config_item *item,
+                                            const char *page, size_t count)
+{
+       struct nvmet_subsys *subsys = to_subsys(item);
+       ssize_t ret;
 
        down_write(&nvmet_config_sem);
        mutex_lock(&subsys->lock);
-       new_model = rcu_replace_pointer(subsys->model, new_model,
-                                       mutex_is_locked(&subsys->lock));
+       ret = nvmet_subsys_attr_model_store_locked(subsys, page, count);
        mutex_unlock(&subsys->lock);
        up_write(&nvmet_config_sem);
 
-       kfree_rcu(new_model, rcuhead);
-       kfree(new_model_number);
-
-       return count;
+       return ret;
 }
 CONFIGFS_ATTR(nvmet_subsys_, attr_model);
 
index 67bbf0e..a027433 100644 (file)
@@ -1118,9 +1118,20 @@ static void nvmet_start_ctrl(struct nvmet_ctrl *ctrl)
 {
        lockdep_assert_held(&ctrl->lock);
 
-       if (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
-           nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES ||
-           nvmet_cc_mps(ctrl->cc) != 0 ||
+       /*
+        * Only I/O controllers should verify iosqes and iocqes.
+        * Strictly speaking, the spec says a discovery controller
+        * should verify that iosqes and iocqes are zeroed; however,
+        * that would break backwards compatibility, so don't enforce it.
+        */
+       if (ctrl->subsys->type != NVME_NQN_DISC &&
+           (nvmet_cc_iosqes(ctrl->cc) != NVME_NVM_IOSQES ||
+            nvmet_cc_iocqes(ctrl->cc) != NVME_NVM_IOCQES)) {
+               ctrl->csts = NVME_CSTS_CFS;
+               return;
+       }
+
+       if (nvmet_cc_mps(ctrl->cc) != 0 ||
            nvmet_cc_ams(ctrl->cc) != 0 ||
            nvmet_cc_css(ctrl->cc) != 0) {
                ctrl->csts = NVME_CSTS_CFS;
@@ -1532,7 +1543,7 @@ static void nvmet_subsys_free(struct kref *ref)
        nvmet_passthru_subsys_free(subsys);
 
        kfree(subsys->subsysnqn);
-       kfree_rcu(subsys->model, rcuhead);
+       kfree(subsys->model_number);
        kfree(subsys);
 }
 
index 3d9a5d3..9a8b372 100644 (file)
@@ -185,7 +185,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
        }
 
        bip = bio_integrity_alloc(bio, GFP_NOIO,
-               min_t(unsigned int, req->metadata_sg_cnt, BIO_MAX_PAGES));
+                                       bio_max_segs(req->metadata_sg_cnt));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
@@ -225,7 +225,7 @@ static int nvmet_bdev_alloc_bip(struct nvmet_req *req, struct bio *bio,
 
 static void nvmet_bdev_execute_rw(struct nvmet_req *req)
 {
-       int sg_cnt = req->sg_cnt;
+       unsigned int sg_cnt = req->sg_cnt;
        struct bio *bio;
        struct scatterlist *sg;
        struct blk_plug plug;
@@ -262,7 +262,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
                bio = &req->b.inline_bio;
                bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        } else {
-               bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+               bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
        }
        bio_set_dev(bio, req->ns->bdev);
        bio->bi_iter.bi_sector = sector;
@@ -289,7 +289,7 @@ static void nvmet_bdev_execute_rw(struct nvmet_req *req)
                                }
                        }
 
-                       bio = bio_alloc(GFP_KERNEL, min(sg_cnt, BIO_MAX_PAGES));
+                       bio = bio_alloc(GFP_KERNEL, bio_max_segs(sg_cnt));
                        bio_set_dev(bio, req->ns->bdev);
                        bio->bi_iter.bi_sector = sector;
                        bio->bi_opf = op;
index cb6f865..3e189e7 100644 (file)
@@ -349,7 +349,7 @@ static int nvme_loop_configure_admin_queue(struct nvme_loop_ctrl *ctrl)
        memset(&ctrl->admin_tag_set, 0, sizeof(ctrl->admin_tag_set));
        ctrl->admin_tag_set.ops = &nvme_loop_admin_mq_ops;
        ctrl->admin_tag_set.queue_depth = NVME_AQ_MQ_TAG_DEPTH;
-       ctrl->admin_tag_set.reserved_tags = 2; /* connect + keep-alive */
+       ctrl->admin_tag_set.reserved_tags = NVMF_RESERVED_TAGS;
        ctrl->admin_tag_set.numa_node = ctrl->ctrl.numa_node;
        ctrl->admin_tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
                NVME_INLINE_SG_CNT * sizeof(struct scatterlist);
@@ -520,7 +520,7 @@ static int nvme_loop_create_io_queues(struct nvme_loop_ctrl *ctrl)
        memset(&ctrl->tag_set, 0, sizeof(ctrl->tag_set));
        ctrl->tag_set.ops = &nvme_loop_mq_ops;
        ctrl->tag_set.queue_depth = ctrl->ctrl.opts->queue_size;
-       ctrl->tag_set.reserved_tags = 1; /* fabric connect */
+       ctrl->tag_set.reserved_tags = NVMF_RESERVED_TAGS;
        ctrl->tag_set.numa_node = ctrl->ctrl.numa_node;
        ctrl->tag_set.flags = BLK_MQ_F_SHOULD_MERGE;
        ctrl->tag_set.cmd_size = sizeof(struct nvme_loop_iod) +
index cdfa537..4b84edb 100644 (file)
@@ -208,11 +208,6 @@ struct nvmet_ctrl {
        bool                    pi_support;
 };
 
-struct nvmet_subsys_model {
-       struct rcu_head         rcuhead;
-       char                    number[];
-};
-
 struct nvmet_subsys {
        enum nvme_subsys_type   type;
 
@@ -242,7 +237,7 @@ struct nvmet_subsys {
        struct config_group     namespaces_group;
        struct config_group     allowed_hosts_group;
 
-       struct nvmet_subsys_model       __rcu *model;
+       char                    *model_number;
 
 #ifdef CONFIG_NVME_TARGET_PASSTHRU
        struct nvme_ctrl        *passthru_ctrl;
index f50c7b2..2798944 100644 (file)
@@ -26,7 +26,7 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
        struct nvme_ctrl *pctrl = ctrl->subsys->passthru_ctrl;
        u16 status = NVME_SC_SUCCESS;
        struct nvme_id_ctrl *id;
-       int max_hw_sectors;
+       unsigned int max_hw_sectors;
        int page_shift;
 
        id = kzalloc(sizeof(*id), GFP_KERNEL);
@@ -50,9 +50,9 @@ static u16 nvmet_passthru_override_id_ctrl(struct nvmet_req *req)
 
        /*
         * nvmet_passthru_map_sg is limited to using a single bio, so limit
-        * the mdts based on BIO_MAX_PAGES as well
+        * the mdts based on BIO_MAX_VECS as well
         */
-       max_hw_sectors = min_not_zero(BIO_MAX_PAGES << (PAGE_SHIFT - 9),
+       max_hw_sectors = min_not_zero(BIO_MAX_VECS << (PAGE_SHIFT - 9),
                                      max_hw_sectors);
 
        page_shift = NVME_CAP_MPSMIN(ctrl->cap) + 12;
@@ -191,14 +191,14 @@ static int nvmet_passthru_map_sg(struct nvmet_req *req, struct request *rq)
        struct bio *bio;
        int i;
 
-       if (req->sg_cnt > BIO_MAX_PAGES)
+       if (req->sg_cnt > BIO_MAX_VECS)
                return -EINVAL;
 
        if (req->transfer_len <= NVMET_MAX_INLINE_DATA_LEN) {
                bio = &req->p.inline_bio;
                bio_init(bio, req->inline_bvec, ARRAY_SIZE(req->inline_bvec));
        } else {
-               bio = bio_alloc(GFP_KERNEL, min(req->sg_cnt, BIO_MAX_PAGES));
+               bio = bio_alloc(GFP_KERNEL, bio_max_segs(req->sg_cnt));
                bio->bi_end_io = bio_put;
        }
        bio->bi_opf = req_op(rq);
index 06b6b74..6c1f3ab 100644 (file)
@@ -802,9 +802,8 @@ static void nvmet_rdma_write_data_done(struct ib_cq *cq, struct ib_wc *wc)
                nvmet_req_uninit(&rsp->req);
                nvmet_rdma_release_rsp(rsp);
                if (wc->status != IB_WC_WR_FLUSH_ERR) {
-                       pr_info("RDMA WRITE for CQE 0x%p failed with status %s (%d).\n",
-                               wc->wr_cqe, ib_wc_status_msg(wc->status),
-                               wc->status);
+                       pr_info("RDMA WRITE for CQE failed with status %s (%d).\n",
+                               ib_wc_status_msg(wc->status), wc->status);
                        nvmet_rdma_error_comp(queue);
                }
                return;
index 8b0485a..d658c6e 100644 (file)
@@ -1098,11 +1098,11 @@ static int nvmet_tcp_try_recv_data(struct nvmet_tcp_queue *queue)
                cmd->rbytes_done += ret;
        }
 
+       nvmet_tcp_unmap_pdu_iovec(cmd);
        if (queue->data_digest) {
                nvmet_tcp_prep_recv_ddgst(cmd);
                return 0;
        }
-       nvmet_tcp_unmap_pdu_iovec(cmd);
 
        if (!(cmd->flags & NVMET_TCP_F_INIT_FAILED) &&
            cmd->rbytes_done == cmd->req.transfer_len) {
index 954d3b4..75d2594 100644 (file)
@@ -270,4 +270,12 @@ config SPRD_EFUSE
          This driver can also be built as a module. If so, the module
          will be called nvmem-sprd-efuse.
 
+config NVMEM_RMEM
+       tristate "Reserved Memory Based Driver Support"
+       help
+         This driver maps reserved memory into an nvmem device. It might be
+         useful to expose information left by firmware in memory.
+
+         This driver can also be built as a module. If so, the module
+         will be called nvmem-rmem.
 endif
index a7c3772..5376b8e 100644 (file)
@@ -55,3 +55,5 @@ obj-$(CONFIG_NVMEM_ZYNQMP)    += nvmem_zynqmp_nvmem.o
 nvmem_zynqmp_nvmem-y           := zynqmp_nvmem.o
 obj-$(CONFIG_SPRD_EFUSE)       += nvmem_sprd_efuse.o
 nvmem_sprd_efuse-y             := sprd-efuse.o
+obj-$(CONFIG_NVMEM_RMEM)       += nvmem-rmem.o
+nvmem-rmem-y                   := rmem.o
index 177f5bf..a5ab1e0 100644 (file)
@@ -682,7 +682,9 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
 
        for_each_child_of_node(parent, child) {
                addr = of_get_property(child, "reg", &len);
-               if (!addr || (len < 2 * sizeof(u32))) {
+               if (!addr)
+                       continue;
+               if (len < 2 * sizeof(u32)) {
                        dev_err(dev, "nvmem: invalid reg on %pOF\n", child);
                        return -EINVAL;
                }
@@ -713,6 +715,7 @@ static int nvmem_add_cells_from_of(struct nvmem_device *nvmem)
                                cell->name, nvmem->stride);
                        /* Cells already added will be freed later. */
                        kfree_const(cell->name);
+                       of_node_put(cell->np);
                        kfree(cell);
                        return -EINVAL;
                }
index 701704b..c86339a 100644 (file)
@@ -96,7 +96,6 @@ MODULE_DEVICE_TABLE(of, imx_iim_dt_ids);
 
 static int imx_iim_probe(struct platform_device *pdev)
 {
-       const struct of_device_id *of_id;
        struct device *dev = &pdev->dev;
        struct iim_priv *iim;
        struct nvmem_device *nvmem;
@@ -111,11 +110,7 @@ static int imx_iim_probe(struct platform_device *pdev)
        if (IS_ERR(iim->base))
                return PTR_ERR(iim->base);
 
-       of_id = of_match_device(imx_iim_dt_ids, dev);
-       if (!of_id)
-               return -ENODEV;
-
-       drvdata = of_id->data;
+       drvdata = of_device_get_match_data(&pdev->dev);
 
        iim->clk = devm_clk_get(dev, NULL);
        if (IS_ERR(iim->clk))
index a72704c..f6e9f96 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2017, 2020 The Linux Foundation. All rights reserved.
+ * Copyright (c) 2017, 2020-2021, The Linux Foundation. All rights reserved.
  */
 
 #include <linux/device.h>
@@ -18,7 +18,6 @@
 #define SDAM_PBS_TRIG_CLR              0xE6
 
 struct sdam_chip {
-       struct platform_device          *pdev;
        struct regmap                   *regmap;
        struct nvmem_config             sdam_config;
        unsigned int                    base;
@@ -65,7 +64,7 @@ static int sdam_read(void *priv, unsigned int offset, void *val,
                                size_t bytes)
 {
        struct sdam_chip *sdam = priv;
-       struct device *dev = &sdam->pdev->dev;
+       struct device *dev = sdam->sdam_config.dev;
        int rc;
 
        if (!sdam_is_valid(sdam, offset, bytes)) {
@@ -86,7 +85,7 @@ static int sdam_write(void *priv, unsigned int offset, void *val,
                                size_t bytes)
 {
        struct sdam_chip *sdam = priv;
-       struct device *dev = &sdam->pdev->dev;
+       struct device *dev = sdam->sdam_config.dev;
        int rc;
 
        if (!sdam_is_valid(sdam, offset, bytes)) {
diff --git a/drivers/nvmem/rmem.c b/drivers/nvmem/rmem.c
new file mode 100644 (file)
index 0000000..b11c3c9
--- /dev/null
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Copyright (C) 2020 Nicolas Saenz Julienne <nsaenzjulienne@suse.de>
+ */
+
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/nvmem-provider.h>
+#include <linux/of_reserved_mem.h>
+#include <linux/platform_device.h>
+
+struct rmem {
+       struct device *dev;
+       struct nvmem_device *nvmem;
+       struct reserved_mem *mem;
+
+       phys_addr_t size;
+};
+
+static int rmem_read(void *context, unsigned int offset,
+                    void *val, size_t bytes)
+{
+       struct rmem *priv = context;
+       size_t available = priv->mem->size;
+       loff_t off = offset;
+       void *addr;
+       int count;
+
+       /*
+        * Only map the reserved memory at this point to avoid potential rogue
+        * kernel threads inadvertently modifying it. Based on the current
+        * use-cases for this driver, the performance hit isn't a concern.
+        * Nor is it likely to be, given the nature of the subsystem. Most nvmem
+        * devices operate over slow buses to begin with.
+        *
+        * An alternative would be setting the memory as RO, set_memory_ro(),
+        * but as of Dec 2020 this isn't possible on arm64.
+        */
+       addr = memremap(priv->mem->base, available, MEMREMAP_WB);
+       if (IS_ERR(addr)) {
+               dev_err(priv->dev, "Failed to remap memory region\n");
+               return PTR_ERR(addr);
+       }
+
+       count = memory_read_from_buffer(val, bytes, &off, addr, available);
+
+       memunmap(addr);
+
+       return count;
+}
+
+static int rmem_probe(struct platform_device *pdev)
+{
+       struct nvmem_config config = { };
+       struct device *dev = &pdev->dev;
+       struct reserved_mem *mem;
+       struct rmem *priv;
+
+       priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+       if (!priv)
+               return -ENOMEM;
+       priv->dev = dev;
+
+       mem = of_reserved_mem_lookup(dev->of_node);
+       if (!mem) {
+               dev_err(dev, "Failed to lookup reserved memory\n");
+               return -EINVAL;
+       }
+       priv->mem = mem;
+
+       config.dev = dev;
+       config.priv = priv;
+       config.name = "rmem";
+       config.size = mem->size;
+       config.reg_read = rmem_read;
+
+       return PTR_ERR_OR_ZERO(devm_nvmem_register(dev, &config));
+}
+
+static const struct of_device_id rmem_match[] = {
+       { .compatible = "nvmem-rmem", },
+       { /* sentinel */ },
+};
+MODULE_DEVICE_TABLE(of, rmem_match);
+
+static struct platform_driver rmem_driver = {
+       .probe = rmem_probe,
+       .driver = {
+               .name = "rmem",
+               .of_match_table = rmem_match,
+       },
+};
+module_platform_driver(rmem_driver);
+
+MODULE_AUTHOR("Nicolas Saenz Julienne <nsaenzjulienne@suse.de>");
+MODULE_DESCRIPTION("Reserved Memory Based nvmem Driver");
+MODULE_LICENSE("GPL");
index 020bf86..0da8620 100644 (file)
@@ -511,6 +511,7 @@ static const struct of_device_id reserved_mem_matches[] = {
        { .compatible = "qcom,rmtfs-mem" },
        { .compatible = "qcom,cmd-db" },
        { .compatible = "ramoops" },
+       { .compatible = "nvmem-rmem" },
        {}
 };
 
index 6d8368b..78427c8 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/of.h>
 #include <linux/of_device.h>
 #include <linux/of_graph.h>
+#include <linux/of_irq.h>
 #include <linux/string.h>
 #include <linux/moduleparam.h>
 
@@ -1102,7 +1103,9 @@ static int of_link_to_phandle(struct device_node *con_np,
         * created for them.
         */
        sup_dev = get_dev_from_fwnode(&sup_np->fwnode);
-       if (!sup_dev && of_node_check_flag(sup_np, OF_POPULATED)) {
+       if (!sup_dev &&
+           (of_node_check_flag(sup_np, OF_POPULATED) ||
+            sup_np->fwnode.flags & FWNODE_FLAG_NOT_DEVICE)) {
                pr_debug("Not linking %pOFP to %pOFP - No struct device\n",
                         con_np, sup_np);
                of_node_put(sup_np);
@@ -1232,6 +1235,7 @@ static struct device_node *parse_##fname(struct device_node *np,       \
 struct supplier_bindings {
        struct device_node *(*parse_prop)(struct device_node *np,
                                          const char *prop_name, int index);
+       bool optional;
 };
 
 DEFINE_SIMPLE_PROP(clocks, "clocks", "#clock-cells")
@@ -1244,8 +1248,6 @@ DEFINE_SIMPLE_PROP(dmas, "dmas", "#dma-cells")
 DEFINE_SIMPLE_PROP(power_domains, "power-domains", "#power-domain-cells")
 DEFINE_SIMPLE_PROP(hwlocks, "hwlocks", "#hwlock-cells")
 DEFINE_SIMPLE_PROP(extcon, "extcon", NULL)
-DEFINE_SIMPLE_PROP(interrupts_extended, "interrupts-extended",
-                                       "#interrupt-cells")
 DEFINE_SIMPLE_PROP(nvmem_cells, "nvmem-cells", NULL)
 DEFINE_SIMPLE_PROP(phys, "phys", "#phy-cells")
 DEFINE_SIMPLE_PROP(wakeup_parent, "wakeup-parent", NULL)
@@ -1280,19 +1282,55 @@ static struct device_node *parse_iommu_maps(struct device_node *np,
        return of_parse_phandle(np, prop_name, (index * 4) + 1);
 }
 
+static struct device_node *parse_gpio_compat(struct device_node *np,
+                                            const char *prop_name, int index)
+{
+       struct of_phandle_args sup_args;
+
+       if (strcmp(prop_name, "gpio") && strcmp(prop_name, "gpios"))
+               return NULL;
+
+       /*
+        * Ignore node with gpio-hog property since its gpios are all provided
+        * by its parent.
+        */
+       if (of_find_property(np, "gpio-hog", NULL))
+               return NULL;
+
+       if (of_parse_phandle_with_args(np, prop_name, "#gpio-cells", index,
+                                      &sup_args))
+               return NULL;
+
+       return sup_args.np;
+}
+
+static struct device_node *parse_interrupts(struct device_node *np,
+                                           const char *prop_name, int index)
+{
+       struct of_phandle_args sup_args;
+
+       if (!IS_ENABLED(CONFIG_OF_IRQ) || IS_ENABLED(CONFIG_PPC))
+               return NULL;
+
+       if (strcmp(prop_name, "interrupts") &&
+           strcmp(prop_name, "interrupts-extended"))
+               return NULL;
+
+       return of_irq_parse_one(np, index, &sup_args) ? NULL : sup_args.np;
+}
+
 static const struct supplier_bindings of_supplier_bindings[] = {
        { .parse_prop = parse_clocks, },
        { .parse_prop = parse_interconnects, },
-       { .parse_prop = parse_iommus, },
-       { .parse_prop = parse_iommu_maps, },
+       { .parse_prop = parse_iommus, .optional = true, },
+       { .parse_prop = parse_iommu_maps, .optional = true, },
        { .parse_prop = parse_mboxes, },
        { .parse_prop = parse_io_channels, },
        { .parse_prop = parse_interrupt_parent, },
-       { .parse_prop = parse_dmas, },
+       { .parse_prop = parse_dmas, .optional = true, },
        { .parse_prop = parse_power_domains, },
        { .parse_prop = parse_hwlocks, },
        { .parse_prop = parse_extcon, },
-       { .parse_prop = parse_interrupts_extended, },
        { .parse_prop = parse_nvmem_cells, },
        { .parse_prop = parse_phys, },
        { .parse_prop = parse_wakeup_parent, },
@@ -1305,6 +1343,8 @@ static const struct supplier_bindings of_supplier_bindings[] = {
        { .parse_prop = parse_pinctrl6, },
        { .parse_prop = parse_pinctrl7, },
        { .parse_prop = parse_pinctrl8, },
+       { .parse_prop = parse_gpio_compat, },
+       { .parse_prop = parse_interrupts, },
        { .parse_prop = parse_regulators, },
        { .parse_prop = parse_gpio, },
        { .parse_prop = parse_gpios, },
@@ -1341,6 +1381,11 @@ static int of_link_property(struct device_node *con_np, const char *prop_name)
 
        /* Do not stop at first failed link, link all available suppliers. */
        while (!matched && s->parse_prop) {
+               if (s->optional && !fw_devlink_is_strict()) {
+                       s++;
+                       continue;
+               }
+
                while ((phandle = s->parse_prop(con_np, prop_name, i))) {
                        matched = true;
                        i++;
index c3f3d92..1556998 100644 (file)
@@ -998,14 +998,15 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
        old_opp = opp_table->current_opp;
 
        /* Return early if nothing to do */
-       if (opp_table->enabled && old_opp == opp) {
+       if (old_opp == opp && opp_table->current_rate == freq &&
+           opp_table->enabled) {
                dev_dbg(dev, "%s: OPPs are same, nothing to do\n", __func__);
                return 0;
        }
 
        dev_dbg(dev, "%s: switching OPP: Freq %lu -> %lu Hz, Level %u -> %u, Bw %u -> %u\n",
-               __func__, old_opp->rate, freq, old_opp->level, opp->level,
-               old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
+               __func__, opp_table->current_rate, freq, old_opp->level,
+               opp->level, old_opp->bandwidth ? old_opp->bandwidth[0].peak : 0,
                opp->bandwidth ? opp->bandwidth[0].peak : 0);
 
        scaling_down = _opp_compare_key(old_opp, opp);
@@ -1061,6 +1062,7 @@ static int _set_opp(struct device *dev, struct opp_table *opp_table,
        /* Make sure current_opp doesn't get freed */
        dev_pm_opp_get(opp);
        opp_table->current_opp = opp;
+       opp_table->current_rate = freq;
 
        return ret;
 }
@@ -1490,7 +1492,11 @@ static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table,
 
        mutex_lock(&opp_table->lock);
        list_for_each_entry(temp, &opp_table->opp_list, node) {
-               if (dynamic == temp->dynamic) {
+               /*
+                * The OPP core must drop each OPP's refcount exactly once;
+                * the "removed" flag guarantees that.
+                */
+               if (!temp->removed && dynamic == temp->dynamic) {
                        opp = temp;
                        break;
                }
@@ -1500,10 +1506,27 @@ static struct dev_pm_opp *_opp_get_next(struct opp_table *opp_table,
        return opp;
 }
 
-bool _opp_remove_all_static(struct opp_table *opp_table)
+/*
+ * Can't call dev_pm_opp_put() from under the lock, as debugfs removal needs
+ * to happen locklessly to avoid circular dependency issues. This routine must
+ * be called without opp_table->lock held.
+ */
+static void _opp_remove_all(struct opp_table *opp_table, bool dynamic)
 {
        struct dev_pm_opp *opp;
 
+       while ((opp = _opp_get_next(opp_table, dynamic))) {
+               opp->removed = true;
+               dev_pm_opp_put(opp);
+
+               /* Drop the references taken by dev_pm_opp_add() */
+               if (dynamic)
+                       dev_pm_opp_put_opp_table(opp_table);
+       }
+}
+
+bool _opp_remove_all_static(struct opp_table *opp_table)
+{
        mutex_lock(&opp_table->lock);
 
        if (!opp_table->parsed_static_opps) {
@@ -1518,13 +1541,7 @@ bool _opp_remove_all_static(struct opp_table *opp_table)
 
        mutex_unlock(&opp_table->lock);
 
-       /*
-        * Can't remove the OPP from under the lock, debugfs removal needs to
-        * happen lock less to avoid circular dependency issues.
-        */
-       while ((opp = _opp_get_next(opp_table, false)))
-               dev_pm_opp_put(opp);
-
+       _opp_remove_all(opp_table, false);
        return true;
 }
 
@@ -1537,25 +1554,12 @@ bool _opp_remove_all_static(struct opp_table *opp_table)
 void dev_pm_opp_remove_all_dynamic(struct device *dev)
 {
        struct opp_table *opp_table;
-       struct dev_pm_opp *opp;
-       int count = 0;
 
        opp_table = _find_opp_table(dev);
        if (IS_ERR(opp_table))
                return;
 
-       /*
-        * Can't remove the OPP from under the lock, debugfs removal needs to
-        * happen lock less to avoid circular dependency issues.
-        */
-       while ((opp = _opp_get_next(opp_table, true))) {
-               dev_pm_opp_put(opp);
-               count++;
-       }
-
-       /* Drop the references taken by dev_pm_opp_add() */
-       while (count--)
-               dev_pm_opp_put_opp_table(opp_table);
+       _opp_remove_all(opp_table, true);
 
        /* Drop the reference taken by _find_opp_table() */
        dev_pm_opp_put_opp_table(opp_table);
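
The consolidated _opp_remove_all() keeps the reference accounting that the old counting loop expressed only indirectly: each dev_pm_opp_add() pins one opp_table reference, so every dynamic OPP torn down must drop both its own reference and that table reference. The core of the new helper, as in the patch:

while ((opp = _opp_get_next(opp_table, dynamic))) {
        opp->removed = true;    /* let the core put this OPP exactly once */
        dev_pm_opp_put(opp);

        /* drop the opp_table reference taken by dev_pm_opp_add() */
        if (dynamic)
                dev_pm_opp_put_opp_table(opp_table);
}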
index 9b9daf8..407c3bf 100644 (file)
@@ -56,6 +56,7 @@ extern struct list_head opp_tables, lazy_opp_tables;
  * @dynamic:   not-created from static DT entries.
  * @turbo:     true if turbo (boost) OPP
  * @suspend:   true if suspend OPP
+ * @removed:   flag indicating that OPP's reference is dropped by OPP core.
  * @pstate: Device's power domain's performance state.
  * @rate:      Frequency in hertz
  * @level:     Performance level
@@ -78,6 +79,7 @@ struct dev_pm_opp {
        bool dynamic;
        bool turbo;
        bool suspend;
+       bool removed;
        unsigned int pstate;
        unsigned long rate;
        unsigned int level;
@@ -135,6 +137,7 @@ enum opp_table_access {
  * @clock_latency_ns_max: Max clock latency in nanoseconds.
  * @parsed_static_opps: Count of devices for which OPPs are initialized from DT.
  * @shared_opp: OPP is shared between multiple devices.
+ * @current_rate: Currently configured frequency.
  * @current_opp: Currently configured OPP for the table.
  * @suspend_opp: Pointer to OPP to be used during device suspend.
  * @genpd_virt_dev_lock: Mutex protecting the genpd virtual device pointers.
@@ -184,6 +187,7 @@ struct opp_table {
 
        unsigned int parsed_static_opps;
        enum opp_table_access shared_opp;
+       unsigned long current_rate;
        struct dev_pm_opp *current_opp;
        struct dev_pm_opp *suspend_opp;
 
index 1e88bcf..84d5701 100644 (file)
@@ -241,6 +241,5 @@ module_platform_driver_probe(amiga_parallel_driver, amiga_parallel_probe);
 
 MODULE_AUTHOR("Joerg Dorchain <joerg@dorchain.net>");
 MODULE_DESCRIPTION("Parport Driver for Amiga builtin Port");
-MODULE_SUPPORTED_DEVICE("Amiga builtin Parallel Port");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("platform:amiga-parallel");
index 2ff0fe0..1623f01 100644 (file)
@@ -218,7 +218,6 @@ static void __exit parport_atari_exit(void)
 
 MODULE_AUTHOR("Andreas Schwab");
 MODULE_DESCRIPTION("Parport Driver for Atari builtin Port");
-MODULE_SUPPORTED_DEVICE("Atari builtin Parallel Port");
 MODULE_LICENSE("GPL");
 
 module_init(parport_atari_init)
index 9228e8f..1e43b3f 100644 (file)
@@ -41,7 +41,6 @@
 
 MODULE_AUTHOR("Helge Deller <deller@gmx.de>");
 MODULE_DESCRIPTION("HP-PARISC PC-style parallel port driver");
-MODULE_SUPPORTED_DEVICE("integrated PC-style parallel port");
 MODULE_LICENSE("GPL");
 
 
index d6bbe84..f4d0da7 100644 (file)
@@ -359,7 +359,6 @@ static void __exit parport_mfc3_exit(void)
 
 MODULE_AUTHOR("Joerg Dorchain <joerg@dorchain.net>");
 MODULE_DESCRIPTION("Parport Driver for Multiface 3 expansion cards Parallel Port");
-MODULE_SUPPORTED_DEVICE("Multiface 3 Parallel Port");
 MODULE_LICENSE("GPL");
 
 module_init(parport_mfc3_init)
index e840c1b..865fc41 100644 (file)
@@ -377,6 +377,5 @@ module_platform_driver(bpp_sbus_driver);
 
 MODULE_AUTHOR("Derrick J Brashear");
 MODULE_DESCRIPTION("Parport Driver for Sparc bidirectional Port");
-MODULE_SUPPORTED_DEVICE("Sparc Bidirectional Parallel Port");
 MODULE_VERSION("2.0");
 MODULE_LICENSE("GPL");
index 11cc794..d62c4ac 100644 (file)
@@ -36,4 +36,4 @@ obj-$(CONFIG_PCI_ENDPOINT)    += endpoint/
 obj-y                          += controller/
 obj-y                          += switch/
 
-ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
+subdir-ccflags-$(CONFIG_PCI_DEBUG) := -DDEBUG
index 64e2f5e..5aa8977 100644 (file)
@@ -55,15 +55,6 @@ config PCI_RCAR_GEN2
          There are 3 internal PCI controllers available with a single
          built-in EHCI/OHCI host controller present on each one.
 
-config PCIE_RCAR
-       bool "Renesas R-Car PCIe controller"
-       depends on ARCH_RENESAS || COMPILE_TEST
-       depends on PCI_MSI_IRQ_DOMAIN
-       select PCIE_RCAR_HOST
-       help
-         Say Y here if you want PCIe controller support on R-Car SoCs.
-         This option will be removed after arm64 defconfig is updated.
-
 config PCIE_RCAR_HOST
        bool "Renesas R-Car PCIe host controller"
        depends on ARCH_RENESAS || COMPILE_TEST
@@ -242,20 +233,6 @@ config PCIE_MEDIATEK
          Say Y here if you want to enable PCIe controller support on
          MediaTek SoCs.
 
-config PCIE_TANGO_SMP8759
-       bool "Tango SMP8759 PCIe controller (DANGEROUS)"
-       depends on ARCH_TANGO && PCI_MSI && OF
-       depends on BROKEN
-       select PCI_HOST_COMMON
-       help
-         Say Y here to enable PCIe controller support for Sigma Designs
-         Tango SMP8759-based systems.
-
-         Note: The SMP8759 controller multiplexes PCI config and MMIO
-         accesses, and Linux doesn't provide a way to serialize them.
-         This can lead to data corruption if drivers perform concurrent
-         config and MMIO accesses.
-
 config VMD
        depends on PCI_MSI && X86_64 && SRCU
        tristate "Intel Volume Management Device Driver"
@@ -273,7 +250,7 @@ config VMD
 
 config PCIE_BRCMSTB
        tristate "Broadcom Brcmstb PCIe host controller"
-       depends on ARCH_BRCMSTB || ARCH_BCM2835 || COMPILE_TEST
+       depends on ARCH_BRCMSTB || ARCH_BCM2835 || ARCH_BCM4908 || COMPILE_TEST
        depends on OF
        depends on PCI_MSI_IRQ_DOMAIN
        default ARCH_BRCMSTB
@@ -298,6 +275,16 @@ config PCI_LOONGSON
          Say Y here if you want to enable PCI controller support on
          Loongson systems.
 
+config PCIE_MICROCHIP_HOST
+       bool "Microchip AXI PCIe host bridge support"
+       depends on PCI_MSI && OF
+       select PCI_MSI_IRQ_DOMAIN
+       select GENERIC_MSI_IRQ_DOMAIN
+       select PCI_HOST_COMMON
+       help
+         Say Y here if you want the kernel to support the Microchip AXI
+         PCIe Host Bridge driver.
+
 config PCIE_HISI_ERR
        depends on ACPI_APEI_GHES && (ARM64 || COMPILE_TEST)
        bool "HiSilicon HIP PCIe controller error handling driver"
index 04c6edc..e4559f2 100644 (file)
@@ -27,7 +27,7 @@ obj-$(CONFIG_PCIE_ROCKCHIP) += pcie-rockchip.o
 obj-$(CONFIG_PCIE_ROCKCHIP_EP) += pcie-rockchip-ep.o
 obj-$(CONFIG_PCIE_ROCKCHIP_HOST) += pcie-rockchip-host.o
 obj-$(CONFIG_PCIE_MEDIATEK) += pcie-mediatek.o
-obj-$(CONFIG_PCIE_TANGO_SMP8759) += pcie-tango.o
+obj-$(CONFIG_PCIE_MICROCHIP_HOST) += pcie-microchip-host.o
 obj-$(CONFIG_VMD) += vmd.o
 obj-$(CONFIG_PCIE_BRCMSTB) += pcie-brcmstb.o
 obj-$(CONFIG_PCI_LOONGSON) += pci-loongson.o
index dac1ac8..849f1e4 100644 (file)
@@ -64,6 +64,7 @@ enum j721e_pcie_mode {
 
 struct j721e_pcie_data {
        enum j721e_pcie_mode    mode;
+       bool quirk_retrain_flag;
 };
 
 static inline u32 j721e_pcie_user_readl(struct j721e_pcie *pcie, u32 offset)
@@ -280,6 +281,7 @@ static struct pci_ops cdns_ti_pcie_host_ops = {
 
 static const struct j721e_pcie_data j721e_pcie_rc_data = {
        .mode = PCI_MODE_RC,
+       .quirk_retrain_flag = true,
 };
 
 static const struct j721e_pcie_data j721e_pcie_ep_data = {
@@ -388,6 +390,7 @@ static int j721e_pcie_probe(struct platform_device *pdev)
 
                bridge->ops = &cdns_ti_pcie_host_ops;
                rc = pci_host_bridge_priv(bridge);
+               rc->quirk_retrain_flag = data->quirk_retrain_flag;
 
                cdns_pcie = &rc->pcie;
                cdns_pcie->dev = dev;
index 9e2b024..897cdde 100644 (file)
@@ -382,6 +382,57 @@ static int cdns_pcie_ep_send_msi_irq(struct cdns_pcie_ep *ep, u8 fn,
        return 0;
 }
 
+static int cdns_pcie_ep_map_msi_irq(struct pci_epc *epc, u8 fn,
+                                   phys_addr_t addr, u8 interrupt_num,
+                                   u32 entry_size, u32 *msi_data,
+                                   u32 *msi_addr_offset)
+{
+       struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
+       u32 cap = CDNS_PCIE_EP_FUNC_MSI_CAP_OFFSET;
+       struct cdns_pcie *pcie = &ep->pcie;
+       u64 pci_addr, pci_addr_mask = 0xff;
+       u16 flags, mme, data, data_mask;
+       u8 msi_count;
+       int ret;
+       int i;
+
+       /* Check whether the MSI feature has been enabled by the PCI host. */
+       flags = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_FLAGS);
+       if (!(flags & PCI_MSI_FLAGS_ENABLE))
+               return -EINVAL;
+
+       /* Get the number of enabled MSIs */
+       mme = (flags & PCI_MSI_FLAGS_QSIZE) >> 4;
+       msi_count = 1 << mme;
+       if (!interrupt_num || interrupt_num > msi_count)
+               return -EINVAL;
+
+       /* Compute the data value to be written. */
+       data_mask = msi_count - 1;
+       data = cdns_pcie_ep_fn_readw(pcie, fn, cap + PCI_MSI_DATA_64);
+       data = data & ~data_mask;
+
+       /* Get the PCI address to which the data should be written. */
+       pci_addr = cdns_pcie_ep_fn_readl(pcie, fn, cap + PCI_MSI_ADDRESS_HI);
+       pci_addr <<= 32;
+       pci_addr |= cdns_pcie_ep_fn_readl(pcie, fn, cap + PCI_MSI_ADDRESS_LO);
+       pci_addr &= GENMASK_ULL(63, 2);
+
+       for (i = 0; i < interrupt_num; i++) {
+               ret = cdns_pcie_ep_map_addr(epc, fn, addr,
+                                           pci_addr & ~pci_addr_mask,
+                                           entry_size);
+               if (ret)
+                       return ret;
+               addr = addr + entry_size;
+       }
+
+       *msi_data = data;
+       *msi_addr_offset = pci_addr & pci_addr_mask;
+
+       return 0;
+}
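
A minimal usage sketch (not from this series' drivers; epf_ring_host_msi, doorbell_phys and doorbell_base are invented for illustration): an endpoint function driver would reach this operation through the pci_epc_map_msi_irq() wrapper that accompanies it in the endpoint core, then ring a host vector by writing the returned data value at the returned offset inside the mapped window.

#include <linux/io.h>
#include <linux/pci-epc.h>

static int epf_ring_host_msi(struct pci_epc *epc, u8 func_no,
			     phys_addr_t doorbell_phys,
			     void __iomem *doorbell_base,
			     u8 vector, u32 entry_size)
{
	u32 msi_data, msi_addr_offset;
	int ret;

	/* Maps one window per vector onto the host's MSI address and
	 * returns the base data value plus the sub-page offset. */
	ret = pci_epc_map_msi_irq(epc, func_no, doorbell_phys, vector,
				  entry_size, &msi_data, &msi_addr_offset);
	if (ret)
		return ret;

	/* Writing (data + n) into window n raises host MSI vector n + 1 */
	writel(msi_data + vector - 1,
	       doorbell_base + (vector - 1) * entry_size + msi_addr_offset);

	return 0;
}
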
+
 static int cdns_pcie_ep_send_msix_irq(struct cdns_pcie_ep *ep, u8 fn,
                                      u16 interrupt_num)
 {
@@ -455,18 +506,13 @@ static int cdns_pcie_ep_start(struct pci_epc *epc)
        struct cdns_pcie_ep *ep = epc_get_drvdata(epc);
        struct cdns_pcie *pcie = &ep->pcie;
        struct device *dev = pcie->dev;
-       struct pci_epf *epf;
-       u32 cfg;
        int ret;
 
        /*
         * BIT(0) is hardwired to 1, hence function 0 is always enabled
         * and can't be disabled anyway.
         */
-       cfg = BIT(0);
-       list_for_each_entry(epf, &epc->pci_epf, list)
-               cfg |= BIT(epf->func_no);
-       cdns_pcie_writel(pcie, CDNS_PCIE_LM_EP_FUNC_CFG, cfg);
+       cdns_pcie_writel(pcie, CDNS_PCIE_LM_EP_FUNC_CFG, epc->function_num_map);
 
        ret = cdns_pcie_start_link(pcie);
        if (ret) {
@@ -481,6 +527,7 @@ static const struct pci_epc_features cdns_pcie_epc_features = {
        .linkup_notifier = false,
        .msi_capable = true,
        .msix_capable = true,
+       .align = 256,
 };
 
 static const struct pci_epc_features*
@@ -500,6 +547,7 @@ static const struct pci_epc_ops cdns_pcie_epc_ops = {
        .set_msix       = cdns_pcie_ep_set_msix,
        .get_msix       = cdns_pcie_ep_get_msix,
        .raise_irq      = cdns_pcie_ep_raise_irq,
+       .map_msi_irq    = cdns_pcie_ep_map_msi_irq,
        .start          = cdns_pcie_ep_start,
        .get_features   = cdns_pcie_ep_get_features,
 };
index 811c1cb..73dcf8c 100644 (file)
@@ -77,6 +77,68 @@ static struct pci_ops cdns_pcie_host_ops = {
        .write          = pci_generic_config_write,
 };
 
+static int cdns_pcie_host_wait_for_link(struct cdns_pcie *pcie)
+{
+       struct device *dev = pcie->dev;
+       int retries;
+
+       /* Check if the link is up or not */
+       for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) {
+               if (cdns_pcie_link_up(pcie)) {
+                       dev_info(dev, "Link up\n");
+                       return 0;
+               }
+               usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
+       }
+
+       return -ETIMEDOUT;
+}
+
+static int cdns_pcie_retrain(struct cdns_pcie *pcie)
+{
+       u32 lnk_cap_sls, pcie_cap_off = CDNS_PCIE_RP_CAP_OFFSET;
+       u16 lnk_stat, lnk_ctl;
+       int ret = 0;
+
+       /*
+        * Set the retrain bit if the current link speed is 2.5 GT/s
+        * but the PCIe root port supports more than 2.5 GT/s.
+        */
+
+       lnk_cap_sls = cdns_pcie_readl(pcie, (CDNS_PCIE_RP_BASE + pcie_cap_off +
+                                            PCI_EXP_LNKCAP));
+       if ((lnk_cap_sls & PCI_EXP_LNKCAP_SLS) <= PCI_EXP_LNKCAP_SLS_2_5GB)
+               return ret;
+
+       lnk_stat = cdns_pcie_rp_readw(pcie, pcie_cap_off + PCI_EXP_LNKSTA);
+       if ((lnk_stat & PCI_EXP_LNKSTA_CLS) == PCI_EXP_LNKSTA_CLS_2_5GB) {
+               lnk_ctl = cdns_pcie_rp_readw(pcie,
+                                            pcie_cap_off + PCI_EXP_LNKCTL);
+               lnk_ctl |= PCI_EXP_LNKCTL_RL;
+               cdns_pcie_rp_writew(pcie, pcie_cap_off + PCI_EXP_LNKCTL,
+                                   lnk_ctl);
+
+               ret = cdns_pcie_host_wait_for_link(pcie);
+       }
+       return ret;
+}
+
+static int cdns_pcie_host_start_link(struct cdns_pcie_rc *rc)
+{
+       struct cdns_pcie *pcie = &rc->pcie;
+       int ret;
+
+       ret = cdns_pcie_host_wait_for_link(pcie);
+
+       /*
+        * Retrain the link to work around a Gen2 training defect
+        * if the quirk flag is set.
+        */
+       if (!ret && rc->quirk_retrain_flag)
+               ret = cdns_pcie_retrain(pcie);
+
+       return ret;
+}
 
 static int cdns_pcie_host_init_root_port(struct cdns_pcie_rc *rc)
 {
@@ -321,9 +383,10 @@ static int cdns_pcie_host_map_dma_ranges(struct cdns_pcie_rc *rc)
 
        resource_list_for_each_entry(entry, &bridge->dma_ranges) {
                err = cdns_pcie_host_bar_config(rc, entry);
-               if (err)
+               if (err) {
                        dev_err(dev, "Fail to configure IB using dma-ranges\n");
-               return err;
+                       return err;
+               }
        }
 
        return 0;
@@ -398,23 +461,6 @@ static int cdns_pcie_host_init(struct device *dev,
        return cdns_pcie_host_init_address_translation(rc);
 }
 
-static int cdns_pcie_host_wait_for_link(struct cdns_pcie *pcie)
-{
-       struct device *dev = pcie->dev;
-       int retries;
-
-       /* Check if the link is up or not */
-       for (retries = 0; retries < LINK_WAIT_MAX_RETRIES; retries++) {
-               if (cdns_pcie_link_up(pcie)) {
-                       dev_info(dev, "Link up\n");
-                       return 0;
-               }
-               usleep_range(LINK_WAIT_USLEEP_MIN, LINK_WAIT_USLEEP_MAX);
-       }
-
-       return -ETIMEDOUT;
-}
-
 int cdns_pcie_host_setup(struct cdns_pcie_rc *rc)
 {
        struct device *dev = rc->pcie.dev;
@@ -457,7 +503,7 @@ int cdns_pcie_host_setup(struct cdns_pcie_rc *rc)
                return ret;
        }
 
-       ret = cdns_pcie_host_wait_for_link(pcie);
+       ret = cdns_pcie_host_start_link(rc);
        if (ret)
                dev_dbg(dev, "PCIe link never came up\n");
 
index 30eba6c..254d257 100644 (file)
  * Root Port Registers (PCI configuration space for the root port function)
  */
 #define CDNS_PCIE_RP_BASE      0x00200000
-
+#define CDNS_PCIE_RP_CAP_OFFSET 0xc0
 
 /*
  * Address Translation Registers
@@ -291,6 +291,7 @@ struct cdns_pcie {
  * @device_id: PCI device ID
  * @avail_ib_bar: Status of RP_BAR0, RP_BAR1 and RP_NO_BAR, i.e. whether
  *                each is free or available
+ * @quirk_retrain_flag: Retrain the link as a quirk to work around a
+ *                      PCIe Gen2 training defect
  */
 struct cdns_pcie_rc {
        struct cdns_pcie        pcie;
@@ -299,6 +300,7 @@ struct cdns_pcie_rc {
        u32                     vendor_id;
        u32                     device_id;
        bool                    avail_ib_bar[CDNS_PCIE_RP_MAX_IB];
+       bool                    quirk_retrain_flag;
 };
 
 /**
@@ -414,6 +416,13 @@ static inline void cdns_pcie_rp_writew(struct cdns_pcie *pcie,
        cdns_pcie_write_sz(addr, 0x2, value);
 }
 
+static inline u16 cdns_pcie_rp_readw(struct cdns_pcie *pcie, u32 reg)
+{
+       void __iomem *addr = pcie->reg_base + CDNS_PCIE_RP_BASE + reg;
+
+       return cdns_pcie_read_sz(addr, 0x2);
+}
+
 /* Endpoint Function register access */
 static inline void cdns_pcie_ep_fn_writeb(struct cdns_pcie *pcie, u8 fn,
                                          u32 reg, u8 value)
index 4d12efd..39fe2ed 100644 (file)
@@ -115,10 +115,17 @@ static const struct ls_pcie_ep_drvdata ls2_ep_drvdata = {
        .dw_pcie_ops = &dw_ls_pcie_ep_ops,
 };
 
+static const struct ls_pcie_ep_drvdata lx2_ep_drvdata = {
+       .func_offset = 0x8000,
+       .ops = &ls_pcie_ep_ops,
+       .dw_pcie_ops = &dw_ls_pcie_ep_ops,
+};
+
 static const struct of_device_id ls_pcie_ep_of_match[] = {
        { .compatible = "fsl,ls1046a-pcie-ep", .data = &ls1_ep_drvdata },
        { .compatible = "fsl,ls1088a-pcie-ep", .data = &ls2_ep_drvdata },
        { .compatible = "fsl,ls2088a-pcie-ep", .data = &ls2_ep_drvdata },
+       { .compatible = "fsl,lx2160ar2-pcie-ep", .data = &lx2_ep_drvdata },
        { },
 };
 
index 44ad34c..5b9c625 100644 (file)
@@ -232,7 +232,7 @@ static const struct of_device_id ls_pcie_of_match[] = {
        { },
 };
 
-static int __init ls_pcie_probe(struct platform_device *pdev)
+static int ls_pcie_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct dw_pcie *pci;
@@ -271,10 +271,11 @@ static int __init ls_pcie_probe(struct platform_device *pdev)
 }
 
 static struct platform_driver ls_pcie_driver = {
+       .probe = ls_pcie_probe,
        .driver = {
                .name = "layerscape-pcie",
                .of_match_table = ls_pcie_of_match,
                .suppress_bind_attrs = true,
        },
 };
-builtin_platform_driver_probe(ls_pcie_driver, ls_pcie_probe);
+builtin_platform_driver(ls_pcie_driver);
index abf37aa..e8afa50 100644 (file)
@@ -314,9 +314,6 @@ static const struct dw_pcie_host_ops al_pcie_host_ops = {
        .host_init = al_pcie_host_init,
 };
 
-static const struct dw_pcie_ops dw_pcie_ops = {
-};
-
 static int al_pcie_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
@@ -334,7 +331,6 @@ static int al_pcie_probe(struct platform_device *pdev)
                return -ENOMEM;
 
        pci->dev = dev;
-       pci->ops = &dw_pcie_ops;
        pci->pp.ops = &al_pcie_host_ops;
 
        al_pcie->pci = pci;
index bcd1cd9..1c25d83 100644 (file)
@@ -434,10 +434,8 @@ static void dw_pcie_ep_stop(struct pci_epc *epc)
        struct dw_pcie_ep *ep = epc_get_drvdata(epc);
        struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 
-       if (!pci->ops->stop_link)
-               return;
-
-       pci->ops->stop_link(pci);
+       if (pci->ops && pci->ops->stop_link)
+               pci->ops->stop_link(pci);
 }
 
 static int dw_pcie_ep_start(struct pci_epc *epc)
@@ -445,7 +443,7 @@ static int dw_pcie_ep_start(struct pci_epc *epc)
        struct dw_pcie_ep *ep = epc_get_drvdata(epc);
        struct dw_pcie *pci = to_dw_pcie_from_ep(ep);
 
-       if (!pci->ops->start_link)
+       if (!pci->ops || !pci->ops->start_link)
                return -EINVAL;
 
        return pci->ops->start_link(pci);
index 8a84c00..7e55b2b 100644 (file)
@@ -258,10 +258,8 @@ int dw_pcie_allocate_domains(struct pcie_port *pp)
 
 static void dw_pcie_free_msi(struct pcie_port *pp)
 {
-       if (pp->msi_irq) {
-               irq_set_chained_handler(pp->msi_irq, NULL);
-               irq_set_handler_data(pp->msi_irq, NULL);
-       }
+       if (pp->msi_irq)
+               irq_set_chained_handler_and_data(pp->msi_irq, NULL, NULL);
 
        irq_domain_remove(pp->msi_domain);
        irq_domain_remove(pp->irq_domain);
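
The conversions to irq_set_chained_handler_and_data() here and in the later hunks collapse two descriptor updates into one, so the flow handler and its data can never be observed out of sync. A generic sketch of the install/teardown pairing (demux_handler, demux_setup and demux_teardown are hypothetical names):

#include <linux/irq.h>
#include <linux/irqchip/chained_irq.h>

static void demux_handler(struct irq_desc *desc)
{
	struct irq_chip *chip = irq_desc_get_chip(desc);

	chained_irq_enter(chip, desc);
	/* ... read status from irq_desc_get_handler_data(desc) and
	 *     generic_handle_irq() each pending child ... */
	chained_irq_exit(chip, desc);
}

static void demux_setup(unsigned int parent_irq, void *priv)
{
	/* handler and data are updated together under the descriptor lock */
	irq_set_chained_handler_and_data(parent_irq, demux_handler, priv);
}

static void demux_teardown(unsigned int parent_irq)
{
	/* NULL/NULL uninstalls the chained handler and clears the data */
	irq_set_chained_handler_and_data(parent_irq, NULL, NULL);
}
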
@@ -305,8 +303,13 @@ int dw_pcie_host_init(struct pcie_port *pp)
        if (cfg_res) {
                pp->cfg0_size = resource_size(cfg_res);
                pp->cfg0_base = cfg_res->start;
-       } else if (!pp->va_cfg0_base) {
+
+               pp->va_cfg0_base = devm_pci_remap_cfg_resource(dev, cfg_res);
+               if (IS_ERR(pp->va_cfg0_base))
+                       return PTR_ERR(pp->va_cfg0_base);
+       } else {
                dev_err(dev, "Missing *config* reg space\n");
+               return -ENODEV;
        }
 
        if (!pci->dbi_base) {
@@ -322,38 +325,12 @@ int dw_pcie_host_init(struct pcie_port *pp)
 
        pp->bridge = bridge;
 
-       /* Get the I/O and memory ranges from DT */
-       resource_list_for_each_entry(win, &bridge->windows) {
-               switch (resource_type(win->res)) {
-               case IORESOURCE_IO:
-                       pp->io_size = resource_size(win->res);
-                       pp->io_bus_addr = win->res->start - win->offset;
-                       pp->io_base = pci_pio_to_address(win->res->start);
-                       break;
-               case 0:
-                       dev_err(dev, "Missing *config* reg space\n");
-                       pp->cfg0_size = resource_size(win->res);
-                       pp->cfg0_base = win->res->start;
-                       if (!pci->dbi_base) {
-                               pci->dbi_base = devm_pci_remap_cfgspace(dev,
-                                                               pp->cfg0_base,
-                                                               pp->cfg0_size);
-                               if (!pci->dbi_base) {
-                                       dev_err(dev, "Error with ioremap\n");
-                                       return -ENOMEM;
-                               }
-                       }
-                       break;
-               }
-       }
-
-       if (!pp->va_cfg0_base) {
-               pp->va_cfg0_base = devm_pci_remap_cfgspace(dev,
-                                       pp->cfg0_base, pp->cfg0_size);
-               if (!pp->va_cfg0_base) {
-                       dev_err(dev, "Error with ioremap in function\n");
-                       return -ENOMEM;
-               }
+       /* Get the I/O range from DT */
+       win = resource_list_first_type(&bridge->windows, IORESOURCE_IO);
+       if (win) {
+               pp->io_size = resource_size(win->res);
+               pp->io_bus_addr = win->res->start - win->offset;
+               pp->io_base = pci_pio_to_address(win->res->start);
        }
 
        if (pci->link_gen < 1)
@@ -425,7 +402,7 @@ int dw_pcie_host_init(struct pcie_port *pp)
        dw_pcie_setup_rc(pp);
        dw_pcie_msi_init(pp);
 
-       if (!dw_pcie_link_up(pci) && pci->ops->start_link) {
+       if (!dw_pcie_link_up(pci) && pci->ops && pci->ops->start_link) {
                ret = pci->ops->start_link(pci);
                if (ret)
                        goto err_free_msi;
index 645fa18..004cb86 100644 (file)
@@ -141,7 +141,7 @@ u32 dw_pcie_read_dbi(struct dw_pcie *pci, u32 reg, size_t size)
        int ret;
        u32 val;
 
-       if (pci->ops->read_dbi)
+       if (pci->ops && pci->ops->read_dbi)
                return pci->ops->read_dbi(pci, pci->dbi_base, reg, size);
 
        ret = dw_pcie_read(pci->dbi_base + reg, size, &val);
@@ -156,7 +156,7 @@ void dw_pcie_write_dbi(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
 {
        int ret;
 
-       if (pci->ops->write_dbi) {
+       if (pci->ops && pci->ops->write_dbi) {
                pci->ops->write_dbi(pci, pci->dbi_base, reg, size, val);
                return;
        }
@@ -171,7 +171,7 @@ void dw_pcie_write_dbi2(struct dw_pcie *pci, u32 reg, size_t size, u32 val)
 {
        int ret;
 
-       if (pci->ops->write_dbi2) {
+       if (pci->ops && pci->ops->write_dbi2) {
                pci->ops->write_dbi2(pci, pci->dbi_base2, reg, size, val);
                return;
        }
@@ -186,7 +186,7 @@ static u32 dw_pcie_readl_atu(struct dw_pcie *pci, u32 reg)
        int ret;
        u32 val;
 
-       if (pci->ops->read_dbi)
+       if (pci->ops && pci->ops->read_dbi)
                return pci->ops->read_dbi(pci, pci->atu_base, reg, 4);
 
        ret = dw_pcie_read(pci->atu_base + reg, 4, &val);
@@ -200,7 +200,7 @@ static void dw_pcie_writel_atu(struct dw_pcie *pci, u32 reg, u32 val)
 {
        int ret;
 
-       if (pci->ops->write_dbi) {
+       if (pci->ops && pci->ops->write_dbi) {
                pci->ops->write_dbi(pci, pci->atu_base, reg, 4, val);
                return;
        }
@@ -225,6 +225,47 @@ static void dw_pcie_writel_ob_unroll(struct dw_pcie *pci, u32 index, u32 reg,
        dw_pcie_writel_atu(pci, offset + reg, val);
 }
 
+static inline u32 dw_pcie_enable_ecrc(u32 val)
+{
+       /*
+        * DesignWare core version 4.90A has a design issue where the 'TD'
+        * bit in the Control register-1 of the ATU outbound region acts
+        * like an override for the ECRC setting, i.e., the presence of TLP
+        * Digest (ECRC) in the outgoing TLPs is solely determined by this
+        * bit. This is contrary to the PCIe spec which says that the
+        * enablement of the ECRC is solely determined by the AER
+        * registers.
+        *
+        * Because of this, even when the ECRC is enabled through AER
+        * registers, the transactions going through ATU won't have TLP
+        * Digest as there is no way the PCI core AER code could program
+        * the TD bit which is specific to the DesignWare core.
+        *
+        * The best way to handle this scenario is to program the TD bit
+        * always. It affects only the traffic from root port to downstream
+        * devices.
+        *
+        * At this point:
+        * When ECRC is enabled in AER registers, everything works normally.
+        * When ECRC is NOT enabled in AER registers, then:
+        * on Root Port:- TLP Digest (DWord size) gets appended to each packet
+        *                even though it is not required. Since downstream
+        *                TLPs are mostly for configuration accesses and BAR
+        *                accesses, they are not in the critical path and won't
+        *                have much negative effect on performance.
+        * on End Point:- TLP Digest is received for some/all packets coming
+        *                from the root port. The TLP Digest is ignored because,
+        *                as per PCIe Spec r5.0 v1.0 section 2.2.3
+        *                "TLP Digest Rules", when an endpoint receives a TLP
+        *                Digest while its ECRC check functionality is disabled
+        *                in AER registers, the received TLP Digest is just
+        *                ignored.
+        * Since no issue or error is reported on either side, the best way
+        * to handle the scenario is to program the TD bit by default.
+        */
+
+       return val | PCIE_ATU_TD;
+}
+
 static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, u8 func_no,
                                             int index, int type,
                                             u64 cpu_addr, u64 pci_addr,
@@ -248,6 +289,8 @@ static void dw_pcie_prog_outbound_atu_unroll(struct dw_pcie *pci, u8 func_no,
        val = type | PCIE_ATU_FUNC_NUM(func_no);
        val = upper_32_bits(size - 1) ?
                val | PCIE_ATU_INCREASE_REGION_SIZE : val;
+       if (pci->version == 0x490A)
+               val = dw_pcie_enable_ecrc(val);
        dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL1, val);
        dw_pcie_writel_ob_unroll(pci, index, PCIE_ATU_UNR_REGION_CTRL2,
                                 PCIE_ATU_ENABLE);
@@ -273,7 +316,7 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no,
 {
        u32 retries, val;
 
-       if (pci->ops->cpu_addr_fixup)
+       if (pci->ops && pci->ops->cpu_addr_fixup)
                cpu_addr = pci->ops->cpu_addr_fixup(pci, cpu_addr);
 
        if (pci->iatu_unroll_enabled) {
@@ -290,12 +333,19 @@ static void __dw_pcie_prog_outbound_atu(struct dw_pcie *pci, u8 func_no,
                           upper_32_bits(cpu_addr));
        dw_pcie_writel_dbi(pci, PCIE_ATU_LIMIT,
                           lower_32_bits(cpu_addr + size - 1));
+       if (pci->version >= 0x460A)
+               dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_LIMIT,
+                                  upper_32_bits(cpu_addr + size - 1));
        dw_pcie_writel_dbi(pci, PCIE_ATU_LOWER_TARGET,
                           lower_32_bits(pci_addr));
        dw_pcie_writel_dbi(pci, PCIE_ATU_UPPER_TARGET,
                           upper_32_bits(pci_addr));
-       dw_pcie_writel_dbi(pci, PCIE_ATU_CR1, type |
-                          PCIE_ATU_FUNC_NUM(func_no));
+       val = type | PCIE_ATU_FUNC_NUM(func_no);
+       val = ((upper_32_bits(size - 1)) && (pci->version >= 0x460A)) ?
+               val | PCIE_ATU_INCREASE_REGION_SIZE : val;
+       if (pci->version == 0x490A)
+               val = dw_pcie_enable_ecrc(val);
+       dw_pcie_writel_dbi(pci, PCIE_ATU_CR1, val);
        dw_pcie_writel_dbi(pci, PCIE_ATU_CR2, PCIE_ATU_ENABLE);
 
        /*
@@ -321,7 +371,7 @@ void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index, int type,
 
 void dw_pcie_prog_ep_outbound_atu(struct dw_pcie *pci, u8 func_no, int index,
                                  int type, u64 cpu_addr, u64 pci_addr,
-                                 u32 size)
+                                 u64 size)
 {
        __dw_pcie_prog_outbound_atu(pci, func_no, index, type,
                                    cpu_addr, pci_addr, size);
@@ -481,7 +531,7 @@ int dw_pcie_link_up(struct dw_pcie *pci)
 {
        u32 val;
 
-       if (pci->ops->link_up)
+       if (pci->ops && pci->ops->link_up)
                return pci->ops->link_up(pci);
 
        val = readl(pci->dbi_base + PCIE_PORT_DEBUG1);
index 0207840..7247c8b 100644 (file)
@@ -86,6 +86,7 @@
 #define PCIE_ATU_TYPE_IO               0x2
 #define PCIE_ATU_TYPE_CFG0             0x4
 #define PCIE_ATU_TYPE_CFG1             0x5
+#define PCIE_ATU_TD                    BIT(8)
 #define PCIE_ATU_FUNC_NUM(pf)           ((pf) << 20)
 #define PCIE_ATU_CR2                   0x908
 #define PCIE_ATU_ENABLE                        BIT(31)
 #define PCIE_ATU_DEV(x)                        FIELD_PREP(GENMASK(23, 19), x)
 #define PCIE_ATU_FUNC(x)               FIELD_PREP(GENMASK(18, 16), x)
 #define PCIE_ATU_UPPER_TARGET          0x91C
+#define PCIE_ATU_UPPER_LIMIT           0x924
 
 #define PCIE_MISC_CONTROL_1_OFF                0x8BC
 #define PCIE_DBI_RO_WR_EN              BIT(0)
@@ -297,7 +299,7 @@ void dw_pcie_prog_outbound_atu(struct dw_pcie *pci, int index,
                               u64 size);
 void dw_pcie_prog_ep_outbound_atu(struct dw_pcie *pci, u8 func_no, int index,
                                  int type, u64 cpu_addr, u64 pci_addr,
-                                 u32 size);
+                                 u64 size);
 int dw_pcie_prog_inbound_atu(struct dw_pcie *pci, u8 func_no, int index,
                             int bar, u64 cpu_addr,
                             enum dw_pcie_as_type as_type);
index affa271..8a7a300 100644 (file)
@@ -159,8 +159,10 @@ struct qcom_pcie_resources_2_3_3 {
        struct reset_control *rst[7];
 };
 
+/* 6 clocks typically, 7 for sm8250 */
 struct qcom_pcie_resources_2_7_0 {
-       struct clk_bulk_data clks[6];
+       struct clk_bulk_data clks[7];
+       int num_clks;
        struct regulator_bulk_data supplies[2];
        struct reset_control *pci_reset;
        struct clk *pipe_clk;
@@ -398,7 +400,9 @@ static int qcom_pcie_init_2_1_0(struct qcom_pcie *pcie)
 
        /* enable external reference clock */
        val = readl(pcie->parf + PCIE20_PARF_PHY_REFCLK);
-       val &= ~PHY_REFCLK_USE_PAD;
+       /* USE_PAD is required only for ipq806x */
+       if (!of_device_is_compatible(node, "qcom,pcie-apq8064"))
+               val &= ~PHY_REFCLK_USE_PAD;
        val |= PHY_REFCLK_SSP_EN;
        writel(val, pcie->parf + PCIE20_PARF_PHY_REFCLK);
 
@@ -1152,8 +1156,14 @@ static int qcom_pcie_get_resources_2_7_0(struct qcom_pcie *pcie)
        res->clks[3].id = "bus_slave";
        res->clks[4].id = "slave_q2a";
        res->clks[5].id = "tbu";
+       if (of_device_is_compatible(dev->of_node, "qcom,pcie-sm8250")) {
+               res->clks[6].id = "ddrss_sf_tbu";
+               res->num_clks = 7;
+       } else {
+               res->num_clks = 6;
+       }
 
-       ret = devm_clk_bulk_get(dev, ARRAY_SIZE(res->clks), res->clks);
+       ret = devm_clk_bulk_get(dev, res->num_clks, res->clks);
        if (ret < 0)
                return ret;
 
@@ -1175,7 +1185,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
                return ret;
        }
 
-       ret = clk_bulk_prepare_enable(ARRAY_SIZE(res->clks), res->clks);
+       ret = clk_bulk_prepare_enable(res->num_clks, res->clks);
        if (ret < 0)
                goto err_disable_regulators;
 
@@ -1227,7 +1237,7 @@ static int qcom_pcie_init_2_7_0(struct qcom_pcie *pcie)
 
        return 0;
 err_disable_clocks:
-       clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+       clk_bulk_disable_unprepare(res->num_clks, res->clks);
 err_disable_regulators:
        regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
 
@@ -1238,7 +1248,7 @@ static void qcom_pcie_deinit_2_7_0(struct qcom_pcie *pcie)
 {
        struct qcom_pcie_resources_2_7_0 *res = &pcie->res.v2_7_0;
 
-       clk_bulk_disable_unprepare(ARRAY_SIZE(res->clks), res->clks);
+       clk_bulk_disable_unprepare(res->num_clks, res->clks);
        regulator_bulk_disable(ARRAY_SIZE(res->supplies), res->supplies);
 }
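
A condensed sketch of the pattern these hunks introduce (my_pcie_res and my_pcie_clks_on are hypothetical names): size the clk_bulk_data array for the largest variant, then pass the per-SoC count, not ARRAY_SIZE(), to every clk_bulk_* call.

#include <linux/clk.h>
#include <linux/device.h>

struct my_pcie_res {
	struct clk_bulk_data clks[7];	/* worst case: sm8250 */
	int num_clks;
};

static int my_pcie_clks_on(struct device *dev, struct my_pcie_res *res)
{
	int ret;

	/* res->clks[i].id fields are assumed to be filled in already,
	 * and res->num_clks set from the compatible match */
	ret = devm_clk_bulk_get(dev, res->num_clks, res->clks);
	if (ret)
		return ret;

	return clk_bulk_prepare_enable(res->num_clks, res->clks);
}
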
 
index 6ce34a1..6ab694f 100644 (file)
@@ -64,6 +64,8 @@ int pci_host_common_probe(struct platform_device *pdev)
        if (!bridge)
                return -ENOMEM;
 
+       platform_set_drvdata(pdev, bridge);
+
        of_pci_check_probe_only();
 
        /* Parse and map our Configuration Space windows */
@@ -78,8 +80,6 @@ int pci_host_common_probe(struct platform_device *pdev)
        bridge->sysdata = cfg;
        bridge->ops = (struct pci_ops *)&ops->pci_ops;
 
-       platform_set_drvdata(pdev, bridge);
-
        return pci_host_probe(bridge);
 }
 EXPORT_SYMBOL_GPL(pci_host_common_probe);
index 87aa62e..27a17a1 100644 (file)
@@ -1714,7 +1714,7 @@ static void prepopulate_bars(struct hv_pcibus_device *hbus)
         * resumed and suspended again: see hibernation_snapshot() and
         * hibernation_platform_enter().
         *
-        * If the memory enable bit is already set, Hyper-V sliently ignores
+        * If the memory enable bit is already set, Hyper-V silently ignores
         * the below BAR updates, and the related PCI device driver can not
         * work, because reading from the device register(s) always returns
         * 0xFFFFFFFF.
index 2470782..1c34c89 100644 (file)
@@ -384,13 +384,9 @@ static int xgene_msi_hwirq_alloc(unsigned int cpu)
                if (!msi_group->gic_irq)
                        continue;
 
-               irq_set_chained_handler(msi_group->gic_irq,
-                                       xgene_msi_isr);
-               err = irq_set_handler_data(msi_group->gic_irq, msi_group);
-               if (err) {
-                       pr_err("failed to register GIC IRQ handler\n");
-                       return -EINVAL;
-               }
+               irq_set_chained_handler_and_data(msi_group->gic_irq,
+                       xgene_msi_isr, msi_group);
+
                /*
                 * Statically allocate MSI GIC IRQs to each CPU core.
                 * With 8-core X-Gene v1, 2 MSI GIC IRQs are allocated
index 85e7c98..2afdc86 100644 (file)
@@ -173,12 +173,13 @@ static int xgene_pcie_config_read32(struct pci_bus *bus, unsigned int devfn,
 
        /*
         * The v1 controller has a bug in its Configuration Request
-        * Retry Status (CRS) logic: when CRS is enabled and we read the
-        * Vendor and Device ID of a non-existent device, the controller
-        * fabricates return data of 0xFFFF0001 ("device exists but is not
-        * ready") instead of 0xFFFFFFFF ("device does not exist").  This
-        * causes the PCI core to retry the read until it times out.
-        * Avoid this by not claiming to support CRS.
+        * Retry Status (CRS) logic: when CRS Software Visibility is
+        * enabled and we read the Vendor and Device ID of a non-existent
+        * device, the controller fabricates return data of 0xFFFF0001
+        * ("device exists but is not ready") instead of 0xFFFFFFFF
+        * ("device does not exist").  This causes the PCI core to retry
+        * the read until it times out.  Avoid this by not claiming to
+        * support CRS SV.
         */
        if (pci_is_root_bus(bus) && (port->version == XGENE_PCIE_IP_VER_1) &&
            ((where & ~0x3) == XGENE_V1_PCI_EXP_CAP + PCI_EXP_RTCTL))
index e1636f7..42691dd 100644 (file)
@@ -204,8 +204,7 @@ static int altera_msi_remove(struct platform_device *pdev)
        struct altera_msi *msi = platform_get_drvdata(pdev);
 
        msi_writel(msi, 0, MSI_INTMASK);
-       irq_set_chained_handler(msi->irq, NULL);
-       irq_set_handler_data(msi->irq, NULL);
+       irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
 
        altera_free_domains(msi);
 
index d41257f..e330e68 100644 (file)
@@ -97,6 +97,7 @@
 
 #define PCIE_MISC_REVISION                             0x406c
 #define  BRCM_PCIE_HW_REV_33                           0x0303
+#define  BRCM_PCIE_HW_REV_3_20                         0x0320
 
 #define PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT               0x4070
 #define  PCIE_MISC_CPU_2_PCIE_MEM_WIN0_BASE_LIMIT_LIMIT_MASK   0xfff00000
 struct brcm_pcie;
 static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32 val);
 static inline void brcm_pcie_bridge_sw_init_set_generic(struct brcm_pcie *pcie, u32 val);
+static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val);
 static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val);
 static inline void brcm_pcie_perst_set_generic(struct brcm_pcie *pcie, u32 val);
 
@@ -203,6 +205,7 @@ enum {
 
 enum pcie_type {
        GENERIC,
+       BCM4908,
        BCM7278,
        BCM2711,
 };
@@ -227,6 +230,13 @@ static const struct pcie_cfg_data generic_cfg = {
        .bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
 };
 
+static const struct pcie_cfg_data bcm4908_cfg = {
+       .offsets        = pcie_offsets,
+       .type           = BCM4908,
+       .perst_set      = brcm_pcie_perst_set_4908,
+       .bridge_sw_init_set = brcm_pcie_bridge_sw_init_set_generic,
+};
+
 static const int pcie_offset_bcm7278[] = {
        [RGR1_SW_INIT_1] = 0xc010,
        [EXT_CFG_INDEX] = 0x9000,
@@ -279,6 +289,7 @@ struct brcm_pcie {
        const int               *reg_offsets;
        enum pcie_type          type;
        struct reset_control    *rescal;
+       struct reset_control    *perst_reset;
        int                     num_memc;
        u64                     memc_size[PCIE_BRCM_MAX_MEMC];
        u32                     hw_rev;
@@ -603,8 +614,7 @@ static void brcm_msi_remove(struct brcm_pcie *pcie)
 
        if (!msi)
                return;
-       irq_set_chained_handler(msi->irq, NULL);
-       irq_set_handler_data(msi->irq, NULL);
+       irq_set_chained_handler_and_data(msi->irq, NULL, NULL);
        brcm_free_domains(msi);
 }
 
@@ -735,6 +745,17 @@ static inline void brcm_pcie_bridge_sw_init_set_7278(struct brcm_pcie *pcie, u32
        writel(tmp, pcie->base + PCIE_RGR1_SW_INIT_1(pcie));
 }
 
+static inline void brcm_pcie_perst_set_4908(struct brcm_pcie *pcie, u32 val)
+{
+       if (WARN_ONCE(!pcie->perst_reset, "missing PERST# reset controller\n"))
+               return;
+
+       if (val)
+               reset_control_assert(pcie->perst_reset);
+       else
+               reset_control_deassert(pcie->perst_reset);
+}
+
 static inline void brcm_pcie_perst_set_7278(struct brcm_pcie *pcie, u32 val)
 {
        u32 tmp;
@@ -1194,6 +1215,7 @@ static int brcm_pcie_remove(struct platform_device *pdev)
 
 static const struct of_device_id brcm_pcie_match[] = {
        { .compatible = "brcm,bcm2711-pcie", .data = &bcm2711_cfg },
+       { .compatible = "brcm,bcm4908-pcie", .data = &bcm4908_cfg },
        { .compatible = "brcm,bcm7211-pcie", .data = &generic_cfg },
        { .compatible = "brcm,bcm7278-pcie", .data = &bcm7278_cfg },
        { .compatible = "brcm,bcm7216-pcie", .data = &bcm7278_cfg },
@@ -1250,6 +1272,11 @@ static int brcm_pcie_probe(struct platform_device *pdev)
                clk_disable_unprepare(pcie->clk);
                return PTR_ERR(pcie->rescal);
        }
+       pcie->perst_reset = devm_reset_control_get_optional_exclusive(&pdev->dev, "perst");
+       if (IS_ERR(pcie->perst_reset)) {
+               clk_disable_unprepare(pcie->clk);
+               return PTR_ERR(pcie->perst_reset);
+       }
 
        ret = reset_control_deassert(pcie->rescal);
        if (ret)
@@ -1267,6 +1294,10 @@ static int brcm_pcie_probe(struct platform_device *pdev)
                goto fail;
 
        pcie->hw_rev = readl(pcie->base + PCIE_MISC_REVISION);
+       if (pcie->type == BCM4908 && pcie->hw_rev >= BRCM_PCIE_HW_REV_3_20) {
+               dev_err(pcie->dev, "hardware revision with unsupported PERST# setup\n");
+               goto fail;
+       }
 
        msi_np = of_parse_phandle(pcie->np, "msi-parent", 0);
        if (pci_msi_enabled() && msi_np == pcie->np) {
index cf4c18f..23548b5 100644 (file)
@@ -1035,14 +1035,14 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie)
                err = of_pci_get_devfn(child);
                if (err < 0) {
                        dev_err(dev, "failed to parse devfn: %d\n", err);
-                       return err;
+                       goto error_put_node;
                }
 
                slot = PCI_SLOT(err);
 
                err = mtk_pcie_parse_port(pcie, child, slot);
                if (err)
-                       return err;
+                       goto error_put_node;
        }
 
        err = mtk_pcie_subsys_powerup(pcie);
@@ -1058,6 +1058,9 @@ static int mtk_pcie_setup(struct mtk_pcie *pcie)
                mtk_pcie_subsys_powerdown(pcie);
 
        return 0;
+error_put_node:
+       of_node_put(child);
+       return err;
 }
 
 static int mtk_pcie_probe(struct platform_device *pdev)
diff --git a/drivers/pci/controller/pcie-microchip-host.c b/drivers/pci/controller/pcie-microchip-host.c
new file mode 100644 (file)
index 0000000..04c19ff
--- /dev/null
@@ -0,0 +1,1138 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Microchip AXI PCIe Bridge host controller driver
+ *
+ * Copyright (c) 2018 - 2020 Microchip Corporation. All rights reserved.
+ *
+ * Author: Daire McNamara <daire.mcnamara@microchip.com>
+ */
+
+#include <linux/clk.h>
+#include <linux/irqchip/chained_irq.h>
+#include <linux/module.h>
+#include <linux/msi.h>
+#include <linux/of_address.h>
+#include <linux/of_irq.h>
+#include <linux/of_pci.h>
+#include <linux/pci-ecam.h>
+#include <linux/platform_device.h>
+
+#include "../pci.h"
+
+/* Number of MSI IRQs */
+#define MC_NUM_MSI_IRQS                                32
+#define MC_NUM_MSI_IRQS_CODED                  5
+
+/* PCIe Bridge Phy and Controller Phy offsets */
+#define MC_PCIE1_BRIDGE_ADDR                   0x00008000u
+#define MC_PCIE1_CTRL_ADDR                     0x0000a000u
+
+#define MC_PCIE_BRIDGE_ADDR                    (MC_PCIE1_BRIDGE_ADDR)
+#define MC_PCIE_CTRL_ADDR                      (MC_PCIE1_CTRL_ADDR)
+
+/* PCIe Controller Phy Regs */
+#define SEC_ERROR_CNT                          0x20
+#define DED_ERROR_CNT                          0x24
+#define SEC_ERROR_INT                          0x28
+#define  SEC_ERROR_INT_TX_RAM_SEC_ERR_INT      GENMASK(3, 0)
+#define  SEC_ERROR_INT_RX_RAM_SEC_ERR_INT      GENMASK(7, 4)
+#define  SEC_ERROR_INT_PCIE2AXI_RAM_SEC_ERR_INT        GENMASK(11, 8)
+#define  SEC_ERROR_INT_AXI2PCIE_RAM_SEC_ERR_INT        GENMASK(15, 12)
+#define  NUM_SEC_ERROR_INTS                    (4)
+#define SEC_ERROR_INT_MASK                     0x2c
+#define DED_ERROR_INT                          0x30
+#define  DED_ERROR_INT_TX_RAM_DED_ERR_INT      GENMASK(3, 0)
+#define  DED_ERROR_INT_RX_RAM_DED_ERR_INT      GENMASK(7, 4)
+#define  DED_ERROR_INT_PCIE2AXI_RAM_DED_ERR_INT        GENMASK(11, 8)
+#define  DED_ERROR_INT_AXI2PCIE_RAM_DED_ERR_INT        GENMASK(15, 12)
+#define  NUM_DED_ERROR_INTS                    (4)
+#define DED_ERROR_INT_MASK                     0x34
+#define ECC_CONTROL                            0x38
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_0                BIT(0)
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_1                BIT(1)
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_2                BIT(2)
+#define  ECC_CONTROL_TX_RAM_INJ_ERROR_3                BIT(3)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_0                BIT(4)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_1                BIT(5)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_2                BIT(6)
+#define  ECC_CONTROL_RX_RAM_INJ_ERROR_3                BIT(7)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_0  BIT(8)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_1  BIT(9)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_2  BIT(10)
+#define  ECC_CONTROL_PCIE2AXI_RAM_INJ_ERROR_3  BIT(11)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_0  BIT(12)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_1  BIT(13)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_2  BIT(14)
+#define  ECC_CONTROL_AXI2PCIE_RAM_INJ_ERROR_3  BIT(15)
+#define  ECC_CONTROL_TX_RAM_ECC_BYPASS         BIT(24)
+#define  ECC_CONTROL_RX_RAM_ECC_BYPASS         BIT(25)
+#define  ECC_CONTROL_PCIE2AXI_RAM_ECC_BYPASS   BIT(26)
+#define  ECC_CONTROL_AXI2PCIE_RAM_ECC_BYPASS   BIT(27)
+#define LTSSM_STATE                            0x5c
+#define  LTSSM_L0_STATE                                0x10
+#define PCIE_EVENT_INT                         0x14c
+#define  PCIE_EVENT_INT_L2_EXIT_INT            BIT(0)
+#define  PCIE_EVENT_INT_HOTRST_EXIT_INT                BIT(1)
+#define  PCIE_EVENT_INT_DLUP_EXIT_INT          BIT(2)
+#define  PCIE_EVENT_INT_MASK                   GENMASK(2, 0)
+#define  PCIE_EVENT_INT_L2_EXIT_INT_MASK       BIT(16)
+#define  PCIE_EVENT_INT_HOTRST_EXIT_INT_MASK   BIT(17)
+#define  PCIE_EVENT_INT_DLUP_EXIT_INT_MASK     BIT(18)
+#define  PCIE_EVENT_INT_ENB_MASK               GENMASK(18, 16)
+#define  PCIE_EVENT_INT_ENB_SHIFT              16
+#define  NUM_PCIE_EVENTS                       (3)
+
+/* PCIe Bridge Phy Regs */
+#define PCIE_PCI_IDS_DW1                       0x9c
+
+/* PCIe Config space MSI capability structure */
+#define MC_MSI_CAP_CTRL_OFFSET                 0xe0u
+#define  MC_MSI_MAX_Q_AVAIL                    (MC_NUM_MSI_IRQS_CODED << 1)
+#define  MC_MSI_Q_SIZE                         (MC_NUM_MSI_IRQS_CODED << 4)
+
+#define IMASK_LOCAL                            0x180
+#define  DMA_END_ENGINE_0_MASK                 0x00000000u
+#define  DMA_END_ENGINE_0_SHIFT                        0
+#define  DMA_END_ENGINE_1_MASK                 0x00000000u
+#define  DMA_END_ENGINE_1_SHIFT                        1
+#define  DMA_ERROR_ENGINE_0_MASK               0x00000100u
+#define  DMA_ERROR_ENGINE_0_SHIFT              8
+#define  DMA_ERROR_ENGINE_1_MASK               0x00000200u
+#define  DMA_ERROR_ENGINE_1_SHIFT              9
+#define  A_ATR_EVT_POST_ERR_MASK               0x00010000u
+#define  A_ATR_EVT_POST_ERR_SHIFT              16
+#define  A_ATR_EVT_FETCH_ERR_MASK              0x00020000u
+#define  A_ATR_EVT_FETCH_ERR_SHIFT             17
+#define  A_ATR_EVT_DISCARD_ERR_MASK            0x00040000u
+#define  A_ATR_EVT_DISCARD_ERR_SHIFT           18
+#define  A_ATR_EVT_DOORBELL_MASK               0x00000000u
+#define  A_ATR_EVT_DOORBELL_SHIFT              19
+#define  P_ATR_EVT_POST_ERR_MASK               0x00100000u
+#define  P_ATR_EVT_POST_ERR_SHIFT              20
+#define  P_ATR_EVT_FETCH_ERR_MASK              0x00200000u
+#define  P_ATR_EVT_FETCH_ERR_SHIFT             21
+#define  P_ATR_EVT_DISCARD_ERR_MASK            0x00400000u
+#define  P_ATR_EVT_DISCARD_ERR_SHIFT           22
+#define  P_ATR_EVT_DOORBELL_MASK               0x00000000u
+#define  P_ATR_EVT_DOORBELL_SHIFT              23
+#define  PM_MSI_INT_INTA_MASK                  0x01000000u
+#define  PM_MSI_INT_INTA_SHIFT                 24
+#define  PM_MSI_INT_INTB_MASK                  0x02000000u
+#define  PM_MSI_INT_INTB_SHIFT                 25
+#define  PM_MSI_INT_INTC_MASK                  0x04000000u
+#define  PM_MSI_INT_INTC_SHIFT                 26
+#define  PM_MSI_INT_INTD_MASK                  0x08000000u
+#define  PM_MSI_INT_INTD_SHIFT                 27
+#define  PM_MSI_INT_INTX_MASK                  0x0f000000u
+#define  PM_MSI_INT_INTX_SHIFT                 24
+#define  PM_MSI_INT_MSI_MASK                   0x10000000u
+#define  PM_MSI_INT_MSI_SHIFT                  28
+#define  PM_MSI_INT_AER_EVT_MASK               0x20000000u
+#define  PM_MSI_INT_AER_EVT_SHIFT              29
+#define  PM_MSI_INT_EVENTS_MASK                        0x40000000u
+#define  PM_MSI_INT_EVENTS_SHIFT               30
+#define  PM_MSI_INT_SYS_ERR_MASK               0x80000000u
+#define  PM_MSI_INT_SYS_ERR_SHIFT              31
+#define  NUM_LOCAL_EVENTS                      15
+#define ISTATUS_LOCAL                          0x184
+#define IMASK_HOST                             0x188
+#define ISTATUS_HOST                           0x18c
+#define MSI_ADDR                               0x190
+#define ISTATUS_MSI                            0x194
+
+/* PCIe Master table init defines */
+#define ATR0_PCIE_WIN0_SRCADDR_PARAM           0x600u
+#define  ATR0_PCIE_ATR_SIZE                    0x25
+#define  ATR0_PCIE_ATR_SIZE_SHIFT              1
+#define ATR0_PCIE_WIN0_SRC_ADDR                        0x604u
+#define ATR0_PCIE_WIN0_TRSL_ADDR_LSB           0x608u
+#define ATR0_PCIE_WIN0_TRSL_ADDR_UDW           0x60cu
+#define ATR0_PCIE_WIN0_TRSL_PARAM              0x610u
+
+/* PCIe AXI slave table init defines */
+#define ATR0_AXI4_SLV0_SRCADDR_PARAM           0x800u
+#define  ATR_SIZE_SHIFT                                1
+#define  ATR_IMPL_ENABLE                       1
+#define ATR0_AXI4_SLV0_SRC_ADDR                        0x804u
+#define ATR0_AXI4_SLV0_TRSL_ADDR_LSB           0x808u
+#define ATR0_AXI4_SLV0_TRSL_ADDR_UDW           0x80cu
+#define ATR0_AXI4_SLV0_TRSL_PARAM              0x810u
+#define  PCIE_TX_RX_INTERFACE                  0x00000000u
+#define  PCIE_CONFIG_INTERFACE                 0x00000001u
+
+#define ATR_ENTRY_SIZE                         32
+
+#define EVENT_PCIE_L2_EXIT                     0
+#define EVENT_PCIE_HOTRST_EXIT                 1
+#define EVENT_PCIE_DLUP_EXIT                   2
+#define EVENT_SEC_TX_RAM_SEC_ERR               3
+#define EVENT_SEC_RX_RAM_SEC_ERR               4
+#define EVENT_SEC_AXI2PCIE_RAM_SEC_ERR         5
+#define EVENT_SEC_PCIE2AXI_RAM_SEC_ERR         6
+#define EVENT_DED_TX_RAM_DED_ERR               7
+#define EVENT_DED_RX_RAM_DED_ERR               8
+#define EVENT_DED_AXI2PCIE_RAM_DED_ERR         9
+#define EVENT_DED_PCIE2AXI_RAM_DED_ERR         10
+#define EVENT_LOCAL_DMA_END_ENGINE_0           11
+#define EVENT_LOCAL_DMA_END_ENGINE_1           12
+#define EVENT_LOCAL_DMA_ERROR_ENGINE_0         13
+#define EVENT_LOCAL_DMA_ERROR_ENGINE_1         14
+#define EVENT_LOCAL_A_ATR_EVT_POST_ERR         15
+#define EVENT_LOCAL_A_ATR_EVT_FETCH_ERR                16
+#define EVENT_LOCAL_A_ATR_EVT_DISCARD_ERR      17
+#define EVENT_LOCAL_A_ATR_EVT_DOORBELL         18
+#define EVENT_LOCAL_P_ATR_EVT_POST_ERR         19
+#define EVENT_LOCAL_P_ATR_EVT_FETCH_ERR                20
+#define EVENT_LOCAL_P_ATR_EVT_DISCARD_ERR      21
+#define EVENT_LOCAL_P_ATR_EVT_DOORBELL         22
+#define EVENT_LOCAL_PM_MSI_INT_INTX            23
+#define EVENT_LOCAL_PM_MSI_INT_MSI             24
+#define EVENT_LOCAL_PM_MSI_INT_AER_EVT         25
+#define EVENT_LOCAL_PM_MSI_INT_EVENTS          26
+#define EVENT_LOCAL_PM_MSI_INT_SYS_ERR         27
+#define NUM_EVENTS                             28
+
+#define PCIE_EVENT_CAUSE(x, s) \
+       [EVENT_PCIE_ ## x] = { __stringify(x), s }
+
+#define SEC_ERROR_CAUSE(x, s) \
+       [EVENT_SEC_ ## x] = { __stringify(x), s }
+
+#define DED_ERROR_CAUSE(x, s) \
+       [EVENT_DED_ ## x] = { __stringify(x), s }
+
+#define LOCAL_EVENT_CAUSE(x, s) \
+       [EVENT_LOCAL_ ## x] = { __stringify(x), s }
+
+#define PCIE_EVENT(x) \
+       .base = MC_PCIE_CTRL_ADDR, \
+       .offset = PCIE_EVENT_INT, \
+       .mask_offset = PCIE_EVENT_INT, \
+       .mask_high = 1, \
+       .mask = PCIE_EVENT_INT_ ## x ## _INT, \
+       .enb_mask = PCIE_EVENT_INT_ENB_MASK
+
+#define SEC_EVENT(x) \
+       .base = MC_PCIE_CTRL_ADDR, \
+       .offset = SEC_ERROR_INT, \
+       .mask_offset = SEC_ERROR_INT_MASK, \
+       .mask = SEC_ERROR_INT_ ## x ## _INT, \
+       .mask_high = 1, \
+       .enb_mask = 0
+
+#define DED_EVENT(x) \
+       .base = MC_PCIE_CTRL_ADDR, \
+       .offset = DED_ERROR_INT, \
+       .mask_offset = DED_ERROR_INT_MASK, \
+       .mask_high = 1, \
+       .mask = DED_ERROR_INT_ ## x ## _INT, \
+       .enb_mask = 0
+
+#define LOCAL_EVENT(x) \
+       .base = MC_PCIE_BRIDGE_ADDR, \
+       .offset = ISTATUS_LOCAL, \
+       .mask_offset = IMASK_LOCAL, \
+       .mask_high = 0, \
+       .mask = x ## _MASK, \
+       .enb_mask = 0
+
+#define PCIE_EVENT_TO_EVENT_MAP(x) \
+       { PCIE_EVENT_INT_ ## x ## _INT, EVENT_PCIE_ ## x }
+
+#define SEC_ERROR_TO_EVENT_MAP(x) \
+       { SEC_ERROR_INT_ ## x ## _INT, EVENT_SEC_ ## x }
+
+#define DED_ERROR_TO_EVENT_MAP(x) \
+       { DED_ERROR_INT_ ## x ## _INT, EVENT_DED_ ## x }
+
+#define LOCAL_STATUS_TO_EVENT_MAP(x) \
+       { x ## _MASK, EVENT_LOCAL_ ## x }
+
+struct event_map {
+       u32 reg_mask;
+       u32 event_bit;
+};
+
+struct mc_msi {
+       struct mutex lock;              /* Protect used bitmap */
+       struct irq_domain *msi_domain;
+       struct irq_domain *dev_domain;
+       u32 num_vectors;
+       u64 vector_phy;
+       DECLARE_BITMAP(used, MC_NUM_MSI_IRQS);
+};
+
+struct mc_port {
+       void __iomem *axi_base_addr;
+       struct device *dev;
+       struct irq_domain *intx_domain;
+       struct irq_domain *event_domain;
+       raw_spinlock_t lock;
+       struct mc_msi msi;
+};
+
+struct cause {
+       const char *sym;
+       const char *str;
+};
+
+static const struct cause event_cause[NUM_EVENTS] = {
+       PCIE_EVENT_CAUSE(L2_EXIT, "L2 exit event"),
+       PCIE_EVENT_CAUSE(HOTRST_EXIT, "Hot reset exit event"),
+       PCIE_EVENT_CAUSE(DLUP_EXIT, "DLUP exit event"),
+       SEC_ERROR_CAUSE(TX_RAM_SEC_ERR,  "sec error in tx buffer"),
+       SEC_ERROR_CAUSE(RX_RAM_SEC_ERR,  "sec error in rx buffer"),
+       SEC_ERROR_CAUSE(PCIE2AXI_RAM_SEC_ERR,  "sec error in pcie2axi buffer"),
+       SEC_ERROR_CAUSE(AXI2PCIE_RAM_SEC_ERR,  "sec error in axi2pcie buffer"),
+       DED_ERROR_CAUSE(TX_RAM_DED_ERR,  "ded error in tx buffer"),
+       DED_ERROR_CAUSE(RX_RAM_DED_ERR,  "ded error in rx buffer"),
+       DED_ERROR_CAUSE(PCIE2AXI_RAM_DED_ERR,  "ded error in pcie2axi buffer"),
+       DED_ERROR_CAUSE(AXI2PCIE_RAM_DED_ERR,  "ded error in axi2pcie buffer"),
+       LOCAL_EVENT_CAUSE(DMA_ERROR_ENGINE_0, "dma engine 0 error"),
+       LOCAL_EVENT_CAUSE(DMA_ERROR_ENGINE_1, "dma engine 1 error"),
+       LOCAL_EVENT_CAUSE(A_ATR_EVT_POST_ERR, "axi write request error"),
+       LOCAL_EVENT_CAUSE(A_ATR_EVT_FETCH_ERR, "axi read request error"),
+       LOCAL_EVENT_CAUSE(A_ATR_EVT_DISCARD_ERR, "axi read timeout"),
+       LOCAL_EVENT_CAUSE(P_ATR_EVT_POST_ERR, "pcie write request error"),
+       LOCAL_EVENT_CAUSE(P_ATR_EVT_FETCH_ERR, "pcie read request error"),
+       LOCAL_EVENT_CAUSE(P_ATR_EVT_DISCARD_ERR, "pcie read timeout"),
+       LOCAL_EVENT_CAUSE(PM_MSI_INT_AER_EVT, "aer event"),
+       LOCAL_EVENT_CAUSE(PM_MSI_INT_EVENTS, "pm/ltr/hotplug event"),
+       LOCAL_EVENT_CAUSE(PM_MSI_INT_SYS_ERR, "system error"),
+};
+
+struct event_map pcie_event_to_event[] = {
+       PCIE_EVENT_TO_EVENT_MAP(L2_EXIT),
+       PCIE_EVENT_TO_EVENT_MAP(HOTRST_EXIT),
+       PCIE_EVENT_TO_EVENT_MAP(DLUP_EXIT),
+};
+
+struct event_map sec_error_to_event[] = {
+       SEC_ERROR_TO_EVENT_MAP(TX_RAM_SEC_ERR),
+       SEC_ERROR_TO_EVENT_MAP(RX_RAM_SEC_ERR),
+       SEC_ERROR_TO_EVENT_MAP(PCIE2AXI_RAM_SEC_ERR),
+       SEC_ERROR_TO_EVENT_MAP(AXI2PCIE_RAM_SEC_ERR),
+};
+
+struct event_map ded_error_to_event[] = {
+       DED_ERROR_TO_EVENT_MAP(TX_RAM_DED_ERR),
+       DED_ERROR_TO_EVENT_MAP(RX_RAM_DED_ERR),
+       DED_ERROR_TO_EVENT_MAP(PCIE2AXI_RAM_DED_ERR),
+       DED_ERROR_TO_EVENT_MAP(AXI2PCIE_RAM_DED_ERR),
+};
+
+struct event_map local_status_to_event[] = {
+       LOCAL_STATUS_TO_EVENT_MAP(DMA_END_ENGINE_0),
+       LOCAL_STATUS_TO_EVENT_MAP(DMA_END_ENGINE_1),
+       LOCAL_STATUS_TO_EVENT_MAP(DMA_ERROR_ENGINE_0),
+       LOCAL_STATUS_TO_EVENT_MAP(DMA_ERROR_ENGINE_1),
+       LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_POST_ERR),
+       LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_FETCH_ERR),
+       LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_DISCARD_ERR),
+       LOCAL_STATUS_TO_EVENT_MAP(A_ATR_EVT_DOORBELL),
+       LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_POST_ERR),
+       LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_FETCH_ERR),
+       LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_DISCARD_ERR),
+       LOCAL_STATUS_TO_EVENT_MAP(P_ATR_EVT_DOORBELL),
+       LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_INTX),
+       LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_MSI),
+       LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_AER_EVT),
+       LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_EVENTS),
+       LOCAL_STATUS_TO_EVENT_MAP(PM_MSI_INT_SYS_ERR),
+};
+
+struct {
+       u32 base;
+       u32 offset;
+       u32 mask;
+       u32 shift;
+       u32 enb_mask;
+       u32 mask_high;
+       u32 mask_offset;
+} event_descs[] = {
+       { PCIE_EVENT(L2_EXIT) },
+       { PCIE_EVENT(HOTRST_EXIT) },
+       { PCIE_EVENT(DLUP_EXIT) },
+       { SEC_EVENT(TX_RAM_SEC_ERR) },
+       { SEC_EVENT(RX_RAM_SEC_ERR) },
+       { SEC_EVENT(PCIE2AXI_RAM_SEC_ERR) },
+       { SEC_EVENT(AXI2PCIE_RAM_SEC_ERR) },
+       { DED_EVENT(TX_RAM_DED_ERR) },
+       { DED_EVENT(RX_RAM_DED_ERR) },
+       { DED_EVENT(PCIE2AXI_RAM_DED_ERR) },
+       { DED_EVENT(AXI2PCIE_RAM_DED_ERR) },
+       { LOCAL_EVENT(DMA_END_ENGINE_0) },
+       { LOCAL_EVENT(DMA_END_ENGINE_1) },
+       { LOCAL_EVENT(DMA_ERROR_ENGINE_0) },
+       { LOCAL_EVENT(DMA_ERROR_ENGINE_1) },
+       { LOCAL_EVENT(A_ATR_EVT_POST_ERR) },
+       { LOCAL_EVENT(A_ATR_EVT_FETCH_ERR) },
+       { LOCAL_EVENT(A_ATR_EVT_DISCARD_ERR) },
+       { LOCAL_EVENT(A_ATR_EVT_DOORBELL) },
+       { LOCAL_EVENT(P_ATR_EVT_POST_ERR) },
+       { LOCAL_EVENT(P_ATR_EVT_FETCH_ERR) },
+       { LOCAL_EVENT(P_ATR_EVT_DISCARD_ERR) },
+       { LOCAL_EVENT(P_ATR_EVT_DOORBELL) },
+       { LOCAL_EVENT(PM_MSI_INT_INTX) },
+       { LOCAL_EVENT(PM_MSI_INT_MSI) },
+       { LOCAL_EVENT(PM_MSI_INT_AER_EVT) },
+       { LOCAL_EVENT(PM_MSI_INT_EVENTS) },
+       { LOCAL_EVENT(PM_MSI_INT_SYS_ERR) },
+};
+
+static char poss_clks[][5] = { "fic0", "fic1", "fic2", "fic3" };
+
+static void mc_pcie_enable_msi(struct mc_port *port, void __iomem *base)
+{
+       struct mc_msi *msi = &port->msi;
+       u32 cap_offset = MC_MSI_CAP_CTRL_OFFSET;
+       u16 msg_ctrl = readw_relaxed(base + cap_offset + PCI_MSI_FLAGS);
+
+       msg_ctrl |= PCI_MSI_FLAGS_ENABLE;
+       msg_ctrl &= ~PCI_MSI_FLAGS_QMASK;
+       msg_ctrl |= MC_MSI_MAX_Q_AVAIL;
+       msg_ctrl &= ~PCI_MSI_FLAGS_QSIZE;
+       msg_ctrl |= MC_MSI_Q_SIZE;
+       msg_ctrl |= PCI_MSI_FLAGS_64BIT;
+
+       writew_relaxed(msg_ctrl, base + cap_offset + PCI_MSI_FLAGS);
+
+       writel_relaxed(lower_32_bits(msi->vector_phy),
+                      base + cap_offset + PCI_MSI_ADDRESS_LO);
+       writel_relaxed(upper_32_bits(msi->vector_phy),
+                      base + cap_offset + PCI_MSI_ADDRESS_HI);
+}
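
For reference, an illustrative decode of the two fields programmed above (msi_vectors is a hypothetical helper, not part of this driver): with MC_NUM_MSI_IRQS_CODED == 5, both Multiple Message fields encode log2(32) vectors.

#include <linux/kernel.h>
#include <linux/pci_regs.h>

static unsigned int msi_vectors(u16 msg_ctrl)
{
	unsigned int capable = 1u << ((msg_ctrl & PCI_MSI_FLAGS_QMASK) >> 1);
	unsigned int enabled = 1u << ((msg_ctrl & PCI_MSI_FLAGS_QSIZE) >> 4);

	/* the effective vector count is bounded by both fields */
	return min(capable, enabled);
}
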
+
+static void mc_handle_msi(struct irq_desc *desc)
+{
+       struct mc_port *port = irq_desc_get_handler_data(desc);
+       struct device *dev = port->dev;
+       struct mc_msi *msi = &port->msi;
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       unsigned long status;
+       u32 bit;
+       u32 virq;
+
+       status = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
+       if (status & PM_MSI_INT_MSI_MASK) {
+               status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
+               for_each_set_bit(bit, &status, msi->num_vectors) {
+                       virq = irq_find_mapping(msi->dev_domain, bit);
+                       if (virq)
+                               generic_handle_irq(virq);
+                       else
+                               dev_err_ratelimited(dev, "bad MSI IRQ %d\n",
+                                                   bit);
+               }
+       }
+}
+
+static void mc_msi_bottom_irq_ack(struct irq_data *data)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       u32 bitpos = data->hwirq;
+       unsigned long status;
+
+       writel_relaxed(BIT(bitpos), bridge_base_addr + ISTATUS_MSI);
+       status = readl_relaxed(bridge_base_addr + ISTATUS_MSI);
+       if (!status)
+               writel_relaxed(BIT(PM_MSI_INT_MSI_SHIFT),
+                              bridge_base_addr + ISTATUS_LOCAL);
+}
+
+static void mc_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       phys_addr_t addr = port->msi.vector_phy;
+
+       msg->address_lo = lower_32_bits(addr);
+       msg->address_hi = upper_32_bits(addr);
+       msg->data = data->hwirq;
+
+       dev_dbg(port->dev, "msi#%x address_hi %#x address_lo %#x\n",
+               (int)data->hwirq, msg->address_hi, msg->address_lo);
+}
+
+static int mc_msi_set_affinity(struct irq_data *irq_data,
+                              const struct cpumask *mask, bool force)
+{
+       return -EINVAL;
+}
+
+static struct irq_chip mc_msi_bottom_irq_chip = {
+       .name = "Microchip MSI",
+       .irq_ack = mc_msi_bottom_irq_ack,
+       .irq_compose_msi_msg = mc_compose_msi_msg,
+       .irq_set_affinity = mc_msi_set_affinity,
+};
+
+static int mc_irq_msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
+                                  unsigned int nr_irqs, void *args)
+{
+       struct mc_port *port = domain->host_data;
+       struct mc_msi *msi = &port->msi;
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       unsigned long bit;
+       u32 val;
+
+       mutex_lock(&msi->lock);
+       bit = find_first_zero_bit(msi->used, msi->num_vectors);
+       if (bit >= msi->num_vectors) {
+               mutex_unlock(&msi->lock);
+               return -ENOSPC;
+       }
+
+       set_bit(bit, msi->used);
+
+       irq_domain_set_info(domain, virq, bit, &mc_msi_bottom_irq_chip,
+                           domain->host_data, handle_edge_irq, NULL, NULL);
+
+       /* Enable MSI interrupts */
+       val = readl_relaxed(bridge_base_addr + IMASK_LOCAL);
+       val |= PM_MSI_INT_MSI_MASK;
+       writel_relaxed(val, bridge_base_addr + IMASK_LOCAL);
+
+       mutex_unlock(&msi->lock);
+
+       return 0;
+}
+
+static void mc_irq_msi_domain_free(struct irq_domain *domain, unsigned int virq,
+                                  unsigned int nr_irqs)
+{
+       struct irq_data *d = irq_domain_get_irq_data(domain, virq);
+       struct mc_port *port = irq_data_get_irq_chip_data(d);
+       struct mc_msi *msi = &port->msi;
+
+       mutex_lock(&msi->lock);
+
+       if (test_bit(d->hwirq, msi->used))
+               __clear_bit(d->hwirq, msi->used);
+       else
+               dev_err(port->dev, "trying to free unused MSI%lu\n", d->hwirq);
+
+       mutex_unlock(&msi->lock);
+}
+
+static const struct irq_domain_ops msi_domain_ops = {
+       .alloc  = mc_irq_msi_domain_alloc,
+       .free   = mc_irq_msi_domain_free,
+};
+
+static struct irq_chip mc_msi_irq_chip = {
+       .name = "Microchip PCIe MSI",
+       .irq_ack = irq_chip_ack_parent,
+       .irq_mask = pci_msi_mask_irq,
+       .irq_unmask = pci_msi_unmask_irq,
+};
+
+static struct msi_domain_info mc_msi_domain_info = {
+       .flags = (MSI_FLAG_USE_DEF_DOM_OPS | MSI_FLAG_USE_DEF_CHIP_OPS |
+                 MSI_FLAG_PCI_MSIX),
+       .chip = &mc_msi_irq_chip,
+};
+
+static int mc_allocate_msi_domains(struct mc_port *port)
+{
+       struct device *dev = port->dev;
+       struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
+       struct mc_msi *msi = &port->msi;
+
+       mutex_init(&port->msi.lock);
+
+       msi->dev_domain = irq_domain_add_linear(NULL, msi->num_vectors,
+                                               &msi_domain_ops, port);
+       if (!msi->dev_domain) {
+               dev_err(dev, "failed to create IRQ domain\n");
+               return -ENOMEM;
+       }
+
+       msi->msi_domain = pci_msi_create_irq_domain(fwnode, &mc_msi_domain_info,
+                                                   msi->dev_domain);
+       if (!msi->msi_domain) {
+               dev_err(dev, "failed to create MSI domain\n");
+               irq_domain_remove(msi->dev_domain);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
+static void mc_handle_intx(struct irq_desc *desc)
+{
+       struct mc_port *port = irq_desc_get_handler_data(desc);
+       struct device *dev = port->dev;
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       unsigned long status;
+       u32 bit;
+       u32 virq;
+
+       status = readl_relaxed(bridge_base_addr + ISTATUS_LOCAL);
+       if (status & PM_MSI_INT_INTX_MASK) {
+               status &= PM_MSI_INT_INTX_MASK;
+               status >>= PM_MSI_INT_INTX_SHIFT;
+               for_each_set_bit(bit, &status, PCI_NUM_INTX) {
+                       virq = irq_find_mapping(port->intx_domain, bit);
+                       if (virq)
+                               generic_handle_irq(virq);
+                       else
+                               dev_err_ratelimited(dev, "bad INTx IRQ %d\n",
+                                                   bit);
+               }
+       }
+}
+
+static void mc_ack_intx_irq(struct irq_data *data)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       u32 mask = BIT(data->hwirq + PM_MSI_INT_INTX_SHIFT);
+
+       writel_relaxed(mask, bridge_base_addr + ISTATUS_LOCAL);
+}
+
+static void mc_mask_intx_irq(struct irq_data *data)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       unsigned long flags;
+       u32 mask = BIT(data->hwirq + PM_MSI_INT_INTX_SHIFT);
+       u32 val;
+
+       raw_spin_lock_irqsave(&port->lock, flags);
+       val = readl_relaxed(bridge_base_addr + IMASK_LOCAL);
+       val &= ~mask;
+       writel_relaxed(val, bridge_base_addr + IMASK_LOCAL);
+       raw_spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static void mc_unmask_intx_irq(struct irq_data *data)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       unsigned long flags;
+       u32 mask = BIT(data->hwirq + PM_MSI_INT_INTX_SHIFT);
+       u32 val;
+
+       raw_spin_lock_irqsave(&port->lock, flags);
+       val = readl_relaxed(bridge_base_addr + IMASK_LOCAL);
+       val |= mask;
+       writel_relaxed(val, bridge_base_addr + IMASK_LOCAL);
+       raw_spin_unlock_irqrestore(&port->lock, flags);
+}
+
+static struct irq_chip mc_intx_irq_chip = {
+       .name = "Microchip PCIe INTx",
+       .irq_ack = mc_ack_intx_irq,
+       .irq_mask = mc_mask_intx_irq,
+       .irq_unmask = mc_unmask_intx_irq,
+};
+
+static int mc_pcie_intx_map(struct irq_domain *domain, unsigned int irq,
+                           irq_hw_number_t hwirq)
+{
+       irq_set_chip_and_handler(irq, &mc_intx_irq_chip, handle_level_irq);
+       irq_set_chip_data(irq, domain->host_data);
+
+       return 0;
+}
+
+static const struct irq_domain_ops intx_domain_ops = {
+       .map = mc_pcie_intx_map,
+};
+
+static inline u32 reg_to_event(u32 reg, struct event_map field)
+{
+       return (reg & field.reg_mask) ? BIT(field.event_bit) : 0;
+}
+
+static u32 pcie_events(void __iomem *addr)
+{
+       u32 reg = readl_relaxed(addr);
+       u32 val = 0;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(pcie_event_to_event); i++)
+               val |= reg_to_event(reg, pcie_event_to_event[i]);
+
+       return val;
+}
+
+static u32 sec_errors(void __iomem *addr)
+{
+       u32 reg = readl_relaxed(addr);
+       u32 val = 0;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(sec_error_to_event); i++)
+               val |= reg_to_event(reg, sec_error_to_event[i]);
+
+       return val;
+}
+
+static u32 ded_errors(void __iomem *addr)
+{
+       u32 reg = readl_relaxed(addr);
+       u32 val = 0;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(ded_error_to_event); i++)
+               val |= reg_to_event(reg, ded_error_to_event[i]);
+
+       return val;
+}
+
+static u32 local_events(void __iomem *addr)
+{
+       u32 reg = readl_relaxed(addr);
+       u32 val = 0;
+       int i;
+
+       for (i = 0; i < ARRAY_SIZE(local_status_to_event); i++)
+               val |= reg_to_event(reg, local_status_to_event[i]);
+
+       return val;
+}
+
+static u32 get_events(struct mc_port *port)
+{
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       void __iomem *ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+       u32 events = 0;
+
+       events |= pcie_events(ctrl_base_addr + PCIE_EVENT_INT);
+       events |= sec_errors(ctrl_base_addr + SEC_ERROR_INT);
+       events |= ded_errors(ctrl_base_addr + DED_ERROR_INT);
+       events |= local_events(bridge_base_addr + ISTATUS_LOCAL);
+
+       return events;
+}
+
+static irqreturn_t mc_event_handler(int irq, void *dev_id)
+{
+       struct mc_port *port = dev_id;
+       struct device *dev = port->dev;
+       struct irq_data *data;
+
+       data = irq_domain_get_irq_data(port->event_domain, irq);
+
+       if (event_cause[data->hwirq].str)
+               dev_err_ratelimited(dev, "%s\n", event_cause[data->hwirq].str);
+       else
+               dev_err_ratelimited(dev, "bad event IRQ %ld\n", data->hwirq);
+
+       return IRQ_HANDLED;
+}
+
+static void mc_handle_event(struct irq_desc *desc)
+{
+       struct mc_port *port = irq_desc_get_handler_data(desc);
+       unsigned long events;
+       u32 bit;
+       struct irq_chip *chip = irq_desc_get_chip(desc);
+
+       chained_irq_enter(chip, desc);
+
+       events = get_events(port);
+
+       for_each_set_bit(bit, &events, NUM_EVENTS)
+               generic_handle_irq(irq_find_mapping(port->event_domain, bit));
+
+       chained_irq_exit(chip, desc);
+}
+
+static void mc_ack_event_irq(struct irq_data *data)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       u32 event = data->hwirq;
+       void __iomem *addr;
+       u32 mask;
+
+       addr = port->axi_base_addr + event_descs[event].base +
+               event_descs[event].offset;
+       mask = event_descs[event].mask;
+       mask |= event_descs[event].enb_mask;
+
+       writel_relaxed(mask, addr);
+}
+
+static void mc_mask_event_irq(struct irq_data *data)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       u32 event = data->hwirq;
+       void __iomem *addr;
+       u32 mask;
+       u32 val;
+
+       addr = port->axi_base_addr + event_descs[event].base +
+               event_descs[event].mask_offset;
+       mask = event_descs[event].mask;
+       if (event_descs[event].enb_mask) {
+               mask <<= PCIE_EVENT_INT_ENB_SHIFT;
+               mask &= PCIE_EVENT_INT_ENB_MASK;
+       }
+
+       if (!event_descs[event].mask_high)
+               mask = ~mask;
+
+       raw_spin_lock(&port->lock);
+       val = readl_relaxed(addr);
+       if (event_descs[event].mask_high)
+               val |= mask;
+       else
+               val &= mask;
+
+       writel_relaxed(val, addr);
+       raw_spin_unlock(&port->lock);
+}
+
+static void mc_unmask_event_irq(struct irq_data *data)
+{
+       struct mc_port *port = irq_data_get_irq_chip_data(data);
+       u32 event = data->hwirq;
+       void __iomem *addr;
+       u32 mask;
+       u32 val;
+
+       addr = port->axi_base_addr + event_descs[event].base +
+               event_descs[event].mask_offset;
+       mask = event_descs[event].mask;
+
+       if (event_descs[event].enb_mask)
+               mask <<= PCIE_EVENT_INT_ENB_SHIFT;
+
+       if (event_descs[event].mask_high)
+               mask = ~mask;
+
+       if (event_descs[event].enb_mask)
+               mask &= PCIE_EVENT_INT_ENB_MASK;
+
+       raw_spin_lock(&port->lock);
+       val = readl_relaxed(addr);
+       if (event_descs[event].mask_high)
+               val &= mask;
+       else
+               val |= mask;
+       writel_relaxed(val, addr);
+       raw_spin_unlock(&port->lock);
+}
+
+static struct irq_chip mc_event_irq_chip = {
+       .name = "Microchip PCIe EVENT",
+       .irq_ack = mc_ack_event_irq,
+       .irq_mask = mc_mask_event_irq,
+       .irq_unmask = mc_unmask_event_irq,
+};
+
+static int mc_pcie_event_map(struct irq_domain *domain, unsigned int irq,
+                            irq_hw_number_t hwirq)
+{
+       irq_set_chip_and_handler(irq, &mc_event_irq_chip, handle_level_irq);
+       irq_set_chip_data(irq, domain->host_data);
+
+       return 0;
+}
+
+static const struct irq_domain_ops event_domain_ops = {
+       .map = mc_pcie_event_map,
+};
+
+static inline struct clk *mc_pcie_init_clk(struct device *dev, const char *id)
+{
+       struct clk *clk;
+       int ret;
+
+       clk = devm_clk_get_optional(dev, id);
+       if (IS_ERR(clk))
+               return clk;
+       if (!clk)
+               return clk;
+
+       ret = clk_prepare_enable(clk);
+       if (ret)
+               return ERR_PTR(ret);
+
+       devm_add_action_or_reset(dev, (void (*) (void *))clk_disable_unprepare,
+                                clk);
+
+       return clk;
+}
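
A note on the devm action above: calling clk_disable_unprepare() through a (void (*)(void *)) cast works on current ABIs but is a formally mismatched indirect call, and control-flow-integrity builds reject it. A minimal cast-free sketch (the helper name is illustrative, not part of this patch):

	static void mc_pcie_deinit_clk(void *data)
	{
		clk_disable_unprepare(data);
	}

	/* ...then, in mc_pcie_init_clk(): */
	devm_add_action_or_reset(dev, mc_pcie_deinit_clk, clk);
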
+
+static int mc_pcie_init_clks(struct device *dev)
+{
+       int i;
+       struct clk *fic;
+
+       /*
+        * PCIe may be clocked via Fabric Interface using between 1 and 4
+        * clocks. Scan the DT for clocks and enable them if present.
+        */
+       for (i = 0; i < ARRAY_SIZE(poss_clks); i++) {
+               fic = mc_pcie_init_clk(dev, poss_clks[i]);
+               if (IS_ERR(fic))
+                       return PTR_ERR(fic);
+       }
+
+       return 0;
+}
+
+static int mc_pcie_init_irq_domains(struct mc_port *port)
+{
+       struct device *dev = port->dev;
+       struct device_node *node = dev->of_node;
+       struct device_node *pcie_intc_node;
+
+       /* Setup INTx */
+       pcie_intc_node = of_get_next_child(node, NULL);
+       if (!pcie_intc_node) {
+               dev_err(dev, "failed to find PCIe Intc node\n");
+               return -EINVAL;
+       }
+
+       port->event_domain = irq_domain_add_linear(pcie_intc_node, NUM_EVENTS,
+                                                  &event_domain_ops, port);
+       if (!port->event_domain) {
+               dev_err(dev, "failed to get event domain\n");
+               return -ENOMEM;
+       }
+
+       irq_domain_update_bus_token(port->event_domain, DOMAIN_BUS_NEXUS);
+
+       port->intx_domain = irq_domain_add_linear(pcie_intc_node, PCI_NUM_INTX,
+                                                 &intx_domain_ops, port);
+       if (!port->intx_domain) {
+               dev_err(dev, "failed to get an INTx IRQ domain\n");
+               return -ENOMEM;
+       }
+
+       irq_domain_update_bus_token(port->intx_domain, DOMAIN_BUS_WIRED);
+
+       of_node_put(pcie_intc_node);
+       raw_spin_lock_init(&port->lock);
+
+       return mc_allocate_msi_domains(port);
+}
+
+static void mc_pcie_setup_window(void __iomem *bridge_base_addr, u32 index,
+                                phys_addr_t axi_addr, phys_addr_t pci_addr,
+                                size_t size)
+{
+       u32 atr_sz = ilog2(size) - 1;
+       u32 val;
+
+       if (index == 0)
+               val = PCIE_CONFIG_INTERFACE;
+       else
+               val = PCIE_TX_RX_INTERFACE;
+
+       writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+              ATR0_AXI4_SLV0_TRSL_PARAM);
+
+       val = lower_32_bits(axi_addr) | (atr_sz << ATR_SIZE_SHIFT) |
+                           ATR_IMPL_ENABLE;
+       writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+              ATR0_AXI4_SLV0_SRCADDR_PARAM);
+
+       val = upper_32_bits(axi_addr);
+       writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+              ATR0_AXI4_SLV0_SRC_ADDR);
+
+       val = lower_32_bits(pci_addr);
+       writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+              ATR0_AXI4_SLV0_TRSL_ADDR_LSB);
+
+       val = upper_32_bits(pci_addr);
+       writel(val, bridge_base_addr + (index * ATR_ENTRY_SIZE) +
+              ATR0_AXI4_SLV0_TRSL_ADDR_UDW);
+
+       val = readl(bridge_base_addr + ATR0_PCIE_WIN0_SRCADDR_PARAM);
+       val |= (ATR0_PCIE_ATR_SIZE << ATR0_PCIE_ATR_SIZE_SHIFT);
+       writel(val, bridge_base_addr + ATR0_PCIE_WIN0_SRCADDR_PARAM);
+       writel(0, bridge_base_addr + ATR0_PCIE_WIN0_SRC_ADDR);
+}
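
As a worked example of the encoding above (values illustrative, assuming an identity mapping): a 256 MiB window programmed through table entry 1 gets atr_sz = ilog2(SZ_256M) - 1 = 28 - 1 = 27 written into ATR0_AXI4_SLV0_SRCADDR_PARAM:

	/* illustrative only: map 256 MiB at 0x60000000 1:1 through entry 1 */
	mc_pcie_setup_window(bridge_base_addr, 1,
			     0x60000000,	/* AXI source address */
			     0x60000000,	/* PCI translated address */
			     SZ_256M);		/* encoded as atr_sz = 27 */
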
+
+static int mc_pcie_setup_windows(struct platform_device *pdev,
+                                struct mc_port *port)
+{
+       void __iomem *bridge_base_addr =
+               port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       struct pci_host_bridge *bridge = platform_get_drvdata(pdev);
+       struct resource_entry *entry;
+       u64 pci_addr;
+       u32 index = 1;
+
+       resource_list_for_each_entry(entry, &bridge->windows) {
+               if (resource_type(entry->res) == IORESOURCE_MEM) {
+                       pci_addr = entry->res->start - entry->offset;
+                       mc_pcie_setup_window(bridge_base_addr, index,
+                                            entry->res->start, pci_addr,
+                                            resource_size(entry->res));
+                       index++;
+               }
+       }
+
+       return 0;
+}
+
+static int mc_platform_init(struct pci_config_window *cfg)
+{
+       struct device *dev = cfg->parent;
+       struct platform_device *pdev = to_platform_device(dev);
+       struct mc_port *port;
+       void __iomem *bridge_base_addr;
+       void __iomem *ctrl_base_addr;
+       int ret;
+       int irq;
+       int i, intx_irq, msi_irq, event_irq;
+       u32 val;
+       int err;
+
+       port = devm_kzalloc(dev, sizeof(*port), GFP_KERNEL);
+       if (!port)
+               return -ENOMEM;
+       port->dev = dev;
+
+       ret = mc_pcie_init_clks(dev);
+       if (ret) {
+               dev_err(dev, "failed to get clock resources, error %d\n", ret);
+               return -ENODEV;
+       }
+
+       port->axi_base_addr = devm_platform_ioremap_resource(pdev, 1);
+       if (IS_ERR(port->axi_base_addr))
+               return PTR_ERR(port->axi_base_addr);
+
+       bridge_base_addr = port->axi_base_addr + MC_PCIE_BRIDGE_ADDR;
+       ctrl_base_addr = port->axi_base_addr + MC_PCIE_CTRL_ADDR;
+
+       port->msi.vector_phy = MSI_ADDR;
+       port->msi.num_vectors = MC_NUM_MSI_IRQS;
+       ret = mc_pcie_init_irq_domains(port);
+       if (ret) {
+               dev_err(dev, "failed creating IRQ domains\n");
+               return ret;
+       }
+
+       irq = platform_get_irq(pdev, 0);
+       if (irq < 0) {
+               dev_err(dev, "unable to request IRQ%d\n", irq);
+               return -ENODEV;
+       }
+
+       for (i = 0; i < NUM_EVENTS; i++) {
+               event_irq = irq_create_mapping(port->event_domain, i);
+               if (!event_irq) {
+                       dev_err(dev, "failed to map hwirq %d\n", i);
+                       return -ENXIO;
+               }
+
+               err = devm_request_irq(dev, event_irq, mc_event_handler,
+                                      0, event_cause[i].sym, port);
+               if (err) {
+                       dev_err(dev, "failed to request IRQ %d\n", event_irq);
+                       return err;
+               }
+       }
+
+       intx_irq = irq_create_mapping(port->event_domain,
+                                     EVENT_LOCAL_PM_MSI_INT_INTX);
+       if (!intx_irq) {
+               dev_err(dev, "failed to map INTx interrupt\n");
+               return -ENXIO;
+       }
+
+       /* Plug the INTx chained handler */
+       irq_set_chained_handler_and_data(intx_irq, mc_handle_intx, port);
+
+       msi_irq = irq_create_mapping(port->event_domain,
+                                    EVENT_LOCAL_PM_MSI_INT_MSI);
+       if (!msi_irq)
+               return -ENXIO;
+
+       /* Plug the MSI chained handler */
+       irq_set_chained_handler_and_data(msi_irq, mc_handle_msi, port);
+
+       /* Plug the main event chained handler */
+       irq_set_chained_handler_and_data(irq, mc_handle_event, port);
+
+       /* Hardware doesn't setup MSI by default */
+       mc_pcie_enable_msi(port, cfg->win);
+
+       val = readl_relaxed(bridge_base_addr + IMASK_LOCAL);
+       val |= PM_MSI_INT_INTX_MASK;
+       writel_relaxed(val, bridge_base_addr + IMASK_LOCAL);
+
+       writel_relaxed(val, ctrl_base_addr + ECC_CONTROL);
+
+       val = PCIE_EVENT_INT_L2_EXIT_INT |
+             PCIE_EVENT_INT_HOTRST_EXIT_INT |
+             PCIE_EVENT_INT_DLUP_EXIT_INT;
+       writel_relaxed(val, ctrl_base_addr + PCIE_EVENT_INT);
+
+       val = SEC_ERROR_INT_TX_RAM_SEC_ERR_INT |
+             SEC_ERROR_INT_RX_RAM_SEC_ERR_INT |
+             SEC_ERROR_INT_PCIE2AXI_RAM_SEC_ERR_INT |
+             SEC_ERROR_INT_AXI2PCIE_RAM_SEC_ERR_INT;
+       writel_relaxed(val, ctrl_base_addr + SEC_ERROR_INT);
+       writel_relaxed(0, ctrl_base_addr + SEC_ERROR_INT_MASK);
+       writel_relaxed(0, ctrl_base_addr + SEC_ERROR_CNT);
+
+       val = DED_ERROR_INT_TX_RAM_DED_ERR_INT |
+             DED_ERROR_INT_RX_RAM_DED_ERR_INT |
+             DED_ERROR_INT_PCIE2AXI_RAM_DED_ERR_INT |
+             DED_ERROR_INT_AXI2PCIE_RAM_DED_ERR_INT;
+       writel_relaxed(val, ctrl_base_addr + DED_ERROR_INT);
+       writel_relaxed(0, ctrl_base_addr + DED_ERROR_INT_MASK);
+       writel_relaxed(0, ctrl_base_addr + DED_ERROR_CNT);
+
+       writel_relaxed(0, bridge_base_addr + IMASK_HOST);
+       writel_relaxed(GENMASK(31, 0), bridge_base_addr + ISTATUS_HOST);
+
+       /* Configure Address Translation Table 0 for PCIe config space */
+       mc_pcie_setup_window(bridge_base_addr, 0, cfg->res.start & 0xffffffff,
+                            cfg->res.start, resource_size(&cfg->res));
+
+       return mc_pcie_setup_windows(pdev, port);
+}
+
+static const struct pci_ecam_ops mc_ecam_ops = {
+       .init = mc_platform_init,
+       .pci_ops = {
+               .map_bus = pci_ecam_map_bus,
+               .read = pci_generic_config_read,
+               .write = pci_generic_config_write,
+       }
+};
+
+static const struct of_device_id mc_pcie_of_match[] = {
+       {
+               .compatible = "microchip,pcie-host-1.0",
+               .data = &mc_ecam_ops,
+       },
+       {},
+};
+
+MODULE_DEVICE_TABLE(of, mc_pcie_of_match);
+
+static struct platform_driver mc_pcie_driver = {
+       .probe = pci_host_common_probe,
+       .driver = {
+               .name = "microchip-pcie",
+               .of_match_table = mc_pcie_of_match,
+               .suppress_bind_attrs = true,
+       },
+};
+
+builtin_platform_driver(mc_pcie_driver);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Microchip PCIe host controller driver");
+MODULE_AUTHOR("Daire McNamara <daire.mcnamara@microchip.com>");
index 4d1c4b2..a728e8f 100644 (file)
@@ -735,7 +735,7 @@ static int rcar_pcie_enable_msi(struct rcar_pcie_host *host)
        }
 
        /* setup MSI data target */
-       msi->pages = __get_free_pages(GFP_KERNEL, 0);
+       msi->pages = __get_free_pages(GFP_KERNEL | GFP_DMA32, 0);
        rcar_pcie_hw_enable_msi(host);
 
        return 0;
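
The hunk above forces the MSI target page to come from the low 4 GiB. The page's physical address is programmed into the controller as the MSI doorbell, and endpoints whose MSI capability lacks 64-bit address support can only write to a 32-bit address. A hedged sketch of the invariant this buys (variable names illustrative):

	unsigned long page;
	phys_addr_t base;

	page = __get_free_pages(GFP_KERNEL | GFP_DMA32, 0);
	base = virt_to_phys((void *)page);
	WARN_ON(upper_32_bits(base));	/* guaranteed by GFP_DMA32 */
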
index 904dec0..990a00e 100644 (file)
@@ -82,7 +82,7 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
        }
 
        rockchip->mgmt_sticky_rst = devm_reset_control_get_exclusive(dev,
-                                                                    "mgmt-sticky");
+                                                               "mgmt-sticky");
        if (IS_ERR(rockchip->mgmt_sticky_rst)) {
                if (PTR_ERR(rockchip->mgmt_sticky_rst) != -EPROBE_DEFER)
                        dev_err(dev, "missing mgmt-sticky reset property in node\n");
@@ -118,11 +118,11 @@ int rockchip_pcie_parse_dt(struct rockchip_pcie *rockchip)
        }
 
        if (rockchip->is_rc) {
-               rockchip->ep_gpio = devm_gpiod_get(dev, "ep", GPIOD_OUT_HIGH);
-               if (IS_ERR(rockchip->ep_gpio)) {
-                       dev_err(dev, "missing ep-gpios property in node\n");
-                       return PTR_ERR(rockchip->ep_gpio);
-               }
+               rockchip->ep_gpio = devm_gpiod_get_optional(dev, "ep",
+                                                           GPIOD_OUT_HIGH);
+               if (IS_ERR(rockchip->ep_gpio))
+                       return dev_err_probe(dev, PTR_ERR(rockchip->ep_gpio),
+                                            "failed to get ep GPIO\n");
        }
 
        rockchip->aclk_pcie = devm_clk_get(dev, "aclk");
diff --git a/drivers/pci/controller/pcie-tango.c b/drivers/pci/controller/pcie-tango.c
deleted file mode 100644 (file)
index 62a061f..0000000
+++ /dev/null
@@ -1,341 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-#include <linux/irqchip/chained_irq.h>
-#include <linux/irqdomain.h>
-#include <linux/pci-ecam.h>
-#include <linux/delay.h>
-#include <linux/msi.h>
-#include <linux/of_address.h>
-
-#define MSI_MAX                        256
-
-#define SMP8759_MUX            0x48
-#define SMP8759_TEST_OUT       0x74
-#define SMP8759_DOORBELL       0x7c
-#define SMP8759_STATUS         0x80
-#define SMP8759_ENABLE         0xa0
-
-struct tango_pcie {
-       DECLARE_BITMAP(used_msi, MSI_MAX);
-       u64                     msi_doorbell;
-       spinlock_t              used_msi_lock;
-       void __iomem            *base;
-       struct irq_domain       *dom;
-};
-
-static void tango_msi_isr(struct irq_desc *desc)
-{
-       struct irq_chip *chip = irq_desc_get_chip(desc);
-       struct tango_pcie *pcie = irq_desc_get_handler_data(desc);
-       unsigned long status, base, virq, idx, pos = 0;
-
-       chained_irq_enter(chip, desc);
-       spin_lock(&pcie->used_msi_lock);
-
-       while ((pos = find_next_bit(pcie->used_msi, MSI_MAX, pos)) < MSI_MAX) {
-               base = round_down(pos, 32);
-               status = readl_relaxed(pcie->base + SMP8759_STATUS + base / 8);
-               for_each_set_bit(idx, &status, 32) {
-                       virq = irq_find_mapping(pcie->dom, base + idx);
-                       generic_handle_irq(virq);
-               }
-               pos = base + 32;
-       }
-
-       spin_unlock(&pcie->used_msi_lock);
-       chained_irq_exit(chip, desc);
-}
-
-static void tango_ack(struct irq_data *d)
-{
-       struct tango_pcie *pcie = d->chip_data;
-       u32 offset = (d->hwirq / 32) * 4;
-       u32 bit = BIT(d->hwirq % 32);
-
-       writel_relaxed(bit, pcie->base + SMP8759_STATUS + offset);
-}
-
-static void update_msi_enable(struct irq_data *d, bool unmask)
-{
-       unsigned long flags;
-       struct tango_pcie *pcie = d->chip_data;
-       u32 offset = (d->hwirq / 32) * 4;
-       u32 bit = BIT(d->hwirq % 32);
-       u32 val;
-
-       spin_lock_irqsave(&pcie->used_msi_lock, flags);
-       val = readl_relaxed(pcie->base + SMP8759_ENABLE + offset);
-       val = unmask ? val | bit : val & ~bit;
-       writel_relaxed(val, pcie->base + SMP8759_ENABLE + offset);
-       spin_unlock_irqrestore(&pcie->used_msi_lock, flags);
-}
-
-static void tango_mask(struct irq_data *d)
-{
-       update_msi_enable(d, false);
-}
-
-static void tango_unmask(struct irq_data *d)
-{
-       update_msi_enable(d, true);
-}
-
-static int tango_set_affinity(struct irq_data *d, const struct cpumask *mask,
-                             bool force)
-{
-       return -EINVAL;
-}
-
-static void tango_compose_msi_msg(struct irq_data *d, struct msi_msg *msg)
-{
-       struct tango_pcie *pcie = d->chip_data;
-       msg->address_lo = lower_32_bits(pcie->msi_doorbell);
-       msg->address_hi = upper_32_bits(pcie->msi_doorbell);
-       msg->data = d->hwirq;
-}
-
-static struct irq_chip tango_chip = {
-       .irq_ack                = tango_ack,
-       .irq_mask               = tango_mask,
-       .irq_unmask             = tango_unmask,
-       .irq_set_affinity       = tango_set_affinity,
-       .irq_compose_msi_msg    = tango_compose_msi_msg,
-};
-
-static void msi_ack(struct irq_data *d)
-{
-       irq_chip_ack_parent(d);
-}
-
-static void msi_mask(struct irq_data *d)
-{
-       pci_msi_mask_irq(d);
-       irq_chip_mask_parent(d);
-}
-
-static void msi_unmask(struct irq_data *d)
-{
-       pci_msi_unmask_irq(d);
-       irq_chip_unmask_parent(d);
-}
-
-static struct irq_chip msi_chip = {
-       .name = "MSI",
-       .irq_ack = msi_ack,
-       .irq_mask = msi_mask,
-       .irq_unmask = msi_unmask,
-};
-
-static struct msi_domain_info msi_dom_info = {
-       .flags  = MSI_FLAG_PCI_MSIX
-               | MSI_FLAG_USE_DEF_DOM_OPS
-               | MSI_FLAG_USE_DEF_CHIP_OPS,
-       .chip   = &msi_chip,
-};
-
-static int tango_irq_domain_alloc(struct irq_domain *dom, unsigned int virq,
-                                 unsigned int nr_irqs, void *args)
-{
-       struct tango_pcie *pcie = dom->host_data;
-       unsigned long flags;
-       int pos;
-
-       spin_lock_irqsave(&pcie->used_msi_lock, flags);
-       pos = find_first_zero_bit(pcie->used_msi, MSI_MAX);
-       if (pos >= MSI_MAX) {
-               spin_unlock_irqrestore(&pcie->used_msi_lock, flags);
-               return -ENOSPC;
-       }
-       __set_bit(pos, pcie->used_msi);
-       spin_unlock_irqrestore(&pcie->used_msi_lock, flags);
-       irq_domain_set_info(dom, virq, pos, &tango_chip,
-                       pcie, handle_edge_irq, NULL, NULL);
-
-       return 0;
-}
-
-static void tango_irq_domain_free(struct irq_domain *dom, unsigned int virq,
-                                 unsigned int nr_irqs)
-{
-       unsigned long flags;
-       struct irq_data *d = irq_domain_get_irq_data(dom, virq);
-       struct tango_pcie *pcie = d->chip_data;
-
-       spin_lock_irqsave(&pcie->used_msi_lock, flags);
-       __clear_bit(d->hwirq, pcie->used_msi);
-       spin_unlock_irqrestore(&pcie->used_msi_lock, flags);
-}
-
-static const struct irq_domain_ops dom_ops = {
-       .alloc  = tango_irq_domain_alloc,
-       .free   = tango_irq_domain_free,
-};
-
-static int smp8759_config_read(struct pci_bus *bus, unsigned int devfn,
-                              int where, int size, u32 *val)
-{
-       struct pci_config_window *cfg = bus->sysdata;
-       struct tango_pcie *pcie = dev_get_drvdata(cfg->parent);
-       int ret;
-
-       /* Reads in configuration space outside devfn 0 return garbage */
-       if (devfn != 0)
-               return PCIBIOS_FUNC_NOT_SUPPORTED;
-
-       /*
-        * PCI config and MMIO accesses are muxed.  Linux doesn't have a
-        * mutual exclusion mechanism for config vs. MMIO accesses, so
-        * concurrent accesses may cause corruption.
-        */
-       writel_relaxed(1, pcie->base + SMP8759_MUX);
-       ret = pci_generic_config_read(bus, devfn, where, size, val);
-       writel_relaxed(0, pcie->base + SMP8759_MUX);
-
-       return ret;
-}
-
-static int smp8759_config_write(struct pci_bus *bus, unsigned int devfn,
-                               int where, int size, u32 val)
-{
-       struct pci_config_window *cfg = bus->sysdata;
-       struct tango_pcie *pcie = dev_get_drvdata(cfg->parent);
-       int ret;
-
-       writel_relaxed(1, pcie->base + SMP8759_MUX);
-       ret = pci_generic_config_write(bus, devfn, where, size, val);
-       writel_relaxed(0, pcie->base + SMP8759_MUX);
-
-       return ret;
-}
-
-static const struct pci_ecam_ops smp8759_ecam_ops = {
-       .pci_ops        = {
-               .map_bus        = pci_ecam_map_bus,
-               .read           = smp8759_config_read,
-               .write          = smp8759_config_write,
-       }
-};
-
-static int tango_pcie_link_up(struct tango_pcie *pcie)
-{
-       void __iomem *test_out = pcie->base + SMP8759_TEST_OUT;
-       int i;
-
-       writel_relaxed(16, test_out);
-       for (i = 0; i < 10; ++i) {
-               u32 ltssm_state = readl_relaxed(test_out) >> 8;
-               if ((ltssm_state & 0x1f) == 0xf) /* L0 */
-                       return 1;
-               usleep_range(3000, 4000);
-       }
-
-       return 0;
-}
-
-static int tango_pcie_probe(struct platform_device *pdev)
-{
-       struct device *dev = &pdev->dev;
-       struct tango_pcie *pcie;
-       struct resource *res;
-       struct fwnode_handle *fwnode = of_node_to_fwnode(dev->of_node);
-       struct irq_domain *msi_dom, *irq_dom;
-       struct of_pci_range_parser parser;
-       struct of_pci_range range;
-       int virq, offset;
-
-       dev_warn(dev, "simultaneous PCI config and MMIO accesses may cause data corruption\n");
-       add_taint(TAINT_CRAP, LOCKDEP_STILL_OK);
-
-       pcie = devm_kzalloc(dev, sizeof(*pcie), GFP_KERNEL);
-       if (!pcie)
-               return -ENOMEM;
-
-       res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
-       pcie->base = devm_ioremap_resource(dev, res);
-       if (IS_ERR(pcie->base))
-               return PTR_ERR(pcie->base);
-
-       platform_set_drvdata(pdev, pcie);
-
-       if (!tango_pcie_link_up(pcie))
-               return -ENODEV;
-
-       if (of_pci_dma_range_parser_init(&parser, dev->of_node) < 0)
-               return -ENOENT;
-
-       if (of_pci_range_parser_one(&parser, &range) == NULL)
-               return -ENOENT;
-
-       range.pci_addr += range.size;
-       pcie->msi_doorbell = range.pci_addr + res->start + SMP8759_DOORBELL;
-
-       for (offset = 0; offset < MSI_MAX / 8; offset += 4)
-               writel_relaxed(0, pcie->base + SMP8759_ENABLE + offset);
-
-       virq = platform_get_irq(pdev, 1);
-       if (virq < 0)
-               return virq;
-
-       irq_dom = irq_domain_create_linear(fwnode, MSI_MAX, &dom_ops, pcie);
-       if (!irq_dom) {
-               dev_err(dev, "Failed to create IRQ domain\n");
-               return -ENOMEM;
-       }
-
-       msi_dom = pci_msi_create_irq_domain(fwnode, &msi_dom_info, irq_dom);
-       if (!msi_dom) {
-               dev_err(dev, "Failed to create MSI domain\n");
-               irq_domain_remove(irq_dom);
-               return -ENOMEM;
-       }
-
-       pcie->dom = irq_dom;
-       spin_lock_init(&pcie->used_msi_lock);
-       irq_set_chained_handler_and_data(virq, tango_msi_isr, pcie);
-
-       return pci_host_common_probe(pdev);
-}
-
-static const struct of_device_id tango_pcie_ids[] = {
-       {
-               .compatible = "sigma,smp8759-pcie",
-               .data = &smp8759_ecam_ops,
-       },
-       { },
-};
-
-static struct platform_driver tango_pcie_driver = {
-       .probe  = tango_pcie_probe,
-       .driver = {
-               .name = KBUILD_MODNAME,
-               .of_match_table = tango_pcie_ids,
-               .suppress_bind_attrs = true,
-       },
-};
-builtin_platform_driver(tango_pcie_driver);
-
-/*
- * The root complex advertises the wrong device class.
- * Header Type 1 is for PCI-to-PCI bridges.
- */
-static void tango_fixup_class(struct pci_dev *dev)
-{
-       dev->class = PCI_CLASS_BRIDGE_PCI << 8;
-}
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0024, tango_fixup_class);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0028, tango_fixup_class);
-
-/*
- * The root complex exposes a "fake" BAR, which is used to filter
- * bus-to-system accesses.  Only accesses within the range defined by this
- * BAR are forwarded to the host, others are ignored.
- *
- * By default, the DMA framework expects an identity mapping, and DRAM0 is
- * mapped at 0x80000000.
- */
-static void tango_fixup_bar(struct pci_dev *dev)
-{
-       dev->non_compliant_bars = true;
-       pci_write_config_dword(dev, PCI_BASE_ADDRESS_0, 0x80000000);
-}
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0024, tango_fixup_bar);
-DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_SIGMA, 0x0028, tango_fixup_bar);
index f92e015..67937fa 100644 (file)
@@ -404,6 +404,7 @@ static int xilinx_cpm_pcie_init_irq_domain(struct xilinx_cpm_pcie_port *port)
        return 0;
 out:
        xilinx_cpm_free_irq_domains(port);
+       of_node_put(pcie_intc_node);
        dev_err(dev, "Failed to allocate IRQ domains\n");
 
        return -ENOMEM;
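
The one-line fix above restores the device-node refcounting rule: of_get_next_child() returns its result with an elevated refcount, and every exit path must drop it. A minimal sketch of the pattern (do_setup() is illustrative):

	struct device_node *child = of_get_next_child(node, NULL);

	if (!child)
		return -EINVAL;

	ret = do_setup(child);
	of_node_put(child);	/* dropped on success and failure alike */
	return ret;
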
index 8820d0f..5f1242c 100644 (file)
@@ -12,3 +12,16 @@ config PCI_EPF_TEST
           for PCI Endpoint.
 
           If in doubt, say "N" to disable Endpoint test driver.
+
+config PCI_EPF_NTB
+       tristate "PCI Endpoint NTB driver"
+       depends on PCI_ENDPOINT
+       select CONFIGFS_FS
+       help
+         Select this configuration option to enable the Non-Transparent
+         Bridge (NTB) driver for PCI Endpoint. The NTB driver implements NTB
+         controller functionality using multiple PCIe endpoint instances.
+         It can support NTB endpoint function devices created using a
+         device tree.
+
+         If in doubt, say "N" to disable Endpoint NTB driver.
index d6fafff..96ab932 100644 (file)
@@ -4,3 +4,4 @@
 #
 
 obj-$(CONFIG_PCI_EPF_TEST)             += pci-epf-test.o
+obj-$(CONFIG_PCI_EPF_NTB)              += pci-epf-ntb.o
diff --git a/drivers/pci/endpoint/functions/pci-epf-ntb.c b/drivers/pci/endpoint/functions/pci-epf-ntb.c
new file mode 100644 (file)
index 0000000..338148c
--- /dev/null
@@ -0,0 +1,2128 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Endpoint Function Driver to implement Non-Transparent Bridge functionality
+ *
+ * Copyright (C) 2020 Texas Instruments
+ * Author: Kishon Vijay Abraham I <kishon@ti.com>
+ */
+
+/*
+ * The PCI NTB function driver configures the SoC with multiple PCIe Endpoint
+ * (EP) controller instances (see diagram below) in such a way that
+ * transactions from one EP controller are routed to the other EP controller.
+ * Once the PCI NTB function driver has configured the SoC with multiple EP
+ * instances, HOST1 and HOST2 can communicate with each other using the SoC
+ * as a bridge.
+ *
+ *    +-------------+                                   +-------------+
+ *    |             |                                   |             |
+ *    |    HOST1    |                                   |    HOST2    |
+ *    |             |                                   |             |
+ *    +------^------+                                   +------^------+
+ *           |                                                 |
+ *           |                                                 |
+ * +---------|-------------------------------------------------|---------+
+ * |  +------v------+                                   +------v------+  |
+ * |  |             |                                   |             |  |
+ * |  |     EP      |                                   |     EP      |  |
+ * |  | CONTROLLER1 |                                   | CONTROLLER2 |  |
+ * |  |             <----------------------------------->             |  |
+ * |  |             |                                   |             |  |
+ * |  |             |                                   |             |  |
+ * |  |             |  SoC With Multiple EP Instances   |             |  |
+ * |  |             |  (Configured using NTB Function)  |             |  |
+ * |  +-------------+                                   +-------------+  |
+ * +---------------------------------------------------------------------+
+ */
+
+#include <linux/delay.h>
+#include <linux/io.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <linux/pci-epc.h>
+#include <linux/pci-epf.h>
+
+static struct workqueue_struct *kpcintb_workqueue;
+
+#define COMMAND_CONFIGURE_DOORBELL     1
+#define COMMAND_TEARDOWN_DOORBELL      2
+#define COMMAND_CONFIGURE_MW           3
+#define COMMAND_TEARDOWN_MW            4
+#define COMMAND_LINK_UP                        5
+#define COMMAND_LINK_DOWN              6
+
+#define COMMAND_STATUS_OK              1
+#define COMMAND_STATUS_ERROR           2
+
+#define LINK_STATUS_UP                 BIT(0)
+
+#define SPAD_COUNT                     64
+#define DB_COUNT                       4
+#define NTB_MW_OFFSET                  2
+#define DB_COUNT_MASK                  GENMASK(15, 0)
+#define MSIX_ENABLE                    BIT(16)
+#define MAX_DB_COUNT                   32
+#define MAX_MW                         4
+
+enum epf_ntb_bar {
+       BAR_CONFIG,
+       BAR_PEER_SPAD,
+       BAR_DB_MW1,
+       BAR_MW2,
+       BAR_MW3,
+       BAR_MW4,
+};
+
+struct epf_ntb {
+       u32 num_mws;
+       u32 db_count;
+       u32 spad_count;
+       struct pci_epf *epf;
+       u64 mws_size[MAX_MW];
+       struct config_group group;
+       struct epf_ntb_epc *epc[2];
+};
+
+#define to_epf_ntb(epf_group) container_of((epf_group), struct epf_ntb, group)
+
+struct epf_ntb_epc {
+       u8 func_no;
+       bool linkup;
+       bool is_msix;
+       int msix_bar;
+       u32 spad_size;
+       struct pci_epc *epc;
+       struct epf_ntb *epf_ntb;
+       void __iomem *mw_addr[6];
+       size_t msix_table_offset;
+       struct epf_ntb_ctrl *reg;
+       struct pci_epf_bar *epf_bar;
+       enum pci_barno epf_ntb_bar[6];
+       struct delayed_work cmd_handler;
+       enum pci_epc_interface_type type;
+       const struct pci_epc_features *epc_features;
+};
+
+struct epf_ntb_ctrl {
+       u32     command;
+       u32     argument;
+       u16     command_status;
+       u16     link_status;
+       u32     topology;
+       u64     addr;
+       u64     size;
+       u32     num_mws;
+       u32     mw1_offset;
+       u32     spad_offset;
+       u32     spad_count;
+       u32     db_entry_size;
+       u32     db_data[MAX_DB_COUNT];
+       u32     db_offset[MAX_DB_COUNT];
+} __packed;
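
This control region doubles as the host-to-endpoint mailbox: the host-side NTB driver fills argument, writes command, and polls command_status for COMMAND_STATUS_OK while the endpoint's cmd_handler work consumes the command. A hedged host-side sketch (the helper and its timeout are illustrative, not part of this driver):

	static int ntb_host_send_cmd(struct epf_ntb_ctrl __iomem *ctrl,
				     u32 command, u32 argument)
	{
		int retries = 100;

		writel(argument, &ctrl->argument);
		writel(command, &ctrl->command);

		while (retries--) {
			if (readw(&ctrl->command_status) == COMMAND_STATUS_OK)
				return 0;
			usleep_range(1000, 2000);
		}
		return -ETIMEDOUT;
	}
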
+
+static struct pci_epf_header epf_ntb_header = {
+       .vendorid       = PCI_ANY_ID,
+       .deviceid       = PCI_ANY_ID,
+       .baseclass_code = PCI_BASE_CLASS_MEMORY,
+       .interrupt_pin  = PCI_INTERRUPT_INTA,
+};
+
+/**
+ * epf_ntb_link_up() - Raise link_up interrupt to both hosts
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @link_up: true to indicate the link is up, false to indicate it is down
+ *
+ * Once the NTB function in HOST1 and the NTB function in HOST2 have invoked
+ * ntb_link_enable(), this NTB function driver triggers a link event to the
+ * NTB client in both hosts.
+ */
+static int epf_ntb_link_up(struct epf_ntb *ntb, bool link_up)
+{
+       enum pci_epc_interface_type type;
+       enum pci_epc_irq_type irq_type;
+       struct epf_ntb_epc *ntb_epc;
+       struct epf_ntb_ctrl *ctrl;
+       struct pci_epc *epc;
+       bool is_msix;
+       u8 func_no;
+       int ret;
+
+       for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+               ntb_epc = ntb->epc[type];
+               epc = ntb_epc->epc;
+               func_no = ntb_epc->func_no;
+               is_msix = ntb_epc->is_msix;
+               ctrl = ntb_epc->reg;
+               if (link_up)
+                       ctrl->link_status |= LINK_STATUS_UP;
+               else
+                       ctrl->link_status &= ~LINK_STATUS_UP;
+               irq_type = is_msix ? PCI_EPC_IRQ_MSIX : PCI_EPC_IRQ_MSI;
+               ret = pci_epc_raise_irq(epc, func_no, irq_type, 1);
+               if (ret) {
+                       dev_err(&epc->dev,
+                               "%s intf: Failed to raise Link Up IRQ\n",
+                               pci_epc_interface_string(type));
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * epf_ntb_configure_mw() - Configure the Outbound Address Space for one host
+ *   to access the memory window of the other host
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @mw: Index of the memory window (either 0, 1, 2 or 3)
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     +----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3 |
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * This function performs stage (B) in the above diagram (see MW1), i.e., map
+ * the OB address space of the memory window to the PCI address space.
+ *
+ * This operation requires 3 parameters
+ *  1) Address in the outbound address space
+ *  2) Address in the PCI Address space
+ *  3) Size of the address region to be mapped
+ *
+ * The address in the outbound address space (for MW1, MW2, MW3 and MW4) is
+ * stored in the epf_bar corresponding to BAR_DB_MW1 for MW1, and to BAR_MW2,
+ * BAR_MW3 and BAR_MW4 for the remaining windows, of the epf_ntb_epc that is
+ * connected to HOST1. This
+ * is populated in epf_ntb_alloc_peer_mem() in this driver.
+ *
+ * The address and size of the PCI address region that has to be mapped would
+ * be provided by HOST2 in ctrl->addr and ctrl->size of epf_ntb_epc that is
+ * connected to HOST2.
+ *
+ * Please note that Memory Window 1 (MW1) and the Doorbell registers together
+ * are mapped to a single BAR (BAR2) above for 32-bit BARs. The exact BAR
+ * used for a Memory Window (MW) can be obtained from epf_ntb_bar[BAR_DB_MW1],
+ * epf_ntb_bar[BAR_MW2], epf_ntb_bar[BAR_MW3] and epf_ntb_bar[BAR_MW4].
+ */
+static int epf_ntb_configure_mw(struct epf_ntb *ntb,
+                               enum pci_epc_interface_type type, u32 mw)
+{
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       struct pci_epf_bar *peer_epf_bar;
+       enum pci_barno peer_barno;
+       struct epf_ntb_ctrl *ctrl;
+       phys_addr_t phys_addr;
+       struct pci_epc *epc;
+       u64 addr, size;
+       int ret = 0;
+       u8 func_no;
+
+       ntb_epc = ntb->epc[type];
+       epc = ntb_epc->epc;
+
+       peer_ntb_epc = ntb->epc[!type];
+       peer_barno = peer_ntb_epc->epf_ntb_bar[mw + NTB_MW_OFFSET];
+       peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+
+       phys_addr = peer_epf_bar->phys_addr;
+       ctrl = ntb_epc->reg;
+       addr = ctrl->addr;
+       size = ctrl->size;
+       if (mw + NTB_MW_OFFSET == BAR_DB_MW1)
+               phys_addr += ctrl->mw1_offset;
+
+       if (size > ntb->mws_size[mw]) {
+               dev_err(&epc->dev,
+                       "%s intf: MW: %d Req Sz:%llxx > Supported Sz:%llx\n",
+                       pci_epc_interface_string(type), mw, size,
+                       ntb->mws_size[mw]);
+               ret = -EINVAL;
+               goto err_invalid_size;
+       }
+
+       func_no = ntb_epc->func_no;
+
+       ret = pci_epc_map_addr(epc, func_no, phys_addr, addr, size);
+       if (ret)
+               dev_err(&epc->dev,
+                       "%s intf: Failed to map memory window %d address\n",
+                       pci_epc_interface_string(type), mw);
+
+err_invalid_size:
+
+       return ret;
+}
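
From the host side, this mapping is requested by filling addr and size in the control region and then issuing COMMAND_CONFIGURE_MW with the window index as the argument. A hedged sketch reusing the ntb_host_send_cmd() helper sketched earlier (buf_phys_addr is illustrative; lo_hi_writeq() assumes the io-64-nonatomic-lo-hi helpers):

	/* illustrative: expose a 1 MiB host buffer through memory window 0 */
	lo_hi_writeq(buf_phys_addr, &ctrl->addr);	/* PCI address of buffer */
	lo_hi_writeq(SZ_1M, &ctrl->size);		/* region size */
	ret = ntb_host_send_cmd(ctrl, COMMAND_CONFIGURE_MW, 0);
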
+
+/**
+ * epf_ntb_teardown_mw() - Teardown the configured OB ATU
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @mw: Index of the memory window (either 0, 1, 2 or 3)
+ *
+ * Teardown the configured OB ATU configured in epf_ntb_configure_mw() using
+ * pci_epc_unmap_addr()
+ */
+static void epf_ntb_teardown_mw(struct epf_ntb *ntb,
+                               enum pci_epc_interface_type type, u32 mw)
+{
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       struct pci_epf_bar *peer_epf_bar;
+       enum pci_barno peer_barno;
+       struct epf_ntb_ctrl *ctrl;
+       phys_addr_t phys_addr;
+       struct pci_epc *epc;
+       u8 func_no;
+
+       ntb_epc = ntb->epc[type];
+       epc = ntb_epc->epc;
+
+       peer_ntb_epc = ntb->epc[!type];
+       peer_barno = peer_ntb_epc->epf_ntb_bar[mw + NTB_MW_OFFSET];
+       peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+
+       phys_addr = peer_epf_bar->phys_addr;
+       ctrl = ntb_epc->reg;
+       if (mw + NTB_MW_OFFSET == BAR_DB_MW1)
+               phys_addr += ctrl->mw1_offset;
+       func_no = ntb_epc->func_no;
+
+       pci_epc_unmap_addr(epc, func_no, phys_addr);
+}
+
+/**
+ * epf_ntb_configure_msi() - Map OB address space to MSI address
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @db_count: Number of doorbell interrupts to map
+ *
+ *+-----------------+    +----->+----------------+-----------+-----------------+
+ *|       BAR0      |    |      |   Doorbell 1   +---+------->   MSI ADDRESS   |
+ *+-----------------+    |      +----------------+   |       +-----------------+
+ *|       BAR1      |    |      |   Doorbell 2   +---+       |                 |
+ *+-----------------+----+      +----------------+   |       |                 |
+ *|       BAR2      |           |   Doorbell 3   +---+       |                 |
+ *+-----------------+----+      +----------------+   |       |                 |
+ *|       BAR3      |    |      |   Doorbell 4   +---+       |                 |
+ *+-----------------+    |      +----------------+           |                 |
+ *|       BAR4      |    |      |                |           |                 |
+ *+-----------------+    |      |      MW1       |           |                 |
+ *|       BAR5      |    |      |                |           |                 |
+ *+-----------------+    +----->-----------------+           |                 |
+ *  EP CONTROLLER 1             |                |           |                 |
+ *                              |                |           |                 |
+ *                              +----------------+           +-----------------+
+ *                     (A)       EP CONTROLLER 2             |                 |
+ *                                 (OB SPACE)                |                 |
+ *                                                           |      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ *
+ * This function performs stage (B) in the above diagram (see Doorbell 1,
+ * Doorbell 2, Doorbell 3, Doorbell 4), i.e., map the OB address space
+ * corresponding to the doorbell to the MSI address in PCI address space.
+ *
+ * This operation requires 3 parameters
+ *  1) Address reserved for doorbell in the outbound address space
+ *  2) MSI address in the PCI Address space
+ *  3) Number of MSI interrupts that have to be configured
+ *
+ * The address in the outbound address space (for the Doorbell) is stored in
+ * epf_bar corresponding to BAR_DB_MW1 of epf_ntb_epc that is connected to
+ * HOST1. This is populated in epf_ntb_alloc_peer_mem() in this driver along
+ * with address for MW1.
+ *
+ * pci_epc_map_msi_irq() takes the MSI address from MSI capability register
+ * and maps the OB address (obtained in epf_ntb_alloc_peer_mem()) to the MSI
+ * address.
+ *
+ * epf_ntb_configure_msi() also stores the MSI data to raise each interrupt
+ * in db_data of the peer's control region. This helps the peer to raise
+ * doorbell of the other host by writing db_data to the BAR corresponding to
+ * BAR_DB_MW1.
+ */
+static int epf_ntb_configure_msi(struct epf_ntb *ntb,
+                                enum pci_epc_interface_type type, u16 db_count)
+{
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       u32 db_entry_size, db_data, db_offset;
+       struct pci_epf_bar *peer_epf_bar;
+       struct epf_ntb_ctrl *peer_ctrl;
+       enum pci_barno peer_barno;
+       phys_addr_t phys_addr;
+       struct pci_epc *epc;
+       u8 func_no;
+       int ret, i;
+
+       ntb_epc = ntb->epc[type];
+       epc = ntb_epc->epc;
+
+       peer_ntb_epc = ntb->epc[!type];
+       peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_DB_MW1];
+       peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+       peer_ctrl = peer_ntb_epc->reg;
+       db_entry_size = peer_ctrl->db_entry_size;
+
+       phys_addr = peer_epf_bar->phys_addr;
+       func_no = ntb_epc->func_no;
+
+       ret = pci_epc_map_msi_irq(epc, func_no, phys_addr, db_count,
+                                 db_entry_size, &db_data, &db_offset);
+       if (ret) {
+               dev_err(&epc->dev, "%s intf: Failed to map MSI IRQ\n",
+                       pci_epc_interface_string(type));
+               return ret;
+       }
+
+       for (i = 0; i < db_count; i++) {
+               peer_ctrl->db_data[i] = db_data | i;
+               peer_ctrl->db_offset[i] = db_offset;
+       }
+
+       return 0;
+}
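
With db_data and db_offset published in the peer's control region, ringing doorbell i from the other host reduces to a single write into the BAR backing BAR_DB_MW1. A minimal sketch (db_base is the host's illustrative mapping of that BAR):

	u32 data   = readl(&ctrl->db_data[i]);
	u32 offset = readl(&ctrl->db_offset[i]);

	writel(data, db_base + offset);
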
+
+/**
+ * epf_ntb_configure_msix() - Map OB address space to MSI-X address
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @db_count: Number of doorbell interrupts to map
+ *
+ *+-----------------+    +----->+----------------+-----------+-----------------+
+ *|       BAR0      |    |      |   Doorbell 1   +-----------> MSI-X ADDRESS 1 |
+ *+-----------------+    |      +----------------+           +-----------------+
+ *|       BAR1      |    |      |   Doorbell 2   +---------+ |                 |
+ *+-----------------+----+      +----------------+         | |                 |
+ *|       BAR2      |           |   Doorbell 3   +-------+ | +-----------------+
+ *+-----------------+----+      +----------------+       | +-> MSI-X ADDRESS 2 |
+ *|       BAR3      |    |      |   Doorbell 4   +-----+ |   +-----------------+
+ *+-----------------+    |      +----------------+     | |   |                 |
+ *|       BAR4      |    |      |                |     | |   +-----------------+
+ *+-----------------+    |      |      MW1       +     | +-->+ MSI-X ADDRESS 3 |
+ *|       BAR5      |    |      |                |     |     +-----------------+
+ *+-----------------+    +----->-----------------+     |     |                 |
+ *  EP CONTROLLER 1             |                |     |     +-----------------+
+ *                              |                |     +---->+ MSI-X ADDRESS 4 |
+ *                              +----------------+           +-----------------+
+ *                     (A)       EP CONTROLLER 2             |                 |
+ *                                 (OB SPACE)                |                 |
+ *                                                           |      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * This function performs stage (B) in the above diagram (see Doorbell 1,
+ * Doorbell 2, Doorbell 3, Doorbell 4), i.e. it maps the OB address space
+ * corresponding to each doorbell to an MSI-X address in PCI address space.
+ *
+ * This operation requires 3 parameters:
+ *  1) Address reserved for the doorbell in the outbound address space
+ *  2) MSI-X address in the PCIe address space
+ *  3) Number of MSI-X interrupts that have to be configured
+ *
+ * The address in the outbound address space (for the Doorbell) is stored in
+ * epf_bar corresponding to BAR_DB_MW1 of epf_ntb_epc that is connected to
+ * HOST1. This is populated in epf_ntb_alloc_peer_mem() in this driver along
+ * with the address for MW1.
+ *
+ * The MSI-X address is in the MSI-X table of EP CONTROLLER 2 and
+ * the count of doorbells is in ctrl->argument of the epf_ntb_epc that is
+ * connected to HOST2. The MSI-X table is memory mapped via ntb_epc->msix_bar
+ * at the offset in ntb_epc->msix_table_offset; from this
+ * epf_ntb_configure_msix() gets the MSI-X address and data.
+ *
+ * epf_ntb_configure_msix() also stores the MSI-X data to raise each interrupt
+ * in db_data of the peer's control region. This helps the peer to raise the
+ * doorbell of the other host by writing db_data to the BAR corresponding to
+ * BAR_DB_MW1.
+ */
+static int epf_ntb_configure_msix(struct epf_ntb *ntb,
+                                 enum pci_epc_interface_type type,
+                                 u16 db_count)
+{
+       const struct pci_epc_features *epc_features;
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       struct pci_epf_bar *peer_epf_bar, *epf_bar;
+       struct pci_epf_msix_tbl *msix_tbl;
+       struct epf_ntb_ctrl *peer_ctrl;
+       u32 db_entry_size, msg_data;
+       enum pci_barno peer_barno;
+       phys_addr_t phys_addr;
+       struct pci_epc *epc;
+       size_t align;
+       u64 msg_addr;
+       u8 func_no;
+       int ret, i;
+
+       ntb_epc = ntb->epc[type];
+       epc = ntb_epc->epc;
+
+       epf_bar = &ntb_epc->epf_bar[ntb_epc->msix_bar];
+       msix_tbl = epf_bar->addr + ntb_epc->msix_table_offset;
+
+       peer_ntb_epc = ntb->epc[!type];
+       peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_DB_MW1];
+       peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+       phys_addr = peer_epf_bar->phys_addr;
+       peer_ctrl = peer_ntb_epc->reg;
+       epc_features = ntb_epc->epc_features;
+       align = epc_features->align;
+
+       func_no = ntb_epc->func_no;
+       db_entry_size = peer_ctrl->db_entry_size;
+
+       for (i = 0; i < db_count; i++) {
+               msg_addr = ALIGN_DOWN(msix_tbl[i].msg_addr, align);
+               msg_data = msix_tbl[i].msg_data;
+               ret = pci_epc_map_addr(epc, func_no, phys_addr, msg_addr,
+                                      db_entry_size);
+               if (ret) {
+                       dev_err(&epc->dev,
+                               "%s intf: Failed to configure MSI-X IRQ\n",
+                               pci_epc_interface_string(type));
+                       return ret;
+               }
+               phys_addr = phys_addr + db_entry_size;
+               peer_ctrl->db_data[i] = msg_data;
+               peer_ctrl->db_offset[i] = msix_tbl[i].msg_addr & (align - 1);
+       }
+       ntb_epc->is_msix = true;
+
+       return 0;
+}
+
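+/*
+ * Editorial sketch, not part of this patch: how the host side could consume
+ * the db_data/db_offset pairs populated above. db_base (an ioremapped
+ * mapping of the peer's BAR_DB_MW1) is hypothetical; the register layout
+ * comes from struct epf_ntb_ctrl in this driver.
+ *
+ *   static void ring_doorbell(void __iomem *db_base,
+ *                             struct epf_ntb_ctrl *ctrl, u32 i)
+ *   {
+ *           // The store generates the MSI-X TLP mapped for doorbell i
+ *           writel(ctrl->db_data[i], db_base + ctrl->db_offset[i]);
+ *   }
+ */
+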
+/**
+ * epf_ntb_configure_db() - Configure the Outbound Address Space for one host
+ *   to ring the doorbell of other host
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ * @db_count: Number of doorbells that have to be configured
+ * @msix: Indicates whether MSI-X or MSI should be used
+ *
+ * Invokes epf_ntb_configure_msix() or epf_ntb_configure_msi() as required
+ * for one HOST to ring the doorbell of the other HOST.
+ */
+static int epf_ntb_configure_db(struct epf_ntb *ntb,
+                               enum pci_epc_interface_type type,
+                               u16 db_count, bool msix)
+{
+       struct epf_ntb_epc *ntb_epc;
+       struct pci_epc *epc;
+       int ret;
+
+       if (db_count > MAX_DB_COUNT)
+               return -EINVAL;
+
+       ntb_epc = ntb->epc[type];
+       epc = ntb_epc->epc;
+
+       if (msix)
+               ret = epf_ntb_configure_msix(ntb, type, db_count);
+       else
+               ret = epf_ntb_configure_msi(ntb, type, db_count);
+
+       if (ret)
+               dev_err(&epc->dev, "%s intf: Failed to configure DB\n",
+                       pci_epc_interface_string(type));
+
+       return ret;
+}
+
+/**
+ * epf_ntb_teardown_db() - Unmap the address in OB address space that was
+ *   mapped to the MSI/MSI-X address
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Invoke pci_epc_unmap_addr() to unmap the OB address from the MSI/MSI-X
+ * address.
+ */
+static void
+epf_ntb_teardown_db(struct epf_ntb *ntb, enum pci_epc_interface_type type)
+{
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       struct pci_epf_bar *peer_epf_bar;
+       enum pci_barno peer_barno;
+       phys_addr_t phys_addr;
+       struct pci_epc *epc;
+       u8 func_no;
+
+       ntb_epc = ntb->epc[type];
+       epc = ntb_epc->epc;
+
+       peer_ntb_epc = ntb->epc[!type];
+       peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_DB_MW1];
+       peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+       phys_addr = peer_epf_bar->phys_addr;
+       func_no = ntb_epc->func_no;
+
+       pci_epc_unmap_addr(epc, func_no, phys_addr);
+}
+
+/**
+ * epf_ntb_cmd_handler() - Handle commands provided by the NTB Host
+ * @work: work_struct for the two epf_ntb_epc (PRIMARY and SECONDARY)
+ *
+ * Workqueue function that gets invoked for each of the two epf_ntb_epc
+ * periodically (once every 5ms) to see if it has received any commands
+ * from the NTB host. The host can send commands to configure doorbells,
+ * configure memory windows, or update the link status.
+ */
+static void epf_ntb_cmd_handler(struct work_struct *work)
+{
+       enum pci_epc_interface_type type;
+       struct epf_ntb_epc *ntb_epc;
+       struct epf_ntb_ctrl *ctrl;
+       u32 command, argument;
+       struct epf_ntb *ntb;
+       struct device *dev;
+       u16 db_count;
+       bool is_msix;
+       int ret;
+
+       ntb_epc = container_of(work, struct epf_ntb_epc, cmd_handler.work);
+       ctrl = ntb_epc->reg;
+       command = ctrl->command;
+       if (!command)
+               goto reset_handler;
+       argument = ctrl->argument;
+
+       ctrl->command = 0;
+       ctrl->argument = 0;
+
+       type = ntb_epc->type;
+       ntb = ntb_epc->epf_ntb;
+       dev = &ntb->epf->dev;
+
+       switch (command) {
+       case COMMAND_CONFIGURE_DOORBELL:
+               db_count = argument & DB_COUNT_MASK;
+               is_msix = argument & MSIX_ENABLE;
+               ret = epf_ntb_configure_db(ntb, type, db_count, is_msix);
+               if (ret < 0)
+                       ctrl->command_status = COMMAND_STATUS_ERROR;
+               else
+                       ctrl->command_status = COMMAND_STATUS_OK;
+               break;
+       case COMMAND_TEARDOWN_DOORBELL:
+               epf_ntb_teardown_db(ntb, type);
+               ctrl->command_status = COMMAND_STATUS_OK;
+               break;
+       case COMMAND_CONFIGURE_MW:
+               ret = epf_ntb_configure_mw(ntb, type, argument);
+               if (ret < 0)
+                       ctrl->command_status = COMMAND_STATUS_ERROR;
+               else
+                       ctrl->command_status = COMMAND_STATUS_OK;
+               break;
+       case COMMAND_TEARDOWN_MW:
+               epf_ntb_teardown_mw(ntb, type, argument);
+               ctrl->command_status = COMMAND_STATUS_OK;
+               break;
+       case COMMAND_LINK_UP:
+               ntb_epc->linkup = true;
+               if (ntb->epc[PRIMARY_INTERFACE]->linkup &&
+                   ntb->epc[SECONDARY_INTERFACE]->linkup) {
+                       ret = epf_ntb_link_up(ntb, true);
+                       if (ret < 0)
+                               ctrl->command_status = COMMAND_STATUS_ERROR;
+                       else
+                               ctrl->command_status = COMMAND_STATUS_OK;
+                       goto reset_handler;
+               }
+               ctrl->command_status = COMMAND_STATUS_OK;
+               break;
+       case COMMAND_LINK_DOWN:
+               ntb_epc->linkup = false;
+               ret = epf_ntb_link_up(ntb, false);
+               if (ret < 0)
+                       ctrl->command_status = COMMAND_STATUS_ERROR;
+               else
+                       ctrl->command_status = COMMAND_STATUS_OK;
+               break;
+       default:
+               dev_err(dev, "%s intf UNKNOWN command: %d\n",
+                       pci_epc_interface_string(type), command);
+               break;
+       }
+
+reset_handler:
+       queue_delayed_work(kpcintb_workqueue, &ntb_epc->cmd_handler,
+                          msecs_to_jiffies(5));
+}
+
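+/*
+ * Editorial sketch, not part of this patch: the host-side sequence implied
+ * by the handler above. Since the handler samples ctrl->command before
+ * ctrl->argument, the host must write the argument first. ctrl here is a
+ * hypothetical ioremapped view of the config region.
+ *
+ *   writel(db_count | (use_msix ? MSIX_ENABLE : 0), &ctrl->argument);
+ *   writel(COMMAND_CONFIGURE_DOORBELL, &ctrl->command);
+ *   while (readl(&ctrl->command_status) != COMMAND_STATUS_OK)
+ *           usleep_range(100, 200);   // poll, with a timeout in practice
+ */
+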
+/**
+ * epf_ntb_peer_spad_bar_clear() - Clear Peer Scratchpad BAR
+ * @ntb_epc: NTB EPC interface whose peer scratchpad BAR has to be cleared
+ *
+ *+-----------------+------->+------------------+        +-----------------+
+ *|       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ *+-----------------+----+   +------------------+<-------+-----------------+
+ *|       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ *+-----------------+    +-->+------------------+<-------+-----------------+
+ *|       BAR2      |            Local Memory            |       BAR2      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR3      |                                    |       BAR3      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR4      |                                    |       BAR4      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR5      |                                    |       BAR5      |
+ *+-----------------+                                    +-----------------+
+ *  EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Clear BAR1 of EP CONTROLLER 2, which contains HOST2's peer scratchpad
+ * region. While BAR1 is the default peer scratchpad BAR, an NTB could have
+ * other BARs for peer scratchpad (because of 64-bit BARs or reserved BARs).
+ * This function can get the exact BAR used for peer scratchpad from
+ * epf_ntb_bar[BAR_PEER_SPAD].
+ *
+ * Since HOST2's peer scratchpad is also HOST1's self scratchpad, this
+ * function only clears the BAR (i.e. removes HOST2's access); the backing
+ * memory is freed separately by epf_ntb_config_spad_bar_free().
+ */
+static void epf_ntb_peer_spad_bar_clear(struct epf_ntb_epc *ntb_epc)
+{
+       struct pci_epf_bar *epf_bar;
+       enum pci_barno barno;
+       struct pci_epc *epc;
+       u8 func_no;
+
+       epc = ntb_epc->epc;
+       func_no = ntb_epc->func_no;
+       barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
+       epf_bar = &ntb_epc->epf_bar[barno];
+       pci_epc_clear_bar(epc, func_no, epf_bar);
+}
+
+/**
+ * epf_ntb_peer_spad_bar_set() - Set peer scratchpad BAR
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ *+-----------------+------->+------------------+        +-----------------+
+ *|       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ *+-----------------+----+   +------------------+<-------+-----------------+
+ *|       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ *+-----------------+    +-->+------------------+<-------+-----------------+
+ *|       BAR2      |            Local Memory            |       BAR2      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR3      |                                    |       BAR3      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR4      |                                    |       BAR4      |
+ *+-----------------+                                    +-----------------+
+ *|       BAR5      |                                    |       BAR5      |
+ *+-----------------+                                    +-----------------+
+ *  EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Set BAR1 of EP CONTROLLER 2, which contains HOST2's peer scratchpad
+ * region. While BAR1 is the default peer scratchpad BAR, an NTB could have
+ * other BARs for peer scratchpad (because of 64-bit BARs or reserved BARs).
+ * This function can get the exact BAR used for peer scratchpad from
+ * epf_ntb_bar[BAR_PEER_SPAD].
+ *
+ * Since HOST2's peer scratchpad is also HOST1's self scratchpad, this function
+ * gets the address of peer scratchpad from
+ * peer_ntb_epc->epf_ntb_bar[BAR_CONFIG].
+ */
+static int epf_ntb_peer_spad_bar_set(struct epf_ntb *ntb,
+                                    enum pci_epc_interface_type type)
+{
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       struct pci_epf_bar *peer_epf_bar, *epf_bar;
+       enum pci_barno peer_barno, barno;
+       u32 peer_spad_offset;
+       struct pci_epc *epc;
+       struct device *dev;
+       u8 func_no;
+       int ret;
+
+       dev = &ntb->epf->dev;
+
+       peer_ntb_epc = ntb->epc[!type];
+       peer_barno = peer_ntb_epc->epf_ntb_bar[BAR_CONFIG];
+       peer_epf_bar = &peer_ntb_epc->epf_bar[peer_barno];
+
+       ntb_epc = ntb->epc[type];
+       barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
+       epf_bar = &ntb_epc->epf_bar[barno];
+       func_no = ntb_epc->func_no;
+       epc = ntb_epc->epc;
+
+       peer_spad_offset = peer_ntb_epc->reg->spad_offset;
+       epf_bar->phys_addr = peer_epf_bar->phys_addr + peer_spad_offset;
+       epf_bar->size = peer_ntb_epc->spad_size;
+       epf_bar->barno = barno;
+       epf_bar->flags = PCI_BASE_ADDRESS_MEM_TYPE_32;
+
+       ret = pci_epc_set_bar(epc, func_no, epf_bar);
+       if (ret) {
+               dev_err(dev, "%s intf: peer SPAD BAR set failed\n",
+                       pci_epc_interface_string(type));
+               return ret;
+       }
+
+       return 0;
+}
+
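+/*
+ * Editorial sketch, not part of this patch: with the peer SPAD BAR set up
+ * as above, a peer scratchpad access from the host is a plain 32-bit MMIO
+ * access, since each scratchpad is 4 bytes wide (spad_count * 4 in
+ * epf_ntb_config_spad_bar_alloc()). spad_base is a hypothetical ioremapped
+ * mapping of BAR_PEER_SPAD.
+ *
+ *   writel(val, spad_base + idx * 4);   // write peer scratchpad idx
+ *   val = readl(spad_base + idx * 4);   // read it back
+ */
+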
+/**
+ * epf_ntb_config_sspad_bar_clear() - Clear Config + Self scratchpad BAR
+ * @ntb_epc: NTB EPC interface whose config + self scratchpad BAR has to
+ *   be cleared
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Clear BAR0 of EP CONTROLLER 1, which contains HOST1's config and
+ * self scratchpad region (removes inbound ATU configuration). While BAR0 is
+ * the default self scratchpad BAR, an NTB could have other BARs for self
+ * scratchpad (because of reserved BARs). This function can get the exact BAR
+ * used for self scratchpad from epf_ntb_bar[BAR_CONFIG].
+ *
+ * Please note the self scratchpad region and config region are combined
+ * into a single region and mapped using the same BAR. Also note HOST2's
+ * peer scratchpad is HOST1's self scratchpad.
+ */
+static void epf_ntb_config_sspad_bar_clear(struct epf_ntb_epc *ntb_epc)
+{
+       struct pci_epf_bar *epf_bar;
+       enum pci_barno barno;
+       struct pci_epc *epc;
+       u8 func_no;
+
+       epc = ntb_epc->epc;
+       func_no = ntb_epc->func_no;
+       barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+       epf_bar = &ntb_epc->epf_bar[barno];
+       pci_epc_clear_bar(epc, func_no, epf_bar);
+}
+
+/**
+ * epf_ntb_config_sspad_bar_set() - Set Config + Self scratchpad BAR
+ * @ntb_epc: NTB EPC interface whose config + self scratchpad BAR has to
+ *   be set
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Map BAR0 of EP CONTROLLER 1, which contains HOST1's config and
+ * self scratchpad region. While BAR0 is the default self scratchpad BAR, an
+ * NTB could have other BARs for self scratchpad (because of reserved BARs).
+ * This function can get the exact BAR used for self scratchpad from
+ * epf_ntb_bar[BAR_CONFIG].
+ *
+ * Please note the self scratchpad region and config region are combined
+ * into a single region and mapped using the same BAR. Also note HOST2's
+ * peer scratchpad is HOST1's self scratchpad.
+ */
+static int epf_ntb_config_sspad_bar_set(struct epf_ntb_epc *ntb_epc)
+{
+       struct pci_epf_bar *epf_bar;
+       enum pci_barno barno;
+       struct epf_ntb *ntb;
+       struct pci_epc *epc;
+       struct device *dev;
+       u8 func_no;
+       int ret;
+
+       ntb = ntb_epc->epf_ntb;
+       dev = &ntb->epf->dev;
+
+       epc = ntb_epc->epc;
+       func_no = ntb_epc->func_no;
+       barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+       epf_bar = &ntb_epc->epf_bar[barno];
+
+       ret = pci_epc_set_bar(epc, func_no, epf_bar);
+       if (ret) {
+               dev_err(dev, "%s inft: Config/Status/SPAD BAR set failed\n",
+                       pci_epc_interface_string(ntb_epc->type));
+               return ret;
+       }
+
+       return 0;
+}
+
+/**
+ * epf_ntb_config_spad_bar_free() - Free the physical memory associated with
+ *   config + scratchpad region
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Free the Local Memory mentioned in the above diagram. After invoking this
+ * function, neither the config + self scratchpad region of HOST1 nor the peer
+ * scratchpad region of HOST2 should be accessed.
+ */
+static void epf_ntb_config_spad_bar_free(struct epf_ntb *ntb)
+{
+       enum pci_epc_interface_type type;
+       struct epf_ntb_epc *ntb_epc;
+       enum pci_barno barno;
+       struct pci_epf *epf;
+
+       epf = ntb->epf;
+       for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+               ntb_epc = ntb->epc[type];
+               barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+               if (ntb_epc->reg)
+                       pci_epf_free_space(epf, ntb_epc->reg, barno, type);
+       }
+}
+
+/**
+ * epf_ntb_config_spad_bar_alloc() - Allocate memory for config + scratchpad
+ *   region
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * +-----------------+------->+------------------+        +-----------------+
+ * |       BAR0      |        |  CONFIG REGION   |        |       BAR0      |
+ * +-----------------+----+   +------------------+<-------+-----------------+
+ * |       BAR1      |    |   |SCRATCHPAD REGION |        |       BAR1      |
+ * +-----------------+    +-->+------------------+<-------+-----------------+
+ * |       BAR2      |            Local Memory            |       BAR2      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR3      |                                    |       BAR3      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR4      |                                    |       BAR4      |
+ * +-----------------+                                    +-----------------+
+ * |       BAR5      |                                    |       BAR5      |
+ * +-----------------+                                    +-----------------+
+ *   EP CONTROLLER 1                                        EP CONTROLLER 2
+ *
+ * Allocate the Local Memory mentioned in the above diagram. The size of the
+ * CONFIG REGION is sizeof(struct epf_ntb_ctrl) and the size of the SCRATCHPAD
+ * REGION is obtained from the "spad_count" configfs entry.
+ *
+ * The sizes of both the config region and the scratchpad region have to be
+ * aligned, since the scratchpad region will also be mapped as the PEER
+ * SCRATCHPAD of the other host using a separate BAR.
+ */
+static int epf_ntb_config_spad_bar_alloc(struct epf_ntb *ntb,
+                                        enum pci_epc_interface_type type)
+{
+       const struct pci_epc_features *peer_epc_features, *epc_features;
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       size_t msix_table_size, pba_size, align;
+       enum pci_barno peer_barno, barno;
+       struct epf_ntb_ctrl *ctrl;
+       u32 spad_size, ctrl_size;
+       u64 size, peer_size;
+       struct pci_epf *epf;
+       struct device *dev;
+       bool msix_capable;
+       u32 spad_count;
+       void *base;
+
+       epf = ntb->epf;
+       dev = &epf->dev;
+       ntb_epc = ntb->epc[type];
+
+       epc_features = ntb_epc->epc_features;
+       barno = ntb_epc->epf_ntb_bar[BAR_CONFIG];
+       size = epc_features->bar_fixed_size[barno];
+       align = epc_features->align;
+
+       peer_ntb_epc = ntb->epc[!type];
+       peer_epc_features = peer_ntb_epc->epc_features;
+       peer_barno = ntb_epc->epf_ntb_bar[BAR_PEER_SPAD];
+       peer_size = peer_epc_features->bar_fixed_size[peer_barno];
+
+       /* Check if epc_features is populated incorrectly */
+       if (!IS_ALIGNED(size, align))
+               return -EINVAL;
+
+       spad_count = ntb->spad_count;
+
+       ctrl_size = sizeof(struct epf_ntb_ctrl);
+       spad_size = spad_count * 4;
+
+       msix_capable = epc_features->msix_capable;
+       if (msix_capable) {
+               msix_table_size = PCI_MSIX_ENTRY_SIZE * ntb->db_count;
+               ctrl_size = ALIGN(ctrl_size, 8);
+               ntb_epc->msix_table_offset = ctrl_size;
+               ntb_epc->msix_bar = barno;
+               /* Align to QWORD or 8 Bytes */
+               pba_size = ALIGN(DIV_ROUND_UP(ntb->db_count, 8), 8);
+               ctrl_size = ctrl_size + msix_table_size + pba_size;
+       }
+
+       if (!align) {
+               ctrl_size = roundup_pow_of_two(ctrl_size);
+               spad_size = roundup_pow_of_two(spad_size);
+       } else {
+               ctrl_size = ALIGN(ctrl_size, align);
+               spad_size = ALIGN(spad_size, align);
+       }
+
+       if (peer_size) {
+               if (peer_size < spad_size)
+                       spad_count = peer_size / 4;
+               spad_size = peer_size;
+       }
+
+       /*
+        * In order to make sure SPAD offset is aligned to its size,
+        * expand control region size to the size of SPAD if SPAD size
+        * is greater than control region size.
+        */
+       if (spad_size > ctrl_size)
+               ctrl_size = spad_size;
+
+       if (!size)
+               size = ctrl_size + spad_size;
+       else if (size < ctrl_size + spad_size)
+               return -EINVAL;
+
+       base = pci_epf_alloc_space(epf, size, barno, align, type);
+       if (!base) {
+               dev_err(dev, "%s intf: Config/Status/SPAD alloc region fail\n",
+                       pci_epc_interface_string(type));
+               return -ENOMEM;
+       }
+
+       ntb_epc->reg = base;
+
+       ctrl = ntb_epc->reg;
+       ctrl->spad_offset = ctrl_size;
+       ctrl->spad_count = spad_count;
+       ctrl->num_mws = ntb->num_mws;
+       ctrl->db_entry_size = align ? align : 4;
+       ntb_epc->spad_size = spad_size;
+
+       return 0;
+}
+
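+/*
+ * Editorial sketch, not part of this patch: a worked example of the sizing
+ * above, assuming align = 0, db_count = 4, spad_count = 64, an MSI-X capable
+ * controller and (for illustration) sizeof(struct epf_ntb_ctrl) = 0x100:
+ *
+ *   msix_table_size = 16 * 4 = 64, pba_size = ALIGN(DIV_ROUND_UP(4, 8), 8) = 8
+ *   ctrl_size = 0x100 + 64 + 8 = 0x148 -> roundup_pow_of_two -> 0x200
+ *   spad_size = 64 * 4 = 0x100 (already a power of two)
+ *   size = ctrl_size + spad_size = 0x300, spad_offset = 0x200
+ */
+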
+/**
+ * epf_ntb_config_spad_bar_alloc_interface() - Allocate memory for config +
+ *   scratchpad region for each of PRIMARY and SECONDARY interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper that invokes epf_ntb_config_spad_bar_alloc() to allocate the
+ * config + scratchpad region for both the PRIMARY and SECONDARY interfaces.
+ */
+static int epf_ntb_config_spad_bar_alloc_interface(struct epf_ntb *ntb)
+{
+       enum pci_epc_interface_type type;
+       struct device *dev;
+       int ret;
+
+       dev = &ntb->epf->dev;
+
+       for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+               ret = epf_ntb_config_spad_bar_alloc(ntb, type);
+               if (ret) {
+                       dev_err(dev, "%s intf: Config/SPAD BAR alloc failed\n",
+                               pci_epc_interface_string(type));
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * epf_ntb_free_peer_mem() - Free memory allocated in the peer's outbound
+ *   address space
+ * @ntb_epc: EPC associated with one of the HOSTs, whose BARs hold the peer's
+ *   outbound address regions
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     |----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3||
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * Free memory allocated in EP CONTROLLER 2 (OB SPACE) in the above diagram.
+ * It'll free Doorbell 1, Doorbell 2, Doorbell 3, Doorbell 4, MW1 (and MW2, MW3,
+ * MW4).
+ */
+static void epf_ntb_free_peer_mem(struct epf_ntb_epc *ntb_epc)
+{
+       struct pci_epf_bar *epf_bar;
+       void __iomem *mw_addr;
+       phys_addr_t phys_addr;
+       enum epf_ntb_bar bar;
+       enum pci_barno barno;
+       struct pci_epc *epc;
+       size_t size;
+
+       epc = ntb_epc->epc;
+
+       for (bar = BAR_DB_MW1; bar < BAR_MW4; bar++) {
+               barno = ntb_epc->epf_ntb_bar[bar];
+               mw_addr = ntb_epc->mw_addr[barno];
+               epf_bar = &ntb_epc->epf_bar[barno];
+               phys_addr = epf_bar->phys_addr;
+               size = epf_bar->size;
+               if (mw_addr) {
+                       pci_epc_mem_free_addr(epc, phys_addr, mw_addr, size);
+                       ntb_epc->mw_addr[barno] = NULL;
+               }
+       }
+}
+
+/**
+ * epf_ntb_db_mw_bar_clear() - Clear doorbell and memory BAR
+ * @ntb_epc: EPC associated with one of the HOSTs, which holds the peer's
+ *   outbound address
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     |----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3||
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * Clear doorbell and memory BARs (removes the inbound ATU configuration). In
+ * the above diagram it clears BAR2 to BAR5 of EP CONTROLLER 1 (the combined
+ * Doorbell + MW1 BAR, and the MW2, MW3 and MW4 BARs).
+ */
+static void epf_ntb_db_mw_bar_clear(struct epf_ntb_epc *ntb_epc)
+{
+       struct pci_epf_bar *epf_bar;
+       enum epf_ntb_bar bar;
+       enum pci_barno barno;
+       struct pci_epc *epc;
+       u8 func_no;
+
+       epc = ntb_epc->epc;
+
+       func_no = ntb_epc->func_no;
+
+       for (bar = BAR_DB_MW1; bar < BAR_MW4; bar++) {
+               barno = ntb_epc->epf_ntb_bar[bar];
+               epf_bar = &ntb_epc->epf_bar[barno];
+               pci_epc_clear_bar(epc, func_no, epf_bar);
+       }
+}
+
+/**
+ * epf_ntb_db_mw_bar_cleanup() - Clear doorbell/memory BAR and free memory
+ *   allocated in the peer's outbound address space
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper for epf_ntb_db_mw_bar_clear() to clear HOST1's BARs and for
+ * epf_ntb_free_peer_mem() which frees up HOST2's outbound memory.
+ */
+static void epf_ntb_db_mw_bar_cleanup(struct epf_ntb *ntb,
+                                     enum pci_epc_interface_type type)
+{
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+
+       ntb_epc = ntb->epc[type];
+       peer_ntb_epc = ntb->epc[!type];
+
+       epf_ntb_db_mw_bar_clear(ntb_epc);
+       epf_ntb_free_peer_mem(peer_ntb_epc);
+}
+
+/**
+ * epf_ntb_configure_interrupt() - Configure MSI/MSI-X capability
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Configure MSI/MSI-X capability for each interface with number of
+ * interrupts equal to "db_count" configfs entry.
+ */
+static int epf_ntb_configure_interrupt(struct epf_ntb *ntb,
+                                      enum pci_epc_interface_type type)
+{
+       const struct pci_epc_features *epc_features;
+       bool msix_capable, msi_capable;
+       struct epf_ntb_epc *ntb_epc;
+       struct pci_epc *epc;
+       struct device *dev;
+       u32 db_count;
+       u8 func_no;
+       int ret;
+
+       ntb_epc = ntb->epc[type];
+       dev = &ntb->epf->dev;
+
+       epc_features = ntb_epc->epc_features;
+       msix_capable = epc_features->msix_capable;
+       msi_capable = epc_features->msi_capable;
+
+       if (!(msix_capable || msi_capable)) {
+               dev_err(dev, "MSI or MSI-X is required for doorbell\n");
+               return -EINVAL;
+       }
+
+       func_no = ntb_epc->func_no;
+
+       db_count = ntb->db_count;
+       if (db_count > MAX_DB_COUNT) {
+               dev_err(dev, "DB count cannot be more than %d\n", MAX_DB_COUNT);
+               return -EINVAL;
+       }
+
+       epc = ntb_epc->epc;
+
+       if (msi_capable) {
+               ret = pci_epc_set_msi(epc, func_no, db_count);
+               if (ret) {
+                       dev_err(dev, "%s intf: MSI configuration failed\n",
+                               pci_epc_interface_string(type));
+                       return ret;
+               }
+       }
+
+       if (msix_capable) {
+               ret = pci_epc_set_msix(epc, func_no, db_count,
+                                      ntb_epc->msix_bar,
+                                      ntb_epc->msix_table_offset);
+               if (ret) {
+                       dev_err(dev, "MSI configuration failed\n");
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * epf_ntb_alloc_peer_mem() - Allocate memory in the peer's outbound address
+ *   space
+ * @dev: Device used for logging error messages (the EPF device)
+ * @ntb_epc: EPC associated with one of the HOSTs, whose BAR holds the peer's
+ *   outbound address
+ * @bar: BAR of @ntb_epc for which memory has to be allocated (could be
+ *   BAR_DB_MW1, BAR_MW2, BAR_MW3, BAR_MW4)
+ * @peer_ntb_epc: EPC associated with the HOST whose outbound address space is
+ *   used by @ntb_epc
+ * @size: Size of the address region that has to be allocated in the peer's
+ *   OB SPACE
+ *
+ *
+ * +-----------------+    +---->+----------------+-----------+-----------------+
+ * |       BAR0      |    |     |   Doorbell 1   +-----------> MSI|X ADDRESS 1 |
+ * +-----------------+    |     +----------------+           +-----------------+
+ * |       BAR1      |    |     |   Doorbell 2   +---------+ |                 |
+ * +-----------------+----+     +----------------+         | |                 |
+ * |       BAR2      |          |   Doorbell 3   +-------+ | +-----------------+
+ * +-----------------+----+     +----------------+       | +-> MSI|X ADDRESS 2 |
+ * |       BAR3      |    |     |   Doorbell 4   +-----+ |   +-----------------+
+ * +-----------------+    |     |----------------+     | |   |                 |
+ * |       BAR4      |    |     |                |     | |   +-----------------+
+ * +-----------------+    |     |      MW1       +---+ | +-->+ MSI|X ADDRESS 3||
+ * |       BAR5      |    |     |                |   | |     +-----------------+
+ * +-----------------+    +---->-----------------+   | |     |                 |
+ *   EP CONTROLLER 1            |                |   | |     +-----------------+
+ *                              |                |   | +---->+ MSI|X ADDRESS 4 |
+ *                              +----------------+   |       +-----------------+
+ *                      (A)      EP CONTROLLER 2     |       |                 |
+ *                                 (OB SPACE)        |       |                 |
+ *                                                   +------->      MW1        |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                   (B)     +-----------------+
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           |                 |
+ *                                                           +-----------------+
+ *                                                           PCI Address Space
+ *                                                           (Managed by HOST2)
+ *
+ * Allocate memory in the OB space of EP CONTROLLER 2 in the above diagram:
+ * for Doorbell 1, Doorbell 2, Doorbell 3, Doorbell 4, MW1 (and MW2, MW3, MW4).
+ */
+static int epf_ntb_alloc_peer_mem(struct device *dev,
+                                 struct epf_ntb_epc *ntb_epc,
+                                 enum epf_ntb_bar bar,
+                                 struct epf_ntb_epc *peer_ntb_epc,
+                                 size_t size)
+{
+       const struct pci_epc_features *epc_features;
+       struct pci_epf_bar *epf_bar;
+       struct pci_epc *peer_epc;
+       phys_addr_t phys_addr;
+       void __iomem *mw_addr;
+       enum pci_barno barno;
+       size_t align;
+
+       epc_features = ntb_epc->epc_features;
+       align = epc_features->align;
+
+       if (size < 128)
+               size = 128;
+
+       if (align)
+               size = ALIGN(size, align);
+       else
+               size = roundup_pow_of_two(size);
+
+       peer_epc = peer_ntb_epc->epc;
+       mw_addr = pci_epc_mem_alloc_addr(peer_epc, &phys_addr, size);
+       if (!mw_addr) {
+               dev_err(dev, "%s intf: Failed to allocate OB address\n",
+                       pci_epc_interface_string(peer_ntb_epc->type));
+               return -ENOMEM;
+       }
+
+       barno = ntb_epc->epf_ntb_bar[bar];
+       epf_bar = &ntb_epc->epf_bar[barno];
+       ntb_epc->mw_addr[barno] = mw_addr;
+
+       epf_bar->phys_addr = phys_addr;
+       epf_bar->size = size;
+       epf_bar->barno = barno;
+       epf_bar->flags = PCI_BASE_ADDRESS_MEM_TYPE_32;
+
+       return 0;
+}
+
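+/*
+ * Editorial sketch, not part of this patch: the size fixups above in
+ * numbers. A request for 100 bytes is first bumped to the 128-byte floor;
+ * with align = 4096 it then becomes ALIGN(128, 4096) = 4096, while with
+ * align = 0 it becomes roundup_pow_of_two(128) = 128.
+ */
+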
+/**
+ * epf_ntb_db_mw_bar_init() - Configure Doorbell and Memory window BARs
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper for epf_ntb_alloc_peer_mem() and pci_epc_set_bar() that allocates
+ * memory in the OB address space of HOST2 and configures the BARs of HOST1.
+ */
+static int epf_ntb_db_mw_bar_init(struct epf_ntb *ntb,
+                                 enum pci_epc_interface_type type)
+{
+       const struct pci_epc_features *epc_features;
+       struct epf_ntb_epc *peer_ntb_epc, *ntb_epc;
+       struct pci_epf_bar *epf_bar;
+       struct epf_ntb_ctrl *ctrl;
+       u32 num_mws, db_count;
+       enum epf_ntb_bar bar;
+       enum pci_barno barno;
+       struct pci_epc *epc;
+       struct device *dev;
+       size_t align;
+       int ret, i;
+       u8 func_no;
+       u64 size;
+
+       ntb_epc = ntb->epc[type];
+       peer_ntb_epc = ntb->epc[!type];
+
+       dev = &ntb->epf->dev;
+       epc_features = ntb_epc->epc_features;
+       align = epc_features->align;
+       func_no = ntb_epc->func_no;
+       epc = ntb_epc->epc;
+       num_mws = ntb->num_mws;
+       db_count = ntb->db_count;
+
+       for (bar = BAR_DB_MW1, i = 0; i < num_mws; bar++, i++) {
+               if (bar == BAR_DB_MW1) {
+                       align = align ? align : 4;
+                       size = db_count * align;
+                       size = ALIGN(size, ntb->mws_size[i]);
+                       ctrl = ntb_epc->reg;
+                       ctrl->mw1_offset = size;
+                       size += ntb->mws_size[i];
+               } else {
+                       size = ntb->mws_size[i];
+               }
+
+               ret = epf_ntb_alloc_peer_mem(dev, ntb_epc, bar,
+                                            peer_ntb_epc, size);
+               if (ret) {
+                       dev_err(dev, "%s intf: DoorBell mem alloc failed\n",
+                               pci_epc_interface_string(type));
+                       goto err_alloc_peer_mem;
+               }
+
+               barno = ntb_epc->epf_ntb_bar[bar];
+               epf_bar = &ntb_epc->epf_bar[barno];
+
+               ret = pci_epc_set_bar(epc, func_no, epf_bar);
+               if (ret) {
+                       dev_err(dev, "%s intf: DoorBell BAR set failed\n",
+                               pci_epc_interface_string(type));
+                       goto err_alloc_peer_mem;
+               }
+       }
+
+       return 0;
+
+err_alloc_peer_mem:
+       epf_ntb_db_mw_bar_cleanup(ntb, type);
+
+       return ret;
+}
+
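+/*
+ * Editorial sketch, not part of this patch: layout of the combined
+ * doorbell + MW1 BAR computed above, assuming db_count = 4, align = 4096
+ * and mws_size[0] = 1 MiB:
+ *
+ *   size = 4 * 4096 = 0x4000                     // doorbell region
+ *   size = ALIGN(0x4000, 0x100000) = 0x100000    // pad to MW1 size
+ *   mw1_offset = 0x100000                        // MW1 starts here
+ *   size += 0x100000                             // 0x200000 total
+ */
+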
+/**
+ * epf_ntb_epc_destroy_interface() - Cleanup NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Unbind the NTB function device from the EPC and relinquish the reference
+ * to pci_epc for the given interface.
+ */
+static void epf_ntb_epc_destroy_interface(struct epf_ntb *ntb,
+                                         enum pci_epc_interface_type type)
+{
+       struct epf_ntb_epc *ntb_epc;
+       struct pci_epc *epc;
+       struct pci_epf *epf;
+
+       if (type < 0)
+               return;
+
+       epf = ntb->epf;
+       ntb_epc = ntb->epc[type];
+       if (!ntb_epc)
+               return;
+       epc = ntb_epc->epc;
+       pci_epc_remove_epf(epc, epf, type);
+       pci_epc_put(epc);
+}
+
+/**
+ * epf_ntb_epc_destroy() - Cleanup NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper for epf_ntb_epc_destroy_interface() to clean up all the NTB
+ * interfaces.
+ */
+static void epf_ntb_epc_destroy(struct epf_ntb *ntb)
+{
+       enum pci_epc_interface_type type;
+
+       for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++)
+               epf_ntb_epc_destroy_interface(ntb, type);
+}
+
+/**
+ * epf_ntb_epc_create_interface() - Create and initialize NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @epc: struct pci_epc to which a particular NTB interface should be associated
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Allocate memory for NTB EPC interface and initialize it.
+ */
+static int epf_ntb_epc_create_interface(struct epf_ntb *ntb,
+                                       struct pci_epc *epc,
+                                       enum pci_epc_interface_type type)
+{
+       const struct pci_epc_features *epc_features;
+       struct pci_epf_bar *epf_bar;
+       struct epf_ntb_epc *ntb_epc;
+       struct pci_epf *epf;
+       struct device *dev;
+       u8 func_no;
+
+       dev = &ntb->epf->dev;
+
+       ntb_epc = devm_kzalloc(dev, sizeof(*ntb_epc), GFP_KERNEL);
+       if (!ntb_epc)
+               return -ENOMEM;
+
+       epf = ntb->epf;
+       if (type == PRIMARY_INTERFACE) {
+               func_no = epf->func_no;
+               epf_bar = epf->bar;
+       } else {
+               func_no = epf->sec_epc_func_no;
+               epf_bar = epf->sec_epc_bar;
+       }
+
+       ntb_epc->linkup = false;
+       ntb_epc->epc = epc;
+       ntb_epc->func_no = func_no;
+       ntb_epc->type = type;
+       ntb_epc->epf_bar = epf_bar;
+       ntb_epc->epf_ntb = ntb;
+
+       epc_features = pci_epc_get_features(epc, func_no);
+       if (!epc_features)
+               return -EINVAL;
+       ntb_epc->epc_features = epc_features;
+
+       ntb->epc[type] = ntb_epc;
+
+       return 0;
+}
+
+/**
+ * epf_ntb_epc_create() - Create and initialize NTB EPC interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Get a reference to the EPC device and bind the NTB function device to that
+ * EPC for each of the interfaces. It is also a wrapper to
+ * epf_ntb_epc_create_interface() to allocate memory for the NTB EPC interface
+ * and initialize it.
+ */
+static int epf_ntb_epc_create(struct epf_ntb *ntb)
+{
+       struct pci_epf *epf;
+       struct device *dev;
+       int ret;
+
+       epf = ntb->epf;
+       dev = &epf->dev;
+
+       ret = epf_ntb_epc_create_interface(ntb, epf->epc, PRIMARY_INTERFACE);
+       if (ret) {
+               dev_err(dev, "PRIMARY intf: Fail to create NTB EPC\n");
+               return ret;
+       }
+
+       ret = epf_ntb_epc_create_interface(ntb, epf->sec_epc,
+                                          SECONDARY_INTERFACE);
+       if (ret) {
+               dev_err(dev, "SECONDARY intf: Fail to create NTB EPC\n");
+               goto err_epc_create;
+       }
+
+       return 0;
+
+err_epc_create:
+       epf_ntb_epc_destroy_interface(ntb, PRIMARY_INTERFACE);
+
+       return ret;
+}
+
+/**
+ * epf_ntb_init_epc_bar_interface() - Identify BARs to be used for each of
+ *   the NTB constructs (scratchpad region, doorbell, memory window)
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Identify the free BARs to be used for each of BAR_CONFIG, BAR_PEER_SPAD,
+ * BAR_DB_MW1, BAR_MW2, BAR_MW3 and BAR_MW4.
+ */
+static int epf_ntb_init_epc_bar_interface(struct epf_ntb *ntb,
+                                         enum pci_epc_interface_type type)
+{
+       const struct pci_epc_features *epc_features;
+       struct epf_ntb_epc *ntb_epc;
+       enum pci_barno barno;
+       enum epf_ntb_bar bar;
+       struct device *dev;
+       u32 num_mws;
+       int i;
+
+       barno = BAR_0;
+       ntb_epc = ntb->epc[type];
+       num_mws = ntb->num_mws;
+       dev = &ntb->epf->dev;
+       epc_features = ntb_epc->epc_features;
+
+       /* These are required BARs which are mandatory for NTB functionality */
+       for (bar = BAR_CONFIG; bar <= BAR_DB_MW1; bar++, barno++) {
+               barno = pci_epc_get_next_free_bar(epc_features, barno);
+               if (barno < 0) {
+                       dev_err(dev, "%s intf: Fail to get NTB function BAR\n",
+                               pci_epc_interface_string(type));
+                       return barno;
+               }
+               ntb_epc->epf_ntb_bar[bar] = barno;
+       }
+
+       /* These are optional BARs which don't impact NTB functionality */
+       for (bar = BAR_MW2, i = 1; i < num_mws; bar++, barno++, i++) {
+               barno = pci_epc_get_next_free_bar(epc_features, barno);
+               if (barno < 0) {
+                       /* Clamp num_mws and stop; remaining MWs are unused */
+                       ntb->num_mws = i;
+                       dev_dbg(dev, "BAR not available for > MW%d\n", i + 1);
+                       break;
+               }
+               ntb_epc->epf_ntb_bar[bar] = barno;
+       }
+
+       return 0;
+}
+
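+/*
+ * Editorial sketch, not part of this patch: on a controller where all six
+ * BARs are free and 32-bit, the loops above assign them in order:
+ *
+ *   BAR_CONFIG -> BAR_0, BAR_PEER_SPAD -> BAR_1, BAR_DB_MW1 -> BAR_2,
+ *   BAR_MW2 -> BAR_3, BAR_MW3 -> BAR_4, BAR_MW4 -> BAR_5
+ *
+ * With 64-bit or reserved BARs, pci_epc_get_next_free_bar() skips the
+ * unusable slots and num_mws is clamped once BARs run out.
+ */
+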
+/**
+ * epf_ntb_init_epc_bar() - Identify BARs to be used for each of the NTB
+ * constructs (scratchpad region, doorbell, memory window)
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper to epf_ntb_init_epc_bar_interface() to identify the free BARs
+ * to be used for each of BAR_CONFIG, BAR_PEER_SPAD, BAR_DB_MW1, BAR_MW2,
+ * BAR_MW3 and BAR_MW4 for all the interfaces.
+ */
+static int epf_ntb_init_epc_bar(struct epf_ntb *ntb)
+{
+       enum pci_epc_interface_type type;
+       struct device *dev;
+       int ret;
+
+       dev = &ntb->epf->dev;
+       for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+               ret = epf_ntb_init_epc_bar_interface(ntb, type);
+               if (ret) {
+                       dev_err(dev, "Fail to init EPC bar for %s interface\n",
+                               pci_epc_interface_string(type));
+                       return ret;
+               }
+       }
+
+       return 0;
+}
+
+/**
+ * epf_ntb_epc_init_interface() - Initialize NTB interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper to initialize a particular EPC interface and start the workqueue
+ * to check for commands from the host. This function will write to the
+ * EP controller HW for configuring it.
+ */
+static int epf_ntb_epc_init_interface(struct epf_ntb *ntb,
+                                     enum pci_epc_interface_type type)
+{
+       struct epf_ntb_epc *ntb_epc;
+       struct pci_epc *epc;
+       struct pci_epf *epf;
+       struct device *dev;
+       u8 func_no;
+       int ret;
+
+       ntb_epc = ntb->epc[type];
+       epf = ntb->epf;
+       dev = &epf->dev;
+       epc = ntb_epc->epc;
+       func_no = ntb_epc->func_no;
+
+       ret = epf_ntb_config_sspad_bar_set(ntb->epc[type]);
+       if (ret) {
+               dev_err(dev, "%s intf: Config/self SPAD BAR init failed\n",
+                       pci_epc_interface_string(type));
+               return ret;
+       }
+
+       ret = epf_ntb_peer_spad_bar_set(ntb, type);
+       if (ret) {
+               dev_err(dev, "%s intf: Peer SPAD BAR init failed\n",
+                       pci_epc_interface_string(type));
+               goto err_peer_spad_bar_init;
+       }
+
+       ret = epf_ntb_configure_interrupt(ntb, type);
+       if (ret) {
+               dev_err(dev, "%s intf: Interrupt configuration failed\n",
+                       pci_epc_interface_string(type));
+               goto err_peer_spad_bar_init;
+       }
+
+       ret = epf_ntb_db_mw_bar_init(ntb, type);
+       if (ret) {
+               dev_err(dev, "%s intf: DB/MW BAR init failed\n",
+                       pci_epc_interface_string(type));
+               goto err_db_mw_bar_init;
+       }
+
+       ret = pci_epc_write_header(epc, func_no, epf->header);
+       if (ret) {
+               dev_err(dev, "%s intf: Configuration header write failed\n",
+                       pci_epc_interface_string(type));
+               goto err_write_header;
+       }
+
+       INIT_DELAYED_WORK(&ntb->epc[type]->cmd_handler, epf_ntb_cmd_handler);
+       queue_work(kpcintb_workqueue, &ntb->epc[type]->cmd_handler.work);
+
+       return 0;
+
+err_write_header:
+       epf_ntb_db_mw_bar_cleanup(ntb, type);
+
+err_db_mw_bar_init:
+       epf_ntb_peer_spad_bar_clear(ntb->epc[type]);
+
+err_peer_spad_bar_init:
+       epf_ntb_config_sspad_bar_clear(ntb->epc[type]);
+
+       return ret;
+}
+
+/**
+ * epf_ntb_epc_cleanup_interface() - Cleanup NTB interface
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ * @type: PRIMARY interface or SECONDARY interface
+ *
+ * Wrapper to clean up a particular NTB interface.
+ */
+static void epf_ntb_epc_cleanup_interface(struct epf_ntb *ntb,
+                                         enum pci_epc_interface_type type)
+{
+       struct epf_ntb_epc *ntb_epc;
+
+       if (type < 0)
+               return;
+
+       ntb_epc = ntb->epc[type];
+       cancel_delayed_work(&ntb_epc->cmd_handler);
+       epf_ntb_db_mw_bar_cleanup(ntb, type);
+       epf_ntb_peer_spad_bar_clear(ntb_epc);
+       epf_ntb_config_sspad_bar_clear(ntb_epc);
+}
+
+/**
+ * epf_ntb_epc_cleanup() - Cleanup all NTB interfaces
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper to clean up all NTB interfaces.
+ */
+static void epf_ntb_epc_cleanup(struct epf_ntb *ntb)
+{
+       enum pci_epc_interface_type type;
+
+       for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++)
+               epf_ntb_epc_cleanup_interface(ntb, type);
+}
+
+/**
+ * epf_ntb_epc_init() - Initialize all NTB interfaces
+ * @ntb: NTB device that facilitates communication between HOST1 and HOST2
+ *
+ * Wrapper to initialize all NTB interfaces and start the workqueue
+ * to check for commands from the host.
+ */
+static int epf_ntb_epc_init(struct epf_ntb *ntb)
+{
+       enum pci_epc_interface_type type;
+       struct device *dev;
+       int ret;
+
+       dev = &ntb->epf->dev;
+
+       for (type = PRIMARY_INTERFACE; type <= SECONDARY_INTERFACE; type++) {
+               ret = epf_ntb_epc_init_interface(ntb, type);
+               if (ret) {
+                       dev_err(dev, "%s intf: Failed to initialize\n",
+                               pci_epc_interface_string(type));
+                       goto err_init_type;
+               }
+       }
+
+       return 0;
+
+err_init_type:
+       epf_ntb_epc_cleanup_interface(ntb, type - 1);
+
+       return ret;
+}
+
+/**
+ * epf_ntb_bind() - Initialize endpoint controller to provide NTB functionality
+ * @epf: NTB endpoint function device
+ *
+ * Initialize both the endpoint controllers associated with the NTB function
+ * device. Invoked when the primary interface or the secondary interface is
+ * bound to an EPC device. This function will succeed only when an EPC is
+ * bound to both the interfaces.
+ */
+static int epf_ntb_bind(struct pci_epf *epf)
+{
+       struct epf_ntb *ntb = epf_get_drvdata(epf);
+       struct device *dev = &epf->dev;
+       int ret;
+
+       if (!epf->epc) {
+               dev_dbg(dev, "PRIMARY EPC interface not yet bound\n");
+               return 0;
+       }
+
+       if (!epf->sec_epc) {
+               dev_dbg(dev, "SECONDARY EPC interface not yet bound\n");
+               return 0;
+       }
+
+       ret = epf_ntb_epc_create(ntb);
+       if (ret) {
+               dev_err(dev, "Failed to create NTB EPC\n");
+               return ret;
+       }
+
+       ret = epf_ntb_init_epc_bar(ntb);
+       if (ret) {
+               dev_err(dev, "Failed to create NTB EPC\n");
+               goto err_bar_init;
+       }
+
+       ret = epf_ntb_config_spad_bar_alloc_interface(ntb);
+       if (ret) {
+               dev_err(dev, "Failed to allocate BAR memory\n");
+               goto err_bar_alloc;
+       }
+
+       ret = epf_ntb_epc_init(ntb);
+       if (ret) {
+               dev_err(dev, "Failed to initialize EPC\n");
+               goto err_bar_alloc;
+       }
+
+       epf_set_drvdata(epf, ntb);
+
+       return 0;
+
+err_bar_alloc:
+       epf_ntb_config_spad_bar_free(ntb);
+
+err_bar_init:
+       epf_ntb_epc_destroy(ntb);
+
+       return ret;
+}
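+
+/*
+ * Example (sketch, with made-up controller names): userspace attaches the
+ * two interfaces by symlinking two endpoint controllers into the EPF's
+ * "primary" and "secondary" configfs directories; epf_ntb_bind() runs on
+ * each link and only proceeds once both links exist:
+ *
+ *   # ln -s controllers/2900000.pcie-ep functions/pci_epf_ntb/func1/primary
+ *   # ln -s controllers/2910000.pcie-ep functions/pci_epf_ntb/func1/secondary
+ */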
+
+/**
+ * epf_ntb_unbind() - Cleanup the initialization from epf_ntb_bind()
+ * @epf: NTB endpoint function device
+ *
+ * Undo the initialization performed by epf_ntb_bind().
+ */
+static void epf_ntb_unbind(struct pci_epf *epf)
+{
+       struct epf_ntb *ntb = epf_get_drvdata(epf);
+
+       epf_ntb_epc_cleanup(ntb);
+       epf_ntb_config_spad_bar_free(ntb);
+       epf_ntb_epc_destroy(ntb);
+}
+
+#define EPF_NTB_R(_name)                                               \
+static ssize_t epf_ntb_##_name##_show(struct config_item *item,                \
+                                     char *page)                       \
+{                                                                      \
+       struct config_group *group = to_config_group(item);             \
+       struct epf_ntb *ntb = to_epf_ntb(group);                        \
+                                                                       \
+       return sprintf(page, "%d\n", ntb->_name);                       \
+}
+
+#define EPF_NTB_W(_name)                                               \
+static ssize_t epf_ntb_##_name##_store(struct config_item *item,       \
+                                      const char *page, size_t len)    \
+{                                                                      \
+       struct config_group *group = to_config_group(item);             \
+       struct epf_ntb *ntb = to_epf_ntb(group);                        \
+       u32 val;                                                        \
+       int ret;                                                        \
+                                                                       \
+       ret = kstrtou32(page, 0, &val);                                 \
+       if (ret)                                                        \
+               return ret;                                             \
+                                                                       \
+       ntb->_name = val;                                               \
+                                                                       \
+       return len;                                                     \
+}
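+
+/*
+ * For illustration: an invocation like EPF_NTB_R(spad_count) expands to an
+ * epf_ntb_spad_count_show() callback that prints ntb->spad_count, and
+ * EPF_NTB_W(spad_count) to the matching epf_ntb_spad_count_store()
+ * callback that parses the written value with kstrtou32().
+ */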
+
+#define EPF_NTB_MW_R(_name)                                            \
+static ssize_t epf_ntb_##_name##_show(struct config_item *item,                \
+                                     char *page)                       \
+{                                                                      \
+       struct config_group *group = to_config_group(item);             \
+       struct epf_ntb *ntb = to_epf_ntb(group);                        \
+       int win_no;                                                     \
+                                                                       \
+       sscanf(#_name, "mw%d", &win_no);                                \
+                                                                       \
+       return sprintf(page, "%lld\n", ntb->mws_size[win_no - 1]);      \
+}
+
+#define EPF_NTB_MW_W(_name)                                            \
+static ssize_t epf_ntb_##_name##_store(struct config_item *item,       \
+                                      const char *page, size_t len)    \
+{                                                                      \
+       struct config_group *group = to_config_group(item);             \
+       struct epf_ntb *ntb = to_epf_ntb(group);                        \
+       struct device *dev = &ntb->epf->dev;                            \
+       int win_no;                                                     \
+       u64 val;                                                        \
+       int ret;                                                        \
+                                                                       \
+       ret = kstrtou64(page, 0, &val);                                 \
+       if (ret)                                                        \
+               return ret;                                             \
+                                                                       \
+       if (sscanf(#_name, "mw%d", &win_no) != 1)                       \
+               return -EINVAL;                                         \
+                                                                       \
+       if (ntb->num_mws < win_no) {                                    \
+               dev_err(dev, "Invalid num_nws: %d value\n", ntb->num_mws); \
+               return -EINVAL;                                         \
+       }                                                               \
+                                                                       \
+       ntb->mws_size[win_no - 1] = val;                                \
+                                                                       \
+       return len;                                                     \
+}
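+
+/*
+ * In the mw[1-4] accessors, the window index is recovered at run time by
+ * sscanf()ing the stringified macro argument: for EPF_NTB_MW_W(mw2),
+ * #_name is "mw2", so win_no becomes 2 and the size is stored in
+ * mws_size[1].
+ */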
+
+static ssize_t epf_ntb_num_mws_store(struct config_item *item,
+                                    const char *page, size_t len)
+{
+       struct config_group *group = to_config_group(item);
+       struct epf_ntb *ntb = to_epf_ntb(group);
+       u32 val;
+       int ret;
+
+       ret = kstrtou32(page, 0, &val);
+       if (ret)
+               return ret;
+
+       if (val > MAX_MW)
+               return -EINVAL;
+
+       ntb->num_mws = val;
+
+       return len;
+}
+
+EPF_NTB_R(spad_count)
+EPF_NTB_W(spad_count)
+EPF_NTB_R(db_count)
+EPF_NTB_W(db_count)
+EPF_NTB_R(num_mws)
+EPF_NTB_MW_R(mw1)
+EPF_NTB_MW_W(mw1)
+EPF_NTB_MW_R(mw2)
+EPF_NTB_MW_W(mw2)
+EPF_NTB_MW_R(mw3)
+EPF_NTB_MW_W(mw3)
+EPF_NTB_MW_R(mw4)
+EPF_NTB_MW_W(mw4)
+
+CONFIGFS_ATTR(epf_ntb_, spad_count);
+CONFIGFS_ATTR(epf_ntb_, db_count);
+CONFIGFS_ATTR(epf_ntb_, num_mws);
+CONFIGFS_ATTR(epf_ntb_, mw1);
+CONFIGFS_ATTR(epf_ntb_, mw2);
+CONFIGFS_ATTR(epf_ntb_, mw3);
+CONFIGFS_ATTR(epf_ntb_, mw4);
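+
+/*
+ * Usage sketch (hypothetical directory names): the NTB directory created
+ * by epf_ntb_add_cfs() exposes the attributes declared above as plain
+ * files, e.g.:
+ *
+ *   # mkdir functions/pci_epf_ntb/func1/pci_epf_ntb.0
+ *   # echo 64 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/spad_count
+ *   # echo 2 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/num_mws
+ *   # echo 0x100000 > functions/pci_epf_ntb/func1/pci_epf_ntb.0/mw1
+ */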
+
+static struct configfs_attribute *epf_ntb_attrs[] = {
+       &epf_ntb_attr_spad_count,
+       &epf_ntb_attr_db_count,
+       &epf_ntb_attr_num_mws,
+       &epf_ntb_attr_mw1,
+       &epf_ntb_attr_mw2,
+       &epf_ntb_attr_mw3,
+       &epf_ntb_attr_mw4,
+       NULL,
+};
+
+static const struct config_item_type ntb_group_type = {
+       .ct_attrs       = epf_ntb_attrs,
+       .ct_owner       = THIS_MODULE,
+};
+
+/**
+ * epf_ntb_add_cfs() - Add configfs directory specific to NTB
+ * @epf: NTB endpoint function device
+ * @group: the parent configfs group in which the NTB directory is created
+ *
+ * Add a configfs directory specific to NTB. This directory will hold
+ * NTB-specific properties like db_count, spad_count and num_mws.
+ */
+static struct config_group *epf_ntb_add_cfs(struct pci_epf *epf,
+                                           struct config_group *group)
+{
+       struct epf_ntb *ntb = epf_get_drvdata(epf);
+       struct config_group *ntb_group = &ntb->group;
+       struct device *dev = &epf->dev;
+
+       config_group_init_type_name(ntb_group, dev_name(dev), &ntb_group_type);
+
+       return ntb_group;
+}
+
+/**
+ * epf_ntb_probe() - Probe NTB function driver
+ * @epf: NTB endpoint function device
+ *
+ * Probe the NTB function driver when the endpoint function bus detects an
+ * NTB endpoint function.
+ */
+static int epf_ntb_probe(struct pci_epf *epf)
+{
+       struct epf_ntb *ntb;
+       struct device *dev;
+
+       dev = &epf->dev;
+
+       ntb = devm_kzalloc(dev, sizeof(*ntb), GFP_KERNEL);
+       if (!ntb)
+               return -ENOMEM;
+
+       epf->header = &epf_ntb_header;
+       ntb->epf = epf;
+       epf_set_drvdata(epf, ntb);
+
+       return 0;
+}
+
+static struct pci_epf_ops epf_ntb_ops = {
+       .bind   = epf_ntb_bind,
+       .unbind = epf_ntb_unbind,
+       .add_cfs = epf_ntb_add_cfs,
+};
+
+static const struct pci_epf_device_id epf_ntb_ids[] = {
+       {
+               .name = "pci_epf_ntb",
+       },
+       {},
+};
+
+static struct pci_epf_driver epf_ntb_driver = {
+       .driver.name    = "pci_epf_ntb",
+       .probe          = epf_ntb_probe,
+       .id_table       = epf_ntb_ids,
+       .ops            = &epf_ntb_ops,
+       .owner          = THIS_MODULE,
+};
+
+static int __init epf_ntb_init(void)
+{
+       int ret;
+
+       kpcintb_workqueue = alloc_workqueue("kpcintb", WQ_MEM_RECLAIM |
+                                           WQ_HIGHPRI, 0);
+       if (!kpcintb_workqueue) {
+               pr_err("Failed to allocate the kpcintb workqueue\n");
+               return -ENOMEM;
+       }
+
+       ret = pci_epf_register_driver(&epf_ntb_driver);
+       if (ret) {
+               destroy_workqueue(kpcintb_workqueue);
+               pr_err("Failed to register pci epf ntb driver --> %d\n", ret);
+               return ret;
+       }
+
+       return 0;
+}
+module_init(epf_ntb_init);
+
+static void __exit epf_ntb_exit(void)
+{
+       pci_epf_unregister_driver(&epf_ntb_driver);
+       destroy_workqueue(kpcintb_workqueue);
+}
+module_exit(epf_ntb_exit);
+
+MODULE_DESCRIPTION("PCI EPF NTB DRIVER");
+MODULE_AUTHOR("Kishon Vijay Abraham I <kishon@ti.com>");
+MODULE_LICENSE("GPL v2");
index e4e51d8..c0ac4e9 100644 (file)
@@ -619,7 +619,8 @@ static void pci_epf_test_unbind(struct pci_epf *epf)
 
                if (epf_test->reg[bar]) {
                        pci_epc_clear_bar(epc, epf->func_no, epf_bar);
-                       pci_epf_free_space(epf, epf_test->reg[bar], bar);
+                       pci_epf_free_space(epf, epf_test->reg[bar], bar,
+                                          PRIMARY_INTERFACE);
                }
        }
 }
@@ -651,7 +652,8 @@ static int pci_epf_test_set_bar(struct pci_epf *epf)
 
                ret = pci_epc_set_bar(epc, epf->func_no, epf_bar);
                if (ret) {
-                       pci_epf_free_space(epf, epf_test->reg[bar], bar);
+                       pci_epf_free_space(epf, epf_test->reg[bar], bar,
+                                          PRIMARY_INTERFACE);
                        dev_err(dev, "Failed to set BAR%d\n", bar);
                        if (bar == test_reg_bar)
                                return ret;
@@ -771,7 +773,7 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf)
        }
 
        base = pci_epf_alloc_space(epf, test_reg_size, test_reg_bar,
-                                  epc_features->align);
+                                  epc_features->align, PRIMARY_INTERFACE);
        if (!base) {
                dev_err(dev, "Failed to allocated register space\n");
                return -ENOMEM;
@@ -789,7 +791,8 @@ static int pci_epf_test_alloc_space(struct pci_epf *epf)
                        continue;
 
                base = pci_epf_alloc_space(epf, bar_size[bar], bar,
-                                          epc_features->align);
+                                          epc_features->align,
+                                          PRIMARY_INTERFACE);
                if (!base)
                        dev_err(dev, "Failed to allocate space for BAR%d\n",
                                bar);
@@ -834,6 +837,8 @@ static int pci_epf_test_bind(struct pci_epf *epf)
                linkup_notifier = epc_features->linkup_notifier;
                core_init_notifier = epc_features->core_init_notifier;
                test_reg_bar = pci_epc_get_first_free_bar(epc_features);
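+               /* NO_BAR (< 0) means every BAR is reserved */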
+               if (test_reg_bar < 0)
+                       return -EINVAL;
                pci_epf_configure_bar(epf, epc_features);
        }
 
index 3710adf..f3a8b83 100644 (file)
@@ -21,6 +21,9 @@ static struct config_group *controllers_group;
 
 struct pci_epf_group {
        struct config_group group;
+       struct config_group primary_epc_group;
+       struct config_group secondary_epc_group;
+       struct delayed_work cfs_work;
        struct pci_epf *epf;
        int index;
 };
@@ -41,6 +44,127 @@ static inline struct pci_epc_group *to_pci_epc_group(struct config_item *item)
        return container_of(to_config_group(item), struct pci_epc_group, group);
 }
 
+static int pci_secondary_epc_epf_link(struct config_item *epf_item,
+                                     struct config_item *epc_item)
+{
+       int ret;
+       struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+       struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+       struct pci_epc *epc = epc_group->epc;
+       struct pci_epf *epf = epf_group->epf;
+
+       ret = pci_epc_add_epf(epc, epf, SECONDARY_INTERFACE);
+       if (ret)
+               return ret;
+
+       ret = pci_epf_bind(epf);
+       if (ret) {
+               pci_epc_remove_epf(epc, epf, SECONDARY_INTERFACE);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void pci_secondary_epc_epf_unlink(struct config_item *epc_item,
+                                        struct config_item *epf_item)
+{
+       struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+       struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+       struct pci_epc *epc;
+       struct pci_epf *epf;
+
+       WARN_ON_ONCE(epc_group->start);
+
+       epc = epc_group->epc;
+       epf = epf_group->epf;
+       pci_epf_unbind(epf);
+       pci_epc_remove_epf(epc, epf, SECONDARY_INTERFACE);
+}
+
+static struct configfs_item_operations pci_secondary_epc_item_ops = {
+       .allow_link     = pci_secondary_epc_epf_link,
+       .drop_link      = pci_secondary_epc_epf_unlink,
+};
+
+static const struct config_item_type pci_secondary_epc_type = {
+       .ct_item_ops    = &pci_secondary_epc_item_ops,
+       .ct_owner       = THIS_MODULE,
+};
+
+static struct config_group
+*pci_ep_cfs_add_secondary_group(struct pci_epf_group *epf_group)
+{
+       struct config_group *secondary_epc_group;
+
+       secondary_epc_group = &epf_group->secondary_epc_group;
+       config_group_init_type_name(secondary_epc_group, "secondary",
+                                   &pci_secondary_epc_type);
+       configfs_register_group(&epf_group->group, secondary_epc_group);
+
+       return secondary_epc_group;
+}
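+
+/*
+ * Symlinking an EPC into the "secondary" directory registered above ends
+ * up in pci_secondary_epc_epf_link(), which attaches that EPC as the
+ * function's secondary interface and triggers pci_epf_bind(); the
+ * "primary" group below works the same way.
+ */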
+
+static int pci_primary_epc_epf_link(struct config_item *epf_item,
+                                   struct config_item *epc_item)
+{
+       int ret;
+       struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+       struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+       struct pci_epc *epc = epc_group->epc;
+       struct pci_epf *epf = epf_group->epf;
+
+       ret = pci_epc_add_epf(epc, epf, PRIMARY_INTERFACE);
+       if (ret)
+               return ret;
+
+       ret = pci_epf_bind(epf);
+       if (ret) {
+               pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
+               return ret;
+       }
+
+       return 0;
+}
+
+static void pci_primary_epc_epf_unlink(struct config_item *epc_item,
+                                      struct config_item *epf_item)
+{
+       struct pci_epf_group *epf_group = to_pci_epf_group(epf_item->ci_parent);
+       struct pci_epc_group *epc_group = to_pci_epc_group(epc_item);
+       struct pci_epc *epc;
+       struct pci_epf *epf;
+
+       WARN_ON_ONCE(epc_group->start);
+
+       epc = epc_group->epc;
+       epf = epf_group->epf;
+       pci_epf_unbind(epf);
+       pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
+}
+
+static struct configfs_item_operations pci_primary_epc_item_ops = {
+       .allow_link     = pci_primary_epc_epf_link,
+       .drop_link      = pci_primary_epc_epf_unlink,
+};
+
+static const struct config_item_type pci_primary_epc_type = {
+       .ct_item_ops    = &pci_primary_epc_item_ops,
+       .ct_owner       = THIS_MODULE,
+};
+
+static struct config_group
+*pci_ep_cfs_add_primary_group(struct pci_epf_group *epf_group)
+{
+       struct config_group *primary_epc_group = &epf_group->primary_epc_group;
+
+       config_group_init_type_name(primary_epc_group, "primary",
+                                   &pci_primary_epc_type);
+       configfs_register_group(&epf_group->group, primary_epc_group);
+
+       return primary_epc_group;
+}
+
 static ssize_t pci_epc_start_store(struct config_item *item, const char *page,
                                   size_t len)
 {
@@ -94,13 +218,13 @@ static int pci_epc_epf_link(struct config_item *epc_item,
        struct pci_epc *epc = epc_group->epc;
        struct pci_epf *epf = epf_group->epf;
 
-       ret = pci_epc_add_epf(epc, epf);
+       ret = pci_epc_add_epf(epc, epf, PRIMARY_INTERFACE);
        if (ret)
                return ret;
 
        ret = pci_epf_bind(epf);
        if (ret) {
-               pci_epc_remove_epf(epc, epf);
+               pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
                return ret;
        }
 
@@ -120,7 +244,7 @@ static void pci_epc_epf_unlink(struct config_item *epc_item,
        epc = epc_group->epc;
        epf = epf_group->epf;
        pci_epf_unbind(epf);
-       pci_epc_remove_epf(epc, epf);
+       pci_epc_remove_epf(epc, epf, PRIMARY_INTERFACE);
 }
 
 static struct configfs_item_operations pci_epc_item_ops = {
@@ -366,12 +490,53 @@ static struct configfs_item_operations pci_epf_ops = {
        .release                = pci_epf_release,
 };
 
+static struct config_group *pci_epf_type_make(struct config_group *group,
+                                             const char *name)
+{
+       struct pci_epf_group *epf_group = to_pci_epf_group(&group->cg_item);
+       struct config_group *epf_type_group;
+
+       epf_type_group = pci_epf_type_add_cfs(epf_group->epf, group);
+       return epf_type_group;
+}
+
+static void pci_epf_type_drop(struct config_group *group,
+                             struct config_item *item)
+{
+       config_item_put(item);
+}
+
+static struct configfs_group_operations pci_epf_type_group_ops = {
+       .make_group     = &pci_epf_type_make,
+       .drop_item      = &pci_epf_type_drop,
+};
+
 static const struct config_item_type pci_epf_type = {
+       .ct_group_ops   = &pci_epf_type_group_ops,
        .ct_item_ops    = &pci_epf_ops,
        .ct_attrs       = pci_epf_attrs,
        .ct_owner       = THIS_MODULE,
 };
 
+static void pci_epf_cfs_work(struct work_struct *work)
+{
+       struct pci_epf_group *epf_group;
+       struct config_group *group;
+
+       epf_group = container_of(work, struct pci_epf_group, cfs_work.work);
+       group = pci_ep_cfs_add_primary_group(epf_group);
+       if (IS_ERR(group)) {
+               pr_err("failed to create 'primary' EPC interface\n");
+               return;
+       }
+
+       group = pci_ep_cfs_add_secondary_group(epf_group);
+       if (IS_ERR(group)) {
+               pr_err("failed to create 'secondary' EPC interface\n");
+               return;
+       }
+}
+
 static struct config_group *pci_epf_make(struct config_group *group,
                                         const char *name)
 {
@@ -410,10 +575,15 @@ static struct config_group *pci_epf_make(struct config_group *group,
                goto free_name;
        }
 
+       epf->group = &epf_group->group;
        epf_group->epf = epf;
 
        kfree(epf_name);
 
+       INIT_DELAYED_WORK(&epf_group->cfs_work, pci_epf_cfs_work);
+       queue_delayed_work(system_wq, &epf_group->cfs_work,
+                          msecs_to_jiffies(1));
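+       /*
+        * The "primary"/"secondary" sub-groups are registered from delayed
+        * work rather than directly here, presumably so that
+        * configfs_register_group() is not called while configfs is still
+        * processing this mkdir; see pci_epf_cfs_work() above.
+        */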
+
        return &epf_group->group;
 
 free_name:
index cadd3db..cc8f9eb 100644 (file)
@@ -87,24 +87,50 @@ EXPORT_SYMBOL_GPL(pci_epc_get);
  * pci_epc_get_first_free_bar() - helper to get first unreserved BAR
  * @epc_features: pci_epc_features structure that holds the reserved bar bitmap
  *
- * Invoke to get the first unreserved BAR that can be used for endpoint
+ * Invoke to get the first unreserved BAR that can be used by the endpoint
- * function. For any incorrect value in reserved_bar return '0'.
+ * function. Returns NO_BAR when no unreserved BAR is available.
  */
-unsigned int pci_epc_get_first_free_bar(const struct pci_epc_features
-                                       *epc_features)
+enum pci_barno
+pci_epc_get_first_free_bar(const struct pci_epc_features *epc_features)
 {
-       int free_bar;
+       return pci_epc_get_next_free_bar(epc_features, BAR_0);
+}
+EXPORT_SYMBOL_GPL(pci_epc_get_first_free_bar);
+
+/**
+ * pci_epc_get_next_free_bar() - helper to get unreserved BAR starting from @bar
+ * @epc_features: pci_epc_features structure that holds the reserved bar bitmap
+ * @bar: the BAR number from which to start searching for an unreserved BAR
+ *
+ * Invoke to get the next unreserved BAR starting from @bar that can be used
+ * by an endpoint function. Returns NO_BAR when no unreserved BAR is
+ * available.
+ */
+enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features
+                                        *epc_features, enum pci_barno bar)
+{
+       unsigned long free_bar;
 
        if (!epc_features)
-               return 0;
+               return BAR_0;
+
+       /* If 'bar - 1' is a 64-bit BAR, move to the next BAR */
+       if ((epc_features->bar_fixed_64bit << 1) & 1 << bar)
+               bar++;
+
+       /* Find if the reserved BAR is also a 64-bit BAR */
+       free_bar = epc_features->reserved_bar & epc_features->bar_fixed_64bit;
 
-       free_bar = ffz(epc_features->reserved_bar);
+       /* Set the adjacent bit if the reserved BAR is also a 64-bit BAR */
+       free_bar <<= 1;
+       free_bar |= epc_features->reserved_bar;
+
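+       /*
+        * Worked example: with reserved_bar = 0b000001 (BAR_0 reserved) and
+        * bar_fixed_64bit = 0b000001 (BAR_0 being 64-bit), free_bar above
+        * becomes 0b000011, so a search starting at BAR_0 skips BAR_0 and
+        * its upper half BAR_1 and returns BAR_2.
+        */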
+       free_bar = find_next_zero_bit(&free_bar, 6, bar);
        if (free_bar > 5)
-               return 0;
+               return NO_BAR;
 
        return free_bar;
 }
-EXPORT_SYMBOL_GPL(pci_epc_get_first_free_bar);
+EXPORT_SYMBOL_GPL(pci_epc_get_next_free_bar);
 
 /**
  * pci_epc_get_features() - get the features supported by EPC
@@ -205,6 +231,47 @@ int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
 EXPORT_SYMBOL_GPL(pci_epc_raise_irq);
 
 /**
+ * pci_epc_map_msi_irq() - Map physical address to MSI address and return
+ *                         MSI data
+ * @epc: the EPC device which has the MSI capability
+ * @func_no: the physical endpoint function number in the EPC device
+ * @phys_addr: the physical address of the outbound region
+ * @interrupt_num: the MSI interrupt number
+ * @entry_size: size of the outbound address region for each interrupt
+ * @msi_data: the data that should be written in order to raise MSI interrupt
+ *            with interrupt number as @interrupt_num
+ * @msi_addr_offset: Offset of MSI address from the aligned outbound address
+ *                   to which the MSI address is mapped
+ *
+ * Invoke to map physical address to MSI address and return MSI data. The
+ * physical address should be an address in the outbound region. This is
+ * required to implement the doorbell functionality of NTB, wherein the EPC
+ * on either side of the interface (primary and secondary) can directly
+ * write to the physical address (in the outbound region) of the other
+ * interface to ring its doorbell.
+ */
+int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no, phys_addr_t phys_addr,
+                       u8 interrupt_num, u32 entry_size, u32 *msi_data,
+                       u32 *msi_addr_offset)
+{
+       int ret;
+
+       if (IS_ERR_OR_NULL(epc))
+               return -EINVAL;
+
+       if (!epc->ops->map_msi_irq)
+               return -EINVAL;
+
+       mutex_lock(&epc->lock);
+       ret = epc->ops->map_msi_irq(epc, func_no, phys_addr, interrupt_num,
+                                   entry_size, msi_data, msi_addr_offset);
+       mutex_unlock(&epc->lock);
+
+       return ret;
+}
+EXPORT_SYMBOL_GPL(pci_epc_map_msi_irq);
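+
+/*
+ * Usage sketch (hypothetical variable names): an NTB-style caller maps an
+ * outbound window at phys_addr/virt_addr to the peer's MSI address and
+ * then rings doorbell 1 by writing the returned data word at the returned
+ * offset:
+ *
+ *     u32 db_data, db_offset;
+ *
+ *     ret = pci_epc_map_msi_irq(epc, func_no, phys_addr, 1, SZ_4K,
+ *                               &db_data, &db_offset);
+ *     if (!ret)
+ *             writel(db_data, virt_addr + db_offset);
+ */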
+
+/**
  * pci_epc_get_msi() - get the number of MSI interrupt numbers allocated
  * @epc: the EPC device to which MSI interrupts was requested
  * @func_no: the endpoint function number in the EPC device
@@ -467,21 +534,28 @@ EXPORT_SYMBOL_GPL(pci_epc_write_header);
  * pci_epc_add_epf() - bind PCI endpoint function to an endpoint controller
  * @epc: the EPC device to which the endpoint function should be added
  * @epf: the endpoint function to be added
+ * @type: Identifies if the EPC is connected to the primary or secondary
+ *        interface of the EPF
  *
  * A PCI endpoint device can have one or more functions. In the case of PCIe,
  * the specification allows up to 8 PCIe endpoint functions. Invoke
  * pci_epc_add_epf() to add a PCI endpoint function to an endpoint controller.
  */
-int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf)
+int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf,
+                   enum pci_epc_interface_type type)
 {
+       struct list_head *list;
        u32 func_no;
        int ret = 0;
 
-       if (epf->epc)
+       if (IS_ERR_OR_NULL(epc))
+               return -EINVAL;
+
+       if (type == PRIMARY_INTERFACE && epf->epc)
                return -EBUSY;
 
-       if (IS_ERR(epc))
-               return -EINVAL;
+       if (type == SECONDARY_INTERFACE && epf->sec_epc)
+               return -EBUSY;
 
        mutex_lock(&epc->lock);
        func_no = find_first_zero_bit(&epc->function_num_map,
@@ -498,11 +572,17 @@ int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf)
        }
 
        set_bit(func_no, &epc->function_num_map);
-       epf->func_no = func_no;
-       epf->epc = epc;
-
-       list_add_tail(&epf->list, &epc->pci_epf);
+       if (type == PRIMARY_INTERFACE) {
+               epf->func_no = func_no;
+               epf->epc = epc;
+               list = &epf->list;
+       } else {
+               epf->sec_epc_func_no = func_no;
+               epf->sec_epc = epc;
+               list = &epf->sec_epc_list;
+       }
 
+       list_add_tail(list, &epc->pci_epf);
 ret:
        mutex_unlock(&epc->lock);
 
@@ -517,14 +597,26 @@ EXPORT_SYMBOL_GPL(pci_epc_add_epf);
  *
  * Invoke to remove PCI endpoint function from the endpoint controller.
  */
-void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf)
+void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
+                       enum pci_epc_interface_type type)
 {
+       struct list_head *list;
+       u32 func_no = 0;
+
        if (!epc || IS_ERR(epc) || !epf)
                return;
 
+       if (type == PRIMARY_INTERFACE) {
+               func_no = epf->func_no;
+               list = &epf->list;
+       } else {
+               func_no = epf->sec_epc_func_no;
+               list = &epf->sec_epc_list;
+       }
+
        mutex_lock(&epc->lock);
-       clear_bit(epf->func_no, &epc->function_num_map);
-       list_del(&epf->list);
+       clear_bit(func_no, &epc->function_num_map);
+       list_del(list);
        epf->epc = NULL;
        mutex_unlock(&epc->lock);
 }
index c977cf9..7646c86 100644 (file)
@@ -21,6 +21,38 @@ static struct bus_type pci_epf_bus_type;
 static const struct device_type pci_epf_type;
 
 /**
+ * pci_epf_type_add_cfs() - Help function drivers expose function-specific
+ *                          attributes in configfs
+ * @epf: the EPF device that has to be configured using configfs
+ * @group: the parent configfs group (corresponding to entries in
+ *         pci_epf_device_id)
+ *
+ * Invoke to expose function-specific attributes in configfs. If the function
+ * driver does not have any user-configurable attributes to expose, NULL is
+ * returned.
+ */
+struct config_group *pci_epf_type_add_cfs(struct pci_epf *epf,
+                                         struct config_group *group)
+{
+       struct config_group *epf_type_group;
+
+       if (!epf->driver) {
+               dev_err(&epf->dev, "epf device not bound to driver\n");
+               return NULL;
+       }
+
+       if (!epf->driver->ops->add_cfs)
+               return NULL;
+
+       mutex_lock(&epf->lock);
+       epf_type_group = epf->driver->ops->add_cfs(epf, group);
+       mutex_unlock(&epf->lock);
+
+       return epf_type_group;
+}
+EXPORT_SYMBOL_GPL(pci_epf_type_add_cfs);
+
+/**
  * pci_epf_unbind() - Notify the function driver that the binding between the
  *                   EPF device and EPC device has been lost
  * @epf: the EPF device which has lost the binding with the EPC device
@@ -74,24 +106,37 @@ EXPORT_SYMBOL_GPL(pci_epf_bind);
  * @epf: the EPF device from whom to free the memory
  * @addr: the virtual address of the PCI EPF register space
  * @bar: the BAR number corresponding to the register space
+ * @type: Identifies if the allocated space is for primary EPC or secondary EPC
  *
  * Invoke to free the allocated PCI EPF register space.
  */
-void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar)
+void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar,
+                       enum pci_epc_interface_type type)
 {
-       struct device *dev = epf->epc->dev.parent;
+       struct device *dev;
+       struct pci_epf_bar *epf_bar;
+       struct pci_epc *epc;
 
        if (!addr)
                return;
 
-       dma_free_coherent(dev, epf->bar[bar].size, addr,
-                         epf->bar[bar].phys_addr);
+       if (type == PRIMARY_INTERFACE) {
+               epc = epf->epc;
+               epf_bar = epf->bar;
+       } else {
+               epc = epf->sec_epc;
+               epf_bar = epf->sec_epc_bar;
+       }
 
-       epf->bar[bar].phys_addr = 0;
-       epf->bar[bar].addr = NULL;
-       epf->bar[bar].size = 0;
-       epf->bar[bar].barno = 0;
-       epf->bar[bar].flags = 0;
+       dev = epc->dev.parent;
+       dma_free_coherent(dev, epf_bar[bar].size, addr,
+                         epf_bar[bar].phys_addr);
+
+       epf_bar[bar].phys_addr = 0;
+       epf_bar[bar].addr = NULL;
+       epf_bar[bar].size = 0;
+       epf_bar[bar].barno = 0;
+       epf_bar[bar].flags = 0;
 }
 EXPORT_SYMBOL_GPL(pci_epf_free_space);
 
@@ -101,15 +146,18 @@ EXPORT_SYMBOL_GPL(pci_epf_free_space);
  * @size: the size of the memory that has to be allocated
  * @bar: the BAR number corresponding to the allocated register space
  * @align: alignment size for the allocation region
+ * @type: Identifies if the allocation is for primary EPC or secondary EPC
  *
  * Invoke to allocate memory for the PCI EPF register space.
  */
 void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar,
-                         size_t align)
+                         size_t align, enum pci_epc_interface_type type)
 {
-       void *space;
-       struct device *dev = epf->epc->dev.parent;
+       struct pci_epf_bar *epf_bar;
        dma_addr_t phys_addr;
+       struct pci_epc *epc;
+       struct device *dev;
+       void *space;
 
        if (size < 128)
                size = 128;
@@ -119,17 +167,26 @@ void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar,
        else
                size = roundup_pow_of_two(size);
 
+       if (type == PRIMARY_INTERFACE) {
+               epc = epf->epc;
+               epf_bar = epf->bar;
+       } else {
+               epc = epf->sec_epc;
+               epf_bar = epf->sec_epc_bar;
+       }
+
+       dev = epc->dev.parent;
        space = dma_alloc_coherent(dev, size, &phys_addr, GFP_KERNEL);
        if (!space) {
                dev_err(dev, "failed to allocate mem space\n");
                return NULL;
        }
 
-       epf->bar[bar].phys_addr = phys_addr;
-       epf->bar[bar].addr = space;
-       epf->bar[bar].size = size;
-       epf->bar[bar].barno = bar;
-       epf->bar[bar].flags |= upper_32_bits(size) ?
+       epf_bar[bar].phys_addr = phys_addr;
+       epf_bar[bar].addr = space;
+       epf_bar[bar].size = size;
+       epf_bar[bar].barno = bar;
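+       /* a size that does not fit in 32 bits requires a 64-bit BAR */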
+       epf_bar[bar].flags |= upper_32_bits(size) ?
                                PCI_BASE_ADDRESS_MEM_TYPE_64 :
                                PCI_BASE_ADDRESS_MEM_TYPE_32;
 
@@ -282,22 +339,6 @@ struct pci_epf *pci_epf_create(const char *name)
 }
 EXPORT_SYMBOL_GPL(pci_epf_create);
 
-const struct pci_epf_device_id *
-pci_epf_match_device(const struct pci_epf_device_id *id, struct pci_epf *epf)
-{
-       if (!id || !epf)
-               return NULL;
-
-       while (*id->name) {
-               if (strcmp(epf->name, id->name) == 0)
-                       return id;
-               id++;
-       }
-
-       return NULL;
-}
-EXPORT_SYMBOL_GPL(pci_epf_match_device);
-
 static void pci_epf_dev_release(struct device *dev)
 {
        struct pci_epf *epf = to_pci_epf(dev);
index a2094c0..a74b274 100644 (file)
@@ -176,9 +176,6 @@ int acpiphp_unregister_attention(struct acpiphp_attention_info *info);
 int acpiphp_register_hotplug_slot(struct acpiphp_slot *slot, unsigned int sun);
 void acpiphp_unregister_hotplug_slot(struct acpiphp_slot *slot);
 
-/* acpiphp_glue.c */
-typedef int (*acpiphp_callback)(struct acpiphp_slot *slot, void *data);
-
 int acpiphp_enable_slot(struct acpiphp_slot *slot);
 int acpiphp_disable_slot(struct acpiphp_slot *slot);
 u8 acpiphp_get_power_status(struct acpiphp_slot *slot);
index cdbfa5d..dbfa0b5 100644 (file)
@@ -34,12 +34,11 @@ static ssize_t add_slot_store(struct kobject *kobj, struct kobj_attribute *attr,
        if (nbytes >= MAX_DRC_NAME_LEN)
                return 0;
 
-       memcpy(drc_name, buf, nbytes);
+       strscpy(drc_name, buf, nbytes + 1);
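+       /* copy at most nbytes characters; strscpy() NUL-terminates the result */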
 
        end = strchr(drc_name, '\n');
-       if (!end)
-               end = &drc_name[nbytes];
-       *end = '\0';
+       if (end)
+               *end = '\0';
 
        rc = dlpar_add_slot(drc_name);
        if (rc)
@@ -65,12 +64,11 @@ static ssize_t remove_slot_store(struct kobject *kobj,
        if (nbytes >= MAX_DRC_NAME_LEN)
                return 0;
 
-       memcpy(drc_name, buf, nbytes);
+       strscpy(drc_name, buf, nbytes + 1);
 
        end = strchr(drc_name, '\n');
-       if (!end)
-               end = &drc_name[nbytes];
-       *end = '\0';
+       if (end)
+               *end = '\0';
 
        rc = dlpar_remove_slot(drc_name);
        if (rc)
index c9e790c..a047c42 100644 (file)
@@ -93,8 +93,9 @@ static int disable_slot(struct hotplug_slot *hotplug_slot)
                pci_dev_put(pdev);
                return -EBUSY;
        }
+       pci_dev_put(pdev);
 
-       zpci_remove_device(zdev);
+       zpci_remove_device(zdev, false);
 
        rc = zpci_disable_device(zdev);
        if (rc)
index 139869d..fdaf86a 100644 (file)
@@ -21,8 +21,9 @@
 #include "pci-bridge-emul.h"
 
 #define PCI_BRIDGE_CONF_END    PCI_STD_HEADER_SIZEOF
+#define PCI_CAP_PCIE_SIZEOF    (PCI_EXP_SLTSTA2 + 2)
 #define PCI_CAP_PCIE_START     PCI_BRIDGE_CONF_END
-#define PCI_CAP_PCIE_END       (PCI_CAP_PCIE_START + PCI_EXP_SLTSTA2 + 2)
+#define PCI_CAP_PCIE_END       (PCI_CAP_PCIE_START + PCI_CAP_PCIE_SIZEOF)
 
 /**
  * struct pci_bridge_reg_behavior - register bits behaviors
@@ -46,7 +47,8 @@ struct pci_bridge_reg_behavior {
        u32 w1c;
 };
 
-static const struct pci_bridge_reg_behavior pci_regs_behavior[] = {
+static const
+struct pci_bridge_reg_behavior pci_regs_behavior[PCI_STD_HEADER_SIZEOF / 4] = {
        [PCI_VENDOR_ID / 4] = { .ro = ~0 },
        [PCI_COMMAND / 4] = {
                .rw = (PCI_COMMAND_IO | PCI_COMMAND_MEMORY |
@@ -164,7 +166,8 @@ static const struct pci_bridge_reg_behavior pci_regs_behavior[] = {
        },
 };
 
-static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = {
+static const
+struct pci_bridge_reg_behavior pcie_cap_regs_behavior[PCI_CAP_PCIE_SIZEOF / 4] = {
        [PCI_CAP_LIST_ID / 4] = {
                /*
                 * Capability ID, Next Capability Pointer and
@@ -260,6 +263,8 @@ static const struct pci_bridge_reg_behavior pcie_cap_regs_behavior[] = {
 int pci_bridge_emul_init(struct pci_bridge_emul *bridge,
                         unsigned int flags)
 {
+       BUILD_BUG_ON(sizeof(bridge->conf) != PCI_BRIDGE_CONF_END);
+
        bridge->conf.class_revision |= cpu_to_le32(PCI_CLASS_BRIDGE_PCI << 16);
        bridge->conf.header_type = PCI_HEADER_TYPE_BRIDGE;
        bridge->conf.cache_line_size = 0x10;
index b67c432..16a1721 100644 (file)
@@ -4030,6 +4030,10 @@ int pci_register_io_range(struct fwnode_handle *fwnode, phys_addr_t addr,
        ret = logic_pio_register_range(range);
        if (ret)
                kfree(range);
+
+       /* Ignore duplicates due to deferred probing */
+       if (ret == -EEXIST)
+               ret = 0;
 #endif
 
        return ret;
index 3946555..45a2ef7 100644 (file)
@@ -133,14 +133,6 @@ config PCIE_PTM
          This is only useful if you have devices that support PTM, but it
          is safe to enable even if you don't.
 
-config PCIE_BW
-       bool "PCI Express Bandwidth Change Notification"
-       depends on PCIEPORTBUS
-       help
-         This enables PCI Express Bandwidth Change Notification.  If
-         you know link width or rate changes occur only to correct
-         unreliable links, you may answer Y.
-
 config PCIE_EDR
        bool "PCI Express Error Disconnect Recover support"
        depends on PCIE_DPC && ACPI
index d969789..b2980db 100644 (file)
@@ -12,5 +12,4 @@ obj-$(CONFIG_PCIEAER_INJECT)  += aer_inject.o
 obj-$(CONFIG_PCIE_PME)         += pme.o
 obj-$(CONFIG_PCIE_DPC)         += dpc.o
 obj-$(CONFIG_PCIE_PTM)         += ptm.o
-obj-$(CONFIG_PCIE_BW)          += bw_notification.o
 obj-$(CONFIG_PCIE_EDR)         += edr.o
index 77b0f2c..ba22388 100644 (file)
@@ -1388,7 +1388,7 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
        if (type == PCI_EXP_TYPE_RC_END)
                root = dev->rcec;
        else
-               root = dev;
+               root = pcie_find_root_port(dev);
 
        /*
         * If the platform retained control of AER, an RCiEP may not have
@@ -1414,7 +1414,8 @@ static pci_ers_result_t aer_root_reset(struct pci_dev *dev)
                }
        } else {
                rc = pci_bus_error_reset(dev);
-               pci_info(dev, "Root Port link has been reset (%d)\n", rc);
+               pci_info(dev, "%s Port link has been reset (%d)\n",
+                       pci_is_root_bus(dev->bus) ? "Root" : "Downstream", rc);
        }
 
        if ((host->native_aer || pcie_ports_native) && aer) {
diff --git a/drivers/pci/pcie/bw_notification.c b/drivers/pci/pcie/bw_notification.c
deleted file mode 100644 (file)
index 565d23c..0000000
+++ /dev/null
@@ -1,138 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * PCI Express Link Bandwidth Notification services driver
- * Author: Alexandru Gagniuc <mr.nuke.me@gmail.com>
- *
- * Copyright (C) 2019, Dell Inc
- *
- * The PCIe Link Bandwidth Notification provides a way to notify the
- * operating system when the link width or data rate changes.  This
- * capability is required for all root ports and downstream ports
- * supporting links wider than x1 and/or multiple link speeds.
- *
- * This service port driver hooks into the bandwidth notification interrupt
- * and warns when links become degraded in operation.
- */
-
-#define dev_fmt(fmt) "bw_notification: " fmt
-
-#include "../pci.h"
-#include "portdrv.h"
-
-static bool pcie_link_bandwidth_notification_supported(struct pci_dev *dev)
-{
-       int ret;
-       u32 lnk_cap;
-
-       ret = pcie_capability_read_dword(dev, PCI_EXP_LNKCAP, &lnk_cap);
-       return (ret == PCIBIOS_SUCCESSFUL) && (lnk_cap & PCI_EXP_LNKCAP_LBNC);
-}
-
-static void pcie_enable_link_bandwidth_notification(struct pci_dev *dev)
-{
-       u16 lnk_ctl;
-
-       pcie_capability_write_word(dev, PCI_EXP_LNKSTA, PCI_EXP_LNKSTA_LBMS);
-
-       pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl);
-       lnk_ctl |= PCI_EXP_LNKCTL_LBMIE;
-       pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
-}
-
-static void pcie_disable_link_bandwidth_notification(struct pci_dev *dev)
-{
-       u16 lnk_ctl;
-
-       pcie_capability_read_word(dev, PCI_EXP_LNKCTL, &lnk_ctl);
-       lnk_ctl &= ~PCI_EXP_LNKCTL_LBMIE;
-       pcie_capability_write_word(dev, PCI_EXP_LNKCTL, lnk_ctl);
-}
-
-static irqreturn_t pcie_bw_notification_irq(int irq, void *context)
-{
-       struct pcie_device *srv = context;
-       struct pci_dev *port = srv->port;
-       u16 link_status, events;
-       int ret;
-
-       ret = pcie_capability_read_word(port, PCI_EXP_LNKSTA, &link_status);
-       events = link_status & PCI_EXP_LNKSTA_LBMS;
-
-       if (ret != PCIBIOS_SUCCESSFUL || !events)
-               return IRQ_NONE;
-
-       pcie_capability_write_word(port, PCI_EXP_LNKSTA, events);
-       pcie_update_link_speed(port->subordinate, link_status);
-       return IRQ_WAKE_THREAD;
-}
-
-static irqreturn_t pcie_bw_notification_handler(int irq, void *context)
-{
-       struct pcie_device *srv = context;
-       struct pci_dev *port = srv->port;
-       struct pci_dev *dev;
-
-       /*
-        * Print status from downstream devices, not this root port or
-        * downstream switch port.
-        */
-       down_read(&pci_bus_sem);
-       list_for_each_entry(dev, &port->subordinate->devices, bus_list)
-               pcie_report_downtraining(dev);
-       up_read(&pci_bus_sem);
-
-       return IRQ_HANDLED;
-}
-
-static int pcie_bandwidth_notification_probe(struct pcie_device *srv)
-{
-       int ret;
-
-       /* Single-width or single-speed ports do not have to support this. */
-       if (!pcie_link_bandwidth_notification_supported(srv->port))
-               return -ENODEV;
-
-       ret = request_threaded_irq(srv->irq, pcie_bw_notification_irq,
-                                  pcie_bw_notification_handler,
-                                  IRQF_SHARED, "PCIe BW notif", srv);
-       if (ret)
-               return ret;
-
-       pcie_enable_link_bandwidth_notification(srv->port);
-       pci_info(srv->port, "enabled with IRQ %d\n", srv->irq);
-
-       return 0;
-}
-
-static void pcie_bandwidth_notification_remove(struct pcie_device *srv)
-{
-       pcie_disable_link_bandwidth_notification(srv->port);
-       free_irq(srv->irq, srv);
-}
-
-static int pcie_bandwidth_notification_suspend(struct pcie_device *srv)
-{
-       pcie_disable_link_bandwidth_notification(srv->port);
-       return 0;
-}
-
-static int pcie_bandwidth_notification_resume(struct pcie_device *srv)
-{
-       pcie_enable_link_bandwidth_notification(srv->port);
-       return 0;
-}
-
-static struct pcie_port_service_driver pcie_bandwidth_notification_driver = {
-       .name           = "pcie_bw_notification",
-       .port_type      = PCIE_ANY_PORT,
-       .service        = PCIE_PORT_SERVICE_BWNOTIF,
-       .probe          = pcie_bandwidth_notification_probe,
-       .suspend        = pcie_bandwidth_notification_suspend,
-       .resume         = pcie_bandwidth_notification_resume,
-       .remove         = pcie_bandwidth_notification_remove,
-};
-
-int __init pcie_bandwidth_notification_init(void)
-{
-       return pcie_port_service_register(&pcie_bandwidth_notification_driver);
-}
index 510f31f..b576aa8 100644 (file)
@@ -198,8 +198,7 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
        pci_dbg(bridge, "broadcast error_detected message\n");
        if (state == pci_channel_io_frozen) {
                pci_walk_bridge(bridge, report_frozen_detected, &status);
-               status = reset_subordinates(bridge);
-               if (status != PCI_ERS_RESULT_RECOVERED) {
+               if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) {
                        pci_warn(bridge, "subordinate device reset failed\n");
                        goto failed;
                }
@@ -231,15 +230,14 @@ pci_ers_result_t pcie_do_recovery(struct pci_dev *dev,
        pci_walk_bridge(bridge, report_resume, &status);
 
        /*
-        * If we have native control of AER, clear error status in the Root
-        * Port or Downstream Port that signaled the error.  If the
-        * platform retained control of AER, it is responsible for clearing
-        * this status.  In that case, the signaling device may not even be
-        * visible to the OS.
+        * If we have native control of AER, clear error status in the device
+        * that detected the error.  If the platform retained control of AER,
+        * it is responsible for clearing this status.  In that case, the
+        * signaling device may not even be visible to the OS.
         */
        if (host->native_aer || pcie_ports_native) {
-               pcie_clear_device_status(bridge);
-               pci_aer_clear_nonfatal_status(bridge);
+               pcie_clear_device_status(dev);
+               pci_aer_clear_nonfatal_status(dev);
        }
        pci_info(bridge, "device recovery successful\n");
        return status;
index af7cf23..2ff5724 100644 (file)
@@ -53,12 +53,6 @@ int pcie_dpc_init(void);
 static inline int pcie_dpc_init(void) { return 0; }
 #endif
 
-#ifdef CONFIG_PCIE_BW
-int pcie_bandwidth_notification_init(void);
-#else
-static inline int pcie_bandwidth_notification_init(void) { return 0; }
-#endif
-
 /* Port Type */
 #define PCIE_ANY_PORT                  (~0)
 
index 0b250bc..c7ff1ee 100644 (file)
@@ -153,7 +153,8 @@ static void pcie_portdrv_remove(struct pci_dev *dev)
 static pci_ers_result_t pcie_portdrv_error_detected(struct pci_dev *dev,
                                        pci_channel_state_t error)
 {
-       /* Root Port has no impact. Always recovers. */
+       if (error == pci_channel_io_frozen)
+               return PCI_ERS_RESULT_NEED_RESET;
        return PCI_ERS_RESULT_CAN_RECOVER;
 }
 
@@ -255,7 +256,6 @@ static void __init pcie_init_services(void)
        pcie_pme_init();
        pcie_dpc_init();
        pcie_hp_init();
-       pcie_bandwidth_notification_init();
 }
 
 static int __init pcie_portdrv_init(void)
index 2061672..b4c138a 100644 (file)
@@ -168,7 +168,6 @@ struct pci_bus *pci_find_next_bus(const struct pci_bus *from)
        struct list_head *n;
        struct pci_bus *b = NULL;
 
-       WARN_ON(in_interrupt());
        down_read(&pci_bus_sem);
        n = from ? from->node.next : pci_root_buses.next;
        if (n != &pci_root_buses)
@@ -196,7 +195,6 @@ struct pci_dev *pci_get_slot(struct pci_bus *bus, unsigned int devfn)
 {
        struct pci_dev *dev;
 
-       WARN_ON(in_interrupt());
        down_read(&pci_bus_sem);
 
        list_for_each_entry(dev, &bus->devices, bus_list) {
@@ -274,7 +272,6 @@ static struct pci_dev *pci_get_dev_by_id(const struct pci_device_id *id,
        struct device *dev_start = NULL;
        struct pci_dev *pdev = NULL;
 
-       WARN_ON(in_interrupt());
        if (from)
                dev_start = &from->dev;
        dev = bus_find_device(&pci_bus_type, dev_start, (void *)id,
@@ -381,7 +378,6 @@ int pci_dev_present(const struct pci_device_id *ids)
 {
        struct pci_dev *found = NULL;
 
-       WARN_ON(in_interrupt());
        while (ids->vendor || ids->subvendor || ids->class_mask) {
                found = pci_get_dev_by_id(ids, NULL);
                if (found) {
index 43eda10..7f1acb3 100644 (file)
@@ -410,10 +410,16 @@ EXPORT_SYMBOL(pci_release_resource);
 int pci_resize_resource(struct pci_dev *dev, int resno, int size)
 {
        struct resource *res = dev->resource + resno;
+       struct pci_host_bridge *host;
        int old, ret;
        u32 sizes;
        u16 cmd;
 
+       /* Check if we must preserve the firmware's resource assignment */
+       host = pci_find_host_bridge(dev->bus);
+       if (host->preserve_config)
+               return -ENOTSUPP;
+
        /* Make sure the resource isn't assigned before resizing it. */
        if (!(res->flags & IORESOURCE_UNSET))
                return -EBUSY;
index 31e3955..8b003c8 100644 (file)
@@ -20,7 +20,7 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
        u16 word;
        u32 dword;
        long err;
-       long cfg_ret;
+       int cfg_ret;
 
        if (!capable(CAP_SYS_ADMIN))
                return -EPERM;
@@ -46,7 +46,7 @@ SYSCALL_DEFINE5(pciconfig_read, unsigned long, bus, unsigned long, dfn,
        }
 
        err = -EIO;
-       if (cfg_ret != PCIBIOS_SUCCESSFUL)
+       if (cfg_ret)
                goto error;
 
        switch (len) {
@@ -105,7 +105,7 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
                if (err)
                        break;
                err = pci_user_write_config_byte(dev, off, byte);
-               if (err != PCIBIOS_SUCCESSFUL)
+               if (err)
                        err = -EIO;
                break;
 
@@ -114,7 +114,7 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
                if (err)
                        break;
                err = pci_user_write_config_word(dev, off, word);
-               if (err != PCIBIOS_SUCCESSFUL)
+               if (err)
                        err = -EIO;
                break;
 
@@ -123,7 +123,7 @@ SYSCALL_DEFINE5(pciconfig_write, unsigned long, bus, unsigned long, dfn,
                if (err)
                        break;
                err = pci_user_write_config_dword(dev, off, dword);
-               if (err != PCIBIOS_SUCCESSFUL)
+               if (err)
                        err = -EIO;
                break;
 
index c6fe0cf..2d75026 100644 (file)
@@ -26,7 +26,7 @@
 #include <xen/platform_pci.h>
 
 #include <asm/xen/swiotlb-xen.h>
-#define INVALID_GRANT_REF (0)
+
 #define INVALID_EVTCHN    (-1)
 
 struct pci_bus_entry {
@@ -42,7 +42,7 @@ struct pcifront_device {
        struct list_head root_buses;
 
        int evtchn;
-       int gnt_ref;
+       grant_ref_t gnt_ref;
 
        int irq;
 
index cf109d9..e693910 100644 (file)
@@ -1554,7 +1554,7 @@ static ssize_t pccard_show_cis(struct file *filp, struct kobject *kobj,
                if (off + count > size)
                        count = size - off;
 
-               s = to_socket(container_of(kobj, struct device, kobj));
+               s = to_socket(kobj_to_dev(kobj));
 
                if (!(s->state & SOCKET_PRESENT))
                        return -ENODEV;
@@ -1581,7 +1581,7 @@ static ssize_t pccard_store_cis(struct file *filp, struct kobject *kobj,
        if (error)
                return error;
 
-       s = to_socket(container_of(kobj, struct device, kobj));
+       s = to_socket(kobj_to_dev(kobj));
 
        if (off)
                return -EINVAL;
index 66ad5b3..f2a8550 100644 (file)
@@ -681,6 +681,7 @@ static int dmc620_pmu_device_probe(struct platform_device *pdev)
        if (!name) {
                dev_err(&pdev->dev,
                          "Create name failed, PMU @%pa\n", &res->start);
+               ret = -ENOMEM;
                goto out_teardown_dev;
        }
 
index 00dabe5..68d9c2f 100644 (file)
@@ -52,6 +52,7 @@ config PHY_XGENE
 config USB_LGM_PHY
        tristate "INTEL Lightning Mountain USB PHY Driver"
        depends on USB_SUPPORT
+       depends on X86 || COMPILE_TEST
        select USB_PHY
        select REGULATOR
        select REGULATOR_FIXED_VOLTAGE
index a1f1a9c..0925633 100644 (file)
@@ -91,10 +91,11 @@ config PHY_BRCM_SATA
 
 config PHY_BRCM_USB
        tristate "Broadcom STB USB PHY driver"
-       depends on ARCH_BRCMSTB || COMPILE_TEST
+       depends on ARCH_BCM4908 || ARCH_BRCMSTB || COMPILE_TEST
        depends on OF
        select GENERIC_PHY
        select SOC_BRCMSTB
+       default ARCH_BCM4908
        default ARCH_BRCMSTB
        help
          Enable this to support the Broadcom STB USB PHY.
index 3ecf413..769c707 100644 (file)
@@ -651,7 +651,7 @@ static int brcm_dsl_sata_init(struct brcm_sata_port *port)
                        break;
                msleep(20);
                try--;
-       };
+       }
 
        if (!try) {
                /* PLL did not lock; give up */
index 99fbc7e..116fb23 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/io.h>
 #include <linux/module.h>
 #include <linux/of.h>
+#include <linux/of_device.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
 #include <linux/interrupt.h>
@@ -34,7 +35,7 @@ struct value_to_name_map {
 };
 
 struct match_chip_info {
-       void *init_func;
+       void (*init_func)(struct brcm_usb_init_params *params);
        u8 required_regs[BRCM_REGS_MAX + 1];
        u8 optional_reg;
 };
@@ -286,6 +287,10 @@ static const struct match_chip_info chip_info_7445 = {
 
 static const struct of_device_id brcm_usb_dt_ids[] = {
        {
+               .compatible = "brcm,bcm4908-usb-phy",
+               .data = &chip_info_7445,
+       },
+       {
                .compatible = "brcm,bcm7216-usb-phy",
                .data = &chip_info_7216,
        },
@@ -427,8 +432,6 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
        struct device_node *dn = pdev->dev.of_node;
        int err;
        const char *mode;
-       const struct of_device_id *match;
-       void (*dvr_init)(struct brcm_usb_init_params *params);
        const struct match_chip_info *info;
        struct regmap *rmap;
        int x;
@@ -441,10 +444,11 @@ static int brcm_usb_phy_probe(struct platform_device *pdev)
        priv->ini.family_id = brcmstb_get_family_id();
        priv->ini.product_id = brcmstb_get_product_id();
 
-       match = of_match_node(brcm_usb_dt_ids, dev->of_node);
-       info = match->data;
-       dvr_init = info->init_func;
-       (*dvr_init)(&priv->ini);
+       info = of_device_get_match_data(&pdev->dev);
+       if (!info)
+               return -ENOENT;
+
+       info->init_func(&priv->ini);
 
        dev_dbg(dev, "Best mapping table is for %s\n",
                priv->ini.family_name);
index f310e15..591a158 100644 (file)
@@ -2298,6 +2298,7 @@ static int cdns_torrent_phy_probe(struct platform_device *pdev)
 
        if (total_num_lanes > MAX_NUM_LANES) {
                dev_err(dev, "Invalid lane configuration\n");
+               ret = -EINVAL;
                goto put_lnk_rst;
        }
 
index 4d1587d..ea127b1 100644 (file)
 #define USBPCR1_PORT_RST                       BIT(21)
 #define USBPCR1_WORD_IF_16BIT          BIT(19)
 
-enum ingenic_usb_phy_version {
-       ID_JZ4770,
-       ID_JZ4775,
-       ID_JZ4780,
-       ID_X1000,
-       ID_X1830,
-       ID_X2000,
-};
-
 struct ingenic_soc_info {
-       enum ingenic_usb_phy_version version;
-
        void (*usb_phy_init)(struct phy *phy);
 };
 
@@ -300,38 +289,26 @@ static void x2000_usb_phy_init(struct phy *phy)
 }
 
 static const struct ingenic_soc_info jz4770_soc_info = {
-       .version = ID_JZ4770,
-
        .usb_phy_init = jz4770_usb_phy_init,
 };
 
 static const struct ingenic_soc_info jz4775_soc_info = {
-       .version = ID_JZ4775,
-
        .usb_phy_init = jz4775_usb_phy_init,
 };
 
 static const struct ingenic_soc_info jz4780_soc_info = {
-       .version = ID_JZ4780,
-
        .usb_phy_init = jz4780_usb_phy_init,
 };
 
 static const struct ingenic_soc_info x1000_soc_info = {
-       .version = ID_X1000,
-
        .usb_phy_init = x1000_usb_phy_init,
 };
 
 static const struct ingenic_soc_info x1830_soc_info = {
-       .version = ID_X1830,
-
        .usb_phy_init = x1830_usb_phy_init,
 };
 
 static const struct ingenic_soc_info x2000_soc_info = {
-       .version = ID_X2000,
-
        .usb_phy_init = x2000_usb_phy_init,
 };
 
index a7d1261..29d246e 100644 (file)
@@ -124,8 +124,16 @@ static int ltq_rcu_usb2_phy_power_on(struct phy *phy)
        reset_control_deassert(priv->phy_reset);
 
        ret = clk_prepare_enable(priv->phy_gate_clk);
-       if (ret)
+       if (ret) {
                dev_err(dev, "failed to enable PHY gate\n");
+               return ret;
+       }
+
+       /*
+        * at least the xrx200 usb2 phy requires some extra time to be
+        * operational after enabling the clock
+        */
+       usleep_range(100, 200);
 
        return ret;
 }
index 45be8aa..8313bd5 100644 (file)
@@ -201,6 +201,7 @@ static const struct of_device_id mtk_hdmi_phy_match[] = {
        },
        {},
 };
+MODULE_DEVICE_TABLE(of, mtk_hdmi_phy_match);
 
 static struct platform_driver mtk_hdmi_phy_driver = {
        .probe = mtk_hdmi_phy_probe,
index 18c4812..c51114d 100644 (file)
@@ -233,8 +233,9 @@ static const struct of_device_id mtk_mipi_tx_match[] = {
          .data = &mt8183_mipitx_data },
        { },
 };
+MODULE_DEVICE_TABLE(of, mtk_mipi_tx_match);
 
-struct platform_driver mtk_mipi_tx_driver = {
+static struct platform_driver mtk_mipi_tx_driver = {
        .probe = mtk_mipi_tx_probe,
        .remove = mtk_mipi_tx_remove,
        .driver = {
index 4728e2b..6ee478b 100644 (file)
@@ -143,7 +143,7 @@ static bool cpcap_usb_vbus_valid(struct cpcap_phy_ddata *ddata)
 
        error = iio_read_channel_processed(ddata->vbus, &value);
        if (error >= 0)
-               return value > 3900 ? true : false;
+               return value > 3900;
 
        dev_err(ddata->dev, "error reading VBUS: %i\n", error);
 
index 0939a9e..9cdebe7 100644 (file)
@@ -212,6 +212,15 @@ static const unsigned int qmp_v4_usb3_uniphy_regs_layout[QPHY_LAYOUT_SIZE] = {
        [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR]  = 0x614,
 };
 
+static const unsigned int sm8350_usb3_uniphy_regs_layout[QPHY_LAYOUT_SIZE] = {
+       [QPHY_SW_RESET]                 = 0x00,
+       [QPHY_START_CTRL]               = 0x44,
+       [QPHY_PCS_STATUS]               = 0x14,
+       [QPHY_PCS_POWER_DOWN_CONTROL]   = 0x40,
+       [QPHY_PCS_AUTONOMOUS_MODE_CTRL] = 0x1008,
+       [QPHY_PCS_LFPS_RXTERM_IRQ_CLEAR]  = 0x1014,
+};
+
 static const unsigned int sdm845_ufsphy_regs_layout[QPHY_LAYOUT_SIZE] = {
        [QPHY_START_CTRL]               = 0x00,
        [QPHY_PCS_READY_STATUS]         = 0x160,
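
The *_regs_layout arrays are the QMP driver's indirection layer: common code addresses PCS registers through generic QPHY_* indices, and each PHY generation supplies its own offset table, which is how one driver serves several hardware revisions. Roughly, under the simplifying assumption that only the offset differs per generation:

/* common code never hardcodes an offset; it goes through the table */
static bool demo_pcs_ready(void __iomem *pcs, const unsigned int *regs)
{
	return !(readl(pcs + regs[QPHY_PCS_STATUS]) & PHYSTATUS);
}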
@@ -1974,6 +1983,291 @@ static const struct qmp_phy_init_tbl sm8250_qmp_gen3x2_pcie_pcs_misc_tbl[] = {
        QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCIE_POWER_STATE_CONFIG4, 0x07),
 };
 
+static const struct qmp_phy_init_tbl sdx55_usb3_uniphy_tx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V4_TX_RCV_DETECT_LVL_2, 0x12),
+       QMP_PHY_INIT_CFG(QSERDES_V4_TX_LANE_MODE_1, 0xd5),
+       QMP_PHY_INIT_CFG(QSERDES_V4_TX_LANE_MODE_2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V4_TX_PI_QEC_CTRL, 0x20),
+       QMP_PHY_INIT_CFG(QSERDES_V4_TX_RES_CODE_LANE_OFFSET_TX, 0x08),
+};
+
+static const struct qmp_phy_init_tbl sdx55_usb3_uniphy_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH4, 0x26),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH3, 0x7f),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH2, 0xbf),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_HIGH, 0x7f),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_00_LOW, 0x7f),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH4, 0xb4),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH3, 0x7b),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH2, 0x5c),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_HIGH, 0xdc),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_MODE_01_LOW, 0xdc),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_PI_CONTROLS, 0x99),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_THRESH1, 0x048),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_THRESH2, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_GAIN1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SB2_GAIN2, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_FO_GAIN, 0x09),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_VGA_CAL_CNTRL1, 0x54),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_VGA_CAL_CNTRL2, 0x0c),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4a),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_DFE_EN_TIMER, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_SIGDET_CNTRL, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_SIGDET_DEGLITCH_CNTRL, 0x0e),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_IDAC_TSETTLE_HIGH, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_RX_IDAC_TSETTLE_LOW, 0xc0),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_UCDR_SO_GAIN, 0x05),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_DCC_CTRL1, 0x0c),
+       QMP_PHY_INIT_CFG(QSERDES_V4_RX_GM_CAL, 0x1f),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_serdes_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_SYSCLK_EN_SEL, 0xd9),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_SEL, 0x11),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP_EN, 0x42),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x02),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_IVCO, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_INITVAL2, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL, 0x11),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE0, 0x82),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE0, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE0, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE0, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE0, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE0, 0x19),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0, 0xac),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0, 0x1e),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_DEC_START_MODE1, 0x98),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_CP_CTRL_MODE1, 0x14),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_RCTRL_MODE1, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_PLL_CCTRL_MODE1, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP1_MODE1, 0x65),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_LOCK_CMP2_MODE1, 0x1e),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1, 0xdd),
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1, 0x23),
+
+       /* Rate B */
+       QMP_PHY_INIT_CFG(QSERDES_V5_COM_VCO_TUNE_MAP, 0x06),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_tx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_1_DIVIDER_BAND0_1, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_2_DIVIDER_BAND0_1, 0x03),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_3_DIVIDER_BAND0_1, 0x01),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_PWM_GEAR_4_DIVIDER_BAND0_1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xf5),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_3, 0x3f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x09),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x09),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_TRAN_DRVR_EMP_EN, 0x0c),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_LVL, 0x24),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_CNTRL, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x1e),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_BAND, 0x18),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x5a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0xf1),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CTRL2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x0e),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_TERM_BW, 0x1b),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL1, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x06),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x1a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x17),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_MEASURE_TIME, 0x10),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW, 0xc0),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0x6d),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0x6d),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xed),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3b),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0x3c),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0xe0),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0xc8),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xc8),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x3b),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xb7),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_LOW, 0xe0),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH, 0xc8),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH2, 0xc8),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH3, 0x3b),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_10_HIGH4, 0xb7),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_DCC_CTRL1, 0x0c),
+};
+
+static const struct qmp_phy_init_tbl sm8350_ufsphy_pcs_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_SIGDET_CTRL2, 0x6d),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_LARGE_AMP_DRV_LVL, 0x0a),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_SMALL_AMP_DRV_LVL, 0x02),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_MID_TERM_CTRL1, 0x43),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_DEBUG_BUS_CLKSEL, 0x1f),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_MIN_HIBERN8_TIME, 0xff),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_PLL_CNTL, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_MSB, 0x16),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_LSB, 0xd8),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_PWM_GEAR_BAND, 0xaa),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_HS_GEAR_BAND, 0x06),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_TX_HSGEAR_CAPABILITY, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_HSGEAR_CAPABILITY, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_RX_SIGDET_CTRL1, 0x0e),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_UFS_MULTI_LANE_CTRL1, 0x02),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_tx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_TX, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_RX, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x16),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0e),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0x35),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_3, 0x3f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x7f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_5, 0x3f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RCV_DETECT_LVL_2, 0x12),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x21),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x05),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE, 0x7f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0x99),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN2, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL1, 0x54),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3, 0x4a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW, 0xc0),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_CNTRL, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x0e),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0xbb),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0x7b),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xbb),
+       QMP_PHY_INIT_CFG_LANE(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3d, 1),
+       QMP_PHY_INIT_CFG_LANE(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0x3c, 2),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xdb),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0x64),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0x24),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xd2),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x13),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa9),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_EN_TIMER, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_AUX_DATA_TCOARSE_TFINE, 0xa0),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_DCC_CTRL1, 0x0c),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_VTH_CODE, 0x10),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_pcs_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_L, 0x40),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_H, 0x00),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG1, 0xd0),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG2, 0x07),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG3, 0x20),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG6, 0x13),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x21),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_CDR_RESET_TIME, 0x0a),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG1, 0x88),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG2, 0x13),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCS_TX_RX_CONFIG, 0x0c),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG1, 0x4b),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG5, 0x10),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL, 0xf8),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2, 0x07),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_tx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_1, 0xa5),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_2, 0x82),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_3, 0x3f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_LANE_MODE_4, 0x3f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_PI_QEC_CTRL, 0x21),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX, 0x10),
+       QMP_PHY_INIT_CFG(QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX, 0x0e),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_rx_tbl[] = {
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH4, 0xdc),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH3, 0xbd),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH2, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_HIGH, 0x7f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_00_LOW, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH4, 0xa9),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH3, 0x7b),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH2, 0xe4),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_HIGH, 0x24),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_MODE_01_LOW, 0x64),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_PI_CONTROLS, 0x99),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH1, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_THRESH2, 0x08),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN1, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SB2_GAIN2, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN, 0x2f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW, 0xff),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_FO_GAIN, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL1, 0x54),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_VGA_CAL_CNTRL2, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2, 0x0f),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4, 0x0a),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1, 0x47),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2, 0x80),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_CNTRL, 0x04),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL, 0x0e),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET, 0x38),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_UCDR_SO_GAIN, 0x05),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_GM_CAL, 0x00),
+       QMP_PHY_INIT_CFG(QSERDES_V5_RX_SIGDET_ENABLES, 0x00),
+};
+
+static const struct qmp_phy_init_tbl sm8350_usb3_uniphy_pcs_tbl[] = {
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG1, 0xd0),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG2, 0x07),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG3, 0x20),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_LOCK_DETECT_CONFIG6, 0x13),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_L, 0xe7),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_RCVR_DTCT_DLY_P1U2_H, 0x03),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_RX_SIGDET_LVL, 0xaa),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_PCS_TX_RX_CONFIG, 0x0c),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_UNI_RXEQTRAINING_DFE_TIME_S2, 0x07),
+       QMP_PHY_INIT_CFG(QPHY_V5_PCS_USB3_UNI_LFPS_DET_HIGH_COUNT_VAL, 0xf8),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_CDR_RESET_TIME, 0x0a),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG1, 0x88),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_ALIGN_DETECT_CONFIG2, 0x13),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG1, 0x4b),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_EQ_CONFIG5, 0x10),
+       QMP_PHY_INIT_CFG(QPHY_V4_PCS_REFGEN_REQ_CONFIG1, 0x21),
+};
+
 /* struct qmp_phy_cfg - per-PHY initialization config */
 struct qmp_phy_cfg {
        /* phy-type - PCIE/UFS/USB */
@@ -2183,6 +2477,11 @@ static const char * const sdm845_ufs_phy_clk_l[] = {
        "ref", "ref_aux",
 };
 
+/* usb3 phy on sdx55 doesn't have com_aux clock */
+static const char * const qmp_v4_sdx55_usbphy_clk_l[] = {
+       "aux", "cfg_ahb", "ref"
+};
+
 /* list of resets */
 static const char * const msm8996_pciephy_reset_l[] = {
        "phy", "common", "cfg",
@@ -2824,6 +3123,117 @@ static const struct qmp_phy_cfg sm8250_usb3_uniphy_cfg = {
        .pwrdn_delay_max        = POWER_DOWN_DELAY_US_MAX,
 };
 
+static const struct qmp_phy_cfg sdx55_usb3_uniphy_cfg = {
+       .type                   = PHY_TYPE_USB3,
+       .nlanes                 = 1,
+
+       .serdes_tbl             = sm8150_usb3_uniphy_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(sm8150_usb3_uniphy_serdes_tbl),
+       .tx_tbl                 = sdx55_usb3_uniphy_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(sdx55_usb3_uniphy_tx_tbl),
+       .rx_tbl                 = sdx55_usb3_uniphy_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(sdx55_usb3_uniphy_rx_tbl),
+       .pcs_tbl                = sm8250_usb3_uniphy_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(sm8250_usb3_uniphy_pcs_tbl),
+       .clk_list               = qmp_v4_sdx55_usbphy_clk_l,
+       .num_clks               = ARRAY_SIZE(qmp_v4_sdx55_usbphy_clk_l),
+       .reset_list             = msm8996_usb3phy_reset_l,
+       .num_resets             = ARRAY_SIZE(msm8996_usb3phy_reset_l),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = qmp_v4_usb3_uniphy_regs_layout,
+
+       .start_ctrl             = SERDES_START | PCS_START,
+       .pwrdn_ctrl             = SW_PWRDN,
+
+       .has_pwrdn_delay        = true,
+       .pwrdn_delay_min        = POWER_DOWN_DELAY_US_MIN,
+       .pwrdn_delay_max        = POWER_DOWN_DELAY_US_MAX,
+};
+
+static const struct qmp_phy_cfg sm8350_ufsphy_cfg = {
+       .type                   = PHY_TYPE_UFS,
+       .nlanes                 = 2,
+
+       .serdes_tbl             = sm8350_ufsphy_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(sm8350_ufsphy_serdes_tbl),
+       .tx_tbl                 = sm8350_ufsphy_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(sm8350_ufsphy_tx_tbl),
+       .rx_tbl                 = sm8350_ufsphy_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(sm8350_ufsphy_rx_tbl),
+       .pcs_tbl                = sm8350_ufsphy_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(sm8350_ufsphy_pcs_tbl),
+       .clk_list               = sdm845_ufs_phy_clk_l,
+       .num_clks               = ARRAY_SIZE(sdm845_ufs_phy_clk_l),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = sm8150_ufsphy_regs_layout,
+
+       .start_ctrl             = SERDES_START,
+       .pwrdn_ctrl             = SW_PWRDN,
+
+       .is_dual_lane_phy       = true,
+};
+
+static const struct qmp_phy_cfg sm8350_usb3phy_cfg = {
+       .type                   = PHY_TYPE_USB3,
+       .nlanes                 = 1,
+
+       .serdes_tbl             = sm8150_usb3_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(sm8150_usb3_serdes_tbl),
+       .tx_tbl                 = sm8350_usb3_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(sm8350_usb3_tx_tbl),
+       .rx_tbl                 = sm8350_usb3_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(sm8350_usb3_rx_tbl),
+       .pcs_tbl                = sm8350_usb3_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(sm8350_usb3_pcs_tbl),
+       .clk_list               = qmp_v4_sm8250_usbphy_clk_l,
+       .num_clks               = ARRAY_SIZE(qmp_v4_sm8250_usbphy_clk_l),
+       .reset_list             = msm8996_usb3phy_reset_l,
+       .num_resets             = ARRAY_SIZE(msm8996_usb3phy_reset_l),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = qmp_v4_usb3phy_regs_layout,
+
+       .start_ctrl             = SERDES_START | PCS_START,
+       .pwrdn_ctrl             = SW_PWRDN,
+
+       .has_pwrdn_delay        = true,
+       .pwrdn_delay_min        = POWER_DOWN_DELAY_US_MIN,
+       .pwrdn_delay_max        = POWER_DOWN_DELAY_US_MAX,
+
+       .has_phy_dp_com_ctrl    = true,
+       .is_dual_lane_phy       = true,
+};
+
+static const struct qmp_phy_cfg sm8350_usb3_uniphy_cfg = {
+       .type                   = PHY_TYPE_USB3,
+       .nlanes                 = 1,
+
+       .serdes_tbl             = sm8150_usb3_uniphy_serdes_tbl,
+       .serdes_tbl_num         = ARRAY_SIZE(sm8150_usb3_uniphy_serdes_tbl),
+       .tx_tbl                 = sm8350_usb3_uniphy_tx_tbl,
+       .tx_tbl_num             = ARRAY_SIZE(sm8350_usb3_uniphy_tx_tbl),
+       .rx_tbl                 = sm8350_usb3_uniphy_rx_tbl,
+       .rx_tbl_num             = ARRAY_SIZE(sm8350_usb3_uniphy_rx_tbl),
+       .pcs_tbl                = sm8350_usb3_uniphy_pcs_tbl,
+       .pcs_tbl_num            = ARRAY_SIZE(sm8350_usb3_uniphy_pcs_tbl),
+       .clk_list               = qmp_v4_phy_clk_l,
+       .num_clks               = ARRAY_SIZE(qmp_v4_phy_clk_l),
+       .reset_list             = msm8996_usb3phy_reset_l,
+       .num_resets             = ARRAY_SIZE(msm8996_usb3phy_reset_l),
+       .vreg_list              = qmp_phy_vreg_l,
+       .num_vregs              = ARRAY_SIZE(qmp_phy_vreg_l),
+       .regs                   = sm8350_usb3_uniphy_regs_layout,
+
+       .start_ctrl             = SERDES_START | PCS_START,
+       .pwrdn_ctrl             = SW_PWRDN,
+
+       .has_pwrdn_delay        = true,
+       .pwrdn_delay_min        = POWER_DOWN_DELAY_US_MIN,
+       .pwrdn_delay_max        = POWER_DOWN_DELAY_US_MAX,
+};
+
 static void qcom_qmp_phy_configure_lane(void __iomem *base,
                                        const unsigned int *regs,
                                        const struct qmp_phy_init_tbl tbl[],
@@ -3135,7 +3545,7 @@ static int qcom_qmp_phy_configure_dp_phy(struct qmp_phy *qphy)
 static int qcom_qmp_dp_phy_calibrate(struct phy *phy)
 {
        struct qmp_phy *qphy = phy_get_drvdata(phy);
-       const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d };
+       static const u8 cfg1_settings[] = { 0x13, 0x23, 0x1d };
        u8 val;
 
        qphy->dp_aux_cfg++;
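
Making the function-local lookup table "static const" is a small but real improvement: without "static", a const array with an initializer may be rebuilt on the stack on every call, whereas "static const" gives one read-only copy in .rodata for the lifetime of the module. For example:

void demo(void)
{
	const u8 a[] = { 0x13, 0x23, 0x1d };		/* may be copied per call */
	static const u8 b[] = { 0x13, 0x23, 0x1d };	/* one shared copy */
}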
@@ -4129,6 +4539,12 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
                .compatible = "qcom,sc7180-qmp-usb3-dp-phy",
                /* It's a combo phy */
        }, {
+               .compatible = "qcom,sc8180x-qmp-ufs-phy",
+               .data = &sm8150_ufsphy_cfg,
+       }, {
+               .compatible = "qcom,sc8180x-qmp-usb3-phy",
+               .data = &sm8150_usb3phy_cfg,
+       }, {
                .compatible = "qcom,sdm845-qhp-pcie-phy",
                .data = &sdm845_qhp_pciephy_cfg,
        }, {
@@ -4171,8 +4587,20 @@ static const struct of_device_id qcom_qmp_phy_of_match_table[] = {
                .compatible = "qcom,sm8250-qmp-gen3x2-pcie-phy",
                .data = &sm8250_qmp_gen3x2_pciephy_cfg,
        }, {
+               .compatible = "qcom,sm8350-qmp-ufs-phy",
+               .data = &sm8350_ufsphy_cfg,
+       }, {
                .compatible = "qcom,sm8250-qmp-modem-pcie-phy",
                .data = &sm8250_qmp_gen3x2_pciephy_cfg,
+       }, {
+               .compatible = "qcom,sdx55-qmp-usb3-uni-phy",
+               .data = &sdx55_usb3_uniphy_cfg,
+       }, {
+               .compatible = "qcom,sm8350-qmp-usb3-phy",
+               .data = &sm8350_usb3phy_cfg,
+       }, {
+               .compatible = "qcom,sm8350-qmp-usb3-uni-phy",
+               .data = &sm8350_usb3_uniphy_cfg,
        },
        { },
 };
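
Every sdx55/sm8350 table added above feeds the same table-driven init machinery: a QMP_PHY_INIT_CFG() entry pairs a register with a value (QMP_PHY_INIT_CFG_LANE additionally carries a lane mask), and qcom_qmp_phy_configure_lane() replays the list onto the mapped region. A simplified sketch of that replay loop; the real struct also carries flags this version omits:

static void demo_configure(void __iomem *base,
			   const struct qmp_phy_init_tbl tbl[], int num,
			   u8 lane_mask)
{
	int i;

	for (i = 0; i < num; i++) {
		const struct qmp_phy_init_tbl *t = &tbl[i];

		if (t->lane_mask && !(t->lane_mask & lane_mask))
			continue;	/* entry targets a different lane */

		writel(t->val, base + t->offset);
	}
}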
index db92a46..71ce3aa 100644 (file)
 #define QPHY_V4_PCS_PCIE_PRESET_P10_PRE                        0xbc
 #define QPHY_V4_PCS_PCIE_PRESET_P10_POST               0xe0
 
+/* Only for QMP V5 PHY - QSERDES COM registers */
+#define QSERDES_V5_COM_PLL_IVCO                                0x058
+#define QSERDES_V5_COM_CP_CTRL_MODE0                   0x074
+#define QSERDES_V5_COM_CP_CTRL_MODE1                   0x078
+#define QSERDES_V5_COM_PLL_RCTRL_MODE0                 0x07c
+#define QSERDES_V5_COM_PLL_RCTRL_MODE1                 0x080
+#define QSERDES_V5_COM_PLL_CCTRL_MODE0                 0x084
+#define QSERDES_V5_COM_PLL_CCTRL_MODE1                 0x088
+#define QSERDES_V5_COM_SYSCLK_EN_SEL                   0x094
+#define QSERDES_V5_COM_LOCK_CMP_EN                     0x0a4
+#define QSERDES_V5_COM_LOCK_CMP1_MODE0                 0x0ac
+#define QSERDES_V5_COM_LOCK_CMP2_MODE0                 0x0b0
+#define QSERDES_V5_COM_LOCK_CMP1_MODE1                 0x0b4
+#define QSERDES_V5_COM_DEC_START_MODE0                 0x0bc
+#define QSERDES_V5_COM_LOCK_CMP2_MODE1                 0x0b8
+#define QSERDES_V5_COM_DEC_START_MODE1                 0x0c4
+#define QSERDES_V5_COM_VCO_TUNE_MAP                    0x10c
+#define QSERDES_V5_COM_VCO_TUNE_INITVAL2               0x124
+#define QSERDES_V5_COM_HSCLK_SEL                       0x158
+#define QSERDES_V5_COM_HSCLK_HS_SWITCH_SEL             0x15c
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE0      0x1ac
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE0      0x1b0
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE1_MODE1      0x1b4
+#define QSERDES_V5_COM_BIN_VCOCAL_HSCLK_SEL            0x1bc
+#define QSERDES_V5_COM_BIN_VCOCAL_CMP_CODE2_MODE1      0x1b8
+
+/* Only for QMP V5 PHY - TX registers */
+#define QSERDES_V5_TX_RES_CODE_LANE_TX                 0x34
+#define QSERDES_V5_TX_RES_CODE_LANE_RX                 0x38
+#define QSERDES_V5_TX_RES_CODE_LANE_OFFSET_TX          0x3c
+#define QSERDES_V5_TX_RES_CODE_LANE_OFFSET_RX          0x40
+#define QSERDES_V5_TX_LANE_MODE_1                      0x84
+#define QSERDES_V5_TX_LANE_MODE_2                      0x88
+#define QSERDES_V5_TX_LANE_MODE_3                      0x8c
+#define QSERDES_V5_TX_LANE_MODE_4                      0x90
+#define QSERDES_V5_TX_LANE_MODE_5                      0x94
+#define QSERDES_V5_TX_RCV_DETECT_LVL_2                 0xa4
+#define QSERDES_V5_TX_TRAN_DRVR_EMP_EN                 0xc0
+#define QSERDES_V5_TX_PI_QEC_CTRL                      0xe4
+#define QSERDES_V5_TX_PWM_GEAR_1_DIVIDER_BAND0_1       0x178
+#define QSERDES_V5_TX_PWM_GEAR_2_DIVIDER_BAND0_1       0x17c
+#define QSERDES_V5_TX_PWM_GEAR_3_DIVIDER_BAND0_1       0x180
+#define QSERDES_V5_TX_PWM_GEAR_4_DIVIDER_BAND0_1       0x184
+
+/* Only for QMP V5 PHY - RX registers */
+#define QSERDES_V5_RX_UCDR_FO_GAIN                     0x008
+#define QSERDES_V5_RX_UCDR_SO_GAIN                     0x014
+#define QSERDES_V5_RX_UCDR_FASTLOCK_FO_GAIN            0x030
+#define QSERDES_V5_RX_UCDR_SO_SATURATION_AND_ENABLE    0x034
+#define QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_LOW          0x03c
+#define QSERDES_V5_RX_UCDR_FASTLOCK_COUNT_HIGH         0x040
+#define QSERDES_V5_RX_UCDR_PI_CONTROLS                 0x044
+#define QSERDES_V5_RX_UCDR_PI_CTRL2                    0x048
+#define QSERDES_V5_RX_UCDR_SB2_THRESH1                 0x04c
+#define QSERDES_V5_RX_UCDR_SB2_THRESH2                 0x050
+#define QSERDES_V5_RX_UCDR_SB2_GAIN1                   0x054
+#define QSERDES_V5_RX_UCDR_SB2_GAIN2                   0x058
+#define QSERDES_V5_RX_AUX_DATA_TCOARSE_TFINE           0x060
+#define QSERDES_V5_RX_RCLK_AUXDATA_SEL                 0x064
+#define QSERDES_V5_RX_AC_JTAG_ENABLE                   0x068
+#define QSERDES_V5_RX_AC_JTAG_MODE                     0x078
+#define QSERDES_V5_RX_RX_TERM_BW                       0x080
+#define QSERDES_V5_RX_VGA_CAL_CNTRL1                   0x0d4
+#define QSERDES_V5_RX_VGA_CAL_CNTRL2                   0x0d8
+#define QSERDES_V5_RX_GM_CAL                           0x0dc
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL1            0x0e8
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL2            0x0ec
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL3            0x0f0
+#define QSERDES_V5_RX_RX_EQU_ADAPTOR_CNTRL4            0x0f4
+#define QSERDES_V5_RX_RX_IDAC_TSETTLE_LOW              0x0f8
+#define QSERDES_V5_RX_RX_IDAC_TSETTLE_HIGH             0x0fc
+#define QSERDES_V5_RX_RX_IDAC_MEASURE_TIME             0x100
+#define QSERDES_V5_RX_RX_EQ_OFFSET_ADAPTOR_CNTRL1      0x110
+#define QSERDES_V5_RX_RX_OFFSET_ADAPTOR_CNTRL2         0x114
+#define QSERDES_V5_RX_SIGDET_ENABLES                   0x118
+#define QSERDES_V5_RX_SIGDET_CNTRL                     0x11c
+#define QSERDES_V5_RX_SIGDET_LVL                       0x120
+#define QSERDES_V5_RX_SIGDET_DEGLITCH_CNTRL            0x124
+#define QSERDES_V5_RX_RX_BAND                          0x128
+#define QSERDES_V5_RX_RX_MODE_00_LOW                   0x15c
+#define QSERDES_V5_RX_RX_MODE_00_HIGH                  0x160
+#define QSERDES_V5_RX_RX_MODE_00_HIGH2                 0x164
+#define QSERDES_V5_RX_RX_MODE_00_HIGH3                 0x168
+#define QSERDES_V5_RX_RX_MODE_00_HIGH4                 0x16c
+#define QSERDES_V5_RX_RX_MODE_01_LOW                   0x170
+#define QSERDES_V5_RX_RX_MODE_01_HIGH                  0x174
+#define QSERDES_V5_RX_RX_MODE_01_HIGH2                 0x178
+#define QSERDES_V5_RX_RX_MODE_01_HIGH3                 0x17c
+#define QSERDES_V5_RX_RX_MODE_01_HIGH4                 0x180
+#define QSERDES_V5_RX_RX_MODE_10_LOW                   0x184
+#define QSERDES_V5_RX_RX_MODE_10_HIGH                  0x188
+#define QSERDES_V5_RX_RX_MODE_10_HIGH2                 0x18c
+#define QSERDES_V5_RX_RX_MODE_10_HIGH3                 0x190
+#define QSERDES_V5_RX_RX_MODE_10_HIGH4                 0x194
+#define QSERDES_V5_RX_DFE_EN_TIMER                     0x1a0
+#define QSERDES_V5_RX_DFE_CTLE_POST_CAL_OFFSET         0x1a4
+#define QSERDES_V5_RX_DCC_CTRL1                                0x1a8
+#define QSERDES_V5_RX_VTH_CODE                         0x1b0
+
+/* Only for QMP V5 PHY - UFS PCS registers */
+#define QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_MSB   0x00c
+#define QPHY_V5_PCS_UFS_TIMER_20US_CORECLK_STEPS_LSB   0x010
+#define QPHY_V5_PCS_UFS_PLL_CNTL                       0x02c
+#define QPHY_V5_PCS_UFS_TX_LARGE_AMP_DRV_LVL           0x030
+#define QPHY_V5_PCS_UFS_TX_SMALL_AMP_DRV_LVL           0x038
+#define QPHY_V5_PCS_UFS_TX_HSGEAR_CAPABILITY           0x074
+#define QPHY_V5_PCS_UFS_RX_HSGEAR_CAPABILITY           0x0b4
+#define QPHY_V5_PCS_UFS_DEBUG_BUS_CLKSEL               0x124
+#define QPHY_V5_PCS_UFS_RX_MIN_HIBERN8_TIME            0x150
+#define QPHY_V5_PCS_UFS_RX_SIGDET_CTRL1                        0x154
+#define QPHY_V5_PCS_UFS_RX_SIGDET_CTRL2                        0x158
+#define QPHY_V5_PCS_UFS_TX_PWM_GEAR_BAND               0x160
+#define QPHY_V5_PCS_UFS_TX_HS_GEAR_BAND                        0x168
+#define QPHY_V5_PCS_UFS_TX_MID_TERM_CTRL1              0x1d8
+#define QPHY_V5_PCS_UFS_MULTI_LANE_CTRL1               0x1e0
+
+/* Only for QMP V5 PHY - USB3 has different offsets than V4 */
+#define QPHY_V5_PCS_USB3_POWER_STATE_CONFIG1           0x300
+#define QPHY_V5_PCS_USB3_AUTONOMOUS_MODE_STATUS                0x304
+#define QPHY_V5_PCS_USB3_AUTONOMOUS_MODE_CTRL          0x308
+#define QPHY_V5_PCS_USB3_AUTONOMOUS_MODE_CTRL2         0x30c
+#define QPHY_V5_PCS_USB3_LFPS_RXTERM_IRQ_SOURCE_STATUS 0x310
+#define QPHY_V5_PCS_USB3_LFPS_RXTERM_IRQ_CLEAR         0x314
+#define QPHY_V5_PCS_USB3_LFPS_DET_HIGH_COUNT_VAL       0x318
+#define QPHY_V5_PCS_USB3_LFPS_TX_ECSTART               0x31c
+#define QPHY_V5_PCS_USB3_LFPS_PER_TIMER_VAL            0x320
+#define QPHY_V5_PCS_USB3_LFPS_TX_END_CNT_U3_START      0x324
+#define QPHY_V5_PCS_USB3_LFPS_CONFIG1                  0x328
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_LOCK_TIME                0x32c
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_WAIT_TIME                0x330
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_CTLE_TIME                0x334
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_WAIT_TIME_S2     0x338
+#define QPHY_V5_PCS_USB3_RXEQTRAINING_DFE_TIME_S2      0x33c
+#define QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_L            0x340
+#define QPHY_V5_PCS_USB3_RCVR_DTCT_DLY_U3_H            0x344
+#define QPHY_V5_PCS_USB3_ARCVR_DTCT_EN_PERIOD          0x348
+#define QPHY_V5_PCS_USB3_ARCVR_DTCT_CM_DLY             0x34c
+#define QPHY_V5_PCS_USB3_TXONESZEROS_RUN_LENGTH                0x350
+#define QPHY_V5_PCS_USB3_ALFPS_DEGLITCH_VAL            0x354
+#define QPHY_V5_PCS_USB3_SIGDET_STARTUP_TIMER_VAL      0x358
+#define QPHY_V5_PCS_USB3_TEST_CONTROL                  0x35c
+#define QPHY_V5_PCS_USB3_RXTERMINATION_DLY_SEL         0x360
+
+/* Only for QMP V5 PHY - UNI has 0x1000 offset for PCS_USB3 regs */
+#define QPHY_V5_PCS_USB3_UNI_LFPS_DET_HIGH_COUNT_VAL   0x1018
+#define QPHY_V5_PCS_USB3_UNI_RXEQTRAINING_DFE_TIME_S2  0x103c
+
 #endif
index 1097922..8f1bf7e 100644 (file)
@@ -22,6 +22,7 @@
 
 #include <dt-bindings/phy/phy-qcom-qusb2.h>
 
+#define QUSB2PHY_PLL                   0x0
 #define QUSB2PHY_PLL_TEST              0x04
 #define CLK_REF_SEL                    BIT(7)
 
@@ -135,6 +136,35 @@ enum qusb2phy_reg_layout {
        QUSB2PHY_INTR_CTRL,
 };
 
+static const struct qusb2_phy_init_tbl ipq6018_init_tbl[] = {
+       QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL, 0x14),
+       QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE1, 0xF8),
+       QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE2, 0xB3),
+       QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE3, 0x83),
+       QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE4, 0xC0),
+       QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TUNE, 0x30),
+       QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL1, 0x79),
+       QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_USER_CTL2, 0x21),
+       QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TUNE5, 0x00),
+       QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_PWR_CTRL, 0x00),
+       QUSB2_PHY_INIT_CFG_L(QUSB2PHY_PORT_TEST2, 0x14),
+       QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_TEST, 0x80),
+       QUSB2_PHY_INIT_CFG(QUSB2PHY_PLL_AUTOPGM_CTL1, 0x9F),
+};
+
+static const unsigned int ipq6018_regs_layout[] = {
+       [QUSB2PHY_PLL_STATUS]              = 0x38,
+       [QUSB2PHY_PORT_TUNE1]              = 0x80,
+       [QUSB2PHY_PORT_TUNE2]              = 0x84,
+       [QUSB2PHY_PORT_TUNE3]              = 0x88,
+       [QUSB2PHY_PORT_TUNE4]              = 0x8C,
+       [QUSB2PHY_PORT_TUNE5]              = 0x90,
+       [QUSB2PHY_PORT_TEST1]              = 0x98,
+       [QUSB2PHY_PORT_TEST2]              = 0x9C,
+       [QUSB2PHY_PORT_POWERDOWN]          = 0xB4,
+       [QUSB2PHY_INTR_CTRL]               = 0xBC,
+};
+
 static const unsigned int msm8996_regs_layout[] = {
        [QUSB2PHY_PLL_STATUS]           = 0x38,
        [QUSB2PHY_PORT_TUNE1]           = 0x80,
@@ -245,6 +275,9 @@ struct qusb2_phy_cfg {
 
        /* true if PHY has PLL_CORE_INPUT_OVERRIDE register to reset PLL */
        bool has_pll_override;
+
+       /* true if PHY default clk scheme is single-ended */
+       bool se_clk_scheme_default;
 };
 
 static const struct qusb2_phy_cfg msm8996_phy_cfg = {
@@ -253,6 +286,7 @@ static const struct qusb2_phy_cfg msm8996_phy_cfg = {
        .regs           = msm8996_regs_layout,
 
        .has_pll_test   = true,
+       .se_clk_scheme_default = true,
        .disable_ctrl   = (CLAMP_N_EN | FREEZIO_N | POWER_DOWN),
        .mask_core_ready = PLL_LOCKED,
        .autoresume_en   = BIT(3),
@@ -266,10 +300,22 @@ static const struct qusb2_phy_cfg msm8998_phy_cfg = {
        .disable_ctrl   = POWER_DOWN,
        .mask_core_ready = CORE_READY_STATUS,
        .has_pll_override = true,
+       .se_clk_scheme_default = true,
        .autoresume_en   = BIT(0),
        .update_tune1_with_efuse = true,
 };
 
+static const struct qusb2_phy_cfg ipq6018_phy_cfg = {
+       .tbl            = ipq6018_init_tbl,
+       .tbl_num        = ARRAY_SIZE(ipq6018_init_tbl),
+       .regs           = ipq6018_regs_layout,
+
+       .disable_ctrl   = POWER_DOWN,
+       .mask_core_ready = PLL_LOCKED,
+       /* autoresume not used */
+       .autoresume_en   = BIT(0),
+};
+
 static const struct qusb2_phy_cfg qusb2_v2_phy_cfg = {
        .tbl            = qusb2_v2_init_tbl,
        .tbl_num        = ARRAY_SIZE(qusb2_v2_init_tbl),
@@ -279,10 +325,23 @@ static const struct qusb2_phy_cfg qusb2_v2_phy_cfg = {
                           POWER_DOWN),
        .mask_core_ready = CORE_READY_STATUS,
        .has_pll_override = true,
+       .se_clk_scheme_default = true,
        .autoresume_en    = BIT(0),
        .update_tune1_with_efuse = true,
 };
 
+static const struct qusb2_phy_cfg sdm660_phy_cfg = {
+       .tbl            = msm8996_init_tbl,
+       .tbl_num        = ARRAY_SIZE(msm8996_init_tbl),
+       .regs           = msm8996_regs_layout,
+
+       .has_pll_test   = true,
+       .se_clk_scheme_default = false,
+       .disable_ctrl   = (CLAMP_N_EN | FREEZIO_N | POWER_DOWN),
+       .mask_core_ready = PLL_LOCKED,
+       .autoresume_en   = BIT(3),
+};
+
 static const char * const qusb2_phy_vreg_names[] = {
        "vdda-pll", "vdda-phy-dpdm",
 };
@@ -701,8 +760,13 @@ static int qusb2_phy_init(struct phy *phy)
        /* Required to get phy pll lock successfully */
        usleep_range(150, 160);
 
-       /* Default is single-ended clock on msm8996 */
-       qphy->has_se_clk_scheme = true;
+       /*
+        * Not all SoCs have a readable TCSR_PHY_CLK_SCHEME register in
+        * the TCSR; if there is none, use the default value hardcoded
+        * in the configuration.
+        */
+       qphy->has_se_clk_scheme = cfg->se_clk_scheme_default;
+
        /*
         * read TCSR_PHY_CLK_SCHEME register to check if single-ended
         * clock scheme is selected. If yes, then disable differential
@@ -810,6 +874,9 @@ static const struct phy_ops qusb2_phy_gen_ops = {
 
 static const struct of_device_id qusb2_phy_of_match_table[] = {
        {
+               .compatible     = "qcom,ipq6018-qusb2-phy",
+               .data           = &ipq6018_phy_cfg,
+       }, {
                .compatible     = "qcom,ipq8074-qusb2-phy",
                .data           = &msm8996_phy_cfg,
        }, {
@@ -819,6 +886,9 @@ static const struct of_device_id qusb2_phy_of_match_table[] = {
                .compatible     = "qcom,msm8998-qusb2-phy",
                .data           = &msm8998_phy_cfg,
        }, {
+               .compatible     = "qcom,sdm660-qusb2-phy",
+               .data           = &sdm660_phy_cfg,
+       }, {
                /*
                 * Deprecated. Only here to support legacy device
                 * trees that didn't include "qcom,qusb2-v2-phy"
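
The new se_clk_scheme_default flag exists because the driver used to hardcode a single-ended default, which is wrong for sdm660 (hence its cfg setting the flag to false). The resulting decision is: start from the per-SoC default, then let the TCSR override it when the register is actually readable. A sketch of that flow; the TCSR field names and bit polarity here are assumptions for illustration, not copied from the driver:

	qphy->has_se_clk_scheme = cfg->se_clk_scheme_default;

	/* override only when the TCSR syscon is present and readable */
	if (qphy->tcsr &&
	    !regmap_read(qphy->tcsr, qphy->clk_scheme_offset, &val))
		qphy->has_se_clk_scheme = !(val & PHY_CLK_SCHEME_SEL);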
index a52a9bf..8807e59 100644 (file)
@@ -401,13 +401,26 @@ static const struct hsphy_init_seq init_seq_femtophy[] = {
        HSPHY_INIT_CFG(0x90, 0x60, 0),
 };
 
+static const struct hsphy_init_seq init_seq_mdm9607[] = {
+       HSPHY_INIT_CFG(0x80, 0x44, 0),
+       HSPHY_INIT_CFG(0x81, 0x38, 0),
+       HSPHY_INIT_CFG(0x82, 0x24, 0),
+       HSPHY_INIT_CFG(0x83, 0x13, 0),
+};
+
 static const struct hsphy_data hsphy_data_femtophy = {
        .init_seq = init_seq_femtophy,
        .init_seq_num = ARRAY_SIZE(init_seq_femtophy),
 };
 
+static const struct hsphy_data hsphy_data_mdm9607 = {
+       .init_seq = init_seq_mdm9607,
+       .init_seq_num = ARRAY_SIZE(init_seq_mdm9607),
+};
+
 static const struct of_device_id qcom_snps_hsphy_match[] = {
        { .compatible = "qcom,usb-hs-28nm-femtophy", .data = &hsphy_data_femtophy, },
+       { .compatible = "qcom,usb-hs-28nm-mdm9607", .data = &hsphy_data_mdm9607, },
        { },
 };
 MODULE_DEVICE_TABLE(of, qcom_snps_hsphy_match);
index 1e424f2..20023f6 100644 (file)
@@ -248,15 +248,17 @@ static int rockchip_emmc_phy_init(struct phy *phy)
         * - SDHCI driver to get the PHY
         * - SDHCI driver to init the PHY
         *
-        * The clock is optional, so upon any error we just set to NULL.
+        * The clock is optional, so use clk_get_optional() to get it and
+        * report an error only when a real error (not absence) is returned.
         *
         * NOTE: we don't do anything special for EPROBE_DEFER here.  Given the
         * above expected use case, EPROBE_DEFER isn't sensible to expect, so
         * it's just like any other error.
         */
-       rk_phy->emmcclk = clk_get(&phy->dev, "emmcclk");
+       rk_phy->emmcclk = clk_get_optional(&phy->dev, "emmcclk");
        if (IS_ERR(rk_phy->emmcclk)) {
-               dev_dbg(&phy->dev, "Error getting emmcclk: %d\n", ret);
+               ret = PTR_ERR(rk_phy->emmcclk);
+               dev_err(&phy->dev, "Error getting emmcclk: %d\n", ret);
                rk_phy->emmcclk = NULL;
        }
 
@@ -380,10 +382,10 @@ static int rockchip_emmc_phy_probe(struct platform_device *pdev)
        if (!of_property_read_u32(dev->of_node, "drive-impedance-ohm", &val))
                rk_phy->drive_impedance = convert_drive_impedance_ohm(pdev, val);
 
-       if (of_property_read_bool(dev->of_node, "enable-strobe-pulldown"))
+       if (of_property_read_bool(dev->of_node, "rockchip,enable-strobe-pulldown"))
                rk_phy->enable_strobe_pulldown = PHYCTRL_REN_STRB_ENABLE;
 
-       if (!of_property_read_u32(dev->of_node, "output-tapdelay-select", &val)) {
+       if (!of_property_read_u32(dev->of_node, "rockchip,output-tapdelay-select", &val)) {
                if (val <= PHYCTRL_OTAPDLYSEL_MAXVALUE)
                        rk_phy->output_tapdelay_select = val;
                else
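
clk_get_optional() differs from clk_get() in exactly the way this file needs: a clock that simply isn't described in DT yields NULL rather than an error pointer, and every clk_*() call treats a NULL clk as a no-op, so IS_ERR() now only fires for genuine failures, which the driver chooses to log before carrying on without the clock. (The property renames in the same hunk add the required "rockchip," vendor prefix.) The general idiom, roughly, with error handling of the enable elided:

	clk = clk_get_optional(dev, "emmcclk");
	if (IS_ERR(clk))
		return PTR_ERR(clk);	/* real failure, e.g. -EPROBE_DEFER */

	clk_prepare_enable(clk);	/* NULL clk: both calls are no-ops */
	clk_disable_unprepare(clk);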
index a54317e..d08fbb1 100644 (file)
@@ -8,7 +8,7 @@
 #include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/delay.h>
-#include <linux/io.h>
+#include <linux/iopoll.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/of_platform.h>
@@ -17,6 +17,7 @@
 
 #define STM32_USBPHYC_PLL      0x0
 #define STM32_USBPHYC_MISC     0x8
+#define STM32_USBPHYC_MONITOR(X) (0x108 + ((X) * 0x100))
 #define STM32_USBPHYC_VERSION  0x3F4
 
 /* STM32_USBPHYC_PLL bit fields */
 /* STM32_USBPHYC_MISC bit fields */
 #define SWITHOST               BIT(0)
 
+/* STM32_USBPHYC_MONITOR bit fields */
+#define STM32_USBPHYC_MON_OUT  GENMASK(3, 0)
+#define STM32_USBPHYC_MON_SEL  GENMASK(8, 4)
+#define STM32_USBPHYC_MON_SEL_LOCKP 0x1F
+#define STM32_USBPHYC_MON_OUT_LOCKP BIT(3)
+
 /* STM32_USBPHYC_VERSION bit fields */
 #define MINREV                 GENMASK(3, 0)
 #define MAJREV                 GENMASK(7, 4)
 
-static const char * const supplies_names[] = {
-       "vdda1v1",      /* 1V1 */
-       "vdda1v8",      /* 1V8 */
-};
-
-#define NUM_SUPPLIES           ARRAY_SIZE(supplies_names)
-
-#define PLL_LOCK_TIME_US       100
-#define PLL_PWR_DOWN_TIME_US   5
 #define PLL_FVCO_MHZ           2880
 #define PLL_INFF_MIN_RATE_HZ   19200000
 #define PLL_INFF_MAX_RATE_HZ   38400000
@@ -58,7 +56,6 @@ struct pll_params {
 struct stm32_usbphyc_phy {
        struct phy *phy;
        struct stm32_usbphyc *usbphyc;
-       struct regulator_bulk_data supplies[NUM_SUPPLIES];
        u32 index;
        bool active;
 };
@@ -70,6 +67,9 @@ struct stm32_usbphyc {
        struct reset_control *rst;
        struct stm32_usbphyc_phy **phys;
        int nphys;
+       struct regulator *vdda1v1;
+       struct regulator *vdda1v8;
+       atomic_t n_pll_cons;
        int switch_setup;
 };
 
@@ -83,6 +83,41 @@ static inline void stm32_usbphyc_clr_bits(void __iomem *reg, u32 bits)
        writel_relaxed(readl_relaxed(reg) & ~bits, reg);
 }
 
+static int stm32_usbphyc_regulators_enable(struct stm32_usbphyc *usbphyc)
+{
+       int ret;
+
+       ret = regulator_enable(usbphyc->vdda1v1);
+       if (ret)
+               return ret;
+
+       ret = regulator_enable(usbphyc->vdda1v8);
+       if (ret)
+               goto vdda1v1_disable;
+
+       return 0;
+
+vdda1v1_disable:
+       regulator_disable(usbphyc->vdda1v1);
+
+       return ret;
+}
+
+static int stm32_usbphyc_regulators_disable(struct stm32_usbphyc *usbphyc)
+{
+       int ret;
+
+       ret = regulator_disable(usbphyc->vdda1v8);
+       if (ret)
+               return ret;
+
+       ret = regulator_disable(usbphyc->vdda1v1);
+       if (ret)
+               return ret;
+
+       return 0;
+}
+
 static void stm32_usbphyc_get_pll_params(u32 clk_rate,
                                         struct pll_params *pll_params)
 {
@@ -142,83 +177,106 @@ static int stm32_usbphyc_pll_init(struct stm32_usbphyc *usbphyc)
        return 0;
 }
 
-static bool stm32_usbphyc_has_one_phy_active(struct stm32_usbphyc *usbphyc)
+static int __stm32_usbphyc_pll_disable(struct stm32_usbphyc *usbphyc)
 {
-       int i;
+       void __iomem *pll_reg = usbphyc->base + STM32_USBPHYC_PLL;
+       u32 pllen;
+
+       stm32_usbphyc_clr_bits(pll_reg, PLLEN);
 
-       for (i = 0; i < usbphyc->nphys; i++)
-               if (usbphyc->phys[i]->active)
-                       return true;
+       /* Wait for minimum width of powerdown pulse (ENABLE = Low) */
+       if (readl_relaxed_poll_timeout(pll_reg, pllen, !(pllen & PLLEN), 5, 50))
+               dev_err(usbphyc->dev, "PLL not reset\n");
 
-       return false;
+       return stm32_usbphyc_regulators_disable(usbphyc);
+}
+
+static int stm32_usbphyc_pll_disable(struct stm32_usbphyc *usbphyc)
+{
+       /* Check if a phy port is still active or clk48 in use */
+       if (atomic_dec_return(&usbphyc->n_pll_cons) > 0)
+               return 0;
+
+       return __stm32_usbphyc_pll_disable(usbphyc);
 }
 
 static int stm32_usbphyc_pll_enable(struct stm32_usbphyc *usbphyc)
 {
        void __iomem *pll_reg = usbphyc->base + STM32_USBPHYC_PLL;
-       bool pllen = (readl_relaxed(pll_reg) & PLLEN);
+       bool pllen = readl_relaxed(pll_reg) & PLLEN;
        int ret;
 
-       /* Check if one phy port has already configured the pll */
-       if (pllen && stm32_usbphyc_has_one_phy_active(usbphyc))
+       /*
+        * Check whether a phy port or a clk48 prepare has already
+        * configured the pll, and ensure the PLL is actually enabled
+        */
+       if (atomic_inc_return(&usbphyc->n_pll_cons) > 1 && pllen)
                return 0;
 
        if (pllen) {
-               stm32_usbphyc_clr_bits(pll_reg, PLLEN);
-               /* Wait for minimum width of powerdown pulse (ENABLE = Low) */
-               udelay(PLL_PWR_DOWN_TIME_US);
+               /*
+                * The PLL shouldn't be enabled without a known consumer;
+                * disable it and reinitialize n_pll_cons.
+                */
+               dev_warn(usbphyc->dev, "PLL enabled without known consumers\n");
+
+               ret = __stm32_usbphyc_pll_disable(usbphyc);
+               if (ret)
+                       return ret;
        }
 
+       ret = stm32_usbphyc_regulators_enable(usbphyc);
+       if (ret)
+               goto dec_n_pll_cons;
+
        ret = stm32_usbphyc_pll_init(usbphyc);
        if (ret)
-               return ret;
+               goto reg_disable;
 
        stm32_usbphyc_set_bits(pll_reg, PLLEN);
 
-       /* Wait for maximum lock time */
-       udelay(PLL_LOCK_TIME_US);
-
-       if (!(readl_relaxed(pll_reg) & PLLEN)) {
-               dev_err(usbphyc->dev, "PLLEN not set\n");
-               return -EIO;
-       }
-
        return 0;
-}
-
-static int stm32_usbphyc_pll_disable(struct stm32_usbphyc *usbphyc)
-{
-       void __iomem *pll_reg = usbphyc->base + STM32_USBPHYC_PLL;
 
-       /* Check if other phy port active */
-       if (stm32_usbphyc_has_one_phy_active(usbphyc))
-               return 0;
-
-       stm32_usbphyc_clr_bits(pll_reg, PLLEN);
-       /* Wait for minimum width of powerdown pulse (ENABLE = Low) */
-       udelay(PLL_PWR_DOWN_TIME_US);
+reg_disable:
+       stm32_usbphyc_regulators_disable(usbphyc);
 
-       if (readl_relaxed(pll_reg) & PLLEN) {
-               dev_err(usbphyc->dev, "PLL not reset\n");
-               return -EIO;
-       }
+dec_n_pll_cons:
+       atomic_dec(&usbphyc->n_pll_cons);
 
-       return 0;
+       return ret;
 }
 
 static int stm32_usbphyc_phy_init(struct phy *phy)
 {
        struct stm32_usbphyc_phy *usbphyc_phy = phy_get_drvdata(phy);
        struct stm32_usbphyc *usbphyc = usbphyc_phy->usbphyc;
+       u32 reg_mon = STM32_USBPHYC_MONITOR(usbphyc_phy->index);
+       u32 monsel = FIELD_PREP(STM32_USBPHYC_MON_SEL,
+                               STM32_USBPHYC_MON_SEL_LOCKP);
+       u32 monout;
        int ret;
 
        ret = stm32_usbphyc_pll_enable(usbphyc);
        if (ret)
                return ret;
 
+       /* Check that PLL Lock input to PHY is High */
+       writel_relaxed(monsel, usbphyc->base + reg_mon);
+       ret = readl_relaxed_poll_timeout(usbphyc->base + reg_mon, monout,
+                                        (monout & STM32_USBPHYC_MON_OUT_LOCKP),
+                                        100, 1000);
+       if (ret) {
+               dev_err(usbphyc->dev, "PLL Lock input to PHY is Low (val=%x)\n",
+                       (u32)(monout & STM32_USBPHYC_MON_OUT));
+               goto pll_disable;
+       }
+
        usbphyc_phy->active = true;
 
        return 0;
+
+pll_disable:
+       return stm32_usbphyc_pll_disable(usbphyc);
 }
 
 static int stm32_usbphyc_phy_exit(struct phy *phy)
@@ -231,25 +289,9 @@ static int stm32_usbphyc_phy_exit(struct phy *phy)
        return stm32_usbphyc_pll_disable(usbphyc);
 }
 
-static int stm32_usbphyc_phy_power_on(struct phy *phy)
-{
-       struct stm32_usbphyc_phy *usbphyc_phy = phy_get_drvdata(phy);
-
-       return regulator_bulk_enable(NUM_SUPPLIES, usbphyc_phy->supplies);
-}
-
-static int stm32_usbphyc_phy_power_off(struct phy *phy)
-{
-       struct stm32_usbphyc_phy *usbphyc_phy = phy_get_drvdata(phy);
-
-       return regulator_bulk_disable(NUM_SUPPLIES, usbphyc_phy->supplies);
-}
-
 static const struct phy_ops stm32_usbphyc_phy_ops = {
        .init = stm32_usbphyc_phy_init,
        .exit = stm32_usbphyc_phy_exit,
-       .power_on = stm32_usbphyc_phy_power_on,
-       .power_off = stm32_usbphyc_phy_power_off,
        .owner = THIS_MODULE,
 };
 
@@ -312,7 +354,7 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
        struct device *dev = &pdev->dev;
        struct device_node *child, *np = dev->of_node;
        struct phy_provider *phy_provider;
-       u32 version;
+       u32 pllen, version;
        int ret, port = 0;
 
        usbphyc = devm_kzalloc(dev, sizeof(*usbphyc), GFP_KERNEL);
@@ -344,6 +386,19 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
                ret = PTR_ERR(usbphyc->rst);
                if (ret == -EPROBE_DEFER)
                        goto clk_disable;
+
+               stm32_usbphyc_clr_bits(usbphyc->base + STM32_USBPHYC_PLL, PLLEN);
+       }
+
+       /*
+        * Wait for minimum width of powerdown pulse (ENABLE = Low):
+        * we have to ensure the PLL is disabled before the PHYs are initialized.
+        */
+       if (readl_relaxed_poll_timeout(usbphyc->base + STM32_USBPHYC_PLL,
+                                      pllen, !(pllen & PLLEN), 5, 50)) {
+               dev_warn(usbphyc->dev, "PLL not reset\n");
+               ret = -EPROBE_DEFER;
+               goto clk_disable;
        }
 
        usbphyc->switch_setup = -EINVAL;
@@ -355,11 +410,26 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
                goto clk_disable;
        }
 
+       usbphyc->vdda1v1 = devm_regulator_get(dev, "vdda1v1");
+       if (IS_ERR(usbphyc->vdda1v1)) {
+               ret = PTR_ERR(usbphyc->vdda1v1);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(dev, "failed to get vdda1v1 supply: %d\n", ret);
+               goto clk_disable;
+       }
+
+       usbphyc->vdda1v8 = devm_regulator_get(dev, "vdda1v8");
+       if (IS_ERR(usbphyc->vdda1v8)) {
+               ret = PTR_ERR(usbphyc->vdda1v8);
+               if (ret != -EPROBE_DEFER)
+                       dev_err(dev, "failed to get vdda1v8 supply: %d\n", ret);
+               goto clk_disable;
+       }
+
        for_each_child_of_node(np, child) {
                struct stm32_usbphyc_phy *usbphyc_phy;
                struct phy *phy;
                u32 index;
-               int i;
 
                phy = devm_phy_create(dev, child, &stm32_usbphyc_phy_ops);
                if (IS_ERR(phy)) {
@@ -377,18 +447,6 @@ static int stm32_usbphyc_probe(struct platform_device *pdev)
                        goto put_child;
                }
 
-               for (i = 0; i < NUM_SUPPLIES; i++)
-                       usbphyc_phy->supplies[i].supply = supplies_names[i];
-
-               ret = devm_regulator_bulk_get(&phy->dev, NUM_SUPPLIES,
-                                             usbphyc_phy->supplies);
-               if (ret) {
-                       if (ret != -EPROBE_DEFER)
-                               dev_err(&phy->dev,
-                                       "failed to get regulators: %d\n", ret);
-                       goto put_child;
-               }
-
                ret = of_property_read_u32(child, "reg", &index);
                if (ret || index > usbphyc->nphys) {
                        dev_err(&phy->dev, "invalid reg property: %d\n", ret);
@@ -432,6 +490,12 @@ clk_disable:
 static int stm32_usbphyc_remove(struct platform_device *pdev)
 {
        struct stm32_usbphyc *usbphyc = dev_get_drvdata(&pdev->dev);
+       int port;
+
+       /* Ensure PHYs are not active, to allow PLL disabling */
+       for (port = 0; port < usbphyc->nphys; port++)
+               if (usbphyc->phys[port]->active)
+                       stm32_usbphyc_phy_exit(usbphyc->phys[port]->phy);
 
        clk_disable_unprepare(usbphyc->clk);
 
index 2b0f921..2b65f84 100644 (file)
@@ -874,13 +874,10 @@ static int xpsgtr_get_ref_clocks(struct xpsgtr_dev *gtr_dev)
 
                snprintf(name, sizeof(name), "ref%u", refclk);
                clk = devm_clk_get_optional(gtr_dev->dev, name);
-               if (IS_ERR(clk)) {
-                       if (PTR_ERR(clk) != -EPROBE_DEFER)
-                               dev_err(gtr_dev->dev,
-                                       "Failed to get reference clock %u: %ld\n",
-                                       refclk, PTR_ERR(clk));
-                       return PTR_ERR(clk);
-               }
+               if (IS_ERR(clk))
+                       return dev_err_probe(gtr_dev->dev, PTR_ERR(clk),
+                                            "Failed to get reference clock %u\n",
+                                            refclk);
 
                if (!clk)
                        continue;
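
dev_err_probe() condenses the five-line "print unless deferring" idiom deleted above: it logs real errors, records the reason for -EPROBE_DEFER (visible in /sys/kernel/debug/devices_deferred), and returns the error it was given, so it can sit directly in a return statement. The open-coded equivalent it replaces looks like:

	if (IS_ERR(clk)) {
		ret = PTR_ERR(clk);
		if (ret != -EPROBE_DEFER)
			dev_err(dev, "Failed to get reference clock %u: %d\n",
				refclk, ret);
		return ret;
	}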
index 03c62e1..b7675cc 100644 (file)
@@ -370,6 +370,19 @@ config PINCTRL_MICROCHIP_SGPIO
          connect control signals from SFP modules and to act as an
          LED controller.
 
+config PINCTRL_K210
+       bool "Pinctrl driver for the Canaan Kendryte K210 SoC"
+       depends on RISCV && SOC_CANAAN && OF
+       select GENERIC_PINMUX_FUNCTIONS
+       select GENERIC_PINCONF
+       select GPIOLIB
+       select OF_GPIO
+       select REGMAP_MMIO
+       default SOC_CANAAN
+       help
+         Add support for the Canaan Kendryte K210 RISC-V SOC Field
+         Programmable IO Array (FPIOA) controller.
+
 source "drivers/pinctrl/actions/Kconfig"
 source "drivers/pinctrl/aspeed/Kconfig"
 source "drivers/pinctrl/bcm/Kconfig"
index efc96f2..8bf459c 100644 (file)
@@ -45,6 +45,7 @@ obj-$(CONFIG_PINCTRL_RK805)   += pinctrl-rk805.o
 obj-$(CONFIG_PINCTRL_OCELOT)   += pinctrl-ocelot.o
 obj-$(CONFIG_PINCTRL_MICROCHIP_SGPIO)  += pinctrl-microchip-sgpio.o
 obj-$(CONFIG_PINCTRL_EQUILIBRIUM)   += pinctrl-equilibrium.o
+obj-$(CONFIG_PINCTRL_K210)     += pinctrl-k210.o
 
 obj-y                          += actions/
 obj-$(CONFIG_ARCH_ASPEED)      += aspeed/
index 8085782..9f3361c 100644 (file)
@@ -1357,6 +1357,7 @@ static int intel_pinctrl_add_padgroups_by_gpps(struct intel_pinctrl *pctrl,
                                gpps[i].gpio_base = 0;
                                break;
                        case INTEL_GPIO_BASE_NOMAP:
+                               break;
                        default:
                                break;
                }
@@ -1393,6 +1394,7 @@ static int intel_pinctrl_add_padgroups_by_size(struct intel_pinctrl *pctrl,
                gpps[i].size = min(gpp_size, npins);
                npins -= gpps[i].size;
 
+               gpps[i].gpio_base = gpps[i].base;
                gpps[i].padown_num = padown_num;
 
                /*
@@ -1491,8 +1493,13 @@ static int intel_pinctrl_probe(struct platform_device *pdev,
                if (IS_ERR(regs))
                        return PTR_ERR(regs);
 
-               /* Determine community features based on the revision */
+               /*
+                * Determine community features based on the revision.
+                * A value of all ones means the device is not present.
+                */
                value = readl(regs + REVID);
+               if (value == ~0u)
+                       return -ENODEV;
                if (((value & REVID_MASK) >> REVID_SHIFT) >= 0x94) {
                        community->features |= PINCTRL_FEATURE_DEBOUNCE;
                        community->features |= PINCTRL_FEATURE_1K_PD;
diff --git a/drivers/pinctrl/pinctrl-k210.c b/drivers/pinctrl/pinctrl-k210.c
new file mode 100644 (file)
index 0000000..8a733cf
--- /dev/null
@@ -0,0 +1,985 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (C) 2020 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#include <linux/io.h>
+#include <linux/of_device.h>
+#include <linux/clk.h>
+#include <linux/mfd/syscon.h>
+#include <linux/platform_device.h>
+#include <linux/bitfield.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/pinctrl/pinctrl.h>
+#include <linux/pinctrl/pinmux.h>
+#include <linux/pinctrl/pinconf.h>
+#include <linux/pinctrl/pinconf-generic.h>
+
+#include <dt-bindings/pinctrl/k210-fpioa.h>
+
+#include "core.h"
+#include "pinconf.h"
+#include "pinctrl-utils.h"
+
+/*
+ * The K210 only implements 8 drive levels, even though
+ * there is register space for 16
+ */
+#define K210_PC_DRIVE_MASK     GENMASK(11, 8)
+#define K210_PC_DRIVE_SHIFT    8
+#define K210_PC_DRIVE_0                (0 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_1                (1 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_2                (2 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_3                (3 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_4                (4 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_5                (5 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_6                (6 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_7                (7 << K210_PC_DRIVE_SHIFT)
+#define K210_PC_DRIVE_MAX      7
+#define K210_PC_MODE_MASK      GENMASK(23, 12)
+
+/*
+ * output enabled == PC_OE & (PC_OE_INV ^ FUNCTION_OE)
+ * where FUNCTION_OE is a physical signal from the function.
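+ *
+ * For example, K210_PC_MODE_I2C below sets K210_PC_OE with K210_PC_OE_INV
+ * clear, so the pad drives only while the I2C function itself asserts its
+ * output enable signal.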
+ */
+#define K210_PC_OE             BIT(12) /* Output Enable */
+#define K210_PC_OE_INV         BIT(13) /* INVert Output Enable */
+#define K210_PC_DO_OE          BIT(14) /* set Data Out to Output Enable sig */
+#define K210_PC_DO_INV         BIT(15) /* INVert final Data Output */
+#define K210_PC_PU             BIT(16) /* Pull Up */
+#define K210_PC_PD             BIT(17) /* Pull Down */
+/* Strong pull up not implemented on K210 */
+#define K210_PC_SL             BIT(19) /* reduce SLew rate */
+/* Same semantics as OE above */
+#define K210_PC_IE             BIT(20) /* Input Enable */
+#define K210_PC_IE_INV         BIT(21) /* INVert Input Enable */
+#define K210_PC_DI_INV         BIT(22) /* INVert Data Input */
+#define K210_PC_ST             BIT(23) /* Schmitt Trigger */
+#define K210_PC_DI             BIT(31) /* raw Data Input */
+
+#define K210_PC_BIAS_MASK      (K210_PC_PU | K210_PC_PD)
+
+#define K210_PC_MODE_IN                (K210_PC_IE | K210_PC_ST)
+#define K210_PC_MODE_OUT       (K210_PC_DRIVE_7 | K210_PC_OE)
+#define K210_PC_MODE_I2C       (K210_PC_MODE_IN | K210_PC_SL | \
+                                K210_PC_OE | K210_PC_PU)
+#define K210_PC_MODE_SCCB      (K210_PC_MODE_I2C | \
+                                K210_PC_OE_INV | K210_PC_IE_INV)
+#define K210_PC_MODE_SPI       (K210_PC_MODE_IN | K210_PC_IE_INV | \
+                                K210_PC_MODE_OUT | K210_PC_OE_INV)
+#define K210_PC_MODE_GPIO      (K210_PC_MODE_IN | K210_PC_MODE_OUT)
+
+#define K210_PG_FUNC           GENMASK(7, 0)
+#define K210_PG_DO             BIT(8)
+#define K210_PG_PIN            GENMASK(22, 16)
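+
+/*
+ * Layout of one 32-bit "pinmux" property cell, as parsed by
+ * k210_pinctrl_dt_subnode_to_map(): pin number in bits 22-16, function
+ * number in bits 7-0; bit 8 (K210_PG_DO) holds a data output value.
+ * A dt-bindings helper macro along the lines of K210_FPIOA(pin, func)
+ * is assumed to build these cells.
+ */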
+
+/*
+ * struct k210_fpioa: Kendryte K210 FPIOA memory mapped registers
+ * @pins: 48 32-bit IO pin registers
+ * @tie_en: 256 (one per function) input tie enable bits
+ * @tie_val: 256 (one per function) input tie value bits
+ */
+struct k210_fpioa {
+       u32 pins[48];
+       u32 tie_en[8];
+       u32 tie_val[8];
+};
+
+struct k210_fpioa_data {
+       struct device *dev;
+       struct pinctrl_dev *pctl;
+
+       struct k210_fpioa __iomem *fpioa;
+       struct regmap *sysctl_map;
+       u32 power_offset;
+       struct clk *clk;
+       struct clk *pclk;
+};
+
+#define K210_PIN_NAME(i)       ("IO_" #i)
+#define K210_PIN(i)            [(i)] = PINCTRL_PIN((i), K210_PIN_NAME(i))
+
+static const struct pinctrl_pin_desc k210_pins[] = {
+       K210_PIN(0),  K210_PIN(1),  K210_PIN(2),
+       K210_PIN(3),  K210_PIN(4),  K210_PIN(5),
+       K210_PIN(6),  K210_PIN(7),  K210_PIN(8),
+       K210_PIN(9),  K210_PIN(10), K210_PIN(11),
+       K210_PIN(12), K210_PIN(13), K210_PIN(14),
+       K210_PIN(15), K210_PIN(16), K210_PIN(17),
+       K210_PIN(18), K210_PIN(19), K210_PIN(20),
+       K210_PIN(21), K210_PIN(22), K210_PIN(23),
+       K210_PIN(24), K210_PIN(25), K210_PIN(26),
+       K210_PIN(27), K210_PIN(28), K210_PIN(29),
+       K210_PIN(30), K210_PIN(31), K210_PIN(32),
+       K210_PIN(33), K210_PIN(34), K210_PIN(35),
+       K210_PIN(36), K210_PIN(37), K210_PIN(38),
+       K210_PIN(39), K210_PIN(40), K210_PIN(41),
+       K210_PIN(42), K210_PIN(43), K210_PIN(44),
+       K210_PIN(45), K210_PIN(46), K210_PIN(47)
+};
+
+#define K210_NPINS ARRAY_SIZE(k210_pins)
+
+/*
+ * Pin groups: each of the 48 programmable pins is a group.
+ * To this are added 8 power domain groups, which, for the purposes of
+ * the pin subsystem, contain no pins. The power domain groups exist only
+ * to set the power level. Their ids must never be used as pin numbers
+ * (there are no pins 48-55).
+ */
+static const char *const k210_group_names[] = {
+       /* The first 48 groups are for pins, one each */
+       K210_PIN_NAME(0),  K210_PIN_NAME(1),  K210_PIN_NAME(2),
+       K210_PIN_NAME(3),  K210_PIN_NAME(4),  K210_PIN_NAME(5),
+       K210_PIN_NAME(6),  K210_PIN_NAME(7),  K210_PIN_NAME(8),
+       K210_PIN_NAME(9),  K210_PIN_NAME(10), K210_PIN_NAME(11),
+       K210_PIN_NAME(12), K210_PIN_NAME(13), K210_PIN_NAME(14),
+       K210_PIN_NAME(15), K210_PIN_NAME(16), K210_PIN_NAME(17),
+       K210_PIN_NAME(18), K210_PIN_NAME(19), K210_PIN_NAME(20),
+       K210_PIN_NAME(21), K210_PIN_NAME(22), K210_PIN_NAME(23),
+       K210_PIN_NAME(24), K210_PIN_NAME(25), K210_PIN_NAME(26),
+       K210_PIN_NAME(27), K210_PIN_NAME(28), K210_PIN_NAME(29),
+       K210_PIN_NAME(30), K210_PIN_NAME(31), K210_PIN_NAME(32),
+       K210_PIN_NAME(33), K210_PIN_NAME(34), K210_PIN_NAME(35),
+       K210_PIN_NAME(36), K210_PIN_NAME(37), K210_PIN_NAME(38),
+       K210_PIN_NAME(39), K210_PIN_NAME(40), K210_PIN_NAME(41),
+       K210_PIN_NAME(42), K210_PIN_NAME(43), K210_PIN_NAME(44),
+       K210_PIN_NAME(45), K210_PIN_NAME(46), K210_PIN_NAME(47),
+       [48] = "A0", [49] = "A1", [50] = "A2",
+       [51] = "B3", [52] = "B4", [53] = "B5",
+       [54] = "C6", [55] = "C7"
+};
+
+#define K210_NGROUPS   ARRAY_SIZE(k210_group_names)
+
+enum k210_pinctrl_mode_id {
+       K210_PC_DEFAULT_DISABLED,
+       K210_PC_DEFAULT_IN,
+       K210_PC_DEFAULT_IN_TIE,
+       K210_PC_DEFAULT_OUT,
+       K210_PC_DEFAULT_I2C,
+       K210_PC_DEFAULT_SCCB,
+       K210_PC_DEFAULT_SPI,
+       K210_PC_DEFAULT_GPIO,
+       K210_PC_DEFAULT_INT13,
+};
+
+#define K210_PC_DEFAULT(mode) \
+       [K210_PC_DEFAULT_##mode] = K210_PC_MODE_##mode
+
+static const u32 k210_pinconf_mode_id_to_mode[] = {
+       [K210_PC_DEFAULT_DISABLED] = 0,
+       K210_PC_DEFAULT(IN),
+       [K210_PC_DEFAULT_IN_TIE] = K210_PC_MODE_IN,
+       K210_PC_DEFAULT(OUT),
+       K210_PC_DEFAULT(I2C),
+       K210_PC_DEFAULT(SCCB),
+       K210_PC_DEFAULT(SPI),
+       K210_PC_DEFAULT(GPIO),
+       [K210_PC_DEFAULT_INT13] = K210_PC_MODE_IN | K210_PC_PU,
+};
+
+#undef K210_PC_DEFAULT
+
+/*
+ * Pin functions configuration information.
+ */
+struct k210_pcf_info {
+       char name[15];
+       u8 mode_id;
+};
+
+#define K210_FUNC(id, mode)                            \
+       [K210_PCF_##id] = {                             \
+               .name = #id,                            \
+               .mode_id = K210_PC_DEFAULT_##mode       \
+       }
+
+static const struct k210_pcf_info k210_pcf_infos[] = {
+       K210_FUNC(JTAG_TCLK,            IN),
+       K210_FUNC(JTAG_TDI,             IN),
+       K210_FUNC(JTAG_TMS,             IN),
+       K210_FUNC(JTAG_TDO,             OUT),
+       K210_FUNC(SPI0_D0,              SPI),
+       K210_FUNC(SPI0_D1,              SPI),
+       K210_FUNC(SPI0_D2,              SPI),
+       K210_FUNC(SPI0_D3,              SPI),
+       K210_FUNC(SPI0_D4,              SPI),
+       K210_FUNC(SPI0_D5,              SPI),
+       K210_FUNC(SPI0_D6,              SPI),
+       K210_FUNC(SPI0_D7,              SPI),
+       K210_FUNC(SPI0_SS0,             OUT),
+       K210_FUNC(SPI0_SS1,             OUT),
+       K210_FUNC(SPI0_SS2,             OUT),
+       K210_FUNC(SPI0_SS3,             OUT),
+       K210_FUNC(SPI0_ARB,             IN_TIE),
+       K210_FUNC(SPI0_SCLK,            OUT),
+       K210_FUNC(UARTHS_RX,            IN),
+       K210_FUNC(UARTHS_TX,            OUT),
+       K210_FUNC(RESV6,                IN),
+       K210_FUNC(RESV7,                IN),
+       K210_FUNC(CLK_SPI1,             OUT),
+       K210_FUNC(CLK_I2C1,             OUT),
+       K210_FUNC(GPIOHS0,              GPIO),
+       K210_FUNC(GPIOHS1,              GPIO),
+       K210_FUNC(GPIOHS2,              GPIO),
+       K210_FUNC(GPIOHS3,              GPIO),
+       K210_FUNC(GPIOHS4,              GPIO),
+       K210_FUNC(GPIOHS5,              GPIO),
+       K210_FUNC(GPIOHS6,              GPIO),
+       K210_FUNC(GPIOHS7,              GPIO),
+       K210_FUNC(GPIOHS8,              GPIO),
+       K210_FUNC(GPIOHS9,              GPIO),
+       K210_FUNC(GPIOHS10,             GPIO),
+       K210_FUNC(GPIOHS11,             GPIO),
+       K210_FUNC(GPIOHS12,             GPIO),
+       K210_FUNC(GPIOHS13,             GPIO),
+       K210_FUNC(GPIOHS14,             GPIO),
+       K210_FUNC(GPIOHS15,             GPIO),
+       K210_FUNC(GPIOHS16,             GPIO),
+       K210_FUNC(GPIOHS17,             GPIO),
+       K210_FUNC(GPIOHS18,             GPIO),
+       K210_FUNC(GPIOHS19,             GPIO),
+       K210_FUNC(GPIOHS20,             GPIO),
+       K210_FUNC(GPIOHS21,             GPIO),
+       K210_FUNC(GPIOHS22,             GPIO),
+       K210_FUNC(GPIOHS23,             GPIO),
+       K210_FUNC(GPIOHS24,             GPIO),
+       K210_FUNC(GPIOHS25,             GPIO),
+       K210_FUNC(GPIOHS26,             GPIO),
+       K210_FUNC(GPIOHS27,             GPIO),
+       K210_FUNC(GPIOHS28,             GPIO),
+       K210_FUNC(GPIOHS29,             GPIO),
+       K210_FUNC(GPIOHS30,             GPIO),
+       K210_FUNC(GPIOHS31,             GPIO),
+       K210_FUNC(GPIO0,                GPIO),
+       K210_FUNC(GPIO1,                GPIO),
+       K210_FUNC(GPIO2,                GPIO),
+       K210_FUNC(GPIO3,                GPIO),
+       K210_FUNC(GPIO4,                GPIO),
+       K210_FUNC(GPIO5,                GPIO),
+       K210_FUNC(GPIO6,                GPIO),
+       K210_FUNC(GPIO7,                GPIO),
+       K210_FUNC(UART1_RX,             IN),
+       K210_FUNC(UART1_TX,             OUT),
+       K210_FUNC(UART2_RX,             IN),
+       K210_FUNC(UART2_TX,             OUT),
+       K210_FUNC(UART3_RX,             IN),
+       K210_FUNC(UART3_TX,             OUT),
+       K210_FUNC(SPI1_D0,              SPI),
+       K210_FUNC(SPI1_D1,              SPI),
+       K210_FUNC(SPI1_D2,              SPI),
+       K210_FUNC(SPI1_D3,              SPI),
+       K210_FUNC(SPI1_D4,              SPI),
+       K210_FUNC(SPI1_D5,              SPI),
+       K210_FUNC(SPI1_D6,              SPI),
+       K210_FUNC(SPI1_D7,              SPI),
+       K210_FUNC(SPI1_SS0,             OUT),
+       K210_FUNC(SPI1_SS1,             OUT),
+       K210_FUNC(SPI1_SS2,             OUT),
+       K210_FUNC(SPI1_SS3,             OUT),
+       K210_FUNC(SPI1_ARB,             IN_TIE),
+       K210_FUNC(SPI1_SCLK,            OUT),
+       K210_FUNC(SPI2_D0,              SPI),
+       K210_FUNC(SPI2_SS,              IN),
+       K210_FUNC(SPI2_SCLK,            IN),
+       K210_FUNC(I2S0_MCLK,            OUT),
+       K210_FUNC(I2S0_SCLK,            OUT),
+       K210_FUNC(I2S0_WS,              OUT),
+       K210_FUNC(I2S0_IN_D0,           IN),
+       K210_FUNC(I2S0_IN_D1,           IN),
+       K210_FUNC(I2S0_IN_D2,           IN),
+       K210_FUNC(I2S0_IN_D3,           IN),
+       K210_FUNC(I2S0_OUT_D0,          OUT),
+       K210_FUNC(I2S0_OUT_D1,          OUT),
+       K210_FUNC(I2S0_OUT_D2,          OUT),
+       K210_FUNC(I2S0_OUT_D3,          OUT),
+       K210_FUNC(I2S1_MCLK,            OUT),
+       K210_FUNC(I2S1_SCLK,            OUT),
+       K210_FUNC(I2S1_WS,              OUT),
+       K210_FUNC(I2S1_IN_D0,           IN),
+       K210_FUNC(I2S1_IN_D1,           IN),
+       K210_FUNC(I2S1_IN_D2,           IN),
+       K210_FUNC(I2S1_IN_D3,           IN),
+       K210_FUNC(I2S1_OUT_D0,          OUT),
+       K210_FUNC(I2S1_OUT_D1,          OUT),
+       K210_FUNC(I2S1_OUT_D2,          OUT),
+       K210_FUNC(I2S1_OUT_D3,          OUT),
+       K210_FUNC(I2S2_MCLK,            OUT),
+       K210_FUNC(I2S2_SCLK,            OUT),
+       K210_FUNC(I2S2_WS,              OUT),
+       K210_FUNC(I2S2_IN_D0,           IN),
+       K210_FUNC(I2S2_IN_D1,           IN),
+       K210_FUNC(I2S2_IN_D2,           IN),
+       K210_FUNC(I2S2_IN_D3,           IN),
+       K210_FUNC(I2S2_OUT_D0,          OUT),
+       K210_FUNC(I2S2_OUT_D1,          OUT),
+       K210_FUNC(I2S2_OUT_D2,          OUT),
+       K210_FUNC(I2S2_OUT_D3,          OUT),
+       K210_FUNC(RESV0,                DISABLED),
+       K210_FUNC(RESV1,                DISABLED),
+       K210_FUNC(RESV2,                DISABLED),
+       K210_FUNC(RESV3,                DISABLED),
+       K210_FUNC(RESV4,                DISABLED),
+       K210_FUNC(RESV5,                DISABLED),
+       K210_FUNC(I2C0_SCLK,            I2C),
+       K210_FUNC(I2C0_SDA,             I2C),
+       K210_FUNC(I2C1_SCLK,            I2C),
+       K210_FUNC(I2C1_SDA,             I2C),
+       K210_FUNC(I2C2_SCLK,            I2C),
+       K210_FUNC(I2C2_SDA,             I2C),
+       K210_FUNC(DVP_XCLK,             OUT),
+       K210_FUNC(DVP_RST,              OUT),
+       K210_FUNC(DVP_PWDN,             OUT),
+       K210_FUNC(DVP_VSYNC,            IN),
+       K210_FUNC(DVP_HSYNC,            IN),
+       K210_FUNC(DVP_PCLK,             IN),
+       K210_FUNC(DVP_D0,               IN),
+       K210_FUNC(DVP_D1,               IN),
+       K210_FUNC(DVP_D2,               IN),
+       K210_FUNC(DVP_D3,               IN),
+       K210_FUNC(DVP_D4,               IN),
+       K210_FUNC(DVP_D5,               IN),
+       K210_FUNC(DVP_D6,               IN),
+       K210_FUNC(DVP_D7,               IN),
+       K210_FUNC(SCCB_SCLK,            SCCB),
+       K210_FUNC(SCCB_SDA,             SCCB),
+       K210_FUNC(UART1_CTS,            IN),
+       K210_FUNC(UART1_DSR,            IN),
+       K210_FUNC(UART1_DCD,            IN),
+       K210_FUNC(UART1_RI,             IN),
+       K210_FUNC(UART1_SIR_IN,         IN),
+       K210_FUNC(UART1_DTR,            OUT),
+       K210_FUNC(UART1_RTS,            OUT),
+       K210_FUNC(UART1_OUT2,           OUT),
+       K210_FUNC(UART1_OUT1,           OUT),
+       K210_FUNC(UART1_SIR_OUT,        OUT),
+       K210_FUNC(UART1_BAUD,           OUT),
+       K210_FUNC(UART1_RE,             OUT),
+       K210_FUNC(UART1_DE,             OUT),
+       K210_FUNC(UART1_RS485_EN,       OUT),
+       K210_FUNC(UART2_CTS,            IN),
+       K210_FUNC(UART2_DSR,            IN),
+       K210_FUNC(UART2_DCD,            IN),
+       K210_FUNC(UART2_RI,             IN),
+       K210_FUNC(UART2_SIR_IN,         IN),
+       K210_FUNC(UART2_DTR,            OUT),
+       K210_FUNC(UART2_RTS,            OUT),
+       K210_FUNC(UART2_OUT2,           OUT),
+       K210_FUNC(UART2_OUT1,           OUT),
+       K210_FUNC(UART2_SIR_OUT,        OUT),
+       K210_FUNC(UART2_BAUD,           OUT),
+       K210_FUNC(UART2_RE,             OUT),
+       K210_FUNC(UART2_DE,             OUT),
+       K210_FUNC(UART2_RS485_EN,       OUT),
+       K210_FUNC(UART3_CTS,            IN),
+       K210_FUNC(UART3_DSR,            IN),
+       K210_FUNC(UART3_DCD,            IN),
+       K210_FUNC(UART3_RI,             IN),
+       K210_FUNC(UART3_SIR_IN,         IN),
+       K210_FUNC(UART3_DTR,            OUT),
+       K210_FUNC(UART3_RTS,            OUT),
+       K210_FUNC(UART3_OUT2,           OUT),
+       K210_FUNC(UART3_OUT1,           OUT),
+       K210_FUNC(UART3_SIR_OUT,        OUT),
+       K210_FUNC(UART3_BAUD,           OUT),
+       K210_FUNC(UART3_RE,             OUT),
+       K210_FUNC(UART3_DE,             OUT),
+       K210_FUNC(UART3_RS485_EN,       OUT),
+       K210_FUNC(TIMER0_TOGGLE1,       OUT),
+       K210_FUNC(TIMER0_TOGGLE2,       OUT),
+       K210_FUNC(TIMER0_TOGGLE3,       OUT),
+       K210_FUNC(TIMER0_TOGGLE4,       OUT),
+       K210_FUNC(TIMER1_TOGGLE1,       OUT),
+       K210_FUNC(TIMER1_TOGGLE2,       OUT),
+       K210_FUNC(TIMER1_TOGGLE3,       OUT),
+       K210_FUNC(TIMER1_TOGGLE4,       OUT),
+       K210_FUNC(TIMER2_TOGGLE1,       OUT),
+       K210_FUNC(TIMER2_TOGGLE2,       OUT),
+       K210_FUNC(TIMER2_TOGGLE3,       OUT),
+       K210_FUNC(TIMER2_TOGGLE4,       OUT),
+       K210_FUNC(CLK_SPI2,             OUT),
+       K210_FUNC(CLK_I2C2,             OUT),
+       K210_FUNC(INTERNAL0,            OUT),
+       K210_FUNC(INTERNAL1,            OUT),
+       K210_FUNC(INTERNAL2,            OUT),
+       K210_FUNC(INTERNAL3,            OUT),
+       K210_FUNC(INTERNAL4,            OUT),
+       K210_FUNC(INTERNAL5,            OUT),
+       K210_FUNC(INTERNAL6,            OUT),
+       K210_FUNC(INTERNAL7,            OUT),
+       K210_FUNC(INTERNAL8,            OUT),
+       K210_FUNC(INTERNAL9,            IN),
+       K210_FUNC(INTERNAL10,           IN),
+       K210_FUNC(INTERNAL11,           IN),
+       K210_FUNC(INTERNAL12,           IN),
+       K210_FUNC(INTERNAL13,           INT13),
+       K210_FUNC(INTERNAL14,           I2C),
+       K210_FUNC(INTERNAL15,           IN),
+       K210_FUNC(INTERNAL16,           IN),
+       K210_FUNC(INTERNAL17,           IN),
+       K210_FUNC(CONSTANT,             DISABLED),
+       K210_FUNC(INTERNAL18,           IN),
+       K210_FUNC(DEBUG0,               OUT),
+       K210_FUNC(DEBUG1,               OUT),
+       K210_FUNC(DEBUG2,               OUT),
+       K210_FUNC(DEBUG3,               OUT),
+       K210_FUNC(DEBUG4,               OUT),
+       K210_FUNC(DEBUG5,               OUT),
+       K210_FUNC(DEBUG6,               OUT),
+       K210_FUNC(DEBUG7,               OUT),
+       K210_FUNC(DEBUG8,               OUT),
+       K210_FUNC(DEBUG9,               OUT),
+       K210_FUNC(DEBUG10,              OUT),
+       K210_FUNC(DEBUG11,              OUT),
+       K210_FUNC(DEBUG12,              OUT),
+       K210_FUNC(DEBUG13,              OUT),
+       K210_FUNC(DEBUG14,              OUT),
+       K210_FUNC(DEBUG15,              OUT),
+       K210_FUNC(DEBUG16,              OUT),
+       K210_FUNC(DEBUG17,              OUT),
+       K210_FUNC(DEBUG18,              OUT),
+       K210_FUNC(DEBUG19,              OUT),
+       K210_FUNC(DEBUG20,              OUT),
+       K210_FUNC(DEBUG21,              OUT),
+       K210_FUNC(DEBUG22,              OUT),
+       K210_FUNC(DEBUG23,              OUT),
+       K210_FUNC(DEBUG24,              OUT),
+       K210_FUNC(DEBUG25,              OUT),
+       K210_FUNC(DEBUG26,              OUT),
+       K210_FUNC(DEBUG27,              OUT),
+       K210_FUNC(DEBUG28,              OUT),
+       K210_FUNC(DEBUG29,              OUT),
+       K210_FUNC(DEBUG30,              OUT),
+       K210_FUNC(DEBUG31,              OUT),
+};
+
+#define PIN_CONFIG_OUTPUT_INVERT       (PIN_CONFIG_END + 1)
+#define PIN_CONFIG_INPUT_INVERT                (PIN_CONFIG_END + 2)
+
+static const struct pinconf_generic_params k210_pinconf_custom_params[] = {
+       { "output-polarity-invert", PIN_CONFIG_OUTPUT_INVERT, 1 },
+       { "input-polarity-invert",  PIN_CONFIG_INPUT_INVERT, 1 },
+};
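+
+/*
+ * Hypothetical example of a configuration node using one of the custom
+ * properties together with the generic pinconf ones:
+ *
+ *     uart1-pins {
+ *             pinmux = <K210_FPIOA(4, K210_PCF_UART1_RX)>;
+ *             input-polarity-invert;
+ *     };
+ */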
+
+/*
+ * Max drive strength in uA.
+ */
+static const int k210_pinconf_drive_strength[] = {
+       [0] = 11200,
+       [1] = 16800,
+       [2] = 22300,
+       [3] = 27800,
+       [4] = 33300,
+       [5] = 38700,
+       [6] = 44100,
+       [7] = 49500,
+};
+
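+/*
+ * Map a maximum drive strength in uA to the highest drive level that
+ * does not exceed it; e.g. a 30000 uA limit selects level 3 (27800 uA).
+ */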
+static int k210_pinconf_get_drive(unsigned int max_strength_ua)
+{
+       int i;
+
+       for (i = K210_PC_DRIVE_MAX; i >= 0; i--) {
+               if (k210_pinconf_drive_strength[i] <= max_strength_ua)
+                       return i;
+       }
+
+       return -EINVAL;
+}
+
+static void k210_pinmux_set_pin_function(struct pinctrl_dev *pctldev,
+                                        u32 pin, u32 func)
+{
+       struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+       const struct k210_pcf_info *info = &k210_pcf_infos[func];
+       u32 mode = k210_pinconf_mode_id_to_mode[info->mode_id];
+       u32 val = func | mode;
+
+       dev_dbg(pdata->dev, "set pin %u function %s (%u) -> 0x%08x\n",
+               pin, info->name, func, val);
+
+       writel(val, &pdata->fpioa->pins[pin]);
+}
+
+static int k210_pinconf_set_param(struct pinctrl_dev *pctldev,
+                                 unsigned int pin,
+                                 unsigned int param, unsigned int arg)
+{
+       struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+       u32 val = readl(&pdata->fpioa->pins[pin]);
+       int drive;
+
+       dev_dbg(pdata->dev, "set pin %u param %u, arg 0x%x\n",
+               pin, param, arg);
+
+       switch (param) {
+       case PIN_CONFIG_BIAS_DISABLE:
+               val &= ~K210_PC_BIAS_MASK;
+               break;
+       case PIN_CONFIG_BIAS_PULL_DOWN:
+               if (!arg)
+                       return -EINVAL;
+               val |= K210_PC_PD;
+               break;
+       case PIN_CONFIG_BIAS_PULL_UP:
+               if (!arg)
+                       return -EINVAL;
+               val |= K210_PC_PU;
+               break;
+       case PIN_CONFIG_DRIVE_STRENGTH:
+               arg *= 1000;
+               fallthrough;
+       case PIN_CONFIG_DRIVE_STRENGTH_UA:
+               drive = k210_pinconf_get_drive(arg);
+               if (drive < 0)
+                       return drive;
+               val &= ~K210_PC_DRIVE_MASK;
+               val |= FIELD_PREP(K210_PC_DRIVE_MASK, drive);
+               break;
+       case PIN_CONFIG_INPUT_ENABLE:
+               if (arg)
+                       val |= K210_PC_IE;
+               else
+                       val &= ~K210_PC_IE;
+               break;
+       case PIN_CONFIG_INPUT_SCHMITT_ENABLE:
+               if (arg)
+                       val |= K210_PC_ST;
+               else
+                       val &= ~K210_PC_ST;
+               break;
+       case PIN_CONFIG_OUTPUT:
+               k210_pinmux_set_pin_function(pctldev, pin, K210_PCF_CONSTANT);
+               val = readl(&pdata->fpioa->pins[pin]);
+               val |= K210_PC_MODE_OUT;
+               if (!arg)
+                       val |= K210_PC_DO_INV;
+               break;
+       case PIN_CONFIG_OUTPUT_ENABLE:
+               if (arg)
+                       val |= K210_PC_OE;
+               else
+                       val &= ~K210_PC_OE;
+               break;
+       case PIN_CONFIG_SLEW_RATE:
+               if (arg)
+                       val |= K210_PC_SL;
+               else
+                       val &= ~K210_PC_SL;
+               break;
+       case PIN_CONFIG_OUTPUT_INVERT:
+               if (arg)
+                       val |= K210_PC_DO_INV;
+               else
+                       val &= ~K210_PC_DO_INV;
+               break;
+       case PIN_CONFIG_INPUT_INVERT:
+               if (arg)
+                       val |= K210_PC_DI_INV;
+               else
+                       val &= ~K210_PC_DI_INV;
+               break;
+       default:
+               return -EINVAL;
+       }
+
+       writel(val, &pdata->fpioa->pins[pin]);
+
+       return 0;
+}
+
+static int k210_pinconf_set(struct pinctrl_dev *pctldev, unsigned int pin,
+                           unsigned long *configs, unsigned int num_configs)
+{
+       unsigned int param, arg;
+       int i, ret;
+
+       if (WARN_ON(pin >= K210_NPINS))
+               return -EINVAL;
+
+       for (i = 0; i < num_configs; i++) {
+               param = pinconf_to_config_param(configs[i]);
+               arg = pinconf_to_config_argument(configs[i]);
+               ret = k210_pinconf_set_param(pctldev, pin, param, arg);
+               if (ret)
+                       return ret;
+       }
+
+       return 0;
+}
+
+static void k210_pinconf_dbg_show(struct pinctrl_dev *pctldev,
+                                 struct seq_file *s, unsigned int pin)
+{
+       struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+
+       seq_printf(s, "%#x", readl(&pdata->fpioa->pins[pin]));
+}
+
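+/*
+ * Power domain groups (selectors 48-55) accept only PIN_CONFIG_POWER_SOURCE:
+ * a nonzero argument switches the matching IO bank to 1.8 V and zero selects
+ * 3.3 V, via a single bit in the sysctl power register.
+ */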
+static int k210_pinconf_group_set(struct pinctrl_dev *pctldev,
+                                 unsigned int selector, unsigned long *configs,
+                                 unsigned int num_configs)
+{
+       struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+       unsigned int param, arg;
+       u32 bit;
+       int i;
+
+       /* Pins should be configured with pinmux, not groups */
+       if (selector < K210_NPINS)
+               return -EINVAL;
+
+       /* Otherwise it's a power domain */
+       for (i = 0; i < num_configs; i++) {
+               param = pinconf_to_config_param(configs[i]);
+               if (param != PIN_CONFIG_POWER_SOURCE)
+                       return -EINVAL;
+
+               arg = pinconf_to_config_argument(configs[i]);
+               bit = BIT(selector - K210_NPINS);
+               regmap_update_bits(pdata->sysctl_map,
+                                  pdata->power_offset,
+                                  bit, arg ? bit : 0);
+       }
+
+       return 0;
+}
+
+static void k210_pinconf_group_dbg_show(struct pinctrl_dev *pctldev,
+                                       struct seq_file *s,
+                                       unsigned int selector)
+{
+       struct k210_fpioa_data *pdata = pinctrl_dev_get_drvdata(pctldev);
+       int ret;
+       u32 val;
+
+       if (selector < K210_NPINS) {
+               k210_pinconf_dbg_show(pctldev, s, selector);
+               return;
+       }
+
+       ret = regmap_read(pdata->sysctl_map, pdata->power_offset, &val);
+       if (ret) {
+               dev_err(pdata->dev, "Failed to read power reg\n");
+               return;
+       }
+
+       seq_printf(s, "%s: %s V", k210_group_names[selector],
+                  val & BIT(selector - K210_NPINS) ? "1.8" : "3.3");
+}
+
+static const struct pinconf_ops k210_pinconf_ops = {
+       .is_generic = true,
+       .pin_config_set = k210_pinconf_set,
+       .pin_config_group_set = k210_pinconf_group_set,
+       .pin_config_dbg_show = k210_pinconf_dbg_show,
+       .pin_config_group_dbg_show = k210_pinconf_group_dbg_show,
+};
+
+static int k210_pinmux_get_function_count(struct pinctrl_dev *pctldev)
+{
+       return ARRAY_SIZE(k210_pcf_infos);
+}
+
+static const char *k210_pinmux_get_function_name(struct pinctrl_dev *pctldev,
+                                                unsigned int selector)
+{
+       return k210_pcf_infos[selector].name;
+}
+
+static int k210_pinmux_get_function_groups(struct pinctrl_dev *pctldev,
+                                          unsigned int selector,
+                                          const char * const **groups,
+                                          unsigned int * const num_groups)
+{
+       /* Any function can be mapped to any pin */
+       *groups = k210_group_names;
+       *num_groups = K210_NPINS;
+
+       return 0;
+}
+
+static int k210_pinmux_set_mux(struct pinctrl_dev *pctldev,
+                              unsigned int function,
+                              unsigned int group)
+{
+       /* Can't mux power domains */
+       if (group >= K210_NPINS)
+               return -EINVAL;
+
+       k210_pinmux_set_pin_function(pctldev, group, function);
+
+       return 0;
+}
+
+static const struct pinmux_ops k210_pinmux_ops = {
+       .get_functions_count = k210_pinmux_get_function_count,
+       .get_function_name = k210_pinmux_get_function_name,
+       .get_function_groups = k210_pinmux_get_function_groups,
+       .set_mux = k210_pinmux_set_mux,
+       .strict = true,
+};
+
+static int k210_pinctrl_get_groups_count(struct pinctrl_dev *pctldev)
+{
+       return K210_NGROUPS;
+}
+
+static const char *k210_pinctrl_get_group_name(struct pinctrl_dev *pctldev,
+                                              unsigned int group)
+{
+       return k210_group_names[group];
+}
+
+static int k210_pinctrl_get_group_pins(struct pinctrl_dev *pctldev,
+                                      unsigned int group,
+                                      const unsigned int **pins,
+                                      unsigned int *npins)
+{
+       if (group >= K210_NPINS) {
+               *pins = NULL;
+               *npins = 0;
+               return 0;
+       }
+
+       *pins = &k210_pins[group].number;
+       *npins = 1;
+
+       return 0;
+}
+
+static void k210_pinctrl_pin_dbg_show(struct pinctrl_dev *pctldev,
+                                     struct seq_file *s, unsigned int offset)
+{
+       seq_printf(s, "%s", dev_name(pctldev->dev));
+}
+
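+/*
+ * A config node either names "groups" (handled by the generic parser, used
+ * for the power domain groups) or packs pin/function pairs into a "pinmux"
+ * array; each pinmux entry yields one mux map plus optional config maps.
+ */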
+static int k210_pinctrl_dt_subnode_to_map(struct pinctrl_dev *pctldev,
+                                         struct device_node *np,
+                                         struct pinctrl_map **map,
+                                         unsigned int *reserved_maps,
+                                         unsigned int *num_maps)
+{
+       struct property *prop;
+       const __be32 *p;
+       int ret, pinmux_groups;
+       u32 pinmux_group;
+       unsigned long *configs = NULL;
+       unsigned int num_configs = 0;
+       unsigned int reserve = 0;
+
+       ret = of_property_count_strings(np, "groups");
+       if (!ret)
+               return pinconf_generic_dt_subnode_to_map(pctldev, np, map,
+                                               reserved_maps, num_maps,
+                                               PIN_MAP_TYPE_CONFIGS_GROUP);
+
+       pinmux_groups = of_property_count_u32_elems(np, "pinmux");
+       if (pinmux_groups <= 0) {
+               /* Ignore this node */
+               return 0;
+       }
+
+       ret = pinconf_generic_parse_dt_config(np, pctldev, &configs,
+                                             &num_configs);
+       if (ret < 0) {
+               dev_err(pctldev->dev, "%pOF: could not parse node property\n",
+                       np);
+               return ret;
+       }
+
+       reserve = pinmux_groups * (1 + num_configs);
+       ret = pinctrl_utils_reserve_map(pctldev, map, reserved_maps, num_maps,
+                                       reserve);
+       if (ret < 0)
+               goto exit;
+
+       of_property_for_each_u32(np, "pinmux", prop, p, pinmux_group) {
+               const char *group_name, *func_name;
+               u32 pin = FIELD_GET(K210_PG_PIN, pinmux_group);
+               u32 func = FIELD_GET(K210_PG_FUNC, pinmux_group);
+
+               if (pin >= K210_NPINS) {
+                       ret = -EINVAL;
+                       goto exit;
+               }
+
+               group_name = k210_group_names[pin];
+               func_name = k210_pcf_infos[func].name;
+
+               dev_dbg(pctldev->dev, "Pinmux %s: pin %u func %s\n",
+                       np->name, pin, func_name);
+
+               ret = pinctrl_utils_add_map_mux(pctldev, map, reserved_maps,
+                                               num_maps, group_name,
+                                               func_name);
+               if (ret < 0) {
+                       dev_err(pctldev->dev, "%pOF add mux map failed %d\n",
+                               np, ret);
+                       goto exit;
+               }
+
+               if (num_configs) {
+                       ret = pinctrl_utils_add_map_configs(pctldev, map,
+                                       reserved_maps, num_maps, group_name,
+                                       configs, num_configs,
+                                       PIN_MAP_TYPE_CONFIGS_PIN);
+                       if (ret < 0) {
+                               dev_err(pctldev->dev,
+                                       "%pOF add configs map failed %d\n",
+                                       np, ret);
+                               goto exit;
+                       }
+               }
+       }
+
+       ret = 0;
+
+exit:
+       kfree(configs);
+       return ret;
+}
+
+static int k210_pinctrl_dt_node_to_map(struct pinctrl_dev *pctldev,
+                                      struct device_node *np_config,
+                                      struct pinctrl_map **map,
+                                      unsigned int *num_maps)
+{
+       unsigned int reserved_maps;
+       struct device_node *np;
+       int ret;
+
+       reserved_maps = 0;
+       *map = NULL;
+       *num_maps = 0;
+
+       ret = k210_pinctrl_dt_subnode_to_map(pctldev, np_config, map,
+                                            &reserved_maps, num_maps);
+       if (ret < 0)
+               goto err;
+
+       for_each_available_child_of_node(np_config, np) {
+               ret = k210_pinctrl_dt_subnode_to_map(pctldev, np, map,
+                                                    &reserved_maps, num_maps);
+               if (ret < 0)
+                       goto err;
+       }
+       return 0;
+
+err:
+       pinctrl_utils_free_map(pctldev, *map, *num_maps);
+       return ret;
+}
+
+static const struct pinctrl_ops k210_pinctrl_ops = {
+       .get_groups_count = k210_pinctrl_get_groups_count,
+       .get_group_name = k210_pinctrl_get_group_name,
+       .get_group_pins = k210_pinctrl_get_group_pins,
+       .pin_dbg_show = k210_pinctrl_pin_dbg_show,
+       .dt_node_to_map = k210_pinctrl_dt_node_to_map,
+       .dt_free_map = pinconf_generic_dt_free_map,
+};
+
+static struct pinctrl_desc k210_pinctrl_desc = {
+       .name = "k210-pinctrl",
+       .pins = k210_pins,
+       .npins = K210_NPINS,
+       .pctlops = &k210_pinctrl_ops,
+       .pmxops = &k210_pinmux_ops,
+       .confops = &k210_pinconf_ops,
+       .custom_params = k210_pinconf_custom_params,
+       .num_custom_params = ARRAY_SIZE(k210_pinconf_custom_params),
+};
+
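+/*
+ * Enable the input tie for every function whose default mode is IN_TIE
+ * (the SPI arbitration inputs), forcing its input to the tie value.
+ * Tie values are written before the corresponding enable bits.
+ */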
+static void k210_fpioa_init_ties(struct k210_fpioa_data *pdata)
+{
+       struct k210_fpioa __iomem *fpioa = pdata->fpioa;
+       u32 val;
+       int i, j;
+
+       dev_dbg(pdata->dev, "Init pin ties\n");
+
+       /* Init pin functions input ties */
+       for (i = 0; i < ARRAY_SIZE(fpioa->tie_en); i++) {
+               val = 0;
+               for (j = 0; j < 32; j++) {
+                       if (k210_pcf_infos[i * 32 + j].mode_id ==
+                           K210_PC_DEFAULT_IN_TIE) {
+                               dev_dbg(pdata->dev,
+                                       "tie_en function %d (%s)\n",
+                                       i * 32 + j,
+                                       k210_pcf_infos[i * 32 + j].name);
+                               val |= BIT(j);
+                       }
+               }
+
+               /* Set value before enable */
+               writel(val, &fpioa->tie_val[i]);
+               writel(val, &fpioa->tie_en[i]);
+       }
+}
+
+static int k210_fpioa_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *np = dev->of_node;
+       struct k210_fpioa_data *pdata;
+       int ret;
+
+       dev_info(dev, "K210 FPIOA pin controller\n");
+
+       pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL);
+       if (!pdata)
+               return -ENOMEM;
+
+       pdata->dev = dev;
+       platform_set_drvdata(pdev, pdata);
+
+       pdata->fpioa = devm_platform_ioremap_resource(pdev, 0);
+       if (IS_ERR(pdata->fpioa))
+               return PTR_ERR(pdata->fpioa);
+
+       pdata->clk = devm_clk_get(dev, "ref");
+       if (IS_ERR(pdata->clk))
+               return PTR_ERR(pdata->clk);
+
+       ret = clk_prepare_enable(pdata->clk);
+       if (ret)
+               return ret;
+
+       pdata->pclk = devm_clk_get_optional(dev, "pclk");
+       if (IS_ERR(pdata->pclk))
+               return PTR_ERR(pdata->pclk);
+
+       clk_prepare_enable(pdata->pclk);
+
+       pdata->sysctl_map =
+               syscon_regmap_lookup_by_phandle_args(np,
+                                               "canaan,k210-sysctl-power",
+                                               1, &pdata->power_offset);
+       if (IS_ERR(pdata->sysctl_map))
+               return PTR_ERR(pdata->sysctl_map);
+
+       k210_fpioa_init_ties(pdata);
+
+       pdata->pctl = pinctrl_register(&k210_pinctrl_desc, dev, (void *)pdata);
+       if (IS_ERR(pdata->pctl))
+               return PTR_ERR(pdata->pctl);
+
+       return 0;
+}
+
+static const struct of_device_id k210_fpioa_dt_ids[] = {
+       { .compatible = "canaan,k210-fpioa" },
+       { /* sentinel */ },
+};
+
+static struct platform_driver k210_fpioa_driver = {
+       .probe  = k210_fpioa_probe,
+       .driver = {
+               .name           = "k210-fpioa",
+               .of_match_table = k210_fpioa_dt_ids,
+       },
+};
+builtin_platform_driver(k210_fpioa_driver);
index f35edb0..c12fa57 100644 (file)
@@ -572,7 +572,7 @@ static void microchip_sgpio_irq_settype(struct irq_data *data,
        /* Type value spread over 2 registers sets: low, high bit */
        sgpio_clrsetbits(bank->priv, REG_INT_TRIGGER, addr.bit,
                         BIT(addr.port), (!!(type & 0x1)) << addr.port);
-       sgpio_clrsetbits(bank->priv, REG_INT_TRIGGER + SGPIO_MAX_BITS, addr.bit,
+       sgpio_clrsetbits(bank->priv, REG_INT_TRIGGER, SGPIO_MAX_BITS + addr.bit,
                         BIT(addr.port), (!!(type & 0x2)) << addr.port);
 
        if (type == SGPIO_INT_TRG_LEVEL)
index aa1a1c8..53a0bad 100644 (file)
@@ -3727,12 +3727,15 @@ static int __maybe_unused rockchip_pinctrl_suspend(struct device *dev)
 static int __maybe_unused rockchip_pinctrl_resume(struct device *dev)
 {
        struct rockchip_pinctrl *info = dev_get_drvdata(dev);
-       int ret = regmap_write(info->regmap_base, RK3288_GRF_GPIO6C_IOMUX,
-                              rk3288_grf_gpio6c_iomux |
-                              GPIO6C6_SEL_WRITE_ENABLE);
+       int ret;
 
-       if (ret)
-               return ret;
+       if (info->ctrl->type == RK3288) {
+               ret = regmap_write(info->regmap_base, RK3288_GRF_GPIO6C_IOMUX,
+                                  rk3288_grf_gpio6c_iomux |
+                                  GPIO6C6_SEL_WRITE_ENABLE);
+               if (ret)
+                       return ret;
+       }
 
        return pinctrl_force_default(info->pctl_dev);
 }
index 369ee20..2f19ab4 100644 (file)
@@ -392,7 +392,7 @@ static int lpi_config_set(struct pinctrl_dev *pctldev, unsigned int group,
                          unsigned long *configs, unsigned int nconfs)
 {
        struct lpi_pinctrl *pctrl = dev_get_drvdata(pctldev->dev);
-       unsigned int param, arg, pullup, strength;
+       unsigned int param, arg, pullup = LPI_GPIO_BIAS_DISABLE, strength = 2;
        bool value, output_enabled = false;
        const struct lpi_pingroup *g;
        unsigned long sval;
index 8daccd5..9d41abf 100644 (file)
@@ -1439,14 +1439,14 @@ static const struct msm_pingroup sc7280_groups[] = {
        [172] = PINGROUP(172, qdss, _, _, _, _, _, _, _, _),
        [173] = PINGROUP(173, qdss, _, _, _, _, _, _, _, _),
        [174] = PINGROUP(174, qdss, _, _, _, _, _, _, _, _),
-       [175] = UFS_RESET(ufs_reset, 0x1be000),
-       [176] = SDC_QDSD_PINGROUP(sdc1_rclk, 0x1b3000, 15, 0),
-       [177] = SDC_QDSD_PINGROUP(sdc1_clk, 0x1b3000, 13, 6),
-       [178] = SDC_QDSD_PINGROUP(sdc1_cmd, 0x1b3000, 11, 3),
-       [179] = SDC_QDSD_PINGROUP(sdc1_data, 0x1b3000, 9, 0),
-       [180] = SDC_QDSD_PINGROUP(sdc2_clk, 0x1b4000, 14, 6),
-       [181] = SDC_QDSD_PINGROUP(sdc2_cmd, 0x1b4000, 11, 3),
-       [182] = SDC_QDSD_PINGROUP(sdc2_data, 0x1b4000, 9, 0),
+       [175] = UFS_RESET(ufs_reset, 0xbe000),
+       [176] = SDC_QDSD_PINGROUP(sdc1_rclk, 0xb3004, 0, 6),
+       [177] = SDC_QDSD_PINGROUP(sdc1_clk, 0xb3000, 13, 6),
+       [178] = SDC_QDSD_PINGROUP(sdc1_cmd, 0xb3000, 11, 3),
+       [179] = SDC_QDSD_PINGROUP(sdc1_data, 0xb3000, 9, 0),
+       [180] = SDC_QDSD_PINGROUP(sdc2_clk, 0xb4000, 14, 6),
+       [181] = SDC_QDSD_PINGROUP(sdc2_cmd, 0xb4000, 11, 3),
+       [182] = SDC_QDSD_PINGROUP(sdc2_data, 0xb4000, 9, 0),
 };
 
 static const struct msm_pinctrl_soc_data sc7280_pinctrl = {
index 2b5b0e2..5aaf57b 100644 (file)
@@ -423,7 +423,7 @@ static const char * const gpio_groups[] = {
 
 static const char * const qdss_stm_groups[] = {
        "gpio0", "gpio1", "gpio2", "gpio3", "gpio4", "gpio5", "gpio6", "gpio7", "gpio12", "gpio13",
-       "gpio14", "gpio15", "gpio16", "gpio17", "gpio18", "gpio19" "gpio20", "gpio21", "gpio22",
+       "gpio14", "gpio15", "gpio16", "gpio17", "gpio18", "gpio19", "gpio20", "gpio21", "gpio22",
        "gpio23", "gpio44", "gpio45", "gpio52", "gpio53", "gpio56", "gpio57", "gpio61", "gpio62",
        "gpio63", "gpio64", "gpio65", "gpio66",
 };
index 1ab207e..b67539f 100644 (file)
@@ -212,9 +212,6 @@ struct goldfish_pipe_dev {
        int version;
        unsigned char __iomem *base;
 
-       /* an irq tasklet to run goldfish_interrupt_task */
-       struct tasklet_struct irq_tasklet;
-
        struct miscdevice miscdev;
 };
 
@@ -577,10 +574,10 @@ static struct goldfish_pipe *signalled_pipes_pop_front(
        return pipe;
 }
 
-static void goldfish_interrupt_task(unsigned long dev_addr)
+static irqreturn_t goldfish_interrupt_task(int irq, void *dev_addr)
 {
        /* Iterate over the signalled pipes and wake them one by one */
-       struct goldfish_pipe_dev *dev = (struct goldfish_pipe_dev *)dev_addr;
+       struct goldfish_pipe_dev *dev = dev_addr;
        struct goldfish_pipe *pipe;
        int wakes;
 
@@ -599,13 +596,14 @@ static void goldfish_interrupt_task(unsigned long dev_addr)
                 */
                wake_up_interruptible(&pipe->wake_queue);
        }
+       return IRQ_HANDLED;
 }
 
 static void goldfish_pipe_device_deinit(struct platform_device *pdev,
                                        struct goldfish_pipe_dev *dev);
 
 /*
- * The general idea of the interrupt handling:
+ * The general idea of the (threaded) interrupt handling:
  *
  *  1. device raises an interrupt if there's at least one signalled pipe
  *  2. IRQ handler reads the signalled pipes and their count from the device
@@ -614,8 +612,8 @@ static void goldfish_pipe_device_deinit(struct platform_device *pdev,
  *      otherwise it leaves it raised, so IRQ handler will be called
  *      again for the next chunk
  *  4. IRQ handler adds all returned pipes to the device's signalled pipes list
- *  5. IRQ handler launches a tasklet to process the signalled pipes from the
- *      list in a separate context
+ *  5. IRQ handler defers processing the signalled pipes from the list in a
+ *      separate context
  */
 static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
 {
@@ -645,8 +643,7 @@ static irqreturn_t goldfish_pipe_interrupt(int irq, void *dev_id)
 
        spin_unlock_irqrestore(&dev->lock, flags);
 
-       tasklet_schedule(&dev->irq_tasklet);
-       return IRQ_HANDLED;
+       return IRQ_WAKE_THREAD;
 }
 
 static int get_free_pipe_id_locked(struct goldfish_pipe_dev *dev)
@@ -811,12 +808,10 @@ static int goldfish_pipe_device_init(struct platform_device *pdev,
 {
        int err;
 
-       tasklet_init(&dev->irq_tasklet, &goldfish_interrupt_task,
-                    (unsigned long)dev);
-
-       err = devm_request_irq(&pdev->dev, dev->irq,
-                              goldfish_pipe_interrupt,
-                              IRQF_SHARED, "goldfish_pipe", dev);
+       err = devm_request_threaded_irq(&pdev->dev, dev->irq,
+                                       goldfish_pipe_interrupt,
+                                       goldfish_interrupt_task,
+                                       IRQF_SHARED, "goldfish_pipe", dev);
        if (err) {
                dev_err(&pdev->dev, "unable to allocate IRQ for v2\n");
                return err;
@@ -874,7 +869,6 @@ static void goldfish_pipe_device_deinit(struct platform_device *pdev,
                                        struct goldfish_pipe_dev *dev)
 {
        misc_deregister(&dev->miscdev);
-       tasklet_kill(&dev->irq_tasklet);
        kfree(dev->pipes);
        free_page((unsigned long)dev->buffers);
 }
index 56353e8..461ec61 100644 (file)
@@ -450,7 +450,7 @@ config IDEAPAD_LAPTOP
        depends on BACKLIGHT_CLASS_DEVICE
        depends on ACPI_VIDEO || ACPI_VIDEO = n
        depends on ACPI_WMI || ACPI_WMI = n
-       depends on ACPI_PLATFORM_PROFILE
+       select ACPI_PLATFORM_PROFILE
        select INPUT_SPARSEKMAP
        select NEW_LEDS
        select LEDS_CLASS
@@ -484,7 +484,7 @@ config THINKPAD_ACPI
        depends on RFKILL || RFKILL = n
        depends on ACPI_VIDEO || ACPI_VIDEO = n
        depends on BACKLIGHT_CLASS_DEVICE
-       depends on ACPI_PLATFORM_PROFILE
+       select ACPI_PLATFORM_PROFILE
        select HWMON
        select NVRAM
        select NEW_LEDS
@@ -1173,15 +1173,20 @@ config INTEL_PMC_CORE
        depends on PCI
        help
          The Intel Platform Controller Hub for Intel Core SoCs provides access
-         to Power Management Controller registers via a PCI interface. This
+         to Power Management Controller registers via various interfaces. This
          driver can utilize debugging capabilities and supported features as
-         exposed by the Power Management Controller.
+         exposed by the Power Management Controller. It may also perform some
+         tasks in the PMC to enable transitions into the SLPS0 state.
+         It should be selected on all Intel platforms supported by the driver.
 
          Supported features:
                - SLP_S0_RESIDENCY counter
                - PCH IP Power Gating status
-               - LTR Ignore
+               - LTR Ignore / LTR Show
                - MPHY/PLL gating status (Sunrisepoint PCH only)
+               - SLPS0 Debug registers (Cannonlake/Icelake PCH)
+               - Low Power Mode registers (Tigerlake and beyond)
+               - PMC quirks as needed to enable SLPS0/S0ix
 
 config INTEL_PMT_CLASS
        tristate
index 80f4b77..091e48c 100644 (file)
@@ -185,5 +185,8 @@ void exit_enum_attributes(void)
                        sysfs_remove_group(wmi_priv.enumeration_data[instance_id].attr_name_kobj,
                                                                &enumeration_attr_group);
        }
+       wmi_priv.enumeration_instances_count = 0;
+
        kfree(wmi_priv.enumeration_data);
+       wmi_priv.enumeration_data = NULL;
 }
index 75aedbb..8a49ba6 100644 (file)
@@ -175,5 +175,8 @@ void exit_int_attributes(void)
                        sysfs_remove_group(wmi_priv.integer_data[instance_id].attr_name_kobj,
                                                                &integer_attr_group);
        }
+       wmi_priv.integer_instances_count = 0;
+
        kfree(wmi_priv.integer_data);
+       wmi_priv.integer_data = NULL;
 }
index 3abcd95..834b3e8 100644 (file)
@@ -183,5 +183,8 @@ void exit_po_attributes(void)
                        sysfs_remove_group(wmi_priv.po_data[instance_id].attr_name_kobj,
                                                                &po_attr_group);
        }
+       wmi_priv.po_instances_count = 0;
+
        kfree(wmi_priv.po_data);
+       wmi_priv.po_data = NULL;
 }
index ac75dce..5525378 100644 (file)
@@ -155,5 +155,8 @@ void exit_str_attributes(void)
                        sysfs_remove_group(wmi_priv.str_data[instance_id].attr_name_kobj,
                                                                &str_attr_group);
        }
+       wmi_priv.str_instances_count = 0;
+
        kfree(wmi_priv.str_data);
+       wmi_priv.str_data = NULL;
 }
index cb81010..7410cca 100644 (file)
@@ -210,25 +210,17 @@ static struct kobj_attribute pending_reboot = __ATTR_RO(pending_reboot);
  */
 static int create_attributes_level_sysfs_files(void)
 {
-       int ret = sysfs_create_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr);
+       int ret;
 
-       if (ret) {
-               pr_debug("could not create reset_bios file\n");
+       ret = sysfs_create_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr);
+       if (ret)
                return ret;
-       }
 
        ret = sysfs_create_file(&wmi_priv.main_dir_kset->kobj, &pending_reboot.attr);
-       if (ret) {
-               pr_debug("could not create changing_pending_reboot file\n");
-               sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr);
-       }
-       return ret;
-}
+       if (ret)
+               return ret;
 
-static void release_reset_bios_data(void)
-{
-       sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr);
-       sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &pending_reboot.attr);
+       return 0;
 }
 
 static ssize_t wmi_sysman_attr_show(struct kobject *kobj, struct attribute *attr,
@@ -373,8 +365,6 @@ static void destroy_attribute_objs(struct kset *kset)
  */
 static void release_attributes_data(void)
 {
-       release_reset_bios_data();
-
        mutex_lock(&wmi_priv.mutex);
        exit_enum_attributes();
        exit_int_attributes();
@@ -386,11 +376,13 @@ static void release_attributes_data(void)
                wmi_priv.authentication_dir_kset = NULL;
        }
        if (wmi_priv.main_dir_kset) {
+               sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &reset_bios.attr);
+               sysfs_remove_file(&wmi_priv.main_dir_kset->kobj, &pending_reboot.attr);
                destroy_attribute_objs(wmi_priv.main_dir_kset);
                kset_unregister(wmi_priv.main_dir_kset);
+               wmi_priv.main_dir_kset = NULL;
        }
        mutex_unlock(&wmi_priv.mutex);
-
 }
 
 /**
@@ -497,7 +489,6 @@ nextobj:
 
 err_attr_init:
        mutex_unlock(&wmi_priv.mutex);
-       release_attributes_data();
        kfree(obj);
        return retval;
 }
@@ -513,102 +504,91 @@ static int __init sysman_init(void)
        }
 
        ret = init_bios_attr_set_interface();
-       if (ret || !wmi_priv.bios_attr_wdev) {
-               pr_debug("failed to initialize set interface\n");
-               goto fail_set_interface;
-       }
+       if (ret)
+               return ret;
 
        ret = init_bios_attr_pass_interface();
-       if (ret || !wmi_priv.password_attr_wdev) {
-               pr_debug("failed to initialize pass interface\n");
-               goto fail_pass_interface;
+       if (ret)
+               goto err_exit_bios_attr_set_interface;
+
+       if (!wmi_priv.bios_attr_wdev || !wmi_priv.password_attr_wdev) {
+               pr_debug("failed to find set or pass interface\n");
+               ret = -ENODEV;
+               goto err_exit_bios_attr_pass_interface;
        }
 
        ret = class_register(&firmware_attributes_class);
        if (ret)
-               goto fail_class;
+               goto err_exit_bios_attr_pass_interface;
 
        wmi_priv.class_dev = device_create(&firmware_attributes_class, NULL, MKDEV(0, 0),
                                  NULL, "%s", DRIVER_NAME);
        if (IS_ERR(wmi_priv.class_dev)) {
                ret = PTR_ERR(wmi_priv.class_dev);
-               goto fail_classdev;
+               goto err_unregister_class;
        }
 
        wmi_priv.main_dir_kset = kset_create_and_add("attributes", NULL,
                                                     &wmi_priv.class_dev->kobj);
        if (!wmi_priv.main_dir_kset) {
                ret = -ENOMEM;
-               goto fail_main_kset;
+               goto err_destroy_classdev;
        }
 
        wmi_priv.authentication_dir_kset = kset_create_and_add("authentication", NULL,
                                                                &wmi_priv.class_dev->kobj);
        if (!wmi_priv.authentication_dir_kset) {
                ret = -ENOMEM;
-               goto fail_authentication_kset;
+               goto err_release_attributes_data;
        }
 
        ret = create_attributes_level_sysfs_files();
        if (ret) {
                pr_debug("could not create reset BIOS attribute\n");
-               goto fail_reset_bios;
+               goto err_release_attributes_data;
        }
 
        ret = init_bios_attributes(ENUM, DELL_WMI_BIOS_ENUMERATION_ATTRIBUTE_GUID);
        if (ret) {
                pr_debug("failed to populate enumeration type attributes\n");
-               goto fail_create_group;
+               goto err_release_attributes_data;
        }
 
        ret = init_bios_attributes(INT, DELL_WMI_BIOS_INTEGER_ATTRIBUTE_GUID);
        if (ret) {
                pr_debug("failed to populate integer type attributes\n");
-               goto fail_create_group;
+               goto err_release_attributes_data;
        }
 
        ret = init_bios_attributes(STR, DELL_WMI_BIOS_STRING_ATTRIBUTE_GUID);
        if (ret) {
                pr_debug("failed to populate string type attributes\n");
-               goto fail_create_group;
+               goto err_release_attributes_data;
        }
 
        ret = init_bios_attributes(PO, DELL_WMI_BIOS_PASSOBJ_ATTRIBUTE_GUID);
        if (ret) {
                pr_debug("failed to populate pass object type attributes\n");
-               goto fail_create_group;
+               goto err_release_attributes_data;
        }
 
        return 0;
 
-fail_create_group:
+err_release_attributes_data:
        release_attributes_data();
 
-fail_reset_bios:
-       if (wmi_priv.authentication_dir_kset) {
-               kset_unregister(wmi_priv.authentication_dir_kset);
-               wmi_priv.authentication_dir_kset = NULL;
-       }
-
-fail_authentication_kset:
-       if (wmi_priv.main_dir_kset) {
-               kset_unregister(wmi_priv.main_dir_kset);
-               wmi_priv.main_dir_kset = NULL;
-       }
-
-fail_main_kset:
+err_destroy_classdev:
        device_destroy(&firmware_attributes_class, MKDEV(0, 0));
 
-fail_classdev:
+err_unregister_class:
        class_unregister(&firmware_attributes_class);
 
-fail_class:
+err_exit_bios_attr_pass_interface:
        exit_bios_attr_pass_interface();
 
-fail_pass_interface:
+err_exit_bios_attr_set_interface:
        exit_bios_attr_set_interface();
 
-fail_set_interface:
        return ret;
 }
 
index 2f5b8d0..078648a 100644 (file)
@@ -90,6 +90,13 @@ static const struct dmi_system_id button_array_table[] = {
                        DMI_MATCH(DMI_PRODUCT_NAME, "HP Spectre x2 Detachable"),
                },
        },
+       {
+               .ident = "Lenovo ThinkPad X1 Tablet Gen 2",
+               .matches = {
+                       DMI_MATCH(DMI_SYS_VENDOR, "LENOVO"),
+                       DMI_MATCH(DMI_PRODUCT_FAMILY, "ThinkPad X1 Tablet Gen 2"),
+               },
+       },
        { }
 };
 
@@ -476,11 +483,16 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
                        goto wakeup;
 
                /*
-                * Switch events will wake the device and report the new switch
-                * position to the input subsystem.
+                * Some devices send (duplicate) tablet-mode events when moved
+                * around even though the mode has not changed; and they do this
+                * even when suspended.
+                * Update the switch state in case it changed and then return
+                * without waking up to avoid spurious wakeups.
                 */
-               if (priv->switches && (event == 0xcc || event == 0xcd))
-                       goto wakeup;
+               if (event == 0xcc || event == 0xcd) {
+                       report_tablet_mode_event(priv->switches, event);
+                       return;
+               }
 
                /* Wake up on 5-button array events only. */
                if (event == 0xc0 || !priv->array)
@@ -494,9 +506,6 @@ static void notify_handler(acpi_handle handle, u32 event, void *context)
 wakeup:
                pm_wakeup_hard_event(&device->dev);
 
-               if (report_tablet_mode_event(priv->switches, event))
-                       return;
-
                return;
        }
 
index 8a8017f..3fdf4cb 100644 (file)
@@ -48,8 +48,16 @@ static const struct key_entry intel_vbtn_keymap[] = {
 };
 
 static const struct key_entry intel_vbtn_switchmap[] = {
-       { KE_SW,     0xCA, { .sw = { SW_DOCK, 1 } } },          /* Docked */
-       { KE_SW,     0xCB, { .sw = { SW_DOCK, 0 } } },          /* Undocked */
+       /*
+        * SW_DOCK should only be reported for docking stations, but DSDTs using the
+        * intel-vbtn code always seem to use this for 2-in-1s / convertibles and set
+        * SW_DOCK=1 when in laptop-mode (in tandem with setting SW_TABLET_MODE=0).
+        * This causes userspace to think the laptop is docked to a port-replicator
+        * and to disable suspend-on-lid-close, which is undesirable.
+        * Map the dock events to KEY_IGNORE to avoid this broken SW_DOCK reporting.
+        */
+       { KE_IGNORE, 0xCA, { .sw = { SW_DOCK, 1 } } },          /* Docked */
+       { KE_IGNORE, 0xCB, { .sw = { SW_DOCK, 0 } } },          /* Undocked */
        { KE_SW,     0xCC, { .sw = { SW_TABLET_MODE, 1 } } },   /* Tablet */
        { KE_SW,     0xCD, { .sw = { SW_TABLET_MODE, 0 } } },   /* Laptop */
        { KE_END }
index ee2f757..b5888ae 100644 (file)
@@ -863,34 +863,45 @@ out_unlock:
 }
 DEFINE_SHOW_ATTRIBUTE(pmc_core_pll);
 
-static ssize_t pmc_core_ltr_ignore_write(struct file *file,
-                                        const char __user *userbuf,
-                                        size_t count, loff_t *ppos)
+static int pmc_core_send_ltr_ignore(u32 value)
 {
        struct pmc_dev *pmcdev = &pmc;
        const struct pmc_reg_map *map = pmcdev->map;
-       u32 val, buf_size, fd;
-       int err;
-
-       buf_size = count < 64 ? count : 64;
-
-       err = kstrtou32_from_user(userbuf, buf_size, 10, &val);
-       if (err)
-               return err;
+       u32 reg;
+       int err = 0;
 
        mutex_lock(&pmcdev->lock);
 
-       if (val > map->ltr_ignore_max) {
+       if (value > map->ltr_ignore_max) {
                err = -EINVAL;
                goto out_unlock;
        }
 
-       fd = pmc_core_reg_read(pmcdev, map->ltr_ignore_offset);
-       fd |= (1U << val);
-       pmc_core_reg_write(pmcdev, map->ltr_ignore_offset, fd);
+       reg = pmc_core_reg_read(pmcdev, map->ltr_ignore_offset);
+       reg |= BIT(value);
+       pmc_core_reg_write(pmcdev, map->ltr_ignore_offset, reg);
 
 out_unlock:
        mutex_unlock(&pmcdev->lock);
+
+       return err;
+}
+
+static ssize_t pmc_core_ltr_ignore_write(struct file *file,
+                                        const char __user *userbuf,
+                                        size_t count, loff_t *ppos)
+{
+       u32 buf_size, value;
+       int err;
+
+       buf_size = min_t(u32, count, 64);
+
+       err = kstrtou32_from_user(userbuf, buf_size, 10, &value);
+       if (err)
+               return err;
+
+       err = pmc_core_send_ltr_ignore(value);
+
        return err == 0 ? count : err;
 }
 
@@ -1244,6 +1255,15 @@ static int pmc_core_probe(struct platform_device *pdev)
        pmcdev->pmc_xram_read_bit = pmc_core_check_read_lock_bit();
        dmi_check_system(pmc_core_dmi_table);
 
+       /*
+        * On TGL, due to a hardware limitation, the GBE LTR blocks PC10 when
+        * a cable is attached. Tell the PMC to ignore it.
+        */
+       if (pmcdev->map == &tgl_reg_map) {
+               dev_dbg(&pdev->dev, "ignoring GBE LTR\n");
+               pmc_core_send_ltr_ignore(3);
+       }
+
        pmc_core_dbgfs_register(pmcdev);
 
        device_initialized = true;
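
The extracted pmc_core_send_ltr_ignore() helper makes the LTR-ignore write reusable outside the debugfs path; its effect is a read-modify-write that sets one bit per IP in the LTR ignore register:

	reg = pmc_core_reg_read(pmcdev, map->ltr_ignore_offset);
	reg |= BIT(3);		/* 3 = the GBE IP on TGL, per the quirk above */
	pmc_core_reg_write(pmcdev, map->ltr_ignore_offset, reg);

after which the PMC disregards that IP's latency-tolerance reports when deciding whether PC10 can be entered.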
index c8939fb..ee2b3bb 100644 (file)
@@ -173,7 +173,7 @@ static int intel_pmt_dev_register(struct intel_pmt_entry *entry,
                                  struct intel_pmt_namespace *ns,
                                  struct device *parent)
 {
-       struct resource res;
+       struct resource res = {0};
        struct device *dev;
        int ret;
 
index 97dd749..92d315a 100644 (file)
 #define CRASH_TYPE_OOBMSM      1
 
 /* Control Flags */
-#define CRASHLOG_FLAG_DISABLE          BIT(27)
+#define CRASHLOG_FLAG_DISABLE          BIT(28)
 
 /*
- * Bits 28 and 29 control the state of bit 31.
+ * Bits 29 and 30 control the state of bit 31.
  *
- * Bit 28 will clear bit 31, if set, allowing a new crashlog to be captured.
- * Bit 29 will immediately trigger a crashlog to be generated, setting bit 31.
- * Bit 30 is read-only and reserved as 0.
+ * Bit 29 will clear bit 31, if set, allowing a new crashlog to be captured.
+ * Bit 30 will immediately trigger a crashlog to be generated, setting bit 31.
  * Bit 31 is the read-only status with a 1 indicating log is complete.
  */
-#define CRASHLOG_FLAG_TRIGGER_CLEAR    BIT(28)
-#define CRASHLOG_FLAG_TRIGGER_EXECUTE  BIT(29)
+#define CRASHLOG_FLAG_TRIGGER_CLEAR    BIT(29)
+#define CRASHLOG_FLAG_TRIGGER_EXECUTE  BIT(30)
 #define CRASHLOG_FLAG_TRIGGER_COMPLETE BIT(31)
 #define CRASHLOG_FLAG_TRIGGER_MASK     GENMASK(31, 28)
 
index 8c5fd82..80abc70 100644 (file)
@@ -17,7 +17,6 @@
 static int intel_scu_pci_probe(struct pci_dev *pdev,
                               const struct pci_device_id *id)
 {
-       void (*setup_fn)(void) = (void (*)(void))id->driver_data;
        struct intel_scu_ipc_data scu_data = {};
        struct intel_scu_ipc_dev *scu;
        int ret;
@@ -30,27 +29,14 @@ static int intel_scu_pci_probe(struct pci_dev *pdev,
        scu_data.irq = pdev->irq;
 
        scu = intel_scu_ipc_register(&pdev->dev, &scu_data);
-       if (IS_ERR(scu))
-               return PTR_ERR(scu);
-
-       if (setup_fn)
-               setup_fn();
-       return 0;
-}
-
-static void intel_mid_scu_setup(void)
-{
-       intel_scu_devices_create();
+       return PTR_ERR_OR_ZERO(scu);
 }
 
 static const struct pci_device_id pci_ids[] = {
-       { PCI_VDEVICE(INTEL, 0x080e),
-         .driver_data = (kernel_ulong_t)intel_mid_scu_setup },
-       { PCI_VDEVICE(INTEL, 0x08ea),
-         .driver_data = (kernel_ulong_t)intel_mid_scu_setup },
+       { PCI_VDEVICE(INTEL, 0x080e) },
+       { PCI_VDEVICE(INTEL, 0x08ea) },
        { PCI_VDEVICE(INTEL, 0x0a94) },
-       { PCI_VDEVICE(INTEL, 0x11a0),
-         .driver_data = (kernel_ulong_t)intel_mid_scu_setup },
+       { PCI_VDEVICE(INTEL, 0x11a0) },
        { PCI_VDEVICE(INTEL, 0x1a94) },
        { PCI_VDEVICE(INTEL, 0x5a94) },
        {}
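
PTR_ERR_OR_ZERO() used above is shorthand for the usual error-pointer tail:

	if (IS_ERR(scu))
		return PTR_ERR(scu);
	return 0;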
index b881044..0d9e2dd 100644 (file)
@@ -4081,13 +4081,19 @@ static bool hotkey_notify_6xxx(const u32 hkey,
 
        case TP_HKEY_EV_KEY_NUMLOCK:
        case TP_HKEY_EV_KEY_FN:
-       case TP_HKEY_EV_KEY_FN_ESC:
                /* key press events, we just ignore them as long as the EC
                 * is still reporting them in the normal keyboard stream */
                *send_acpi_ev = false;
                *ignore_acpi_ev = true;
                return true;
 
+       case TP_HKEY_EV_KEY_FN_ESC:
+               /* Get the media key status to force the status LED to update */
+               acpi_evalf(hkey_handle, NULL, "GMKS", "v");
+               *send_acpi_ev = false;
+               *ignore_acpi_ev = true;
+               return true;
+
        case TP_HKEY_EV_TABLET_CHANGED:
                tpacpi_input_send_tabletsw();
                hotkey_tablet_mode_notify_change();
@@ -9845,6 +9851,11 @@ static struct ibm_struct lcdshadow_driver_data = {
  * Thinkpad sensor interfaces
  */
 
+#define DYTC_CMD_QUERY        0 /* To get DYTC status - enable/revision */
+#define DYTC_QUERY_ENABLE_BIT 8  /* Bit        8 - 0 = disabled, 1 = enabled */
+#define DYTC_QUERY_SUBREV_BIT 16 /* Bits 16 - 27 - sub revision */
+#define DYTC_QUERY_REV_BIT    28 /* Bits 28 - 31 - revision */
+
 #define DYTC_CMD_GET          2 /* To get current IC function and mode */
 #define DYTC_GET_LAPMODE_BIT 17 /* Set when in lapmode */
 
@@ -9855,6 +9866,7 @@ static bool has_palmsensor;
 static bool has_lapsensor;
 static bool palm_state;
 static bool lap_state;
+static int dytc_version;
 
 static int dytc_command(int command, int *output)
 {
@@ -9869,6 +9881,33 @@ static int dytc_command(int command, int *output)
        return 0;
 }
 
+static int dytc_get_version(void)
+{
+       int err, output;
+
+       /* Check if we've been called before - and just return cached value */
+       if (dytc_version)
+               return dytc_version;
+
+       /* Otherwise query DYTC and extract version information */
+       err = dytc_command(DYTC_CMD_QUERY, &output);
+       /*
+        * If support isn't available (ENODEV) then don't return an error
+        * and don't create the sysfs group
+        */
+       if (err == -ENODEV)
+               return 0;
+       /* For all other errors we can flag the failure */
+       if (err)
+               return err;
+
+       /* Check DYTC is enabled and supports mode setting */
+       if (output & BIT(DYTC_QUERY_ENABLE_BIT))
+               dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF;
+
+       return 0;
+}
+
 static int lapsensor_get(bool *present, bool *state)
 {
        int output, err;
@@ -9974,7 +10013,18 @@ static int tpacpi_proxsensor_init(struct ibm_init_struct *iibm)
                if (err)
                        return err;
        }
-       if (has_lapsensor) {
+
+       /* Check if we know the DYTC version; if we don't, get it */
+       if (!dytc_version) {
+               err = dytc_get_version();
+               if (err)
+                       return err;
+       }
+       /*
+        * Platforms before DYTC version 5 claim to have a lap sensor, but it doesn't work, so we
+        * ignore them
+        */
+       if (has_lapsensor && (dytc_version >= 5)) {
                err = sysfs_create_file(&tpacpi_pdev->dev.kobj, &dev_attr_dytc_lapmode.attr);
                if (err)
                        return err;
@@ -9999,14 +10049,9 @@ static struct ibm_struct proxsensor_driver_data = {
  * DYTC Platform Profile interface
  */
 
-#define DYTC_CMD_QUERY        0 /* To get DYTC status - enable/revision */
 #define DYTC_CMD_SET          1 /* To enable/disable IC function mode */
 #define DYTC_CMD_RESET    0x1ff /* To reset back to default */
 
-#define DYTC_QUERY_ENABLE_BIT 8  /* Bit        8 - 0 = disabled, 1 = enabled */
-#define DYTC_QUERY_SUBREV_BIT 16 /* Bits 16 - 27 - sub revision */
-#define DYTC_QUERY_REV_BIT    28 /* Bits 28 - 31 - revision */
-
 #define DYTC_GET_FUNCTION_BIT 8  /* Bits  8-11 - function setting */
 #define DYTC_GET_MODE_BIT     12 /* Bits 12-15 - mode setting */
 
@@ -10142,8 +10187,13 @@ static int dytc_profile_set(struct platform_profile_handler *pprof,
                return err;
 
        if (profile == PLATFORM_PROFILE_BALANCED) {
-               /* To get back to balanced mode we just issue a reset command */
-               err = dytc_command(DYTC_CMD_RESET, &output);
+               /*
+                * To get back to balanced mode we need to issue a reset command.
+                * Note we still need to disable CQL mode beforehand and re-enable
+                * it afterwards, otherwise dytc_lapmode gets reset to 0 and stays
+                * stuck at 0 for approx. 30 minutes.
+                */
+               err = dytc_cql_command(DYTC_CMD_RESET, &output);
                if (err)
                        goto unlock;
        } else {
@@ -10211,28 +10261,28 @@ static int tpacpi_dytc_profile_init(struct ibm_init_struct *iibm)
        if (err)
                return err;
 
+       /* Check if we know the DYTC version; if we don't, get it */
+       if (!dytc_version) {
+               err = dytc_get_version();
+               if (err)
+                       return err;
+       }
        /* Check DYTC is enabled and supports mode setting */
-       if (output & BIT(DYTC_QUERY_ENABLE_BIT)) {
-               /* Only DYTC v5.0 and later has this feature. */
-               int dytc_version;
-
-               dytc_version = (output >> DYTC_QUERY_REV_BIT) & 0xF;
-               if (dytc_version >= 5) {
-                       dbg_printk(TPACPI_DBG_INIT,
-                                  "DYTC version %d: thermal mode available\n", dytc_version);
-                       /* Create platform_profile structure and register */
-                       err = platform_profile_register(&dytc_profile);
-                       /*
-                        * If for some reason platform_profiles aren't enabled
-                        * don't quit terminally.
-                        */
-                       if (err)
-                               return 0;
+       if (dytc_version >= 5) {
+               dbg_printk(TPACPI_DBG_INIT,
+                               "DYTC version %d: thermal mode available\n", dytc_version);
+               /* Create platform_profile structure and register */
+               err = platform_profile_register(&dytc_profile);
+               /*
+                * If for some reason platform_profiles aren't enabled
+                * don't quit terminally.
+                */
+               if (err)
+                       return 0;
 
-                       dytc_profile_available = true;
-                       /* Ensure initial values are correct */
-                       dytc_profile_refresh();
-               }
+               dytc_profile_available = true;
+               /* Ensure initial values are correct */
+               dytc_profile_refresh();
        }
        return 0;
 }
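
To illustrate the query layout defined by the moved DYTC_QUERY_* constants, a worked decode with a hypothetical result word:

	int output = 0x50010100;	/* hypothetical DYTC_CMD_QUERY result */

	bool enabled = output & BIT(DYTC_QUERY_ENABLE_BIT);	/* bit 8 set */
	int subrev = (output >> DYTC_QUERY_SUBREV_BIT) & 0xFFF;	/* 0x001 */
	int rev = (output >> DYTC_QUERY_REV_BIT) & 0xF;		/* 5 */

Here rev >= 5, so both the lap-sensor sysfs file and the platform-profile interface would be exposed.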
index 20b4325..8242e8c 100644 (file)
@@ -45,7 +45,7 @@ config IDLE_INJECT
          on a per CPU basis.
 
 config DTPM
-       bool "Power capping for Dynamic Thermal Power Management"
+       bool "Power capping for Dynamic Thermal Power Management (EXPERIMENTAL)"
        help
          This enables support for the power capping for the dynamic
          thermal power management userspace engine.
index 5a51cd3..c2185ec 100644 (file)
@@ -207,6 +207,9 @@ int dtpm_release_zone(struct powercap_zone *pcz)
        if (dtpm->ops)
                dtpm->ops->release(dtpm);
 
+       if (root == dtpm)
+               root = NULL;
+
        kfree(dtpm);
 
        return 0;
index beb5f74..08f4cf0 100644 (file)
@@ -189,15 +189,16 @@ int ptp_qoriq_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
        tmr_add = ptp_qoriq->tmr_add;
        adj = tmr_add;
 
-       /* calculate diff as adj*(scaled_ppm/65536)/1000000
-        * and round() to the nearest integer
+       /*
+        * Calculate diff and round() to the nearest integer
+        *
+        * diff = adj * (ppb / 1000000000)
+        *      = adj * scaled_ppm / 65536000000
         */
-       adj *= scaled_ppm;
-       diff = div_u64(adj, 8000000);
-       diff = (diff >> 13) + ((diff >> 12) & 1);
+       diff = mul_u64_u64_div_u64(adj, scaled_ppm, 32768000000);
+       diff = DIV64_U64_ROUND_UP(diff, 2);
 
        tmr_add = neg_adj ? tmr_add - diff : tmr_add + diff;
-
        ptp_qoriq->write(&regs->ctrl_regs->tmr_add, tmr_add);
 
        return 0;
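
As a worked example of the new arithmetic (hypothetical values): scaled_ppm expresses ppm in 16.16 fixed point, so scaled_ppm = 65536 requests a 1 ppm adjustment. With tmr_add = 0x20000000:

	/*
	 * diff = 0x20000000 * 65536 / 65536000000
	 *      = 536870912 / 1000000             ~= 536.87
	 *
	 * mul_u64_u64_div_u64(adj, 65536, 32768000000) = 1073
	 * DIV64_U64_ROUND_UP(1073, 2)                  =  537
	 *
	 * Halving the divisor and rounding the final divide-by-2 up
	 * yields round-to-nearest without losing 64-bit precision.
	 */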
index 0937e1c..9a4f66a 100644 (file)
@@ -611,14 +611,4 @@ config PWM_VT8500
          To compile this driver as a module, choose M here: the module
          will be called pwm-vt8500.
 
-config PWM_ZX
-       tristate "ZTE ZX PWM support"
-       depends on ARCH_ZX || COMPILE_TEST
-       depends on HAS_IOMEM
-       help
-         Generic PWM framework driver for ZTE ZX family SoCs.
-
-         To compile this driver as a module, choose M here: the module
-         will be called pwm-zx.
-
 endif
index 18b89d7..6374d3b 100644 (file)
@@ -57,4 +57,3 @@ obj-$(CONFIG_PWM_TIEHRPWM)    += pwm-tiehrpwm.o
 obj-$(CONFIG_PWM_TWL)          += pwm-twl.o
 obj-$(CONFIG_PWM_TWL_LED)      += pwm-twl-led.o
 obj-$(CONFIG_PWM_VT8500)       += pwm-vt8500.o
-obj-$(CONFIG_PWM_ZX)           += pwm-zx.o
index 5ede825..957b972 100644 (file)
@@ -37,16 +37,34 @@ struct iqs620_pwm_private {
        struct pwm_chip chip;
        struct notifier_block notifier;
        struct mutex lock;
-       bool out_en;
-       u8 duty_val;
+       unsigned int duty_scale;
 };
 
+static int iqs620_pwm_init(struct iqs620_pwm_private *iqs620_pwm,
+                          unsigned int duty_scale)
+{
+       struct iqs62x_core *iqs62x = iqs620_pwm->iqs62x;
+       int ret;
+
+       if (!duty_scale)
+               return regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS,
+                                         IQS620_PWR_SETTINGS_PWM_OUT, 0);
+
+       ret = regmap_write(iqs62x->regmap, IQS620_PWM_DUTY_CYCLE,
+                          duty_scale - 1);
+       if (ret)
+               return ret;
+
+       return regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS,
+                                 IQS620_PWR_SETTINGS_PWM_OUT, 0xff);
+}
+
 static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                            const struct pwm_state *state)
 {
        struct iqs620_pwm_private *iqs620_pwm;
-       struct iqs62x_core *iqs62x;
-       u64 duty_scale;
+       unsigned int duty_cycle;
+       unsigned int duty_scale;
        int ret;
 
        if (state->polarity != PWM_POLARITY_NORMAL)
@@ -56,7 +74,6 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
                return -EINVAL;
 
        iqs620_pwm = container_of(chip, struct iqs620_pwm_private, chip);
-       iqs62x = iqs620_pwm->iqs62x;
 
        /*
         * The duty cycle generated by the device is calculated as follows:
@@ -70,38 +87,18 @@ static int iqs620_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
         * For lower duty cycles (e.g. 0), the PWM output is simply disabled to
         * allow an external pull-down resistor to hold the GPIO3/LTX pin low.
         */
-       duty_scale = div_u64(state->duty_cycle * 256, IQS620_PWM_PERIOD_NS);
-
-       mutex_lock(&iqs620_pwm->lock);
-
-       if (!state->enabled || !duty_scale) {
-               ret = regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS,
-                                        IQS620_PWR_SETTINGS_PWM_OUT, 0);
-               if (ret)
-                       goto err_mutex;
-       }
+       duty_cycle = min_t(u64, state->duty_cycle, IQS620_PWM_PERIOD_NS);
+       duty_scale = duty_cycle * 256 / IQS620_PWM_PERIOD_NS;
 
-       if (duty_scale) {
-               u8 duty_val = min_t(u64, duty_scale - 1, 0xff);
+       if (!state->enabled)
+               duty_scale = 0;
 
-               ret = regmap_write(iqs62x->regmap, IQS620_PWM_DUTY_CYCLE,
-                                  duty_val);
-               if (ret)
-                       goto err_mutex;
-
-               iqs620_pwm->duty_val = duty_val;
-       }
-
-       if (state->enabled && duty_scale) {
-               ret = regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS,
-                                        IQS620_PWR_SETTINGS_PWM_OUT, 0xff);
-               if (ret)
-                       goto err_mutex;
-       }
+       mutex_lock(&iqs620_pwm->lock);
 
-       iqs620_pwm->out_en = state->enabled;
+       ret = iqs620_pwm_init(iqs620_pwm, duty_scale);
+       if (!ret)
+               iqs620_pwm->duty_scale = duty_scale;
 
-err_mutex:
        mutex_unlock(&iqs620_pwm->lock);
 
        return ret;
@@ -119,12 +116,11 @@ static void iqs620_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
        /*
         * Since the device cannot generate a 0% duty cycle, requests to do so
         * cause subsequent calls to iqs620_pwm_get_state to report the output
-        * as disabled with duty cycle equal to that which was in use prior to
-        * the request. This is not ideal, but is the best compromise based on
+        * as disabled. This is not ideal, but is the best compromise based on
         * the capabilities of the device.
         */
-       state->enabled = iqs620_pwm->out_en;
-       state->duty_cycle = DIV_ROUND_UP((iqs620_pwm->duty_val + 1) *
+       state->enabled = iqs620_pwm->duty_scale > 0;
+       state->duty_cycle = DIV_ROUND_UP(iqs620_pwm->duty_scale *
                                         IQS620_PWM_PERIOD_NS, 256);
 
        mutex_unlock(&iqs620_pwm->lock);
@@ -136,7 +132,6 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier,
                               unsigned long event_flags, void *context)
 {
        struct iqs620_pwm_private *iqs620_pwm;
-       struct iqs62x_core *iqs62x;
        int ret;
 
        if (!(event_flags & BIT(IQS62X_EVENT_SYS_RESET)))
@@ -144,7 +139,6 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier,
 
        iqs620_pwm = container_of(notifier, struct iqs620_pwm_private,
                                  notifier);
-       iqs62x = iqs620_pwm->iqs62x;
 
        mutex_lock(&iqs620_pwm->lock);
 
@@ -153,16 +147,8 @@ static int iqs620_pwm_notifier(struct notifier_block *notifier,
         * of a device reset, so nothing else is printed here unless there is
         * an additional failure.
         */
-       ret = regmap_write(iqs62x->regmap, IQS620_PWM_DUTY_CYCLE,
-                          iqs620_pwm->duty_val);
-       if (ret)
-               goto err_mutex;
+       ret = iqs620_pwm_init(iqs620_pwm, iqs620_pwm->duty_scale);
 
-       ret = regmap_update_bits(iqs62x->regmap, IQS620_PWR_SETTINGS,
-                                IQS620_PWR_SETTINGS_PWM_OUT,
-                                iqs620_pwm->out_en ? 0xff : 0);
-
-err_mutex:
        mutex_unlock(&iqs620_pwm->lock);
 
        if (ret) {
@@ -209,12 +195,14 @@ static int iqs620_pwm_probe(struct platform_device *pdev)
        ret = regmap_read(iqs62x->regmap, IQS620_PWR_SETTINGS, &val);
        if (ret)
                return ret;
-       iqs620_pwm->out_en = val & IQS620_PWR_SETTINGS_PWM_OUT;
 
-       ret = regmap_read(iqs62x->regmap, IQS620_PWM_DUTY_CYCLE, &val);
-       if (ret)
-               return ret;
-       iqs620_pwm->duty_val = val;
+       if (val & IQS620_PWR_SETTINGS_PWM_OUT) {
+               ret = regmap_read(iqs62x->regmap, IQS620_PWM_DUTY_CYCLE, &val);
+               if (ret)
+                       return ret;
+
+               iqs620_pwm->duty_scale = val + 1;
+       }
 
        iqs620_pwm->chip.dev = &pdev->dev;
        iqs620_pwm->chip.ops = &iqs620_pwm_ops;
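
A round-trip example of the new duty_scale bookkeeping (hypothetical numbers; the real period is IQS620_PWM_PERIOD_NS from the driver header):

	/* assume period = 1000000 ns, requested duty_cycle = 500000 ns */
	duty_scale = 500000 * 256 / 1000000;	/* = 128 */
	/* apply() writes duty_scale - 1 = 127 to IQS620_PWM_DUTY_CYCLE */
	/* get_state() reports DIV_ROUND_UP(128 * 1000000, 256) = 500000 */
	/* duty_scale == 0 encodes "output disabled" */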
index dc5133b..7ef4024 100644 (file)
@@ -289,7 +289,7 @@ static int lpc18xx_pwm_request(struct pwm_chip *chip, struct pwm_device *pwm)
                dev_err(lpc18xx_pwm->dev,
                        "maximum number of simultaneous channels reached\n");
                return -EBUSY;
-       };
+       }
 
        set_bit(event, &lpc18xx_pwm->event_map);
        lpc18xx_data->duty_event = event;
index 389a5e1..6ad7d0a 100644 (file)
@@ -72,6 +72,10 @@ static void rockchip_pwm_get_state(struct pwm_chip *chip,
        if (ret)
                return;
 
+       ret = clk_enable(pc->clk);
+       if (ret)
+               return;
+
        clk_rate = clk_get_rate(pc->clk);
 
        tmp = readl_relaxed(pc->base + pc->data->regs.period);
@@ -90,6 +94,7 @@ static void rockchip_pwm_get_state(struct pwm_chip *chip,
        else
                state->polarity = PWM_POLARITY_NORMAL;
 
+       clk_disable(pc->clk);
        clk_disable(pc->pclk);
 }
 
@@ -189,6 +194,10 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        if (ret)
                return ret;
 
+       ret = clk_enable(pc->clk);
+       if (ret)
+               return ret;
+
        pwm_get_state(pwm, &curstate);
        enabled = curstate.enabled;
 
@@ -208,6 +217,7 @@ static int rockchip_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
        }
 
 out:
+       clk_disable(pc->clk);
        clk_disable(pc->pclk);
 
        return ret;
@@ -288,6 +298,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev)
        const struct of_device_id *id;
        struct rockchip_pwm_chip *pc;
        u32 enable_conf, ctrl;
+       bool enabled;
        int ret, count;
 
        id = of_match_device(rockchip_pwm_dt_ids, &pdev->dev);
@@ -307,7 +318,7 @@ static int rockchip_pwm_probe(struct platform_device *pdev)
                pc->clk = devm_clk_get(&pdev->dev, NULL);
                if (IS_ERR(pc->clk))
                        return dev_err_probe(&pdev->dev, PTR_ERR(pc->clk),
-                                            "Can't get bus clk\n");
+                                            "Can't get PWM clk\n");
        }
 
        count = of_count_phandle_with_args(pdev->dev.of_node,
@@ -326,13 +337,13 @@ static int rockchip_pwm_probe(struct platform_device *pdev)
 
        ret = clk_prepare_enable(pc->clk);
        if (ret) {
-               dev_err(&pdev->dev, "Can't prepare enable bus clk: %d\n", ret);
+               dev_err(&pdev->dev, "Can't prepare enable PWM clk: %d\n", ret);
                return ret;
        }
 
-       ret = clk_prepare(pc->pclk);
+       ret = clk_prepare_enable(pc->pclk);
        if (ret) {
-               dev_err(&pdev->dev, "Can't prepare APB clk: %d\n", ret);
+               dev_err(&pdev->dev, "Can't prepare enable APB clk: %d\n", ret);
                goto err_clk;
        }
 
@@ -349,23 +360,26 @@ static int rockchip_pwm_probe(struct platform_device *pdev)
                pc->chip.of_pwm_n_cells = 3;
        }
 
+       enable_conf = pc->data->enable_conf;
+       ctrl = readl_relaxed(pc->base + pc->data->regs.ctrl);
+       enabled = (ctrl & enable_conf) == enable_conf;
+
        ret = pwmchip_add(&pc->chip);
        if (ret < 0) {
-               clk_unprepare(pc->clk);
                dev_err(&pdev->dev, "pwmchip_add() failed: %d\n", ret);
                goto err_pclk;
        }
 
        /* Keep the PWM clk enabled if the PWM appears to be up and running. */
-       enable_conf = pc->data->enable_conf;
-       ctrl = readl_relaxed(pc->base + pc->data->regs.ctrl);
-       if ((ctrl & enable_conf) != enable_conf)
+       if (!enabled)
                clk_disable(pc->clk);
 
+       clk_disable(pc->pclk);
+
        return 0;
 
 err_pclk:
-       clk_unprepare(pc->pclk);
+       clk_disable_unprepare(pc->pclk);
 err_clk:
        clk_disable_unprepare(pc->clk);
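
The clock handling above follows the standard split between clk_prepare() (may sleep, so done once at probe) and clk_enable()/clk_disable() (callable from atomic context, so done around each register access). A minimal sketch of the resulting pattern:

	/* probe: prepare+enable once, then gate unless the PWM is running */
	ret = clk_prepare_enable(pc->clk);
	if (ret)
		return ret;

	/* any register access path */
	ret = clk_enable(pc->clk);
	if (ret)
		return ret;
	/* ... read/write PWM registers ... */
	clk_disable(pc->clk);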
 
diff --git a/drivers/pwm/pwm-zx.c b/drivers/pwm/pwm-zx.c
deleted file mode 100644 (file)
index 34e9119..0000000
+++ /dev/null
@@ -1,278 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/*
- * Copyright (C) 2017 Sanechips Technology Co., Ltd.
- * Copyright 2017 Linaro Ltd.
- */
-
-#include <linux/clk.h>
-#include <linux/err.h>
-#include <linux/io.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/platform_device.h>
-#include <linux/pwm.h>
-#include <linux/slab.h>
-
-#define ZX_PWM_MODE            0x0
-#define ZX_PWM_CLKDIV_SHIFT    2
-#define ZX_PWM_CLKDIV_MASK     GENMASK(11, 2)
-#define ZX_PWM_CLKDIV(x)       (((x) << ZX_PWM_CLKDIV_SHIFT) & \
-                                        ZX_PWM_CLKDIV_MASK)
-#define ZX_PWM_POLAR           BIT(1)
-#define ZX_PWM_EN              BIT(0)
-#define ZX_PWM_PERIOD          0x4
-#define ZX_PWM_DUTY            0x8
-
-#define ZX_PWM_CLKDIV_MAX      1023
-#define ZX_PWM_PERIOD_MAX      65535
-
-struct zx_pwm_chip {
-       struct pwm_chip chip;
-       struct clk *pclk;
-       struct clk *wclk;
-       void __iomem *base;
-};
-
-static inline struct zx_pwm_chip *to_zx_pwm_chip(struct pwm_chip *chip)
-{
-       return container_of(chip, struct zx_pwm_chip, chip);
-}
-
-static inline u32 zx_pwm_readl(struct zx_pwm_chip *zpc, unsigned int hwpwm,
-                              unsigned int offset)
-{
-       return readl(zpc->base + (hwpwm + 1) * 0x10 + offset);
-}
-
-static inline void zx_pwm_writel(struct zx_pwm_chip *zpc, unsigned int hwpwm,
-                                unsigned int offset, u32 value)
-{
-       writel(value, zpc->base + (hwpwm + 1) * 0x10 + offset);
-}
-
-static void zx_pwm_set_mask(struct zx_pwm_chip *zpc, unsigned int hwpwm,
-                           unsigned int offset, u32 mask, u32 value)
-{
-       u32 data;
-
-       data = zx_pwm_readl(zpc, hwpwm, offset);
-       data &= ~mask;
-       data |= value & mask;
-       zx_pwm_writel(zpc, hwpwm, offset, data);
-}
-
-static void zx_pwm_get_state(struct pwm_chip *chip, struct pwm_device *pwm,
-                            struct pwm_state *state)
-{
-       struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip);
-       unsigned long rate;
-       unsigned int div;
-       u32 value;
-       u64 tmp;
-
-       value = zx_pwm_readl(zpc, pwm->hwpwm, ZX_PWM_MODE);
-
-       if (value & ZX_PWM_POLAR)
-               state->polarity = PWM_POLARITY_NORMAL;
-       else
-               state->polarity = PWM_POLARITY_INVERSED;
-
-       if (value & ZX_PWM_EN)
-               state->enabled = true;
-       else
-               state->enabled = false;
-
-       div = (value & ZX_PWM_CLKDIV_MASK) >> ZX_PWM_CLKDIV_SHIFT;
-       rate = clk_get_rate(zpc->wclk);
-
-       tmp = zx_pwm_readl(zpc, pwm->hwpwm, ZX_PWM_PERIOD);
-       tmp *= div * NSEC_PER_SEC;
-       state->period = DIV_ROUND_CLOSEST_ULL(tmp, rate);
-
-       tmp = zx_pwm_readl(zpc, pwm->hwpwm, ZX_PWM_DUTY);
-       tmp *= div * NSEC_PER_SEC;
-       state->duty_cycle = DIV_ROUND_CLOSEST_ULL(tmp, rate);
-}
-
-static int zx_pwm_config(struct pwm_chip *chip, struct pwm_device *pwm,
-                        unsigned int duty_ns, unsigned int period_ns)
-{
-       struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip);
-       unsigned int period_cycles, duty_cycles;
-       unsigned long long c;
-       unsigned int div = 1;
-       unsigned long rate;
-
-       /* Find out the best divider */
-       rate = clk_get_rate(zpc->wclk);
-
-       while (1) {
-               c = rate / div;
-               c = c * period_ns;
-               do_div(c, NSEC_PER_SEC);
-
-               if (c < ZX_PWM_PERIOD_MAX)
-                       break;
-
-               div++;
-
-               if (div > ZX_PWM_CLKDIV_MAX)
-                       return -ERANGE;
-       }
-
-       /* Calculate duty cycles */
-       period_cycles = c;
-       c *= duty_ns;
-       do_div(c, period_ns);
-       duty_cycles = c;
-
-       /*
-        * If the PWM is being enabled, we have to temporarily disable it
-        * before configuring the registers.
-        */
-       if (pwm_is_enabled(pwm))
-               zx_pwm_set_mask(zpc, pwm->hwpwm, ZX_PWM_MODE, ZX_PWM_EN, 0);
-
-       /* Set up registers */
-       zx_pwm_set_mask(zpc, pwm->hwpwm, ZX_PWM_MODE, ZX_PWM_CLKDIV_MASK,
-                       ZX_PWM_CLKDIV(div));
-       zx_pwm_writel(zpc, pwm->hwpwm, ZX_PWM_PERIOD, period_cycles);
-       zx_pwm_writel(zpc, pwm->hwpwm, ZX_PWM_DUTY, duty_cycles);
-
-       /* Re-enable the PWM if needed */
-       if (pwm_is_enabled(pwm))
-               zx_pwm_set_mask(zpc, pwm->hwpwm, ZX_PWM_MODE,
-                               ZX_PWM_EN, ZX_PWM_EN);
-
-       return 0;
-}
-
-static int zx_pwm_apply(struct pwm_chip *chip, struct pwm_device *pwm,
-                       const struct pwm_state *state)
-{
-       struct zx_pwm_chip *zpc = to_zx_pwm_chip(chip);
-       struct pwm_state cstate;
-       int ret;
-
-       pwm_get_state(pwm, &cstate);
-
-       if (state->polarity != cstate.polarity)
-               zx_pwm_set_mask(zpc, pwm->hwpwm, ZX_PWM_MODE, ZX_PWM_POLAR,
-                               (state->polarity == PWM_POLARITY_INVERSED) ?
-                                0 : ZX_PWM_POLAR);
-
-       if (state->period != cstate.period ||
-           state->duty_cycle != cstate.duty_cycle) {
-               ret = zx_pwm_config(chip, pwm, state->duty_cycle,
-                                   state->period);
-               if (ret)
-                       return ret;
-       }
-
-       if (state->enabled != cstate.enabled) {
-               if (state->enabled) {
-                       ret = clk_prepare_enable(zpc->wclk);
-                       if (ret)
-                               return ret;
-
-                       zx_pwm_set_mask(zpc, pwm->hwpwm, ZX_PWM_MODE,
-                                       ZX_PWM_EN, ZX_PWM_EN);
-               } else {
-                       zx_pwm_set_mask(zpc, pwm->hwpwm, ZX_PWM_MODE,
-                                       ZX_PWM_EN, 0);
-                       clk_disable_unprepare(zpc->wclk);
-               }
-       }
-
-       return 0;
-}
-
-static const struct pwm_ops zx_pwm_ops = {
-       .apply = zx_pwm_apply,
-       .get_state = zx_pwm_get_state,
-       .owner = THIS_MODULE,
-};
-
-static int zx_pwm_probe(struct platform_device *pdev)
-{
-       struct zx_pwm_chip *zpc;
-       unsigned int i;
-       int ret;
-
-       zpc = devm_kzalloc(&pdev->dev, sizeof(*zpc), GFP_KERNEL);
-       if (!zpc)
-               return -ENOMEM;
-
-       zpc->base = devm_platform_ioremap_resource(pdev, 0);
-       if (IS_ERR(zpc->base))
-               return PTR_ERR(zpc->base);
-
-       zpc->pclk = devm_clk_get(&pdev->dev, "pclk");
-       if (IS_ERR(zpc->pclk))
-               return PTR_ERR(zpc->pclk);
-
-       zpc->wclk = devm_clk_get(&pdev->dev, "wclk");
-       if (IS_ERR(zpc->wclk))
-               return PTR_ERR(zpc->wclk);
-
-       ret = clk_prepare_enable(zpc->pclk);
-       if (ret)
-               return ret;
-
-       zpc->chip.dev = &pdev->dev;
-       zpc->chip.ops = &zx_pwm_ops;
-       zpc->chip.base = -1;
-       zpc->chip.npwm = 4;
-       zpc->chip.of_xlate = of_pwm_xlate_with_flags;
-       zpc->chip.of_pwm_n_cells = 3;
-
-       /*
-        * PWM devices may be enabled by firmware, and let's disable all of
-        * them initially to save power.
-        */
-       for (i = 0; i < zpc->chip.npwm; i++)
-               zx_pwm_set_mask(zpc, i, ZX_PWM_MODE, ZX_PWM_EN, 0);
-
-       ret = pwmchip_add(&zpc->chip);
-       if (ret < 0) {
-               dev_err(&pdev->dev, "failed to add PWM chip: %d\n", ret);
-               clk_disable_unprepare(zpc->pclk);
-               return ret;
-       }
-
-       platform_set_drvdata(pdev, zpc);
-
-       return 0;
-}
-
-static int zx_pwm_remove(struct platform_device *pdev)
-{
-       struct zx_pwm_chip *zpc = platform_get_drvdata(pdev);
-       int ret;
-
-       ret = pwmchip_remove(&zpc->chip);
-       clk_disable_unprepare(zpc->pclk);
-
-       return ret;
-}
-
-static const struct of_device_id zx_pwm_dt_ids[] = {
-       { .compatible = "zte,zx296718-pwm", },
-       { /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, zx_pwm_dt_ids);
-
-static struct platform_driver zx_pwm_driver = {
-       .driver = {
-               .name = "zx-pwm",
-               .of_match_table = zx_pwm_dt_ids,
-       },
-       .probe = zx_pwm_probe,
-       .remove = zx_pwm_remove,
-};
-module_platform_driver(zx_pwm_driver);
-
-MODULE_ALIAS("platform:zx-pwm");
-MODULE_AUTHOR("Shawn Guo <shawn.guo@linaro.org>");
-MODULE_DESCRIPTION("ZTE ZX PWM Driver");
-MODULE_LICENSE("GPL v2");
index 7b0cd08..ba020a4 100644 (file)
@@ -125,7 +125,7 @@ static const struct regulator_ops vid_ops = {
 
 static const struct regulator_desc regulators[] = {
        BD9571MWV_REG("VD09", "vd09", VD09, avs_ops, 0, 0x7f,
-                     0x80, 600000, 10000, 0x3c),
+                     0x6f, 600000, 10000, 0x3c),
        BD9571MWV_REG("VD18", "vd18", VD18, vid_ops, BD9571MWV_VD18_VID, 0xf,
                      16, 1625000, 25000, 0),
        BD9571MWV_REG("VD25", "vd25", VD25, vid_ops, BD9571MWV_VD25_VID, 0xf,
@@ -134,7 +134,7 @@ static const struct regulator_desc regulators[] = {
                      11, 2800000, 100000, 0),
        BD9571MWV_REG("DVFS", "dvfs", DVFS, reg_ops,
                      BD9571MWV_DVFS_MONIVDAC, 0x7f,
-                     0x80, 600000, 10000, 0x3c),
+                     0x6f, 600000, 10000, 0x3c),
 };
 
 #ifdef CONFIG_PM_SLEEP
@@ -174,7 +174,7 @@ static ssize_t backup_mode_show(struct device *dev,
 {
        struct bd9571mwv_reg *bdreg = dev_get_drvdata(dev);
 
-       return sprintf(buf, "%s\n", bdreg->bkup_mode_enabled ? "on" : "off");
+       return sysfs_emit(buf, "%s\n", bdreg->bkup_mode_enabled ? "on" : "off");
 }
 
 static ssize_t backup_mode_store(struct device *dev,
@@ -301,7 +301,7 @@ static int bd9571mwv_regulator_probe(struct platform_device *pdev)
                                               &config);
                if (IS_ERR(rdev)) {
                        dev_err(&pdev->dev, "failed to register %s regulator\n",
-                               pdev->name);
+                               regulators[i].name);
                        return PTR_ERR(rdev);
                }
        }
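
The sprintf() -> sysfs_emit() change follows the current sysfs convention: sysfs_emit() checks that it is writing into a proper show() buffer and bounds the output at PAGE_SIZE, so a show() callback reduces to

	return sysfs_emit(buf, "%s\n", bdreg->bkup_mode_enabled ? "on" : "off");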
index d49a153..9edc349 100644 (file)
@@ -41,7 +41,7 @@ struct mt6315_chip {
                .type = REGULATOR_VOLTAGE,                      \
                .id = _bid,                                     \
                .owner = THIS_MODULE,                           \
-               .n_voltages = 0xbf,                             \
+               .n_voltages = 0xc0,                             \
                .linear_ranges = mt_volt_range1,                \
                .n_linear_ranges = ARRAY_SIZE(mt_volt_range1),  \
                .vsel_reg = _vsel,                              \
@@ -69,7 +69,7 @@ static unsigned int mt6315_map_mode(u32 mode)
        case MT6315_BUCK_MODE_LP:
                return REGULATOR_MODE_IDLE;
        default:
-               return -EINVAL;
+               return REGULATOR_MODE_INVALID;
        }
 }
 
index 833d398..2f7ee21 100644 (file)
@@ -797,6 +797,14 @@ static int pca9450_i2c_probe(struct i2c_client *i2c,
                return ret;
        }
 
+       /* Clear PRESET_EN bit in BUCK123_DVS to use DVS registers */
+       ret = regmap_clear_bits(pca9450->regmap, PCA9450_REG_BUCK123_DVS,
+                               BUCK123_PRESET_EN);
+       if (ret) {
+               dev_err(&i2c->dev, "Failed to clear PRESET_EN bit: %d\n", ret);
+               return ret;
+       }
+
        /* Set reset behavior on assertion of WDOG_B signal */
        ret = regmap_update_bits(pca9450->regmap, PCA9450_REG_RESET_CTRL,
                                WDOG_B_CFG_MASK, WDOG_B_CFG_COLD_LDO12);
@@ -814,7 +822,7 @@ static int pca9450_i2c_probe(struct i2c_client *i2c,
 
        if (IS_ERR(pca9450->sd_vsel_gpio)) {
                dev_err(&i2c->dev, "Failed to get SD_VSEL GPIO\n");
-               return ret;
+               return PTR_ERR(pca9450->sd_vsel_gpio);
        }
 
        dev_info(&i2c->dev, "%s probed.\n",
index 79a554f..65a108c 100644 (file)
@@ -726,8 +726,8 @@ static const struct rpmh_vreg_hw_data pmic5_ftsmps510 = {
 static const struct rpmh_vreg_hw_data pmic5_hfsmps515 = {
        .regulator_type = VRM,
        .ops = &rpmh_regulator_vrm_ops,
-       .voltage_range = REGULATOR_LINEAR_RANGE(2800000, 0, 4, 16000),
-       .n_voltages = 5,
+       .voltage_range = REGULATOR_LINEAR_RANGE(320000, 0, 235, 16000),
+       .n_voltages = 236,
        .pmic_mode_map = pmic_mode_map_pmic5_smps,
        .of_map_mode = rpmh_regulator_pmic4_smps_of_map_mode,
 };
@@ -901,7 +901,7 @@ static const struct rpmh_vreg_init_data pm8350_vreg_data[] = {
 };
 
 static const struct rpmh_vreg_init_data pm8350c_vreg_data[] = {
-       RPMH_VREG("smps1",  "smp%s1",  &pmic5_hfsmps510, "vdd-s1"),
+       RPMH_VREG("smps1",  "smp%s1",  &pmic5_hfsmps515, "vdd-s1"),
        RPMH_VREG("smps2",  "smp%s2",  &pmic5_ftsmps510, "vdd-s2"),
        RPMH_VREG("smps3",  "smp%s3",  &pmic5_ftsmps510, "vdd-s3"),
        RPMH_VREG("smps4",  "smp%s4",  &pmic5_ftsmps510, "vdd-s4"),
index 3d4695d..e3aaac9 100644 (file)
@@ -153,9 +153,9 @@ static int rt4831_regulator_probe(struct platform_device *pdev)
        int i, ret;
 
        regmap = dev_get_regmap(pdev->dev.parent, NULL);
-       if (IS_ERR(regmap)) {
+       if (!regmap) {
                dev_err(&pdev->dev, "Failed to init regmap\n");
-               return PTR_ERR(regmap);
+               return -ENODEV;
        }
 
        /* Configure DSV mode to normal by default */
index 9e7efe5..15d1574 100644 (file)
@@ -155,6 +155,7 @@ config QCOM_Q6V5_ADSP
        depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
        depends on QCOM_SYSMON || QCOM_SYSMON=n
+       depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
        select MFD_SYSCON
        select QCOM_PIL_INFO
        select QCOM_MDT_LOADER
@@ -162,7 +163,9 @@ config QCOM_Q6V5_ADSP
        select QCOM_RPROC_COMMON
        help
          Say y here to support the Peripheral Image Loader
-         for the Qualcomm Technology Inc. ADSP remote processors.
+         for the non-TrustZone part of Qualcomm Technology Inc. ADSP and CDSP
+         remote processors. The TrustZone part is handled by the
+         QCOM_Q6V5_PAS driver.
 
 config QCOM_Q6V5_MSS
        tristate "Qualcomm Hexagon V5 self-authenticating modem subsystem support"
@@ -171,6 +174,7 @@ config QCOM_Q6V5_MSS
        depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
        depends on QCOM_SYSMON || QCOM_SYSMON=n
+       depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
        select MFD_SYSCON
        select QCOM_MDT_LOADER
        select QCOM_PIL_INFO
@@ -179,7 +183,8 @@ config QCOM_Q6V5_MSS
        select QCOM_SCM
        help
          Say y here to support the Qualcomm self-authenticating modem
-         subsystem based on Hexagon V5.
+         subsystem based on Hexagon V5. The TrustZone-based system is
+         handled by the QCOM_Q6V5_PAS driver.
 
 config QCOM_Q6V5_PAS
        tristate "Qualcomm Hexagon v5 Peripheral Authentication Service support"
@@ -188,6 +193,7 @@ config QCOM_Q6V5_PAS
        depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
        depends on QCOM_SYSMON || QCOM_SYSMON=n
+       depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
        select MFD_SYSCON
        select QCOM_PIL_INFO
        select QCOM_MDT_LOADER
@@ -197,7 +203,9 @@ config QCOM_Q6V5_PAS
        help
          Say y here to support the TrustZone based Peripheral Image Loader
          for the Qualcomm Hexagon v5 based remote processors. This is commonly
-         used to control subsystems such as ADSP, Compute and Sensor.
+         used to control subsystems such as ADSP (Audio DSP),
+         CDSP (Compute DSP), MPSS (Modem Peripheral SubSystem), and
+         SLPI (Sensor Low Power Island).
 
 config QCOM_Q6V5_WCSS
        tristate "Qualcomm Hexagon based WCSS Peripheral Image Loader"
@@ -206,6 +214,7 @@ config QCOM_Q6V5_WCSS
        depends on RPMSG_QCOM_SMD || (COMPILE_TEST && RPMSG_QCOM_SMD=n)
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
        depends on QCOM_SYSMON || QCOM_SYSMON=n
+       depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
        select MFD_SYSCON
        select QCOM_MDT_LOADER
        select QCOM_PIL_INFO
@@ -214,7 +223,8 @@ config QCOM_Q6V5_WCSS
        select QCOM_SCM
        help
          Say y here to support the Qualcomm Peripheral Image Loader for the
-         Hexagon V5 based WCSS remote processors.
+         Hexagon V5 based WCSS remote processors on e.g. IPQ8074.  This is
+         a non-TrustZone wireless subsystem.
 
 config QCOM_SYSMON
        tristate "Qualcomm sysmon driver"
@@ -238,13 +248,16 @@ config QCOM_WCNSS_PIL
        depends on RPMSG_QCOM_GLINK_SMEM || RPMSG_QCOM_GLINK_SMEM=n
        depends on QCOM_SMEM
        depends on QCOM_SYSMON || QCOM_SYSMON=n
+       depends on RPMSG_QCOM_GLINK || RPMSG_QCOM_GLINK=n
        select QCOM_MDT_LOADER
        select QCOM_PIL_INFO
        select QCOM_RPROC_COMMON
        select QCOM_SCM
        help
-         Say y here to support the Peripheral Image Loader for the Qualcomm
-         Wireless Connectivity Subsystem.
+         Say y here to support the Peripheral Image Loader for loading WCNSS
+         firmware and booting the core on e.g. MSM8974 or MSM8916. The firmware is
+         verified and booted with the help of the Peripheral Authentication
+         System (PAS) in TrustZone.
 
 config ST_REMOTEPROC
        tristate "ST remoteproc support"
index 26e19e6..e2618c3 100644 (file)
 #define AUX_CTRL_NMI           BIT(1)
 #define AUX_CTRL_SW_RESET      BIT(0)
 
+static bool auto_boot;
+module_param(auto_boot, bool, 0400);
+MODULE_PARM_DESC(auto_boot,
+                "Auto-boot the remote processor [default=false]");
+
 struct vpu_mem_map {
        const char *name;
        unsigned int da;
@@ -172,6 +177,8 @@ static int ingenic_rproc_probe(struct platform_device *pdev)
        if (!rproc)
                return -ENOMEM;
 
+       rproc->auto_boot = auto_boot;
+
        vpu = rproc->priv;
        vpu->dev = &pdev->dev;
        platform_set_drvdata(pdev, vpu);
index 988edb4..61901f5 100644 (file)
@@ -47,6 +47,8 @@
 
 #define MT8192_CORE0_SW_RSTN_CLR       0x10000
 #define MT8192_CORE0_SW_RSTN_SET       0x10004
+#define MT8192_CORE0_MEM_ATT_PREDEF    0x10008
+#define MT8192_CORE0_WDT_IRQ           0x10030
 #define MT8192_CORE0_WDT_CFG           0x10034
 
 #define SCP_FW_VER_LEN                 32
@@ -75,6 +77,7 @@ struct mtk_scp_of_data {
        void (*scp_reset_assert)(struct mtk_scp *scp);
        void (*scp_reset_deassert)(struct mtk_scp *scp);
        void (*scp_stop)(struct mtk_scp *scp);
+       void *(*scp_da_to_va)(struct mtk_scp *scp, u64 da, size_t len);
 
        u32 host_to_scp_reg;
        u32 host_to_scp_int_bit;
@@ -89,6 +92,10 @@ struct mtk_scp {
        void __iomem *reg_base;
        void __iomem *sram_base;
        size_t sram_size;
+       phys_addr_t sram_phys;
+       void __iomem *l1tcm_base;
+       size_t l1tcm_size;
+       phys_addr_t l1tcm_phys;
 
        const struct mtk_scp_of_data *data;
 
index e0c2356..ce72759 100644 (file)
@@ -197,17 +197,19 @@ static void mt8192_scp_irq_handler(struct mtk_scp *scp)
 
        scp_to_host = readl(scp->reg_base + MT8192_SCP2APMCU_IPC_SET);
 
-       if (scp_to_host & MT8192_SCP_IPC_INT_BIT)
+       if (scp_to_host & MT8192_SCP_IPC_INT_BIT) {
                scp_ipi_handler(scp);
-       else
-               scp_wdt_handler(scp, scp_to_host);
 
-       /*
-        * SCP won't send another interrupt until we clear
-        * MT8192_SCP2APMCU_IPC.
-        */
-       writel(MT8192_SCP_IPC_INT_BIT,
-              scp->reg_base + MT8192_SCP2APMCU_IPC_CLR);
+               /*
+                * SCP won't send another interrupt until we clear
+                * MT8192_SCP2APMCU_IPC.
+                */
+               writel(MT8192_SCP_IPC_INT_BIT,
+                      scp->reg_base + MT8192_SCP2APMCU_IPC_CLR);
+       } else {
+               scp_wdt_handler(scp, scp_to_host);
+               writel(1, scp->reg_base + MT8192_CORE0_WDT_IRQ);
+       }
 }
 
 static irqreturn_t scp_irq_handler(int irq, void *priv)
@@ -369,6 +371,9 @@ static int mt8192_scp_before_load(struct mtk_scp *scp)
        mt8192_power_on_sram(scp->reg_base + MT8192_L1TCM_SRAM_PDN);
        mt8192_power_on_sram(scp->reg_base + MT8192_CPU0_SRAM_PD);
 
+       /* enable MPU for all memory regions */
+       writel(0xff, scp->reg_base + MT8192_CORE0_MEM_ATT_PREDEF);
+
        return 0;
 }
 
@@ -458,9 +463,8 @@ stop:
        return ret;
 }
 
-static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len)
+static void *mt8183_scp_da_to_va(struct mtk_scp *scp, u64 da, size_t len)
 {
-       struct mtk_scp *scp = (struct mtk_scp *)rproc->priv;
        int offset;
 
        if (da < scp->sram_size) {
@@ -476,6 +480,42 @@ static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len)
        return NULL;
 }
 
+static void *mt8192_scp_da_to_va(struct mtk_scp *scp, u64 da, size_t len)
+{
+       int offset;
+
+       if (da >= scp->sram_phys &&
+           (da + len) <= scp->sram_phys + scp->sram_size) {
+               offset = da - scp->sram_phys;
+               return (void __force *)scp->sram_base + offset;
+       }
+
+       /* optional memory region */
+       if (scp->l1tcm_size &&
+           da >= scp->l1tcm_phys &&
+           (da + len) <= scp->l1tcm_phys + scp->l1tcm_size) {
+               offset = da - scp->l1tcm_phys;
+               return (void __force *)scp->l1tcm_base + offset;
+       }
+
+       /* optional memory region */
+       if (scp->dram_size &&
+           da >= scp->dma_addr &&
+           (da + len) <= scp->dma_addr + scp->dram_size) {
+               offset = da - scp->dma_addr;
+               return scp->cpu_addr + offset;
+       }
+
+       return NULL;
+}
+
+static void *scp_da_to_va(struct rproc *rproc, u64 da, size_t len)
+{
+       struct mtk_scp *scp = (struct mtk_scp *)rproc->priv;
+
+       return scp->data->scp_da_to_va(scp, da, len);
+}
+
 static void mt8183_scp_stop(struct mtk_scp *scp)
 {
        /* Disable SCP watchdog */
@@ -714,13 +754,27 @@ static int scp_probe(struct platform_device *pdev)
                goto free_rproc;
        }
        scp->sram_size = resource_size(res);
+       scp->sram_phys = res->start;
+
+       /* l1tcm is an optional memory region */
+       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "l1tcm");
+       scp->l1tcm_base = devm_ioremap_resource(dev, res);
+       if (IS_ERR((__force void *)scp->l1tcm_base)) {
+               ret = PTR_ERR((__force void *)scp->l1tcm_base);
+               if (ret != -EINVAL) {
+                       dev_err(dev, "Failed to map l1tcm memory\n");
+                       goto free_rproc;
+               }
+       } else {
+               scp->l1tcm_size = resource_size(res);
+               scp->l1tcm_phys = res->start;
+       }
 
        mutex_init(&scp->send_lock);
        for (i = 0; i < SCP_IPI_MAX; i++)
                mutex_init(&scp->ipi_desc[i].lock);
 
-       res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cfg");
-       scp->reg_base = devm_ioremap_resource(dev, res);
+       scp->reg_base = devm_platform_ioremap_resource_byname(pdev, "cfg");
        if (IS_ERR((__force void *)scp->reg_base)) {
                dev_err(dev, "Failed to parse and map cfg memory\n");
                ret = PTR_ERR((__force void *)scp->reg_base);
@@ -803,6 +857,7 @@ static const struct mtk_scp_of_data mt8183_of_data = {
        .scp_reset_assert = mt8183_scp_reset_assert,
        .scp_reset_deassert = mt8183_scp_reset_deassert,
        .scp_stop = mt8183_scp_stop,
+       .scp_da_to_va = mt8183_scp_da_to_va,
        .host_to_scp_reg = MT8183_HOST_TO_SCP,
        .host_to_scp_int_bit = MT8183_HOST_IPC_INT_BIT,
        .ipi_buf_offset = 0x7bdb0,
@@ -814,6 +869,7 @@ static const struct mtk_scp_of_data mt8192_of_data = {
        .scp_reset_assert = mt8192_scp_reset_assert,
        .scp_reset_deassert = mt8192_scp_reset_deassert,
        .scp_stop = mt8192_scp_stop,
+       .scp_da_to_va = mt8192_scp_da_to_va,
        .host_to_scp_reg = MT8192_GIPC_IN_SET,
        .host_to_scp_int_bit = MT8192_HOST_IPC_INT_BIT,
 };
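
A worked example of the new window checks in mt8192_scp_da_to_va() (hypothetical addresses): with sram_phys = 0x10500000 and sram_size = 0x40000, a lookup of da = 0x10510000, len = 0x100 satisfies

	da >= 0x10500000 && da + 0x100 <= 0x10540000

so the helper returns sram_base + 0x10000; an address outside all three windows returns NULL and the remoteproc core treats it as unmapped.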
index 2667919..dcb380e 100644 (file)
@@ -450,6 +450,24 @@ static void *pru_i_da_to_va(struct pru_rproc *pru, u32 da, size_t len)
        if (len == 0)
                return NULL;
 
+       /*
+        * GNU binutils do not support multiple address spaces. The GNU
+        * linker's default linker script places IRAM at an arbitrarily high
+        * offset, in order to differentiate it from DRAM. Hence we need to
+        * strip the artificial offset in the IRAM addresses coming from the
+        * ELF file.
+        *
+        * The TI proprietary linker would never set those higher IRAM address
+        * bits anyway. PRU architecture limits the program counter to 16-bit
+        * word-address range. This in turn corresponds to 18-bit IRAM
+        * byte-address range for ELF.
+        *
+        * Two more bits are added just in case to make the final 20-bit mask.
+        * The idea is to have a safeguard in case TI decides to add banking
+        * in future SoCs.
+        */
+       da &= 0xfffff;
+
        if (da >= PRU_IRAM_DA &&
            da + len <= PRU_IRAM_DA + pru->mem_regions[PRU_IOMEM_IRAM].size) {
                offset = da - PRU_IRAM_DA;
@@ -585,7 +603,7 @@ pru_rproc_load_elf_segments(struct rproc *rproc, const struct firmware *fw)
                        break;
                }
 
-               if (pru->data->is_k3 && is_iram) {
+               if (pru->data->is_k3) {
                        ret = pru_rproc_memcpy(ptr, elf_data + phdr->p_offset,
                                               filesz);
                        if (ret) {
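
To make the IRAM masking above concrete (hypothetical address): a GNU-linked image might carry an IRAM virtual address of 0x20000100 in its program headers; da &= 0xfffff strips the linker's artificial high bits, leaving 0x00100, which then falls inside the PRU_IRAM_DA range check as a genuine 18-bit IRAM byte address.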
index 5521c44..7c007dd 100644 (file)
@@ -56,7 +56,7 @@ static int qcom_pil_info_init(void)
        memset_io(base, 0, resource_size(&imem));
 
        _reloc.base = base;
-       _reloc.num_entries = resource_size(&imem) / PIL_RELOC_ENTRY_SIZE;
+       _reloc.num_entries = (u32)resource_size(&imem) / PIL_RELOC_ENTRY_SIZE;
 
        return 0;
 }
index ee58622..e635454 100644 (file)
@@ -565,6 +565,26 @@ static const struct adsp_data sm8250_adsp_resource = {
        .ssctl_id = 0x14,
 };
 
+static const struct adsp_data sm8350_adsp_resource = {
+       .crash_reason_smem = 423,
+       .firmware_name = "adsp.mdt",
+       .pas_id = 1,
+       .has_aggre2_clk = false,
+       .auto_boot = true,
+       .active_pd_names = (char*[]){
+               "load_state",
+               NULL
+       },
+       .proxy_pd_names = (char*[]){
+               "lcx",
+               "lmx",
+               NULL
+       },
+       .ssr_name = "lpass",
+       .sysmon_name = "adsp",
+       .ssctl_id = 0x14,
+};
+
 static const struct adsp_data msm8998_adsp_resource = {
                .crash_reason_smem = 423,
                .firmware_name = "adsp.mdt",
@@ -629,6 +649,25 @@ static const struct adsp_data sm8250_cdsp_resource = {
        .ssctl_id = 0x17,
 };
 
+static const struct adsp_data sm8350_cdsp_resource = {
+       .crash_reason_smem = 601,
+       .firmware_name = "cdsp.mdt",
+       .pas_id = 18,
+       .has_aggre2_clk = false,
+       .auto_boot = true,
+       .active_pd_names = (char*[]){
+               "load_state",
+               NULL
+       },
+       .proxy_pd_names = (char*[]){
+               "cx",
+               NULL
+       },
+       .ssr_name = "cdsp",
+       .sysmon_name = "cdsp",
+       .ssctl_id = 0x17,
+};
+
 static const struct adsp_data mpss_resource_init = {
        .crash_reason_smem = 421,
        .firmware_name = "modem.mdt",
@@ -701,6 +740,26 @@ static const struct adsp_data sm8250_slpi_resource = {
        .ssctl_id = 0x16,
 };
 
+static const struct adsp_data sm8350_slpi_resource = {
+       .crash_reason_smem = 424,
+       .firmware_name = "slpi.mdt",
+       .pas_id = 12,
+       .has_aggre2_clk = false,
+       .auto_boot = true,
+       .active_pd_names = (char*[]){
+               "load_state",
+               NULL
+       },
+       .proxy_pd_names = (char*[]){
+               "lcx",
+               "lmx",
+               NULL
+       },
+       .ssr_name = "dsps",
+       .sysmon_name = "slpi",
+       .ssctl_id = 0x16,
+};
+
 static const struct adsp_data msm8998_slpi_resource = {
                .crash_reason_smem = 424,
                .firmware_name = "slpi.mdt",
@@ -745,6 +804,10 @@ static const struct of_device_id adsp_of_match[] = {
        { .compatible = "qcom,sm8250-adsp-pas", .data = &sm8250_adsp_resource},
        { .compatible = "qcom,sm8250-cdsp-pas", .data = &sm8250_cdsp_resource},
        { .compatible = "qcom,sm8250-slpi-pas", .data = &sm8250_slpi_resource},
+       { .compatible = "qcom,sm8350-adsp-pas", .data = &sm8350_adsp_resource},
+       { .compatible = "qcom,sm8350-cdsp-pas", .data = &sm8350_cdsp_resource},
+       { .compatible = "qcom,sm8350-slpi-pas", .data = &sm8350_slpi_resource},
+       { .compatible = "qcom,sm8350-mpss-pas", .data = &mpss_resource_init},
        { },
 };
 MODULE_DEVICE_TABLE(of, adsp_of_match);
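
The new SM8350 compatibles resolve to their adsp_data through the standard OF match-data flow; a minimal sketch of how a probe() would typically pick up the per-SoC resource block, helper name hypothetical:

    #include <linux/of_device.h>
    #include <linux/platform_device.h>

    /*
     * The device's compatible string selects an entry in adsp_of_match
     * above; of_device_get_match_data() hands back its .data pointer.
     */
    static const struct adsp_data *example_get_desc(struct platform_device *pdev)
    {
    	return of_device_get_match_data(&pdev->dev);
    }
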
index f958542..2a6a23c 100644 (file)
@@ -570,7 +570,7 @@ static int wcnss_probe(struct platform_device *pdev)
        if (IS_ERR(mmio)) {
                ret = PTR_ERR(mmio);
                goto free_rproc;
-       };
+       }
 
        ret = wcnss_alloc_memory_region(wcnss);
        if (ret)
index 0e0ae1e..169acd3 100644 (file)
@@ -160,6 +160,7 @@ static int qcom_iris_remove(struct platform_device *pdev)
 static const struct of_device_id iris_of_match[] = {
        { .compatible = "qcom,wcn3620", .data = &wcn3620_data },
        { .compatible = "qcom,wcn3660", .data = &wcn3660_data },
+       { .compatible = "qcom,wcn3660b", .data = &wcn3680_data },
        { .compatible = "qcom,wcn3680", .data = &wcn3680_data },
        {}
 };
index 2394eef..ab15076 100644 (file)
@@ -1988,7 +1988,7 @@ int rproc_set_firmware(struct rproc *rproc, const char *fw_name)
                goto out;
        }
 
-       kfree(rproc->firmware);
+       kfree_const(rproc->firmware);
        rproc->firmware = p;
 
 out:
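
kfree_const() is the counterpart to kstrdup_const(): it frees only when the pointer does not live in .rodata, so a field that may hold either a string literal or a heap copy must be released with it. A minimal sketch of the pairing, struct and names hypothetical:

    #include <linux/errno.h>
    #include <linux/slab.h>
    #include <linux/string.h>

    /* A field that may point either at a string literal (a driver
     * default) or at a kstrdup_const() copy. */
    struct fw_holder {
    	const char *firmware;
    };

    static int fw_holder_set_firmware(struct fw_holder *h, const char *name)
    {
    	const char *p = kstrdup_const(name, GFP_KERNEL);

    	if (!p)
    		return -ENOMEM;

    	kfree_const(h->firmware);	/* no-op if it points into .rodata */
    	h->firmware = p;
    	return 0;
    }
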
index a180aea..ccb3c14 100644 (file)
@@ -370,8 +370,13 @@ static int stm32_rproc_request_mbox(struct rproc *rproc)
 
                ddata->mb[i].chan = mbox_request_channel_byname(cl, name);
                if (IS_ERR(ddata->mb[i].chan)) {
-                       if (PTR_ERR(ddata->mb[i].chan) == -EPROBE_DEFER)
+                       if (PTR_ERR(ddata->mb[i].chan) == -EPROBE_DEFER) {
+                               dev_err_probe(dev->parent,
+                                             PTR_ERR(ddata->mb[i].chan),
+                                             "failed to request mailbox %s\n",
+                                             name);
                                goto err_probe;
+                       }
                        dev_warn(dev, "cannot get %s mbox\n", name);
                        ddata->mb[i].chan = NULL;
                }
@@ -592,15 +597,14 @@ static int stm32_rproc_parse_dt(struct platform_device *pdev,
 
        irq = platform_get_irq(pdev, 0);
        if (irq == -EPROBE_DEFER)
-               return -EPROBE_DEFER;
+               return dev_err_probe(dev, irq, "failed to get interrupt\n");
 
        if (irq > 0) {
                err = devm_request_irq(dev, irq, stm32_rproc_wdg, 0,
                                       dev_name(dev), pdev);
-               if (err) {
-                       dev_err(dev, "failed to request wdg irq\n");
-                       return err;
-               }
+               if (err)
+                       return dev_err_probe(dev, err,
+                                            "failed to request wdg irq\n");
 
                ddata->wdg_irq = irq;
 
@@ -613,10 +617,9 @@ static int stm32_rproc_parse_dt(struct platform_device *pdev,
        }
 
        ddata->rst = devm_reset_control_get_by_index(dev, 0);
-       if (IS_ERR(ddata->rst)) {
-               dev_err(dev, "failed to get mcu reset\n");
-               return PTR_ERR(ddata->rst);
-       }
+       if (IS_ERR(ddata->rst))
+               return dev_err_probe(dev, PTR_ERR(ddata->rst),
+                                    "failed to get mcu_reset\n");
 
        /*
         * if platform is secured the hold boot bit must be written by
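
dev_err_probe() folds the log-and-return idiom into one call: it logs at error level for real failures, records -EPROBE_DEFER reasons in debugfs (devices_deferred) instead of spamming the log, and returns the error code. A sketch of the resulting shape, driver names hypothetical:

    #include <linux/err.h>
    #include <linux/platform_device.h>
    #include <linux/reset.h>

    static int example_probe(struct platform_device *pdev)
    {
    	struct device *dev = &pdev->dev;
    	struct reset_control *rst;

    	rst = devm_reset_control_get_by_index(dev, 0);
    	if (IS_ERR(rst))
    		/* one call replaces dev_err() + return PTR_ERR() */
    		return dev_err_probe(dev, PTR_ERR(rst),
    				     "failed to get mcu reset\n");

    	return 0;
    }
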
index 8ac5627..4171c6f 100644 (file)
@@ -89,6 +89,16 @@ config RESET_INTEL_GW
          Say Y to control the reset signals provided by the reset controller.
          Otherwise, say N.
 
+config RESET_K210
+       bool "Reset controller driver for Canaan Kendryte K210 SoC"
+       depends on (SOC_CANAAN || COMPILE_TEST) && OF
+       select MFD_SYSCON
+       default SOC_CANAAN
+       help
+         Support for the Canaan Kendryte K210 RISC-V SoC reset controller.
+         Say Y if you want to control reset signals provided by this
+         controller.
+
 config RESET_LANTIQ
        bool "Lantiq XWAY Reset Driver" if COMPILE_TEST
        default SOC_TYPE_XWAY
index 1054123..65a118a 100644 (file)
@@ -13,6 +13,7 @@ obj-$(CONFIG_RESET_BRCMSTB_RESCAL) += reset-brcmstb-rescal.o
 obj-$(CONFIG_RESET_HSDK) += reset-hsdk.o
 obj-$(CONFIG_RESET_IMX7) += reset-imx7.o
 obj-$(CONFIG_RESET_INTEL_GW) += reset-intel-gw.o
+obj-$(CONFIG_RESET_K210) += reset-k210.o
 obj-$(CONFIG_RESET_LANTIQ) += reset-lantiq.o
 obj-$(CONFIG_RESET_LPC18XX) += reset-lpc18xx.o
 obj-$(CONFIG_RESET_MESON) += reset-meson.o
diff --git a/drivers/reset/reset-k210.c b/drivers/reset/reset-k210.c
new file mode 100644 (file)
index 0000000..1b6e035
--- /dev/null
@@ -0,0 +1,131 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/platform_device.h>
+#include <linux/reset-controller.h>
+#include <linux/delay.h>
+#include <linux/mfd/syscon.h>
+#include <linux/regmap.h>
+#include <soc/canaan/k210-sysctl.h>
+
+#include <dt-bindings/reset/k210-rst.h>
+
+#define K210_RST_MASK  0x27FFFFFF
+
+struct k210_rst {
+       struct regmap *map;
+       struct reset_controller_dev rcdev;
+};
+
+static inline struct k210_rst *
+to_k210_rst(struct reset_controller_dev *rcdev)
+{
+       return container_of(rcdev, struct k210_rst, rcdev);
+}
+
+static inline int k210_rst_assert(struct reset_controller_dev *rcdev,
+                                 unsigned long id)
+{
+       struct k210_rst *ksr = to_k210_rst(rcdev);
+
+       return regmap_update_bits(ksr->map, K210_SYSCTL_PERI_RESET, BIT(id), 1);
+}
+
+static inline int k210_rst_deassert(struct reset_controller_dev *rcdev,
+                                   unsigned long id)
+{
+       struct k210_rst *ksr = to_k210_rst(rcdev);
+
+       return regmap_update_bits(ksr->map, K210_SYSCTL_PERI_RESET, BIT(id), 0);
+}
+
+static int k210_rst_reset(struct reset_controller_dev *rcdev,
+                         unsigned long id)
+{
+       int ret;
+
+       ret = k210_rst_assert(rcdev, id);
+       if (ret == 0) {
+               udelay(10);
+               ret = k210_rst_deassert(rcdev, id);
+       }
+
+       return ret;
+}
+
+static int k210_rst_status(struct reset_controller_dev *rcdev,
+                          unsigned long id)
+{
+       struct k210_rst *ksr = to_k210_rst(rcdev);
+       u32 reg, bit = BIT(id);
+       int ret;
+
+       ret = regmap_read(ksr->map, K210_SYSCTL_PERI_RESET, &reg);
+       if (ret)
+               return ret;
+
+       return reg & bit;
+}
+
+static int k210_rst_xlate(struct reset_controller_dev *rcdev,
+                         const struct of_phandle_args *reset_spec)
+{
+       unsigned long id = reset_spec->args[0];
+
+       if (!(BIT(id) & K210_RST_MASK))
+               return -EINVAL;
+
+       return id;
+}
+
+static const struct reset_control_ops k210_rst_ops = {
+       .assert         = k210_rst_assert,
+       .deassert       = k210_rst_deassert,
+       .reset          = k210_rst_reset,
+       .status         = k210_rst_status,
+};
+
+static int k210_rst_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct device_node *parent_np = of_get_parent(dev->of_node);
+       struct k210_rst *ksr;
+
+       dev_info(dev, "K210 reset controller\n");
+
+       ksr = devm_kzalloc(dev, sizeof(*ksr), GFP_KERNEL);
+       if (!ksr)
+               return -ENOMEM;
+
+       ksr->map = syscon_node_to_regmap(parent_np);
+       of_node_put(parent_np);
+       if (IS_ERR(ksr->map))
+               return PTR_ERR(ksr->map);
+
+       ksr->rcdev.owner = THIS_MODULE;
+       ksr->rcdev.dev = dev;
+       ksr->rcdev.of_node = dev->of_node;
+       ksr->rcdev.ops = &k210_rst_ops;
+       ksr->rcdev.nr_resets = fls(K210_RST_MASK);
+       ksr->rcdev.of_reset_n_cells = 1;
+       ksr->rcdev.of_xlate = k210_rst_xlate;
+
+       return devm_reset_controller_register(dev, &ksr->rcdev);
+}
+
+static const struct of_device_id k210_rst_dt_ids[] = {
+       { .compatible = "canaan,k210-rst" },
+       { /* sentinel */ },
+};
+
+static struct platform_driver k210_rst_driver = {
+       .probe  = k210_rst_probe,
+       .driver = {
+               .name           = "k210-rst",
+               .of_match_table = k210_rst_dt_ids,
+       },
+};
+builtin_platform_driver(k210_rst_driver);
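
On the consumer side, a K210 peripheral driver would reference this controller through its "resets" DT property and drive it via the generic reset API; reset_control_reset() lands in k210_rst_reset() above (assert, 10us delay, deassert). A minimal consumer sketch, names hypothetical:

    #include <linux/device.h>
    #include <linux/err.h>
    #include <linux/reset.h>

    static int example_reset_peripheral(struct device *dev)
    {
    	struct reset_control *rst;

    	rst = devm_reset_control_get_exclusive(dev, NULL);
    	if (IS_ERR(rst))
    		return PTR_ERR(rst);

    	/* ends up in k210_rst_reset(): assert, 10us delay, deassert */
    	return reset_control_reset(rst);
    }
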
index dcd1ce6..dea929c 100644 (file)
@@ -8,15 +8,16 @@
 #include <linux/module.h>
 #include <linux/notifier.h>
 #include <linux/rpmsg.h>
+#include <linux/rpmsg/qcom_glink.h>
 #include <linux/remoteproc/qcom_rproc.h>
 
 /**
  * struct do_cleanup_msg - The data structure for an SSR do_cleanup message
- * version:     The G-Link SSR protocol version
- * command:     The G-Link SSR command - do_cleanup
- * seq_num:     Sequence number
- * name_len:    Length of the name of the subsystem being restarted
- * name:        G-Link edge name of the subsystem being restarted
+ * @version:   The G-Link SSR protocol version
+ * @command:   The G-Link SSR command - do_cleanup
+ * @seq_num:   Sequence number
+ * @name_len:  Length of the name of the subsystem being restarted
+ * @name:      G-Link edge name of the subsystem being restarted
  */
 struct do_cleanup_msg {
        __le32 version;
@@ -28,9 +29,9 @@ struct do_cleanup_msg {
 
 /**
  * struct cleanup_done_msg - The data structure for an SSR cleanup_done message
- * version:     The G-Link SSR protocol version
- * response:    The G-Link SSR response to a do_cleanup command, cleanup_done
- * seq_num:     Sequence number
+ * @version:   The G-Link SSR protocol version
+ * @response:  The G-Link SSR response to a do_cleanup command, cleanup_done
+ * @seq_num:   Sequence number
  */
 struct cleanup_done_msg {
        __le32 version;
index cce5b52..89128fc 100644 (file)
@@ -785,7 +785,7 @@ static long wdt_unlocked_ioctl(struct file *file, unsigned int cmd,
  */
 static int wdt_open(struct inode *inode, struct file *file)
 {
-       if (MINOR(inode->i_rdev) == WATCHDOG_MINOR) {
+       if (iminor(inode) == WATCHDOG_MINOR) {
                mutex_lock(&m41t80_rtc_mutex);
                if (test_and_set_bit(0, &wdt_is_open)) {
                        mutex_unlock(&m41t80_rtc_mutex);
@@ -809,7 +809,7 @@ static int wdt_open(struct inode *inode, struct file *file)
  */
 static int wdt_release(struct inode *inode, struct file *file)
 {
-       if (MINOR(inode->i_rdev) == WATCHDOG_MINOR)
+       if (iminor(inode) == WATCHDOG_MINOR)
                clear_bit(0, &wdt_is_open);
        return 0;
 }
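
iminor() and imajor() from <linux/fs.h> are the canonical accessors for a device inode's numbers; iminor(inode) expands to MINOR(inode->i_rdev), so the conversion above is purely mechanical. A sketch:

    #include <linux/fs.h>

    /* iminor(inode) is exactly MINOR(inode->i_rdev), without open-coding
     * the i_rdev access at every call site. */
    static bool example_is_watchdog(const struct inode *inode,
    				unsigned int watchdog_minor)
    {
    	return iminor(inode) == watchdog_minor;
    }
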
index 28c04a4..3a945ab 100644 (file)
@@ -63,7 +63,6 @@ void dasd_int_handler(struct ccw_device *, unsigned long, struct irb *);
 MODULE_AUTHOR("Holger Smolinski <Holger.Smolinski@de.ibm.com>");
 MODULE_DESCRIPTION("Linux on S/390 DASD device driver,"
                   " Copyright IBM Corp. 2000");
-MODULE_SUPPORTED_DEVICE("dasd");
 MODULE_LICENSE("GPL");
 
 /*
@@ -3052,7 +3051,8 @@ static blk_status_t do_dasd_request(struct blk_mq_hw_ctx *hctx,
 
        basedev = block->base;
        spin_lock_irq(&dq->lock);
-       if (basedev->state < DASD_STATE_READY) {
+       if (basedev->state < DASD_STATE_READY ||
+           test_bit(DASD_FLAG_OFFLINE, &basedev->flags)) {
                DBF_DEV_EVENT(DBF_ERR, basedev,
                              "device not ready for request %p", req);
                rc = BLK_STS_IOERR;
@@ -3487,8 +3487,6 @@ void dasd_generic_remove(struct ccw_device *cdev)
        struct dasd_device *device;
        struct dasd_block *block;
 
-       cdev->handler = NULL;
-
        device = dasd_device_from_cdev(cdev);
        if (IS_ERR(device)) {
                dasd_remove_sysfs_files(cdev);
@@ -3507,6 +3505,7 @@ void dasd_generic_remove(struct ccw_device *cdev)
         * not quite down yet.
         */
        dasd_set_target_state(device, DASD_STATE_NEW);
+       cdev->handler = NULL;
        /* dasd_delete_device destroys the device reference. */
        block = device->block;
        dasd_delete_device(device);
index 1569244..307a80f 100644 (file)
@@ -424,8 +424,10 @@ tty3270_update(struct timer_list *t)
                         * last output position matches the start address
                         * of this line.
                         */
-                       if (s->string[1] == sba[0] && s->string[2] == sba[1])
-                               str += 3, len -= 3;
+                       if (s->string[1] == sba[0] && s->string[2] == sba[1]) {
+                               str += 3;
+                               len -= 3;
+                       }
                        if (raw3270_request_add_data(wrq, str, len) != 0)
                                break;
                        list_del_init(&s->update);
index 1bbf27b..68f49e2 100644 (file)
@@ -681,7 +681,7 @@ static int ur_open(struct inode *inode, struct file *file)
         * We treat the minor number as the devno of the ur device
         * to find in the driver tree.
         */
-       devno = MINOR(file_inode(file)->i_rdev);
+       devno = iminor(file_inode(file));
 
        urd = urdev_get_from_devno(devno);
        if (!urd) {
index 1515fdc..bd3c724 100644 (file)
@@ -15,6 +15,7 @@
 #include <linux/init.h>
 #include <linux/slab.h>
 #include <linux/debugfs.h>
+#include <linux/reboot.h>
 
 #include <asm/asm-offsets.h>
 #include <asm/ipl.h>
@@ -238,6 +239,28 @@ static int __init zcore_reipl_init(void)
        return 0;
 }
 
+static int zcore_reboot_and_on_panic_handler(struct notifier_block *self,
+                                            unsigned long         event,
+                                            void                  *data)
+{
+       if (hsa_available)
+               release_hsa();
+
+       return NOTIFY_OK;
+}
+
+static struct notifier_block zcore_reboot_notifier = {
+       .notifier_call  = zcore_reboot_and_on_panic_handler,
+       /* we need to be notified before reipl and kdump */
+       .priority       = INT_MAX,
+};
+
+static struct notifier_block zcore_on_panic_notifier = {
+       .notifier_call  = zcore_reboot_and_on_panic_handler,
+       /* we need to be notified before reipl and kdump */
+       .priority       = INT_MAX,
+};
+
 static int __init zcore_init(void)
 {
        unsigned char arch;
@@ -293,28 +316,15 @@ static int __init zcore_init(void)
                goto fail;
 
        zcore_dir = debugfs_create_dir("zcore" , NULL);
-       if (!zcore_dir) {
-               rc = -ENOMEM;
-               goto fail;
-       }
        zcore_reipl_file = debugfs_create_file("reipl", S_IRUSR, zcore_dir,
                                                NULL, &zcore_reipl_fops);
-       if (!zcore_reipl_file) {
-               rc = -ENOMEM;
-               goto fail_dir;
-       }
        zcore_hsa_file = debugfs_create_file("hsa", S_IRUSR|S_IWUSR, zcore_dir,
                                             NULL, &zcore_hsa_fops);
-       if (!zcore_hsa_file) {
-               rc = -ENOMEM;
-               goto fail_reipl_file;
-       }
-       return 0;
 
-fail_reipl_file:
-       debugfs_remove(zcore_reipl_file);
-fail_dir:
-       debugfs_remove(zcore_dir);
+       register_reboot_notifier(&zcore_reboot_notifier);
+       atomic_notifier_chain_register(&panic_notifier_list, &zcore_on_panic_notifier);
+
+       return 0;
 fail:
        diag308(DIAG308_REL_HSA, NULL);
        return rc;
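
The dropped NULL checks reflect current debugfs semantics: creation failures return an ERR_PTR that later debugfs_create_*() calls accept and propagate, and debugfs is best-effort by design, so callers are expected to ignore the return values. A sketch of the idiom, names hypothetical:

    #include <linux/debugfs.h>

    static void example_debugfs_setup(const struct file_operations *fops)
    {
    	struct dentry *dir = debugfs_create_dir("example", NULL);

    	/* No error check needed: a failed debugfs_create_dir() returns
    	 * an ERR_PTR that debugfs_create_file() handles gracefully. */
    	debugfs_create_file("ctl", 0600, dir, NULL, fops);
    }
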
index 6420b19..05e136c 100644 (file)
@@ -47,7 +47,7 @@ static void ccw_timeout_log(struct ccw_device *cdev)
        orb = &private->orb;
        cc = stsch(sch->schid, &schib);
 
-       printk(KERN_WARNING "cio: ccw device timeout occurred at %llx, "
+       printk(KERN_WARNING "cio: ccw device timeout occurred at %lx, "
               "device information:\n", get_tod_clock());
        printk(KERN_WARNING "cio: orb:\n");
        print_hex_dump(KERN_WARNING, "cio:  ", DUMP_PREFIX_NONE, 16, 1,
index 68106be..767ac41 100644 (file)
@@ -543,7 +543,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
                if (ret)
                        return ret;
 
-               return copy_to_user((void __user *)arg, &info, minsz);
+               return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
        }
        case VFIO_DEVICE_GET_REGION_INFO:
        {
@@ -561,7 +561,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
                if (ret)
                        return ret;
 
-               return copy_to_user((void __user *)arg, &info, minsz);
+               return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
        }
        case VFIO_DEVICE_GET_IRQ_INFO:
        {
@@ -582,7 +582,7 @@ static ssize_t vfio_ccw_mdev_ioctl(struct mdev_device *mdev,
                if (info.count == -1)
                        return -EINVAL;
 
-               return copy_to_user((void __user *)arg, &info, minsz);
+               return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
        }
        case VFIO_DEVICE_SET_IRQS:
        {
index 41fc2e4..1ffdd41 100644 (file)
@@ -1286,7 +1286,7 @@ static int vfio_ap_mdev_get_device_info(unsigned long arg)
        info.num_regions = 0;
        info.num_irqs = 0;
 
-       return copy_to_user((void __user *)arg, &info, minsz);
+       return copy_to_user((void __user *)arg, &info, minsz) ? -EFAULT : 0;
 }
 
 static ssize_t vfio_ap_mdev_ioctl(struct mdev_device *mdev,
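
The vfio_ccw and vfio_ap fixes above share one root cause: copy_to_user() returns the number of bytes it failed to copy, never an errno, so returning its result directly from an ioctl leaks a positive count to userspace. The translation idiom, as a sketch:

    #include <linux/errno.h>
    #include <linux/uaccess.h>

    /* copy_to_user() returns bytes left uncopied; the handler must map
     * any nonzero remainder to -EFAULT itself. */
    static long example_copy_out(void __user *arg, const void *info,
    			     unsigned long minsz)
    {
    	return copy_to_user(arg, info, minsz) ? -EFAULT : 0;
    }
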
index a1da83b..91acff4 100644 (file)
@@ -436,7 +436,7 @@ struct qeth_qdio_out_buffer {
        int is_header[QDIO_MAX_ELEMENTS_PER_BUFFER];
 
        struct qeth_qdio_out_q *q;
-       struct qeth_qdio_out_buffer *next_pending;
+       struct list_head list_entry;
 };
 
 struct qeth_card;
@@ -500,6 +500,7 @@ struct qeth_qdio_out_q {
        struct qdio_buffer *qdio_bufs[QDIO_MAX_BUFFERS_PER_Q];
        struct qeth_qdio_out_buffer *bufs[QDIO_MAX_BUFFERS_PER_Q];
        struct qdio_outbuf_state *bufstates; /* convenience pointer */
+       struct list_head pending_bufs;
        struct qeth_out_q_stats stats;
        spinlock_t lock;
        unsigned int priority;
index b71b890..a814698 100644 (file)
@@ -73,8 +73,6 @@ static void qeth_free_qdio_queues(struct qeth_card *card);
 static void qeth_notify_skbs(struct qeth_qdio_out_q *queue,
                struct qeth_qdio_out_buffer *buf,
                enum iucv_tx_notify notification);
-static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error,
-                                int budget);
 
 static void qeth_close_dev_handler(struct work_struct *work)
 {
@@ -465,41 +463,6 @@ static enum iucv_tx_notify qeth_compute_cq_notification(int sbalf15,
        return n;
 }
 
-static void qeth_cleanup_handled_pending(struct qeth_qdio_out_q *q, int bidx,
-                                        int forced_cleanup)
-{
-       if (q->card->options.cq != QETH_CQ_ENABLED)
-               return;
-
-       if (q->bufs[bidx]->next_pending != NULL) {
-               struct qeth_qdio_out_buffer *head = q->bufs[bidx];
-               struct qeth_qdio_out_buffer *c = q->bufs[bidx]->next_pending;
-
-               while (c) {
-                       if (forced_cleanup ||
-                           atomic_read(&c->state) == QETH_QDIO_BUF_EMPTY) {
-                               struct qeth_qdio_out_buffer *f = c;
-
-                               QETH_CARD_TEXT(f->q->card, 5, "fp");
-                               QETH_CARD_TEXT_(f->q->card, 5, "%lx", (long) f);
-                               /* release here to avoid interleaving between
-                                  outbound tasklet and inbound tasklet
-                                  regarding notifications and lifecycle */
-                               qeth_tx_complete_buf(c, forced_cleanup, 0);
-
-                               c = f->next_pending;
-                               WARN_ON_ONCE(head->next_pending != f);
-                               head->next_pending = c;
-                               kmem_cache_free(qeth_qdio_outbuf_cache, f);
-                       } else {
-                               head = c;
-                               c = c->next_pending;
-                       }
-
-               }
-       }
-}
-
 static void qeth_qdio_handle_aob(struct qeth_card *card,
                                 unsigned long phys_aob_addr)
 {
@@ -507,6 +470,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
        struct qaob *aob;
        struct qeth_qdio_out_buffer *buffer;
        enum iucv_tx_notify notification;
+       struct qeth_qdio_out_q *queue;
        unsigned int i;
 
        aob = (struct qaob *) phys_to_virt(phys_aob_addr);
@@ -537,7 +501,7 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
                qeth_notify_skbs(buffer->q, buffer, notification);
 
                /* Free dangling allocations. The attached skbs are handled by
-                * qeth_cleanup_handled_pending().
+                * qeth_tx_complete_pending_bufs().
                 */
                for (i = 0;
                     i < aob->sb_count && i < QETH_MAX_BUFFER_ELEMENTS(card);
@@ -549,7 +513,9 @@ static void qeth_qdio_handle_aob(struct qeth_card *card,
                        buffer->is_header[i] = 0;
                }
 
+               queue = buffer->q;
                atomic_set(&buffer->state, QETH_QDIO_BUF_EMPTY);
+               napi_schedule(&queue->napi);
                break;
        default:
                WARN_ON_ONCE(1);
@@ -1424,9 +1390,6 @@ static void qeth_tx_complete_buf(struct qeth_qdio_out_buffer *buf, bool error,
        struct qeth_qdio_out_q *queue = buf->q;
        struct sk_buff *skb;
 
-       if (atomic_read(&buf->state) == QETH_QDIO_BUF_PENDING)
-               qeth_notify_skbs(queue, buf, TX_NOTIFY_GENERALERROR);
-
        /* Empty buffer? */
        if (buf->next_element_to_fill == 0)
                return;
@@ -1488,14 +1451,38 @@ static void qeth_clear_output_buffer(struct qeth_qdio_out_q *queue,
        atomic_set(&buf->state, QETH_QDIO_BUF_EMPTY);
 }
 
+static void qeth_tx_complete_pending_bufs(struct qeth_card *card,
+                                         struct qeth_qdio_out_q *queue,
+                                         bool drain)
+{
+       struct qeth_qdio_out_buffer *buf, *tmp;
+
+       list_for_each_entry_safe(buf, tmp, &queue->pending_bufs, list_entry) {
+               if (drain || atomic_read(&buf->state) == QETH_QDIO_BUF_EMPTY) {
+                       QETH_CARD_TEXT(card, 5, "fp");
+                       QETH_CARD_TEXT_(card, 5, "%lx", (long) buf);
+
+                       if (drain)
+                               qeth_notify_skbs(queue, buf,
+                                                TX_NOTIFY_GENERALERROR);
+                       qeth_tx_complete_buf(buf, drain, 0);
+
+                       list_del(&buf->list_entry);
+                       kmem_cache_free(qeth_qdio_outbuf_cache, buf);
+               }
+       }
+}
+
 static void qeth_drain_output_queue(struct qeth_qdio_out_q *q, bool free)
 {
        int j;
 
+       qeth_tx_complete_pending_bufs(q->card, q, true);
+
        for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
                if (!q->bufs[j])
                        continue;
-               qeth_cleanup_handled_pending(q, j, 1);
+
                qeth_clear_output_buffer(q, q->bufs[j], true, 0);
                if (free) {
                        kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[j]);
@@ -2615,7 +2602,6 @@ static int qeth_init_qdio_out_buf(struct qeth_qdio_out_q *q, int bidx)
        skb_queue_head_init(&newbuf->skb_list);
        lockdep_set_class(&newbuf->skb_list.lock, &qdio_out_skb_queue_key);
        newbuf->q = q;
-       newbuf->next_pending = q->bufs[bidx];
        atomic_set(&newbuf->state, QETH_QDIO_BUF_EMPTY);
        q->bufs[bidx] = newbuf;
        return 0;
@@ -2634,15 +2620,28 @@ static void qeth_free_output_queue(struct qeth_qdio_out_q *q)
 static struct qeth_qdio_out_q *qeth_alloc_output_queue(void)
 {
        struct qeth_qdio_out_q *q = kzalloc(sizeof(*q), GFP_KERNEL);
+       unsigned int i;
 
        if (!q)
                return NULL;
 
-       if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q)) {
-               kfree(q);
-               return NULL;
+       if (qdio_alloc_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q))
+               goto err_qdio_bufs;
+
+       for (i = 0; i < QDIO_MAX_BUFFERS_PER_Q; i++) {
+               if (qeth_init_qdio_out_buf(q, i))
+                       goto err_out_bufs;
        }
+
        return q;
+
+err_out_bufs:
+       while (i > 0)
+               kmem_cache_free(qeth_qdio_outbuf_cache, q->bufs[--i]);
+       qdio_free_buffers(q->qdio_bufs, QDIO_MAX_BUFFERS_PER_Q);
+err_qdio_bufs:
+       kfree(q);
+       return NULL;
 }
 
 static void qeth_tx_completion_timer(struct timer_list *timer)
@@ -2655,7 +2654,7 @@ static void qeth_tx_completion_timer(struct timer_list *timer)
 
 static int qeth_alloc_qdio_queues(struct qeth_card *card)
 {
-       int i, j;
+       unsigned int i;
 
        QETH_CARD_TEXT(card, 2, "allcqdbf");
 
@@ -2684,18 +2683,12 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card)
                card->qdio.out_qs[i] = queue;
                queue->card = card;
                queue->queue_no = i;
+               INIT_LIST_HEAD(&queue->pending_bufs);
                spin_lock_init(&queue->lock);
                timer_setup(&queue->timer, qeth_tx_completion_timer, 0);
                queue->coalesce_usecs = QETH_TX_COALESCE_USECS;
                queue->max_coalesced_frames = QETH_TX_MAX_COALESCED_FRAMES;
                queue->priority = QETH_QIB_PQUE_PRIO_DEFAULT;
-
-               /* give outbound qeth_qdio_buffers their qdio_buffers */
-               for (j = 0; j < QDIO_MAX_BUFFERS_PER_Q; ++j) {
-                       WARN_ON(queue->bufs[j]);
-                       if (qeth_init_qdio_out_buf(queue, j))
-                               goto out_freeoutqbufs;
-               }
        }
 
        /* completion */
@@ -2704,13 +2697,6 @@ static int qeth_alloc_qdio_queues(struct qeth_card *card)
 
        return 0;
 
-out_freeoutqbufs:
-       while (j > 0) {
-               --j;
-               kmem_cache_free(qeth_qdio_outbuf_cache,
-                               card->qdio.out_qs[i]->bufs[j]);
-               card->qdio.out_qs[i]->bufs[j] = NULL;
-       }
 out_freeoutq:
        while (i > 0) {
                qeth_free_output_queue(card->qdio.out_qs[--i]);
@@ -6107,6 +6093,8 @@ static void qeth_iqd_tx_complete(struct qeth_qdio_out_q *queue,
                                        qeth_schedule_recovery(card);
                                }
 
+                               list_add(&buffer->list_entry,
+                                        &queue->pending_bufs);
                                /* Skip clearing the buffer: */
                                return;
                        case QETH_QDIO_BUF_QAOB_OK:
@@ -6162,6 +6150,8 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
                unsigned int bytes = 0;
                int completed;
 
+               qeth_tx_complete_pending_bufs(card, queue, false);
+
                if (qeth_out_queue_is_empty(queue)) {
                        napi_complete(napi);
                        return 0;
@@ -6194,7 +6184,6 @@ static int qeth_tx_poll(struct napi_struct *napi, int budget)
 
                        qeth_handle_send_error(card, buffer, error);
                        qeth_iqd_tx_complete(queue, bidx, error, budget);
-                       qeth_cleanup_handled_pending(queue, bidx, false);
                }
 
                netdev_tx_completed_queue(txq, packets, bytes);
@@ -7249,9 +7238,7 @@ int qeth_open(struct net_device *dev)
        card->data.state = CH_STATE_UP;
        netif_tx_start_all_queues(dev);
 
-       napi_enable(&card->napi);
        local_bh_disable();
-       napi_schedule(&card->napi);
        if (IS_IQD(card)) {
                struct qeth_qdio_out_q *queue;
                unsigned int i;
@@ -7263,8 +7250,12 @@ int qeth_open(struct net_device *dev)
                        napi_schedule(&queue->napi);
                }
        }
+
+       napi_enable(&card->napi);
+       napi_schedule(&card->napi);
        /* kick-start the NAPI softirq: */
        local_bh_enable();
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(qeth_open);
@@ -7274,6 +7265,11 @@ int qeth_stop(struct net_device *dev)
        struct qeth_card *card = dev->ml_priv;
 
        QETH_CARD_TEXT(card, 4, "qethstop");
+
+       napi_disable(&card->napi);
+       cancel_delayed_work_sync(&card->buffer_reclaim_work);
+       qdio_stop_irq(CARD_DDEV(card));
+
        if (IS_IQD(card)) {
                struct qeth_qdio_out_q *queue;
                unsigned int i;
@@ -7294,10 +7290,6 @@ int qeth_stop(struct net_device *dev)
                netif_tx_disable(dev);
        }
 
-       napi_disable(&card->napi);
-       cancel_delayed_work_sync(&card->buffer_reclaim_work);
-       qdio_stop_irq(CARD_DDEV(card));
-
        return 0;
 }
 EXPORT_SYMBOL_GPL(qeth_stop);
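
The qeth rework above replaces a hand-rolled next_pending chain with a standard struct list_head. Its drain loop relies on list_for_each_entry_safe(), which caches the next node so the current entry can be unlinked and freed mid-walk. A reduced sketch, struct hypothetical:

    #include <linux/list.h>
    #include <linux/slab.h>

    struct pending_buf {
    	struct list_head list_entry;
    	bool empty;
    };

    /* _safe iteration keeps a lookahead pointer (tmp), so list_del()
     * on the current entry does not break the walk. */
    static void example_drain(struct list_head *pending, bool drain_all)
    {
    	struct pending_buf *buf, *tmp;

    	list_for_each_entry_safe(buf, tmp, pending, list_entry) {
    		if (drain_all || buf->empty) {
    			list_del(&buf->list_entry);
    			kfree(buf);
    		}
    	}
    }
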
index 5730572..54e686d 100644 (file)
@@ -117,7 +117,7 @@ struct virtio_rev_info {
 };
 
 /* the highest virtio-ccw revision we support */
-#define VIRTIO_CCW_REV_MAX 1
+#define VIRTIO_CCW_REV_MAX 2
 
 struct virtio_ccw_vq_info {
        struct virtqueue *vq;
@@ -952,7 +952,7 @@ static u8 virtio_ccw_get_status(struct virtio_device *vdev)
        u8 old_status = vcdev->dma_area->status;
        struct ccw1 *ccw;
 
-       if (vcdev->revision < 1)
+       if (vcdev->revision < 2)
                return vcdev->dma_area->status;
 
        ccw = ccw_device_dma_zalloc(vcdev->cdev, sizeof(*ccw));
index fad936e..d93595b 100644 (file)
@@ -50,7 +50,6 @@ MODULE_PARM_DESC(sol_compat,
 MODULE_AUTHOR("Eric Brower <ebrower@usa.net>");
 MODULE_DESCRIPTION("7-Segment Display driver for Sun Microsystems CP1400/1500");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("d7s");
 
 struct d7s {
        void __iomem    *regs;
@@ -186,7 +185,7 @@ static int d7s_probe(struct platform_device *op)
        p->regs = of_ioremap(&op->resource[0], 0, sizeof(u8), "d7s");
        if (!p->regs) {
                printk(KERN_ERR PFX "Cannot map chip registers\n");
-               goto out_free;
+               goto out;
        }
 
        err = misc_register(&d7s_miscdev);
@@ -228,8 +227,6 @@ out:
 
 out_iounmap:
        of_iounmap(&op->resource[0], p->regs, sizeof(u8));
-
-out_free:
        goto out;
 }
 
index 77e1d6b..59e9321 100644 (file)
@@ -1175,7 +1175,7 @@ struct ahd_softc {
        uint8_t                   tqinfifonext;
 
        /*
-        * Cached verson of the hs_mailbox so we can avoid
+        * Cached version of the hs_mailbox so we can avoid
         * pausing the sequencer during mailbox updates.
         */
        uint8_t                   hs_mailbox;
index 11a0979..9bc755a 100644 (file)
@@ -896,8 +896,6 @@ union ahc_bus_softc {
 
 typedef void (*ahc_bus_intr_t)(struct ahc_softc *);
 typedef int (*ahc_bus_chip_init_t)(struct ahc_softc *);
-typedef int (*ahc_bus_suspend_t)(struct ahc_softc *);
-typedef int (*ahc_bus_resume_t)(struct ahc_softc *);
 typedef void ahc_callback_t (void *);
 
 struct ahc_softc {
index 3cf7e08..ecdc0f0 100644 (file)
@@ -5,6 +5,7 @@ config SCSI_BNX2X_FCOE
        depends on (IPV6 || IPV6=n)
        depends on LIBFC
        depends on LIBFCOE
+       depends on MMU
        select NETDEVICES
        select ETHERNET
        select NET_VENDOR_BROADCOM
index fdd4467..1e6d8f6 100644 (file)
@@ -1171,10 +1171,8 @@ static void bnx2i_cleanup_task(struct iscsi_task *task)
                bnx2i_send_cmd_cleanup_req(hba, task->dd_data);
 
                spin_unlock_bh(&conn->session->back_lock);
-               spin_unlock_bh(&conn->session->frwd_lock);
                wait_for_completion_timeout(&bnx2i_conn->cmd_cleanup_cmpl,
                                msecs_to_jiffies(ISCSI_CMD_CLEANUP_TIMEOUT));
-               spin_lock_bh(&conn->session->frwd_lock);
                spin_lock_bh(&conn->session->back_lock);
        }
        bnx2i_iscsi_unmap_sg_list(task->dd_data);
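
Context for the dropped frwd_lock pair: after the libiscsi locking rework later in this pull, the caller no longer holds frwd_lock here, and wait_for_completion_timeout() may sleep, so no spinlock could be held across it anyway. The general shape, as a sketch with hypothetical parameters:

    #include <linux/completion.h>
    #include <linux/jiffies.h>
    #include <linux/spinlock.h>

    /* Queue work under the lock, then drop every spinlock before the
     * sleeping wait. */
    static void example_cleanup_wait(spinlock_t *lock, struct completion *done,
    				 unsigned int timeout_ms)
    {
    	spin_lock_bh(lock);
    	/* ... send the cleanup request under the lock ... */
    	spin_unlock_bh(lock);

    	/* may sleep: must not hold any spinlock here */
    	wait_for_completion_timeout(done, msecs_to_jiffies(timeout_ms));
    }
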
index 337d3aa..f135a10 100644 (file)
@@ -80,7 +80,6 @@
 MODULE_AUTHOR("Hewlett-Packard Company");
 MODULE_DESCRIPTION("Driver for HP Smart Array Controller version " \
        HPSA_DRIVER_VERSION);
-MODULE_SUPPORTED_DEVICE("HP Smart Array Controllers");
 MODULE_VERSION(HPSA_DRIVER_VERSION);
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("cciss");
@@ -1151,7 +1150,10 @@ static void __enqueue_cmd_and_start_io(struct ctlr_info *h,
 {
        dial_down_lockup_detection_during_fw_flash(h, c);
        atomic_inc(&h->commands_outstanding);
-       if (c->device)
+       /*
+        * Check to see if the command is being retried.
+        */
+       if (c->device && !c->retry_pending)
                atomic_inc(&c->device->commands_outstanding);
 
        reply_queue = h->reply_map[raw_smp_processor_id()];
@@ -5567,7 +5569,8 @@ static inline void hpsa_cmd_partial_init(struct ctlr_info *h, int index,
 }
 
 static int hpsa_ioaccel_submit(struct ctlr_info *h,
-               struct CommandList *c, struct scsi_cmnd *cmd)
+               struct CommandList *c, struct scsi_cmnd *cmd,
+               bool retry)
 {
        struct hpsa_scsi_dev_t *dev = cmd->device->hostdata;
        int rc = IO_ACCEL_INELIGIBLE;
@@ -5584,18 +5587,22 @@ static int hpsa_ioaccel_submit(struct ctlr_info *h,
        cmd->host_scribble = (unsigned char *) c;
 
        if (dev->offload_enabled) {
-               hpsa_cmd_init(h, c->cmdindex, c);
+               hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */
                c->cmd_type = CMD_SCSI;
                c->scsi_cmd = cmd;
                c->device = dev;
+               if (retry) /* Resubmit but do not increment device->commands_outstanding. */
+                       c->retry_pending = true;
                rc = hpsa_scsi_ioaccel_raid_map(h, c);
                if (rc < 0)     /* scsi_dma_map failed. */
                        rc = SCSI_MLQUEUE_HOST_BUSY;
        } else if (dev->hba_ioaccel_enabled) {
-               hpsa_cmd_init(h, c->cmdindex, c);
+               hpsa_cmd_init(h, c->cmdindex, c); /* Zeroes out all fields */
                c->cmd_type = CMD_SCSI;
                c->scsi_cmd = cmd;
                c->device = dev;
+               if (retry) /* Resubmit but do not increment device->commands_outstanding. */
+                       c->retry_pending = true;
                rc = hpsa_scsi_ioaccel_direct_map(h, c);
                if (rc < 0)     /* scsi_dma_map failed. */
                        rc = SCSI_MLQUEUE_HOST_BUSY;
@@ -5628,7 +5635,8 @@ static void hpsa_command_resubmit_worker(struct work_struct *work)
 
                if (c2->error_data.serv_response ==
                                IOACCEL2_STATUS_SR_TASK_COMP_SET_FULL) {
-                       rc = hpsa_ioaccel_submit(h, c, cmd);
+                       /* Resubmit with the retry_pending flag set. */
+                       rc = hpsa_ioaccel_submit(h, c, cmd, true);
                        if (rc == 0)
                                return;
                        if (rc == SCSI_MLQUEUE_HOST_BUSY) {
@@ -5644,6 +5652,15 @@ static void hpsa_command_resubmit_worker(struct work_struct *work)
                }
        }
        hpsa_cmd_partial_init(c->h, c->cmdindex, c);
+       /*

+        * Here we have not come in through queue_command, so we
+        * can set the retry_pending flag to true for a driver-initiated
+        * retry attempt (i.e. not a SML retry).
+        * In other words, we are submitting a driver-initiated retry.
+        * Note: hpsa_ciss_submit does not zero out the command fields like
+        *       ioaccel submit does.
+        */
+       c->retry_pending = true;
        if (hpsa_ciss_submit(c->h, c, cmd, dev)) {
                /*
                 * If we get here, it means dma mapping failed. Try
@@ -5706,11 +5723,16 @@ static int hpsa_scsi_queue_command(struct Scsi_Host *sh, struct scsi_cmnd *cmd)
        /*
         * Call alternate submit routine for I/O accelerated commands.
         * Retries always go down the normal I/O path.
+        * Note: If cmd->retries is non-zero, then this is a SML
+        *       initiated retry and not a driver initiated retry.
+        *       This command has been obtained from cmd_tagged_alloc
+        *       and is therefore a brand-new command.
         */
        if (likely(cmd->retries == 0 &&
                        !blk_rq_is_passthrough(cmd->request) &&
                        h->acciopath_status)) {
-               rc = hpsa_ioaccel_submit(h, c, cmd);
+               /* Submit with the retry_pending flag unset. */
+               rc = hpsa_ioaccel_submit(h, c, cmd, false);
                if (rc == 0)
                        return 0;
                if (rc == SCSI_MLQUEUE_HOST_BUSY) {
@@ -6105,6 +6127,7 @@ return_reset_status:
  * at init, and managed by cmd_tagged_alloc() and cmd_tagged_free() using the
  * block request tag as an index into a table of entries.  cmd_tagged_free() is
  * the complement, although cmd_free() may be called instead.
+ * This function is only called for new requests from queue_command.
  */
 static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
                                            struct scsi_cmnd *scmd)
@@ -6139,8 +6162,14 @@ static struct CommandList *cmd_tagged_alloc(struct ctlr_info *h,
        }
 
        atomic_inc(&c->refcount);
-
        hpsa_cmd_partial_init(h, idx, c);
+
+       /*
+        * This is a new command obtained from queue_command so
+        * there have not been any driver initiated retry attempts.
+        */
+       c->retry_pending = false;
+
        return c;
 }
 
@@ -6208,6 +6237,13 @@ static struct CommandList *cmd_alloc(struct ctlr_info *h)
        }
        hpsa_cmd_partial_init(h, i, c);
        c->device = NULL;
+
+       /*
+        * cmd_alloc is for "internal" commands and they are never
+        * retried.
+        */
+       c->retry_pending = false;
+
        return c;
 }
 
index 46df2e3..d126bb8 100644 (file)
@@ -448,7 +448,7 @@ struct CommandList {
         */
        struct hpsa_scsi_dev_t *phys_disk;
 
-       int abort_pending;
+       bool retry_pending;
        struct hpsa_scsi_dev_t *device;
        atomic_t refcount; /* Must be last to avoid memset in hpsa_cmd_init() */
 } __aligned(COMMANDLIST_ALIGNMENT);
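
The "Must be last" comment on refcount is load-bearing: hpsa re-initializes commands by zeroing everything up to that member with a single memset, so the live reference count survives. The technique as a standalone sketch, struct hypothetical:

    #include <linux/atomic.h>
    #include <linux/stddef.h>
    #include <linux/string.h>

    struct example_cmd {
    	int busaddr;
    	bool retry_pending;
    	atomic_t refcount;	/* must stay last */
    };

    /* Zero every field except the trailing reference count. */
    static void example_partial_init(struct example_cmd *c)
    {
    	memset(c, 0, offsetof(struct example_cmd, refcount));
    }
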
index 755313b..61831f2 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/bsg-lib.h>
 #include <asm/firmware.h>
 #include <asm/irq.h>
+#include <asm/rtas.h>
 #include <asm/vio.h>
 #include <scsi/scsi.h>
 #include <scsi/scsi_cmnd.h>
@@ -158,6 +159,9 @@ static void ibmvfc_npiv_logout(struct ibmvfc_host *);
 static void ibmvfc_tgt_implicit_logout_and_del(struct ibmvfc_target *);
 static void ibmvfc_tgt_move_login(struct ibmvfc_target *);
 
+static void ibmvfc_release_sub_crqs(struct ibmvfc_host *);
+static void ibmvfc_init_sub_crqs(struct ibmvfc_host *);
+
 static const char *unknown_error = "unknown error";
 
 static long h_reg_sub_crq(unsigned long unit_address, unsigned long ioba,
@@ -899,6 +903,9 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
 {
        int rc = 0;
        struct vio_dev *vdev = to_vio_dev(vhost->dev);
+       unsigned long flags;
+
+       ibmvfc_release_sub_crqs(vhost);
 
        /* Re-enable the CRQ */
        do {
@@ -910,6 +917,15 @@ static int ibmvfc_reenable_crq_queue(struct ibmvfc_host *vhost)
        if (rc)
                dev_err(vhost->dev, "Error enabling adapter (rc=%d)\n", rc);
 
+       spin_lock_irqsave(vhost->host->host_lock, flags);
+       spin_lock(vhost->crq.q_lock);
+       vhost->do_enquiry = 1;
+       vhost->using_channels = 0;
+       spin_unlock(vhost->crq.q_lock);
+       spin_unlock_irqrestore(vhost->host->host_lock, flags);
+
+       ibmvfc_init_sub_crqs(vhost);
+
        return rc;
 }
 
@@ -926,8 +942,8 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
        unsigned long flags;
        struct vio_dev *vdev = to_vio_dev(vhost->dev);
        struct ibmvfc_queue *crq = &vhost->crq;
-       struct ibmvfc_queue *scrq;
-       int i;
+
+       ibmvfc_release_sub_crqs(vhost);
 
        /* Close the CRQ */
        do {
@@ -947,16 +963,6 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
        memset(crq->msgs.crq, 0, PAGE_SIZE);
        crq->cur = 0;
 
-       if (vhost->scsi_scrqs.scrqs) {
-               for (i = 0; i < nr_scsi_hw_queues; i++) {
-                       scrq = &vhost->scsi_scrqs.scrqs[i];
-                       spin_lock(scrq->q_lock);
-                       memset(scrq->msgs.scrq, 0, PAGE_SIZE);
-                       scrq->cur = 0;
-                       spin_unlock(scrq->q_lock);
-               }
-       }
-
        /* And re-open it again */
        rc = plpar_hcall_norets(H_REG_CRQ, vdev->unit_address,
                                crq->msg_token, PAGE_SIZE);
@@ -966,9 +972,12 @@ static int ibmvfc_reset_crq(struct ibmvfc_host *vhost)
                dev_warn(vhost->dev, "Partner adapter not ready\n");
        else if (rc != 0)
                dev_warn(vhost->dev, "Couldn't register crq (rc=%d)\n", rc);
+
        spin_unlock(vhost->crq.q_lock);
        spin_unlock_irqrestore(vhost->host->host_lock, flags);
 
+       ibmvfc_init_sub_crqs(vhost);
+
        return rc;
 }
 
@@ -2363,6 +2372,24 @@ static int ibmvfc_match_lun(struct ibmvfc_event *evt, void *device)
 }
 
 /**
+ * ibmvfc_event_is_free - Check if event is free or not
+ * @evt:       ibmvfc event struct
+ *
+ * Returns:
+ *     true / false
+ **/
+static bool ibmvfc_event_is_free(struct ibmvfc_event *evt)
+{
+       struct ibmvfc_event *loop_evt;
+
+       list_for_each_entry(loop_evt, &evt->queue->free, queue_list)
+               if (loop_evt == evt)
+                       return true;
+
+       return false;
+}
+
+/**
  * ibmvfc_wait_for_ops - Wait for ops to complete
  * @vhost:     ibmvfc host struct
  * @device:    device to match (starget or sdev)
@@ -2376,35 +2403,58 @@ static int ibmvfc_wait_for_ops(struct ibmvfc_host *vhost, void *device,
 {
        struct ibmvfc_event *evt;
        DECLARE_COMPLETION_ONSTACK(comp);
-       int wait;
+       int wait, i, q_index, q_size;
        unsigned long flags;
        signed long timeout = IBMVFC_ABORT_WAIT_TIMEOUT * HZ;
+       struct ibmvfc_queue *queues;
 
        ENTER;
+       if (vhost->mq_enabled && vhost->using_channels) {
+               queues = vhost->scsi_scrqs.scrqs;
+               q_size = vhost->scsi_scrqs.active_queues;
+       } else {
+               queues = &vhost->crq;
+               q_size = 1;
+       }
+
        do {
                wait = 0;
-               spin_lock_irqsave(&vhost->crq.l_lock, flags);
-               list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
-                       if (match(evt, device)) {
-                               evt->eh_comp = &comp;
-                               wait++;
+               spin_lock_irqsave(vhost->host->host_lock, flags);
+               for (q_index = 0; q_index < q_size; q_index++) {
+                       spin_lock(&queues[q_index].l_lock);
+                       for (i = 0; i < queues[q_index].evt_pool.size; i++) {
+                               evt = &queues[q_index].evt_pool.events[i];
+                               if (!ibmvfc_event_is_free(evt)) {
+                                       if (match(evt, device)) {
+                                               evt->eh_comp = &comp;
+                                               wait++;
+                                       }
+                               }
                        }
+                       spin_unlock(&queues[q_index].l_lock);
                }
-               spin_unlock_irqrestore(&vhost->crq.l_lock, flags);
+               spin_unlock_irqrestore(vhost->host->host_lock, flags);
 
                if (wait) {
                        timeout = wait_for_completion_timeout(&comp, timeout);
 
                        if (!timeout) {
                                wait = 0;
-                               spin_lock_irqsave(&vhost->crq.l_lock, flags);
-                               list_for_each_entry(evt, &vhost->crq.sent, queue_list) {
-                                       if (match(evt, device)) {
-                                               evt->eh_comp = NULL;
-                                               wait++;
+                               spin_lock_irqsave(vhost->host->host_lock, flags);
+                               for (q_index = 0; q_index < q_size; q_index++) {
+                                       spin_lock(&queues[q_index].l_lock);
+                                       for (i = 0; i < queues[q_index].evt_pool.size; i++) {
+                                               evt = &queues[q_index].evt_pool.events[i];
+                                               if (!ibmvfc_event_is_free(evt)) {
+                                                       if (match(evt, device)) {
+                                                               evt->eh_comp = NULL;
+                                                               wait++;
+                                                       }
+                                               }
                                        }
+                                       spin_unlock(&queues[q_index].l_lock);
                                }
-                               spin_unlock_irqrestore(&vhost->crq.l_lock, flags);
+                               spin_unlock_irqrestore(vhost->host->host_lock, flags);
                                if (wait)
                                        dev_err(vhost->dev, "Timed out waiting for aborted commands\n");
                                LEAVE;
@@ -5642,7 +5692,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
        rc = h_reg_sub_crq(vdev->unit_address, scrq->msg_token, PAGE_SIZE,
                           &scrq->cookie, &scrq->hw_irq);
 
-       if (rc) {
+       /* H_CLOSED indicates successful register, but no CRQ partner */
+       if (rc && rc != H_CLOSED) {
                dev_warn(dev, "Error registering sub-crq: %d\n", rc);
                if (rc == H_PARAMETER)
                        dev_warn_once(dev, "Firmware may not support MQ\n");
@@ -5675,8 +5726,8 @@ static int ibmvfc_register_scsi_channel(struct ibmvfc_host *vhost,
 
 irq_failed:
        do {
-               plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
-       } while (rc == H_BUSY || H_IS_LONG_BUSY(rc));
+               rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address, scrq->cookie);
+       } while (rtas_busy_delay(rc));
 reg_failed:
        ibmvfc_free_queue(vhost, scrq);
        LEAVE;
@@ -5694,6 +5745,7 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
 
        free_irq(scrq->irq, scrq);
        irq_dispose_mapping(scrq->irq);
+       scrq->irq = 0;
 
        do {
                rc = plpar_hcall_norets(H_FREE_SUB_CRQ, vdev->unit_address,
@@ -5707,17 +5759,21 @@ static void ibmvfc_deregister_scsi_channel(struct ibmvfc_host *vhost, int index)
        LEAVE;
 }
 
-static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
+static void ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
 {
        int i, j;
 
        ENTER;
+       if (!vhost->mq_enabled)
+               return;
 
        vhost->scsi_scrqs.scrqs = kcalloc(nr_scsi_hw_queues,
                                          sizeof(*vhost->scsi_scrqs.scrqs),
                                          GFP_KERNEL);
-       if (!vhost->scsi_scrqs.scrqs)
-               return -1;
+       if (!vhost->scsi_scrqs.scrqs) {
+               vhost->do_enquiry = 0;
+               return;
+       }
 
        for (i = 0; i < nr_scsi_hw_queues; i++) {
                if (ibmvfc_register_scsi_channel(vhost, i)) {
@@ -5726,13 +5782,12 @@ static int ibmvfc_init_sub_crqs(struct ibmvfc_host *vhost)
                        kfree(vhost->scsi_scrqs.scrqs);
                        vhost->scsi_scrqs.scrqs = NULL;
                        vhost->scsi_scrqs.active_queues = 0;
-                       LEAVE;
-                       return -1;
+                       vhost->do_enquiry = 0;
+                       break;
                }
        }
 
        LEAVE;
-       return 0;
 }
 
 static void ibmvfc_release_sub_crqs(struct ibmvfc_host *vhost)
@@ -5770,6 +5825,8 @@ static void ibmvfc_free_mem(struct ibmvfc_host *vhost)
                          vhost->disc_buf_dma);
        dma_free_coherent(vhost->dev, sizeof(*vhost->login_buf),
                          vhost->login_buf, vhost->login_buf_dma);
+       dma_free_coherent(vhost->dev, sizeof(*vhost->channel_setup_buf),
+                         vhost->channel_setup_buf, vhost->channel_setup_dma);
        dma_pool_destroy(vhost->sg_pool);
        ibmvfc_free_queue(vhost, async_q);
        LEAVE;
@@ -5999,11 +6056,7 @@ static int ibmvfc_probe(struct vio_dev *vdev, const struct vio_device_id *id)
                goto remove_shost;
        }
 
-       if (vhost->mq_enabled) {
-               rc = ibmvfc_init_sub_crqs(vhost);
-               if (rc)
-                       dev_warn(dev, "Failed to allocate Sub-CRQs. rc=%d\n", rc);
-       }
+       ibmvfc_init_sub_crqs(vhost);
 
        if (shost_to_fc_host(shost)->rqst_q)
                blk_queue_max_segments(shost_to_fc_host(shost)->rqst_q, 1);
@@ -6038,7 +6091,7 @@ out:
  * Return value:
  *     0
  **/
-static int ibmvfc_remove(struct vio_dev *vdev)
+static void ibmvfc_remove(struct vio_dev *vdev)
 {
        struct ibmvfc_host *vhost = dev_get_drvdata(&vdev->dev);
        LIST_HEAD(purge);
@@ -6070,7 +6123,6 @@ static int ibmvfc_remove(struct vio_dev *vdev)
        spin_unlock(&ibmvfc_driver_lock);
        scsi_host_put(vhost->host);
        LEAVE;
-       return 0;
 }
 
 /**
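
The irq_failed fix above also matters for correctness: the old loop never reassigned rc inside the body, so its busy test was evaluated against a stale value. rtas_busy_delay() both sleeps for the interval encoded in a busy return status and reports whether the call should be retried. The retry idiom, as a sketch with hypothetical parameters:

    #include <asm/hvcall.h>
    #include <asm/rtas.h>

    /* Retry a hypervisor call for as long as it reports busy, sleeping
     * between attempts as the status dictates. */
    static long example_free_sub_crq(unsigned long unit, unsigned long cookie)
    {
    	long rc;

    	do {
    		rc = plpar_hcall_norets(H_FREE_SUB_CRQ, unit, cookie);
    	} while (rtas_busy_delay(rc));

    	return rc;
    }
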
index 29fcc44..77fafb1 100644 (file)
@@ -2335,7 +2335,7 @@ static int ibmvscsi_probe(struct vio_dev *vdev, const struct vio_device_id *id)
        return -1;
 }
 
-static int ibmvscsi_remove(struct vio_dev *vdev)
+static void ibmvscsi_remove(struct vio_dev *vdev)
 {
        struct ibmvscsi_host_data *hostdata = dev_get_drvdata(&vdev->dev);
 
@@ -2356,8 +2356,6 @@ static int ibmvscsi_remove(struct vio_dev *vdev)
        spin_unlock(&ibmvscsi_driver_lock);
 
        scsi_host_put(hostdata->host);
-
-       return 0;
 }
 
 /**
index cc3908c..9abd9e2 100644 (file)
@@ -3595,7 +3595,7 @@ free_adapter:
        return rc;
 }
 
-static int ibmvscsis_remove(struct vio_dev *vdev)
+static void ibmvscsis_remove(struct vio_dev *vdev)
 {
        struct scsi_info *vscsi = dev_get_drvdata(&vdev->dev);
 
@@ -3622,8 +3622,6 @@ static int ibmvscsis_remove(struct vio_dev *vdev)
        list_del(&vscsi->list);
        spin_unlock_bh(&ibmvscsis_dev_lock);
        kfree(vscsi);
-
-       return 0;
 }
 
 static ssize_t system_id_show(struct device *dev,
index bee1685..58e6216 100644 (file)
@@ -1480,8 +1480,6 @@ static enum sci_status
 stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq,
                                              u32 completion_code)
 {
-       enum sci_status status = SCI_SUCCESS;
-
        switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) {
        case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD):
                ireq->scu_status = SCU_TASK_DONE_GOOD;
@@ -1500,7 +1498,7 @@ stp_request_pio_await_h2d_completion_tc_event(struct isci_request *ireq,
                break;
        }
 
-       return status;
+       return SCI_SUCCESS;
 }
 
 static enum sci_status
@@ -2152,8 +2150,6 @@ static enum sci_status stp_request_udma_await_tc_event(struct isci_request *ireq
 static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 completion_code,
                                                  enum sci_base_request_states next)
 {
-       enum sci_status status = SCI_SUCCESS;
-
        switch (SCU_GET_COMPLETION_TL_STATUS(completion_code)) {
        case SCU_MAKE_COMPLETION_STATUS(SCU_TASK_DONE_GOOD):
                ireq->scu_status = SCU_TASK_DONE_GOOD;
@@ -2172,7 +2168,7 @@ static enum sci_status atapi_raw_completion(struct isci_request *ireq, u32 compl
                break;
        }
 
-       return status;
+       return SCI_SUCCESS;
 }
 
 static enum sci_status atapi_data_tc_completion_handler(struct isci_request *ireq,
index a9ce629..dd33ce0 100644 (file)
@@ -847,6 +847,7 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
        struct iscsi_session *session;
        struct iscsi_sw_tcp_host *tcp_sw_host;
        struct Scsi_Host *shost;
+       int rc;
 
        if (ep) {
                printk(KERN_ERR "iscsi_tcp: invalid ep %p.\n", ep);
@@ -864,6 +865,11 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
        shost->max_channel = 0;
        shost->max_cmd_len = SCSI_MAX_VARLEN_CDB_SIZE;
 
+       rc = iscsi_host_get_max_scsi_cmds(shost, cmds_max);
+       if (rc < 0)
+               goto free_host;
+       shost->can_queue = rc;
+
        if (iscsi_host_add(shost, NULL))
                goto free_host;
 
@@ -878,7 +884,6 @@ iscsi_sw_tcp_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max,
        tcp_sw_host = iscsi_host_priv(shost);
        tcp_sw_host->session = session;
 
-       shost->can_queue = session->scsi_cmds_max;
        if (iscsi_tcp_r2tpool_alloc(session))
                goto remove_session;
        return cls_session;
@@ -981,7 +986,7 @@ static struct scsi_host_template iscsi_sw_tcp_sht = {
        .name                   = "iSCSI Initiator over TCP/IP",
        .queuecommand           = iscsi_queuecommand,
        .change_queue_depth     = scsi_change_queue_depth,
-       .can_queue              = ISCSI_DEF_XMIT_CMDS_MAX - 1,
+       .can_queue              = ISCSI_TOTAL_CMDS_MAX,
        .sg_tablesize           = 4096,
        .max_sectors            = 0xFFFF,
        .cmd_per_lun            = ISCSI_DEF_CMD_PER_LUN,
index 4e668aa..04633e5 100644 (file)
@@ -523,16 +523,6 @@ static void iscsi_complete_task(struct iscsi_task *task, int state)
        WARN_ON_ONCE(task->state == ISCSI_TASK_FREE);
        task->state = state;
 
-       spin_lock_bh(&conn->taskqueuelock);
-       if (!list_empty(&task->running)) {
-               pr_debug_once("%s while task on list", __func__);
-               list_del_init(&task->running);
-       }
-       spin_unlock_bh(&conn->taskqueuelock);
-
-       if (conn->task == task)
-               conn->task = NULL;
-
        if (READ_ONCE(conn->ping_task) == task)
                WRITE_ONCE(conn->ping_task, NULL);
 
@@ -564,11 +554,41 @@ void iscsi_complete_scsi_task(struct iscsi_task *task,
 }
 EXPORT_SYMBOL_GPL(iscsi_complete_scsi_task);
 
+/*
+ * Must be called with back and frwd lock
+ */
+static bool cleanup_queued_task(struct iscsi_task *task)
+{
+       struct iscsi_conn *conn = task->conn;
+       bool early_complete = false;
+
+       /* Bad target might have completed task while it was still running */
+       if (task->state == ISCSI_TASK_COMPLETED)
+               early_complete = true;
+
+       if (!list_empty(&task->running)) {
+               list_del_init(&task->running);
+               /*
+                * If it's on a list but still running, this could be from
+                * a bad target sending a rsp early, cleanup from a TMF, or
+                * session recovery.
+                */
+               if (task->state == ISCSI_TASK_RUNNING ||
+                   task->state == ISCSI_TASK_COMPLETED)
+                       __iscsi_put_task(task);
+       }
+
+       if (conn->task == task) {
+               conn->task = NULL;
+               __iscsi_put_task(task);
+       }
+
+       return early_complete;
+}
 
 /*
- * session back_lock must be held and if not called for a task that is
- * still pending or from the xmit thread, then xmit thread must
- * be suspended.
+ * session frwd lock must be held; if this is not called for a task that is
+ * still pending or from the xmit thread, the xmit thread must be suspended
  */
 static void fail_scsi_task(struct iscsi_task *task, int err)
 {
@@ -576,14 +596,11 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
        struct scsi_cmnd *sc;
        int state;
 
-       /*
-        * if a command completes and we get a successful tmf response
-        * we will hit this because the scsi eh abort code does not take
-        * a ref to the task.
-        */
-       sc = task->sc;
-       if (!sc)
+       spin_lock_bh(&conn->session->back_lock);
+       if (cleanup_queued_task(task)) {
+               spin_unlock_bh(&conn->session->back_lock);
                return;
+       }
 
        if (task->state == ISCSI_TASK_PENDING) {
                /*
@@ -598,11 +615,9 @@ static void fail_scsi_task(struct iscsi_task *task, int err)
        else
                state = ISCSI_TASK_ABRT_TMF;
 
+       sc = task->sc;
        sc->result = err << 16;
        scsi_set_resid(sc, scsi_bufflen(sc));
-
-       /* regular RX path uses back_lock */
-       spin_lock_bh(&conn->session->back_lock);
        iscsi_complete_task(task, state);
        spin_unlock_bh(&conn->session->back_lock);
 }
@@ -748,9 +763,7 @@ __iscsi_conn_send_pdu(struct iscsi_conn *conn, struct iscsi_hdr *hdr,
                if (session->tt->xmit_task(task))
                        goto free_task;
        } else {
-               spin_lock_bh(&conn->taskqueuelock);
                list_add_tail(&task->running, &conn->mgmtqueue);
-               spin_unlock_bh(&conn->taskqueuelock);
                iscsi_conn_queue_work(conn);
        }
 
@@ -1411,31 +1424,61 @@ static int iscsi_check_cmdsn_window_closed(struct iscsi_conn *conn)
        return 0;
 }
 
-static int iscsi_xmit_task(struct iscsi_conn *conn)
+static int iscsi_xmit_task(struct iscsi_conn *conn, struct iscsi_task *task,
+                          bool was_requeue)
 {
-       struct iscsi_task *task = conn->task;
        int rc;
 
-       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx))
-               return -ENODATA;
-
        spin_lock_bh(&conn->session->back_lock);
-       if (conn->task == NULL) {
+
+       if (!conn->task) {
+               /* Take a ref so we can access it after xmit_task() */
+               __iscsi_get_task(task);
+       } else {
+               /* Already have a ref from when we failed to send it last call */
+               conn->task = NULL;
+       }
+
+       /*
+        * If this was a requeue for an R2T we have an extra ref on the task in
+        * case a bad target sends a cmd rsp before we have handled the task.
+        */
+       if (was_requeue)
+               __iscsi_put_task(task);
+
+       /*
+        * Do this after dropping the extra ref because if this was a requeue
+        * it's removed from that list and cleanup_queued_task would miss it.
+        */
+       if (test_bit(ISCSI_SUSPEND_BIT, &conn->suspend_tx)) {
+               /*
+                * Save the task and its ref in case we were not cleaning up
+                * this task and get woken up again.
+                */
+               conn->task = task;
                spin_unlock_bh(&conn->session->back_lock);
                return -ENODATA;
        }
-       __iscsi_get_task(task);
        spin_unlock_bh(&conn->session->back_lock);
+
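+       /*
+        * Drop the frwd lock across the LLD xmit callout; the ref taken
+        * above keeps the task valid while the lock is not held.
+        */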
        spin_unlock_bh(&conn->session->frwd_lock);
        rc = conn->session->tt->xmit_task(task);
        spin_lock_bh(&conn->session->frwd_lock);
        if (!rc) {
                /* done with this task */
                task->last_xfer = jiffies;
-               conn->task = NULL;
        }
        /* regular RX path uses back_lock */
        spin_lock(&conn->session->back_lock);
+       if (rc && task->state == ISCSI_TASK_RUNNING) {
+               /*
+                * Take an extra ref that is released the next time this task
+                * is picked up as conn->task at the top of this function.
+                */
+               __iscsi_get_task(task);
+               conn->task = task;
+       }
+
        __iscsi_put_task(task);
        spin_unlock(&conn->session->back_lock);
        return rc;
@@ -1445,9 +1488,7 @@ static int iscsi_xmit_task(struct iscsi_conn *conn)
  * iscsi_requeue_task - requeue task to run from session workqueue
  * @task: task to requeue
  *
- * LLDs that need to run a task from the session workqueue should call
- * this. The session frwd_lock must be held. This should only be called
- * by software drivers.
+ * Callers must have taken a ref to the task that is going to be requeued.
  */
 void iscsi_requeue_task(struct iscsi_task *task)
 {
@@ -1457,11 +1498,18 @@ void iscsi_requeue_task(struct iscsi_task *task)
         * this may be on the requeue list already if the xmit_task callout
         * is handling the r2ts while we are adding new ones
         */
-       spin_lock_bh(&conn->taskqueuelock);
-       if (list_empty(&task->running))
+       spin_lock_bh(&conn->session->frwd_lock);
+       if (list_empty(&task->running)) {
                list_add_tail(&task->running, &conn->requeue);
-       spin_unlock_bh(&conn->taskqueuelock);
+       } else {
+               /*
+                * Don't need the extra ref since it's already requeued and
+                * has a ref.
+                */
+               iscsi_put_task(task);
+       }
        iscsi_conn_queue_work(conn);
+       spin_unlock_bh(&conn->session->frwd_lock);
 }
 EXPORT_SYMBOL_GPL(iscsi_requeue_task);
 
@@ -1487,7 +1535,7 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
        }
 
        if (conn->task) {
-               rc = iscsi_xmit_task(conn);
+               rc = iscsi_xmit_task(conn, conn->task, false);
                if (rc)
                        goto done;
        }
@@ -1497,54 +1545,41 @@ static int iscsi_data_xmit(struct iscsi_conn *conn)
         * only have one nop-out as a ping from us and targets should not
         * overflow us with nop-ins
         */
-       spin_lock_bh(&conn->taskqueuelock);
 check_mgmt:
        while (!list_empty(&conn->mgmtqueue)) {
-               conn->task = list_entry(conn->mgmtqueue.next,
-                                        struct iscsi_task, running);
-               list_del_init(&conn->task->running);
-               spin_unlock_bh(&conn->taskqueuelock);
-               if (iscsi_prep_mgmt_task(conn, conn->task)) {
+               task = list_entry(conn->mgmtqueue.next, struct iscsi_task,
+                                 running);
+               list_del_init(&task->running);
+               if (iscsi_prep_mgmt_task(conn, task)) {
                        /* regular RX path uses back_lock */
                        spin_lock_bh(&conn->session->back_lock);
-                       __iscsi_put_task(conn->task);
+                       __iscsi_put_task(task);
                        spin_unlock_bh(&conn->session->back_lock);
-                       conn->task = NULL;
-                       spin_lock_bh(&conn->taskqueuelock);
                        continue;
                }
-               rc = iscsi_xmit_task(conn);
+               rc = iscsi_xmit_task(conn, task, false);
                if (rc)
                        goto done;
-               spin_lock_bh(&conn->taskqueuelock);
        }
 
        /* process pending command queue */
        while (!list_empty(&conn->cmdqueue)) {
-               conn->task = list_entry(conn->cmdqueue.next, struct iscsi_task,
-                                       running);
-               list_del_init(&conn->task->running);
-               spin_unlock_bh(&conn->taskqueuelock);
+               task = list_entry(conn->cmdqueue.next, struct iscsi_task,
+                                 running);
+               list_del_init(&task->running);
                if (conn->session->state == ISCSI_STATE_LOGGING_OUT) {
-                       fail_scsi_task(conn->task, DID_IMM_RETRY);
-                       spin_lock_bh(&conn->taskqueuelock);
+                       fail_scsi_task(task, DID_IMM_RETRY);
                        continue;
                }
-               rc = iscsi_prep_scsi_cmd_pdu(conn->task);
+               rc = iscsi_prep_scsi_cmd_pdu(task);
                if (rc) {
-                       if (rc == -ENOMEM || rc == -EACCES) {
-                               spin_lock_bh(&conn->taskqueuelock);
-                               list_add_tail(&conn->task->running,
-                                             &conn->cmdqueue);
-                               conn->task = NULL;
-                               spin_unlock_bh(&conn->taskqueuelock);
-                               goto done;
-                       } else
-                               fail_scsi_task(conn->task, DID_ABORT);
-                       spin_lock_bh(&conn->taskqueuelock);
+                       if (rc == -ENOMEM || rc == -EACCES)
+                               fail_scsi_task(task, DID_IMM_RETRY);
+                       else
+                               fail_scsi_task(task, DID_ABORT);
                        continue;
                }
-               rc = iscsi_xmit_task(conn);
+               rc = iscsi_xmit_task(conn, task, false);
                if (rc)
                        goto done;
                /*
@@ -1552,7 +1587,6 @@ check_mgmt:
                 * we need to check the mgmt queue for nops that need to
                 * be sent to avoid starvation
                 */
-               spin_lock_bh(&conn->taskqueuelock);
                if (!list_empty(&conn->mgmtqueue))
                        goto check_mgmt;
        }
@@ -1566,21 +1600,17 @@ check_mgmt:
 
                task = list_entry(conn->requeue.next, struct iscsi_task,
                                  running);
+
                if (iscsi_check_tmf_restrictions(task, ISCSI_OP_SCSI_DATA_OUT))
                        break;
 
-               conn->task = task;
-               list_del_init(&conn->task->running);
-               conn->task->state = ISCSI_TASK_RUNNING;
-               spin_unlock_bh(&conn->taskqueuelock);
-               rc = iscsi_xmit_task(conn);
+               list_del_init(&task->running);
+               rc = iscsi_xmit_task(conn, task, true);
                if (rc)
                        goto done;
-               spin_lock_bh(&conn->taskqueuelock);
                if (!list_empty(&conn->mgmtqueue))
                        goto check_mgmt;
        }
-       spin_unlock_bh(&conn->taskqueuelock);
        spin_unlock_bh(&conn->session->frwd_lock);
        return -ENODATA;
 
@@ -1746,9 +1776,7 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc)
                        goto prepd_reject;
                }
        } else {
-               spin_lock_bh(&conn->taskqueuelock);
                list_add_tail(&task->running, &conn->cmdqueue);
-               spin_unlock_bh(&conn->taskqueuelock);
                iscsi_conn_queue_work(conn);
        }
 
@@ -1855,27 +1883,39 @@ static int iscsi_exec_task_mgmt_fn(struct iscsi_conn *conn,
 }
 
 /*
- * Fail commands. session lock held and recv side suspended and xmit
- * thread flushed
+ * Fail commands. session frwd lock held and xmit thread flushed.
  */
 static void fail_scsi_tasks(struct iscsi_conn *conn, u64 lun, int error)
 {
+       struct iscsi_session *session = conn->session;
        struct iscsi_task *task;
        int i;
 
-       for (i = 0; i < conn->session->cmds_max; i++) {
-               task = conn->session->cmds[i];
+       spin_lock_bh(&session->back_lock);
+       for (i = 0; i < session->cmds_max; i++) {
+               task = session->cmds[i];
                if (!task->sc || task->state == ISCSI_TASK_FREE)
                        continue;
 
                if (lun != -1 && lun != task->sc->device->lun)
                        continue;
 
-               ISCSI_DBG_SESSION(conn->session,
+               __iscsi_get_task(task);
+               spin_unlock_bh(&session->back_lock);
+
+               ISCSI_DBG_SESSION(session,
                                  "failing sc %p itt 0x%x state %d\n",
                                  task->sc, task->itt, task->state);
                fail_scsi_task(task, error);
+
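+               /*
+                * Drop our ref with the frwd lock released; the final put may
+                * complete and free the task. Retake frwd and back to continue
+                * walking the cmds array.
+                */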
+               spin_unlock_bh(&session->frwd_lock);
+               iscsi_put_task(task);
+               spin_lock_bh(&session->frwd_lock);
+
+               spin_lock_bh(&session->back_lock);
        }
+
+       spin_unlock_bh(&session->back_lock);
 }
 
 /**
@@ -1953,6 +1993,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
        ISCSI_DBG_EH(session, "scsi cmd %p timedout\n", sc);
 
        spin_lock_bh(&session->frwd_lock);
+       spin_lock(&session->back_lock);
        task = (struct iscsi_task *)sc->SCp.ptr;
        if (!task) {
                /*
@@ -1960,8 +2001,11 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
                 * so let timeout code complete it now.
                 */
                rc = BLK_EH_DONE;
+               spin_unlock(&session->back_lock);
                goto done;
        }
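+       /* Pin the task so the normal completion path can't free it under us */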
+       __iscsi_get_task(task);
+       spin_unlock(&session->back_lock);
 
        if (session->state != ISCSI_STATE_LOGGED_IN) {
                /*
@@ -2020,6 +2064,7 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
                goto done;
        }
 
+       spin_lock(&session->back_lock);
        for (i = 0; i < conn->session->cmds_max; i++) {
                running_task = conn->session->cmds[i];
                if (!running_task->sc || running_task == task ||
@@ -2052,10 +2097,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
                                     "last xfer %lu/%lu. Last check %lu.\n",
                                     task->last_xfer, running_task->last_xfer,
                                     task->last_timeout);
+                       spin_unlock(&session->back_lock);
                        rc = BLK_EH_RESET_TIMER;
                        goto done;
                }
        }
+       spin_unlock(&session->back_lock);
 
        /* Assumes nop timeout is shorter than scsi cmd timeout */
        if (task->have_checked_conn)
@@ -2077,9 +2124,12 @@ enum blk_eh_timer_return iscsi_eh_cmd_timed_out(struct scsi_cmnd *sc)
        rc = BLK_EH_RESET_TIMER;
 
 done:
-       if (task)
-               task->last_timeout = jiffies;
        spin_unlock_bh(&session->frwd_lock);
+
+       if (task) {
+               task->last_timeout = jiffies;
+               iscsi_put_task(task);
+       }
        ISCSI_DBG_EH(session, "return %s\n", rc == BLK_EH_RESET_TIMER ?
                     "timer reset" : "shutdown or nh");
        return rc;
@@ -2187,15 +2237,20 @@ int iscsi_eh_abort(struct scsi_cmnd *sc)
        conn->eh_abort_cnt++;
        age = session->age;
 
+       spin_lock(&session->back_lock);
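+       /* sc->SCp.ptr is only stable while the back lock is held */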
        task = (struct iscsi_task *)sc->SCp.ptr;
-       ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n",
-                    sc, task->itt);
-
-       /* task completed before time out */
-       if (!task->sc) {
+       if (!task || !task->sc) {
+               /* task completed before time out */
                ISCSI_DBG_EH(session, "sc completed while abort in progress\n");
-               goto success;
+
+               spin_unlock(&session->back_lock);
+               spin_unlock_bh(&session->frwd_lock);
+               mutex_unlock(&session->eh_mutex);
+               return SUCCESS;
        }
+       ISCSI_DBG_EH(session, "aborting [sc %p itt 0x%x]\n", sc, task->itt);
+       __iscsi_get_task(task);
+       spin_unlock(&session->back_lock);
 
        if (task->state == ISCSI_TASK_PENDING) {
                fail_scsi_task(task, DID_ABORT);
@@ -2257,6 +2312,7 @@ success:
 success_unlocked:
        ISCSI_DBG_EH(session, "abort success [sc %p itt 0x%x]\n",
                     sc, task->itt);
+       iscsi_put_task(task);
        mutex_unlock(&session->eh_mutex);
        return SUCCESS;
 
@@ -2265,6 +2321,7 @@ failed:
 failed_unlocked:
        ISCSI_DBG_EH(session, "abort failed [sc %p itt 0x%x]\n", sc,
                     task ? task->itt : 0);
+       iscsi_put_task(task);
        mutex_unlock(&session->eh_mutex);
        return FAILED;
 }
@@ -2591,6 +2648,56 @@ void iscsi_pool_free(struct iscsi_pool *q)
 }
 EXPORT_SYMBOL_GPL(iscsi_pool_free);
 
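+/**
+ * iscsi_host_get_max_scsi_cmds - validate a requested total command count
+ * @shost: scsi host
+ * @requested_cmds_max: requested total commands (0 selects the default)
+ *
+ * Rounds the request down to a supported power of two, caps it at the
+ * driver's can_queue, and returns the number of slots usable for SCSI
+ * commands after reserving ISCSI_MGMT_CMDS_MAX, or a negative errno.
+ */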
+int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+                                uint16_t requested_cmds_max)
+{
+       int scsi_cmds, total_cmds = requested_cmds_max;
+
+check:
+       if (!total_cmds)
+               total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;
+       /*
+        * The iscsi layer needs some tasks for nop handling and tmfs,
+        * so cmds_max must be at least ISCSI_MGMT_CMDS_MAX plus one
+        * command for SCSI IO.
+        */
+       if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
+               printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of two that is at least %d.\n",
+                      total_cmds, ISCSI_TOTAL_CMDS_MIN);
+               return -EINVAL;
+       }
+
+       if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {
+               printk(KERN_INFO "iscsi: invalid max cmds of %d. Must be a power of 2 less than or equal to %d. Using %d.\n",
+                      requested_cmds_max, ISCSI_TOTAL_CMDS_MAX,
+                      ISCSI_TOTAL_CMDS_MAX);
+               total_cmds = ISCSI_TOTAL_CMDS_MAX;
+       }
+
+       if (!is_power_of_2(total_cmds)) {
+               total_cmds = rounddown_pow_of_two(total_cmds);
+               if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
+                       printk(KERN_ERR "iscsi: invalid max cmds of %d. Must be a power of 2 that is at least %d.\n",
+                              requested_cmds_max, ISCSI_TOTAL_CMDS_MIN);
+                       return -EINVAL;
+               }
+
+               printk(KERN_INFO "iscsi: invalid max cmds %d. Must be a power of 2. Rounding max cmds down to %d.\n",
+                      requested_cmds_max, total_cmds);
+       }
+
+       scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;
+       if (shost->can_queue && scsi_cmds > shost->can_queue) {
+               total_cmds = shost->can_queue;
+
+               printk(KERN_INFO "iscsi: requested max cmds %u is higher than driver limit. Using driver limit %u\n",
+                      requested_cmds_max, shost->can_queue);
+               goto check;
+       }
+
+       return scsi_cmds;
+}
+EXPORT_SYMBOL_GPL(iscsi_host_get_max_scsi_cmds);
+
 /**
  * iscsi_host_add - add host to system
  * @shost: scsi host
@@ -2681,8 +2788,6 @@ void iscsi_host_remove(struct Scsi_Host *shost)
                flush_signals(current);
 
        scsi_remove_host(shost);
-       if (ihost->workq)
-               destroy_workqueue(ihost->workq);
 }
 EXPORT_SYMBOL_GPL(iscsi_host_remove);
 
@@ -2690,6 +2795,9 @@ void iscsi_host_free(struct Scsi_Host *shost)
 {
        struct iscsi_host *ihost = shost_priv(shost);
 
+       if (ihost->workq)
+               destroy_workqueue(ihost->workq);
+
        kfree(ihost->netdev);
        kfree(ihost->hwaddress);
        kfree(ihost->initiatorname);
@@ -2743,7 +2851,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
        struct iscsi_host *ihost = shost_priv(shost);
        struct iscsi_session *session;
        struct iscsi_cls_session *cls_session;
-       int cmd_i, scsi_cmds, total_cmds = cmds_max;
+       int cmd_i, scsi_cmds;
        unsigned long flags;
 
        spin_lock_irqsave(&ihost->lock, flags);
@@ -2754,37 +2862,9 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
        ihost->num_sessions++;
        spin_unlock_irqrestore(&ihost->lock, flags);
 
-       if (!total_cmds)
-               total_cmds = ISCSI_DEF_XMIT_CMDS_MAX;
-       /*
-        * The iscsi layer needs some tasks for nop handling and tmfs,
-        * so the cmds_max must at least be greater than ISCSI_MGMT_CMDS_MAX
-        * + 1 command for scsi IO.
-        */
-       if (total_cmds < ISCSI_TOTAL_CMDS_MIN) {
-               printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
-                      "must be a power of two that is at least %d.\n",
-                      total_cmds, ISCSI_TOTAL_CMDS_MIN);
+       scsi_cmds = iscsi_host_get_max_scsi_cmds(shost, cmds_max);
+       if (scsi_cmds < 0)
                goto dec_session_count;
-       }
-
-       if (total_cmds > ISCSI_TOTAL_CMDS_MAX) {
-               printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
-                      "must be a power of 2 less than or equal to %d.\n",
-                      cmds_max, ISCSI_TOTAL_CMDS_MAX);
-               total_cmds = ISCSI_TOTAL_CMDS_MAX;
-       }
-
-       if (!is_power_of_2(total_cmds)) {
-               printk(KERN_ERR "iscsi: invalid can_queue of %d. can_queue "
-                      "must be a power of 2.\n", total_cmds);
-               total_cmds = rounddown_pow_of_two(total_cmds);
-               if (total_cmds < ISCSI_TOTAL_CMDS_MIN)
-                       goto dec_session_count;
-               printk(KERN_INFO "iscsi: Rounding can_queue to %d.\n",
-                      total_cmds);
-       }
-       scsi_cmds = total_cmds - ISCSI_MGMT_CMDS_MAX;
 
        cls_session = iscsi_alloc_session(shost, iscsit,
                                          sizeof(struct iscsi_session) +
@@ -2800,7 +2880,7 @@ iscsi_session_setup(struct iscsi_transport *iscsit, struct Scsi_Host *shost,
        session->lu_reset_timeout = 15;
        session->abort_timeout = 10;
        session->scsi_cmds_max = scsi_cmds;
-       session->cmds_max = total_cmds;
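+       /* SCSI command slots plus the slots reserved for nop/TMF mgmt tasks */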
+       session->cmds_max = scsi_cmds + ISCSI_MGMT_CMDS_MAX;
        session->queued_cmdsn = session->cmdsn = initial_cmdsn;
        session->exp_cmdsn = initial_cmdsn + 1;
        session->max_cmdsn = initial_cmdsn + 1;
@@ -2919,7 +2999,6 @@ iscsi_conn_setup(struct iscsi_cls_session *cls_session, int dd_size,
        INIT_LIST_HEAD(&conn->mgmtqueue);
        INIT_LIST_HEAD(&conn->cmdqueue);
        INIT_LIST_HEAD(&conn->requeue);
-       spin_lock_init(&conn->taskqueuelock);
        INIT_WORK(&conn->xmitwork, iscsi_xmitworker);
 
        /* allocate login_task used for the login/text sequences */
@@ -3085,10 +3164,16 @@ fail_mgmt_tasks(struct iscsi_session *session, struct iscsi_conn *conn)
                ISCSI_DBG_SESSION(conn->session,
                                  "failing mgmt itt 0x%x state %d\n",
                                  task->itt, task->state);
+
+               spin_lock_bh(&session->back_lock);
+               if (cleanup_queued_task(task)) {
+                       spin_unlock_bh(&session->back_lock);
+                       continue;
+               }
+
                state = ISCSI_TASK_ABRT_SESS_RECOV;
                if (task->state == ISCSI_TASK_PENDING)
                        state = ISCSI_TASK_COMPLETED;
-               spin_lock_bh(&session->back_lock);
                iscsi_complete_task(task, state);
                spin_unlock_bh(&session->back_lock);
        }
@@ -3189,6 +3274,13 @@ int iscsi_conn_bind(struct iscsi_cls_session *cls_session,
        spin_unlock_bh(&session->frwd_lock);
 
        /*
+        * The target could have reduced its window size between logins, so
+        * reset max/exp cmdsn to pick up the new values.
+        */
+       spin_lock_bh(&session->back_lock);
+       session->max_cmdsn = session->exp_cmdsn = session->cmdsn + 1;
+       spin_unlock_bh(&session->back_lock);
+       /*
         * Unblock xmitworker(), Login Phase will pass through.
         */
        clear_bit(ISCSI_SUSPEND_BIT, &conn->suspend_rx);
@@ -3338,125 +3430,125 @@ int iscsi_session_get_param(struct iscsi_cls_session *cls_session,
 
        switch(param) {
        case ISCSI_PARAM_FAST_ABORT:
-               len = sprintf(buf, "%d\n", session->fast_abort);
+               len = sysfs_emit(buf, "%d\n", session->fast_abort);
                break;
        case ISCSI_PARAM_ABORT_TMO:
-               len = sprintf(buf, "%d\n", session->abort_timeout);
+               len = sysfs_emit(buf, "%d\n", session->abort_timeout);
                break;
        case ISCSI_PARAM_LU_RESET_TMO:
-               len = sprintf(buf, "%d\n", session->lu_reset_timeout);
+               len = sysfs_emit(buf, "%d\n", session->lu_reset_timeout);
                break;
        case ISCSI_PARAM_TGT_RESET_TMO:
-               len = sprintf(buf, "%d\n", session->tgt_reset_timeout);
+               len = sysfs_emit(buf, "%d\n", session->tgt_reset_timeout);
                break;
        case ISCSI_PARAM_INITIAL_R2T_EN:
-               len = sprintf(buf, "%d\n", session->initial_r2t_en);
+               len = sysfs_emit(buf, "%d\n", session->initial_r2t_en);
                break;
        case ISCSI_PARAM_MAX_R2T:
-               len = sprintf(buf, "%hu\n", session->max_r2t);
+               len = sysfs_emit(buf, "%hu\n", session->max_r2t);
                break;
        case ISCSI_PARAM_IMM_DATA_EN:
-               len = sprintf(buf, "%d\n", session->imm_data_en);
+               len = sysfs_emit(buf, "%d\n", session->imm_data_en);
                break;
        case ISCSI_PARAM_FIRST_BURST:
-               len = sprintf(buf, "%u\n", session->first_burst);
+               len = sysfs_emit(buf, "%u\n", session->first_burst);
                break;
        case ISCSI_PARAM_MAX_BURST:
-               len = sprintf(buf, "%u\n", session->max_burst);
+               len = sysfs_emit(buf, "%u\n", session->max_burst);
                break;
        case ISCSI_PARAM_PDU_INORDER_EN:
-               len = sprintf(buf, "%d\n", session->pdu_inorder_en);
+               len = sysfs_emit(buf, "%d\n", session->pdu_inorder_en);
                break;
        case ISCSI_PARAM_DATASEQ_INORDER_EN:
-               len = sprintf(buf, "%d\n", session->dataseq_inorder_en);
+               len = sysfs_emit(buf, "%d\n", session->dataseq_inorder_en);
                break;
        case ISCSI_PARAM_DEF_TASKMGMT_TMO:
-               len = sprintf(buf, "%d\n", session->def_taskmgmt_tmo);
+               len = sysfs_emit(buf, "%d\n", session->def_taskmgmt_tmo);
                break;
        case ISCSI_PARAM_ERL:
-               len = sprintf(buf, "%d\n", session->erl);
+               len = sysfs_emit(buf, "%d\n", session->erl);
                break;
        case ISCSI_PARAM_TARGET_NAME:
-               len = sprintf(buf, "%s\n", session->targetname);
+               len = sysfs_emit(buf, "%s\n", session->targetname);
                break;
        case ISCSI_PARAM_TARGET_ALIAS:
-               len = sprintf(buf, "%s\n", session->targetalias);
+               len = sysfs_emit(buf, "%s\n", session->targetalias);
                break;
        case ISCSI_PARAM_TPGT:
-               len = sprintf(buf, "%d\n", session->tpgt);
+               len = sysfs_emit(buf, "%d\n", session->tpgt);
                break;
        case ISCSI_PARAM_USERNAME:
-               len = sprintf(buf, "%s\n", session->username);
+               len = sysfs_emit(buf, "%s\n", session->username);
                break;
        case ISCSI_PARAM_USERNAME_IN:
-               len = sprintf(buf, "%s\n", session->username_in);
+               len = sysfs_emit(buf, "%s\n", session->username_in);
                break;
        case ISCSI_PARAM_PASSWORD:
-               len = sprintf(buf, "%s\n", session->password);
+               len = sysfs_emit(buf, "%s\n", session->password);
                break;
        case ISCSI_PARAM_PASSWORD_IN:
-               len = sprintf(buf, "%s\n", session->password_in);
+               len = sysfs_emit(buf, "%s\n", session->password_in);
                break;
        case ISCSI_PARAM_IFACE_NAME:
-               len = sprintf(buf, "%s\n", session->ifacename);
+               len = sysfs_emit(buf, "%s\n", session->ifacename);
                break;
        case ISCSI_PARAM_INITIATOR_NAME:
-               len = sprintf(buf, "%s\n", session->initiatorname);
+               len = sysfs_emit(buf, "%s\n", session->initiatorname);
                break;
        case ISCSI_PARAM_BOOT_ROOT:
-               len = sprintf(buf, "%s\n", session->boot_root);
+               len = sysfs_emit(buf, "%s\n", session->boot_root);
                break;
        case ISCSI_PARAM_BOOT_NIC:
-               len = sprintf(buf, "%s\n", session->boot_nic);
+               len = sysfs_emit(buf, "%s\n", session->boot_nic);
                break;
        case ISCSI_PARAM_BOOT_TARGET:
-               len = sprintf(buf, "%s\n", session->boot_target);
+               len = sysfs_emit(buf, "%s\n", session->boot_target);
                break;
        case ISCSI_PARAM_AUTO_SND_TGT_DISABLE:
-               len = sprintf(buf, "%u\n", session->auto_snd_tgt_disable);
+               len = sysfs_emit(buf, "%u\n", session->auto_snd_tgt_disable);
                break;
        case ISCSI_PARAM_DISCOVERY_SESS:
-               len = sprintf(buf, "%u\n", session->discovery_sess);
+               len = sysfs_emit(buf, "%u\n", session->discovery_sess);
                break;
        case ISCSI_PARAM_PORTAL_TYPE:
-               len = sprintf(buf, "%s\n", session->portal_type);
+               len = sysfs_emit(buf, "%s\n", session->portal_type);
                break;
        case ISCSI_PARAM_CHAP_AUTH_EN:
-               len = sprintf(buf, "%u\n", session->chap_auth_en);
+               len = sysfs_emit(buf, "%u\n", session->chap_auth_en);
                break;
        case ISCSI_PARAM_DISCOVERY_LOGOUT_EN:
-               len = sprintf(buf, "%u\n", session->discovery_logout_en);
+               len = sysfs_emit(buf, "%u\n", session->discovery_logout_en);
                break;
        case ISCSI_PARAM_BIDI_CHAP_EN:
-               len = sprintf(buf, "%u\n", session->bidi_chap_en);
+               len = sysfs_emit(buf, "%u\n", session->bidi_chap_en);
                break;
        case ISCSI_PARAM_DISCOVERY_AUTH_OPTIONAL:
-               len = sprintf(buf, "%u\n", session->discovery_auth_optional);
+               len = sysfs_emit(buf, "%u\n", session->discovery_auth_optional);
                break;
        case ISCSI_PARAM_DEF_TIME2WAIT:
-               len = sprintf(buf, "%d\n", session->time2wait);
+               len = sysfs_emit(buf, "%d\n", session->time2wait);
                break;
        case ISCSI_PARAM_DEF_TIME2RETAIN:
-               len = sprintf(buf, "%d\n", session->time2retain);
+               len = sysfs_emit(buf, "%d\n", session->time2retain);
                break;
        case ISCSI_PARAM_TSID:
-               len = sprintf(buf, "%u\n", session->tsid);
+               len = sysfs_emit(buf, "%u\n", session->tsid);
                break;
        case ISCSI_PARAM_ISID:
-               len = sprintf(buf, "%02x%02x%02x%02x%02x%02x\n",
+               len = sysfs_emit(buf, "%02x%02x%02x%02x%02x%02x\n",
                              session->isid[0], session->isid[1],
                              session->isid[2], session->isid[3],
                              session->isid[4], session->isid[5]);
                break;
        case ISCSI_PARAM_DISCOVERY_PARENT_IDX:
-               len = sprintf(buf, "%u\n", session->discovery_parent_idx);
+               len = sysfs_emit(buf, "%u\n", session->discovery_parent_idx);
                break;
        case ISCSI_PARAM_DISCOVERY_PARENT_TYPE:
                if (session->discovery_parent_type)
-                       len = sprintf(buf, "%s\n",
+                       len = sysfs_emit(buf, "%s\n",
                                      session->discovery_parent_type);
                else
-                       len = sprintf(buf, "\n");
+                       len = sysfs_emit(buf, "\n");
                break;
        default:
                return -ENOSYS;
@@ -3488,16 +3580,16 @@ int iscsi_conn_get_addr_param(struct sockaddr_storage *addr,
        case ISCSI_PARAM_CONN_ADDRESS:
        case ISCSI_HOST_PARAM_IPADDRESS:
                if (sin)
-                       len = sprintf(buf, "%pI4\n", &sin->sin_addr.s_addr);
+                       len = sysfs_emit(buf, "%pI4\n", &sin->sin_addr.s_addr);
                else
-                       len = sprintf(buf, "%pI6\n", &sin6->sin6_addr);
+                       len = sysfs_emit(buf, "%pI6\n", &sin6->sin6_addr);
                break;
        case ISCSI_PARAM_CONN_PORT:
        case ISCSI_PARAM_LOCAL_PORT:
                if (sin)
-                       len = sprintf(buf, "%hu\n", be16_to_cpu(sin->sin_port));
+                       len = sysfs_emit(buf, "%hu\n", be16_to_cpu(sin->sin_port));
                else
-                       len = sprintf(buf, "%hu\n",
+                       len = sysfs_emit(buf, "%hu\n",
                                      be16_to_cpu(sin6->sin6_port));
                break;
        default:
@@ -3516,88 +3608,88 @@ int iscsi_conn_get_param(struct iscsi_cls_conn *cls_conn,
 
        switch(param) {
        case ISCSI_PARAM_PING_TMO:
-               len = sprintf(buf, "%u\n", conn->ping_timeout);
+               len = sysfs_emit(buf, "%u\n", conn->ping_timeout);
                break;
        case ISCSI_PARAM_RECV_TMO:
-               len = sprintf(buf, "%u\n", conn->recv_timeout);
+               len = sysfs_emit(buf, "%u\n", conn->recv_timeout);
                break;
        case ISCSI_PARAM_MAX_RECV_DLENGTH:
-               len = sprintf(buf, "%u\n", conn->max_recv_dlength);
+               len = sysfs_emit(buf, "%u\n", conn->max_recv_dlength);
                break;
        case ISCSI_PARAM_MAX_XMIT_DLENGTH:
-               len = sprintf(buf, "%u\n", conn->max_xmit_dlength);
+               len = sysfs_emit(buf, "%u\n", conn->max_xmit_dlength);
                break;
        case ISCSI_PARAM_HDRDGST_EN:
-               len = sprintf(buf, "%d\n", conn->hdrdgst_en);
+               len = sysfs_emit(buf, "%d\n", conn->hdrdgst_en);
                break;
        case ISCSI_PARAM_DATADGST_EN:
-               len = sprintf(buf, "%d\n", conn->datadgst_en);
+               len = sysfs_emit(buf, "%d\n", conn->datadgst_en);
                break;
        case ISCSI_PARAM_IFMARKER_EN:
-               len = sprintf(buf, "%d\n", conn->ifmarker_en);
+               len = sysfs_emit(buf, "%d\n", conn->ifmarker_en);
                break;
        case ISCSI_PARAM_OFMARKER_EN:
-               len = sprintf(buf, "%d\n", conn->ofmarker_en);
+               len = sysfs_emit(buf, "%d\n", conn->ofmarker_en);
                break;
        case ISCSI_PARAM_EXP_STATSN:
-               len = sprintf(buf, "%u\n", conn->exp_statsn);
+               len = sysfs_emit(buf, "%u\n", conn->exp_statsn);
                break;
        case ISCSI_PARAM_PERSISTENT_PORT:
-               len = sprintf(buf, "%d\n", conn->persistent_port);
+               len = sysfs_emit(buf, "%d\n", conn->persistent_port);
                break;
        case ISCSI_PARAM_PERSISTENT_ADDRESS:
-               len = sprintf(buf, "%s\n", conn->persistent_address);
+               len = sysfs_emit(buf, "%s\n", conn->persistent_address);
                break;
        case ISCSI_PARAM_STATSN:
-               len = sprintf(buf, "%u\n", conn->statsn);
+               len = sysfs_emit(buf, "%u\n", conn->statsn);
                break;
        case ISCSI_PARAM_MAX_SEGMENT_SIZE:
-               len = sprintf(buf, "%u\n", conn->max_segment_size);
+               len = sysfs_emit(buf, "%u\n", conn->max_segment_size);
                break;
        case ISCSI_PARAM_KEEPALIVE_TMO:
-               len = sprintf(buf, "%u\n", conn->keepalive_tmo);
+               len = sysfs_emit(buf, "%u\n", conn->keepalive_tmo);
                break;
        case ISCSI_PARAM_LOCAL_PORT:
-               len = sprintf(buf, "%u\n", conn->local_port);
+               len = sysfs_emit(buf, "%u\n", conn->local_port);
                break;
        case ISCSI_PARAM_TCP_TIMESTAMP_STAT:
-               len = sprintf(buf, "%u\n", conn->tcp_timestamp_stat);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_stat);
                break;
        case ISCSI_PARAM_TCP_NAGLE_DISABLE:
-               len = sprintf(buf, "%u\n", conn->tcp_nagle_disable);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_nagle_disable);
                break;
        case ISCSI_PARAM_TCP_WSF_DISABLE:
-               len = sprintf(buf, "%u\n", conn->tcp_wsf_disable);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_wsf_disable);
                break;
        case ISCSI_PARAM_TCP_TIMER_SCALE:
-               len = sprintf(buf, "%u\n", conn->tcp_timer_scale);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_timer_scale);
                break;
        case ISCSI_PARAM_TCP_TIMESTAMP_EN:
-               len = sprintf(buf, "%u\n", conn->tcp_timestamp_en);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_timestamp_en);
                break;
        case ISCSI_PARAM_IP_FRAGMENT_DISABLE:
-               len = sprintf(buf, "%u\n", conn->fragment_disable);
+               len = sysfs_emit(buf, "%u\n", conn->fragment_disable);
                break;
        case ISCSI_PARAM_IPV4_TOS:
-               len = sprintf(buf, "%u\n", conn->ipv4_tos);
+               len = sysfs_emit(buf, "%u\n", conn->ipv4_tos);
                break;
        case ISCSI_PARAM_IPV6_TC:
-               len = sprintf(buf, "%u\n", conn->ipv6_traffic_class);
+               len = sysfs_emit(buf, "%u\n", conn->ipv6_traffic_class);
                break;
        case ISCSI_PARAM_IPV6_FLOW_LABEL:
-               len = sprintf(buf, "%u\n", conn->ipv6_flow_label);
+               len = sysfs_emit(buf, "%u\n", conn->ipv6_flow_label);
                break;
        case ISCSI_PARAM_IS_FW_ASSIGNED_IPV6:
-               len = sprintf(buf, "%u\n", conn->is_fw_assigned_ipv6);
+               len = sysfs_emit(buf, "%u\n", conn->is_fw_assigned_ipv6);
                break;
        case ISCSI_PARAM_TCP_XMIT_WSF:
-               len = sprintf(buf, "%u\n", conn->tcp_xmit_wsf);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_xmit_wsf);
                break;
        case ISCSI_PARAM_TCP_RECV_WSF:
-               len = sprintf(buf, "%u\n", conn->tcp_recv_wsf);
+               len = sysfs_emit(buf, "%u\n", conn->tcp_recv_wsf);
                break;
        case ISCSI_PARAM_LOCAL_IPADDR:
-               len = sprintf(buf, "%s\n", conn->local_ipaddr);
+               len = sysfs_emit(buf, "%s\n", conn->local_ipaddr);
                break;
        default:
                return -ENOSYS;
@@ -3615,13 +3707,13 @@ int iscsi_host_get_param(struct Scsi_Host *shost, enum iscsi_host_param param,
 
        switch (param) {
        case ISCSI_HOST_PARAM_NETDEV_NAME:
-               len = sprintf(buf, "%s\n", ihost->netdev);
+               len = sysfs_emit(buf, "%s\n", ihost->netdev);
                break;
        case ISCSI_HOST_PARAM_HWADDRESS:
-               len = sprintf(buf, "%s\n", ihost->hwaddress);
+               len = sysfs_emit(buf, "%s\n", ihost->hwaddress);
                break;
        case ISCSI_HOST_PARAM_INITIATOR_NAME:
-               len = sprintf(buf, "%s\n", ihost->initiatorname);
+               len = sysfs_emit(buf, "%s\n", ihost->initiatorname);
                break;
        default:
                return -ENOSYS;
index 83f14b2..2e9ffe3 100644 (file)
@@ -524,48 +524,79 @@ static int iscsi_tcp_data_in(struct iscsi_conn *conn, struct iscsi_task *task)
 /**
  * iscsi_tcp_r2t_rsp - iSCSI R2T Response processing
  * @conn: iscsi connection
- * @task: scsi command task
+ * @hdr: PDU header
  */
-static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
+static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
 {
        struct iscsi_session *session = conn->session;
-       struct iscsi_tcp_task *tcp_task = task->dd_data;
-       struct iscsi_tcp_conn *tcp_conn = conn->dd_data;
-       struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+       struct iscsi_tcp_task *tcp_task;
+       struct iscsi_tcp_conn *tcp_conn;
+       struct iscsi_r2t_rsp *rhdr;
        struct iscsi_r2t_info *r2t;
-       int r2tsn = be32_to_cpu(rhdr->r2tsn);
+       struct iscsi_task *task;
        u32 data_length;
        u32 data_offset;
+       int r2tsn;
        int rc;
 
+       spin_lock(&session->back_lock);
+       task = iscsi_itt_to_ctask(conn, hdr->itt);
+       if (!task) {
+               spin_unlock(&session->back_lock);
+               return ISCSI_ERR_BAD_ITT;
+       } else if (task->sc->sc_data_direction != DMA_TO_DEVICE) {
+               spin_unlock(&session->back_lock);
+               return ISCSI_ERR_PROTO;
+       }
+       /*
+        * A bad target might complete the cmd before we have handled R2Ts,
+        * so get a ref to the task that will be dropped in the xmit path.
+        */
+       if (task->state != ISCSI_TASK_RUNNING) {
+               spin_unlock(&session->back_lock);
+               /* Let the path that got the early rsp complete it */
+               return 0;
+       }
+       task->last_xfer = jiffies;
+       __iscsi_get_task(task);
+
+       tcp_conn = conn->dd_data;
+       rhdr = (struct iscsi_r2t_rsp *)tcp_conn->in.hdr;
+       /* fill-in new R2T associated with the task */
+       iscsi_update_cmdsn(session, (struct iscsi_nopin *)rhdr);
+       spin_unlock(&session->back_lock);
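+       /* From here on, every error path must drop the ref via put_task */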
+
        if (tcp_conn->in.datalen) {
                iscsi_conn_printk(KERN_ERR, conn,
                                  "invalid R2T with datalen %d\n",
                                  tcp_conn->in.datalen);
-               return ISCSI_ERR_DATALEN;
+               rc = ISCSI_ERR_DATALEN;
+               goto put_task;
        }
 
+       tcp_task = task->dd_data;
+       r2tsn = be32_to_cpu(rhdr->r2tsn);
        if (tcp_task->exp_datasn != r2tsn){
                ISCSI_DBG_TCP(conn, "task->exp_datasn(%d) != rhdr->r2tsn(%d)\n",
                              tcp_task->exp_datasn, r2tsn);
-               return ISCSI_ERR_R2TSN;
+               rc = ISCSI_ERR_R2TSN;
+               goto put_task;
        }
 
-       /* fill-in new R2T associated with the task */
-       iscsi_update_cmdsn(session, (struct iscsi_nopin*)rhdr);
-
-       if (!task->sc || session->state != ISCSI_STATE_LOGGED_IN) {
+       if (session->state != ISCSI_STATE_LOGGED_IN) {
                iscsi_conn_printk(KERN_INFO, conn,
                                  "dropping R2T itt %d in recovery.\n",
                                  task->itt);
-               return 0;
+               rc = 0;
+               goto put_task;
        }
 
        data_length = be32_to_cpu(rhdr->data_length);
        if (data_length == 0) {
                iscsi_conn_printk(KERN_ERR, conn,
                                  "invalid R2T with zero data len\n");
-               return ISCSI_ERR_DATALEN;
+               rc = ISCSI_ERR_DATALEN;
+               goto put_task;
        }
 
        if (data_length > session->max_burst)
@@ -579,7 +610,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
                                  "invalid R2T with data len %u at offset %u "
                                  "and total length %d\n", data_length,
                                  data_offset, task->sc->sdb.length);
-               return ISCSI_ERR_DATALEN;
+               rc = ISCSI_ERR_DATALEN;
+               goto put_task;
        }
 
        spin_lock(&tcp_task->pool2queue);
@@ -589,7 +621,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
                                  "Target has sent more R2Ts than it "
                                  "negotiated for or driver has leaked.\n");
                spin_unlock(&tcp_task->pool2queue);
-               return ISCSI_ERR_PROTO;
+               rc = ISCSI_ERR_PROTO;
+               goto put_task;
        }
 
        r2t->exp_statsn = rhdr->statsn;
@@ -607,6 +640,10 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task)
 
        iscsi_requeue_task(task);
        return 0;
+
+put_task:
+       iscsi_put_task(task);
+       return rc;
 }
 
 /*
@@ -730,20 +767,11 @@ iscsi_tcp_hdr_dissect(struct iscsi_conn *conn, struct iscsi_hdr *hdr)
                rc = iscsi_complete_pdu(conn, hdr, NULL, 0);
                break;
        case ISCSI_OP_R2T:
-               spin_lock(&conn->session->back_lock);
-               task = iscsi_itt_to_ctask(conn, hdr->itt);
-               spin_unlock(&conn->session->back_lock);
-               if (!task)
-                       rc = ISCSI_ERR_BAD_ITT;
-               else if (ahslen)
+               if (ahslen) {
                        rc = ISCSI_ERR_AHSLEN;
-               else if (task->sc->sc_data_direction == DMA_TO_DEVICE) {
-                       task->last_xfer = jiffies;
-                       spin_lock(&conn->session->frwd_lock);
-                       rc = iscsi_tcp_r2t_rsp(conn, task);
-                       spin_unlock(&conn->session->frwd_lock);
-               } else
-                       rc = ISCSI_ERR_PROTO;
+                       break;
+               }
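+               /* task lookup and locking are handled inside iscsi_tcp_r2t_rsp() */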
+               rc = iscsi_tcp_r2t_rsp(conn, hdr);
                break;
        case ISCSI_OP_LOGIN_RSP:
        case ISCSI_OP_TEXT_RSP:
index bc79a01..46a8f2d 100644 (file)
@@ -2421,7 +2421,7 @@ lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf,
        memset(dstbuf, 0, 33);
        size = (nbytes < 32) ? nbytes : 32;
        if (copy_from_user(dstbuf, buf, size))
-               return 0;
+               return -EFAULT;
 
        if (dent == phba->debug_InjErrLBA) {
                if ((dstbuf[0] == 'o') && (dstbuf[1] == 'f') &&
@@ -2430,7 +2430,7 @@ lpfc_debugfs_dif_err_write(struct file *file, const char __user *buf,
        }
 
        if ((tmp == 0) && (kstrtoull(dstbuf, 0, &tmp)))
-               return 0;
+               return -EINVAL;
 
        if (dent == phba->debug_writeGuard)
                phba->lpfc_injerr_wgrd_cnt = (uint32_t)tmp;
index f5582c8..ac0eef9 100644 (file)
@@ -3648,25 +3648,16 @@ _base_get_msix_index(struct MPT3SAS_ADAPTER *ioc,
                    base_mod64(atomic64_add_return(1,
                    &ioc->total_io_cnt), ioc->reply_queue_count) : 0;
 
-       return ioc->cpu_msix_table[raw_smp_processor_id()];
-}
+       if (scmd && ioc->shost->nr_hw_queues > 1) {
+               u32 tag = blk_mq_unique_tag(scmd->request);
 
-/**
- * _base_sdev_nr_inflight_request -get number of inflight requests
- *                                of a request queue.
- * @q: request_queue object
- *
- * returns number of inflight request of a request queue.
- */
-inline unsigned long
-_base_sdev_nr_inflight_request(struct request_queue *q)
-{
-       struct blk_mq_hw_ctx *hctx = q->queue_hw_ctx[0];
+               return blk_mq_unique_tag_to_hwq(tag) +
+                       ioc->high_iops_queues;
+       }
 
-       return atomic_read(&hctx->nr_active);
+       return ioc->cpu_msix_table[raw_smp_processor_id()];
 }
 
-
 /**
  * _base_get_high_iops_msix_index - get the msix index of
  *                             high iops queues
@@ -3686,7 +3677,8 @@ _base_get_high_iops_msix_index(struct MPT3SAS_ADAPTER *ioc,
         * reply queues in terms of batch count 16 when outstanding
         * IOs on the target device is >=8.
         */
-       if (_base_sdev_nr_inflight_request(scmd->device->request_queue) >
+
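+       /*
+        * device_busy counts the device's outstanding I/Os without
+        * reaching into blk-mq hctx internals.
+        */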
+       if (atomic_read(&scmd->device->device_busy) >
            MPT3SAS_DEVICE_HIGH_IOPS_DEPTH)
                return base_mod64((
                    atomic64_add_return(1, &ioc->high_iops_outstanding) /
@@ -3739,8 +3731,23 @@ mpt3sas_base_get_smid_scsiio(struct MPT3SAS_ADAPTER *ioc, u8 cb_idx,
        struct scsi_cmnd *scmd)
 {
        struct scsiio_tracker *request = scsi_cmd_priv(scmd);
-       unsigned int tag = scmd->request->tag;
        u16 smid;
+       u32 tag, unique_tag;
+
+       unique_tag = blk_mq_unique_tag(scmd->request);
+       tag = blk_mq_unique_tag_to_tag(unique_tag);
+
+       /*
+        * Store hw queue number corresponding to the tag.
+        * This hw queue number is used later to determine
+        * the unique_tag using the logic below. This unique_tag
+        * is used to retrieve the scmd pointer corresponding
+        * to the tag using the scsi_host_find_tag() API.
+        *
+        * tag = smid - 1;
+        * unique_tag = ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag;
+        */
+       ioc->io_queue_num[tag] = blk_mq_unique_tag_to_hwq(unique_tag);
 
        smid = tag + 1;
        request->cb_idx = cb_idx;
@@ -3831,6 +3838,7 @@ mpt3sas_base_free_smid(struct MPT3SAS_ADAPTER *ioc, u16 smid)
 
                mpt3sas_base_clear_st(ioc, st);
                _base_recovery_check(ioc);
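+               /* Clear the hw queue recorded for this smid */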
+               ioc->io_queue_num[smid - 1] = 0;
                return;
        }
 
@@ -5362,6 +5370,9 @@ _base_release_memory_pools(struct MPT3SAS_ADAPTER *ioc)
                kfree(ioc->chain_lookup);
                ioc->chain_lookup = NULL;
        }
+
+       kfree(ioc->io_queue_num);
+       ioc->io_queue_num = NULL;
 }
 
 /**
@@ -5641,7 +5652,8 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
        reply_post_free_sz = ioc->reply_post_queue_depth *
            sizeof(Mpi2DefaultReplyDescriptor_t);
        rdpq_sz = reply_post_free_sz * RDPQ_MAX_INDEX_IN_ONE_CHUNK;
-       if (_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable)
+       if ((_base_is_controller_msix_enabled(ioc) && !ioc->rdpq_array_enable)
+           || (ioc->reply_queue_count < RDPQ_MAX_INDEX_IN_ONE_CHUNK))
                rdpq_sz = reply_post_free_sz * ioc->reply_queue_count;
        ret = base_alloc_rdpq_dma_pool(ioc, rdpq_sz);
        if (ret == -EAGAIN) {
@@ -5772,6 +5784,11 @@ _base_allocate_memory_pools(struct MPT3SAS_ADAPTER *ioc)
                    ioc_info(ioc, "internal(0x%p): depth(%d), start smid(%d)\n",
                             ioc->internal,
                             ioc->internal_depth, ioc->internal_smid));
+
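+       /* One u16 per scsiio smid, recording the hw queue it was issued on */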
+       ioc->io_queue_num = kcalloc(ioc->scsiio_depth,
+           sizeof(u16), GFP_KERNEL);
+       if (!ioc->io_queue_num)
+               goto out;
        /*
         * The number of NVMe page sized blocks needed is:
         *     (((sg_tablesize * 8) - 1) / (page_size - 8)) + 1
@@ -7789,14 +7806,18 @@ mpt3sas_base_attach(struct MPT3SAS_ADAPTER *ioc)
                ioc->pend_os_device_add_sz++;
        ioc->pend_os_device_add = kzalloc(ioc->pend_os_device_add_sz,
            GFP_KERNEL);
-       if (!ioc->pend_os_device_add)
+       if (!ioc->pend_os_device_add) {
+               r = -ENOMEM;
                goto out_free_resources;
+       }
 
        ioc->device_remove_in_progress_sz = ioc->pend_os_device_add_sz;
        ioc->device_remove_in_progress =
                kzalloc(ioc->device_remove_in_progress_sz, GFP_KERNEL);
-       if (!ioc->device_remove_in_progress)
+       if (!ioc->device_remove_in_progress) {
+               r = -ENOMEM;
                goto out_free_resources;
+       }
 
        ioc->fwfault_debug = mpt3sas_fwfault_debug;
 
@@ -8174,8 +8195,11 @@ mpt3sas_base_hard_reset_handler(struct MPT3SAS_ADAPTER *ioc,
                ioc_state = mpt3sas_base_get_iocstate(ioc, 0);
                if ((ioc_state & MPI2_IOC_STATE_MASK) == MPI2_IOC_STATE_FAULT ||
                    (ioc_state & MPI2_IOC_STATE_MASK) ==
-                   MPI2_IOC_STATE_COREDUMP)
+                   MPI2_IOC_STATE_COREDUMP) {
                        is_fault = 1;
+                       ioc->htb_rel.trigger_info_dwords[1] =
+                           (ioc_state & MPI2_DOORBELL_DATA_MASK);
+               }
        }
        _base_pre_reset_handler(ioc);
        mpt3sas_wait_for_commands_to_complete(ioc);
index 2def7a3..315aee6 100644 (file)
@@ -77,8 +77,8 @@
 #define MPT3SAS_DRIVER_NAME            "mpt3sas"
 #define MPT3SAS_AUTHOR "Avago Technologies <MPT-FusionLinux.pdl@avagotech.com>"
 #define MPT3SAS_DESCRIPTION    "LSI MPT Fusion SAS 3.0 Device Driver"
-#define MPT3SAS_DRIVER_VERSION         "36.100.00.00"
-#define MPT3SAS_MAJOR_VERSION          36
+#define MPT3SAS_DRIVER_VERSION         "37.100.00.00"
+#define MPT3SAS_MAJOR_VERSION          37
 #define MPT3SAS_MINOR_VERSION          100
 #define MPT3SAS_BUILD_VERSION          0
 #define MPT3SAS_RELEASE_VERSION        00
@@ -1073,6 +1073,50 @@ struct hba_port {
 
 #define MULTIPATH_DISABLED_PORT_ID     0xFF
 
+/**
+ * struct htb_rel_query - diagnostic buffer release reason
+ * @buffer_rel_condition - Release condition ioctl/sysfs/reset
+ * @reserved - reserved for future use
+ * @trigger_type - Master/Event/scsi/MPI
+ * @trigger_info_dwords - Data corresponding to trigger type
+ */
+struct htb_rel_query {
+       u16     buffer_rel_condition;
+       u16     reserved;
+       u32     trigger_type;
+       u32     trigger_info_dwords[2];
+};
+
+/* Buffer_rel_condition bit fields */
+
+/* Bit 0 - Diag Buffer not Released */
+#define MPT3_DIAG_BUFFER_NOT_RELEASED  (0x00)
+/* Bit 0 - Diag Buffer Released */
+#define MPT3_DIAG_BUFFER_RELEASED      (0x01)
+
+/*
+ * Bit 1 - Diag Buffer Released by IOCTL,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_IOCTL     (0x02 | MPT3_DIAG_BUFFER_RELEASED)
+
+/*
+ * Bit 2 - Diag Buffer Released by Trigger,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_TRIGGER   (0x04 | MPT3_DIAG_BUFFER_RELEASED)
+
+/*
+ * Bit 3 - Diag Buffer Released by SysFs,
+ * This bit is valid only if Bit 0 is one
+ */
+#define MPT3_DIAG_BUFFER_REL_SYSFS     (0x08 | MPT3_DIAG_BUFFER_RELEASED)
+
+/* DIAG RESET Master trigger flags */
+#define MPT_DIAG_RESET_ISSUED_BY_DRIVER 0x00000000
+#define MPT_DIAG_RESET_ISSUED_BY_USER  0x00000001
+
 typedef void (*MPT3SAS_FLUSH_RUNNING_CMDS)(struct MPT3SAS_ADAPTER *ioc);
 /**
  * struct MPT3SAS_ADAPTER - per adapter struct
@@ -1439,6 +1483,7 @@ struct MPT3SAS_ADAPTER {
        spinlock_t      scsi_lookup_lock;
        int             pending_io_count;
        wait_queue_head_t reset_wq;
+       u16             *io_queue_num;
 
        /* PCIe SGL */
        struct dma_pool *pcie_sgl_dma_pool;
@@ -1529,6 +1574,8 @@ struct MPT3SAS_ADAPTER {
        u32             diagnostic_flags[MPI2_DIAG_BUF_TYPE_COUNT];
        u32             ring_buffer_offset;
        u32             ring_buffer_sz;
+       struct htb_rel_query htb_rel;
+       u8 reset_from_user;
        u8              is_warpdrive;
        u8              is_mcpu_endpoint;
        u8              hide_ir_msg;
@@ -1565,6 +1612,7 @@ struct mpt3sas_debugfs_buffer {
 };
 
 #define MPT_DRV_SUPPORT_BITMAP_MEMMOVE 0x00000001
+#define MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY      0x00000002
 
 typedef u8 (*MPT_CALLBACK)(struct MPT3SAS_ADAPTER *ioc, u16 smid, u8 msix_index,
        u32 reply);
index c8a0ce1..44f9a05 100644 (file)
@@ -479,6 +479,8 @@ void mpt3sas_ctl_pre_reset_handler(struct MPT3SAS_ADAPTER *ioc)
                ioc_info(ioc,
                    "%s: Releasing the trace buffer due to adapter reset.",
                    __func__);
+               ioc->htb_rel.buffer_rel_condition =
+                   MPT3_DIAG_BUFFER_REL_TRIGGER;
                mpt3sas_send_diag_release(ioc, i, &issue_reset);
        }
 }
@@ -1334,6 +1336,7 @@ _ctl_do_reset(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
        dctlprintk(ioc, ioc_info(ioc, "%s: enter\n",
                                 __func__));
 
+       ioc->reset_from_user = 1;
        retval = mpt3sas_base_hard_reset_handler(ioc, FORCE_BIG_HAMMER);
        ioc_info(ioc,
            "Ioctl: host reset: %s\n", ((!retval) ? "SUCCESS" : "FAILED"));
@@ -1687,6 +1690,9 @@ _ctl_diag_register_2(struct MPT3SAS_ADAPTER *ioc,
        request_data = ioc->diag_buffer[buffer_type];
        request_data_sz = diag_register->requested_buffer_size;
        ioc->unique_id[buffer_type] = diag_register->unique_id;
+       /* Reset ioc variables used for additional query commands */
+       ioc->reset_from_user = 0;
+       memset(&ioc->htb_rel, 0, sizeof(struct htb_rel_query));
        ioc->diag_buffer_status[buffer_type] &=
            MPT3_DIAG_BUFFER_IS_DRIVER_ALLOCATED;
        memcpy(ioc->product_specific[buffer_type],
@@ -2469,7 +2475,61 @@ _ctl_diag_read_buffer(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
        return rc;
 }
 
+/**
+ * _ctl_addnl_diag_query - query relevant info associated with diag buffers
+ * @ioc: per adapter object
+ * @arg: user space buffer containing ioctl content
+ *
+ * The application will send only the unique_id. The driver inspects it
+ * and, if valid, fills in the details about why the diag buffer was
+ * released.
+ */
+static long
+_ctl_addnl_diag_query(struct MPT3SAS_ADAPTER *ioc, void __user *arg)
+{
+       struct mpt3_addnl_diag_query karg;
+       u32 buffer_type = 0;
 
+       if (copy_from_user(&karg, arg, sizeof(karg))) {
+               pr_err("%s: failure at %s:%d/%s()!\n",
+                   ioc->name, __FILE__, __LINE__, __func__);
+               return -EFAULT;
+       }
+       dctlprintk(ioc, ioc_info(ioc, "%s\n", __func__));
+       if (karg.unique_id == 0) {
+               ioc_err(ioc, "%s: unique_id is(0x%08x)\n",
+                   __func__, karg.unique_id);
+               return -EPERM;
+       }
+       buffer_type = _ctl_diag_get_bufftype(ioc, karg.unique_id);
+       if (buffer_type == MPT3_DIAG_UID_NOT_FOUND) {
+               ioc_err(ioc, "%s: buffer with unique_id(0x%08x) not found\n",
+                   __func__, karg.unique_id);
+               return -EPERM;
+       }
+       memset(&karg.buffer_rel_condition, 0, sizeof(struct htb_rel_query));
+       if ((ioc->diag_buffer_status[buffer_type] &
+           MPT3_DIAG_BUFFER_IS_REGISTERED) == 0) {
+               ioc_info(ioc, "%s: buffer_type(0x%02x) is not registered\n",
+                   __func__, buffer_type);
+               goto out;
+       }
+       if ((ioc->diag_buffer_status[buffer_type] &
+           MPT3_DIAG_BUFFER_IS_RELEASED) == 0) {
+               ioc_err(ioc, "%s: buffer_type(0x%02x) is not released\n",
+                   __func__, buffer_type);
+               return -EPERM;
+       }
+       memcpy(&karg.buffer_rel_condition, &ioc->htb_rel,
+           sizeof(struct htb_rel_query));
+out:
+       if (copy_to_user(arg, &karg, sizeof(struct mpt3_addnl_diag_query))) {
+               ioc_err(ioc, "%s: unable to write mpt3_addnl_diag_query data @ %p\n",
+                   __func__, arg);
+               return -EFAULT;
+       }
+       return 0;
+}
 
 #ifdef CONFIG_COMPAT
 /**
@@ -2533,7 +2593,7 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
        struct MPT3SAS_ADAPTER *ioc;
        struct mpt3_ioctl_header ioctl_header;
        enum block_state state;
-       long ret = -EINVAL;
+       long ret = -ENOIOCTLCMD;
 
        /* get IOCTL header */
        if (copy_from_user(&ioctl_header, (char __user *)arg,
@@ -2643,6 +2703,10 @@ _ctl_ioctl_main(struct file *file, unsigned int cmd, void __user *arg,
                if (_IOC_SIZE(cmd) == sizeof(struct mpt3_diag_read_buffer))
                        ret = _ctl_diag_read_buffer(ioc, arg);
                break;
+       case MPT3ADDNLDIAGQUERY:
+               if (_IOC_SIZE(cmd) == sizeof(struct mpt3_addnl_diag_query))
+                       ret = _ctl_addnl_diag_query(ioc, arg);
+               break;
        default:
                dctlprintk(ioc,
                           ioc_info(ioc, "unsupported ioctl opcode(0x%08x)\n",
@@ -3425,6 +3489,7 @@ host_trace_buffer_enable_store(struct device *cdev,
                    MPT3_DIAG_BUFFER_IS_RELEASED))
                        goto out;
                ioc_info(ioc, "releasing host trace buffer\n");
+               ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_SYSFS;
                mpt3sas_send_diag_release(ioc, MPI2_DIAG_BUF_TYPE_TRACE,
                    &issue_reset);
        }
index 0f7aa4d..d2ccdaf 100644 (file)
@@ -94,6 +94,8 @@
        struct mpt3_diag_query)
 #define MPT3DIAGREADBUFFER _IOWR(MPT3_MAGIC_NUMBER, 30, \
        struct mpt3_diag_read_buffer)
+#define MPT3ADDNLDIAGQUERY _IOWR(MPT3_MAGIC_NUMBER, 32, \
+       struct mpt3_addnl_diag_query)
 
 /* Trace Buffer default UniqueId */
 #define MPT2DIAGBUFFUNIQUEID (0x07075900)
@@ -430,4 +432,24 @@ struct mpt3_diag_read_buffer {
        uint32_t diagnostic_data[1];
 };
 
+/**
+ * struct mpt3_addnl_diag_query - diagnostic buffer release reason
+ * @hdr - generic header
+ * @unique_id - unique id associated with this buffer.
+ * @buffer_rel_condition - Release condition ioctl/sysfs/reset
+ * @reserved1
+ * @trigger_type - Master/Event/scsi/MPI
+ * @trigger_info_dwords - Data corresponding to the trigger type
+ * @reserved2
+ */
+struct mpt3_addnl_diag_query {
+       struct mpt3_ioctl_header hdr;
+       uint32_t unique_id;
+       uint16_t buffer_rel_condition;
+       uint16_t reserved1;
+       uint32_t trigger_type;
+       uint32_t trigger_info_dwords[2];
+       uint32_t reserved2[2];
+};
+
 #endif /* MPT3SAS_CTL_H_INCLUDED */
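
The MPT3ADDNLDIAGQUERY definition and the mpt3_addnl_diag_query layout above are the whole user-visible interface. A minimal user-space sketch of how an application might use it; illustrative only, assuming the /dev/mpt3ctl character node exported by the driver and a local copy of the UAPI header, with error handling trimmed:

/* Hypothetical example, not part of the commit: ask the driver why a
 * released diag buffer was released, via MPT3ADDNLDIAGQUERY. */
#include <fcntl.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <sys/ioctl.h>
#include <unistd.h>
#include "mpt3sas_ctl.h"                /* assumed copy of the header above */

int query_release_reason(uint32_t unique_id)
{
        struct mpt3_addnl_diag_query karg;
        int fd = open("/dev/mpt3ctl", O_RDWR);

        if (fd < 0)
                return -1;
        memset(&karg, 0, sizeof(karg));
        karg.hdr.ioc_number = 0;                /* first adapter */
        karg.hdr.max_data_size = sizeof(karg);
        karg.unique_id = unique_id;             /* e.g. MPT2DIAGBUFFUNIQUEID */
        if (ioctl(fd, MPT3ADDNLDIAGQUERY, &karg) < 0) {
                close(fd);
                return -1;
        }
        printf("release condition 0x%x, trigger type %u\n",
               karg.buffer_rel_condition, karg.trigger_type);
        close(fd);
        return 0;
}

Per _ctl_addnl_diag_query() above, an unknown unique_id or a buffer that has not yet been released is rejected with -EPERM.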
index c8b09a8..6aa6de7 100644 (file)
@@ -54,6 +54,7 @@
 #include <linux/interrupt.h>
 #include <linux/aer.h>
 #include <linux/raid_class.h>
+#include <linux/blk-mq-pci.h>
 #include <asm/unaligned.h>
 
 #include "mpt3sas_base.h"
@@ -168,6 +169,11 @@ MODULE_PARM_DESC(multipath_on_hba,
        "\t SAS 2.0 & SAS 3.0 HBA - This will be disabled,\n\t\t"
        "\t SAS 3.5 HBA - This will be enabled)");
 
+static int host_tagset_enable = 1;
+module_param(host_tagset_enable, int, 0444);
+MODULE_PARM_DESC(host_tagset_enable,
+       "Shared host tagset enable/disable Default: enable(1)");
+
 /* raid transport support */
 static struct raid_template *mpt3sas_raid_template;
 static struct raid_template *mpt2sas_raid_template;
@@ -407,7 +413,7 @@ mpt3sas_get_port_by_id(struct MPT3SAS_ADAPTER *ioc,
         * And add this object to port_table_list.
         */
        if (!ioc->multipath_on_hba) {
-               port = kzalloc(sizeof(struct hba_port), GFP_KERNEL);
+               port = kzalloc(sizeof(struct hba_port), GFP_ATOMIC);
                if (!port)
                        return NULL;
 
@@ -1743,10 +1749,12 @@ mpt3sas_scsih_scsi_lookup_get(struct MPT3SAS_ADAPTER *ioc, u16 smid)
        struct scsi_cmnd *scmd = NULL;
        struct scsiio_tracker *st;
        Mpi25SCSIIORequest_t *mpi_request;
+       u16 tag = smid - 1;
 
        if (smid > 0  &&
            smid <= ioc->scsiio_depth - INTERNAL_SCSIIO_CMDS_COUNT) {
-               u32 unique_tag = smid - 1;
+               u32 unique_tag =
+                   ioc->io_queue_num[tag] << BLK_MQ_UNIQUE_TAG_BITS | tag;
 
                mpi_request = mpt3sas_base_get_msg_frame(ioc, smid);
 
@@ -11599,6 +11607,22 @@ scsih_scan_finished(struct Scsi_Host *shost, unsigned long time)
        return 1;
 }
 
+/**
+ * scsih_map_queues - map reply queues with request queues
+ * @shost: SCSI host pointer
+ */
+static int scsih_map_queues(struct Scsi_Host *shost)
+{
+       struct MPT3SAS_ADAPTER *ioc =
+           (struct MPT3SAS_ADAPTER *)shost->hostdata;
+
+       if (ioc->shost->nr_hw_queues == 1)
+               return 0;
+
+       return blk_mq_pci_map_queues(&shost->tag_set.map[HCTX_TYPE_DEFAULT],
+           ioc->pdev, ioc->high_iops_queues);
+}
+
 /* shost template for SAS 2.0 HBA devices */
 static struct scsi_host_template mpt2sas_driver_template = {
        .module                         = THIS_MODULE,
@@ -11666,6 +11690,7 @@ static struct scsi_host_template mpt3sas_driver_template = {
        .sdev_attrs                     = mpt3sas_dev_attrs,
        .track_queue_depth              = 1,
        .cmd_size                       = sizeof(struct scsiio_tracker),
+       .map_queues                     = scsih_map_queues,
 };
 
 /* raid transport support for SAS 3.0 HBA devices */
@@ -11922,6 +11947,8 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
         * Enable MEMORY MOVE support flag.
         */
        ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_MEMMOVE;
+       /* Enable ADDITIONAL QUERY support flag. */
+       ioc->drv_support_bitmap |= MPT_DRV_SUPPORT_BITMAP_ADDNLQUERY;
 
        ioc->enable_sdev_max_qd = enable_sdev_max_qd;
 
@@ -12028,6 +12055,21 @@ _scsih_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        } else
                ioc->hide_drives = 0;
 
+       shost->host_tagset = 0;
+       shost->nr_hw_queues = 1;
+
+       if (ioc->is_gen35_ioc && ioc->reply_queue_count > 1 &&
+           host_tagset_enable && ioc->smp_affinity_enable) {
+
+               shost->host_tagset = 1;
+               shost->nr_hw_queues =
+                   ioc->reply_queue_count - ioc->high_iops_queues;
+
+               dev_info(&ioc->pdev->dev,
+                   "Max SCSIIO MPT commands: %d shared with nr_hw_queues = %d\n",
+                   shost->can_queue, shost->nr_hw_queues);
+       }
+
        rv = scsi_add_host(shost, &pdev->dev);
        if (rv) {
                ioc_err(ioc, "failure at %s:%d/%s()!\n",
index 8ec9bab..d9b7d0e 100644 (file)
@@ -132,6 +132,35 @@ mpt3sas_process_trigger_data(struct MPT3SAS_ADAPTER *ioc,
                    &issue_reset);
        }
 
+       ioc->htb_rel.buffer_rel_condition = MPT3_DIAG_BUFFER_REL_TRIGGER;
+       if (event_data) {
+               ioc->htb_rel.trigger_type = event_data->trigger_type;
+               switch (event_data->trigger_type) {
+               case MPT3SAS_TRIGGER_SCSI:
+                       memcpy(&ioc->htb_rel.trigger_info_dwords,
+                           &event_data->u.scsi,
+                           sizeof(struct SL_WH_SCSI_TRIGGER_T));
+                       break;
+               case MPT3SAS_TRIGGER_MPI:
+                       memcpy(&ioc->htb_rel.trigger_info_dwords,
+                           &event_data->u.mpi,
+                           sizeof(struct SL_WH_MPI_TRIGGER_T));
+                       break;
+               case MPT3SAS_TRIGGER_MASTER:
+                       ioc->htb_rel.trigger_info_dwords[0] =
+                           event_data->u.master.MasterData;
+                       break;
+               case MPT3SAS_TRIGGER_EVENT:
+                       memcpy(&ioc->htb_rel.trigger_info_dwords,
+                           &event_data->u.event,
+                           sizeof(struct SL_WH_EVENT_TRIGGER_T));
+                       break;
+               default:
+                       ioc_err(ioc, "%d - is not a valid trigger type\n",
+                           event_data->trigger_type);
+                       break;
+               }
+       }
        _mpt3sas_raise_sigio(ioc, event_data);
 
        dTriggerDiagPrintk(ioc, ioc_info(ioc, "%s: exit\n",
@@ -201,9 +230,14 @@ mpt3sas_trigger_master(struct MPT3SAS_ADAPTER *ioc, u32 trigger_bitmask)
        event_data.u.master.MasterData = trigger_bitmask;
 
        if (trigger_bitmask & MASTER_TRIGGER_FW_FAULT ||
-           trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET)
+           trigger_bitmask & MASTER_TRIGGER_ADAPTER_RESET) {
+               ioc->htb_rel.trigger_type = MPT3SAS_TRIGGER_MASTER;
+               ioc->htb_rel.trigger_info_dwords[0] = trigger_bitmask;
+               if (ioc->reset_from_user)
+                       ioc->htb_rel.trigger_info_dwords[1] =
+                           MPT_DIAG_RESET_ISSUED_BY_USER;
                _mpt3sas_raise_sigio(ioc, &event_data);
-       else
+       } else
                mpt3sas_send_trigger_data_event(ioc, &event_data);
 
  out:
index 4adf9de..329fd02 100644 (file)
@@ -2273,12 +2273,12 @@ static void myrs_cleanup(struct myrs_hba *cs)
        if (cs->mmio_base) {
                cs->disable_intr(cs);
                iounmap(cs->mmio_base);
+               cs->mmio_base = NULL;
        }
        if (cs->irq)
                free_irq(cs->irq, cs);
        if (cs->io_addr)
                release_region(cs->io_addr, 0x80);
-       iounmap(cs->mmio_base);
        pci_set_drvdata(pdev, NULL);
        pci_disable_device(pdev);
        scsi_host_put(cs->host);
index 5d5f50d..ac89002 100644 (file)
@@ -55,7 +55,6 @@
 
 MODULE_AUTHOR("YOKOTA Hiroshi <yokota@netlab.is.tsukuba.ac.jp>");
 MODULE_DESCRIPTION("WorkBit NinjaSCSI-3 / NinjaSCSI-32Bi(16bit) PCMCIA SCSI host adapter module");
-MODULE_SUPPORTED_DEVICE("sd,sr,sg,st");
 MODULE_LICENSE("GPL");
 
 #include "nsp_io.h"
index 15c9621..6d36deb 100644 (file)
@@ -244,7 +244,7 @@ struct pmcraid_ioarcb {
        __u8  hrrq_id;
        __u8  cdb[PMCRAID_MAX_CDB_LEN];
        struct pmcraid_ioarcb_add_data add_data;
-} __attribute__((packed, aligned(PMCRAID_IOARCB_ALIGNMENT)));
+};
 
 /* well known resource handle values */
 #define PMCRAID_IOA_RES_HANDLE        0xffffffff
@@ -1040,8 +1040,8 @@ struct pmcraid_passthrough_ioctl_buffer {
        struct pmcraid_ioctl_header ioctl_header;
        struct pmcraid_ioarcb ioarcb;
        struct pmcraid_ioasa  ioasa;
-       u8  request_buffer[1];
-} __attribute__ ((packed));
+       u8  request_buffer[];
+} __attribute__ ((packed, aligned(PMCRAID_IOARCB_ALIGNMENT)));
 
 /*
  * keys to differentiate between driver handled IOCTLs and passthrough
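
The pmcraid hunks above replace the one-byte request_buffer[1] with a C99 flexible array member and move the packed/aligned attributes onto the outer passthrough buffer. A short sketch of what the flexible member changes at allocation time (sketch only; request_size is a stand-in name):

#include <linux/slab.h>

static struct pmcraid_passthrough_ioctl_buffer *
sketch_alloc_passthrough(size_t request_size)
{
        /* sizeof() now counts only the fixed header, so header plus
         * payload can be allocated in one shot without the phantom
         * extra byte that the old [1] array added. */
        return kmalloc(sizeof(struct pmcraid_passthrough_ioctl_buffer) +
                       request_size, GFP_KERNEL);
}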
index 47ad64b..69c5b5e 100644 (file)
@@ -1675,6 +1675,7 @@ static int qedi_alloc_global_queues(struct qedi_ctx *qedi)
                if (!qedi->global_queues[i]) {
                        QEDI_ERR(&qedi->dbg_ctx,
                                 "Unable to allocation global queue %d.\n", i);
+                       status = -ENOMEM;
                        goto mem_alloc_failure;
                }
 
index 0d09480..480e7d2 100644 (file)
@@ -981,8 +981,7 @@ void qlt_free_session_done(struct work_struct *work)
                        int rc;
 
                        if (!own ||
-                           (own &&
-                            (own->iocb.u.isp24.status_subcode == ELS_PLOGI))) {
+                            (own->iocb.u.isp24.status_subcode == ELS_PLOGI)) {
                                rc = qla2x00_post_async_logout_work(vha, sess,
                                    NULL);
                                if (rc != QLA_SUCCESS)
@@ -3223,8 +3222,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
        if (!qpair->fw_started || (cmd->reset_count != qpair->chip_reset) ||
            (cmd->sess && cmd->sess->deleted)) {
                cmd->state = QLA_TGT_STATE_PROCESSED;
-               res = 0;
-               goto free;
+               return 0;
        }
 
        ql_dbg_qp(ql_dbg_tgt, qpair, 0xe018,
@@ -3235,8 +3233,9 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
 
        res = qlt_pre_xmit_response(cmd, &prm, xmit_type, scsi_status,
            &full_req_cnt);
-       if (unlikely(res != 0))
-               goto free;
+       if (unlikely(res != 0)) {
+               return res;
+       }
 
        spin_lock_irqsave(qpair->qp_lock_ptr, flags);
 
@@ -3256,8 +3255,7 @@ int qlt_xmit_response(struct qla_tgt_cmd *cmd, int xmit_type,
                        vha->flags.online, qla2x00_reset_active(vha),
                        cmd->reset_count, qpair->chip_reset);
                spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
-               res = 0;
-               goto free;
+               return 0;
        }
 
        /* Does F/W have an IOCBs for this request */
@@ -3360,8 +3358,6 @@ out_unmap_unlock:
        qlt_unmap_sg(vha, cmd);
        spin_unlock_irqrestore(qpair->qp_lock_ptr, flags);
 
-free:
-       vha->hw->tgt.tgt_ops->free_cmd(cmd);
        return res;
 }
 EXPORT_SYMBOL(qlt_xmit_response);
index 10e5e6c..01620f3 100644 (file)
        (min(1270, ((ql) > 0) ? (QLA_TGT_DATASEGS_PER_CMD_24XX + \
                QLA_TGT_DATASEGS_PER_CONT_24XX*((ql) - 1)) : 0))
 #endif
-#endif
 
 #define GET_TARGET_ID(ha, iocb) ((HAS_EXTENDED_IDS(ha))                        \
                         ? le16_to_cpu((iocb)->u.isp2x.target.extended) \
@@ -244,6 +243,7 @@ struct ctio_to_2xxx {
 #ifndef CTIO_RET_TYPE
 #define CTIO_RET_TYPE  0x17            /* CTIO return entry */
 #define ATIO_TYPE7 0x06 /* Accept target I/O entry for 24xx */
+#endif
 
 struct fcp_hdr {
        uint8_t  r_ctl;
index b55fc76..8b4890c 100644 (file)
@@ -644,7 +644,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
 {
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
-       struct scsi_qla_host *vha = cmd->vha;
 
        if (cmd->aborted) {
                /* Cmd can loop during Q-full.  tcm_qla2xxx_aborted_task
@@ -657,7 +656,6 @@ static int tcm_qla2xxx_queue_data_in(struct se_cmd *se_cmd)
                        cmd->se_cmd.transport_state,
                        cmd->se_cmd.t_state,
                        cmd->se_cmd.se_cmd_flags);
-               vha->hw->tgt.tgt_ops->free_cmd(cmd);
                return 0;
        }
 
@@ -685,7 +683,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd)
 {
        struct qla_tgt_cmd *cmd = container_of(se_cmd,
                                struct qla_tgt_cmd, se_cmd);
-       struct scsi_qla_host *vha = cmd->vha;
        int xmit_type = QLA_TGT_XMIT_STATUS;
 
        if (cmd->aborted) {
@@ -699,7 +696,6 @@ static int tcm_qla2xxx_queue_status(struct se_cmd *se_cmd)
                    cmd, kref_read(&cmd->se_cmd.cmd_kref),
                    cmd->se_cmd.transport_state, cmd->se_cmd.t_state,
                    cmd->se_cmd.se_cmd_flags);
-               vha->hw->tgt.tgt_ops->free_cmd(cmd);
                return 0;
        }
        cmd->bufflen = se_cmd->data_length;
index a4b014e..7bd9a4a 100644 (file)
@@ -841,7 +841,7 @@ static int __qla4xxx_is_chap_active(struct device *dev, void *data)
        sess = cls_session->dd_data;
        ddb_entry = sess->dd_data;
 
-       if (iscsi_session_chkready(cls_session))
+       if (iscsi_is_session_online(cls_session))
                goto exit_is_chap_active;
 
        if (ddb_entry->chap_tbl_idx == *chap_tbl_idx)
index 2e68c0a..f4bf62b 100644 (file)
@@ -132,7 +132,11 @@ show_transport_handle(struct device *dev, struct device_attribute *attr,
                      char *buf)
 {
        struct iscsi_internal *priv = dev_to_iscsi_internal(dev);
-       return sprintf(buf, "%llu\n", (unsigned long long)iscsi_handle(priv->iscsi_transport));
+
+       if (!capable(CAP_SYS_ADMIN))
+               return -EACCES;
+       return sysfs_emit(buf, "%llu\n",
+                 (unsigned long long)iscsi_handle(priv->iscsi_transport));
 }
 static DEVICE_ATTR(handle, S_IRUGO, show_transport_handle, NULL);
 
@@ -142,7 +146,7 @@ show_transport_##name(struct device *dev,                           \
                      struct device_attribute *attr,char *buf)          \
 {                                                                      \
        struct iscsi_internal *priv = dev_to_iscsi_internal(dev);       \
-       return sprintf(buf, format"\n", priv->iscsi_transport->name);   \
+       return sysfs_emit(buf, format"\n", priv->iscsi_transport->name);\
 }                                                                      \
 static DEVICE_ATTR(name, S_IRUGO, show_transport_##name, NULL);
 
@@ -183,7 +187,7 @@ static ssize_t
 show_ep_handle(struct device *dev, struct device_attribute *attr, char *buf)
 {
        struct iscsi_endpoint *ep = iscsi_dev_to_endpoint(dev);
-       return sprintf(buf, "%llu\n", (unsigned long long) ep->id);
+       return sysfs_emit(buf, "%llu\n", (unsigned long long) ep->id);
 }
 static ISCSI_ATTR(ep, handle, S_IRUGO, show_ep_handle, NULL);
 
@@ -1701,10 +1705,8 @@ static const char *iscsi_session_state_name(int state)
 
 int iscsi_session_chkready(struct iscsi_cls_session *session)
 {
-       unsigned long flags;
        int err;
 
-       spin_lock_irqsave(&session->lock, flags);
        switch (session->state) {
        case ISCSI_SESSION_LOGGED_IN:
                err = 0;
@@ -1719,7 +1721,6 @@ int iscsi_session_chkready(struct iscsi_cls_session *session)
                err = DID_NO_CONNECT << 16;
                break;
        }
-       spin_unlock_irqrestore(&session->lock, flags);
        return err;
 }
 EXPORT_SYMBOL_GPL(iscsi_session_chkready);
@@ -2474,6 +2475,7 @@ static void iscsi_if_stop_conn(struct iscsi_cls_conn *conn, int flag)
         */
        mutex_lock(&conn_mutex);
        conn->transport->stop_conn(conn, flag);
+       conn->state = ISCSI_CONN_DOWN;
        mutex_unlock(&conn_mutex);
 
 }
@@ -2883,6 +2885,9 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
        struct iscsi_cls_session *session;
        int err = 0, value = 0;
 
+       if (ev->u.set_param.len > PAGE_SIZE)
+               return -EINVAL;
+
        session = iscsi_session_lookup(ev->u.set_param.sid);
        conn = iscsi_conn_lookup(ev->u.set_param.sid, ev->u.set_param.cid);
        if (!conn || !session)
@@ -2897,6 +2902,13 @@ iscsi_set_param(struct iscsi_transport *transport, struct iscsi_uevent *ev)
        default:
                err = transport->set_param(conn, ev->u.set_param.param,
                                           data, ev->u.set_param.len);
+               if ((conn->state == ISCSI_CONN_BOUND) ||
+                       (conn->state == ISCSI_CONN_UP)) {
+                       err = transport->set_param(conn, ev->u.set_param.param,
+                                       data, ev->u.set_param.len);
+               } else {
+                       return -ENOTCONN;
+               }
        }
 
        return err;
@@ -2956,6 +2968,7 @@ static int iscsi_if_ep_disconnect(struct iscsi_transport *transport,
                mutex_lock(&conn->ep_mutex);
                conn->ep = NULL;
                mutex_unlock(&conn->ep_mutex);
+               conn->state = ISCSI_CONN_DOWN;
        }
 
        transport->ep_disconnect(ep);
@@ -3030,6 +3043,9 @@ iscsi_set_host_param(struct iscsi_transport *transport,
        if (!transport->set_host_param)
                return -ENOSYS;
 
+       if (ev->u.set_host_param.len > PAGE_SIZE)
+               return -EINVAL;
+
        shost = scsi_host_lookup(ev->u.set_host_param.host_no);
        if (!shost) {
                printk(KERN_ERR "set_host_param could not find host no %u\n",
@@ -3617,6 +3633,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
 {
        int err = 0;
        u32 portid;
+       u32 pdu_len;
        struct iscsi_uevent *ev = nlmsg_data(nlh);
        struct iscsi_transport *transport = NULL;
        struct iscsi_internal *priv;
@@ -3624,6 +3641,9 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
        struct iscsi_cls_conn *conn;
        struct iscsi_endpoint *ep = NULL;
 
+       if (!netlink_capable(skb, CAP_SYS_ADMIN))
+               return -EPERM;
+
        if (nlh->nlmsg_type == ISCSI_UEVENT_PATH_UPDATE)
                *group = ISCSI_NL_GRP_UIP;
        else
@@ -3716,6 +3736,8 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
                ev->r.retcode = transport->bind_conn(session, conn,
                                                ev->u.b_conn.transport_eph,
                                                ev->u.b_conn.is_leading);
+               if (!ev->r.retcode)
+                       conn->state = ISCSI_CONN_BOUND;
                mutex_unlock(&conn_mutex);
 
                if (ev->r.retcode || !transport->ep_connect)
@@ -3756,6 +3778,14 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group)
                        err = -EINVAL;
                break;
        case ISCSI_UEVENT_SEND_PDU:
+               pdu_len = nlh->nlmsg_len - sizeof(*nlh) - sizeof(*ev);
+
+               if ((ev->u.send_pdu.hdr_size > pdu_len) ||
+                   (ev->u.send_pdu.data_size > (pdu_len - ev->u.send_pdu.hdr_size))) {
+                       err = -EINVAL;
+                       break;
+               }
+
                conn = iscsi_conn_lookup(ev->u.send_pdu.sid, ev->u.send_pdu.cid);
                if (conn) {
                        mutex_lock(&conn_mutex);
@@ -3947,7 +3977,8 @@ iscsi_conn_attr(local_ipaddr, ISCSI_PARAM_LOCAL_IPADDR);
 static const char *const connection_state_names[] = {
        [ISCSI_CONN_UP] = "up",
        [ISCSI_CONN_DOWN] = "down",
-       [ISCSI_CONN_FAILED] = "failed"
+       [ISCSI_CONN_FAILED] = "failed",
+       [ISCSI_CONN_BOUND] = "bound"
 };
 
 static ssize_t show_conn_state(struct device *dev,
@@ -3960,7 +3991,7 @@ static ssize_t show_conn_state(struct device *dev,
            conn->state < ARRAY_SIZE(connection_state_names))
                state = connection_state_names[conn->state];
 
-       return sprintf(buf, "%s\n", state);
+       return sysfs_emit(buf, "%s\n", state);
 }
 static ISCSI_CLASS_ATTR(conn, state, S_IRUGO, show_conn_state,
                        NULL);
@@ -4188,7 +4219,7 @@ show_priv_session_state(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
-       return sprintf(buf, "%s\n", iscsi_session_state_name(session->state));
+       return sysfs_emit(buf, "%s\n", iscsi_session_state_name(session->state));
 }
 static ISCSI_CLASS_ATTR(priv_sess, state, S_IRUGO, show_priv_session_state,
                        NULL);
@@ -4197,7 +4228,7 @@ show_priv_session_creator(struct device *dev, struct device_attribute *attr,
                        char *buf)
 {
        struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
-       return sprintf(buf, "%d\n", session->creator);
+       return sysfs_emit(buf, "%d\n", session->creator);
 }
 static ISCSI_CLASS_ATTR(priv_sess, creator, S_IRUGO, show_priv_session_creator,
                        NULL);
@@ -4206,7 +4237,7 @@ show_priv_session_target_id(struct device *dev, struct device_attribute *attr,
                            char *buf)
 {
        struct iscsi_cls_session *session = iscsi_dev_to_session(dev->parent);
-       return sprintf(buf, "%d\n", session->target_id);
+       return sysfs_emit(buf, "%d\n", session->target_id);
 }
 static ISCSI_CLASS_ATTR(priv_sess, target_id, S_IRUGO,
                        show_priv_session_target_id, NULL);
@@ -4219,8 +4250,8 @@ show_priv_session_##field(struct device *dev,                             \
        struct iscsi_cls_session *session =                             \
                        iscsi_dev_to_session(dev->parent);              \
        if (session->field == -1)                                       \
-               return sprintf(buf, "off\n");                           \
-       return sprintf(buf, format"\n", session->field);                \
+               return sysfs_emit(buf, "off\n");                        \
+       return sysfs_emit(buf, format"\n", session->field);             \
 }
 
 #define iscsi_priv_session_attr_store(field)                           \
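
The sprintf() to sysfs_emit() conversions through this file all encode the same rule: a sysfs ->show() callback receives a PAGE_SIZE buffer, and sysfs_emit() enforces that contract rather than trusting the format string. A minimal sketch with a hypothetical attribute:

#include <linux/device.h>
#include <linux/sysfs.h>

/* sysfs_emit() checks that buf is page-aligned and never writes past
 * PAGE_SIZE, so a ->show() callback cannot silently overrun the page
 * the way an unchecked sprintf() could. */
static ssize_t example_show(struct device *dev,
                            struct device_attribute *attr, char *buf)
{
        return sysfs_emit(buf, "%d\n", 42);     /* placeholder value */
}
static DEVICE_ATTR_RO(example);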
index a3d2d4b..ed0b1bb 100644 (file)
@@ -707,9 +707,9 @@ static int sd_sec_submit(void *data, u16 spsp, u8 secp, void *buffer,
        put_unaligned_be16(spsp, &cdb[2]);
        put_unaligned_be32(len, &cdb[6]);
 
-       ret = scsi_execute_req(sdev, cdb,
-                       send ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
-                       buffer, len, NULL, SD_TIMEOUT, sdkp->max_retries, NULL);
+       ret = scsi_execute(sdev, cdb, send ? DMA_TO_DEVICE : DMA_FROM_DEVICE,
+               buffer, len, NULL, NULL, SD_TIMEOUT, sdkp->max_retries, 0,
+               RQF_PM, NULL);
        return ret <= 0 ? ret : -EIO;
 }
 #endif /* CONFIG_BLK_SED_OPAL */
@@ -3379,10 +3379,12 @@ static int sd_probe(struct device *dev)
            sdp->type != TYPE_RBC)
                goto out;
 
-#ifndef CONFIG_BLK_DEV_ZONED
-       if (sdp->type == TYPE_ZBC)
+       if (!IS_ENABLED(CONFIG_BLK_DEV_ZONED) && sdp->type == TYPE_ZBC) {
+               sdev_printk(KERN_WARNING, sdp,
+                           "Unsupported ZBC host-managed device.\n");
                goto out;
-#endif
+       }
+
        SCSI_LOG_HLQUEUE(3, sdev_printk(KERN_INFO, sdp,
                                        "sd_probe\n"));
 
index 03adb39..994f1b8 100644 (file)
@@ -280,27 +280,28 @@ static int sd_zbc_update_wp_offset_cb(struct blk_zone *zone, unsigned int idx,
 static void sd_zbc_update_wp_offset_workfn(struct work_struct *work)
 {
        struct scsi_disk *sdkp;
+       unsigned long flags;
        unsigned int zno;
        int ret;
 
        sdkp = container_of(work, struct scsi_disk, zone_wp_offset_work);
 
-       spin_lock_bh(&sdkp->zones_wp_offset_lock);
+       spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
        for (zno = 0; zno < sdkp->nr_zones; zno++) {
                if (sdkp->zones_wp_offset[zno] != SD_ZBC_UPDATING_WP_OFST)
                        continue;
 
-               spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+               spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
                ret = sd_zbc_do_report_zones(sdkp, sdkp->zone_wp_update_buf,
                                             SD_BUF_SIZE,
                                             zno * sdkp->zone_blocks, true);
-               spin_lock_bh(&sdkp->zones_wp_offset_lock);
+               spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
                if (!ret)
                        sd_zbc_parse_report(sdkp, sdkp->zone_wp_update_buf + 64,
                                            zno, sd_zbc_update_wp_offset_cb,
                                            sdkp);
        }
-       spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+       spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
 
        scsi_device_put(sdkp->device);
 }
@@ -324,6 +325,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
        struct request *rq = cmd->request;
        struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
        unsigned int wp_offset, zno = blk_rq_zone_no(rq);
+       unsigned long flags;
        blk_status_t ret;
 
        ret = sd_zbc_cmnd_checks(cmd);
@@ -337,7 +339,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
        if (!blk_req_zone_write_trylock(rq))
                return BLK_STS_ZONE_RESOURCE;
 
-       spin_lock_bh(&sdkp->zones_wp_offset_lock);
+       spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
        wp_offset = sdkp->zones_wp_offset[zno];
        switch (wp_offset) {
        case SD_ZBC_INVALID_WP_OFST:
@@ -366,7 +368,7 @@ blk_status_t sd_zbc_prepare_zone_append(struct scsi_cmnd *cmd, sector_t *lba,
 
                *lba += wp_offset;
        }
-       spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+       spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
        if (ret)
                blk_req_zone_write_unlock(rq);
        return ret;
@@ -445,6 +447,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
        struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
        unsigned int zno = blk_rq_zone_no(rq);
        enum req_opf op = req_op(rq);
+       unsigned long flags;
 
        /*
         * If we got an error for a command that needs updating the write
@@ -452,7 +455,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
         * invalid to force an update from disk the next time a zone append
         * command is issued.
         */
-       spin_lock_bh(&sdkp->zones_wp_offset_lock);
+       spin_lock_irqsave(&sdkp->zones_wp_offset_lock, flags);
 
        if (result && op != REQ_OP_ZONE_RESET_ALL) {
                if (op == REQ_OP_ZONE_APPEND) {
@@ -496,7 +499,7 @@ static unsigned int sd_zbc_zone_wp_update(struct scsi_cmnd *cmd,
        }
 
 unlock_wp_offset:
-       spin_unlock_bh(&sdkp->zones_wp_offset_lock);
+       spin_unlock_irqrestore(&sdkp->zones_wp_offset_lock, flags);
 
        return good_bytes;
 }
@@ -704,6 +707,7 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
        unsigned int nr_zones = sdkp->rev_nr_zones;
        u32 max_append;
        int ret = 0;
+       unsigned int flags;
 
        /*
         * For all zoned disks, initialize zone append emulation data if not
@@ -736,16 +740,19 @@ int sd_zbc_revalidate_zones(struct scsi_disk *sdkp)
            disk->queue->nr_zones == nr_zones)
                goto unlock;
 
+       flags = memalloc_noio_save();
        sdkp->zone_blocks = zone_blocks;
        sdkp->nr_zones = nr_zones;
-       sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_NOIO);
+       sdkp->rev_wp_offset = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
        if (!sdkp->rev_wp_offset) {
                ret = -ENOMEM;
+               memalloc_noio_restore(flags);
                goto unlock;
        }
 
        ret = blk_revalidate_disk_zones(disk, sd_zbc_revalidate_zones_cb);
 
+       memalloc_noio_restore(flags);
        kvfree(sdkp->rev_wp_offset);
        sdkp->rev_wp_offset = NULL;
 
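
Beyond the irqsave conversion, the revalidation hunk above swaps an explicit GFP_NOIO for the scoped memalloc_noio API, which also covers allocations made inside blk_revalidate_disk_zones(). A sketch of the pattern under those assumptions (the zone-report body is elided):

#include <linux/mm.h>
#include <linux/sched/mm.h>

static int sketch_revalidate(unsigned int nr_zones)
{
        /* Everything between save and restore implicitly drops __GFP_IO,
         * even in callees that still pass GFP_KERNEL. */
        unsigned int noio_flags = memalloc_noio_save();
        u32 *wp = kvcalloc(nr_zones, sizeof(u32), GFP_KERNEL);
        int ret = wp ? 0 : -ENOMEM;

        /* ... zone report calls that may allocate would go here ... */

        memalloc_noio_restore(noio_flags);
        kvfree(wp);
        return ret;
}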
index c53f456..a1dacb6 100644 (file)
@@ -48,7 +48,6 @@
 MODULE_AUTHOR("Microsemi");
 MODULE_DESCRIPTION("Driver for Microsemi Smart Family Controller version "
        DRIVER_VERSION);
-MODULE_SUPPORTED_DEVICE("Microsemi Smart Family Controllers");
 MODULE_VERSION(DRIVER_VERSION);
 MODULE_LICENSE("GPL");
 
index 841ad2f..9ca536a 100644 (file)
@@ -1269,8 +1269,8 @@ static int st_open(struct inode *inode, struct file *filp)
        spin_lock(&st_use_lock);
        if (STp->in_use) {
                spin_unlock(&st_use_lock);
-               scsi_tape_put(STp);
                DEBC_printk(STp, "Device already in use.\n");
+               scsi_tape_put(STp);
                return (-EBUSY);
        }
 
index c55202b..a981f26 100644 (file)
@@ -911,7 +911,7 @@ static void ufs_mtk_vreg_set_lpm(struct ufs_hba *hba, bool lpm)
        if (!hba->vreg_info.vccq2 || !hba->vreg_info.vcc)
                return;
 
-       if (lpm & !hba->vreg_info.vcc->enabled)
+       if (lpm && !hba->vreg_info.vcc->enabled)
                regulator_set_mode(hba->vreg_info.vccq2->reg,
                                   REGULATOR_MODE_IDLE);
        else if (!lpm)
index f97d7b0..a9dc8d7 100644 (file)
@@ -253,12 +253,17 @@ static int ufs_qcom_host_reset(struct ufs_hba *hba)
 {
        int ret = 0;
        struct ufs_qcom_host *host = ufshcd_get_variant(hba);
+       bool reenable_intr = false;
 
        if (!host->core_reset) {
                dev_warn(hba->dev, "%s: reset control not set\n", __func__);
                goto out;
        }
 
+       reenable_intr = hba->is_irq_enabled;
+       disable_irq(hba->irq);
+       hba->is_irq_enabled = false;
+
        ret = reset_control_assert(host->core_reset);
        if (ret) {
                dev_err(hba->dev, "%s: core_reset assert failed, err = %d\n",
@@ -280,6 +285,11 @@ static int ufs_qcom_host_reset(struct ufs_hba *hba)
 
        usleep_range(1000, 1100);
 
+       if (reenable_intr) {
+               enable_irq(hba->irq);
+               hba->is_irq_enabled = true;
+       }
+
 out:
        return ret;
 }
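
The ufs-qcom hunks above bracket the core reset with an interrupt quiesce so that a spurious IRQ cannot fire while the controller is mid-reset. The shape of the pattern, sketched with a hypothetical reset helper:

#include "ufshcd.h"     /* driver-private struct ufs_hba, as in the patch */

/* Remember whether the IRQ was live, mask it across the reset pulse,
 * then restore the previous state. */
static int sketch_host_reset(struct ufs_hba *hba)
{
        bool reenable_intr = hba->is_irq_enabled;
        int ret;

        disable_irq(hba->irq);
        hba->is_irq_enabled = false;

        ret = sketch_reset_pulse(hba);  /* hypothetical assert + deassert */

        if (reenable_intr) {
                enable_irq(hba->irq);
                hba->is_irq_enabled = true;
        }
        return ret;
}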
index 721f55d..c867607 100644 (file)
@@ -95,8 +95,6 @@
                       16, 4, buf, __len, false);                        \
 } while (0)
 
-static bool early_suspend;
-
 int ufshcd_dump_regs(struct ufs_hba *hba, size_t offset, size_t len,
                     const char *prefix)
 {
@@ -451,6 +449,8 @@ static void ufshcd_print_evt(struct ufs_hba *hba, u32 id,
 
        if (!found)
                dev_err(hba->dev, "No record of %s\n", err_name);
+       else
+               dev_err(hba->dev, "%s: total cnt=%llu\n", err_name, e->cnt);
 }
 
 static void ufshcd_print_evt_hist(struct ufs_hba *hba)
@@ -1533,7 +1533,7 @@ static ssize_t ufshcd_clkscale_enable_show(struct device *dev,
 {
        struct ufs_hba *hba = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_scaling.is_enabled);
+       return sysfs_emit(buf, "%d\n", hba->clk_scaling.is_enabled);
 }
 
 static ssize_t ufshcd_clkscale_enable_store(struct device *dev,
@@ -1866,7 +1866,7 @@ static ssize_t ufshcd_clkgate_delay_show(struct device *dev,
 {
        struct ufs_hba *hba = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%lu\n", hba->clk_gating.delay_ms);
+       return sysfs_emit(buf, "%lu\n", hba->clk_gating.delay_ms);
 }
 
 static ssize_t ufshcd_clkgate_delay_store(struct device *dev,
@@ -1889,7 +1889,7 @@ static ssize_t ufshcd_clkgate_enable_show(struct device *dev,
 {
        struct ufs_hba *hba = dev_get_drvdata(dev);
 
-       return snprintf(buf, PAGE_SIZE, "%d\n", hba->clk_gating.is_enabled);
+       return sysfs_emit(buf, "%d\n", hba->clk_gating.is_enabled);
 }
 
 static ssize_t ufshcd_clkgate_enable_store(struct device *dev,
@@ -4985,6 +4985,7 @@ ufshcd_transfer_rsp_status(struct ufs_hba *hba, struct ufshcd_lrb *lrbp)
                         * UFS device needs urgent BKOPs.
                         */
                        if (!hba->pm_op_in_progress &&
+                           !ufshcd_eh_in_progress(hba) &&
                            ufshcd_is_exception_event(lrbp->ucd_rsp_ptr) &&
                            schedule_work(&hba->eeh_work)) {
                                /*
@@ -5782,13 +5783,20 @@ static void ufshcd_err_handling_prepare(struct ufs_hba *hba)
                        ufshcd_suspend_clkscaling(hba);
                ufshcd_clk_scaling_allow(hba, false);
        }
+       ufshcd_scsi_block_requests(hba);
+       /* Drain ufshcd_queuecommand() */
+       down_write(&hba->clk_scaling_lock);
+       up_write(&hba->clk_scaling_lock);
+       cancel_work_sync(&hba->eeh_work);
 }
 
 static void ufshcd_err_handling_unprepare(struct ufs_hba *hba)
 {
+       ufshcd_scsi_unblock_requests(hba);
        ufshcd_release(hba);
        if (ufshcd_is_clkscaling_supported(hba))
                ufshcd_clk_scaling_suspend(hba, false);
+       ufshcd_clear_ua_wluns(hba);
        pm_runtime_put(hba->dev);
 }
 
@@ -5880,8 +5888,8 @@ static void ufshcd_err_handler(struct work_struct *work)
        spin_unlock_irqrestore(hba->host->host_lock, flags);
        ufshcd_err_handling_prepare(hba);
        spin_lock_irqsave(hba->host->host_lock, flags);
-       ufshcd_scsi_block_requests(hba);
-       hba->ufshcd_state = UFSHCD_STATE_RESET;
+       if (hba->ufshcd_state != UFSHCD_STATE_ERROR)
+               hba->ufshcd_state = UFSHCD_STATE_RESET;
 
        /* Complete requests that have door-bell cleared by h/w */
        ufshcd_complete_requests(hba);
@@ -6040,12 +6048,8 @@ skip_err_handling:
        }
        ufshcd_clear_eh_in_progress(hba);
        spin_unlock_irqrestore(hba->host->host_lock, flags);
-       ufshcd_scsi_unblock_requests(hba);
        ufshcd_err_handling_unprepare(hba);
        up(&hba->host_sem);
-
-       if (!err && needs_reset)
-               ufshcd_clear_ua_wluns(hba);
 }
 
 /**
@@ -7856,6 +7860,8 @@ static int ufshcd_probe_hba(struct ufs_hba *hba, bool async)
        unsigned long flags;
        ktime_t start = ktime_get();
 
+       hba->ufshcd_state = UFSHCD_STATE_RESET;
+
        ret = ufshcd_link_startup(hba);
        if (ret)
                goto out;
@@ -8970,11 +8976,6 @@ int ufshcd_system_suspend(struct ufs_hba *hba)
        int ret = 0;
        ktime_t start = ktime_get();
 
-       if (!hba) {
-               early_suspend = true;
-               return 0;
-       }
-
        down(&hba->host_sem);
 
        if (!hba->is_powered)
@@ -9026,14 +9027,6 @@ int ufshcd_system_resume(struct ufs_hba *hba)
        int ret = 0;
        ktime_t start = ktime_get();
 
-       if (!hba)
-               return -EINVAL;
-
-       if (unlikely(early_suspend)) {
-               early_suspend = false;
-               down(&hba->host_sem);
-       }
-
        if (!hba->is_powered || pm_runtime_suspended(hba->dev))
                /*
                 * Let the runtime resume take care of resuming
@@ -9066,9 +9059,6 @@ int ufshcd_runtime_suspend(struct ufs_hba *hba)
        int ret = 0;
        ktime_t start = ktime_get();
 
-       if (!hba)
-               return -EINVAL;
-
        if (!hba->is_powered)
                goto out;
        else
@@ -9107,9 +9097,6 @@ int ufshcd_runtime_resume(struct ufs_hba *hba)
        int ret = 0;
        ktime_t start = ktime_get();
 
-       if (!hba)
-               return -EINVAL;
-
        if (!hba->is_powered)
                goto out;
        else
index ee61f82..18e56c1 100644 (file)
@@ -570,7 +570,7 @@ enum ufshcd_quirks {
        /*
         * This quirk allows only sg entries aligned with page size.
         */
-       UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE            = 1 << 13,
+       UFSHCD_QUIRK_ALIGN_SG_WITH_PAGE_SIZE            = 1 << 14,
 };
 
 enum ufshcd_caps {
index 081f54a..8a79605 100644 (file)
@@ -17,8 +17,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Maintained by: Jim Gill <jgill@vmware.com>
- *
  */
 
 #include <linux/kernel.h>
index 75966d3..51a82f7 100644 (file)
@@ -17,8 +17,6 @@
  * along with this program; if not, write to the Free Software
  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
  *
- * Maintained by: Jim Gill <jgill@vmware.com>
- *
  */
 
 #ifndef _VMW_PVSCSI_H_
diff --git a/drivers/sfi/Kconfig b/drivers/sfi/Kconfig
deleted file mode 100644 (file)
index 3d0b64d..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-#
-# SFI Configuration
-#
-
-menuconfig SFI
-       bool "SFI (Simple Firmware Interface) Support"
-       help
-       The Simple Firmware Interface (SFI) provides a lightweight method
-       for platform firmware to pass information to the operating system
-       via static tables in memory.  Kernel SFI support is required to
-       boot on SFI-only platforms.  Currently, all SFI-only platforms are
-       based on the 2nd generation Intel Atom processor platform,
-       code-named Moorestown.
-
-       For more information, see http://simplefirmware.org
-
-       Say 'Y' here to enable the kernel to boot on SFI-only platforms.
diff --git a/drivers/sfi/Makefile b/drivers/sfi/Makefile
deleted file mode 100644 (file)
index ca9436b..0000000
+++ /dev/null
@@ -1,4 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0-only
-obj-y  += sfi_acpi.o
-obj-y  += sfi_core.o
-
diff --git a/drivers/sfi/sfi_acpi.c b/drivers/sfi/sfi_acpi.c
deleted file mode 100644 (file)
index d277b36..0000000
+++ /dev/null
@@ -1,214 +0,0 @@
-/* sfi_acpi.c Simple Firmware Interface - ACPI extensions */
-
-/*
-
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  BSD LICENSE
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#define KMSG_COMPONENT "SFI"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/sfi_acpi.h>
-#include "sfi_core.h"
-
-/*
- * SFI can access ACPI-defined tables via an optional ACPI XSDT.
- *
- * This allows re-use, and avoids re-definition, of standard tables.
- * For example, the "MCFG" table is defined by PCI, reserved by ACPI,
- * and is expected to be present many SFI-only systems.
- */
-
-static struct acpi_table_xsdt *xsdt_va __read_mostly;
-
-#define XSDT_GET_NUM_ENTRIES(ptable, entry_type) \
-       ((ptable->header.length - sizeof(struct acpi_table_header)) / \
-       (sizeof(entry_type)))
-
-static inline struct sfi_table_header *acpi_to_sfi_th(
-                               struct acpi_table_header *th)
-{
-       return (struct sfi_table_header *)th;
-}
-
-static inline struct acpi_table_header *sfi_to_acpi_th(
-                               struct sfi_table_header *th)
-{
-       return (struct acpi_table_header *)th;
-}
-
-/*
- * sfi_acpi_parse_xsdt()
- *
- * Parse the ACPI XSDT for later access by sfi_acpi_table_parse().
- */
-static int __init sfi_acpi_parse_xsdt(struct sfi_table_header *th)
-{
-       struct sfi_table_key key = SFI_ANY_KEY;
-       int tbl_cnt, i;
-       void *ret;
-
-       xsdt_va = (struct acpi_table_xsdt *)th;
-       tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64);
-       for (i = 0; i < tbl_cnt; i++) {
-               ret = sfi_check_table(xsdt_va->table_offset_entry[i], &key);
-               if (IS_ERR(ret)) {
-                       disable_sfi();
-                       return -1;
-               }
-       }
-
-       return 0;
-}
-
-int __init sfi_acpi_init(void)
-{
-       struct sfi_table_key xsdt_key = { .sig = SFI_SIG_XSDT };
-
-       sfi_table_parse(SFI_SIG_XSDT, NULL, NULL, sfi_acpi_parse_xsdt);
-
-       /* Only call the get_table to keep the table mapped */
-       xsdt_va = (struct acpi_table_xsdt *)sfi_get_table(&xsdt_key);
-       return 0;
-}
-
-static struct acpi_table_header *sfi_acpi_get_table(struct sfi_table_key *key)
-{
-       u32 tbl_cnt, i;
-       void *ret;
-
-       tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64);
-       for (i = 0; i < tbl_cnt; i++) {
-               ret = sfi_check_table(xsdt_va->table_offset_entry[i], key);
-               if (!IS_ERR(ret) && ret)
-                       return sfi_to_acpi_th(ret);
-       }
-
-       return NULL;
-}
-
-static void sfi_acpi_put_table(struct acpi_table_header *table)
-{
-       sfi_put_table(acpi_to_sfi_th(table));
-}
-
-/*
- * sfi_acpi_table_parse()
- *
- * Find specified table in XSDT, run handler on it and return its return value
- */
-int sfi_acpi_table_parse(char *signature, char *oem_id, char *oem_table_id,
-                       int(*handler)(struct acpi_table_header *))
-{
-       struct acpi_table_header *table = NULL;
-       struct sfi_table_key key;
-       int ret = 0;
-
-       if (sfi_disabled)
-               return -1;
-
-       key.sig = signature;
-       key.oem_id = oem_id;
-       key.oem_table_id = oem_table_id;
-
-       table = sfi_acpi_get_table(&key);
-       if (!table)
-               return -EINVAL;
-
-       ret = handler(table);
-       sfi_acpi_put_table(table);
-       return ret;
-}
-
-static ssize_t sfi_acpi_table_show(struct file *filp, struct kobject *kobj,
-                              struct bin_attribute *bin_attr, char *buf,
-                              loff_t offset, size_t count)
-{
-       struct sfi_table_attr *tbl_attr =
-           container_of(bin_attr, struct sfi_table_attr, attr);
-       struct acpi_table_header *th = NULL;
-       struct sfi_table_key key;
-       ssize_t cnt;
-
-       key.sig = tbl_attr->name;
-       key.oem_id = NULL;
-       key.oem_table_id = NULL;
-
-       th = sfi_acpi_get_table(&key);
-       if (!th)
-               return 0;
-
-       cnt =  memory_read_from_buffer(buf, count, &offset,
-                                       th, th->length);
-       sfi_acpi_put_table(th);
-
-       return cnt;
-}
-
-
-void __init sfi_acpi_sysfs_init(void)
-{
-       u32 tbl_cnt, i;
-       struct sfi_table_attr *tbl_attr;
-
-       tbl_cnt = XSDT_GET_NUM_ENTRIES(xsdt_va, u64);
-       for (i = 0; i < tbl_cnt; i++) {
-               tbl_attr =
-                       sfi_sysfs_install_table(xsdt_va->table_offset_entry[i]);
-               tbl_attr->attr.read = sfi_acpi_table_show;
-       }
-
-       return;
-}
diff --git a/drivers/sfi/sfi_core.c b/drivers/sfi/sfi_core.c
deleted file mode 100644 (file)
index a513690..0000000
+++ /dev/null
@@ -1,522 +0,0 @@
-/* sfi_core.c Simple Firmware Interface - core internals */
-
-/*
-
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  BSD LICENSE
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#define KMSG_COMPONENT "SFI"
-#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt
-
-#include <linux/memblock.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/errno.h>
-#include <linux/types.h>
-#include <linux/acpi.h>
-#include <linux/init.h>
-#include <linux/sfi.h>
-#include <linux/slab.h>
-#include <linux/io.h>
-
-#include "sfi_core.h"
-
-#define ON_SAME_PAGE(addr1, addr2) \
-       (((unsigned long)(addr1) & PAGE_MASK) == \
-       ((unsigned long)(addr2) & PAGE_MASK))
-#define TABLE_ON_PAGE(page, table, size) (ON_SAME_PAGE(page, table) && \
-                               ON_SAME_PAGE(page, table + size))
-
-int sfi_disabled __read_mostly;
-EXPORT_SYMBOL(sfi_disabled);
-
-static u64 syst_pa __read_mostly;
-static struct sfi_table_simple *syst_va __read_mostly;
-
-/*
- * FW creates and saves the SFI tables in memory. When these tables get
- * used, they may need to be mapped to virtual address space, and the mapping
- * can happen before or after the memremap() is ready, so a flag is needed
- * to indicating this
- */
-static u32 sfi_use_memremap __read_mostly;
-
-/*
- * sfi_un/map_memory calls early_memremap/memunmap which is a __init function
- * and introduces section mismatch. So use __ref to make it calm.
- */
-static void __iomem * __ref sfi_map_memory(u64 phys, u32 size)
-{
-       if (!phys || !size)
-               return NULL;
-
-       if (sfi_use_memremap)
-               return memremap(phys, size, MEMREMAP_WB);
-       else
-               return early_memremap(phys, size);
-}
-
-static void __ref sfi_unmap_memory(void __iomem *virt, u32 size)
-{
-       if (!virt || !size)
-               return;
-
-       if (sfi_use_memremap)
-               memunmap(virt);
-       else
-               early_memunmap(virt, size);
-}
-
-static void sfi_print_table_header(unsigned long long pa,
-                               struct sfi_table_header *header)
-{
-       pr_info("%4.4s %llX, %04X (v%d %6.6s %8.8s)\n",
-               header->sig, pa,
-               header->len, header->rev, header->oem_id,
-               header->oem_table_id);
-}
-
-/*
- * sfi_verify_table()
- * Sanity check table lengh, calculate checksum
- */
-static int sfi_verify_table(struct sfi_table_header *table)
-{
-
-       u8 checksum = 0;
-       u8 *puchar = (u8 *)table;
-       u32 length = table->len;
-
-       /* Sanity check table length against arbitrary 1MB limit */
-       if (length > 0x100000) {
-               pr_err("Invalid table length 0x%x\n", length);
-               return -1;
-       }
-
-       while (length--)
-               checksum += *puchar++;
-
-       if (checksum) {
-               pr_err("Checksum %2.2X should be %2.2X\n",
-                       table->csum, table->csum - checksum);
-               return -1;
-       }
-       return 0;
-}
-
-/*
- * sfi_map_table()
- *
- * Return address of mapped table
- * Check for common case that we can re-use mapping to SYST,
- * which requires syst_pa, syst_va to be initialized.
- */
-static struct sfi_table_header *sfi_map_table(u64 pa)
-{
-       struct sfi_table_header *th;
-       u32 length;
-
-       if (!TABLE_ON_PAGE(syst_pa, pa, sizeof(struct sfi_table_header)))
-               th = sfi_map_memory(pa, sizeof(struct sfi_table_header));
-       else
-               th = (void *)syst_va + (pa - syst_pa);
-
-        /* If table fits on same page as its header, we are done */
-       if (TABLE_ON_PAGE(th, th, th->len))
-               return th;
-
-       /* Entire table does not fit on same page as SYST */
-       length = th->len;
-       if (!TABLE_ON_PAGE(syst_pa, pa, sizeof(struct sfi_table_header)))
-               sfi_unmap_memory(th, sizeof(struct sfi_table_header));
-
-       return sfi_map_memory(pa, length);
-}
-
-/*
- * sfi_unmap_table()
- *
- * Undoes effect of sfi_map_table() by unmapping table
- * if it did not completely fit on same page as SYST.
- */
-static void sfi_unmap_table(struct sfi_table_header *th)
-{
-       if (!TABLE_ON_PAGE(syst_va, th, th->len))
-               sfi_unmap_memory(th, TABLE_ON_PAGE(th, th, th->len) ?
-                                       sizeof(*th) : th->len);
-}
-
-static int sfi_table_check_key(struct sfi_table_header *th,
-                               struct sfi_table_key *key)
-{
-
-       if (strncmp(th->sig, key->sig, SFI_SIGNATURE_SIZE)
-               || (key->oem_id && strncmp(th->oem_id,
-                               key->oem_id, SFI_OEM_ID_SIZE))
-               || (key->oem_table_id && strncmp(th->oem_table_id,
-                               key->oem_table_id, SFI_OEM_TABLE_ID_SIZE)))
-               return -1;
-
-       return 0;
-}
-
-/*
- * This function will be used in 2 cases:
- * 1. used to enumerate and verify the tables addressed by SYST/XSDT,
- *    thus no signature will be given (in kernel boot phase)
- * 2. used to parse one specific table, signature must exist, and
- *    the mapped virt address will be returned, and the virt space
- *    will be released by call sfi_put_table() later
- *
- * This two cases are from two different functions with two different
- * sections and causes section mismatch warning. So use __ref to tell
- * modpost not to make any noise.
- *
- * Return value:
- *     NULL:                   when can't find a table matching the key
- *     ERR_PTR(error):         error value
- *     virt table address:     when a matched table is found
- */
-struct sfi_table_header *
- __ref sfi_check_table(u64 pa, struct sfi_table_key *key)
-{
-       struct sfi_table_header *th;
-       void *ret = NULL;
-
-       th = sfi_map_table(pa);
-       if (!th)
-               return ERR_PTR(-ENOMEM);
-
-       if (!key->sig) {
-               sfi_print_table_header(pa, th);
-               if (sfi_verify_table(th))
-                       ret = ERR_PTR(-EINVAL);
-       } else {
-               if (!sfi_table_check_key(th, key))
-                       return th;      /* Success */
-       }
-
-       sfi_unmap_table(th);
-       return ret;
-}
-
-/*
- * sfi_get_table()
- *
- * Search SYST for the specified table with the signature in
- * the key, and return the mapped table
- */
-struct sfi_table_header *sfi_get_table(struct sfi_table_key *key)
-{
-       struct sfi_table_header *th;
-       u32 tbl_cnt, i;
-
-       tbl_cnt = SFI_GET_NUM_ENTRIES(syst_va, u64);
-       for (i = 0; i < tbl_cnt; i++) {
-               th = sfi_check_table(syst_va->pentry[i], key);
-               if (!IS_ERR(th) && th)
-                       return th;
-       }
-
-       return NULL;
-}
-
-void sfi_put_table(struct sfi_table_header *th)
-{
-       sfi_unmap_table(th);
-}
-
-/* Find table with signature, run handler on it */
-int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id,
-                       sfi_table_handler handler)
-{
-       struct sfi_table_header *table = NULL;
-       struct sfi_table_key key;
-       int ret = -EINVAL;
-
-       if (sfi_disabled || !handler || !signature)
-               goto exit;
-
-       key.sig = signature;
-       key.oem_id = oem_id;
-       key.oem_table_id = oem_table_id;
-
-       table = sfi_get_table(&key);
-       if (!table)
-               goto exit;
-
-       ret = handler(table);
-       sfi_put_table(table);
-exit:
-       return ret;
-}
-EXPORT_SYMBOL_GPL(sfi_table_parse);
-
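
For reference, platform code consumed this exported API by passing a table signature and a handler; a hedged sketch, where the handler name is made up for the example (SFI_SIG_GPIO is a real signature from include/linux/sfi.h):

        static int __init sfi_parse_gpio(struct sfi_table_header *table)
        {
                /* walk the fixed-size entries that follow the header */
                return 0;
        }

        /* at init time: */
        sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio);
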
-/*
- * sfi_parse_syst()
- * Checksum all the tables in SYST and print their headers
- *
- * success: set syst_va, return 0
- */
-static int __init sfi_parse_syst(void)
-{
-       struct sfi_table_key key = SFI_ANY_KEY;
-       int tbl_cnt, i;
-       void *ret;
-
-       syst_va = sfi_map_memory(syst_pa, sizeof(struct sfi_table_simple));
-       if (!syst_va)
-               return -ENOMEM;
-
-       tbl_cnt = SFI_GET_NUM_ENTRIES(syst_va, u64);
-       for (i = 0; i < tbl_cnt; i++) {
-               ret = sfi_check_table(syst_va->pentry[i], &key);
-               if (IS_ERR(ret))
-                       return PTR_ERR(ret);
-       }
-
-       return 0;
-}
-
-/*
- * The OS finds the System Table by searching 16-byte boundaries between
- * physical address 0x000E0000 and 0x000FFFFF. The OS shall search this region
- * starting at the low address and shall stop searching when the 1st valid SFI
- * System Table is found.
- *
- * success: set syst_pa, return 0
- * fail: return -1
- */
-static __init int sfi_find_syst(void)
-{
-       unsigned long offset, len;
-       void *start;
-
-       len = SFI_SYST_SEARCH_END - SFI_SYST_SEARCH_BEGIN;
-       start = sfi_map_memory(SFI_SYST_SEARCH_BEGIN, len);
-       if (!start)
-               return -1;
-
-       for (offset = 0; offset < len; offset += 16) {
-               struct sfi_table_header *syst_hdr;
-
-               syst_hdr = start + offset;
-               if (strncmp(syst_hdr->sig, SFI_SIG_SYST,
-                               SFI_SIGNATURE_SIZE))
-                       continue;
-
-               if (syst_hdr->len > PAGE_SIZE)
-                       continue;
-
-               sfi_print_table_header(SFI_SYST_SEARCH_BEGIN + offset,
-                                       syst_hdr);
-
-               if (sfi_verify_table(syst_hdr))
-                       continue;
-
-               /*
-                * Enforce SFI spec mandate that SYST reside within a page.
-                */
-               if (!ON_SAME_PAGE(syst_pa, syst_pa + syst_hdr->len)) {
-                       pr_info("SYST 0x%llx + 0x%x crosses page\n",
-                                       syst_pa, syst_hdr->len);
-                       continue;
-               }
-
-               /* Success */
-               syst_pa = SFI_SYST_SEARCH_BEGIN + offset;
-               sfi_unmap_memory(start, len);
-               return 0;
-       }
-
-       sfi_unmap_memory(start, len);
-       return -1;
-}
-
-static struct kobject *sfi_kobj;
-static struct kobject *tables_kobj;
-
-static ssize_t sfi_table_show(struct file *filp, struct kobject *kobj,
-                              struct bin_attribute *bin_attr, char *buf,
-                              loff_t offset, size_t count)
-{
-       struct sfi_table_attr *tbl_attr =
-           container_of(bin_attr, struct sfi_table_attr, attr);
-       struct sfi_table_header *th = NULL;
-       struct sfi_table_key key;
-       ssize_t cnt;
-
-       key.sig = tbl_attr->name;
-       key.oem_id = NULL;
-       key.oem_table_id = NULL;
-
-       if (strncmp(SFI_SIG_SYST, tbl_attr->name, SFI_SIGNATURE_SIZE)) {
-               th = sfi_get_table(&key);
-               if (!th)
-                       return 0;
-
-               cnt =  memory_read_from_buffer(buf, count, &offset,
-                                               th, th->len);
-               sfi_put_table(th);
-       } else
-               cnt =  memory_read_from_buffer(buf, count, &offset,
-                                       syst_va, syst_va->header.len);
-
-       return cnt;
-}
-
-struct sfi_table_attr __init *sfi_sysfs_install_table(u64 pa)
-{
-       struct sfi_table_attr *tbl_attr;
-       struct sfi_table_header *th;
-       int ret;
-
-       tbl_attr = kzalloc(sizeof(struct sfi_table_attr), GFP_KERNEL);
-       if (!tbl_attr)
-               return NULL;
-
-       th = sfi_map_table(pa);
-       if (!th || !th->sig[0]) {
-               kfree(tbl_attr);
-               return NULL;
-       }
-
-       sysfs_attr_init(&tbl_attr->attr.attr);
-       memcpy(tbl_attr->name, th->sig, SFI_SIGNATURE_SIZE);
-
-       tbl_attr->attr.size = 0;
-       tbl_attr->attr.read = sfi_table_show;
-       tbl_attr->attr.attr.name = tbl_attr->name;
-       tbl_attr->attr.attr.mode = 0400;
-
-       ret = sysfs_create_bin_file(tables_kobj,
-                                 &tbl_attr->attr);
-       if (ret) {
-               kfree(tbl_attr);
-               tbl_attr = NULL;
-       }
-
-       sfi_unmap_table(th);
-       return tbl_attr;
-}
-
-static int __init sfi_sysfs_init(void)
-{
-       int tbl_cnt, i;
-
-       if (sfi_disabled)
-               return 0;
-
-       sfi_kobj = kobject_create_and_add("sfi", firmware_kobj);
-       if (!sfi_kobj)
-               return 0;
-
-       tables_kobj = kobject_create_and_add("tables", sfi_kobj);
-       if (!tables_kobj) {
-               kobject_put(sfi_kobj);
-               return 0;
-       }
-
-       sfi_sysfs_install_table(syst_pa);
-
-       tbl_cnt = SFI_GET_NUM_ENTRIES(syst_va, u64);
-
-       for (i = 0; i < tbl_cnt; i++)
-               sfi_sysfs_install_table(syst_va->pentry[i]);
-
-       sfi_acpi_sysfs_init();
-       kobject_uevent(sfi_kobj, KOBJ_ADD);
-       kobject_uevent(tables_kobj, KOBJ_ADD);
-       pr_info("SFI sysfs interfaces init success\n");
-       return 0;
-}
-
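
Each installed table then appears as a read-only (mode 0400) binary file under /sys/firmware/sfi/tables/; a minimal userspace dump, assuming a SYST entry exists on the running system:

        #include <fcntl.h>
        #include <stdio.h>
        #include <unistd.h>

        int main(void)
        {
                char buf[4096];
                ssize_t n;
                int fd = open("/sys/firmware/sfi/tables/SYST", O_RDONLY);

                if (fd < 0)
                        return 1;
                while ((n = read(fd, buf, sizeof(buf))) > 0)
                        fwrite(buf, 1, n, stdout);
                close(fd);
                return 0;
        }
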
-void __init sfi_init(void)
-{
-       if (!acpi_disabled)
-               disable_sfi();
-
-       if (sfi_disabled)
-               return;
-
-       pr_info("Simple Firmware Interface v0.81 http://simplefirmware.org\n");
-
-       if (sfi_find_syst() || sfi_parse_syst() || sfi_platform_init())
-               disable_sfi();
-
-       return;
-}
-
-void __init sfi_init_late(void)
-{
-       int length;
-
-       if (sfi_disabled)
-               return;
-
-       length = syst_va->header.len;
-       sfi_unmap_memory(syst_va, sizeof(struct sfi_table_simple));
-
-       /* Use memremap now after it is ready */
-       sfi_use_memremap = 1;
-       syst_va = sfi_map_memory(syst_pa, length);
-
-       sfi_acpi_init();
-}
-
-/*
- * The reason we put it here is that we need to wait until /sys/firmware
- * is set up; only then can our interface be registered in /sys/firmware/sfi.
- */
-core_initcall(sfi_sysfs_init);
diff --git a/drivers/sfi/sfi_core.h b/drivers/sfi/sfi_core.h
deleted file mode 100644 (file)
index 1d5cfe8..0000000
+++ /dev/null
@@ -1,81 +0,0 @@
-/* sfi_core.h Simple Firmware Interface, internal header */
-
-/*
-
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  BSD LICENSE
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#include <linux/sysfs.h>
-
-struct sfi_table_key{
-       char    *sig;
-       char    *oem_id;
-       char    *oem_table_id;
-};
-
-/* sysfs interface */
-struct sfi_table_attr {
-       struct bin_attribute attr;
-       char name[8];
-};
-
-#define SFI_ANY_KEY { .sig = NULL, .oem_id = NULL, .oem_table_id = NULL }
-
-extern int __init sfi_acpi_init(void);
-extern  struct sfi_table_header *sfi_check_table(u64 paddr,
-                                       struct sfi_table_key *key);
-struct sfi_table_header *sfi_get_table(struct sfi_table_key *key);
-extern void sfi_put_table(struct sfi_table_header *table);
-extern struct sfi_table_attr __init *sfi_sysfs_install_table(u64 pa);
-extern void __init sfi_acpi_sysfs_init(void);
index e5d7fb8..bd0fbcd 100644 (file)
@@ -30,7 +30,6 @@
 MODULE_AUTHOR("Adrian McMenamin <adrian@mcmen.demon.co.uk>");
 MODULE_DESCRIPTION("Maple bus driver for Dreamcast");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{SEGA, Dreamcast/Maple}}");
 
 static void maple_dma_handler(struct work_struct *work);
 static void maple_vblank_handler(struct work_struct *work);
index f357c6c..e8a30c4 100644 (file)
@@ -6,6 +6,7 @@ source "drivers/soc/amlogic/Kconfig"
 source "drivers/soc/aspeed/Kconfig"
 source "drivers/soc/atmel/Kconfig"
 source "drivers/soc/bcm/Kconfig"
+source "drivers/soc/canaan/Kconfig"
 source "drivers/soc/fsl/Kconfig"
 source "drivers/soc/imx/Kconfig"
 source "drivers/soc/ixp4xx/Kconfig"
@@ -22,6 +23,5 @@ source "drivers/soc/ti/Kconfig"
 source "drivers/soc/ux500/Kconfig"
 source "drivers/soc/versatile/Kconfig"
 source "drivers/soc/xilinx/Kconfig"
-source "drivers/soc/kendryte/Kconfig"
 
 endmenu
index 9bceb12..f678e4d 100644 (file)
@@ -7,6 +7,7 @@ obj-$(CONFIG_ARCH_ACTIONS)      += actions/
 obj-y                          += aspeed/
 obj-$(CONFIG_ARCH_AT91)                += atmel/
 obj-y                          += bcm/
+obj-$(CONFIG_SOC_CANAAN)       += canaan/
 obj-$(CONFIG_ARCH_DOVE)                += dove/
 obj-$(CONFIG_MACH_DOVE)                += dove/
 obj-y                          += fsl/
@@ -28,4 +29,3 @@ obj-y                         += ti/
 obj-$(CONFIG_ARCH_U8500)       += ux500/
 obj-$(CONFIG_PLAT_VERSATILE)   += versatile/
 obj-y                          += xilinx/
-obj-$(CONFIG_SOC_KENDRYTE)     += kendryte/
diff --git a/drivers/soc/canaan/Kconfig b/drivers/soc/canaan/Kconfig
new file mode 100644 (file)
index 0000000..8179b69
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+config SOC_K210_SYSCTL
+       bool "Canaan Kendryte K210 SoC system controller"
+       depends on RISCV && SOC_CANAAN && OF
+       default SOC_CANAAN
+       select PM
+       select SIMPLE_PM_BUS
+       select SYSCON
+       select MFD_SYSCON
+       help
+         Canaan Kendryte K210 SoC system controller driver.
diff --git a/drivers/soc/canaan/Makefile b/drivers/soc/canaan/Makefile
new file mode 100644 (file)
index 0000000..570280a
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_SOC_K210_SYSCTL)  += k210-sysctl.o
diff --git a/drivers/soc/canaan/k210-sysctl.c b/drivers/soc/canaan/k210-sysctl.c
new file mode 100644 (file)
index 0000000..27a346c
--- /dev/null
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright (c) 2019 Christoph Hellwig.
+ * Copyright (c) 2019 Western Digital Corporation or its affiliates.
+ */
+#include <linux/io.h>
+#include <linux/platform_device.h>
+#include <linux/of_platform.h>
+#include <linux/clk.h>
+#include <asm/soc.h>
+
+#include <soc/canaan/k210-sysctl.h>
+
+static int k210_sysctl_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       struct clk *pclk;
+       int ret;
+
+       dev_info(dev, "K210 system controller\n");
+
+       /* Get power bus clock */
+       pclk = devm_clk_get(dev, NULL);
+       if (IS_ERR(pclk))
+               return dev_err_probe(dev, PTR_ERR(pclk),
+                                    "Get bus clock failed\n");
+
+       ret = clk_prepare_enable(pclk);
+       if (ret) {
+               dev_err(dev, "Enable bus clock failed\n");
+               return ret;
+       }
+
+       /* Populate children */
+       ret = devm_of_platform_populate(dev);
+       if (ret)
+               dev_err(dev, "Populate platform failed %d\n", ret);
+
+       return ret;
+}
+
+static const struct of_device_id k210_sysctl_of_match[] = {
+       { .compatible = "canaan,k210-sysctl", },
+       { /* sentinel */ },
+};
+
+static struct platform_driver k210_sysctl_driver = {
+       .driver = {
+               .name           = "k210-sysctl",
+               .of_match_table = k210_sysctl_of_match,
+       },
+       .probe                  = k210_sysctl_probe,
+};
+builtin_platform_driver(k210_sysctl_driver);
+
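
dev_err_probe() in the probe path above folds the usual -EPROBE_DEFER special-casing into one call; roughly the open-coded pattern it replaces (a sketch, not taken from this driver):

        if (IS_ERR(pclk)) {
                ret = PTR_ERR(pclk);
                if (ret != -EPROBE_DEFER)
                        dev_err(dev, "Get bus clock failed: %d\n", ret);
                return ret;
        }
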
+/*
+ * System controller registers base address and size.
+ */
+#define K210_SYSCTL_BASE_ADDR  0x50440000ULL
+#define K210_SYSCTL_BASE_SIZE  0x1000
+
+/*
+ * This needs to be called very early during initialization, given that
+ * PLL1 needs to be enabled to be able to use all SRAM.
+ */
+static void __init k210_soc_early_init(const void *fdt)
+{
+       void __iomem *sysctl_base;
+
+       sysctl_base = ioremap(K210_SYSCTL_BASE_ADDR, K210_SYSCTL_BASE_SIZE);
+       if (!sysctl_base)
+               panic("k210-sysctl: ioremap failed");
+
+       k210_clk_early_init(sysctl_base);
+
+       iounmap(sysctl_base);
+}
+SOC_EARLY_INIT_DECLARE(k210_soc, "canaan,kendryte-k210", k210_soc_early_init);
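
SOC_EARLY_INIT_DECLARE() records a (compatible, init-fn) pair in a linker-section table that RISC-V boot code walks against the FDT root before full memory setup; a simplified sketch of that walk, paraphrased from arch/riscv rather than quoted (table_start/table_end stand in for the real section symbols):

        const struct of_device_id *s;

        for (s = table_start; s < table_end; s++) {
                /* fdt_node_check_compatible() returns 0 on a match */
                if (!fdt_node_check_compatible(fdt, 0, s->compatible)) {
                        ((void (*)(const void *))s->data)(fdt);
                        break;
                }
        }
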
index a1b9be1..fde4edd 100644 (file)
@@ -186,7 +186,7 @@ struct qm_eqcr_entry {
        __be32 tag;
        struct qm_fd fd;
        u8 __reserved3[32];
-} __packed;
+} __packed __aligned(8);
 #define QM_EQCR_VERB_VBIT              0x80
 #define QM_EQCR_VERB_CMD_MASK          0x61    /* but only one value; */
 #define QM_EQCR_VERB_CMD_ENQUEUE       0x01
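
The added __aligned(8) matters because __packed alone drops the structure's alignment to 1, letting the compiler assume EQCR ring entries may sit at arbitrary addresses; a hedged compile-time check of the presumed intent (illustrative, not part of the patch):

        BUILD_BUG_ON(__alignof__(struct qm_eqcr_entry) < 8);
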
diff --git a/drivers/soc/kendryte/Kconfig b/drivers/soc/kendryte/Kconfig
deleted file mode 100644 (file)
index 49785b1..0000000
+++ /dev/null
@@ -1,14 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-if SOC_KENDRYTE
-
-config K210_SYSCTL
-       bool "Kendryte K210 system controller"
-       default y
-       depends on RISCV
-       help
-         Enables control of the K210's various clocks and general-purpose
-         use of the extra 2MB of SRAM normally reserved for the AI
-         engine.
-
-endif
diff --git a/drivers/soc/kendryte/Makefile b/drivers/soc/kendryte/Makefile
deleted file mode 100644 (file)
index 002d9ce..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-# SPDX-License-Identifier: GPL-2.0
-
-obj-$(CONFIG_K210_SYSCTL)      += k210-sysctl.o
diff --git a/drivers/soc/kendryte/k210-sysctl.c b/drivers/soc/kendryte/k210-sysctl.c
deleted file mode 100644 (file)
index 7070192..0000000
+++ /dev/null
@@ -1,260 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- * Copyright (c) 2019 Christoph Hellwig.
- * Copyright (c) 2019 Western Digital Corporation or its affiliates.
- */
-#include <linux/types.h>
-#include <linux/io.h>
-#include <linux/of.h>
-#include <linux/platform_device.h>
-#include <linux/clk-provider.h>
-#include <linux/clkdev.h>
-#include <linux/bitfield.h>
-#include <asm/soc.h>
-
-#define K210_SYSCTL_CLK0_FREQ          26000000UL
-
-/* Registers base address */
-#define K210_SYSCTL_SYSCTL_BASE_ADDR   0x50440000ULL
-
-/* Registers */
-#define K210_SYSCTL_PLL0               0x08
-#define K210_SYSCTL_PLL1               0x0c
-/* clkr: 4bits, clkf1: 6bits, clkod: 4bits, bwadj: 4bits */
-#define   PLL_RESET            (1 << 20)
-#define   PLL_PWR              (1 << 21)
-#define   PLL_INTFB            (1 << 22)
-#define   PLL_BYPASS           (1 << 23)
-#define   PLL_TEST             (1 << 24)
-#define   PLL_OUT_EN           (1 << 25)
-#define   PLL_TEST_EN          (1 << 26)
-#define K210_SYSCTL_PLL_LOCK           0x18
-#define   PLL0_LOCK1           (1 << 0)
-#define   PLL0_LOCK2           (1 << 1)
-#define   PLL0_SLIP_CLEAR      (1 << 2)
-#define   PLL0_TEST_CLK_OUT    (1 << 3)
-#define   PLL1_LOCK1           (1 << 8)
-#define   PLL1_LOCK2           (1 << 9)
-#define   PLL1_SLIP_CLEAR      (1 << 10)
-#define   PLL1_TEST_CLK_OUT    (1 << 11)
-#define   PLL2_LOCK1           (1 << 16)
-#define   PLL2_LOCK2           (1 << 16)
-#define   PLL2_SLIP_CLEAR      (1 << 18)
-#define   PLL2_TEST_CLK_OUT    (1 << 19)
-#define K210_SYSCTL_CLKSEL0    0x20
-#define   CLKSEL_ACLK          (1 << 0)
-#define K210_SYSCTL_CLKEN_CENT         0x28
-#define   CLKEN_CPU            (1 << 0)
-#define   CLKEN_SRAM0          (1 << 1)
-#define   CLKEN_SRAM1          (1 << 2)
-#define   CLKEN_APB0           (1 << 3)
-#define   CLKEN_APB1           (1 << 4)
-#define   CLKEN_APB2           (1 << 5)
-#define K210_SYSCTL_CLKEN_PERI         0x2c
-#define   CLKEN_ROM            (1 << 0)
-#define   CLKEN_DMA            (1 << 1)
-#define   CLKEN_AI             (1 << 2)
-#define   CLKEN_DVP            (1 << 3)
-#define   CLKEN_FFT            (1 << 4)
-#define   CLKEN_GPIO           (1 << 5)
-#define   CLKEN_SPI0           (1 << 6)
-#define   CLKEN_SPI1           (1 << 7)
-#define   CLKEN_SPI2           (1 << 8)
-#define   CLKEN_SPI3           (1 << 9)
-#define   CLKEN_I2S0           (1 << 10)
-#define   CLKEN_I2S1           (1 << 11)
-#define   CLKEN_I2S2           (1 << 12)
-#define   CLKEN_I2C0           (1 << 13)
-#define   CLKEN_I2C1           (1 << 14)
-#define   CLKEN_I2C2           (1 << 15)
-#define   CLKEN_UART1          (1 << 16)
-#define   CLKEN_UART2          (1 << 17)
-#define   CLKEN_UART3          (1 << 18)
-#define   CLKEN_AES            (1 << 19)
-#define   CLKEN_FPIO           (1 << 20)
-#define   CLKEN_TIMER0         (1 << 21)
-#define   CLKEN_TIMER1         (1 << 22)
-#define   CLKEN_TIMER2         (1 << 23)
-#define   CLKEN_WDT0           (1 << 24)
-#define   CLKEN_WDT1           (1 << 25)
-#define   CLKEN_SHA            (1 << 26)
-#define   CLKEN_OTP            (1 << 27)
-#define   CLKEN_RTC            (1 << 29)
-
-struct k210_sysctl {
-       void __iomem            *regs;
-       struct clk_hw           hw;
-};
-
-static void k210_set_bits(u32 val, void __iomem *reg)
-{
-       writel(readl(reg) | val, reg);
-}
-
-static void k210_clear_bits(u32 val, void __iomem *reg)
-{
-       writel(readl(reg) & ~val, reg);
-}
-
-static void k210_pll1_enable(void __iomem *regs)
-{
-       u32 val;
-
-       val = readl(regs + K210_SYSCTL_PLL1);
-       val &= ~GENMASK(19, 0);                         /* clkr1 = 0 */
-       val |= FIELD_PREP(GENMASK(9, 4), 0x3B);         /* clkf1 = 59 */
-       val |= FIELD_PREP(GENMASK(13, 10), 0x3);        /* clkod1 = 3 */
-       val |= FIELD_PREP(GENMASK(19, 14), 0x3B);       /* bwadj1 = 59 */
-       writel(val, regs + K210_SYSCTL_PLL1);
-
-       k210_clear_bits(PLL_BYPASS, regs + K210_SYSCTL_PLL1);
-       k210_set_bits(PLL_PWR, regs + K210_SYSCTL_PLL1);
-
-       /*
-        * Reset the pll. The magic NOPs come from the Kendryte reference SDK.
-        */
-       k210_clear_bits(PLL_RESET, regs + K210_SYSCTL_PLL1);
-       k210_set_bits(PLL_RESET, regs + K210_SYSCTL_PLL1);
-       nop();
-       nop();
-       k210_clear_bits(PLL_RESET, regs + K210_SYSCTL_PLL1);
-
-       for (;;) {
-               val = readl(regs + K210_SYSCTL_PLL_LOCK);
-               if (val & PLL1_LOCK2)
-                       break;
-               writel(val | PLL1_SLIP_CLEAR, regs + K210_SYSCTL_PLL_LOCK);
-       }
-
-       k210_set_bits(PLL_OUT_EN, regs + K210_SYSCTL_PLL1);
-}
-
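
Plugging the values programmed above into the PLL rate formula used by the recalc path below (assuming PLL1 uses the same +1 field encoding as PLL0):

        /*
         * clkr1 = 0 -> divider 1, clkf1 = 59 -> multiplier 60,
         * clkod1 = 3 -> divider 4:
         *   f_PLL1 = 26 MHz * 60 / (1 * 4) = 390 MHz
         */
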
-static unsigned long k210_sysctl_clk_recalc_rate(struct clk_hw *hw,
-               unsigned long parent_rate)
-{
-       struct k210_sysctl *s = container_of(hw, struct k210_sysctl, hw);
-       u32 clksel0, pll0;
-       u64 pll0_freq, clkr0, clkf0, clkod0;
-
-       /*
-        * If the clock selector is not set, use the base frequency.
-        * Otherwise, use PLL0 frequency with a frequency divisor.
-        */
-       clksel0 = readl(s->regs + K210_SYSCTL_CLKSEL0);
-       if (!(clksel0 & CLKSEL_ACLK))
-               return K210_SYSCTL_CLK0_FREQ;
-
-       /*
-        * Get PLL0 frequency:
-        * freq = base frequency * clkf0 / (clkr0 * clkod0)
-        */
-       pll0 = readl(s->regs + K210_SYSCTL_PLL0);
-       clkr0 = 1 + FIELD_GET(GENMASK(3, 0), pll0);
-       clkf0 = 1 + FIELD_GET(GENMASK(9, 4), pll0);
-       clkod0 = 1 + FIELD_GET(GENMASK(13, 10), pll0);
-       pll0_freq = clkf0 * K210_SYSCTL_CLK0_FREQ / (clkr0 * clkod0);
-
-       /* Get the frequency divisor from the clock selector */
-       return pll0_freq / (2ULL << FIELD_GET(0x00000006, clksel0));
-}
-
-static const struct clk_ops k210_sysctl_clk_ops = {
-       .recalc_rate    = k210_sysctl_clk_recalc_rate,
-};
-
-static const struct clk_init_data k210_clk_init_data = {
-       .name           = "k210-sysctl-pll1",
-       .ops            = &k210_sysctl_clk_ops,
-};
-
-static int k210_sysctl_probe(struct platform_device *pdev)
-{
-       struct k210_sysctl *s;
-       int error;
-
-       pr_info("Kendryte K210 SoC sysctl\n");
-
-       s = devm_kzalloc(&pdev->dev, sizeof(*s), GFP_KERNEL);
-       if (!s)
-               return -ENOMEM;
-
-       s->regs = devm_ioremap_resource(&pdev->dev,
-                       platform_get_resource(pdev, IORESOURCE_MEM, 0));
-       if (IS_ERR(s->regs))
-               return PTR_ERR(s->regs);
-
-       s->hw.init = &k210_clk_init_data;
-       error = devm_clk_hw_register(&pdev->dev, &s->hw);
-       if (error) {
-               dev_err(&pdev->dev, "failed to register clk");
-               return error;
-       }
-
-       error = devm_of_clk_add_hw_provider(&pdev->dev, of_clk_hw_simple_get,
-                                           &s->hw);
-       if (error) {
-               dev_err(&pdev->dev, "adding clk provider failed\n");
-               return error;
-       }
-
-       return 0;
-}
-
-static const struct of_device_id k210_sysctl_of_match[] = {
-       { .compatible = "kendryte,k210-sysctl", },
-       {}
-};
-
-static struct platform_driver k210_sysctl_driver = {
-       .driver = {
-               .name           = "k210-sysctl",
-               .of_match_table = k210_sysctl_of_match,
-       },
-       .probe                  = k210_sysctl_probe,
-};
-
-static int __init k210_sysctl_init(void)
-{
-       return platform_driver_register(&k210_sysctl_driver);
-}
-core_initcall(k210_sysctl_init);
-
-/*
- * This needs to be called very early during initialization, given that
- * PLL1 needs to be enabled to be able to use all SRAM.
- */
-static void __init k210_soc_early_init(const void *fdt)
-{
-       void __iomem *regs;
-
-       regs = ioremap(K210_SYSCTL_SYSCTL_BASE_ADDR, 0x1000);
-       if (!regs)
-               panic("K210 sysctl ioremap");
-
-       /* Enable PLL1 to make the KPU SRAM useable */
-       k210_pll1_enable(regs);
-
-       k210_set_bits(PLL_OUT_EN, regs + K210_SYSCTL_PLL0);
-
-       k210_set_bits(CLKEN_CPU | CLKEN_SRAM0 | CLKEN_SRAM1,
-                     regs + K210_SYSCTL_CLKEN_CENT);
-       k210_set_bits(CLKEN_ROM | CLKEN_TIMER0 | CLKEN_RTC,
-                     regs + K210_SYSCTL_CLKEN_PERI);
-
-       k210_set_bits(CLKSEL_ACLK, regs + K210_SYSCTL_CLKSEL0);
-
-       iounmap(regs);
-}
-SOC_EARLY_INIT_DECLARE(generic_k210, "kendryte,k210", k210_soc_early_init);
-
-#ifdef CONFIG_SOC_KENDRYTE_K210_DTB_BUILTIN
-/*
- * Generic entry for the default k210.dtb embedded DTB for boards with:
- *   - Vendor ID: 0x4B5
- *   - Arch ID: 0xE59889E6A5A04149 (= "Canaan AI" in UTF-8 encoded Chinese)
- *   - Impl ID: 0x4D41495832303030 (= "MAIX2000")
- * These values are reported by the SiPEED MAXDUINO, SiPEED MAIX GO and
- * SiPEED Dan dock boards.
- */
-SOC_BUILTIN_DTB_DECLARE(k210, 0x4B5, 0xE59889E6A5A04149, 0x4D41495832303030);
-#endif
index 7a7c382..e7011d6 100644 (file)
@@ -12,9 +12,21 @@ config LITEX_SOC_CONTROLLER
        select LITEX
        help
          This option enables the SoC Controller Driver which verifies
-         LiteX CSR access and provides common litex_get_reg/litex_set_reg
+         LiteX CSR access and provides common litex_[read|write]*
          accessors.
          All drivers that use functions from litex.h must depend on
          LITEX.
 
+config LITEX_SUBREG_SIZE
+       int "Size of a LiteX CSR subregister, in bytes"
+       depends on LITEX
+       range 1 4
+       default 4
+       help
+         LiteX MMIO registers (referred to as Configuration and Status
+         registers, or CSRs) are spread across adjacent 8- or 32-bit
+         subregisters, located at 32-bit aligned MMIO addresses. Use
+         this to select the appropriate size (1 or 4 bytes) matching
+         your particular LiteX build.
+
 endmenu
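
The subregister layout this help text describes is what the common accessors hide; an illustrative reassembly of one 32-bit CSR from four 1-byte subregisters at 32-bit-aligned addresses, MSB first to match the removed litex_get_reg() logic (the helper name is made up, and kernel context with linux/io.h is assumed; real drivers use litex_read32()/litex_write32() from include/linux/litex.h):

        static u32 litex_csr_read32_1byte_subregs(void __iomem *reg)
        {
                u32 val = 0;
                int i;

                /* each meaningful byte lives in its own 32-bit slot */
                for (i = 0; i < 4; i++)
                        val = (val << 8) | readb(reg + i * 4);
                return val;
        }
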
index 9b07663..c3e379a 100644 (file)
 #include <linux/platform_device.h>
 #include <linux/printk.h>
 #include <linux/module.h>
-#include <linux/errno.h>
 #include <linux/io.h>
+#include <linux/reboot.h>
 
-/*
- * LiteX SoC Generator, depending on the configuration, can split a single
- * logical CSR (Control&Status Register) into a series of consecutive physical
- * registers.
- *
- * For example, in the configuration with 8-bit CSR Bus, 32-bit aligned (the
- * default one for 32-bit CPUs) a 32-bit logical CSR will be generated as four
- * 32-bit physical registers, each one containing one byte of meaningful data.
- *
- * For details see: https://github.com/enjoy-digital/litex/wiki/CSR-Bus
- *
- * The purpose of `litex_set_reg`/`litex_get_reg` is to implement the logic
- * of writing to/reading from the LiteX CSR in a single place that can be
- * then reused by all LiteX drivers.
- */
-
-/**
- * litex_set_reg() - Writes the value to the LiteX CSR (Control&Status Register)
- * @reg: Address of the CSR
- * @reg_size: The width of the CSR expressed in the number of bytes
- * @val: Value to be written to the CSR
- *
- * In the currently supported LiteX configuration (8-bit CSR Bus, 32-bit aligned),
- * a 32-bit LiteX CSR is generated as 4 consecutive 32-bit physical registers,
- * each one containing one byte of meaningful data.
- *
- * This function splits a single possibly multi-byte write into a series of
- * single-byte writes with a proper offset.
- */
-void litex_set_reg(void __iomem *reg, unsigned long reg_size,
-                   unsigned long val)
-{
-       unsigned long shifted_data, shift, i;
-
-       for (i = 0; i < reg_size; ++i) {
-               shift = ((reg_size - i - 1) * LITEX_SUBREG_SIZE_BIT);
-               shifted_data = val >> shift;
-
-               WRITE_LITEX_SUBREGISTER(shifted_data, reg, i);
-       }
-}
-EXPORT_SYMBOL_GPL(litex_set_reg);
-
-/**
- * litex_get_reg() - Reads the value of the LiteX CSR (Control&Status Register)
- * @reg: Address of the CSR
- * @reg_size: The width of the CSR expressed in the number of bytes
- *
- * Return: Value read from the CSR
- *
- * In the currently supported LiteX configuration (8-bit CSR Bus, 32-bit aligned),
- * a 32-bit LiteX CSR is generated as 4 consecutive 32-bit physical registers,
- * each one containing one byte of meaningful data.
- *
- * This function generates a series of single-byte reads with a proper offset
- * and joins their results into a single multi-byte value.
- */
-unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_size)
-{
-       unsigned long shifted_data, shift, i;
-       unsigned long result = 0;
-
-       for (i = 0; i < reg_size; ++i) {
-               shifted_data = READ_LITEX_SUBREGISTER(reg, i);
-
-               shift = ((reg_size - i - 1) * LITEX_SUBREG_SIZE_BIT);
-               result |= (shifted_data << shift);
-       }
-
-       return result;
-}
-EXPORT_SYMBOL_GPL(litex_get_reg);
+/* reset register located at the base address */
+#define RESET_REG_OFF           0x00
+#define RESET_REG_VALUE         0x00000001
 
 #define SCRATCH_REG_OFF         0x04
 #define SCRATCH_REG_VALUE       0x12345678
@@ -131,15 +62,27 @@ static int litex_check_csr_access(void __iomem *reg_addr)
        /* restore original value of the SCRATCH register */
        litex_write32(reg_addr + SCRATCH_REG_OFF, SCRATCH_REG_VALUE);
 
-       pr_info("LiteX SoC Controller driver initialized");
+       pr_info("LiteX SoC Controller driver initialized: subreg:%d, align:%d",
+               LITEX_SUBREG_SIZE, LITEX_SUBREG_ALIGN);
 
        return 0;
 }
 
 struct litex_soc_ctrl_device {
        void __iomem *base;
+       struct notifier_block reset_nb;
 };
 
+static int litex_reset_handler(struct notifier_block *this, unsigned long mode,
+                              void *cmd)
+{
+       struct litex_soc_ctrl_device *soc_ctrl_dev =
+               container_of(this, struct litex_soc_ctrl_device, reset_nb);
+
+       litex_write32(soc_ctrl_dev->base + RESET_REG_OFF, RESET_REG_VALUE);
+       return NOTIFY_DONE;
+}
+
 #ifdef CONFIG_OF
 static const struct of_device_id litex_soc_ctrl_of_match[] = {
        {.compatible = "litex,soc-controller"},
@@ -151,6 +94,7 @@ MODULE_DEVICE_TABLE(of, litex_soc_ctrl_of_match);
 static int litex_soc_ctrl_probe(struct platform_device *pdev)
 {
        struct litex_soc_ctrl_device *soc_ctrl_dev;
+       int error;
 
        soc_ctrl_dev = devm_kzalloc(&pdev->dev, sizeof(*soc_ctrl_dev), GFP_KERNEL);
        if (!soc_ctrl_dev)
@@ -160,7 +104,29 @@ static int litex_soc_ctrl_probe(struct platform_device *pdev)
        if (IS_ERR(soc_ctrl_dev->base))
                return PTR_ERR(soc_ctrl_dev->base);
 
-       return litex_check_csr_access(soc_ctrl_dev->base);
+       error = litex_check_csr_access(soc_ctrl_dev->base);
+       if (error)
+               return error;
+
+       platform_set_drvdata(pdev, soc_ctrl_dev);
+
+       soc_ctrl_dev->reset_nb.notifier_call = litex_reset_handler;
+       soc_ctrl_dev->reset_nb.priority = 128;
+       error = register_restart_handler(&soc_ctrl_dev->reset_nb);
+       if (error) {
+               dev_warn(&pdev->dev, "cannot register restart handler: %d\n",
+                        error);
+       }
+
+       return 0;
+}
+
+static int litex_soc_ctrl_remove(struct platform_device *pdev)
+{
+       struct litex_soc_ctrl_device *soc_ctrl_dev = platform_get_drvdata(pdev);
+
+       unregister_restart_handler(&soc_ctrl_dev->reset_nb);
+       return 0;
 }
 
 static struct platform_driver litex_soc_ctrl_driver = {
@@ -169,6 +135,7 @@ static struct platform_driver litex_soc_ctrl_driver = {
                .of_match_table = of_match_ptr(litex_soc_ctrl_of_match)
        },
        .probe = litex_soc_ctrl_probe,
+       .remove = litex_soc_ctrl_remove,
 };
 
 module_platform_driver(litex_soc_ctrl_driver);
index f42954e..1fd29f9 100644 (file)
@@ -3,7 +3,6 @@
 
 #include <linux/acpi.h>
 #include <linux/clk.h>
-#include <linux/console.h>
 #include <linux/slab.h>
 #include <linux/dma-mapping.h>
 #include <linux/io.h>
@@ -92,14 +91,11 @@ struct geni_wrapper {
        struct device *dev;
        void __iomem *base;
        struct clk_bulk_data ahb_clks[NUM_AHB_CLKS];
-       struct geni_icc_path to_core;
 };
 
 static const char * const icc_path_names[] = {"qup-core", "qup-config",
                                                "qup-memory"};
 
-static struct geni_wrapper *earlycon_wrapper;
-
 #define QUP_HW_VER_REG                 0x4
 
 /* Common SE registers */
@@ -843,44 +839,11 @@ int geni_icc_disable(struct geni_se *se)
 }
 EXPORT_SYMBOL(geni_icc_disable);
 
-void geni_remove_earlycon_icc_vote(void)
-{
-       struct platform_device *pdev;
-       struct geni_wrapper *wrapper;
-       struct device_node *parent;
-       struct device_node *child;
-
-       if (!earlycon_wrapper)
-               return;
-
-       wrapper = earlycon_wrapper;
-       parent = of_get_next_parent(wrapper->dev->of_node);
-       for_each_child_of_node(parent, child) {
-               if (!of_device_is_compatible(child, "qcom,geni-se-qup"))
-                       continue;
-
-               pdev = of_find_device_by_node(child);
-               if (!pdev)
-                       continue;
-
-               wrapper = platform_get_drvdata(pdev);
-               icc_put(wrapper->to_core.path);
-               wrapper->to_core.path = NULL;
-
-       }
-       of_node_put(parent);
-
-       earlycon_wrapper = NULL;
-}
-EXPORT_SYMBOL(geni_remove_earlycon_icc_vote);
-
 static int geni_se_probe(struct platform_device *pdev)
 {
        struct device *dev = &pdev->dev;
        struct resource *res;
        struct geni_wrapper *wrapper;
-       struct console __maybe_unused *bcon;
-       bool __maybe_unused has_earlycon = false;
        int ret;
 
        wrapper = devm_kzalloc(dev, sizeof(*wrapper), GFP_KERNEL);
@@ -903,43 +866,6 @@ static int geni_se_probe(struct platform_device *pdev)
                }
        }
 
-#ifdef CONFIG_SERIAL_EARLYCON
-       for_each_console(bcon) {
-               if (!strcmp(bcon->name, "qcom_geni")) {
-                       has_earlycon = true;
-                       break;
-               }
-       }
-       if (!has_earlycon)
-               goto exit;
-
-       wrapper->to_core.path = devm_of_icc_get(dev, "qup-core");
-       if (IS_ERR(wrapper->to_core.path))
-               return PTR_ERR(wrapper->to_core.path);
-       /*
-        * Put a minimal BW request on the core clocks on behalf of the
-        * early console. The vote will be removed in the earlycon exit
-        * function.
-        *
-        * Note: we vote on every QUP wrapper, not only the one the
-        * earlycon is connected to, because the QUP core clocks of
-        * different wrappers share the same voltage domain. If core1 is
-        * put to 0, core2 will also run at 0 unless it has its own vote.
-        * The default ICC vote is removed as soon as we touch any of the
-        * core clocks.
-        * core1 = core2 = max(core1, core2)
-        */
-       ret = icc_set_bw(wrapper->to_core.path, GENI_DEFAULT_BW,
-                               GENI_DEFAULT_BW);
-       if (ret) {
-               dev_err(&pdev->dev, "%s: ICC BW voting failed for core: %d\n",
-                       __func__, ret);
-               return ret;
-       }
-
-       if (of_get_compatible_child(pdev->dev.of_node, "qcom,geni-debug-uart"))
-               earlycon_wrapper = wrapper;
-       of_node_put(pdev->dev.of_node);
-exit:
-#endif
        dev_set_drvdata(dev, wrapper);
        dev_dbg(dev, "GENI SE Driver probed\n");
        return devm_of_platform_populate(dev);
index 44d7e19..59640a1 100644 (file)
 #define SIFIVE_L2_DIRECCFIX_HIGH 0x104
 #define SIFIVE_L2_DIRECCFIX_COUNT 0x108
 
+#define SIFIVE_L2_DIRECCFAIL_LOW 0x120
+#define SIFIVE_L2_DIRECCFAIL_HIGH 0x124
+#define SIFIVE_L2_DIRECCFAIL_COUNT 0x128
+
 #define SIFIVE_L2_DATECCFIX_LOW 0x140
 #define SIFIVE_L2_DATECCFIX_HIGH 0x144
 #define SIFIVE_L2_DATECCFIX_COUNT 0x148
@@ -29,7 +33,7 @@
 #define SIFIVE_L2_WAYENABLE 0x08
 #define SIFIVE_L2_ECCINJECTERR 0x40
 
-#define SIFIVE_L2_MAX_ECCINTR 3
+#define SIFIVE_L2_MAX_ECCINTR 4
 
 static void __iomem *l2_base;
 static int g_irq[SIFIVE_L2_MAX_ECCINTR];
@@ -39,6 +43,7 @@ enum {
        DIR_CORR = 0,
        DATA_CORR,
        DATA_UNCORR,
+       DIR_UNCORR,
 };
 
 #ifdef CONFIG_DEBUG_FS
@@ -93,6 +98,7 @@ static void l2_config_read(void)
 
 static const struct of_device_id sifive_l2_ids[] = {
        { .compatible = "sifive,fu540-c000-ccache" },
+       { .compatible = "sifive,fu740-c000-ccache" },
        { /* end of table */ },
 };
 
@@ -155,6 +161,15 @@ static irqreturn_t l2_int_handler(int irq, void *device)
                atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_CE,
                                           "DirECCFix");
        }
+       if (irq == g_irq[DIR_UNCORR]) {
+               add_h = readl(l2_base + SIFIVE_L2_DIRECCFAIL_HIGH);
+               add_l = readl(l2_base + SIFIVE_L2_DIRECCFAIL_LOW);
+               /* Reading this register clears the DirFail interrupt signal */
+               readl(l2_base + SIFIVE_L2_DIRECCFAIL_COUNT);
+               atomic_notifier_call_chain(&l2_err_chain, SIFIVE_L2_ERR_TYPE_UE,
+                                          "DirECCFail");
+               panic("L2CACHE: DirFail @ 0x%08X.%08X\n", add_h, add_l);
+       }
        if (irq == g_irq[DATA_CORR]) {
                add_h = readl(l2_base + SIFIVE_L2_DATECCFIX_HIGH);
                add_l = readl(l2_base + SIFIVE_L2_DATECCFIX_LOW);
@@ -181,7 +196,7 @@ static int __init sifive_l2_init(void)
 {
        struct device_node *np;
        struct resource res;
-       int i, rc;
+       int i, rc, intr_num;
 
        np = of_find_matching_node(NULL, sifive_l2_ids);
        if (!np)
@@ -194,7 +209,13 @@ static int __init sifive_l2_init(void)
        if (!l2_base)
                return -ENOMEM;
 
-       for (i = 0; i < SIFIVE_L2_MAX_ECCINTR; i++) {
+       intr_num = of_property_count_u32_elems(np, "interrupts");
+       if (!intr_num) {
+               pr_err("L2CACHE: no interrupts property\n");
+               return -ENODEV;
+       }
+
+       for (i = 0; i < intr_num; i++) {
                g_irq[i] = irq_of_parse_and_map(np, i);
                rc = request_irq(g_irq[i], l2_int_handler, 0, "l2_ecc", NULL);
                if (rc) {
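
One caveat worth noting: of_property_count_u32_elems() returns a negative errno on failure, so the `!intr_num` test above would let an error value through to the loop; a defensive variant (illustrative only, not what the patch does):

        intr_num = of_property_count_u32_elems(np, "interrupts");
        if (intr_num <= 0) {
                pr_err("L2CACHE: no interrupts property\n");
                return -ENODEV;
        }
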
index bf1468e..51143a6 100644 (file)
@@ -332,7 +332,7 @@ static const struct omap_prm_data dra7_prm_data[] = {
        {
                .name = "l3init", .base = 0x4ae07300,
                .pwrstctrl = 0x0, .pwrstst = 0x4, .dmap = &omap_prm_alwon,
-               .rstctrl = 0x10, .rstst = 0x14, .rstmap = rst_map_012,
+               .rstctrl = 0x10, .rstst = 0x14, .rstmap = rst_map_01,
                .clkdm_name = "pcie"
        },
        {
@@ -830,8 +830,12 @@ static int omap_reset_deassert(struct reset_controller_dev *rcdev,
                       reset->prm->data->name, id);
 
 exit:
-       if (reset->clkdm)
+       if (reset->clkdm) {
+               /* At least dra7 iva needs a delay before clkdm idle */
+               if (has_rstst)
+                       udelay(1);
                pdata->clkdm_allow_idle(reset->clkdm);
+       }
 
        return ret;
 }
index d1e8c3a..4688542 100644 (file)
@@ -267,8 +267,10 @@ static int sdw_transfer_unlocked(struct sdw_bus *bus, struct sdw_msg *msg)
 
        ret = do_transfer(bus, msg);
        if (ret != 0 && ret != -ENODATA)
-               dev_err(bus->dev, "trf on Slave %d failed:%d\n",
-                       msg->dev_num, ret);
+               dev_err(bus->dev, "trf on Slave %d failed:%d %s addr %x count %d\n",
+                       msg->dev_num, ret,
+                       (msg->flags & SDW_MSG_FLAG_WRITE) ? "write" : "read",
+                       msg->addr, msg->len);
 
        if (msg->page)
                sdw_reset_page(bus, msg->dev_num);
@@ -405,10 +407,11 @@ sdw_nwrite_no_pm(struct sdw_slave *slave, u32 addr, size_t count, u8 *val)
        return sdw_transfer(slave->bus, &msg);
 }
 
-static int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value)
+int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value)
 {
        return sdw_nwrite_no_pm(slave, addr, 1, &value);
 }
+EXPORT_SYMBOL(sdw_write_no_pm);
 
 static int
 sdw_bread_no_pm(struct sdw_bus *bus, u16 dev_num, u32 addr)
@@ -476,8 +479,7 @@ int sdw_bwrite_no_pm_unlocked(struct sdw_bus *bus, u16 dev_num, u32 addr, u8 val
 }
 EXPORT_SYMBOL(sdw_bwrite_no_pm_unlocked);
 
-static int
-sdw_read_no_pm(struct sdw_slave *slave, u32 addr)
+int sdw_read_no_pm(struct sdw_slave *slave, u32 addr)
 {
        u8 buf;
        int ret;
@@ -488,6 +490,19 @@ sdw_read_no_pm(struct sdw_slave *slave, u32 addr)
        else
                return buf;
 }
+EXPORT_SYMBOL(sdw_read_no_pm);
+
+static int sdw_update_no_pm(struct sdw_slave *slave, u32 addr, u8 mask, u8 val)
+{
+       int tmp;
+
+       tmp = sdw_read_no_pm(slave, addr);
+       if (tmp < 0)
+               return tmp;
+
+       tmp = (tmp & ~mask) | val;
+       return sdw_write_no_pm(slave, addr, tmp);
+}
 
 /**
  * sdw_nread() - Read "n" contiguous SDW Slave registers
@@ -500,16 +515,16 @@ int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val)
 {
        int ret;
 
-       ret = pm_runtime_get_sync(slave->bus->dev);
+       ret = pm_runtime_get_sync(&slave->dev);
        if (ret < 0 && ret != -EACCES) {
-               pm_runtime_put_noidle(slave->bus->dev);
+               pm_runtime_put_noidle(&slave->dev);
                return ret;
        }
 
        ret = sdw_nread_no_pm(slave, addr, count, val);
 
-       pm_runtime_mark_last_busy(slave->bus->dev);
-       pm_runtime_put(slave->bus->dev);
+       pm_runtime_mark_last_busy(&slave->dev);
+       pm_runtime_put(&slave->dev);
 
        return ret;
 }
@@ -526,16 +541,16 @@ int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, u8 *val)
 {
        int ret;
 
-       ret = pm_runtime_get_sync(slave->bus->dev);
+       ret = pm_runtime_get_sync(&slave->dev);
        if (ret < 0 && ret != -EACCES) {
-               pm_runtime_put_noidle(slave->bus->dev);
+               pm_runtime_put_noidle(&slave->dev);
                return ret;
        }
 
        ret = sdw_nwrite_no_pm(slave, addr, count, val);
 
-       pm_runtime_mark_last_busy(slave->bus->dev);
-       pm_runtime_put(slave->bus->dev);
+       pm_runtime_mark_last_busy(&slave->dev);
+       pm_runtime_put(&slave->dev);
 
        return ret;
 }
@@ -623,6 +638,7 @@ err:
 
 static int sdw_assign_device_num(struct sdw_slave *slave)
 {
+       struct sdw_bus *bus = slave->bus;
        int ret, dev_num;
        bool new_device = false;
 
@@ -633,7 +649,7 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
                        dev_num = sdw_get_device_num(slave);
                        mutex_unlock(&slave->bus->bus_lock);
                        if (dev_num < 0) {
-                               dev_err(slave->bus->dev, "Get dev_num failed: %d\n",
+                               dev_err(bus->dev, "Get dev_num failed: %d\n",
                                        dev_num);
                                return dev_num;
                        }
@@ -646,7 +662,7 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
        }
 
        if (!new_device)
-               dev_dbg(slave->bus->dev,
+               dev_dbg(bus->dev,
                        "Slave already registered, reusing dev_num:%d\n",
                        slave->dev_num);
 
@@ -656,7 +672,7 @@ static int sdw_assign_device_num(struct sdw_slave *slave)
 
        ret = sdw_write_no_pm(slave, SDW_SCP_DEVNUMBER, dev_num);
        if (ret < 0) {
-               dev_err(&slave->dev, "Program device_num %d failed: %d\n",
+               dev_err(bus->dev, "Program device_num %d failed: %d\n",
                        dev_num, ret);
                return ret;
        }
@@ -679,9 +695,8 @@ void sdw_extract_slave_id(struct sdw_bus *bus,
        id->class_id = SDW_CLASS_ID(addr);
 
        dev_dbg(bus->dev,
-               "SDW Slave class_id %x, part_id %x, mfg_id %x, unique_id %x, version %x\n",
-                               id->class_id, id->part_id, id->mfg_id,
-                               id->unique_id, id->sdw_version);
+               "SDW Slave class_id 0x%02x, mfg_id 0x%04x, part_id 0x%04x, unique_id 0x%x, version 0x%x\n",
+               id->class_id, id->mfg_id, id->part_id, id->unique_id, id->sdw_version);
 }
 
 static int sdw_program_device_num(struct sdw_bus *bus)
@@ -735,7 +750,7 @@ static int sdw_program_device_num(struct sdw_bus *bus)
                                 */
                                ret = sdw_assign_device_num(slave);
                                if (ret) {
-                                       dev_err(slave->bus->dev,
+                                       dev_err(bus->dev,
                                                "Assign dev_num failed:%d\n",
                                                ret);
                                        return ret;
@@ -775,15 +790,17 @@ static int sdw_program_device_num(struct sdw_bus *bus)
 static void sdw_modify_slave_status(struct sdw_slave *slave,
                                    enum sdw_slave_status status)
 {
-       mutex_lock(&slave->bus->bus_lock);
+       struct sdw_bus *bus = slave->bus;
 
-       dev_vdbg(&slave->dev,
+       mutex_lock(&bus->bus_lock);
+
+       dev_vdbg(bus->dev,
                 "%s: changing status slave %d status %d new status %d\n",
                 __func__, slave->dev_num, slave->status, status);
 
        if (status == SDW_SLAVE_UNATTACHED) {
                dev_dbg(&slave->dev,
-                       "%s: initializing completion for Slave %d\n",
+                       "%s: initializing enumeration and init completion for Slave %d\n",
                        __func__, slave->dev_num);
 
                init_completion(&slave->enumeration_complete);
@@ -792,13 +809,13 @@ static void sdw_modify_slave_status(struct sdw_slave *slave,
        } else if ((status == SDW_SLAVE_ATTACHED) &&
                   (slave->status == SDW_SLAVE_UNATTACHED)) {
                dev_dbg(&slave->dev,
-                       "%s: signaling completion for Slave %d\n",
+                       "%s: signaling enumeration completion for Slave %d\n",
                        __func__, slave->dev_num);
 
                complete(&slave->enumeration_complete);
        }
        slave->status = status;
-       mutex_unlock(&slave->bus->bus_lock);
+       mutex_unlock(&bus->bus_lock);
 }
 
 static enum sdw_clk_stop_mode sdw_get_clk_stop_mode(struct sdw_slave *slave)
@@ -950,17 +967,17 @@ int sdw_bus_prep_clk_stop(struct sdw_bus *bus)
                        simple_clk_stop = false;
        }
 
-       if (is_slave && !simple_clk_stop) {
+       /* Skip remaining clock stop preparation if no Slave is attached */
+       if (!is_slave)
+               return ret;
+
+       if (!simple_clk_stop) {
                ret = sdw_bus_wait_for_clk_prep_deprep(bus,
                                                       SDW_BROADCAST_DEV_NUM);
                if (ret < 0)
                        return ret;
        }
 
-       /* Don't need to inform slaves if there is no slave attached */
-       if (!is_slave)
-               return ret;
-
        /* Inform slaves that prep is done */
        list_for_each_entry(slave, &bus->slaves, node) {
                if (!slave->dev_num)
@@ -1074,16 +1091,13 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus)
                                 "clk stop deprep failed:%d", ret);
        }
 
-       if (is_slave && !simple_clk_stop)
-               sdw_bus_wait_for_clk_prep_deprep(bus, SDW_BROADCAST_DEV_NUM);
-
-       /*
-        * Don't need to call slave callback function if there is no slave
-        * attached
-        */
+       /* Skip remaining clock stop de-preparation if no Slave is attached */
        if (!is_slave)
                return 0;
 
+       if (!simple_clk_stop)
+               sdw_bus_wait_for_clk_prep_deprep(bus, SDW_BROADCAST_DEV_NUM);
+
        list_for_each_entry(slave, &bus->slaves, node) {
                if (!slave->dev_num)
                        continue;
@@ -1127,7 +1141,7 @@ int sdw_configure_dpn_intr(struct sdw_slave *slave,
 
        ret = sdw_update(slave, addr, (mask | SDW_DPN_INT_PORT_READY), val);
        if (ret < 0)
-               dev_err(slave->bus->dev,
+               dev_err(&slave->dev,
                        "SDW_DPN_INTMASK write failed:%d\n", val);
 
        return ret;
@@ -1210,7 +1224,7 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave)
        }
        scale_index++;
 
-       ret = sdw_write(slave, SDW_SCP_BUS_CLOCK_BASE, base);
+       ret = sdw_write_no_pm(slave, SDW_SCP_BUS_CLOCK_BASE, base);
        if (ret < 0) {
                dev_err(&slave->dev,
                        "SDW_SCP_BUS_CLOCK_BASE write failed:%d\n", ret);
@@ -1218,13 +1232,13 @@ static int sdw_slave_set_frequency(struct sdw_slave *slave)
        }
 
        /* initialize scale for both banks */
-       ret = sdw_write(slave, SDW_SCP_BUSCLOCK_SCALE_B0, scale_index);
+       ret = sdw_write_no_pm(slave, SDW_SCP_BUSCLOCK_SCALE_B0, scale_index);
        if (ret < 0) {
                dev_err(&slave->dev,
                        "SDW_SCP_BUSCLOCK_SCALE_B0 write failed:%d\n", ret);
                return ret;
        }
-       ret = sdw_write(slave, SDW_SCP_BUSCLOCK_SCALE_B1, scale_index);
+       ret = sdw_write_no_pm(slave, SDW_SCP_BUSCLOCK_SCALE_B1, scale_index);
        if (ret < 0)
                dev_err(&slave->dev,
                        "SDW_SCP_BUSCLOCK_SCALE_B1 write failed:%d\n", ret);
@@ -1256,9 +1270,9 @@ static int sdw_initialize_slave(struct sdw_slave *slave)
        val = slave->prop.scp_int1_mask;
 
        /* Enable SCP interrupts */
-       ret = sdw_update(slave, SDW_SCP_INTMASK1, val, val);
+       ret = sdw_update_no_pm(slave, SDW_SCP_INTMASK1, val, val);
        if (ret < 0) {
-               dev_err(slave->bus->dev,
+               dev_err(&slave->dev,
                        "SDW_SCP_INTMASK1 write failed:%d\n", ret);
                return ret;
        }
@@ -1271,9 +1285,9 @@ static int sdw_initialize_slave(struct sdw_slave *slave)
        val = prop->dp0_prop->imp_def_interrupts;
        val |= SDW_DP0_INT_PORT_READY | SDW_DP0_INT_BRA_FAILURE;
 
-       ret = sdw_update(slave, SDW_DP0_INTMASK, val, val);
+       ret = sdw_update_no_pm(slave, SDW_DP0_INTMASK, val, val);
        if (ret < 0)
-               dev_err(slave->bus->dev,
+               dev_err(&slave->dev,
                        "SDW_DP0_INTMASK read failed:%d\n", ret);
        return ret;
 }
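
The switch to the _no_pm accessors in this initialization path is consistent with the rest of the patch: these writes run while the Slave is being enumerated, often from the resume or interrupt path itself, where taking a runtime-PM reference with pm_runtime_get_sync() would be redundant at best and could recurse into resume; presumably that is the motivation here as well.
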
@@ -1283,9 +1297,9 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status)
        u8 clear, impl_int_mask;
        int status, status2, ret, count = 0;
 
-       status = sdw_read(slave, SDW_DP0_INT);
+       status = sdw_read_no_pm(slave, SDW_DP0_INT);
        if (status < 0) {
-               dev_err(slave->bus->dev,
+               dev_err(&slave->dev,
                        "SDW_DP0_INT read failed:%d\n", status);
                return status;
        }
@@ -1322,17 +1336,17 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status)
                }
 
                /* clear the interrupts but don't touch reserved and SDCA_CASCADE fields */
-               ret = sdw_write(slave, SDW_DP0_INT, clear);
+               ret = sdw_write_no_pm(slave, SDW_DP0_INT, clear);
                if (ret < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_DP0_INT write failed:%d\n", ret);
                        return ret;
                }
 
                /* Read DP0 interrupt again */
-               status2 = sdw_read(slave, SDW_DP0_INT);
+               status2 = sdw_read_no_pm(slave, SDW_DP0_INT);
                if (status2 < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_DP0_INT read failed:%d\n", status2);
                        return status2;
                }
@@ -1345,7 +1359,7 @@ static int sdw_handle_dp0_interrupt(struct sdw_slave *slave, u8 *slave_status)
        } while ((status & SDW_DP0_INTERRUPTS) && (count < SDW_READ_INTR_CLEAR_RETRY));
 
        if (count == SDW_READ_INTR_CLEAR_RETRY)
-               dev_warn(slave->bus->dev, "Reached MAX_RETRY on DP0 read\n");
+               dev_warn(&slave->dev, "Reached MAX_RETRY on DP0 read\n");
 
        return ret;
 }
@@ -1361,9 +1375,9 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
                return sdw_handle_dp0_interrupt(slave, slave_status);
 
        addr = SDW_DPN_INT(port);
-       status = sdw_read(slave, addr);
+       status = sdw_read_no_pm(slave, addr);
        if (status < 0) {
-               dev_err(slave->bus->dev,
+               dev_err(&slave->dev,
                        "SDW_DPN_INT read failed:%d\n", status);
 
                return status;
@@ -1395,17 +1409,17 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
                }
 
                /* clear the interrupt but don't touch reserved fields */
-               ret = sdw_write(slave, addr, clear);
+               ret = sdw_write_no_pm(slave, addr, clear);
                if (ret < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_DPN_INT write failed:%d\n", ret);
                        return ret;
                }
 
                /* Read DPN interrupt again */
-               status2 = sdw_read(slave, addr);
+               status2 = sdw_read_no_pm(slave, addr);
                if (status2 < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_DPN_INT read failed:%d\n", status2);
                        return status2;
                }
@@ -1418,7 +1432,7 @@ static int sdw_handle_port_interrupt(struct sdw_slave *slave,
        } while ((status & SDW_DPN_INTERRUPTS) && (count < SDW_READ_INTR_CLEAR_RETRY));
 
        if (count == SDW_READ_INTR_CLEAR_RETRY)
-               dev_warn(slave->bus->dev, "Reached MAX_RETRY on port read");
+               dev_warn(&slave->dev, "Reached MAX_RETRY on port read");
 
        return ret;
 }
@@ -1440,30 +1454,30 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
        ret = pm_runtime_get_sync(&slave->dev);
        if (ret < 0 && ret != -EACCES) {
                dev_err(&slave->dev, "Failed to resume device: %d\n", ret);
-               pm_runtime_put_noidle(slave->bus->dev);
+               pm_runtime_put_noidle(&slave->dev);
                return ret;
        }
 
        /* Read Intstat 1, Intstat 2 and Intstat 3 registers */
-       ret = sdw_read(slave, SDW_SCP_INT1);
+       ret = sdw_read_no_pm(slave, SDW_SCP_INT1);
        if (ret < 0) {
-               dev_err(slave->bus->dev,
+               dev_err(&slave->dev,
                        "SDW_SCP_INT1 read failed:%d\n", ret);
                goto io_err;
        }
        buf = ret;
 
-       ret = sdw_nread(slave, SDW_SCP_INTSTAT2, 2, buf2);
+       ret = sdw_nread_no_pm(slave, SDW_SCP_INTSTAT2, 2, buf2);
        if (ret < 0) {
-               dev_err(slave->bus->dev,
+               dev_err(&slave->dev,
                        "SDW_SCP_INT2/3 read failed:%d\n", ret);
                goto io_err;
        }
 
        if (slave->prop.is_sdca) {
-               ret = sdw_read(slave, SDW_DP0_INT);
+               ret = sdw_read_no_pm(slave, SDW_DP0_INT);
                if (ret < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_DP0_INT read failed:%d\n", ret);
                        goto io_err;
                }
@@ -1558,9 +1572,9 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
                }
 
                /* Ack interrupt */
-               ret = sdw_write(slave, SDW_SCP_INT1, clear);
+               ret = sdw_write_no_pm(slave, SDW_SCP_INT1, clear);
                if (ret < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_SCP_INT1 write failed:%d\n", ret);
                        goto io_err;
                }
@@ -1572,25 +1586,25 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
                 * Read status again to ensure no new interrupts arrived
                 * while servicing interrupts.
                 */
-               ret = sdw_read(slave, SDW_SCP_INT1);
+               ret = sdw_read_no_pm(slave, SDW_SCP_INT1);
                if (ret < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_SCP_INT1 read failed:%d\n", ret);
                        goto io_err;
                }
                _buf = ret;
 
-               ret = sdw_nread(slave, SDW_SCP_INTSTAT2, 2, _buf2);
+               ret = sdw_nread_no_pm(slave, SDW_SCP_INTSTAT2, 2, _buf2);
                if (ret < 0) {
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "SDW_SCP_INT2/3 read failed:%d\n", ret);
                        goto io_err;
                }
 
                if (slave->prop.is_sdca) {
-                       ret = sdw_read(slave, SDW_DP0_INT);
+                       ret = sdw_read_no_pm(slave, SDW_DP0_INT);
                        if (ret < 0) {
-                               dev_err(slave->bus->dev,
+                               dev_err(&slave->dev,
                                        "SDW_DP0_INT read failed:%d\n", ret);
                                goto io_err;
                        }
@@ -1616,7 +1630,7 @@ static int sdw_handle_slave_alerts(struct sdw_slave *slave)
        } while (stat != 0 && count < SDW_READ_INTR_CLEAR_RETRY);
 
        if (count == SDW_READ_INTR_CLEAR_RETRY)
-               dev_warn(slave->bus->dev, "Reached MAX_RETRY on alert read\n");
+               dev_warn(&slave->dev, "Reached MAX_RETRY on alert read\n");
 
 io_err:
        pm_runtime_mark_last_busy(&slave->dev);
@@ -1722,7 +1736,7 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
                case SDW_SLAVE_ALERT:
                        ret = sdw_handle_slave_alerts(slave);
                        if (ret)
-                               dev_err(bus->dev,
+                               dev_err(&slave->dev,
                                        "Slave %d alert handling failed: %d\n",
                                        i, ret);
                        break;
@@ -1741,24 +1755,29 @@ int sdw_handle_slave_status(struct sdw_bus *bus,
 
                        ret = sdw_initialize_slave(slave);
                        if (ret)
-                               dev_err(bus->dev,
+                               dev_err(&slave->dev,
                                        "Slave %d initialization failed: %d\n",
                                        i, ret);
 
                        break;
 
                default:
-                       dev_err(bus->dev, "Invalid slave %d status:%d\n",
+                       dev_err(&slave->dev, "Invalid slave %d status:%d\n",
                                i, status[i]);
                        break;
                }
 
                ret = sdw_update_slave_status(slave, status[i]);
                if (ret)
-                       dev_err(slave->bus->dev,
+                       dev_err(&slave->dev,
                                "Update Slave status failed:%d\n", ret);
-               if (attached_initializing)
+               if (attached_initializing) {
+                       dev_dbg(&slave->dev,
+                               "%s: signaling initialization completion for Slave %d\n",
+                               __func__, slave->dev_num);
+
                        complete(&slave->initialization_complete);
+               }
        }
 
        return ret;
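
For reference, consumers of initialization_complete typically block on it with a timeout. A minimal sketch of the waiting side; the helper name and the 2-second timeout are illustrative assumptions, not taken from this series:

        /* Sketch: Slave-side wait for the completion signaled above */
        static int wait_for_sdw_init(struct sdw_slave *slave)
        {
                unsigned long time;

                time = wait_for_completion_timeout(&slave->initialization_complete,
                                                   msecs_to_jiffies(2000));
                if (!time) {
                        dev_err(&slave->dev, "initialization did not complete\n");
                        return -ETIMEDOUT;
                }
                return 0;
        }
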
index 9fa5516..d05442e 100644 (file)
@@ -188,7 +188,7 @@ MODULE_PARM_DESC(cdns_mcp_int_mask, "Cadence MCP IntMask");
 #define CDNS_PDI_CONFIG_PORT                   GENMASK(4, 0)
 
 /* Driver defaults */
-#define CDNS_TX_TIMEOUT                                2000
+#define CDNS_TX_TIMEOUT                                500
 
 #define CDNS_SCP_RX_FIFOLEVEL                  0x2
 
@@ -483,11 +483,11 @@ cdns_fill_msg_resp(struct sdw_cdns *cdns,
        for (i = 0; i < count; i++) {
                if (!(cdns->response_buf[i] & CDNS_MCP_RESP_ACK)) {
                        no_ack = 1;
-                       dev_dbg_ratelimited(cdns->dev, "Msg Ack not received\n");
-                       if (cdns->response_buf[i] & CDNS_MCP_RESP_NACK) {
-                               nack = 1;
-                               dev_err_ratelimited(cdns->dev, "Msg NACK received\n");
-                       }
+                       dev_vdbg(cdns->dev, "Msg Ack not received, cmd %d\n", i);
+               }
+               if (cdns->response_buf[i] & CDNS_MCP_RESP_NACK) {
+                       nack = 1;
+                       dev_err_ratelimited(cdns->dev, "Msg NACK received, cmd %d\n", i);
                }
        }
 
@@ -734,21 +734,18 @@ static void cdns_read_response(struct sdw_cdns *cdns)
 }
 
 static int cdns_update_slave_status(struct sdw_cdns *cdns,
-                                   u32 slave0, u32 slave1)
+                                   u64 slave_intstat)
 {
        enum sdw_slave_status status[SDW_MAX_DEVICES + 1];
        bool is_slave = false;
-       u64 slave;
        u32 mask;
        int i, set_status;
 
-       /* combine the two status */
-       slave = ((u64)slave1 << 32) | slave0;
        memset(status, 0, sizeof(status));
 
        for (i = 0; i <= SDW_MAX_DEVICES; i++) {
-               mask = (slave >> (i * CDNS_MCP_SLAVE_STATUS_NUM)) &
-                               CDNS_MCP_SLAVE_STATUS_BITS;
+               mask = (slave_intstat >> (i * CDNS_MCP_SLAVE_STATUS_NUM)) &
+                       CDNS_MCP_SLAVE_STATUS_BITS;
                if (!mask)
                        continue;
 
@@ -918,13 +915,17 @@ static void cdns_update_slave_status_work(struct work_struct *work)
        struct sdw_cdns *cdns =
                container_of(work, struct sdw_cdns, work);
        u32 slave0, slave1;
-
-       dev_dbg_ratelimited(cdns->dev, "Slave status change\n");
+       u64 slave_intstat;
 
        slave0 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT0);
        slave1 = cdns_readl(cdns, CDNS_MCP_SLAVE_INTSTAT1);
 
-       cdns_update_slave_status(cdns, slave0, slave1);
+       /* combine the two status */
+       slave_intstat = ((u64)slave1 << 32) | slave0;
+
+       dev_dbg_ratelimited(cdns->dev, "Slave status change: 0x%llx\n", slave_intstat);
+
+       cdns_update_slave_status(cdns, slave_intstat);
        cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT0, slave0);
        cdns_writel(cdns, CDNS_MCP_SLAVE_INTSTAT1, slave1);
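
Passing the pre-combined u64 keeps the INTSTAT0/INTSTAT1 pairing in one place and lets the log line print a single value. The consumer then walks 2-bit per-device fields, as in the earlier hunk; a self-contained sketch, with the two CDNS_MCP_SLAVE_STATUS_* constants given assumed values:

        /* Sketch: extract the 2-bit status field for device i */
        #define CDNS_MCP_SLAVE_STATUS_NUM       2               /* assumed */
        #define CDNS_MCP_SLAVE_STATUS_BITS      GENMASK(1, 0)   /* assumed */

        static u32 cdns_slave_status_field(u64 slave_intstat, int i)
        {
                return (slave_intstat >> (i * CDNS_MCP_SLAVE_STATUS_NUM)) &
                       CDNS_MCP_SLAVE_STATUS_BITS;
        }
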
 
index 66adb25..a2d5cda 100644 (file)
@@ -967,7 +967,7 @@ static int intel_hw_params(struct snd_pcm_substream *substream,
        }
 
        /* Port configuration */
-       pconfig = kcalloc(1, sizeof(*pconfig), GFP_KERNEL);
+       pconfig = kzalloc(sizeof(*pconfig), GFP_KERNEL);
        if (!pconfig) {
                ret =  -ENOMEM;
                goto error;
@@ -1673,10 +1673,12 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev)
 
        } else if (clock_stop_quirks & SDW_INTEL_CLK_STOP_BUS_RESET ||
                   !clock_stop_quirks) {
+               bool wake_enable = true;
+
                ret = sdw_cdns_clock_stop(cdns, true);
                if (ret < 0) {
                        dev_err(dev, "cannot enable clock stop on suspend\n");
-                       return ret;
+                       wake_enable = false;
                }
 
                ret = sdw_cdns_enable_interrupt(cdns, false);
@@ -1691,7 +1693,7 @@ static int __maybe_unused intel_suspend_runtime(struct device *dev)
                        return ret;
                }
 
-               intel_shim_wake(sdw, true);
+               intel_shim_wake(sdw, wake_enable);
        } else {
                dev_err(dev, "%s clock_stop_quirks %x unsupported\n",
                        __func__, clock_stop_quirks);
index 76820d0..06bac8b 100644 (file)
@@ -48,8 +48,6 @@ struct sdw_intel {
 #endif
 };
 
-#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE      BIT(1)
-
 int intel_master_startup(struct platform_device *pdev);
 int intel_master_process_wakeen_event(struct platform_device *pdev);
 
index cabdadb..05b726c 100644 (file)
 #include "cadence_master.h"
 #include "intel.h"
 
-#define SDW_LINK_TYPE          4 /* from Intel ACPI documentation */
-#define SDW_MAX_LINKS          4
 #define SDW_SHIM_LCAP          0x0
 #define SDW_SHIM_BASE          0x2C000
 #define SDW_ALH_BASE           0x2C800
 #define SDW_LINK_BASE          0x30000
 #define SDW_LINK_SIZE          0x10000
 
-static int ctrl_link_mask;
-module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444);
-MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
-
-static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
-{
-       struct fwnode_handle *link;
-       char name[32];
-       u32 quirk_mask = 0;
-
-       /* Find master handle */
-       snprintf(name, sizeof(name),
-                "mipi-sdw-link-%d-subproperties", i);
-
-       link = fwnode_get_named_child_node(fw_node, name);
-       if (!link)
-               return false;
-
-       fwnode_property_read_u32(link,
-                                "intel-quirk-mask",
-                                &quirk_mask);
-
-       if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
-               return false;
-
-       return true;
-}
-
 static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
 {
        struct sdw_intel_link_res *link = ctx->links;
@@ -81,74 +51,6 @@ static int sdw_intel_cleanup(struct sdw_intel_ctx *ctx)
        return 0;
 }
 
-static int
-sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
-{
-       struct acpi_device *adev;
-       int ret, i;
-       u8 count;
-
-       if (acpi_bus_get_device(info->handle, &adev))
-               return -EINVAL;
-
-       /* Found controller, find links supported */
-       count = 0;
-       ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
-                                           "mipi-sdw-master-count", &count, 1);
-
-       /*
-        * In theory we could check the number of links supported in
-        * hardware, but in that step we cannot assume SoundWire IP is
-        * powered.
-        *
-        * In addition, if the BIOS doesn't even provide this
-        * 'master-count' property then all the inits based on link
-        * masks will fail as well.
-        *
-        * We will check the hardware capabilities in the startup() step
-        */
-
-       if (ret) {
-               dev_err(&adev->dev,
-                       "Failed to read mipi-sdw-master-count: %d\n", ret);
-               return -EINVAL;
-       }
-
-       /* Check count is within bounds */
-       if (count > SDW_MAX_LINKS) {
-               dev_err(&adev->dev, "Link count %d exceeds max %d\n",
-                       count, SDW_MAX_LINKS);
-               return -EINVAL;
-       }
-
-       if (!count) {
-               dev_warn(&adev->dev, "No SoundWire links detected\n");
-               return -EINVAL;
-       }
-       dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count);
-
-       info->count = count;
-       info->link_mask = 0;
-
-       for (i = 0; i < count; i++) {
-               if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) {
-                       dev_dbg(&adev->dev,
-                               "Link %d masked, will not be enabled\n", i);
-                       continue;
-               }
-
-               if (!is_link_enabled(acpi_fwnode_handle(adev), i)) {
-                       dev_dbg(&adev->dev,
-                               "Link %d not selected in firmware\n", i);
-                       continue;
-               }
-
-               info->link_mask |= BIT(i);
-       }
-
-       return 0;
-}
-
 #define HDA_DSP_REG_ADSPIC2             (0x10)
 #define HDA_DSP_REG_ADSPIS2             (0x14)
 #define HDA_DSP_REG_ADSPIC2_SNDW        BIT(5)
@@ -357,65 +259,6 @@ sdw_intel_startup_controller(struct sdw_intel_ctx *ctx)
        return 0;
 }
 
-static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
-                                    void *cdata, void **return_value)
-{
-       struct sdw_intel_acpi_info *info = cdata;
-       struct acpi_device *adev;
-       acpi_status status;
-       u64 adr;
-
-       status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
-       if (ACPI_FAILURE(status))
-               return AE_OK; /* keep going */
-
-       if (acpi_bus_get_device(handle, &adev)) {
-               pr_err("%s: Couldn't find ACPI handle\n", __func__);
-               return AE_NOT_FOUND;
-       }
-
-       info->handle = handle;
-
-       /*
-        * On some Intel platforms, multiple children of the HDAS
-        * device can be found, but only one of them is the SoundWire
-        * controller. The SNDW device is always exposed with
-        * Name(_ADR, 0x40000000), with bits 31..28 representing the
-        * SoundWire link so filter accordingly
-        */
-       if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
-               return AE_OK; /* keep going */
-
-       /* device found, stop namespace walk */
-       return AE_CTRL_TERMINATE;
-}
-
-/**
- * sdw_intel_acpi_scan() - SoundWire Intel init routine
- * @parent_handle: ACPI parent handle
- * @info: description of what firmware/DSDT tables expose
- *
- * This scans the namespace and queries firmware to figure out which
- * links to enable. A follow-up use of sdw_intel_probe() and
- * sdw_intel_startup() is required for creation of devices and bus
- * startup
- */
-int sdw_intel_acpi_scan(acpi_handle *parent_handle,
-                       struct sdw_intel_acpi_info *info)
-{
-       acpi_status status;
-
-       status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
-                                    parent_handle, 1,
-                                    sdw_intel_acpi_cb,
-                                    NULL, info, NULL);
-       if (ACPI_FAILURE(status))
-               return -ENODEV;
-
-       return sdw_intel_scan_controller(info);
-}
-EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SOUNDWIRE_INTEL_INIT);
-
 /**
  * sdw_intel_probe() - SoundWire Intel probe routine
  * @res: resource data
index a08f408..180f38b 100644 (file)
@@ -163,15 +163,13 @@ int sdw_acpi_find_slaves(struct sdw_bus *bus)
 
                        if (id.unique_id != id2.unique_id) {
                                dev_dbg(bus->dev,
-                                       "Valid unique IDs %x %x for Slave mfg %x part %d\n",
-                                       id.unique_id, id2.unique_id,
-                                       id.mfg_id, id.part_id);
+                                       "Valid unique IDs 0x%x 0x%x for Slave mfg_id 0x%04x, part_id 0x%04x\n",
+                                       id.unique_id, id2.unique_id, id.mfg_id, id.part_id);
                                ignore_unique_id = false;
                        } else {
                                dev_err(bus->dev,
-                                       "Invalid unique IDs %x %x for Slave mfg %x part %d\n",
-                                       id.unique_id, id2.unique_id,
-                                       id.mfg_id, id.part_id);
+                                       "Invalid unique IDs 0x%x 0x%x for Slave mfg_id 0x%04x, part_id 0x%04x\n",
+                                       id.unique_id, id2.unique_id, id.mfg_id, id.part_id);
                                return -ENODEV;
                        }
                }
index b48b661..3210359 100644 (file)
@@ -130,7 +130,7 @@ static struct attribute *slave_dev_attrs[] = {
  * we don't use ATTRIBUTES_GROUP here since we want to add a subdirectory
  * for device-level properties
  */
-static struct attribute_group sdw_slave_dev_attr_group = {
+static const struct attribute_group sdw_slave_dev_attr_group = {
        .attrs  = slave_dev_attrs,
        .name = "dev-properties",
 };
index 442cc7c..52ddb32 100644 (file)
@@ -1433,6 +1433,7 @@ static int cqspi_probe(struct platform_device *pdev)
        cqspi = spi_master_get_devdata(master);
 
        cqspi->pdev = pdev;
+       platform_set_drvdata(pdev, cqspi);
 
        /* Obtain configuration from OF. */
        ret = cqspi_of_get_pdata(cqspi);
index de844b4..bbbd311 100644 (file)
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
- * Copyright (c) 2012-2015, 2017, The Linux Foundation. All rights reserved.
+ * Copyright (c) 2012-2015, 2017, 2021, The Linux Foundation. All rights reserved.
  */
 #include <linux/bitmap.h>
 #include <linux/delay.h>
@@ -505,8 +505,7 @@ static void cleanup_irq(struct spmi_pmic_arb *pmic_arb, u16 apid, int id)
 static void periph_interrupt(struct spmi_pmic_arb *pmic_arb, u16 apid)
 {
        unsigned int irq;
-       u32 status;
-       int id;
+       u32 status, id;
        u8 sid = (pmic_arb->apid_data[apid].ppid >> 8) & 0xF;
        u8 per = pmic_arb->apid_data[apid].ppid & 0xFF;
 
index 35b75f0..81a246f 100644 (file)
@@ -260,6 +260,7 @@ static irqreturn_t apci1032_interrupt(int irq, void *d)
        struct apci1032_private *devpriv = dev->private;
        struct comedi_subdevice *s = dev->read_subdev;
        unsigned int ctrl;
+       unsigned short val;
 
        /* check interrupt is from this device */
        if ((inl(devpriv->amcc_iobase + AMCC_OP_REG_INTCSR) &
@@ -275,7 +276,8 @@ static irqreturn_t apci1032_interrupt(int irq, void *d)
        outl(ctrl & ~APCI1032_CTRL_INT_ENA, dev->iobase + APCI1032_CTRL_REG);
 
        s->state = inl(dev->iobase + APCI1032_STATUS_REG) & 0xffff;
-       comedi_buf_write_samples(s, &s->state, 1);
+       val = s->state;
+       comedi_buf_write_samples(s, &val, 1);
        comedi_handle_events(dev, s);
 
        /* enable the interrupt */
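
comedi_buf_write_samples() copies bytes according to the subdevice sample size, which is 16 bits for these boards, so passing the address of the wider unsigned int s->state hands it the wrong half of the word on big-endian machines. Staging the value in a correctly sized local is the fix, and the same pattern repeats across the comedi hunks below:

        /* Sketch: stage the sample in a local that matches the sample width */
        unsigned short val;

        val = s->state;                         /* narrow to 16 bits */
        comedi_buf_write_samples(s, &val, 1);   /* copies sizeof(val) bytes */
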
index 11efb21..b04c15d 100644 (file)
@@ -208,7 +208,7 @@ static irqreturn_t apci1500_interrupt(int irq, void *d)
        struct comedi_device *dev = d;
        struct apci1500_private *devpriv = dev->private;
        struct comedi_subdevice *s = dev->read_subdev;
-       unsigned int status = 0;
+       unsigned short status = 0;
        unsigned int val;
 
        val = inl(devpriv->amcc + AMCC_OP_REG_INTCSR);
@@ -238,14 +238,14 @@ static irqreturn_t apci1500_interrupt(int irq, void *d)
         *
         *    Mask     Meaning
         * ----------  ------------------------------------------
-        * 0x00000001  Event 1 has occurred
-        * 0x00000010  Event 2 has occurred
-        * 0x00000100  Counter/timer 1 has run down (not implemented)
-        * 0x00001000  Counter/timer 2 has run down (not implemented)
-        * 0x00010000  Counter 3 has run down (not implemented)
-        * 0x00100000  Watchdog has run down (not implemented)
-        * 0x01000000  Voltage error
-        * 0x10000000  Short-circuit error
+        * 0b00000001  Event 1 has occurred
+        * 0b00000010  Event 2 has occurred
+        * 0b00000100  Counter/timer 1 has run down (not implemented)
+        * 0b00001000  Counter/timer 2 has run down (not implemented)
+        * 0b00010000  Counter 3 has run down (not implemented)
+        * 0b00100000  Watchdog has run down (not implemented)
+        * 0b01000000  Voltage error
+        * 0b10000000  Short-circuit error
         */
        comedi_buf_write_samples(s, &status, 1);
        comedi_handle_events(dev, s);
index 692893c..0906077 100644 (file)
@@ -300,11 +300,11 @@ static int pci1710_ai_eoc(struct comedi_device *dev,
 static int pci1710_ai_read_sample(struct comedi_device *dev,
                                  struct comedi_subdevice *s,
                                  unsigned int cur_chan,
-                                 unsigned int *val)
+                                 unsigned short *val)
 {
        const struct boardtype *board = dev->board_ptr;
        struct pci1710_private *devpriv = dev->private;
-       unsigned int sample;
+       unsigned short sample;
        unsigned int chan;
 
        sample = inw(dev->iobase + PCI171X_AD_DATA_REG);
@@ -345,7 +345,7 @@ static int pci1710_ai_insn_read(struct comedi_device *dev,
        pci1710_ai_setup_chanlist(dev, s, &insn->chanspec, 1, 1);
 
        for (i = 0; i < insn->n; i++) {
-               unsigned int val;
+               unsigned short val;
 
                /* start conversion */
                outw(0, dev->iobase + PCI171X_SOFTTRG_REG);
@@ -395,7 +395,7 @@ static void pci1710_handle_every_sample(struct comedi_device *dev,
 {
        struct comedi_cmd *cmd = &s->async->cmd;
        unsigned int status;
-       unsigned int val;
+       unsigned short val;
        int ret;
 
        status = inw(dev->iobase + PCI171X_STATUS_REG);
@@ -455,7 +455,7 @@ static void pci1710_handle_fifo(struct comedi_device *dev,
        }
 
        for (i = 0; i < devpriv->max_samples; i++) {
-               unsigned int val;
+               unsigned short val;
                int ret;
 
                ret = pci1710_ai_read_sample(dev, s, s->async->cur_chan, &val);
index 0437526..981d281 100644 (file)
@@ -126,7 +126,9 @@ static irqreturn_t pc236_interrupt(int irq, void *d)
 
        handled = pc236_intr_check(dev);
        if (dev->attached && handled) {
-               comedi_buf_write_samples(s, &s->state, 1);
+               unsigned short val = 0;
+
+               comedi_buf_write_samples(s, &val, 1);
                comedi_handle_events(dev, s);
        }
        return IRQ_RETVAL(handled);
index d740c47..2f20bd5 100644 (file)
@@ -1281,7 +1281,7 @@ static int cb_pcidas_auto_attach(struct comedi_device *dev,
             devpriv->amcc + AMCC_OP_REG_INTCSR);
 
        ret = request_irq(pcidev->irq, cb_pcidas_interrupt, IRQF_SHARED,
-                         dev->board_name, dev);
+                         "cb_pcidas", dev);
        if (ret) {
                dev_dbg(dev->class_dev, "unable to allocate irq %d\n",
                        pcidev->irq);
index fa987bb..6d3ba39 100644 (file)
@@ -4035,7 +4035,7 @@ static int auto_attach(struct comedi_device *dev,
        init_stc_registers(dev);
 
        retval = request_irq(pcidev->irq, handle_interrupt, IRQF_SHARED,
-                            dev->board_name, dev);
+                            "cb_pcidas64", dev);
        if (retval) {
                dev_dbg(dev->class_dev, "unable to allocate irq %u\n",
                        pcidev->irq);
index 9361b2d..5338b5e 100644 (file)
@@ -210,12 +210,13 @@ static irqreturn_t parport_interrupt(int irq, void *d)
        struct comedi_device *dev = d;
        struct comedi_subdevice *s = dev->read_subdev;
        unsigned int ctrl;
+       unsigned short val = 0;
 
        ctrl = inb(dev->iobase + PARPORT_CTRL_REG);
        if (!(ctrl & PARPORT_CTRL_IRQ_ENA))
                return IRQ_NONE;
 
-       comedi_buf_write_samples(s, &s->state, 1);
+       comedi_buf_write_samples(s, &val, 1);
        comedi_handle_events(dev, s);
 
        return IRQ_HANDLED;
index 04e224f..96f4107 100644 (file)
@@ -186,7 +186,7 @@ static irqreturn_t das6402_interrupt(int irq, void *d)
        if (status & DAS6402_STATUS_FFULL) {
                async->events |= COMEDI_CB_OVERFLOW;
        } else if (status & DAS6402_STATUS_FFNE) {
-               unsigned int val;
+               unsigned short val;
 
                val = das6402_ai_read_sample(dev, s);
                comedi_buf_write_samples(s, &val, 1);
index 4ea100f..2881808 100644 (file)
@@ -427,7 +427,7 @@ static irqreturn_t das800_interrupt(int irq, void *d)
        struct comedi_cmd *cmd;
        unsigned long irq_flags;
        unsigned int status;
-       unsigned int val;
+       unsigned short val;
        bool fifo_empty;
        bool fifo_overflow;
        int i;
index 17e6018..56682f0 100644 (file)
@@ -404,7 +404,7 @@ static irqreturn_t dmm32at_isr(int irq, void *d)
 {
        struct comedi_device *dev = d;
        unsigned char intstat;
-       unsigned int val;
+       unsigned short val;
        int i;
 
        if (!dev->attached) {
index 726e40d..0d3d4ca 100644 (file)
@@ -924,7 +924,7 @@ static irqreturn_t me4000_ai_isr(int irq, void *dev_id)
        struct comedi_subdevice *s = dev->read_subdev;
        int i;
        int c = 0;
-       unsigned int lval;
+       unsigned short lval;
 
        if (!dev->attached)
                return IRQ_NONE;
index 99e7441..f1a45cf 100644 (file)
@@ -195,7 +195,9 @@ static irqreturn_t ni6527_interrupt(int irq, void *d)
                return IRQ_NONE;
 
        if (status & NI6527_STATUS_EDGE) {
-               comedi_buf_write_samples(s, &s->state, 1);
+               unsigned short val = 0;
+
+               comedi_buf_write_samples(s, &val, 1);
                comedi_handle_events(dev, s);
        }
 
index eb3f9f7..7cd8497 100644 (file)
@@ -472,6 +472,7 @@ static irqreturn_t ni_65xx_interrupt(int irq, void *d)
        struct comedi_device *dev = d;
        struct comedi_subdevice *s = dev->read_subdev;
        unsigned int status;
+       unsigned short val = 0;
 
        status = readb(dev->mmio + NI_65XX_STATUS_REG);
        if ((status & NI_65XX_STATUS_INT) == 0)
@@ -482,7 +483,7 @@ static irqreturn_t ni_65xx_interrupt(int irq, void *d)
        writeb(NI_65XX_CLR_EDGE_INT | NI_65XX_CLR_OVERFLOW_INT,
               dev->mmio + NI_65XX_CLR_REG);
 
-       comedi_buf_write_samples(s, &s->state, 1);
+       comedi_buf_write_samples(s, &val, 1);
        comedi_handle_events(dev, s);
 
        return IRQ_HANDLED;
index 2dbf69e..bd6f42f 100644 (file)
@@ -184,7 +184,7 @@ static irqreturn_t pcl711_interrupt(int irq, void *d)
        struct comedi_device *dev = d;
        struct comedi_subdevice *s = dev->read_subdev;
        struct comedi_cmd *cmd = &s->async->cmd;
-       unsigned int data;
+       unsigned short data;
 
        if (!dev->attached) {
                dev_err(dev->class_dev, "spurious interrupt\n");
index 64eb649..88f25d7 100644 (file)
@@ -220,9 +220,11 @@ static irqreturn_t pcl726_interrupt(int irq, void *d)
        struct pcl726_private *devpriv = dev->private;
 
        if (devpriv->cmd_running) {
+               unsigned short val = 0;
+
                pcl726_intr_cancel(dev, s);
 
-               comedi_buf_write_samples(s, &s->state, 1);
+               comedi_buf_write_samples(s, &val, 1);
                comedi_handle_events(dev, s);
        }
 
index 63e3011..f4b4a68 100644 (file)
@@ -423,7 +423,7 @@ static int pcl818_ai_eoc(struct comedi_device *dev,
 
 static bool pcl818_ai_write_sample(struct comedi_device *dev,
                                   struct comedi_subdevice *s,
-                                  unsigned int chan, unsigned int val)
+                                  unsigned int chan, unsigned short val)
 {
        struct pcl818_private *devpriv = dev->private;
        struct comedi_cmd *cmd = &s->async->cmd;
index 7956abc..9f92081 100644 (file)
@@ -877,5 +877,4 @@ module_comedi_usb_driver(vmk80xx_driver, vmk80xx_usb_driver);
 
 MODULE_AUTHOR("Manuel Gebele <forensixs@gmx.de>");
 MODULE_DESCRIPTION("Velleman USB Board Low-Level Driver");
-MODULE_SUPPORTED_DEVICE("K8055/K8061 aka VM110/VM140");
 MODULE_LICENSE("GPL");
index dc09cc6..09e7b4c 100644 (file)
@@ -1120,6 +1120,7 @@ static int ks_wlan_set_scan(struct net_device *dev,
 {
        struct ks_wlan_private *priv = netdev_priv(dev);
        struct iw_scan_req *req = NULL;
+       int len;
 
        if (priv->sleep_mode == SLP_SLEEP)
                return -EPERM;
@@ -1129,8 +1130,9 @@ static int ks_wlan_set_scan(struct net_device *dev,
        if (wrqu->data.length == sizeof(struct iw_scan_req) &&
            wrqu->data.flags & IW_SCAN_THIS_ESSID) {
                req = (struct iw_scan_req *)extra;
-               priv->scan_ssid_len = req->essid_len;
-               memcpy(priv->scan_ssid, req->essid, priv->scan_ssid_len);
+               len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);
+               priv->scan_ssid_len = len;
+               memcpy(priv->scan_ssid, req->essid, len);
        } else {
                priv->scan_ssid_len = 0;
        }
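
req->essid_len is user-controlled and the destination holds at most IW_ESSID_MAX_SIZE bytes, so the length is clamped before the copy and the stored length records what was actually copied. The staging wireless hunks below apply the same clamp-then-copy shape:

        /* Sketch: bound an untrusted length, then copy and record it */
        int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);

        priv->scan_ssid_len = len;
        memcpy(priv->scan_ssid, req->essid, len);
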
index 5a5121d..8c65733 100644 (file)
@@ -22,7 +22,6 @@
 #include <asm/processor.h>
 
 #include <linux/i2c.h>
-#include <linux/sfi.h>
 #include <media/v4l2-subdev.h>
 #include "atomisp.h"
 
index fa1e34a..182bb94 100644 (file)
@@ -791,6 +791,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf,  int len)
        p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SSID, &ie_len,
                       pbss_network->ie_length - _BEACON_IE_OFFSET_);
        if (p && ie_len > 0) {
+               ie_len = min_t(int, ie_len, sizeof(pbss_network->ssid.ssid));
                memset(&pbss_network->ssid, 0, sizeof(struct ndis_802_11_ssid));
                memcpy(pbss_network->ssid.ssid, p + 2, ie_len);
                pbss_network->ssid.ssid_length = ie_len;
@@ -811,6 +812,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf,  int len)
        p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_SUPP_RATES, &ie_len,
                       pbss_network->ie_length - _BEACON_IE_OFFSET_);
        if (p) {
+               ie_len = min_t(int, ie_len, NDIS_802_11_LENGTH_RATES_EX);
                memcpy(supportRate, p + 2, ie_len);
                supportRateNum = ie_len;
        }
@@ -819,6 +821,8 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf,  int len)
        p = rtw_get_ie(ie + _BEACON_IE_OFFSET_, WLAN_EID_EXT_SUPP_RATES,
                       &ie_len, pbss_network->ie_length - _BEACON_IE_OFFSET_);
        if (p) {
+               ie_len = min_t(int, ie_len,
+                              NDIS_802_11_LENGTH_RATES_EX - supportRateNum);
                memcpy(supportRate + supportRateNum, p + 2, ie_len);
                supportRateNum += ie_len;
        }
@@ -934,6 +938,7 @@ int rtw_check_beacon_data(struct adapter *padapter, u8 *pbuf,  int len)
 
                pht_cap->mcs.rx_mask[0] = 0xff;
                pht_cap->mcs.rx_mask[1] = 0x0;
+               ie_len = min_t(int, ie_len, sizeof(pmlmepriv->htpriv.ht_cap));
                memcpy(&pmlmepriv->htpriv.ht_cap, p + 2, ie_len);
        }
 
index bf22f13..58954b8 100644 (file)
@@ -1133,9 +1133,11 @@ static int rtw_wx_set_scan(struct net_device *dev, struct iw_request_info *a,
                                                break;
                                        }
                                        sec_len = *(pos++); len -= 1;
-                                       if (sec_len > 0 && sec_len <= len) {
+                                       if (sec_len > 0 &&
+                                           sec_len <= len &&
+                                           sec_len <= 32) {
                                                ssid[ssid_index].ssid_length = sec_len;
-                                               memcpy(ssid[ssid_index].ssid, pos, ssid[ssid_index].ssid_length);
+                                               memcpy(ssid[ssid_index].ssid, pos, sec_len);
                                                ssid_index++;
                                        }
                                        pos += sec_len;
index 963a2ff..39f5a6a 100644 (file)
@@ -27,6 +27,7 @@ config RTLLIB_CRYPTO_CCMP
 config RTLLIB_CRYPTO_TKIP
        tristate "Support for rtllib TKIP crypto"
        depends on RTLLIB
+       select CRYPTO
        select CRYPTO_LIB_ARC4
        select CRYPTO_MICHAEL_MIC
        default y
index 16bcee1..407effd 100644 (file)
@@ -406,9 +406,10 @@ static int _rtl92e_wx_set_scan(struct net_device *dev,
                struct iw_scan_req *req = (struct iw_scan_req *)b;
 
                if (req->essid_len) {
-                       ieee->current_network.ssid_len = req->essid_len;
-                       memcpy(ieee->current_network.ssid, req->essid,
-                              req->essid_len);
+                       int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);
+
+                       ieee->current_network.ssid_len = len;
+                       memcpy(ieee->current_network.ssid, req->essid, len);
                }
        }
 
index b84f00b..4cabaf2 100644 (file)
@@ -1105,7 +1105,7 @@ struct rtllib_network {
        bool    bWithAironetIE;
        bool    bCkipSupported;
        bool    bCcxRmEnable;
-       u16     CcxRmState[2];
+       u8      CcxRmState[2];
        bool    bMBssidValid;
        u8      MBssidMask;
        u8      MBssid[ETH_ALEN];
index 66c1353..15bbb63 100644 (file)
@@ -1967,7 +1967,7 @@ static void rtllib_parse_mife_generic(struct rtllib_device *ieee,
            info_element->data[2] == 0x96 &&
            info_element->data[3] == 0x01) {
                if (info_element->len == 6) {
-                       memcpy(network->CcxRmState, &info_element[4], 2);
+                       memcpy(network->CcxRmState, &info_element->data[4], 2);
                        if (network->CcxRmState[0] != 0)
                                network->bCcxRmEnable = true;
                        else
index d853586..77bf886 100644 (file)
@@ -331,8 +331,10 @@ static int r8192_wx_set_scan(struct net_device *dev, struct iw_request_info *a,
                struct iw_scan_req *req = (struct iw_scan_req *)b;
 
                if (req->essid_len) {
-                       ieee->current_network.ssid_len = req->essid_len;
-                       memcpy(ieee->current_network.ssid, req->essid, req->essid_len);
+                       int len = min_t(int, req->essid_len, IW_ESSID_MAX_SIZE);
+
+                       ieee->current_network.ssid_len = len;
+                       memcpy(ieee->current_network.ssid, req->essid, len);
                }
        }
 
index 1811646..75716f5 100644 (file)
@@ -192,8 +192,10 @@ u8 r8712_sitesurvey_cmd(struct _adapter *padapter,
        psurveyPara->ss_ssidlen = 0;
        memset(psurveyPara->ss_ssid, 0, IW_ESSID_MAX_SIZE + 1);
        if (pssid && pssid->SsidLength) {
-               memcpy(psurveyPara->ss_ssid, pssid->Ssid, pssid->SsidLength);
-               psurveyPara->ss_ssidlen = cpu_to_le32(pssid->SsidLength);
+               int len = min_t(int, pssid->SsidLength, IW_ESSID_MAX_SIZE);
+
+               memcpy(psurveyPara->ss_ssid, pssid->Ssid, len);
+               psurveyPara->ss_ssidlen = cpu_to_le32(len);
        }
        set_fwstate(pmlmepriv, _FW_UNDER_SURVEY);
        r8712_enqueue_cmd(pcmdpriv, ph2c);
index 81de5a9..60dd798 100644 (file)
@@ -924,7 +924,7 @@ static int r871x_wx_set_priv(struct net_device *dev,
        struct iw_point *dwrq = (struct iw_point *)awrq;
 
        len = dwrq->length;
-       ext = memdup_user(dwrq->pointer, len);
+       ext = strndup_user(dwrq->pointer, len);
        if (IS_ERR(ext))
                return PTR_ERR(ext);
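
memdup_user() returns raw bytes with no terminator, so code that parses the result as a C string can run past the end of the allocation; strndup_user() bounds the copy at len and guarantees NUL termination. A minimal sketch of the resulting pattern:

        /* Sketch: duplicate an ioctl string argument with guaranteed termination */
        char *ext;

        ext = strndup_user(dwrq->pointer, len);
        if (IS_ERR(ext))
                return PTR_ERR(ext);
        /* ... ext is now safe for str* helpers ... */
        kfree(ext);
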
 
index fd0ea4d..e3fa38b 100644 (file)
@@ -175,7 +175,7 @@ static ssize_t buffer_from_user(unsigned int minor, const char __user *buf,
 static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
                             loff_t *ppos)
 {
-       unsigned int minor = MINOR(file_inode(file)->i_rdev);
+       unsigned int minor = iminor(file_inode(file));
        ssize_t retval;
        size_t image_size;
 
@@ -218,7 +218,7 @@ static ssize_t vme_user_read(struct file *file, char __user *buf, size_t count,
 static ssize_t vme_user_write(struct file *file, const char __user *buf,
                              size_t count, loff_t *ppos)
 {
-       unsigned int minor = MINOR(file_inode(file)->i_rdev);
+       unsigned int minor = iminor(file_inode(file));
        ssize_t retval;
        size_t image_size;
 
@@ -260,7 +260,7 @@ static ssize_t vme_user_write(struct file *file, const char __user *buf,
 
 static loff_t vme_user_llseek(struct file *file, loff_t off, int whence)
 {
-       unsigned int minor = MINOR(file_inode(file)->i_rdev);
+       unsigned int minor = iminor(file_inode(file));
        size_t image_size;
        loff_t res;
 
@@ -294,7 +294,7 @@ static int vme_user_ioctl(struct inode *inode, struct file *file,
        struct vme_slave slave;
        struct vme_irq_id irq_req;
        unsigned long copied;
-       unsigned int minor = MINOR(inode->i_rdev);
+       unsigned int minor = iminor(inode);
        int retval;
        dma_addr_t pci_addr;
        void __user *argp = (void __user *)arg;
@@ -412,7 +412,7 @@ vme_user_unlocked_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
 {
        int ret;
        struct inode *inode = file_inode(file);
-       unsigned int minor = MINOR(inode->i_rdev);
+       unsigned int minor = iminor(inode);
 
        mutex_lock(&image[minor].mutex);
        ret = vme_user_ioctl(inode, file, cmd, arg);
@@ -481,7 +481,7 @@ static int vme_user_master_mmap(unsigned int minor, struct vm_area_struct *vma)
 
 static int vme_user_mmap(struct file *file, struct vm_area_struct *vma)
 {
-       unsigned int minor = MINOR(file_inode(file)->i_rdev);
+       unsigned int minor = iminor(file_inode(file));
 
        if (type[minor] == MASTER_MINOR)
                return vme_user_master_mmap(minor, vma);
@@ -689,7 +689,7 @@ err_dev:
        return err;
 }
 
-static int vme_user_remove(struct vme_dev *dev)
+static void vme_user_remove(struct vme_dev *dev)
 {
        int i;
 
@@ -717,8 +717,6 @@ static int vme_user_remove(struct vme_dev *dev)
 
        /* Unregister the major and minor device numbers */
        unregister_chrdev_region(MKDEV(VME_MAJOR, 0), VME_DEVS);
-
-       return 0;
 }
 
 static struct vme_driver vme_user_driver = {
index e7061d3..c3c2c15 100644 (file)
@@ -150,7 +150,7 @@ struct vnt_cts {
        u16 reserved;
        struct ieee80211_cts data;
        u16 reserved2;
-} __packed;
+} __packed __aligned(2);
 
 struct vnt_cts_fb {
        struct vnt_phy_field b;
@@ -160,7 +160,7 @@ struct vnt_cts_fb {
        __le16 cts_duration_ba_f1;
        struct ieee80211_cts data;
        u16 reserved2;
-} __packed;
+} __packed __aligned(2);
 
 struct vnt_tx_fifo_head {
        u8 tx_key[WLAN_KEY_LEN_CCMP];
index cd6bcfd..ed53d0b 100644 (file)
@@ -5,6 +5,7 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/gpio/consumer.h>
 #include <net/mac80211.h>
 
 #include "bh.h"
index 92ef329..78c4932 100644 (file)
@@ -8,6 +8,10 @@
 #ifndef WFX_BH_H
 #define WFX_BH_H
 
+#include <linux/atomic.h>
+#include <linux/wait.h>
+#include <linux/workqueue.h>
+
 struct wfx_dev;
 
 struct wfx_hif {
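
The run of wfx hunks that follows has a single theme: each header gains the includes and forward declarations it needs to compile standalone, instead of relying on whatever its users happened to include first. The rule of thumb, sketched with a hypothetical header:

        /* Sketch: include what the prototypes use, forward-declare the rest */
        #ifndef WFX_EXAMPLE_H
        #define WFX_EXAMPLE_H

        #include <linux/types.h>        /* size_t used below */

        struct wfx_dev;                 /* used only via pointer: no include needed */

        int wfx_example_read(struct wfx_dev *wdev, void *buf, size_t len);

        #endif
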
index ea39114..ca04b3d 100644 (file)
@@ -8,6 +8,9 @@
 #ifndef WFX_BUS_H
 #define WFX_BUS_H
 
+#include <linux/mmc/sdio_func.h>
+#include <linux/spi/spi.h>
+
 #define WFX_REG_CONFIG        0x0
 #define WFX_REG_CONTROL       0x1
 #define WFX_REG_IN_OUT_QUEUE  0x2
index 588edce..e06d7e1 100644 (file)
@@ -5,13 +5,19 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/module.h>
 #include <linux/mmc/sdio.h>
 #include <linux/mmc/sdio_func.h>
 #include <linux/mmc/card.h>
+#include <linux/interrupt.h>
 #include <linux/of_irq.h>
+#include <linux/irq.h>
 
 #include "bus.h"
 #include "wfx.h"
+#include "hwio.h"
+#include "main.h"
+#include "bh.h"
 
 static const struct wfx_platform_data wfx_sdio_pdata = {
        .file_fw = "wfm_wf200",
index f89855a..a99125d 100644 (file)
@@ -6,12 +6,19 @@
  * Copyright (c) 2011, Sagrad Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/module.h>
+#include <linux/delay.h>
+#include <linux/gpio/consumer.h>
 #include <linux/spi/spi.h>
+#include <linux/interrupt.h>
 #include <linux/irq.h>
 #include <linux/of.h>
 
 #include "bus.h"
 #include "wfx.h"
+#include "hwio.h"
+#include "main.h"
+#include "bh.h"
 
 #define SET_WRITE 0x7FFF        /* usage: and operation */
 #define SET_READ 0x8000         /* usage: or operation */
index 2cfa162..385f2d4 100644 (file)
@@ -5,8 +5,13 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/etherdevice.h>
+#include <net/mac80211.h>
+
 #include "data_rx.h"
 #include "wfx.h"
+#include "bh.h"
+#include "sta.h"
 
 static void wfx_rx_handle_ba(struct wfx_vif *wvif, struct ieee80211_mgmt *mgmt)
 {
index 76f26e3..77fb104 100644 (file)
@@ -6,9 +6,14 @@
  * Copyright (c) 2010, ST-Ericsson
  */
 #include <net/mac80211.h>
+#include <linux/etherdevice.h>
 
+#include "data_tx.h"
 #include "wfx.h"
+#include "bh.h"
 #include "sta.h"
+#include "queue.h"
+#include "debug.h"
 #include "traces.h"
 #include "hif_tx_mib.h"
 
index 6b30200..401363d 100644 (file)
@@ -8,6 +8,9 @@
 #ifndef WFX_DATA_TX_H
 #define WFX_DATA_TX_H
 
+#include <linux/list.h>
+#include <net/mac80211.h>
+
 #include "hif_api_cmd.h"
 #include "hif_api_mib.h"
 
index 3e87d13..eedada7 100644 (file)
@@ -5,9 +5,15 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/debugfs.h>
+#include <linux/seq_file.h>
+#include <linux/crc32.h>
+
 #include "debug.h"
 #include "wfx.h"
 #include "sta.h"
+#include "main.h"
+#include "hif_tx.h"
 #include "hif_tx_mib.h"
 
 #define CREATE_TRACE_POINTS
index 1bb9054..1b8aec0 100644 (file)
@@ -6,6 +6,8 @@
  * Copyright (c) 2010, ST-Ericsson
  */
 #include <linux/firmware.h>
+#include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/bitfield.h>
 
 #include "fwio.h"
index 8b671c9..58c9bb0 100644 (file)
@@ -8,6 +8,10 @@
 #ifndef WFX_HIF_API_CMD_H
 #define WFX_HIF_API_CMD_H
 
+#include <linux/ieee80211.h>
+
+#include "hif_api_general.h"
+
 enum hif_requests_ids {
        HIF_REQ_ID_RESET                = 0x0a,
        HIF_REQ_ID_READ_MIB             = 0x05,
index 70b253d..2418894 100644 (file)
@@ -8,6 +8,15 @@
 #ifndef WFX_HIF_API_GENERAL_H
 #define WFX_HIF_API_GENERAL_H
 
+#ifdef __KERNEL__
+#include <linux/types.h>
+#include <linux/if_ether.h>
+#else
+#include <net/ethernet.h>
+#include <stdint.h>
+#define __packed __attribute__((__packed__))
+#endif
+
 #define HIF_ID_IS_INDICATION      0x80
 #define HIF_COUNTER_MAX           7
 
index 17dc133..63b4372 100644 (file)
@@ -6,7 +6,11 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/etherdevice.h>
+
+#include "hif_tx.h"
 #include "wfx.h"
+#include "bh.h"
 #include "hwio.h"
 #include "debug.h"
 #include "sta.h"
index 6432ed8..1926cf1 100644 (file)
@@ -6,8 +6,13 @@
  * Copyright (c) 2010, ST-Ericsson
  * Copyright (C) 2010, ST-Ericsson SA
  */
+
+#include <linux/etherdevice.h>
+
 #include "wfx.h"
+#include "hif_tx.h"
 #include "hif_tx_mib.h"
+#include "hif_api_mib.h"
 
 int hif_set_output_power(struct wfx_vif *wvif, int val)
 {
index 089bb41..36fbc5b 100644 (file)
@@ -5,10 +5,13 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/kernel.h>
+#include <linux/delay.h>
 #include <linux/slab.h>
 
 #include "hwio.h"
 #include "wfx.h"
+#include "bus.h"
 #include "traces.h"
 
 /*
index 8bb9bcf..0b8e4f7 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef WFX_HWIO_H
 #define WFX_HWIO_H
 
+#include <linux/types.h>
+
 struct wfx_dev;
 
 int wfx_data_read(struct wfx_dev *wdev, void *buf, size_t buf_len);
index c93d07d..2ab82be 100644 (file)
@@ -5,10 +5,12 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/etherdevice.h>
 #include <net/mac80211.h>
 
 #include "key.h"
 #include "wfx.h"
+#include "hif_tx_mib.h"
 
 static int wfx_alloc_key(struct wfx_dev *wdev)
 {
index 4dc9fea..70a44d0 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef WFX_KEY_H
 #define WFX_KEY_H
 
+#include <net/mac80211.h>
+
 struct wfx_dev;
 struct wfx_vif;
 
index b9ea9a9..e7bc198 100644 (file)
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
  * Copyright (c) 2004-2006 Jean-Baptiste Note <jbnote@gmail.com>, et al.
  */
+#include <linux/module.h>
 #include <linux/of.h>
 #include <linux/of_net.h>
+#include <linux/gpio/consumer.h>
 #include <linux/mmc/sdio_func.h>
 #include <linux/spi/spi.h>
+#include <linux/etherdevice.h>
 #include <linux/firmware.h>
 
+#include "main.h"
 #include "wfx.h"
 #include "fwio.h"
 #include "hwio.h"
 #include "bus.h"
+#include "bh.h"
 #include "sta.h"
 #include "key.h"
 #include "scan.h"
 #include "debug.h"
+#include "data_tx.h"
 #include "hif_tx_mib.h"
+#include "hif_api_cmd.h"
 
 #define WFX_PDS_MAX_SIZE 1500
 
index 086bcc0..a0db322 100644 (file)
 #ifndef WFX_MAIN_H
 #define WFX_MAIN_H
 
+#include <linux/device.h>
 #include <linux/gpio/consumer.h>
 
+#include "hif_api_general.h"
+
 struct wfx_dev;
 struct hwbus_ops;
 
index 3bddf28..31c37f6 100644 (file)
@@ -5,9 +5,13 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/sched.h>
 #include <net/mac80211.h>
 
+#include "queue.h"
 #include "wfx.h"
+#include "sta.h"
+#include "data_tx.h"
 #include "traces.h"
 
 void wfx_tx_lock(struct wfx_dev *wdev)
index e43aa9d..80ba194 100644 (file)
@@ -8,6 +8,9 @@
 #ifndef WFX_QUEUE_H
 #define WFX_QUEUE_H
 
+#include <linux/skbuff.h>
+#include <linux/atomic.h>
+
 struct wfx_dev;
 struct wfx_vif;
 
index e5b7eef..c7496a7 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef WFX_SCAN_H
 #define WFX_SCAN_H
 
+#include <net/mac80211.h>
+
 struct wfx_dev;
 struct wfx_vif;
 
index 5585f9e..196779a 100644 (file)
@@ -5,11 +5,17 @@
  * Copyright (c) 2017-2020, Silicon Laboratories, Inc.
  * Copyright (c) 2010, ST-Ericsson
  */
+#include <linux/etherdevice.h>
 #include <net/mac80211.h>
 
 #include "sta.h"
 #include "wfx.h"
+#include "fwio.h"
+#include "bh.h"
+#include "key.h"
 #include "scan.h"
+#include "debug.h"
+#include "hif_tx.h"
 #include "hif_tx_mib.h"
 
 #define HIF_MAX_ARP_IP_ADDRTABLE_ENTRIES 2
index a3fb9fc..d7b5df5 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef WFX_STA_H
 #define WFX_STA_H
 
+#include <net/mac80211.h>
+
 struct wfx_dev;
 struct wfx_vif;
 
index afe1074..e34c7a5 100644 (file)
 #define _WFX_TRACE_H
 
 #include <linux/tracepoint.h>
+#include <net/mac80211.h>
 
 #include "bus.h"
+#include "hif_api_cmd.h"
+#include "hif_api_mib.h"
 
 /* The hell below needs some explanation. For each symbolic number, we need to
  * define it with TRACE_DEFINE_ENUM() and in a list for __print_symbolic.
index a185b82..9489868 100644 (file)
@@ -10,6 +10,9 @@
 #ifndef WFX_H
 #define WFX_H
 
+#include <linux/completion.h>
+#include <linux/workqueue.h>
+#include <linux/mutex.h>
 #include <linux/nospec.h>
 #include <net/mac80211.h>
 
index e4a9b9f..2a6165f 100644 (file)
@@ -1006,7 +1006,7 @@ static void tgt_agent_fetch_work(struct work_struct *work)
                        agent->state = AGENT_STATE_SUSPENDED;
 
                spin_unlock_bh(&agent->lock);
-       };
+       }
 }
 
 static struct sbp_target_agent *sbp_target_agent_register(
index 8ed93fd..ee3d520 100644 (file)
@@ -315,10 +315,8 @@ iblock_get_bio(struct se_cmd *cmd, sector_t lba, u32 sg_num, int op,
         * Only allocate as many vector entries as the bio code allows us to,
         * we'll loop later on until we have handled the whole request.
         */
-       if (sg_num > BIO_MAX_PAGES)
-               sg_num = BIO_MAX_PAGES;
-
-       bio = bio_alloc_bioset(GFP_NOIO, sg_num, &ib_dev->ibd_bio_set);
+       bio = bio_alloc_bioset(GFP_NOIO, bio_max_segs(sg_num),
+                               &ib_dev->ibd_bio_set);
        if (!bio) {
                pr_err("Unable to allocate memory for bio\n");
                return NULL;
@@ -638,8 +636,7 @@ iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio,
                return -ENODEV;
        }
 
-       bip = bio_integrity_alloc(bio, GFP_NOIO,
-                       min_t(unsigned int, cmd->t_prot_nents, BIO_MAX_PAGES));
+       bip = bio_integrity_alloc(bio, GFP_NOIO, bio_max_segs(cmd->t_prot_nents));
        if (IS_ERR(bip)) {
                pr_err("Unable to allocate bio_integrity_payload\n");
                return PTR_ERR(bip);
index 14db5e5..d4cc43a 100644 (file)
@@ -3739,6 +3739,7 @@ core_scsi3_pri_read_keys(struct se_cmd *cmd)
        spin_unlock(&dev->t10_pr.registration_lock);
 
        put_unaligned_be32(add_len, &buf[4]);
+       target_set_cmd_data_length(cmd, 8 + add_len);
 
        transport_kunmap_data_sg(cmd);
 
@@ -3757,7 +3758,7 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
        struct t10_pr_registration *pr_reg;
        unsigned char *buf;
        u64 pr_res_key;
-       u32 add_len = 16; /* Hardcoded to 16 when a reservation is held. */
+       u32 add_len = 0;
 
        if (cmd->data_length < 8) {
                pr_err("PRIN SA READ_RESERVATIONS SCSI Data Length: %u"
@@ -3775,8 +3776,9 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
        pr_reg = dev->dev_pr_res_holder;
        if (pr_reg) {
                /*
-                * Set the hardcoded Additional Length
+                * Set the Additional Length to 16 when a reservation is held
                 */
+               add_len = 16;
                put_unaligned_be32(add_len, &buf[4]);
 
                if (cmd->data_length < 22)
@@ -3812,6 +3814,8 @@ core_scsi3_pri_read_reservation(struct se_cmd *cmd)
                          (pr_reg->pr_res_type & 0x0f);
        }
 
+       target_set_cmd_data_length(cmd, 8 + add_len);
+
 err:
        spin_unlock(&dev->dev_reservation_lock);
        transport_kunmap_data_sg(cmd);
@@ -3830,7 +3834,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
        struct se_device *dev = cmd->se_dev;
        struct t10_reservation *pr_tmpl = &dev->t10_pr;
        unsigned char *buf;
-       u16 add_len = 8; /* Hardcoded to 8. */
+       u16 len = 8; /* Hardcoded to 8. */
 
        if (cmd->data_length < 6) {
                pr_err("PRIN SA REPORT_CAPABILITIES SCSI Data Length:"
@@ -3842,7 +3846,7 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
        if (!buf)
                return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 
-       put_unaligned_be16(add_len, &buf[0]);
+       put_unaligned_be16(len, &buf[0]);
        buf[2] |= 0x10; /* CRH: Compatible Reservation Handling bit. */
        buf[2] |= 0x08; /* SIP_C: Specify Initiator Ports Capable bit */
        buf[2] |= 0x04; /* ATP_C: All Target Ports Capable bit */
@@ -3871,6 +3875,8 @@ core_scsi3_pri_report_capabilities(struct se_cmd *cmd)
        buf[4] |= 0x02; /* PR_TYPE_WRITE_EXCLUSIVE */
        buf[5] |= 0x01; /* PR_TYPE_EXCLUSIVE_ACCESS_ALLREG */
 
+       target_set_cmd_data_length(cmd, len);
+
        transport_kunmap_data_sg(cmd);
 
        return 0;
@@ -4031,6 +4037,7 @@ core_scsi3_pri_read_full_status(struct se_cmd *cmd)
         * Set ADDITIONAL_LENGTH
         */
        put_unaligned_be32(add_len, &buf[4]);
+       target_set_cmd_data_length(cmd, 8 + add_len);
 
        transport_kunmap_data_sg(cmd);
 
index 33770e5..9ee797b 100644 (file)
@@ -881,8 +881,7 @@ pscsi_map_sg(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
 
                        if (!bio) {
 new_bio:
-                               nr_vecs = min_t(int, BIO_MAX_PAGES, nr_pages);
-                               nr_pages -= nr_vecs;
+                               nr_vecs = bio_max_segs(nr_pages);
                                /*
                                 * Calls bio_kmalloc() and sets bio->bi_end_io()
                                 */
@@ -939,6 +938,14 @@ new_bio:
 
        return 0;
 fail:
+       if (bio)
+               bio_put(bio);
+       while (req->bio) {
+               bio = req->bio;
+               req->bio = bio->bi_next;
+               bio_put(bio);
+       }
+       req->biotail = NULL;
        return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
 }
 
index 93ea17c..5ecb9f1 100644 (file)
@@ -879,11 +879,9 @@ void target_complete_cmd(struct se_cmd *cmd, u8 scsi_status)
 }
 EXPORT_SYMBOL(target_complete_cmd);
 
-void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+void target_set_cmd_data_length(struct se_cmd *cmd, int length)
 {
-       if ((scsi_status == SAM_STAT_GOOD ||
-            cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) &&
-           length < cmd->data_length) {
+       if (length < cmd->data_length) {
                if (cmd->se_cmd_flags & SCF_UNDERFLOW_BIT) {
                        cmd->residual_count += cmd->data_length - length;
                } else {
@@ -893,6 +891,15 @@ void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int len
 
                cmd->data_length = length;
        }
+}
+EXPORT_SYMBOL(target_set_cmd_data_length);
+
+void target_complete_cmd_with_length(struct se_cmd *cmd, u8 scsi_status, int length)
+{
+       if (scsi_status == SAM_STAT_GOOD ||
+           cmd->se_cmd_flags & SCF_TREAT_READ_AS_NORMAL) {
+               target_set_cmd_data_length(cmd, length);
+       }
 
        target_complete_cmd(cmd, scsi_status);
 }
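
With the length adjustment factored out of target_complete_cmd_with_length(), the PR handlers above can trim the response (and set residual_count) as soon as the allocation length is known, independent of SCSI status. The resulting call shape in those handlers:

        /* Sketch: report only the bytes actually built */
        put_unaligned_be32(add_len, &buf[4]);
        target_set_cmd_data_length(cmd, 8 + add_len);   /* adjusts residual_count */
        transport_kunmap_data_sg(cmd);
        return 0;
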
index a5991df..bf73cd5 100644 (file)
@@ -1566,6 +1566,88 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name)
        return &udev->se_dev;
 }
 
+static void tcmu_dev_call_rcu(struct rcu_head *p)
+{
+       struct se_device *dev = container_of(p, struct se_device, rcu_head);
+       struct tcmu_dev *udev = TCMU_DEV(dev);
+
+       kfree(udev->uio_info.name);
+       kfree(udev->name);
+       kfree(udev);
+}
+
+static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
+{
+       if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
+               kmem_cache_free(tcmu_cmd_cache, cmd);
+               return 0;
+       }
+       return -EINVAL;
+}
+
+static void tcmu_blocks_release(struct radix_tree_root *blocks,
+                               int start, int end)
+{
+       int i;
+       struct page *page;
+
+       for (i = start; i < end; i++) {
+               page = radix_tree_delete(blocks, i);
+               if (page) {
+                       __free_page(page);
+                       atomic_dec(&global_db_count);
+               }
+       }
+}
+
+static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
+{
+       struct tcmu_tmr *tmr, *tmp;
+
+       list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
+               list_del_init(&tmr->queue_entry);
+               kfree(tmr);
+       }
+}
+
+static void tcmu_dev_kref_release(struct kref *kref)
+{
+       struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
+       struct se_device *dev = &udev->se_dev;
+       struct tcmu_cmd *cmd;
+       bool all_expired = true;
+       int i;
+
+       vfree(udev->mb_addr);
+       udev->mb_addr = NULL;
+
+       spin_lock_bh(&timed_out_udevs_lock);
+       if (!list_empty(&udev->timedout_entry))
+               list_del(&udev->timedout_entry);
+       spin_unlock_bh(&timed_out_udevs_lock);
+
+       /* Upper layer should drain all requests before calling this */
+       mutex_lock(&udev->cmdr_lock);
+       idr_for_each_entry(&udev->commands, cmd, i) {
+               if (tcmu_check_and_free_pending_cmd(cmd) != 0)
+                       all_expired = false;
+       }
+       /* There can be left over TMR cmds. Remove them. */
+       tcmu_remove_all_queued_tmr(udev);
+       if (!list_empty(&udev->qfull_queue))
+               all_expired = false;
+       idr_destroy(&udev->commands);
+       WARN_ON(!all_expired);
+
+       tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
+       bitmap_free(udev->data_bitmap);
+       mutex_unlock(&udev->cmdr_lock);
+
+       pr_debug("dev_kref_release\n");
+
+       call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
+}
+
 static void run_qfull_queue(struct tcmu_dev *udev, bool fail)
 {
        struct tcmu_cmd *tcmu_cmd, *tmp_cmd;
@@ -1678,6 +1760,25 @@ static struct page *tcmu_try_get_block_page(struct tcmu_dev *udev, uint32_t dbi)
        return page;
 }
 
+static void tcmu_vma_open(struct vm_area_struct *vma)
+{
+       struct tcmu_dev *udev = vma->vm_private_data;
+
+       pr_debug("vma_open\n");
+
+       kref_get(&udev->kref);
+}
+
+static void tcmu_vma_close(struct vm_area_struct *vma)
+{
+       struct tcmu_dev *udev = vma->vm_private_data;
+
+       pr_debug("vma_close\n");
+
+       /* release ref from tcmu_vma_open */
+       kref_put(&udev->kref, tcmu_dev_kref_release);
+}
+
 static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 {
        struct tcmu_dev *udev = vmf->vma->vm_private_data;
@@ -1716,6 +1817,8 @@ static vm_fault_t tcmu_vma_fault(struct vm_fault *vmf)
 }
 
 static const struct vm_operations_struct tcmu_vm_ops = {
+       .open = tcmu_vma_open,
+       .close = tcmu_vma_close,
        .fault = tcmu_vma_fault,
 };
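
Tying the kref to VMA lifetime closes a use-after-free window: the uio file descriptor can be closed while a mapping of the ring is still live, so each mapping now pins the device rather than the open file doing so. Note that .open also runs for VMA copies created by fork() or a split, so every live mapping holds exactly one reference; the flow, condensed:

        /* Sketch: reference flow for the mmap'ed ring
         *   tcmu_mmap()      -> tcmu_vma_open()  : kref_get()  (initial mapping)
         *   fork()/VMA split -> tcmu_vma_open()  : kref_get()  (per copy)
         *   munmap()/exit    -> tcmu_vma_close() : kref_put()  -> tcmu_dev_kref_release()
         */
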
 
@@ -1732,6 +1835,8 @@ static int tcmu_mmap(struct uio_info *info, struct vm_area_struct *vma)
        if (vma_pages(vma) != (udev->ring_size >> PAGE_SHIFT))
                return -EINVAL;
 
+       tcmu_vma_open(vma);
+
        return 0;
 }
 
@@ -1744,93 +1849,12 @@ static int tcmu_open(struct uio_info *info, struct inode *inode)
                return -EBUSY;
 
        udev->inode = inode;
-       kref_get(&udev->kref);
 
        pr_debug("open\n");
 
        return 0;
 }
 
-static void tcmu_dev_call_rcu(struct rcu_head *p)
-{
-       struct se_device *dev = container_of(p, struct se_device, rcu_head);
-       struct tcmu_dev *udev = TCMU_DEV(dev);
-
-       kfree(udev->uio_info.name);
-       kfree(udev->name);
-       kfree(udev);
-}
-
-static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
-{
-       if (test_bit(TCMU_CMD_BIT_EXPIRED, &cmd->flags)) {
-               kmem_cache_free(tcmu_cmd_cache, cmd);
-               return 0;
-       }
-       return -EINVAL;
-}
-
-static void tcmu_blocks_release(struct radix_tree_root *blocks,
-                               int start, int end)
-{
-       int i;
-       struct page *page;
-
-       for (i = start; i < end; i++) {
-               page = radix_tree_delete(blocks, i);
-               if (page) {
-                       __free_page(page);
-                       atomic_dec(&global_db_count);
-               }
-       }
-}
-
-static void tcmu_remove_all_queued_tmr(struct tcmu_dev *udev)
-{
-       struct tcmu_tmr *tmr, *tmp;
-
-       list_for_each_entry_safe(tmr, tmp, &udev->tmr_queue, queue_entry) {
-               list_del_init(&tmr->queue_entry);
-               kfree(tmr);
-       }
-}
-
-static void tcmu_dev_kref_release(struct kref *kref)
-{
-       struct tcmu_dev *udev = container_of(kref, struct tcmu_dev, kref);
-       struct se_device *dev = &udev->se_dev;
-       struct tcmu_cmd *cmd;
-       bool all_expired = true;
-       int i;
-
-       vfree(udev->mb_addr);
-       udev->mb_addr = NULL;
-
-       spin_lock_bh(&timed_out_udevs_lock);
-       if (!list_empty(&udev->timedout_entry))
-               list_del(&udev->timedout_entry);
-       spin_unlock_bh(&timed_out_udevs_lock);
-
-       /* Upper layer should drain all requests before calling this */
-       mutex_lock(&udev->cmdr_lock);
-       idr_for_each_entry(&udev->commands, cmd, i) {
-               if (tcmu_check_and_free_pending_cmd(cmd) != 0)
-                       all_expired = false;
-       }
-       /* There can be left over TMR cmds. Remove them. */
-       tcmu_remove_all_queued_tmr(udev);
-       if (!list_empty(&udev->qfull_queue))
-               all_expired = false;
-       idr_destroy(&udev->commands);
-       WARN_ON(!all_expired);
-
-       tcmu_blocks_release(&udev->data_blocks, 0, udev->dbi_max + 1);
-       bitmap_free(udev->data_bitmap);
-       mutex_unlock(&udev->cmdr_lock);
-
-       call_rcu(&dev->rcu_head, tcmu_dev_call_rcu);
-}
-
 static int tcmu_release(struct uio_info *info, struct inode *inode)
 {
        struct tcmu_dev *udev = container_of(info, struct tcmu_dev, uio_info);
@@ -1838,8 +1862,7 @@ static int tcmu_release(struct uio_info *info, struct inode *inode)
        clear_bit(TCMU_DEV_BIT_OPEN, &udev->flags);
 
        pr_debug("close\n");
-       /* release ref from open */
-       kref_put(&udev->kref, tcmu_dev_kref_release);
+
        return 0;
 }
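
The hunks above move the final kref put from tcmu_release() (uio close) to
tcmu_vma_close(), so the device now lives as long as any mapping of its ring
does. A minimal sketch of the pattern with generic names, not the actual tcmu
code: .open takes a reference for every new VMA (forks included), .close drops
it, and because mmap() does not invoke .open for the initial mapping, the mmap
handler calls it by hand (.fault and the release helper are assumed).

    static void obj_vma_open(struct vm_area_struct *vma)
    {
            struct obj *o = vma->vm_private_data;

            kref_get(&o->kref);
    }

    static void obj_vma_close(struct vm_area_struct *vma)
    {
            struct obj *o = vma->vm_private_data;

            kref_put(&o->kref, obj_release);  /* assumed release helper */
    }

    static const struct vm_operations_struct obj_vm_ops = {
            .open  = obj_vma_open,
            .close = obj_vma_close,
    };

    static int obj_mmap(struct obj *o, struct vm_area_struct *vma)
    {
            vma->vm_ops = &obj_vm_ops;
            vma->vm_private_data = o;
            obj_vma_open(vma);      /* first reference, taken manually */
            return 0;
    }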
 
index cf4718c..319a1e7 100644 (file)
@@ -747,7 +747,6 @@ module_platform_driver(optee_driver);
 
 MODULE_AUTHOR("Linaro");
 MODULE_DESCRIPTION("OP-TEE driver");
-MODULE_SUPPORTED_DEVICE("");
 MODULE_VERSION("1.0");
 MODULE_LICENSE("GPL v2");
 MODULE_ALIAS("platform:optee");
index 345917a..1c4aac8 100644 (file)
@@ -674,6 +674,9 @@ void thermal_cooling_device_stats_update(struct thermal_cooling_device *cdev,
 {
        struct cooling_dev_stats *stats = cdev->stats;
 
+       if (!stats)
+               return;
+
        spin_lock(&stats->lock);
 
        if (stats->state == new_state)
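
Cooling-device statistics are optional; cdev->stats stays NULL when the stats
allocation fails at registration. A hedged sketch of the guard this hunk adds,
reduced to its shape:

    static void stats_update(struct thermal_cooling_device *cdev)
    {
            struct cooling_dev_stats *stats = cdev->stats;

            if (!stats)     /* no stats were ever allocated */
                    return;
            /* only now is it safe to take stats->lock and update */
    }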
index b63fecc..2a95b4c 100644 (file)
@@ -768,12 +768,6 @@ static int tb_init_port(struct tb_port *port)
 
        tb_dump_port(port->sw->tb, &port->config);
 
-       /* Control port does not need HopID allocation */
-       if (port->port) {
-               ida_init(&port->in_hopids);
-               ida_init(&port->out_hopids);
-       }
-
        INIT_LIST_HEAD(&port->list);
        return 0;
 
@@ -1842,10 +1836,8 @@ static void tb_switch_release(struct device *dev)
        dma_port_free(sw->dma_port);
 
        tb_switch_for_each_port(sw, port) {
-               if (!port->disabled) {
-                       ida_destroy(&port->in_hopids);
-                       ida_destroy(&port->out_hopids);
-               }
+               ida_destroy(&port->in_hopids);
+               ida_destroy(&port->out_hopids);
        }
 
        kfree(sw->uuid);
@@ -2025,6 +2017,12 @@ struct tb_switch *tb_switch_alloc(struct tb *tb, struct device *parent,
                /* minimum setup for tb_find_cap and tb_drom_read to work */
                sw->ports[i].sw = sw;
                sw->ports[i].port = i;
+
+               /* Control port does not need HopID allocation */
+               if (i) {
+                       ida_init(&sw->ports[i].in_hopids);
+                       ida_init(&sw->ports[i].out_hopids);
+               }
        }
 
        ret = tb_switch_find_vse_cap(sw, TB_VSE_CAP_PLUG_EVENTS);
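
The two thunderbolt hunks above are one fix: the HopID IDAs used to be set up
in tb_init_port(), which an early failure could skip, while tb_switch_release()
destroyed them only for enabled ports. Initializing them at allocation time
lets the release path tear them down unconditionally. A generic sketch of that
init-at-alloc pattern, not the thunderbolt code itself:

    struct item {
            struct ida in_ids;
            struct ida out_ids;
    };

    static struct item *item_alloc(void)
    {
            struct item *it = kzalloc(sizeof(*it), GFP_KERNEL);

            if (!it)
                    return NULL;
            /* an initialized-but-unused ida is safe to destroy */
            ida_init(&it->in_ids);
            ida_init(&it->out_ids);
            return it;
    }

    static void item_release(struct item *it)
    {
            ida_destroy(&it->in_ids);
            ida_destroy(&it->out_ids);
            kfree(it);
    }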
index 1f000ac..c348b1f 100644 (file)
@@ -138,6 +138,10 @@ static void tb_discover_tunnels(struct tb_switch *sw)
                                parent->boot = true;
                                parent = tb_switch_parent(parent);
                        }
+               } else if (tb_tunnel_is_dp(tunnel)) {
+                       /* Keep the domain from powering down */
+                       pm_runtime_get_sync(&tunnel->src_port->sw->dev);
+                       pm_runtime_get_sync(&tunnel->dst_port->sw->dev);
                }
 
                list_add_tail(&tunnel->list, &tcm->tunnel_list);
index b3ccae9..730de6b 100644 (file)
@@ -9,8 +9,6 @@ obj-$(CONFIG_AUDIT)             += tty_audit.o
 obj-$(CONFIG_MAGIC_SYSRQ)      += sysrq.o
 obj-$(CONFIG_N_HDLC)           += n_hdlc.o
 obj-$(CONFIG_N_GSM)            += n_gsm.o
-obj-$(CONFIG_TRACE_ROUTER)     += n_tracerouter.o
-obj-$(CONFIG_TRACE_SINK)       += n_tracesink.o
 obj-$(CONFIG_R3964)            += n_r3964.o
 
 obj-y                          += vt/
index c908489..9afa1dc 100644 (file)
@@ -317,7 +317,6 @@ static void hvcs_hangup(struct tty_struct * tty);
 
 static int hvcs_probe(struct vio_dev *dev,
                const struct vio_device_id *id);
-static int hvcs_remove(struct vio_dev *dev);
 static int __init hvcs_module_init(void);
 static void __exit hvcs_module_exit(void);
 static int hvcs_initialize(void);
@@ -819,7 +818,7 @@ static int hvcs_probe(
        return 0;
 }
 
-static int hvcs_remove(struct vio_dev *dev)
+static void hvcs_remove(struct vio_dev *dev)
 {
        struct hvcs_struct *hvcsd = dev_get_drvdata(&dev->dev);
        unsigned long flags;
@@ -849,7 +848,6 @@ static int hvcs_remove(struct vio_dev *dev)
 
        printk(KERN_INFO "HVCS: vty-server@%X removed from the"
                        " vio bus.\n", dev->unit_address);
-       return 0;
 };
 
 static struct vio_driver hvcs_vio_driver = {
diff --git a/drivers/tty/n_tracerouter.c b/drivers/tty/n_tracerouter.c
deleted file mode 100644 (file)
index 3490ed5..0000000
+++ /dev/null
@@ -1,235 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  n_tracerouter.c - Trace data router through tty space
- *
- *  Copyright (C) Intel 2011
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * This trace router uses the Linux line discipline framework to route
- * trace data coming from a HW Modem to a PTI (Parallel Trace Module) port.
- * The solution is not specific to a HW modem and this line disciple can
- * be used to route any stream of data in kernel space.
- * This is part of a solution for the P1149.7, compact JTAG, standard.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <linux/tty.h>
-#include <linux/tty_ldisc.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/mutex.h>
-#include <linux/slab.h>
-#include <linux/bug.h>
-#include "n_tracesink.h"
-
-/*
- * Other ldisc drivers use 65536 which basically means,
- * 'I can always accept 64k' and flow control is off.
- * This number is deemed appropriate for this driver.
- */
-#define RECEIVE_ROOM   65536
-#define DRIVERNAME     "n_tracerouter"
-
-/*
- * struct to hold private configuration data for this ldisc.
- * opencalled is used to hold if this ldisc has been opened.
- * kref_tty holds the tty reference the ldisc sits on top of.
- */
-struct tracerouter_data {
-       u8 opencalled;
-       struct tty_struct *kref_tty;
-};
-static struct tracerouter_data *tr_data;
-
-/* lock for when tty reference is being used */
-static DEFINE_MUTEX(routelock);
-
-/**
- * n_tracerouter_open() - Called when a tty is opened by a SW entity.
- * @tty: terminal device to the ldisc.
- *
- * Return:
- *      0 for success.
- *
- * Caveats: This should only be opened one time per SW entity.
- */
-static int n_tracerouter_open(struct tty_struct *tty)
-{
-       int retval = -EEXIST;
-
-       mutex_lock(&routelock);
-       if (tr_data->opencalled == 0) {
-
-               tr_data->kref_tty = tty_kref_get(tty);
-               if (tr_data->kref_tty == NULL) {
-                       retval = -EFAULT;
-               } else {
-                       tr_data->opencalled = 1;
-                       tty->disc_data      = tr_data;
-                       tty->receive_room   = RECEIVE_ROOM;
-                       tty_driver_flush_buffer(tty);
-                       retval = 0;
-               }
-       }
-       mutex_unlock(&routelock);
-       return retval;
-}
-
-/**
- * n_tracerouter_close() - close connection
- * @tty: terminal device to the ldisc.
- *
- * Called when a software entity wants to close a connection.
- */
-static void n_tracerouter_close(struct tty_struct *tty)
-{
-       struct tracerouter_data *tptr = tty->disc_data;
-
-       mutex_lock(&routelock);
-       WARN_ON(tptr->kref_tty != tr_data->kref_tty);
-       tty_driver_flush_buffer(tty);
-       tty_kref_put(tr_data->kref_tty);
-       tr_data->kref_tty = NULL;
-       tr_data->opencalled = 0;
-       tty->disc_data = NULL;
-       mutex_unlock(&routelock);
-}
-
-/**
- * n_tracerouter_read() - read request from user space
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * function that allows read() functionality in userspace. By default if this
- * is not implemented it returns -EIO. This module is functioning like a
- * router via n_tracerouter_receivebuf(), and there is no real requirement
- * to implement this function. However, an error return value other than
- * -EIO should be used just to show that there was an intent not to have
- * this function implemented.  Return value based on read() man pages.
- *
- * Return:
- *      -EINVAL
- */
-static ssize_t n_tracerouter_read(struct tty_struct *tty, struct file *file,
-                                 unsigned char *buf, size_t nr,
-                                 void **cookie, unsigned long offset)
-{
-       return -EINVAL;
-}
-
-/**
- * n_tracerouter_write() - Function that allows write() in userspace.
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * By default if this is not implemented, it returns -EIO.
- * This should not be implemented, ever, because
- * 1. this driver is functioning like a router via
- *    n_tracerouter_receivebuf()
- * 2. No writes to HW will ever go through this line discpline driver.
- * However, an error return value other than -EIO should be used
- * just to show that there was an intent not to have this function
- * implemented.  Return value based on write() man pages.
- *
- * Return:
- *     -EINVAL
- */
-static ssize_t n_tracerouter_write(struct tty_struct *tty, struct file *file,
-                                  const unsigned char *buf, size_t nr) {
-       return -EINVAL;
-}
-
-/**
- * n_tracerouter_receivebuf() - Routing function for driver.
- * @tty: terminal device passed into the ldisc.  It's assumed
- *       tty will never be NULL.
- * @cp:  buffer, block of characters to be eventually read by
- *       someone, somewhere (user read() call or some kernel function).
- * @fp:  flag buffer.
- * @count: number of characters (aka, bytes) in cp.
- *
- * This function takes the input buffer, cp, and passes it to
- * an external API function for processing.
- */
-static void n_tracerouter_receivebuf(struct tty_struct *tty,
-                                       const unsigned char *cp,
-                                       char *fp, int count)
-{
-       mutex_lock(&routelock);
-       n_tracesink_datadrain((u8 *) cp, count);
-       mutex_unlock(&routelock);
-}
-
-/*
- * Flush buffer is not impelemented as the ldisc has no internal buffering
- * so the tty_driver_flush_buffer() is sufficient for this driver's needs.
- */
-
-static struct tty_ldisc_ops tty_ptirouter_ldisc = {
-       .owner          = THIS_MODULE,
-       .magic          = TTY_LDISC_MAGIC,
-       .name           = DRIVERNAME,
-       .open           = n_tracerouter_open,
-       .close          = n_tracerouter_close,
-       .read           = n_tracerouter_read,
-       .write          = n_tracerouter_write,
-       .receive_buf    = n_tracerouter_receivebuf
-};
-
-/**
- * n_tracerouter_init -        module initialisation
- *
- * Registers this module as a line discipline driver.
- *
- * Return:
- *     0 for success, any other value error.
- */
-static int __init n_tracerouter_init(void)
-{
-       int retval;
-
-       tr_data = kzalloc(sizeof(struct tracerouter_data), GFP_KERNEL);
-       if (tr_data == NULL)
-               return -ENOMEM;
-
-
-       /* Note N_TRACEROUTER is defined in linux/tty.h */
-       retval = tty_register_ldisc(N_TRACEROUTER, &tty_ptirouter_ldisc);
-       if (retval < 0) {
-               pr_err("%s: Registration failed: %d\n", __func__, retval);
-               kfree(tr_data);
-       }
-       return retval;
-}
-
-/**
- * n_tracerouter_exit -        module unload
- *
- * Removes this module as a line discipline driver.
- */
-static void __exit n_tracerouter_exit(void)
-{
-       int retval = tty_unregister_ldisc(N_TRACEROUTER);
-
-       if (retval < 0)
-               pr_err("%s: Unregistration failed: %d\n", __func__,  retval);
-       else
-               kfree(tr_data);
-}
-
-module_init(n_tracerouter_init);
-module_exit(n_tracerouter_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jay Freyensee");
-MODULE_ALIAS_LDISC(N_TRACEROUTER);
-MODULE_DESCRIPTION("Trace router ldisc driver");
diff --git a/drivers/tty/n_tracesink.c b/drivers/tty/n_tracesink.c
deleted file mode 100644 (file)
index 1d99310..0000000
+++ /dev/null
@@ -1,230 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/*
- *  n_tracesink.c - Trace data router and sink path through tty space.
- *
- *  Copyright (C) Intel 2011
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The trace sink uses the Linux line discipline framework to receive
- * trace data coming from the PTI source line discipline driver
- * to a user-desired tty port, like USB.
- * This is to provide a way to extract modem trace data on
- * devices that do not have a PTI HW module, or just need modem
- * trace data to come out of a different HW output port.
- * This is part of a solution for the P1149.7, compact JTAG, standard.
- */
-
-#include <linux/init.h>
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/types.h>
-#include <linux/ioctl.h>
-#include <linux/tty.h>
-#include <linux/tty_ldisc.h>
-#include <linux/errno.h>
-#include <linux/string.h>
-#include <linux/bug.h>
-#include "n_tracesink.h"
-
-/*
- * Other ldisc drivers use 65536 which basically means,
- * 'I can always accept 64k' and flow control is off.
- * This number is deemed appropriate for this driver.
- */
-#define RECEIVE_ROOM   65536
-#define DRIVERNAME     "n_tracesink"
-
-/*
- * there is a quirk with this ldisc is he can write data
- * to a tty from anyone calling his kernel API, which
- * meets customer requirements in the drivers/misc/pti.c
- * project.  So he needs to know when he can and cannot write when
- * the API is called. In theory, the API can be called
- * after an init() but before a successful open() which
- * would crash the system if tty is not checked.
- */
-static struct tty_struct *this_tty;
-static DEFINE_MUTEX(writelock);
-
-/**
- * n_tracesink_open() - Called when a tty is opened by a SW entity.
- * @tty: terminal device to the ldisc.
- *
- * Return:
- *      0 for success,
- *      -EFAULT = couldn't get a tty kref n_tracesink will sit
- *       on top of
- *      -EEXIST = open() called successfully once and it cannot
- *      be called again.
- *
- * Caveats: open() should only be successful the first time a
- * SW entity calls it.
- */
-static int n_tracesink_open(struct tty_struct *tty)
-{
-       int retval = -EEXIST;
-
-       mutex_lock(&writelock);
-       if (this_tty == NULL) {
-               this_tty = tty_kref_get(tty);
-               if (this_tty == NULL) {
-                       retval = -EFAULT;
-               } else {
-                       tty->disc_data = this_tty;
-                       tty_driver_flush_buffer(tty);
-                       retval = 0;
-               }
-       }
-       mutex_unlock(&writelock);
-
-       return retval;
-}
-
-/**
- * n_tracesink_close() - close connection
- * @tty: terminal device to the ldisc.
- *
- * Called when a software entity wants to close a connection.
- */
-static void n_tracesink_close(struct tty_struct *tty)
-{
-       mutex_lock(&writelock);
-       tty_driver_flush_buffer(tty);
-       tty_kref_put(this_tty);
-       this_tty = NULL;
-       tty->disc_data = NULL;
-       mutex_unlock(&writelock);
-}
-
-/**
- * n_tracesink_read() - read request from user space
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * function that allows read() functionality in userspace. By default if this
- * is not implemented it returns -EIO. This module is functioning like a
- * router via n_tracesink_receivebuf(), and there is no real requirement
- * to implement this function. However, an error return value other than
- * -EIO should be used just to show that there was an intent not to have
- * this function implemented.  Return value based on read() man pages.
- *
- * Return:
- *      -EINVAL
- */
-static ssize_t n_tracesink_read(struct tty_struct *tty, struct file *file,
-                               unsigned char *buf, size_t nr,
-                               void **cookie, unsigned long offset)
-{
-       return -EINVAL;
-}
-
-/**
- * n_tracesink_write() - Function that allows write() in userspace.
- * @tty:  terminal device passed into the ldisc.
- * @file: pointer to open file object.
- * @buf:  pointer to the data buffer that gets eventually returned.
- * @nr:   number of bytes of the data buffer that is returned.
- *
- * By default if this is not implemented, it returns -EIO.
- * This should not be implemented, ever, because
- * 1. this driver is functioning like a router via
- *    n_tracesink_receivebuf()
- * 2. No writes to HW will ever go through this line discpline driver.
- * However, an error return value other than -EIO should be used
- * just to show that there was an intent not to have this function
- * implemented.  Return value based on write() man pages.
- *
- * Return:
- *     -EINVAL
- */
-static ssize_t n_tracesink_write(struct tty_struct *tty, struct file *file,
-                                const unsigned char *buf, size_t nr) {
-       return -EINVAL;
-}
-
-/**
- * n_tracesink_datadrain() - Kernel API function used to route
- *                          trace debugging data to user-defined
- *                          port like USB.
- *
- * @buf:   Trace debuging data buffer to write to tty target
- *         port. Null value will return with no write occurring.
- * @count: Size of buf. Value of 0 or a negative number will
- *         return with no write occuring.
- *
- * Caveat: If this line discipline does not set the tty it sits
- * on top of via an open() call, this API function will not
- * call the tty's write() call because it will have no pointer
- * to call the write().
- */
-void n_tracesink_datadrain(u8 *buf, int count)
-{
-       mutex_lock(&writelock);
-
-       if ((buf != NULL) && (count > 0) && (this_tty != NULL))
-               this_tty->ops->write(this_tty, buf, count);
-
-       mutex_unlock(&writelock);
-}
-EXPORT_SYMBOL_GPL(n_tracesink_datadrain);
-
-/*
- * Flush buffer is not impelemented as the ldisc has no internal buffering
- * so the tty_driver_flush_buffer() is sufficient for this driver's needs.
- */
-
-/*
- * tty_ldisc function operations for this driver.
- */
-static struct tty_ldisc_ops tty_n_tracesink = {
-       .owner          = THIS_MODULE,
-       .magic          = TTY_LDISC_MAGIC,
-       .name           = DRIVERNAME,
-       .open           = n_tracesink_open,
-       .close          = n_tracesink_close,
-       .read           = n_tracesink_read,
-       .write          = n_tracesink_write
-};
-
-/**
- * n_tracesink_init-   module initialisation
- *
- * Registers this module as a line discipline driver.
- *
- * Return:
- *     0 for success, any other value error.
- */
-static int __init n_tracesink_init(void)
-{
-       /* Note N_TRACESINK is defined in linux/tty.h */
-       int retval = tty_register_ldisc(N_TRACESINK, &tty_n_tracesink);
-
-       if (retval < 0)
-               pr_err("%s: Registration failed: %d\n", __func__, retval);
-
-       return retval;
-}
-
-/**
- * n_tracesink_exit -  module unload
- *
- * Removes this module as a line discipline driver.
- */
-static void __exit n_tracesink_exit(void)
-{
-       int retval = tty_unregister_ldisc(N_TRACESINK);
-
-       if (retval < 0)
-               pr_err("%s: Unregistration failed: %d\n", __func__,  retval);
-}
-
-module_init(n_tracesink_init);
-module_exit(n_tracesink_exit);
-
-MODULE_LICENSE("GPL");
-MODULE_AUTHOR("Jay Freyensee");
-MODULE_ALIAS_LDISC(N_TRACESINK);
-MODULE_DESCRIPTION("Trace sink ldisc driver");
diff --git a/drivers/tty/n_tracesink.h b/drivers/tty/n_tracesink.h
deleted file mode 100644 (file)
index 7031d51..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- *  n_tracesink.h - Kernel driver API to route trace data in kernel space.
- *
- *  Copyright (C) Intel 2011
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The PTI (Parallel Trace Interface) driver directs trace data routed from
- * various parts in the system out through the Intel Penwell PTI port and
- * out of the mobile device for analysis with a debugging tool
- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
- * compact JTAG, standard.
- *
- * This header file is used by n_tracerouter to be able to send the
- * data of it's tty port to the tty port this module sits.  This
- * mechanism can also be used independent of the PTI module.
- *
- */
-
-#ifndef N_TRACESINK_H_
-#define N_TRACESINK_H_
-
-void n_tracesink_datadrain(u8 *buf, int count);
-
-#endif
index 8b2797b..5e23745 100644 (file)
@@ -66,8 +66,7 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
        wake_up_interruptible(&tty->link->read_wait);
        wake_up_interruptible(&tty->link->write_wait);
        if (tty->driver->subtype == PTY_TYPE_MASTER) {
-               struct file *f;
-
+               set_bit(TTY_OTHER_CLOSED, &tty->flags);
 #ifdef CONFIG_UNIX98_PTYS
                if (tty->driver == ptm_driver) {
                        mutex_lock(&devpts_mutex);
@@ -76,17 +75,7 @@ static void pty_close(struct tty_struct *tty, struct file *filp)
                        mutex_unlock(&devpts_mutex);
                }
 #endif
-
-               /*
-                * This hack is required because a program can open a
-                * pty and redirect a console to it, but if the pty is
-                * closed and the console is not released, then the
-                * slave side will never close.  So release the
-                * redirect when the master closes.
-                */
-               f = tty_release_redirect(tty->link);
-               if (f)
-                       fput(f);
+               tty_vhangup(tty->link);
        }
 }
 
index 9a87275..94af7a5 100644 (file)
@@ -1639,8 +1639,6 @@ module_exit(icom_exit);
 
 MODULE_AUTHOR("Michael Anderson <mjanders@us.ibm.com>");
 MODULE_DESCRIPTION("IBM iSeries Serial IOA driver");
-MODULE_SUPPORTED_DEVICE
-    ("IBM iSeries 2745, 2771, 2772, 2742, 2793 and 2805 Communications adapters");
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE("icom_call_setup.bin");
 MODULE_FIRMWARE("icom_res_dce.bin");
index cd30da0..0ea799b 100644 (file)
@@ -19,7 +19,6 @@
 MODULE_AUTHOR("Digi International, https://www.digi.com");
 MODULE_DESCRIPTION("Driver for the Digi International Neo and Classic PCI based product line");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("jsm");
 
 #define JSM_DRIVER_NAME "jsm"
 #define NR_PORTS       32
index 9795b2e..1b61d26 100644 (file)
@@ -1056,9 +1056,9 @@ static int max310x_startup(struct uart_port *port)
        max310x_port_update(port, MAX310X_MODE1_REG,
                            MAX310X_MODE1_TRNSCVCTRL_BIT, 0);
 
-       /* Reset FIFOs */
-       max310x_port_write(port, MAX310X_MODE2_REG,
-                          MAX310X_MODE2_FIFORST_BIT);
+       /* Configure MODE2 register & reset FIFOs */
+       val = MAX310X_MODE2_RXEMPTINV_BIT | MAX310X_MODE2_FIFORST_BIT;
+       max310x_port_write(port, MAX310X_MODE2_REG, val);
        max310x_port_update(port, MAX310X_MODE2_REG,
                            MAX310X_MODE2_FIFORST_BIT, 0);
 
@@ -1086,27 +1086,8 @@ static int max310x_startup(struct uart_port *port)
        /* Clear IRQ status register */
        max310x_port_read(port, MAX310X_IRQSTS_REG);
 
-       /*
-        * Let's ask for an interrupt after a timeout equivalent to
-        * the receiving time of 4 characters after the last character
-        * has been received.
-        */
-       max310x_port_write(port, MAX310X_RXTO_REG, 4);
-
-       /*
-        * Make sure we also get RX interrupts when the RX FIFO is
-        * filling up quickly, so get an interrupt when half of the RX
-        * FIFO has been filled in.
-        */
-       max310x_port_write(port, MAX310X_FIFOTRIGLVL_REG,
-                          MAX310X_FIFOTRIGLVL_RX(MAX310X_FIFO_SIZE / 2));
-
-       /* Enable RX timeout interrupt in LSR */
-       max310x_port_write(port, MAX310X_LSR_IRQEN_REG,
-                          MAX310X_LSR_RXTO_BIT);
-
-       /* Enable LSR, RX FIFO trigger, CTS change interrupts */
-       val = MAX310X_IRQ_LSR_BIT  | MAX310X_IRQ_RXFIFO_BIT | MAX310X_IRQ_TXEMPTY_BIT;
+       /* Enable RX, TX, CTS change interrupts */
+       val = MAX310X_IRQ_RXEMPTY_BIT | MAX310X_IRQ_TXEMPTY_BIT;
        max310x_port_write(port, MAX310X_IRQEN_REG, val | MAX310X_IRQ_CTS_BIT);
 
        return 0;
index 291649f..0d85b55 100644 (file)
@@ -1177,12 +1177,6 @@ static inline void qcom_geni_serial_enable_early_read(struct geni_se *se,
                                                      struct console *con) { }
 #endif
 
-static int qcom_geni_serial_earlycon_exit(struct console *con)
-{
-       geni_remove_earlycon_icc_vote();
-       return 0;
-}
-
 static struct qcom_geni_private_data earlycon_private_data;
 
 static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
@@ -1233,7 +1227,6 @@ static int __init qcom_geni_serial_earlycon_setup(struct earlycon_device *dev,
        writel(stop_bit_len, uport->membase + SE_UART_TX_STOP_BIT_LEN);
 
        dev->con->write = qcom_geni_serial_earlycon_write;
-       dev->con->exit = qcom_geni_serial_earlycon_exit;
        dev->con->setup = NULL;
        qcom_geni_serial_enable_early_read(&se, dev->con);
 
index 74733ec..391bada 100644 (file)
@@ -544,9 +544,7 @@ EXPORT_SYMBOL_GPL(tty_wakeup);
  *     @tty: tty device
  *
  *     This is available to the pty code so if the master closes, if the
- *     slave is a redirect it can release the redirect.  It returns the
- *     filp for the redirect, which must be fput when the operations on
- *     the tty are completed.
+ *     slave is a redirect it can release the redirect.
  */
 struct file *tty_release_redirect(struct tty_struct *tty)
 {
@@ -561,6 +559,7 @@ struct file *tty_release_redirect(struct tty_struct *tty)
 
        return f;
 }
+EXPORT_SYMBOL_GPL(tty_release_redirect);
 
 /**
  *     __tty_hangup            -       actual handler for hangup events
index b8e44d1..c7d681f 100644 (file)
@@ -92,7 +92,7 @@ static int probe(struct pci_dev *pdev,
        gdev->info.version = DRIVER_VERSION;
        gdev->info.release = release;
        gdev->pdev = pdev;
-       if (pdev->irq) {
+       if (pdev->irq && (pdev->irq != IRQ_NOTCONNECTED)) {
                gdev->info.irq = pdev->irq;
                gdev->info.irq_flags = IRQF_SHARED;
                gdev->info.handler = irqhandler;
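
IRQ_NOTCONNECTED is a nonzero sentinel (defined in linux/interrupt.h), so it
passes a bare "if (pdev->irq)" test even though request_irq() on it fails with
-ENOTCONN. A hedged sketch of the predicate the hunk above encodes:

    static bool pci_irq_usable(struct pci_dev *pdev)
    {
            return pdev->irq && pdev->irq != IRQ_NOTCONNECTED;
    }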
index f9170d1..5f0513c 100644 (file)
@@ -2197,7 +2197,10 @@ static int cdnsp_queue_isoc_tx(struct cdnsp_device *pdev,
         * inverted in the first TDs isoc TRB.
         */
        field = TRB_TYPE(TRB_ISOC) | TRB_TLBPC(last_burst_pkt) |
-               start_cycle ? 0 : 1 | TRB_SIA | TRB_TBC(burst_count);
+               TRB_SIA | TRB_TBC(burst_count);
+
+       if (!start_cycle)
+               field |= TRB_CYCLE;
 
        /* Fill the rest of the TRB fields, and remaining normal TRBs. */
        for (i = 0; i < trbs_per_td; i++) {
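
The cdnsp fix above is a C operator-precedence bug: | binds tighter than ?:,
so the removed line parsed with everything left of the conditional swallowed
into its condition, which is always nonzero, making field 0 regardless of
start_cycle. A standalone demonstration with made-up flag values:

    #define TYPE 0x400
    #define SIA  0x004

    int start_cycle = 1, field;

    /* buggy: parses as ((TYPE | SIA | start_cycle) ? 0 : (1 | ...)),
     * and the condition is always nonzero, so field is always 0 */
    field = TYPE | SIA | start_cycle ? 0 : 1;

    /* fixed: build the flags first, then OR in the cycle bit,
     * exactly as the replacement code does */
    field = TYPE | SIA;
    if (!start_cycle)
            field |= 1;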
index 37f824b..3fda1ec 100644 (file)
@@ -147,17 +147,29 @@ static inline int acm_set_control(struct acm *acm, int control)
 #define acm_send_break(acm, ms) \
        acm_ctrl_msg(acm, USB_CDC_REQ_SEND_BREAK, ms, NULL, 0)
 
-static void acm_kill_urbs(struct acm *acm)
+static void acm_poison_urbs(struct acm *acm)
 {
        int i;
 
-       usb_kill_urb(acm->ctrlurb);
+       usb_poison_urb(acm->ctrlurb);
        for (i = 0; i < ACM_NW; i++)
-               usb_kill_urb(acm->wb[i].urb);
+               usb_poison_urb(acm->wb[i].urb);
        for (i = 0; i < acm->rx_buflimit; i++)
-               usb_kill_urb(acm->read_urbs[i]);
+               usb_poison_urb(acm->read_urbs[i]);
+}
+
+static void acm_unpoison_urbs(struct acm *acm)
+{
+       int i;
+
+       for (i = 0; i < acm->rx_buflimit; i++)
+               usb_unpoison_urb(acm->read_urbs[i]);
+       for (i = 0; i < ACM_NW; i++)
+               usb_unpoison_urb(acm->wb[i].urb);
+       usb_unpoison_urb(acm->ctrlurb);
 }
 
 /*
  * Write buffer management.
  * All of these assume proper locks taken by the caller.
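
The difference that motivates this rename: usb_kill_urb() cancels an URB but
leaves it resubmittable, so a completion handler that resubmits on error can
race against teardown; usb_poison_urb() cancels it and makes further
usb_submit_urb() calls fail with -EPERM until usb_unpoison_urb(). A hedged
sketch of the discipline (generic shape, not the full cdc-acm logic):

    static void dev_quiesce(struct my_dev *dev)
    {
            usb_poison_urb(dev->urb);      /* cancel + block resubmission */
    }

    static void dev_wake(struct my_dev *dev)
    {
            usb_unpoison_urb(dev->urb);    /* allow usb_submit_urb() again */
    }

    static void dev_complete(struct urb *urb)
    {
            int rc = usb_submit_urb(urb, GFP_ATOMIC);

            /* -EPERM just means we were poisoned; stay quiet, as the
             * acm_start_wb() hunk below also does */
            if (rc && rc != -EPERM)
                    pr_err("resubmit failed: %d\n", rc);
    }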
@@ -226,9 +238,10 @@ static int acm_start_wb(struct acm *acm, struct acm_wb *wb)
 
        rc = usb_submit_urb(wb->urb, GFP_ATOMIC);
        if (rc < 0) {
-               dev_err(&acm->data->dev,
-                       "%s - usb_submit_urb(write bulk) failed: %d\n",
-                       __func__, rc);
+               if (rc != -EPERM)
+                       dev_err(&acm->data->dev,
+                               "%s - usb_submit_urb(write bulk) failed: %d\n",
+                               __func__, rc);
                acm_write_done(acm, wb);
        }
        return rc;
@@ -313,8 +326,10 @@ static void acm_process_notification(struct acm *acm, unsigned char *buf)
                        acm->iocount.dsr++;
                if (difference & ACM_CTRL_DCD)
                        acm->iocount.dcd++;
-               if (newctrl & ACM_CTRL_BRK)
+               if (newctrl & ACM_CTRL_BRK) {
                        acm->iocount.brk++;
+                       tty_insert_flip_char(&acm->port, 0, TTY_BREAK);
+               }
                if (newctrl & ACM_CTRL_RI)
                        acm->iocount.rng++;
                if (newctrl & ACM_CTRL_FRAMING)
@@ -480,11 +495,6 @@ static void acm_read_bulk_callback(struct urb *urb)
        dev_vdbg(&acm->data->dev, "got urb %d, len %d, status %d\n",
                rb->index, urb->actual_length, status);
 
-       if (!acm->dev) {
-               dev_dbg(&acm->data->dev, "%s - disconnected\n", __func__);
-               return;
-       }
-
        switch (status) {
        case 0:
                usb_mark_last_busy(acm->dev);
@@ -649,7 +659,8 @@ static void acm_port_dtr_rts(struct tty_port *port, int raise)
 
        res = acm_set_control(acm, val);
        if (res && (acm->ctrl_caps & USB_CDC_CAP_LINE))
-               dev_err(&acm->control->dev, "failed to set dtr/rts\n");
+               /* This is broken in too many devices to spam the logs */
+               dev_dbg(&acm->control->dev, "failed to set dtr/rts\n");
 }
 
 static int acm_port_activate(struct tty_port *port, struct tty_struct *tty)
@@ -731,6 +742,7 @@ static void acm_port_shutdown(struct tty_port *port)
         * Need to grab write_lock to prevent race with resume, but no need to
         * hold it due to the tty-port initialised flag.
         */
+       acm_poison_urbs(acm);
        spin_lock_irq(&acm->write_lock);
        spin_unlock_irq(&acm->write_lock);
 
@@ -747,7 +759,8 @@ static void acm_port_shutdown(struct tty_port *port)
                usb_autopm_put_interface_async(acm->control);
        }
 
-       acm_kill_urbs(acm);
+       acm_unpoison_urbs(acm);
 }
 
 static void acm_tty_cleanup(struct tty_struct *tty)
@@ -1296,13 +1309,6 @@ skip_normal_probe:
        if (!combined_interfaces && intf != control_interface)
                return -ENODEV;
 
-       if (!combined_interfaces && usb_interface_claimed(data_interface)) {
-               /* valid in this context */
-               dev_dbg(&intf->dev, "The data interface isn't available\n");
-               return -EBUSY;
-       }
-
-
        if (data_interface->cur_altsetting->desc.bNumEndpoints < 2 ||
            control_interface->cur_altsetting->desc.bNumEndpoints == 0)
                return -EINVAL;
@@ -1323,8 +1329,8 @@ made_compressed_probe:
        dev_dbg(&intf->dev, "interfaces are valid\n");
 
        acm = kzalloc(sizeof(struct acm), GFP_KERNEL);
-       if (acm == NULL)
-               goto alloc_fail;
+       if (!acm)
+               return -ENOMEM;
 
        tty_port_init(&acm->port);
        acm->port.ops = &acm_port_ops;
@@ -1341,7 +1347,7 @@ made_compressed_probe:
 
        minor = acm_alloc_minor(acm);
        if (minor < 0)
-               goto alloc_fail1;
+               goto err_put_port;
 
        acm->minor = minor;
        acm->dev = usb_dev;
@@ -1372,15 +1378,15 @@ made_compressed_probe:
 
        buf = usb_alloc_coherent(usb_dev, ctrlsize, GFP_KERNEL, &acm->ctrl_dma);
        if (!buf)
-               goto alloc_fail1;
+               goto err_put_port;
        acm->ctrl_buffer = buf;
 
        if (acm_write_buffers_alloc(acm) < 0)
-               goto alloc_fail2;
+               goto err_free_ctrl_buffer;
 
        acm->ctrlurb = usb_alloc_urb(0, GFP_KERNEL);
        if (!acm->ctrlurb)
-               goto alloc_fail3;
+               goto err_free_write_buffers;
 
        for (i = 0; i < num_rx_buf; i++) {
                struct acm_rb *rb = &(acm->read_buffers[i]);
@@ -1389,13 +1395,13 @@ made_compressed_probe:
                rb->base = usb_alloc_coherent(acm->dev, readsize, GFP_KERNEL,
                                                                &rb->dma);
                if (!rb->base)
-                       goto alloc_fail4;
+                       goto err_free_read_urbs;
                rb->index = i;
                rb->instance = acm;
 
                urb = usb_alloc_urb(0, GFP_KERNEL);
                if (!urb)
-                       goto alloc_fail4;
+                       goto err_free_read_urbs;
 
                urb->transfer_flags |= URB_NO_TRANSFER_DMA_MAP;
                urb->transfer_dma = rb->dma;
@@ -1416,8 +1422,8 @@ made_compressed_probe:
                struct acm_wb *snd = &(acm->wb[i]);
 
                snd->urb = usb_alloc_urb(0, GFP_KERNEL);
-               if (snd->urb == NULL)
-                       goto alloc_fail5;
+               if (!snd->urb)
+                       goto err_free_write_urbs;
 
                if (usb_endpoint_xfer_int(epwrite))
                        usb_fill_int_urb(snd->urb, usb_dev, acm->out,
@@ -1435,7 +1441,7 @@ made_compressed_probe:
 
        i = device_create_file(&intf->dev, &dev_attr_bmCapabilities);
        if (i < 0)
-               goto alloc_fail5;
+               goto err_free_write_urbs;
 
        if (h.usb_cdc_country_functional_desc) { /* export the country data */
                struct usb_cdc_country_functional_desc * cfd =
@@ -1480,20 +1486,21 @@ skip_countries:
        acm->nb_index = 0;
        acm->nb_size = 0;
 
-       dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor);
-
        acm->line.dwDTERate = cpu_to_le32(9600);
        acm->line.bDataBits = 8;
        acm_set_line(acm, &acm->line);
 
-       usb_driver_claim_interface(&acm_driver, data_interface, acm);
-       usb_set_intfdata(data_interface, acm);
+       if (!acm->combined_interfaces) {
+               rv = usb_driver_claim_interface(&acm_driver, data_interface, acm);
+               if (rv)
+                       goto err_remove_files;
+       }
 
        tty_dev = tty_port_register_device(&acm->port, acm_tty_driver, minor,
                        &control_interface->dev);
        if (IS_ERR(tty_dev)) {
                rv = PTR_ERR(tty_dev);
-               goto alloc_fail6;
+               goto err_release_data_interface;
        }
 
        if (quirks & CLEAR_HALT_CONDITIONS) {
@@ -1501,32 +1508,39 @@ skip_countries:
                usb_clear_halt(usb_dev, acm->out);
        }
 
+       dev_info(&intf->dev, "ttyACM%d: USB ACM device\n", minor);
+
        return 0;
-alloc_fail6:
+
+err_release_data_interface:
+       if (!acm->combined_interfaces) {
+               /* Clear driver data so that disconnect() returns early. */
+               usb_set_intfdata(data_interface, NULL);
+               usb_driver_release_interface(&acm_driver, data_interface);
+       }
+err_remove_files:
        if (acm->country_codes) {
                device_remove_file(&acm->control->dev,
                                &dev_attr_wCountryCodes);
                device_remove_file(&acm->control->dev,
                                &dev_attr_iCountryCodeRelDate);
-               kfree(acm->country_codes);
        }
        device_remove_file(&acm->control->dev, &dev_attr_bmCapabilities);
-alloc_fail5:
-       usb_set_intfdata(intf, NULL);
+err_free_write_urbs:
        for (i = 0; i < ACM_NW; i++)
                usb_free_urb(acm->wb[i].urb);
-alloc_fail4:
+err_free_read_urbs:
        for (i = 0; i < num_rx_buf; i++)
                usb_free_urb(acm->read_urbs[i]);
        acm_read_buffers_free(acm);
        usb_free_urb(acm->ctrlurb);
-alloc_fail3:
+err_free_write_buffers:
        acm_write_buffers_free(acm);
-alloc_fail2:
+err_free_ctrl_buffer:
        usb_free_coherent(usb_dev, ctrlsize, acm->ctrl_buffer, acm->ctrl_dma);
-alloc_fail1:
+err_put_port:
        tty_port_put(&acm->port);
-alloc_fail:
+
        return rv;
 }
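
Besides fixing the unwinding itself, the probe hunk renames the numbered
alloc_failN labels to descriptive err_* ones. The idiom: each label undoes
exactly the steps that had succeeded before the jump, in reverse order. A
hedged skeleton with placeholder steps, not the actual acm_probe() sequence:

    static int demo_probe(void)
    {
            int err;

            err = step_a();
            if (err)
                    return err;

            err = step_b();
            if (err)
                    goto err_undo_a;

            err = step_c();
            if (err)
                    goto err_undo_b;

            return 0;

    err_undo_b:
            undo_b();
    err_undo_a:
            undo_a();
            return err;
    }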
 
@@ -1540,8 +1554,14 @@ static void acm_disconnect(struct usb_interface *intf)
        if (!acm)
                return;
 
-       mutex_lock(&acm->mutex);
        acm->disconnected = true;
+       /*
+        * there is a circular dependency. acm_softint() can resubmit
+        * the URBs in error handling so we need to block any
+        * submission right away
+        */
+       acm_poison_urbs(acm);
+       mutex_lock(&acm->mutex);
        if (acm->country_codes) {
                device_remove_file(&acm->control->dev,
                                &dev_attr_wCountryCodes);
@@ -1560,7 +1580,6 @@ static void acm_disconnect(struct usb_interface *intf)
                tty_kref_put(tty);
        }
 
-       acm_kill_urbs(acm);
        cancel_delayed_work_sync(&acm->dwork);
 
        tty_unregister_device(acm_tty_driver, acm->minor);
@@ -1602,7 +1621,7 @@ static int acm_suspend(struct usb_interface *intf, pm_message_t message)
        if (cnt)
                return 0;
 
-       acm_kill_urbs(acm);
+       acm_poison_urbs(acm);
        cancel_delayed_work_sync(&acm->dwork);
        acm->urbs_in_error_delay = 0;
 
@@ -1615,6 +1634,7 @@ static int acm_resume(struct usb_interface *intf)
        struct urb *urb;
        int rv = 0;
 
+       acm_unpoison_urbs(acm);
        spin_lock_irq(&acm->write_lock);
 
        if (--acm->susp_count)
@@ -1935,6 +1955,11 @@ static const struct usb_device_id acm_ids[] = {
        .driver_info = SEND_ZERO_PACKET,
        },
 
+       /* Exclude Goodix Fingerprint Reader */
+       { USB_DEVICE(0x27c6, 0x5395),
+       .driver_info = IGNORE_DEVICE,
+       },
+
        /* control interfaces without any protocol set */
        { USB_INTERFACE_INFO(USB_CLASS_COMM, USB_CDC_SUBCLASS_ACM,
                USB_CDC_PROTO_NONE) },
index c9f6e97..f27b4ae 100644 (file)
@@ -494,16 +494,24 @@ static int usblp_release(struct inode *inode, struct file *file)
 /* No kernel lock - fine */
 static __poll_t usblp_poll(struct file *file, struct poll_table_struct *wait)
 {
-       __poll_t ret;
+       struct usblp *usblp = file->private_data;
+       __poll_t ret = 0;
        unsigned long flags;
 
-       struct usblp *usblp = file->private_data;
        /* Should we check file->f_mode & FMODE_WRITE before poll_wait()? */
        poll_wait(file, &usblp->rwait, wait);
        poll_wait(file, &usblp->wwait, wait);
+
+       mutex_lock(&usblp->mut);
+       if (!usblp->present)
+               ret |= EPOLLHUP;
+       mutex_unlock(&usblp->mut);
+
        spin_lock_irqsave(&usblp->lock, flags);
-       ret = ((usblp->bidir && usblp->rcomplete) ? EPOLLIN  | EPOLLRDNORM : 0) |
-          ((usblp->no_paper || usblp->wcomplete) ? EPOLLOUT | EPOLLWRNORM : 0);
+       if (usblp->bidir && usblp->rcomplete)
+               ret |= EPOLLIN  | EPOLLRDNORM;
+       if (usblp->no_paper || usblp->wcomplete)
+               ret |= EPOLLOUT | EPOLLWRNORM;
        spin_unlock_irqrestore(&usblp->lock, flags);
        return ret;
 }
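
Note the locking split in the rewritten poll handler: the presence flag is
protected by the mutex while the completion flags live under the spinlock, so
the mask is now built in steps rather than one expression, and EPOLLHUP is
reported once the printer is gone. A reduced sketch of that shape, with
invented field names:

    static __poll_t demo_poll(struct file *file, struct poll_table_struct *wait)
    {
            struct demo *d = file->private_data;
            __poll_t mask = 0;

            poll_wait(file, &d->wq, wait);

            mutex_lock(&d->mut);
            if (!d->present)
                    mask |= EPOLLHUP;
            mutex_unlock(&d->mut);

            spin_lock_irq(&d->lock);
            if (d->readable)
                    mask |= EPOLLIN | EPOLLRDNORM;
            if (d->writable)
                    mask |= EPOLLOUT | EPOLLWRNORM;
            spin_unlock_irq(&d->lock);

            return mask;
    }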
index ad5a0f4..3f03813 100644 (file)
@@ -111,8 +111,8 @@ DECLARE_WAIT_QUEUE_HEAD(usb_kill_urb_queue);
  */
 
 /*-------------------------------------------------------------------------*/
-#define KERNEL_REL     bin2bcd(((LINUX_VERSION_CODE >> 16) & 0x0ff))
-#define KERNEL_VER     bin2bcd(((LINUX_VERSION_CODE >> 8) & 0x0ff))
+#define KERNEL_REL     bin2bcd(LINUX_VERSION_MAJOR)
+#define KERNEL_VER     bin2bcd(LINUX_VERSION_PATCHLEVEL)
 
 /* usb 3.1 root hub device descriptor */
 static const u8 usb31_rh_dev_descriptor[18] = {
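
KERNEL_REL and KERNEL_VER feed the root hub's bcdDevice field; the new macros
read the version components directly instead of shifting them out of
LINUX_VERSION_CODE. Worked example for a 5.12 kernel (bin2bcd reimplemented
here just for the demo):

    /* bin2bcd(12) == 0x12: tens digit in the high nibble */
    static unsigned char bin2bcd(unsigned char val)
    {
            return (val / 10) << 4 | (val % 10);
    }

    /* KERNEL_REL = bin2bcd(5) = 0x05, KERNEL_VER = bin2bcd(12) = 0x12,
     * so bcdDevice = 0x0512, which lsusb renders as "5.12" */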
index 6ade3da..76ac5d6 100644 (file)
@@ -498,6 +498,10 @@ static const struct usb_device_id usb_quirk_list[] = {
        /* DJI CineSSD */
        { USB_DEVICE(0x2ca3, 0x0031), .driver_info = USB_QUIRK_NO_LPM },
 
+       /* Fibocom L850-GL LTE Modem */
+       { USB_DEVICE(0x2cb7, 0x0007), .driver_info =
+                       USB_QUIRK_IGNORE_REMOTE_WAKEUP },
+
        /* INTEL VALUE SSD */
        { USB_DEVICE(0x8086, 0xf1a5), .driver_info = USB_QUIRK_RESET_RESUME },
 
index 8f07b05..a566bb4 100644 (file)
@@ -748,6 +748,38 @@ void usb_put_intf(struct usb_interface *intf)
 }
 EXPORT_SYMBOL_GPL(usb_put_intf);
 
+/**
+ * usb_intf_get_dma_device - acquire a reference on the usb interface's DMA endpoint
+ * @intf: the usb interface
+ *
+ * While a USB device cannot perform DMA operations by itself, many USB
+ * controllers can. A call to usb_intf_get_dma_device() returns the DMA endpoint
+ * for the given USB interface, if any. The returned device structure must be
+ * released with put_device().
+ *
+ * See also usb_get_dma_device().
+ *
+ * Returns: A reference to the usb interface's DMA endpoint; or NULL if none
+ *          exists.
+ */
+struct device *usb_intf_get_dma_device(struct usb_interface *intf)
+{
+       struct usb_device *udev = interface_to_usbdev(intf);
+       struct device *dmadev;
+
+       if (!udev->bus)
+               return NULL;
+
+       dmadev = get_device(udev->bus->sysdev);
+       if (!dmadev || !dmadev->dma_mask) {
+               put_device(dmadev);
+               return NULL;
+       }
+
+       return dmadev;
+}
+EXPORT_SYMBOL_GPL(usb_intf_get_dma_device);
+
 /*                     USB device locking
  *
  * USB devices and interfaces are locked using the semaphore in their
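
A hedged usage sketch for the new helper, following its kernel-doc above; the
surrounding driver, my_buf, and my_len are hypothetical:

    static int demo_map(struct usb_interface *intf, void *my_buf, size_t my_len)
    {
            struct device *dmadev = usb_intf_get_dma_device(intf);
            dma_addr_t addr;
            int ret = 0;

            if (!dmadev)
                    return -ENODEV;

            addr = dma_map_single(dmadev, my_buf, my_len, DMA_TO_DEVICE);
            if (dma_mapping_error(dmadev, addr))
                    ret = -ENOMEM;
            else
                    dma_unmap_single(dmadev, addr, my_len, DMA_TO_DEVICE);

            put_device(dmadev);     /* balance the helper's get_device() */
            return ret;
    }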
index fc3269f..1a9789e 100644 (file)
@@ -4322,7 +4322,8 @@ static int _dwc2_hcd_suspend(struct usb_hcd *hcd)
        if (hsotg->op_state == OTG_STATE_B_PERIPHERAL)
                goto unlock;
 
-       if (hsotg->params.power_down > DWC2_POWER_DOWN_PARAM_PARTIAL)
+       if (hsotg->params.power_down != DWC2_POWER_DOWN_PARAM_PARTIAL ||
+           hsotg->flags.b.port_connect_status == 0)
                goto skip_power_saving;
 
        /*
@@ -5398,7 +5399,7 @@ int dwc2_host_enter_hibernation(struct dwc2_hsotg *hsotg)
        dwc2_writel(hsotg, hprt0, HPRT0);
 
        /* Wait for the HPRT0.PrtSusp register field to be set */
-       if (dwc2_hsotg_wait_bit_set(hsotg, HPRT0, HPRT0_SUSP, 3000))
+       if (dwc2_hsotg_wait_bit_set(hsotg, HPRT0, HPRT0_SUSP, 5000))
                dev_warn(hsotg->dev, "Suspend wasn't generated\n");
 
        /*
index 3d3918a..4c5c697 100644 (file)
@@ -120,6 +120,8 @@ static const struct property_entry dwc3_pci_intel_properties[] = {
 static const struct property_entry dwc3_pci_mrfld_properties[] = {
        PROPERTY_ENTRY_STRING("dr_mode", "otg"),
        PROPERTY_ENTRY_STRING("linux,extcon-name", "mrfld_bcove_pwrsrc"),
+       PROPERTY_ENTRY_BOOL("snps,dis_u3_susphy_quirk"),
+       PROPERTY_ENTRY_BOOL("snps,dis_u2_susphy_quirk"),
        PROPERTY_ENTRY_BOOL("linux,sysdev_is_parent"),
        {}
 };
index 846a47b..3de291a 100644 (file)
@@ -244,6 +244,9 @@ static int dwc3_qcom_interconnect_init(struct dwc3_qcom *qcom)
        struct device *dev = qcom->dev;
        int ret;
 
+       if (has_acpi_companion(dev))
+               return 0;
+
        qcom->icc_path_ddr = of_icc_get(dev, "usb-ddr");
        if (IS_ERR(qcom->icc_path_ddr)) {
                dev_err(dev, "failed to get usb-ddr path: %ld\n",
@@ -358,8 +361,10 @@ static int dwc3_qcom_suspend(struct dwc3_qcom *qcom)
        if (ret)
                dev_warn(qcom->dev, "failed to disable interconnect: %d\n", ret);
 
+       if (device_may_wakeup(qcom->dev))
+               dwc3_qcom_enable_interrupts(qcom);
+
        qcom->is_suspended = true;
-       dwc3_qcom_enable_interrupts(qcom);
 
        return 0;
 }
@@ -372,7 +377,8 @@ static int dwc3_qcom_resume(struct dwc3_qcom *qcom)
        if (!qcom->is_suspended)
                return 0;
 
-       dwc3_qcom_disable_interrupts(qcom);
+       if (device_may_wakeup(qcom->dev))
+               dwc3_qcom_disable_interrupts(qcom);
 
        for (i = 0; i < qcom->num_clocks; i++) {
                ret = clk_prepare_enable(qcom->clks[i]);
@@ -650,16 +656,19 @@ static int dwc3_qcom_of_register_core(struct platform_device *pdev)
        ret = of_platform_populate(np, NULL, NULL, dev);
        if (ret) {
                dev_err(dev, "failed to register dwc3 core - %d\n", ret);
-               return ret;
+               goto node_put;
        }
 
        qcom->dwc3 = of_find_device_by_node(dwc3_np);
        if (!qcom->dwc3) {
+               ret = -ENODEV;
                dev_err(dev, "failed to get dwc3 platform device\n");
-               return -ENODEV;
        }
 
-       return 0;
+node_put:
+       of_node_put(dwc3_np);
+
+       return ret;
 }
 
 static struct platform_device *
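
dwc3_np in the hunk above comes from an of_get_*() lookup earlier in the
function (outside this hunk), so every exit path owes an of_node_put(); the
fix funnels both the populate failure and the success path through one label.
The general shape, as a hedged sketch:

    static int demo_register_child(struct device *dev, struct device_node *parent)
    {
            struct device_node *np = of_get_child_by_name(parent, "child");
            int ret;

            if (!np)
                    return -ENODEV;

            ret = of_platform_populate(np, NULL, NULL, dev);
            of_node_put(np);        /* drop the reference on every path */
            return ret;
    }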
@@ -938,6 +947,8 @@ static const struct dwc3_acpi_pdata sdm845_acpi_urs_pdata = {
 static const struct acpi_device_id dwc3_qcom_acpi_match[] = {
        { "QCOM2430", (unsigned long)&sdm845_acpi_pdata },
        { "QCOM0304", (unsigned long)&sdm845_acpi_urs_pdata },
+       { "QCOM0497", (unsigned long)&sdm845_acpi_urs_pdata },
+       { "QCOM04A6", (unsigned long)&sdm845_acpi_pdata },
        { },
 };
 MODULE_DEVICE_TABLE(acpi, dwc3_qcom_acpi_match);
index aebcf8e..c7ef218 100644 (file)
@@ -783,8 +783,6 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
 
        trace_dwc3_gadget_ep_disable(dep);
 
-       dwc3_remove_requests(dwc, dep);
-
        /* make sure HW endpoint isn't stalled */
        if (dep->flags & DWC3_EP_STALL)
                __dwc3_gadget_ep_set_halt(dep, 0, false);
@@ -793,16 +791,18 @@ static int __dwc3_gadget_ep_disable(struct dwc3_ep *dep)
        reg &= ~DWC3_DALEPENA_EP(dep->number);
        dwc3_writel(dwc->regs, DWC3_DALEPENA, reg);
 
-       dep->stream_capable = false;
-       dep->type = 0;
-       dep->flags = 0;
-
        /* Clear out the ep descriptors for non-ep0 */
        if (dep->number > 1) {
                dep->endpoint.comp_desc = NULL;
                dep->endpoint.desc = NULL;
        }
 
+       dwc3_remove_requests(dwc, dep);
+
+       dep->stream_capable = false;
+       dep->type = 0;
+       dep->flags = 0;
+
        return 0;
 }
 
@@ -1617,7 +1617,7 @@ static int __dwc3_gadget_ep_queue(struct dwc3_ep *dep, struct dwc3_request *req)
 {
        struct dwc3             *dwc = dep->dwc;
 
-       if (!dep->endpoint.desc || !dwc->pullups_connected) {
+       if (!dep->endpoint.desc || !dwc->pullups_connected || !dwc->connected) {
                dev_err(dwc->dev, "%s: can't queue to disabled endpoint\n",
                                dep->name);
                return -ESHUTDOWN;
@@ -2083,7 +2083,7 @@ static void __dwc3_gadget_set_speed(struct dwc3 *dwc)
        u32                     reg;
 
        speed = dwc->gadget_max_speed;
-       if (speed > dwc->maximum_speed)
+       if (speed == USB_SPEED_UNKNOWN || speed > dwc->maximum_speed)
                speed = dwc->maximum_speed;
 
        if (speed == USB_SPEED_SUPER_PLUS &&
@@ -2247,6 +2247,7 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
        if (!is_on) {
                u32 count;
 
+               dwc->connected = false;
                /*
                 * In the Synopsis DesignWare Cores USB3 Databook Rev. 3.30a
                 * Section 4.1.8 Table 4-7, it states that for a device-initiated
@@ -2271,7 +2272,6 @@ static int dwc3_gadget_pullup(struct usb_gadget *g, int is_on)
                        dwc->ev_buf->lpos = (dwc->ev_buf->lpos + count) %
                                                dwc->ev_buf->length;
                }
-               dwc->connected = false;
        } else {
                __dwc3_gadget_start(dwc);
        }
@@ -2523,6 +2523,7 @@ static void dwc3_gadget_set_ssp_rate(struct usb_gadget *g,
        unsigned long           flags;
 
        spin_lock_irqsave(&dwc->lock, flags);
+       dwc->gadget_max_speed = USB_SPEED_SUPER_PLUS;
        dwc->gadget_ssp_rate = rate;
        spin_unlock_irqrestore(&dwc->lock, flags);
 }
@@ -3321,8 +3322,6 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
 {
        u32                     reg;
 
-       dwc->connected = true;
-
        /*
         * WORKAROUND: DWC3 revisions <1.88a have an issue which
         * would cause a missing Disconnect Event if there's a
@@ -3362,6 +3361,7 @@ static void dwc3_gadget_reset_interrupt(struct dwc3 *dwc)
         * transfers."
         */
        dwc3_stop_active_transfers(dwc);
+       dwc->connected = true;
 
        reg = dwc3_readl(dwc->regs, DWC3_DCTL);
        reg &= ~DWC3_DCTL_TSTCTRL_MASK;
index 0d56f33..15a607c 100644 (file)
@@ -97,6 +97,8 @@ struct gadget_config_name {
        struct list_head list;
 };
 
+#define USB_MAX_STRING_WITH_NULL_LEN   (USB_MAX_STRING_LEN+1)
+
 static int usb_string_copy(const char *s, char **s_copy)
 {
        int ret;
@@ -106,12 +108,16 @@ static int usb_string_copy(const char *s, char **s_copy)
        if (ret > USB_MAX_STRING_LEN)
                return -EOVERFLOW;
 
-       str = kstrdup(s, GFP_KERNEL);
-       if (!str)
-               return -ENOMEM;
+       if (copy) {
+               str = copy;
+       } else {
+               str = kmalloc(USB_MAX_STRING_WITH_NULL_LEN, GFP_KERNEL);
+               if (!str)
+                       return -ENOMEM;
+       }
+       strcpy(str, s);
        if (str[ret - 1] == '\n')
                str[ret - 1] = '\0';
-       kfree(copy);
        *s_copy = str;
        return 0;
 }
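
The configfs rewrite allocates one maximum-sized buffer and then overwrites it
in place ("copy" is the previously stored string, taken from the part of the
function above this hunk), so a concurrent reader of *s_copy never observes a
freed pointer. A hedged sketch of the reuse-one-buffer pattern, with MAXLEN
standing in for USB_MAX_STRING_WITH_NULL_LEN:

    #define MAXLEN 127

    static int store_string(const char *s, char **slot)
    {
            char *buf = *slot;

            if (strlen(s) >= MAXLEN)
                    return -EOVERFLOW;

            if (!buf) {
                    buf = kmalloc(MAXLEN, GFP_KERNEL);
                    if (!buf)
                            return -ENOMEM;
            }
            strcpy(buf, s);         /* overwrite in place, never kfree() */
            *slot = buf;
            return 0;
    }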
index 00d3469..560382e 100644 (file)
@@ -499,6 +499,7 @@ static void f_audio_disable(struct usb_function *f)
        uac1->as_out_alt = 0;
        uac1->as_in_alt = 0;
 
+       u_audio_stop_playback(&uac1->g_audio);
        u_audio_stop_capture(&uac1->g_audio);
 }
 
index 5d960b6..6f03e94 100644 (file)
@@ -478,7 +478,7 @@ static int set_ep_max_packet_size(const struct f_uac2_opts *uac2_opts,
        }
 
        max_size_bw = num_channels(chmask) * ssize *
-               DIV_ROUND_UP(srate, factor / (1 << (ep_desc->bInterval - 1)));
+               ((srate / (factor / (1 << (ep_desc->bInterval - 1)))) + 1);
        ep_desc->wMaxPacketSize = cpu_to_le16(min_t(u16, max_size_bw,
                                                    max_size_ep));
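
Illustrative numbers for the bandwidth change (assuming a full-speed endpoint
polled every frame, so factor / (1 << (bInterval - 1)) = 1000 packets/s, and
stereo 16-bit audio at 48 kHz, i.e. num_channels = 2, ssize = 2):

    old: 2 * 2 * DIV_ROUND_UP(48000, 1000) = 4 * 48 = 192 bytes, an exact fit
    new: 2 * 2 * (48000 / 1000 + 1)        = 4 * 49 = 196 bytes

The extra audio frame of headroom per packet keeps a slightly fast source
clock from overrunning the reserved wMaxPacketSize.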
 
index 3dfb460..f558c31 100644 (file)
@@ -182,12 +182,11 @@ out:                                                                      \
                                                size_t len)             \
        {                                                               \
                struct f_##_f_##_opts *opts = to_f_##_f_##_opts(item);  \
-               int ret;                                                \
+               int ret = -EINVAL;                                      \
                u8 val;                                                 \
                                                                        \
                mutex_lock(&opts->lock);                                \
-               ret = sscanf(page, "%02hhx", &val);                     \
-               if (ret > 0) {                                          \
+               if (sscanf(page, "%02hhx", &val) > 0) {                 \
                        opts->_n_ = val;                                \
                        ret = len;                                      \
                }                                                       \
index 8d387e0..c80f9bd 100644 (file)
@@ -153,6 +153,11 @@ static int udc_pci_probe(
        pci_set_master(pdev);
        pci_try_set_mwi(pdev);
 
+       dev->phys_addr = resource;
+       dev->irq = pdev->irq;
+       dev->pdev = pdev;
+       dev->dev = &pdev->dev;
+
        /* init dma pools */
        if (use_dma) {
                retval = init_dma_pools(dev);
@@ -160,11 +165,6 @@ static int udc_pci_probe(
                        goto err_dma;
        }
 
-       dev->phys_addr = resource;
-       dev->irq = pdev->irq;
-       dev->pdev = pdev;
-       dev->dev = &pdev->dev;
-
        /* general probing */
        if (udc_probe(dev)) {
                retval = -ENODEV;
index bfd8e77..5c7dea5 100644 (file)
@@ -46,8 +46,8 @@
  *    - Make vid/did overridable
  *    - make it look like usb1 if usb1 mode forced
  */
-#define KERNEL_REL     bin2bcd(((LINUX_VERSION_CODE >> 16) & 0x0ff))
-#define KERNEL_VER     bin2bcd(((LINUX_VERSION_CODE >> 8) & 0x0ff))
+#define KERNEL_REL     bin2bcd(LINUX_VERSION_MAJOR)
+#define KERNEL_VER     bin2bcd(LINUX_VERSION_PATCHLEVEL)
 
 enum {
        AST_VHUB_STR_INDEX_MAX = 4,
index f1ea514..1d3ebb0 100644 (file)
@@ -1773,8 +1773,8 @@ static int s3c2410_udc_probe(struct platform_device *pdev)
        udc_info = dev_get_platdata(&pdev->dev);
 
        base_addr = devm_platform_ioremap_resource(pdev, 0);
-       if (!base_addr) {
-               retval = -ENOMEM;
+       if (IS_ERR(base_addr)) {
+               retval = PTR_ERR(base_addr);
                goto err_mem;
        }
 
index fe010cc..2f27dc0 100644 (file)
@@ -397,6 +397,13 @@ static void xhci_mtk_quirks(struct device *dev, struct xhci_hcd *xhci)
        xhci->quirks |= XHCI_SPURIOUS_SUCCESS;
        if (mtk->lpm_support)
                xhci->quirks |= XHCI_LPM_SUPPORT;
+
+       /*
+        * MTK xHCI 0.96: PSA is 1 by default even if the host doesn't
+        * support streams, and it's 3 when streams are supported.
+        */
+       if (xhci->hci_version < 0x100 && HCC_MAX_PSA(xhci->hcc_params) == 4)
+               xhci->quirks |= XHCI_BROKEN_STREAMS;
 }
 
 /* called during probe() after chip reset completes */
@@ -548,7 +555,8 @@ static int xhci_mtk_probe(struct platform_device *pdev)
        if (ret)
                goto put_usb3_hcd;
 
-       if (HCC_MAX_PSA(xhci->hcc_params) >= 4)
+       if (HCC_MAX_PSA(xhci->hcc_params) >= 4 &&
+           !(xhci->quirks & XHCI_BROKEN_STREAMS))
                xhci->shared_hcd->can_do_streams = 1;
 
        ret = usb_add_hcd(xhci->shared_hcd, irq, IRQF_SHARED);
index 84da840..5bbccc9 100644 (file)
@@ -66,6 +66,7 @@
 #define PCI_DEVICE_ID_ASMEDIA_1042A_XHCI               0x1142
 #define PCI_DEVICE_ID_ASMEDIA_1142_XHCI                        0x1242
 #define PCI_DEVICE_ID_ASMEDIA_2142_XHCI                        0x2142
+#define PCI_DEVICE_ID_ASMEDIA_3242_XHCI                        0x3242
 
 static const char hcd_name[] = "xhci_hcd";
 
@@ -276,11 +277,14 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
                pdev->device == PCI_DEVICE_ID_ASMEDIA_1042_XHCI)
                xhci->quirks |= XHCI_BROKEN_STREAMS;
        if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
-               pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI)
+               pdev->device == PCI_DEVICE_ID_ASMEDIA_1042A_XHCI) {
                xhci->quirks |= XHCI_TRUST_TX_LENGTH;
+               xhci->quirks |= XHCI_NO_64BIT_SUPPORT;
+       }
        if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
            (pdev->device == PCI_DEVICE_ID_ASMEDIA_1142_XHCI ||
-            pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI))
+            pdev->device == PCI_DEVICE_ID_ASMEDIA_2142_XHCI ||
+            pdev->device == PCI_DEVICE_ID_ASMEDIA_3242_XHCI))
                xhci->quirks |= XHCI_NO_64BIT_SUPPORT;
 
        if (pdev->vendor == PCI_VENDOR_ID_ASMEDIA &&
@@ -295,6 +299,11 @@ static void xhci_pci_quirks(struct device *dev, struct xhci_hcd *xhci)
             pdev->device == 0x9026)
                xhci->quirks |= XHCI_RESET_PLL_ON_DISCONNECT;
 
+       if (pdev->vendor == PCI_VENDOR_ID_AMD &&
+           (pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_2 ||
+            pdev->device == PCI_DEVICE_ID_AMD_PROMONTORYA_4))
+               xhci->quirks |= XHCI_NO_SOFT_RETRY;
+
        if (xhci->quirks & XHCI_RESET_ON_RESUME)
                xhci_dbg_trace(xhci, trace_xhci_dbg_quirks,
                                "QUIRK: Resetting on resume");
index 5e548a1..ce38076 100644 (file)
@@ -2484,7 +2484,8 @@ static int process_bulk_intr_td(struct xhci_hcd *xhci, struct xhci_td *td,
                remaining       = 0;
                break;
        case COMP_USB_TRANSACTION_ERROR:
-               if ((ep_ring->err_count++ > MAX_SOFT_RETRY) ||
+               if (xhci->quirks & XHCI_NO_SOFT_RETRY ||
+                   (ep_ring->err_count++ > MAX_SOFT_RETRY) ||
                    le32_to_cpu(slot_ctx->tt_info) & TT_SLOT)
                        break;
 
index bd27bd6..1975016 100644 (file)
@@ -883,44 +883,42 @@ static void xhci_clear_command_ring(struct xhci_hcd *xhci)
        xhci_set_cmd_ring_deq(xhci);
 }
 
-static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci)
+/*
+ * Disable port wake bits if do_wakeup is not set.
+ *
+ * Also clear a possible internal port wake state left hanging for ports that
+ * detected termination but never successfully enumerated (trained to U0).
+ * An internal wake causes an immediate xHCI wake after suspend. The PORT_CSC
+ * write done at enumeration clears this wake, so force one here as well for
+ * unconnected ports.
+ */
+
+static void xhci_disable_hub_port_wake(struct xhci_hcd *xhci,
+                                      struct xhci_hub *rhub,
+                                      bool do_wakeup)
 {
-       struct xhci_port **ports;
-       int port_index;
        unsigned long flags;
        u32 t1, t2, portsc;
+       int i;
 
        spin_lock_irqsave(&xhci->lock, flags);
 
-       /* disable usb3 ports Wake bits */
-       port_index = xhci->usb3_rhub.num_ports;
-       ports = xhci->usb3_rhub.ports;
-       while (port_index--) {
-               t1 = readl(ports[port_index]->addr);
-               portsc = t1;
-               t1 = xhci_port_state_to_neutral(t1);
-               t2 = t1 & ~PORT_WAKE_BITS;
-               if (t1 != t2) {
-                       writel(t2, ports[port_index]->addr);
-                       xhci_dbg(xhci, "disable wake bits port %d-%d, portsc: 0x%x, write: 0x%x\n",
-                                xhci->usb3_rhub.hcd->self.busnum,
-                                port_index + 1, portsc, t2);
-               }
-       }
+       for (i = 0; i < rhub->num_ports; i++) {
+               portsc = readl(rhub->ports[i]->addr);
+               t1 = xhci_port_state_to_neutral(portsc);
+               t2 = t1;
+
+               /* clear wake bits if do_wakeup is not set */
+               if (!do_wakeup)
+                       t2 &= ~PORT_WAKE_BITS;
+
+               /* Don't touch csc bit if connected or connect change is set */
+               if (!(portsc & (PORT_CSC | PORT_CONNECT)))
+                       t2 |= PORT_CSC;
 
-       /* disable usb2 ports Wake bits */
-       port_index = xhci->usb2_rhub.num_ports;
-       ports = xhci->usb2_rhub.ports;
-       while (port_index--) {
-               t1 = readl(ports[port_index]->addr);
-               portsc = t1;
-               t1 = xhci_port_state_to_neutral(t1);
-               t2 = t1 & ~PORT_WAKE_BITS;
                if (t1 != t2) {
-                       writel(t2, ports[port_index]->addr);
-                       xhci_dbg(xhci, "disable wake bits port %d-%d, portsc: 0x%x, write: 0x%x\n",
-                                xhci->usb2_rhub.hcd->self.busnum,
-                                port_index + 1, portsc, t2);
+                       writel(t2, rhub->ports[i]->addr);
+                       xhci_dbg(xhci, "config port %d-%d wake bits, portsc: 0x%x, write: 0x%x\n",
+                                rhub->hcd->self.busnum, i + 1, portsc, t2);
                }
        }
        spin_unlock_irqrestore(&xhci->lock, flags);
@@ -983,8 +981,8 @@ int xhci_suspend(struct xhci_hcd *xhci, bool do_wakeup)
                return -EINVAL;
 
        /* Clear root port wake on bits if wakeup not allowed. */
-       if (!do_wakeup)
-               xhci_disable_port_wake_on_bits(xhci);
+       xhci_disable_hub_port_wake(xhci, &xhci->usb3_rhub, do_wakeup);
+       xhci_disable_hub_port_wake(xhci, &xhci->usb2_rhub, do_wakeup);
 
        if (!HCD_HW_ACCESSIBLE(hcd))
                return 0;
@@ -1088,6 +1086,7 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
        struct usb_hcd          *secondary_hcd;
        int                     retval = 0;
        bool                    comp_timer_running = false;
+       bool                    pending_portevent = false;
 
        if (!hcd->state)
                return 0;
@@ -1226,13 +1225,22 @@ int xhci_resume(struct xhci_hcd *xhci, bool hibernated)
 
  done:
        if (retval == 0) {
-               /* Resume root hubs only when have pending events. */
-               if (xhci_pending_portevent(xhci)) {
+               /*
+                * Resume root hubs only if there are pending events.
+                * USB 3 devices resend a U3 LFPS wake after a 100 ms delay if
+                * the first wake signalling failed, so give them that chance.
+                */
+               pending_portevent = xhci_pending_portevent(xhci);
+               if (!pending_portevent) {
+                       msleep(120);
+                       pending_portevent = xhci_pending_portevent(xhci);
+               }
+
+               if (pending_portevent) {
                        usb_hcd_resume_root_hub(xhci->shared_hcd);
                        usb_hcd_resume_root_hub(hcd);
                }
        }
-
        /*
         * If system is subject to the Quirk, Compliance Mode Timer needs to
         * be re-initialized Always after a system resume. Ports are subject
index d41de5d..ca822ad 100644 (file)
@@ -1891,6 +1891,7 @@ struct xhci_hcd {
 #define XHCI_SKIP_PHY_INIT     BIT_ULL(37)
 #define XHCI_DISABLE_SPARSE    BIT_ULL(38)
 #define XHCI_SG_TRB_CACHE_SIZE_QUIRK   BIT_ULL(39)
+#define XHCI_NO_SOFT_RETRY     BIT_ULL(40)
 
        unsigned int            num_active_eps;
        unsigned int            limit_active_eps;
index 670e4d9..dcc88df 100644 (file)
@@ -117,7 +117,6 @@ MODULE_DEVICE_TABLE(usb, ld_usb_table);
 MODULE_AUTHOR("Michael Hund <mhund@ld-didactic.de>");
 MODULE_DESCRIPTION("LD USB Driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("LD USB Devices");
 
 /* All interrupt in transfers are collected in a ring buffer to
  * avoid racing conditions and get better performance of the driver.
index 1cd8772..fc0457d 100644 (file)
@@ -2004,10 +2004,14 @@ static void musb_pm_runtime_check_session(struct musb *musb)
                MUSB_DEVCTL_HR;
        switch (devctl & ~s) {
        case MUSB_QUIRK_B_DISCONNECT_99:
-               musb_dbg(musb, "Poll devctl in case of suspend after disconnect\n");
-               schedule_delayed_work(&musb->irq_work,
-                                     msecs_to_jiffies(1000));
-               break;
+               if (musb->quirk_retries && !musb->flush_irq_work) {
+                       musb_dbg(musb, "Poll devctl in case of suspend after disconnect\n");
+                       schedule_delayed_work(&musb->irq_work,
+                                             msecs_to_jiffies(1000));
+                       musb->quirk_retries--;
+                       break;
+               }
+               fallthrough;
        case MUSB_QUIRK_B_INVALID_VBUS_91:
                if (musb->quirk_retries && !musb->flush_irq_work) {
                        musb_dbg(musb,
index e7334b7..75fff2e 100644 (file)
@@ -746,6 +746,8 @@ struct usbhs_pipe *usbhs_pipe_malloc(struct usbhs_priv *priv,
 
 void usbhs_pipe_free(struct usbhs_pipe *pipe)
 {
+       usbhsp_pipe_select(pipe);
+       usbhsp_pipe_cfg_set(pipe, 0xFFFF, 0);
        usbhsp_put_pipe(pipe);
 }
 
index 8d997b7..2db917e 100644 (file)
@@ -86,6 +86,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x1a86, 0x7522) },
        { USB_DEVICE(0x1a86, 0x7523) },
        { USB_DEVICE(0x4348, 0x5523) },
+       { USB_DEVICE(0x9986, 0x7523) },
        { },
 };
 MODULE_DEVICE_TABLE(usb, id_table);
index 9e1c609..a373cd6 100644 (file)
@@ -145,6 +145,7 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x10C4, 0x8857) }, /* CEL EM357 ZigBee USB Stick */
        { USB_DEVICE(0x10C4, 0x88A4) }, /* MMB Networks ZigBee USB Device */
        { USB_DEVICE(0x10C4, 0x88A5) }, /* Planet Innovation Ingeni ZigBee USB Device */
+       { USB_DEVICE(0x10C4, 0x88D8) }, /* Acuity Brands nLight Air Adapter */
        { USB_DEVICE(0x10C4, 0x88FB) }, /* CESINEL MEDCAL STII Network Analyzer */
        { USB_DEVICE(0x10C4, 0x8938) }, /* CESINEL MEDCAL S II Network Analyzer */
        { USB_DEVICE(0x10C4, 0x8946) }, /* Ketra N1 Wireless Interface */
@@ -201,6 +202,8 @@ static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x1901, 0x0194) }, /* GE Healthcare Remote Alarm Box */
        { USB_DEVICE(0x1901, 0x0195) }, /* GE B850/B650/B450 CP2104 DP UART interface */
        { USB_DEVICE(0x1901, 0x0196) }, /* GE B850 CP2105 DP UART interface */
+       { USB_DEVICE(0x1901, 0x0197) }, /* GE CS1000 Display serial interface */
+       { USB_DEVICE(0x1901, 0x0198) }, /* GE CS1000 M.2 Key E serial interface */
        { USB_DEVICE(0x199B, 0xBA30) }, /* LORD WSDA-200-USB */
        { USB_DEVICE(0x19CF, 0x3000) }, /* Parrot NMEA GPS Flight Recorder */
        { USB_DEVICE(0x1ADB, 0x0001) }, /* Schweitzer Engineering C662 Cable */
index a493670..68401ad 100644 (file)
@@ -3003,26 +3003,32 @@ static int edge_startup(struct usb_serial *serial)
                                response = -ENODEV;
                        }
 
-                       usb_free_urb(edge_serial->interrupt_read_urb);
-                       kfree(edge_serial->interrupt_in_buffer);
-
-                       usb_free_urb(edge_serial->read_urb);
-                       kfree(edge_serial->bulk_in_buffer);
-
-                       kfree(edge_serial);
-
-                       return response;
+                       goto error;
                }
 
                /* start interrupt read for this edgeport; this interrupt will
                 * continue as long as the edgeport is connected */
                response = usb_submit_urb(edge_serial->interrupt_read_urb,
                                                                GFP_KERNEL);
-               if (response)
+               if (response) {
                        dev_err(ddev, "%s - Error %d submitting control urb\n",
                                __func__, response);
+
+                       goto error;
+               }
        }
        return response;
+
+error:
+       usb_free_urb(edge_serial->interrupt_read_urb);
+       kfree(edge_serial->interrupt_in_buffer);
+
+       usb_free_urb(edge_serial->read_urb);
+       kfree(edge_serial->bulk_in_buffer);
+
+       kfree(edge_serial);
+
+       return response;
 }
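The edge_startup() rework above collapses two duplicated free-and-return sequences into a single error: label, the standard kernel unwind idiom. A condensed self-contained sketch under hypothetical demo_* names:

#include <linux/slab.h>

struct demo {
	void *buf_a;
	void *buf_b;
};

static void demo_free(struct demo *d)
{
	/* kfree(NULL) is a no-op, so a partially built demo unwinds safely */
	kfree(d->buf_a);
	kfree(d->buf_b);
}

static int demo_setup(struct demo *d)
{
	int ret = -ENOMEM;

	d->buf_a = kzalloc(16, GFP_KERNEL);
	if (!d->buf_a)
		goto error;

	d->buf_b = kzalloc(16, GFP_KERNEL);
	if (!d->buf_b)
		goto error;	/* every failure path shares one cleanup */

	return 0;

error:
	demo_free(d);
	return ret;
}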
 
 
index 483d07d..0ca0490 100644 (file)
@@ -545,37 +545,13 @@ static void xr_close(struct usb_serial_port *port)
 
 static int xr_probe(struct usb_serial *serial, const struct usb_device_id *id)
 {
-       struct usb_driver *driver = serial->type->usb_driver;
-       struct usb_interface *control_interface;
-       int ret;
-
        /* Don't bind to control interface */
        if (serial->interface->cur_altsetting->desc.bInterfaceNumber == 0)
                return -ENODEV;
 
-       /* But claim the control interface during data interface probe */
-       control_interface = usb_ifnum_to_if(serial->dev, 0);
-       if (!control_interface)
-               return -ENODEV;
-
-       ret = usb_driver_claim_interface(driver, control_interface, NULL);
-       if (ret) {
-               dev_err(&serial->interface->dev, "Failed to claim control interface\n");
-               return ret;
-       }
-
        return 0;
 }
 
-static void xr_disconnect(struct usb_serial *serial)
-{
-       struct usb_driver *driver = serial->type->usb_driver;
-       struct usb_interface *control_interface;
-
-       control_interface = usb_ifnum_to_if(serial->dev, 0);
-       usb_driver_release_interface(driver, control_interface);
-}
-
 static const struct usb_device_id id_table[] = {
        { USB_DEVICE(0x04e2, 0x1410) }, /* XR21V141X */
        { }
@@ -590,7 +566,6 @@ static struct usb_serial_driver xr_device = {
        .id_table               = id_table,
        .num_ports              = 1,
        .probe                  = xr_probe,
-       .disconnect             = xr_disconnect,
        .open                   = xr_open,
        .close                  = xr_close,
        .break_ctl              = xr_break_ctl,
index 5eb895b..f4304ce 100644 (file)
@@ -656,6 +656,13 @@ void usb_stor_invoke_transport(struct scsi_cmnd *srb, struct us_data *us)
                need_auto_sense = 1;
        }
 
+       /* Some devices (Kindle) require another command after SYNC CACHE */
+       if ((us->fflags & US_FL_SENSE_AFTER_SYNC) &&
+                       srb->cmnd[0] == SYNCHRONIZE_CACHE) {
+               usb_stor_dbg(us, "-- sense after SYNC CACHE\n");
+               need_auto_sense = 1;
+       }
+
        /*
         * If we have a failure, we're going to do a REQUEST_SENSE 
         * automatically.  Note that we differentiate between a command
index 5732e96..efa972b 100644 (file)
@@ -2212,6 +2212,18 @@ UNUSUAL_DEV( 0x1908, 0x3335, 0x0200, 0x0200,
                US_FL_NO_READ_DISC_INFO ),
 
 /*
+ * Reported by Matthias Schwarzott <zzam@gentoo.org>
+ * The Amazon Kindle treats SYNCHRONIZE CACHE as an indication that
+ * the host may be finished with it, and automatically ejects its
+ * emulated media unless it receives another command within one second.
+ */
+UNUSUAL_DEV( 0x1949, 0x0004, 0x0000, 0x9999,
+               "Amazon",
+               "Kindle",
+               USB_SC_DEVICE, USB_PR_DEVICE, NULL,
+               US_FL_SENSE_AFTER_SYNC ),
+
+/*
  * Reported by Oliver Neukum <oneukum@suse.com>
  * This device morphes spontaneously into another device if the access
  * pattern of Windows isn't followed. Thus writable media would be dirty
index be0b646..ce7af39 100644 (file)
@@ -942,6 +942,7 @@ static int tcpm_set_current_limit(struct tcpm_port *port, u32 max_ma, u32 mv)
 
        port->supply_voltage = mv;
        port->current_limit = max_ma;
+       power_supply_changed(port->psy);
 
        if (port->tcpc->set_current_limit)
                ret = port->tcpc->set_current_limit(port->tcpc, max_ma, mv);
@@ -2928,6 +2929,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
 
        port->pps_data.supported = false;
        port->usb_type = POWER_SUPPLY_USB_TYPE_PD;
+       power_supply_changed(port->psy);
 
        /*
         * Select the source PDO providing the most power which has a
@@ -2952,6 +2954,7 @@ static int tcpm_pd_select_pdo(struct tcpm_port *port, int *sink_pdo,
                                port->pps_data.supported = true;
                                port->usb_type =
                                        POWER_SUPPLY_USB_TYPE_PD_PPS;
+                               power_supply_changed(port->psy);
                        }
                        continue;
                default:
@@ -3109,6 +3112,7 @@ static unsigned int tcpm_pd_select_pps_apdo(struct tcpm_port *port)
                                                  port->pps_data.out_volt));
                port->pps_data.op_curr = min(port->pps_data.max_curr,
                                             port->pps_data.op_curr);
+               power_supply_changed(port->psy);
        }
 
        return src_pdo;
@@ -3344,6 +3348,7 @@ static int tcpm_set_charge(struct tcpm_port *port, bool charge)
                        return ret;
        }
        port->vbus_charge = charge;
+       power_supply_changed(port->psy);
        return 0;
 }
 
@@ -3523,6 +3528,7 @@ static void tcpm_reset_port(struct tcpm_port *port)
        port->try_src_count = 0;
        port->try_snk_count = 0;
        port->usb_type = POWER_SUPPLY_USB_TYPE_C;
+       power_supply_changed(port->psy);
        port->nr_sink_caps = 0;
        port->sink_cap_done = false;
        if (port->tcpc->enable_frs)
@@ -5167,7 +5173,7 @@ static void tcpm_enable_frs_work(struct kthread_work *work)
                goto unlock;
 
        /* Send when the state machine is idle */
-       if (port->state != SNK_READY || port->vdm_state != VDM_STATE_DONE || port->send_discover)
+       if (port->state != SNK_READY || port->vdm_sm_running || port->send_discover)
                goto resched;
 
        port->upcoming_state = GET_SINK_CAP;
@@ -5905,7 +5911,7 @@ static int tcpm_psy_set_prop(struct power_supply *psy,
                ret = -EINVAL;
                break;
        }
-
+       power_supply_changed(port->psy);
        return ret;
 }
 
@@ -6058,6 +6064,7 @@ struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc)
        err = devm_tcpm_psy_register(port);
        if (err)
                goto out_role_sw_put;
+       power_supply_changed(port->psy);
 
        port->typec_port = typec_register_port(port->dev, &port->typec_caps);
        if (IS_ERR(port->typec_port)) {
index 6e6ef63..29bd1c5 100644 (file)
@@ -64,7 +64,6 @@ enum {
 struct tps6598x_rx_identity_reg {
        u8 status;
        struct usb_pd_identity identity;
-       u32 vdo[3];
 } __packed;
 
 /* Standard Task return codes */
index 2305d42..8f1de1f 100644 (file)
@@ -46,6 +46,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
        int sockfd = 0;
        struct socket *socket;
        int rv;
+       struct task_struct *tcp_rx = NULL;
+       struct task_struct *tcp_tx = NULL;
 
        if (!sdev) {
                dev_err(dev, "sdev is null\n");
@@ -69,23 +71,47 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
                }
 
                socket = sockfd_lookup(sockfd, &err);
-               if (!socket)
+               if (!socket) {
+                       dev_err(dev, "failed to lookup sock");
                        goto err;
+               }
 
-               sdev->ud.tcp_socket = socket;
-               sdev->ud.sockfd = sockfd;
+               if (socket->type != SOCK_STREAM) {
+                       dev_err(dev, "Expecting SOCK_STREAM - found %d",
+                               socket->type);
+                       goto sock_err;
+               }
 
+               /* unlock and create threads and get tasks */
                spin_unlock_irq(&sdev->ud.lock);
+               tcp_rx = kthread_create(stub_rx_loop, &sdev->ud, "stub_rx");
+               if (IS_ERR(tcp_rx)) {
+                       sockfd_put(socket);
+                       return -EINVAL;
+               }
+               tcp_tx = kthread_create(stub_tx_loop, &sdev->ud, "stub_tx");
+               if (IS_ERR(tcp_tx)) {
+                       kthread_stop(tcp_rx);
+                       sockfd_put(socket);
+                       return -EINVAL;
+               }
 
-               sdev->ud.tcp_rx = kthread_get_run(stub_rx_loop, &sdev->ud,
-                                                 "stub_rx");
-               sdev->ud.tcp_tx = kthread_get_run(stub_tx_loop, &sdev->ud,
-                                                 "stub_tx");
+               /* get task structs now */
+               get_task_struct(tcp_rx);
+               get_task_struct(tcp_tx);
 
+               /* lock and update sdev->ud state */
                spin_lock_irq(&sdev->ud.lock);
+               sdev->ud.tcp_socket = socket;
+               sdev->ud.sockfd = sockfd;
+               sdev->ud.tcp_rx = tcp_rx;
+               sdev->ud.tcp_tx = tcp_tx;
                sdev->ud.status = SDEV_ST_USED;
                spin_unlock_irq(&sdev->ud.lock);
 
+               wake_up_process(sdev->ud.tcp_rx);
+               wake_up_process(sdev->ud.tcp_tx);
+
        } else {
                dev_info(dev, "stub down\n");
 
@@ -100,6 +126,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
 
        return count;
 
+sock_err:
+       sockfd_put(socket);
 err:
        spin_unlock_irq(&sdev->ud.lock);
        return -EINVAL;
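All three usbip attach paths in this series converge on the same shape: create the kthreads while unlocked (kthread_create() may sleep), pin them with get_task_struct(), publish them under the lock, and wake them only once the shared state is consistent. A hedged sketch of that ordering (the demo_* names are hypothetical):

#include <linux/kthread.h>
#include <linux/sched/task.h>
#include <linux/spinlock.h>
#include <linux/err.h>

struct demo_dev {
	spinlock_t lock;
	struct task_struct *tcp_rx;
	struct task_struct *tcp_tx;
};

static int demo_rx_loop(void *data) { return 0; }	/* real loops elided */
static int demo_tx_loop(void *data) { return 0; }

static int demo_start_threads(struct demo_dev *d)
{
	struct task_struct *rx, *tx;

	/* create while unlocked: kthread_create() may sleep */
	rx = kthread_create(demo_rx_loop, d, "demo_rx");
	if (IS_ERR(rx))
		return PTR_ERR(rx);

	tx = kthread_create(demo_tx_loop, d, "demo_tx");
	if (IS_ERR(tx)) {
		kthread_stop(rx);
		return PTR_ERR(tx);
	}

	/* pin the task structs before anyone else can see them */
	get_task_struct(rx);
	get_task_struct(tx);

	/* publish under the lock, then wake */
	spin_lock_irq(&d->lock);
	d->tcp_rx = rx;
	d->tcp_tx = tx;
	spin_unlock_irq(&d->lock);

	wake_up_process(rx);
	wake_up_process(tx);
	return 0;
}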
index 3209b5d..a20a838 100644 (file)
@@ -594,6 +594,8 @@ static int vhci_hub_control(struct usb_hcd *hcd, u16 typeReq, u16 wValue,
                                pr_err("invalid port number %d\n", wIndex);
                                goto error;
                        }
+                       if (wValue >= 32)
+                               goto error;
                        if (hcd->speed == HCD_USB3) {
                                if ((vhci_hcd->port_status[rhport] &
                                     USB_SS_PORT_STAT_POWER) != 0) {
index 96e5371..c4b4256 100644 (file)
@@ -312,6 +312,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr,
        struct vhci *vhci;
        int err;
        unsigned long flags;
+       struct task_struct *tcp_rx = NULL;
+       struct task_struct *tcp_tx = NULL;
 
        /*
         * @rhport: port number of vhci_hcd
@@ -349,12 +351,35 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr,
 
        /* Extract socket from fd. */
        socket = sockfd_lookup(sockfd, &err);
-       if (!socket)
+       if (!socket) {
+               dev_err(dev, "failed to lookup sock");
                return -EINVAL;
+       }
+       if (socket->type != SOCK_STREAM) {
+               dev_err(dev, "Expecting SOCK_STREAM - found %d",
+                       socket->type);
+               sockfd_put(socket);
+               return -EINVAL;
+       }
+
+       /* create threads before locking */
+       tcp_rx = kthread_create(vhci_rx_loop, &vdev->ud, "vhci_rx");
+       if (IS_ERR(tcp_rx)) {
+               sockfd_put(socket);
+               return -EINVAL;
+       }
+       tcp_tx = kthread_create(vhci_tx_loop, &vdev->ud, "vhci_tx");
+       if (IS_ERR(tcp_tx)) {
+               kthread_stop(tcp_rx);
+               sockfd_put(socket);
+               return -EINVAL;
+       }
 
-       /* now need lock until setting vdev status as used */
+       /* get task structs now */
+       get_task_struct(tcp_rx);
+       get_task_struct(tcp_tx);
 
-       /* begin a lock */
+       /* now hold the lock until the vdev status is set */
        spin_lock_irqsave(&vhci->lock, flags);
        spin_lock(&vdev->ud.lock);
 
@@ -364,6 +389,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr,
                spin_unlock_irqrestore(&vhci->lock, flags);
 
                sockfd_put(socket);
+               kthread_stop_put(tcp_rx);
+               kthread_stop_put(tcp_tx);
 
                dev_err(dev, "port %d already used\n", rhport);
                /*
@@ -382,6 +409,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr,
        vdev->speed         = speed;
        vdev->ud.sockfd     = sockfd;
        vdev->ud.tcp_socket = socket;
+       vdev->ud.tcp_rx     = tcp_rx;
+       vdev->ud.tcp_tx     = tcp_tx;
        vdev->ud.status     = VDEV_ST_NOTASSIGNED;
        usbip_kcov_handle_init(&vdev->ud);
 
@@ -389,8 +418,8 @@ static ssize_t attach_store(struct device *dev, struct device_attribute *attr,
        spin_unlock_irqrestore(&vhci->lock, flags);
        /* end the lock */
 
-       vdev->ud.tcp_rx = kthread_get_run(vhci_rx_loop, &vdev->ud, "vhci_rx");
-       vdev->ud.tcp_tx = kthread_get_run(vhci_tx_loop, &vdev->ud, "vhci_tx");
+       wake_up_process(vdev->ud.tcp_rx);
+       wake_up_process(vdev->ud.tcp_tx);
 
        rh_port_connect(vdev, speed);
 
index 100f680..7383a54 100644 (file)
@@ -90,8 +90,9 @@ unlock:
 }
 static BIN_ATTR_RO(dev_desc, sizeof(struct usb_device_descriptor));
 
-static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *attr,
-                    const char *in, size_t count)
+static ssize_t usbip_sockfd_store(struct device *dev,
+                                 struct device_attribute *attr,
+                                 const char *in, size_t count)
 {
        struct vudc *udc = (struct vudc *) dev_get_drvdata(dev);
        int rv;
@@ -100,6 +101,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
        struct socket *socket;
        unsigned long flags;
        int ret;
+       struct task_struct *tcp_rx = NULL;
+       struct task_struct *tcp_tx = NULL;
 
        rv = kstrtoint(in, 0, &sockfd);
        if (rv != 0)
@@ -138,24 +141,54 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
                        goto unlock_ud;
                }
 
-               udc->ud.tcp_socket = socket;
+               if (socket->type != SOCK_STREAM) {
+                       dev_err(dev, "Expecting SOCK_STREAM - found %d",
+                               socket->type);
+                       ret = -EINVAL;
+                       goto sock_err;
+               }
 
+               /* unlock and create threads and get tasks */
                spin_unlock_irq(&udc->ud.lock);
                spin_unlock_irqrestore(&udc->lock, flags);
 
-               udc->ud.tcp_rx = kthread_get_run(&v_rx_loop,
-                                                   &udc->ud, "vudc_rx");
-               udc->ud.tcp_tx = kthread_get_run(&v_tx_loop,
-                                                   &udc->ud, "vudc_tx");
+               tcp_rx = kthread_create(&v_rx_loop, &udc->ud, "vudc_rx");
+               if (IS_ERR(tcp_rx)) {
+                       sockfd_put(socket);
+                       return -EINVAL;
+               }
+               tcp_tx = kthread_create(&v_tx_loop, &udc->ud, "vudc_tx");
+               if (IS_ERR(tcp_tx)) {
+                       kthread_stop(tcp_rx);
+                       sockfd_put(socket);
+                       return -EINVAL;
+               }
+
+               /* get task structs now */
+               get_task_struct(tcp_rx);
+               get_task_struct(tcp_tx);
 
+               /* lock and update udc->ud state */
                spin_lock_irqsave(&udc->lock, flags);
                spin_lock_irq(&udc->ud.lock);
+
+               udc->ud.tcp_socket = socket;
+               udc->ud.tcp_rx = tcp_rx;
+               udc->ud.tcp_tx = tcp_tx;
                udc->ud.status = SDEV_ST_USED;
+
                spin_unlock_irq(&udc->ud.lock);
 
                ktime_get_ts64(&udc->start_time);
                v_start_timer(udc);
                udc->connected = 1;
+
+               spin_unlock_irqrestore(&udc->lock, flags);
+
+               wake_up_process(udc->ud.tcp_rx);
+               wake_up_process(udc->ud.tcp_tx);
+               return count;
+
        } else {
                if (!udc->connected) {
                        dev_err(dev, "Device not connected");
@@ -177,6 +210,8 @@ static ssize_t usbip_sockfd_store(struct device *dev, struct device_attribute *a
 
        return count;
 
+sock_err:
+       sockfd_put(socket);
 unlock_ud:
        spin_unlock_irq(&udc->ud.lock);
 unlock:
index 92a6396..ffd1e09 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 menuconfig VDPA
        tristate "vDPA drivers"
+       depends on NET
        help
          Enable this module to support vDPA device that uses a
          datapath which complies with virtio specifications with
index fa1af30..d555a6a 100644 (file)
@@ -431,8 +431,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        }
 
        adapter = vdpa_alloc_device(struct ifcvf_adapter, vdpa,
-                                   dev, &ifc_vdpa_ops,
-                                   IFCVF_MAX_QUEUE_PAIRS * 2);
+                                   dev, &ifc_vdpa_ops, NULL);
        if (adapter == NULL) {
                IFCVF_ERR(pdev, "Failed to allocate vDPA structure");
                return -ENOMEM;
@@ -456,7 +455,7 @@ static int ifcvf_probe(struct pci_dev *pdev, const struct pci_device_id *id)
        for (i = 0; i < IFCVF_MAX_QUEUE_PAIRS * 2; i++)
                vf->vring[i].irq = -EINVAL;
 
-       ret = vdpa_register_device(&adapter->vdpa);
+       ret = vdpa_register_device(&adapter->vdpa, IFCVF_MAX_QUEUE_PAIRS * 2);
        if (ret) {
                IFCVF_ERR(pdev, "Failed to register ifcvf to vdpa bus");
                goto err;
index 08f742f..b6cc53b 100644 (file)
@@ -4,9 +4,13 @@
 #ifndef __MLX5_VDPA_H__
 #define __MLX5_VDPA_H__
 
+#include <linux/etherdevice.h>
+#include <linux/if_vlan.h>
 #include <linux/vdpa.h>
 #include <linux/mlx5/driver.h>
 
+#define MLX5V_ETH_HARD_MTU (ETH_HLEN + VLAN_HLEN + ETH_FCS_LEN)
+
 struct mlx5_vdpa_direct_mr {
        u64 start;
        u64 end;
index d300f79..3908ff2 100644 (file)
@@ -219,6 +219,11 @@ static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_m
        mlx5_vdpa_destroy_mkey(mvdev, &mkey->mkey);
 }
 
+static struct device *get_dma_device(struct mlx5_vdpa_dev *mvdev)
+{
+       return &mvdev->mdev->pdev->dev;
+}
+
 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
                         struct vhost_iotlb *iotlb)
 {
@@ -234,7 +239,7 @@ static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr
        u64 pa;
        u64 paend;
        struct scatterlist *sg;
-       struct device *dma = mvdev->mdev->device;
+       struct device *dma = get_dma_device(mvdev);
 
        for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
             map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
@@ -291,7 +296,7 @@ err_map:
 
 static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
 {
-       struct device *dma = mvdev->mdev->device;
+       struct device *dma = get_dma_device(mvdev);
 
        destroy_direct_mr(mvdev, mr);
        dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
index 96e6421..6521cbd 100644 (file)
@@ -246,7 +246,8 @@ int mlx5_vdpa_alloc_resources(struct mlx5_vdpa_dev *mvdev)
        if (err)
                goto err_key;
 
-       kick_addr = pci_resource_start(mdev->pdev, 0) + offset;
+       kick_addr = mdev->bar_addr + offset;
+
        res->kick_addr = ioremap(kick_addr, PAGE_SIZE);
        if (!res->kick_addr) {
                err = -ENOMEM;
index b5fe6d2..4d2809c 100644 (file)
@@ -820,7 +820,7 @@ static int create_virtqueue(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtque
        MLX5_SET(virtio_q, vq_ctx, event_qpn_or_msix, mvq->fwqp.mqp.qpn);
        MLX5_SET(virtio_q, vq_ctx, queue_size, mvq->num_ent);
        MLX5_SET(virtio_q, vq_ctx, virtio_version_1_0,
-                !!(ndev->mvdev.actual_features & VIRTIO_F_VERSION_1));
+                !!(ndev->mvdev.actual_features & BIT_ULL(VIRTIO_F_VERSION_1)));
        MLX5_SET64(virtio_q, vq_ctx, desc_addr, mvq->desc_addr);
        MLX5_SET64(virtio_q, vq_ctx, used_addr, mvq->device_addr);
        MLX5_SET64(virtio_q, vq_ctx, available_addr, mvq->driver_addr);
@@ -1169,6 +1169,7 @@ static void suspend_vq(struct mlx5_vdpa_net *ndev, struct mlx5_vdpa_virtqueue *m
                return;
        }
        mvq->avail_idx = attr.available_index;
+       mvq->used_idx = attr.used_index;
 }
 
 static void suspend_vqs(struct mlx5_vdpa_net *ndev)
@@ -1426,6 +1427,7 @@ static int mlx5_vdpa_set_vq_state(struct vdpa_device *vdev, u16 idx,
                return -EINVAL;
        }
 
+       mvq->used_idx = state->avail_index;
        mvq->avail_idx = state->avail_index;
        return 0;
 }
@@ -1443,7 +1445,11 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
         * that cares about emulating the index after vq is stopped.
         */
        if (!mvq->initialized) {
-               state->avail_index = mvq->avail_idx;
+               /* Firmware returns a wrong value for the available index.
+                * Since both values should be identical, we take the value of
+                * used_idx which is reported correctly.
+                */
+               state->avail_index = mvq->used_idx;
                return 0;
        }
 
@@ -1452,7 +1458,7 @@ static int mlx5_vdpa_get_vq_state(struct vdpa_device *vdev, u16 idx, struct vdpa
                mlx5_vdpa_warn(mvdev, "failed to query virtqueue\n");
                return err;
        }
-       state->avail_index = attr.available_index;
+       state->avail_index = attr.used_index;
        return 0;
 }
 
@@ -1540,21 +1546,11 @@ static void teardown_virtqueues(struct mlx5_vdpa_net *ndev)
        }
 }
 
-static void clear_virtqueues(struct mlx5_vdpa_net *ndev)
-{
-       int i;
-
-       for (i = ndev->mvdev.max_vqs - 1; i >= 0; i--) {
-               ndev->vqs[i].avail_idx = 0;
-               ndev->vqs[i].used_idx = 0;
-       }
-}
-
 /* TODO: cross-endian support */
 static inline bool mlx5_vdpa_is_little_endian(struct mlx5_vdpa_dev *mvdev)
 {
        return virtio_legacy_is_little_endian() ||
-               (mvdev->actual_features & (1ULL << VIRTIO_F_VERSION_1));
+               (mvdev->actual_features & BIT_ULL(VIRTIO_F_VERSION_1));
 }
 
 static __virtio16 cpu_to_mlx5vdpa16(struct mlx5_vdpa_dev *mvdev, u16 val)
@@ -1785,7 +1781,6 @@ static void mlx5_vdpa_set_status(struct vdpa_device *vdev, u8 status)
        if (!status) {
                mlx5_vdpa_info(mvdev, "performing device reset\n");
                teardown_driver(ndev);
-               clear_virtqueues(ndev);
                mlx5_vdpa_destroy_mr(&ndev->mvdev);
                ndev->mvdev.status = 0;
                ndev->mvdev.mlx_features = 0;
@@ -1820,7 +1815,7 @@ static void mlx5_vdpa_get_config(struct vdpa_device *vdev, unsigned int offset,
        struct mlx5_vdpa_dev *mvdev = to_mvdev(vdev);
        struct mlx5_vdpa_net *ndev = to_mlx5_vdpa_ndev(mvdev);
 
-       if (offset + len < sizeof(struct virtio_net_config))
+       if (offset + len <= sizeof(struct virtio_net_config))
                memcpy(buf, (u8 *)&ndev->config + offset, len);
 }
 
@@ -1907,6 +1902,19 @@ static const struct vdpa_config_ops mlx5_vdpa_ops = {
        .free = mlx5_vdpa_free,
 };
 
+static int query_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
+{
+       u16 hw_mtu;
+       int err;
+
+       err = mlx5_query_nic_vport_mtu(mdev, &hw_mtu);
+       if (err)
+               return err;
+
+       *mtu = hw_mtu - MLX5V_ETH_HARD_MTU;
+       return 0;
+}
+
 static int alloc_resources(struct mlx5_vdpa_net *ndev)
 {
        struct mlx5_vdpa_net_resources *res = &ndev->res;
@@ -1982,7 +1990,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
        max_vqs = min_t(u32, max_vqs, MLX5_MAX_SUPPORTED_VQS);
 
        ndev = vdpa_alloc_device(struct mlx5_vdpa_net, mvdev.vdev, mdev->device, &mlx5_vdpa_ops,
-                                2 * mlx5_vdpa_max_qps(max_vqs));
+                                NULL);
        if (IS_ERR(ndev))
                return PTR_ERR(ndev);
 
@@ -1992,7 +2000,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
        init_mvqs(ndev);
        mutex_init(&ndev->reslock);
        config = &ndev->config;
-       err = mlx5_query_nic_vport_mtu(mdev, &ndev->mtu);
+       err = query_mtu(mdev, &ndev->mtu);
        if (err)
                goto err_mtu;
 
@@ -2009,7 +2017,7 @@ static int mlx5v_probe(struct auxiliary_device *adev,
        if (err)
                goto err_res;
 
-       err = vdpa_register_device(&mvdev->vdev);
+       err = vdpa_register_device(&mvdev->vdev, 2 * mlx5_vdpa_max_qps(max_vqs));
        if (err)
                goto err_reg;
 
index c082565..5cffce6 100644 (file)
 #include <linux/idr.h>
 #include <linux/slab.h>
 #include <linux/vdpa.h>
+#include <uapi/linux/vdpa.h>
+#include <net/genetlink.h>
+#include <linux/mod_devicetable.h>
 
+static LIST_HEAD(mdev_head);
+/* A global mutex that protects vdpa management device and device level operations. */
+static DEFINE_MUTEX(vdpa_dev_mutex);
 static DEFINE_IDA(vdpa_index_ida);
 
+static struct genl_family vdpa_nl_family;
+
 static int vdpa_dev_probe(struct device *d)
 {
        struct vdpa_device *vdev = dev_to_vdpa(d);
@@ -61,8 +69,8 @@ static void vdpa_release_dev(struct device *d)
  * initialized but before registered.
  * @parent: the parent device
  * @config: the bus operations that is supported by this device
- * @nvqs: number of virtqueues supported by this device
  * @size: size of the parent structure that contains private data
+ * @name: name of the vdpa device; optional.
  *
  * Driver should use vdpa_alloc_device() wrapper macro instead of
  * using this directly.
@@ -72,8 +80,7 @@ static void vdpa_release_dev(struct device *d)
  */
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
                                        const struct vdpa_config_ops *config,
-                                       int nvqs,
-                                       size_t size)
+                                       size_t size, const char *name)
 {
        struct vdpa_device *vdev;
        int err = -EINVAL;
@@ -99,9 +106,11 @@ struct vdpa_device *__vdpa_alloc_device(struct device *parent,
        vdev->index = err;
        vdev->config = config;
        vdev->features_valid = false;
-       vdev->nvqs = nvqs;
 
-       err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
+       if (name)
+               err = dev_set_name(&vdev->dev, "%s", name);
+       else
+               err = dev_set_name(&vdev->dev, "vdpa%u", vdev->index);
        if (err)
                goto err_name;
 
@@ -118,26 +127,89 @@ err:
 }
 EXPORT_SYMBOL_GPL(__vdpa_alloc_device);
 
+static int vdpa_name_match(struct device *dev, const void *data)
+{
+       struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
+
+       return (strcmp(dev_name(&vdev->dev), data) == 0);
+}
+
+static int __vdpa_register_device(struct vdpa_device *vdev, int nvqs)
+{
+       struct device *dev;
+
+       vdev->nvqs = nvqs;
+
+       lockdep_assert_held(&vdpa_dev_mutex);
+       dev = bus_find_device(&vdpa_bus, NULL, dev_name(&vdev->dev), vdpa_name_match);
+       if (dev) {
+               put_device(dev);
+               return -EEXIST;
+       }
+       return device_add(&vdev->dev);
+}
+
+/**
+ * _vdpa_register_device - register a vDPA device with the vdpa lock held
+ * Caller must have made a successful call to vdpa_alloc_device() before.
+ * Caller must invoke this routine in the management device dev_add()
+ * callback after setting up a valid mgmtdev for this vdpa device.
+ * @vdev: the vdpa device to be registered to the vDPA bus
+ * @nvqs: number of virtqueues supported by this device
+ *
+ * Returns an error when it fails to add the device to the vDPA bus
+ */
+int _vdpa_register_device(struct vdpa_device *vdev, int nvqs)
+{
+       if (!vdev->mdev)
+               return -EINVAL;
+
+       return __vdpa_register_device(vdev, nvqs);
+}
+EXPORT_SYMBOL_GPL(_vdpa_register_device);
+
 /**
  * vdpa_register_device - register a vDPA device
  * Callers must have made a successful call to vdpa_alloc_device() before.
  * @vdev: the vdpa device to be registered to vDPA bus
+ * @nvqs: number of virtqueues supported by this device
  *
  * Returns an error when it fails to add to the vDPA bus
  */
-int vdpa_register_device(struct vdpa_device *vdev)
+int vdpa_register_device(struct vdpa_device *vdev, int nvqs)
 {
-       return device_add(&vdev->dev);
+       int err;
+
+       mutex_lock(&vdpa_dev_mutex);
+       err = __vdpa_register_device(vdev, nvqs);
+       mutex_unlock(&vdpa_dev_mutex);
+       return err;
 }
 EXPORT_SYMBOL_GPL(vdpa_register_device);
 
 /**
+ * _vdpa_unregister_device - unregister a vDPA device
+ * Caller must invoke this routine as part of management device dev_del()
+ * callback.
+ * @vdev: the vdpa device to be unregistered from the vDPA bus
+ */
+void _vdpa_unregister_device(struct vdpa_device *vdev)
+{
+       lockdep_assert_held(&vdpa_dev_mutex);
+       WARN_ON(!vdev->mdev);
+       device_unregister(&vdev->dev);
+}
+EXPORT_SYMBOL_GPL(_vdpa_unregister_device);
+
+/**
  * vdpa_unregister_device - unregister a vDPA device
  * @vdev: the vdpa device to be unregistered from the vDPA bus
  */
 void vdpa_unregister_device(struct vdpa_device *vdev)
 {
+       mutex_lock(&vdpa_dev_mutex);
        device_unregister(&vdev->dev);
+       mutex_unlock(&vdpa_dev_mutex);
 }
 EXPORT_SYMBOL_GPL(vdpa_unregister_device);
 
@@ -167,13 +239,436 @@ void vdpa_unregister_driver(struct vdpa_driver *drv)
 }
 EXPORT_SYMBOL_GPL(vdpa_unregister_driver);
 
+/**
+ * vdpa_mgmtdev_register - register a vdpa management device
+ *
+ * @mdev: Pointer to vdpa management device
+ * vdpa_mgmtdev_register() registers a vdpa management device which supports
+ * vdpa device management.
+ */
+int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev)
+{
+       if (!mdev->device || !mdev->ops || !mdev->ops->dev_add || !mdev->ops->dev_del)
+               return -EINVAL;
+
+       INIT_LIST_HEAD(&mdev->list);
+       mutex_lock(&vdpa_dev_mutex);
+       list_add_tail(&mdev->list, &mdev_head);
+       mutex_unlock(&vdpa_dev_mutex);
+       return 0;
+}
+EXPORT_SYMBOL_GPL(vdpa_mgmtdev_register);
+
+static int vdpa_match_remove(struct device *dev, void *data)
+{
+       struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
+       struct vdpa_mgmt_dev *mdev = vdev->mdev;
+
+       if (mdev == data)
+               mdev->ops->dev_del(mdev, vdev);
+       return 0;
+}
+
+void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev)
+{
+       mutex_lock(&vdpa_dev_mutex);
+
+       list_del(&mdev->list);
+
+       /* Filter out all the entries belonging to this management device and delete them. */
+       bus_for_each_dev(&vdpa_bus, NULL, mdev, vdpa_match_remove);
+
+       mutex_unlock(&vdpa_dev_mutex);
+}
+EXPORT_SYMBOL_GPL(vdpa_mgmtdev_unregister);
+
+static bool mgmtdev_handle_match(const struct vdpa_mgmt_dev *mdev,
+                                const char *busname, const char *devname)
+{
+       /* Bus name is optional for a simulated management device, so accept a
+        * device only when its bus presence matches the provided bus attribute.
+        */
+       if ((busname && !mdev->device->bus) || (!busname && mdev->device->bus))
+               return false;
+
+       if (!busname && strcmp(dev_name(mdev->device), devname) == 0)
+               return true;
+
+       if (busname && (strcmp(mdev->device->bus->name, busname) == 0) &&
+           (strcmp(dev_name(mdev->device), devname) == 0))
+               return true;
+
+       return false;
+}
+
+static struct vdpa_mgmt_dev *vdpa_mgmtdev_get_from_attr(struct nlattr **attrs)
+{
+       struct vdpa_mgmt_dev *mdev;
+       const char *busname = NULL;
+       const char *devname;
+
+       if (!attrs[VDPA_ATTR_MGMTDEV_DEV_NAME])
+               return ERR_PTR(-EINVAL);
+       devname = nla_data(attrs[VDPA_ATTR_MGMTDEV_DEV_NAME]);
+       if (attrs[VDPA_ATTR_MGMTDEV_BUS_NAME])
+               busname = nla_data(attrs[VDPA_ATTR_MGMTDEV_BUS_NAME]);
+
+       list_for_each_entry(mdev, &mdev_head, list) {
+               if (mgmtdev_handle_match(mdev, busname, devname))
+                       return mdev;
+       }
+       return ERR_PTR(-ENODEV);
+}
+
+static int vdpa_nl_mgmtdev_handle_fill(struct sk_buff *msg, const struct vdpa_mgmt_dev *mdev)
+{
+       if (mdev->device->bus &&
+           nla_put_string(msg, VDPA_ATTR_MGMTDEV_BUS_NAME, mdev->device->bus->name))
+               return -EMSGSIZE;
+       if (nla_put_string(msg, VDPA_ATTR_MGMTDEV_DEV_NAME, dev_name(mdev->device)))
+               return -EMSGSIZE;
+       return 0;
+}
+
+static int vdpa_mgmtdev_fill(const struct vdpa_mgmt_dev *mdev, struct sk_buff *msg,
+                            u32 portid, u32 seq, int flags)
+{
+       u64 supported_classes = 0;
+       void *hdr;
+       int i = 0;
+       int err;
+
+       hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_MGMTDEV_NEW);
+       if (!hdr)
+               return -EMSGSIZE;
+       err = vdpa_nl_mgmtdev_handle_fill(msg, mdev);
+       if (err)
+               goto msg_err;
+
+       while (mdev->id_table[i].device) {
+               supported_classes |= BIT(mdev->id_table[i].device);
+               i++;
+       }
+
+       if (nla_put_u64_64bit(msg, VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES,
+                             supported_classes, VDPA_ATTR_UNSPEC)) {
+               err = -EMSGSIZE;
+               goto msg_err;
+       }
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+msg_err:
+       genlmsg_cancel(msg, hdr);
+       return err;
+}
+
+static int vdpa_nl_cmd_mgmtdev_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+       struct vdpa_mgmt_dev *mdev;
+       struct sk_buff *msg;
+       int err;
+
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       mutex_lock(&vdpa_dev_mutex);
+       mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
+       if (IS_ERR(mdev)) {
+               mutex_unlock(&vdpa_dev_mutex);
+               NL_SET_ERR_MSG_MOD(info->extack, "Failed to find the specified mgmt device");
+               err = PTR_ERR(mdev);
+               goto out;
+       }
+
+       err = vdpa_mgmtdev_fill(mdev, msg, info->snd_portid, info->snd_seq, 0);
+       mutex_unlock(&vdpa_dev_mutex);
+       if (err)
+               goto out;
+       err = genlmsg_reply(msg, info);
+       return err;
+
+out:
+       nlmsg_free(msg);
+       return err;
+}
+
+static int
+vdpa_nl_cmd_mgmtdev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct vdpa_mgmt_dev *mdev;
+       int start = cb->args[0];
+       int idx = 0;
+       int err;
+
+       mutex_lock(&vdpa_dev_mutex);
+       list_for_each_entry(mdev, &mdev_head, list) {
+               if (idx < start) {
+                       idx++;
+                       continue;
+               }
+               err = vdpa_mgmtdev_fill(mdev, msg, NETLINK_CB(cb->skb).portid,
+                                       cb->nlh->nlmsg_seq, NLM_F_MULTI);
+               if (err)
+                       goto out;
+               idx++;
+       }
+out:
+       mutex_unlock(&vdpa_dev_mutex);
+       cb->args[0] = idx;
+       return msg->len;
+}
+
+static int vdpa_nl_cmd_dev_add_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+       struct vdpa_mgmt_dev *mdev;
+       const char *name;
+       int err = 0;
+
+       if (!info->attrs[VDPA_ATTR_DEV_NAME])
+               return -EINVAL;
+
+       name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
+
+       mutex_lock(&vdpa_dev_mutex);
+       mdev = vdpa_mgmtdev_get_from_attr(info->attrs);
+       if (IS_ERR(mdev)) {
+               NL_SET_ERR_MSG_MOD(info->extack, "Failed to find the specified management device");
+               err = PTR_ERR(mdev);
+               goto err;
+       }
+
+       err = mdev->ops->dev_add(mdev, name);
+err:
+       mutex_unlock(&vdpa_dev_mutex);
+       return err;
+}
+
+static int vdpa_nl_cmd_dev_del_set_doit(struct sk_buff *skb, struct genl_info *info)
+{
+       struct vdpa_mgmt_dev *mdev;
+       struct vdpa_device *vdev;
+       struct device *dev;
+       const char *name;
+       int err = 0;
+
+       if (!info->attrs[VDPA_ATTR_DEV_NAME])
+               return -EINVAL;
+       name = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
+
+       mutex_lock(&vdpa_dev_mutex);
+       dev = bus_find_device(&vdpa_bus, NULL, name, vdpa_name_match);
+       if (!dev) {
+               NL_SET_ERR_MSG_MOD(info->extack, "device not found");
+               err = -ENODEV;
+               goto dev_err;
+       }
+       vdev = container_of(dev, struct vdpa_device, dev);
+       if (!vdev->mdev) {
+               NL_SET_ERR_MSG_MOD(info->extack, "Only a user-created device can be deleted by the user");
+               err = -EINVAL;
+               goto mdev_err;
+       }
+       mdev = vdev->mdev;
+       mdev->ops->dev_del(mdev, vdev);
+mdev_err:
+       put_device(dev);
+dev_err:
+       mutex_unlock(&vdpa_dev_mutex);
+       return err;
+}
+
+static int
+vdpa_dev_fill(struct vdpa_device *vdev, struct sk_buff *msg, u32 portid, u32 seq,
+             int flags, struct netlink_ext_ack *extack)
+{
+       u16 max_vq_size;
+       u32 device_id;
+       u32 vendor_id;
+       void *hdr;
+       int err;
+
+       hdr = genlmsg_put(msg, portid, seq, &vdpa_nl_family, flags, VDPA_CMD_DEV_NEW);
+       if (!hdr)
+               return -EMSGSIZE;
+
+       err = vdpa_nl_mgmtdev_handle_fill(msg, vdev->mdev);
+       if (err)
+               goto msg_err;
+
+       device_id = vdev->config->get_device_id(vdev);
+       vendor_id = vdev->config->get_vendor_id(vdev);
+       max_vq_size = vdev->config->get_vq_num_max(vdev);
+
+       err = -EMSGSIZE;
+       if (nla_put_string(msg, VDPA_ATTR_DEV_NAME, dev_name(&vdev->dev)))
+               goto msg_err;
+       if (nla_put_u32(msg, VDPA_ATTR_DEV_ID, device_id))
+               goto msg_err;
+       if (nla_put_u32(msg, VDPA_ATTR_DEV_VENDOR_ID, vendor_id))
+               goto msg_err;
+       if (nla_put_u32(msg, VDPA_ATTR_DEV_MAX_VQS, vdev->nvqs))
+               goto msg_err;
+       if (nla_put_u16(msg, VDPA_ATTR_DEV_MAX_VQ_SIZE, max_vq_size))
+               goto msg_err;
+
+       genlmsg_end(msg, hdr);
+       return 0;
+
+msg_err:
+       genlmsg_cancel(msg, hdr);
+       return err;
+}
+
+static int vdpa_nl_cmd_dev_get_doit(struct sk_buff *skb, struct genl_info *info)
+{
+       struct vdpa_device *vdev;
+       struct sk_buff *msg;
+       const char *devname;
+       struct device *dev;
+       int err;
+
+       if (!info->attrs[VDPA_ATTR_DEV_NAME])
+               return -EINVAL;
+       devname = nla_data(info->attrs[VDPA_ATTR_DEV_NAME]);
+       msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
+       if (!msg)
+               return -ENOMEM;
+
+       mutex_lock(&vdpa_dev_mutex);
+       dev = bus_find_device(&vdpa_bus, NULL, devname, vdpa_name_match);
+       if (!dev) {
+               NL_SET_ERR_MSG_MOD(info->extack, "device not found");
+               err = -ENODEV;
+               goto err;
+       }
+       vdev = container_of(dev, struct vdpa_device, dev);
+       if (!vdev->mdev) {
+               err = -EINVAL;
+               goto mdev_err;
+       }
+       err = vdpa_dev_fill(vdev, msg, info->snd_portid, info->snd_seq, 0, info->extack);
+       if (!err)
+               err = genlmsg_reply(msg, info);
+mdev_err:
+       put_device(dev);
+err:
+       mutex_unlock(&vdpa_dev_mutex);
+       if (err)
+               nlmsg_free(msg);
+       return err;
+}
+
+struct vdpa_dev_dump_info {
+       struct sk_buff *msg;
+       struct netlink_callback *cb;
+       int start_idx;
+       int idx;
+};
+
+static int vdpa_dev_dump(struct device *dev, void *data)
+{
+       struct vdpa_device *vdev = container_of(dev, struct vdpa_device, dev);
+       struct vdpa_dev_dump_info *info = data;
+       int err;
+
+       if (!vdev->mdev)
+               return 0;
+       if (info->idx < info->start_idx) {
+               info->idx++;
+               return 0;
+       }
+       err = vdpa_dev_fill(vdev, info->msg, NETLINK_CB(info->cb->skb).portid,
+                           info->cb->nlh->nlmsg_seq, NLM_F_MULTI, info->cb->extack);
+       if (err)
+               return err;
+
+       info->idx++;
+       return 0;
+}
+
+static int vdpa_nl_cmd_dev_get_dumpit(struct sk_buff *msg, struct netlink_callback *cb)
+{
+       struct vdpa_dev_dump_info info;
+
+       info.msg = msg;
+       info.cb = cb;
+       info.start_idx = cb->args[0];
+       info.idx = 0;
+
+       mutex_lock(&vdpa_dev_mutex);
+       bus_for_each_dev(&vdpa_bus, NULL, &info, vdpa_dev_dump);
+       mutex_unlock(&vdpa_dev_mutex);
+       cb->args[0] = info.idx;
+       return msg->len;
+}
+
+static const struct nla_policy vdpa_nl_policy[VDPA_ATTR_MAX + 1] = {
+       [VDPA_ATTR_MGMTDEV_BUS_NAME] = { .type = NLA_NUL_STRING },
+       [VDPA_ATTR_MGMTDEV_DEV_NAME] = { .type = NLA_STRING },
+       [VDPA_ATTR_DEV_NAME] = { .type = NLA_STRING },
+};
+
+static const struct genl_ops vdpa_nl_ops[] = {
+       {
+               .cmd = VDPA_CMD_MGMTDEV_GET,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit = vdpa_nl_cmd_mgmtdev_get_doit,
+               .dumpit = vdpa_nl_cmd_mgmtdev_get_dumpit,
+       },
+       {
+               .cmd = VDPA_CMD_DEV_NEW,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit = vdpa_nl_cmd_dev_add_set_doit,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = VDPA_CMD_DEV_DEL,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit = vdpa_nl_cmd_dev_del_set_doit,
+               .flags = GENL_ADMIN_PERM,
+       },
+       {
+               .cmd = VDPA_CMD_DEV_GET,
+               .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+               .doit = vdpa_nl_cmd_dev_get_doit,
+               .dumpit = vdpa_nl_cmd_dev_get_dumpit,
+       },
+};
+
+static struct genl_family vdpa_nl_family __ro_after_init = {
+       .name = VDPA_GENL_NAME,
+       .version = VDPA_GENL_VERSION,
+       .maxattr = VDPA_ATTR_MAX,
+       .policy = vdpa_nl_policy,
+       .netnsok = false,
+       .module = THIS_MODULE,
+       .ops = vdpa_nl_ops,
+       .n_ops = ARRAY_SIZE(vdpa_nl_ops),
+};
+
 static int vdpa_init(void)
 {
-       return bus_register(&vdpa_bus);
+       int err;
+
+       err = bus_register(&vdpa_bus);
+       if (err)
+               return err;
+       err = genl_register_family(&vdpa_nl_family);
+       if (err)
+               goto err;
+       return 0;
+
+err:
+       bus_unregister(&vdpa_bus);
+       return err;
 }
 
 static void __exit vdpa_exit(void)
 {
+       genl_unregister_family(&vdpa_nl_family);
        bus_unregister(&vdpa_bus);
        ida_destroy(&vdpa_index_ida);
 }
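With this series the virtqueue count moves from allocation time to registration time, and vdpa_alloc_device() instead takes an optional name (NULL lets the core name the device vdpa%u). A hedged sketch of a driver probe path under the new signatures, assuming the ERR_PTR() convention of __vdpa_alloc_device() (the demo_* names, DEMO_NVQS, and the empty ops table are hypothetical placeholders):

#include <linux/vdpa.h>
#include <linux/err.h>

#define DEMO_NVQS 2

static const struct vdpa_config_ops demo_vdpa_ops;	/* callbacks elided */

struct demo_adapter {
	struct vdpa_device vdpa;	/* embedded first, as the drivers above do */
};

static int demo_probe(struct device *parent)
{
	struct demo_adapter *adapter;
	int err;

	adapter = vdpa_alloc_device(struct demo_adapter, vdpa,
				    parent, &demo_vdpa_ops, NULL);
	if (IS_ERR(adapter))
		return PTR_ERR(adapter);

	/* nvqs is now supplied at registration, not allocation */
	err = vdpa_register_device(&adapter->vdpa, DEMO_NVQS);
	if (err) {
		/* dropping the last reference frees the allocation */
		put_device(&adapter->vdpa.dev);
		return err;
	}
	return 0;
}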
index b3fcc67..5b6b2f8 100644 (file)
@@ -235,7 +235,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
                ops = &vdpasim_config_ops;
 
        vdpasim = vdpa_alloc_device(struct vdpasim, vdpa, NULL, ops,
-                                   dev_attr->nvqs);
+                                   dev_attr->name);
        if (!vdpasim)
                goto err_alloc;
 
@@ -249,6 +249,7 @@ struct vdpasim *vdpasim_create(struct vdpasim_dev_attr *dev_attr)
        if (dma_set_mask_and_coherent(dev, DMA_BIT_MASK(64)))
                goto err_iommu;
        set_dma_ops(dev, &vdpasim_dma_ops);
+       vdpasim->vdpa.mdev = dev_attr->mgmt_dev;
 
        vdpasim->config = kzalloc(dev_attr->config_size, GFP_KERNEL);
        if (!vdpasim->config)
index b021422..6d75444 100644 (file)
@@ -33,6 +33,8 @@ struct vdpasim_virtqueue {
 };
 
 struct vdpasim_dev_attr {
+       struct vdpa_mgmt_dev *mgmt_dev;
+       const char *name;
        u64 supported_features;
        size_t config_size;
        size_t buffer_size;
index c10b698..a1ab616 100644 (file)
@@ -33,9 +33,7 @@ static char *macaddr;
 module_param(macaddr, charp, 0);
 MODULE_PARM_DESC(macaddr, "Ethernet MAC address");
 
-u8 macaddr_buf[ETH_ALEN];
-
-static struct vdpasim *vdpasim_net_dev;
+static u8 macaddr_buf[ETH_ALEN];
 
 static void vdpasim_net_work(struct work_struct *work)
 {
@@ -112,29 +110,30 @@ out:
 
 static void vdpasim_net_get_config(struct vdpasim *vdpasim, void *config)
 {
-       struct virtio_net_config *net_config =
-               (struct virtio_net_config *)config;
+       struct virtio_net_config *net_config = config;
 
        net_config->mtu = cpu_to_vdpasim16(vdpasim, 1500);
        net_config->status = cpu_to_vdpasim16(vdpasim, VIRTIO_NET_S_LINK_UP);
        memcpy(net_config->mac, macaddr_buf, ETH_ALEN);
 }
 
-static int __init vdpasim_net_init(void)
+static void vdpasim_net_mgmtdev_release(struct device *dev)
+{
+}
+
+static struct device vdpasim_net_mgmtdev = {
+       .init_name = "vdpasim_net",
+       .release = vdpasim_net_mgmtdev_release,
+};
+
+static int vdpasim_net_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
 {
        struct vdpasim_dev_attr dev_attr = {};
+       struct vdpasim *simdev;
        int ret;
 
-       if (macaddr) {
-               mac_pton(macaddr, macaddr_buf);
-               if (!is_valid_ether_addr(macaddr_buf)) {
-                       ret = -EADDRNOTAVAIL;
-                       goto out;
-               }
-       } else {
-               eth_random_addr(macaddr_buf);
-       }
-
+       dev_attr.mgmt_dev = mdev;
+       dev_attr.name = name;
        dev_attr.id = VIRTIO_ID_NET;
        dev_attr.supported_features = VDPASIM_NET_FEATURES;
        dev_attr.nvqs = VDPASIM_NET_VQ_NUM;
@@ -143,29 +142,75 @@ static int __init vdpasim_net_init(void)
        dev_attr.work_fn = vdpasim_net_work;
        dev_attr.buffer_size = PAGE_SIZE;
 
-       vdpasim_net_dev = vdpasim_create(&dev_attr);
-       if (IS_ERR(vdpasim_net_dev)) {
-               ret = PTR_ERR(vdpasim_net_dev);
-               goto out;
+       simdev = vdpasim_create(&dev_attr);
+       if (IS_ERR(simdev))
+               return PTR_ERR(simdev);
+
+       ret = _vdpa_register_device(&simdev->vdpa, VDPASIM_NET_VQ_NUM);
+       if (ret)
+               goto reg_err;
+
+       return 0;
+
+reg_err:
+       put_device(&simdev->vdpa.dev);
+       return ret;
+}
+
+static void vdpasim_net_dev_del(struct vdpa_mgmt_dev *mdev,
+                               struct vdpa_device *dev)
+{
+       struct vdpasim *simdev = container_of(dev, struct vdpasim, vdpa);
+
+       _vdpa_unregister_device(&simdev->vdpa);
+}
+
+static const struct vdpa_mgmtdev_ops vdpasim_net_mgmtdev_ops = {
+       .dev_add = vdpasim_net_dev_add,
+       .dev_del = vdpasim_net_dev_del
+};
+
+static struct virtio_device_id id_table[] = {
+       { VIRTIO_ID_NET, VIRTIO_DEV_ANY_ID },
+       { 0 },
+};
+
+static struct vdpa_mgmt_dev mgmt_dev = {
+       .device = &vdpasim_net_mgmtdev,
+       .id_table = id_table,
+       .ops = &vdpasim_net_mgmtdev_ops,
+};
+
+static int __init vdpasim_net_init(void)
+{
+       int ret;
+
+       if (macaddr) {
+               mac_pton(macaddr, macaddr_buf);
+               if (!is_valid_ether_addr(macaddr_buf))
+                       return -EADDRNOTAVAIL;
+       } else {
+               eth_random_addr(macaddr_buf);
        }
 
-       ret = vdpa_register_device(&vdpasim_net_dev->vdpa);
+       ret = device_register(&vdpasim_net_mgmtdev);
        if (ret)
-               goto put_dev;
+               return ret;
 
+       ret = vdpa_mgmtdev_register(&mgmt_dev);
+       if (ret)
+               goto parent_err;
        return 0;
 
-put_dev:
-       put_device(&vdpasim_net_dev->vdpa.dev);
-out:
+parent_err:
+       device_unregister(&vdpasim_net_mgmtdev);
        return ret;
 }
 
 static void __exit vdpasim_net_exit(void)
 {
-       struct vdpa_device *vdpa = &vdpasim_net_dev->vdpa;
-
-       vdpa_unregister_device(vdpa);
+       vdpa_mgmtdev_unregister(&mgmt_dev);
+       device_unregister(&vdpasim_net_mgmtdev);
 }
 
 module_init(vdpasim_net_init);
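
The conversion above turns the simulator into a vdpa management device: a bare parent struct device (its empty release callback only satisfies the driver core for a statically allocated device), an id_table advertising virtio-net, and dev_add/dev_del callbacks that create and destroy instances on demand, e.g. "vdpa dev add mgmtdev vdpasim_net name vdpa0" with the iproute2 tool. A hypothetical minimal ->dev_add() for some other backend would follow the same contract; my_dev, my_config_ops and MY_VQ_NUM below are placeholders, not kernel symbols:

struct my_dev {
	struct vdpa_device vdpa;	/* embedded; freed via its release path */
	/* ... backend state ... */
};

static int my_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
{
	struct my_dev *d;
	int err;

	d = vdpa_alloc_device(struct my_dev, vdpa, NULL, &my_config_ops, name);
	if (!d)
		return -ENOMEM;

	d->vdpa.mdev = mdev;	/* tie the instance to its management parent */

	err = _vdpa_register_device(&d->vdpa, MY_VQ_NUM);
	if (err)
		put_device(&d->vdpa.dev);	/* not kfree(): let ->release run */
	return err;
}
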
index 5533df9..67d0bf4 100644 (file)
@@ -21,8 +21,8 @@ config VFIO_VIRQFD
 
 menuconfig VFIO
        tristate "VFIO Non-Privileged userspace driver framework"
-       depends on IOMMU_API
-       select VFIO_IOMMU_TYPE1 if (X86 || S390 || ARM || ARM64)
+       select IOMMU_API
+       select VFIO_IOMMU_TYPE1 if MMU && (X86 || S390 || ARM || ARM64)
        help
          VFIO provides a framework for secure userspace device drivers.
          See Documentation/driver-api/vfio.rst for more details.
index 40a2233..4abddbe 100644 (file)
@@ -42,18 +42,6 @@ config VFIO_PCI_IGD
 
 config VFIO_PCI_NVLINK2
        def_bool y
-       depends on VFIO_PCI && PPC_POWERNV
+       depends on VFIO_PCI && PPC_POWERNV && SPAPR_TCE_IOMMU
        help
          VFIO PCI support for P9 Witherspoon machine with NVIDIA V100 GPUs
-
-config VFIO_PCI_ZDEV
-       bool "VFIO PCI ZPCI device CLP support"
-       depends on VFIO_PCI && S390
-       default y
-       help
-         Enabling this option exposes VFIO capabilities containing hardware
-         configuration for zPCI devices. This enables userspace (e.g. QEMU)
-         to supply proper configuration values instead of hard-coded defaults
-         for zPCI devices passed through via VFIO on s390.
-
-         Say Y here.
index 781e080..eff97a7 100644 (file)
@@ -3,6 +3,6 @@
 vfio-pci-y := vfio_pci.o vfio_pci_intrs.o vfio_pci_rdwr.o vfio_pci_config.o
 vfio-pci-$(CONFIG_VFIO_PCI_IGD) += vfio_pci_igd.o
 vfio-pci-$(CONFIG_VFIO_PCI_NVLINK2) += vfio_pci_nvlink2.o
-vfio-pci-$(CONFIG_VFIO_PCI_ZDEV) += vfio_pci_zdev.o
+vfio-pci-$(CONFIG_S390) += vfio_pci_zdev.o
 
 obj-$(CONFIG_VFIO_PCI) += vfio-pci.o
index 706de3e..65e7e6b 100644 (file)
@@ -807,6 +807,7 @@ static long vfio_pci_ioctl(void *device_data,
                struct vfio_device_info info;
                struct vfio_info_cap caps = { .buf = NULL, .size = 0 };
                unsigned long capsz;
+               int ret;
 
                minsz = offsetofend(struct vfio_device_info, num_irqs);
 
@@ -832,13 +833,10 @@ static long vfio_pci_ioctl(void *device_data,
                info.num_regions = VFIO_PCI_NUM_REGIONS + vdev->num_regions;
                info.num_irqs = VFIO_PCI_NUM_IRQS;
 
-               if (IS_ENABLED(CONFIG_VFIO_PCI_ZDEV)) {
-                       int ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
-
-                       if (ret && ret != -ENODEV) {
-                               pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
-                               return ret;
-                       }
+               ret = vfio_pci_info_zdev_add_caps(vdev, &caps);
+               if (ret && ret != -ENODEV) {
+                       pci_warn(vdev->pdev, "Failed to setup zPCI info capabilities\n");
+                       return ret;
                }
 
                if (caps.size) {
index 53d97f4..e66dfb0 100644 (file)
@@ -127,7 +127,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
 
                ret = pci_user_read_config_byte(pdev, pos, &val);
                if (ret)
-                       return pcibios_err_to_errno(ret);
+                       return ret;
 
                if (copy_to_user(buf + count - size, &val, 1))
                        return -EFAULT;
@@ -141,7 +141,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
 
                ret = pci_user_read_config_word(pdev, pos, &val);
                if (ret)
-                       return pcibios_err_to_errno(ret);
+                       return ret;
 
                val = cpu_to_le16(val);
                if (copy_to_user(buf + count - size, &val, 2))
@@ -156,7 +156,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
 
                ret = pci_user_read_config_dword(pdev, pos, &val);
                if (ret)
-                       return pcibios_err_to_errno(ret);
+                       return ret;
 
                val = cpu_to_le32(val);
                if (copy_to_user(buf + count - size, &val, 4))
@@ -171,7 +171,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
 
                ret = pci_user_read_config_word(pdev, pos, &val);
                if (ret)
-                       return pcibios_err_to_errno(ret);
+                       return ret;
 
                val = cpu_to_le16(val);
                if (copy_to_user(buf + count - size, &val, 2))
@@ -186,7 +186,7 @@ static size_t vfio_pci_igd_cfg_rw(struct vfio_pci_device *vdev,
 
                ret = pci_user_read_config_byte(pdev, pos, &val);
                if (ret)
-                       return pcibios_err_to_errno(ret);
+                       return ret;
 
                if (copy_to_user(buf + count - size, &val, 1))
                        return -EFAULT;
index 5c90e56..9cd1882 100644 (file)
@@ -214,7 +214,7 @@ static inline int vfio_pci_ibm_npu2_init(struct vfio_pci_device *vdev)
 }
 #endif
 
-#ifdef CONFIG_VFIO_PCI_ZDEV
+#ifdef CONFIG_S390
 extern int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
                                       struct vfio_info_cap *caps);
 #else
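
The #else branch is cut off by the hunk context, but the stub it retains presumably follows the usual pattern: a static inline returning -ENODEV, which is exactly the error the reworked ioctl path earlier in this series tolerates with its "ret && ret != -ENODEV" check. A sketch of that assumed stub:

static inline int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
					      struct vfio_info_cap *caps)
{
	return -ENODEV;	/* no zPCI info on !CONFIG_S390 builds */
}
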
index 2296856..7b011b6 100644 (file)
@@ -24,8 +24,7 @@
 /*
  * Add the Base PCI Function information to the device info region.
  */
-static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
-                        struct vfio_info_cap *caps)
+static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
        struct vfio_device_info_cap_zpci_base cap = {
                .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_BASE,
@@ -45,8 +44,7 @@ static int zpci_base_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
 /*
  * Add the Base PCI Function Group information to the device info region.
  */
-static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
-                         struct vfio_info_cap *caps)
+static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
        struct vfio_device_info_cap_zpci_group cap = {
                .header.id = VFIO_DEVICE_INFO_CAP_ZPCI_GROUP,
@@ -66,14 +64,15 @@ static int zpci_group_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
 /*
  * Add the device utility string to the device info region.
  */
-static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
-                        struct vfio_info_cap *caps)
+static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
        struct vfio_device_info_cap_zpci_util *cap;
        int cap_size = sizeof(*cap) + CLP_UTIL_STR_LEN;
        int ret;
 
        cap = kmalloc(cap_size, GFP_KERNEL);
+       if (!cap)
+               return -ENOMEM;
 
        cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_UTIL;
        cap->header.version = 1;
@@ -90,14 +89,15 @@ static int zpci_util_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
 /*
  * Add the function path string to the device info region.
  */
-static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_pci_device *vdev,
-                        struct vfio_info_cap *caps)
+static int zpci_pfip_cap(struct zpci_dev *zdev, struct vfio_info_cap *caps)
 {
        struct vfio_device_info_cap_zpci_pfip *cap;
        int cap_size = sizeof(*cap) + CLP_PFIP_NR_SEGMENTS;
        int ret;
 
        cap = kmalloc(cap_size, GFP_KERNEL);
+       if (!cap)
+               return -ENOMEM;
 
        cap->header.id = VFIO_DEVICE_INFO_CAP_ZPCI_PFIP;
        cap->header.version = 1;
@@ -123,21 +123,21 @@ int vfio_pci_info_zdev_add_caps(struct vfio_pci_device *vdev,
        if (!zdev)
                return -ENODEV;
 
-       ret = zpci_base_cap(zdev, vdev, caps);
+       ret = zpci_base_cap(zdev, caps);
        if (ret)
                return ret;
 
-       ret = zpci_group_cap(zdev, vdev, caps);
+       ret = zpci_group_cap(zdev, caps);
        if (ret)
                return ret;
 
        if (zdev->util_str_avail) {
-               ret = zpci_util_cap(zdev, vdev, caps);
+               ret = zpci_util_cap(zdev, caps);
                if (ret)
                        return ret;
        }
 
-       ret = zpci_pfip_cap(zdev, vdev, caps);
+       ret = zpci_pfip_cap(zdev, caps);
 
        return ret;
 }
index dc1a3c4..ab34110 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config VFIO_PLATFORM
        tristate "VFIO support for platform devices"
-       depends on VFIO && EVENTFD && (ARM || ARM64)
+       depends on VFIO && EVENTFD && (ARM || ARM64 || COMPILE_TEST)
        select VFIO_VIRQFD
        help
          Support for platform devices with VFIO. This is required to make
@@ -12,7 +12,7 @@ config VFIO_PLATFORM
 
 config VFIO_AMBA
        tristate "VFIO support for AMBA devices"
-       depends on VFIO_PLATFORM && ARM_AMBA
+       depends on VFIO_PLATFORM && (ARM_AMBA || COMPILE_TEST)
        help
          Support for ARM AMBA devices with VFIO. This is required to make
          use of ARM AMBA devices present on the system using the VFIO
index 4ad8a35..38779e6 100644 (file)
@@ -1220,6 +1220,11 @@ static int vfio_fops_open(struct inode *inode, struct file *filep)
 static int vfio_fops_release(struct inode *inode, struct file *filep)
 {
        struct vfio_container *container = filep->private_data;
+       struct vfio_iommu_driver *driver = container->iommu_driver;
+
+       if (driver && driver->ops->notify)
+               driver->ops->notify(container->iommu_data,
+                                   VFIO_IOMMU_CONTAINER_CLOSE);
 
        filep->private_data = NULL;
 
index 0b4deda..45cbfd4 100644 (file)
@@ -24,6 +24,7 @@
 #include <linux/compat.h>
 #include <linux/device.h>
 #include <linux/fs.h>
+#include <linux/highmem.h>
 #include <linux/iommu.h>
 #include <linux/module.h>
 #include <linux/mm.h>
@@ -69,11 +70,15 @@ struct vfio_iommu {
        struct rb_root          dma_list;
        struct blocking_notifier_head notifier;
        unsigned int            dma_avail;
+       unsigned int            vaddr_invalid_count;
        uint64_t                pgsize_bitmap;
+       uint64_t                num_non_pinned_groups;
+       wait_queue_head_t       vaddr_wait;
        bool                    v2;
        bool                    nesting;
        bool                    dirty_page_tracking;
        bool                    pinned_page_dirty_scope;
+       bool                    container_open;
 };
 
 struct vfio_domain {
@@ -92,11 +97,20 @@ struct vfio_dma {
        int                     prot;           /* IOMMU_READ/WRITE */
        bool                    iommu_mapped;
        bool                    lock_cap;       /* capable(CAP_IPC_LOCK) */
+       bool                    vaddr_invalid;
        struct task_struct      *task;
        struct rb_root          pfn_list;       /* Ex-user pinned pfn list */
        unsigned long           *bitmap;
 };
 
+struct vfio_batch {
+       struct page             **pages;        /* for pin_user_pages_remote */
+       struct page             *fallback_page; /* if pages alloc fails */
+       int                     capacity;       /* length of pages array */
+       int                     size;           /* of batch currently */
+       int                     offset;         /* of next entry in pages */
+};
+
 struct vfio_group {
        struct iommu_group      *iommu_group;
        struct list_head        next;
@@ -143,12 +157,13 @@ struct vfio_regions {
 #define DIRTY_BITMAP_PAGES_MAX  ((u64)INT_MAX)
 #define DIRTY_BITMAP_SIZE_MAX   DIRTY_BITMAP_BYTES(DIRTY_BITMAP_PAGES_MAX)
 
+#define WAITED 1
+
 static int put_pfn(unsigned long pfn, int prot);
 
 static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
                                               struct iommu_group *iommu_group);
 
-static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu);
 /*
  * This code handles mapping and unmapping of user data buffers
  * into DMA'ble space using the IOMMU
@@ -173,6 +188,31 @@ static struct vfio_dma *vfio_find_dma(struct vfio_iommu *iommu,
        return NULL;
 }
 
+static struct rb_node *vfio_find_dma_first_node(struct vfio_iommu *iommu,
+                                               dma_addr_t start, u64 size)
+{
+       struct rb_node *res = NULL;
+       struct rb_node *node = iommu->dma_list.rb_node;
+       struct vfio_dma *dma_res = NULL;
+
+       while (node) {
+               struct vfio_dma *dma = rb_entry(node, struct vfio_dma, node);
+
+               if (start < dma->iova + dma->size) {
+                       res = node;
+                       dma_res = dma;
+                       if (start >= dma->iova)
+                               break;
+                       node = node->rb_left;
+               } else {
+                       node = node->rb_right;
+               }
+       }
+       if (res && size && dma_res->iova >= start + size)
+               res = NULL;
+       return res;
+}
+
 static void vfio_link_dma(struct vfio_iommu *iommu, struct vfio_dma *new)
 {
        struct rb_node **link = &iommu->dma_list.rb_node, *parent = NULL;
@@ -236,6 +276,18 @@ static void vfio_dma_populate_bitmap(struct vfio_dma *dma, size_t pgsize)
        }
 }
 
+static void vfio_iommu_populate_bitmap_full(struct vfio_iommu *iommu)
+{
+       struct rb_node *n;
+       unsigned long pgshift = __ffs(iommu->pgsize_bitmap);
+
+       for (n = rb_first(&iommu->dma_list); n; n = rb_next(n)) {
+               struct vfio_dma *dma = rb_entry(n, struct vfio_dma, node);
+
+               bitmap_set(dma->bitmap, 0, dma->size >> pgshift);
+       }
+}
+
 static int vfio_dma_bitmap_alloc_all(struct vfio_iommu *iommu, size_t pgsize)
 {
        struct rb_node *n;
@@ -415,13 +467,54 @@ static int put_pfn(unsigned long pfn, int prot)
        return 0;
 }
 
+#define VFIO_BATCH_MAX_CAPACITY (PAGE_SIZE / sizeof(struct page *))
+
+static void vfio_batch_init(struct vfio_batch *batch)
+{
+       batch->size = 0;
+       batch->offset = 0;
+
+       if (unlikely(disable_hugepages))
+               goto fallback;
+
+       batch->pages = (struct page **) __get_free_page(GFP_KERNEL);
+       if (!batch->pages)
+               goto fallback;
+
+       batch->capacity = VFIO_BATCH_MAX_CAPACITY;
+       return;
+
+fallback:
+       batch->pages = &batch->fallback_page;
+       batch->capacity = 1;
+}
+
+static void vfio_batch_unpin(struct vfio_batch *batch, struct vfio_dma *dma)
+{
+       while (batch->size) {
+               unsigned long pfn = page_to_pfn(batch->pages[batch->offset]);
+
+               put_pfn(pfn, dma->prot);
+               batch->offset++;
+               batch->size--;
+       }
+}
+
+static void vfio_batch_fini(struct vfio_batch *batch)
+{
+       if (batch->capacity == VFIO_BATCH_MAX_CAPACITY)
+               free_page((unsigned long)batch->pages);
+}
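
These three helpers give the batch an init/drain/fini lifecycle: vfio_batch_init() grabs a full page of struct page pointers (512 entries with 4 KiB pages on 64-bit) and degrades to a single embedded slot if the allocation fails or hugepages are disabled, vfio_batch_unpin() releases whatever is still unconsumed, and vfio_batch_fini() frees the backing page only when one was actually allocated. A sketch of the intended call pattern, loosely mirroring vfio_pin_map_dma() further down in this patch (locking, iommu mapping and error unwinding elided):

static int pin_range(struct vfio_dma *dma, unsigned long vaddr, size_t size,
		     unsigned long limit)
{
	struct vfio_batch batch;
	unsigned long pfn;
	long npage = 0;

	vfio_batch_init(&batch);
	while (size) {
		/* The batch amortizes pin_user_pages_remote() calls. */
		npage = vfio_pin_pages_remote(dma, vaddr, size >> PAGE_SHIFT,
					      &pfn, limit, &batch);
		if (npage <= 0)
			break;
		/* ... map the contiguous run starting at pfn ... */
		vaddr += npage << PAGE_SHIFT;
		size -= npage << PAGE_SHIFT;
	}
	vfio_batch_fini(&batch);
	return npage < 0 ? (int)npage : 0;
}
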
+
 static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
                            unsigned long vaddr, unsigned long *pfn,
                            bool write_fault)
 {
+       pte_t *ptep;
+       spinlock_t *ptl;
        int ret;
 
-       ret = follow_pfn(vma, vaddr, pfn);
+       ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
        if (ret) {
                bool unlocked = false;
 
@@ -435,16 +528,28 @@ static int follow_fault_pfn(struct vm_area_struct *vma, struct mm_struct *mm,
                if (ret)
                        return ret;
 
-               ret = follow_pfn(vma, vaddr, pfn);
+               ret = follow_pte(vma->vm_mm, vaddr, &ptep, &ptl);
+               if (ret)
+                       return ret;
        }
 
+       if (write_fault && !pte_write(*ptep))
+               ret = -EFAULT;
+       else
+               *pfn = pte_pfn(*ptep);
+
+       pte_unmap_unlock(ptep, ptl);
        return ret;
 }
 
-static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
-                        int prot, unsigned long *pfn)
+/*
+ * Returns the positive number of pfns successfully obtained or a negative
+ * error code.
+ */
+static int vaddr_get_pfns(struct mm_struct *mm, unsigned long vaddr,
+                         long npages, int prot, unsigned long *pfn,
+                         struct page **pages)
 {
-       struct page *page[1];
        struct vm_area_struct *vma;
        unsigned int flags = 0;
        int ret;
@@ -453,11 +558,10 @@ static int vaddr_get_pfn(struct mm_struct *mm, unsigned long vaddr,
                flags |= FOLL_WRITE;
 
        mmap_read_lock(mm);
-       ret = pin_user_pages_remote(mm, vaddr, 1, flags | FOLL_LONGTERM,
-                                   page, NULL, NULL);
-       if (ret == 1) {
-               *pfn = page_to_pfn(page[0]);
-               ret = 0;
+       ret = pin_user_pages_remote(mm, vaddr, npages, flags | FOLL_LONGTERM,
+                                   pages, NULL, NULL);
+       if (ret > 0) {
+               *pfn = page_to_pfn(pages[0]);
                goto done;
        }
 
@@ -471,14 +575,73 @@ retry:
                if (ret == -EAGAIN)
                        goto retry;
 
-               if (!ret && !is_invalid_reserved_pfn(*pfn))
-                       ret = -EFAULT;
+               if (!ret) {
+                       if (is_invalid_reserved_pfn(*pfn))
+                               ret = 1;
+                       else
+                               ret = -EFAULT;
+               }
        }
 done:
        mmap_read_unlock(mm);
        return ret;
 }
 
+static int vfio_wait(struct vfio_iommu *iommu)
+{
+       DEFINE_WAIT(wait);
+
+       prepare_to_wait(&iommu->vaddr_wait, &wait, TASK_KILLABLE);
+       mutex_unlock(&iommu->lock);
+       schedule();
+       mutex_lock(&iommu->lock);
+       finish_wait(&iommu->vaddr_wait, &wait);
+       if (kthread_should_stop() || !iommu->container_open ||
+           fatal_signal_pending(current)) {
+               return -EFAULT;
+       }
+       return WAITED;
+}
+
+/*
+ * Find dma struct and wait for its vaddr to be valid.  iommu lock is dropped
+ * if the task waits, but is re-locked on return.  Return result in *dma_p.
+ * Return 0 on success with no waiting, WAITED on success if waited, and -errno
+ * on error.
+ */
+static int vfio_find_dma_valid(struct vfio_iommu *iommu, dma_addr_t start,
+                              size_t size, struct vfio_dma **dma_p)
+{
+       int ret;
+
+       do {
+               *dma_p = vfio_find_dma(iommu, start, size);
+               if (!*dma_p)
+                       ret = -EINVAL;
+               else if (!(*dma_p)->vaddr_invalid)
+                       ret = 0;
+               else
+                       ret = vfio_wait(iommu);
+       } while (ret > 0);
+
+       return ret;
+}
+
+/*
+ * Wait for all vaddr in the dma_list to become valid.  iommu lock is dropped
+ * if the task waits, but is re-locked on return.  Return 0 on success with no
+ * waiting, WAITED on success if waited, and -errno on error.
+ */
+static int vfio_wait_all_valid(struct vfio_iommu *iommu)
+{
+       int ret = 0;
+
+       while (iommu->vaddr_invalid_count && ret >= 0)
+               ret = vfio_wait(iommu);
+
+       return ret;
+}
+
 /*
  * Attempt to pin pages.  We really don't want to track all the pfns and
  * the iommu can only map chunks of consecutive pfns anyway, so get the
@@ -486,76 +649,108 @@ done:
  */
 static long vfio_pin_pages_remote(struct vfio_dma *dma, unsigned long vaddr,
                                  long npage, unsigned long *pfn_base,
-                                 unsigned long limit)
+                                 unsigned long limit, struct vfio_batch *batch)
 {
-       unsigned long pfn = 0;
+       unsigned long pfn;
+       struct mm_struct *mm = current->mm;
        long ret, pinned = 0, lock_acct = 0;
        bool rsvd;
        dma_addr_t iova = vaddr - dma->vaddr + dma->iova;
 
        /* This code path is only user initiated */
-       if (!current->mm)
+       if (!mm)
                return -ENODEV;
 
-       ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, pfn_base);
-       if (ret)
-               return ret;
-
-       pinned++;
-       rsvd = is_invalid_reserved_pfn(*pfn_base);
-
-       /*
-        * Reserved pages aren't counted against the user, externally pinned
-        * pages are already counted against the user.
-        */
-       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-               if (!dma->lock_cap && current->mm->locked_vm + 1 > limit) {
-                       put_pfn(*pfn_base, dma->prot);
-                       pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n", __func__,
-                                       limit << PAGE_SHIFT);
-                       return -ENOMEM;
-               }
-               lock_acct++;
+       if (batch->size) {
+               /* Leftover pages in batch from an earlier call. */
+               *pfn_base = page_to_pfn(batch->pages[batch->offset]);
+               pfn = *pfn_base;
+               rsvd = is_invalid_reserved_pfn(*pfn_base);
+       } else {
+               *pfn_base = 0;
        }
 
-       if (unlikely(disable_hugepages))
-               goto out;
+       while (npage) {
+               if (!batch->size) {
+                       /* Empty batch, so refill it. */
+                       long req_pages = min_t(long, npage, batch->capacity);
 
-       /* Lock all the consecutive pages from pfn_base */
-       for (vaddr += PAGE_SIZE, iova += PAGE_SIZE; pinned < npage;
-            pinned++, vaddr += PAGE_SIZE, iova += PAGE_SIZE) {
-               ret = vaddr_get_pfn(current->mm, vaddr, dma->prot, &pfn);
-               if (ret)
-                       break;
+                       ret = vaddr_get_pfns(mm, vaddr, req_pages, dma->prot,
+                                            &pfn, batch->pages);
+                       if (ret < 0)
+                               goto unpin_out;
 
-               if (pfn != *pfn_base + pinned ||
-                   rsvd != is_invalid_reserved_pfn(pfn)) {
-                       put_pfn(pfn, dma->prot);
-                       break;
+                       batch->size = ret;
+                       batch->offset = 0;
+
+                       if (!*pfn_base) {
+                               *pfn_base = pfn;
+                               rsvd = is_invalid_reserved_pfn(*pfn_base);
+                       }
                }
 
-               if (!rsvd && !vfio_find_vpfn(dma, iova)) {
-                       if (!dma->lock_cap &&
-                           current->mm->locked_vm + lock_acct + 1 > limit) {
-                               put_pfn(pfn, dma->prot);
-                               pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
-                                       __func__, limit << PAGE_SHIFT);
-                               ret = -ENOMEM;
-                               goto unpin_out;
+               /*
+                * pfn is preset for the first iteration of this inner loop and
+                * updated at the end to handle a VM_PFNMAP pfn.  In that case,
+                * batch->pages isn't valid (there's no struct page), so allow
+                * batch->pages to be touched only when there's more than one
+                * pfn to check, which guarantees the pfns are from a
+                * !VM_PFNMAP vma.
+                */
+               while (true) {
+                       if (pfn != *pfn_base + pinned ||
+                           rsvd != is_invalid_reserved_pfn(pfn))
+                               goto out;
+
+                       /*
+                        * Reserved pages aren't counted against the user,
+                        * externally pinned pages are already counted against
+                        * the user.
+                        */
+                       if (!rsvd && !vfio_find_vpfn(dma, iova)) {
+                               if (!dma->lock_cap &&
+                                   mm->locked_vm + lock_acct + 1 > limit) {
+                                       pr_warn("%s: RLIMIT_MEMLOCK (%ld) exceeded\n",
+                                               __func__, limit << PAGE_SHIFT);
+                                       ret = -ENOMEM;
+                                       goto unpin_out;
+                               }
+                               lock_acct++;
                        }
-                       lock_acct++;
+
+                       pinned++;
+                       npage--;
+                       vaddr += PAGE_SIZE;
+                       iova += PAGE_SIZE;
+                       batch->offset++;
+                       batch->size--;
+
+                       if (!batch->size)
+                               break;
+
+                       pfn = page_to_pfn(batch->pages[batch->offset]);
                }
+
+               if (unlikely(disable_hugepages))
+                       break;
        }
 
 out:
        ret = vfio_lock_acct(dma, lock_acct, false);
 
 unpin_out:
-       if (ret) {
-               if (!rsvd) {
+       if (batch->size == 1 && !batch->offset) {
+               /* May be a VM_PFNMAP pfn, which the batch can't remember. */
+               put_pfn(pfn, dma->prot);
+               batch->size = 0;
+       }
+
+       if (ret < 0) {
+               if (pinned && !rsvd) {
                        for (pfn = *pfn_base ; pinned ; pfn++, pinned--)
                                put_pfn(pfn, dma->prot);
                }
+               vfio_batch_unpin(batch, dma);
 
                return ret;
        }
@@ -587,6 +782,7 @@ static long vfio_unpin_pages_remote(struct vfio_dma *dma, dma_addr_t iova,
 static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
                                  unsigned long *pfn_base, bool do_accounting)
 {
+       struct page *pages[1];
        struct mm_struct *mm;
        int ret;
 
@@ -594,8 +790,13 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
        if (!mm)
                return -ENODEV;
 
-       ret = vaddr_get_pfn(mm, vaddr, dma->prot, pfn_base);
-       if (!ret && do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
+       ret = vaddr_get_pfns(mm, vaddr, 1, dma->prot, pfn_base, pages);
+       if (ret != 1)
+               goto out;
+
+       ret = 0;
+
+       if (do_accounting && !is_invalid_reserved_pfn(*pfn_base)) {
                ret = vfio_lock_acct(dma, 1, true);
                if (ret) {
                        put_pfn(*pfn_base, dma->prot);
@@ -607,6 +808,7 @@ static int vfio_pin_page_external(struct vfio_dma *dma, unsigned long vaddr,
                }
        }
 
+out:
        mmput(mm);
        return ret;
 }
@@ -640,6 +842,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
        unsigned long remote_vaddr;
        struct vfio_dma *dma;
        bool do_accounting;
+       dma_addr_t iova;
 
        if (!iommu || !user_pfn || !phys_pfn)
                return -EINVAL;
@@ -650,6 +853,22 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
 
        mutex_lock(&iommu->lock);
 
+       /*
+        * Wait for all necessary vaddr's to be valid so they can be used in
+        * the main loop without dropping the lock, to avoid racing vs unmap.
+        */
+again:
+       if (iommu->vaddr_invalid_count) {
+               for (i = 0; i < npage; i++) {
+                       iova = user_pfn[i] << PAGE_SHIFT;
+                       ret = vfio_find_dma_valid(iommu, iova, PAGE_SIZE, &dma);
+                       if (ret < 0)
+                               goto pin_done;
+                       if (ret == WAITED)
+                               goto again;
+               }
+       }
+
        /* Fail if notifier list is empty */
        if (!iommu->notifier.head) {
                ret = -EINVAL;
@@ -664,7 +883,6 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
        do_accounting = !IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu);
 
        for (i = 0; i < npage; i++) {
-               dma_addr_t iova;
                struct vfio_pfn *vpfn;
 
                iova = user_pfn[i] << PAGE_SHIFT;
@@ -714,7 +932,7 @@ static int vfio_iommu_type1_pin_pages(void *iommu_data,
        group = vfio_iommu_find_iommu_group(iommu, iommu_group);
        if (!group->pinned_page_dirty_scope) {
                group->pinned_page_dirty_scope = true;
-               update_pinned_page_dirty_scope(iommu);
+               iommu->num_non_pinned_groups--;
        }
 
        goto pin_done;
@@ -945,10 +1163,15 @@ static long vfio_unmap_unpin(struct vfio_iommu *iommu, struct vfio_dma *dma,
 
 static void vfio_remove_dma(struct vfio_iommu *iommu, struct vfio_dma *dma)
 {
+       WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list));
        vfio_unmap_unpin(iommu, dma, true);
        vfio_unlink_dma(iommu, dma);
        put_task_struct(dma->task);
        vfio_dma_bitmap_free(dma);
+       if (dma->vaddr_invalid) {
+               iommu->vaddr_invalid_count--;
+               wake_up_all(&iommu->vaddr_wait);
+       }
        kfree(dma);
        iommu->dma_avail++;
 }
@@ -991,7 +1214,7 @@ static int update_user_bitmap(u64 __user *bitmap, struct vfio_iommu *iommu,
         * mark all pages dirty if any IOMMU capable device is not able
         * to report dirty pages and all pages are pinned and mapped.
         */
-       if (!iommu->pinned_page_dirty_scope && dma->iommu_mapped)
+       if (iommu->num_non_pinned_groups && dma->iommu_mapped)
                bitmap_set(dma->bitmap, 0, nbits);
 
        if (shift) {
@@ -1074,34 +1297,34 @@ static int vfio_dma_do_unmap(struct vfio_iommu *iommu,
 {
        struct vfio_dma *dma, *dma_last = NULL;
        size_t unmapped = 0, pgsize;
-       int ret = 0, retries = 0;
+       int ret = -EINVAL, retries = 0;
        unsigned long pgshift;
+       dma_addr_t iova = unmap->iova;
+       u64 size = unmap->size;
+       bool unmap_all = unmap->flags & VFIO_DMA_UNMAP_FLAG_ALL;
+       bool invalidate_vaddr = unmap->flags & VFIO_DMA_UNMAP_FLAG_VADDR;
+       struct rb_node *n, *first_n;
 
        mutex_lock(&iommu->lock);
 
        pgshift = __ffs(iommu->pgsize_bitmap);
        pgsize = (size_t)1 << pgshift;
 
-       if (unmap->iova & (pgsize - 1)) {
-               ret = -EINVAL;
-               goto unlock;
-       }
-
-       if (!unmap->size || unmap->size & (pgsize - 1)) {
-               ret = -EINVAL;
+       if (iova & (pgsize - 1))
                goto unlock;
-       }
 
-       if (unmap->iova + unmap->size - 1 < unmap->iova ||
-           unmap->size > SIZE_MAX) {
-               ret = -EINVAL;
+       if (unmap_all) {
+               if (iova || size)
+                       goto unlock;
+               size = U64_MAX;
+       } else if (!size || size & (pgsize - 1) ||
+                  iova + size - 1 < iova || size > SIZE_MAX) {
                goto unlock;
        }
 
        /* When dirty tracking is enabled, allow only min supported pgsize */
        if ((unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
            (!iommu->dirty_page_tracking || (bitmap->pgsize != pgsize))) {
-               ret = -EINVAL;
                goto unlock;
        }
 
@@ -1138,21 +1361,25 @@ again:
         * will only return success and a size of zero if there were no
         * mappings within the range.
         */
-       if (iommu->v2) {
-               dma = vfio_find_dma(iommu, unmap->iova, 1);
-               if (dma && dma->iova != unmap->iova) {
-                       ret = -EINVAL;
+       if (iommu->v2 && !unmap_all) {
+               dma = vfio_find_dma(iommu, iova, 1);
+               if (dma && dma->iova != iova)
                        goto unlock;
-               }
-               dma = vfio_find_dma(iommu, unmap->iova + unmap->size - 1, 0);
-               if (dma && dma->iova + dma->size != unmap->iova + unmap->size) {
-                       ret = -EINVAL;
+
+               dma = vfio_find_dma(iommu, iova + size - 1, 0);
+               if (dma && dma->iova + dma->size != iova + size)
                        goto unlock;
-               }
        }
 
-       while ((dma = vfio_find_dma(iommu, unmap->iova, unmap->size))) {
-               if (!iommu->v2 && unmap->iova > dma->iova)
+       ret = 0;
+       n = first_n = vfio_find_dma_first_node(iommu, iova, size);
+
+       while (n) {
+               dma = rb_entry(n, struct vfio_dma, node);
+               if (dma->iova >= iova + size)
+                       break;
+
+               if (!iommu->v2 && iova > dma->iova)
                        break;
                /*
                 * Task with same address space who mapped this iova range is
@@ -1161,6 +1388,27 @@ again:
                if (dma->task->mm != current->mm)
                        break;
 
+               if (invalidate_vaddr) {
+                       if (dma->vaddr_invalid) {
+                               struct rb_node *last_n = n;
+
+                               for (n = first_n; n != last_n; n = rb_next(n)) {
+                                       dma = rb_entry(n,
+                                                      struct vfio_dma, node);
+                                       dma->vaddr_invalid = false;
+                                       iommu->vaddr_invalid_count--;
+                               }
+                               ret = -EINVAL;
+                               unmapped = 0;
+                               break;
+                       }
+                       dma->vaddr_invalid = true;
+                       iommu->vaddr_invalid_count++;
+                       unmapped += dma->size;
+                       n = rb_next(n);
+                       continue;
+               }
+
                if (!RB_EMPTY_ROOT(&dma->pfn_list)) {
                        struct vfio_iommu_type1_dma_unmap nb_unmap;
 
@@ -1190,12 +1438,13 @@ again:
 
                if (unmap->flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
                        ret = update_user_bitmap(bitmap->data, iommu, dma,
-                                                unmap->iova, pgsize);
+                                                iova, pgsize);
                        if (ret)
                                break;
                }
 
                unmapped += dma->size;
+               n = rb_next(n);
                vfio_remove_dma(iommu, dma);
        }
 
@@ -1239,15 +1488,19 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
 {
        dma_addr_t iova = dma->iova;
        unsigned long vaddr = dma->vaddr;
+       struct vfio_batch batch;
        size_t size = map_size;
        long npage;
        unsigned long pfn, limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        int ret = 0;
 
+       vfio_batch_init(&batch);
+
        while (size) {
                /* Pin a contiguous chunk of memory */
                npage = vfio_pin_pages_remote(dma, vaddr + dma->size,
-                                             size >> PAGE_SHIFT, &pfn, limit);
+                                             size >> PAGE_SHIFT, &pfn, limit,
+                                             &batch);
                if (npage <= 0) {
                        WARN_ON(!npage);
                        ret = (int)npage;
@@ -1260,6 +1513,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
                if (ret) {
                        vfio_unpin_pages_remote(dma, iova + dma->size, pfn,
                                                npage, true);
+                       vfio_batch_unpin(&batch, dma);
                        break;
                }
 
@@ -1267,6 +1521,7 @@ static int vfio_pin_map_dma(struct vfio_iommu *iommu, struct vfio_dma *dma,
                dma->size += npage << PAGE_SHIFT;
        }
 
+       vfio_batch_fini(&batch);
        dma->iommu_mapped = true;
 
        if (ret)
@@ -1299,6 +1554,7 @@ static bool vfio_iommu_iova_dma_valid(struct vfio_iommu *iommu,
 static int vfio_dma_do_map(struct vfio_iommu *iommu,
                           struct vfio_iommu_type1_dma_map *map)
 {
+       bool set_vaddr = map->flags & VFIO_DMA_MAP_FLAG_VADDR;
        dma_addr_t iova = map->iova;
        unsigned long vaddr = map->vaddr;
        size_t size = map->size;
@@ -1316,13 +1572,16 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
        if (map->flags & VFIO_DMA_MAP_FLAG_READ)
                prot |= IOMMU_READ;
 
+       if ((prot && set_vaddr) || (!prot && !set_vaddr))
+               return -EINVAL;
+
        mutex_lock(&iommu->lock);
 
        pgsize = (size_t)1 << __ffs(iommu->pgsize_bitmap);
 
        WARN_ON((pgsize - 1) & PAGE_MASK);
 
-       if (!prot || !size || (size | iova | vaddr) & (pgsize - 1)) {
+       if (!size || (size | iova | vaddr) & (pgsize - 1)) {
                ret = -EINVAL;
                goto out_unlock;
        }
@@ -1333,7 +1592,21 @@ static int vfio_dma_do_map(struct vfio_iommu *iommu,
                goto out_unlock;
        }
 
-       if (vfio_find_dma(iommu, iova, size)) {
+       dma = vfio_find_dma(iommu, iova, size);
+       if (set_vaddr) {
+               if (!dma) {
+                       ret = -ENOENT;
+               } else if (!dma->vaddr_invalid || dma->iova != iova ||
+                          dma->size != size) {
+                       ret = -EINVAL;
+               } else {
+                       dma->vaddr = vaddr;
+                       dma->vaddr_invalid = false;
+                       iommu->vaddr_invalid_count--;
+                       wake_up_all(&iommu->vaddr_wait);
+               }
+               goto out_unlock;
+       } else if (dma) {
                ret = -EEXIST;
                goto out_unlock;
        }
@@ -1425,16 +1698,23 @@ static int vfio_bus_type(struct device *dev, void *data)
 static int vfio_iommu_replay(struct vfio_iommu *iommu,
                             struct vfio_domain *domain)
 {
+       struct vfio_batch batch;
        struct vfio_domain *d = NULL;
        struct rb_node *n;
        unsigned long limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
        int ret;
 
+       ret = vfio_wait_all_valid(iommu);
+       if (ret < 0)
+               return ret;
+
        /* Arbitrarily pick the first domain in the list for lookups */
        if (!list_empty(&iommu->domain_list))
                d = list_first_entry(&iommu->domain_list,
                                     struct vfio_domain, next);
 
+       vfio_batch_init(&batch);
+
        n = rb_first(&iommu->dma_list);
 
        for (; n; n = rb_next(n)) {
@@ -1482,7 +1762,8 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
 
                                npage = vfio_pin_pages_remote(dma, vaddr,
                                                              n >> PAGE_SHIFT,
-                                                             &pfn, limit);
+                                                             &pfn, limit,
+                                                             &batch);
                                if (npage <= 0) {
                                        WARN_ON(!npage);
                                        ret = (int)npage;
@@ -1496,11 +1777,13 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                        ret = iommu_map(domain->domain, iova, phys,
                                        size, dma->prot | domain->prot);
                        if (ret) {
-                               if (!dma->iommu_mapped)
+                               if (!dma->iommu_mapped) {
                                        vfio_unpin_pages_remote(dma, iova,
                                                        phys >> PAGE_SHIFT,
                                                        size >> PAGE_SHIFT,
                                                        true);
+                                       vfio_batch_unpin(&batch, dma);
+                               }
                                goto unwind;
                        }
 
@@ -1515,6 +1798,7 @@ static int vfio_iommu_replay(struct vfio_iommu *iommu,
                dma->iommu_mapped = true;
        }
 
+       vfio_batch_fini(&batch);
        return 0;
 
 unwind:
@@ -1555,6 +1839,7 @@ unwind:
                }
        }
 
+       vfio_batch_fini(&batch);
        return ret;
 }
 
@@ -1622,33 +1907,6 @@ static struct vfio_group *vfio_iommu_find_iommu_group(struct vfio_iommu *iommu,
        return group;
 }
 
-static void update_pinned_page_dirty_scope(struct vfio_iommu *iommu)
-{
-       struct vfio_domain *domain;
-       struct vfio_group *group;
-
-       list_for_each_entry(domain, &iommu->domain_list, next) {
-               list_for_each_entry(group, &domain->group_list, next) {
-                       if (!group->pinned_page_dirty_scope) {
-                               iommu->pinned_page_dirty_scope = false;
-                               return;
-                       }
-               }
-       }
-
-       if (iommu->external_domain) {
-               domain = iommu->external_domain;
-               list_for_each_entry(group, &domain->group_list, next) {
-                       if (!group->pinned_page_dirty_scope) {
-                               iommu->pinned_page_dirty_scope = false;
-                               return;
-                       }
-               }
-       }
-
-       iommu->pinned_page_dirty_scope = true;
-}
-
 static bool vfio_iommu_has_sw_msi(struct list_head *group_resv_regions,
                                  phys_addr_t *base)
 {
@@ -2057,8 +2315,6 @@ static int vfio_iommu_type1_attach_group(void *iommu_data,
                         * addition of a dirty tracking group.
                         */
                        group->pinned_page_dirty_scope = true;
-                       if (!iommu->pinned_page_dirty_scope)
-                               update_pinned_page_dirty_scope(iommu);
                        mutex_unlock(&iommu->lock);
 
                        return 0;
@@ -2188,7 +2444,7 @@ done:
         * demotes the iommu scope until it declares itself dirty tracking
         * capable via the page pinning interface.
         */
-       iommu->pinned_page_dirty_scope = false;
+       iommu->num_non_pinned_groups++;
        mutex_unlock(&iommu->lock);
        vfio_iommu_resv_free(&group_resv_regions);
 
@@ -2238,23 +2494,6 @@ static void vfio_iommu_unmap_unpin_reaccount(struct vfio_iommu *iommu)
        }
 }
 
-static void vfio_sanity_check_pfn_list(struct vfio_iommu *iommu)
-{
-       struct rb_node *n;
-
-       n = rb_first(&iommu->dma_list);
-       for (; n; n = rb_next(n)) {
-               struct vfio_dma *dma;
-
-               dma = rb_entry(n, struct vfio_dma, node);
-
-               if (WARN_ON(!RB_EMPTY_ROOT(&dma->pfn_list)))
-                       break;
-       }
-       /* mdev vendor driver must unregister notifier */
-       WARN_ON(iommu->notifier.head);
-}
-
 /*
  * Called when a domain is removed in detach. It is possible that
  * the removed domain decided the iova aperture window. Modify the
@@ -2354,10 +2593,10 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                        kfree(group);
 
                        if (list_empty(&iommu->external_domain->group_list)) {
-                               vfio_sanity_check_pfn_list(iommu);
-
-                               if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu))
+                               if (!IS_IOMMU_CAP_DOMAIN_IN_CONTAINER(iommu)) {
+                                       WARN_ON(iommu->notifier.head);
                                        vfio_iommu_unmap_unpin_all(iommu);
+                               }
 
                                kfree(iommu->external_domain);
                                iommu->external_domain = NULL;
@@ -2391,10 +2630,12 @@ static void vfio_iommu_type1_detach_group(void *iommu_data,
                 */
                if (list_empty(&domain->group_list)) {
                        if (list_is_singular(&iommu->domain_list)) {
-                               if (!iommu->external_domain)
+                               if (!iommu->external_domain) {
+                                       WARN_ON(iommu->notifier.head);
                                        vfio_iommu_unmap_unpin_all(iommu);
-                               else
+                               } else {
                                        vfio_iommu_unmap_unpin_reaccount(iommu);
+                               }
                        }
                        iommu_domain_free(domain->domain);
                        list_del(&domain->next);
@@ -2415,8 +2656,11 @@ detach_group_done:
         * Removal of a group without dirty tracking may allow the iommu scope
         * to be promoted.
         */
-       if (update_dirty_scope)
-               update_pinned_page_dirty_scope(iommu);
+       if (update_dirty_scope) {
+               iommu->num_non_pinned_groups--;
+               if (iommu->dirty_page_tracking)
+                       vfio_iommu_populate_bitmap_full(iommu);
+       }
        mutex_unlock(&iommu->lock);
 }
 
@@ -2446,8 +2690,10 @@ static void *vfio_iommu_type1_open(unsigned long arg)
        INIT_LIST_HEAD(&iommu->iova_list);
        iommu->dma_list = RB_ROOT;
        iommu->dma_avail = dma_entry_limit;
+       iommu->container_open = true;
        mutex_init(&iommu->lock);
        BLOCKING_INIT_NOTIFIER_HEAD(&iommu->notifier);
+       init_waitqueue_head(&iommu->vaddr_wait);
 
        return iommu;
 }
@@ -2475,7 +2721,6 @@ static void vfio_iommu_type1_release(void *iommu_data)
 
        if (iommu->external_domain) {
                vfio_release_domain(iommu->external_domain, true);
-               vfio_sanity_check_pfn_list(iommu);
                kfree(iommu->external_domain);
        }
 
@@ -2517,6 +2762,8 @@ static int vfio_iommu_type1_check_extension(struct vfio_iommu *iommu,
        case VFIO_TYPE1_IOMMU:
        case VFIO_TYPE1v2_IOMMU:
        case VFIO_TYPE1_NESTING_IOMMU:
+       case VFIO_UNMAP_ALL:
+       case VFIO_UPDATE_VADDR:
                return 1;
        case VFIO_DMA_CC_IOMMU:
                if (!iommu)
@@ -2688,7 +2935,8 @@ static int vfio_iommu_type1_map_dma(struct vfio_iommu *iommu,
 {
        struct vfio_iommu_type1_dma_map map;
        unsigned long minsz;
-       uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE;
+       uint32_t mask = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE |
+                       VFIO_DMA_MAP_FLAG_VADDR;
 
        minsz = offsetofend(struct vfio_iommu_type1_dma_map, size);
 
@@ -2706,6 +2954,9 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
 {
        struct vfio_iommu_type1_dma_unmap unmap;
        struct vfio_bitmap bitmap = { 0 };
+       uint32_t mask = VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP |
+                       VFIO_DMA_UNMAP_FLAG_VADDR |
+                       VFIO_DMA_UNMAP_FLAG_ALL;
        unsigned long minsz;
        int ret;
 
@@ -2714,8 +2965,12 @@ static int vfio_iommu_type1_unmap_dma(struct vfio_iommu *iommu,
        if (copy_from_user(&unmap, (void __user *)arg, minsz))
                return -EFAULT;
 
-       if (unmap.argsz < minsz ||
-           unmap.flags & ~VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP)
+       if (unmap.argsz < minsz || unmap.flags & ~mask)
+               return -EINVAL;
+
+       if ((unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) &&
+           (unmap.flags & (VFIO_DMA_UNMAP_FLAG_ALL |
+                           VFIO_DMA_UNMAP_FLAG_VADDR)))
                return -EINVAL;
 
        if (unmap.flags & VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP) {
@@ -2906,12 +3161,13 @@ static int vfio_iommu_type1_dma_rw_chunk(struct vfio_iommu *iommu,
        struct vfio_dma *dma;
        bool kthread = current->mm == NULL;
        size_t offset;
+       int ret;
 
        *copied = 0;
 
-       dma = vfio_find_dma(iommu, user_iova, 1);
-       if (!dma)
-               return -EINVAL;
+       ret = vfio_find_dma_valid(iommu, user_iova, 1, &dma);
+       if (ret < 0)
+               return ret;
 
        if ((write && !(dma->prot & IOMMU_WRITE)) ||
                        !(dma->prot & IOMMU_READ))
@@ -3003,6 +3259,19 @@ vfio_iommu_type1_group_iommu_domain(void *iommu_data,
        return domain;
 }
 
+static void vfio_iommu_type1_notify(void *iommu_data,
+                                   enum vfio_iommu_notify_type event)
+{
+       struct vfio_iommu *iommu = iommu_data;
+
+       if (event != VFIO_IOMMU_CONTAINER_CLOSE)
+               return;
+       mutex_lock(&iommu->lock);
+       iommu->container_open = false;
+       mutex_unlock(&iommu->lock);
+       wake_up_all(&iommu->vaddr_wait);
+}
+
 static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
        .name                   = "vfio-iommu-type1",
        .owner                  = THIS_MODULE,
@@ -3017,6 +3286,7 @@ static const struct vfio_iommu_driver_ops vfio_iommu_driver_ops_type1 = {
        .unregister_notifier    = vfio_iommu_type1_unregister_notifier,
        .dma_rw                 = vfio_iommu_type1_dma_rw,
        .group_iommu_domain     = vfio_iommu_type1_group_iommu_domain,
+       .notify                 = vfio_iommu_type1_notify,
 };
 
 static int __init vfio_iommu_type1_init(void)
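
Taken together, the type1 changes implement the "update vaddr" protocol advertised through VFIO_UPDATE_VADDR (with VFIO_UNMAP_ALL as a companion extension): an unmap with VFIO_DMA_UNMAP_FLAG_VADDR leaves the IOMMU mapping in place but marks the vaddr invalid, kernel-side users such as dma_rw and page pinning block in vfio_wait() until a map with VFIO_DMA_MAP_FLAG_VADDR supplies the new address, and the new ->notify hook wakes all waiters on container close so nothing hangs. A rough userspace sketch of the flow a live-update scheme would follow (error handling omitted; the fd and the iova/size/new_vaddr values are placeholders):

#include <sys/ioctl.h>
#include <linux/vfio.h>

static void update_vaddr(int container_fd, __u64 iova, __u64 size,
			 __u64 new_vaddr)
{
	struct vfio_iommu_type1_dma_unmap unmap = {
		.argsz = sizeof(unmap),
		.flags = VFIO_DMA_UNMAP_FLAG_VADDR,	/* invalidate, keep mapping */
		.iova = iova,
		.size = size,
	};
	struct vfio_iommu_type1_dma_map map = {
		.argsz = sizeof(map),
		.flags = VFIO_DMA_MAP_FLAG_VADDR,	/* revalidate, no new mapping */
		.vaddr = new_vaddr,
		.iova = iova,
		.size = size,
	};

	ioctl(container_fd, VFIO_IOMMU_UNMAP_DMA, &unmap);
	/* ... exec/restart: recreate the memory at new_vaddr ... */
	ioctl(container_fd, VFIO_IOMMU_MAP_DMA, &map);
}
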
index 4ce9f00..5de21ad 100644 (file)
@@ -1814,12 +1814,9 @@ static int vhost_scsi_open(struct inode *inode, struct file *f)
        struct vhost_virtqueue **vqs;
        int r = -ENOMEM, i;
 
-       vs = kzalloc(sizeof(*vs), GFP_KERNEL | __GFP_NOWARN | __GFP_RETRY_MAYFAIL);
-       if (!vs) {
-               vs = vzalloc(sizeof(*vs));
-               if (!vs)
-                       goto err_vs;
-       }
+       vs = kvzalloc(sizeof(*vs), GFP_KERNEL);
+       if (!vs)
+               goto err_vs;
 
        vqs = kmalloc_array(VHOST_SCSI_MAX_VQ, sizeof(*vqs), GFP_KERNEL);
        if (!vqs)
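
kvzalloc() collapses the removed two-step fallback: it attempts the slab allocator first, with its own no-warn/may-fail heuristics for large sizes (roughly what the open-coded __GFP_NOWARN | __GFP_RETRY_MAYFAIL achieved), then falls back to vzalloc() transparently. The point to keep in view is the free side; a hypothetical counterpart that mirrors, rather than reproduces, the driver's teardown:

static void my_release(struct vhost_scsi *vs, struct vhost_virtqueue **vqs)
{
	kfree(vqs);	/* plain kmalloc_array() allocation */
	kvfree(vs);	/* kvzalloc() allocation: kfree() here would be a bug */
}
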
index ef688c8..e0a27e3 100644 (file)
@@ -308,8 +308,10 @@ static long vhost_vdpa_get_vring_num(struct vhost_vdpa *v, u16 __user *argp)
 
 static void vhost_vdpa_config_put(struct vhost_vdpa *v)
 {
-       if (v->config_ctx)
+       if (v->config_ctx) {
                eventfd_ctx_put(v->config_ctx);
+               v->config_ctx = NULL;
+       }
 }
 
 static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
@@ -329,8 +331,12 @@ static long vhost_vdpa_set_config_call(struct vhost_vdpa *v, u32 __user *argp)
        if (!IS_ERR_OR_NULL(ctx))
                eventfd_ctx_put(ctx);
 
-       if (IS_ERR(v->config_ctx))
-               return PTR_ERR(v->config_ctx);
+       if (IS_ERR(v->config_ctx)) {
+               long ret = PTR_ERR(v->config_ctx);
+
+               v->config_ctx = NULL;
+               return ret;
+       }
 
        v->vdpa->config->set_config_cb(v->vdpa, &cb);
 
@@ -900,14 +906,10 @@ err:
 
 static void vhost_vdpa_clean_irq(struct vhost_vdpa *v)
 {
-       struct vhost_virtqueue *vq;
        int i;
 
-       for (i = 0; i < v->nvqs; i++) {
-               vq = &v->vqs[i];
-               if (vq->call_ctx.producer.irq)
-                       irq_bypass_unregister_producer(&vq->call_ctx.producer);
-       }
+       for (i = 0; i < v->nvqs; i++)
+               vhost_vdpa_unsetup_vq_irq(v, i);
 }
 
 static int vhost_vdpa_release(struct inode *inode, struct file *filep)
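
Both config_ctx hunks in this file enforce the same invariant: v->config_ctx is either a valid eventfd context or NULL, never a stale or ERR_PTR value, so the release path can call vhost_vdpa_config_put() unconditionally without risking a double eventfd_ctx_put(). The consume-and-clear idiom in miniature (a sketch, not a new kernel helper):

static void eventfd_put_once(struct eventfd_ctx **ctxp)
{
	if (*ctxp) {
		eventfd_ctx_put(*ctxp);
		*ctxp = NULL;	/* make repeated calls harmless */
	}
}
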
index a262e12..5ccb070 100644 (file)
@@ -332,8 +332,8 @@ static void vhost_vq_reset(struct vhost_dev *dev,
        vq->error_ctx = NULL;
        vq->kick = NULL;
        vq->log_ctx = NULL;
-       vhost_reset_is_le(vq);
        vhost_disable_cross_endian(vq);
+       vhost_reset_is_le(vq);
        vq->busyloop_timeout = 0;
        vq->umem = NULL;
        vq->iotlb = NULL;
index bcc92ae..1b72edc 100644 (file)
@@ -921,40 +921,6 @@ static int acornfb_detect_monitortype(void)
        return 4;
 }
 
-/*
- * This enables the unused memory to be freed on older Acorn machines.
- * We are freeing memory on behalf of the architecture initialisation
- * code here.
- */
-static inline void
-free_unused_pages(unsigned int virtual_start, unsigned int virtual_end)
-{
-       int mb_freed = 0;
-
-       /*
-        * Align addresses
-        */
-       virtual_start = PAGE_ALIGN(virtual_start);
-       virtual_end = PAGE_ALIGN(virtual_end);
-
-       while (virtual_start < virtual_end) {
-               struct page *page;
-
-               /*
-                * Clear page reserved bit,
-                * set count to 1, and free
-                * the page.
-                */
-               page = virt_to_page(virtual_start);
-               __free_reserved_page(page);
-
-               virtual_start += PAGE_SIZE;
-               mb_freed += PAGE_SIZE / 1024;
-       }
-
-       printk("acornfb: freed %dK memory\n", mb_freed);
-}
-
 static int acornfb_probe(struct platform_device *dev)
 {
        unsigned long size;
index 551372f..465f55b 100644 (file)
@@ -287,11 +287,8 @@ static inline void aty_st_8(int regindex, u8 val, const struct atyfb_par *par)
 #endif
 }
 
-#if defined(CONFIG_PMAC_BACKLIGHT) || defined (CONFIG_FB_ATY_GENERIC_LCD) || \
-defined (CONFIG_FB_ATY_BACKLIGHT)
 extern void aty_st_lcd(int index, u32 val, const struct atyfb_par *par);
 extern u32 aty_ld_lcd(int index, const struct atyfb_par *par);
-#endif
 
     /*
      *  DAC operations
index 83c8e80..1aef3d6 100644 (file)
 #define PRINTKE(fmt, args...)  printk(KERN_ERR "atyfb: " fmt, ## args)
 
 #if defined(CONFIG_PMAC_BACKLIGHT) || defined(CONFIG_FB_ATY_GENERIC_LCD) || \
-defined(CONFIG_FB_ATY_BACKLIGHT)
+defined(CONFIG_FB_ATY_BACKLIGHT) || defined(CONFIG_PPC_PMAC)
 static const u32 lt_lcd_regs[] = {
        CNFG_PANEL_LG,
        LCD_GEN_CNTL_LG,
@@ -175,7 +175,17 @@ u32 aty_ld_lcd(int index, const struct atyfb_par *par)
                return aty_ld_le32(LCD_DATA, par);
        }
 }
-#endif /* defined(CONFIG_PMAC_BACKLIGHT) || defined (CONFIG_FB_ATY_GENERIC_LCD) */
+#else /* defined(CONFIG_PMAC_BACKLIGHT) || defined(CONFIG_FB_ATY_BACKLIGHT) ||
+        defined(CONFIG_FB_ATY_GENERIC_LCD) || defined(CONFIG_PPC_PMAC) */
+void aty_st_lcd(int index, u32 val, const struct atyfb_par *par)
+{ }
+
+u32 aty_ld_lcd(int index, const struct atyfb_par *par)
+{
+       return 0;
+}
+#endif /* defined(CONFIG_PMAC_BACKLIGHT) || defined(CONFIG_FB_ATY_BACKLIGHT) ||
+         defined(CONFIG_FB_ATY_GENERIC_LCD) || defined(CONFIG_PPC_PMAC) */
 
 #ifdef CONFIG_FB_ATY_GENERIC_LCD
 /*
index 44a5cd2..3406067 100644 (file)
@@ -1333,6 +1333,9 @@ static void fbcon_cursor(struct vc_data *vc, int mode)
 
        ops->cursor_flash = (mode == CM_ERASE) ? 0 : 1;
 
+       if (!ops->cursor)
+               return;
+
        ops->cursor(vc, info, mode, get_color(vc, info, c, 1),
                    get_color(vc, info, c, 0));
 }
index c8b0ae6..4dc9077 100644 (file)
@@ -1031,7 +1031,6 @@ static int hvfb_getmem(struct hv_device *hdev, struct fb_info *info)
                        PCI_DEVICE_ID_HYPERV_VIDEO, NULL);
                if (!pdev) {
                        pr_err("Unable to find PCI Hyper-V video\n");
-                       kfree(info->apertures);
                        return -ENODEV;
                }
 
@@ -1129,7 +1128,6 @@ getmem_done:
        } else {
                pci_dev_put(pdev);
        }
-       kfree(info->apertures);
 
        return 0;
 
@@ -1141,7 +1139,6 @@ err2:
 err1:
        if (!gen2vm)
                pci_dev_put(pdev);
-       kfree(info->apertures);
 
        return -ENOMEM;
 }
index 80c5f9c..8061e8e 100644 (file)
@@ -34,4 +34,6 @@ config FSL_HV_MANAGER
 source "drivers/virt/vboxguest/Kconfig"
 
 source "drivers/virt/nitro_enclaves/Kconfig"
+
+source "drivers/virt/acrn/Kconfig"
 endif
index f28425c..3e272ea 100644 (file)
@@ -7,3 +7,4 @@ obj-$(CONFIG_FSL_HV_MANAGER)    += fsl_hypervisor.o
 obj-y                          += vboxguest/
 
 obj-$(CONFIG_NITRO_ENCLAVES)   += nitro_enclaves/
+obj-$(CONFIG_ACRN_HSM)         += acrn/
diff --git a/drivers/virt/acrn/Kconfig b/drivers/virt/acrn/Kconfig
new file mode 100644 (file)
index 0000000..3e1a61c
--- /dev/null
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+config ACRN_HSM
+       tristate "ACRN Hypervisor Service Module"
+       depends on ACRN_GUEST
+       select EVENTFD
+       help
+         ACRN Hypervisor Service Module (HSM) is a kernel module which
+         communicates with ACRN userspace through ioctls and talks to
+         the ACRN Hypervisor through hypercalls. HSM runs only in a
+         privileged management VM, called the Service VM, to manage User
+         VMs and do I/O emulation. It is not required for simply running
+         under ACRN as a User VM.
+
+         To compile this driver as a module, choose M here; the module
+         will be called acrn. If unsure, say N.
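+
+# For reference, a minimal config fragment that builds this driver as a
+# module (CONFIG_ACRN_GUEST is the x86 guest-support option the symbol
+# above depends on):
+#
+#	CONFIG_ACRN_GUEST=y
+#	CONFIG_ACRN_HSM=m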
diff --git a/drivers/virt/acrn/Makefile b/drivers/virt/acrn/Makefile
new file mode 100644 (file)
index 0000000..08ce641
--- /dev/null
@@ -0,0 +1,3 @@
+# SPDX-License-Identifier: GPL-2.0
+obj-$(CONFIG_ACRN_HSM) := acrn.o
+acrn-y := hsm.o vm.o mm.o ioreq.o ioeventfd.o irqfd.o
diff --git a/drivers/virt/acrn/acrn_drv.h b/drivers/virt/acrn/acrn_drv.h
new file mode 100644 (file)
index 0000000..1be54ef
--- /dev/null
@@ -0,0 +1,227 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef __ACRN_HSM_DRV_H
+#define __ACRN_HSM_DRV_H
+
+#include <linux/acrn.h>
+#include <linux/dev_printk.h>
+#include <linux/miscdevice.h>
+#include <linux/types.h>
+
+#include "hypercall.h"
+
+extern struct miscdevice acrn_dev;
+
+#define ACRN_NAME_LEN          16
+#define ACRN_MEM_MAPPING_MAX   256
+
+#define ACRN_MEM_REGION_ADD    0
+#define ACRN_MEM_REGION_DEL    2
+
+struct acrn_vm;
+struct acrn_ioreq_client;
+
+/**
+ * struct vm_memory_region_op - Hypervisor memory operation
+ * @type:              Operation type (ACRN_MEM_REGION_*)
+ * @attr:              Memory attribute (ACRN_MEM_TYPE_* | ACRN_MEM_ACCESS_*)
+ * @user_vm_pa:                Physical address of User VM to be mapped.
+ * @service_vm_pa:     Physical address of Service VM to be mapped.
+ * @size:              Size of this region.
+ *
+ * Structure containing the information needed by the ACRN Hypervisor to
+ * manage the EPT mappings of a single memory region of the User VM. Several
+ * &struct vm_memory_region_op can be batched to the ACRN Hypervisor, see
+ * &struct vm_memory_region_batch.
+ */
+struct vm_memory_region_op {
+       u32     type;
+       u32     attr;
+       u64     user_vm_pa;
+       u64     service_vm_pa;
+       u64     size;
+};
+
+/**
+ * struct vm_memory_region_batch - A batch of vm_memory_region_op.
+ * @vmid:              A User VM ID.
+ * @reserved:          Reserved.
+ * @regions_num:       The number of vm_memory_region_op.
+ * @regions_gpa:       Physical address of a vm_memory_region_op array.
+ *
+ * HC_VM_SET_MEMORY_REGIONS uses this structure to manage EPT mappings of
+ * multiple memory regions of a User VM. A &struct vm_memory_region_batch
+ * contains multiple &struct vm_memory_region_op for batch processing in the
+ * ACRN Hypervisor.
+ */
+struct vm_memory_region_batch {
+       u16     vmid;
+       u16     reserved[3];
+       u32     regions_num;
+       u64     regions_gpa;
+};
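+
+/*
+ * Illustrative sketch (not shipped code) of batching two region operations;
+ * the ops array must sit in physically contiguous kernel memory so that
+ * virt_to_phys() of it is meaningful to the hypervisor:
+ *
+ *	struct vm_memory_region_op *ops = kcalloc(2, sizeof(*ops), GFP_KERNEL);
+ *	struct vm_memory_region_batch *batch = kzalloc(sizeof(*batch), GFP_KERNEL);
+ *
+ *	ops[0].type = ACRN_MEM_REGION_ADD;	(fill attr/addresses/size too)
+ *	ops[1].type = ACRN_MEM_REGION_DEL;
+ *	batch->vmid = vm->vmid;
+ *	batch->regions_num = 2;
+ *	batch->regions_gpa = virt_to_phys(ops);
+ *	hcall_set_memory_regions(virt_to_phys(batch));
+ */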
+
+/**
+ * struct vm_memory_mapping - Memory map between a User VM and the Service VM
+ * @pages:             Pages in Service VM kernel.
+ * @npages:            Number of pages.
+ * @service_vm_va:     Virtual address in Service VM kernel.
+ * @user_vm_pa:                Physical address in User VM.
+ * @size:              Size of this memory region.
+ *
+ * HSM maintains memory mappings between a User VM GPA and the Service VM
+ * kernel VA to accelerate the User VM GPA translation.
+ */
+struct vm_memory_mapping {
+       struct page     **pages;
+       int             npages;
+       void            *service_vm_va;
+       u64             user_vm_pa;
+       size_t          size;
+};
+
+/**
+ * struct acrn_ioreq_buffer - Data for setting the ioreq buffer of User VM
+ * @ioreq_buf: The GPA of the IO request shared buffer of a VM
+ *
+ * The parameter for the HC_SET_IOREQ_BUFFER hypercall used to set up
+ * the shared I/O request buffer between Service VM and ACRN hypervisor.
+ */
+struct acrn_ioreq_buffer {
+       u64     ioreq_buf;
+};
+
+struct acrn_ioreq_range {
+       struct list_head        list;
+       u32                     type;
+       u64                     start;
+       u64                     end;
+};
+
+#define ACRN_IOREQ_CLIENT_DESTROYING   0U
+typedef        int (*ioreq_handler_t)(struct acrn_ioreq_client *client,
+                              struct acrn_io_request *req);
+/**
+ * struct acrn_ioreq_client - Structure of I/O client.
+ * @name:      Client name
+ * @vm:                The VM that the client belongs to
+ * @list:      List node for this acrn_ioreq_client
+ * @is_default:        If this client is the default one
+ * @flags:     Flags (ACRN_IOREQ_CLIENT_*)
+ * @range_list:        I/O ranges
+ * @range_lock:        Lock to protect range_list
+ * @ioreqs_map:        The pending I/O requests bitmap.
+ * @handler:   I/O requests handler of this client
+ * @thread:    The thread which executes the handler
+ * @wq:                The wait queue for the handler thread parking
+ * @priv:      Data for the thread
+ */
+struct acrn_ioreq_client {
+       char                    name[ACRN_NAME_LEN];
+       struct acrn_vm          *vm;
+       struct list_head        list;
+       bool                    is_default;
+       unsigned long           flags;
+       struct list_head        range_list;
+       rwlock_t                range_lock;
+       DECLARE_BITMAP(ioreqs_map, ACRN_IO_REQUEST_MAX);
+       ioreq_handler_t         handler;
+       struct task_struct      *thread;
+       wait_queue_head_t       wq;
+       void                    *priv;
+};
+
+#define ACRN_INVALID_VMID (0xffffU)
+
+#define ACRN_VM_FLAG_DESTROYED         0U
+#define ACRN_VM_FLAG_CLEARING_IOREQ    1U
+extern struct list_head acrn_vm_list;
+extern rwlock_t acrn_vm_list_lock;
+/**
+ * struct acrn_vm - Properties of ACRN User VM.
+ * @list:                      Entry within global list of all VMs.
+ * @vmid:                      User VM ID.
+ * @vcpu_num:                  Number of virtual CPUs in the VM.
+ * @flags:                     Flags (ACRN_VM_FLAG_*) of the VM. This is VM
+ *                             flag management in HSM which is different
+ *                             from the &acrn_vm_creation.vm_flag.
+ * @regions_mapping_lock:      Lock to protect &acrn_vm.regions_mapping and
+ *                             &acrn_vm.regions_mapping_count.
+ * @regions_mapping:           Memory mappings of this VM.
+ * @regions_mapping_count:     Number of memory mapping of this VM.
+ * @ioreq_clients_lock:                Lock to protect ioreq_clients and default_client
+ * @ioreq_clients:             The I/O request clients list of this VM
+ * @default_client:            The default I/O request client
+ * @ioreq_buf:                 I/O request shared buffer
+ * @ioreq_page:                        The page of the I/O request shared buffer
+ * @pci_conf_addr:             Address of a PCI configuration access emulation
+ * @monitor_page:              Page of interrupt statistics of User VM
+ * @ioeventfds_lock:           Lock to protect ioeventfds list
+ * @ioeventfds:                        List to link all hsm_ioeventfd
+ * @ioeventfd_client:          I/O client for ioeventfds of the VM
+ * @irqfds_lock:               Lock to protect irqfds list
+ * @irqfds:                    List to link all hsm_irqfd
+ * @irqfd_wq:                  Workqueue for irqfd async shutdown
+ */
+struct acrn_vm {
+       struct list_head                list;
+       u16                             vmid;
+       int                             vcpu_num;
+       unsigned long                   flags;
+       struct mutex                    regions_mapping_lock;
+       struct vm_memory_mapping        regions_mapping[ACRN_MEM_MAPPING_MAX];
+       int                             regions_mapping_count;
+       spinlock_t                      ioreq_clients_lock;
+       struct list_head                ioreq_clients;
+       struct acrn_ioreq_client        *default_client;
+       struct acrn_io_request_buffer   *ioreq_buf;
+       struct page                     *ioreq_page;
+       u32                             pci_conf_addr;
+       struct page                     *monitor_page;
+       struct mutex                    ioeventfds_lock;
+       struct list_head                ioeventfds;
+       struct acrn_ioreq_client        *ioeventfd_client;
+       struct mutex                    irqfds_lock;
+       struct list_head                irqfds;
+       struct workqueue_struct         *irqfd_wq;
+};
+
+struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
+                              struct acrn_vm_creation *vm_param);
+int acrn_vm_destroy(struct acrn_vm *vm);
+int acrn_mm_region_add(struct acrn_vm *vm, u64 user_gpa, u64 service_gpa,
+                      u64 size, u32 mem_type, u32 mem_access_right);
+int acrn_mm_region_del(struct acrn_vm *vm, u64 user_gpa, u64 size);
+int acrn_vm_memseg_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
+int acrn_vm_memseg_unmap(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
+int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap);
+void acrn_vm_all_ram_unmap(struct acrn_vm *vm);
+
+int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma);
+void acrn_ioreq_deinit(struct acrn_vm *vm);
+int acrn_ioreq_intr_setup(void);
+void acrn_ioreq_intr_remove(void);
+void acrn_ioreq_request_clear(struct acrn_vm *vm);
+int acrn_ioreq_client_wait(struct acrn_ioreq_client *client);
+int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu);
+struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
+                                                  ioreq_handler_t handler,
+                                                  void *data, bool is_default,
+                                                  const char *name);
+void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client);
+int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
+                        u32 type, u64 start, u64 end);
+void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
+                         u32 type, u64 start, u64 end);
+
+int acrn_msi_inject(struct acrn_vm *vm, u64 msi_addr, u64 msi_data);
+
+int acrn_ioeventfd_init(struct acrn_vm *vm);
+int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args);
+void acrn_ioeventfd_deinit(struct acrn_vm *vm);
+
+int acrn_irqfd_init(struct acrn_vm *vm);
+int acrn_irqfd_config(struct acrn_vm *vm, struct acrn_irqfd *args);
+void acrn_irqfd_deinit(struct acrn_vm *vm);
+
+#endif /* __ACRN_HSM_DRV_H */
diff --git a/drivers/virt/acrn/hsm.c b/drivers/virt/acrn/hsm.c
new file mode 100644 (file)
index 0000000..130e12b
--- /dev/null
@@ -0,0 +1,479 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN Hypervisor Service Module (HSM)
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *     Fengwei Yin <fengwei.yin@intel.com>
+ *     Yakui Zhao <yakui.zhao@intel.com>
+ */
+
+#include <linux/cpu.h>
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+
+#include <asm/acrn.h>
+#include <asm/hypervisor.h>
+
+#include "acrn_drv.h"
+
+/*
+ * When /dev/acrn_hsm is opened, a 'struct acrn_vm' object is created to
+ * represent a VM instance and continues to be associated with the opened file
+ * descriptor. All ioctl operations on this file descriptor will be targeted to
+ * the VM instance. Release of this file descriptor will destroy the object.
+ */
+static int acrn_dev_open(struct inode *inode, struct file *filp)
+{
+       struct acrn_vm *vm;
+
+       vm = kzalloc(sizeof(*vm), GFP_KERNEL);
+       if (!vm)
+               return -ENOMEM;
+
+       vm->vmid = ACRN_INVALID_VMID;
+       filp->private_data = vm;
+       return 0;
+}
+
+static int pmcmd_ioctl(u64 cmd, void __user *uptr)
+{
+       struct acrn_pstate_data *px_data;
+       struct acrn_cstate_data *cx_data;
+       u64 *pm_info;
+       int ret = 0;
+
+       switch (cmd & PMCMD_TYPE_MASK) {
+       case ACRN_PMCMD_GET_PX_CNT:
+       case ACRN_PMCMD_GET_CX_CNT:
+               pm_info = kmalloc(sizeof(u64), GFP_KERNEL);
+               if (!pm_info)
+                       return -ENOMEM;
+
+               ret = hcall_get_cpu_state(cmd, virt_to_phys(pm_info));
+               if (ret < 0) {
+                       kfree(pm_info);
+                       break;
+               }
+
+               if (copy_to_user(uptr, pm_info, sizeof(u64)))
+                       ret = -EFAULT;
+               kfree(pm_info);
+               break;
+       case ACRN_PMCMD_GET_PX_DATA:
+               px_data = kmalloc(sizeof(*px_data), GFP_KERNEL);
+               if (!px_data)
+                       return -ENOMEM;
+
+               ret = hcall_get_cpu_state(cmd, virt_to_phys(px_data));
+               if (ret < 0) {
+                       kfree(px_data);
+                       break;
+               }
+
+               if (copy_to_user(uptr, px_data, sizeof(*px_data)))
+                       ret = -EFAULT;
+               kfree(px_data);
+               break;
+       case ACRN_PMCMD_GET_CX_DATA:
+               cx_data = kmalloc(sizeof(*cx_data), GFP_KERNEL);
+               if (!cx_data)
+                       return -ENOMEM;
+
+               ret = hcall_get_cpu_state(cmd, virt_to_phys(cx_data));
+               if (ret < 0) {
+                       kfree(cx_data);
+                       break;
+               }
+
+               if (copy_to_user(uptr, cx_data, sizeof(*cx_data)))
+                       ret = -EFAULT;
+               kfree(cx_data);
+               break;
+       default:
+               break;
+       }
+
+       return ret;
+}
+
+/*
+ * HSM relies on the hypercall layer of the ACRN hypervisor to sanity-check
+ * the input parameters.
+ */
+static long acrn_dev_ioctl(struct file *filp, unsigned int cmd,
+                          unsigned long ioctl_param)
+{
+       struct acrn_vm *vm = filp->private_data;
+       struct acrn_vm_creation *vm_param;
+       struct acrn_vcpu_regs *cpu_regs;
+       struct acrn_ioreq_notify notify;
+       struct acrn_ptdev_irq *irq_info;
+       struct acrn_ioeventfd ioeventfd;
+       struct acrn_vm_memmap memmap;
+       struct acrn_msi_entry *msi;
+       struct acrn_pcidev *pcidev;
+       struct acrn_irqfd irqfd;
+       struct page *page;
+       u64 cstate_cmd;
+       int i, ret = 0;
+
+       if (vm->vmid == ACRN_INVALID_VMID && cmd != ACRN_IOCTL_CREATE_VM) {
+               dev_dbg(acrn_dev.this_device,
+                       "ioctl 0x%x: Invalid VM state!\n", cmd);
+               return -EINVAL;
+       }
+
+       switch (cmd) {
+       case ACRN_IOCTL_CREATE_VM:
+               vm_param = memdup_user((void __user *)ioctl_param,
+                                      sizeof(struct acrn_vm_creation));
+               if (IS_ERR(vm_param))
+                       return PTR_ERR(vm_param);
+
+               if ((vm_param->reserved0 | vm_param->reserved1) != 0) {
+                       /* don't leak the memdup_user()ed buffer */
+                       kfree(vm_param);
+                       return -EINVAL;
+               }
+
+               vm = acrn_vm_create(vm, vm_param);
+               if (!vm) {
+                       ret = -EINVAL;
+                       kfree(vm_param);
+                       break;
+               }
+
+               if (copy_to_user((void __user *)ioctl_param, vm_param,
+                                sizeof(struct acrn_vm_creation))) {
+                       acrn_vm_destroy(vm);
+                       ret = -EFAULT;
+               }
+
+               kfree(vm_param);
+               break;
+       case ACRN_IOCTL_START_VM:
+               ret = hcall_start_vm(vm->vmid);
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to start VM %u!\n", vm->vmid);
+               break;
+       case ACRN_IOCTL_PAUSE_VM:
+               ret = hcall_pause_vm(vm->vmid);
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to pause VM %u!\n", vm->vmid);
+               break;
+       case ACRN_IOCTL_RESET_VM:
+               ret = hcall_reset_vm(vm->vmid);
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to restart VM %u!\n", vm->vmid);
+               break;
+       case ACRN_IOCTL_DESTROY_VM:
+               ret = acrn_vm_destroy(vm);
+               break;
+       case ACRN_IOCTL_SET_VCPU_REGS:
+               cpu_regs = memdup_user((void __user *)ioctl_param,
+                                      sizeof(struct acrn_vcpu_regs));
+               if (IS_ERR(cpu_regs))
+                       return PTR_ERR(cpu_regs);
+
+               /* the memdup_user()ed buffer must be freed on every error path */
+               for (i = 0; i < ARRAY_SIZE(cpu_regs->reserved); i++)
+                       if (cpu_regs->reserved[i]) {
+                               kfree(cpu_regs);
+                               return -EINVAL;
+                       }
+
+               for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_32); i++)
+                       if (cpu_regs->vcpu_regs.reserved_32[i]) {
+                               kfree(cpu_regs);
+                               return -EINVAL;
+                       }
+
+               for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.reserved_64); i++)
+                       if (cpu_regs->vcpu_regs.reserved_64[i]) {
+                               kfree(cpu_regs);
+                               return -EINVAL;
+                       }
+
+               for (i = 0; i < ARRAY_SIZE(cpu_regs->vcpu_regs.gdt.reserved); i++)
+                       if (cpu_regs->vcpu_regs.gdt.reserved[i] |
+                           cpu_regs->vcpu_regs.idt.reserved[i]) {
+                               kfree(cpu_regs);
+                               return -EINVAL;
+                       }
+
+               ret = hcall_set_vcpu_regs(vm->vmid, virt_to_phys(cpu_regs));
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to set regs state of VM%u!\n",
+                               vm->vmid);
+               kfree(cpu_regs);
+               break;
+       case ACRN_IOCTL_SET_MEMSEG:
+               if (copy_from_user(&memmap, (void __user *)ioctl_param,
+                                  sizeof(memmap)))
+                       return -EFAULT;
+
+               ret = acrn_vm_memseg_map(vm, &memmap);
+               break;
+       case ACRN_IOCTL_UNSET_MEMSEG:
+               if (copy_from_user(&memmap, (void __user *)ioctl_param,
+                                  sizeof(memmap)))
+                       return -EFAULT;
+
+               ret = acrn_vm_memseg_unmap(vm, &memmap);
+               break;
+       case ACRN_IOCTL_ASSIGN_PCIDEV:
+               pcidev = memdup_user((void __user *)ioctl_param,
+                                    sizeof(struct acrn_pcidev));
+               if (IS_ERR(pcidev))
+                       return PTR_ERR(pcidev);
+
+               ret = hcall_assign_pcidev(vm->vmid, virt_to_phys(pcidev));
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to assign pci device!\n");
+               kfree(pcidev);
+               break;
+       case ACRN_IOCTL_DEASSIGN_PCIDEV:
+               pcidev = memdup_user((void __user *)ioctl_param,
+                                    sizeof(struct acrn_pcidev));
+               if (IS_ERR(pcidev))
+                       return PTR_ERR(pcidev);
+
+               ret = hcall_deassign_pcidev(vm->vmid, virt_to_phys(pcidev));
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to deassign pci device!\n");
+               kfree(pcidev);
+               break;
+       case ACRN_IOCTL_SET_PTDEV_INTR:
+               irq_info = memdup_user((void __user *)ioctl_param,
+                                      sizeof(struct acrn_ptdev_irq));
+               if (IS_ERR(irq_info))
+                       return PTR_ERR(irq_info);
+
+               ret = hcall_set_ptdev_intr(vm->vmid, virt_to_phys(irq_info));
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to configure intr for ptdev!\n");
+               kfree(irq_info);
+               break;
+       case ACRN_IOCTL_RESET_PTDEV_INTR:
+               irq_info = memdup_user((void __user *)ioctl_param,
+                                      sizeof(struct acrn_ptdev_irq));
+               if (IS_ERR(irq_info))
+                       return PTR_ERR(irq_info);
+
+               ret = hcall_reset_ptdev_intr(vm->vmid, virt_to_phys(irq_info));
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to reset intr for ptdev!\n");
+               kfree(irq_info);
+               break;
+       case ACRN_IOCTL_SET_IRQLINE:
+               ret = hcall_set_irqline(vm->vmid, ioctl_param);
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to set interrupt line!\n");
+               break;
+       case ACRN_IOCTL_INJECT_MSI:
+               msi = memdup_user((void __user *)ioctl_param,
+                                 sizeof(struct acrn_msi_entry));
+               if (IS_ERR(msi))
+                       return PTR_ERR(msi);
+
+               ret = hcall_inject_msi(vm->vmid, virt_to_phys(msi));
+               if (ret < 0)
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to inject MSI!\n");
+               kfree(msi);
+               break;
+       case ACRN_IOCTL_VM_INTR_MONITOR:
+               ret = pin_user_pages_fast(ioctl_param, 1,
+                                         FOLL_WRITE | FOLL_LONGTERM, &page);
+               if (unlikely(ret != 1)) {
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to pin intr hdr buffer!\n");
+                       return -EFAULT;
+               }
+
+               ret = hcall_vm_intr_monitor(vm->vmid, page_to_phys(page));
+               if (ret < 0) {
+                       unpin_user_page(page);
+                       dev_dbg(acrn_dev.this_device,
+                               "Failed to monitor intr data!\n");
+                       return ret;
+               }
+               if (vm->monitor_page)
+                       unpin_user_page(vm->monitor_page);
+               vm->monitor_page = page;
+               break;
+       case ACRN_IOCTL_CREATE_IOREQ_CLIENT:
+               if (vm->default_client)
+                       return -EEXIST;
+               if (!acrn_ioreq_client_create(vm, NULL, NULL, true, "acrndm"))
+                       ret = -EINVAL;
+               break;
+       case ACRN_IOCTL_DESTROY_IOREQ_CLIENT:
+               if (vm->default_client)
+                       acrn_ioreq_client_destroy(vm->default_client);
+               break;
+       case ACRN_IOCTL_ATTACH_IOREQ_CLIENT:
+               if (vm->default_client)
+                       ret = acrn_ioreq_client_wait(vm->default_client);
+               else
+                       ret = -ENODEV;
+               break;
+       case ACRN_IOCTL_NOTIFY_REQUEST_FINISH:
+               if (copy_from_user(&notify, (void __user *)ioctl_param,
+                                  sizeof(struct acrn_ioreq_notify)))
+                       return -EFAULT;
+
+               if (notify.reserved != 0)
+                       return -EINVAL;
+
+               ret = acrn_ioreq_request_default_complete(vm, notify.vcpu);
+               break;
+       case ACRN_IOCTL_CLEAR_VM_IOREQ:
+               acrn_ioreq_request_clear(vm);
+               break;
+       case ACRN_IOCTL_PM_GET_CPU_STATE:
+               if (copy_from_user(&cstate_cmd, (void __user *)ioctl_param,
+                                  sizeof(cstate_cmd)))
+                       return -EFAULT;
+
+               ret = pmcmd_ioctl(cstate_cmd, (void __user *)ioctl_param);
+               break;
+       case ACRN_IOCTL_IOEVENTFD:
+               if (copy_from_user(&ioeventfd, (void __user *)ioctl_param,
+                                  sizeof(ioeventfd)))
+                       return -EFAULT;
+
+               if (ioeventfd.reserved != 0)
+                       return -EINVAL;
+
+               ret = acrn_ioeventfd_config(vm, &ioeventfd);
+               break;
+       case ACRN_IOCTL_IRQFD:
+               if (copy_from_user(&irqfd, (void __user *)ioctl_param,
+                                  sizeof(irqfd)))
+                       return -EFAULT;
+               ret = acrn_irqfd_config(vm, &irqfd);
+               break;
+       default:
+               dev_dbg(acrn_dev.this_device, "Unknown IOCTL 0x%x!\n", cmd);
+               ret = -ENOTTY;
+       }
+
+       return ret;
+}
+
+static int acrn_dev_release(struct inode *inode, struct file *filp)
+{
+       struct acrn_vm *vm = filp->private_data;
+
+       acrn_vm_destroy(vm);
+       kfree(vm);
+       return 0;
+}
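+
+/*
+ * Illustrative userspace lifecycle (a sketch, not shipped code; the ioctl
+ * numbers and structure layouts come from the <linux/acrn.h> uapi):
+ *
+ *	int fd = open("/dev/acrn_hsm", O_RDWR | O_CLOEXEC);
+ *	struct acrn_vm_creation create = { ... };
+ *
+ *	ioctl(fd, ACRN_IOCTL_CREATE_VM, &create);
+ *	ioctl(fd, ACRN_IOCTL_SET_VCPU_REGS, &regs);
+ *	ioctl(fd, ACRN_IOCTL_START_VM);
+ *	...
+ *	close(fd);	releases the fd; acrn_dev_release() destroys the VM
+ */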
+
+static ssize_t remove_cpu_store(struct device *dev,
+                               struct device_attribute *attr,
+                               const char *buf, size_t count)
+{
+       u64 cpu, lapicid;
+       int ret;
+
+       if (kstrtoull(buf, 0, &cpu) < 0)
+               return -EINVAL;
+
+       if (cpu >= num_possible_cpus() || cpu == 0 || !cpu_is_hotpluggable(cpu))
+               return -EINVAL;
+
+       if (cpu_online(cpu))
+               remove_cpu(cpu);
+
+       lapicid = cpu_data(cpu).apicid;
+       dev_dbg(dev, "Try to remove cpu %lld with lapicid %lld\n", cpu, lapicid);
+       ret = hcall_sos_remove_cpu(lapicid);
+       if (ret < 0) {
+               dev_err(dev, "Failed to remove cpu %lld!\n", cpu);
+               goto fail_remove;
+       }
+
+       return count;
+
+fail_remove:
+       add_cpu(cpu);
+       return ret;
+}
+static DEVICE_ATTR_WO(remove_cpu);
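+
+/*
+ * The attribute group below attaches this file to the misc device, so with
+ * the default sysfs layout for misc devices (an assumption, the path is not
+ * spelled out in this patch) a Service VM CPU can be handed back to the
+ * hypervisor with, e.g.:
+ *
+ *	echo 3 > /sys/class/misc/acrn_hsm/remove_cpu
+ */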
+
+static umode_t acrn_attr_visible(struct kobject *kobj, struct attribute *a, int n)
+{
+       if (a == &dev_attr_remove_cpu.attr)
+               return IS_ENABLED(CONFIG_HOTPLUG_CPU) ? a->mode : 0;
+
+       return a->mode;
+}
+
+static struct attribute *acrn_attrs[] = {
+       &dev_attr_remove_cpu.attr,
+       NULL
+};
+
+static struct attribute_group acrn_attr_group = {
+       .attrs = acrn_attrs,
+       .is_visible = acrn_attr_visible,
+};
+
+static const struct attribute_group *acrn_attr_groups[] = {
+       &acrn_attr_group,
+       NULL
+};
+
+static const struct file_operations acrn_fops = {
+       .owner          = THIS_MODULE,
+       .open           = acrn_dev_open,
+       .release        = acrn_dev_release,
+       .unlocked_ioctl = acrn_dev_ioctl,
+};
+
+struct miscdevice acrn_dev = {
+       .minor  = MISC_DYNAMIC_MINOR,
+       .name   = "acrn_hsm",
+       .fops   = &acrn_fops,
+       .groups = acrn_attr_groups,
+};
+
+static int __init hsm_init(void)
+{
+       int ret;
+
+       if (x86_hyper_type != X86_HYPER_ACRN)
+               return -ENODEV;
+
+       if (!(cpuid_eax(ACRN_CPUID_FEATURES) & ACRN_FEATURE_PRIVILEGED_VM))
+               return -EPERM;
+
+       ret = misc_register(&acrn_dev);
+       if (ret) {
+               pr_err("Create misc dev failed!\n");
+               return ret;
+       }
+
+       ret = acrn_ioreq_intr_setup();
+       if (ret) {
+               pr_err("Setup I/O request handler failed!\n");
+               misc_deregister(&acrn_dev);
+               return ret;
+       }
+       return 0;
+}
+
+static void __exit hsm_exit(void)
+{
+       acrn_ioreq_intr_remove();
+       misc_deregister(&acrn_dev);
+}
+module_init(hsm_init);
+module_exit(hsm_exit);
+
+MODULE_AUTHOR("Intel Corporation");
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("ACRN Hypervisor Service Module (HSM)");
diff --git a/drivers/virt/acrn/hypercall.h b/drivers/virt/acrn/hypercall.h
new file mode 100644 (file)
index 0000000..0cfad05
--- /dev/null
@@ -0,0 +1,254 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * ACRN HSM: hypercalls of ACRN Hypervisor
+ */
+#ifndef __ACRN_HSM_HYPERCALL_H
+#define __ACRN_HSM_HYPERCALL_H
+#include <asm/acrn.h>
+
+/*
+ * Hypercall IDs of the ACRN Hypervisor
+ */
+#define _HC_ID(x, y) (((x) << 24) | (y))
+
+#define HC_ID 0x80UL
+
+#define HC_ID_GEN_BASE                 0x0UL
+#define HC_SOS_REMOVE_CPU              _HC_ID(HC_ID, HC_ID_GEN_BASE + 0x01)
+
+#define HC_ID_VM_BASE                  0x10UL
+#define HC_CREATE_VM                   _HC_ID(HC_ID, HC_ID_VM_BASE + 0x00)
+#define HC_DESTROY_VM                  _HC_ID(HC_ID, HC_ID_VM_BASE + 0x01)
+#define HC_START_VM                    _HC_ID(HC_ID, HC_ID_VM_BASE + 0x02)
+#define HC_PAUSE_VM                    _HC_ID(HC_ID, HC_ID_VM_BASE + 0x03)
+#define HC_RESET_VM                    _HC_ID(HC_ID, HC_ID_VM_BASE + 0x05)
+#define HC_SET_VCPU_REGS               _HC_ID(HC_ID, HC_ID_VM_BASE + 0x06)
+
+#define HC_ID_IRQ_BASE                 0x20UL
+#define HC_INJECT_MSI                  _HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x03)
+#define HC_VM_INTR_MONITOR             _HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x04)
+#define HC_SET_IRQLINE                 _HC_ID(HC_ID, HC_ID_IRQ_BASE + 0x05)
+
+#define HC_ID_IOREQ_BASE               0x30UL
+#define HC_SET_IOREQ_BUFFER            _HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x00)
+#define HC_NOTIFY_REQUEST_FINISH       _HC_ID(HC_ID, HC_ID_IOREQ_BASE + 0x01)
+
+#define HC_ID_MEM_BASE                 0x40UL
+#define HC_VM_SET_MEMORY_REGIONS       _HC_ID(HC_ID, HC_ID_MEM_BASE + 0x02)
+
+#define HC_ID_PCI_BASE                 0x50UL
+#define HC_SET_PTDEV_INTR              _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x03)
+#define HC_RESET_PTDEV_INTR            _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x04)
+#define HC_ASSIGN_PCIDEV               _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x05)
+#define HC_DEASSIGN_PCIDEV             _HC_ID(HC_ID, HC_ID_PCI_BASE + 0x06)
+
+#define HC_ID_PM_BASE                  0x80UL
+#define HC_PM_GET_CPU_STATE            _HC_ID(HC_ID, HC_ID_PM_BASE + 0x00)
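+
+/*
+ * Worked example of the ID encoding above (illustration only):
+ * HC_START_VM = _HC_ID(0x80UL, 0x10UL + 0x02)
+ *             = (0x80UL << 24) | 0x12UL = 0x80000012.
+ */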
+
+/**
+ * hcall_sos_remove_cpu() - Remove a vCPU of Service VM
+ * @cpu: The vCPU to be removed
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_sos_remove_cpu(u64 cpu)
+{
+       return acrn_hypercall1(HC_SOS_REMOVE_CPU, cpu);
+}
+
+/**
+ * hcall_create_vm() - Create a User VM
+ * @vminfo:    Service VM GPA of info of User VM creation
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_create_vm(u64 vminfo)
+{
+       return acrn_hypercall1(HC_CREATE_VM, vminfo);
+}
+
+/**
+ * hcall_start_vm() - Start a User VM
+ * @vmid:      User VM ID
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_start_vm(u64 vmid)
+{
+       return acrn_hypercall1(HC_START_VM, vmid);
+}
+
+/**
+ * hcall_pause_vm() - Pause a User VM
+ * @vmid:      User VM ID
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_pause_vm(u64 vmid)
+{
+       return acrn_hypercall1(HC_PAUSE_VM, vmid);
+}
+
+/**
+ * hcall_destroy_vm() - Destroy a User VM
+ * @vmid:      User VM ID
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_destroy_vm(u64 vmid)
+{
+       return acrn_hypercall1(HC_DESTROY_VM, vmid);
+}
+
+/**
+ * hcall_reset_vm() - Reset a User VM
+ * @vmid:      User VM ID
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_reset_vm(u64 vmid)
+{
+       return acrn_hypercall1(HC_RESET_VM, vmid);
+}
+
+/**
+ * hcall_set_vcpu_regs() - Set up registers of virtual CPU of a User VM
+ * @vmid:      User VM ID
+ * @regs_state:        Service VM GPA of registers state
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_vcpu_regs(u64 vmid, u64 regs_state)
+{
+       return acrn_hypercall2(HC_SET_VCPU_REGS, vmid, regs_state);
+}
+
+/**
+ * hcall_inject_msi() - Deliver an MSI interrupt to a User VM
+ * @vmid:      User VM ID
+ * @msi:       Service VM GPA of MSI message
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_inject_msi(u64 vmid, u64 msi)
+{
+       return acrn_hypercall2(HC_INJECT_MSI, vmid, msi);
+}
+
+/**
+ * hcall_vm_intr_monitor() - Set a shared page for User VM interrupt statistics
+ * @vmid:      User VM ID
+ * @addr:      Service VM GPA of the shared page
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_vm_intr_monitor(u64 vmid, u64 addr)
+{
+       return acrn_hypercall2(HC_VM_INTR_MONITOR, vmid, addr);
+}
+
+/**
+ * hcall_set_irqline() - Set or clear an interrupt line
+ * @vmid:      User VM ID
+ * @op:                Service VM GPA of interrupt line operations
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_irqline(u64 vmid, u64 op)
+{
+       return acrn_hypercall2(HC_SET_IRQLINE, vmid, op);
+}
+
+/**
+ * hcall_set_ioreq_buffer() - Set up the shared buffer for I/O Requests.
+ * @vmid:      User VM ID
+ * @buffer:    Service VM GPA of the shared buffer
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_ioreq_buffer(u64 vmid, u64 buffer)
+{
+       return acrn_hypercall2(HC_SET_IOREQ_BUFFER, vmid, buffer);
+}
+
+/**
+ * hcall_notify_req_finish() - Notify ACRN Hypervisor of I/O request completion.
+ * @vmid:      User VM ID
+ * @vcpu:      The vCPU which initiated the I/O request
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_notify_req_finish(u64 vmid, u64 vcpu)
+{
+       return acrn_hypercall2(HC_NOTIFY_REQUEST_FINISH, vmid, vcpu);
+}
+
+/**
+ * hcall_set_memory_regions() - Inform the hypervisor to set up EPT mappings
+ * @regions_pa:        Service VM GPA of &struct vm_memory_region_batch
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_memory_regions(u64 regions_pa)
+{
+       return acrn_hypercall1(HC_VM_SET_MEMORY_REGIONS, regions_pa);
+}
+
+/**
+ * hcall_assign_pcidev() - Assign a PCI device to a User VM
+ * @vmid:      User VM ID
+ * @addr:      Service VM GPA of the &struct acrn_pcidev
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_assign_pcidev(u64 vmid, u64 addr)
+{
+       return acrn_hypercall2(HC_ASSIGN_PCIDEV, vmid, addr);
+}
+
+/**
+ * hcall_deassign_pcidev() - De-assign a PCI device from a User VM
+ * @vmid:      User VM ID
+ * @addr:      Service VM GPA of the &struct acrn_pcidev
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_deassign_pcidev(u64 vmid, u64 addr)
+{
+       return acrn_hypercall2(HC_DEASSIGN_PCIDEV, vmid, addr);
+}
+
+/**
+ * hcall_set_ptdev_intr() - Configure an interrupt for an assigned PCI device.
+ * @vmid:      User VM ID
+ * @irq:       Service VM GPA of the &struct acrn_ptdev_irq
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_set_ptdev_intr(u64 vmid, u64 irq)
+{
+       return acrn_hypercall2(HC_SET_PTDEV_INTR, vmid, irq);
+}
+
+/**
+ * hcall_reset_ptdev_intr() - Reset an interrupt for an assigned PCI device.
+ * @vmid:      User VM ID
+ * @irq:       Service VM GPA of the &struct acrn_ptdev_irq
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_reset_ptdev_intr(u64 vmid, u64 irq)
+{
+       return acrn_hypercall2(HC_RESET_PTDEV_INTR, vmid, irq);
+}
+
+/**
+ * hcall_get_cpu_state() - Get P-states and C-states info from the hypervisor
+ * @cmd:       Command encoding the request (ACRN_PMCMD_* in PMCMD_TYPE_MASK)
+ * @state:     Service VM GPA of buffer of P-states and C-states
+ *
+ * Return: 0 on success, <0 on failure
+ */
+static inline long hcall_get_cpu_state(u64 cmd, u64 state)
+{
+       return acrn_hypercall2(HC_PM_GET_CPU_STATE, cmd, state);
+}
+
+#endif /* __ACRN_HSM_HYPERCALL_H */
diff --git a/drivers/virt/acrn/ioeventfd.c b/drivers/virt/acrn/ioeventfd.c
new file mode 100644 (file)
index 0000000..ac4037e
--- /dev/null
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN HSM eventfd - use eventfd objects to signal expected I/O requests
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *     Shuo Liu <shuo.a.liu@intel.com>
+ *     Yakui Zhao <yakui.zhao@intel.com>
+ */
+
+#include <linux/eventfd.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+/**
+ * struct hsm_ioeventfd - Properties of HSM ioeventfd
+ * @list:      Entry within &acrn_vm.ioeventfds of ioeventfds of a VM
+ * @eventfd:   Eventfd of the HSM ioeventfd
+ * @addr:      Address of I/O range
+ * @data:      Data for matching
+ * @length:    Length of I/O range
+ * @type:      Type of I/O range (ACRN_IOREQ_TYPE_MMIO/ACRN_IOREQ_TYPE_PORTIO)
+ * @wildcard:  Data matching or not
+ */
+struct hsm_ioeventfd {
+       struct list_head        list;
+       struct eventfd_ctx      *eventfd;
+       u64                     addr;
+       u64                     data;
+       int                     length;
+       int                     type;
+       bool                    wildcard;
+};
+
+static inline int ioreq_type_from_flags(int flags)
+{
+       return flags & ACRN_IOEVENTFD_FLAG_PIO ?
+                      ACRN_IOREQ_TYPE_PORTIO : ACRN_IOREQ_TYPE_MMIO;
+}
+
+static void acrn_ioeventfd_shutdown(struct acrn_vm *vm, struct hsm_ioeventfd *p)
+{
+       lockdep_assert_held(&vm->ioeventfds_lock);
+
+       eventfd_ctx_put(p->eventfd);
+       list_del(&p->list);
+       kfree(p);
+}
+
+static bool hsm_ioeventfd_is_conflict(struct acrn_vm *vm,
+                                     struct hsm_ioeventfd *ioeventfd)
+{
+       struct hsm_ioeventfd *p;
+
+       lockdep_assert_held(&vm->ioeventfds_lock);
+
+       /* If either one is a wildcard, data matching is skipped. */
+       list_for_each_entry(p, &vm->ioeventfds, list)
+               if (p->eventfd == ioeventfd->eventfd &&
+                   p->addr == ioeventfd->addr &&
+                   p->type == ioeventfd->type &&
+                   (p->wildcard || ioeventfd->wildcard ||
+                       p->data == ioeventfd->data))
+                       return true;
+
+       return false;
+}
+
+/*
+ * Assign an eventfd to a VM and create a HSM ioeventfd associated with the
+ * eventfd. The properties of the HSM ioeventfd are built from a &struct
+ * acrn_ioeventfd.
+ */
+static int acrn_ioeventfd_assign(struct acrn_vm *vm,
+                                struct acrn_ioeventfd *args)
+{
+       struct eventfd_ctx *eventfd;
+       struct hsm_ioeventfd *p;
+       int ret;
+
+       /* Check for range overflow */
+       if (args->addr + args->len < args->addr)
+               return -EINVAL;
+
+       /*
+        * Currently, acrn_ioeventfd is used to support vhost. Accesses of
+        * width 1, 2, 4 or 8 bytes cover vhost's requirements.
+        */
+       if (!(args->len == 1 || args->len == 2 ||
+             args->len == 4 || args->len == 8))
+               return -EINVAL;
+
+       eventfd = eventfd_ctx_fdget(args->fd);
+       if (IS_ERR(eventfd))
+               return PTR_ERR(eventfd);
+
+       p = kzalloc(sizeof(*p), GFP_KERNEL);
+       if (!p) {
+               ret = -ENOMEM;
+               goto fail;
+       }
+
+       INIT_LIST_HEAD(&p->list);
+       p->addr = args->addr;
+       p->length = args->len;
+       p->eventfd = eventfd;
+       p->type = ioreq_type_from_flags(args->flags);
+
+       /*
+        * Without ACRN_IOEVENTFD_FLAG_DATAMATCH the ioeventfd is a wildcard:
+        * for virtio 1.0 a write of any value to a virtqueue's notification
+        * register triggers the notification, so no data matching is needed.
+        */
+       if (args->flags & ACRN_IOEVENTFD_FLAG_DATAMATCH)
+               p->data = args->data;
+       else
+               p->wildcard = true;
+
+       mutex_lock(&vm->ioeventfds_lock);
+
+       if (hsm_ioeventfd_is_conflict(vm, p)) {
+               ret = -EEXIST;
+               goto unlock_fail;
+       }
+
+       /* register the I/O range into ioreq client */
+       ret = acrn_ioreq_range_add(vm->ioeventfd_client, p->type,
+                                  p->addr, p->addr + p->length - 1);
+       if (ret < 0)
+               goto unlock_fail;
+
+       list_add_tail(&p->list, &vm->ioeventfds);
+       mutex_unlock(&vm->ioeventfds_lock);
+
+       return 0;
+
+unlock_fail:
+       mutex_unlock(&vm->ioeventfds_lock);
+       kfree(p);
+fail:
+       eventfd_ctx_put(eventfd);
+       return ret;
+}
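+
+/*
+ * Illustrative caller-side sketch (not shipped code): a vhost backend
+ * registers a wildcard ioeventfd on a virtqueue notify register, assuming
+ * the uapi definitions from <linux/acrn.h>:
+ *
+ *	struct acrn_ioeventfd args = {
+ *		.fd    = evt_fd,	from eventfd(0, EFD_CLOEXEC)
+ *		.addr  = notify_gpa,	guest PA of the notify register
+ *		.len   = 2,
+ *		.flags = 0,		no DATAMATCH, i.e. wildcard
+ *	};
+ *	ioctl(hsm_fd, ACRN_IOCTL_IOEVENTFD, &args);
+ */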
+
+static int acrn_ioeventfd_deassign(struct acrn_vm *vm,
+                                  struct acrn_ioeventfd *args)
+{
+       struct hsm_ioeventfd *p;
+       struct eventfd_ctx *eventfd;
+
+       eventfd = eventfd_ctx_fdget(args->fd);
+       if (IS_ERR(eventfd))
+               return PTR_ERR(eventfd);
+
+       mutex_lock(&vm->ioeventfds_lock);
+       list_for_each_entry(p, &vm->ioeventfds, list) {
+               if (p->eventfd != eventfd)
+                       continue;
+
+               acrn_ioreq_range_del(vm->ioeventfd_client, p->type,
+                                    p->addr, p->addr + p->length - 1);
+               acrn_ioeventfd_shutdown(vm, p);
+               break;
+       }
+       mutex_unlock(&vm->ioeventfds_lock);
+
+       eventfd_ctx_put(eventfd);
+       return 0;
+}
+
+static struct hsm_ioeventfd *hsm_ioeventfd_match(struct acrn_vm *vm, u64 addr,
+                                                u64 data, int len, int type)
+{
+       struct hsm_ioeventfd *p = NULL;
+
+       lockdep_assert_held(&vm->ioeventfds_lock);
+
+       list_for_each_entry(p, &vm->ioeventfds, list) {
+               if (p->type == type && p->addr == addr && p->length >= len &&
+                   (p->wildcard || p->data == data))
+                       return p;
+       }
+
+       return NULL;
+}
+
+static int acrn_ioeventfd_handler(struct acrn_ioreq_client *client,
+                                 struct acrn_io_request *req)
+{
+       struct hsm_ioeventfd *p;
+       u64 addr, val;
+       int size;
+
+       if (req->type == ACRN_IOREQ_TYPE_MMIO) {
+               /*
+                * I/O requests are dispatched by range check only, so an
+                * acrn_ioreq_client needs to process both READ and WRITE
+                * accesses to the same range. READ accesses are safe to
+                * ignore here because virtio PCI devices write the notify
+                * registers for notification.
+                */
+               if (req->reqs.mmio_request.direction == ACRN_IOREQ_DIR_READ) {
+                       /* a read does nothing and returns 0 */
+                       req->reqs.mmio_request.value = 0;
+                       return 0;
+               }
+               addr = req->reqs.mmio_request.address;
+               size = req->reqs.mmio_request.size;
+               val = req->reqs.mmio_request.value;
+       } else {
+               if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ) {
+                       /* a read does nothing and returns 0 */
+                       req->reqs.pio_request.value = 0;
+                       return 0;
+               }
+               addr = req->reqs.pio_request.address;
+               size = req->reqs.pio_request.size;
+               val = req->reqs.pio_request.value;
+       }
+
+       mutex_lock(&client->vm->ioeventfds_lock);
+       p = hsm_ioeventfd_match(client->vm, addr, val, size, req->type);
+       if (p)
+               eventfd_signal(p->eventfd, 1);
+       mutex_unlock(&client->vm->ioeventfds_lock);
+
+       return 0;
+}
+
+int acrn_ioeventfd_config(struct acrn_vm *vm, struct acrn_ioeventfd *args)
+{
+       int ret;
+
+       if (args->flags & ACRN_IOEVENTFD_FLAG_DEASSIGN)
+               ret = acrn_ioeventfd_deassign(vm, args);
+       else
+               ret = acrn_ioeventfd_assign(vm, args);
+
+       return ret;
+}
+
+int acrn_ioeventfd_init(struct acrn_vm *vm)
+{
+       char name[ACRN_NAME_LEN];
+
+       mutex_init(&vm->ioeventfds_lock);
+       INIT_LIST_HEAD(&vm->ioeventfds);
+       snprintf(name, sizeof(name), "ioeventfd-%u", vm->vmid);
+       vm->ioeventfd_client = acrn_ioreq_client_create(vm,
+                                                       acrn_ioeventfd_handler,
+                                                       NULL, false, name);
+       if (!vm->ioeventfd_client) {
+               dev_err(acrn_dev.this_device, "Failed to create ioeventfd ioreq client!\n");
+               return -EINVAL;
+       }
+
+       dev_dbg(acrn_dev.this_device, "VM %u ioeventfd init.\n", vm->vmid);
+       return 0;
+}
+
+void acrn_ioeventfd_deinit(struct acrn_vm *vm)
+{
+       struct hsm_ioeventfd *p, *next;
+
+       dev_dbg(acrn_dev.this_device, "VM %u ioeventfd deinit.\n", vm->vmid);
+       acrn_ioreq_client_destroy(vm->ioeventfd_client);
+       mutex_lock(&vm->ioeventfds_lock);
+       list_for_each_entry_safe(p, next, &vm->ioeventfds, list)
+               acrn_ioeventfd_shutdown(vm, p);
+       mutex_unlock(&vm->ioeventfds_lock);
+}
diff --git a/drivers/virt/acrn/ioreq.c b/drivers/virt/acrn/ioreq.c
new file mode 100644 (file)
index 0000000..80b2e3f
--- /dev/null
@@ -0,0 +1,657 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN_HSM: Handle I/O requests
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *     Jason Chen CJ <jason.cj.chen@intel.com>
+ *     Fengwei Yin <fengwei.yin@intel.com>
+ */
+
+#include <linux/interrupt.h>
+#include <linux/io.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include <asm/acrn.h>
+
+#include "acrn_drv.h"
+
+static void ioreq_pause(void);
+static void ioreq_resume(void);
+
+static void ioreq_dispatcher(struct work_struct *work);
+static struct workqueue_struct *ioreq_wq;
+static DECLARE_WORK(ioreq_work, ioreq_dispatcher);
+
+static inline bool has_pending_request(struct acrn_ioreq_client *client)
+{
+       return !bitmap_empty(client->ioreqs_map, ACRN_IO_REQUEST_MAX);
+}
+
+static inline bool is_destroying(struct acrn_ioreq_client *client)
+{
+       return test_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
+}
+
+static int ioreq_complete_request(struct acrn_vm *vm, u16 vcpu,
+                                 struct acrn_io_request *acrn_req)
+{
+       bool polling_mode;
+       int ret = 0;
+
+       polling_mode = acrn_req->completion_polling;
+       /* The release barrier makes sure the writes are done before completion */
+       smp_store_release(&acrn_req->processed, ACRN_IOREQ_STATE_COMPLETE);
+
+       /*
+        * To fulfill the requirement of real-time in several industry
+        * scenarios, like automotive, ACRN can run under the partition mode,
+        * in which User VMs and Service VM are bound to dedicated CPU cores.
+        * Polling mode of handling the I/O request is introduced to achieve a
+        * faster I/O request handling. In polling mode, the hypervisor polls
+        * I/O request's completion. Once an I/O request is marked as
+        * ACRN_IOREQ_STATE_COMPLETE, hypervisor resumes from the polling point
+        * to continue the I/O request flow. Thus, the completion notification
+        * from HSM of I/O request is not needed.  Please note,
+        * completion_polling needs to be read before the I/O request being
+        * marked as ACRN_IOREQ_STATE_COMPLETE to avoid racing with the
+        * hypervisor.
+        */
+       if (!polling_mode) {
+               ret = hcall_notify_req_finish(vm->vmid, vcpu);
+               if (ret < 0)
+                       dev_err(acrn_dev.this_device,
+                               "Notify I/O request finished failed!\n");
+       }
+
+       return ret;
+}
+
+static int acrn_ioreq_complete_request(struct acrn_ioreq_client *client,
+                                      u16 vcpu,
+                                      struct acrn_io_request *acrn_req)
+{
+       int ret;
+
+       if (vcpu >= client->vm->vcpu_num)
+               return -EINVAL;
+
+       clear_bit(vcpu, client->ioreqs_map);
+       if (!acrn_req) {
+               acrn_req = (struct acrn_io_request *)client->vm->ioreq_buf;
+               acrn_req += vcpu;
+       }
+
+       ret = ioreq_complete_request(client->vm, vcpu, acrn_req);
+
+       return ret;
+}
+
+int acrn_ioreq_request_default_complete(struct acrn_vm *vm, u16 vcpu)
+{
+       int ret = 0;
+
+       spin_lock_bh(&vm->ioreq_clients_lock);
+       if (vm->default_client)
+               ret = acrn_ioreq_complete_request(vm->default_client,
+                                                 vcpu, NULL);
+       spin_unlock_bh(&vm->ioreq_clients_lock);
+
+       return ret;
+}
+
+/**
+ * acrn_ioreq_range_add() - Add an iorange monitored by an ioreq client
+ * @client:    The ioreq client
+ * @type:      Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
+ * @start:     Start address of iorange
+ * @end:       End address of iorange
+ *
+ * Return: 0 on success, <0 on error
+ */
+int acrn_ioreq_range_add(struct acrn_ioreq_client *client,
+                        u32 type, u64 start, u64 end)
+{
+       struct acrn_ioreq_range *range;
+
+       if (end < start) {
+               dev_err(acrn_dev.this_device,
+                       "Invalid IO range [0x%llx,0x%llx]\n", start, end);
+               return -EINVAL;
+       }
+
+       range = kzalloc(sizeof(*range), GFP_KERNEL);
+       if (!range)
+               return -ENOMEM;
+
+       range->type = type;
+       range->start = start;
+       range->end = end;
+
+       write_lock_bh(&client->range_lock);
+       list_add(&range->list, &client->range_list);
+       write_unlock_bh(&client->range_lock);
+
+       return 0;
+}
+
+/**
+ * acrn_ioreq_range_del() - Del an iorange monitored by an ioreq client
+ * @client:    The ioreq client
+ * @type:      Type (ACRN_IOREQ_TYPE_MMIO or ACRN_IOREQ_TYPE_PORTIO)
+ * @start:     Start address of iorange
+ * @end:       End address of iorange
+ */
+void acrn_ioreq_range_del(struct acrn_ioreq_client *client,
+                         u32 type, u64 start, u64 end)
+{
+       struct acrn_ioreq_range *range;
+
+       write_lock_bh(&client->range_lock);
+       list_for_each_entry(range, &client->range_list, list) {
+               if (type == range->type &&
+                   start == range->start &&
+                   end == range->end) {
+                       list_del(&range->list);
+                       kfree(range);
+                       break;
+               }
+       }
+       write_unlock_bh(&client->range_lock);
+}
+
+/*
+ * ioreq_task() is the execution entity of handler thread of an I/O client.
+ * The handler callback of the I/O client is called within the handler thread.
+ */
+static int ioreq_task(void *data)
+{
+       struct acrn_ioreq_client *client = data;
+       struct acrn_io_request *req;
+       unsigned long *ioreqs_map;
+       int vcpu, ret;
+
+       /*
+        * Lockless access to ioreqs_map is safe, because
+        * 1) set_bit() and clear_bit() are atomic operations.
+        * 2) I/O requests arrive serialized. The access flow of ioreqs_map is:
+        *      set_bit() - in ioreq_work handler
+        *      Handler callback handles corresponding I/O request
+        *      clear_bit() - in handler thread (including ACRN userspace)
+        *      Mark corresponding I/O request completed
+        *      Loop again if a new I/O request occurs
+        */
+       ioreqs_map = client->ioreqs_map;
+       while (!kthread_should_stop()) {
+               acrn_ioreq_client_wait(client);
+               while (has_pending_request(client)) {
+                       vcpu = find_first_bit(ioreqs_map, client->vm->vcpu_num);
+                       req = client->vm->ioreq_buf->req_slot + vcpu;
+                       ret = client->handler(client, req);
+                       if (ret < 0) {
+                               dev_err(acrn_dev.this_device,
+                                       "IO handle failure: %d\n", ret);
+                               break;
+                       }
+                       acrn_ioreq_complete_request(client, vcpu, req);
+               }
+       }
+
+       return 0;
+}
+
+/*
+ * For the non-default I/O clients, give them a chance to complete the current
+ * I/O requests if there are any. For the default I/O client, it is safe to
+ * clear all pending I/O requests because the clearing request is from ACRN
+ * userspace.
+ */
+void acrn_ioreq_request_clear(struct acrn_vm *vm)
+{
+       struct acrn_ioreq_client *client;
+       bool has_pending = false;
+       unsigned long vcpu;
+       int retry = 10;
+
+       /*
+        * I/O requests of this VM will be completed directly in
+        * acrn_ioreq_dispatch() if the ACRN_VM_FLAG_CLEARING_IOREQ flag is set.
+        */
+       set_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
+
+       /*
+        * acrn_ioreq_request_clear() is only called in the VM reset case.
+        * Simply wait up to 100ms in total for the I/O requests to complete.
+        */
+       do {
+               spin_lock_bh(&vm->ioreq_clients_lock);
+               list_for_each_entry(client, &vm->ioreq_clients, list) {
+                       has_pending = has_pending_request(client);
+                       if (has_pending)
+                               break;
+               }
+               spin_unlock_bh(&vm->ioreq_clients_lock);
+
+               if (has_pending)
+                       schedule_timeout_interruptible(HZ / 100);
+       } while (has_pending && --retry > 0);
+       if (retry == 0)
+               dev_warn(acrn_dev.this_device,
+                        "%s cannot flush pending request!\n", client->name);
+
+       /* Clear all ioreqs belonging to the default client */
+       spin_lock_bh(&vm->ioreq_clients_lock);
+       client = vm->default_client;
+       if (client) {
+               vcpu = find_next_bit(client->ioreqs_map,
+                                    ACRN_IO_REQUEST_MAX, 0);
+               while (vcpu < ACRN_IO_REQUEST_MAX) {
+                       acrn_ioreq_complete_request(client, vcpu, NULL);
+                       vcpu = find_next_bit(client->ioreqs_map,
+                                            ACRN_IO_REQUEST_MAX, vcpu + 1);
+               }
+       }
+       spin_unlock_bh(&vm->ioreq_clients_lock);
+
+       /* Clear ACRN_VM_FLAG_CLEARING_IOREQ flag after the clearing */
+       clear_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags);
+}
+
+int acrn_ioreq_client_wait(struct acrn_ioreq_client *client)
+{
+       if (client->is_default) {
+               /*
+                * In the default client, a user space thread waits on the
+                * waitqueue. The is_destroying() check is used to notify user
+                * space the client is going to be destroyed.
+                */
+               wait_event_interruptible(client->wq,
+                                        has_pending_request(client) ||
+                                        is_destroying(client));
+               if (is_destroying(client))
+                       return -ENODEV;
+       } else {
+               wait_event_interruptible(client->wq,
+                                        has_pending_request(client) ||
+                                        kthread_should_stop());
+       }
+
+       return 0;
+}
+
+static bool is_cfg_addr(struct acrn_io_request *req)
+{
+       return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
+               (req->reqs.pio_request.address == 0xcf8));
+}
+
+static bool is_cfg_data(struct acrn_io_request *req)
+{
+       return ((req->type == ACRN_IOREQ_TYPE_PORTIO) &&
+               ((req->reqs.pio_request.address >= 0xcfc) &&
+                (req->reqs.pio_request.address < (0xcfc + 4))));
+}
+
+/* The low 8 bits of the supported pci_reg address */
+#define PCI_LOWREG_MASK  0xFC
+/* The high 4 bits of the supported pci_reg address */
+#define PCI_HIGHREG_MASK 0xF00
+/* Max number of supported functions */
+#define PCI_FUNCMAX    7
+/* Max number of supported slots */
+#define PCI_SLOTMAX    31
+/* Max number of supported buses */
+#define PCI_BUSMAX     255
+#define CONF1_ENABLE   0x80000000UL
+/*
+ * A PCI configuration space access via PIO 0xCF8 and 0xCFC normally consists
+ * of the following two steps:
+ *   1) write the address into port 0xCF8
+ *   2) access the data in/from port 0xCFC
+ * This function combines such a pair of PCI configuration space I/O requests
+ * into one ACRN_IOREQ_TYPE_PCICFG I/O request and continues the processing.
+ */
+static bool handle_cf8cfc(struct acrn_vm *vm,
+                         struct acrn_io_request *req, u16 vcpu)
+{
+       int offset, pci_cfg_addr, pci_reg;
+       bool is_handled = false;
+
+       if (is_cfg_addr(req)) {
+               WARN_ON(req->reqs.pio_request.size != 4);
+               if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_WRITE)
+                       vm->pci_conf_addr = req->reqs.pio_request.value;
+               else
+                       req->reqs.pio_request.value = vm->pci_conf_addr;
+               is_handled = true;
+       } else if (is_cfg_data(req)) {
+               if (!(vm->pci_conf_addr & CONF1_ENABLE)) {
+                       if (req->reqs.pio_request.direction ==
+                                       ACRN_IOREQ_DIR_READ)
+                               req->reqs.pio_request.value = 0xffffffff;
+                       is_handled = true;
+               } else {
+                       offset = req->reqs.pio_request.address - 0xcfc;
+
+                       req->type = ACRN_IOREQ_TYPE_PCICFG;
+                       pci_cfg_addr = vm->pci_conf_addr;
+                       req->reqs.pci_request.bus =
+                                       (pci_cfg_addr >> 16) & PCI_BUSMAX;
+                       req->reqs.pci_request.dev =
+                                       (pci_cfg_addr >> 11) & PCI_SLOTMAX;
+                       req->reqs.pci_request.func =
+                                       (pci_cfg_addr >> 8) & PCI_FUNCMAX;
+                       pci_reg = (pci_cfg_addr & PCI_LOWREG_MASK) +
+                                  ((pci_cfg_addr >> 16) & PCI_HIGHREG_MASK);
+                       req->reqs.pci_request.reg = pci_reg + offset;
+               }
+       }
+
+       if (is_handled)
+               ioreq_complete_request(vm, vcpu, req);
+
+       return is_handled;
+}
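
For reference, the decode above follows the legacy CONF1 mechanism. A
minimal standalone sketch (illustrative only, not part of this patch) of
how a CONF1 dword written to port 0xCF8 breaks down, using the same masks
as handle_cf8cfc():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        /* illustrative address: bus 3, device 2, function 1, register 0x10 */
        uint32_t addr = 0x80000000u |   /* CONF1_ENABLE */
                        (3u << 16) | (2u << 11) | (1u << 8) | 0x10;

        printf("bus=%u dev=%u func=%u reg=0x%x\n",
               (unsigned)((addr >> 16) & 255),  /* PCI_BUSMAX */
               (unsigned)((addr >> 11) & 31),   /* PCI_SLOTMAX */
               (unsigned)((addr >> 8) & 7),     /* PCI_FUNCMAX */
               (unsigned)((addr & 0xFC) | ((addr >> 16) & 0xF00)));
        return 0;
    }
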
+
+static bool in_range(struct acrn_ioreq_range *range,
+                    struct acrn_io_request *req)
+{
+       bool ret = false;
+
+       if (range->type == req->type) {
+               switch (req->type) {
+               case ACRN_IOREQ_TYPE_MMIO:
+                       if (req->reqs.mmio_request.address >= range->start &&
+                           (req->reqs.mmio_request.address +
+                            req->reqs.mmio_request.size - 1) <= range->end)
+                               ret = true;
+                       break;
+               case ACRN_IOREQ_TYPE_PORTIO:
+                       if (req->reqs.pio_request.address >= range->start &&
+                           (req->reqs.pio_request.address +
+                            req->reqs.pio_request.size - 1) <= range->end)
+                               ret = true;
+                       break;
+               default:
+                       break;
+               }
+       }
+
+       return ret;
+}
+
+static struct acrn_ioreq_client *find_ioreq_client(struct acrn_vm *vm,
+                                                  struct acrn_io_request *req)
+{
+       struct acrn_ioreq_client *client, *found = NULL;
+       struct acrn_ioreq_range *range;
+
+       lockdep_assert_held(&vm->ioreq_clients_lock);
+
+       list_for_each_entry(client, &vm->ioreq_clients, list) {
+               read_lock_bh(&client->range_lock);
+               list_for_each_entry(range, &client->range_list, list) {
+                       if (in_range(range, req)) {
+                               found = client;
+                               break;
+                       }
+               }
+               read_unlock_bh(&client->range_lock);
+               if (found)
+                       break;
+       }
+       return found ? found : vm->default_client;
+}
+
+/**
+ * acrn_ioreq_client_create() - Create an ioreq client
+ * @vm:                The VM that this client belongs to
+ * @handler:   The ioreq_handler of the ioreq client. acrn_hsm will create a
+ *             kernel thread and call the handler to handle I/O requests.
+ * @priv:      Private data for the handler
+ * @is_default:        Whether it is the default client
+ * @name:      The name of the ioreq client
+ *
+ * Return: acrn_ioreq_client pointer on success, NULL on error
+ */
+struct acrn_ioreq_client *acrn_ioreq_client_create(struct acrn_vm *vm,
+                                                  ioreq_handler_t handler,
+                                                  void *priv, bool is_default,
+                                                  const char *name)
+{
+       struct acrn_ioreq_client *client;
+
+       if (!handler && !is_default) {
+               dev_dbg(acrn_dev.this_device,
+                       "Cannot create non-default client w/o handler!\n");
+               return NULL;
+       }
+       client = kzalloc(sizeof(*client), GFP_KERNEL);
+       if (!client)
+               return NULL;
+
+       client->handler = handler;
+       client->vm = vm;
+       client->priv = priv;
+       client->is_default = is_default;
+       if (name)
+               strncpy(client->name, name, sizeof(client->name) - 1);
+       rwlock_init(&client->range_lock);
+       INIT_LIST_HEAD(&client->range_list);
+       init_waitqueue_head(&client->wq);
+
+       if (client->handler) {
+               client->thread = kthread_run(ioreq_task, client, "VM%u-%s",
+                                            client->vm->vmid, client->name);
+               if (IS_ERR(client->thread)) {
+                       kfree(client);
+                       return NULL;
+               }
+       }
+
+       spin_lock_bh(&vm->ioreq_clients_lock);
+       if (is_default)
+               vm->default_client = client;
+       else
+               list_add(&client->list, &vm->ioreq_clients);
+       spin_unlock_bh(&vm->ioreq_clients_lock);
+
+       dev_dbg(acrn_dev.this_device, "Created ioreq client %s.\n", name);
+       return client;
+}
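
A hedged usage sketch for this API: an in-kernel component creates a
non-default client and registers a port-I/O range for it. The handler
signature follows the client->handler(client, req) call in ioreq_task()
above; acrn_ioreq_range_add() lives elsewhere in this driver and is an
assumption here, as it is not visible in this hunk.

    /* hypothetical handler: satisfy guest reads of an emulated port range */
    static int sample_pio_handler(struct acrn_ioreq_client *client,
                                  struct acrn_io_request *req)
    {
            if (req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ)
                    req->reqs.pio_request.value = 0;    /* dummy data */
            return 0;
    }

    static int sample_attach(struct acrn_vm *vm)
    {
            struct acrn_ioreq_client *client;

            client = acrn_ioreq_client_create(vm, sample_pio_handler,
                                              NULL, false, "sample-pio");
            if (!client)
                    return -ENOMEM;
            /* assumed helper, registers the range this client serves */
            return acrn_ioreq_range_add(client, ACRN_IOREQ_TYPE_PORTIO,
                                        0x600, 0x60f);
    }
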
+
+/**
+ * acrn_ioreq_client_destroy() - Destroy an ioreq client
+ * @client:    The ioreq client
+ */
+void acrn_ioreq_client_destroy(struct acrn_ioreq_client *client)
+{
+       struct acrn_ioreq_range *range, *next;
+       struct acrn_vm *vm = client->vm;
+
+       dev_dbg(acrn_dev.this_device,
+               "Destroy ioreq client %s.\n", client->name);
+       ioreq_pause();
+       set_bit(ACRN_IOREQ_CLIENT_DESTROYING, &client->flags);
+       if (client->is_default)
+               wake_up_interruptible(&client->wq);
+       else
+               kthread_stop(client->thread);
+
+       spin_lock_bh(&vm->ioreq_clients_lock);
+       if (client->is_default)
+               vm->default_client = NULL;
+       else
+               list_del(&client->list);
+       spin_unlock_bh(&vm->ioreq_clients_lock);
+
+       write_lock_bh(&client->range_lock);
+       list_for_each_entry_safe(range, next, &client->range_list, list) {
+               list_del(&range->list);
+               kfree(range);
+       }
+       write_unlock_bh(&client->range_lock);
+       kfree(client);
+
+       ioreq_resume();
+}
+
+static int acrn_ioreq_dispatch(struct acrn_vm *vm)
+{
+       struct acrn_ioreq_client *client;
+       struct acrn_io_request *req;
+       int i;
+
+       for (i = 0; i < vm->vcpu_num; i++) {
+               req = vm->ioreq_buf->req_slot + i;
+
+               /* Order reads of the request body after the read of 'processed' */
+               if (smp_load_acquire(&req->processed) ==
+                                    ACRN_IOREQ_STATE_PENDING) {
+                       /* Complete the I/O request directly in the clearing stage */
+                       if (test_bit(ACRN_VM_FLAG_CLEARING_IOREQ, &vm->flags)) {
+                               ioreq_complete_request(vm, i, req);
+                               continue;
+                       }
+                       if (handle_cf8cfc(vm, req, i))
+                               continue;
+
+                       spin_lock_bh(&vm->ioreq_clients_lock);
+                       client = find_ioreq_client(vm, req);
+                       if (!client) {
+                               dev_err(acrn_dev.this_device,
+                                       "Failed to find ioreq client!\n");
+                               spin_unlock_bh(&vm->ioreq_clients_lock);
+                               return -EINVAL;
+                       }
+                       if (!client->is_default)
+                               req->kernel_handled = 1;
+                       else
+                               req->kernel_handled = 0;
+                       /*
+                        * smp_store_release() makes sure the writes above are
+                        * done before setting ACRN_IOREQ_STATE_PROCESSING
+                        */
+                       smp_store_release(&req->processed,
+                                         ACRN_IOREQ_STATE_PROCESSING);
+                       set_bit(i, client->ioreqs_map);
+                       wake_up_interruptible(&client->wq);
+                       spin_unlock_bh(&vm->ioreq_clients_lock);
+               }
+       }
+
+       return 0;
+}
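
The smp_load_acquire()/smp_store_release() pair above implements a small
publish/consume protocol over the shared ioreq buffer. An illustrative C11
equivalent of that ordering contract (sketch only; the kernel uses its own
primitives, and the real state values are the ACRN_IOREQ_STATE_* ones):

    #include <stdatomic.h>

    static _Atomic int processed;   /* stands in for req->processed */
    static int request_body;        /* stands in for the request fields */

    static void publisher(void)
    {
            request_body = 42;      /* fill in the request */
            /* release: the body write is visible before the state change */
            atomic_store_explicit(&processed, 1, memory_order_release);
    }

    static int consumer(void)
    {
            /* acquire: observing state 1 guarantees observing the body */
            if (atomic_load_explicit(&processed, memory_order_acquire) == 1)
                    return request_body;    /* guaranteed to read 42 */
            return -1;
    }
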
+
+static void ioreq_dispatcher(struct work_struct *work)
+{
+       struct acrn_vm *vm;
+
+       read_lock(&acrn_vm_list_lock);
+       list_for_each_entry(vm, &acrn_vm_list, list) {
+               if (!vm->ioreq_buf)
+                       break;
+               acrn_ioreq_dispatch(vm);
+       }
+       read_unlock(&acrn_vm_list_lock);
+}
+
+static void ioreq_intr_handler(void)
+{
+       queue_work(ioreq_wq, &ioreq_work);
+}
+
+static void ioreq_pause(void)
+{
+       /* Flush and unarm the handler to ensure no I/O requests are pending */
+       acrn_remove_intr_handler();
+       drain_workqueue(ioreq_wq);
+}
+
+static void ioreq_resume(void)
+{
+       /* Schedule once after re-arming in case an interrupt arrived while paused */
+       acrn_setup_intr_handler(ioreq_intr_handler);
+       queue_work(ioreq_wq, &ioreq_work);
+}
+
+int acrn_ioreq_intr_setup(void)
+{
+       acrn_setup_intr_handler(ioreq_intr_handler);
+       ioreq_wq = alloc_workqueue("ioreq_wq",
+                                  WQ_HIGHPRI | WQ_MEM_RECLAIM | WQ_UNBOUND, 1);
+       if (!ioreq_wq) {
+               dev_err(acrn_dev.this_device, "Failed to alloc workqueue!\n");
+               acrn_remove_intr_handler();
+               return -ENOMEM;
+       }
+       return 0;
+}
+
+void acrn_ioreq_intr_remove(void)
+{
+       if (ioreq_wq)
+               destroy_workqueue(ioreq_wq);
+       acrn_remove_intr_handler();
+}
+
+int acrn_ioreq_init(struct acrn_vm *vm, u64 buf_vma)
+{
+       struct acrn_ioreq_buffer *set_buffer;
+       struct page *page;
+       int ret;
+
+       if (vm->ioreq_buf)
+               return -EEXIST;
+
+       set_buffer = kzalloc(sizeof(*set_buffer), GFP_KERNEL);
+       if (!set_buffer)
+               return -ENOMEM;
+
+       ret = pin_user_pages_fast(buf_vma, 1,
+                                 FOLL_WRITE | FOLL_LONGTERM, &page);
+       if (unlikely(ret != 1) || !page) {
+               dev_err(acrn_dev.this_device, "Failed to pin ioreq page!\n");
+               ret = -EFAULT;
+               goto free_buf;
+       }
+
+       vm->ioreq_buf = page_address(page);
+       vm->ioreq_page = page;
+       set_buffer->ioreq_buf = page_to_phys(page);
+       ret = hcall_set_ioreq_buffer(vm->vmid, virt_to_phys(set_buffer));
+       if (ret < 0) {
+               dev_err(acrn_dev.this_device, "Failed to init ioreq buffer!\n");
+               unpin_user_page(page);
+               vm->ioreq_buf = NULL;
+               goto free_buf;
+       }
+
+       dev_dbg(acrn_dev.this_device,
+               "Init ioreq buffer %pK!\n", vm->ioreq_buf);
+       ret = 0;
+free_buf:
+       kfree(set_buffer);
+       return ret;
+}
+
+void acrn_ioreq_deinit(struct acrn_vm *vm)
+{
+       struct acrn_ioreq_client *client, *next;
+
+       dev_dbg(acrn_dev.this_device,
+               "Deinit ioreq buffer %pK!\n", vm->ioreq_buf);
+       /* Destroy all clients belonging to this VM */
+       list_for_each_entry_safe(client, next, &vm->ioreq_clients, list)
+               acrn_ioreq_client_destroy(client);
+       if (vm->default_client)
+               acrn_ioreq_client_destroy(vm->default_client);
+
+       if (vm->ioreq_buf && vm->ioreq_page) {
+               unpin_user_page(vm->ioreq_page);
+               vm->ioreq_buf = NULL;
+       }
+}
diff --git a/drivers/virt/acrn/irqfd.c b/drivers/virt/acrn/irqfd.c
new file mode 100644 (file)
index 0000000..df51849
--- /dev/null
@@ -0,0 +1,235 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN HSM irqfd: use eventfd objects to inject virtual interrupts
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *     Shuo Liu <shuo.a.liu@intel.com>
+ *     Yakui Zhao <yakui.zhao@intel.com>
+ */
+
+#include <linux/eventfd.h>
+#include <linux/file.h>
+#include <linux/poll.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+static LIST_HEAD(acrn_irqfd_clients);
+static DEFINE_MUTEX(acrn_irqfds_mutex);
+
+/**
+ * struct hsm_irqfd - Properties of HSM irqfd
+ * @vm:                Associated VM pointer
+ * @wait:      Entry of wait-queue
+ * @shutdown:  Async shutdown work
+ * @eventfd:   Associated eventfd
+ * @list:      Entry within &acrn_vm.irqfds of irqfds of a VM
+ * @pt:                Structure for select/poll on the associated eventfd
+ * @msi:       MSI data
+ */
+struct hsm_irqfd {
+       struct acrn_vm          *vm;
+       wait_queue_entry_t      wait;
+       struct work_struct      shutdown;
+       struct eventfd_ctx      *eventfd;
+       struct list_head        list;
+       poll_table              pt;
+       struct acrn_msi_entry   msi;
+};
+
+static void acrn_irqfd_inject(struct hsm_irqfd *irqfd)
+{
+       struct acrn_vm *vm = irqfd->vm;
+
+       acrn_msi_inject(vm, irqfd->msi.msi_addr,
+                       irqfd->msi.msi_data);
+}
+
+static void hsm_irqfd_shutdown(struct hsm_irqfd *irqfd)
+{
+       u64 cnt;
+
+       lockdep_assert_held(&irqfd->vm->irqfds_lock);
+
+       /* remove from wait queue */
+       list_del_init(&irqfd->list);
+       eventfd_ctx_remove_wait_queue(irqfd->eventfd, &irqfd->wait, &cnt);
+       eventfd_ctx_put(irqfd->eventfd);
+       kfree(irqfd);
+}
+
+static void hsm_irqfd_shutdown_work(struct work_struct *work)
+{
+       struct hsm_irqfd *irqfd;
+       struct acrn_vm *vm;
+
+       irqfd = container_of(work, struct hsm_irqfd, shutdown);
+       vm = irqfd->vm;
+       mutex_lock(&vm->irqfds_lock);
+       if (!list_empty(&irqfd->list))
+               hsm_irqfd_shutdown(irqfd);
+       mutex_unlock(&vm->irqfds_lock);
+}
+
+/* Called with wqh->lock held and interrupts disabled */
+static int hsm_irqfd_wakeup(wait_queue_entry_t *wait, unsigned int mode,
+                           int sync, void *key)
+{
+       unsigned long poll_bits = (unsigned long)key;
+       struct hsm_irqfd *irqfd;
+       struct acrn_vm *vm;
+
+       irqfd = container_of(wait, struct hsm_irqfd, wait);
+       vm = irqfd->vm;
+       if (poll_bits & POLLIN)
+               /* An event has been signaled, inject an interrupt */
+               acrn_irqfd_inject(irqfd);
+
+       if (poll_bits & POLLHUP)
+               /* Defer the shutdown to a workqueue; wqh->lock is held here */
+               queue_work(vm->irqfd_wq, &irqfd->shutdown);
+
+       return 0;
+}
+
+static void hsm_irqfd_poll_func(struct file *file, wait_queue_head_t *wqh,
+                               poll_table *pt)
+{
+       struct hsm_irqfd *irqfd;
+
+       irqfd = container_of(pt, struct hsm_irqfd, pt);
+       add_wait_queue(wqh, &irqfd->wait);
+}
+
+/*
+ * Assign an eventfd to a VM and create an HSM irqfd associated with the
+ * eventfd. The properties of the HSM irqfd are built from a &struct
+ * acrn_irqfd.
+ */
+static int acrn_irqfd_assign(struct acrn_vm *vm, struct acrn_irqfd *args)
+{
+       struct eventfd_ctx *eventfd = NULL;
+       struct hsm_irqfd *irqfd, *tmp;
+       __poll_t events;
+       struct fd f;
+       int ret = 0;
+
+       irqfd = kzalloc(sizeof(*irqfd), GFP_KERNEL);
+       if (!irqfd)
+               return -ENOMEM;
+
+       irqfd->vm = vm;
+       memcpy(&irqfd->msi, &args->msi, sizeof(args->msi));
+       INIT_LIST_HEAD(&irqfd->list);
+       INIT_WORK(&irqfd->shutdown, hsm_irqfd_shutdown_work);
+
+       f = fdget(args->fd);
+       if (!f.file) {
+               ret = -EBADF;
+               goto out;
+       }
+
+       eventfd = eventfd_ctx_fileget(f.file);
+       if (IS_ERR(eventfd)) {
+               ret = PTR_ERR(eventfd);
+               goto fail;
+       }
+
+       irqfd->eventfd = eventfd;
+
+       /*
+        * Install custom wake-up handling so we are notified whenever the
+        * underlying eventfd is signaled.
+        */
+       init_waitqueue_func_entry(&irqfd->wait, hsm_irqfd_wakeup);
+       init_poll_funcptr(&irqfd->pt, hsm_irqfd_poll_func);
+
+       mutex_lock(&vm->irqfds_lock);
+       list_for_each_entry(tmp, &vm->irqfds, list) {
+               if (irqfd->eventfd != tmp->eventfd)
+                       continue;
+               ret = -EBUSY;
+               mutex_unlock(&vm->irqfds_lock);
+               goto fail;
+       }
+       list_add_tail(&irqfd->list, &vm->irqfds);
+       mutex_unlock(&vm->irqfds_lock);
+
+       /* Check for an event that may already be pending */
+       events = vfs_poll(f.file, &irqfd->pt);
+
+       if (events & EPOLLIN)
+               acrn_irqfd_inject(irqfd);
+
+       fdput(f);
+       return 0;
+fail:
+       if (eventfd && !IS_ERR(eventfd))
+               eventfd_ctx_put(eventfd);
+
+       fdput(f);
+out:
+       kfree(irqfd);
+       return ret;
+}
+
+static int acrn_irqfd_deassign(struct acrn_vm *vm,
+                              struct acrn_irqfd *args)
+{
+       struct hsm_irqfd *irqfd, *tmp;
+       struct eventfd_ctx *eventfd;
+
+       eventfd = eventfd_ctx_fdget(args->fd);
+       if (IS_ERR(eventfd))
+               return PTR_ERR(eventfd);
+
+       mutex_lock(&vm->irqfds_lock);
+       list_for_each_entry_safe(irqfd, tmp, &vm->irqfds, list) {
+               if (irqfd->eventfd == eventfd) {
+                       hsm_irqfd_shutdown(irqfd);
+                       break;
+               }
+       }
+       mutex_unlock(&vm->irqfds_lock);
+       eventfd_ctx_put(eventfd);
+
+       return 0;
+}
+
+int acrn_irqfd_config(struct acrn_vm *vm, struct acrn_irqfd *args)
+{
+       int ret;
+
+       if (args->flags & ACRN_IRQFD_FLAG_DEASSIGN)
+               ret = acrn_irqfd_deassign(vm, args);
+       else
+               ret = acrn_irqfd_assign(vm, args);
+
+       return ret;
+}
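
A hedged userspace sketch of driving this path: create an eventfd, bind it
to a guest MSI through the HSM ioctl, then signal it to inject. The
ACRN_IOCTL_IRQFD name and the struct acrn_irqfd layout (fd, flags, msi)
are assumptions taken from this series' uapi header, which is not shown in
this hunk; hsm_fd is the descriptor on which the VM was created.

    #include <stdint.h>
    #include <unistd.h>
    #include <sys/eventfd.h>
    #include <sys/ioctl.h>
    #include <linux/acrn.h>         /* assumed uapi header */

    static int bind_irqfd(int hsm_fd, uint64_t msi_addr, uint64_t msi_data)
    {
            struct acrn_irqfd args = { 0 };
            uint64_t one = 1;

            args.fd = eventfd(0, 0);
            if (args.fd < 0)
                    return -1;
            args.msi.msi_addr = msi_addr;
            args.msi.msi_data = msi_data;
            if (ioctl(hsm_fd, ACRN_IOCTL_IRQFD, &args) < 0)
                    return -1;
            /* every write to the eventfd now injects the MSI */
            if (write(args.fd, &one, sizeof(one)) != sizeof(one))
                    return -1;
            return args.fd;
    }
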
+
+int acrn_irqfd_init(struct acrn_vm *vm)
+{
+       INIT_LIST_HEAD(&vm->irqfds);
+       mutex_init(&vm->irqfds_lock);
+       vm->irqfd_wq = alloc_workqueue("acrn_irqfd-%u", 0, 0, vm->vmid);
+       if (!vm->irqfd_wq)
+               return -ENOMEM;
+
+       dev_dbg(acrn_dev.this_device, "VM %u irqfd init.\n", vm->vmid);
+       return 0;
+}
+
+void acrn_irqfd_deinit(struct acrn_vm *vm)
+{
+       struct hsm_irqfd *irqfd, *next;
+
+       dev_dbg(acrn_dev.this_device, "VM %u irqfd deinit.\n", vm->vmid);
+       destroy_workqueue(vm->irqfd_wq);
+       mutex_lock(&vm->irqfds_lock);
+       list_for_each_entry_safe(irqfd, next, &vm->irqfds, list)
+               hsm_irqfd_shutdown(irqfd);
+       mutex_unlock(&vm->irqfds_lock);
+}
diff --git a/drivers/virt/acrn/mm.c b/drivers/virt/acrn/mm.c
new file mode 100644 (file)
index 0000000..c4f2e15
--- /dev/null
@@ -0,0 +1,306 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN: Memory mapping management
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *     Fei Li <lei1.li@intel.com>
+ *     Shuo Liu <shuo.a.liu@intel.com>
+ */
+
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+static int modify_region(struct acrn_vm *vm, struct vm_memory_region_op *region)
+{
+       struct vm_memory_region_batch *regions;
+       int ret;
+
+       regions = kzalloc(sizeof(*regions), GFP_KERNEL);
+       if (!regions)
+               return -ENOMEM;
+
+       regions->vmid = vm->vmid;
+       regions->regions_num = 1;
+       regions->regions_gpa = virt_to_phys(region);
+
+       ret = hcall_set_memory_regions(virt_to_phys(regions));
+       if (ret < 0)
+               dev_dbg(acrn_dev.this_device,
+                       "Failed to set memory region for VM[%u]!\n", vm->vmid);
+
+       kfree(regions);
+       return ret;
+}
+
+/**
+ * acrn_mm_region_add() - Set up the EPT mapping of a memory region.
+ * @vm:                        User VM.
+ * @user_gpa:          A GPA of the User VM.
+ * @service_gpa:       A GPA of the Service VM.
+ * @size:              Size of the region.
+ * @mem_type:          Combination of ACRN_MEM_TYPE_*.
+ * @mem_access_right:  Combination of ACRN_MEM_ACCESS_*.
+ *
+ * Return: 0 on success, <0 on error.
+ */
+int acrn_mm_region_add(struct acrn_vm *vm, u64 user_gpa, u64 service_gpa,
+                      u64 size, u32 mem_type, u32 mem_access_right)
+{
+       struct vm_memory_region_op *region;
+       int ret = 0;
+
+       region = kzalloc(sizeof(*region), GFP_KERNEL);
+       if (!region)
+               return -ENOMEM;
+
+       region->type = ACRN_MEM_REGION_ADD;
+       region->user_vm_pa = user_gpa;
+       region->service_vm_pa = service_gpa;
+       region->size = size;
+       region->attr = ((mem_type & ACRN_MEM_TYPE_MASK) |
+                       (mem_access_right & ACRN_MEM_ACCESS_RIGHT_MASK));
+       ret = modify_region(vm, region);
+
+       dev_dbg(acrn_dev.this_device,
+               "%s: user-GPA[%pK] service-GPA[%pK] size[0x%llx].\n",
+               __func__, (void *)user_gpa, (void *)service_gpa, size);
+       kfree(region);
+       return ret;
+}
+
+/**
+ * acrn_mm_region_del() - Delete the EPT mapping of a memory region.
+ * @vm:                User VM.
+ * @user_gpa:  A GPA of the User VM.
+ * @size:      Size of the region.
+ *
+ * Return: 0 on success, <0 for error.
+ */
+int acrn_mm_region_del(struct acrn_vm *vm, u64 user_gpa, u64 size)
+{
+       struct vm_memory_region_op *region;
+       int ret = 0;
+
+       region = kzalloc(sizeof(*region), GFP_KERNEL);
+       if (!region)
+               return -ENOMEM;
+
+       region->type = ACRN_MEM_REGION_DEL;
+       region->user_vm_pa = user_gpa;
+       region->service_vm_pa = 0UL;
+       region->size = size;
+       region->attr = 0U;
+
+       ret = modify_region(vm, region);
+
+       dev_dbg(acrn_dev.this_device, "%s: user-GPA[%pK] size[0x%llx].\n",
+               __func__, (void *)user_gpa, size);
+       kfree(region);
+       return ret;
+}
+
+int acrn_vm_memseg_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
+{
+       int ret;
+
+       if (memmap->type == ACRN_MEMMAP_RAM)
+               return acrn_vm_ram_map(vm, memmap);
+
+       if (memmap->type != ACRN_MEMMAP_MMIO) {
+               dev_dbg(acrn_dev.this_device,
+                       "Invalid memmap type: %u\n", memmap->type);
+               return -EINVAL;
+       }
+
+       ret = acrn_mm_region_add(vm, memmap->user_vm_pa,
+                                memmap->service_vm_pa, memmap->len,
+                                ACRN_MEM_TYPE_UC, memmap->attr);
+       if (ret < 0)
+               dev_dbg(acrn_dev.this_device,
+                       "Add memory region failed, VM[%u]!\n", vm->vmid);
+
+       return ret;
+}
+
+int acrn_vm_memseg_unmap(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
+{
+       int ret;
+
+       if (memmap->type != ACRN_MEMMAP_MMIO) {
+               dev_dbg(acrn_dev.this_device,
+                       "Invalid memmap type: %u\n", memmap->type);
+               return -EINVAL;
+       }
+
+       ret = acrn_mm_region_del(vm, memmap->user_vm_pa, memmap->len);
+       if (ret < 0)
+               dev_dbg(acrn_dev.this_device,
+                       "Del memory region failed, VM[%u]!\n", vm->vmid);
+
+       return ret;
+}
+
+/**
+ * acrn_vm_ram_map() - Create a RAM EPT mapping of a User VM.
+ * @vm:                The User VM pointer
+ * @memmap:    Info of the EPT mapping
+ *
+ * Return: 0 on success, <0 for error.
+ */
+int acrn_vm_ram_map(struct acrn_vm *vm, struct acrn_vm_memmap *memmap)
+{
+       struct vm_memory_region_batch *regions_info;
+       int nr_pages, i = 0, order, nr_regions = 0;
+       struct vm_memory_mapping *region_mapping;
+       struct vm_memory_region_op *vm_region;
+       struct page **pages = NULL, *page;
+       void *remap_vaddr;
+       int ret, pinned;
+       u64 user_vm_pa;
+
+       if (!vm || !memmap)
+               return -EINVAL;
+
+       /* Get the number of pages in the map region */
+       nr_pages = memmap->len >> PAGE_SHIFT;
+       pages = vzalloc(nr_pages * sizeof(struct page *));
+       if (!pages)
+               return -ENOMEM;
+
+       /* Pin the pages of the user memory map region */
+       pinned = pin_user_pages_fast(memmap->vma_base,
+                                    nr_pages, FOLL_WRITE | FOLL_LONGTERM,
+                                    pages);
+       if (pinned < 0) {
+               ret = pinned;
+               goto free_pages;
+       } else if (pinned != nr_pages) {
+               ret = -EFAULT;
+               goto put_pages;
+       }
+
+       /* Create a kernel map for the map region */
+       remap_vaddr = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+       if (!remap_vaddr) {
+               ret = -ENOMEM;
+               goto put_pages;
+       }
+
+       /* Record Service VM va <-> User VM pa mapping */
+       mutex_lock(&vm->regions_mapping_lock);
+       region_mapping = &vm->regions_mapping[vm->regions_mapping_count];
+       if (vm->regions_mapping_count < ACRN_MEM_MAPPING_MAX) {
+               region_mapping->pages = pages;
+               region_mapping->npages = nr_pages;
+               region_mapping->size = memmap->len;
+               region_mapping->service_vm_va = remap_vaddr;
+               region_mapping->user_vm_pa = memmap->user_vm_pa;
+               vm->regions_mapping_count++;
+       } else {
+               dev_warn(acrn_dev.this_device,
+                       "Run out of memory mapping slots!\n");
+               ret = -ENOMEM;
+               mutex_unlock(&vm->regions_mapping_lock);
+               goto unmap_no_count;
+       }
+       mutex_unlock(&vm->regions_mapping_lock);
+
+       /* Calculate the number of vm_memory_region_op entries needed */
+       while (i < nr_pages) {
+               page = pages[i];
+               VM_BUG_ON_PAGE(PageTail(page), page);
+               order = compound_order(page);
+               nr_regions++;
+               i += 1 << order;
+       }
+
+       /* Prepare the vm_memory_region_batch */
+       regions_info = kzalloc(sizeof(*regions_info) +
+                              sizeof(*vm_region) * nr_regions,
+                              GFP_KERNEL);
+       if (!regions_info) {
+               ret = -ENOMEM;
+               goto unmap_kernel_map;
+       }
+
+       /* Fill each vm_memory_region_op */
+       vm_region = (struct vm_memory_region_op *)(regions_info + 1);
+       regions_info->vmid = vm->vmid;
+       regions_info->regions_num = nr_regions;
+       regions_info->regions_gpa = virt_to_phys(vm_region);
+       user_vm_pa = memmap->user_vm_pa;
+       i = 0;
+       while (i < nr_pages) {
+               u32 region_size;
+
+               page = pages[i];
+               VM_BUG_ON_PAGE(PageTail(page), page);
+               order = compound_order(page);
+               region_size = PAGE_SIZE << order;
+               vm_region->type = ACRN_MEM_REGION_ADD;
+               vm_region->user_vm_pa = user_vm_pa;
+               vm_region->service_vm_pa = page_to_phys(page);
+               vm_region->size = region_size;
+               vm_region->attr = (ACRN_MEM_TYPE_WB & ACRN_MEM_TYPE_MASK) |
+                                 (memmap->attr & ACRN_MEM_ACCESS_RIGHT_MASK);
+
+               vm_region++;
+               user_vm_pa += region_size;
+               i += 1 << order;
+       }
+
+       /* Inform the ACRN Hypervisor to set up EPT mappings */
+       ret = hcall_set_memory_regions(virt_to_phys(regions_info));
+       if (ret < 0) {
+               dev_dbg(acrn_dev.this_device,
+                       "Failed to set regions, VM[%u]!\n", vm->vmid);
+               goto unset_region;
+       }
+       kfree(regions_info);
+
+       dev_dbg(acrn_dev.this_device,
+               "%s: VM[%u] service-GVA[%pK] user-GPA[%pK] size[0x%llx]\n",
+               __func__, vm->vmid,
+               remap_vaddr, (void *)memmap->user_vm_pa, memmap->len);
+       return ret;
+
+unset_region:
+       kfree(regions_info);
+unmap_kernel_map:
+       mutex_lock(&vm->regions_mapping_lock);
+       vm->regions_mapping_count--;
+       mutex_unlock(&vm->regions_mapping_lock);
+unmap_no_count:
+       vunmap(remap_vaddr);
+put_pages:
+       for (i = 0; i < pinned; i++)
+               unpin_user_page(pages[i]);
+free_pages:
+       vfree(pages);
+       return ret;
+}
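
Both walks above advance by the compound order of each head page, so a
region backed by a 2 MB huge page contributes one vm_memory_region_op
instead of 512 4 KB entries. The counting step, extracted as a minimal
sketch for illustration:

    /* count how many EPT regions a pinned page array will need */
    static unsigned int count_regions(struct page **pages, int nr_pages)
    {
            unsigned int nr_regions = 0;
            int i = 0;

            while (i < nr_pages) {
                    /* each step lands on a head page (see VM_BUG_ON_PAGE above) */
                    nr_regions++;
                    i += 1 << compound_order(pages[i]);
            }
            return nr_regions;
    }
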
+
+/**
+ * acrn_vm_all_ram_unmap() - Destroy all RAM EPT mappings of a User VM.
+ * @vm:        The User VM
+ */
+void acrn_vm_all_ram_unmap(struct acrn_vm *vm)
+{
+       struct vm_memory_mapping *region_mapping;
+       int i, j;
+
+       mutex_lock(&vm->regions_mapping_lock);
+       for (i = 0; i < vm->regions_mapping_count; i++) {
+               region_mapping = &vm->regions_mapping[i];
+               vunmap(region_mapping->service_vm_va);
+               for (j = 0; j < region_mapping->npages; j++)
+                       unpin_user_page(region_mapping->pages[j]);
+               vfree(region_mapping->pages);
+       }
+       mutex_unlock(&vm->regions_mapping_lock);
+}
diff --git a/drivers/virt/acrn/vm.c b/drivers/virt/acrn/vm.c
new file mode 100644 (file)
index 0000000..7804a24
--- /dev/null
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * ACRN_HSM: Virtual Machine management
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ *
+ * Authors:
+ *     Jason Chen CJ <jason.cj.chen@intel.com>
+ *     Yakui Zhao <yakui.zhao@intel.com>
+ */
+#include <linux/io.h>
+#include <linux/mm.h>
+#include <linux/slab.h>
+
+#include "acrn_drv.h"
+
+/* List of VMs */
+LIST_HEAD(acrn_vm_list);
+/*
+ * acrn_vm_list is read in a worker thread which dispatches I/O requests and
+ * is written in the VM creation ioctl. Use the rwlock mechanism to protect it.
+ */
+DEFINE_RWLOCK(acrn_vm_list_lock);
+
+struct acrn_vm *acrn_vm_create(struct acrn_vm *vm,
+                              struct acrn_vm_creation *vm_param)
+{
+       int ret;
+
+       ret = hcall_create_vm(virt_to_phys(vm_param));
+       if (ret < 0 || vm_param->vmid == ACRN_INVALID_VMID) {
+               dev_err(acrn_dev.this_device,
+                       "Failed to create VM! Error: %d\n", ret);
+               return NULL;
+       }
+
+       mutex_init(&vm->regions_mapping_lock);
+       INIT_LIST_HEAD(&vm->ioreq_clients);
+       spin_lock_init(&vm->ioreq_clients_lock);
+       vm->vmid = vm_param->vmid;
+       vm->vcpu_num = vm_param->vcpu_num;
+
+       if (acrn_ioreq_init(vm, vm_param->ioreq_buf) < 0) {
+               hcall_destroy_vm(vm_param->vmid);
+               vm->vmid = ACRN_INVALID_VMID;
+               return NULL;
+       }
+
+       write_lock_bh(&acrn_vm_list_lock);
+       list_add(&vm->list, &acrn_vm_list);
+       write_unlock_bh(&acrn_vm_list_lock);
+
+       acrn_ioeventfd_init(vm);
+       acrn_irqfd_init(vm);
+       dev_dbg(acrn_dev.this_device, "VM %u created.\n", vm->vmid);
+       return vm;
+}
+
+int acrn_vm_destroy(struct acrn_vm *vm)
+{
+       int ret;
+
+       if (vm->vmid == ACRN_INVALID_VMID ||
+           test_and_set_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags))
+               return 0;
+
+       /* Remove from global VM list */
+       write_lock_bh(&acrn_vm_list_lock);
+       list_del_init(&vm->list);
+       write_unlock_bh(&acrn_vm_list_lock);
+
+       acrn_ioeventfd_deinit(vm);
+       acrn_irqfd_deinit(vm);
+       acrn_ioreq_deinit(vm);
+
+       if (vm->monitor_page) {
+               put_page(vm->monitor_page);
+               vm->monitor_page = NULL;
+       }
+
+       ret = hcall_destroy_vm(vm->vmid);
+       if (ret < 0) {
+               dev_err(acrn_dev.this_device,
+                       "Failed to destroy VM %u\n", vm->vmid);
+               clear_bit(ACRN_VM_FLAG_DESTROYED, &vm->flags);
+               return ret;
+       }
+
+       acrn_vm_all_ram_unmap(vm);
+
+       dev_dbg(acrn_dev.this_device, "VM %u destroyed.\n", vm->vmid);
+       vm->vmid = ACRN_INVALID_VMID;
+       return 0;
+}
+
+/**
+ * acrn_msi_inject() - Inject an MSI interrupt into a User VM
+ * @vm:                User VM
+ * @msi_addr:  The MSI address
+ * @msi_data:  The MSI data
+ *
+ * Return: 0 on success, <0 on error
+ */
+int acrn_msi_inject(struct acrn_vm *vm, u64 msi_addr, u64 msi_data)
+{
+       struct acrn_msi_entry *msi;
+       int ret;
+
+       /* might be used in interrupt context, so use GFP_ATOMIC */
+       msi = kzalloc(sizeof(*msi), GFP_ATOMIC);
+       if (!msi)
+               return -ENOMEM;
+
+       /*
+        * msi_addr: addr[19:12] carries the destination vCPU id
+        * msi_data: data[7:0] carries the vector
+        */
+       msi->msi_addr = msi_addr;
+       msi->msi_data = msi_data;
+       ret = hcall_inject_msi(vm->vmid, virt_to_phys(msi));
+       if (ret < 0)
+               dev_err(acrn_dev.this_device,
+                       "Failed to inject MSI to VM %u!\n", vm->vmid);
+       kfree(msi);
+       return ret;
+}
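
Given the encoding noted above (destination vCPU id in addr[19:12], vector
in data[7:0]), a convenience wrapper might look like the sketch below. The
0xfee00000 base is the standard x86 MSI address window and is an assumption
here, not something this patch defines:

    /* hypothetical helper: inject 'vector' at 'dest_vcpu' of a User VM */
    static int acrn_msi_inject_vector(struct acrn_vm *vm, u32 dest_vcpu,
                                      u8 vector)
    {
            u64 addr = 0xfee00000ULL | ((u64)dest_vcpu << 12);

            return acrn_msi_inject(vm, addr, vector);
    }
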
index ea05af4..8d195e3 100644 (file)
@@ -468,7 +468,7 @@ static int hgcm_cancel_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call)
  *               Cancellation fun.
  */
 static int vbg_hgcm_do_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call,
-                           u32 timeout_ms, bool *leak_it)
+                           u32 timeout_ms, bool interruptible, bool *leak_it)
 {
        int rc, cancel_rc, ret;
        long timeout;
@@ -495,10 +495,15 @@ static int vbg_hgcm_do_call(struct vbg_dev *gdev, struct vmmdev_hgcm_call *call,
        else
                timeout = msecs_to_jiffies(timeout_ms);
 
-       timeout = wait_event_interruptible_timeout(
-                                       gdev->hgcm_wq,
-                                       hgcm_req_done(gdev, &call->header),
-                                       timeout);
+       if (interruptible) {
+               timeout = wait_event_interruptible_timeout(gdev->hgcm_wq,
+                                                          hgcm_req_done(gdev, &call->header),
+                                                          timeout);
+       } else {
+               timeout = wait_event_timeout(gdev->hgcm_wq,
+                                            hgcm_req_done(gdev, &call->header),
+                                            timeout);
+       }
 
        /* timeout > 0 means hgcm_req_done has returned true, so success */
        if (timeout > 0)
@@ -631,7 +636,8 @@ int vbg_hgcm_call(struct vbg_dev *gdev, u32 requestor, u32 client_id,
        hgcm_call_init_call(call, client_id, function, parms, parm_count,
                            bounce_bufs);
 
-       ret = vbg_hgcm_do_call(gdev, call, timeout_ms, &leak_it);
+       ret = vbg_hgcm_do_call(gdev, call, timeout_ms,
+                              requestor & VMMDEV_REQUESTOR_USERMODE, &leak_it);
        if (ret == 0) {
                *vbox_status = call->header.result;
                ret = hgcm_call_copy_back_result(call, parms, parm_count,
index 7b41130..ce1b3f6 100644 (file)
@@ -12,6 +12,14 @@ config ARCH_HAS_RESTRICTED_VIRTIO_MEMORY_ACCESS
          This option is selected if the architecture may need to enforce
          VIRTIO_F_ACCESS_PLATFORM
 
+config VIRTIO_PCI_LIB
+       tristate
+       help
+         Modern PCI device implementation. This module implements the
+         basic probe and control for devices which are based on the
+         modern PCI device layout, with possible vendor-specific
+         extensions. Any module that selects this module must depend
+         on PCI.
+
 menuconfig VIRTIO_MENU
        bool "Virtio drivers"
        default y
@@ -21,6 +29,7 @@ if VIRTIO_MENU
 config VIRTIO_PCI
        tristate "PCI driver for virtio devices"
        depends on PCI
+       select VIRTIO_PCI_LIB
        select VIRTIO
        help
          This driver provides support for virtio based paravirtual device
index 591e6f7..699bbea 100644 (file)
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 obj-$(CONFIG_VIRTIO) += virtio.o virtio_ring.o
+obj-$(CONFIG_VIRTIO_PCI_LIB) += virtio_pci_modern_dev.o
 obj-$(CONFIG_VIRTIO_MMIO) += virtio_mmio.o
 obj-$(CONFIG_VIRTIO_PCI) += virtio_pci.o
 virtio_pci-y := virtio_pci_modern.o virtio_pci_common.o
index 42e09cc..4b15c00 100644 (file)
@@ -141,15 +141,14 @@ void virtio_config_changed(struct virtio_device *dev)
 }
 EXPORT_SYMBOL_GPL(virtio_config_changed);
 
-void virtio_config_disable(struct virtio_device *dev)
+static void virtio_config_disable(struct virtio_device *dev)
 {
        spin_lock_irq(&dev->config_lock);
        dev->config_enabled = false;
        spin_unlock_irq(&dev->config_lock);
 }
-EXPORT_SYMBOL_GPL(virtio_config_disable);
 
-void virtio_config_enable(struct virtio_device *dev)
+static void virtio_config_enable(struct virtio_device *dev)
 {
        spin_lock_irq(&dev->config_lock);
        dev->config_enabled = true;
@@ -158,7 +157,6 @@ void virtio_config_enable(struct virtio_device *dev)
        dev->config_change_pending = false;
        spin_unlock_irq(&dev->config_lock);
 }
-EXPORT_SYMBOL_GPL(virtio_config_enable);
 
 void virtio_add_status(struct virtio_device *dev, unsigned int status)
 {
index f1f6208..ce51ae1 100644 (file)
@@ -7,6 +7,7 @@
 
 #include <uapi/linux/virtio_ids.h>
 #include <uapi/linux/virtio_input.h>
+#include <linux/input/mt.h>
 
 struct virtio_input {
        struct virtio_device       *vdev;
@@ -64,6 +65,21 @@ static int virtinput_send_status(struct virtio_input *vi,
        unsigned long flags;
        int rc;
 
+       /*
+        * Since 29cc309d8bf1 (HID: hid-multitouch: forward MSC_TIMESTAMP),
+        * EV_MSC/MSC_TIMESTAMP is added before each EV_SYN event.
+        * EV_MSC is configured as INPUT_PASS_TO_ALL.
+        * In the case of a touch device:
+        *   The BE passes EV_MSC/MSC_TIMESTAMP to the FE on receiving an
+        *   event from evdev.
+        *   The FE passes EV_MSC/MSC_TIMESTAMP back to the BE.
+        *   The BE writes EV_MSC/MSC_TIMESTAMP to evdev due to
+        *   INPUT_PASS_TO_ALL.
+        *   The BE receives the extra EV_MSC/MSC_TIMESTAMP and passes it
+        *   to the FE.
+        *   >>> Each new frame becomes larger and larger.
+        * Disable EV_MSC/MSC_TIMESTAMP forwarding for MT devices.
+        */
+       if (vi->idev->mt && type == EV_MSC && code == MSC_TIMESTAMP)
+               return 0;
+
        stsbuf = kzalloc(sizeof(*stsbuf), GFP_ATOMIC);
        if (!stsbuf)
                return -ENOMEM;
@@ -204,7 +220,7 @@ static int virtinput_probe(struct virtio_device *vdev)
        struct virtio_input *vi;
        unsigned long flags;
        size_t size;
-       int abs, err;
+       int abs, err, nslots;
 
        if (!virtio_has_feature(vdev, VIRTIO_F_VERSION_1))
                return -ENODEV;
@@ -289,6 +305,13 @@ static int virtinput_probe(struct virtio_device *vdev)
                                continue;
                        virtinput_cfg_abs(vi, abs);
                }
+
+               if (test_bit(ABS_MT_SLOT, vi->idev->absbit)) {
+                       nslots = input_abs_get_max(vi->idev, ABS_MT_SLOT) + 1;
+                       err = input_mt_init_slots(vi->idev, nslots, 0);
+                       if (err)
+                               goto err_mt_init_slots;
+               }
        }
 
        virtio_device_ready(vdev);
@@ -304,6 +327,7 @@ err_input_register:
        spin_lock_irqsave(&vi->lock, flags);
        vi->ready = false;
        spin_unlock_irqrestore(&vi->lock, flags);
+err_mt_init_slots:
        input_free_device(vi->idev);
 err_input_alloc:
        vdev->config->del_vqs(vdev);
index 9fc9ec4..10ec60d 100644 (file)
@@ -623,7 +623,7 @@ static int virtio_mem_add_memory(struct virtio_mem *vm, uint64_t addr,
        /* Memory might get onlined immediately. */
        atomic64_add(size, &vm->offline_size);
        rc = add_memory_driver_managed(vm->nid, addr, size, vm->resource_name,
-                                      MEMHP_MERGE_RESOURCE);
+                                      MHP_MERGE_RESOURCE);
        if (rc) {
                atomic64_sub(size, &vm->offline_size);
                dev_warn(&vm->vdev->dev, "adding memory failed: %d\n", rc);
@@ -2222,7 +2222,7 @@ static int virtio_mem_unplug_pending_mb(struct virtio_mem *vm)
  */
 static void virtio_mem_refresh_config(struct virtio_mem *vm)
 {
-       const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
+       const struct range pluggable_range = mhp_get_pluggable_range(true);
        uint64_t new_plugged_size, usable_region_size, end_addr;
 
        /* the plugged_size is just a reflection of what _we_ did previously */
@@ -2234,15 +2234,25 @@ static void virtio_mem_refresh_config(struct virtio_mem *vm)
        /* calculate the last usable memory block id */
        virtio_cread_le(vm->vdev, struct virtio_mem_config,
                        usable_region_size, &usable_region_size);
-       end_addr = vm->addr + usable_region_size;
-       end_addr = min(end_addr, phys_limit);
+       end_addr = min(vm->addr + usable_region_size - 1,
+                      pluggable_range.end);
 
-       if (vm->in_sbm)
-               vm->sbm.last_usable_mb_id =
-                                        virtio_mem_phys_to_mb_id(end_addr) - 1;
-       else
-               vm->bbm.last_usable_bb_id =
-                                    virtio_mem_phys_to_bb_id(vm, end_addr) - 1;
+       if (vm->in_sbm) {
+               vm->sbm.last_usable_mb_id = virtio_mem_phys_to_mb_id(end_addr);
+               if (!IS_ALIGNED(end_addr + 1, memory_block_size_bytes()))
+                       vm->sbm.last_usable_mb_id--;
+       } else {
+               vm->bbm.last_usable_bb_id = virtio_mem_phys_to_bb_id(vm,
+                                                                    end_addr);
+               if (!IS_ALIGNED(end_addr + 1, vm->bbm.bb_size))
+                       vm->bbm.last_usable_bb_id--;
+       }
+       /*
+        * If we cannot plug any of our device memory (e.g., nothing in the
+        * usable region is addressable), the last usable memory block id will
+        * be smaller than the first usable memory block id. We'll stop
+        * attempting to add memory with -ENOSPC from our main loop.
+        */
 
        /* see if there is a request to change the size */
        virtio_cread_le(vm->vdev, struct virtio_mem_config, requested_size,
@@ -2364,7 +2374,7 @@ static int virtio_mem_init_vq(struct virtio_mem *vm)
 
 static int virtio_mem_init(struct virtio_mem *vm)
 {
-       const uint64_t phys_limit = 1UL << MAX_PHYSMEM_BITS;
+       const struct range pluggable_range = mhp_get_pluggable_range(true);
        uint64_t sb_size, addr;
        uint16_t node_id;
 
@@ -2405,9 +2415,10 @@ static int virtio_mem_init(struct virtio_mem *vm)
        if (!IS_ALIGNED(vm->addr + vm->region_size, memory_block_size_bytes()))
                dev_warn(&vm->vdev->dev,
                         "The alignment of the physical end address can make some memory unusable.\n");
-       if (vm->addr + vm->region_size > phys_limit)
+       if (vm->addr < pluggable_range.start ||
+           vm->addr + vm->region_size - 1 > pluggable_range.end)
                dev_warn(&vm->vdev->dev,
-                        "Some memory is not addressable. This can make some memory unusable.\n");
+                        "Some device memory is not addressable/pluggable. This can make some memory unusable.\n");
 
        /*
         * We want subblocks to span at least MAX_ORDER_NR_PAGES and
@@ -2429,7 +2440,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
                                     vm->sbm.sb_size;
 
                /* Round up to the next full memory block */
-               addr = vm->addr + memory_block_size_bytes() - 1;
+               addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
+                      memory_block_size_bytes() - 1;
                vm->sbm.first_mb_id = virtio_mem_phys_to_mb_id(addr);
                vm->sbm.next_mb_id = vm->sbm.first_mb_id;
        } else {
@@ -2450,7 +2462,8 @@ static int virtio_mem_init(struct virtio_mem *vm)
                }
 
                /* Round up to the next aligned big block */
-               addr = vm->addr + vm->bbm.bb_size - 1;
+               addr = max_t(uint64_t, vm->addr, pluggable_range.start) +
+                      vm->bbm.bb_size - 1;
                vm->bbm.first_bb_id = virtio_mem_phys_to_bb_id(vm, addr);
                vm->bbm.next_bb_id = vm->bbm.first_bb_id;
        }
@@ -2577,7 +2590,7 @@ static int virtio_mem_probe(struct virtio_device *vdev)
         * actually in use (e.g., trying to reload the driver).
         */
        if (vm->plugged_size) {
-               vm->unplug_all_required = 1;
+               vm->unplug_all_required = true;
                dev_info(&vm->vdev->dev, "unplugging all memory is required\n");
        }
 
index 238383f..56128b9 100644 (file)
@@ -126,7 +126,7 @@ static int vm_finalize_features(struct virtio_device *vdev)
        /* Give virtio_ring a chance to accept features. */
        vring_transport_features(vdev);
 
-       /* Make sure there is are no mixed devices */
+       /* Make sure there are no mixed devices */
        if (vm_dev->version == 2 &&
                        !__virtio_test_bit(vdev, VIRTIO_F_VERSION_1)) {
                dev_err(&vdev->dev, "New virtio-mmio devices (version 2) must provide VIRTIO_F_VERSION_1 feature!\n");
@@ -548,8 +548,7 @@ static void virtio_mmio_release_dev(struct device *_d)
 {
        struct virtio_device *vdev =
                        container_of(_d, struct virtio_device, dev);
-       struct virtio_mmio_device *vm_dev =
-                       container_of(vdev, struct virtio_mmio_device, vdev);
+       struct virtio_mmio_device *vm_dev = to_virtio_mmio_device(vdev);
        struct platform_device *pdev = vm_dev->pdev;
 
        devm_kfree(&pdev->dev, vm_dev);
index b2f0eb4..beec047 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/virtio_config.h>
 #include <linux/virtio_ring.h>
 #include <linux/virtio_pci.h>
+#include <linux/virtio_pci_modern.h>
 #include <linux/highmem.h>
 #include <linux/spinlock.h>
 
@@ -43,31 +44,12 @@ struct virtio_pci_vq_info {
 struct virtio_pci_device {
        struct virtio_device vdev;
        struct pci_dev *pci_dev;
+       struct virtio_pci_modern_device mdev;
 
        /* In legacy mode, these two point to within ->legacy. */
        /* Where to read and clear interrupt */
        u8 __iomem *isr;
 
-       /* Modern only fields */
-       /* The IO mapping for the PCI config space (non-legacy mode) */
-       struct virtio_pci_common_cfg __iomem *common;
-       /* Device-specific data (non-legacy mode)  */
-       void __iomem *device;
-       /* Base of vq notifications (non-legacy mode). */
-       void __iomem *notify_base;
-
-       /* So we can sanity-check accesses. */
-       size_t notify_len;
-       size_t device_len;
-
-       /* Capability for when we need to map notifications per-vq. */
-       int notify_map_cap;
-
-       /* Multiply queue_notify_off by this value. (non-legacy mode). */
-       u32 notify_offset_multiplier;
-
-       int modern_bars;
-
        /* Legacy only field */
        /* the IO mapping for the PCI config space */
        void __iomem *ioaddr;
index 3d6ae5a..fbd4ebc 100644 (file)
 #define VIRTIO_RING_NO_LEGACY
 #include "virtio_pci_common.h"
 
-/*
- * Type-safe wrappers for io accesses.
- * Use these to enforce at compile time the following spec requirement:
- *
- * The driver MUST access each field using the “natural” access
- * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
- * for 16-bit fields and 8-bit accesses for 8-bit fields.
- */
-static inline u8 vp_ioread8(const u8 __iomem *addr)
-{
-       return ioread8(addr);
-}
-static inline u16 vp_ioread16 (const __le16 __iomem *addr)
-{
-       return ioread16(addr);
-}
-
-static inline u32 vp_ioread32(const __le32 __iomem *addr)
-{
-       return ioread32(addr);
-}
-
-static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
-{
-       iowrite8(value, addr);
-}
-
-static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
-{
-       iowrite16(value, addr);
-}
-
-static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
-{
-       iowrite32(value, addr);
-}
-
-static void vp_iowrite64_twopart(u64 val,
-                                __le32 __iomem *lo, __le32 __iomem *hi)
-{
-       vp_iowrite32((u32)val, lo);
-       vp_iowrite32(val >> 32, hi);
-}
-
-static void __iomem *map_capability(struct pci_dev *dev, int off,
-                                   size_t minlen,
-                                   u32 align,
-                                   u32 start, u32 size,
-                                   size_t *len)
-{
-       u8 bar;
-       u32 offset, length;
-       void __iomem *p;
-
-       pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
-                                                bar),
-                            &bar);
-       pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
-                            &offset);
-       pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
-                             &length);
-
-       if (length <= start) {
-               dev_err(&dev->dev,
-                       "virtio_pci: bad capability len %u (>%u expected)\n",
-                       length, start);
-               return NULL;
-       }
-
-       if (length - start < minlen) {
-               dev_err(&dev->dev,
-                       "virtio_pci: bad capability len %u (>=%zu expected)\n",
-                       length, minlen);
-               return NULL;
-       }
-
-       length -= start;
-
-       if (start + offset < offset) {
-               dev_err(&dev->dev,
-                       "virtio_pci: map wrap-around %u+%u\n",
-                       start, offset);
-               return NULL;
-       }
-
-       offset += start;
-
-       if (offset & (align - 1)) {
-               dev_err(&dev->dev,
-                       "virtio_pci: offset %u not aligned to %u\n",
-                       offset, align);
-               return NULL;
-       }
-
-       if (length > size)
-               length = size;
-
-       if (len)
-               *len = length;
-
-       if (minlen + offset < minlen ||
-           minlen + offset > pci_resource_len(dev, bar)) {
-               dev_err(&dev->dev,
-                       "virtio_pci: map virtio %zu@%u "
-                       "out of range on bar %i length %lu\n",
-                       minlen, offset,
-                       bar, (unsigned long)pci_resource_len(dev, bar));
-               return NULL;
-       }
-
-       p = pci_iomap_range(dev, bar, offset, length);
-       if (!p)
-               dev_err(&dev->dev,
-                       "virtio_pci: unable to map virtio %u@%u on bar %i\n",
-                       length, offset, bar);
-       return p;
-}
-
-/* virtio config->get_features() implementation */
 static u64 vp_get_features(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       u64 features;
-
-       vp_iowrite32(0, &vp_dev->common->device_feature_select);
-       features = vp_ioread32(&vp_dev->common->device_feature);
-       vp_iowrite32(1, &vp_dev->common->device_feature_select);
-       features |= ((u64)vp_ioread32(&vp_dev->common->device_feature) << 32);
 
-       return features;
+       return vp_modern_get_features(&vp_dev->mdev);
 }
 
 static void vp_transport_features(struct virtio_device *vdev, u64 features)
@@ -179,10 +54,7 @@ static int vp_finalize_features(struct virtio_device *vdev)
                return -EINVAL;
        }
 
-       vp_iowrite32(0, &vp_dev->common->guest_feature_select);
-       vp_iowrite32((u32)vdev->features, &vp_dev->common->guest_feature);
-       vp_iowrite32(1, &vp_dev->common->guest_feature_select);
-       vp_iowrite32(vdev->features >> 32, &vp_dev->common->guest_feature);
+       vp_modern_set_features(&vp_dev->mdev, vdev->features);
 
        return 0;
 }
@@ -192,29 +64,31 @@ static void vp_get(struct virtio_device *vdev, unsigned offset,
                   void *buf, unsigned len)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+       void __iomem *device = mdev->device;
        u8 b;
        __le16 w;
        __le32 l;
 
-       BUG_ON(offset + len > vp_dev->device_len);
+       BUG_ON(offset + len > mdev->device_len);
 
        switch (len) {
        case 1:
-               b = ioread8(vp_dev->device + offset);
+               b = ioread8(device + offset);
                memcpy(buf, &b, sizeof b);
                break;
        case 2:
-               w = cpu_to_le16(ioread16(vp_dev->device + offset));
+               w = cpu_to_le16(ioread16(device + offset));
                memcpy(buf, &w, sizeof w);
                break;
        case 4:
-               l = cpu_to_le32(ioread32(vp_dev->device + offset));
+               l = cpu_to_le32(ioread32(device + offset));
                memcpy(buf, &l, sizeof l);
                break;
        case 8:
-               l = cpu_to_le32(ioread32(vp_dev->device + offset));
+               l = cpu_to_le32(ioread32(device + offset));
                memcpy(buf, &l, sizeof l);
-               l = cpu_to_le32(ioread32(vp_dev->device + offset + sizeof l));
+               l = cpu_to_le32(ioread32(device + offset + sizeof l));
                memcpy(buf + sizeof l, &l, sizeof l);
                break;
        default:
@@ -228,30 +102,32 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
                   const void *buf, unsigned len)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+       void __iomem *device = mdev->device;
        u8 b;
        __le16 w;
        __le32 l;
 
-       BUG_ON(offset + len > vp_dev->device_len);
+       BUG_ON(offset + len > mdev->device_len);
 
        switch (len) {
        case 1:
                memcpy(&b, buf, sizeof b);
-               iowrite8(b, vp_dev->device + offset);
+               iowrite8(b, device + offset);
                break;
        case 2:
                memcpy(&w, buf, sizeof w);
-               iowrite16(le16_to_cpu(w), vp_dev->device + offset);
+               iowrite16(le16_to_cpu(w), device + offset);
                break;
        case 4:
                memcpy(&l, buf, sizeof l);
-               iowrite32(le32_to_cpu(l), vp_dev->device + offset);
+               iowrite32(le32_to_cpu(l), device + offset);
                break;
        case 8:
                memcpy(&l, buf, sizeof l);
-               iowrite32(le32_to_cpu(l), vp_dev->device + offset);
+               iowrite32(le32_to_cpu(l), device + offset);
                memcpy(&l, buf + sizeof l, sizeof l);
-               iowrite32(le32_to_cpu(l), vp_dev->device + offset + sizeof l);
+               iowrite32(le32_to_cpu(l), device + offset + sizeof l);
                break;
        default:
                BUG();
@@ -261,35 +137,40 @@ static void vp_set(struct virtio_device *vdev, unsigned offset,
 static u32 vp_generation(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       return vp_ioread8(&vp_dev->common->config_generation);
+
+       return vp_modern_generation(&vp_dev->mdev);
 }
 
 /* config->{get,set}_status() implementations */
 static u8 vp_get_status(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
-       return vp_ioread8(&vp_dev->common->device_status);
+
+       return vp_modern_get_status(&vp_dev->mdev);
 }
 
 static void vp_set_status(struct virtio_device *vdev, u8 status)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+
        /* We should never be setting status to 0. */
        BUG_ON(status == 0);
-       vp_iowrite8(status, &vp_dev->common->device_status);
+       vp_modern_set_status(&vp_dev->mdev, status);
 }
 
 static void vp_reset(struct virtio_device *vdev)
 {
        struct virtio_pci_device *vp_dev = to_vp_device(vdev);
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
+
        /* 0 status means a reset. */
-       vp_iowrite8(0, &vp_dev->common->device_status);
+       vp_modern_set_status(mdev, 0);
        /* After writing 0 to device_status, the driver MUST wait for a read of
         * device_status to return 0 before reinitializing the device.
         * This will flush out the status write, and flush in device writes,
         * including MSI-X interrupts, if any.
         */
-       while (vp_ioread8(&vp_dev->common->device_status))
+       while (vp_modern_get_status(mdev))
                msleep(1);
        /* Flush pending VQ/configuration callbacks. */
        vp_synchronize_vectors(vdev);
@@ -297,11 +178,7 @@ static void vp_reset(struct virtio_device *vdev)
 
 static u16 vp_config_vector(struct virtio_pci_device *vp_dev, u16 vector)
 {
-       /* Setup the vector used for configuration events */
-       vp_iowrite16(vector, &vp_dev->common->msix_config);
-       /* Verify we had enough resources to assign the vector */
-       /* Will also flush the write out to device */
-       return vp_ioread16(&vp_dev->common->msix_config);
+       return vp_modern_config_vector(&vp_dev->mdev, vector);
 }
 
 static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
@@ -312,20 +189,18 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
                                  bool ctx,
                                  u16 msix_vec)
 {
-       struct virtio_pci_common_cfg __iomem *cfg = vp_dev->common;
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
        struct virtqueue *vq;
        u16 num, off;
        int err;
 
-       if (index >= vp_ioread16(&cfg->num_queues))
+       if (index >= vp_modern_get_num_queues(mdev))
                return ERR_PTR(-ENOENT);
 
-       /* Select the queue we're interested in */
-       vp_iowrite16(index, &cfg->queue_select);
-
        /* Check if queue is either not available or already active. */
-       num = vp_ioread16(&cfg->queue_size);
-       if (!num || vp_ioread16(&cfg->queue_enable))
+       num = vp_modern_get_queue_size(mdev, index);
+       if (!num || vp_modern_get_queue_enable(mdev, index))
                return ERR_PTR(-ENOENT);
 
        if (num & (num - 1)) {
@@ -334,7 +209,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        }
 
        /* get offset of notification word for this vq */
-       off = vp_ioread16(&cfg->queue_notify_off);
+       off = vp_modern_get_queue_notify_off(mdev, index);
 
        info->msix_vector = msix_vec;
 
@@ -347,33 +222,30 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
                return ERR_PTR(-ENOMEM);
 
        /* activate the queue */
-       vp_iowrite16(virtqueue_get_vring_size(vq), &cfg->queue_size);
-       vp_iowrite64_twopart(virtqueue_get_desc_addr(vq),
-                            &cfg->queue_desc_lo, &cfg->queue_desc_hi);
-       vp_iowrite64_twopart(virtqueue_get_avail_addr(vq),
-                            &cfg->queue_avail_lo, &cfg->queue_avail_hi);
-       vp_iowrite64_twopart(virtqueue_get_used_addr(vq),
-                            &cfg->queue_used_lo, &cfg->queue_used_hi);
-
-       if (vp_dev->notify_base) {
+       vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
+       vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
+                               virtqueue_get_avail_addr(vq),
+                               virtqueue_get_used_addr(vq));
+
+       if (mdev->notify_base) {
                /* offset should not wrap */
-               if ((u64)off * vp_dev->notify_offset_multiplier + 2
-                   > vp_dev->notify_len) {
-                       dev_warn(&vp_dev->pci_dev->dev,
+               if ((u64)off * mdev->notify_offset_multiplier + 2
+                   > mdev->notify_len) {
+                       dev_warn(&mdev->pci_dev->dev,
                                 "bad notification offset %u (x %u) "
                                 "for queue %u > %zd",
-                                off, vp_dev->notify_offset_multiplier,
-                                index, vp_dev->notify_len);
+                                off, mdev->notify_offset_multiplier,
+                                index, mdev->notify_len);
                        err = -EINVAL;
                        goto err_map_notify;
                }
-               vq->priv = (void __force *)vp_dev->notify_base +
-                       off * vp_dev->notify_offset_multiplier;
+               vq->priv = (void __force *)mdev->notify_base +
+                       off * mdev->notify_offset_multiplier;
        } else {
-               vq->priv = (void __force *)map_capability(vp_dev->pci_dev,
-                                         vp_dev->notify_map_cap, 2, 2,
-                                         off * vp_dev->notify_offset_multiplier, 2,
-                                         NULL);
+               vq->priv = (void __force *)vp_modern_map_capability(mdev,
+                                                         mdev->notify_map_cap, 2, 2,
+                                                         off * mdev->notify_offset_multiplier, 2,
+                                                         NULL);
        }
 
        if (!vq->priv) {
@@ -382,8 +254,7 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        }
 
        if (msix_vec != VIRTIO_MSI_NO_VECTOR) {
-               vp_iowrite16(msix_vec, &cfg->queue_msix_vector);
-               msix_vec = vp_ioread16(&cfg->queue_msix_vector);
+               msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
                if (msix_vec == VIRTIO_MSI_NO_VECTOR) {
                        err = -EBUSY;
                        goto err_assign_vector;
@@ -393,8 +264,8 @@ static struct virtqueue *setup_vq(struct virtio_pci_device *vp_dev,
        return vq;
 
 err_assign_vector:
-       if (!vp_dev->notify_base)
-               pci_iounmap(vp_dev->pci_dev, (void __iomem __force *)vq->priv);
+       if (!mdev->notify_base)
+               pci_iounmap(mdev->pci_dev, (void __iomem __force *)vq->priv);
 err_map_notify:
        vring_del_virtqueue(vq);
        return ERR_PTR(err);
@@ -416,10 +287,8 @@ static int vp_modern_find_vqs(struct virtio_device *vdev, unsigned nvqs,
        /* Select and activate all queues. Has to be done last: once we do
         * this, there's no way to go back except reset.
         */
-       list_for_each_entry(vq, &vdev->vqs, list) {
-               vp_iowrite16(vq->index, &vp_dev->common->queue_select);
-               vp_iowrite16(1, &vp_dev->common->queue_enable);
-       }
+       list_for_each_entry(vq, &vdev->vqs, list)
+               vp_modern_set_queue_enable(&vp_dev->mdev, vq->index, true);
 
        return 0;
 }
@@ -428,18 +297,14 @@ static void del_vq(struct virtio_pci_vq_info *info)
 {
        struct virtqueue *vq = info->vq;
        struct virtio_pci_device *vp_dev = to_vp_device(vq->vdev);
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
 
-       vp_iowrite16(vq->index, &vp_dev->common->queue_select);
-
-       if (vp_dev->msix_enabled) {
-               vp_iowrite16(VIRTIO_MSI_NO_VECTOR,
-                            &vp_dev->common->queue_msix_vector);
-               /* Flush the write out to device */
-               vp_ioread16(&vp_dev->common->queue_msix_vector);
-       }
+       if (vp_dev->msix_enabled)
+               vp_modern_queue_vector(mdev, vq->index,
+                                      VIRTIO_MSI_NO_VECTOR);
 
-       if (!vp_dev->notify_base)
-               pci_iounmap(vp_dev->pci_dev, (void __force __iomem *)vq->priv);
+       if (!mdev->notify_base)
+               pci_iounmap(mdev->pci_dev, (void __force __iomem *)vq->priv);
 
        vring_del_virtqueue(vq);
 }
@@ -571,261 +436,36 @@ static const struct virtio_config_ops virtio_pci_config_ops = {
        .get_shm_region  = vp_get_shm_region,
 };
 
-/**
- * virtio_pci_find_capability - walk capabilities to find device info.
- * @dev: the pci device
- * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
- * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
- * @bars: the bitmask of BARs
- *
- * Returns offset of the capability, or 0.
- */
-static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
-                                            u32 ioresource_types, int *bars)
-{
-       int pos;
-
-       for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
-            pos > 0;
-            pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
-               u8 type, bar;
-               pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
-                                                        cfg_type),
-                                    &type);
-               pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
-                                                        bar),
-                                    &bar);
-
-               /* Ignore structures with reserved BAR values */
-               if (bar > 0x5)
-                       continue;
-
-               if (type == cfg_type) {
-                       if (pci_resource_len(dev, bar) &&
-                           pci_resource_flags(dev, bar) & ioresource_types) {
-                               *bars |= (1 << bar);
-                               return pos;
-                       }
-               }
-       }
-       return 0;
-}
-
-/* This is part of the ABI.  Don't screw with it. */
-static inline void check_offsets(void)
-{
-       /* Note: disk space was harmed in compilation of this function. */
-       BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
-                    offsetof(struct virtio_pci_cap, cap_vndr));
-       BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
-                    offsetof(struct virtio_pci_cap, cap_next));
-       BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
-                    offsetof(struct virtio_pci_cap, cap_len));
-       BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
-                    offsetof(struct virtio_pci_cap, cfg_type));
-       BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
-                    offsetof(struct virtio_pci_cap, bar));
-       BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
-                    offsetof(struct virtio_pci_cap, offset));
-       BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
-                    offsetof(struct virtio_pci_cap, length));
-       BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
-                    offsetof(struct virtio_pci_notify_cap,
-                             notify_off_multiplier));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
-                    offsetof(struct virtio_pci_common_cfg,
-                             device_feature_select));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
-                    offsetof(struct virtio_pci_common_cfg, device_feature));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
-                    offsetof(struct virtio_pci_common_cfg,
-                             guest_feature_select));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
-                    offsetof(struct virtio_pci_common_cfg, guest_feature));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
-                    offsetof(struct virtio_pci_common_cfg, msix_config));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
-                    offsetof(struct virtio_pci_common_cfg, num_queues));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
-                    offsetof(struct virtio_pci_common_cfg, device_status));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
-                    offsetof(struct virtio_pci_common_cfg, config_generation));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
-                    offsetof(struct virtio_pci_common_cfg, queue_select));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
-                    offsetof(struct virtio_pci_common_cfg, queue_size));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
-                    offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
-                    offsetof(struct virtio_pci_common_cfg, queue_enable));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
-                    offsetof(struct virtio_pci_common_cfg, queue_notify_off));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
-                    offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
-                    offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
-                    offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
-                    offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
-                    offsetof(struct virtio_pci_common_cfg, queue_used_lo));
-       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
-                    offsetof(struct virtio_pci_common_cfg, queue_used_hi));
-}
-
 /* the PCI probing function */
 int virtio_pci_modern_probe(struct virtio_pci_device *vp_dev)
 {
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
        struct pci_dev *pci_dev = vp_dev->pci_dev;
-       int err, common, isr, notify, device;
-       u32 notify_length;
-       u32 notify_offset;
-
-       check_offsets();
-
-       /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
-       if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
-               return -ENODEV;
-
-       if (pci_dev->device < 0x1040) {
-               /* Transitional devices: use the PCI subsystem device id as
-                * virtio device id, same as legacy driver always did.
-                */
-               vp_dev->vdev.id.device = pci_dev->subsystem_device;
-       } else {
-               /* Modern devices: simply use PCI device id, but start from 0x1040. */
-               vp_dev->vdev.id.device = pci_dev->device - 0x1040;
-       }
-       vp_dev->vdev.id.vendor = pci_dev->subsystem_vendor;
-
-       /* check for a common config: if not, use legacy mode (bar 0). */
-       common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
-                                           IORESOURCE_IO | IORESOURCE_MEM,
-                                           &vp_dev->modern_bars);
-       if (!common) {
-               dev_info(&pci_dev->dev,
-                        "virtio_pci: leaving for legacy driver\n");
-               return -ENODEV;
-       }
-
-       /* If common is there, these should be too... */
-       isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
-                                        IORESOURCE_IO | IORESOURCE_MEM,
-                                        &vp_dev->modern_bars);
-       notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
-                                           IORESOURCE_IO | IORESOURCE_MEM,
-                                           &vp_dev->modern_bars);
-       if (!isr || !notify) {
-               dev_err(&pci_dev->dev,
-                       "virtio_pci: missing capabilities %i/%i/%i\n",
-                       common, isr, notify);
-               return -EINVAL;
-       }
-
-       err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
-       if (err)
-               err = dma_set_mask_and_coherent(&pci_dev->dev,
-                                               DMA_BIT_MASK(32));
-       if (err)
-               dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+       int err;
 
-       /* Device capability is only mandatory for devices that have
-        * device-specific configuration.
-        */
-       device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
-                                           IORESOURCE_IO | IORESOURCE_MEM,
-                                           &vp_dev->modern_bars);
+       mdev->pci_dev = pci_dev;
 
-       err = pci_request_selected_regions(pci_dev, vp_dev->modern_bars,
-                                          "virtio-pci-modern");
+       err = vp_modern_probe(mdev);
        if (err)
                return err;
 
-       err = -EINVAL;
-       vp_dev->common = map_capability(pci_dev, common,
-                                       sizeof(struct virtio_pci_common_cfg), 4,
-                                       0, sizeof(struct virtio_pci_common_cfg),
-                                       NULL);
-       if (!vp_dev->common)
-               goto err_map_common;
-       vp_dev->isr = map_capability(pci_dev, isr, sizeof(u8), 1,
-                                    0, 1,
-                                    NULL);
-       if (!vp_dev->isr)
-               goto err_map_isr;
-
-       /* Read notify_off_multiplier from config space. */
-       pci_read_config_dword(pci_dev,
-                             notify + offsetof(struct virtio_pci_notify_cap,
-                                               notify_off_multiplier),
-                             &vp_dev->notify_offset_multiplier);
-       /* Read notify length and offset from config space. */
-       pci_read_config_dword(pci_dev,
-                             notify + offsetof(struct virtio_pci_notify_cap,
-                                               cap.length),
-                             &notify_length);
-
-       pci_read_config_dword(pci_dev,
-                             notify + offsetof(struct virtio_pci_notify_cap,
-                                               cap.offset),
-                             &notify_offset);
-
-       /* We don't know how many VQs we'll map, ahead of the time.
-        * If notify length is small, map it all now.
-        * Otherwise, map each VQ individually later.
-        */
-       if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
-               vp_dev->notify_base = map_capability(pci_dev, notify, 2, 2,
-                                                    0, notify_length,
-                                                    &vp_dev->notify_len);
-               if (!vp_dev->notify_base)
-                       goto err_map_notify;
-       } else {
-               vp_dev->notify_map_cap = notify;
-       }
-
-       /* Again, we don't know how much we should map, but PAGE_SIZE
-        * is more than enough for all existing devices.
-        */
-       if (device) {
-               vp_dev->device = map_capability(pci_dev, device, 0, 4,
-                                               0, PAGE_SIZE,
-                                               &vp_dev->device_len);
-               if (!vp_dev->device)
-                       goto err_map_device;
-
+       if (mdev->device)
                vp_dev->vdev.config = &virtio_pci_config_ops;
-       } else {
+       else
                vp_dev->vdev.config = &virtio_pci_config_nodev_ops;
-       }
 
        vp_dev->config_vector = vp_config_vector;
        vp_dev->setup_vq = setup_vq;
        vp_dev->del_vq = del_vq;
+       vp_dev->isr = mdev->isr;
+       vp_dev->vdev.id = mdev->id;
 
        return 0;
-
-err_map_device:
-       if (vp_dev->notify_base)
-               pci_iounmap(pci_dev, vp_dev->notify_base);
-err_map_notify:
-       pci_iounmap(pci_dev, vp_dev->isr);
-err_map_isr:
-       pci_iounmap(pci_dev, vp_dev->common);
-err_map_common:
-       return err;
 }
 
 void virtio_pci_modern_remove(struct virtio_pci_device *vp_dev)
 {
-       struct pci_dev *pci_dev = vp_dev->pci_dev;
+       struct virtio_pci_modern_device *mdev = &vp_dev->mdev;
 
-       if (vp_dev->device)
-               pci_iounmap(pci_dev, vp_dev->device);
-       if (vp_dev->notify_base)
-               pci_iounmap(pci_dev, vp_dev->notify_base);
-       pci_iounmap(pci_dev, vp_dev->isr);
-       pci_iounmap(pci_dev, vp_dev->common);
-       pci_release_selected_regions(pci_dev, vp_dev->modern_bars);
+       vp_modern_remove(mdev);
 }
diff --git a/drivers/virtio/virtio_pci_modern_dev.c b/drivers/virtio/virtio_pci_modern_dev.c
new file mode 100644 (file)
index 0000000..cbd6674
--- /dev/null
@@ -0,0 +1,599 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/virtio_pci_modern.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+/*
+ * vp_modern_map_capability - map a part of a virtio pci capability
+ * @mdev: the modern virtio-pci device
+ * @off: offset of the capability
+ * @minlen: minimum length of the capability
+ * @align: alignment requirement
+ * @start: offset within the capability at which to start the mapping
+ * @size: map size
+ * @len: the length that is actually mapped
+ *
+ * Returns the io address for the mapped part of the capability, or NULL on error
+ */
+void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
+                                      size_t minlen,
+                                      u32 align,
+                                      u32 start, u32 size,
+                                      size_t *len)
+{
+       struct pci_dev *dev = mdev->pci_dev;
+       u8 bar;
+       u32 offset, length;
+       void __iomem *p;
+
+       pci_read_config_byte(dev, off + offsetof(struct virtio_pci_cap,
+                                                bar),
+                            &bar);
+       pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, offset),
+                            &offset);
+       pci_read_config_dword(dev, off + offsetof(struct virtio_pci_cap, length),
+                             &length);
+
+       if (length <= start) {
+               dev_err(&dev->dev,
+                       "virtio_pci: bad capability len %u (>%u expected)\n",
+                       length, start);
+               return NULL;
+       }
+
+       if (length - start < minlen) {
+               dev_err(&dev->dev,
+                       "virtio_pci: bad capability len %u (>=%zu expected)\n",
+                       length, minlen);
+               return NULL;
+       }
+
+       length -= start;
+
+       if (start + offset < offset) {
+               dev_err(&dev->dev,
+                       "virtio_pci: map wrap-around %u+%u\n",
+                       start, offset);
+               return NULL;
+       }
+
+       offset += start;
+
+       if (offset & (align - 1)) {
+               dev_err(&dev->dev,
+                       "virtio_pci: offset %u not aligned to %u\n",
+                       offset, align);
+               return NULL;
+       }
+
+       if (length > size)
+               length = size;
+
+       if (len)
+               *len = length;
+
+       if (minlen + offset < minlen ||
+           minlen + offset > pci_resource_len(dev, bar)) {
+               dev_err(&dev->dev,
+                       "virtio_pci: map virtio %zu@%u "
+                       "out of range on bar %i length %lu\n",
+                       minlen, offset,
+                       bar, (unsigned long)pci_resource_len(dev, bar));
+               return NULL;
+       }
+
+       p = pci_iomap_range(dev, bar, offset, length);
+       if (!p)
+               dev_err(&dev->dev,
+                       "virtio_pci: unable to map virtio %u@%u on bar %i\n",
+                       length, offset, bar);
+       return p;
+}
+EXPORT_SYMBOL_GPL(vp_modern_map_capability);
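
As a usage sketch (mirroring vp_modern_probe() later in this file), mapping the one-byte ISR status region found at capability offset isr looks like this; isr is assumed to come from virtio_pci_find_capability():

	mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
					     0, 1, NULL);
	if (!mdev->isr)
		goto err_map_isr;	/* mapping failed, NULL returned */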
+
+/**
+ * virtio_pci_find_capability - walk capabilities to find device info.
+ * @dev: the pci device
+ * @cfg_type: the VIRTIO_PCI_CAP_* value we seek
+ * @ioresource_types: IORESOURCE_MEM and/or IORESOURCE_IO.
+ * @bars: the bitmask of BARs
+ *
+ * Returns offset of the capability, or 0.
+ */
+static inline int virtio_pci_find_capability(struct pci_dev *dev, u8 cfg_type,
+                                            u32 ioresource_types, int *bars)
+{
+       int pos;
+
+       for (pos = pci_find_capability(dev, PCI_CAP_ID_VNDR);
+            pos > 0;
+            pos = pci_find_next_capability(dev, pos, PCI_CAP_ID_VNDR)) {
+               u8 type, bar;
+               pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+                                                        cfg_type),
+                                    &type);
+               pci_read_config_byte(dev, pos + offsetof(struct virtio_pci_cap,
+                                                        bar),
+                                    &bar);
+
+               /* Ignore structures with reserved BAR values */
+               if (bar > 0x5)
+                       continue;
+
+               if (type == cfg_type) {
+                       if (pci_resource_len(dev, bar) &&
+                           pci_resource_flags(dev, bar) & ioresource_types) {
+                               *bars |= (1 << bar);
+                               return pos;
+                       }
+               }
+       }
+       return 0;
+}
+
+/* This is part of the ABI.  Don't screw with it. */
+static inline void check_offsets(void)
+{
+       /* Note: disk space was harmed in compilation of this function. */
+       BUILD_BUG_ON(VIRTIO_PCI_CAP_VNDR !=
+                    offsetof(struct virtio_pci_cap, cap_vndr));
+       BUILD_BUG_ON(VIRTIO_PCI_CAP_NEXT !=
+                    offsetof(struct virtio_pci_cap, cap_next));
+       BUILD_BUG_ON(VIRTIO_PCI_CAP_LEN !=
+                    offsetof(struct virtio_pci_cap, cap_len));
+       BUILD_BUG_ON(VIRTIO_PCI_CAP_CFG_TYPE !=
+                    offsetof(struct virtio_pci_cap, cfg_type));
+       BUILD_BUG_ON(VIRTIO_PCI_CAP_BAR !=
+                    offsetof(struct virtio_pci_cap, bar));
+       BUILD_BUG_ON(VIRTIO_PCI_CAP_OFFSET !=
+                    offsetof(struct virtio_pci_cap, offset));
+       BUILD_BUG_ON(VIRTIO_PCI_CAP_LENGTH !=
+                    offsetof(struct virtio_pci_cap, length));
+       BUILD_BUG_ON(VIRTIO_PCI_NOTIFY_CAP_MULT !=
+                    offsetof(struct virtio_pci_notify_cap,
+                             notify_off_multiplier));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_DFSELECT !=
+                    offsetof(struct virtio_pci_common_cfg,
+                             device_feature_select));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_DF !=
+                    offsetof(struct virtio_pci_common_cfg, device_feature));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_GFSELECT !=
+                    offsetof(struct virtio_pci_common_cfg,
+                             guest_feature_select));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_GF !=
+                    offsetof(struct virtio_pci_common_cfg, guest_feature));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_MSIX !=
+                    offsetof(struct virtio_pci_common_cfg, msix_config));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_NUMQ !=
+                    offsetof(struct virtio_pci_common_cfg, num_queues));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_STATUS !=
+                    offsetof(struct virtio_pci_common_cfg, device_status));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_CFGGENERATION !=
+                    offsetof(struct virtio_pci_common_cfg, config_generation));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SELECT !=
+                    offsetof(struct virtio_pci_common_cfg, queue_select));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_SIZE !=
+                    offsetof(struct virtio_pci_common_cfg, queue_size));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_MSIX !=
+                    offsetof(struct virtio_pci_common_cfg, queue_msix_vector));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_ENABLE !=
+                    offsetof(struct virtio_pci_common_cfg, queue_enable));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_NOFF !=
+                    offsetof(struct virtio_pci_common_cfg, queue_notify_off));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCLO !=
+                    offsetof(struct virtio_pci_common_cfg, queue_desc_lo));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_DESCHI !=
+                    offsetof(struct virtio_pci_common_cfg, queue_desc_hi));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILLO !=
+                    offsetof(struct virtio_pci_common_cfg, queue_avail_lo));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_AVAILHI !=
+                    offsetof(struct virtio_pci_common_cfg, queue_avail_hi));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDLO !=
+                    offsetof(struct virtio_pci_common_cfg, queue_used_lo));
+       BUILD_BUG_ON(VIRTIO_PCI_COMMON_Q_USEDHI !=
+                    offsetof(struct virtio_pci_common_cfg, queue_used_hi));
+}
+
+/*
+ * vp_modern_probe - probe the modern virtio pci device; note that the
+ * caller is required to enable the PCI device before calling this function.
+ * @mdev: the modern virtio-pci device
+ *
+ * Returns 0 on success, a negative errno on failure
+ */
+int vp_modern_probe(struct virtio_pci_modern_device *mdev)
+{
+       struct pci_dev *pci_dev = mdev->pci_dev;
+       int err, common, isr, notify, device;
+       u32 notify_length;
+       u32 notify_offset;
+
+       check_offsets();
+
+       /* We only own devices >= 0x1000 and <= 0x107f: leave the rest. */
+       if (pci_dev->device < 0x1000 || pci_dev->device > 0x107f)
+               return -ENODEV;
+
+       if (pci_dev->device < 0x1040) {
+               /* Transitional devices: use the PCI subsystem device id as
+                * virtio device id, same as legacy driver always did.
+                */
+               mdev->id.device = pci_dev->subsystem_device;
+       } else {
+               /* Modern devices: simply use PCI device id, but start from 0x1040. */
+               mdev->id.device = pci_dev->device - 0x1040;
+       }
+       mdev->id.vendor = pci_dev->subsystem_vendor;
+
+       /* check for a common config: if not, use legacy mode (bar 0). */
+       common = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_COMMON_CFG,
+                                           IORESOURCE_IO | IORESOURCE_MEM,
+                                           &mdev->modern_bars);
+       if (!common) {
+               dev_info(&pci_dev->dev,
+                        "virtio_pci: leaving for legacy driver\n");
+               return -ENODEV;
+       }
+
+       /* If common is there, these should be too... */
+       isr = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_ISR_CFG,
+                                        IORESOURCE_IO | IORESOURCE_MEM,
+                                        &mdev->modern_bars);
+       notify = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_NOTIFY_CFG,
+                                           IORESOURCE_IO | IORESOURCE_MEM,
+                                           &mdev->modern_bars);
+       if (!isr || !notify) {
+               dev_err(&pci_dev->dev,
+                       "virtio_pci: missing capabilities %i/%i/%i\n",
+                       common, isr, notify);
+               return -EINVAL;
+       }
+
+       err = dma_set_mask_and_coherent(&pci_dev->dev, DMA_BIT_MASK(64));
+       if (err)
+               err = dma_set_mask_and_coherent(&pci_dev->dev,
+                                               DMA_BIT_MASK(32));
+       if (err)
+               dev_warn(&pci_dev->dev, "Failed to enable 64-bit or 32-bit DMA.  Trying to continue, but this might not work.\n");
+
+       /* Device capability is only mandatory for devices that have
+        * device-specific configuration.
+        */
+       device = virtio_pci_find_capability(pci_dev, VIRTIO_PCI_CAP_DEVICE_CFG,
+                                           IORESOURCE_IO | IORESOURCE_MEM,
+                                           &mdev->modern_bars);
+
+       err = pci_request_selected_regions(pci_dev, mdev->modern_bars,
+                                          "virtio-pci-modern");
+       if (err)
+               return err;
+
+       err = -EINVAL;
+       mdev->common = vp_modern_map_capability(mdev, common,
+                                     sizeof(struct virtio_pci_common_cfg), 4,
+                                     0, sizeof(struct virtio_pci_common_cfg),
+                                     NULL);
+       if (!mdev->common)
+               goto err_map_common;
+       mdev->isr = vp_modern_map_capability(mdev, isr, sizeof(u8), 1,
+                                            0, 1,
+                                            NULL);
+       if (!mdev->isr)
+               goto err_map_isr;
+
+       /* Read notify_off_multiplier from config space. */
+       pci_read_config_dword(pci_dev,
+                             notify + offsetof(struct virtio_pci_notify_cap,
+                                               notify_off_multiplier),
+                             &mdev->notify_offset_multiplier);
+       /* Read notify length and offset from config space. */
+       pci_read_config_dword(pci_dev,
+                             notify + offsetof(struct virtio_pci_notify_cap,
+                                               cap.length),
+                             &notify_length);
+
+       pci_read_config_dword(pci_dev,
+                             notify + offsetof(struct virtio_pci_notify_cap,
+                                               cap.offset),
+                             &notify_offset);
+
+       /* We don't know how many VQs we'll map ahead of time.
+        * If notify length is small, map it all now.
+        * Otherwise, map each VQ individually later.
+        */
+       if ((u64)notify_length + (notify_offset % PAGE_SIZE) <= PAGE_SIZE) {
+               mdev->notify_base = vp_modern_map_capability(mdev, notify,
+                                                            2, 2,
+                                                            0, notify_length,
+                                                            &mdev->notify_len);
+               if (!mdev->notify_base)
+                       goto err_map_notify;
+       } else {
+               mdev->notify_map_cap = notify;
+       }
+
+       /* Again, we don't know how much we should map, but PAGE_SIZE
+        * is more than enough for all existing devices.
+        */
+       if (device) {
+               mdev->device = vp_modern_map_capability(mdev, device, 0, 4,
+                                                       0, PAGE_SIZE,
+                                                       &mdev->device_len);
+               if (!mdev->device)
+                       goto err_map_device;
+       }
+
+       return 0;
+
+err_map_device:
+       if (mdev->notify_base)
+               pci_iounmap(pci_dev, mdev->notify_base);
+err_map_notify:
+       pci_iounmap(pci_dev, mdev->isr);
+err_map_isr:
+       pci_iounmap(pci_dev, mdev->common);
+err_map_common:
+       return err;
+}
+EXPORT_SYMBOL_GPL(vp_modern_probe);
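
A minimal caller-side sketch, matching what virtio_pci_modern_probe() above now does: the caller must have enabled the PCI device and wired up mdev->pci_dev before the call.

	mdev->pci_dev = pci_dev;	/* caller provides the PCI device */
	err = vp_modern_probe(mdev);	/* maps common/isr/notify/device regions */
	if (err)
		return err;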
+
+/*
+ * vp_modern_remove - remove and clean up the modern virtio pci device
+ * @mdev: the modern virtio-pci device
+ */
+void vp_modern_remove(struct virtio_pci_modern_device *mdev)
+{
+       struct pci_dev *pci_dev = mdev->pci_dev;
+
+       if (mdev->device)
+               pci_iounmap(pci_dev, mdev->device);
+       if (mdev->notify_base)
+               pci_iounmap(pci_dev, mdev->notify_base);
+       pci_iounmap(pci_dev, mdev->isr);
+       pci_iounmap(pci_dev, mdev->common);
+       pci_release_selected_regions(pci_dev, mdev->modern_bars);
+}
+EXPORT_SYMBOL_GPL(vp_modern_remove);
+
+/*
+ * vp_modern_get_features - get features from device
+ * @mdev: the modern virtio-pci device
+ *
+ * Returns the features read from the device
+ */
+u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+       u64 features;
+
+       vp_iowrite32(0, &cfg->device_feature_select);
+       features = vp_ioread32(&cfg->device_feature);
+       vp_iowrite32(1, &cfg->device_feature_select);
+       features |= ((u64)vp_ioread32(&cfg->device_feature) << 32);
+
+       return features;
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_features);
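
For illustration (a hedged sketch, not part of this patch): once probed, a caller can fetch the full 64-bit feature word and test a single bit, e.g. VIRTIO_F_VERSION_1, which is bit 32:

	u64 features = vp_modern_get_features(mdev);

	if (!(features & BIT_ULL(VIRTIO_F_VERSION_1)))
		return -ENODEV;	/* device lacks the modern interface */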
+
+/*
+ * vp_modern_set_features - set features to device
+ * @mdev: the modern virtio-pci device
+ * @features: the features to set for the device
+ */
+void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
+                           u64 features)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+       vp_iowrite32(0, &cfg->guest_feature_select);
+       vp_iowrite32((u32)features, &cfg->guest_feature);
+       vp_iowrite32(1, &cfg->guest_feature_select);
+       vp_iowrite32(features >> 32, &cfg->guest_feature);
+}
+EXPORT_SYMBOL_GPL(vp_modern_set_features);
+
+/*
+ * vp_modern_generation - get the device generation
+ * @mdev: the modern virtio-pci device
+ *
+ * Returns the generation read from the device
+ */
+u32 vp_modern_generation(struct virtio_pci_modern_device *mdev)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+       return vp_ioread8(&cfg->config_generation);
+}
+EXPORT_SYMBOL_GPL(vp_modern_generation);
+
+/*
+ * vp_modern_get_status - get the device status
+ * @mdev: the modern virtio-pci device
+ *
+ * Returns the status read from device
+ */
+u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+       return vp_ioread8(&cfg->device_status);
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_status);
+
+/*
+ * vp_modern_set_status - set status to device
+ * @mdev: the modern virtio-pci device
+ * @status: the status to set for the device
+ */
+void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
+                                u8 status)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+       vp_iowrite8(status, &cfg->device_status);
+}
+EXPORT_SYMBOL_GPL(vp_modern_set_status);
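
These two status helpers compose into the reset handshake that vp_reset() above performs: write status 0, then poll until the device reads back 0 (a hedged restatement of that caller, not a new protocol):

	vp_modern_set_status(mdev, 0);		/* 0 status means reset */
	while (vp_modern_get_status(mdev))	/* wait until the device acks */
		msleep(1);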
+
+/*
+ * vp_modern_queue_vector - set the MSIX vector for a specific virtqueue
+ * @mdev: the modern virtio-pci device
+ * @index: queue index
+ * @vector: the msix vector to use for the virtqueue
+ *
+ * Returns the msix vector read back from the device
+ */
+u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev,
+                          u16 index, u16 vector)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+       vp_iowrite16(index, &cfg->queue_select);
+       vp_iowrite16(vector, &cfg->queue_msix_vector);
+       /* Flush the write out to device */
+       return vp_ioread16(&cfg->queue_msix_vector);
+}
+EXPORT_SYMBOL_GPL(vp_modern_queue_vector);
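
Usage sketch, mirroring setup_vq() above: the device echoes back the vector it accepted, so reading back VIRTIO_MSI_NO_VECTOR signals that the device could not assign it:

	msix_vec = vp_modern_queue_vector(mdev, index, msix_vec);
	if (msix_vec == VIRTIO_MSI_NO_VECTOR)
		return -EBUSY;	/* no resources for this vector */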
+
+/*
+ * vp_modern_config_vector - set the vector for config interrupt
+ * @mdev: the modern virtio-pci device
+ * @vector: the config vector
+ *
+ * Returns the config vector read from the device
+ */
+u16 vp_modern_config_vector(struct virtio_pci_modern_device *mdev,
+                           u16 vector)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+       /* Setup the vector used for configuration events */
+       vp_iowrite16(vector, &cfg->msix_config);
+       /* Verify we had enough resources to assign the vector */
+       /* Will also flush the write out to device */
+       return vp_ioread16(&cfg->msix_config);
+}
+EXPORT_SYMBOL_GPL(vp_modern_config_vector);
+
+/*
+ * vp_modern_queue_address - set the virtqueue address
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ * @desc_addr: address of the descriptor area
+ * @driver_addr: address of the driver area
+ * @device_addr: address of the device area
+ */
+void vp_modern_queue_address(struct virtio_pci_modern_device *mdev,
+                            u16 index, u64 desc_addr, u64 driver_addr,
+                            u64 device_addr)
+{
+       struct virtio_pci_common_cfg __iomem *cfg = mdev->common;
+
+       vp_iowrite16(index, &cfg->queue_select);
+
+       vp_iowrite64_twopart(desc_addr, &cfg->queue_desc_lo,
+                            &cfg->queue_desc_hi);
+       vp_iowrite64_twopart(driver_addr, &cfg->queue_avail_lo,
+                            &cfg->queue_avail_hi);
+       vp_iowrite64_twopart(device_addr, &cfg->queue_used_lo,
+                            &cfg->queue_used_hi);
+}
+EXPORT_SYMBOL_GPL(vp_modern_queue_address);
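
Together with the size and enable helpers below, this gives the queue-activation sequence that setup_vq() and vp_modern_find_vqs() above now use (a sketch; vq and index come from the caller):

	vp_modern_set_queue_size(mdev, index, virtqueue_get_vring_size(vq));
	vp_modern_queue_address(mdev, index, virtqueue_get_desc_addr(vq),
				virtqueue_get_avail_addr(vq),
				virtqueue_get_used_addr(vq));
	vp_modern_set_queue_enable(mdev, index, true);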
+
+/*
+ * vp_modern_set_queue_enable - enable a virtqueue
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ * @enable: whether the virtqueue should be enabled or not
+ */
+void vp_modern_set_queue_enable(struct virtio_pci_modern_device *mdev,
+                               u16 index, bool enable)
+{
+       vp_iowrite16(index, &mdev->common->queue_select);
+       vp_iowrite16(enable, &mdev->common->queue_enable);
+}
+EXPORT_SYMBOL_GPL(vp_modern_set_queue_enable);
+
+/*
+ * vp_modern_get_queue_enable - get whether a virtqueue is enabled
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ *
+ * Returns whether a virtqueue is enabled or not
+ */
+bool vp_modern_get_queue_enable(struct virtio_pci_modern_device *mdev,
+                               u16 index)
+{
+       vp_iowrite16(index, &mdev->common->queue_select);
+
+       return vp_ioread16(&mdev->common->queue_enable);
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_queue_enable);
+
+/*
+ * vp_modern_set_queue_size - set size for a virtqueue
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ * @size: the size of the virtqueue
+ */
+void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev,
+                             u16 index, u16 size)
+{
+       vp_iowrite16(index, &mdev->common->queue_select);
+       vp_iowrite16(size, &mdev->common->queue_size);
+}
+EXPORT_SYMBOL_GPL(vp_modern_set_queue_size);
+
+/*
+ * vp_modern_get_queue_size - get size for a virtqueue
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ *
+ * Returns the size of the virtqueue
+ */
+u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
+                            u16 index)
+{
+       vp_iowrite16(index, &mdev->common->queue_select);
+
+       return vp_ioread16(&mdev->common->queue_size);
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_queue_size);
+
+/*
+ * vp_modern_get_num_queues - get the number of virtqueues
+ * @mdev: the modern virtio-pci device
+ *
+ * Returns the number of virtqueues
+ */
+u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev)
+{
+       return vp_ioread16(&mdev->common->num_queues);
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_num_queues);
+
+/*
+ * vp_modern_get_queue_notify_off - get notification offset for a virtqueue
+ * @mdev: the modern virtio-pci device
+ * @index: the queue index
+ *
+ * Returns the notification offset for a virtqueue
+ */
+u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
+                                  u16 index)
+{
+       vp_iowrite16(index, &mdev->common->queue_select);
+
+       return vp_ioread16(&mdev->common->queue_notify_off);
+}
+EXPORT_SYMBOL_GPL(vp_modern_get_queue_notify_off);
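
A hedged sketch of how the returned offset is consumed, as in setup_vq() above: the doorbell address is notify_base plus the per-queue offset scaled by the multiplier read from config space:

	u16 off = vp_modern_get_queue_notify_off(mdev, index);

	vq->priv = (void __force *)mdev->notify_base +
		   off * mdev->notify_offset_multiplier;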
+
+MODULE_VERSION("0.1");
+MODULE_DESCRIPTION("Modern Virtio PCI Device");
+MODULE_AUTHOR("Jason Wang <jasowang@redhat.com>");
+MODULE_LICENSE("GPL");
index 4a9ddb4..e28acf4 100644 (file)
@@ -225,9 +225,8 @@ static void virtio_vdpa_del_vq(struct virtqueue *vq)
        list_del(&info->node);
        spin_unlock_irqrestore(&vd_dev->lock, flags);
 
-       /* Select and deactivate the queue */
+       /* Select and deactivate the queue (best effort) */
        ops->set_vq_ready(vdpa, index, 0);
-       WARN_ON(ops->get_vq_ready(vdpa, index));
 
        vring_del_virtqueue(vq);
 
index 54d7963..1b15afe 100644 (file)
@@ -1997,9 +1997,9 @@ static int vme_bus_remove(struct device *dev)
 
        driver = dev->platform_data;
        if (driver->remove)
-               return driver->remove(vdev);
+               driver->remove(vdev);
 
-       return -ENODEV;
+       return 0;
 }
 
 struct bus_type vme_bus_type = {
index e17c8f7..cd88215 100644 (file)
@@ -688,12 +688,22 @@ static void ds9490r_search(void *data, struct w1_master *master,
         * packet size.
         */
        const size_t bufsize = 2 * 64;
-       u64 *buf;
+       u64 *buf, *found_ids;
 
        buf = kmalloc(bufsize, GFP_KERNEL);
        if (!buf)
                return;
 
+       /*
+        * We are holding the bus mutex during the scan, but adding devices via the
+        * callback needs the bus to be unlocked. So we queue up found ids here.
+        */
+       found_ids = kmalloc_array(master->max_slave_count, sizeof(u64), GFP_KERNEL);
+       if (!found_ids) {
+               kfree(buf);
+               return;
+       }
+
        mutex_lock(&master->bus_mutex);
 
        /* address to start searching at */
@@ -729,13 +739,13 @@ static void ds9490r_search(void *data, struct w1_master *master,
                        if (err < 0)
                                break;
                        for (i = 0; i < err/8; ++i) {
-                               ++found;
-                               if (found <= search_limit)
-                                       callback(master, buf[i]);
+                               found_ids[found++] = buf[i];
                                /* can't know if there will be a discrepancy
                                 * value after until the next id */
-                               if (found == search_limit)
+                               if (found == search_limit) {
                                        master->search_id = buf[i];
+                                       break;
+                               }
                        }
                }
 
@@ -759,9 +769,14 @@ static void ds9490r_search(void *data, struct w1_master *master,
                        master->max_slave_count);
                set_bit(W1_WARN_MAX_COUNT, &master->flags);
        }
+
 search_out:
        mutex_unlock(&master->bus_mutex);
        kfree(buf);
+
+       for (i = 0; i < found; i++) /* run callback for all queued up IDs */
+               callback(master, found_ids[i]);
+       kfree(found_ids);
 }
 
 #if 0
index 3712b1e..976eea2 100644 (file)
@@ -667,28 +667,24 @@ static inline int w1_DS18B20_get_resolution(struct w1_slave *sl)
  */
 static inline int w1_DS18B20_convert_temp(u8 rom[9])
 {
-       int t;
-       u32 bv;
+       u16 bv;
+       s16 t;
+
+       /* Signed 16-bit value to unsigned, cpu order */
+       bv = le16_to_cpup((__le16 *)rom);
 
        /* Config register bit R2 = 1 - GX20MH01 in 13 or 14 bit resolution mode */
        if (rom[4] & 0x80) {
-               /* Signed 16-bit value to unsigned, cpu order */
-               bv = le16_to_cpup((__le16 *)rom);
-
                /* Insert two temperature bits from config register */
                /* Avoid arithmetic shift of signed value */
                bv = (bv << 2) | (rom[4] & 3);
-
-               t = (int) sign_extend32(bv, 17); /* Degrees, lowest bit is 2^-6 */
-               return (t*1000)/64;  /* Millidegrees */
+               t = (s16) bv;   /* Degrees, lowest bit is 2^-6 */
+               return (int)t * 1000 / 64;      /* Sign-extend to int; millidegrees */
        }
-
-       t = (int)le16_to_cpup((__le16 *)rom);
-       return t*1000/16;
+       t = (s16)bv;    /* Degrees, lowest bit is 2^-4 */
+       return (int)t * 1000 / 16;      /* Sign-extend to int; millidegrees */
 }
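
Worked example for the default path above (lowest bit = 2^-4 degC): a raw reading of 0x0191 (401) gives 401 * 1000 / 16 = 25062 millidegrees, i.e. 25.0625 degC; negative readings sign-extend through the s16 cast before the multiply.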
 
-
-
 /**
  * w1_DS18S20_convert_temp() - temperature computation for DS18S20
  * @rom: data read from device RAM (8 data bytes + 1 CRC byte)
index 15a2ee3..f2ae2e5 100644 (file)
@@ -25,6 +25,8 @@
 #include "w1_netlink.h"
 
 #define W1_FAMILY_DEFAULT      0
+#define W1_FAMILY_DS28E04       0x1C /* for crc quirk */
+
 
 static int w1_timeout = 10;
 module_param_named(timeout, w1_timeout, int, 0);
@@ -913,11 +915,44 @@ void w1_reconnect_slaves(struct w1_family *f, int attach)
        mutex_unlock(&w1_mlock);
 }
 
+static int w1_addr_crc_is_valid(struct w1_master *dev, u64 rn)
+{
+       u64 rn_le = cpu_to_le64(rn);
+       struct w1_reg_num *tmp = (struct w1_reg_num *)&rn;
+       u8 crc;
+
+       crc = w1_calc_crc8((u8 *)&rn_le, 7);
+
+       /* quirk:
+        *   DS28E04 (1w eeprom) has strapping pins to change
+        *   address, but will not update the crc. So normal rules
+        *   for consistent w1 addresses are violated. We test
+        *   with the 7 LSBs of the address forced high.
+        *
+        *   (char*)&rn_le = { family, addr_lsb, ..., addr_msb, crc }.
+        */
+       if (crc != tmp->crc && tmp->family == W1_FAMILY_DS28E04) {
+               u64 corr_le = rn_le;
+
+               ((u8 *)&corr_le)[1] |= 0x7f;
+               crc = w1_calc_crc8((u8 *)&corr_le, 7);
+
+               dev_info(&dev->dev, "DS28E04 crc workaround on %02x.%012llx.%02x\n",
+                       tmp->family, (unsigned long long)tmp->id, tmp->crc);
+       }
+
+       if (crc != tmp->crc) {
+               dev_dbg(&dev->dev, "w1 addr crc mismatch: %02x.%012llx.%02x != 0x%02x.\n",
+                       tmp->family, (unsigned long long)tmp->id, tmp->crc, crc);
+               return 0;
+       }
+       return 1;
+}
+
 void w1_slave_found(struct w1_master *dev, u64 rn)
 {
        struct w1_slave *sl;
        struct w1_reg_num *tmp;
-       u64 rn_le = cpu_to_le64(rn);
 
        atomic_inc(&dev->refcnt);
 
@@ -927,7 +962,7 @@ void w1_slave_found(struct w1_master *dev, u64 rn)
        if (sl) {
                set_bit(W1_SLAVE_ACTIVE, &sl->flags);
        } else {
-               if (rn && tmp->crc == w1_calc_crc8((u8 *)&rn_le, 7))
+               if (rn && w1_addr_crc_is_valid(dev, rn))
                        w1_attach_slave_device(dev, tmp);
        }
 
index 9867a3a..688b112 100644 (file)
@@ -273,7 +273,6 @@ module_exit(cpu5wdt_exit_module);
 
 MODULE_AUTHOR("Heiko Ronsdorf <hero@ihg.uni-duisburg.de>");
 MODULE_DESCRIPTION("sma cpu5 watchdog driver");
-MODULE_SUPPORTED_DEVICE("sma cpu5 watchdog");
 MODULE_LICENSE("GPL");
 
 module_param_hw(port, int, ioport, 0);
index 808eeb4..1eafe0b 100644 (file)
@@ -172,7 +172,6 @@ MODULE_PARM_DESC(wd2_timeout, "Default watchdog2 timeout in 1/10secs");
 MODULE_AUTHOR("Eric Brower <ebrower@usa.net>");
 MODULE_DESCRIPTION("Hardware watchdog driver for Sun Microsystems CP1400/1500");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("watchdog");
 
 static void cpwd_writew(u16 val, void __iomem *addr)
 {
index c5967d8..e023d7d 100644 (file)
@@ -620,7 +620,7 @@ err_out:
        return ret;
 }
 
-static int mei_wdt_remove(struct mei_cl_device *cldev)
+static void mei_wdt_remove(struct mei_cl_device *cldev)
 {
        struct mei_wdt *wdt = mei_cldev_get_drvdata(cldev);
 
@@ -637,8 +637,6 @@ static int mei_wdt_remove(struct mei_cl_device *cldev)
        dbgfs_unregister(wdt);
 
        kfree(wdt);
-
-       return 0;
 }
 
 #define MEI_UUID_WD UUID_LE(0x05B79A6F, 0x4628, 0x4D7F, \
index 7008596..747e346 100644 (file)
@@ -46,7 +46,6 @@
 
 MODULE_AUTHOR("David S. Miller <davem@davemloft.net>");
 MODULE_DESCRIPTION("Hardware watchdog driver for Sun RIO");
-MODULE_SUPPORTED_DEVICE("watchdog");
 MODULE_LICENSE("GPL");
 
 #define DRIVER_NAME    "riowd"
index 41645fe..ea0efd2 100644 (file)
@@ -50,11 +50,11 @@ config XEN_BALLOON_MEMORY_HOTPLUG
 
          SUBSYSTEM=="memory", ACTION=="add", RUN+="/bin/sh -c '[ -f /sys$devpath/state ] && echo online > /sys$devpath/state'"
 
-config XEN_BALLOON_MEMORY_HOTPLUG_LIMIT
+config XEN_MEMORY_HOTPLUG_LIMIT
        int "Hotplugged memory limit (in GiB) for a PV guest"
        default 512
        depends on XEN_HAVE_PVMMU
-       depends on XEN_BALLOON_MEMORY_HOTPLUG
+       depends on MEMORY_HOTPLUG
        help
          Maximum amount of memory (in GiB) that a PV guest can be
          expanded to when using memory hotplug.
index b57b206..671c712 100644 (file)
@@ -331,7 +331,7 @@ static enum bp_state reserve_additional_memory(void)
        mutex_unlock(&balloon_mutex);
        /* add_memory_resource() requires the device_hotplug lock */
        lock_device_hotplug();
-       rc = add_memory_resource(nid, resource, MEMHP_MERGE_RESOURCE);
+       rc = add_memory_resource(nid, resource, MHP_MERGE_RESOURCE);
        unlock_device_hotplug();
        mutex_lock(&balloon_mutex);
 
index da87f3a..b8f2f97 100644 (file)
@@ -47,6 +47,11 @@ static unsigned evtchn_2l_max_channels(void)
        return EVTCHN_2L_NR_CHANNELS;
 }
 
+static void evtchn_2l_remove(evtchn_port_t evtchn, unsigned int cpu)
+{
+       clear_bit(evtchn, BM(per_cpu(cpu_evtchn_mask, cpu)));
+}
+
 static void evtchn_2l_bind_to_cpu(evtchn_port_t evtchn, unsigned int cpu,
                                  unsigned int old_cpu)
 {
@@ -72,12 +77,6 @@ static bool evtchn_2l_is_pending(evtchn_port_t port)
        return sync_test_bit(port, BM(&s->evtchn_pending[0]));
 }
 
-static bool evtchn_2l_test_and_set_mask(evtchn_port_t port)
-{
-       struct shared_info *s = HYPERVISOR_shared_info;
-       return sync_test_and_set_bit(port, BM(&s->evtchn_mask[0]));
-}
-
 static void evtchn_2l_mask(evtchn_port_t port)
 {
        struct shared_info *s = HYPERVISOR_shared_info;
@@ -355,18 +354,27 @@ static void evtchn_2l_resume(void)
                                EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
 }
 
+static int evtchn_2l_percpu_deinit(unsigned int cpu)
+{
+       memset(per_cpu(cpu_evtchn_mask, cpu), 0, sizeof(xen_ulong_t) *
+                       EVTCHN_2L_NR_CHANNELS/BITS_PER_EVTCHN_WORD);
+
+       return 0;
+}
+
 static const struct evtchn_ops evtchn_ops_2l = {
        .max_channels      = evtchn_2l_max_channels,
        .nr_channels       = evtchn_2l_max_channels,
+       .remove            = evtchn_2l_remove,
        .bind_to_cpu       = evtchn_2l_bind_to_cpu,
        .clear_pending     = evtchn_2l_clear_pending,
        .set_pending       = evtchn_2l_set_pending,
        .is_pending        = evtchn_2l_is_pending,
-       .test_and_set_mask = evtchn_2l_test_and_set_mask,
        .mask              = evtchn_2l_mask,
        .unmask            = evtchn_2l_unmask,
        .handle_events     = evtchn_2l_handle_events,
        .resume            = evtchn_2l_resume,
+       .percpu_deinit     = evtchn_2l_percpu_deinit,
 };
 
 void __init xen_evtchn_2l_init(void)
index b249f2d..7bbfd58 100644 (file)
@@ -98,13 +98,19 @@ struct irq_info {
        short refcnt;
        u8 spurious_cnt;
        u8 is_accounted;
-       enum xen_irq_type type; /* type */
+       short type;             /* type: IRQT_* */
+       u8 mask_reason;         /* Why is event channel masked */
+#define EVT_MASK_REASON_EXPLICIT       0x01
+#define EVT_MASK_REASON_TEMPORARY      0x02
+#define EVT_MASK_REASON_EOI_PENDING    0x04
+       u8 is_active;           /* Is event just being handled? */
        unsigned irq;
        evtchn_port_t evtchn;   /* event channel */
        unsigned short cpu;     /* cpu bound */
        unsigned short eoi_cpu; /* EOI must happen on this cpu-1 */
        unsigned int irq_epoch; /* If eoi_cpu valid: irq_epoch of event */
        u64 eoi_time;           /* Time in jiffies when to EOI. */
+       raw_spinlock_t lock;
 
        union {
                unsigned short virq;
@@ -154,6 +160,7 @@ static DEFINE_RWLOCK(evtchn_rwlock);
  *   evtchn_rwlock
  *     IRQ-desc lock
  *       percpu eoi_list_lock
+ *         irq_info->lock
  */
 
 static LIST_HEAD(xen_irq_list_head);
@@ -304,6 +311,8 @@ static int xen_irq_info_common_setup(struct irq_info *info,
        info->irq = irq;
        info->evtchn = evtchn;
        info->cpu = cpu;
+       info->mask_reason = EVT_MASK_REASON_EXPLICIT;
+       raw_spin_lock_init(&info->lock);
 
        ret = set_evtchn_to_irq(evtchn, irq);
        if (ret < 0)
@@ -323,6 +332,8 @@ static int xen_irq_info_evtchn_setup(unsigned irq,
 
        ret = xen_irq_info_common_setup(info, irq, IRQT_EVTCHN, evtchn, 0);
        info->u.interdomain = dev;
+       if (dev)
+               atomic_inc(&dev->event_channels);
 
        return ret;
 }
@@ -375,6 +386,7 @@ static int xen_irq_info_pirq_setup(unsigned irq,
 static void xen_irq_info_cleanup(struct irq_info *info)
 {
        set_evtchn_to_irq(info->evtchn, -1);
+       xen_evtchn_port_remove(info->evtchn, info->cpu);
        info->evtchn = 0;
        channels_on_cpu_dec(info);
 }
@@ -456,6 +468,34 @@ unsigned int cpu_from_evtchn(evtchn_port_t evtchn)
        return ret;
 }
 
+static void do_mask(struct irq_info *info, u8 reason)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&info->lock, flags);
+
+       if (!info->mask_reason)
+               mask_evtchn(info->evtchn);
+
+       info->mask_reason |= reason;
+
+       raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
+static void do_unmask(struct irq_info *info, u8 reason)
+{
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&info->lock, flags);
+
+       info->mask_reason &= ~reason;
+
+       if (!info->mask_reason)
+               unmask_evtchn(info->evtchn);
+
+       raw_spin_unlock_irqrestore(&info->lock, flags);
+}
+
 #ifdef CONFIG_X86
 static bool pirq_check_eoi_map(unsigned irq)
 {
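
The do_mask()/do_unmask() pair added above replaces the old unconditional
mask/unmask calls with reason-tracked masking: the channel is really unmasked
only when the last reason is cleared, so a short-lived TEMPORARY mask can no
longer undo an EXPLICIT one. A minimal stand-alone model of that invariant
(illustrative only; names hypothetical):

    #include <stdio.h>

    #define REASON_EXPLICIT  0x01
    #define REASON_TEMPORARY 0x02

    static unsigned int mask_reason;  /* guarded by info->lock in the driver */
    static int hw_masked;

    static void model_mask(unsigned int reason)
    {
            if (!mask_reason)
                    hw_masked = 1;    /* only the first reason really masks */
            mask_reason |= reason;
    }

    static void model_unmask(unsigned int reason)
    {
            mask_reason &= ~reason;
            if (!mask_reason)
                    hw_masked = 0;    /* only the last reason really unmasks */
    }

    int main(void)
    {
            model_mask(REASON_EXPLICIT);
            model_mask(REASON_TEMPORARY);   /* e.g. around a vcpu rebind */
            model_unmask(REASON_TEMPORARY);
            printf("masked after TEMPORARY drops: %d\n", hw_masked); /* 1 */
            model_unmask(REASON_EXPLICIT);
            printf("masked after EXPLICIT drops:  %d\n", hw_masked); /* 0 */
            return 0;
    }
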
@@ -568,18 +608,28 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
                return;
 
        if (spurious) {
+               struct xenbus_device *dev = info->u.interdomain;
+               unsigned int threshold = 1;
+
+               if (dev && dev->spurious_threshold)
+                       threshold = dev->spurious_threshold;
+
                if ((1 << info->spurious_cnt) < (HZ << 2)) {
                        if (info->spurious_cnt != 0xFF)
                                info->spurious_cnt++;
                }
-               if (info->spurious_cnt > 1) {
-                       delay = 1 << (info->spurious_cnt - 2);
+               if (info->spurious_cnt > threshold) {
+                       delay = 1 << (info->spurious_cnt - 1 - threshold);
                        if (delay > HZ)
                                delay = HZ;
                        if (!info->eoi_time)
                                info->eoi_cpu = smp_processor_id();
                        info->eoi_time = get_jiffies_64() + delay;
+                       if (dev)
+                               atomic_add(delay, &dev->jiffies_eoi_delayed);
                }
+               if (dev)
+                       atomic_inc(&dev->spurious_events);
        } else {
                info->spurious_cnt = 0;
        }
@@ -592,7 +642,7 @@ static void xen_irq_lateeoi_locked(struct irq_info *info, bool spurious)
        }
 
        info->eoi_time = 0;
-       unmask_evtchn(evtchn);
+       do_unmask(info, EVT_MASK_REASON_EOI_PENDING);
 }
 
 static void xen_irq_lateeoi_worker(struct work_struct *work)
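
With the per-device threshold, the EOI delay for a run of spurious events is
1 << (spurious_cnt - 1 - threshold) jiffies, capped at HZ; raising
spurious_threshold via sysfs shifts the whole ramp right. A quick stand-alone
model of that ramp (HZ value hypothetical):

    #include <stdio.h>

    #define HZ 250  /* hypothetical; jiffies per second */

    static unsigned int eoi_delay(unsigned int spurious_cnt,
                                  unsigned int threshold)
    {
            unsigned int delay;

            if (spurious_cnt <= threshold)
                    return 0;               /* below threshold: no delay */
            delay = 1u << (spurious_cnt - 1 - threshold);
            return delay > HZ ? HZ : delay;
    }

    int main(void)
    {
            unsigned int cnt;

            for (cnt = 1; cnt <= 12; cnt++)
                    printf("spurious_cnt=%2u threshold=1 -> delay=%u jiffies\n",
                           cnt, eoi_delay(cnt, 1));
            return 0;
    }
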
@@ -761,6 +811,12 @@ static void xen_evtchn_close(evtchn_port_t port)
                BUG();
 }
 
+static void event_handler_exit(struct irq_info *info)
+{
+       smp_store_release(&info->is_active, 0);
+       clear_evtchn(info->evtchn);
+}
+
 static void pirq_query_unmask(int irq)
 {
        struct physdev_irq_status_query irq_status;
@@ -779,14 +835,15 @@ static void pirq_query_unmask(int irq)
 
 static void eoi_pirq(struct irq_data *data)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
        struct physdev_eoi eoi = { .irq = pirq_from_irq(data->irq) };
        int rc = 0;
 
        if (!VALID_EVTCHN(evtchn))
                return;
 
-       clear_evtchn(evtchn);
+       event_handler_exit(info);
 
        if (pirq_needs_eoi(data->irq)) {
                rc = HYPERVISOR_physdev_op(PHYSDEVOP_eoi, &eoi);
@@ -837,7 +894,8 @@ static unsigned int __startup_pirq(unsigned int irq)
                goto err;
 
 out:
-       unmask_evtchn(evtchn);
+       do_unmask(info, EVT_MASK_REASON_EXPLICIT);
+
        eoi_pirq(irq_get_irq_data(irq));
 
        return 0;
@@ -864,7 +922,7 @@ static void shutdown_pirq(struct irq_data *data)
        if (!VALID_EVTCHN(evtchn))
                return;
 
-       mask_evtchn(evtchn);
+       do_mask(info, EVT_MASK_REASON_EXPLICIT);
        xen_evtchn_close(evtchn);
        xen_irq_info_cleanup(info);
 }
@@ -908,6 +966,7 @@ static void __unbind_from_irq(unsigned int irq)
 
        if (VALID_EVTCHN(evtchn)) {
                unsigned int cpu = cpu_from_irq(irq);
+               struct xenbus_device *dev;
 
                xen_evtchn_close(evtchn);
 
@@ -918,6 +977,11 @@ static void __unbind_from_irq(unsigned int irq)
                case IRQT_IPI:
                        per_cpu(ipi_to_irq, cpu)[ipi_from_irq(irq)] = -1;
                        break;
+               case IRQT_EVTCHN:
+                       dev = info->u.interdomain;
+                       if (dev)
+                               atomic_dec(&dev->event_channels);
+                       break;
                default:
                        break;
                }
@@ -1581,6 +1645,7 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
 {
        int irq;
        struct irq_info *info;
+       struct xenbus_device *dev;
 
        irq = get_evtchn_to_irq(port);
        if (irq == -1)
@@ -1609,6 +1674,12 @@ void handle_irq_for_port(evtchn_port_t port, struct evtchn_loop_ctrl *ctrl)
        }
 
        info = info_for_irq(irq);
+       if (xchg_acquire(&info->is_active, 1))
+               return;
+
+       dev = (info->type == IRQT_EVTCHN) ? info->u.interdomain : NULL;
+       if (dev)
+               atomic_inc(&dev->events);
 
        if (ctrl->defer_eoi) {
                info->eoi_cpu = smp_processor_id();
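
The xchg_acquire() above pairs with the smp_store_release() in
event_handler_exit(): a CPU only runs the handler if it flips is_active
0 -> 1, and the release store re-arms it, so one port can never be handled on
two CPUs at once. A C11 model of the pairing (stand-alone sketch):

    #include <stdatomic.h>
    #include <stdio.h>

    static atomic_int is_active;

    static int try_enter(void)
    {
            /* like xchg_acquire(&info->is_active, 1): claim only on 0 -> 1 */
            return atomic_exchange_explicit(&is_active, 1,
                                            memory_order_acquire) == 0;
    }

    static void handler_exit(void)
    {
            /* like smp_store_release(&info->is_active, 0) */
            atomic_store_explicit(&is_active, 0, memory_order_release);
    }

    int main(void)
    {
            printf("first entry: %d\n", try_enter());  /* 1: claimed */
            printf("re-entry:    %d\n", try_enter());  /* 0: rejected */
            handler_exit();
            printf("after exit:  %d\n", try_enter());  /* 1: claimable again */
            return 0;
    }
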
@@ -1697,10 +1768,10 @@ void rebind_evtchn_irq(evtchn_port_t evtchn, int irq)
 }
 
 /* Rebind an evtchn so that it gets delivered to a specific cpu */
-static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
+static int xen_rebind_evtchn_to_cpu(struct irq_info *info, unsigned int tcpu)
 {
        struct evtchn_bind_vcpu bind_vcpu;
-       int masked;
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
        if (!VALID_EVTCHN(evtchn))
                return -1;
@@ -1716,7 +1787,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
         * Mask the event while changing the VCPU binding to prevent
         * it being delivered on an unexpected VCPU.
         */
-       masked = test_and_set_mask(evtchn);
+       do_mask(info, EVT_MASK_REASON_TEMPORARY);
 
        /*
         * If this fails, it usually just indicates that we're dealing with a
@@ -1726,8 +1797,7 @@ static int xen_rebind_evtchn_to_cpu(evtchn_port_t evtchn, unsigned int tcpu)
        if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_vcpu, &bind_vcpu) >= 0)
                bind_evtchn_to_cpu(evtchn, tcpu, false);
 
-       if (!masked)
-               unmask_evtchn(evtchn);
+       do_unmask(info, EVT_MASK_REASON_TEMPORARY);
 
        return 0;
 }
@@ -1766,7 +1836,7 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
        unsigned int tcpu = select_target_cpu(dest);
        int ret;
 
-       ret = xen_rebind_evtchn_to_cpu(evtchn_from_irq(data->irq), tcpu);
+       ret = xen_rebind_evtchn_to_cpu(info_for_irq(data->irq), tcpu);
        if (!ret)
                irq_data_update_effective_affinity(data, cpumask_of(tcpu));
 
@@ -1775,28 +1845,29 @@ static int set_affinity_irq(struct irq_data *data, const struct cpumask *dest,
 
 static void enable_dynirq(struct irq_data *data)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
        if (VALID_EVTCHN(evtchn))
-               unmask_evtchn(evtchn);
+               do_unmask(info, EVT_MASK_REASON_EXPLICIT);
 }
 
 static void disable_dynirq(struct irq_data *data)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(data->irq);
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
        if (VALID_EVTCHN(evtchn))
-               mask_evtchn(evtchn);
+               do_mask(info, EVT_MASK_REASON_EXPLICIT);
 }
 
 static void ack_dynirq(struct irq_data *data)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(data->irq);
-
-       if (!VALID_EVTCHN(evtchn))
-               return;
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
-       clear_evtchn(evtchn);
+       if (VALID_EVTCHN(evtchn))
+               event_handler_exit(info);
 }
 
 static void mask_ack_dynirq(struct irq_data *data)
@@ -1805,18 +1876,39 @@ static void mask_ack_dynirq(struct irq_data *data)
        ack_dynirq(data);
 }
 
+static void lateeoi_ack_dynirq(struct irq_data *data)
+{
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
+
+       if (VALID_EVTCHN(evtchn)) {
+               do_mask(info, EVT_MASK_REASON_EOI_PENDING);
+               event_handler_exit(info);
+       }
+}
+
+static void lateeoi_mask_ack_dynirq(struct irq_data *data)
+{
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
+
+       if (VALID_EVTCHN(evtchn)) {
+               do_mask(info, EVT_MASK_REASON_EXPLICIT);
+               event_handler_exit(info);
+       }
+}
+
 static int retrigger_dynirq(struct irq_data *data)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(data->irq);
-       int masked;
+       struct irq_info *info = info_for_irq(data->irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
        if (!VALID_EVTCHN(evtchn))
                return 0;
 
-       masked = test_and_set_mask(evtchn);
+       do_mask(info, EVT_MASK_REASON_TEMPORARY);
        set_evtchn(evtchn);
-       if (!masked)
-               unmask_evtchn(evtchn);
+       do_unmask(info, EVT_MASK_REASON_TEMPORARY);
 
        return 1;
 }
@@ -1915,10 +2007,11 @@ static void restore_cpu_ipis(unsigned int cpu)
 /* Clear an irq's pending state, in preparation for polling on it */
 void xen_clear_irq_pending(int irq)
 {
-       evtchn_port_t evtchn = evtchn_from_irq(irq);
+       struct irq_info *info = info_for_irq(irq);
+       evtchn_port_t evtchn = info ? info->evtchn : 0;
 
        if (VALID_EVTCHN(evtchn))
-               clear_evtchn(evtchn);
+               event_handler_exit(info);
 }
 EXPORT_SYMBOL(xen_clear_irq_pending);
 void xen_set_irq_pending(int irq)
@@ -2030,8 +2123,8 @@ static struct irq_chip xen_lateeoi_chip __read_mostly = {
        .irq_mask               = disable_dynirq,
        .irq_unmask             = enable_dynirq,
 
-       .irq_ack                = mask_ack_dynirq,
-       .irq_mask_ack           = mask_ack_dynirq,
+       .irq_ack                = lateeoi_ack_dynirq,
+       .irq_mask_ack           = lateeoi_mask_ack_dynirq,
 
        .irq_set_affinity       = set_affinity_irq,
        .irq_retrigger          = retrigger_dynirq,
index b234f17..ad9fe51 100644 (file)
@@ -209,12 +209,6 @@ static bool evtchn_fifo_is_pending(evtchn_port_t port)
        return sync_test_bit(EVTCHN_FIFO_BIT(PENDING, word), BM(word));
 }
 
-static bool evtchn_fifo_test_and_set_mask(evtchn_port_t port)
-{
-       event_word_t *word = event_word_from_port(port);
-       return sync_test_and_set_bit(EVTCHN_FIFO_BIT(MASKED, word), BM(word));
-}
-
 static void evtchn_fifo_mask(evtchn_port_t port)
 {
        event_word_t *word = event_word_from_port(port);
@@ -423,7 +417,6 @@ static const struct evtchn_ops evtchn_ops_fifo = {
        .clear_pending     = evtchn_fifo_clear_pending,
        .set_pending       = evtchn_fifo_set_pending,
        .is_pending        = evtchn_fifo_is_pending,
-       .test_and_set_mask = evtchn_fifo_test_and_set_mask,
        .mask              = evtchn_fifo_mask,
        .unmask            = evtchn_fifo_unmask,
        .handle_events     = evtchn_fifo_handle_events,
index 0a97c05..4d3398e 100644 (file)
@@ -14,13 +14,13 @@ struct evtchn_ops {
        unsigned (*nr_channels)(void);
 
        int (*setup)(evtchn_port_t port);
+       void (*remove)(evtchn_port_t port, unsigned int cpu);
        void (*bind_to_cpu)(evtchn_port_t evtchn, unsigned int cpu,
                            unsigned int old_cpu);
 
        void (*clear_pending)(evtchn_port_t port);
        void (*set_pending)(evtchn_port_t port);
        bool (*is_pending)(evtchn_port_t port);
-       bool (*test_and_set_mask)(evtchn_port_t port);
        void (*mask)(evtchn_port_t port);
        void (*unmask)(evtchn_port_t port);
 
@@ -54,6 +54,13 @@ static inline int xen_evtchn_port_setup(evtchn_port_t evtchn)
        return 0;
 }
 
+static inline void xen_evtchn_port_remove(evtchn_port_t evtchn,
+                                         unsigned int cpu)
+{
+       if (evtchn_ops->remove)
+               evtchn_ops->remove(evtchn, cpu);
+}
+
 static inline void xen_evtchn_port_bind_to_cpu(evtchn_port_t evtchn,
                                               unsigned int cpu,
                                               unsigned int old_cpu)
@@ -76,11 +83,6 @@ static inline bool test_evtchn(evtchn_port_t port)
        return evtchn_ops->is_pending(port);
 }
 
-static inline bool test_and_set_mask(evtchn_port_t port)
-{
-       return evtchn_ops->test_and_set_mask(port);
-}
-
 static inline void mask_evtchn(evtchn_port_t port)
 {
        return evtchn_ops->mask(port);
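
The new .remove hook follows the same convention as .setup: a backend fills
in only the hooks it needs and the inline wrappers tolerate a NULL entry,
which is why only the 2-level ABI grows a remove implementation. A
stand-alone sketch of that dispatch style (names hypothetical):

    #include <stdio.h>

    struct ops {
            void (*setup)(int port);                    /* optional hook */
            void (*remove)(int port, unsigned int cpu); /* optional hook */
    };

    static void twol_remove(int port, unsigned int cpu)
    {
            printf("2l: clear port %d bit on cpu %u\n", port, cpu);
    }

    static const struct ops ops_2l   = { .remove = twol_remove };
    static const struct ops ops_fifo = { .remove = NULL }; /* not needed */

    static void port_remove(const struct ops *o, int port, unsigned int cpu)
    {
            if (o->remove)          /* an absent hook is a silent no-op */
                    o->remove(port, cpu);
    }

    int main(void)
    {
            port_remove(&ops_2l, 3, 0);
            port_remove(&ops_fifo, 3, 0);
            return 0;
    }
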
index a7a8571..c99415a 100644 (file)
@@ -162,6 +162,7 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 {
        struct user_evtchn *evtchn = data;
        struct per_user_data *u = evtchn->user;
+       unsigned int prod, cons;
 
        WARN(!evtchn->enabled,
             "Interrupt for port %u, but apparently not enabled; per-user %p\n",
@@ -171,10 +172,14 @@ static irqreturn_t evtchn_interrupt(int irq, void *data)
 
        spin_lock(&u->ring_prod_lock);
 
-       if ((u->ring_prod - u->ring_cons) < u->ring_size) {
-               *evtchn_ring_entry(u, u->ring_prod) = evtchn->port;
-               wmb(); /* Ensure ring contents visible */
-               if (u->ring_cons == u->ring_prod++) {
+       prod = READ_ONCE(u->ring_prod);
+       cons = READ_ONCE(u->ring_cons);
+
+       if ((prod - cons) < u->ring_size) {
+               *evtchn_ring_entry(u, prod) = evtchn->port;
+               smp_wmb(); /* Ensure ring contents visible */
+               WRITE_ONCE(u->ring_prod, prod + 1);
+               if (cons == prod) {
                        wake_up_interruptible(&u->evtchn_wait);
                        kill_fasync(&u->evtchn_async_queue,
                                    SIGIO, POLL_IN);
@@ -210,8 +215,8 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
                if (u->ring_overflow)
                        goto unlock_out;
 
-               c = u->ring_cons;
-               p = u->ring_prod;
+               c = READ_ONCE(u->ring_cons);
+               p = READ_ONCE(u->ring_prod);
                if (c != p)
                        break;
 
@@ -221,7 +226,7 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
                        return -EAGAIN;
 
                rc = wait_event_interruptible(u->evtchn_wait,
-                                             u->ring_cons != u->ring_prod);
+                       READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod));
                if (rc)
                        return rc;
        }
@@ -245,13 +250,13 @@ static ssize_t evtchn_read(struct file *file, char __user *buf,
        }
 
        rc = -EFAULT;
-       rmb(); /* Ensure that we see the port before we copy it. */
+       smp_rmb(); /* Ensure that we see the port before we copy it. */
        if (copy_to_user(buf, evtchn_ring_entry(u, c), bytes1) ||
            ((bytes2 != 0) &&
             copy_to_user(&buf[bytes1], &u->ring[0], bytes2)))
                goto unlock_out;
 
-       u->ring_cons += (bytes1 + bytes2) / sizeof(evtchn_port_t);
+       WRITE_ONCE(u->ring_cons, c + (bytes1 + bytes2) / sizeof(evtchn_port_t));
        rc = bytes1 + bytes2;
 
  unlock_out:
@@ -552,7 +557,9 @@ static long evtchn_ioctl(struct file *file,
                /* Initialise the ring to empty. Clear errors. */
                mutex_lock(&u->ring_cons_mutex);
                spin_lock_irq(&u->ring_prod_lock);
-               u->ring_cons = u->ring_prod = u->ring_overflow = 0;
+               WRITE_ONCE(u->ring_cons, 0);
+               WRITE_ONCE(u->ring_prod, 0);
+               u->ring_overflow = 0;
                spin_unlock_irq(&u->ring_prod_lock);
                mutex_unlock(&u->ring_cons_mutex);
                rc = 0;
@@ -595,7 +602,7 @@ static __poll_t evtchn_poll(struct file *file, poll_table *wait)
        struct per_user_data *u = file->private_data;
 
        poll_wait(file, &u->evtchn_wait, wait);
-       if (u->ring_cons != u->ring_prod)
+       if (READ_ONCE(u->ring_cons) != READ_ONCE(u->ring_prod))
                mask |= EPOLLIN | EPOLLRDNORM;
        if (u->ring_overflow)
                mask = EPOLLERR;
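
The structure being hardened here is a classic single-producer/
single-consumer ring: the producer must publish the slot contents (smp_wmb())
strictly before the new ring_prod, the consumer must order its read of
ring_prod (smp_rmb()) before reading the slot, and READ_ONCE()/WRITE_ONCE()
keep the lockless peeks in poll() and wait_event_interruptible() from tearing.
A stand-alone C11 sketch of the same pairing, with acquire/release standing in
for the barrier pair (all names hypothetical):

    #include <stdatomic.h>
    #include <stdint.h>
    #include <stdio.h>

    #define RING_SIZE 64                    /* hypothetical; power of two */

    static uint32_t ring[RING_SIZE];
    static _Atomic unsigned int prod, cons;

    static int produce(uint32_t port)       /* interrupt side */
    {
            unsigned int p = atomic_load_explicit(&prod, memory_order_relaxed);
            unsigned int c = atomic_load_explicit(&cons, memory_order_acquire);

            if (p - c >= RING_SIZE)
                    return 0;                       /* full */
            ring[p & (RING_SIZE - 1)] = port;       /* fill the slot first ... */
            /* ... then publish it: this release pairs with the consumer's
             * acquire, the way smp_wmb() pairs with smp_rmb() above. */
            atomic_store_explicit(&prod, p + 1, memory_order_release);
            return 1;
    }

    static int consume(uint32_t *port)       /* read() side */
    {
            unsigned int c = atomic_load_explicit(&cons, memory_order_relaxed);
            unsigned int p = atomic_load_explicit(&prod, memory_order_acquire);

            if (c == p)
                    return 0;                       /* empty */
            *port = ring[c & (RING_SIZE - 1)];      /* slot now visible */
            atomic_store_explicit(&cons, c + 1, memory_order_release);
            return 1;
    }

    int main(void)
    {
            uint32_t port;

            produce(42);
            if (consume(&port))
                    printf("dequeued port %u\n", port);
            return 0;
    }
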
index 5447c51..f01d58c 100644 (file)
@@ -133,20 +133,26 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
        if (NULL == add)
                return NULL;
 
-       add->grants    = kvcalloc(count, sizeof(add->grants[0]), GFP_KERNEL);
-       add->map_ops   = kvcalloc(count, sizeof(add->map_ops[0]), GFP_KERNEL);
-       add->unmap_ops = kvcalloc(count, sizeof(add->unmap_ops[0]), GFP_KERNEL);
-       add->kmap_ops  = kvcalloc(count, sizeof(add->kmap_ops[0]), GFP_KERNEL);
-       add->kunmap_ops = kvcalloc(count,
-                                  sizeof(add->kunmap_ops[0]), GFP_KERNEL);
+       add->grants    = kvmalloc_array(count, sizeof(add->grants[0]),
+                                       GFP_KERNEL);
+       add->map_ops   = kvmalloc_array(count, sizeof(add->map_ops[0]),
+                                       GFP_KERNEL);
+       add->unmap_ops = kvmalloc_array(count, sizeof(add->unmap_ops[0]),
+                                       GFP_KERNEL);
        add->pages     = kvcalloc(count, sizeof(add->pages[0]), GFP_KERNEL);
        if (NULL == add->grants    ||
            NULL == add->map_ops   ||
            NULL == add->unmap_ops ||
-           NULL == add->kmap_ops  ||
-           NULL == add->kunmap_ops ||
            NULL == add->pages)
                goto err;
+       if (use_ptemod) {
+               add->kmap_ops   = kvmalloc_array(count, sizeof(add->kmap_ops[0]),
+                                                GFP_KERNEL);
+               add->kunmap_ops = kvmalloc_array(count, sizeof(add->kunmap_ops[0]),
+                                                GFP_KERNEL);
+               if (NULL == add->kmap_ops || NULL == add->kunmap_ops)
+                       goto err;
+       }
 
 #ifdef CONFIG_XEN_GRANT_DMA_ALLOC
        add->dma_flags = dma_flags;
@@ -183,10 +189,14 @@ struct gntdev_grant_map *gntdev_alloc_map(struct gntdev_priv *priv, int count,
                goto err;
 
        for (i = 0; i < count; i++) {
-               add->map_ops[i].handle = -1;
-               add->unmap_ops[i].handle = -1;
-               add->kmap_ops[i].handle = -1;
-               add->kunmap_ops[i].handle = -1;
+               add->grants[i].domid = DOMID_INVALID;
+               add->grants[i].ref = INVALID_GRANT_REF;
+               add->map_ops[i].handle = INVALID_GRANT_HANDLE;
+               add->unmap_ops[i].handle = INVALID_GRANT_HANDLE;
+               if (use_ptemod) {
+                       add->kmap_ops[i].handle = INVALID_GRANT_HANDLE;
+                       add->kunmap_ops[i].handle = INVALID_GRANT_HANDLE;
+               }
        }
 
        add->index = 0;
@@ -274,7 +284,7 @@ static int find_grant_ptes(pte_t *pte, unsigned long addr, void *data)
                          map->grants[pgnr].ref,
                          map->grants[pgnr].domid);
        gnttab_set_unmap_op(&map->unmap_ops[pgnr], pte_maddr, flags,
-                           -1 /* handle */);
+                           INVALID_GRANT_HANDLE);
        return 0;
 }
 
@@ -292,7 +302,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
 
        if (!use_ptemod) {
                /* Note: it could already be mapped */
-               if (map->map_ops[0].handle != -1)
+               if (map->map_ops[0].handle != INVALID_GRANT_HANDLE)
                        return 0;
                for (i = 0; i < map->count; i++) {
                        unsigned long addr = (unsigned long)
@@ -301,7 +311,7 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
                                map->grants[i].ref,
                                map->grants[i].domid);
                        gnttab_set_unmap_op(&map->unmap_ops[i], addr,
-                               map->flags, -1 /* handle */);
+                               map->flags, INVALID_GRANT_HANDLE);
                }
        } else {
                /*
@@ -327,13 +337,13 @@ int gntdev_map_grant_pages(struct gntdev_grant_map *map)
                                map->grants[i].ref,
                                map->grants[i].domid);
                        gnttab_set_unmap_op(&map->kunmap_ops[i], address,
-                               flags, -1);
+                               flags, INVALID_GRANT_HANDLE);
                }
        }
 
        pr_debug("map %d+%d\n", map->index, map->count);
-       err = gnttab_map_refs(map->map_ops, use_ptemod ? map->kmap_ops : NULL,
-                       map->pages, map->count);
+       err = gnttab_map_refs(map->map_ops, map->kmap_ops, map->pages,
+                       map->count);
 
        for (i = 0; i < map->count; i++) {
                if (map->map_ops[i].status == GNTST_okay)
@@ -385,7 +395,7 @@ static int __unmap_grant_pages(struct gntdev_grant_map *map, int offset,
                pr_debug("unmap handle=%d st=%d\n",
                        map->unmap_ops[offset+i].handle,
                        map->unmap_ops[offset+i].status);
-               map->unmap_ops[offset+i].handle = -1;
+               map->unmap_ops[offset+i].handle = INVALID_GRANT_HANDLE;
        }
        return err;
 }
@@ -401,13 +411,15 @@ static int unmap_grant_pages(struct gntdev_grant_map *map, int offset,
         * already unmapped some of the grants. Only unmap valid ranges.
         */
        while (pages && !err) {
-               while (pages && map->unmap_ops[offset].handle == -1) {
+               while (pages &&
+                      map->unmap_ops[offset].handle == INVALID_GRANT_HANDLE) {
                        offset++;
                        pages--;
                }
                range = 0;
                while (range < pages) {
-                       if (map->unmap_ops[offset+range].handle == -1)
+                       if (map->unmap_ops[offset + range].handle ==
+                           INVALID_GRANT_HANDLE)
                                break;
                        range++;
                }
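
With INVALID_GRANT_HANDLE as the sentinel, partially unmapped maps are handled
by skipping invalid entries and batching each contiguous run of still-valid
handles. The same walk, lifted into a stand-alone program (illustrative; the
real loop additionally stops at the first error):

    #include <stdio.h>

    #define INVALID (-1)    /* stand-in for INVALID_GRANT_HANDLE */

    /* Print each contiguous run of valid handles that would be unmapped
     * in one batch; INVALID marks slots that are already unmapped. */
    static void unmap_valid_ranges(const int *handles, int pages)
    {
            int offset = 0;

            while (pages) {
                    while (pages && handles[offset] == INVALID) {
                            offset++;
                            pages--;
                    }
                    int range = 0;
                    while (range < pages && handles[offset + range] != INVALID)
                            range++;
                    if (range)
                            printf("unmap [%d, %d)\n", offset, offset + range);
                    offset += range;
                    pages -= range;
            }
    }

    int main(void)
    {
            int handles[] = { 3, INVALID, 5, 6, INVALID, INVALID, 9 };

            unmap_valid_ranges(handles, 7);
            return 0;
    }
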
index ce8ffb5..df7cab8 100644 (file)
@@ -3,7 +3,8 @@
  * Copyright 2012 by Oracle Inc
  * Author: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
  *
- * This code borrows ideas from https://lkml.org/lkml/2011/11/30/249
+ * This code borrows ideas from
+ * https://lore.kernel.org/lkml/1322673664-14642-6-git-send-email-konrad.wilk@oracle.com
  * so many thanks go to Kevin Tian <kevin.tian@intel.com>
  * and Yu Ke <ke.yu@intel.com>.
  */
index 48a658d..81b6e13 100644 (file)
@@ -305,11 +305,18 @@ static int backend_map(struct xen_front_pgdir_shbuf *buf)
 
        /* Save handles even if error, so we can unmap. */
        for (cur_page = 0; cur_page < buf->num_pages; cur_page++) {
-               buf->backend_map_handles[cur_page] = map_ops[cur_page].handle;
-               if (unlikely(map_ops[cur_page].status != GNTST_okay))
+               if (likely(map_ops[cur_page].status == GNTST_okay)) {
+                       buf->backend_map_handles[cur_page] =
+                               map_ops[cur_page].handle;
+               } else {
+                       buf->backend_map_handles[cur_page] =
+                               INVALID_GRANT_HANDLE;
+                       if (!ret)
+                               ret = -ENXIO;
                        dev_err(&buf->xb_dev->dev,
                                "Failed to map page %d: %d\n",
                                cur_page, map_ops[cur_page].status);
+               }
        }
 
        if (ret) {
index 8a75092..97f0d23 100644 (file)
@@ -206,6 +206,65 @@ void xenbus_otherend_changed(struct xenbus_watch *watch,
 }
 EXPORT_SYMBOL_GPL(xenbus_otherend_changed);
 
+#define XENBUS_SHOW_STAT(name)                                         \
+static ssize_t show_##name(struct device *_dev,                                \
+                          struct device_attribute *attr,               \
+                          char *buf)                                   \
+{                                                                      \
+       struct xenbus_device *dev = to_xenbus_device(_dev);             \
+                                                                       \
+       return sprintf(buf, "%d\n", atomic_read(&dev->name));           \
+}                                                                      \
+static DEVICE_ATTR(name, 0444, show_##name, NULL)
+
+XENBUS_SHOW_STAT(event_channels);
+XENBUS_SHOW_STAT(events);
+XENBUS_SHOW_STAT(spurious_events);
+XENBUS_SHOW_STAT(jiffies_eoi_delayed);
+
+static ssize_t show_spurious_threshold(struct device *_dev,
+                                      struct device_attribute *attr,
+                                      char *buf)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+
+       return sprintf(buf, "%d\n", dev->spurious_threshold);
+}
+
+static ssize_t set_spurious_threshold(struct device *_dev,
+                                     struct device_attribute *attr,
+                                     const char *buf, size_t count)
+{
+       struct xenbus_device *dev = to_xenbus_device(_dev);
+       unsigned int val;
+       ssize_t ret;
+
+       ret = kstrtouint(buf, 0, &val);
+       if (ret)
+               return ret;
+
+       dev->spurious_threshold = val;
+
+       return count;
+}
+
+static DEVICE_ATTR(spurious_threshold, 0644, show_spurious_threshold,
+                  set_spurious_threshold);
+
+static struct attribute *xenbus_attrs[] = {
+       &dev_attr_event_channels.attr,
+       &dev_attr_events.attr,
+       &dev_attr_spurious_events.attr,
+       &dev_attr_jiffies_eoi_delayed.attr,
+       &dev_attr_spurious_threshold.attr,
+       NULL
+};
+
+static const struct attribute_group xenbus_group = {
+       .name = "xenbus",
+       .attrs = xenbus_attrs,
+};
+
 int xenbus_dev_probe(struct device *_dev)
 {
        struct xenbus_device *dev = to_xenbus_device(_dev);
@@ -253,6 +312,11 @@ int xenbus_dev_probe(struct device *_dev)
                return err;
        }
 
+       dev->spurious_threshold = 1;
+       if (sysfs_create_group(&dev->dev.kobj, &xenbus_group))
+               dev_warn(&dev->dev, "sysfs_create_group on %s failed.\n",
+                        dev->nodename);
+
        return 0;
 fail_put:
        module_put(drv->driver.owner);
@@ -269,6 +333,8 @@ int xenbus_dev_remove(struct device *_dev)
 
        DPRINTK("%s", dev->nodename);
 
+       sysfs_remove_group(&dev->dev.kobj, &xenbus_group);
+
        free_otherend_watch(dev);
 
        if (drv->remove) {
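
For reference, each XENBUS_SHOW_STAT(name) invocation above expands, modulo
whitespace, to a read-only sysfs attribute; for "events" it becomes:

    static ssize_t show_events(struct device *_dev,
                               struct device_attribute *attr,
                               char *buf)
    {
            struct xenbus_device *dev = to_xenbus_device(_dev);

            return sprintf(buf, "%d\n", atomic_read(&dev->events));
    }
    static DEVICE_ATTR(events, 0444, show_events, NULL);

Because the attribute group's .name is "xenbus", the counters land in a
xenbus/ subdirectory of each device's sysfs node, with spurious_threshold
(mode 0644) as the only writable file.
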
index 6261719..bb1b286 100644 (file)
@@ -239,6 +239,7 @@ static int v9fs_xattr_get_acl(const struct xattr_handler *handler,
 }
 
 static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
+                             struct user_namespace *mnt_userns,
                              struct dentry *dentry, struct inode *inode,
                              const char *name, const void *value,
                              size_t size, int flags)
@@ -258,7 +259,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
 
        if (S_ISLNK(inode->i_mode))
                return -EOPNOTSUPP;
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EPERM;
        if (value) {
                /* update the cached acl value */
@@ -279,7 +280,8 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
                        struct iattr iattr = { 0 };
                        struct posix_acl *old_acl = acl;
 
-                       retval = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
+                       retval = posix_acl_update_mode(&init_user_ns, inode,
+                                                      &iattr.ia_mode, &acl);
                        if (retval)
                                goto err_out;
                        if (!acl) {
@@ -297,7 +299,7 @@ static int v9fs_xattr_set_acl(const struct xattr_handler *handler,
                         * What is the following setxattr update the
                         * mode ?
                         */
-                       v9fs_vfs_setattr_dotl(dentry, &iattr);
+                       v9fs_vfs_setattr_dotl(&init_user_ns, dentry, &iattr);
                }
                break;
        case ACL_TYPE_DEFAULT:
index 7b76377..4ca56c5 100644 (file)
@@ -135,7 +135,8 @@ extern struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry,
                        unsigned int flags);
 extern int v9fs_vfs_unlink(struct inode *i, struct dentry *d);
 extern int v9fs_vfs_rmdir(struct inode *i, struct dentry *d);
-extern int v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+extern int v9fs_vfs_rename(struct user_namespace *mnt_userns,
+                          struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry,
                           unsigned int flags);
 extern struct inode *v9fs_inode_from_fid(struct v9fs_session_info *v9ses,
index fd2a2b0..d44ade7 100644 (file)
@@ -59,7 +59,8 @@ void v9fs_inode2stat(struct inode *inode, struct p9_wstat *stat);
 int v9fs_uflags2omode(int uflags, int extended);
 
 void v9fs_blank_wstat(struct p9_wstat *wstat);
-int v9fs_vfs_setattr_dotl(struct dentry *, struct iattr *);
+int v9fs_vfs_setattr_dotl(struct user_namespace *, struct dentry *,
+                         struct iattr *);
 int v9fs_file_fsync_dotl(struct file *filp, loff_t start, loff_t end,
                         int datasync);
 int v9fs_refresh_inode(struct p9_fid *fid, struct inode *inode);
index 4a937fa..8d97f0b 100644 (file)
@@ -251,7 +251,7 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses,
 {
        int err = 0;
 
-       inode_init_owner(inode, NULL, mode);
+       inode_init_owner(&init_user_ns, inode, NULL, mode);
        inode->i_blocks = 0;
        inode->i_rdev = rdev;
        inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
@@ -676,8 +676,8 @@ error:
  */
 
 static int
-v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-               bool excl)
+v9fs_vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+               struct dentry *dentry, umode_t mode, bool excl)
 {
        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
        u32 perm = unixmode2p9mode(v9ses, mode);
@@ -702,7 +702,8 @@ v9fs_vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  *
  */
 
-static int v9fs_vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int v9fs_vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                         struct dentry *dentry, umode_t mode)
 {
        int err;
        u32 perm;
@@ -907,9 +908,9 @@ int v9fs_vfs_rmdir(struct inode *i, struct dentry *d)
  */
 
 int
-v9fs_vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-               struct inode *new_dir, struct dentry *new_dentry,
-               unsigned int flags)
+v9fs_vfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+               struct dentry *old_dentry, struct inode *new_dir,
+               struct dentry *new_dentry, unsigned int flags)
 {
        int retval;
        struct inode *old_inode;
@@ -1016,8 +1017,8 @@ done:
  */
 
 static int
-v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int flags)
+v9fs_vfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct dentry *dentry = path->dentry;
        struct v9fs_session_info *v9ses;
@@ -1027,7 +1028,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
        p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
        v9ses = v9fs_dentry2v9ses(dentry);
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-               generic_fillattr(d_inode(dentry), stat);
+               generic_fillattr(&init_user_ns, d_inode(dentry), stat);
                return 0;
        }
        fid = v9fs_fid_lookup(dentry);
@@ -1040,7 +1041,7 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
                return PTR_ERR(st);
 
        v9fs_stat2inode(st, d_inode(dentry), dentry->d_sb, 0);
-       generic_fillattr(d_inode(dentry), stat);
+       generic_fillattr(&init_user_ns, d_inode(dentry), stat);
 
        p9stat_free(st);
        kfree(st);
@@ -1054,7 +1055,8 @@ v9fs_vfs_getattr(const struct path *path, struct kstat *stat,
  *
  */
 
-static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
+static int v9fs_vfs_setattr(struct user_namespace *mnt_userns,
+                           struct dentry *dentry, struct iattr *iattr)
 {
        int retval, use_dentry = 0;
        struct v9fs_session_info *v9ses;
@@ -1062,7 +1064,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
        struct p9_wstat wstat;
 
        p9_debug(P9_DEBUG_VFS, "\n");
-       retval = setattr_prepare(dentry, iattr);
+       retval = setattr_prepare(&init_user_ns, dentry, iattr);
        if (retval)
                return retval;
 
@@ -1118,7 +1120,7 @@ static int v9fs_vfs_setattr(struct dentry *dentry, struct iattr *iattr)
 
        v9fs_invalidate_inode_attr(d_inode(dentry));
 
-       setattr_copy(d_inode(dentry), iattr);
+       setattr_copy(&init_user_ns, d_inode(dentry), iattr);
        mark_inode_dirty(d_inode(dentry));
        return 0;
 }
@@ -1137,9 +1139,6 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
                 struct super_block *sb, unsigned int flags)
 {
        umode_t mode;
-       char ext[32];
-       char tag_name[14];
-       unsigned int i_nlink;
        struct v9fs_session_info *v9ses = sb->s_fs_info;
        struct v9fs_inode *v9inode = V9FS_I(inode);
 
@@ -1157,18 +1156,18 @@ v9fs_stat2inode(struct p9_wstat *stat, struct inode *inode,
                inode->i_gid = stat->n_gid;
        }
        if ((S_ISREG(inode->i_mode)) || (S_ISDIR(inode->i_mode))) {
-               if (v9fs_proto_dotu(v9ses) && (stat->extension[0] != '\0')) {
+               if (v9fs_proto_dotu(v9ses)) {
+                       unsigned int i_nlink;
                        /*
-                        * Hadlink support got added later to
-                        * to the .u extension. So there can be
-                        * server out there that doesn't support
-                        * this even with .u extension. So check
-                        * for non NULL stat->extension
+                        * Hardlink support got added later to the .u extension.
+                        * So there can be a server out there that doesn't
+                        * support this even with the .u extension. That would
+                        * just leave us with stat->extension being an empty
+                        * string, though.
                         */
-                       strlcpy(ext, stat->extension, sizeof(ext));
                        /* HARDLINKCOUNT %u */
-                       sscanf(ext, "%13s %u", tag_name, &i_nlink);
-                       if (!strncmp(tag_name, "HARDLINKCOUNT", 13))
+                       if (sscanf(stat->extension,
+                                  " HARDLINKCOUNT %u", &i_nlink) == 1)
                                set_nlink(inode, i_nlink);
                }
        }
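
The rewritten parse leans on sscanf() semantics: the leading blank in
" HARDLINKCOUNT %u" skips any whitespace, the literal must match, and the call
returns 1 only if %u actually converted, so an empty extension string simply
leaves the link count untouched. A stand-alone check of those cases:

    #include <stdio.h>

    int main(void)
    {
            unsigned int n;

            /* matches: the leading " " eats optional whitespace */
            if (sscanf("HARDLINKCOUNT 4", " HARDLINKCOUNT %u", &n) == 1)
                    printf("nlink=%u\n", n);

            /* empty extension: no match, n is left untouched */
            if (sscanf("", " HARDLINKCOUNT %u", &n) != 1)
                    printf("no HARDLINKCOUNT tag\n");
            return 0;
    }
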
@@ -1295,7 +1294,8 @@ static int v9fs_vfs_mkspecial(struct inode *dir, struct dentry *dentry,
  */
 
 static int
-v9fs_vfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+v9fs_vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                struct dentry *dentry, const char *symname)
 {
        p9_debug(P9_DEBUG_VFS, " %lu,%pd,%s\n",
                 dir->i_ino, dentry, symname);
@@ -1348,7 +1348,8 @@ v9fs_vfs_link(struct dentry *old_dentry, struct inode *dir,
  */
 
 static int
-v9fs_vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+v9fs_vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+              struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct v9fs_session_info *v9ses = v9fs_inode2v9ses(dir);
        int retval;
index 823c2eb..1dc7af0 100644 (file)
@@ -33,8 +33,8 @@
 #include "acl.h"
 
 static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
-                   dev_t rdev);
+v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+                   struct dentry *dentry, umode_t omode, dev_t rdev);
 
 /**
  * v9fs_get_fsgid_for_create - Helper function to get the gid for creating a
@@ -218,10 +218,10 @@ int v9fs_open_to_dotl_flags(int flags)
  */
 
 static int
-v9fs_vfs_create_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
-               bool excl)
+v9fs_vfs_create_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t omode, bool excl)
 {
-       return v9fs_vfs_mknod_dotl(dir, dentry, omode, 0);
+       return v9fs_vfs_mknod_dotl(mnt_userns, dir, dentry, omode, 0);
 }
 
 static int
@@ -367,8 +367,9 @@ err_clunk_old_fid:
  *
  */
 
-static int v9fs_vfs_mkdir_dotl(struct inode *dir,
-                              struct dentry *dentry, umode_t omode)
+static int v9fs_vfs_mkdir_dotl(struct user_namespace *mnt_userns,
+                              struct inode *dir, struct dentry *dentry,
+                              umode_t omode)
 {
        int err;
        struct v9fs_session_info *v9ses;
@@ -457,8 +458,9 @@ error:
 }
 
 static int
-v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int flags)
+v9fs_vfs_getattr_dotl(struct user_namespace *mnt_userns,
+                     const struct path *path, struct kstat *stat,
+                     u32 request_mask, unsigned int flags)
 {
        struct dentry *dentry = path->dentry;
        struct v9fs_session_info *v9ses;
@@ -468,7 +470,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
        p9_debug(P9_DEBUG_VFS, "dentry: %p\n", dentry);
        v9ses = v9fs_dentry2v9ses(dentry);
        if (v9ses->cache == CACHE_LOOSE || v9ses->cache == CACHE_FSCACHE) {
-               generic_fillattr(d_inode(dentry), stat);
+               generic_fillattr(&init_user_ns, d_inode(dentry), stat);
                return 0;
        }
        fid = v9fs_fid_lookup(dentry);
@@ -485,7 +487,7 @@ v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat,
                return PTR_ERR(st);
 
        v9fs_stat2inode_dotl(st, d_inode(dentry), 0);
-       generic_fillattr(d_inode(dentry), stat);
+       generic_fillattr(&init_user_ns, d_inode(dentry), stat);
        /* Change block size to what the server returned */
        stat->blksize = st->st_blksize;
 
@@ -540,7 +542,8 @@ static int v9fs_mapped_iattr_valid(int iattr_valid)
  *
  */
 
-int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
+int v9fs_vfs_setattr_dotl(struct user_namespace *mnt_userns,
+                         struct dentry *dentry, struct iattr *iattr)
 {
        int retval, use_dentry = 0;
        struct p9_fid *fid = NULL;
@@ -549,7 +552,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
 
        p9_debug(P9_DEBUG_VFS, "\n");
 
-       retval = setattr_prepare(dentry, iattr);
+       retval = setattr_prepare(&init_user_ns, dentry, iattr);
        if (retval)
                return retval;
 
@@ -590,7 +593,7 @@ int v9fs_vfs_setattr_dotl(struct dentry *dentry, struct iattr *iattr)
                truncate_setsize(inode, iattr->ia_size);
 
        v9fs_invalidate_inode_attr(inode);
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
        mark_inode_dirty(inode);
        if (iattr->ia_valid & ATTR_MODE) {
                /* We also want to update ACL when we update mode bits */
@@ -684,8 +687,8 @@ v9fs_stat2inode_dotl(struct p9_stat_dotl *stat, struct inode *inode,
 }
 
 static int
-v9fs_vfs_symlink_dotl(struct inode *dir, struct dentry *dentry,
-               const char *symname)
+v9fs_vfs_symlink_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, const char *symname)
 {
        int err;
        kgid_t gid;
@@ -824,8 +827,8 @@ v9fs_vfs_link_dotl(struct dentry *old_dentry, struct inode *dir,
  *
  */
 static int
-v9fs_vfs_mknod_dotl(struct inode *dir, struct dentry *dentry, umode_t omode,
-               dev_t rdev)
+v9fs_vfs_mknod_dotl(struct user_namespace *mnt_userns, struct inode *dir,
+                   struct dentry *dentry, umode_t omode, dev_t rdev)
 {
        int err;
        kgid_t gid;
index 87217dd..ee33184 100644 (file)
@@ -157,6 +157,7 @@ static int v9fs_xattr_handler_get(const struct xattr_handler *handler,
 }
 
 static int v9fs_xattr_handler_set(const struct xattr_handler *handler,
+                                 struct user_namespace *mnt_userns,
                                  struct dentry *dentry, struct inode *inode,
                                  const char *name, const void *value,
                                  size_t size, int flags)
index 462253a..a55bda4 100644 (file)
@@ -203,7 +203,7 @@ config TMPFS_XATTR
 
 config TMPFS_INODE64
        bool "Use 64-bit ino_t by default in tmpfs"
-       depends on TMPFS && 64BIT && !(S390 || ALPHA)
+       depends on TMPFS && 64BIT
        default n
        help
          tmpfs has historically used only inode numbers as wide as an unsigned
index 699c4fa..06b7c92 100644 (file)
@@ -144,7 +144,8 @@ struct adfs_discmap {
 /* Inode stuff */
 struct inode *adfs_iget(struct super_block *sb, struct object_info *obj);
 int adfs_write_inode(struct inode *inode, struct writeback_control *wbc);
-int adfs_notify_change(struct dentry *dentry, struct iattr *attr);
+int adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+                      struct iattr *attr);
 
 /* map.c */
 int adfs_map_lookup(struct super_block *sb, u32 frag_id, unsigned int offset);
index 32620f4..fb7ee02 100644 (file)
@@ -292,14 +292,15 @@ out:
  * later.
  */
 int
-adfs_notify_change(struct dentry *dentry, struct iattr *attr)
+adfs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+                  struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        unsigned int ia_valid = attr->ia_valid;
        int error;
        
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
 
        /*
         * we can't change the UID or GID of any file -
index a755bef..bfa89e1 100644 (file)
@@ -167,27 +167,33 @@ extern const struct export_operations affs_export_ops;
 extern int     affs_hash_name(struct super_block *sb, const u8 *name, unsigned int len);
 extern struct dentry *affs_lookup(struct inode *dir, struct dentry *dentry, unsigned int);
 extern int     affs_unlink(struct inode *dir, struct dentry *dentry);
-extern int     affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool);
-extern int     affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+extern int     affs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool);
+extern int     affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode);
 extern int     affs_rmdir(struct inode *dir, struct dentry *dentry);
 extern int     affs_link(struct dentry *olddentry, struct inode *dir,
                          struct dentry *dentry);
-extern int     affs_symlink(struct inode *dir, struct dentry *dentry,
-                            const char *symname);
-extern int     affs_rename2(struct inode *old_dir, struct dentry *old_dentry,
-                           struct inode *new_dir, struct dentry *new_dentry,
-                           unsigned int flags);
+extern int     affs_symlink(struct user_namespace *mnt_userns,
+                       struct inode *dir, struct dentry *dentry,
+                       const char *symname);
+extern int     affs_rename2(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
+                       struct inode *new_dir, struct dentry *new_dentry,
+                       unsigned int flags);
 
 /* inode.c */
 
 extern struct inode            *affs_new_inode(struct inode *dir);
-extern int                      affs_notify_change(struct dentry *dentry, struct iattr *attr);
+extern int                      affs_notify_change(struct user_namespace *mnt_userns,
+                                       struct dentry *dentry, struct iattr *attr);
 extern void                     affs_evict_inode(struct inode *inode);
 extern struct inode            *affs_iget(struct super_block *sb,
                                        unsigned long ino);
 extern int                      affs_write_inode(struct inode *inode,
                                        struct writeback_control *wbc);
-extern int                      affs_add_entry(struct inode *dir, struct inode *inode, struct dentry *dentry, s32 type);
+extern int                      affs_add_entry(struct inode *dir, struct inode *inode,
+                                       struct dentry *dentry, s32 type);
 
 /* file.c */
 
index 0444121..2352a75 100644 (file)
@@ -216,14 +216,15 @@ affs_write_inode(struct inode *inode, struct writeback_control *wbc)
 }
 
 int
-affs_notify_change(struct dentry *dentry, struct iattr *attr)
+affs_notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+                  struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
 
        pr_debug("notify_change(%lu,0x%x)\n", inode->i_ino, attr->ia_valid);
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                goto out;
 
@@ -249,7 +250,7 @@ affs_notify_change(struct dentry *dentry, struct iattr *attr)
                affs_truncate(inode);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
 
        if (attr->ia_valid & ATTR_MODE)
index 5400a87..bcab189 100644 (file)
@@ -242,7 +242,8 @@ affs_unlink(struct inode *dir, struct dentry *dentry)
 }
 
 int
-affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+affs_create(struct user_namespace *mnt_userns, struct inode *dir,
+           struct dentry *dentry, umode_t mode, bool excl)
 {
        struct super_block *sb = dir->i_sb;
        struct inode    *inode;
@@ -273,7 +274,8 @@ affs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
 }
 
 int
-affs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+affs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+          struct dentry *dentry, umode_t mode)
 {
        struct inode            *inode;
        int                      error;
@@ -311,7 +313,8 @@ affs_rmdir(struct inode *dir, struct dentry *dentry)
 }
 
 int
-affs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+affs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+            struct dentry *dentry, const char *symname)
 {
        struct super_block      *sb = dir->i_sb;
        struct buffer_head      *bh;
@@ -500,9 +503,9 @@ done:
        return retval;
 }
 
-int affs_rename2(struct inode *old_dir, struct dentry *old_dentry,
-                       struct inode *new_dir, struct dentry *new_dentry,
-                       unsigned int flags)
+int affs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+                struct dentry *old_dentry, struct inode *new_dir,
+                struct dentry *new_dentry, unsigned int flags)
 {
 
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE))
index 7bd659a..17548c1 100644 (file)
@@ -28,18 +28,19 @@ static int afs_lookup_one_filldir(struct dir_context *ctx, const char *name, int
                                  loff_t fpos, u64 ino, unsigned dtype);
 static int afs_lookup_filldir(struct dir_context *ctx, const char *name, int nlen,
                              loff_t fpos, u64 ino, unsigned dtype);
-static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                     bool excl);
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode);
+static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, bool excl);
+static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode);
 static int afs_rmdir(struct inode *dir, struct dentry *dentry);
 static int afs_unlink(struct inode *dir, struct dentry *dentry);
 static int afs_link(struct dentry *from, struct inode *dir,
                    struct dentry *dentry);
-static int afs_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *content);
-static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                     struct inode *new_dir, struct dentry *new_dentry,
-                     unsigned int flags);
+static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, const char *content);
+static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                     struct dentry *old_dentry, struct inode *new_dir,
+                     struct dentry *new_dentry, unsigned int flags);
 static int afs_dir_releasepage(struct page *page, gfp_t gfp_flags);
 static void afs_dir_invalidatepage(struct page *page, unsigned int offset,
                                   unsigned int length);
@@ -69,7 +70,6 @@ const struct inode_operations afs_dir_inode_operations = {
        .permission     = afs_permission,
        .getattr        = afs_getattr,
        .setattr        = afs_setattr,
-       .listxattr      = afs_listxattr,
 };
 
 const struct address_space_operations afs_dir_aops = {
@@ -1325,7 +1325,8 @@ static const struct afs_operation_ops afs_mkdir_operation = {
 /*
  * create a directory on an AFS filesystem
  */
-static int afs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int afs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode)
 {
        struct afs_operation *op;
        struct afs_vnode *dvnode = AFS_FS_I(dir);
@@ -1619,8 +1620,8 @@ static const struct afs_operation_ops afs_create_operation = {
 /*
  * create a regular file on an AFS filesystem
  */
-static int afs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                     bool excl)
+static int afs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, bool excl)
 {
        struct afs_operation *op;
        struct afs_vnode *dvnode = AFS_FS_I(dir);
@@ -1741,8 +1742,8 @@ static const struct afs_operation_ops afs_symlink_operation = {
 /*
  * create a symlink in an AFS filesystem
  */
-static int afs_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *content)
+static int afs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, const char *content)
 {
        struct afs_operation *op;
        struct afs_vnode *dvnode = AFS_FS_I(dir);
@@ -1876,9 +1877,9 @@ static const struct afs_operation_ops afs_rename_operation = {
 /*
  * rename a file in an AFS filesystem and/or move it between directories
  */
-static int afs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                     struct inode *new_dir, struct dentry *new_dentry,
-                     unsigned int flags)
+static int afs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                     struct dentry *old_dentry, struct inode *new_dir,
+                     struct dentry *new_dentry, unsigned int flags)
 {
        struct afs_operation *op;
        struct afs_vnode *orig_dvnode, *new_dvnode, *vnode;
index 85f5adf..960b642 100644 (file)
@@ -43,7 +43,6 @@ const struct inode_operations afs_file_inode_operations = {
        .getattr        = afs_getattr,
        .setattr        = afs_setattr,
        .permission     = afs_permission,
-       .listxattr      = afs_listxattr,
 };
 
 const struct address_space_operations afs_fs_aops = {
index 97cab12..71c5872 100644 (file)
@@ -181,10 +181,13 @@ void afs_wait_for_operation(struct afs_operation *op)
                if (test_bit(AFS_SERVER_FL_IS_YFS, &op->server->flags) &&
                    op->ops->issue_yfs_rpc)
                        op->ops->issue_yfs_rpc(op);
-               else
+               else if (op->ops->issue_afs_rpc)
                        op->ops->issue_afs_rpc(op);
+               else
+                       op->ac.error = -ENOTSUPP;
 
-               op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
+               if (op->call)
+                       op->error = afs_wait_for_call_to_complete(op->call, &op->ac);
        }
 
        switch (op->error) {
index b0d7b89..12be887 100644 (file)
@@ -27,7 +27,6 @@
 
 static const struct inode_operations afs_symlink_inode_operations = {
        .get_link       = page_get_link,
-       .listxattr      = afs_listxattr,
 };
 
 static noinline void dump_vnode(struct afs_vnode *vnode, struct afs_vnode *parent_vnode)
@@ -734,8 +733,8 @@ error_unlock:
 /*
  * read the attributes of an inode
  */
-int afs_getattr(const struct path *path, struct kstat *stat,
-               u32 request_mask, unsigned int query_flags)
+int afs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+               struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct afs_vnode *vnode = AFS_FS_I(inode);
@@ -745,7 +744,7 @@ int afs_getattr(const struct path *path, struct kstat *stat,
 
        do {
                read_seqbegin_or_lock(&vnode->cb_lock, &seq);
-               generic_fillattr(inode, stat);
+               generic_fillattr(&init_user_ns, inode, stat);
                if (test_bit(AFS_VNODE_SILLY_DELETED, &vnode->flags) &&
                    stat->nlink > 0)
                        stat->nlink -= 1;
@@ -857,7 +856,8 @@ static const struct afs_operation_ops afs_setattr_operation = {
 /*
  * set the attributes of an inode
  */
-int afs_setattr(struct dentry *dentry, struct iattr *attr)
+int afs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct iattr *attr)
 {
        struct afs_operation *op;
        struct afs_vnode *vnode = AFS_FS_I(d_inode(dentry));
index 0d150a2..1627b18 100644 (file)
@@ -1149,8 +1149,9 @@ extern struct inode *afs_iget(struct afs_operation *, struct afs_vnode_param *);
 extern struct inode *afs_root_iget(struct super_block *, struct key *);
 extern bool afs_check_validity(struct afs_vnode *);
 extern int afs_validate(struct afs_vnode *, struct key *);
-extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int);
-extern int afs_setattr(struct dentry *, struct iattr *);
+extern int afs_getattr(struct user_namespace *mnt_userns, const struct path *,
+                      struct kstat *, u32, unsigned int);
+extern int afs_setattr(struct user_namespace *mnt_userns, struct dentry *, struct iattr *);
 extern void afs_evict_inode(struct inode *);
 extern int afs_drop_inode(struct inode *);
 
@@ -1361,7 +1362,7 @@ extern void afs_zap_permits(struct rcu_head *);
 extern struct key *afs_request_key(struct afs_cell *);
 extern struct key *afs_request_key_rcu(struct afs_cell *);
 extern int afs_check_permit(struct afs_vnode *, struct key *, afs_access_t *);
-extern int afs_permission(struct inode *, int);
+extern int afs_permission(struct user_namespace *, struct inode *, int);
 extern void __exit afs_clean_up_permit_cache(void);
 
 /*
@@ -1508,7 +1509,6 @@ extern int afs_launder_page(struct page *);
  * xattr.c
  */
 extern const struct xattr_handler *afs_xattr_handlers[];
-extern ssize_t afs_listxattr(struct dentry *, char *, size_t);
 
 /*
  * yfsclient.c
index 052dab2..bbb2c21 100644 (file)
@@ -32,7 +32,6 @@ const struct inode_operations afs_mntpt_inode_operations = {
        .lookup         = afs_mntpt_lookup,
        .readlink       = page_readlink,
        .getattr        = afs_getattr,
-       .listxattr      = afs_listxattr,
 };
 
 const struct inode_operations afs_autocell_inode_operations = {
index 9cf3102..3c7a8fc 100644 (file)
@@ -396,7 +396,8 @@ int afs_check_permit(struct afs_vnode *vnode, struct key *key,
  * - AFS ACLs are attached to directories only, and a file is controlled by its
  *   parent directory's ACL
  */
-int afs_permission(struct inode *inode, int mask)
+int afs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                  int mask)
 {
        struct afs_vnode *vnode = AFS_FS_I(inode);
        afs_access_t access;
index c9195fc..eb737ed 100644 (file)
@@ -851,8 +851,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf)
        fscache_wait_on_page_write(vnode->cache, vmf->page);
 #endif
 
-       if (PageWriteback(vmf->page) &&
-           wait_on_page_bit_killable(vmf->page, PG_writeback) < 0)
+       if (wait_on_page_writeback_killable(vmf->page))
                return VM_FAULT_RETRY;
 
        if (lock_page_killable(vmf->page) < 0)
index 95c573d..7751b0b 100644 (file)
 #include <linux/xattr.h>
 #include "internal.h"
 
-static const char afs_xattr_list[] =
-       "afs.acl\0"
-       "afs.cell\0"
-       "afs.fid\0"
-       "afs.volume\0"
-       "afs.yfs.acl\0"
-       "afs.yfs.acl_inherited\0"
-       "afs.yfs.acl_num_cleaned\0"
-       "afs.yfs.vol_acl";
-
-/*
- * Retrieve a list of the supported xattrs.
- */
-ssize_t afs_listxattr(struct dentry *dentry, char *buffer, size_t size)
-{
-       if (size == 0)
-               return sizeof(afs_xattr_list);
-       if (size < sizeof(afs_xattr_list))
-               return -ERANGE;
-       memcpy(buffer, afs_xattr_list, sizeof(afs_xattr_list));
-       return sizeof(afs_xattr_list);
-}
-
 /*
  * Deal with the result of a successful fetch ACL operation.
  */
@@ -120,6 +97,7 @@ static const struct afs_operation_ops afs_store_acl_operation = {
  * Set a file's AFS3 ACL.
  */
 static int afs_xattr_set_acl(const struct xattr_handler *handler,
+                            struct user_namespace *mnt_userns,
                              struct dentry *dentry,
                              struct inode *inode, const char *name,
                              const void *buffer, size_t size, int flags)
@@ -230,6 +208,8 @@ static int afs_xattr_get_yfs(const struct xattr_handler *handler,
                        else
                                ret = -ERANGE;
                }
+       } else if (ret == -ENOTSUPP) {
+               ret = -ENODATA;
        }
 
 error_yacl:
@@ -248,12 +228,14 @@ static const struct afs_operation_ops yfs_store_opaque_acl2_operation = {
  * Set a file's YFS ACL.
  */
 static int afs_xattr_set_yfs(const struct xattr_handler *handler,
+                            struct user_namespace *mnt_userns,
                              struct dentry *dentry,
                              struct inode *inode, const char *name,
                              const void *buffer, size_t size, int flags)
 {
        struct afs_operation *op;
        struct afs_vnode *vnode = AFS_FS_I(inode);
+       int ret;
 
        if (flags == XATTR_CREATE ||
            strcmp(name, "acl") != 0)
@@ -268,7 +250,10 @@ static int afs_xattr_set_yfs(const struct xattr_handler *handler,
                return afs_put_operation(op);
 
        op->ops = &yfs_store_opaque_acl2_operation;
-       return afs_do_sync_operation(op);
+       ret = afs_do_sync_operation(op);
+       if (ret == -ENOTSUPP)
+               ret = -ENODATA;
+       return ret;
 }
 
 static const struct xattr_handler afs_xattr_yfs_handler = {
index b4bbdbd..87ef39d 100644 (file)
--- a/fs/attr.c
+++ b/fs/attr.c
 #include <linux/evm.h>
 #include <linux/ima.h>
 
-static bool chown_ok(const struct inode *inode, kuid_t uid)
+/**
+ * chown_ok - verify permissions to chown inode
+ * @mnt_userns:        user namespace of the mount @inode was found from
+ * @inode:     inode to check permissions on
+ * @uid:       uid to chown @inode to
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts, or if permission checking is to be
+ * performed on the raw inode, simply pass init_user_ns.
+ */
+static bool chown_ok(struct user_namespace *mnt_userns,
+                    const struct inode *inode,
+                    kuid_t uid)
 {
-       if (uid_eq(current_fsuid(), inode->i_uid) &&
-           uid_eq(uid, inode->i_uid))
+       kuid_t kuid = i_uid_into_mnt(mnt_userns, inode);
+       if (uid_eq(current_fsuid(), kuid) && uid_eq(uid, kuid))
                return true;
-       if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+       if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
                return true;
-       if (uid_eq(inode->i_uid, INVALID_UID) &&
+       if (uid_eq(kuid, INVALID_UID) &&
            ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
                return true;
        return false;
 }
 
-static bool chgrp_ok(const struct inode *inode, kgid_t gid)
+/**
+ * chgrp_ok - verify permissions to chgrp inode
+ * @mnt_userns:        user namespace of the mount @inode was found from
+ * @inode:     inode to check permissions on
+ * @gid:       gid to chown @inode to
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts, or if permission checking is to be
+ * performed on the raw inode, simply pass init_user_ns.
+ */
+static bool chgrp_ok(struct user_namespace *mnt_userns,
+                    const struct inode *inode, kgid_t gid)
 {
-       if (uid_eq(current_fsuid(), inode->i_uid) &&
-           (in_group_p(gid) || gid_eq(gid, inode->i_gid)))
+       kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+       if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)) &&
+           (in_group_p(gid) || gid_eq(gid, kgid)))
                return true;
-       if (capable_wrt_inode_uidgid(inode, CAP_CHOWN))
+       if (capable_wrt_inode_uidgid(mnt_userns, inode, CAP_CHOWN))
                return true;
-       if (gid_eq(inode->i_gid, INVALID_GID) &&
+       if (gid_eq(kgid, INVALID_GID) &&
            ns_capable(inode->i_sb->s_user_ns, CAP_CHOWN))
                return true;
        return false;
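
To make the mapping step in chown_ok()/chgrp_ok() concrete, a hedged, invented scenario; i_uid_into_mnt() is the helper these hunks introduce, the numbers are made up:

/*
 * Illustrative only. Suppose the mount's idmapping makes on-disk uid
 * 1000 appear as uid 0 from this mount, and @inode is owned by uid
 * 1000 on disk. Inside chown_ok():
 *
 *	kuid = i_uid_into_mnt(mnt_userns, inode);	// mapped: kuid 0
 *
 * A task with fsuid 0 then passes the owner check without CAP_CHOWN,
 * even though the raw inode->i_uid is 1000. Passing init_user_ns
 * instead makes the mapping an identity, i.e. the old behaviour.
 */
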
@@ -46,6 +74,7 @@ static bool chgrp_ok(const struct inode *inode, kgid_t gid)
 
 /**
  * setattr_prepare - check if attribute changes to a dentry are allowed
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dentry:    dentry to check
  * @attr:      attributes to change
  *
@@ -55,10 +84,17 @@ static bool chgrp_ok(const struct inode *inode, kgid_t gid)
  * SGID bit from mode if user is not allowed to set it. Also file capabilities
  * and IMA extended attributes are cleared if ATTR_KILL_PRIV is set.
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts, or if permission checking is to be
+ * performed on the raw inode, simply pass init_user_ns.
+ *
  * Should be called as the first thing in ->setattr implementations,
  * possibly after taking additional locks.
  */
-int setattr_prepare(struct dentry *dentry, struct iattr *attr)
+int setattr_prepare(struct user_namespace *mnt_userns, struct dentry *dentry,
+                   struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        unsigned int ia_valid = attr->ia_valid;
@@ -78,27 +114,27 @@ int setattr_prepare(struct dentry *dentry, struct iattr *attr)
                goto kill_priv;
 
        /* Make sure a caller can chown. */
-       if ((ia_valid & ATTR_UID) && !chown_ok(inode, attr->ia_uid))
+       if ((ia_valid & ATTR_UID) && !chown_ok(mnt_userns, inode, attr->ia_uid))
                return -EPERM;
 
        /* Make sure caller can chgrp. */
-       if ((ia_valid & ATTR_GID) && !chgrp_ok(inode, attr->ia_gid))
+       if ((ia_valid & ATTR_GID) && !chgrp_ok(mnt_userns, inode, attr->ia_gid))
                return -EPERM;
 
        /* Make sure a caller can chmod. */
        if (ia_valid & ATTR_MODE) {
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EPERM;
                /* Also check the setgid bit! */
-               if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
-                               inode->i_gid) &&
-                   !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+               if (!in_group_p((ia_valid & ATTR_GID) ? attr->ia_gid :
+                                i_gid_into_mnt(mnt_userns, inode)) &&
+                    !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
                        attr->ia_mode &= ~S_ISGID;
        }
 
        /* Check for setting the inode time. */
        if (ia_valid & (ATTR_MTIME_SET | ATTR_ATIME_SET | ATTR_TIMES_SET)) {
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EPERM;
        }
 
@@ -107,7 +143,7 @@ kill_priv:
        if (ia_valid & ATTR_KILL_PRIV) {
                int error;
 
-               error = security_inode_killpriv(dentry);
+               error = security_inode_killpriv(mnt_userns, dentry);
                if (error)
                        return error;
        }
@@ -162,20 +198,33 @@ EXPORT_SYMBOL(inode_newsize_ok);
 
 /**
  * setattr_copy - copy simple metadata updates into the generic inode
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode:     the inode to be updated
  * @attr:      the new attributes
  *
  * setattr_copy must be called with i_mutex held.
  *
  * setattr_copy updates the inode's metadata with that specified
- * in attr. Noticeably missing is inode size update, which is more complex
+ * in attr on idmapped mounts. If file ownership is changed, setattr_copy
+ * doesn't map ia_uid and ia_gid; it will assume the caller has already
+ * provided the intended values. Necessary permission checks to determine
+ * whether or not the S_ISGID property needs to be removed are performed with
+ * the correct idmapped mount permission helpers.
+ * Noticeably missing is inode size update, which is more complex
  * as it requires pagecache updates.
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts, or if permission checking is to be
+ * performed on the raw inode, simply pass init_user_ns.
+ *
  * The inode is not marked as dirty after this operation. The rationale is
  * that for "simple" filesystems, the struct inode is the inode storage.
  * The caller is free to mark the inode dirty afterwards if needed.
  */
-void setattr_copy(struct inode *inode, const struct iattr *attr)
+void setattr_copy(struct user_namespace *mnt_userns, struct inode *inode,
+                 const struct iattr *attr)
 {
        unsigned int ia_valid = attr->ia_valid;
 
@@ -191,9 +240,9 @@ void setattr_copy(struct inode *inode, const struct iattr *attr)
                inode->i_ctime = attr->ia_ctime;
        if (ia_valid & ATTR_MODE) {
                umode_t mode = attr->ia_mode;
-
-               if (!in_group_p(inode->i_gid) &&
-                   !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+               kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+               if (!in_group_p(kgid) &&
+                   !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
                        mode &= ~S_ISGID;
                inode->i_mode = mode;
        }
@@ -202,6 +251,7 @@ EXPORT_SYMBOL(setattr_copy);
 
 /**
  * notify_change - modify attributes of a filesystem object
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dentry:    object affected
  * @attr:      new attributes
  * @delegated_inode: returns inode, if the inode is delegated
@@ -214,13 +264,23 @@ EXPORT_SYMBOL(setattr_copy);
  * retry.  Because breaking a delegation may take a long time, the
  * caller should drop the i_mutex before doing so.
  *
+ * If file ownership is changed, notify_change() doesn't map ia_uid and
+ * ia_gid; it will assume the caller has already provided the intended values.
+ *
  * Alternatively, a caller may pass NULL for delegated_inode.  This may
  * be appropriate for callers that expect the underlying filesystem not
  * to be NFS exported.  Also, passing NULL is fine for callers holding
  * the file open for write, as there can be no conflicting delegation in
  * that case.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts, or if permission checking is to be
+ * performed on the raw inode, simply pass init_user_ns.
  */
-int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **delegated_inode)
+int notify_change(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *attr, struct inode **delegated_inode)
 {
        struct inode *inode = dentry->d_inode;
        umode_t mode = inode->i_mode;
@@ -243,8 +303,8 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
                if (IS_IMMUTABLE(inode))
                        return -EPERM;
 
-               if (!inode_owner_or_capable(inode)) {
-                       error = inode_permission(inode, MAY_WRITE);
+               if (!inode_owner_or_capable(mnt_userns, inode)) {
+                       error = inode_permission(mnt_userns, inode, MAY_WRITE);
                        if (error)
                                return error;
                }
@@ -320,9 +380,11 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
        /* Don't allow modifications of files with invalid uids or
         * gids unless those uids & gids are being made valid.
         */
-       if (!(ia_valid & ATTR_UID) && !uid_valid(inode->i_uid))
+       if (!(ia_valid & ATTR_UID) &&
+           !uid_valid(i_uid_into_mnt(mnt_userns, inode)))
                return -EOVERFLOW;
-       if (!(ia_valid & ATTR_GID) && !gid_valid(inode->i_gid))
+       if (!(ia_valid & ATTR_GID) &&
+           !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
                return -EOVERFLOW;
 
        error = security_inode_setattr(dentry, attr);
@@ -333,13 +395,13 @@ int notify_change(struct dentry * dentry, struct iattr * attr, struct inode **de
                return error;
 
        if (inode->i_op->setattr)
-               error = inode->i_op->setattr(dentry, attr);
+               error = inode->i_op->setattr(mnt_userns, dentry, attr);
        else
-               error = simple_setattr(dentry, attr);
+               error = simple_setattr(mnt_userns, dentry, attr);
 
        if (!error) {
                fsnotify_change(dentry, ia_valid);
-               ima_inode_post_setattr(dentry);
+               ima_inode_post_setattr(mnt_userns, dentry);
                evm_inode_post_setattr(dentry, ia_valid);
        }
 
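
Taken together, the fs/attr.c hunks fix the calling convention that every converted ->setattr follows. A minimal sketch of such a method; only setattr_prepare(), setattr_copy() and their new mnt_userns parameter are taken from the diff, everything named foofs_* is hypothetical:

static int foofs_setattr(struct user_namespace *mnt_userns,
			 struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int error;

	/* Ownership and permission checks now map through mnt_userns. */
	error = setattr_prepare(mnt_userns, dentry, attr);
	if (error)
		return error;

	/* S_ISGID stripping in here also uses the mapped gid. */
	setattr_copy(mnt_userns, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}

On a non-idmapped mount the VFS simply passes init_user_ns, so code like this behaves exactly as before the conversion.
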
index 5aaa173..91fe454 100644 (file)
 
 #include "autofs_i.h"
 
-static int autofs_dir_symlink(struct inode *, struct dentry *, const char *);
+static int autofs_dir_symlink(struct user_namespace *, struct inode *,
+                             struct dentry *, const char *);
 static int autofs_dir_unlink(struct inode *, struct dentry *);
 static int autofs_dir_rmdir(struct inode *, struct dentry *);
-static int autofs_dir_mkdir(struct inode *, struct dentry *, umode_t);
+static int autofs_dir_mkdir(struct user_namespace *, struct inode *,
+                           struct dentry *, umode_t);
 static long autofs_root_ioctl(struct file *, unsigned int, unsigned long);
 #ifdef CONFIG_COMPAT
 static long autofs_root_compat_ioctl(struct file *,
@@ -524,9 +526,9 @@ static struct dentry *autofs_lookup(struct inode *dir,
        return NULL;
 }
 
-static int autofs_dir_symlink(struct inode *dir,
-                              struct dentry *dentry,
-                              const char *symname)
+static int autofs_dir_symlink(struct user_namespace *mnt_userns,
+                             struct inode *dir, struct dentry *dentry,
+                             const char *symname)
 {
        struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
        struct autofs_info *ino = autofs_dentry_ino(dentry);
@@ -715,8 +717,9 @@ static int autofs_dir_rmdir(struct inode *dir, struct dentry *dentry)
        return 0;
 }
 
-static int autofs_dir_mkdir(struct inode *dir,
-                           struct dentry *dentry, umode_t mode)
+static int autofs_dir_mkdir(struct user_namespace *mnt_userns,
+                           struct inode *dir, struct dentry *dentry,
+                           umode_t mode)
 {
        struct autofs_sb_info *sbi = autofs_sbi(dir->i_sb);
        struct autofs_info *ino = autofs_dentry_ino(dentry);
index 54f0ce4..48e1614 100644 (file)
@@ -27,8 +27,9 @@ static const struct file_operations bad_file_ops =
        .open           = bad_file_open,
 };
 
-static int bad_inode_create (struct inode *dir, struct dentry *dentry,
-               umode_t mode, bool excl)
+static int bad_inode_create(struct user_namespace *mnt_userns,
+                           struct inode *dir, struct dentry *dentry,
+                           umode_t mode, bool excl)
 {
        return -EIO;
 }
@@ -50,14 +51,15 @@ static int bad_inode_unlink(struct inode *dir, struct dentry *dentry)
        return -EIO;
 }
 
-static int bad_inode_symlink (struct inode *dir, struct dentry *dentry,
-               const char *symname)
+static int bad_inode_symlink(struct user_namespace *mnt_userns,
+                            struct inode *dir, struct dentry *dentry,
+                            const char *symname)
 {
        return -EIO;
 }
 
-static int bad_inode_mkdir(struct inode *dir, struct dentry *dentry,
-                       umode_t mode)
+static int bad_inode_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                          struct dentry *dentry, umode_t mode)
 {
        return -EIO;
 }
@@ -67,13 +69,14 @@ static int bad_inode_rmdir (struct inode *dir, struct dentry *dentry)
        return -EIO;
 }
 
-static int bad_inode_mknod (struct inode *dir, struct dentry *dentry,
-                       umode_t mode, dev_t rdev)
+static int bad_inode_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                          struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        return -EIO;
 }
 
-static int bad_inode_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int bad_inode_rename2(struct user_namespace *mnt_userns,
+                            struct inode *old_dir, struct dentry *old_dentry,
                             struct inode *new_dir, struct dentry *new_dentry,
                             unsigned int flags)
 {
@@ -86,18 +89,21 @@ static int bad_inode_readlink(struct dentry *dentry, char __user *buffer,
        return -EIO;
 }
 
-static int bad_inode_permission(struct inode *inode, int mask)
+static int bad_inode_permission(struct user_namespace *mnt_userns,
+                               struct inode *inode, int mask)
 {
        return -EIO;
 }
 
-static int bad_inode_getattr(const struct path *path, struct kstat *stat,
+static int bad_inode_getattr(struct user_namespace *mnt_userns,
+                            const struct path *path, struct kstat *stat,
                             u32 request_mask, unsigned int query_flags)
 {
        return -EIO;
 }
 
-static int bad_inode_setattr(struct dentry *direntry, struct iattr *attrs)
+static int bad_inode_setattr(struct user_namespace *mnt_userns,
+                            struct dentry *direntry, struct iattr *attrs)
 {
        return -EIO;
 }
@@ -140,13 +146,15 @@ static int bad_inode_atomic_open(struct inode *inode, struct dentry *dentry,
        return -EIO;
 }
 
-static int bad_inode_tmpfile(struct inode *inode, struct dentry *dentry,
+static int bad_inode_tmpfile(struct user_namespace *mnt_userns,
+                            struct inode *inode, struct dentry *dentry,
                             umode_t mode)
 {
        return -EIO;
 }
 
-static int bad_inode_set_acl(struct inode *inode, struct posix_acl *acl,
+static int bad_inode_set_acl(struct user_namespace *mnt_userns,
+                            struct inode *inode, struct posix_acl *acl,
                             int type)
 {
        return -EIO;
index d8dfe3a..34d4f68 100644 (file)
@@ -75,8 +75,8 @@ const struct file_operations bfs_dir_operations = {
        .llseek         = generic_file_llseek,
 };
 
-static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                                               bool excl)
+static int bfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, bool excl)
 {
        int err;
        struct inode *inode;
@@ -96,7 +96,7 @@ static int bfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        }
        set_bit(ino, info->si_imap);
        info->si_freei--;
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
        inode->i_blocks = 0;
        inode->i_op = &bfs_file_inops;
@@ -199,9 +199,9 @@ out_brelse:
        return error;
 }
 
-static int bfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                     struct inode *new_dir, struct dentry *new_dentry,
-                     unsigned int flags)
+static int bfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                     struct dentry *old_dentry, struct inode *new_dir,
+                     struct dentry *new_dentry, unsigned int flags)
 {
        struct inode *old_inode, *new_inode;
        struct buffer_head *old_bh, *new_bh;
index c457334..e1eae7e 100644 (file)
@@ -649,12 +649,24 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
        struct super_block *sb = file_inode(file)->i_sb;
        struct dentry *root = sb->s_root, *dentry;
        int err = 0;
+       struct file *f = NULL;
 
        e = create_entry(buffer, count);
 
        if (IS_ERR(e))
                return PTR_ERR(e);
 
+       if (e->flags & MISC_FMT_OPEN_FILE) {
+               f = open_exec(e->interpreter);
+               if (IS_ERR(f)) {
+                       pr_notice("register: failed to install interpreter file %s\n",
+                                e->interpreter);
+                       kfree(e);
+                       return PTR_ERR(f);
+               }
+               e->interp_file = f;
+       }
+
        inode_lock(d_inode(root));
        dentry = lookup_one_len(e->name, root, strlen(e->name));
        err = PTR_ERR(dentry);
@@ -678,21 +690,6 @@ static ssize_t bm_register_write(struct file *file, const char __user *buffer,
                goto out2;
        }
 
-       if (e->flags & MISC_FMT_OPEN_FILE) {
-               struct file *f;
-
-               f = open_exec(e->interpreter);
-               if (IS_ERR(f)) {
-                       err = PTR_ERR(f);
-                       pr_notice("register: failed to install interpreter file %s\n", e->interpreter);
-                       simple_release_fs(&bm_mnt, &entry_count);
-                       iput(inode);
-                       inode = NULL;
-                       goto out2;
-               }
-               e->interp_file = f;
-       }
-
        e->dentry = dget(dentry);
        inode->i_private = e;
        inode->i_fop = &bm_entry_operations;
@@ -709,6 +706,8 @@ out:
        inode_unlock(d_inode(root));
 
        if (err) {
+               if (f)
+                       filp_close(f, NULL);
                kfree(e);
                return err;
        }
index ec26179..09d6f72 100644 (file)
@@ -118,13 +118,22 @@ int truncate_bdev_range(struct block_device *bdev, fmode_t mode,
        if (!(mode & FMODE_EXCL)) {
                int err = bd_prepare_to_claim(bdev, truncate_bdev_range);
                if (err)
-                       return err;
+                       goto invalidate;
        }
 
        truncate_inode_pages_range(bdev->bd_inode->i_mapping, lstart, lend);
        if (!(mode & FMODE_EXCL))
                bd_abort_claiming(bdev, truncate_bdev_range);
        return 0;
+
+invalidate:
+       /*
+        * Someone else has the handle exclusively open. Try invalidating instead.
+        * The 'end' argument is inclusive so the rounding is safe.
+        */
+       return invalidate_inode_pages2_range(bdev->bd_inode->i_mapping,
+                                            lstart >> PAGE_SHIFT,
+                                            lend >> PAGE_SHIFT);
 }
 
 static void set_init_blocksize(struct block_device *bdev)
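
The inclusive 'end' convention is what makes the page-index arithmetic in the new fallback safe; a worked example with invented values:

/*
 * Illustrative arithmetic only (PAGE_SHIFT == 12, PAGE_SIZE == 4096).
 * For the inclusive byte range [0, 8191]:
 *
 *	lstart >> PAGE_SHIFT == 0	first page index
 *	lend   >> PAGE_SHIFT == 1	last page index, still in range
 *
 * Because lend is inclusive, the shift can never round into a page the
 * range does not touch, so no unrelated page cache is invalidated.
 */
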
@@ -221,7 +230,7 @@ static void blkdev_bio_end_io_simple(struct bio *bio)
 
 static ssize_t
 __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
-               int nr_pages)
+               unsigned int nr_pages)
 {
        struct file *file = iocb->ki_filp;
        struct block_device *bdev = I_BDEV(bdev_file_inode(file));
@@ -266,6 +275,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter,
                bio.bi_opf = dio_bio_write_op(iocb);
                task_io_account_write(ret);
        }
+       if (iocb->ki_flags & IOCB_NOWAIT)
+               bio.bi_opf |= REQ_NOWAIT;
        if (iocb->ki_flags & IOCB_HIPRI)
                bio_set_polled(&bio, iocb);
 
@@ -355,8 +366,8 @@ static void blkdev_bio_end_io(struct bio *bio)
        }
 }
 
-static ssize_t
-__blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
+static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+               unsigned int nr_pages)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = bdev_file_inode(file);
@@ -419,11 +430,13 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
                        bio->bi_opf = dio_bio_write_op(iocb);
                        task_io_account_write(bio->bi_iter.bi_size);
                }
+               if (iocb->ki_flags & IOCB_NOWAIT)
+                       bio->bi_opf |= REQ_NOWAIT;
 
                dio->size += bio->bi_iter.bi_size;
                pos += bio->bi_iter.bi_size;
 
-               nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES);
+               nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS);
                if (!nr_pages) {
                        bool polled = false;
 
@@ -486,16 +499,16 @@ __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, int nr_pages)
 static ssize_t
 blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
 {
-       int nr_pages;
+       unsigned int nr_pages;
 
        if (!iov_iter_count(iter))
                return 0;
 
-       nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_PAGES + 1);
-       if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_PAGES)
+       nr_pages = bio_iov_vecs_to_alloc(iter, BIO_MAX_VECS + 1);
+       if (is_sync_kiocb(iocb) && nr_pages <= BIO_MAX_VECS)
                return __blkdev_direct_IO_simple(iocb, iter, nr_pages);
 
-       return __blkdev_direct_IO(iocb, iter, min(nr_pages, BIO_MAX_PAGES));
+       return __blkdev_direct_IO(iocb, iter, bio_max_segs(nr_pages));
 }
 
 static __init int blkdev_init(void)
@@ -1231,13 +1244,13 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate)
 
        lockdep_assert_held(&bdev->bd_mutex);
 
-       clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state);
-
 rescan:
        ret = blk_drop_partitions(bdev);
        if (ret)
                return ret;
 
+       clear_bit(GD_NEED_PART_SCAN, &disk->state);
+
        /*
         * Historically we only set the capacity to zero for devices that
+        * support partitions (independent of actually having partitions created).
@@ -1270,7 +1283,7 @@ rescan:
        return ret;
 }
 /*
- * Only exported for for loop and dasd for historic reasons.  Don't use in new
+ * Only exported for loop and dasd for historic reasons.  Don't use in new
  * code!
  */
 EXPORT_SYMBOL_GPL(bdev_disk_changed);
index b634c42..b4fb997 100644 (file)
@@ -7,10 +7,12 @@ subdir-ccflags-y += -Wmissing-format-attribute
 subdir-ccflags-y += -Wmissing-prototypes
 subdir-ccflags-y += -Wold-style-definition
 subdir-ccflags-y += -Wmissing-include-dirs
-subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable)
-subdir-ccflags-y += $(call cc-option, -Wunused-const-variable)
-subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned)
-subdir-ccflags-y += $(call cc-option, -Wstringop-truncation)
+condflags := \
+       $(call cc-option, -Wunused-but-set-variable)            \
+       $(call cc-option, -Wunused-const-variable)              \
+       $(call cc-option, -Wpacked-not-aligned)                 \
+       $(call cc-option, -Wstringop-truncation)
+subdir-ccflags-y += $(condflags)
 # The following turn off the warnings enabled by -Wextra
 subdir-ccflags-y += -Wno-missing-field-initializers
 subdir-ccflags-y += -Wno-sign-compare
index a0af1b9..d95eb5c 100644 (file)
@@ -107,13 +107,15 @@ out:
        return ret;
 }
 
-int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                 struct posix_acl *acl, int type)
 {
        int ret;
        umode_t old_mode = inode->i_mode;
 
        if (type == ACL_TYPE_ACCESS && acl) {
-               ret = posix_acl_update_mode(inode, &inode->i_mode, &acl);
+               ret = posix_acl_update_mode(&init_user_ns, inode,
+                                           &inode->i_mode, &acl);
                if (ret)
                        return ret;
        }
index 5064be5..744b99d 100644 (file)
@@ -1162,6 +1162,11 @@ static int inc_block_group_ro(struct btrfs_block_group *cache, int force)
        spin_lock(&sinfo->lock);
        spin_lock(&cache->lock);
 
+       if (cache->swap_extents) {
+               ret = -ETXTBSY;
+               goto out;
+       }
+
        if (cache->ro) {
                cache->ro++;
                ret = 0;
@@ -2307,7 +2312,7 @@ again:
        }
 
        ret = inc_block_group_ro(cache, 0);
-       if (!do_chunk_alloc)
+       if (!do_chunk_alloc || ret == -ETXTBSY)
                goto unlock_out;
        if (!ret)
                goto out;
@@ -2316,6 +2321,8 @@ again:
        if (ret < 0)
                goto out;
        ret = inc_block_group_ro(cache, 0);
+       if (ret == -ETXTBSY)
+               goto unlock_out;
 out:
        if (cache->flags & BTRFS_BLOCK_GROUP_SYSTEM) {
                alloc_flags = btrfs_get_alloc_profile(fs_info, cache->flags);
@@ -3406,6 +3413,7 @@ int btrfs_free_block_groups(struct btrfs_fs_info *info)
                ASSERT(list_empty(&block_group->io_list));
                ASSERT(list_empty(&block_group->bg_list));
                ASSERT(refcount_read(&block_group->refs) == 1);
+               ASSERT(block_group->swap_extents == 0);
                btrfs_put_block_group(block_group);
 
                spin_lock(&info->block_group_cache_lock);
@@ -3472,3 +3480,26 @@ void btrfs_unfreeze_block_group(struct btrfs_block_group *block_group)
                __btrfs_remove_free_space_cache(block_group->free_space_ctl);
        }
 }
+
+bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg)
+{
+       bool ret = true;
+
+       spin_lock(&bg->lock);
+       if (bg->ro)
+               ret = false;
+       else
+               bg->swap_extents++;
+       spin_unlock(&bg->lock);
+
+       return ret;
+}
+
+void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount)
+{
+       spin_lock(&bg->lock);
+       ASSERT(!bg->ro);
+       ASSERT(bg->swap_extents >= amount);
+       bg->swap_extents -= amount;
+       spin_unlock(&bg->lock);
+}
index 2967842..3ecc337 100644 (file)
@@ -186,6 +186,12 @@ struct btrfs_block_group {
        /* Flag indicating this block group is placed on a sequential zone */
        bool seq_zone;
 
+       /*
+        * Number of extents in this block group used for swap files.
+        * All accesses protected by the spinlock 'lock'.
+        */
+       int swap_extents;
+
        /* Record locked full stripes for RAID5/6 block group */
        struct btrfs_full_stripe_locks_tree full_stripe_locks_root;
 
@@ -312,4 +318,7 @@ static inline int btrfs_block_group_done(struct btrfs_block_group *cache)
 void btrfs_freeze_block_group(struct btrfs_block_group *cache);
 void btrfs_unfreeze_block_group(struct btrfs_block_group *cache);
 
+bool btrfs_inc_block_group_swap_extents(struct btrfs_block_group *bg);
+void btrfs_dec_block_group_swap_extents(struct btrfs_block_group *bg, int amount);
+
 #endif /* BTRFS_BLOCK_GROUP_H */
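
The intended pairing of the two new helpers, sketched under the assumption of a swapfile activate/deactivate path; the real call sites are outside this hunk and the error handling here is schematic:

/* Illustrative pairing only, not code from this merge. */
if (!btrfs_inc_block_group_swap_extents(bg))
	return -EINVAL;	/* bg went read-only first; cannot host swap */

/*
 * ... the extent is handed to the swap layer; from now on
 * inc_block_group_ro() sees swap_extents > 0 and refuses with
 * -ETXTBSY, closing the race with balance/scrub ...
 */

/* On swapfile deactivation, drop the reference taken above: */
btrfs_dec_block_group_swap_extents(bg, 1);
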
index 6d203ac..3f4c832 100644 (file)
@@ -141,6 +141,7 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
        struct btrfs_fs_info *fs_info = inode->root->fs_info;
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
        const u32 csum_size = fs_info->csum_size;
+       const u32 sectorsize = fs_info->sectorsize;
        struct page *page;
        unsigned long i;
        char *kaddr;
@@ -154,22 +155,34 @@ static int check_compressed_csum(struct btrfs_inode *inode, struct bio *bio,
        shash->tfm = fs_info->csum_shash;
 
        for (i = 0; i < cb->nr_pages; i++) {
+               u32 pg_offset;
+               u32 bytes_left = PAGE_SIZE;
                page = cb->compressed_pages[i];
 
-               kaddr = kmap_atomic(page);
-               crypto_shash_digest(shash, kaddr, PAGE_SIZE, csum);
-               kunmap_atomic(kaddr);
-
-               if (memcmp(&csum, cb_sum, csum_size)) {
-                       btrfs_print_data_csum_error(inode, disk_start,
-                                       csum, cb_sum, cb->mirror_num);
-                       if (btrfs_io_bio(bio)->device)
-                               btrfs_dev_stat_inc_and_print(
-                                       btrfs_io_bio(bio)->device,
-                                       BTRFS_DEV_STAT_CORRUPTION_ERRS);
-                       return -EIO;
+               /* Determine the remaining bytes inside the page first */
+               if (i == cb->nr_pages - 1)
+                       bytes_left = cb->compressed_len - i * PAGE_SIZE;
+
+               /* Hash through the page sector by sector */
+               for (pg_offset = 0; pg_offset < bytes_left;
+                    pg_offset += sectorsize) {
+                       kaddr = kmap_atomic(page);
+                       crypto_shash_digest(shash, kaddr + pg_offset,
+                                           sectorsize, csum);
+                       kunmap_atomic(kaddr);
+
+                       if (memcmp(&csum, cb_sum, csum_size) != 0) {
+                               btrfs_print_data_csum_error(inode, disk_start,
+                                               csum, cb_sum, cb->mirror_num);
+                               if (btrfs_io_bio(bio)->device)
+                                       btrfs_dev_stat_inc_and_print(
+                                               btrfs_io_bio(bio)->device,
+                                               BTRFS_DEV_STAT_CORRUPTION_ERRS);
+                               return -EIO;
+                       }
+                       cb_sum += csum_size;
+                       disk_start += sectorsize;
                }
-               cb_sum += csum_size;
        }
        return 0;
 }
@@ -640,7 +653,7 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        read_lock(&em_tree->lock);
        em = lookup_extent_mapping(em_tree,
                                   page_offset(bio_first_page_all(bio)),
-                                  PAGE_SIZE);
+                                  fs_info->sectorsize);
        read_unlock(&em_tree->lock);
        if (!em)
                return BLK_STS_IOERR;
@@ -698,19 +711,30 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
        refcount_set(&cb->pending_bios, 1);
 
        for (pg_index = 0; pg_index < nr_pages; pg_index++) {
+               u32 pg_len = PAGE_SIZE;
                int submit = 0;
 
+               /*
+                * To handle the subpage case, we need to make sure the bio only
+                * covers the range we need.
+                *
+                * If we're at the last page, truncate the length to only cover
+                * the remaining part.
+                */
+               if (pg_index == nr_pages - 1)
+                       pg_len = min_t(u32, PAGE_SIZE,
+                                       compressed_len - pg_index * PAGE_SIZE);
+
                page = cb->compressed_pages[pg_index];
                page->mapping = inode->i_mapping;
                page->index = em_start >> PAGE_SHIFT;
 
                if (comp_bio->bi_iter.bi_size)
-                       submit = btrfs_bio_fits_in_stripe(page, PAGE_SIZE,
+                       submit = btrfs_bio_fits_in_stripe(page, pg_len,
                                                          comp_bio, 0);
 
                page->mapping = NULL;
-               if (submit || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
-                   PAGE_SIZE) {
+               if (submit || bio_add_page(comp_bio, page, pg_len, 0) < pg_len) {
                        unsigned int nr_sectors;
 
                        ret = btrfs_bio_wq_end_io(fs_info, comp_bio,
@@ -743,9 +767,9 @@ blk_status_t btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
                        comp_bio->bi_private = cb;
                        comp_bio->bi_end_io = end_compressed_bio_read;
 
-                       bio_add_page(comp_bio, page, PAGE_SIZE, 0);
+                       bio_add_page(comp_bio, page, pg_len, 0);
                }
-               cur_disk_byte += PAGE_SIZE;
+               cur_disk_byte += pg_len;
        }
 
        ret = btrfs_bio_wq_end_io(fs_info, comp_bio, BTRFS_WQ_ENDIO_DATA);
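
The pg_len truncation on the last page is plain arithmetic; one worked instance with invented sizes:

/*
 * Illustrative: compressed_len == 10000 and PAGE_SIZE == 4096, so
 * nr_pages == 3. For pg_index == 2, the last page:
 *
 *	pg_len = min(4096, 10000 - 2 * 4096) == 1808
 *
 * so the bio covers only the 1808 live bytes rather than a full page,
 * which is what the subpage case requires.
 */
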
@@ -1237,7 +1261,6 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
        unsigned long prev_start_byte;
        unsigned long working_bytes = total_out - buf_start;
        unsigned long bytes;
-       char *kaddr;
        struct bio_vec bvec = bio_iter_iovec(bio, bio->bi_iter);
 
        /*
@@ -1268,9 +1291,8 @@ int btrfs_decompress_buf2page(const char *buf, unsigned long buf_start,
                                PAGE_SIZE - (buf_offset % PAGE_SIZE));
                bytes = min(bytes, working_bytes);
 
-               kaddr = kmap_atomic(bvec.bv_page);
-               memcpy(kaddr + bvec.bv_offset, buf + buf_offset, bytes);
-               kunmap_atomic(kaddr);
+               memcpy_to_page(bvec.bv_page, bvec.bv_offset, buf + buf_offset,
+                              bytes);
                flush_dcache_page(bvec.bv_page);
 
                buf_offset += bytes;
index d56730a..34b929b 100644 (file)
@@ -1365,7 +1365,9 @@ get_old_root(struct btrfs_root *root, u64 time_seq)
                                   "failed to read tree block %llu from get_old_root",
                                   logical);
                } else {
+                       btrfs_tree_read_lock(old);
                        eb = btrfs_clone_extent_buffer(old);
+                       btrfs_tree_read_unlock(old);
                        free_extent_buffer(old);
                }
        } else if (old_root) {
index 3bc00ae..9ae776a 100644 (file)
@@ -524,6 +524,11 @@ struct btrfs_swapfile_pin {
         * points to a struct btrfs_device.
         */
        bool is_block_group;
+       /*
+        * Only used when 'is_block_group' is true and it is the number of
+        * extents used by a swapfile for this block group ('ptr' field).
+        */
+       int bg_extent_count;
 };
 
 bool btrfs_pinned_by_swapfile(struct btrfs_fs_info *fs_info, void *ptr);
@@ -3635,7 +3640,8 @@ static inline int __btrfs_fs_compat_ro(struct btrfs_fs_info *fs_info, u64 flag)
 /* acl.c */
 #ifdef CONFIG_BTRFS_FS_POSIX_ACL
 struct posix_acl *btrfs_get_acl(struct inode *inode, int type);
-int btrfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int btrfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                 struct posix_acl *acl, int type);
 int btrfs_init_acl(struct btrfs_trans_handle *trans,
                   struct inode *inode, struct inode *dir);
 #else
index ec0b50b..bf25401 100644 (file)
@@ -627,7 +627,8 @@ static int btrfs_delayed_inode_reserve_metadata(
         */
        if (!src_rsv || (!trans->bytes_reserved &&
                         src_rsv->type != BTRFS_BLOCK_RSV_DELALLOC)) {
-               ret = btrfs_qgroup_reserve_meta_prealloc(root, num_bytes, true);
+               ret = btrfs_qgroup_reserve_meta(root, num_bytes,
+                                         BTRFS_QGROUP_RSV_META_PREALLOC, true);
                if (ret < 0)
                        return ret;
                ret = btrfs_block_rsv_add(root, dst_rsv, num_bytes,
@@ -649,7 +650,7 @@ static int btrfs_delayed_inode_reserve_metadata(
                                                      btrfs_ino(inode),
                                                      num_bytes, 1);
                } else {
-                       btrfs_qgroup_free_meta_prealloc(root, fs_info->nodesize);
+                       btrfs_qgroup_free_meta_prealloc(root, num_bytes);
                }
                return ret;
        }
index 3a9c1e0..d05f735 100644 (file)
@@ -81,6 +81,9 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info)
        struct btrfs_dev_replace_item *ptr;
        u64 src_devid;
 
+       if (!dev_root)
+               return 0;
+
        path = btrfs_alloc_path();
        if (!path) {
                ret = -ENOMEM;
index 41b718c..289f1f0 100644 (file)
@@ -2387,8 +2387,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info)
        } else {
                set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state);
                fs_info->dev_root = root;
-               btrfs_init_devices_late(fs_info);
        }
+       /* Initialize fs_info for all devices in any case */
+       btrfs_init_devices_late(fs_info);
 
        /* If IGNOREDATACSUMS is set don't bother reading the csum root. */
        if (!btrfs_test_opt(fs_info, IGNOREDATACSUMS)) {
@@ -3009,6 +3010,21 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
                }
        }
 
+       /*
+        * btrfs_find_orphan_roots() is responsible for finding all the dead
+        * roots (with 0 refs), flagging them with BTRFS_ROOT_DEAD_TREE and
+        * loading them into the fs_info->fs_roots_radix tree. This must be
+        * done before
+        * calling btrfs_orphan_cleanup() on the tree root. If we don't do it
+        * first, then btrfs_orphan_cleanup() will delete a dead root's orphan
+        * item before the root's tree is deleted - this means that if we unmount
+        * or crash before the deletion completes, on the next mount we will not
+        * delete what remains of the tree because the orphan item does not
+        * exists anymore, which is what tells us we have a pending deletion.
+        */
+       ret = btrfs_find_orphan_roots(fs_info);
+       if (ret)
+               goto out;
+
        ret = btrfs_cleanup_fs_roots(fs_info);
        if (ret)
                goto out;
@@ -3068,7 +3084,6 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info)
                }
        }
 
-       ret = btrfs_find_orphan_roots(fs_info);
 out:
        return ret;
 }
index 78ad31a..36a3c97 100644 (file)
@@ -3323,6 +3323,7 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
 
        if (last_ref && btrfs_header_generation(buf) == trans->transid) {
                struct btrfs_block_group *cache;
+               bool must_pin = false;
 
                if (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID) {
                        ret = check_ref_cleanup(trans, buf->start);
@@ -3340,7 +3341,27 @@ void btrfs_free_tree_block(struct btrfs_trans_handle *trans,
                        goto out;
                }
 
-               if (btrfs_is_zoned(fs_info)) {
+               /*
+                * If this is a leaf and there are tree mod log users, we may
+                * have recorded mod log operations that point to this leaf.
+                * So we must make sure no one reuses this leaf's extent before
+                * mod log operations are applied to a node, otherwise after
+                * rewinding a node using the mod log operations we get an
+                * inconsistent btree, as the leaf's extent may now be used as
+                * a node or leaf for another different btree.
+                * We are safe from races here because at this point no other
+                * node or root points to this extent buffer, so if after this
+                * check a new tree mod log user joins, it will not be able to
+                * find a node pointing to this leaf and record operations that
+                * point to this leaf.
+                */
+               if (btrfs_header_level(buf) == 0) {
+                       read_lock(&fs_info->tree_mod_log_lock);
+                       must_pin = !list_empty(&fs_info->tree_mod_seq_list);
+                       read_unlock(&fs_info->tree_mod_log_lock);
+               }
+
+               if (must_pin || btrfs_is_zoned(fs_info)) {
                        btrfs_redirty_list_add(trans->transaction, buf);
                        pin_down_extent(trans, cache, buf->start, buf->len, 1);
                        btrfs_put_block_group(cache);
index 4dfb3ea..910769d 100644 (file)
@@ -2886,6 +2886,35 @@ static void end_page_read(struct page *page, bool uptodate, u64 start, u32 len)
 }
 
 /*
+ * Find the extent buffer for a given bytenr.
+ *
+ * This is for end_bio_extent_readpage(), thus we can't do any unsafe locking
+ * in endio context.
+ */
+static struct extent_buffer *find_extent_buffer_readpage(
+               struct btrfs_fs_info *fs_info, struct page *page, u64 bytenr)
+{
+       struct extent_buffer *eb;
+
+       /*
+        * For regular sectorsize, we can use page->private to grab the
+        * extent buffer.
+        */
+       if (fs_info->sectorsize == PAGE_SIZE) {
+               ASSERT(PagePrivate(page) && page->private);
+               return (struct extent_buffer *)page->private;
+       }
+
+       /* For subpage case, we need to lookup buffer radix tree */
+       rcu_read_lock();
+       eb = radix_tree_lookup(&fs_info->buffer_radix,
+                              bytenr >> fs_info->sectorsize_bits);
+       rcu_read_unlock();
+       ASSERT(eb);
+       return eb;
+}
+
+/*
  * after a readpage IO is done, we need to:
  * clear the uptodate bits on error
  * set the uptodate bits if things worked
@@ -2996,7 +3025,7 @@ static void end_bio_extent_readpage(struct bio *bio)
                } else {
                        struct extent_buffer *eb;
 
-                       eb = (struct extent_buffer *)page->private;
+                       eb = find_extent_buffer_readpage(fs_info, page, start);
                        set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
                        eb->read_mirror = mirror;
                        atomic_dec(&eb->io_pages);
@@ -3008,12 +3037,23 @@ readpage_ok:
                if (likely(uptodate)) {
                        loff_t i_size = i_size_read(inode);
                        pgoff_t end_index = i_size >> PAGE_SHIFT;
-                       unsigned off;
 
-                       /* Zero out the end if this page straddles i_size */
-                       off = offset_in_page(i_size);
-                       if (page->index == end_index && off)
-                               zero_user_segment(page, off, PAGE_SIZE);
+                       /*
+                        * Zero out the remaining part if this range straddles
+                        * i_size.
+                        *
+                        * Here we should only zero the range inside the bvec,
+                        * not touch anything else.
+                        *
+                        * NOTE: i_size is exclusive while end is inclusive.
+                        */
+                       if (page->index == end_index && i_size <= end) {
+                               u32 zero_start = max(offset_in_page(i_size),
+                                                    offset_in_page(start));
+
+                               zero_user_segment(page, zero_start,
+                                                 offset_in_page(end) + 1);
+                       }
                }
                ASSERT(bio_offset + len > bio_offset);
                bio_offset += len;
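
A worked example of the new per-bvec zeroing, again with invented numbers:

/*
 * Illustrative values: PAGE_SIZE == 4096, i_size == 6000, and a bvec
 * covering file bytes [4096, 8191], so start == 4096, end == 8191 and
 * page->index == end_index (6000 >> 12 == 1), with i_size <= end.
 *
 *	offset_in_page(i_size) == 1904
 *	offset_in_page(start)  == 0
 *	zero_start             == max(1904, 0) == 1904
 *
 * zero_user_segment(page, 1904, offset_in_page(8191) + 1 == 4096)
 * clears only the tail beyond i_size, and only within this bvec,
 * which is exactly what the subpage read path needs.
 */
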
@@ -3048,7 +3088,7 @@ struct bio *btrfs_bio_alloc(u64 first_byte)
 {
        struct bio *bio;
 
-       bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &btrfs_bioset);
+       bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &btrfs_bioset);
        bio->bi_iter.bi_sector = first_byte >> 9;
        btrfs_io_bio_init(btrfs_io_bio(bio));
        return bio;
index be9e390..0e155f0 100644 (file)
@@ -3260,8 +3260,11 @@ reserve_space:
                        goto out;
                ret = btrfs_qgroup_reserve_data(BTRFS_I(inode), &data_reserved,
                                                alloc_start, bytes_to_reserve);
-               if (ret)
+               if (ret) {
+                       unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart,
+                                            lockend, &cached_state);
                        goto out;
+               }
                ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
                                                alloc_end - alloc_start,
                                                i_blocksize(inode),
@@ -3634,7 +3637,7 @@ static ssize_t btrfs_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
                        return ret;
        }
 
-       return generic_file_buffered_read(iocb, to, ret);
+       return filemap_read(iocb, to, ret);
 }
 
 const struct file_operations btrfs_file_operations = {
index 5400294..9988dec 100644 (file)
@@ -2555,7 +2555,12 @@ static int __btrfs_add_free_space_zoned(struct btrfs_block_group *block_group,
        to_unusable = size - to_free;
 
        ctl->free_space += to_free;
-       block_group->zone_unusable += to_unusable;
+       /*
+        * If the block group is read-only, we should account freed space into
+        * bytes_readonly.
+        */
+       if (!block_group->ro)
+               block_group->zone_unusable += to_unusable;
        spin_unlock(&ctl->tree_lock);
        if (!used) {
                spin_lock(&block_group->lock);
@@ -2801,8 +2806,10 @@ static void __btrfs_return_cluster_to_free_space(
        struct rb_node *node;
 
        spin_lock(&cluster->lock);
-       if (cluster->block_group != block_group)
-               goto out;
+       if (cluster->block_group != block_group) {
+               spin_unlock(&cluster->lock);
+               return;
+       }
 
        cluster->block_group = NULL;
        cluster->window_start = 0;
@@ -2840,8 +2847,6 @@ static void __btrfs_return_cluster_to_free_space(
                                   entry->offset, &entry->offset_index, bitmap);
        }
        cluster->root = RB_ROOT;
-
-out:
        spin_unlock(&cluster->lock);
        btrfs_put_block_group(block_group);
 }
@@ -3125,8 +3130,6 @@ u64 btrfs_alloc_from_cluster(struct btrfs_block_group *block_group,
                        entry->bytes -= bytes;
                }
 
-               if (entry->bytes == 0)
-                       rb_erase(&entry->offset_index, &cluster->root);
                break;
        }
 out:
@@ -3143,7 +3146,10 @@ out:
        ctl->free_space -= bytes;
        if (!entry->bitmap && !btrfs_free_space_trimmed(entry))
                ctl->discardable_bytes[BTRFS_STAT_CURR] -= bytes;
+
+       spin_lock(&cluster->lock);
        if (entry->bytes == 0) {
+               rb_erase(&entry->offset_index, &cluster->root);
                ctl->free_extents--;
                if (entry->bitmap) {
                        kmem_cache_free(btrfs_free_space_bitmap_cachep,
@@ -3156,6 +3162,7 @@ out:
                kmem_cache_free(btrfs_free_space_cachep, entry);
        }
 
+       spin_unlock(&cluster->lock);
        spin_unlock(&ctl->tree_lock);
 
        return ret;
index 535abf8..a520775 100644 (file)
@@ -1674,9 +1674,6 @@ next_slot:
                         */
                        btrfs_release_path(path);
 
-                       /* If extent is RO, we must COW it */
-                       if (btrfs_extent_readonly(fs_info, disk_bytenr))
-                               goto out_check;
                        ret = btrfs_cross_ref_exist(root, ino,
                                                    found_key.offset -
                                                    extent_offset, disk_bytenr, false);
@@ -1723,6 +1720,7 @@ next_slot:
                                WARN_ON_ONCE(freespace_inode);
                                goto out_check;
                        }
+                       /* If the extent's block group is RO, we must COW */
                        if (!btrfs_inc_nocow_writers(fs_info, disk_bytenr))
                                goto out_check;
                        nocow = true;
@@ -3101,11 +3099,13 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start,
  * @bio_offset:        offset to the beginning of the bio (in bytes)
  * @page:      page where is the data to be verified
  * @pgoff:     offset inside the page
+ * @start:     logical offset in the file
  *
  * The length of such check is always one sector size.
  */
 static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
-                          u32 bio_offset, struct page *page, u32 pgoff)
+                          u32 bio_offset, struct page *page, u32 pgoff,
+                          u64 start)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
        SHASH_DESC_ON_STACK(shash, fs_info->csum_shash);
@@ -3132,8 +3132,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio,
        kunmap_atomic(kaddr);
        return 0;
 zeroit:
-       btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff,
-                                   csum, csum_expected, io_bio->mirror_num);
+       btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected,
+                                   io_bio->mirror_num);
        if (io_bio->device)
                btrfs_dev_stat_inc_and_print(io_bio->device,
                                             BTRFS_DEV_STAT_CORRUPTION_ERRS);
@@ -3186,7 +3186,8 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset,
             pg_off += sectorsize, bio_offset += sectorsize) {
                int ret;
 
-               ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off);
+               ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off,
+                                     page_offset(page) + pg_off);
                if (ret < 0)
                        return -EIO;
        }
@@ -5212,7 +5213,8 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
        return ret;
 }
 
-static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int btrfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                        struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct btrfs_root *root = BTRFS_I(inode)->root;
@@ -5221,7 +5223,7 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
        if (btrfs_root_readonly(root))
                return -EROFS;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                return err;
 
@@ -5232,12 +5234,13 @@ static int btrfs_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (attr->ia_valid) {
-               setattr_copy(inode, attr);
+               setattr_copy(&init_user_ns, inode, attr);
                inode_inc_iversion(inode);
                err = btrfs_dirty_inode(inode);
 
                if (!err && attr->ia_valid & ATTR_MODE)
-                       err = posix_acl_chmod(inode, inode->i_mode);
+                       err = posix_acl_chmod(&init_user_ns, inode,
+                                             inode->i_mode);
        }
 
        return err;
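
The signature churn here, repeated in the mknod/create/mkdir/getattr/rename2/symlink/permission/tmpfile hunks below, is the 5.12 idmapped-mounts series: inode operations and the VFS helpers (setattr_prepare(), setattr_copy(), posix_acl_chmod(), generic_fillattr(), inode_init_owner(), generic_permission(), inode_permission(), inode_owner_or_capable(), ...) now take a struct user_namespace * for the mount. btrfs does not support idmapped mounts yet, so it passes &init_user_ns, which makes every helper behave exactly as before. A minimal sketch of the convention (the op name is illustrative, not btrfs code):

static int sketch_setattr(struct user_namespace *mnt_userns,
			  struct dentry *dentry, struct iattr *attr)
{
	struct inode *inode = d_inode(dentry);
	int err;

	/* Passing &init_user_ns opts out of idmapping translation. */
	err = setattr_prepare(&init_user_ns, dentry, attr);
	if (err)
		return err;

	setattr_copy(&init_user_ns, inode, attr);
	mark_inode_dirty(inode);
	return 0;
}
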
@@ -6083,7 +6086,7 @@ static int btrfs_dirty_inode(struct inode *inode)
                return PTR_ERR(trans);
 
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
-       if (ret && ret == -ENOSPC) {
+       if (ret && (ret == -ENOSPC || ret == -EDQUOT)) {
                /* whoops, let's try again with the full transaction */
                btrfs_end_transaction(trans);
                trans = btrfs_start_transaction(root, 1);
@@ -6357,7 +6360,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
        if (ret != 0)
                goto fail_unlock;
 
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode_set_bytes(inode, 0);
 
        inode->i_mtime = current_time(inode);
@@ -6518,8 +6521,8 @@ static int btrfs_add_nondir(struct btrfs_trans_handle *trans,
        return err;
 }
 
-static int btrfs_mknod(struct inode *dir, struct dentry *dentry,
-                       umode_t mode, dev_t rdev)
+static int btrfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -6582,8 +6585,8 @@ out_unlock:
        return err;
 }
 
-static int btrfs_create(struct inode *dir, struct dentry *dentry,
-                       umode_t mode, bool excl)
+static int btrfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -6727,7 +6730,8 @@ fail:
        return err;
 }
 
-static int btrfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int btrfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct inode *inode = NULL;
@@ -7909,7 +7913,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode,
                        ASSERT(pgoff < PAGE_SIZE);
                        if (uptodate &&
                            (!csum || !check_data_csum(inode, io_bio,
-                                       bio_offset, bvec.bv_page, pgoff))) {
+                                                      bio_offset, bvec.bv_page,
+                                                      pgoff, start))) {
                                clean_io_failure(fs_info, failure_tree, io_tree,
                                                 start, bvec.bv_page,
                                                 btrfs_ino(BTRFS_I(inode)),
@@ -8168,10 +8173,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap,
                bio->bi_end_io = btrfs_end_dio_bio;
                btrfs_io_bio(bio)->logical = file_offset;
 
-               WARN_ON_ONCE(write && btrfs_is_zoned(fs_info) &&
-                            fs_info->max_zone_append_size &&
-                            bio_op(bio) != REQ_OP_ZONE_APPEND);
-
                if (bio_op(bio) == REQ_OP_ZONE_APPEND) {
                        status = extract_ordered_extent(BTRFS_I(inode), bio,
                                                        file_offset);
@@ -9007,7 +9008,7 @@ int __init btrfs_init_cachep(void)
 
        btrfs_free_space_bitmap_cachep = kmem_cache_create("btrfs_free_space_bitmap",
                                                        PAGE_SIZE, PAGE_SIZE,
-                                                       SLAB_RED_ZONE, NULL);
+                                                       SLAB_MEM_SPREAD, NULL);
        if (!btrfs_free_space_bitmap_cachep)
                goto fail;
 
@@ -9017,7 +9018,8 @@ fail:
        return -ENOMEM;
 }
 
-static int btrfs_getattr(const struct path *path, struct kstat *stat,
+static int btrfs_getattr(struct user_namespace *mnt_userns,
+                        const struct path *path, struct kstat *stat,
                         u32 request_mask, unsigned int flags)
 {
        u64 delalloc_bytes;
@@ -9043,7 +9045,7 @@ static int btrfs_getattr(const struct path *path, struct kstat *stat,
                                  STATX_ATTR_IMMUTABLE |
                                  STATX_ATTR_NODUMP);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        stat->dev = BTRFS_I(inode)->root->anon_dev;
 
        spin_lock(&BTRFS_I(inode)->lock);
@@ -9534,9 +9536,9 @@ out_notrans:
        return ret;
 }
 
-static int btrfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
-                        struct inode *new_dir, struct dentry *new_dentry,
-                        unsigned int flags)
+static int btrfs_rename2(struct user_namespace *mnt_userns, struct inode *old_dir,
+                        struct dentry *old_dentry, struct inode *new_dir,
+                        struct dentry *new_dentry, unsigned int flags)
 {
        if (flags & ~(RENAME_NOREPLACE | RENAME_EXCHANGE | RENAME_WHITEOUT))
                return -EINVAL;
@@ -9744,8 +9746,8 @@ out:
        return ret;
 }
 
-static int btrfs_symlink(struct inode *dir, struct dentry *dentry,
-                        const char *symname)
+static int btrfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -9875,6 +9877,7 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        struct btrfs_path *path;
        u64 start = ins->objectid;
        u64 len = ins->offset;
+       int qgroup_released;
        int ret;
 
        memset(&stack_fi, 0, sizeof(stack_fi));
@@ -9887,16 +9890,16 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        btrfs_set_stack_file_extent_compression(&stack_fi, BTRFS_COMPRESS_NONE);
        /* Encryption and other encoding is reserved and all 0 */
 
-       ret = btrfs_qgroup_release_data(inode, file_offset, len);
-       if (ret < 0)
-               return ERR_PTR(ret);
+       qgroup_released = btrfs_qgroup_release_data(inode, file_offset, len);
+       if (qgroup_released < 0)
+               return ERR_PTR(qgroup_released);
 
        if (trans) {
                ret = insert_reserved_file_extent(trans, inode,
                                                  file_offset, &stack_fi,
-                                                 true, ret);
+                                                 true, qgroup_released);
                if (ret)
-                       return ERR_PTR(ret);
+                       goto free_qgroup;
                return trans;
        }
 
@@ -9907,21 +9910,35 @@ static struct btrfs_trans_handle *insert_prealloc_file_extent(
        extent_info.file_offset = file_offset;
        extent_info.extent_buf = (char *)&stack_fi;
        extent_info.is_new_extent = true;
-       extent_info.qgroup_reserved = ret;
+       extent_info.qgroup_reserved = qgroup_released;
        extent_info.insertions = 0;
 
        path = btrfs_alloc_path();
-       if (!path)
-               return ERR_PTR(-ENOMEM);
+       if (!path) {
+               ret = -ENOMEM;
+               goto free_qgroup;
+       }
 
        ret = btrfs_replace_file_extents(&inode->vfs_inode, path, file_offset,
                                     file_offset + len - 1, &extent_info,
                                     &trans);
        btrfs_free_path(path);
        if (ret)
-               return ERR_PTR(ret);
-
+               goto free_qgroup;
        return trans;
+
+free_qgroup:
+       /*
+        * We released the qgroup data range at the beginning of the function;
+        * normally the qgroup_released bytes are freed when the transaction
+        * commits. But if we error out early, we have to free what we
+        * released, or we leak the qgroup data reservation.
+        */
+       btrfs_qgroup_free_refroot(inode->root->fs_info,
+                       inode->root->root_key.objectid, qgroup_released,
+                       BTRFS_QGROUP_RSV_DATA);
+       return ERR_PTR(ret);
 }
 
 static int __btrfs_prealloc_file_range(struct inode *inode, int mode,
@@ -10079,7 +10096,8 @@ static int btrfs_set_page_dirty(struct page *page)
        return __set_page_dirty_nobuffers(page);
 }
 
-static int btrfs_permission(struct inode *inode, int mask)
+static int btrfs_permission(struct user_namespace *mnt_userns,
+                           struct inode *inode, int mask)
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        umode_t mode = inode->i_mode;
@@ -10091,10 +10109,11 @@ static int btrfs_permission(struct inode *inode, int mask)
                if (BTRFS_I(inode)->flags & BTRFS_INODE_READONLY)
                        return -EACCES;
        }
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
-static int btrfs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int btrfs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode)
 {
        struct btrfs_fs_info *fs_info = btrfs_sb(dir->i_sb);
        struct btrfs_trans_handle *trans;
@@ -10194,6 +10213,7 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
        sp->ptr = ptr;
        sp->inode = inode;
        sp->is_block_group = is_block_group;
+       sp->bg_extent_count = 1;
 
        spin_lock(&fs_info->swapfile_pins_lock);
        p = &fs_info->swapfile_pins.rb_node;
@@ -10207,6 +10227,8 @@ static int btrfs_add_swapfile_pin(struct inode *inode, void *ptr,
                           (sp->ptr == entry->ptr && sp->inode > entry->inode)) {
                        p = &(*p)->rb_right;
                } else {
+                       if (is_block_group)
+                               entry->bg_extent_count++;
                        spin_unlock(&fs_info->swapfile_pins_lock);
                        kfree(sp);
                        return 1;
@@ -10232,8 +10254,11 @@ static void btrfs_free_swapfile_pins(struct inode *inode)
                sp = rb_entry(node, struct btrfs_swapfile_pin, node);
                if (sp->inode == inode) {
                        rb_erase(&sp->node, &fs_info->swapfile_pins);
-                       if (sp->is_block_group)
+                       if (sp->is_block_group) {
+                               btrfs_dec_block_group_swap_extents(sp->ptr,
+                                                          sp->bg_extent_count);
                                btrfs_put_block_group(sp->ptr);
+                       }
                        kfree(sp);
                }
                node = next;
@@ -10294,7 +10319,8 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                               sector_t *span)
 {
        struct inode *inode = file_inode(file);
-       struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+       struct btrfs_fs_info *fs_info = root->fs_info;
        struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
        struct extent_state *cached_state = NULL;
        struct extent_map *em = NULL;
@@ -10345,13 +10371,27 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
           "cannot activate swapfile while exclusive operation is running");
                return -EBUSY;
        }
+
+       /*
+        * Prevent snapshot creation while we are activating the swap file.
+        * We do not want to race with snapshot creation. If snapshot creation
+        * already started before we bumped nr_swapfiles from 0 to 1 and
+        * completes before the first write into the swap file after it is
+        * activated, then that write would fall back to COW.
+        */
+       if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
+               btrfs_exclop_finish(fs_info);
+               btrfs_warn(fs_info,
+          "cannot activate swapfile because snapshot creation is in progress");
+               return -EINVAL;
+       }
        /*
         * Snapshots can create extents which require COW even if NODATACOW is
         * set. We use this counter to prevent snapshots. We must increment it
         * before walking the extents because we don't want a concurrent
         * snapshot to run after we've already checked the extents.
         */
-       atomic_inc(&BTRFS_I(inode)->root->nr_swapfiles);
+       atomic_inc(&root->nr_swapfiles);
 
        isize = ALIGN_DOWN(inode->i_size, fs_info->sectorsize);
 
@@ -10448,6 +10488,17 @@ static int btrfs_swap_activate(struct swap_info_struct *sis, struct file *file,
                        goto out;
                }
 
+               if (!btrfs_inc_block_group_swap_extents(bg)) {
+                       btrfs_warn(fs_info,
+                          "block group for swapfile at %llu is read-only%s",
+                          bg->start,
+                          atomic_read(&fs_info->scrubs_running) ?
+                                      " (scrub running)" : "");
+                       btrfs_put_block_group(bg);
+                       ret = -EINVAL;
+                       goto out;
+               }
+
                ret = btrfs_add_swapfile_pin(inode, bg, true);
                if (ret) {
                        btrfs_put_block_group(bg);
@@ -10486,6 +10537,8 @@ out:
        if (ret)
                btrfs_swap_deactivate(file);
 
+       btrfs_drew_write_unlock(&root->snapshot_lock);
+
        btrfs_exclop_finish(fs_info);
 
        if (ret)
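
Two related swapfile hardenings land above: activation now takes the write side of the subvolume's snapshot drew lock so it cannot overlap snapshot creation, and each block group backing the swapfile gets a swap-extent count that btrfs_inc_block_group_swap_extents() refuses to raise while the group is read-only (e.g. while scrub, which flips groups RO, is running); the scrub.c hunk further down handles the mirror case by skipping groups with active swap extents (-ETXTBSY). The try-lock-or-bail shape, as a sketch (walk_and_pin_swap_extents() is a hypothetical stand-in for the extent walk):

	if (!btrfs_drew_try_write_lock(&root->snapshot_lock)) {
		btrfs_exclop_finish(fs_info);
		return -EINVAL;	/* snapshot creation already in progress */
	}
	ret = walk_and_pin_swap_extents(inode);
	btrfs_drew_write_unlock(&root->snapshot_lock);
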
index a8c60d4..e8d53fe 100644 (file)
@@ -213,7 +213,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
        const char *comp = NULL;
        u32 binode_flags;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EPERM;
 
        if (btrfs_root_readonly(root))
@@ -429,7 +429,7 @@ static int btrfs_ioctl_fssetxattr(struct file *file, void __user *arg)
        unsigned old_i_flags;
        int ret = 0;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EPERM;
 
        if (btrfs_root_readonly(root))
@@ -925,13 +925,14 @@ static int btrfs_may_delete(struct inode *dir, struct dentry *victim, int isdir)
        BUG_ON(d_inode(victim->d_parent) != dir);
        audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 
-       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
        if (IS_APPEND(dir))
                return -EPERM;
-       if (check_sticky(dir, d_inode(victim)) || IS_APPEND(d_inode(victim)) ||
-           IS_IMMUTABLE(d_inode(victim)) || IS_SWAPFILE(d_inode(victim)))
+       if (check_sticky(&init_user_ns, dir, d_inode(victim)) ||
+           IS_APPEND(d_inode(victim)) || IS_IMMUTABLE(d_inode(victim)) ||
+           IS_SWAPFILE(d_inode(victim)))
                return -EPERM;
        if (isdir) {
                if (!d_is_dir(victim))
@@ -954,7 +955,7 @@ static inline int btrfs_may_create(struct inode *dir, struct dentry *child)
                return -EEXIST;
        if (IS_DEADDIR(dir))
                return -ENOENT;
-       return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
 }
 
 /*
@@ -1871,7 +1872,7 @@ static noinline int __btrfs_ioctl_snap_create(struct file *file,
                        btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
                                   "Snapshot src from another FS");
                        ret = -EXDEV;
-               } else if (!inode_owner_or_capable(src_inode)) {
+               } else if (!inode_owner_or_capable(&init_user_ns, src_inode)) {
                        /*
                         * Subvolume creation is not restricted, but snapshots
                         * are limited to subvolumes the caller owns
@@ -1935,7 +1936,10 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
        if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
                readonly = true;
        if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
-               if (vol_args->size > PAGE_SIZE) {
+               u64 nums;
+
+               if (vol_args->size < sizeof(*inherit) ||
+                   vol_args->size > PAGE_SIZE) {
                        ret = -EINVAL;
                        goto free_args;
                }
@@ -1944,6 +1948,20 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
                        ret = PTR_ERR(inherit);
                        goto free_args;
                }
+
+               if (inherit->num_qgroups > PAGE_SIZE ||
+                   inherit->num_ref_copies > PAGE_SIZE ||
+                   inherit->num_excl_copies > PAGE_SIZE) {
+                       ret = -EINVAL;
+                       goto free_inherit;
+               }
+
+               nums = inherit->num_qgroups + 2 * inherit->num_ref_copies +
+                      2 * inherit->num_excl_copies;
+               if (vol_args->size != struct_size(inherit, qgroups, nums)) {
+                       ret = -EINVAL;
+                       goto free_inherit;
+               }
        }
 
        ret = __btrfs_ioctl_snap_create(file, vol_args->name, vol_args->fd,
@@ -1991,7 +2009,7 @@ static noinline int btrfs_ioctl_subvol_setflags(struct file *file,
        u64 flags;
        int ret = 0;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EPERM;
 
        ret = mnt_want_write_file(file);
@@ -2547,7 +2565,8 @@ static int btrfs_search_path_in_tree_user(struct inode *inode,
                                ret = PTR_ERR(temp_inode);
                                goto out_put;
                        }
-                       ret = inode_permission(temp_inode, MAY_READ | MAY_EXEC);
+                       ret = inode_permission(&init_user_ns, temp_inode,
+                                              MAY_READ | MAY_EXEC);
                        iput(temp_inode);
                        if (ret) {
                                ret = -EACCES;
@@ -3077,7 +3096,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
                if (root == dest)
                        goto out_dput;
 
-               err = inode_permission(inode, MAY_WRITE | MAY_EXEC);
+               err = inode_permission(&init_user_ns, inode,
+                                      MAY_WRITE | MAY_EXEC);
                if (err)
                        goto out_dput;
        }
@@ -3148,7 +3168,7 @@ static int btrfs_ioctl_defrag(struct file *file, void __user *argp)
                 * running and allows defrag on files open in read-only mode.
                 */
                if (!capable(CAP_SYS_ADMIN) &&
-                   inode_permission(inode, MAY_WRITE)) {
+                   inode_permission(&init_user_ns, inode, MAY_WRITE)) {
                        ret = -EPERM;
                        goto out;
                }
@@ -4460,7 +4480,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
        int ret = 0;
        int received_uuid_changed;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EPERM;
 
        ret = mnt_want_write_file(file);
index aa9cd11..9084a95 100644 (file)
@@ -467,7 +467,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
        destlen = min_t(unsigned long, destlen, PAGE_SIZE);
        bytes = min_t(unsigned long, destlen, out_len - start_byte);
 
-       kaddr = kmap_atomic(dest_page);
+       kaddr = kmap_local_page(dest_page);
        memcpy(kaddr, workspace->buf + start_byte, bytes);
 
        /*
@@ -477,7 +477,7 @@ int lzo_decompress(struct list_head *ws, unsigned char *data_in,
         */
        if (bytes < destlen)
                memset(kaddr+bytes, 0, destlen-bytes);
-       kunmap_atomic(kaddr);
+       kunmap_local(kaddr);
 out:
        return ret;
 }
index 808370a..f0b9ef1 100644 (file)
@@ -226,7 +226,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
 {
        struct btrfs_qgroup_list *list;
 
-       btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
        list_del(&qgroup->dirty);
        while (!list_empty(&qgroup->groups)) {
                list = list_first_entry(&qgroup->groups,
@@ -243,7 +242,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info,
                list_del(&list->next_member);
                kfree(list);
        }
-       kfree(qgroup);
 }
 
 /* must be called with qgroup_lock held */
@@ -569,6 +567,8 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info)
                qgroup = rb_entry(n, struct btrfs_qgroup, node);
                rb_erase(n, &fs_info->qgroup_tree);
                __del_qgroup_rb(fs_info, qgroup);
+               btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+               kfree(qgroup);
        }
        /*
         * We call btrfs_free_qgroup_config() when unmounting
@@ -1578,6 +1578,14 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid)
        spin_lock(&fs_info->qgroup_lock);
        del_qgroup_rb(fs_info, qgroupid);
        spin_unlock(&fs_info->qgroup_lock);
+
+       /*
+        * Remove the qgroup from sysfs now without holding the qgroup_lock
+        * spinlock, since sysfs_remove_group() needs to take the kernfs_mutex
+        * mutex (and may therefore sleep) via kernfs_remove_by_name_ns().
+        */
+       btrfs_sysfs_del_one_qgroup(fs_info, qgroup);
+       kfree(qgroup);
 out:
        mutex_unlock(&fs_info->qgroup_ioctl_lock);
        return ret;
@@ -3841,8 +3849,8 @@ static int sub_root_meta_rsv(struct btrfs_root *root, int num_bytes,
        return num_bytes;
 }
 
-static int qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
-                               enum btrfs_qgroup_rsv_type type, bool enforce)
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                             enum btrfs_qgroup_rsv_type type, bool enforce)
 {
        struct btrfs_fs_info *fs_info = root->fs_info;
        int ret;
@@ -3873,14 +3881,14 @@ int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
 {
        int ret;
 
-       ret = qgroup_reserve_meta(root, num_bytes, type, enforce);
+       ret = btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
        if (ret <= 0 && ret != -EDQUOT)
                return ret;
 
        ret = try_flush_qgroup(root);
        if (ret < 0)
                return ret;
-       return qgroup_reserve_meta(root, num_bytes, type, enforce);
+       return btrfs_qgroup_reserve_meta(root, num_bytes, type, enforce);
 }
 
 void btrfs_qgroup_free_meta_all_pertrans(struct btrfs_root *root)
index 50dea9a..7283e4f 100644 (file)
@@ -361,6 +361,8 @@ int btrfs_qgroup_release_data(struct btrfs_inode *inode, u64 start, u64 len);
 int btrfs_qgroup_free_data(struct btrfs_inode *inode,
                           struct extent_changeset *reserved, u64 start,
                           u64 len);
+int btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
+                             enum btrfs_qgroup_rsv_type type, bool enforce);
 int __btrfs_qgroup_reserve_meta(struct btrfs_root *root, int num_bytes,
                                enum btrfs_qgroup_rsv_type type, bool enforce);
 /* Reserve metadata space for pertrans and prealloc type */
index 8ec34ec..8c31357 100644 (file)
@@ -249,8 +249,6 @@ int btrfs_alloc_stripe_hash_table(struct btrfs_fs_info *info)
 static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
 {
        int i;
-       char *s;
-       char *d;
        int ret;
 
        ret = alloc_rbio_pages(rbio);
@@ -261,13 +259,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
                if (!rbio->bio_pages[i])
                        continue;
 
-               s = kmap(rbio->bio_pages[i]);
-               d = kmap(rbio->stripe_pages[i]);
-
-               copy_page(d, s);
-
-               kunmap(rbio->bio_pages[i]);
-               kunmap(rbio->stripe_pages[i]);
+               copy_highpage(rbio->stripe_pages[i], rbio->bio_pages[i]);
                SetPageUptodate(rbio->stripe_pages[i]);
        }
        set_bit(RBIO_CACHE_READY_BIT, &rbio->flags);
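
copy_highpage() collapses the kmap/copy_page/kunmap triple into a single helper; its effect is roughly the following (a sketch — the in-tree implementation may use a different mapping primitive, and note the unmaps in reverse order of the maps):

static inline void copy_highpage_sketch(struct page *to, struct page *from)
{
	char *vto = kmap_local_page(to);
	char *vfrom = kmap_local_page(from);

	copy_page(vto, vfrom);
	kunmap_local(vfrom);
	kunmap_local(vto);
}
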
@@ -2359,16 +2351,21 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
        SetPageUptodate(p_page);
 
        if (has_qstripe) {
+               /* RAID6, allocate and map temp space for the Q stripe */
                q_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
                if (!q_page) {
                        __free_page(p_page);
                        goto cleanup;
                }
                SetPageUptodate(q_page);
+               pointers[rbio->real_stripes - 1] = kmap(q_page);
        }
 
        atomic_set(&rbio->error, 0);
 
+       /* Map the parity stripe just once */
+       pointers[nr_data] = kmap(p_page);
+
        for_each_set_bit(pagenr, rbio->dbitmap, rbio->stripe_npages) {
                struct page *p;
                void *parity;
@@ -2378,16 +2375,8 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
                        pointers[stripe] = kmap(p);
                }
 
-               /* then add the parity stripe */
-               pointers[stripe++] = kmap(p_page);
-
                if (has_qstripe) {
-                       /*
-                        * raid6, add the qstripe and call the
-                        * library function to fill in our p/q
-                        */
-                       pointers[stripe++] = kmap(q_page);
-
+                       /* RAID6, call the library function to fill in our P/Q */
                        raid6_call.gen_syndrome(rbio->real_stripes, PAGE_SIZE,
                                                pointers);
                } else {
@@ -2408,12 +2397,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
 
                for (stripe = 0; stripe < nr_data; stripe++)
                        kunmap(page_in_rbio(rbio, stripe, pagenr, 0));
-               kunmap(p_page);
        }
 
+       kunmap(p_page);
        __free_page(p_page);
-       if (q_page)
+       if (q_page) {
+               kunmap(q_page);
                __free_page(q_page);
+       }
 
 writeback:
        /*
index 20fd4aa..06713a8 100644 (file)
@@ -209,7 +209,7 @@ int btree_readahead_hook(struct extent_buffer *eb, int err)
        /* find extent */
        spin_lock(&fs_info->reada_lock);
        re = radix_tree_lookup(&fs_info->reada_tree,
-                              eb->start >> PAGE_SHIFT);
+                              eb->start >> fs_info->sectorsize_bits);
        if (re)
                re->refcnt++;
        spin_unlock(&fs_info->reada_lock);
@@ -240,7 +240,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
        zone = NULL;
        spin_lock(&fs_info->reada_lock);
        ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
-                                    logical >> PAGE_SHIFT, 1);
+                                    logical >> fs_info->sectorsize_bits, 1);
        if (ret == 1 && logical >= zone->start && logical <= zone->end) {
                kref_get(&zone->refcnt);
                spin_unlock(&fs_info->reada_lock);
@@ -283,13 +283,13 @@ static struct reada_zone *reada_find_zone(struct btrfs_device *dev, u64 logical,
 
        spin_lock(&fs_info->reada_lock);
        ret = radix_tree_insert(&dev->reada_zones,
-                               (unsigned long)(zone->end >> PAGE_SHIFT),
-                               zone);
+                       (unsigned long)(zone->end >> fs_info->sectorsize_bits),
+                       zone);
 
        if (ret == -EEXIST) {
                kfree(zone);
                ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
-                                            logical >> PAGE_SHIFT, 1);
+                                       logical >> fs_info->sectorsize_bits, 1);
                if (ret == 1 && logical >= zone->start && logical <= zone->end)
                        kref_get(&zone->refcnt);
                else
@@ -315,7 +315,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_fs_info *fs_info,
        u64 length;
        int real_stripes;
        int nzones = 0;
-       unsigned long index = logical >> PAGE_SHIFT;
+       unsigned long index = logical >> fs_info->sectorsize_bits;
        int dev_replace_is_ongoing;
        int have_zone = 0;
 
@@ -497,7 +497,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
                             struct reada_extent *re)
 {
        int i;
-       unsigned long index = re->logical >> PAGE_SHIFT;
+       unsigned long index = re->logical >> fs_info->sectorsize_bits;
 
        spin_lock(&fs_info->reada_lock);
        if (--re->refcnt) {
@@ -538,11 +538,12 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
 static void reada_zone_release(struct kref *kref)
 {
        struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
+       struct btrfs_fs_info *fs_info = zone->device->fs_info;
 
-       lockdep_assert_held(&zone->device->fs_info->reada_lock);
+       lockdep_assert_held(&fs_info->reada_lock);
 
        radix_tree_delete(&zone->device->reada_zones,
-                         zone->end >> PAGE_SHIFT);
+                         zone->end >> fs_info->sectorsize_bits);
 
        kfree(zone);
 }
@@ -593,7 +594,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical,
 static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
 {
        int i;
-       unsigned long index = zone->end >> PAGE_SHIFT;
+       unsigned long index = zone->end >> zone->device->fs_info->sectorsize_bits;
 
        for (i = 0; i < zone->ndevs; ++i) {
                struct reada_zone *peer;
@@ -628,7 +629,7 @@ static int reada_pick_zone(struct btrfs_device *dev)
                                             (void **)&zone, index, 1);
                if (ret == 0)
                        break;
-               index = (zone->end >> PAGE_SHIFT) + 1;
+               index = (zone->end >> dev->fs_info->sectorsize_bits) + 1;
                if (zone->locked) {
                        if (zone->elems > top_locked_elems) {
                                top_locked_elems = zone->elems;
@@ -709,7 +710,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
         * plugging to speed things up
         */
        ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
-                                    dev->reada_next >> PAGE_SHIFT, 1);
+                               dev->reada_next >> fs_info->sectorsize_bits, 1);
        if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
                ret = reada_pick_zone(dev);
                if (!ret) {
@@ -718,7 +719,7 @@ static int reada_start_machine_dev(struct btrfs_device *dev)
                }
                re = NULL;
                ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
-                                       dev->reada_next >> PAGE_SHIFT, 1);
+                               dev->reada_next >> fs_info->sectorsize_bits, 1);
        }
        if (ret == 0) {
                spin_unlock(&fs_info->reada_lock);
@@ -885,7 +886,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
                                pr_cont(" curr off %llu",
                                        device->reada_next - zone->start);
                        pr_cont("\n");
-                       index = (zone->end >> PAGE_SHIFT) + 1;
+                       index = (zone->end >> fs_info->sectorsize_bits) + 1;
                }
                cnt = 0;
                index = 0;
@@ -910,7 +911,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
                                }
                        }
                        pr_cont("\n");
-                       index = (re->logical >> PAGE_SHIFT) + 1;
+                       index = (re->logical >> fs_info->sectorsize_bits) + 1;
                        if (++cnt > 15)
                                break;
                }
@@ -926,7 +927,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
                if (ret == 0)
                        break;
                if (!re->scheduled) {
-                       index = (re->logical >> PAGE_SHIFT) + 1;
+                       index = (re->logical >> fs_info->sectorsize_bits) + 1;
                        continue;
                }
                pr_debug("re: logical %llu size %u list empty %d scheduled %d",
@@ -942,7 +943,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
                        }
                }
                pr_cont("\n");
-               index = (re->logical >> PAGE_SHIFT) + 1;
+               index = (re->logical >> fs_info->sectorsize_bits) + 1;
        }
        spin_unlock(&fs_info->reada_lock);
 }
index 2b490be..8e026de 100644 (file)
@@ -218,11 +218,11 @@ static void __print_stack_trace(struct btrfs_fs_info *fs_info,
        stack_trace_print(ra->trace, ra->trace_len, 2);
 }
 #else
-static void inline __save_stack_trace(struct ref_action *ra)
+static inline void __save_stack_trace(struct ref_action *ra)
 {
 }
 
-static void inline __print_stack_trace(struct btrfs_fs_info *fs_info,
+static inline void __print_stack_trace(struct btrfs_fs_info *fs_info,
                                       struct ref_action *ra)
 {
        btrfs_err(fs_info, "  ref-verify: no stacktrace support");
index b24396c..762881b 100644 (file)
@@ -106,12 +106,8 @@ static int copy_inline_to_page(struct btrfs_inode *inode,
        set_bit(BTRFS_INODE_NO_DELALLOC_FLUSH, &inode->runtime_flags);
 
        if (comp_type == BTRFS_COMPRESS_NONE) {
-               char *map;
-
-               map = kmap(page);
-               memcpy(map, data_start, datal);
+               memcpy_to_page(page, 0, data_start, datal);
                flush_dcache_page(page);
-               kunmap(page);
        } else {
                ret = btrfs_decompress(comp_type, data_start, page, 0,
                                       inline_size, datal);
@@ -553,6 +549,24 @@ process_slot:
                 */
                btrfs_release_path(path);
 
+               /*
+                * When using NO_HOLES and we are cloning a range that covers
+                * only a hole (no extents) into a range beyond the current
+                * i_size, punching a hole in the target range will not create
+                * an extent map defining a hole, because the range starts at or
+                * beyond current i_size. If the file previously had an i_size
+                * greater than the new i_size set by this clone operation, we
+                * need to make sure the next fsync is a full fsync, so that it
+                * detects and logs a hole covering a range from the current
+                * i_size to the new i_size. If the clone range covers extents,
+                * besides a hole, then we know the full sync flag was already
+                * set by previous calls to btrfs_replace_file_extents() that
+                * replaced file extent items.
+                */
+               if (last_dest_end >= i_size_read(inode))
+                       set_bit(BTRFS_INODE_NEEDS_FULL_SYNC,
+                               &BTRFS_I(inode)->runtime_flags);
+
                ret = btrfs_replace_file_extents(inode, path, last_dest_end,
                                destoff + len - 1, NULL, &trans);
                if (ret)
index 582df11..3d9088e 100644 (file)
@@ -1428,7 +1428,7 @@ static void scrub_recheck_block_on_raid56(struct btrfs_fs_info *fs_info,
        if (!first_page->dev->bdev)
                goto out;
 
-       bio = btrfs_io_bio_alloc(BIO_MAX_PAGES);
+       bio = btrfs_io_bio_alloc(BIO_MAX_VECS);
        bio_set_dev(bio, first_page->dev->bdev);
 
        for (page_num = 0; page_num < sblock->page_count; page_num++) {
@@ -3767,6 +3767,13 @@ int scrub_enumerate_chunks(struct scrub_ctx *sctx,
                         * commit_transactions.
                         */
                        ro_set = 0;
+               } else if (ret == -ETXTBSY) {
+                       btrfs_warn(fs_info,
+                  "skipping scrub of block group %llu due to active swapfile",
+                                  cache->start);
+                       scrub_pause_off(fs_info);
+                       ret = 0;
+                       goto skip_unfreeze;
                } else {
                        btrfs_warn(fs_info,
                                   "failed setting block group ro: %d", ret);
@@ -3862,7 +3869,7 @@ done:
                } else {
                        spin_unlock(&cache->lock);
                }
-
+skip_unfreeze:
                btrfs_unfreeze_block_group(cache);
                btrfs_put_block_group(cache);
                if (ret)
index f878782..8f32385 100644 (file)
@@ -4932,7 +4932,6 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
        struct btrfs_fs_info *fs_info = root->fs_info;
        struct inode *inode;
        struct page *page;
-       char *addr;
        pgoff_t index = offset >> PAGE_SHIFT;
        pgoff_t last_index;
        unsigned pg_offset = offset_in_page(offset);
@@ -4985,10 +4984,8 @@ static int put_file_data(struct send_ctx *sctx, u64 offset, u32 len)
                        }
                }
 
-               addr = kmap(page);
-               memcpy(sctx->send_buf + sctx->send_size, addr + pg_offset,
-                      cur_len);
-               kunmap(page);
+               memcpy_from_page(sctx->send_buf + sctx->send_size, page,
+                                pg_offset, cur_len);
                unlock_page(page);
                put_page(page);
                index++;
index f843564..f7a4ad8 100644 (file)
@@ -1918,8 +1918,8 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
        btrfs_resize_thread_pool(fs_info,
                fs_info->thread_pool_size, old_thread_pool_size);
 
-       if (btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
-           btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
+       if ((bool)btrfs_test_opt(fs_info, FREE_SPACE_TREE) !=
+           (bool)btrfs_fs_compat_ro(fs_info, FREE_SPACE_TREE) &&
            (!sb_rdonly(sb) || (*flags & SB_RDONLY))) {
                btrfs_warn(fs_info,
                "remount supports changing free space tree only from ro to rw");
index 6bd97bd..3a4099a 100644 (file)
@@ -62,7 +62,7 @@ struct inode *btrfs_new_test_inode(void)
        BTRFS_I(inode)->location.type = BTRFS_INODE_ITEM_KEY;
        BTRFS_I(inode)->location.objectid = BTRFS_FIRST_FREE_OBJECTID;
        BTRFS_I(inode)->location.offset = 0;
-       inode_init_owner(inode, NULL, S_IFREG);
+       inode_init_owner(&init_user_ns, inode, NULL, S_IFREG);
 
        return inode;
 }
index 582061c..f4ade82 100644 (file)
@@ -1453,22 +1453,14 @@ static int check_extent_data_ref(struct extent_buffer *leaf,
                return -EUCLEAN;
        }
        for (; ptr < end; ptr += sizeof(*dref)) {
-               u64 root_objectid;
-               u64 owner;
                u64 offset;
-               u64 hash;
 
+               /*
+                * We cannot check the extent_data_ref hash due to possible
+                * overflow from the leaf due to hash collisions.
+                */
                dref = (struct btrfs_extent_data_ref *)ptr;
-               root_objectid = btrfs_extent_data_ref_root(leaf, dref);
-               owner = btrfs_extent_data_ref_objectid(leaf, dref);
                offset = btrfs_extent_data_ref_offset(leaf, dref);
-               hash = hash_extent_data_ref(root_objectid, owner, offset);
-               if (unlikely(hash != key->offset)) {
-                       extent_err(leaf, slot,
-       "invalid extent data ref hash, item has 0x%016llx key has 0x%016llx",
-                                  hash, key->offset);
-                       return -EUCLEAN;
-               }
                if (unlikely(!IS_ALIGNED(offset, leaf->fs_info->sectorsize))) {
                        extent_err(leaf, slot,
        "invalid extent data backref offset, have %llu expect aligned to %u",
index d90695c..92a3686 100644 (file)
@@ -3169,23 +3169,20 @@ int btrfs_sync_log(struct btrfs_trans_handle *trans,
 
        mutex_lock(&log_root_tree->log_mutex);
 
-       index2 = log_root_tree->log_transid % 2;
-       list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
-       root_log_ctx.log_transid = log_root_tree->log_transid;
-
        if (btrfs_is_zoned(fs_info)) {
-               mutex_lock(&fs_info->tree_root->log_mutex);
                if (!log_root_tree->node) {
                        ret = btrfs_alloc_log_tree_node(trans, log_root_tree);
                        if (ret) {
-                               mutex_unlock(&fs_info->tree_log_mutex);
                                mutex_unlock(&log_root_tree->log_mutex);
                                goto out;
                        }
                }
-               mutex_unlock(&fs_info->tree_root->log_mutex);
        }
 
+       index2 = log_root_tree->log_transid % 2;
+       list_add_tail(&root_log_ctx.list, &log_root_tree->log_ctxs[index2]);
+       root_log_ctx.log_transid = log_root_tree->log_transid;
+
        /*
         * Now we are safe to update the log_root_tree because we're under the
         * log_mutex, and we're a current writer so we're holding the commit
index bc3b33e..1c6810b 100644 (file)
@@ -7448,6 +7448,9 @@ static int btrfs_device_init_dev_stats(struct btrfs_device *device,
        int item_size;
        int i, ret, slot;
 
+       if (!device->fs_info->dev_root)
+               return 0;
+
        key.objectid = BTRFS_DEV_STATS_OBJECTID;
        key.type = BTRFS_PERSISTENT_ITEM_KEY;
        key.offset = device->devid;
index af6246f..8a45142 100644 (file)
@@ -229,11 +229,33 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
 {
        struct btrfs_root *root = BTRFS_I(inode)->root;
        struct btrfs_trans_handle *trans;
+       const bool start_trans = (current->journal_info == NULL);
        int ret;
 
-       trans = btrfs_start_transaction(root, 2);
-       if (IS_ERR(trans))
-               return PTR_ERR(trans);
+       if (start_trans) {
+               /*
+                * 1 unit for inserting/updating/deleting the xattr
+                * 1 unit for the inode item update
+                */
+               trans = btrfs_start_transaction(root, 2);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
+       } else {
+               /*
+                * This can happen when smack is enabled and a directory is being
+                * created. It happens through d_instantiate_new(), which calls
+                * smack_d_instantiate(), which in turn calls __vfs_setxattr() to
+                * set the transmute xattr (XATTR_NAME_SMACKTRANSMUTE) on the
+                * inode. We have already reserved space for the xattr and inode
+                * update at btrfs_mkdir(), so just use the transaction handle.
+                * We don't join or start a transaction, as that will reset the
+                * block_rsv of the handle and trigger a warning for the start
+                * case.
+                */
+               ASSERT(strncmp(name, XATTR_SECURITY_PREFIX,
+                              XATTR_SECURITY_PREFIX_LEN) == 0);
+               trans = current->journal_info;
+       }
 
        ret = btrfs_setxattr(trans, inode, name, value, size, flags);
        if (ret)
@@ -244,7 +266,8 @@ int btrfs_setxattr_trans(struct inode *inode, const char *name,
        ret = btrfs_update_inode(trans, root, BTRFS_I(inode));
        BUG_ON(ret);
 out:
-       btrfs_end_transaction(trans);
+       if (start_trans)
+               btrfs_end_transaction(trans);
        return ret;
 }
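
The start_trans logic relies on a btrfs convention worth spelling out: a task that is inside a running transaction has its handle stashed in current->journal_info, so a nested call can detect and reuse it instead of starting (and later ending) a second handle. A sketch of the pattern (the helper name is illustrative):

static struct btrfs_trans_handle *get_or_start_trans(struct btrfs_root *root,
						     int num_units)
{
	/* Reuse the handle this task already holds, if any. */
	if (current->journal_info)
		return current->journal_info;
	return btrfs_start_transaction(root, num_units); /* may be ERR_PTR */
}

The caller must remember which case it hit, mirroring the start_trans bool above, so it only ends a transaction it actually started.
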
 
@@ -362,6 +385,7 @@ static int btrfs_xattr_handler_get(const struct xattr_handler *handler,
 }
 
 static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
+                                  struct user_namespace *mnt_userns,
                                   struct dentry *unused, struct inode *inode,
                                   const char *name, const void *buffer,
                                   size_t size, int flags)
@@ -371,6 +395,7 @@ static int btrfs_xattr_handler_set(const struct xattr_handler *handler,
 }
 
 static int btrfs_xattr_handler_set_prop(const struct xattr_handler *handler,
+                                       struct user_namespace *mnt_userns,
                                        struct dentry *unused, struct inode *inode,
                                        const char *name, const void *value,
                                        size_t size, int flags)
index 05615a1..d524acf 100644 (file)
@@ -432,9 +432,8 @@ int zlib_decompress(struct list_head *ws, unsigned char *data_in,
                            PAGE_SIZE - (buf_offset % PAGE_SIZE));
                bytes = min(bytes, bytes_left);
 
-               kaddr = kmap_atomic(dest_page);
-               memcpy(kaddr + pg_offset, workspace->buf + buf_offset, bytes);
-               kunmap_atomic(kaddr);
+               memcpy_to_page(dest_page, pg_offset,
+                              workspace->buf + buf_offset, bytes);
 
                pg_offset += bytes;
                bytes_left -= bytes;
index d0eb0c8..1f972b7 100644 (file)
@@ -269,7 +269,7 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device)
        sector_t sector = 0;
        struct blk_zone *zones = NULL;
        unsigned int i, nreported = 0, nr_zones;
-       unsigned int zone_sectors;
+       sector_t zone_sectors;
        char *model, *emulated;
        int ret;
 
@@ -658,7 +658,7 @@ int btrfs_sb_log_location_bdev(struct block_device *bdev, int mirror, int rw,
                               u64 *bytenr_ret)
 {
        struct blk_zone zones[BTRFS_NR_SB_LOG_ZONES];
-       unsigned int zone_sectors;
+       sector_t zone_sectors;
        u32 sb_zone;
        int ret;
        u8 zone_sectors_shift;
index 9a48716..8e9626d 100644 (file)
@@ -688,10 +688,8 @@ int zstd_decompress(struct list_head *ws, unsigned char *data_in,
                bytes = min_t(unsigned long, destlen - pg_offset,
                                workspace->out_buf.size - buf_offset);
 
-               kaddr = kmap_atomic(dest_page);
-               memcpy(kaddr + pg_offset, workspace->out_buf.dst + buf_offset,
-                               bytes);
-               kunmap_atomic(kaddr);
+               memcpy_to_page(dest_page, pg_offset,
+                              workspace->out_buf.dst + buf_offset, bytes);
 
                pg_offset += bytes;
        }
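
The memcpy_to_page()/memcpy_from_page() conversions here and in the zlib, lzo and send hunks above all replace the same open-coded map/copy/unmap pattern; the helpers from <linux/highmem.h> are roughly equivalent to this sketch:

static inline void memcpy_to_page_sketch(struct page *page, size_t offset,
					 const char *from, size_t len)
{
	char *kaddr = kmap_local_page(page);

	memcpy(kaddr + offset, from, len);
	kunmap_local(kaddr);
}

Unlike kmap_atomic(), kmap_local_page() mappings do not disable preemption or page faults; the only rule is that they are released in reverse (nested) order, which is also what motivates the kmap_atomic() to kmap_local_page() switch in the lzo hunk.
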
index 32647d2..0cb7ffd 100644 (file)
@@ -847,7 +847,8 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
        if (retry)
                gfp |= __GFP_NOFAIL;
 
-       memcg = get_mem_cgroup_from_page(page);
+       /* The page lock pins the memcg */
+       memcg = page_memcg(page);
        old_memcg = set_active_memcg(memcg);
 
        head = NULL;
@@ -868,7 +869,6 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
        }
 out:
        set_active_memcg(old_memcg);
-       mem_cgroup_put(memcg);
        return head;
 /*
  * In case anything failed, we just free everything we got.
@@ -2083,7 +2083,8 @@ static int __block_commit_write(struct inode *inode, struct page *page,
                        set_buffer_uptodate(bh);
                        mark_buffer_dirty(bh);
                }
-               clear_buffer_new(bh);
+               if (buffer_new(bh))
+                       clear_buffer_new(bh);
 
                block_start = block_end;
                bh = bh->b_this_page;
index dfb14db..38bb776 100644 (file)
@@ -118,6 +118,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache)
        cache->mnt = path.mnt;
        root = path.dentry;
 
+       ret = -EINVAL;
+       if (mnt_user_ns(path.mnt) != &init_user_ns) {
+               pr_warn("File cache on idmapped mounts not supported");
+               goto error_unsupported;
+       }
+
        /* check parameters */
        ret = -EOPNOTSUPP;
        if (d_is_negative(root) ||
index 4cea5fb..5efa6a3 100644 (file)
@@ -470,14 +470,14 @@ static int cachefiles_attr_changed(struct fscache_object *_object)
                _debug("discard tail %llx", oi_size);
                newattrs.ia_valid = ATTR_SIZE;
                newattrs.ia_size = oi_size & PAGE_MASK;
-               ret = notify_change(object->backer, &newattrs, NULL);
+               ret = notify_change(&init_user_ns, object->backer, &newattrs, NULL);
                if (ret < 0)
                        goto truncate_failed;
        }
 
        newattrs.ia_valid = ATTR_SIZE;
        newattrs.ia_size = ni_size;
-       ret = notify_change(object->backer, &newattrs, NULL);
+       ret = notify_change(&init_user_ns, object->backer, &newattrs, NULL);
 
 truncate_failed:
        inode_unlock(d_inode(object->backer));
index ecc8ecb..7bf0732 100644 (file)
@@ -311,7 +311,8 @@ static int cachefiles_bury_object(struct cachefiles_cache *cache,
                        cachefiles_io_error(cache, "Unlink security error");
                } else {
                        trace_cachefiles_unlink(object, rep, why);
-                       ret = vfs_unlink(d_inode(dir), rep, NULL);
+                       ret = vfs_unlink(&init_user_ns, d_inode(dir), rep,
+                                        NULL);
 
                        if (preemptive)
                                cachefiles_mark_object_buried(cache, rep, why);
@@ -412,9 +413,16 @@ try_again:
        if (ret < 0) {
                cachefiles_io_error(cache, "Rename security error %d", ret);
        } else {
+               struct renamedata rd = {
+                       .old_mnt_userns = &init_user_ns,
+                       .old_dir        = d_inode(dir),
+                       .old_dentry     = rep,
+                       .new_mnt_userns = &init_user_ns,
+                       .new_dir        = d_inode(cache->graveyard),
+                       .new_dentry     = grave,
+               };
                trace_cachefiles_rename(object, rep, grave, why);
-               ret = vfs_rename(d_inode(dir), rep,
-                                d_inode(cache->graveyard), grave, NULL, 0);
+               ret = vfs_rename(&rd);
                if (ret != 0 && ret != -ENOMEM)
                        cachefiles_io_error(cache,
                                            "Rename failed with error %d", ret);
@@ -561,7 +569,7 @@ lookup_again:
                        if (ret < 0)
                                goto create_error;
                        start = jiffies;
-                       ret = vfs_mkdir(d_inode(dir), next, 0);
+                       ret = vfs_mkdir(&init_user_ns, d_inode(dir), next, 0);
                        cachefiles_hist(cachefiles_mkdir_histogram, start);
                        if (!key)
                                trace_cachefiles_mkdir(object, next, ret);
@@ -597,7 +605,8 @@ lookup_again:
                        if (ret < 0)
                                goto create_error;
                        start = jiffies;
-                       ret = vfs_create(d_inode(dir), next, S_IFREG, true);
+                       ret = vfs_create(&init_user_ns, d_inode(dir), next,
+                                        S_IFREG, true);
                        cachefiles_hist(cachefiles_create_histogram, start);
                        trace_cachefiles_create(object, next, ret);
                        if (ret < 0)
@@ -791,7 +800,7 @@ retry:
                ret = security_path_mkdir(&path, subdir, 0700);
                if (ret < 0)
                        goto mkdir_error;
-               ret = vfs_mkdir(d_inode(dir), subdir, 0700);
+               ret = vfs_mkdir(&init_user_ns, d_inode(dir), subdir, 0700);
                if (ret < 0)
                        goto mkdir_error;
 
index e027c71..8ffc40e 100644 (file)
@@ -24,17 +24,16 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode,
                container_of(wait, struct cachefiles_one_read, monitor);
        struct cachefiles_object *object;
        struct fscache_retrieval *op = monitor->op;
-       struct wait_bit_key *key = _key;
+       struct wait_page_key *key = _key;
        struct page *page = wait->private;
 
        ASSERT(key);
 
        _enter("{%lu},%u,%d,{%p,%u}",
               monitor->netfs_page->index, mode, sync,
-              key->flags, key->bit_nr);
+              key->page, key->bit_nr);
 
-       if (key->flags != &page->flags ||
-           key->bit_nr != PG_locked)
+       if (key->page != page || key->bit_nr != PG_locked)
                return 0;
 
        _debug("--- monitor %p %lx ---", page, page->flags);
index 72e4243..a591b5e 100644 (file)
@@ -39,8 +39,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
        _enter("%p{%s}", object, type);
 
        /* attempt to install a type label directly */
-       ret = vfs_setxattr(dentry, cachefiles_xattr_cache, type, 2,
-                          XATTR_CREATE);
+       ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache, type,
+                          2, XATTR_CREATE);
        if (ret == 0) {
                _debug("SET"); /* we succeeded */
                goto error;
@@ -54,7 +54,8 @@ int cachefiles_check_object_type(struct cachefiles_object *object)
        }
 
        /* read the current type label */
-       ret = vfs_getxattr(dentry, cachefiles_xattr_cache, xtype, 3);
+       ret = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache, xtype,
+                          3);
        if (ret < 0) {
                if (ret == -ERANGE)
                        goto bad_type_length;
@@ -110,9 +111,8 @@ int cachefiles_set_object_xattr(struct cachefiles_object *object,
        _debug("SET #%u", auxdata->len);
 
        clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
-       ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
-                          &auxdata->type, auxdata->len,
-                          XATTR_CREATE);
+       ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+                          &auxdata->type, auxdata->len, XATTR_CREATE);
        if (ret < 0 && ret != -ENOMEM)
                cachefiles_io_error_obj(
                        object,
@@ -140,9 +140,8 @@ int cachefiles_update_object_xattr(struct cachefiles_object *object,
        _debug("SET #%u", auxdata->len);
 
        clear_bit(FSCACHE_COOKIE_AUX_UPDATED, &object->fscache.cookie->flags);
-       ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
-                          &auxdata->type, auxdata->len,
-                          XATTR_REPLACE);
+       ret = vfs_setxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
+                          &auxdata->type, auxdata->len, XATTR_REPLACE);
        if (ret < 0 && ret != -ENOMEM)
                cachefiles_io_error_obj(
                        object,
@@ -171,7 +170,7 @@ int cachefiles_check_auxdata(struct cachefiles_object *object)
        if (!auxbuf)
                return -ENOMEM;
 
-       xlen = vfs_getxattr(dentry, cachefiles_xattr_cache,
+       xlen = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
                            &auxbuf->type, 512 + 1);
        ret = -ESTALE;
        if (xlen < 1 ||
@@ -213,7 +212,7 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
        }
 
        /* read the current type label */
-       ret = vfs_getxattr(dentry, cachefiles_xattr_cache,
+       ret = vfs_getxattr(&init_user_ns, dentry, cachefiles_xattr_cache,
                           &auxbuf->type, 512 + 1);
        if (ret < 0) {
                if (ret == -ENODATA)
@@ -270,9 +269,9 @@ int cachefiles_check_object_xattr(struct cachefiles_object *object,
                }
 
                /* update the current label */
-               ret = vfs_setxattr(dentry, cachefiles_xattr_cache,
-                                  &auxdata->type, auxdata->len,
-                                  XATTR_REPLACE);
+               ret = vfs_setxattr(&init_user_ns, dentry,
+                                  cachefiles_xattr_cache, &auxdata->type,
+                                  auxdata->len, XATTR_REPLACE);
                if (ret < 0) {
                        cachefiles_io_error_obj(object,
                                                "Can't update xattr on %lu"
@@ -309,7 +308,7 @@ int cachefiles_remove_object_xattr(struct cachefiles_cache *cache,
 {
        int ret;
 
-       ret = vfs_removexattr(dentry, cachefiles_xattr_cache);
+       ret = vfs_removexattr(&init_user_ns, dentry, cachefiles_xattr_cache);
        if (ret < 0) {
                if (ret == -ENOENT || ret == -ENODATA)
                        ret = 0;
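
The xattr helpers gained the same user-namespace argument as the creation paths. A minimal sketch, assuming a hypothetical "user.example" attribute and a never-idmapped mount:

#include <linux/string.h>
#include <linux/user_namespace.h>
#include <linux/xattr.h>

/* Sketch: "user.example" is a made-up attribute name. */
static int example_set_label(struct dentry *dentry, const char *value)
{
        return vfs_setxattr(&init_user_ns, dentry, "user.example",
                            value, strlen(value), XATTR_CREATE);
}
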
index e046574..529af59 100644 (file)
@@ -82,7 +82,8 @@ retry:
        return acl;
 }
 
-int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int ceph_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type)
 {
        int ret = 0, size = 0;
        const char *name = NULL;
@@ -100,7 +101,8 @@ int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        case ACL_TYPE_ACCESS:
                name = XATTR_NAME_POSIX_ACL_ACCESS;
                if (acl) {
-                       ret = posix_acl_update_mode(inode, &new_mode, &acl);
+                       ret = posix_acl_update_mode(&init_user_ns, inode,
+                                                   &new_mode, &acl);
                        if (ret)
                                goto out;
                }
index 858ee73..83d9358 100644 (file)
@@ -823,8 +823,8 @@ int ceph_handle_notrace_create(struct inode *dir, struct dentry *dentry)
        return PTR_ERR(result);
 }
 
-static int ceph_mknod(struct inode *dir, struct dentry *dentry,
-                     umode_t mode, dev_t rdev)
+static int ceph_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
        struct ceph_mds_request *req;
@@ -878,14 +878,14 @@ out:
        return err;
 }
 
-static int ceph_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                      bool excl)
+static int ceph_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
-       return ceph_mknod(dir, dentry, mode, 0);
+       return ceph_mknod(mnt_userns, dir, dentry, mode, 0);
 }
 
-static int ceph_symlink(struct inode *dir, struct dentry *dentry,
-                           const char *dest)
+static int ceph_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, const char *dest)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
        struct ceph_mds_request *req;
@@ -937,7 +937,8 @@ out:
        return err;
 }
 
-static int ceph_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ceph_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(dir->i_sb);
        struct ceph_mds_request *req;
@@ -1183,9 +1184,9 @@ out:
        return err;
 }
 
-static int ceph_rename(struct inode *old_dir, struct dentry *old_dentry,
-                      struct inode *new_dir, struct dentry *new_dentry,
-                      unsigned int flags)
+static int ceph_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                      struct dentry *old_dentry, struct inode *new_dir,
+                      struct dentry *new_dentry, unsigned int flags)
 {
        struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(old_dir->i_sb);
        struct ceph_mds_request *req;
index 5d20a62..156f849 100644 (file)
@@ -2201,7 +2201,8 @@ int __ceph_setattr(struct inode *inode, struct iattr *attr)
 /*
  * setattr
  */
-int ceph_setattr(struct dentry *dentry, struct iattr *attr)
+int ceph_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
@@ -2210,7 +2211,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        if (ceph_snap(inode) != CEPH_NOSNAP)
                return -EROFS;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err != 0)
                return err;
 
@@ -2225,7 +2226,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
        err = __ceph_setattr(inode, attr);
 
        if (err >= 0 && (attr->ia_valid & ATTR_MODE))
-               err = posix_acl_chmod(inode, attr->ia_mode);
+               err = posix_acl_chmod(&init_user_ns, inode, attr->ia_mode);
 
        return err;
 }
@@ -2284,7 +2285,8 @@ int __ceph_do_getattr(struct inode *inode, struct page *locked_page,
  * Check inode permissions.  We verify we have a valid value for
  * the AUTH cap, then call the generic handler.
  */
-int ceph_permission(struct inode *inode, int mask)
+int ceph_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                   int mask)
 {
        int err;
 
@@ -2294,7 +2296,7 @@ int ceph_permission(struct inode *inode, int mask)
        err = ceph_do_getattr(inode, CEPH_CAP_AUTH_SHARED, false);
 
        if (!err)
-               err = generic_permission(inode, mask);
+               err = generic_permission(&init_user_ns, inode, mask);
        return err;
 }
 
@@ -2331,8 +2333,8 @@ static int statx_to_caps(u32 want, umode_t mode)
  * Get all the attributes. If we have sufficient caps for the requested attrs,
  * then we can avoid talking to the MDS at all.
  */
-int ceph_getattr(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int flags)
+int ceph_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct ceph_inode_info *ci = ceph_inode(inode);
@@ -2348,7 +2350,7 @@ int ceph_getattr(const struct path *path, struct kstat *stat,
                        return err;
        }
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        stat->ino = ceph_present_inode(inode);
 
        /*
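
->getattr implementations likewise take the namespace now, and generic_fillattr() needs it to map the reported uid/gid. A sketch for a hypothetical filesystem that declines idmapped mounts and therefore keeps passing &init_user_ns:

#include <linux/fs.h>
#include <linux/stat.h>
#include <linux/user_namespace.h>

/* Sketch of a ->getattr under the new prototype (hypothetical fs). */
static int example_getattr(struct user_namespace *mnt_userns,
                           const struct path *path, struct kstat *stat,
                           u32 request_mask, unsigned int flags)
{
        /* A filesystem that refuses idmapped mounts may keep
         * passing &init_user_ns instead of mnt_userns. */
        generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
        return 0;
}
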
index 13b0288..c48bb30 100644 (file)
@@ -1000,10 +1000,13 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force)
 {
        return __ceph_do_getattr(inode, NULL, mask, force);
 }
-extern int ceph_permission(struct inode *inode, int mask);
+extern int ceph_permission(struct user_namespace *mnt_userns,
+                          struct inode *inode, int mask);
 extern int __ceph_setattr(struct inode *inode, struct iattr *attr);
-extern int ceph_setattr(struct dentry *dentry, struct iattr *attr);
-extern int ceph_getattr(const struct path *path, struct kstat *stat,
+extern int ceph_setattr(struct user_namespace *mnt_userns,
+                       struct dentry *dentry, struct iattr *attr);
+extern int ceph_getattr(struct user_namespace *mnt_userns,
+                       const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int flags);
 
 /* xattr.c */
@@ -1064,7 +1067,8 @@ void ceph_release_acl_sec_ctx(struct ceph_acl_sec_ctx *as_ctx);
 #ifdef CONFIG_CEPH_FS_POSIX_ACL
 
 struct posix_acl *ceph_get_acl(struct inode *, int);
-int ceph_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int ceph_set_acl(struct user_namespace *mnt_userns,
+                struct inode *inode, struct posix_acl *acl, int type);
 int ceph_pre_init_acls(struct inode *dir, umode_t *mode,
                       struct ceph_acl_sec_ctx *as_ctx);
 void ceph_init_inode_acls(struct inode *inode,
index 2499798..02f59bc 100644 (file)
@@ -1238,6 +1238,7 @@ static int ceph_get_xattr_handler(const struct xattr_handler *handler,
 }
 
 static int ceph_set_xattr_handler(const struct xattr_handler *handler,
+                                 struct user_namespace *mnt_userns,
                                  struct dentry *unused, struct inode *inode,
                                  const char *name, const void *value,
                                  size_t size, int flags)
index fe03cbd..bf52e93 100644 (file)
@@ -18,6 +18,7 @@ config CIFS
        select CRYPTO_AES
        select CRYPTO_LIB_DES
        select KEYS
+       select DNS_RESOLVER
        help
          This is the client VFS module for the SMB3 family of NAS protocols,
          (including support for the most recent, most secure dialect SMB3.1.1)
@@ -112,7 +113,6 @@ config CIFS_WEAK_PW_HASH
 config CIFS_UPCALL
        bool "Kerberos/SPNEGO advanced session setup"
        depends on CIFS
-       select DNS_RESOLVER
        help
          Enables an upcall mechanism for CIFS which accesses userspace helper
          utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets
@@ -179,7 +179,6 @@ config CIFS_DEBUG_DUMP_KEYS
 config CIFS_DFS_UPCALL
        bool "DFS feature support"
        depends on CIFS
-       select DNS_RESOLVER
        help
          Distributed File System (DFS) support is used to access shares
          transparently in an enterprise name space, even if the share
index 5213b20..3ee3b7d 100644 (file)
@@ -10,13 +10,14 @@ cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \
          cifs_unicode.o nterr.o cifsencrypt.o \
          readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \
          smb2ops.o smb2maperror.o smb2transport.o \
-         smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o
+         smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o \
+         dns_resolve.o
 
 cifs-$(CONFIG_CIFS_XATTR) += xattr.o
 
 cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o
 
-cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o
+cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o
 
 cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o
 
index aa697cc..88a7958 100644 (file)
@@ -133,11 +133,12 @@ cifs_dump_channel(struct seq_file *m, int i, struct cifs_chan *chan)
 {
        struct TCP_Server_Info *server = chan->server;
 
-       seq_printf(m, "\t\tChannel %d Number of credits: %d Dialect 0x%x "
-                  "TCP status: %d Instance: %d Local Users To Server: %d "
-                  "SecMode: 0x%x Req On Wire: %d In Send: %d "
-                  "In MaxReq Wait: %d\n",
-                  i+1,
+       seq_printf(m, "\n\n\t\tChannel: %d ConnectionId: 0x%llx"
+                  "\n\t\tNumber of credits: %d Dialect 0x%x"
+                  "\n\t\tTCP status: %d Instance: %d"
+                  "\n\t\tLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d"
+                  "\n\t\tIn Send: %d In MaxReq Wait: %d",
+                  i+1, server->conn_id,
                   server->credits,
                   server->dialect,
                   server->tcpStatus,
@@ -206,7 +207,7 @@ static int cifs_debug_files_proc_show(struct seq_file *m, void *v)
                                                from_kuid(&init_user_ns, cfile->uid),
                                                cfile->dentry);
 #ifdef CONFIG_CIFS_DEBUG2
-                                       seq_printf(m, " 0x%llx\n", cfile->fid.mid);
+                                       seq_printf(m, " %llu\n", cfile->fid.mid);
 #else
                                        seq_printf(m, "\n");
 #endif /* CIFS_DEBUG2 */
@@ -227,7 +228,7 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        struct TCP_Server_Info *server;
        struct cifs_ses *ses;
        struct cifs_tcon *tcon;
-       int i, j;
+       int c, i, j;
 
        seq_puts(m,
                    "Display Internal CIFS Data Structures for Debugging\n"
@@ -275,14 +276,25 @@ static int cifs_debug_data_proc_show(struct seq_file *m, void *v)
        seq_putc(m, '\n');
        seq_printf(m, "CIFSMaxBufSize: %d\n", CIFSMaxBufSize);
        seq_printf(m, "Active VFS Requests: %d\n", GlobalTotalActiveXid);
-       seq_printf(m, "Servers:");
 
-       i = 0;
+       seq_printf(m, "\nServers: ");
+
+       c = 0;
        spin_lock(&cifs_tcp_ses_lock);
        list_for_each(tmp1, &cifs_tcp_ses_list) {
                server = list_entry(tmp1, struct TCP_Server_Info,
                                    tcp_ses_list);
 
+               /* channel info will be printed as a part of sessions below */
+               if (server->is_channel)
+                       continue;
+
+               c++;
+               seq_printf(m, "\n%d) ConnectionId: 0x%llx ",
+                       c, server->conn_id);
+
+               if (server->hostname)
+                       seq_printf(m, "Hostname: %s ", server->hostname);
 #ifdef CONFIG_CIFS_SMB_DIRECT
                if (!server->rdma)
                        goto skip_rdma;
@@ -362,46 +374,48 @@ skip_rdma:
                if (server->posix_ext_supported)
                        seq_printf(m, " posix");
 
-               i++;
+               if (server->rdma)
+                       seq_printf(m, "\nRDMA ");
+               seq_printf(m, "\nTCP status: %d Instance: %d"
+                               "\nLocal Users To Server: %d SecMode: 0x%x Req On Wire: %d",
+                               server->tcpStatus,
+                               server->reconnect_instance,
+                               server->srv_count,
+                               server->sec_mode, in_flight(server));
+
+               seq_printf(m, "\nIn Send: %d In MaxReq Wait: %d",
+                               atomic_read(&server->in_send),
+                               atomic_read(&server->num_waiters));
+
+               seq_printf(m, "\n\n\tSessions: ");
+               i = 0;
                list_for_each(tmp2, &server->smb_ses_list) {
                        ses = list_entry(tmp2, struct cifs_ses,
                                         smb_ses_list);
+                       i++;
                        if ((ses->serverDomain == NULL) ||
                                (ses->serverOS == NULL) ||
                                (ses->serverNOS == NULL)) {
-                               seq_printf(m, "\n%d) Name: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
-                                       i, ses->serverName, ses->ses_count,
+                               seq_printf(m, "\n\t%d) Address: %s Uses: %d Capability: 0x%x\tSession Status: %d ",
+                                       i, ses->ip_addr, ses->ses_count,
                                        ses->capabilities, ses->status);
                                if (ses->session_flags & SMB2_SESSION_FLAG_IS_GUEST)
-                                       seq_printf(m, "Guest\t");
+                                       seq_printf(m, "Guest ");
                                else if (ses->session_flags & SMB2_SESSION_FLAG_IS_NULL)
-                                       seq_printf(m, "Anonymous\t");
+                                       seq_printf(m, "Anonymous ");
                        } else {
                                seq_printf(m,
-                                   "\n%d) Name: %s  Domain: %s Uses: %d OS:"
-                                   " %s\n\tNOS: %s\tCapability: 0x%x\n\tSMB"
-                                   " session status: %d ",
-                               i, ses->serverName, ses->serverDomain,
+                                   "\n\t%d) Name: %s  Domain: %s Uses: %d OS: %s "
+                                   "\n\tNOS: %s\tCapability: 0x%x"
+                                       "\n\tSMB session status: %d ",
+                               i, ses->ip_addr, ses->serverDomain,
                                ses->ses_count, ses->serverOS, ses->serverNOS,
                                ses->capabilities, ses->status);
                        }
 
-                       seq_printf(m,"Security type: %s\n",
+                       seq_printf(m, "\n\tSecurity type: %s ",
                                get_security_type_str(server->ops->select_sectype(server, ses->sectype)));
 
-                       if (server->rdma)
-                               seq_printf(m, "RDMA\n\t");
-                       seq_printf(m, "TCP status: %d Instance: %d\n\tLocal Users To "
-                                  "Server: %d SecMode: 0x%x Req On Wire: %d",
-                                  server->tcpStatus,
-                                  server->reconnect_instance,
-                                  server->srv_count,
-                                  server->sec_mode, in_flight(server));
-
-                       seq_printf(m, " In Send: %d In MaxReq Wait: %d",
-                               atomic_read(&server->in_send),
-                               atomic_read(&server->num_waiters));
-
                        /* dump session id helpful for use with network trace */
                        seq_printf(m, " SessionId: 0x%llx", ses->Suid);
                        if (ses->session_flags & SMB2_SESSION_FLAG_ENCRYPT_DATA)
@@ -414,13 +428,13 @@ skip_rdma:
                                   from_kuid(&init_user_ns, ses->cred_uid));
 
                        if (ses->chan_count > 1) {
-                               seq_printf(m, "\n\n\tExtra Channels: %zu\n",
+                               seq_printf(m, "\n\n\tExtra Channels: %zu ",
                                           ses->chan_count-1);
                                for (j = 1; j < ses->chan_count; j++)
                                        cifs_dump_channel(m, j, &ses->chans[j]);
                        }
 
-                       seq_puts(m, "\n\n\tShares:");
+                       seq_puts(m, "\n\n\tShares: ");
                        j = 0;
 
                        seq_printf(m, "\n\t%d) IPC: ", j);
@@ -437,38 +451,43 @@ skip_rdma:
                                cifs_debug_tcon(m, tcon);
                        }
 
-                       seq_puts(m, "\n\tMIDs:\n");
-
-                       spin_lock(&GlobalMid_Lock);
-                       list_for_each(tmp3, &server->pending_mid_q) {
-                               mid_entry = list_entry(tmp3, struct mid_q_entry,
-                                       qhead);
-                               seq_printf(m, "\tState: %d com: %d pid:"
-                                             " %d cbdata: %p mid %llu\n",
-                                             mid_entry->mid_state,
-                                             le16_to_cpu(mid_entry->command),
-                                             mid_entry->pid,
-                                             mid_entry->callback_data,
-                                             mid_entry->mid);
-                       }
-                       spin_unlock(&GlobalMid_Lock);
-
                        spin_lock(&ses->iface_lock);
                        if (ses->iface_count)
-                               seq_printf(m, "\n\tServer interfaces: %zu\n",
+                               seq_printf(m, "\n\n\tServer interfaces: %zu",
                                           ses->iface_count);
                        for (j = 0; j < ses->iface_count; j++) {
                                struct cifs_server_iface *iface;
 
                                iface = &ses->iface_list[j];
-                               seq_printf(m, "\t%d)", j);
+                               seq_printf(m, "\n\t%d)", j+1);
                                cifs_dump_iface(m, iface);
                                if (is_ses_using_iface(ses, iface))
                                        seq_puts(m, "\t\t[CONNECTED]\n");
                        }
                        spin_unlock(&ses->iface_lock);
                }
+               if (i == 0)
+                       seq_printf(m, "\n\t\t[NONE]");
+
+               seq_puts(m, "\n\n\tMIDs: ");
+               spin_lock(&GlobalMid_Lock);
+               list_for_each(tmp3, &server->pending_mid_q) {
+                       mid_entry = list_entry(tmp3, struct mid_q_entry,
+                                       qhead);
+                       seq_printf(m, "\n\tState: %d com: %d pid:"
+                                       " %d cbdata: %p mid %llu\n",
+                                       mid_entry->mid_state,
+                                       le16_to_cpu(mid_entry->command),
+                                       mid_entry->pid,
+                                       mid_entry->callback_data,
+                                       mid_entry->mid);
+               }
+               spin_unlock(&GlobalMid_Lock);
+               seq_printf(m, "\n--\n");
        }
+       if (c == 0)
+               seq_printf(m, "\n\t[NONE]");
+
        spin_unlock(&cifs_tcp_ses_lock);
        seq_putc(m, '\n');
 
index d35f599..d829b8b 100644 (file)
@@ -248,7 +248,7 @@ nlmsg_fail:
 
 /*
  * Try to find a matching registration for the tcon's server name and share name.
- * Calls to this funciton must be protected by cifs_swnreg_idr_mutex.
+ * Calls to this function must be protected by cifs_swnreg_idr_mutex.
  * TODO Try to avoid memory allocations
  */
 static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
@@ -272,7 +272,7 @@ static struct cifs_swn_reg *cifs_find_swn_reg(struct cifs_tcon *tcon)
        if (IS_ERR(share_name)) {
                int ret;
 
-               ret = PTR_ERR(net_name);
+               ret = PTR_ERR(share_name);
                cifs_dbg(VFS, "%s: failed to extract share name from target '%s': %d\n",
                                __func__, tcon->treeName, ret);
                kfree(net_name);
index 562913e..d178cf8 100644 (file)
@@ -267,10 +267,11 @@ is_well_known_sid(const struct cifs_sid *psid, uint32_t *puid, bool is_group)
        return true; /* well known sid found, uid returned */
 }
 
-static void
+static __u16
 cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
 {
        int i;
+       __u16 size = 1 + 1 + 6;
 
        dst->revision = src->revision;
        dst->num_subauth = min_t(u8, src->num_subauth, SID_MAX_SUB_AUTHORITIES);
@@ -278,6 +279,9 @@ cifs_copy_sid(struct cifs_sid *dst, const struct cifs_sid *src)
                dst->authority[i] = src->authority[i];
        for (i = 0; i < dst->num_subauth; ++i)
                dst->sub_auth[i] = src->sub_auth[i];
+       size += (dst->num_subauth * 4);
+
+       return size;
 }
 
 static int
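
cifs_copy_sid() now reports how many bytes it wrote, so callers can lay out ACEs back to back. The count follows the on-wire SID layout; a sketch of the arithmetic, with field sizes assumed from that layout:

#include <linux/types.h>

/* Sketch (assumed layout): revision (1) + subauthority count (1)
 * + 6-byte identifier authority + one 32-bit subauthority each. */
static u16 example_sid_size(u8 num_subauth)
{
        return 1 + 1 + 6 + 4 * (u16)num_subauth;
}
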
@@ -521,8 +525,11 @@ exit_cifs_idmap(void)
 }
 
 /* copy ntsd, owner sid, and group sid from a security descriptor to another */
-static void copy_sec_desc(const struct cifs_ntsd *pntsd,
-                               struct cifs_ntsd *pnntsd, __u32 sidsoffset)
+static __u32 copy_sec_desc(const struct cifs_ntsd *pntsd,
+                               struct cifs_ntsd *pnntsd,
+                               __u32 sidsoffset,
+                               struct cifs_sid *pownersid,
+                               struct cifs_sid *pgrpsid)
 {
        struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
        struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
@@ -536,19 +543,25 @@ static void copy_sec_desc(const struct cifs_ntsd *pntsd,
        pnntsd->gsidoffset = cpu_to_le32(sidsoffset + sizeof(struct cifs_sid));
 
        /* copy owner sid */
-       owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+       if (pownersid)
+               owner_sid_ptr = pownersid;
+       else
+               owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->osidoffset));
        nowner_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset);
        cifs_copy_sid(nowner_sid_ptr, owner_sid_ptr);
 
        /* copy group sid */
-       group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+       if (pgrpsid)
+               group_sid_ptr = pgrpsid;
+       else
+               group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
                                le32_to_cpu(pntsd->gsidoffset));
        ngroup_sid_ptr = (struct cifs_sid *)((char *)pnntsd + sidsoffset +
                                        sizeof(struct cifs_sid));
        cifs_copy_sid(ngroup_sid_ptr, group_sid_ptr);
 
-       return;
+       return sidsoffset + (2 * sizeof(struct cifs_sid));
 }
 
 
@@ -663,6 +676,25 @@ static void mode_to_access_flags(umode_t mode, umode_t bits_to_use,
        return;
 }
 
+static __u16 cifs_copy_ace(struct cifs_ace *dst, struct cifs_ace *src, struct cifs_sid *psid)
+{
+       __u16 size = 1 + 1 + 2 + 4;
+
+       dst->type = src->type;
+       dst->flags = src->flags;
+       dst->access_req = src->access_req;
+
+       /* Check if there's a replacement sid specified */
+       if (psid)
+               size += cifs_copy_sid(&dst->sid, psid);
+       else
+               size += cifs_copy_sid(&dst->sid, &src->sid);
+
+       dst->size = cpu_to_le16(size);
+
+       return size;
+}
+
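
cifs_copy_ace() returns the copied ACE's size for the same reason. Assuming the usual wire layout, the total is an 8-byte header plus the SID:

#include <linux/types.h>

/* Sketch (assumed layout): type (1) + flags (1) + size (2)
 * + access mask (4), then the SID itself. */
static u16 example_ace_size(u8 num_subauth)
{
        return 1 + 1 + 2 + 4 + (1 + 1 + 6 + 4 * (u16)num_subauth);
}
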
 static __u16 fill_ace_for_sid(struct cifs_ace *pntace,
                        const struct cifs_sid *psid, __u64 nmode,
                        umode_t bits, __u8 access_type,
@@ -907,29 +939,30 @@ unsigned int setup_special_user_owner_ACE(struct cifs_ace *pntace)
        return ace_size;
 }
 
-static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
-                       struct cifs_sid *pgrpsid, __u64 *pnmode, bool modefromsid)
+static void populate_new_aces(char *nacl_base,
+               struct cifs_sid *pownersid,
+               struct cifs_sid *pgrpsid,
+               __u64 *pnmode, u32 *pnum_aces, u16 *pnsize,
+               bool modefromsid)
 {
-       u16 size = 0;
-       u32 num_aces = 0;
-       struct cifs_acl *pnndacl;
        __u64 nmode;
+       u32 num_aces = 0;
+       u16 nsize = 0;
        __u64 user_mode;
        __u64 group_mode;
        __u64 other_mode;
        __u64 deny_user_mode = 0;
        __u64 deny_group_mode = 0;
        bool sticky_set = false;
-
-       pnndacl = (struct cifs_acl *)((char *)pndacl + sizeof(struct cifs_acl));
+       struct cifs_ace *pnntace = NULL;
 
        nmode = *pnmode;
+       num_aces = *pnum_aces;
+       nsize = *pnsize;
 
        if (modefromsid) {
-               struct cifs_ace *pntace =
-                       (struct cifs_ace *)((char *)pnndacl + size);
-
-               size += setup_special_mode_ACE(pntace, nmode);
+               pnntace = (struct cifs_ace *) (nacl_base + nsize);
+               nsize += setup_special_mode_ACE(pnntace, nmode);
                num_aces++;
                goto set_size;
        }
@@ -966,40 +999,172 @@ static int set_chmod_dacl(struct cifs_acl *pndacl, struct cifs_sid *pownersid,
                sticky_set = true;
 
        if (deny_user_mode) {
-               size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-                               pownersid, deny_user_mode, 0700, ACCESS_DENIED, false);
+               pnntace = (struct cifs_ace *) (nacl_base + nsize);
+               nsize += fill_ace_for_sid(pnntace, pownersid, deny_user_mode,
+                               0700, ACCESS_DENIED, false);
                num_aces++;
        }
+
        /* Group DENY ACE does not conflict with owner ALLOW ACE. Keep in preferred order*/
        if (deny_group_mode && !(deny_group_mode & (user_mode >> 3))) {
-               size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-                               pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+               pnntace = (struct cifs_ace *) (nacl_base + nsize);
+               nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+                               0070, ACCESS_DENIED, false);
                num_aces++;
        }
-       size += fill_ace_for_sid((struct cifs_ace *) ((char *)pnndacl + size),
-                       pownersid, user_mode, 0700, ACCESS_ALLOWED, true);
+
+       pnntace = (struct cifs_ace *) (nacl_base + nsize);
+       nsize += fill_ace_for_sid(pnntace, pownersid, user_mode,
+                       0700, ACCESS_ALLOWED, true);
        num_aces++;
+
        /* Group DENY ACE conflicts with owner ALLOW ACE. So keep it after. */
        if (deny_group_mode && (deny_group_mode & (user_mode >> 3))) {
-               size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-                               pgrpsid, deny_group_mode, 0070, ACCESS_DENIED, false);
+               pnntace = (struct cifs_ace *) (nacl_base + nsize);
+               nsize += fill_ace_for_sid(pnntace, pgrpsid, deny_group_mode,
+                               0070, ACCESS_DENIED, false);
                num_aces++;
        }
-       size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-                       pgrpsid, group_mode, 0070, ACCESS_ALLOWED, !sticky_set);
+
+       pnntace = (struct cifs_ace *) (nacl_base + nsize);
+       nsize += fill_ace_for_sid(pnntace, pgrpsid, group_mode,
+                       0070, ACCESS_ALLOWED, !sticky_set);
        num_aces++;
-       size += fill_ace_for_sid((struct cifs_ace *)((char *)pnndacl + size),
-                       &sid_everyone, other_mode, 0007, ACCESS_ALLOWED, !sticky_set);
+
+       pnntace = (struct cifs_ace *) (nacl_base + nsize);
+       nsize += fill_ace_for_sid(pnntace, &sid_everyone, other_mode,
+                       0007, ACCESS_ALLOWED, !sticky_set);
        num_aces++;
 
 set_size:
+       *pnum_aces = num_aces;
+       *pnsize = nsize;
+}
+
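
populate_new_aces() factors the mode-to-ACE mapping out of set_chmod_dacl(): at most two DENY ACEs (owner, group) followed by three ALLOW ACEs (owner, group, everyone). Because NT access checks walk the ACL in order, a DENY is only emitted when a broader class holds bits the narrower class lacks; a sketch of that rule, under assumed semantics:

#include <linux/types.h>

/* Sketch (assumed semantics): deny the owner class only those bits
 * that group or everyone would otherwise grant past it. */
static u64 example_deny_user_bits(u64 nmode)
{
        u64 user  = (nmode & 0700) >> 6;
        u64 wider = ((nmode & 0070) >> 3) | (nmode & 0007);

        return ((~user & wider) & 07) << 6;
}
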
+static __u16 replace_sids_and_copy_aces(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+               struct cifs_sid *pownersid, struct cifs_sid *pgrpsid,
+               struct cifs_sid *pnownersid, struct cifs_sid *pngrpsid)
+{
+       int i;
+       u16 size = 0;
+       struct cifs_ace *pntace = NULL;
+       char *acl_base = NULL;
+       u32 src_num_aces = 0;
+       u16 nsize = 0;
+       struct cifs_ace *pnntace = NULL;
+       char *nacl_base = NULL;
+       u16 ace_size = 0;
+
+       acl_base = (char *)pdacl;
+       size = sizeof(struct cifs_acl);
+       src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+       nacl_base = (char *)pndacl;
+       nsize = sizeof(struct cifs_acl);
+
+       /* Go through all the ACEs */
+       for (i = 0; i < src_num_aces; ++i) {
+               pntace = (struct cifs_ace *) (acl_base + size);
+               pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+               if (pnownersid && compare_sids(&pntace->sid, pownersid) == 0)
+                       ace_size = cifs_copy_ace(pnntace, pntace, pnownersid);
+               else if (pngrpsid && compare_sids(&pntace->sid, pgrpsid) == 0)
+                       ace_size = cifs_copy_ace(pnntace, pntace, pngrpsid);
+               else
+                       ace_size = cifs_copy_ace(pnntace, pntace, NULL);
+
+               size += le16_to_cpu(pntace->size);
+               nsize += ace_size;
+       }
+
+       return nsize;
+}
+
+static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl,
+               struct cifs_sid *pownersid,     struct cifs_sid *pgrpsid,
+               __u64 *pnmode, bool mode_from_sid)
+{
+       int i;
+       u16 size = 0;
+       struct cifs_ace *pntace = NULL;
+       char *acl_base = NULL;
+       u32 src_num_aces = 0;
+       u16 nsize = 0;
+       struct cifs_ace *pnntace = NULL;
+       char *nacl_base = NULL;
+       u32 num_aces = 0;
+       __u64 nmode;
+       bool new_aces_set = false;
+
+       /* Assuming that pndacl and pnmode are never NULL */
+       nmode = *pnmode;
+       nacl_base = (char *)pndacl;
+       nsize = sizeof(struct cifs_acl);
+
+       /* If pdacl is NULL, we don't have a src. Simply populate new ACL. */
+       if (!pdacl) {
+               populate_new_aces(nacl_base,
+                               pownersid, pgrpsid,
+                               pnmode, &num_aces, &nsize,
+                               mode_from_sid);
+               goto finalize_dacl;
+       }
+
+       acl_base = (char *)pdacl;
+       size = sizeof(struct cifs_acl);
+       src_num_aces = le32_to_cpu(pdacl->num_aces);
+
+       /* Retain old ACEs which we can retain */
+       for (i = 0; i < src_num_aces; ++i) {
+               pntace = (struct cifs_ace *) (acl_base + size);
+
+               if (!new_aces_set && (pntace->flags & INHERITED_ACE)) {
+                       /* Place the new ACEs in between existing explicit and inherited */
+                       populate_new_aces(nacl_base,
+                                       pownersid, pgrpsid,
+                                       pnmode, &num_aces, &nsize,
+                                       mode_from_sid);
+
+                       new_aces_set = true;
+               }
+
+               /* If it's any one of the ACEs we're replacing, skip! */
+               if (((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) ||
+                               (compare_sids(&pntace->sid, pownersid) == 0) ||
+                               (compare_sids(&pntace->sid, pgrpsid) == 0) ||
+                               (compare_sids(&pntace->sid, &sid_everyone) == 0) ||
+                               (compare_sids(&pntace->sid, &sid_authusers) == 0))) {
+                       goto next_ace;
+               }
+
+               /* update the pointer to the next ACE to populate */
+               pnntace = (struct cifs_ace *) (nacl_base + nsize);
+
+               nsize += cifs_copy_ace(pnntace, pntace, NULL);
+               num_aces++;
+
+next_ace:
+               size += le16_to_cpu(pntace->size);
+       }
+
+       /* If inherited ACEs are not present, place the new ones at the tail */
+       if (!new_aces_set) {
+               populate_new_aces(nacl_base,
+                               pownersid, pgrpsid,
+                               pnmode, &num_aces, &nsize,
+                               mode_from_sid);
+
+               new_aces_set = true;
+       }
+
+finalize_dacl:
        pndacl->num_aces = cpu_to_le32(num_aces);
-       pndacl->size = cpu_to_le16(size + sizeof(struct cifs_acl));
+       pndacl->size = cpu_to_le16(nsize);
 
        return 0;
 }
 
-
 static int parse_sid(struct cifs_sid *psid, char *end_of_acl)
 {
        /* BB need to add parm so we can store the SID BB */
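
The rewritten set_chmod_dacl() retains source ACEs instead of discarding them, dropping only the ones being regenerated, and preserves the canonical NT ordering by emitting the new mode ACEs at the first inherited ACE (or at the tail when none is inherited). The walk, reduced to a sketch:

#include <linux/types.h>

#define EXAMPLE_INHERITED_ACE 0x10      /* mirrors INHERITED_ACE */

/* Sketch: explicit ACEs first, then the regenerated mode ACEs,
 * then the inherited tail. */
static void example_emit_order(const u8 *ace_flags, int n)
{
        bool placed = false;
        int i;

        for (i = 0; i < n; i++) {
                if (!placed && (ace_flags[i] & EXAMPLE_INHERITED_ACE)) {
                        /* emit the new mode ACEs here */
                        placed = true;
                }
                /* copy (or skip) source ACE i */
        }
        if (!placed) {
                /* no inherited ACEs: new ones go at the tail */
        }
}
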
@@ -1094,7 +1259,7 @@ static int parse_sec_desc(struct cifs_sb_info *cifs_sb,
 
 /* Convert permission bits from mode to equivalent CIFS ACL */
 static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
-       __u32 secdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
+       __u32 secdesclen, __u32 *pnsecdesclen, __u64 *pnmode, kuid_t uid, kgid_t gid,
        bool mode_from_sid, bool id_from_sid, int *aclflag)
 {
        int rc = 0;
@@ -1102,39 +1267,59 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
        __u32 ndacloffset;
        __u32 sidsoffset;
        struct cifs_sid *owner_sid_ptr, *group_sid_ptr;
-       struct cifs_sid *nowner_sid_ptr, *ngroup_sid_ptr;
+       struct cifs_sid *nowner_sid_ptr = NULL, *ngroup_sid_ptr = NULL;
        struct cifs_acl *dacl_ptr = NULL;  /* no need for SACL ptr */
        struct cifs_acl *ndacl_ptr = NULL; /* no need for SACL ptr */
+       char *end_of_acl = ((char *)pntsd) + secdesclen;
+       u16 size = 0;
 
-       if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
-               owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
-                               le32_to_cpu(pntsd->osidoffset));
-               group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
-                               le32_to_cpu(pntsd->gsidoffset));
-               dacloffset = le32_to_cpu(pntsd->dacloffset);
+       dacloffset = le32_to_cpu(pntsd->dacloffset);
+       if (dacloffset) {
                dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+               if (end_of_acl < (char *)dacl_ptr + le16_to_cpu(dacl_ptr->size)) {
+                       cifs_dbg(VFS, "Server returned illegal ACL size\n");
+                       return -EINVAL;
+               }
+       }
+
+       owner_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+                       le32_to_cpu(pntsd->osidoffset));
+       group_sid_ptr = (struct cifs_sid *)((char *)pntsd +
+                       le32_to_cpu(pntsd->gsidoffset));
+
+       if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
                ndacloffset = sizeof(struct cifs_ntsd);
                ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
-               ndacl_ptr->revision = dacl_ptr->revision;
-               ndacl_ptr->size = 0;
-               ndacl_ptr->num_aces = 0;
+               ndacl_ptr->revision =
+                       dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
 
-               rc = set_chmod_dacl(ndacl_ptr, owner_sid_ptr, group_sid_ptr,
+               ndacl_ptr->size = cpu_to_le16(0);
+               ndacl_ptr->num_aces = cpu_to_le32(0);
+
+               rc = set_chmod_dacl(dacl_ptr, ndacl_ptr, owner_sid_ptr, group_sid_ptr,
                                    pnmode, mode_from_sid);
+
                sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
-               /* copy sec desc control portion & owner and group sids */
-               copy_sec_desc(pntsd, pnntsd, sidsoffset);
-               *aclflag = CIFS_ACL_DACL;
+               /* copy the non-dacl portion of secdesc */
+               *pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+                               NULL, NULL);
+
+               *aclflag |= CIFS_ACL_DACL;
        } else {
-               memcpy(pnntsd, pntsd, secdesclen);
+               ndacloffset = sizeof(struct cifs_ntsd);
+               ndacl_ptr = (struct cifs_acl *)((char *)pnntsd + ndacloffset);
+               ndacl_ptr->revision =
+                       dacloffset ? dacl_ptr->revision : cpu_to_le16(ACL_REVISION);
+               ndacl_ptr->num_aces = dacl_ptr->num_aces;
+
                if (uid_valid(uid)) { /* chown */
                        uid_t id;
-                       owner_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
-                                       le32_to_cpu(pnntsd->osidoffset));
                        nowner_sid_ptr = kmalloc(sizeof(struct cifs_sid),
                                                                GFP_KERNEL);
-                       if (!nowner_sid_ptr)
-                               return -ENOMEM;
+                       if (!nowner_sid_ptr) {
+                               rc = -ENOMEM;
+                               goto chown_chgrp_exit;
+                       }
                        id = from_kuid(&init_user_ns, uid);
                        if (id_from_sid) {
                                struct owner_sid *osid = (struct owner_sid *)nowner_sid_ptr;
@@ -1145,27 +1330,25 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                                osid->SubAuthorities[0] = cpu_to_le32(88);
                                osid->SubAuthorities[1] = cpu_to_le32(1);
                                osid->SubAuthorities[2] = cpu_to_le32(id);
+
                        } else { /* lookup sid with upcall */
                                rc = id_to_sid(id, SIDOWNER, nowner_sid_ptr);
                                if (rc) {
                                        cifs_dbg(FYI, "%s: Mapping error %d for owner id %d\n",
                                                 __func__, rc, id);
-                                       kfree(nowner_sid_ptr);
-                                       return rc;
+                                       goto chown_chgrp_exit;
                                }
                        }
-                       cifs_copy_sid(owner_sid_ptr, nowner_sid_ptr);
-                       kfree(nowner_sid_ptr);
-                       *aclflag = CIFS_ACL_OWNER;
+                       *aclflag |= CIFS_ACL_OWNER;
                }
                if (gid_valid(gid)) { /* chgrp */
                        gid_t id;
-                       group_sid_ptr = (struct cifs_sid *)((char *)pnntsd +
-                                       le32_to_cpu(pnntsd->gsidoffset));
                        ngroup_sid_ptr = kmalloc(sizeof(struct cifs_sid),
                                                                GFP_KERNEL);
-                       if (!ngroup_sid_ptr)
-                               return -ENOMEM;
+                       if (!ngroup_sid_ptr) {
+                               rc = -ENOMEM;
+                               goto chown_chgrp_exit;
+                       }
                        id = from_kgid(&init_user_ns, gid);
                        if (id_from_sid) {
                                struct owner_sid *gsid = (struct owner_sid *)ngroup_sid_ptr;
@@ -1176,19 +1359,35 @@ static int build_sec_desc(struct cifs_ntsd *pntsd, struct cifs_ntsd *pnntsd,
                                gsid->SubAuthorities[0] = cpu_to_le32(88);
                                gsid->SubAuthorities[1] = cpu_to_le32(2);
                                gsid->SubAuthorities[2] = cpu_to_le32(id);
+
                        } else { /* lookup sid with upcall */
                                rc = id_to_sid(id, SIDGROUP, ngroup_sid_ptr);
                                if (rc) {
                                        cifs_dbg(FYI, "%s: Mapping error %d for group id %d\n",
                                                 __func__, rc, id);
-                                       kfree(ngroup_sid_ptr);
-                                       return rc;
+                                       goto chown_chgrp_exit;
                                }
                        }
-                       cifs_copy_sid(group_sid_ptr, ngroup_sid_ptr);
-                       kfree(ngroup_sid_ptr);
-                       *aclflag = CIFS_ACL_GROUP;
+                       *aclflag |= CIFS_ACL_GROUP;
+               }
+
+               if (dacloffset) {
+                       /* Replace ACEs for old owner with new one */
+                       size = replace_sids_and_copy_aces(dacl_ptr, ndacl_ptr,
+                                       owner_sid_ptr, group_sid_ptr,
+                                       nowner_sid_ptr, ngroup_sid_ptr);
+                       ndacl_ptr->size = cpu_to_le16(size);
                }
+
+               sidsoffset = ndacloffset + le16_to_cpu(ndacl_ptr->size);
+               /* copy the non-dacl portion of secdesc */
+               *pnsecdesclen = copy_sec_desc(pntsd, pnntsd, sidsoffset,
+                               nowner_sid_ptr, ngroup_sid_ptr);
+
+chown_chgrp_exit:
+               /* errors could jump here. So make sure we return soon after this */
+               kfree(nowner_sid_ptr);
+               kfree(ngroup_sid_ptr);
        }
 
        return rc;
@@ -1384,6 +1583,9 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
        int rc = 0;
        int aclflag = CIFS_ACL_DACL; /* default flag to set */
        __u32 secdesclen = 0;
+       __u32 nsecdesclen = 0;
+       __u32 dacloffset = 0;
+       struct cifs_acl *dacl_ptr = NULL;
        struct cifs_ntsd *pntsd = NULL; /* acl obtained from server */
        struct cifs_ntsd *pnntsd = NULL; /* modified acl to be sent to server */
        struct cifs_sb_info *cifs_sb = CIFS_SB(inode->i_sb);
@@ -1414,31 +1616,52 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
                return rc;
        }
 
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
+               mode_from_sid = true;
+       else
+               mode_from_sid = false;
+
+       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
+               id_from_sid = true;
+       else
+               id_from_sid = false;
+
+       /* Potentially, five new ACEs can be added to the ACL for U,G,O mapping */
+       nsecdesclen = secdesclen;
+       if (pnmode && *pnmode != NO_CHANGE_64) { /* chmod */
+               if (mode_from_sid)
+                       nsecdesclen += sizeof(struct cifs_ace);
+               else /* cifsacl */
+                       nsecdesclen += 5 * sizeof(struct cifs_ace);
+       } else { /* chown */
+               /* When ownership changes, the new owner sid length could be different */
+               nsecdesclen = sizeof(struct cifs_ntsd) + (sizeof(struct cifs_sid) * 2);
+               dacloffset = le32_to_cpu(pntsd->dacloffset);
+               if (dacloffset) {
+                       dacl_ptr = (struct cifs_acl *)((char *)pntsd + dacloffset);
+                       if (mode_from_sid)
+                               nsecdesclen +=
+                                       le32_to_cpu(dacl_ptr->num_aces) * sizeof(struct cifs_ace);
+                       else /* cifsacl */
+                               nsecdesclen += le16_to_cpu(dacl_ptr->size);
+               }
+       }
+
        /*
         * Add three ACEs for owner, group, everyone getting rid of other ACEs
         * as chmod disables ACEs and set the security descriptor. Allocate
         * memory for the smb header, set security descriptor request security
         * descriptor parameters, and security descriptor itself
         */
-       secdesclen = max_t(u32, secdesclen, DEFAULT_SEC_DESC_LEN);
-       pnntsd = kmalloc(secdesclen, GFP_KERNEL);
+       nsecdesclen = max_t(u32, nsecdesclen, DEFAULT_SEC_DESC_LEN);
+       pnntsd = kmalloc(nsecdesclen, GFP_KERNEL);
        if (!pnntsd) {
                kfree(pntsd);
                cifs_put_tlink(tlink);
                return -ENOMEM;
        }
 
-       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_MODE_FROM_SID)
-               mode_from_sid = true;
-       else
-               mode_from_sid = false;
-
-       if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_UID_FROM_ACL)
-               id_from_sid = true;
-       else
-               id_from_sid = false;
-
-       rc = build_sec_desc(pntsd, pnntsd, secdesclen, pnmode, uid, gid,
+       rc = build_sec_desc(pntsd, pnntsd, secdesclen, &nsecdesclen, pnmode, uid, gid,
                            mode_from_sid, id_from_sid, &aclflag);
 
        cifs_dbg(NOISY, "build_sec_desc rc: %d\n", rc);
@@ -1448,7 +1671,7 @@ id_mode_to_cifs_acl(struct inode *inode, const char *path, __u64 *pnmode,
 
        if (!rc) {
                /* Set the security descriptor */
-               rc = ops->set_acl(pnntsd, secdesclen, inode, path, aclflag);
+               rc = ops->set_acl(pnntsd, nsecdesclen, inode, path, aclflag);
                cifs_dbg(NOISY, "set_cifs_acl rc: %d\n", rc);
        }
        cifs_put_tlink(tlink);
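
The caller now sizes the destination descriptor up front instead of reusing the source length, before the final clamp to DEFAULT_SEC_DESC_LEN. A sketch of the budget under the assumed worst cases:

#include <linux/types.h>

/* Sketch (assumed worst cases): chmod appends at most five mode
 * ACEs (only one with modefromsid); chown/chgrp rebuilds the header
 * and both SIDs and re-copies every source ACE. */
static u32 example_new_desc_len(bool chmod_op, bool mode_from_sid,
                                u32 old_len, u32 hdr_and_sids,
                                u32 ace_size, u32 src_dacl_bytes)
{
        if (chmod_op)
                return old_len + (mode_from_sid ? 1 : 5) * ace_size;
        return hdr_and_sids + src_dacl_bytes;
}
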
index ff7fd08..d9e7049 100644 (file)
@@ -31,8 +31,8 @@
 #define EXEC_BIT        0x1
 
 #define ACL_OWNER_MASK 0700
-#define ACL_GROUP_MASK 0770
-#define ACL_EVERYONE_MASK 0777
+#define ACL_GROUP_MASK 0070
+#define ACL_EVERYONE_MASK 0007
 
 #define UBITSHIFT      6
 #define GBITSHIFT      3
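
The old masks overlapped (0770 and 0777 both include owner bits), letting owner permissions bleed into the group and everyone classes. With disjoint masks each class owns exactly one octal digit, so recombining is a plain OR (sketch):

#include <linux/types.h>

/* Sketch: disjoint masks mean recombining classes is a plain OR
 * with no double-counted owner bits. */
static umode_t example_mode(umode_t u_bits, umode_t g_bits, umode_t o_bits)
{
        return (u_bits & 0700) | (g_bits & 0070) | (o_bits & 0007);
}
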
index 51d53e4..b8f1ff9 100644 (file)
@@ -568,15 +568,15 @@ static int calc_ntlmv2_hash(struct cifs_ses *ses, char *ntlmv2_hash,
                        return rc;
                }
        } else {
-               /* We use ses->serverName if no domain name available */
-               len = strlen(ses->serverName);
+               /* We use ses->ip_addr if no domain name available */
+               len = strlen(ses->ip_addr);
 
                server = kmalloc(2 + (len * 2), GFP_KERNEL);
                if (server == NULL) {
                        rc = -ENOMEM;
                        return rc;
                }
-               len = cifs_strtoUTF16((__le16 *)server, ses->serverName, len,
+               len = cifs_strtoUTF16((__le16 *)server, ses->ip_addr, len,
                                        nls_cp);
                rc =
                crypto_shash_update(&ses->server->secmech.sdeschmacmd5->shash,
index ab883e8..5ddd20b 100644 (file)
@@ -290,7 +290,7 @@ cifs_statfs(struct dentry *dentry, struct kstatfs *buf)
                rc = server->ops->queryfs(xid, tcon, cifs_sb, buf);
 
        free_xid(xid);
-       return 0;
+       return rc;
 }
 
 static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
@@ -305,7 +305,8 @@ static long cifs_fallocate(struct file *file, int mode, loff_t off, loff_t len)
        return -EOPNOTSUPP;
 }
 
-static int cifs_permission(struct inode *inode, int mask)
+static int cifs_permission(struct user_namespace *mnt_userns,
+                          struct inode *inode, int mask)
 {
        struct cifs_sb_info *cifs_sb;
 
@@ -320,7 +321,7 @@ static int cifs_permission(struct inode *inode, int mask)
                on the client (above and beyond ACL on servers) for
                servers which do not support setting and viewing mode bits,
                so allowing client to check permissions is useful */
-               return generic_permission(inode, mask);
+               return generic_permission(&init_user_ns, inode, mask);
 }
 
 static struct kmem_cache *cifs_inode_cachep;
@@ -475,7 +476,8 @@ static int cifs_show_devname(struct seq_file *m, struct dentry *root)
                seq_puts(m, "none");
        else {
                convert_delimiter(devname, '/');
-               seq_puts(m, devname);
+               /* escape all spaces in share names */
+               seq_escape(m, devname, " \t");
                kfree(devname);
        }
        return 0;
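
Share names may contain spaces, which would corrupt the whitespace-delimited /proc/mounts format. seq_escape() writes the listed characters as octal escapes instead; a sketch:

#include <linux/seq_file.h>

/* Sketch: "//srv/share name" is emitted as "//srv/share\040name",
 * keeping the whitespace-delimited mount table parseable. */
static void example_show_devname(struct seq_file *m, const char *devname)
{
        seq_escape(m, devname, " \t");
}
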
@@ -637,8 +639,18 @@ cifs_show_options(struct seq_file *s, struct dentry *root)
                seq_printf(s, ",snapshot=%llu", tcon->snapshot_time);
        if (tcon->handle_timeout)
                seq_printf(s, ",handletimeout=%u", tcon->handle_timeout);
-       /* convert actimeo and display it in seconds */
-       seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->actimeo / HZ);
+
+       /*
+        * Display file and directory attribute timeout in seconds.
+        * If the file and directory attribute timeouts are the same, actimeo
+        * was likely specified on mount
+        */
+       if (cifs_sb->ctx->acdirmax == cifs_sb->ctx->acregmax)
+               seq_printf(s, ",actimeo=%lu", cifs_sb->ctx->acregmax / HZ);
+       else {
+               seq_printf(s, ",acdirmax=%lu", cifs_sb->ctx->acdirmax / HZ);
+               seq_printf(s, ",acregmax=%lu", cifs_sb->ctx->acregmax / HZ);
+       }
 
        if (tcon->ses->chan_max > 1)
                seq_printf(s, ",multichannel,max_channels=%zu",
@@ -1525,6 +1537,7 @@ init_cifs(void)
  */
        atomic_set(&sesInfoAllocCount, 0);
        atomic_set(&tconInfoAllocCount, 0);
+       atomic_set(&tcpSesNextId, 0);
        atomic_set(&tcpSesAllocCount, 0);
        atomic_set(&tcpSesReconnectCount, 0);
        atomic_set(&tconInfoReconnectCount, 0);
index 2307bb0..0d7ef15 100644 (file)
@@ -62,19 +62,22 @@ extern void cifs_sb_deactive(struct super_block *sb);
 /* Functions related to inodes */
 extern const struct inode_operations cifs_dir_inode_ops;
 extern struct inode *cifs_root_iget(struct super_block *);
-extern int cifs_create(struct inode *, struct dentry *, umode_t,
-                      bool excl);
+extern int cifs_create(struct user_namespace *, struct inode *,
+                      struct dentry *, umode_t, bool excl);
 extern int cifs_atomic_open(struct inode *, struct dentry *,
                            struct file *, unsigned, umode_t);
 extern struct dentry *cifs_lookup(struct inode *, struct dentry *,
                                  unsigned int);
 extern int cifs_unlink(struct inode *dir, struct dentry *dentry);
 extern int cifs_hardlink(struct dentry *, struct inode *, struct dentry *);
-extern int cifs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-extern int cifs_mkdir(struct inode *, struct dentry *, umode_t);
+extern int cifs_mknod(struct user_namespace *, struct inode *, struct dentry *,
+                     umode_t, dev_t);
+extern int cifs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
+                     umode_t);
 extern int cifs_rmdir(struct inode *, struct dentry *);
-extern int cifs_rename2(struct inode *, struct dentry *, struct inode *,
-                       struct dentry *, unsigned int);
+extern int cifs_rename2(struct user_namespace *, struct inode *,
+                       struct dentry *, struct inode *, struct dentry *,
+                       unsigned int);
 extern int cifs_revalidate_file_attr(struct file *filp);
 extern int cifs_revalidate_dentry_attr(struct dentry *);
 extern int cifs_revalidate_file(struct file *filp);
@@ -82,8 +85,10 @@ extern int cifs_revalidate_dentry(struct dentry *);
 extern int cifs_invalidate_mapping(struct inode *inode);
 extern int cifs_revalidate_mapping(struct inode *inode);
 extern int cifs_zap_mapping(struct inode *inode);
-extern int cifs_getattr(const struct path *, struct kstat *, u32, unsigned int);
-extern int cifs_setattr(struct dentry *, struct iattr *);
+extern int cifs_getattr(struct user_namespace *, const struct path *,
+                       struct kstat *, u32, unsigned int);
+extern int cifs_setattr(struct user_namespace *, struct dentry *,
+                       struct iattr *);
 extern int cifs_fiemap(struct inode *, struct fiemap_extent_info *, u64 start,
                       u64 len);
 
@@ -132,8 +137,8 @@ extern struct vfsmount *cifs_dfs_d_automount(struct path *path);
 /* Functions related to symlinks */
 extern const char *cifs_get_link(struct dentry *, struct inode *,
                        struct delayed_call *);
-extern int cifs_symlink(struct inode *inode, struct dentry *direntry,
-                       const char *symname);
+extern int cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
+                       struct dentry *direntry, const char *symname);
 
 #ifdef CONFIG_CIFS_XATTR
 extern const struct xattr_handler *cifs_xattr_handlers[];
@@ -160,5 +165,5 @@ extern struct dentry *cifs_smb3_do_mount(struct file_system_type *fs_type,
 extern const struct export_operations cifs_export_ops;
 #endif /* CONFIG_CIFS_NFSD_EXPORT */
 
-#define CIFS_VERSION   "2.30"
+#define CIFS_VERSION   "2.31"
 #endif                         /* _CIFSFS_H */
index 50fcb65..ec824ab 100644 (file)
@@ -21,6 +21,7 @@
 
 #include <linux/in.h>
 #include <linux/in6.h>
+#include <linux/inet.h>
 #include <linux/slab.h>
 #include <linux/mempool.h>
 #include <linux/workqueue.h>
@@ -256,7 +257,7 @@ struct smb_version_operations {
        /* verify the message */
        int (*check_message)(char *, unsigned int, struct TCP_Server_Info *);
        bool (*is_oplock_break)(char *, struct TCP_Server_Info *);
-       int (*handle_cancelled_mid)(char *, struct TCP_Server_Info *);
+       int (*handle_cancelled_mid)(struct mid_q_entry *, struct TCP_Server_Info *);
        void (*downgrade_oplock)(struct TCP_Server_Info *server,
                                 struct cifsInodeInfo *cinode, __u32 oplock,
                                 unsigned int epoch, bool *purge_cache);
@@ -504,6 +505,8 @@ struct smb_version_operations {
        loff_t (*llseek)(struct file *, struct cifs_tcon *, loff_t, int);
        /* Check for STATUS_IO_TIMEOUT */
        bool (*is_status_io_timeout)(char *buf);
+       /* Check for STATUS_NETWORK_NAME_DELETED */
+       void (*is_network_name_deleted)(char *buf, struct TCP_Server_Info *srv);
 };
 
 struct smb_version_values {
@@ -577,6 +580,7 @@ inc_rfc1001_len(void *buf, int count)
 struct TCP_Server_Info {
        struct list_head tcp_ses_list;
        struct list_head smb_ses_list;
+       __u64 conn_id; /* connection identifier (useful for debugging) */
        int srv_count; /* reference counter */
        /* 15 character server name + 0x20 16th byte indicating type = srv */
        char server_RFC1001_name[RFC1001_NAME_LEN_WITH_NULL];
@@ -901,7 +905,7 @@ struct cifs_ses {
        kuid_t linux_uid;       /* overriding owner of files on the mount */
        kuid_t cred_uid;        /* owner of credentials */
        unsigned int capabilities;
-       char serverName[SERVER_NAME_LEN_WITH_NULL];
+       char ip_addr[INET6_ADDRSTRLEN + 1]; /* Max ipv6 (or v4) addr string len */
        char *user_name;        /* must not be null except during init of sess
                                   and after mount option parsing we fill it */
        char *domainName;
@@ -915,8 +919,8 @@ struct cifs_ses {
        bool binding:1; /* are we binding the session? */
        __u16 session_flags;
        __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE];
-       __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE];
-       __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE];
+       __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE];
+       __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE];
        __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
 
        __u8 binding_preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE];
@@ -1279,8 +1283,6 @@ struct cifs_aio_ctx {
        bool                    direct_io;
 };
 
-struct cifs_readdata;
-
 /* asynchronous read support */
 struct cifs_readdata {
        struct kref                     refcount;
@@ -1701,14 +1703,17 @@ static inline bool is_retryable_error(int error)
 #define   CIFS_NO_RSP_BUF   0x040    /* no response buffer required */
 
 /* Type of request operation */
-#define   CIFS_ECHO_OP      0x080    /* echo request */
-#define   CIFS_OBREAK_OP   0x0100    /* oplock break request */
-#define   CIFS_NEG_OP      0x0200    /* negotiate request */
-#define   CIFS_OP_MASK     0x0380    /* mask request type */
-
-#define   CIFS_HAS_CREDITS 0x0400    /* already has credits */
-#define   CIFS_TRANSFORM_REQ 0x0800    /* transform request before sending */
-#define   CIFS_NO_SRV_RSP    0x1000    /* there is no server response */
+#define   CIFS_ECHO_OP            0x080  /* echo request */
+#define   CIFS_OBREAK_OP          0x0100 /* oplock break request */
+#define   CIFS_NEG_OP             0x0200 /* negotiate request */
+#define   CIFS_CP_CREATE_CLOSE_OP 0x0400 /* compound create+close request */
+/* Lower bitmask values are reserved by others below. */
+#define   CIFS_SESS_OP            0x2000 /* session setup request */
+#define   CIFS_OP_MASK            0x2780 /* mask request type */
+
+#define   CIFS_HAS_CREDITS        0x0400 /* already has credits */
+#define   CIFS_TRANSFORM_REQ      0x0800 /* transform request before sending */
+#define   CIFS_NO_SRV_RSP         0x1000 /* there is no server response */
 
 /* Security Flags: indicate type of session setup needed */
 #define   CIFSSEC_MAY_SIGN     0x00001
@@ -1844,6 +1849,7 @@ GLOBAL_EXTERN spinlock_t GlobalMid_Lock;  /* protects above & list operations */
  */
 GLOBAL_EXTERN atomic_t sesInfoAllocCount;
 GLOBAL_EXTERN atomic_t tconInfoAllocCount;
+GLOBAL_EXTERN atomic_t tcpSesNextId;
 GLOBAL_EXTERN atomic_t tcpSesAllocCount;
 GLOBAL_EXTERN atomic_t tcpSesReconnectCount;
 GLOBAL_EXTERN atomic_t tconInfoReconnectCount;
index 64fe5a4..9adc74b 100644 (file)
  */
 #define SMB3_SIGN_KEY_SIZE (16)
 
+/*
+ * Size of the smb3 encryption/decryption keys
+ */
+#define SMB3_ENC_DEC_KEY_SIZE (32)
+
 #define CIFS_CLIENT_CHALLENGE_SIZE (8)
 #define CIFS_SERVER_CHALLENGE_SIZE (8)
 #define CIFS_HMAC_MD5_HASH_SIZE (16)
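A minimal sketch of how the negotiated cipher would select the key length actually
used, under the assumption that the AES-256 cipher IDs from this series are in scope
(the helper itself is hypothetical; the size constants are the ones defined here):

    /* hypothetical helper: key length in use for a negotiated cipher */
    static inline size_t smb3_crypt_key_len(__le16 cipher_type)
    {
            if (cipher_type == SMB2_ENCRYPTION_AES256_GCM ||
                cipher_type == SMB2_ENCRYPTION_AES256_CCM)
                    return SMB3_GCM256_CRYPTKEY_SIZE;       /* 32 bytes */
            return SMB3_GCM128_CRYPTKEY_SIZE;               /* 16 bytes */
    }

The buffers above are sized SMB3_ENC_DEC_KEY_SIZE (32) so they can hold either case.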
index 32f7a01..75ce6f7 100644 (file)
@@ -232,6 +232,8 @@ extern unsigned int setup_special_user_owner_ACE(struct cifs_ace *pace);
 extern void dequeue_mid(struct mid_q_entry *mid, bool malformed);
 extern int cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
                                 unsigned int to_read);
+extern ssize_t cifs_discard_from_socket(struct TCP_Server_Info *server,
+                                       size_t to_read);
 extern int cifs_read_page_from_socket(struct TCP_Server_Info *server,
                                        struct page *page,
                                        unsigned int page_offset,
index 0496934..c279527 100644 (file)
@@ -1451,9 +1451,9 @@ cifs_discard_remaining_data(struct TCP_Server_Info *server)
        while (remaining > 0) {
                int length;
 
-               length = cifs_read_from_socket(server, server->bigbuf,
-                               min_t(unsigned int, remaining,
-                                   CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
+               length = cifs_discard_from_socket(server,
+                               min_t(size_t, remaining,
+                                     CIFSMaxBufSize + MAX_HEADER_SIZE(server)));
                if (length < 0)
                        return length;
                server->total_read += length;
index 4bb9dec..24668eb 100644 (file)
@@ -87,7 +87,6 @@ static void cifs_prune_tlinks(struct work_struct *work);
  *
  * This should be called with server->srv_mutex held.
  */
-#ifdef CONFIG_CIFS_DFS_UPCALL
 static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
 {
        int rc;
@@ -124,6 +123,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
        return !rc ? -1 : 0;
 }
 
+#ifdef CONFIG_CIFS_DFS_UPCALL
 /* These functions must be called with server->srv_mutex held */
 static void reconn_set_next_dfs_target(struct TCP_Server_Info *server,
                                       struct cifs_sb_info *cifs_sb,
@@ -242,7 +242,7 @@ cifs_reconnect(struct TCP_Server_Info *server)
        server->max_read = 0;
 
        cifs_dbg(FYI, "Mark tcp session as need reconnect\n");
-       trace_smb3_reconnect(server->CurrentMid, server->hostname);
+       trace_smb3_reconnect(server->CurrentMid, server->conn_id, server->hostname);
 
        /* before reconnecting the tcp session, mark the smb session (uid)
                and the tid bad so they are not used until reconnected */
@@ -321,14 +321,29 @@ cifs_reconnect(struct TCP_Server_Info *server)
 #endif
 
 #ifdef CONFIG_CIFS_DFS_UPCALL
+               if (cifs_sb && cifs_sb->origin_fullpath)
                        /*
                         * Set up next DFS target server (if any) for reconnect. If DFS
                         * feature is disabled, then we will retry last server we
                         * connected to before.
                         */
                        reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it);
+               else {
+#endif
+                       /*
+                        * Resolve the hostname again to make sure that IP address is up-to-date.
+                        */
+                       rc = reconn_set_ipaddr_from_hostname(server);
+                       if (rc) {
+                               cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+                                               __func__, rc);
+                       }
+
+#ifdef CONFIG_CIFS_DFS_UPCALL
+               }
 #endif
 
 #ifdef CONFIG_CIFS_SWN_UPCALL
                }
 #endif
@@ -564,6 +579,23 @@ cifs_read_from_socket(struct TCP_Server_Info *server, char *buf,
        return cifs_readv_from_socket(server, &smb_msg);
 }
 
+ssize_t
+cifs_discard_from_socket(struct TCP_Server_Info *server, size_t to_read)
+{
+       struct msghdr smb_msg;
+
+       /*
+        *  iov_iter_discard already sets the msg_iter type, count and
+        *  iov_offset, and cifs_readv_from_socket sets msg_control and
+        *  msg_controllen, so there is little left to initialize in struct msghdr
+        */
+       smb_msg.msg_name = NULL;
+       smb_msg.msg_namelen = 0;
+       iov_iter_discard(&smb_msg.msg_iter, READ, to_read);
+
+       return cifs_readv_from_socket(server, &smb_msg);
+}
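For contrast, the same discard pattern stripped to its essentials against a bare
socket (a sketch only; the helper name is hypothetical, while iov_iter_discard()
and sock_recvmsg() are the real kernel APIs used above):

    /* drain @count bytes from @sock without copying them anywhere */
    static int drain_socket_bytes(struct socket *sock, size_t count)
    {
            struct msghdr msg = { .msg_name = NULL, .msg_namelen = 0 };

            /* msg_iter becomes a sink that consumes and drops @count bytes */
            iov_iter_discard(&msg.msg_iter, READ, count);
            return sock_recvmsg(sock, &msg, MSG_WAITALL);
    }

Compared to the old approach in cifs_discard_remaining_data(), nothing is staged
in server->bigbuf; the bytes are consumed directly off the socket.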
+
 int
 cifs_read_page_from_socket(struct TCP_Server_Info *server, struct page *page,
        unsigned int page_offset, unsigned int to_read)
@@ -724,7 +756,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
                spin_lock(&GlobalMid_Lock);
                list_for_each_safe(tmp, tmp2, &server->pending_mid_q) {
                        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-                       cifs_dbg(FYI, "Clearing mid 0x%llx\n", mid_entry->mid);
+                       cifs_dbg(FYI, "Clearing mid %llu\n", mid_entry->mid);
                        kref_get(&mid_entry->refcount);
                        mid_entry->mid_state = MID_SHUTDOWN;
                        list_move(&mid_entry->qhead, &dispose_list);
@@ -735,7 +767,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
                /* now walk dispose list and issue callbacks */
                list_for_each_safe(tmp, tmp2, &dispose_list) {
                        mid_entry = list_entry(tmp, struct mid_q_entry, qhead);
-                       cifs_dbg(FYI, "Callback mid 0x%llx\n", mid_entry->mid);
+                       cifs_dbg(FYI, "Callback mid %llu\n", mid_entry->mid);
                        list_del_init(&mid_entry->qhead);
                        mid_entry->callback(mid_entry);
                        cifs_mid_q_entry_release(mid_entry);
@@ -846,7 +878,7 @@ static void
 smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
 {
        struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buffer;
-       int scredits = server->credits;
+       int scredits, in_flight;
 
        /*
         * SMB1 does not use credits.
@@ -857,12 +889,14 @@ smb2_add_credits_from_hdr(char *buffer, struct TCP_Server_Info *server)
        if (shdr->CreditRequest) {
                spin_lock(&server->req_lock);
                server->credits += le16_to_cpu(shdr->CreditRequest);
+               scredits = server->credits;
+               in_flight = server->in_flight;
                spin_unlock(&server->req_lock);
                wake_up(&server->request_q);
 
                trace_smb3_add_credits(server->CurrentMid,
-                               server->hostname, scredits,
-                               le16_to_cpu(shdr->CreditRequest));
+                               server->conn_id, server->hostname, scredits,
+                               le16_to_cpu(shdr->CreditRequest), in_flight);
                cifs_server_dbg(FYI, "%s: added %u credits total=%d\n",
                                __func__, le16_to_cpu(shdr->CreditRequest),
                                scredits);
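This hunk establishes the pattern the rest of the tracing changes in this series
follow: capture the credit and in-flight counters while req_lock is held, then emit
the trace point after unlocking. The idiom, condensed (granted stands in for the
decoded CreditRequest value):

    int scredits, in_flight;

    spin_lock(&server->req_lock);
    server->credits += granted;     /* mutate shared state          */
    scredits  = server->credits;    /* snapshot a coherent view ... */
    in_flight = server->in_flight;
    spin_unlock(&server->req_lock);

    /* ... and trace outside the lock, using only the snapshots */
    trace_smb3_add_credits(server->CurrentMid, server->conn_id,
                           server->hostname, scredits, granted, in_flight);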
@@ -993,6 +1027,10 @@ next_pdu:
                        if (mids[i] != NULL) {
                                mids[i]->resp_buf_size = server->pdu_size;
 
+                               if (bufs[i] && server->ops->is_network_name_deleted)
+                                       server->ops->is_network_name_deleted(bufs[i],
+                                                                       server);
+
                                if (!mids[i]->multiRsp || mids[i]->multiEnd)
                                        mids[i]->callback(mids[i]);
 
@@ -1317,6 +1355,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
                goto out_err_crypto_release;
        }
 
+       tcp_ses->conn_id = atomic_inc_return(&tcpSesNextId);
        tcp_ses->noblockcnt = ctx->rootfs;
        tcp_ses->noblocksnd = ctx->noblocksnd || ctx->rootfs;
        tcp_ses->noautotune = ctx->noautotune;
@@ -1405,6 +1444,11 @@ smbd_connected:
        tcp_ses->min_offload = ctx->min_offload;
        tcp_ses->tcpStatus = CifsNeedNegotiate;
 
+       if ((ctx->max_credits < 20) || (ctx->max_credits > 60000))
+               tcp_ses->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
+       else
+               tcp_ses->max_credits = ctx->max_credits;
+
        tcp_ses->nr_targets = 1;
        tcp_ses->ignore_signature = ctx->ignore_signature;
        /* thread spawned, put it on the list */
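Clamping here, rather than in mount_get_conns() (removed further down), applies the
bound once per TCP connection before negotiation, and secondary channels inherit it
via ctx.max_credits in cifs_ses_add_channel(); the rule itself:

    /* requested max_credits outside [20, 60000] falls back to
     * SMB2_MAX_CREDITS_AVAILABLE; in-range values are used as given */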
@@ -1838,9 +1882,9 @@ cifs_get_smb_ses(struct TCP_Server_Info *server, struct smb3_fs_context *ctx)
        /* new SMB session uses our server ref */
        ses->server = server;
        if (server->dstaddr.ss_family == AF_INET6)
-               sprintf(ses->serverName, "%pI6", &addr6->sin6_addr);
+               sprintf(ses->ip_addr, "%pI6", &addr6->sin6_addr);
        else
-               sprintf(ses->serverName, "%pI4", &addr->sin_addr);
+               sprintf(ses->ip_addr, "%pI4", &addr->sin_addr);
 
        if (ctx->username) {
                ses->user_name = kstrdup(ctx->username, GFP_KERNEL);
@@ -2269,7 +2313,9 @@ compare_mount_options(struct super_block *sb, struct cifs_mnt_data *mnt_data)
        if (strcmp(old->local_nls->charset, new->local_nls->charset))
                return 0;
 
-       if (old->ctx->actimeo != new->ctx->actimeo)
+       if (old->ctx->acregmax != new->ctx->acregmax)
+               return 0;
+       if (old->ctx->acdirmax != new->ctx->acdirmax)
                return 0;
 
        return 1;
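One observable effect: two mounts of the same share that differ only in either
timeout no longer share a superblock, e.g. (illustrative commands, not from this
patch):

    /*  mount //srv/sh /a -o acregmax=3
     *  mount //srv/sh /b -o acregmax=30  -> distinct superblocks, since
     *  acregmax and acdirmax are now compared individually            */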
@@ -2806,11 +2852,6 @@ static int mount_get_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cif
 
        *nserver = server;
 
-       if ((ctx->max_credits < 20) || (ctx->max_credits > 60000))
-               server->max_credits = SMB2_MAX_CREDITS_AVAILABLE;
-       else
-               server->max_credits = ctx->max_credits;
-
        /* get a reference to a SMB session */
        ses = cifs_get_smb_ses(server, ctx);
        if (IS_ERR(ses)) {
@@ -2911,7 +2952,7 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 #ifdef CONFIG_CIFS_DFS_UPCALL
 /*
  * cifs_build_path_to_root returns full path to root when we do not have an
- * exiting connection (tcon)
+ * existing connection (tcon)
  */
 static char *
 build_unc_path_to_root(const struct smb3_fs_context *ctx,
@@ -3038,96 +3079,91 @@ static int update_vol_info(const struct dfs_cache_tgt_iterator *tgt_it,
        return 0;
 }
 
-static int setup_dfs_tgt_conn(const char *path, const char *full_path,
-                             const struct dfs_cache_tgt_iterator *tgt_it,
-                             struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-                             unsigned int *xid, struct TCP_Server_Info **server,
-                             struct cifs_ses **ses, struct cifs_tcon **tcon)
-{
-       int rc;
-       struct dfs_info3_param ref = {0};
-       char *mdata = NULL;
-       struct smb3_fs_context fake_ctx = {NULL};
-       char *fake_devname = NULL;
-
-       cifs_dbg(FYI, "%s: dfs path: %s\n", __func__, path);
-
-       rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
-       if (rc)
-               return rc;
-
-       mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options,
-                                          full_path + 1, &ref,
-                                          &fake_devname);
-       free_dfs_info_param(&ref);
-
-       if (IS_ERR(mdata)) {
-               rc = PTR_ERR(mdata);
-               mdata = NULL;
-       } else
-               rc = cifs_setup_volume_info(&fake_ctx, mdata, fake_devname);
-
-       kfree(mdata);
-       kfree(fake_devname);
-
-       if (!rc) {
-               /*
-                * We use a 'fake_ctx' here because we need pass it down to the
-                * mount_{get,put} functions to test connection against new DFS
-                * targets.
-                */
-               mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
-               rc = mount_get_conns(&fake_ctx, cifs_sb, xid, server, ses,
-                                    tcon);
-               if (!rc || (*server && *ses)) {
-                       /*
-                        * We were able to connect to new target server.
-                        * Update current context with new target server.
-                        */
-                       rc = update_vol_info(tgt_it, &fake_ctx, ctx);
-               }
-       }
-       smb3_cleanup_fs_context_contents(&fake_ctx);
-       return rc;
-}
-
 static int do_dfs_failover(const char *path, const char *full_path, struct cifs_sb_info *cifs_sb,
                           struct smb3_fs_context *ctx, struct cifs_ses *root_ses,
                           unsigned int *xid, struct TCP_Server_Info **server,
                           struct cifs_ses **ses, struct cifs_tcon **tcon)
 {
        int rc;
-       struct dfs_cache_tgt_list tgt_list;
+       struct dfs_cache_tgt_list tgt_list = {0};
        struct dfs_cache_tgt_iterator *tgt_it = NULL;
+       struct smb3_fs_context tmp_ctx = {NULL};
 
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS)
                return -EOPNOTSUPP;
 
+       cifs_dbg(FYI, "%s: path=%s full_path=%s\n", __func__, path, full_path);
+
        rc = dfs_cache_noreq_find(path, NULL, &tgt_list);
        if (rc)
                return rc;
+       /*
+        * We use a 'tmp_ctx' here because we need to pass it down to the mount_{get,put} functions to
+        * test connection against new DFS targets.
+        */
+       rc = smb3_fs_context_dup(&tmp_ctx, ctx);
+       if (rc)
+               goto out;
 
        for (;;) {
+               struct dfs_info3_param ref = {0};
+               char *fake_devname = NULL, *mdata = NULL;
+
                /* Get next DFS target server - if any */
                rc = get_next_dfs_tgt(path, &tgt_list, &tgt_it);
                if (rc)
                        break;
-               /* Connect to next DFS target */
-               rc = setup_dfs_tgt_conn(path, full_path, tgt_it, cifs_sb, ctx, xid, server, ses,
-                                       tcon);
-               if (!rc || (*server && *ses))
+
+               rc = dfs_cache_get_tgt_referral(path, tgt_it, &ref);
+               if (rc)
                        break;
+
+               cifs_dbg(FYI, "%s: old ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+                        tmp_ctx.prepath);
+
+               mdata = cifs_compose_mount_options(cifs_sb->ctx->mount_options, full_path + 1, &ref,
+                                                  &fake_devname);
+               free_dfs_info_param(&ref);
+
+               if (IS_ERR(mdata)) {
+                       rc = PTR_ERR(mdata);
+                       mdata = NULL;
+               } else
+                       rc = cifs_setup_volume_info(&tmp_ctx, mdata, fake_devname);
+
+               kfree(mdata);
+               kfree(fake_devname);
+
+               if (rc)
+                       break;
+
+               cifs_dbg(FYI, "%s: new ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+                        tmp_ctx.prepath);
+
+               mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
+               rc = mount_get_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
+               if (!rc || (*server && *ses)) {
+                       /*
+                        * We were able to connect to new target server. Update current context with
+                        * new target server.
+                        */
+                       rc = update_vol_info(tgt_it, &tmp_ctx, ctx);
+                       break;
+               }
        }
        if (!rc) {
+               cifs_dbg(FYI, "%s: final ctx: UNC=%s prepath=%s\n", __func__, tmp_ctx.UNC,
+                        tmp_ctx.prepath);
                /*
-                * Update DFS target hint in DFS referral cache with the target
-                * server we successfully reconnected to.
+                * Update DFS target hint in DFS referral cache with the target server we
+                * successfully reconnected to.
                 */
-               rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses,
-                                             cifs_sb->local_nls,
-                                             cifs_remap(cifs_sb), path,
-                                             tgt_it);
+               rc = dfs_cache_update_tgthint(*xid, root_ses ? root_ses : *ses, cifs_sb->local_nls,
+                                             cifs_remap(cifs_sb), path, tgt_it);
        }
+
+out:
+       smb3_cleanup_fs_context_contents(&tmp_ctx);
        dfs_cache_free_tgts(&tgt_list);
        return rc;
 }
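With setup_dfs_tgt_conn() folded in, the whole failover path reads as a single loop
over cached targets; its shape, paraphrased:

    /*
     * smb3_fs_context_dup(&tmp_ctx, ctx);
     * for each target from the dfs_cache_noreq_find() list:
     *         compose mount options from the target's referral;
     *         mount_put_conns(); mount_get_conns(&tmp_ctx, ...);
     *         connected? -> update_vol_info(tgt_it, &tmp_ctx, ctx); break;
     * if connected: dfs_cache_update_tgthint();
     * smb3_cleanup_fs_context_contents(&tmp_ctx);
     */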
@@ -3285,77 +3321,77 @@ static void put_root_ses(struct cifs_ses *ses)
                cifs_put_smb_ses(ses);
 }
 
-/* Check if a path component is remote and then update @dfs_path accordingly */
-static int check_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
-                            const unsigned int xid, struct TCP_Server_Info *server,
-                            struct cifs_tcon *tcon, char **dfs_path)
+/* Set up next dfs prefix path in @dfs_path */
+static int next_dfs_prepath(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx,
+                           const unsigned int xid, struct TCP_Server_Info *server,
+                           struct cifs_tcon *tcon, char **dfs_path)
 {
-       char *path, *s;
-       char sep = CIFS_DIR_SEP(cifs_sb), tmp;
-       char *npath;
-       int rc = 0;
-       int added_treename = tcon->Flags & SMB_SHARE_IS_IN_DFS;
-       int skip = added_treename;
+       char *path, *npath;
+       int added_treename = is_tcon_dfs(tcon);
+       int rc;
 
        path = cifs_build_path_to_root(ctx, cifs_sb, tcon, added_treename);
        if (!path)
                return -ENOMEM;
 
-       /*
-        * Walk through the path components in @path and check if they're accessible. In case any of
-        * the components is -EREMOTE, then update @dfs_path with the next DFS referral request path
-        * (NOT including the remaining components).
-        */
-       s = path;
-       do {
-               /* skip separators */
-               while (*s && *s == sep)
-                       s++;
-               if (!*s)
-                       break;
-               /* next separator */
-               while (*s && *s != sep)
-                       s++;
-               /*
-                * if the treename is added, we then have to skip the first
-                * part within the separators
-                */
-               if (skip) {
-                       skip = 0;
-                       continue;
+       rc = is_path_remote(cifs_sb, ctx, xid, server, tcon);
+       if (rc == -EREMOTE) {
+               struct smb3_fs_context v = {NULL};
+               /* if @path contains a tree name, skip it in the prefix path */
+               if (added_treename) {
+                       rc = smb3_parse_devname(path, &v);
+                       if (rc)
+                               goto out;
+                       npath = build_unc_path_to_root(&v, cifs_sb, true);
+                       smb3_cleanup_fs_context_contents(&v);
+               } else {
+                       v.UNC = ctx->UNC;
+                       v.prepath = path + 1;
+                       npath = build_unc_path_to_root(&v, cifs_sb, true);
                }
-               tmp = *s;
-               *s = 0;
-               rc = server->ops->is_path_accessible(xid, tcon, cifs_sb, path);
-               if (rc && rc == -EREMOTE) {
-                       struct smb3_fs_context v = {NULL};
-                       /* if @path contains a tree name, skip it in the prefix path */
-                       if (added_treename) {
-                               rc = smb3_parse_devname(path, &v);
-                               if (rc)
-                                       break;
-                               rc = -EREMOTE;
-                               npath = build_unc_path_to_root(&v, cifs_sb, true);
-                               smb3_cleanup_fs_context_contents(&v);
-                       } else {
-                               v.UNC = ctx->UNC;
-                               v.prepath = path + 1;
-                               npath = build_unc_path_to_root(&v, cifs_sb, true);
-                       }
-                       if (IS_ERR(npath)) {
-                               rc = PTR_ERR(npath);
-                               break;
-                       }
-                       kfree(*dfs_path);
-                       *dfs_path = npath;
+
+               if (IS_ERR(npath)) {
+                       rc = PTR_ERR(npath);
+                       goto out;
                }
-               *s = tmp;
-       } while (rc == 0);
 
+               kfree(*dfs_path);
+               *dfs_path = npath;
+               rc = -EREMOTE;
+       }
+
+out:
        kfree(path);
        return rc;
 }
 
+/* Check if resolved targets can handle any DFS referrals */
+static int is_referral_server(const char *ref_path, struct cifs_tcon *tcon, bool *ref_server)
+{
+       int rc;
+       struct dfs_info3_param ref = {0};
+
+       if (is_tcon_dfs(tcon)) {
+               *ref_server = true;
+       } else {
+               cifs_dbg(FYI, "%s: ref_path=%s\n", __func__, ref_path);
+
+               rc = dfs_cache_noreq_find(ref_path, &ref, NULL);
+               if (rc) {
+                       cifs_dbg(VFS, "%s: dfs_cache_noreq_find: failed (rc=%d)\n", __func__, rc);
+                       return rc;
+               }
+               cifs_dbg(FYI, "%s: ref.flags=0x%x\n", __func__, ref.flags);
+               /*
+                * Check if all targets are capable of handling DFS referrals as per
+                * MS-DFSC 2.2.4 RESP_GET_DFS_REFERRAL.
+                */
+               *ref_server = !!(ref.flags & DFSREF_REFERRAL_SERVER);
+               free_dfs_info_param(&ref);
+       }
+       return 0;
+}
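Assuming the usual DFSREF_REFERRAL_SERVER value of 0x1 (the ReferralServers header
bit in MS-DFSC), the non-DFS-tcon branch reduces to a single bit test; worked
examples:

    /* ref.flags = 0x0 -> *ref_server = false (storage-only target)
     * ref.flags = 0x1 -> *ref_server = true  (handles referrals)
     * ref.flags = 0x3 -> *ref_server = true  (referral + storage)
     */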
+
 int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
 {
        int rc = 0;
@@ -3367,18 +3403,19 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
        char *ref_path = NULL, *full_path = NULL;
        char *oldmnt = NULL;
        char *mntdata = NULL;
+       bool ref_server = false;
 
        rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
        /*
-        * Unconditionally try to get an DFS referral (even cached) to determine whether it is an
-        * DFS mount.
+        * If called with the 'nodfs' mount option, then skip DFS resolving.  Otherwise unconditionally
+        * try to get a DFS referral (even cached) to determine whether it is a DFS mount.
         *
         * Skip prefix path to provide support for DFS referrals from w2k8 servers which don't seem
         * to respond with PATH_NOT_COVERED to requests that include the prefix.
         */
-       if (dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
+       if ((cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_DFS) ||
+           dfs_cache_find(xid, ses, cifs_sb->local_nls, cifs_remap(cifs_sb), ctx->UNC + 1, NULL,
                           NULL)) {
-               /* No DFS referral was returned.  Looks like a regular share. */
                if (rc)
                        goto error;
                /* Check if it is fully accessible and then mount it */
@@ -3432,13 +3469,18 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
                        break;
                if (!tcon)
                        continue;
+
                /* Make sure that requests go through new root servers */
-               if (is_tcon_dfs(tcon)) {
+               rc = is_referral_server(ref_path + 1, tcon, &ref_server);
+               if (rc)
+                       break;
+               if (ref_server) {
                        put_root_ses(root_ses);
                        set_root_ses(cifs_sb, ses, &root_ses);
                }
-               /* Check for remaining path components and then continue chasing them (-EREMOTE) */
-               rc = check_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
+
+               /* Get next dfs path and then continue chasing them if -EREMOTE */
+               rc = next_dfs_prepath(cifs_sb, ctx, xid, server, tcon, &ref_path);
                /* Prevent recursion on broken link referrals */
                if (rc == -EREMOTE && ++count > MAX_NESTED_LINKS)
                        rc = -ELOOP;
index 4950ab0..098b4bc 100644 (file)
@@ -37,11 +37,12 @@ struct cache_dfs_tgt {
 struct cache_entry {
        struct hlist_node hlist;
        const char *path;
-       int ttl;
-       int srvtype;
-       int flags;
+       int hdr_flags; /* RESP_GET_DFS_REFERRAL.ReferralHeaderFlags */
+       int ttl; /* DFS_REREFERRAL_V3.TimeToLive */
+       int srvtype; /* DFS_REREFERRAL_V3.ServerType */
+       int ref_flags; /* DFS_REREFERRAL_V3.ReferralEntryFlags */
        struct timespec64 etime;
-       int path_consumed;
+       int path_consumed; /* RESP_GET_DFS_REFERRAL.PathConsumed */
        int numtgts;
        struct list_head tlist;
        struct cache_dfs_tgt *tgthint;
@@ -166,14 +167,11 @@ static int dfscache_proc_show(struct seq_file *m, void *v)
                                continue;
 
                        seq_printf(m,
-                                  "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,"
-                                  "interlink=%s,path_consumed=%d,expired=%s\n",
-                                  ce->path,
-                                  ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
-                                  ce->ttl, ce->etime.tv_nsec,
-                                  IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
-                                  ce->path_consumed,
-                                  cache_entry_expired(ce) ? "yes" : "no");
+                                  "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
+                                  ce->path, ce->srvtype == DFS_TYPE_ROOT ? "root" : "link",
+                                  ce->ttl, ce->etime.tv_nsec, ce->hdr_flags, ce->ref_flags,
+                                  IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
+                                  ce->path_consumed, cache_entry_expired(ce) ? "yes" : "no");
 
                        list_for_each_entry(t, &ce->tlist, list) {
                                seq_printf(m, "  %s%s\n",
@@ -236,11 +234,12 @@ static inline void dump_tgts(const struct cache_entry *ce)
 
 static inline void dump_ce(const struct cache_entry *ce)
 {
-       cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,interlink=%s,path_consumed=%d,expired=%s\n",
+       cifs_dbg(FYI, "cache entry: path=%s,type=%s,ttl=%d,etime=%ld,hdr_flags=0x%x,ref_flags=0x%x,interlink=%s,path_consumed=%d,expired=%s\n",
                 ce->path,
                 ce->srvtype == DFS_TYPE_ROOT ? "root" : "link", ce->ttl,
                 ce->etime.tv_nsec,
-                IS_INTERLINK_SET(ce->flags) ? "yes" : "no",
+                ce->hdr_flags, ce->ref_flags,
+                IS_INTERLINK_SET(ce->hdr_flags) ? "yes" : "no",
                 ce->path_consumed,
                 cache_entry_expired(ce) ? "yes" : "no");
        dump_tgts(ce);
@@ -381,7 +380,8 @@ static int copy_ref_data(const struct dfs_info3_param *refs, int numrefs,
        ce->ttl = refs[0].ttl;
        ce->etime = get_expire_time(ce->ttl);
        ce->srvtype = refs[0].server_type;
-       ce->flags = refs[0].ref_flag;
+       ce->hdr_flags = refs[0].flags;
+       ce->ref_flags = refs[0].ref_flag;
        ce->path_consumed = refs[0].path_consumed;
 
        for (i = 0; i < numrefs; i++) {
@@ -799,7 +799,8 @@ static int setup_referral(const char *path, struct cache_entry *ce,
        ref->path_consumed = ce->path_consumed;
        ref->ttl = ce->ttl;
        ref->server_type = ce->srvtype;
-       ref->ref_flag = ce->flags;
+       ref->ref_flag = ce->ref_flags;
+       ref->flags = ce->hdr_flags;
 
        return 0;
 
index 97ac363..a3fb81e 100644 (file)
@@ -567,8 +567,8 @@ out_free_xid:
        return rc;
 }
 
-int cifs_create(struct inode *inode, struct dentry *direntry, umode_t mode,
-               bool excl)
+int cifs_create(struct user_namespace *mnt_userns, struct inode *inode,
+               struct dentry *direntry, umode_t mode, bool excl)
 {
        int rc;
        unsigned int xid = get_xid();
@@ -611,8 +611,8 @@ out_free_xid:
        return rc;
 }
 
-int cifs_mknod(struct inode *inode, struct dentry *direntry, umode_t mode,
-               dev_t device_number)
+int cifs_mknod(struct user_namespace *mnt_userns, struct inode *inode,
+              struct dentry *direntry, umode_t mode, dev_t device_number)
 {
        int rc = -EPERM;
        unsigned int xid;
index 6d00190..042e24a 100644 (file)
@@ -165,6 +165,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode,
                        goto posix_open_ret;
                }
        } else {
+               cifs_revalidate_mapping(*pinode);
                cifs_fattr_to_inode(*pinode, &fattr);
        }
 
@@ -580,7 +581,7 @@ int cifs_open(struct inode *inode, struct file *file)
                } else if ((rc == -EINVAL) || (rc == -EOPNOTSUPP)) {
                        if (tcon->ses->serverNOS)
                                cifs_dbg(VFS, "server %s of type %s returned unexpected error on SMB posix open, disabling posix open support. Check if server update available.\n",
-                                        tcon->ses->serverName,
+                                        tcon->ses->ip_addr,
                                         tcon->ses->serverNOS);
                        tcon->broken_posix_open = true;
                } else if ((rc != -EIO) && (rc != -EREMOTE) &&
index 12a5da0..7888902 100644 (file)
@@ -140,6 +140,8 @@ const struct fs_parameter_spec smb3_fs_parameters[] = {
        fsparam_u32("rsize", Opt_rsize),
        fsparam_u32("wsize", Opt_wsize),
        fsparam_u32("actimeo", Opt_actimeo),
+       fsparam_u32("acdirmax", Opt_acdirmax),
+       fsparam_u32("acregmax", Opt_acregmax),
        fsparam_u32("echo_interval", Opt_echo_interval),
        fsparam_u32("max_credits", Opt_max_credits),
        fsparam_u32("handletimeout", Opt_handletimeout),
@@ -397,7 +399,7 @@ cifs_parse_smb_version(char *value, struct smb3_fs_context *ctx, bool is_smb3)
                ctx->vals = &smb3any_values;
                break;
        case Smb_default:
-               ctx->ops = &smb30_operations; /* currently identical with 3.0 */
+               ctx->ops = &smb30_operations;
                ctx->vals = &smbdefault_values;
                break;
        default:
@@ -542,20 +544,37 @@ static int smb3_fs_context_parse_monolithic(struct fs_context *fc,
 
        /* BB Need to add support for sep= here TBD */
        while ((key = strsep(&options, ",")) != NULL) {
-               if (*key) {
-                       size_t v_len = 0;
-                       char *value = strchr(key, '=');
-
-                       if (value) {
-                               if (value == key)
-                                       continue;
-                               *value++ = 0;
-                               v_len = strlen(value);
-                       }
-                       ret = vfs_parse_fs_string(fc, key, value, v_len);
-                       if (ret < 0)
-                               break;
+               size_t len;
+               char *value;
+
+               if (*key == 0)
+                       break;
+
+               /* Check if the following character is the delimiter.  If yes,
+                * we have encountered a double delimiter; reset the NUL
+                * character to the delimiter.
+                */
+               while (options && options[0] == ',') {
+                       len = strlen(key);
+                       strcpy(key + len, options);
+                       options = strchr(options, ',');
+                       if (options)
+                               *options++ = 0;
+               }
+
+               len = 0;
+               value = strchr(key, '=');
+               if (value) {
+                       if (value == key)
+                               continue;
+                       *value++ = 0;
+                       len = strlen(value);
                }
+
+               ret = vfs_parse_fs_string(fc, key, value, len);
+               if (ret < 0)
+                       break;
        }
 
        return ret;
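A doubled comma therefore escapes a literal comma inside a value. A small userspace
sketch of the same splitting rule (memmove() replaces the overlapping strcpy(),
since source and destination overlap by one byte):

    #include <stdio.h>
    #include <string.h>

    /* split @options on ',', folding ",," back into a literal ',' */
    static void split_opts(char *options)
    {
            char *key;

            while ((key = strsep(&options, ",")) != NULL) {
                    if (*key == 0)
                            break;
                    while (options && options[0] == ',') {
                            size_t len = strlen(key);

                            /* pull the remainder back over the NUL strsep
                             * wrote; the byte landing at key[len] is the
                             * kept literal ','                           */
                            memmove(key + len, options, strlen(options) + 1);
                            options = strchr(options, ',');
                            if (options)
                                    *options++ = 0;
                    }
                    printf("token: %s\n", key);
            }
    }

    /* split_opts() on a writable copy of "user=x,pass=a,,b,dom=y" prints:
     *   token: user=x
     *   token: pass=a,b
     *   token: dom=y
     */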
@@ -929,12 +948,31 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
                ctx->wsize = result.uint_32;
                ctx->got_wsize = true;
                break;
+       case Opt_acregmax:
+               ctx->acregmax = HZ * result.uint_32;
+               if (ctx->acregmax > CIFS_MAX_ACTIMEO) {
+                       cifs_dbg(VFS, "acregmax too large\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
+       case Opt_acdirmax:
+               ctx->acdirmax = HZ * result.uint_32;
+               if (ctx->acdirmax > CIFS_MAX_ACTIMEO) {
+                       cifs_dbg(VFS, "acdirmax too large\n");
+                       goto cifs_parse_mount_err;
+               }
+               break;
        case Opt_actimeo:
-               ctx->actimeo = HZ * result.uint_32;
-               if (ctx->actimeo > CIFS_MAX_ACTIMEO) {
-                       cifs_dbg(VFS, "attribute cache timeout too large\n");
+               if (HZ * result.uint_32 > CIFS_MAX_ACTIMEO) {
+                       cifs_dbg(VFS, "timeout too large\n");
                        goto cifs_parse_mount_err;
                }
+               if ((ctx->acdirmax != CIFS_DEF_ACTIMEO) ||
+                   (ctx->acregmax != CIFS_DEF_ACTIMEO)) {
+                       cifs_dbg(VFS, "actimeo ignored since acregmax or acdirmax specified\n");
+                       break;
+               }
+               ctx->acdirmax = ctx->acregmax = HZ * result.uint_32;
                break;
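Taken together, and assuming the CIFS_DEF_ACTIMEO default of one second, the three
options interact as follows (illustrative mount strings, not from this patch):

    /*  -o actimeo=10             -> acregmax = acdirmax = 10 * HZ
     *  -o acregmax=3             -> files 3s, directories still 1s
     *  -o acregmax=3,acdirmax=30 -> files 3s, directories 30s
     *  -o acregmax=3,actimeo=10  -> actimeo ignored, warning logged
     */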
        case Opt_echo_interval:
                ctx->echo_interval = result.uint_32;
@@ -1158,9 +1196,11 @@ static int smb3_fs_context_parse_param(struct fs_context *fc,
                pr_warn_once("Witness protocol support is experimental\n");
                break;
        case Opt_rootfs:
-#ifdef CONFIG_CIFS_ROOT
-               ctx->rootfs = true;
+#ifndef CONFIG_CIFS_ROOT
+               cifs_dbg(VFS, "rootfs support requires CONFIG_CIFS_ROOT config option\n");
+               goto cifs_parse_mount_err;
 #endif
+               ctx->rootfs = true;
                break;
        case Opt_posixpaths:
                if (result.negated)
@@ -1361,7 +1401,8 @@ int smb3_init_fs_context(struct fs_context *fc)
        /* default is to use strict cifs caching semantics */
        ctx->strict_io = true;
 
-       ctx->actimeo = CIFS_DEF_ACTIMEO;
+       ctx->acregmax = CIFS_DEF_ACTIMEO;
+       ctx->acdirmax = CIFS_DEF_ACTIMEO;
 
        /* Most clients set timeout to 0, allows server to use its default */
        ctx->handle_timeout = 0; /* See MS-SMB2 spec section 2.2.14.2.12 */
index 1c44a46..87dd1f7 100644 (file)
@@ -118,6 +118,8 @@ enum cifs_param {
        Opt_rsize,
        Opt_wsize,
        Opt_actimeo,
+       Opt_acdirmax,
+       Opt_acregmax,
        Opt_echo_interval,
        Opt_max_credits,
        Opt_snapshot,
@@ -232,7 +234,9 @@ struct smb3_fs_context {
        unsigned int wsize;
        unsigned int min_offload;
        bool sockopt_tcp_nodelay:1;
-       unsigned long actimeo; /* attribute cache timeout (jiffies) */
+       /* attribute cache timeout for files and directories in jiffies */
+       unsigned long acregmax;
+       unsigned long acdirmax;
        struct smb_version_operations *ops;
        struct smb_version_values *vals;
        char *prepath;
index a83b3a8..f2df442 100644 (file)
@@ -1857,7 +1857,8 @@ posix_mkdir_get_info:
        goto posix_mkdir_out;
 }
 
-int cifs_mkdir(struct inode *inode, struct dentry *direntry, umode_t mode)
+int cifs_mkdir(struct user_namespace *mnt_userns, struct inode *inode,
+              struct dentry *direntry, umode_t mode)
 {
        int rc = 0;
        unsigned int xid;
@@ -2067,9 +2068,9 @@ do_rename_exit:
 }
 
 int
-cifs_rename2(struct inode *source_dir, struct dentry *source_dentry,
-            struct inode *target_dir, struct dentry *target_dentry,
-            unsigned int flags)
+cifs_rename2(struct user_namespace *mnt_userns, struct inode *source_dir,
+            struct dentry *source_dentry, struct inode *target_dir,
+            struct dentry *target_dentry, unsigned int flags)
 {
        char *from_name = NULL;
        char *to_name = NULL;
@@ -2198,12 +2199,23 @@ cifs_inode_needs_reval(struct inode *inode)
        if (!lookupCacheEnabled)
                return true;
 
-       if (!cifs_sb->ctx->actimeo)
-               return true;
-
-       if (!time_in_range(jiffies, cifs_i->time,
-                               cifs_i->time + cifs_sb->ctx->actimeo))
-               return true;
+       /*
+        * depending on the inode type, check if attribute caching is disabled
+        * for files or directories
+        */
+       if (S_ISDIR(inode->i_mode)) {
+               if (!cifs_sb->ctx->acdirmax)
+                       return true;
+               if (!time_in_range(jiffies, cifs_i->time,
+                                  cifs_i->time + cifs_sb->ctx->acdirmax))
+                       return true;
+       } else { /* file */
+               if (!cifs_sb->ctx->acregmax)
+                       return true;
+               if (!time_in_range(jiffies, cifs_i->time,
+                                  cifs_i->time + cifs_sb->ctx->acregmax))
+                       return true;
+       }
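Same time_in_range() test as before, just keyed on the inode type; concretely, for
a regular file with acregmax = 5 * HZ:

    /*  jiffies within [cifs_i->time, cifs_i->time + 5*HZ] -> trust cache
     *  jiffies past that window                           -> revalidate
     *  acregmax == 0                                      -> always revalidate
     */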
 
        /* hardlinked files w/ noserverino get "special" treatment */
        if (!(cifs_sb->mnt_cifs_flags & CIFS_MOUNT_SERVER_INUM) &&
@@ -2370,8 +2382,8 @@ int cifs_revalidate_dentry(struct dentry *dentry)
        return cifs_revalidate_mapping(inode);
 }
 
-int cifs_getattr(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int flags)
+int cifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct dentry *dentry = path->dentry;
        struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb);
@@ -2383,7 +2395,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
         * We need to be sure that all dirty pages are written and the server
         * has actual ctime, mtime and file length.
         */
-       if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE)) &&
+       if ((request_mask & (STATX_CTIME | STATX_MTIME | STATX_SIZE | STATX_BLOCKS)) &&
            !CIFS_CACHE_READ(CIFS_I(inode)) &&
            inode->i_mapping && inode->i_mapping->nrpages != 0) {
                rc = filemap_fdatawait(inode->i_mapping);
@@ -2408,7 +2420,7 @@ int cifs_getattr(const struct path *path, struct kstat *stat,
                        return rc;
        }
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        stat->blksize = cifs_sb->ctx->bsize;
        stat->ino = CIFS_I(inode)->uniqueid;
 
@@ -2573,6 +2585,14 @@ set_size_out:
        if (rc == 0) {
                cifsInode->server_eof = attrs->ia_size;
                cifs_setsize(inode, attrs->ia_size);
+               /*
+                * i_blocks is not related to (i_size / i_blksize); instead a
+                * 512 byte (2**9) unit is required for calculating num blocks.
+                * Until we can query the server for the actual allocation size,
+                * this is the best estimate we have for the blocks allocated to
+                * a file. The count must be rounded up so size 1 is not 0 blocks.
+                */
+               inode->i_blocks = (512 - 1 + attrs->ia_size) >> 9;
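The expression is a divide-by-512 with round-up; checking the edge cases the
comment calls out:

    /*  ia_size = 0   -> (511 +   0) >> 9 = 0 blocks
     *  ia_size = 1   -> (511 +   1) >> 9 = 1 block (not 0)
     *  ia_size = 512 -> (511 + 512) >> 9 = 1 block
     *  ia_size = 513 -> (511 + 513) >> 9 = 2 blocks
     */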
 
                /*
                 * The man page of truncate says if the size changed,
@@ -2610,7 +2630,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
                attrs->ia_valid |= ATTR_FORCE;
 
-       rc = setattr_prepare(direntry, attrs);
+       rc = setattr_prepare(&init_user_ns, direntry, attrs);
        if (rc < 0)
                goto out;
 
@@ -2715,7 +2735,7 @@ cifs_setattr_unix(struct dentry *direntry, struct iattr *attrs)
            attrs->ia_size != i_size_read(inode))
                truncate_setsize(inode, attrs->ia_size);
 
-       setattr_copy(inode, attrs);
+       setattr_copy(&init_user_ns, inode, attrs);
        mark_inode_dirty(inode);
 
        /* force revalidate when any of these times are set since some
@@ -2757,7 +2777,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
        if (cifs_sb->mnt_cifs_flags & CIFS_MOUNT_NO_PERM)
                attrs->ia_valid |= ATTR_FORCE;
 
-       rc = setattr_prepare(direntry, attrs);
+       rc = setattr_prepare(&init_user_ns, direntry, attrs);
        if (rc < 0) {
                free_xid(xid);
                return rc;
@@ -2913,7 +2933,7 @@ cifs_setattr_nounix(struct dentry *direntry, struct iattr *attrs)
            attrs->ia_size != i_size_read(inode))
                truncate_setsize(inode, attrs->ia_size);
 
-       setattr_copy(inode, attrs);
+       setattr_copy(&init_user_ns, inode, attrs);
        mark_inode_dirty(inode);
 
 cifs_setattr_exit:
@@ -2923,7 +2943,8 @@ cifs_setattr_exit:
 }
 
 int
-cifs_setattr(struct dentry *direntry, struct iattr *attrs)
+cifs_setattr(struct user_namespace *mnt_userns, struct dentry *direntry,
+            struct iattr *attrs)
 {
        struct cifs_sb_info *cifs_sb = CIFS_SB(direntry->d_sb);
        struct cifs_tcon *pTcon = cifs_sb_master_tcon(cifs_sb);
index 94dab43..7c5878a 100644 (file)
@@ -661,7 +661,8 @@ cifs_get_link(struct dentry *direntry, struct inode *inode,
 }
 
 int
-cifs_symlink(struct inode *inode, struct dentry *direntry, const char *symname)
+cifs_symlink(struct user_namespace *mnt_userns, struct inode *inode,
+            struct dentry *direntry, const char *symname)
 {
        int rc = -EOPNOTSUPP;
        unsigned int xid;
index 2134657..63d517b 100644 (file)
@@ -218,7 +218,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
 
        /* UNC and paths */
        /* XXX: Use ses->server->hostname? */
-       sprintf(unc, unc_fmt, ses->serverName);
+       sprintf(unc, unc_fmt, ses->ip_addr);
        ctx.UNC = unc;
        ctx.prepath = "";
 
@@ -230,6 +230,7 @@ cifs_ses_add_channel(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
        ctx.noautotune = ses->server->noautotune;
        ctx.sockopt_tcp_nodelay = ses->server->tcp_nodelay;
        ctx.echo_interval = ses->server->echo_interval / HZ;
+       ctx.max_credits = ses->server->max_credits;
 
        /*
         * This will be used for encoding/decoding user/domain/pw
index 99a1951..d9a990c 100644 (file)
@@ -58,6 +58,7 @@
 #define SMB2_HMACSHA256_SIZE (32)
 #define SMB2_CMACAES_SIZE (16)
 #define SMB3_SIGNKEY_SIZE (16)
+#define SMB3_GCM128_CRYPTKEY_SIZE (16)
 #define SMB3_GCM256_CRYPTKEY_SIZE (32)
 
 /* Maximum buffer size value we can send with 1 credit */
index 1f900b8..a718dc7 100644 (file)
@@ -358,6 +358,7 @@ smb2_compound_op(const unsigned int xid, struct cifs_tcon *tcon,
        if (cfile)
                goto after_close;
        /* Close */
+       flags |= CIFS_CP_CREATE_CLOSE_OP;
        rqst[num_rqst].rq_iov = &vars->close_iov[0];
        rqst[num_rqst].rq_nvec = 1;
        rc = SMB2_close_init(tcon, server,
index 60d4bd1..aac384f 100644 (file)
@@ -754,8 +754,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server)
                }
        }
        spin_unlock(&cifs_tcp_ses_lock);
-       cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n");
-       return false;
+       cifs_dbg(FYI, "No file id matched, oplock break ignored\n");
+       return true;
 }
 
 void
@@ -767,7 +767,7 @@ smb2_cancelled_close_fid(struct work_struct *work)
        int rc;
 
        if (cancelled->mid)
-               cifs_tcon_dbg(VFS, "Close unmatched open for MID:%llx\n",
+               cifs_tcon_dbg(VFS, "Close unmatched open for MID:%llu\n",
                              cancelled->mid);
        else
                cifs_tcon_dbg(VFS, "Close interrupted close\n");
@@ -844,14 +844,14 @@ smb2_handle_cancelled_close(struct cifs_tcon *tcon, __u64 persistent_fid,
 }
 
 int
-smb2_handle_cancelled_mid(char *buffer, struct TCP_Server_Info *server)
+smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server)
 {
-       struct smb2_sync_hdr *sync_hdr = (struct smb2_sync_hdr *)buffer;
-       struct smb2_create_rsp *rsp = (struct smb2_create_rsp *)buffer;
+       struct smb2_sync_hdr *sync_hdr = mid->resp_buf;
+       struct smb2_create_rsp *rsp = mid->resp_buf;
        struct cifs_tcon *tcon;
        int rc;
 
-       if (sync_hdr->Command != SMB2_CREATE ||
+       if ((mid->optype & CIFS_CP_CREATE_CLOSE_OP) || sync_hdr->Command != SMB2_CREATE ||
            sync_hdr->Status != STATUS_SUCCESS)
                return 0;
 
index f192748..f703204 100644 (file)
@@ -63,17 +63,19 @@ smb2_add_credits(struct TCP_Server_Info *server,
                 const struct cifs_credits *credits, const int optype)
 {
        int *val, rc = -1;
+       int scredits, in_flight;
        unsigned int add = credits->value;
        unsigned int instance = credits->instance;
        bool reconnect_detected = false;
+       bool reconnect_with_invalid_credits = false;
 
        spin_lock(&server->req_lock);
        val = server->ops->get_credits_field(server, optype);
 
        /* eg found case where write overlapping reconnect messed up credits */
        if (((optype & CIFS_OP_MASK) == CIFS_NEG_OP) && (*val != 0))
-               trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
-                       server->hostname, *val, add);
+               reconnect_with_invalid_credits = true;
+
        if ((instance == 0) || (instance == server->reconnect_instance))
                *val += add;
        else
@@ -84,7 +86,9 @@ smb2_add_credits(struct TCP_Server_Info *server,
                pr_warn_once("server overflowed SMB3 credits\n");
        }
        server->in_flight--;
-       if (server->in_flight == 0 && (optype & CIFS_OP_MASK) != CIFS_NEG_OP)
+       if (server->in_flight == 0 &&
+          ((optype & CIFS_OP_MASK) != CIFS_NEG_OP) &&
+          ((optype & CIFS_OP_MASK) != CIFS_SESS_OP))
                rc = change_conf(server);
        /*
         * Sometimes server returns 0 credits on oplock break ack - we need to
@@ -97,14 +101,26 @@ smb2_add_credits(struct TCP_Server_Info *server,
                        server->oplock_credits++;
                }
        }
+       scredits = *val;
+       in_flight = server->in_flight;
        spin_unlock(&server->req_lock);
        wake_up(&server->request_q);
 
        if (reconnect_detected) {
+               trace_smb3_reconnect_detected(server->CurrentMid,
+                       server->conn_id, server->hostname, scredits, add, in_flight);
+
                cifs_dbg(FYI, "trying to put %d credits from the old server instance %d\n",
                         add, instance);
        }
 
+       if (reconnect_with_invalid_credits) {
+               trace_smb3_reconnect_with_invalid_credits(server->CurrentMid,
+                       server->conn_id, server->hostname, scredits, add, in_flight);
+               cifs_dbg(FYI, "Negotiate operation when server credits is non-zero. Optype: %d, server credits: %d, credits added: %d\n",
+                        optype, scredits, add);
+       }
+
        if (server->tcpStatus == CifsNeedReconnect
            || server->tcpStatus == CifsExiting)
                return;
@@ -123,23 +139,30 @@ smb2_add_credits(struct TCP_Server_Info *server,
                cifs_dbg(FYI, "disabling oplocks\n");
                break;
        default:
-               trace_smb3_add_credits(server->CurrentMid,
-                       server->hostname, rc, add);
-               cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, rc);
+               /* change_conf rebalanced credits for different types */
+               break;
        }
+
+       trace_smb3_add_credits(server->CurrentMid,
+                       server->conn_id, server->hostname, scredits, add, in_flight);
+       cifs_dbg(FYI, "%s: added %u credits total=%d\n", __func__, add, scredits);
 }
 
 static void
 smb2_set_credits(struct TCP_Server_Info *server, const int val)
 {
+       int scredits, in_flight;
+
        spin_lock(&server->req_lock);
        server->credits = val;
        if (val == 1)
                server->reconnect_instance++;
+       scredits = server->credits;
+       in_flight = server->in_flight;
        spin_unlock(&server->req_lock);
 
        trace_smb3_set_credits(server->CurrentMid,
-                       server->hostname, val, val);
+                       server->conn_id, server->hostname, scredits, val, in_flight);
        cifs_dbg(FYI, "%s: set %u credits\n", __func__, val);
 
        /* don't log while holding the lock */
@@ -171,7 +194,7 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
                      unsigned int *num, struct cifs_credits *credits)
 {
        int rc = 0;
-       unsigned int scredits;
+       unsigned int scredits, in_flight;
 
        spin_lock(&server->req_lock);
        while (1) {
@@ -208,17 +231,18 @@ smb2_wait_mtu_credits(struct TCP_Server_Info *server, unsigned int size,
                                DIV_ROUND_UP(*num, SMB2_MAX_BUFFER_SIZE);
                        credits->instance = server->reconnect_instance;
                        server->credits -= credits->value;
-                       scredits = server->credits;
                        server->in_flight++;
                        if (server->in_flight > server->max_in_flight)
                                server->max_in_flight = server->in_flight;
                        break;
                }
        }
+       scredits = server->credits;
+       in_flight = server->in_flight;
        spin_unlock(&server->req_lock);
 
        trace_smb3_add_credits(server->CurrentMid,
-                       server->hostname, scredits, -(credits->value));
+                       server->conn_id, server->hostname, scredits, -(credits->value), in_flight);
        cifs_dbg(FYI, "%s: removed %u credits total=%d\n",
                        __func__, credits->value, scredits);
 
@@ -231,14 +255,14 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
                    const unsigned int payload_size)
 {
        int new_val = DIV_ROUND_UP(payload_size, SMB2_MAX_BUFFER_SIZE);
-       int scredits;
+       int scredits, in_flight;
 
        if (!credits->value || credits->value == new_val)
                return 0;
 
        if (credits->value < new_val) {
                trace_smb3_too_many_credits(server->CurrentMid,
-                               server->hostname, 0, credits->value - new_val);
+                               server->conn_id, server->hostname, 0, credits->value - new_val, 0);
                cifs_server_dbg(VFS, "request has less credits (%d) than required (%d)",
                                credits->value, new_val);
 
@@ -248,9 +272,13 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
        spin_lock(&server->req_lock);
 
        if (server->reconnect_instance != credits->instance) {
+               scredits = server->credits;
+               in_flight = server->in_flight;
                spin_unlock(&server->req_lock);
+
                trace_smb3_reconnect_detected(server->CurrentMid,
-                       server->hostname, 0, 0);
+                       server->conn_id, server->hostname, scredits,
+                       credits->value - new_val, in_flight);
                cifs_server_dbg(VFS, "trying to return %d credits to old session\n",
                         credits->value - new_val);
                return -EAGAIN;
@@ -258,15 +286,18 @@ smb2_adjust_credits(struct TCP_Server_Info *server,
 
        server->credits += credits->value - new_val;
        scredits = server->credits;
+       in_flight = server->in_flight;
        spin_unlock(&server->req_lock);
        wake_up(&server->request_q);
-       credits->value = new_val;
 
        trace_smb3_add_credits(server->CurrentMid,
-                       server->hostname, scredits, credits->value - new_val);
+                       server->conn_id, server->hostname, scredits,
+                       credits->value - new_val, in_flight);
        cifs_dbg(FYI, "%s: adjust added %u credits total=%d\n",
                        __func__, credits->value - new_val, scredits);
 
+       credits->value = new_val;
+
        return 0;
 }
 
@@ -1164,7 +1195,7 @@ smb2_set_ea(const unsigned int xid, struct cifs_tcon *tcon,
        struct TCP_Server_Info *server = cifs_pick_channel(ses);
        __le16 *utf16_path = NULL;
        int ea_name_len = strlen(ea_name);
-       int flags = 0;
+       int flags = CIFS_CP_CREATE_CLOSE_OP;
        int len;
        struct smb_rqst rqst[3];
        int resp_buftype[3];
@@ -1542,7 +1573,7 @@ smb2_ioctl_query_info(const unsigned int xid,
        struct smb_query_info qi;
        struct smb_query_info __user *pqi;
        int rc = 0;
-       int flags = 0;
+       int flags = CIFS_CP_CREATE_CLOSE_OP;
        struct smb2_query_info_rsp *qi_rsp = NULL;
        struct smb2_ioctl_rsp *io_rsp = NULL;
        void *buffer = NULL;
@@ -2007,6 +2038,7 @@ smb2_duplicate_extents(const unsigned int xid,
 {
        int rc;
        unsigned int ret_data_len;
+       struct inode *inode;
        struct duplicate_extents_to_file dup_ext_buf;
        struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink);
 
@@ -2023,10 +2055,21 @@ smb2_duplicate_extents(const unsigned int xid,
        cifs_dbg(FYI, "Duplicate extents: src off %lld dst off %lld len %lld\n",
                src_off, dest_off, len);
 
-       rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false);
-       if (rc)
-               goto duplicate_extents_out;
+       inode = d_inode(trgtfile->dentry);
+       if (inode->i_size < dest_off + len) {
+               rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false);
+               if (rc)
+                       goto duplicate_extents_out;
 
+               /*
+                * Although we could also set a plausible allocation size
+                * (i_blocks) here in addition to the file size, in a reflink
+                * the target file is likely sparse. Its allocation size will
+                * be queried on the next revalidate, but it is important to
+                * make sure the file's cached size is updated immediately.
+                */
+               cifs_setsize(inode, dest_off + len);
+       }
        rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid,
                        trgtfile->fid.volatile_fid,
                        FSCTL_DUPLICATE_EXTENTS_TO_FILE,
@@ -2369,7 +2412,7 @@ static bool
 smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
 {
        struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
-       int scredits;
+       int scredits, in_flight;
 
        if (shdr->Status != STATUS_PENDING)
                return false;
@@ -2378,11 +2421,13 @@ smb2_is_status_pending(char *buf, struct TCP_Server_Info *server)
                spin_lock(&server->req_lock);
                server->credits += le16_to_cpu(shdr->CreditRequest);
                scredits = server->credits;
+               in_flight = server->in_flight;
                spin_unlock(&server->req_lock);
                wake_up(&server->request_q);
 
                trace_smb3_add_credits(server->CurrentMid,
-                               server->hostname, scredits, le16_to_cpu(shdr->CreditRequest));
+                               server->conn_id, server->hostname, scredits,
+                               le16_to_cpu(shdr->CreditRequest), in_flight);
                cifs_dbg(FYI, "%s: status pending add %u credits total=%d\n",
                                __func__, le16_to_cpu(shdr->CreditRequest), scredits);
        }
@@ -2418,6 +2463,34 @@ smb2_is_status_io_timeout(char *buf)
                return false;
 }
 
+static void
+smb2_is_network_name_deleted(char *buf, struct TCP_Server_Info *server)
+{
+       struct smb2_sync_hdr *shdr = (struct smb2_sync_hdr *)buf;
+       struct list_head *tmp, *tmp1;
+       struct cifs_ses *ses;
+       struct cifs_tcon *tcon;
+
+       if (shdr->Status != STATUS_NETWORK_NAME_DELETED)
+               return;
+
+       spin_lock(&cifs_tcp_ses_lock);
+       list_for_each(tmp, &server->smb_ses_list) {
+               ses = list_entry(tmp, struct cifs_ses, smb_ses_list);
+               list_for_each(tmp1, &ses->tcon_list) {
+                       tcon = list_entry(tmp1, struct cifs_tcon, tcon_list);
+                       if (tcon->tid == shdr->TreeId) {
+                               tcon->need_reconnect = true;
+                               spin_unlock(&cifs_tcp_ses_lock);
+                               pr_warn_once("Server share %s deleted.\n",
+                                            tcon->treeName);
+                               return;
+                       }
+               }
+       }
+       spin_unlock(&cifs_tcp_ses_lock);
+}
+
 static int
 smb2_oplock_response(struct cifs_tcon *tcon, struct cifs_fid *fid,
                     struct cifsInodeInfo *cinode)
@@ -2516,7 +2589,7 @@ smb2_query_info_compound(const unsigned int xid, struct cifs_tcon *tcon,
 {
        struct cifs_ses *ses = tcon->ses;
        struct TCP_Server_Info *server = cifs_pick_channel(ses);
-       int flags = 0;
+       int flags = CIFS_CP_CREATE_CLOSE_OP;
        struct smb_rqst rqst[3];
        int resp_buftype[3];
        struct kvec rsp_iov[3];
@@ -2914,7 +2987,7 @@ smb2_query_symlink(const unsigned int xid, struct cifs_tcon *tcon,
        unsigned int sub_offset;
        unsigned int print_len;
        unsigned int print_offset;
-       int flags = 0;
+       int flags = CIFS_CP_CREATE_CLOSE_OP;
        struct smb_rqst rqst[3];
        int resp_buftype[3];
        struct kvec rsp_iov[3];
@@ -3096,7 +3169,7 @@ smb2_query_reparse_tag(const unsigned int xid, struct cifs_tcon *tcon,
        struct cifs_open_parms oparms;
        struct cifs_fid fid;
        struct TCP_Server_Info *server = cifs_pick_channel(tcon->ses);
-       int flags = 0;
+       int flags = CIFS_CP_CREATE_CLOSE_OP;
        struct smb_rqst rqst[3];
        int resp_buftype[3];
        struct kvec rsp_iov[3];
@@ -4097,7 +4170,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key)
                        if (ses->Suid == ses_id) {
                                ses_enc_key = enc ? ses->smb3encryptionkey :
                                        ses->smb3decryptionkey;
-                               memcpy(key, ses_enc_key, SMB3_SIGN_KEY_SIZE);
+                               memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE);
                                spin_unlock(&cifs_tcp_ses_lock);
                                return 0;
                        }
@@ -4124,7 +4197,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
        int rc = 0;
        struct scatterlist *sg;
        u8 sign[SMB2_SIGNATURE_SIZE] = {};
-       u8 key[SMB3_SIGN_KEY_SIZE];
+       u8 key[SMB3_ENC_DEC_KEY_SIZE];
        struct aead_request *req;
        char *iv;
        unsigned int iv_len;
@@ -4148,10 +4221,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst,
        tfm = enc ? server->secmech.ccmaesencrypt :
                                                server->secmech.ccmaesdecrypt;
 
-       if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)
+       if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
+               (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM))
                rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE);
        else
-               rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE);
+               rc = crypto_aead_setkey(tfm, key, SMB3_GCM128_CRYPTKEY_SIZE);
 
        if (rc) {
                cifs_server_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc);
@@ -4605,6 +4679,10 @@ static void smb2_decrypt_offload(struct work_struct *work)
 #ifdef CONFIG_CIFS_STATS2
                        mid->when_received = jiffies;
 #endif
+                       if (dw->server->ops->is_network_name_deleted)
+                               dw->server->ops->is_network_name_deleted(dw->buf,
+                                                                        dw->server);
+
                        mid->callback(mid);
                } else {
                        spin_lock(&GlobalMid_Lock);
@@ -4723,6 +4801,12 @@ non_offloaded_decrypt:
                rc = handle_read_data(server, *mid, buf,
                                      server->vals->read_rsp_size,
                                      pages, npages, len, false);
+               if (rc >= 0) {
+                       if (server->ops->is_network_name_deleted) {
+                               server->ops->is_network_name_deleted(buf,
+                                                               server);
+                       }
+               }
        }
 
 free_pages:
@@ -5072,6 +5156,7 @@ struct smb_version_operations smb20_operations = {
        .fiemap = smb3_fiemap,
        .llseek = smb3_llseek,
        .is_status_io_timeout = smb2_is_status_io_timeout,
+       .is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_operations smb21_operations = {
@@ -5173,6 +5258,7 @@ struct smb_version_operations smb21_operations = {
        .fiemap = smb3_fiemap,
        .llseek = smb3_llseek,
        .is_status_io_timeout = smb2_is_status_io_timeout,
+       .is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_operations smb30_operations = {
@@ -5286,6 +5372,7 @@ struct smb_version_operations smb30_operations = {
        .fiemap = smb3_fiemap,
        .llseek = smb3_llseek,
        .is_status_io_timeout = smb2_is_status_io_timeout,
+       .is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_operations smb311_operations = {
@@ -5399,6 +5486,7 @@ struct smb_version_operations smb311_operations = {
        .fiemap = smb3_fiemap,
        .llseek = smb3_llseek,
        .is_status_io_timeout = smb2_is_status_io_timeout,
+       .is_network_name_deleted = smb2_is_network_name_deleted,
 };
 
 struct smb_version_values smb20_values = {
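
The new is_network_name_deleted hook is registered in all four dialect ops tables above and is invoked from both the offloaded and non-offloaded receive paths shown earlier; the dispatch shape, as in those hunks:

        if (server->ops->is_network_name_deleted)
                server->ops->is_network_name_deleted(buf, server);

A STATUS_NETWORK_NAME_DELETED response thus flags the matching tcon for reconnect instead of repeatedly failing operations against a share the server no longer exports.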
index 794fc3b..2199a9b 100644 (file)
@@ -814,8 +814,9 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
                   SMB3ANY_VERSION_STRING) == 0) {
                req->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
                req->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
-               req->DialectCount = cpu_to_le16(2);
-               total_len += 4;
+               req->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+               req->DialectCount = cpu_to_le16(3);
+               total_len += 6;
        } else if (strcmp(server->vals->version_string,
                   SMBDEFAULT_VERSION_STRING) == 0) {
                req->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
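
Each Dialects[] entry is a __le16, which is where the total_len arithmetic comes from; a sketch of the SMB3ANY case as changed here:

        req->Dialects[0]  = cpu_to_le16(SMB30_PROT_ID);
        req->Dialects[1]  = cpu_to_le16(SMB302_PROT_ID);
        req->Dialects[2]  = cpu_to_le16(SMB311_PROT_ID);
        req->DialectCount = cpu_to_le16(3);
        total_len += 3 * sizeof(__le16);        /* 6 bytes; was 4 for two dialects */

The same three-dialect list is mirrored in smb3_validate_negotiate below, so the FSCTL_VALIDATE_NEGOTIATE_INFO payload matches what was actually negotiated.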
@@ -849,6 +850,8 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
                        SMB2_CLIENT_GUID_SIZE);
                if ((server->vals->protocol_id == SMB311_PROT_ID) ||
                    (strcmp(server->vals->version_string,
+                    SMB3ANY_VERSION_STRING) == 0) ||
+                   (strcmp(server->vals->version_string,
                     SMBDEFAULT_VERSION_STRING) == 0))
                        assemble_neg_contexts(req, server, &total_len);
        }
@@ -883,6 +886,10 @@ SMB2_negotiate(const unsigned int xid, struct cifs_ses *ses)
                        cifs_server_dbg(VFS,
                                "SMB2.1 dialect returned but not requested\n");
                        return -EIO;
+               } else if (rsp->DialectRevision == cpu_to_le16(SMB311_PROT_ID)) {
+                       /* ops defaulted to 3.0; update now that 3.1.1 was negotiated */
+                       server->ops = &smb311_operations;
+                       server->vals = &smb311_values;
                }
        } else if (strcmp(server->vals->version_string,
                   SMBDEFAULT_VERSION_STRING) == 0) {
@@ -1042,10 +1049,11 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
                SMB3ANY_VERSION_STRING) == 0) {
                pneg_inbuf->Dialects[0] = cpu_to_le16(SMB30_PROT_ID);
                pneg_inbuf->Dialects[1] = cpu_to_le16(SMB302_PROT_ID);
-               pneg_inbuf->DialectCount = cpu_to_le16(2);
-               /* structure is big enough for 3 dialects, sending only 2 */
+               pneg_inbuf->Dialects[2] = cpu_to_le16(SMB311_PROT_ID);
+               pneg_inbuf->DialectCount = cpu_to_le16(3);
+               /* SMB 2.1 not included so subtract one dialect from len */
                inbuflen = sizeof(*pneg_inbuf) -
-                               (2 * sizeof(pneg_inbuf->Dialects[0]));
+                               (sizeof(pneg_inbuf->Dialects[0]));
        } else if (strcmp(server->vals->version_string,
                SMBDEFAULT_VERSION_STRING) == 0) {
                pneg_inbuf->Dialects[0] = cpu_to_le16(SMB21_PROT_ID);
@@ -1053,7 +1061,7 @@ int smb3_validate_negotiate(const unsigned int xid, struct cifs_tcon *tcon)
                pneg_inbuf->Dialects[2] = cpu_to_le16(SMB302_PROT_ID);
                pneg_inbuf->Dialects[3] = cpu_to_le16(SMB311_PROT_ID);
                pneg_inbuf->DialectCount = cpu_to_le16(4);
-               /* structure is big enough for 3 dialects */
+               /* structure is big enough for 4 dialects */
                inbuflen = sizeof(*pneg_inbuf);
        } else {
                /* otherwise specific dialect was requested */
@@ -1253,7 +1261,7 @@ SMB2_sess_sendreceive(struct SMB2_sess_data *sess_data)
                            cifs_ses_server(sess_data->ses),
                            &rqst,
                            &sess_data->buf0_type,
-                           CIFS_LOG_ERROR | CIFS_NEG_OP, &rsp_iov);
+                           CIFS_LOG_ERROR | CIFS_SESS_OP, &rsp_iov);
        cifs_small_buf_release(sess_data->iov[0].iov_base);
        memcpy(&sess_data->iov[0], &rsp_iov, sizeof(struct kvec));
 
@@ -4033,8 +4041,7 @@ smb2_async_readv(struct cifs_readdata *rdata)
        if (rdata->credits.value > 0) {
                shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(rdata->bytes,
                                                SMB2_MAX_BUFFER_SIZE));
-               shdr->CreditRequest =
-                       cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
+               shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8);
 
                rc = adjust_credits(server, &rdata->credits, rdata->bytes);
                if (rc)
@@ -4340,8 +4347,7 @@ smb2_async_writev(struct cifs_writedata *wdata,
        if (wdata->credits.value > 0) {
                shdr->CreditCharge = cpu_to_le16(DIV_ROUND_UP(wdata->bytes,
                                                    SMB2_MAX_BUFFER_SIZE));
-               shdr->CreditRequest =
-                       cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 1);
+               shdr->CreditRequest = cpu_to_le16(le16_to_cpu(shdr->CreditCharge) + 8);
 
                rc = adjust_credits(server, &wdata->credits, wdata->bytes);
                if (rc)
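
Both async paths now request eight credits beyond the charge instead of one. Assuming the intent is to replenish headroom for parallel I/O (the diff itself carries no rationale), the arithmetic is:

        charge = DIV_ROUND_UP(bytes, SMB2_MAX_BUFFER_SIZE);
        shdr->CreditCharge  = cpu_to_le16(charge);
        shdr->CreditRequest = cpu_to_le16(charge + 8);  /* was charge + 1 */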
index 9565e27..a2eb34a 100644 (file)
@@ -246,8 +246,7 @@ extern int SMB2_oplock_break(const unsigned int xid, struct cifs_tcon *tcon,
 extern int smb2_handle_cancelled_close(struct cifs_tcon *tcon,
                                       __u64 persistent_fid,
                                       __u64 volatile_fid);
-extern int smb2_handle_cancelled_mid(char *buffer,
-                                       struct TCP_Server_Info *server);
+extern int smb2_handle_cancelled_mid(struct mid_q_entry *mid, struct TCP_Server_Info *server);
 void smb2_cancelled_close_fid(struct work_struct *work);
 extern int SMB2_QFS_info(const unsigned int xid, struct cifs_tcon *tcon,
                         u64 persistent_file_id, u64 volatile_file_id,
index ebccd71..e6fa76a 100644 (file)
@@ -298,7 +298,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
 {
        unsigned char zero = 0x0;
        __u8 i[4] = {0, 0, 0, 1};
-       __u8 L[4] = {0, 0, 0, 128};
+       __u8 L128[4] = {0, 0, 0, 128};
+       __u8 L256[4] = {0, 0, 1, 0};
        int rc = 0;
        unsigned char prfhash[SMB2_HMACSHA256_SIZE];
        unsigned char *hashptr = prfhash;
@@ -354,8 +355,14 @@ static int generate_key(struct cifs_ses *ses, struct kvec label,
                goto smb3signkey_ret;
        }
 
-       rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
-                               L, 4);
+       if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
+               (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) {
+               rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+                               L256, 4);
+       } else {
+               rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash,
+                               L128, 4);
+       }
        if (rc) {
                cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__);
                goto smb3signkey_ret;
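
The L parameter in this key derivation is the derived key length in bits, encoded as a big-endian 32-bit value, which explains both byte arrays: 128 is 0x00000080 and 256 is 0x00000100. Sketched:

        __u8 L128[4] = {0, 0, 0, 128};  /* 0x00000080 = 128-bit key */
        __u8 L256[4] = {0, 0, 1, 0};    /* 0x00000100 = 256-bit key */

The 256-bit form is selected exactly when an AES-256 cipher (CCM or GCM) was negotiated, matching the SMB3_ENC_DEC_KEY_SIZE changes elsewhere in this series.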
@@ -390,6 +397,9 @@ generate_smb3signingkey(struct cifs_ses *ses,
                        const struct derivation_triplet *ptriplet)
 {
        int rc;
+#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS
+       struct TCP_Server_Info *server = ses->server;
+#endif
 
        /*
         * All channels use the same encryption/decryption keys but
@@ -422,11 +432,11 @@ generate_smb3signingkey(struct cifs_ses *ses,
                rc = generate_key(ses, ptriplet->encryption.label,
                                  ptriplet->encryption.context,
                                  ses->smb3encryptionkey,
-                                 SMB3_SIGN_KEY_SIZE);
+                                 SMB3_ENC_DEC_KEY_SIZE);
                rc = generate_key(ses, ptriplet->decryption.label,
                                  ptriplet->decryption.context,
                                  ses->smb3decryptionkey,
-                                 SMB3_SIGN_KEY_SIZE);
+                                 SMB3_ENC_DEC_KEY_SIZE);
                if (rc)
                        return rc;
        }
@@ -442,14 +452,23 @@ generate_smb3signingkey(struct cifs_ses *ses,
         */
        cifs_dbg(VFS, "Session Id    %*ph\n", (int)sizeof(ses->Suid),
                        &ses->Suid);
+       cifs_dbg(VFS, "Cipher type   %d\n", server->cipher_type);
        cifs_dbg(VFS, "Session Key   %*ph\n",
                 SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response);
        cifs_dbg(VFS, "Signing Key   %*ph\n",
                 SMB3_SIGN_KEY_SIZE, ses->smb3signingkey);
-       cifs_dbg(VFS, "ServerIn Key  %*ph\n",
-                SMB3_SIGN_KEY_SIZE, ses->smb3encryptionkey);
-       cifs_dbg(VFS, "ServerOut Key %*ph\n",
-                SMB3_SIGN_KEY_SIZE, ses->smb3decryptionkey);
+       if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) ||
+               (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) {
+               cifs_dbg(VFS, "ServerIn Key  %*ph\n",
+                               SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3encryptionkey);
+               cifs_dbg(VFS, "ServerOut Key %*ph\n",
+                               SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3decryptionkey);
+       } else {
+               cifs_dbg(VFS, "ServerIn Key  %*ph\n",
+                               SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3encryptionkey);
+               cifs_dbg(VFS, "ServerOut Key %*ph\n",
+                               SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3decryptionkey);
+       }
 #endif
        return rc;
 }
index c3d1a58..d6df908 100644 (file)
@@ -851,17 +851,21 @@ DEFINE_SMB3_LEASE_ERR_EVENT(lease_err);
 
 DECLARE_EVENT_CLASS(smb3_reconnect_class,
        TP_PROTO(__u64  currmid,
+               __u64 conn_id,
                char *hostname),
-       TP_ARGS(currmid, hostname),
+       TP_ARGS(currmid, conn_id, hostname),
        TP_STRUCT__entry(
                __field(__u64, currmid)
+               __field(__u64, conn_id)
                __field(char *, hostname)
        ),
        TP_fast_assign(
                __entry->currmid = currmid;
+               __entry->conn_id = conn_id;
                __entry->hostname = hostname;
        ),
-       TP_printk("server=%s current_mid=0x%llx",
+       TP_printk("conn_id=0x%llx server=%s current_mid=%llu",
+               __entry->conn_id,
                __entry->hostname,
                __entry->currmid)
 )
@@ -869,44 +873,56 @@ DECLARE_EVENT_CLASS(smb3_reconnect_class,
 #define DEFINE_SMB3_RECONNECT_EVENT(name)        \
 DEFINE_EVENT(smb3_reconnect_class, smb3_##name,  \
        TP_PROTO(__u64  currmid,                \
-               char *hostname),                \
-       TP_ARGS(currmid, hostname))
+               __u64 conn_id,                  \
+               char *hostname),                                \
+       TP_ARGS(currmid, conn_id, hostname))
 
 DEFINE_SMB3_RECONNECT_EVENT(reconnect);
 DEFINE_SMB3_RECONNECT_EVENT(partial_send_reconnect);
 
 DECLARE_EVENT_CLASS(smb3_credit_class,
        TP_PROTO(__u64  currmid,
+               __u64 conn_id,
                char *hostname,
                int credits,
-               int credits_to_add),
-       TP_ARGS(currmid, hostname, credits, credits_to_add),
+               int credits_to_add,
+               int in_flight),
+       TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight),
        TP_STRUCT__entry(
                __field(__u64, currmid)
+               __field(__u64, conn_id)
                __field(char *, hostname)
                __field(int, credits)
                __field(int, credits_to_add)
+               __field(int, in_flight)
        ),
        TP_fast_assign(
                __entry->currmid = currmid;
+               __entry->conn_id = conn_id;
                __entry->hostname = hostname;
                __entry->credits = credits;
                __entry->credits_to_add = credits_to_add;
+               __entry->in_flight = in_flight;
        ),
-       TP_printk("server=%s current_mid=0x%llx credits=%d credits_to_add=%d",
+       TP_printk("conn_id=0x%llx server=%s current_mid=%llu "
+                       "credits=%d credit_change=%d in_flight=%d",
+               __entry->conn_id,
                __entry->hostname,
                __entry->currmid,
                __entry->credits,
-               __entry->credits_to_add)
+               __entry->credits_to_add,
+               __entry->in_flight)
 )
 
 #define DEFINE_SMB3_CREDIT_EVENT(name)        \
 DEFINE_EVENT(smb3_credit_class, smb3_##name,  \
        TP_PROTO(__u64  currmid,                \
+               __u64 conn_id,                  \
                char *hostname,                 \
                int  credits,                   \
-               int  credits_to_add),           \
-       TP_ARGS(currmid, hostname, credits, credits_to_add))
+               int  credits_to_add,    \
+               int in_flight),                 \
+       TP_ARGS(currmid, conn_id, hostname, credits, credits_to_add, in_flight))
 
 DEFINE_SMB3_CREDIT_EVENT(reconnect_with_invalid_credits);
 DEFINE_SMB3_CREDIT_EVENT(reconnect_detected);
index 4a2b836..c1725b5 100644 (file)
@@ -101,7 +101,7 @@ static void _cifs_mid_q_entry_release(struct kref *refcount)
        if (midEntry->resp_buf && (midEntry->mid_flags & MID_WAIT_CANCELLED) &&
            midEntry->mid_state == MID_RESPONSE_RECEIVED &&
            server->ops->handle_cancelled_mid)
-               server->ops->handle_cancelled_mid(midEntry->resp_buf, server);
+               server->ops->handle_cancelled_mid(midEntry, server);
 
        midEntry->mid_state = MID_FREE;
        atomic_dec(&midCount);
@@ -445,7 +445,7 @@ unmask:
                 */
                server->tcpStatus = CifsNeedReconnect;
                trace_smb3_partial_send_reconnect(server->CurrentMid,
-                                                 server->hostname);
+                                                 server->conn_id, server->hostname);
        }
 smbd_done:
        if (rc < 0 && rc != -EINTR)
@@ -527,7 +527,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
        int *credits;
        int optype;
        long int t;
-       int scredits = server->credits;
+       int scredits, in_flight;
 
        if (timeout < 0)
                t = MAX_JIFFY_OFFSET;
@@ -551,23 +551,39 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
                        server->max_in_flight = server->in_flight;
                *credits -= 1;
                *instance = server->reconnect_instance;
+               scredits = *credits;
+               in_flight = server->in_flight;
                spin_unlock(&server->req_lock);
+
+               trace_smb3_add_credits(server->CurrentMid,
+                               server->conn_id, server->hostname, scredits, -1, in_flight);
+               cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
+                               __func__, 1, scredits);
+
                return 0;
        }
 
        while (1) {
                if (*credits < num_credits) {
+                       scredits = *credits;
                        spin_unlock(&server->req_lock);
+
                        cifs_num_waiters_inc(server);
                        rc = wait_event_killable_timeout(server->request_q,
                                has_credits(server, credits, num_credits), t);
                        cifs_num_waiters_dec(server);
                        if (!rc) {
+                               spin_lock(&server->req_lock);
+                               scredits = *credits;
+                               in_flight = server->in_flight;
+                               spin_unlock(&server->req_lock);
+
                                trace_smb3_credit_timeout(server->CurrentMid,
-                                       server->hostname, num_credits, 0);
+                                               server->conn_id, server->hostname, scredits,
+                                               num_credits, in_flight);
                                cifs_server_dbg(VFS, "wait timed out after %d ms\n",
-                                        timeout);
-                               return -ENOTSUPP;
+                                               timeout);
+                               return -EBUSY;
                        }
                        if (rc == -ERESTARTSYS)
                                return -ERESTARTSYS;
@@ -595,6 +611,7 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
                            server->in_flight > 2 * MAX_COMPOUND &&
                            *credits <= MAX_COMPOUND) {
                                spin_unlock(&server->req_lock);
+
                                cifs_num_waiters_inc(server);
                                rc = wait_event_killable_timeout(
                                        server->request_q,
@@ -603,13 +620,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
                                        t);
                                cifs_num_waiters_dec(server);
                                if (!rc) {
+                                       spin_lock(&server->req_lock);
+                                       scredits = *credits;
+                                       in_flight = server->in_flight;
+                                       spin_unlock(&server->req_lock);
+
                                        trace_smb3_credit_timeout(
-                                               server->CurrentMid,
-                                               server->hostname, num_credits,
-                                               0);
+                                                       server->CurrentMid,
+                                                       server->conn_id, server->hostname,
+                                                       scredits, num_credits, in_flight);
                                        cifs_server_dbg(VFS, "wait timed out after %d ms\n",
-                                                timeout);
-                                       return -ENOTSUPP;
+                                                       timeout);
+                                       return -EBUSY;
                                }
                                if (rc == -ERESTARTSYS)
                                        return -ERESTARTSYS;
@@ -625,16 +647,18 @@ wait_for_free_credits(struct TCP_Server_Info *server, const int num_credits,
                        /* update # of requests on the wire to server */
                        if ((flags & CIFS_TIMEOUT_MASK) != CIFS_BLOCKING_OP) {
                                *credits -= num_credits;
-                               scredits = *credits;
                                server->in_flight += num_credits;
                                if (server->in_flight > server->max_in_flight)
                                        server->max_in_flight = server->in_flight;
                                *instance = server->reconnect_instance;
                        }
+                       scredits = *credits;
+                       in_flight = server->in_flight;
                        spin_unlock(&server->req_lock);
 
                        trace_smb3_add_credits(server->CurrentMid,
-                                       server->hostname, scredits, -(num_credits));
+                                       server->conn_id, server->hostname, scredits,
+                                       -(num_credits), in_flight);
                        cifs_dbg(FYI, "%s: remove %u credits total=%d\n",
                                        __func__, num_credits, scredits);
                        break;
@@ -656,13 +680,13 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
                          const int flags, unsigned int *instance)
 {
        int *credits;
-       int scredits, sin_flight;
+       int scredits, in_flight;
 
        credits = server->ops->get_credits_field(server, flags & CIFS_OP_MASK);
 
        spin_lock(&server->req_lock);
        scredits = *credits;
-       sin_flight = server->in_flight;
+       in_flight = server->in_flight;
 
        if (*credits < num) {
                /*
@@ -684,10 +708,11 @@ wait_for_compound_request(struct TCP_Server_Info *server, int num,
                if (server->in_flight == 0) {
                        spin_unlock(&server->req_lock);
                        trace_smb3_insufficient_credits(server->CurrentMid,
-                                       server->hostname, scredits, sin_flight);
+                                       server->conn_id, server->hostname, scredits,
+                                       num, in_flight);
                        cifs_dbg(FYI, "%s: %d requests in flight, needed %d total=%d\n",
-                                       __func__, sin_flight, num, scredits);
-                       return -ENOTSUPP;
+                                       __func__, in_flight, num, scredits);
+                       return -EDEADLK;
                }
        }
        spin_unlock(&server->req_lock);
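
The wait paths also stop returning -ENOTSUPP for conditions that have nothing to do with missing support: a timed-out credit wait is transient, while a compound that cannot get enough credits with nothing in flight can never be satisfied. Sketched:

        if (!rc)                        /* wait_event_killable_timeout expired */
                return -EBUSY;          /* was -ENOTSUPP */

        if (server->in_flight == 0)     /* no response will ever add credits */
                return -EDEADLK;        /* was -ENOTSUPP */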
@@ -1171,9 +1196,12 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
        /*
         * Compounding is never used during session establish.
         */
-       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP))
+       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
+               mutex_lock(&server->srv_mutex);
                smb311_update_preauth_hash(ses, rqst[0].rq_iov,
                                           rqst[0].rq_nvec);
+               mutex_unlock(&server->srv_mutex);
+       }
 
        for (i = 0; i < num_rqst; i++) {
                rc = wait_for_response(server, midQ[i]);
@@ -1182,7 +1210,7 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
        }
        if (rc != 0) {
                for (; i < num_rqst; i++) {
-                       cifs_server_dbg(VFS, "Cancelling wait for mid %llu cmd: %d\n",
+                       cifs_server_dbg(FYI, "Cancelling wait for mid %llu cmd: %d\n",
                                 midQ[i]->mid, le16_to_cpu(midQ[i]->command));
                        send_cancel(server, &rqst[i], midQ[i]);
                        spin_lock(&GlobalMid_Lock);
@@ -1236,12 +1264,14 @@ compound_send_recv(const unsigned int xid, struct cifs_ses *ses,
        /*
         * Compounding is never used during session establish.
         */
-       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP)) {
+       if ((ses->status == CifsNew) || (optype & CIFS_NEG_OP) || (optype & CIFS_SESS_OP)) {
                struct kvec iov = {
                        .iov_base = resp_iov[0].iov_base,
                        .iov_len = resp_iov[0].iov_len
                };
+               mutex_lock(&server->srv_mutex);
                smb311_update_preauth_hash(ses, &iov, 1);
+               mutex_unlock(&server->srv_mutex);
        }
 
 out:
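
Both smb311_update_preauth_hash calls are now made under srv_mutex. Assuming the motivation is that concurrent negotiate/session-setup exchanges (for example on multichannel) could otherwise update the shared preauth hash state in parallel, the shape is simply:

        mutex_lock(&server->srv_mutex);
        smb311_update_preauth_hash(ses, iov, nvec);
        mutex_unlock(&server->srv_mutex);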
index 6b658a1..41a611e 100644 (file)
@@ -101,6 +101,7 @@ static int cifs_creation_time_set(unsigned int xid, struct cifs_tcon *pTcon,
 }
 
 static int cifs_xattr_set(const struct xattr_handler *handler,
+                         struct user_namespace *mnt_userns,
                          struct dentry *dentry, struct inode *inode,
                          const char *name, const void *value,
                          size_t size, int flags)
index d5ebd36..e7b2775 100644 (file)
@@ -46,10 +46,12 @@ extern const struct file_operations coda_ioctl_operations;
 /* operations shared over more than one file */
 int coda_open(struct inode *i, struct file *f);
 int coda_release(struct inode *i, struct file *f);
-int coda_permission(struct inode *inode, int mask);
+int coda_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                   int mask);
 int coda_revalidate_inode(struct inode *);
-int coda_getattr(const struct path *, struct kstat *, u32, unsigned int);
-int coda_setattr(struct dentry *, struct iattr *);
+int coda_getattr(struct user_namespace *, const struct path *, struct kstat *,
+                u32, unsigned int);
+int coda_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 
 /* this file:  helpers */
 char *coda_f2s(struct CodaFid *f);
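
From this point on, most hunks are mechanical idmapped-mounts plumbing: inode operations gain a struct user_namespace * first argument so permission and attribute logic can map uids/gids through the mount's user namespace, and filesystems that are never mounted idmapped pass &init_user_ns. A hypothetical op showing the shape (foo_setattr is illustrative, not from the patch):

        static int foo_setattr(struct user_namespace *mnt_userns,
                               struct dentry *dentry, struct iattr *iattr)
        {
                /* forward the mount's user namespace to the VFS helper */
                return simple_setattr(mnt_userns, dentry, iattr);
        }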
index ca40c25..d69989c 100644 (file)
@@ -73,7 +73,8 @@ static struct dentry *coda_lookup(struct inode *dir, struct dentry *entry, unsig
 }
 
 
-int coda_permission(struct inode *inode, int mask)
+int coda_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                   int mask)
 {
        int error;
 
@@ -132,7 +133,8 @@ static inline void coda_dir_drop_nlink(struct inode *dir)
 }
 
 /* creation routines: create, mknod, mkdir, link, symlink */
-static int coda_create(struct inode *dir, struct dentry *de, umode_t mode, bool excl)
+static int coda_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *de, umode_t mode, bool excl)
 {
        int error;
        const char *name=de->d_name.name;
@@ -164,7 +166,8 @@ err_out:
        return error;
 }
 
-static int coda_mkdir(struct inode *dir, struct dentry *de, umode_t mode)
+static int coda_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *de, umode_t mode)
 {
        struct inode *inode;
        struct coda_vattr attrs;
@@ -225,7 +228,8 @@ static int coda_link(struct dentry *source_de, struct inode *dir_inode,
 }
 
 
-static int coda_symlink(struct inode *dir_inode, struct dentry *de,
+static int coda_symlink(struct user_namespace *mnt_userns,
+                       struct inode *dir_inode, struct dentry *de,
                        const char *symname)
 {
        const char *name = de->d_name.name;
@@ -291,9 +295,9 @@ static int coda_rmdir(struct inode *dir, struct dentry *de)
 }
 
 /* rename */
-static int coda_rename(struct inode *old_dir, struct dentry *old_dentry,
-                      struct inode *new_dir, struct dentry *new_dentry,
-                      unsigned int flags)
+static int coda_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                      struct dentry *old_dentry, struct inode *new_dir,
+                      struct dentry *new_dentry, unsigned int flags)
 {
        const char *old_name = old_dentry->d_name.name;
        const char *new_name = new_dentry->d_name.name;
index b1c70e2..d9f1bd7 100644 (file)
@@ -251,16 +251,17 @@ static void coda_evict_inode(struct inode *inode)
        coda_cache_clear_inode(inode);
 }
 
-int coda_getattr(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int flags)
+int coda_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        int err = coda_revalidate_inode(d_inode(path->dentry));
        if (!err)
-               generic_fillattr(d_inode(path->dentry), stat);
+               generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
        return err;
 }
 
-int coda_setattr(struct dentry *de, struct iattr *iattr)
+int coda_setattr(struct user_namespace *mnt_userns, struct dentry *de,
+                struct iattr *iattr)
 {
        struct inode *inode = d_inode(de);
        struct coda_vattr vattr;
index 3aec27e..cb9fd59 100644 (file)
@@ -24,7 +24,8 @@
 #include "coda_linux.h"
 
 /* pioctl ops */
-static int coda_ioctl_permission(struct inode *inode, int mask);
+static int coda_ioctl_permission(struct user_namespace *mnt_userns,
+                                struct inode *inode, int mask);
 static long coda_pioctl(struct file *filp, unsigned int cmd,
                        unsigned long user_data);
 
@@ -40,7 +41,8 @@ const struct file_operations coda_ioctl_operations = {
 };
 
 /* the coda pioctl inode ops */
-static int coda_ioctl_permission(struct inode *inode, int mask)
+static int coda_ioctl_permission(struct user_namespace *mnt_userns,
+                                struct inode *inode, int mask)
 {
        return (mask & MAY_EXEC) ? -EACCES : 0;
 }
index 22dce2d..9a3aed2 100644 (file)
@@ -79,7 +79,8 @@ extern void configfs_hash_and_remove(struct dentry * dir, const char * name);
 
 extern const unsigned char * configfs_get_name(struct configfs_dirent *sd);
 extern void configfs_drop_dentry(struct configfs_dirent *sd, struct dentry *parent);
-extern int configfs_setattr(struct dentry *dentry, struct iattr *iattr);
+extern int configfs_setattr(struct user_namespace *mnt_userns,
+                           struct dentry *dentry, struct iattr *iattr);
 
 extern struct dentry *configfs_pin_fs(void);
 extern void configfs_release_fs(void);
@@ -92,7 +93,8 @@ extern const struct inode_operations configfs_root_inode_operations;
 extern const struct inode_operations configfs_symlink_inode_operations;
 extern const struct dentry_operations configfs_dentry_ops;
 
-extern int configfs_symlink(struct inode *dir, struct dentry *dentry,
+extern int configfs_symlink(struct user_namespace *mnt_userns,
+                           struct inode *dir, struct dentry *dentry,
                            const char *symname);
 extern int configfs_unlink(struct inode *dir, struct dentry *dentry);
 
index b839dd1..b6098e0 100644 (file)
@@ -1268,7 +1268,8 @@ out_root_unlock:
 }
 EXPORT_SYMBOL(configfs_depend_item_unlocked);
 
-static int configfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int configfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                         struct dentry *dentry, umode_t mode)
 {
        int ret = 0;
        int module_got = 0;
index 1f02702..da8351d 100644 (file)
@@ -378,7 +378,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type
 
        attr = to_attr(dentry);
        if (!attr)
-               goto out_put_item;
+               goto out_free_buffer;
 
        if (type & CONFIGFS_ITEM_BIN_ATTR) {
                buffer->bin_attr = to_bin_attr(dentry);
@@ -391,7 +391,7 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type
        /* Grab the module reference for this attribute if we have one */
        error = -ENODEV;
        if (!try_module_get(buffer->owner))
-               goto out_put_item;
+               goto out_free_buffer;
 
        error = -EACCES;
        if (!buffer->item->ci_type)
@@ -435,8 +435,6 @@ static int __configfs_open_file(struct inode *inode, struct file *file, int type
 
 out_put_module:
        module_put(buffer->owner);
-out_put_item:
-       config_item_put(buffer->item);
 out_free_buffer:
        up_read(&frag->frag_sem);
        kfree(buffer);
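
This configfs hunk is a reference-count fix rather than churn: the two early failure paths jumped to out_put_item and dropped a reference on buffer->item that those paths had never taken, so they now unwind only what was actually done:

        attr = to_attr(dentry);
        if (!attr)
                goto out_free_buffer;   /* no item reference held yet */
        /* ... */
out_put_module:
        module_put(buffer->owner);
out_free_buffer:
        up_read(&frag->frag_sem);
        kfree(buffer);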
index 8bd6a88..42c348b 100644 (file)
@@ -40,7 +40,8 @@ static const struct inode_operations configfs_inode_operations ={
        .setattr        = configfs_setattr,
 };
 
-int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
+int configfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                    struct iattr *iattr)
 {
        struct inode * inode = d_inode(dentry);
        struct configfs_dirent * sd = dentry->d_fsdata;
@@ -67,7 +68,7 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
        }
        /* attributes were changed at least once in the past */
 
-       error = simple_setattr(dentry, iattr);
+       error = simple_setattr(mnt_userns, dentry, iattr);
        if (error)
                return error;
 
index cb61467..77c8543 100644 (file)
@@ -139,7 +139,8 @@ static int get_target(const char *symname, struct path *path,
 }
 
 
-int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+int configfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, const char *symname)
 {
        int ret;
        struct path path;
@@ -197,7 +198,8 @@ int configfs_symlink(struct inode *dir, struct dentry *dentry, const char *symna
        if (dentry->d_inode || d_unhashed(dentry))
                ret = -EEXIST;
        else
-               ret = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+               ret = inode_permission(&init_user_ns, dir,
+                                      MAY_WRITE | MAY_EXEC);
        if (!ret)
                ret = type->ct_item_ops->allow_link(parent_item, target_item);
        if (!ret) {
index a2f6ecc..1c0fdc1 100644 (file)
@@ -703,6 +703,7 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                        goto close_fail;
                }
        } else {
+               struct user_namespace *mnt_userns;
                struct inode *inode;
                int open_flags = O_CREAT | O_RDWR | O_NOFOLLOW |
                                 O_LARGEFILE | O_EXCL;
@@ -780,13 +781,15 @@ void do_coredump(const kernel_siginfo_t *siginfo)
                 * a process dumps core while its cwd is e.g. on a vfat
                 * filesystem.
                 */
-               if (!uid_eq(inode->i_uid, current_fsuid()))
+               mnt_userns = file_mnt_user_ns(cprm.file);
+               if (!uid_eq(i_uid_into_mnt(mnt_userns, inode), current_fsuid()))
                        goto close_fail;
                if ((inode->i_mode & 0677) != 0600)
                        goto close_fail;
                if (!(cprm.file->f_mode & FMODE_CAN_WRITE))
                        goto close_fail;
-               if (do_truncate(cprm.file->f_path.dentry, 0, 0, cprm.file))
+               if (do_truncate(mnt_userns, cprm.file->f_path.dentry,
+                               0, 0, cprm.file))
                        goto close_fail;
        }
 
@@ -894,10 +897,10 @@ int dump_user_range(struct coredump_params *cprm, unsigned long start,
                 */
                page = get_dump_page(addr);
                if (page) {
-                       void *kaddr = kmap(page);
+                       void *kaddr = kmap_local_page(page);
 
                        stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
-                       kunmap(page);
+                       kunmap_local(kaddr);
                        put_page(page);
                } else {
                        stop = !dump_skip(cprm, PAGE_SIZE);
@@ -931,7 +934,8 @@ void dump_truncate(struct coredump_params *cprm)
        if (file->f_op->llseek && file->f_op->llseek != no_llseek) {
                offset = file->f_op->llseek(file, 0, SEEK_CUR);
                if (i_size_read(file->f_mapping->host) < offset)
-                       do_truncate(file->f_path.dentry, offset, 0, file);
+                       do_truncate(file_mnt_user_ns(file), file->f_path.dentry,
+                                   offset, 0, file);
        }
 }
 EXPORT_SYMBOL(dump_truncate);
index b048a0e..68a2de6 100644 (file)
@@ -52,7 +52,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
        int num_pages = 0;
 
        /* This always succeeds since __GFP_DIRECT_RECLAIM is set. */
-       bio = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+       bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
 
        while (len) {
                unsigned int blocks_this_page = min(len, blocks_per_page);
@@ -74,7 +74,7 @@ static int fscrypt_zeroout_range_inline_crypt(const struct inode *inode,
                len -= blocks_this_page;
                lblk += blocks_this_page;
                pblk += blocks_this_page;
-               if (num_pages == BIO_MAX_PAGES || !len ||
+               if (num_pages == BIO_MAX_VECS || !len ||
                    !fscrypt_mergeable_bio(bio, inode, lblk)) {
                        err = submit_bio_wait(bio);
                        if (err)
@@ -126,7 +126,7 @@ int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk,
                return fscrypt_zeroout_range_inline_crypt(inode, lblk, pblk,
                                                          len);
 
-       BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_PAGES);
+       BUILD_BUG_ON(ARRAY_SIZE(pages) > BIO_MAX_VECS);
        nr_pages = min_t(unsigned int, ARRAY_SIZE(pages),
                         (len + blocks_per_page - 1) >> blocks_per_page_bits);
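
BIO_MAX_PAGES becomes BIO_MAX_VECS throughout this file: the constant bounds the number of bio_vec segments a bio can carry, not pages as such, and the rename makes that explicit. Allocation and the submit-when-full check use the same bound:

        bio = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
        /* ... */
        if (num_pages == BIO_MAX_VECS || !len ||
            !fscrypt_mergeable_bio(bio, inode, lblk))
                err = submit_bio_wait(bio);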
 
index a51cef6..ed3d623 100644 (file)
@@ -465,7 +465,7 @@ int fscrypt_ioctl_set_policy(struct file *filp, const void __user *arg)
                return -EFAULT;
        policy.version = version;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        ret = mnt_want_write_file(filp);
index 799d9e4..7d24ff7 100644 (file)
@@ -2176,8 +2176,8 @@ EXPORT_SYMBOL(d_obtain_root);
  * same inode, only the actual correct case is stored in the dcache for
  * case-insensitive filesystems.
  *
- * For a case-insensitive lookup match and if the the case-exact dentry
- * already exists in in the dcache, use it and return it.
+ * For a case-insensitive lookup match and if the case-exact dentry
+ * already exists in the dcache, use it and return it.
  *
  * If no entry exists with the exact case name, allocate new dentry with
  * the exact case, and return the spliced entry.
index 2fcf664..22e86ae 100644 (file)
@@ -42,13 +42,14 @@ static unsigned int debugfs_allow = DEFAULT_DEBUGFS_ALLOW_BITS;
  * so that we can use the file mode as part of a heuristic to determine whether
  * to lock down individual files.
  */
-static int debugfs_setattr(struct dentry *dentry, struct iattr *ia)
+static int debugfs_setattr(struct user_namespace *mnt_userns,
+                          struct dentry *dentry, struct iattr *ia)
 {
        int ret = security_locked_down(LOCKDOWN_DEBUGFS);
 
        if (ret && (ia->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID)))
                return ret;
-       return simple_setattr(dentry, ia);
+       return simple_setattr(&init_user_ns, dentry, ia);
 }
 
 static const struct inode_operations debugfs_file_inode_operations = {
@@ -297,7 +298,7 @@ struct dentry *debugfs_lookup(const char *name, struct dentry *parent)
 {
        struct dentry *dentry;
 
-       if (IS_ERR(parent))
+       if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent))
                return NULL;
 
        if (!parent)
@@ -318,6 +319,9 @@ static struct dentry *start_creating(const char *name, struct dentry *parent)
        if (!(debugfs_allow & DEBUGFS_ALLOW_API))
                return ERR_PTR(-EPERM);
 
+       if (!debugfs_initialized())
+               return ERR_PTR(-ENOENT);
+
        pr_debug("creating file '%s'\n", name);
 
        if (IS_ERR(parent))
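
debugfs now fails fast before the filesystem is registered: lookups return NULL on a missing or invalid name or an uninitialized debugfs, and creation returns an error pointer instead of proceeding. The two new guards, side by side:

        if (!debugfs_initialized() || IS_ERR_OR_NULL(name) || IS_ERR(parent))
                return NULL;                    /* debugfs_lookup() */

        if (!debugfs_initialized())
                return ERR_PTR(-ENOENT);        /* start_creating() */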
@@ -775,8 +779,8 @@ struct dentry *debugfs_rename(struct dentry *old_dir, struct dentry *old_dentry,
 
        take_dentry_name_snapshot(&old_name, old_dentry);
 
-       error = simple_rename(d_inode(old_dir), old_dentry, d_inode(new_dir),
-                             dentry, 0);
+       error = simple_rename(&init_user_ns, d_inode(old_dir), old_dentry,
+                             d_inode(new_dir), dentry, 0);
        if (error) {
                release_dentry_name_snapshot(&old_name);
                goto exit;
index aa1083e..b61491b 100644 (file)
@@ -462,7 +462,7 @@ static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
  * Wait for the next BIO to complete.  Remove it and return it.  NULL is
  * returned once all BIOs have been completed.  This must only be called once
  * all bios have been issued so that dio->refcount can only decrease.  This
- * requires that that the caller hold a reference on the dio.
+ * requires that the caller hold a reference on the dio.
  */
 static struct bio *dio_await_one(struct dio *dio)
 {
@@ -695,7 +695,7 @@ static inline int dio_new_bio(struct dio *dio, struct dio_submit *sdio,
        if (ret)
                goto out;
        sector = start_sector << (sdio->blkbits - 9);
-       nr_pages = min(sdio->pages_in_io, BIO_MAX_PAGES);
+       nr_pages = bio_max_segs(sdio->pages_in_io);
        BUG_ON(nr_pages <= 0);
        dio_bio_alloc(dio, sdio, map_bh->b_bdev, sector, nr_pages);
        sdio->boundary = 0;
@@ -1279,7 +1279,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
        if (retval == -ENOTBLK) {
                /*
                 * The remaining part of the request will be
-                * be handled by buffered I/O when we return
+                * handled by buffered I/O when we return
                 */
                retval = 0;
        }
index 0681540..943e523 100644 (file)
@@ -1110,8 +1110,8 @@ ecryptfs_write_metadata_to_xattr(struct dentry *ecryptfs_dentry,
        }
 
        inode_lock(lower_inode);
-       rc = __vfs_setxattr(lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME,
-                           page_virt, size, 0);
+       rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode,
+                           ECRYPTFS_XATTR_NAME, page_virt, size, 0);
        if (!rc && ecryptfs_inode)
                fsstack_copy_attr_all(ecryptfs_inode, lower_inode);
        inode_unlock(lower_inode);
index 58d0f71..18e9285 100644 (file)
@@ -141,7 +141,8 @@ static int ecryptfs_do_unlink(struct inode *dir, struct dentry *dentry,
        else if (d_unhashed(lower_dentry))
                rc = -EINVAL;
        else
-               rc = vfs_unlink(lower_dir_inode, lower_dentry, NULL);
+               rc = vfs_unlink(&init_user_ns, lower_dir_inode, lower_dentry,
+                               NULL);
        if (rc) {
                printk(KERN_ERR "Error in vfs_unlink; rc = [%d]\n", rc);
                goto out_unlock;
@@ -180,7 +181,8 @@ ecryptfs_do_create(struct inode *directory_inode,
 
        lower_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_create(d_inode(lower_dir_dentry), lower_dentry, mode, true);
+       rc = vfs_create(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
+                       mode, true);
        if (rc) {
                printk(KERN_ERR "%s: Failure to create dentry in lower fs; "
                       "rc = [%d]\n", __func__, rc);
@@ -190,7 +192,8 @@ ecryptfs_do_create(struct inode *directory_inode,
        inode = __ecryptfs_get_inode(d_inode(lower_dentry),
                                     directory_inode->i_sb);
        if (IS_ERR(inode)) {
-               vfs_unlink(d_inode(lower_dir_dentry), lower_dentry, NULL);
+               vfs_unlink(&init_user_ns, d_inode(lower_dir_dentry),
+                          lower_dentry, NULL);
                goto out_lock;
        }
        fsstack_copy_attr_times(directory_inode, d_inode(lower_dir_dentry));
@@ -254,7 +257,8 @@ out:
  * Returns zero on success; non-zero on error condition
  */
 static int
-ecryptfs_create(struct inode *directory_inode, struct dentry *ecryptfs_dentry,
+ecryptfs_create(struct user_namespace *mnt_userns,
+               struct inode *directory_inode, struct dentry *ecryptfs_dentry,
                umode_t mode, bool excl)
 {
        struct inode *ecryptfs_inode;
@@ -436,8 +440,8 @@ static int ecryptfs_link(struct dentry *old_dentry, struct inode *dir,
        dget(lower_old_dentry);
        dget(lower_new_dentry);
        lower_dir_dentry = lock_parent(lower_new_dentry);
-       rc = vfs_link(lower_old_dentry, d_inode(lower_dir_dentry),
-                     lower_new_dentry, NULL);
+       rc = vfs_link(lower_old_dentry, &init_user_ns,
+                     d_inode(lower_dir_dentry), lower_new_dentry, NULL);
        if (rc || d_really_is_negative(lower_new_dentry))
                goto out_lock;
        rc = ecryptfs_interpose(lower_new_dentry, new_dentry, dir->i_sb);
@@ -460,7 +464,8 @@ static int ecryptfs_unlink(struct inode *dir, struct dentry *dentry)
        return ecryptfs_do_unlink(dir, dentry, d_inode(dentry));
 }
 
-static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
+static int ecryptfs_symlink(struct user_namespace *mnt_userns,
+                           struct inode *dir, struct dentry *dentry,
                            const char *symname)
 {
        int rc;
@@ -481,7 +486,7 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
                                                  strlen(symname));
        if (rc)
                goto out_lock;
-       rc = vfs_symlink(d_inode(lower_dir_dentry), lower_dentry,
+       rc = vfs_symlink(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
                         encoded_symname);
        kfree(encoded_symname);
        if (rc || d_really_is_negative(lower_dentry))
@@ -499,7 +504,8 @@ out_lock:
        return rc;
 }
 
-static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ecryptfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                         struct dentry *dentry, umode_t mode)
 {
        int rc;
        struct dentry *lower_dentry;
@@ -507,7 +513,8 @@ static int ecryptfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_mkdir(d_inode(lower_dir_dentry), lower_dentry, mode);
+       rc = vfs_mkdir(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
+                      mode);
        if (rc || d_really_is_negative(lower_dentry))
                goto out;
        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
@@ -541,7 +548,7 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
        else if (d_unhashed(lower_dentry))
                rc = -EINVAL;
        else
-               rc = vfs_rmdir(lower_dir_inode, lower_dentry);
+               rc = vfs_rmdir(&init_user_ns, lower_dir_inode, lower_dentry);
        if (!rc) {
                clear_nlink(d_inode(dentry));
                fsstack_copy_attr_times(dir, lower_dir_inode);
@@ -555,7 +562,8 @@ static int ecryptfs_rmdir(struct inode *dir, struct dentry *dentry)
 }
 
 static int
-ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+ecryptfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+              struct dentry *dentry, umode_t mode, dev_t dev)
 {
        int rc;
        struct dentry *lower_dentry;
@@ -563,7 +571,8 @@ ecryptfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev
 
        lower_dentry = ecryptfs_dentry_to_lower(dentry);
        lower_dir_dentry = lock_parent(lower_dentry);
-       rc = vfs_mknod(d_inode(lower_dir_dentry), lower_dentry, mode, dev);
+       rc = vfs_mknod(&init_user_ns, d_inode(lower_dir_dentry), lower_dentry,
+                      mode, dev);
        if (rc || d_really_is_negative(lower_dentry))
                goto out;
        rc = ecryptfs_interpose(lower_dentry, dentry, dir->i_sb);
@@ -579,9 +588,9 @@ out:
 }
 
 static int
-ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-               struct inode *new_dir, struct dentry *new_dentry,
-               unsigned int flags)
+ecryptfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+               struct dentry *old_dentry, struct inode *new_dir,
+               struct dentry *new_dentry, unsigned int flags)
 {
        int rc;
        struct dentry *lower_old_dentry;
@@ -590,6 +599,7 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        struct dentry *lower_new_dir_dentry;
        struct dentry *trap;
        struct inode *target_inode;
+       struct renamedata rd = {};
 
        if (flags)
                return -EINVAL;
@@ -619,9 +629,14 @@ ecryptfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                rc = -ENOTEMPTY;
                goto out_lock;
        }
-       rc = vfs_rename(d_inode(lower_old_dir_dentry), lower_old_dentry,
-                       d_inode(lower_new_dir_dentry), lower_new_dentry,
-                       NULL, 0);
+
+       rd.old_mnt_userns       = &init_user_ns;
+       rd.old_dir              = d_inode(lower_old_dir_dentry);
+       rd.old_dentry           = lower_old_dentry;
+       rd.new_mnt_userns       = &init_user_ns;
+       rd.new_dir              = d_inode(lower_new_dir_dentry);
+       rd.new_dentry           = lower_new_dentry;
+       rc = vfs_rename(&rd);
        if (rc)
                goto out_lock;
        if (target_inode)
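
The hunk above moves ecryptfs to the argument-struct calling convention for vfs_rename(): the old six-parameter call is replaced by a struct renamedata, with the mount's user namespace carried alongside each directory. A minimal sketch of the new call pattern, using only the fields the diff itself assigns (a stacked filesystem acting on its lower mount passes &init_user_ns; remaining fields such as flags are zero-initialized):

        struct renamedata rd = {
                .old_mnt_userns = &init_user_ns,
                .old_dir        = d_inode(lower_old_dir_dentry),
                .old_dentry     = lower_old_dentry,
                .new_mnt_userns = &init_user_ns,
                .new_dir        = d_inode(lower_new_dir_dentry),
                .new_dentry     = lower_new_dentry,
        };
        int rc = vfs_rename(&rd);       /* all rename arguments travel in one struct */
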
@@ -855,16 +870,19 @@ int ecryptfs_truncate(struct dentry *dentry, loff_t new_length)
                struct dentry *lower_dentry = ecryptfs_dentry_to_lower(dentry);
 
                inode_lock(d_inode(lower_dentry));
-               rc = notify_change(lower_dentry, &lower_ia, NULL);
+               rc = notify_change(&init_user_ns, lower_dentry,
+                                  &lower_ia, NULL);
                inode_unlock(d_inode(lower_dentry));
        }
        return rc;
 }
 
 static int
-ecryptfs_permission(struct inode *inode, int mask)
+ecryptfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                   int mask)
 {
-       return inode_permission(ecryptfs_inode_to_lower(inode), mask);
+       return inode_permission(&init_user_ns,
+                               ecryptfs_inode_to_lower(inode), mask);
 }
 
 /**
@@ -879,7 +897,8 @@ ecryptfs_permission(struct inode *inode, int mask)
  * All other metadata changes will be passed right to the lower filesystem,
  * and we will just update our inode to look like the lower.
  */
-static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
+static int ecryptfs_setattr(struct user_namespace *mnt_userns,
+                           struct dentry *dentry, struct iattr *ia)
 {
        int rc = 0;
        struct dentry *lower_dentry;
@@ -933,7 +952,7 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
        }
        mutex_unlock(&crypt_stat->cs_mutex);
 
-       rc = setattr_prepare(dentry, ia);
+       rc = setattr_prepare(&init_user_ns, dentry, ia);
        if (rc)
                goto out;
        if (ia->ia_valid & ATTR_SIZE) {
@@ -959,14 +978,15 @@ static int ecryptfs_setattr(struct dentry *dentry, struct iattr *ia)
                lower_ia.ia_valid &= ~ATTR_MODE;
 
        inode_lock(d_inode(lower_dentry));
-       rc = notify_change(lower_dentry, &lower_ia, NULL);
+       rc = notify_change(&init_user_ns, lower_dentry, &lower_ia, NULL);
        inode_unlock(d_inode(lower_dentry));
 out:
        fsstack_copy_attr_all(inode, lower_inode);
        return rc;
 }
 
-static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat,
+static int ecryptfs_getattr_link(struct user_namespace *mnt_userns,
+                                const struct path *path, struct kstat *stat,
                                 u32 request_mask, unsigned int flags)
 {
        struct dentry *dentry = path->dentry;
@@ -975,7 +995,7 @@ static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat,
 
        mount_crypt_stat = &ecryptfs_superblock_to_private(
                                                dentry->d_sb)->mount_crypt_stat;
-       generic_fillattr(d_inode(dentry), stat);
+       generic_fillattr(&init_user_ns, d_inode(dentry), stat);
        if (mount_crypt_stat->flags & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES) {
                char *target;
                size_t targetsiz;
@@ -991,7 +1011,8 @@ static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat,
        return rc;
 }
 
-static int ecryptfs_getattr(const struct path *path, struct kstat *stat,
+static int ecryptfs_getattr(struct user_namespace *mnt_userns,
+                           const struct path *path, struct kstat *stat,
                            u32 request_mask, unsigned int flags)
 {
        struct dentry *dentry = path->dentry;
@@ -1003,7 +1024,7 @@ static int ecryptfs_getattr(const struct path *path, struct kstat *stat,
        if (!rc) {
                fsstack_copy_attr_all(d_inode(dentry),
                                      ecryptfs_inode_to_lower(d_inode(dentry)));
-               generic_fillattr(d_inode(dentry), stat);
+               generic_fillattr(&init_user_ns, d_inode(dentry), stat);
                stat->blocks = lower_stat.blocks;
        }
        return rc;
@@ -1025,7 +1046,7 @@ ecryptfs_setxattr(struct dentry *dentry, struct inode *inode,
                goto out;
        }
        inode_lock(lower_inode);
-       rc = __vfs_setxattr_locked(lower_dentry, name, value, size, flags, NULL);
+       rc = __vfs_setxattr_locked(&init_user_ns, lower_dentry, name, value, size, flags, NULL);
        inode_unlock(lower_inode);
        if (!rc && inode)
                fsstack_copy_attr_all(inode, lower_inode);
@@ -1091,7 +1112,7 @@ static int ecryptfs_removexattr(struct dentry *dentry, struct inode *inode,
                goto out;
        }
        inode_lock(lower_inode);
-       rc = __vfs_removexattr(lower_dentry, name);
+       rc = __vfs_removexattr(&init_user_ns, lower_dentry, name);
        inode_unlock(lower_inode);
 out:
        return rc;
@@ -1135,6 +1156,7 @@ static int ecryptfs_xattr_get(const struct xattr_handler *handler,
 }
 
 static int ecryptfs_xattr_set(const struct xattr_handler *handler,
+                             struct user_namespace *mnt_userns,
                              struct dentry *dentry, struct inode *inode,
                              const char *name, const void *value, size_t size,
                              int flags)
index e63259f..cdf40a5 100644 (file)
@@ -531,6 +531,12 @@ static struct dentry *ecryptfs_mount(struct file_system_type *fs_type, int flags
                goto out_free;
        }
 
+       if (mnt_user_ns(path.mnt) != &init_user_ns) {
+               rc = -EINVAL;
+               printk(KERN_ERR "Mounting on idmapped mounts currently disallowed\n");
+               goto out_free;
+       }
+
        if (check_ruid && !uid_eq(d_inode(path.dentry)->i_uid, current_uid())) {
                rc = -EPERM;
                printk(KERN_ERR "Mount of device (uid: %d) not owned by "
index 019572c..2f333a4 100644 (file)
@@ -426,8 +426,8 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
        if (size < 0)
                size = 8;
        put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
-       rc = __vfs_setxattr(lower_dentry, lower_inode, ECRYPTFS_XATTR_NAME,
-                           xattr_virt, size, 0);
+       rc = __vfs_setxattr(&init_user_ns, lower_dentry, lower_inode,
+                           ECRYPTFS_XATTR_NAME, xattr_virt, size, 0);
        inode_unlock(lower_inode);
        if (rc)
                printk(KERN_ERR "Error whilst attempting to write inode size "
index feaa5e1..e6bc030 100644 (file)
@@ -137,7 +137,7 @@ efivarfs_ioc_setxflags(struct file *file, void __user *arg)
        unsigned int oldflags = efivarfs_getflags(inode);
        int error;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        if (copy_from_user(&flags, arg, sizeof(flags)))
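
The efivarfs hunk is typical of the ioctl-path conversions: inode_owner_or_capable() now takes the mount's user namespace so that ownership is judged against the idmapped view of the inode. Roughly what the 5.12 helper does, as a simplified sketch rather than the verbatim implementation:

        bool inode_owner_or_capable(struct user_namespace *mnt_userns,
                                    const struct inode *inode)
        {
                kuid_t i_uid = i_uid_into_mnt(mnt_userns, inode);

                /* caller owns the inode as seen through this mount */
                if (uid_eq(current_fsuid(), i_uid))
                        return true;

                /* or holds CAP_FOWNER in a namespace that can see the owner */
                return kuid_has_mapping(current_user_ns(), i_uid) &&
                       ns_capable(current_user_ns(), CAP_FOWNER);
        }
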
index 0297ad9..14e2947 100644 (file)
@@ -66,8 +66,8 @@ bool efivarfs_valid_name(const char *str, int len)
        return uuid_is_valid(s);
 }
 
-static int efivarfs_create(struct inode *dir, struct dentry *dentry,
-                         umode_t mode, bool excl)
+static int efivarfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                          struct dentry *dentry, umode_t mode, bool excl)
 {
        struct inode *inode = NULL;
        struct efivar_entry *var;
index ea4f693..1249e74 100644 (file)
@@ -129,6 +129,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
                                              struct page *page,
                                              erofs_off_t *last_block,
                                              unsigned int nblocks,
+                                             unsigned int *eblks,
                                              bool ra)
 {
        struct inode *const inode = mapping->host;
@@ -145,8 +146,7 @@ static inline struct bio *erofs_read_raw_page(struct bio *bio,
 
        /* note that in the readpage case, bio is also NULL */
        if (bio &&
-           /* not continuous */
-           *last_block + 1 != current_block) {
+           (*last_block + 1 != current_block || !*eblks)) {
 submit_bio_retry:
                submit_bio(bio);
                bio = NULL;
@@ -215,10 +215,9 @@ submit_bio_retry:
                /* max # of continuous pages */
                if (nblocks > DIV_ROUND_UP(map.m_plen, PAGE_SIZE))
                        nblocks = DIV_ROUND_UP(map.m_plen, PAGE_SIZE);
-               if (nblocks > BIO_MAX_PAGES)
-                       nblocks = BIO_MAX_PAGES;
 
-               bio = bio_alloc(GFP_NOIO, nblocks);
+               *eblks = bio_max_segs(nblocks);
+               bio = bio_alloc(GFP_NOIO, *eblks);
 
                bio->bi_end_io = erofs_readendio;
                bio_set_dev(bio, sb->s_bdev);
@@ -231,16 +230,8 @@ submit_bio_retry:
        /* out of the extent or bio is full */
        if (err < PAGE_SIZE)
                goto submit_bio_retry;
-
+       --*eblks;
        *last_block = current_block;
-
-       /* shift in advance in case of it followed by too many gaps */
-       if (bio->bi_iter.bi_size >= bio->bi_max_vecs * PAGE_SIZE) {
-               /* err should reassign to 0 after submitting */
-               err = 0;
-               goto submit_bio_out;
-       }
-
        return bio;
 
 err_out:
@@ -254,7 +245,6 @@ has_updated:
 
        /* if updated manually, continuous pages have a gap */
        if (bio)
-submit_bio_out:
                submit_bio(bio);
        return err ? ERR_PTR(err) : NULL;
 }
@@ -266,23 +256,26 @@ submit_bio_out:
 static int erofs_raw_access_readpage(struct file *file, struct page *page)
 {
        erofs_off_t last_block;
+       unsigned int eblks;
        struct bio *bio;
 
        trace_erofs_readpage(page, true);
 
        bio = erofs_read_raw_page(NULL, page->mapping,
-                                 page, &last_block, 1, false);
+                                 page, &last_block, 1, &eblks, false);
 
        if (IS_ERR(bio))
                return PTR_ERR(bio);
 
-       DBG_BUGON(bio); /* since we have only one bio -- must be NULL */
+       if (bio)
+               submit_bio(bio);
        return 0;
 }
 
 static void erofs_raw_access_readahead(struct readahead_control *rac)
 {
        erofs_off_t last_block;
+       unsigned int eblks;
        struct bio *bio = NULL;
        struct page *page;
 
@@ -293,7 +286,7 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
                prefetchw(&page->flags);
 
                bio = erofs_read_raw_page(bio, rac->mapping, page, &last_block,
-                               readahead_count(rac), true);
+                               readahead_count(rac), &eblks, true);
 
                /* all the page errors are ignored when readahead */
                if (IS_ERR(bio)) {
@@ -307,7 +300,6 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
                put_page(page);
        }
 
-       /* the rare case (end in gaps) */
        if (bio)
                submit_bio(bio);
 }
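
The erofs hunks above replace the old implicit "bio is full" test with explicit bookkeeping: *eblks is set from bio_max_segs() when the bio is allocated and decremented for every page added, so the submit check can also fire when no segments remain. bio_max_segs() is presumably just a clamp on the segment count (a sketch consistent with how the hunks use it):

        static inline unsigned int bio_max_segs(unsigned int nr_segs)
        {
                /* a single bio can carry at most BIO_MAX_VECS segments */
                return min(nr_segs, BIO_MAX_VECS);
        }

This also explains the z_erofs hunk further down, where the old BIO_MAX_PAGES constant is spelled BIO_MAX_VECS after the rename.
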
index 3e21c0e..119fdce 100644 (file)
@@ -331,8 +331,9 @@ struct inode *erofs_iget(struct super_block *sb,
        return inode;
 }
 
-int erofs_getattr(const struct path *path, struct kstat *stat,
-                 u32 request_mask, unsigned int query_flags)
+int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, u32 request_mask,
+                 unsigned int query_flags)
 {
        struct inode *const inode = d_inode(path->dentry);
 
@@ -343,7 +344,7 @@ int erofs_getattr(const struct path *path, struct kstat *stat,
        stat->attributes_mask |= (STATX_ATTR_COMPRESSED |
                                  STATX_ATTR_IMMUTABLE);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        return 0;
 }
 
index 67a7ec9..351dae5 100644 (file)
@@ -373,8 +373,9 @@ extern const struct inode_operations erofs_symlink_iops;
 extern const struct inode_operations erofs_fast_symlink_iops;
 
 struct inode *erofs_iget(struct super_block *sb, erofs_nid_t nid, bool dir);
-int erofs_getattr(const struct path *path, struct kstat *stat,
-                 u32 request_mask, unsigned int query_flags);
+int erofs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, u32 request_mask,
+                 unsigned int query_flags);
 
 /* namei.c */
 extern const struct inode_operations erofs_dir_iops;
index 6cb356c..3851e1a 100644 (file)
@@ -1235,7 +1235,7 @@ submit_bio_retry:
                        }
 
                        if (!bio) {
-                               bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+                               bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
 
                                bio->bi_end_io = z_erofs_decompressqueue_endio;
                                bio_set_dev(bio, sb->s_bdev);
index 5a853f0..18594f1 100644 (file)
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -1404,14 +1404,15 @@ EXPORT_SYMBOL(begin_new_exec);
 void would_dump(struct linux_binprm *bprm, struct file *file)
 {
        struct inode *inode = file_inode(file);
-       if (inode_permission(inode, MAY_READ) < 0) {
+       struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+       if (inode_permission(mnt_userns, inode, MAY_READ) < 0) {
                struct user_namespace *old, *user_ns;
                bprm->interp_flags |= BINPRM_FLAGS_ENFORCE_NONDUMP;
 
                /* Ensure mm->user_ns contains the executable */
                user_ns = old = bprm->mm->user_ns;
                while ((user_ns != &init_user_ns) &&
-                      !privileged_wrt_inode_uidgid(user_ns, inode))
+                      !privileged_wrt_inode_uidgid(user_ns, mnt_userns, inode))
                        user_ns = user_ns->parent;
 
                if (old != user_ns) {
@@ -1454,7 +1455,7 @@ EXPORT_SYMBOL(finalize_exec);
 /*
  * Prepare credentials and lock ->cred_guard_mutex.
  * setup_new_exec() commits the new creds and drops the lock.
- * Or, if exec fails before, free_bprm() should release ->cred and
+ * Or, if exec fails before, free_bprm() should release ->cred
  * and unlock.
  */
 static int prepare_bprm_creds(struct linux_binprm *bprm)
@@ -1579,6 +1580,7 @@ static void check_unsafe_exec(struct linux_binprm *bprm)
 static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
 {
        /* Handle suid and sgid on files */
+       struct user_namespace *mnt_userns;
        struct inode *inode;
        unsigned int mode;
        kuid_t uid;
@@ -1595,13 +1597,15 @@ static void bprm_fill_uid(struct linux_binprm *bprm, struct file *file)
        if (!(mode & (S_ISUID|S_ISGID)))
                return;
 
+       mnt_userns = file_mnt_user_ns(file);
+
        /* Be careful if suid/sgid is set */
        inode_lock(inode);
 
        /* reload atomically mode/uid/gid now that lock held */
        mode = inode->i_mode;
-       uid = inode->i_uid;
-       gid = inode->i_gid;
+       uid = i_uid_into_mnt(mnt_userns, inode);
+       gid = i_gid_into_mnt(mnt_userns, inode);
        inode_unlock(inode);
 
        /* We ignore suid/sgid if there are no mappings for them in the ns */
@@ -1837,7 +1841,7 @@ static int bprm_execve(struct linux_binprm *bprm,
 
 out:
        /*
-        * If past the point of no return ensure the the code never
+        * If past the point of no return ensure the code never
         * returns to the userspace process.  Use an existing fatal
         * signal if present otherwise terminate the process with
         * SIGSEGV.
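
For set-id execution the raw inode owner is no longer usable directly: bprm_fill_uid() reads the uid and gid through the mount's user namespace, and privileged_wrt_inode_uidgid() gains the same argument. The i_uid_into_mnt()/i_gid_into_mnt() accessors presumably reduce to mapping the inode's owner through the mount's idmapping (a sketch, assuming the 5.12 helper names):

        static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
                                            const struct inode *inode)
        {
                /* translate the filesystem's view of the owner into the mount's view */
                return kuid_into_mnt(mnt_userns, inode->i_uid);
        }
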
index 764bc64..fa21421 100644 (file)
@@ -416,9 +416,11 @@ int exfat_count_used_clusters(struct super_block *sb, unsigned int *ret_count);
 extern const struct file_operations exfat_file_operations;
 int __exfat_truncate(struct inode *inode, loff_t new_size);
 void exfat_truncate(struct inode *inode, loff_t size);
-int exfat_setattr(struct dentry *dentry, struct iattr *attr);
-int exfat_getattr(const struct path *path, struct kstat *stat,
-               unsigned int request_mask, unsigned int query_flags);
+int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *attr);
+int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, unsigned int request_mask,
+                 unsigned int query_flags);
 int exfat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
 
 /* namei.c */
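
The prototypes above show the new ->getattr()/->setattr() contract: both hooks take the mount's user namespace first and thread it into the generic helpers. As a sketch, with a hypothetical filesystem foo (filesystems that do not yet support idmapped mounts, exfat and ext2 among them, simply pass &init_user_ns to the helpers instead of mnt_userns):

        static int foo_getattr(struct user_namespace *mnt_userns,
                               const struct path *path, struct kstat *stat,
                               u32 request_mask, unsigned int query_flags)
        {
                generic_fillattr(mnt_userns, d_inode(path->dentry), stat);
                return 0;
        }

        static int foo_setattr(struct user_namespace *mnt_userns,
                               struct dentry *dentry, struct iattr *attr)
        {
                int error = setattr_prepare(mnt_userns, dentry, attr);

                if (error)
                        return error;
                setattr_copy(mnt_userns, d_inode(dentry), attr);
                mark_inode_dirty(d_inode(dentry));
                return 0;
        }
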
index 183ffdf..f783cf3 100644 (file)
@@ -267,13 +267,14 @@ write_size:
        mutex_unlock(&sbi->s_lock);
 }
 
-int exfat_getattr(const struct path *path, struct kstat *stat,
-               unsigned int request_mask, unsigned int query_flags)
+int exfat_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, unsigned int request_mask,
+                 unsigned int query_flags)
 {
        struct inode *inode = d_backing_inode(path->dentry);
        struct exfat_inode_info *ei = EXFAT_I(inode);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        exfat_truncate_atime(&stat->atime);
        stat->result_mask |= STATX_BTIME;
        stat->btime.tv_sec = ei->i_crtime.tv_sec;
@@ -282,7 +283,8 @@ int exfat_getattr(const struct path *path, struct kstat *stat,
        return 0;
 }
 
-int exfat_setattr(struct dentry *dentry, struct iattr *attr)
+int exfat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *attr)
 {
        struct exfat_sb_info *sbi = EXFAT_SB(dentry->d_sb);
        struct inode *inode = dentry->d_inode;
@@ -305,7 +307,7 @@ int exfat_setattr(struct dentry *dentry, struct iattr *attr)
                                ATTR_TIMES_SET);
        }
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        attr->ia_valid = ia_valid;
        if (error)
                goto out;
@@ -340,7 +342,7 @@ int exfat_setattr(struct dentry *dentry, struct iattr *attr)
                up_write(&EXFAT_I(inode)->truncate_lock);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        exfat_truncate_atime(&inode->i_atime);
        mark_inode_dirty(inode);
 
index 2932b23..d9e8ec6 100644 (file)
@@ -541,8 +541,8 @@ out:
        return ret;
 }
 
-static int exfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-               bool excl)
+static int exfat_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode;
@@ -827,7 +827,8 @@ unlock:
        return err;
 }
 
-static int exfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int exfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode;
@@ -1318,9 +1319,10 @@ out:
        return ret;
 }
 
-static int exfat_rename(struct inode *old_dir, struct dentry *old_dentry,
-               struct inode *new_dir, struct dentry *new_dentry,
-               unsigned int flags)
+static int exfat_rename(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
+                       struct inode *new_dir, struct dentry *new_dentry,
+                       unsigned int flags)
 {
        struct inode *old_inode, *new_inode;
        struct super_block *sb = old_dir->i_sb;
index cf4c77f..b9a9db9 100644 (file)
@@ -216,14 +216,16 @@ __ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
  * inode->i_mutex: down
  */
 int
-ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+            struct posix_acl *acl, int type)
 {
        int error;
        int update_mode = 0;
        umode_t mode = inode->i_mode;
 
        if (type == ACL_TYPE_ACCESS && acl) {
-               error = posix_acl_update_mode(inode, &mode, &acl);
+               error = posix_acl_update_mode(&init_user_ns, inode, &mode,
+                                             &acl);
                if (error)
                        return error;
                update_mode = 1;
index 0f01c75..917db5f 100644 (file)
@@ -56,7 +56,8 @@ static inline int ext2_acl_count(size_t size)
 
 /* acl.c */
 extern struct posix_acl *ext2_get_acl(struct inode *inode, int type);
-extern int ext2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int ext2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                       struct posix_acl *acl, int type);
 extern int ext2_init_acl (struct inode *, struct inode *);
 
 #else
index 2a4175f..3309fb2 100644 (file)
@@ -764,8 +764,9 @@ extern struct inode *ext2_iget (struct super_block *, unsigned long);
 extern int ext2_write_inode (struct inode *, struct writeback_control *);
 extern void ext2_evict_inode(struct inode *);
 extern int ext2_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern int ext2_setattr (struct dentry *, struct iattr *);
-extern int ext2_getattr (const struct path *, struct kstat *, u32, unsigned int);
+extern int ext2_setattr (struct user_namespace *, struct dentry *, struct iattr *);
+extern int ext2_getattr (struct user_namespace *, const struct path *,
+                        struct kstat *, u32, unsigned int);
 extern void ext2_set_inode_flags(struct inode *inode);
 extern int ext2_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
                       u64 start, u64 len);
index 432c3fe..df14e75 100644 (file)
@@ -551,7 +551,7 @@ got:
                inode->i_uid = current_fsuid();
                inode->i_gid = dir->i_gid;
        } else
-               inode_init_owner(inode, dir, mode);
+               inode_init_owner(&init_user_ns, inode, dir, mode);
 
        inode->i_ino = ino;
        inode->i_blocks = 0;
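
inode_init_owner() is converted the same way; ext2 passes &init_user_ns here, while the ext4 ialloc hunk further down passes the real mnt_userns and uses fsuid_into_mnt() for the GRPID case. A simplified sketch of what the 5.12 helper does (the real version also handles clearing S_ISGID in some corner cases):

        void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
                              const struct inode *dir, umode_t mode)
        {
                /* the owner is the caller's fsuid, mapped through the mount */
                inode->i_uid = fsuid_into_mnt(mnt_userns);
                if (dir && dir->i_mode & S_ISGID) {
                        inode->i_gid = dir->i_gid;      /* setgid dirs propagate the group */
                        if (S_ISDIR(mode))
                                mode |= S_ISGID;
                } else {
                        inode->i_gid = fsgid_into_mnt(mnt_userns);
                }
                inode->i_mode = mode;
        }
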
index 78c417d..68178b2 100644 (file)
@@ -1638,8 +1638,8 @@ int ext2_write_inode(struct inode *inode, struct writeback_control *wbc)
        return __ext2_write_inode(inode, wbc->sync_mode == WB_SYNC_ALL);
 }
 
-int ext2_getattr(const struct path *path, struct kstat *stat,
-               u32 request_mask, unsigned int query_flags)
+int ext2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct ext2_inode_info *ei = EXT2_I(inode);
@@ -1660,16 +1660,17 @@ int ext2_getattr(const struct path *path, struct kstat *stat,
                        STATX_ATTR_IMMUTABLE |
                        STATX_ATTR_NODUMP);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        return 0;
 }
 
-int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
+int ext2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *iattr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
 
-       error = setattr_prepare(dentry, iattr);
+       error = setattr_prepare(&init_user_ns, dentry, iattr);
        if (error)
                return error;
 
@@ -1689,9 +1690,9 @@ int ext2_setattr(struct dentry *dentry, struct iattr *iattr)
                if (error)
                        return error;
        }
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
        if (iattr->ia_valid & ATTR_MODE)
-               error = posix_acl_chmod(inode, inode->i_mode);
+               error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
        mark_inode_dirty(inode);
 
        return error;
index 32a8d10..b399cbb 100644 (file)
@@ -39,7 +39,7 @@ long ext2_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (ret)
                        return ret;
 
-               if (!inode_owner_or_capable(inode)) {
+               if (!inode_owner_or_capable(&init_user_ns, inode)) {
                        ret = -EACCES;
                        goto setflags_out;
                }
@@ -84,7 +84,7 @@ setflags_out:
        case EXT2_IOC_SETVERSION: {
                __u32 generation;
 
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(&init_user_ns, inode))
                        return -EPERM;
                ret = mnt_want_write_file(filp);
                if (ret)
@@ -117,7 +117,7 @@ setversion_out:
                if (!test_opt(inode->i_sb, RESERVATION) || !S_ISREG(inode->i_mode))
                        return -ENOTTY;
 
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(&init_user_ns, inode))
                        return -EACCES;
 
                if (get_user(rsv_window_size, (int __user *)arg))
index ea980f1..3367384 100644 (file)
@@ -100,7 +100,9 @@ struct dentry *ext2_get_parent(struct dentry *child)
  * If the create succeeds, we fill in the inode information
  * with d_instantiate(). 
  */
-static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
+static int ext2_create (struct user_namespace * mnt_userns,
+                       struct inode * dir, struct dentry * dentry,
+                       umode_t mode, bool excl)
 {
        struct inode *inode;
        int err;
@@ -118,7 +120,8 @@ static int ext2_create (struct inode * dir, struct dentry * dentry, umode_t mode
        return ext2_add_nondir(dentry, inode);
 }
 
-static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext2_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode)
 {
        struct inode *inode = ext2_new_inode(dir, mode, NULL);
        if (IS_ERR(inode))
@@ -131,7 +134,8 @@ static int ext2_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        return 0;
 }
 
-static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int ext2_mknod (struct user_namespace * mnt_userns, struct inode * dir,
+       struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode * inode;
        int err;
@@ -151,8 +155,8 @@ static int ext2_mknod (struct inode * dir, struct dentry *dentry, umode_t mode,
        return err;
 }
 
-static int ext2_symlink (struct inode * dir, struct dentry * dentry,
-       const char * symname)
+static int ext2_symlink (struct user_namespace * mnt_userns, struct inode * dir,
+       struct dentry * dentry, const char * symname)
 {
        struct super_block * sb = dir->i_sb;
        int err = -ENAMETOOLONG;
@@ -225,7 +229,8 @@ static int ext2_link (struct dentry * old_dentry, struct inode * dir,
        return err;
 }
 
-static int ext2_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
+static int ext2_mkdir(struct user_namespace * mnt_userns,
+       struct inode * dir, struct dentry * dentry, umode_t mode)
 {
        struct inode * inode;
        int err;
@@ -315,8 +320,9 @@ static int ext2_rmdir (struct inode * dir, struct dentry *dentry)
        return err;
 }
 
-static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
-                       struct inode * new_dir, struct dentry * new_dentry,
+static int ext2_rename (struct user_namespace * mnt_userns,
+                       struct inode * old_dir, struct dentry * old_dentry,
+                       struct inode * new_dir, struct dentry * new_dentry,
                        unsigned int flags)
 {
        struct inode * old_inode = d_inode(old_dentry);
index 9a682e4..ebade1f 100644 (file)
@@ -19,6 +19,7 @@ ext2_xattr_security_get(const struct xattr_handler *handler,
 
 static int
 ext2_xattr_security_set(const struct xattr_handler *handler,
+                       struct user_namespace *mnt_userns,
                        struct dentry *unused, struct inode *inode,
                        const char *name, const void *value,
                        size_t size, int flags)
index 49add11..18a87d5 100644 (file)
@@ -26,6 +26,7 @@ ext2_xattr_trusted_get(const struct xattr_handler *handler,
 
 static int
 ext2_xattr_trusted_set(const struct xattr_handler *handler,
+                      struct user_namespace *mnt_userns,
                       struct dentry *unused, struct inode *inode,
                       const char *name, const void *value,
                       size_t size, int flags)
index c243a3b..5809244 100644 (file)
@@ -30,6 +30,7 @@ ext2_xattr_user_get(const struct xattr_handler *handler,
 
 static int
 ext2_xattr_user_set(const struct xattr_handler *handler,
+                   struct user_namespace *mnt_userns,
                    struct dentry *unused, struct inode *inode,
                    const char *name, const void *value,
                    size_t size, int flags)
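
The three ext2 xattr handlers show the xattr_handler ->set() conversion: the mnt_userns argument is added to the signature even where the handler has no use for it yet. A converted handler then looks like this sketch (the foo_* names, including the foo_xattr_store() backend, are hypothetical):

        static int foo_xattr_user_set(const struct xattr_handler *handler,
                                      struct user_namespace *mnt_userns,
                                      struct dentry *unused, struct inode *inode,
                                      const char *name, const void *value,
                                      size_t size, int flags)
        {
                /* mnt_userns is available for permission checks; ext2 ignores it */
                return foo_xattr_store(inode, name, value, size, flags);
        }
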
diff --git a/fs/ext4/.kunitconfig b/fs/ext4/.kunitconfig
new file mode 100644 (file)
index 0000000..bf51da7
--- /dev/null
@@ -0,0 +1,3 @@
+CONFIG_KUNIT=y
+CONFIG_EXT4_FS=y
+CONFIG_EXT4_KUNIT_TESTS=y
index 619dd35..86699c8 100644 (file)
@@ -103,8 +103,7 @@ config EXT4_DEBUG
 
 config EXT4_KUNIT_TESTS
        tristate "KUnit tests for ext4" if !KUNIT_ALL_TESTS
-       select EXT4_FS
-       depends on KUNIT
+       depends on EXT4_FS && KUNIT
        default KUNIT_ALL_TESTS
        help
          This builds the ext4 KUnit tests.
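
Together with the new fs/ext4/.kunitconfig fragment above, the inverted dependency (the tests now depend on EXT4_FS rather than selecting it) means the ext4 KUnit tests can presumably be run in isolation with something like:

        ./tools/testing/kunit/kunit.py run --kunitconfig=fs/ext4/.kunitconfig
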
index 68aaed4..c5eaffc 100644 (file)
@@ -222,7 +222,8 @@ __ext4_set_acl(handle_t *handle, struct inode *inode, int type,
 }
 
 int
-ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+            struct posix_acl *acl, int type)
 {
        handle_t *handle;
        int error, credits, retries = 0;
@@ -245,7 +246,7 @@ retry:
        ext4_fc_start_update(inode);
 
        if ((type == ACL_TYPE_ACCESS) && acl) {
-               error = posix_acl_update_mode(inode, &mode, &acl);
+               error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl);
                if (error)
                        goto out_stop;
                if (mode != inode->i_mode)
index 9b63f54..84b8942 100644 (file)
@@ -56,7 +56,8 @@ static inline int ext4_acl_count(size_t size)
 
 /* acl.c */
 struct posix_acl *ext4_get_acl(struct inode *inode, int type);
-int ext4_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int ext4_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type);
 extern int ext4_init_acl(handle_t *, struct inode *, struct inode *);
 
 #else  /* CONFIG_EXT4_FS_POSIX_ACL */
index f45f9fe..74a5172 100644 (file)
@@ -626,27 +626,41 @@ int ext4_claim_free_clusters(struct ext4_sb_info *sbi,
 
 /**
  * ext4_should_retry_alloc() - check if a block allocation should be retried
- * @sb:                        super block
- * @retries:           number of attemps has been made
+ * @sb:                        superblock
+ * @retries:           number of retry attempts made so far
  *
- * ext4_should_retry_alloc() is called when ENOSPC is returned, and if
- * it is profitable to retry the operation, this function will wait
- * for the current or committing transaction to complete, and then
- * return TRUE.  We will only retry once.
+ * ext4_should_retry_alloc() is called when ENOSPC is returned while
+ * attempting to allocate blocks.  If there's an indication that a pending
+ * journal transaction might free some space and allow another attempt to
+ * succeed, this function will wait for the current or committing transaction
+ * to complete and then return TRUE.
  */
 int ext4_should_retry_alloc(struct super_block *sb, int *retries)
 {
-       if (!ext4_has_free_clusters(EXT4_SB(sb), 1, 0) ||
-           (*retries)++ > 1 ||
-           !EXT4_SB(sb)->s_journal)
+       struct ext4_sb_info *sbi = EXT4_SB(sb);
+
+       if (!sbi->s_journal)
                return 0;
 
-       smp_mb();
-       if (EXT4_SB(sb)->s_mb_free_pending == 0)
+       if (++(*retries) > 3) {
+               percpu_counter_inc(&sbi->s_sra_exceeded_retry_limit);
                return 0;
+       }
 
+       /*
+        * if there's no indication that blocks are about to be freed, it's
+        * possible we just missed a transaction commit that did so
+        */
+       smp_mb();
+       if (sbi->s_mb_free_pending == 0)
+               return ext4_has_free_clusters(sbi, 1, 0);
+
+       /*
+        * it's possible we've just missed a transaction commit here,
+        * so ignore the returned status
+        */
        jbd_debug(1, "%s: retrying operation after ENOSPC\n", sb->s_id);
-       jbd2_journal_force_commit_nested(EXT4_SB(sb)->s_journal);
+       (void) jbd2_journal_force_commit_nested(sbi->s_journal);
        return 1;
 }
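
Callers of ext4_should_retry_alloc() keep the familiar idiom; only the policy behind it changed (up to three retries, counted in the new s_sra_exceeded_retry_limit counter once exhausted). The extents.c hunk below uses exactly this shape; here it is in isolation, where ext4_do_allocation() stands in for any hypothetical ENOSPC-prone operation:

        static int ext4_try_op(struct super_block *sb)
        {
                int retries = 0;
                int err;

        retry:
                err = ext4_do_allocation(sb);   /* hypothetical allocation step */
                if (err == -ENOSPC && ext4_should_retry_alloc(sb, &retries))
                        goto retry;
                return err;
        }
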
 
index 2866d24..826a56e 100644 (file)
@@ -1484,6 +1484,7 @@ struct ext4_sb_info {
        struct percpu_counter s_freeinodes_counter;
        struct percpu_counter s_dirs_counter;
        struct percpu_counter s_dirtyclusters_counter;
+       struct percpu_counter s_sra_exceeded_retry_limit;
        struct blockgroup_lock *s_blockgroup_lock;
        struct proc_dir_entry *s_proc;
        struct kobject s_kobj;
@@ -2755,18 +2756,19 @@ extern int ext4fs_dirhash(const struct inode *dir, const char *name, int len,
 
 /* ialloc.c */
 extern int ext4_mark_inode_used(struct super_block *sb, int ino);
-extern struct inode *__ext4_new_inode(handle_t *, struct inode *, umode_t,
+extern struct inode *__ext4_new_inode(struct user_namespace *, handle_t *,
+                                     struct inode *, umode_t,
                                      const struct qstr *qstr, __u32 goal,
                                      uid_t *owner, __u32 i_flags,
                                      int handle_type, unsigned int line_no,
                                      int nblocks);
 
-#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags) \
-       __ext4_new_inode((handle), (dir), (mode), (qstr), (goal), (owner), \
-                        i_flags, 0, 0, 0)
-#define ext4_new_inode_start_handle(dir, mode, qstr, goal, owner, \
+#define ext4_new_inode(handle, dir, mode, qstr, goal, owner, i_flags)          \
+       __ext4_new_inode(&init_user_ns, (handle), (dir), (mode), (qstr),       \
+                        (goal), (owner), i_flags, 0, 0, 0)
+#define ext4_new_inode_start_handle(mnt_userns, dir, mode, qstr, goal, owner, \
                                    type, nblocks)                  \
-       __ext4_new_inode(NULL, (dir), (mode), (qstr), (goal), (owner), \
+       __ext4_new_inode((mnt_userns), NULL, (dir), (mode), (qstr), (goal), (owner), \
                         0, (type), __LINE__, (nblocks))
 
 
@@ -2792,6 +2794,8 @@ void __ext4_fc_track_link(handle_t *handle, struct inode *inode,
        struct dentry *dentry);
 void ext4_fc_track_unlink(handle_t *handle, struct dentry *dentry);
 void ext4_fc_track_link(handle_t *handle, struct dentry *dentry);
+void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
+                           struct dentry *dentry);
 void ext4_fc_track_create(handle_t *handle, struct dentry *dentry);
 void ext4_fc_track_inode(handle_t *handle, struct inode *inode);
 void ext4_fc_mark_ineligible(struct super_block *sb, int reason);
@@ -2877,11 +2881,14 @@ extern struct inode *__ext4_iget(struct super_block *sb, unsigned long ino,
        __ext4_iget((sb), (ino), (flags), __func__, __LINE__)
 
 extern int  ext4_write_inode(struct inode *, struct writeback_control *);
-extern int  ext4_setattr(struct dentry *, struct iattr *);
-extern int  ext4_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int  ext4_setattr(struct user_namespace *, struct dentry *,
+                        struct iattr *);
+extern int  ext4_getattr(struct user_namespace *, const struct path *,
+                        struct kstat *, u32, unsigned int);
 extern void ext4_evict_inode(struct inode *);
 extern void ext4_clear_inode(struct inode *);
-extern int  ext4_file_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int  ext4_file_getattr(struct user_namespace *, const struct path *,
+                             struct kstat *, u32, unsigned int);
 extern int  ext4_sync_inode(handle_t *, struct inode *);
 extern void ext4_dirty_inode(struct inode *, int);
 extern int ext4_change_inode_journal_flag(struct inode *, int);
index 3960b7e..77c84d6 100644 (file)
@@ -4382,8 +4382,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
 {
        struct inode *inode = file_inode(file);
        handle_t *handle;
-       int ret = 0;
-       int ret2 = 0, ret3 = 0;
+       int ret = 0, ret2 = 0, ret3 = 0;
        int retries = 0;
        int depth = 0;
        struct ext4_map_blocks map;
@@ -4408,7 +4407,7 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
        depth = ext_depth(inode);
 
 retry:
-       while (ret >= 0 && len) {
+       while (len) {
                /*
                 * Recalculate credits when extent tree depth changes.
                 */
@@ -4430,9 +4429,13 @@ retry:
                                   inode->i_ino, map.m_lblk,
                                   map.m_len, ret);
                        ext4_mark_inode_dirty(handle, inode);
-                       ret2 = ext4_journal_stop(handle);
+                       ext4_journal_stop(handle);
                        break;
                }
+               /*
+                * allow a full retry cycle for any remaining allocations
+                */
+               retries = 0;
                map.m_lblk += ret;
                map.m_len = len = len - ret;
                epos = (loff_t)map.m_lblk << inode->i_blkbits;
@@ -4450,11 +4453,8 @@ retry:
                if (unlikely(ret2))
                        break;
        }
-       if (ret == -ENOSPC &&
-                       ext4_should_retry_alloc(inode->i_sb, &retries)) {
-               ret = 0;
+       if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
                goto retry;
-       }
 
        return ret > 0 ? ret2 : ret;
 }
index 6e8208a..7541d0b 100644 (file)
@@ -513,10 +513,10 @@ void ext4_fc_track_link(handle_t *handle, struct dentry *dentry)
        __ext4_fc_track_link(handle, d_inode(dentry), dentry);
 }
 
-void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
+void __ext4_fc_track_create(handle_t *handle, struct inode *inode,
+                         struct dentry *dentry)
 {
        struct __track_dentry_update_args args;
-       struct inode *inode = d_inode(dentry);
        int ret;
 
        args.dentry = dentry;
@@ -527,6 +527,11 @@ void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
        trace_ext4_fc_track_create(inode, dentry, ret);
 }
 
+void ext4_fc_track_create(handle_t *handle, struct dentry *dentry)
+{
+       __ext4_fc_track_create(handle, d_inode(dentry), dentry);
+}
+
 /* __track_fn for inode tracking */
 static int __track_inode(struct inode *inode, void *arg, bool update)
 {
@@ -915,13 +920,11 @@ static int ext4_fc_submit_inode_data_all(journal_t *journal)
        struct super_block *sb = (struct super_block *)(journal->j_private);
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_inode_info *ei;
-       struct list_head *pos;
        int ret = 0;
 
        spin_lock(&sbi->s_fc_lock);
        ext4_set_mount_flag(sb, EXT4_MF_FC_COMMITTING);
-       list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
-               ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
+       list_for_each_entry(ei, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
                ext4_set_inode_state(&ei->vfs_inode, EXT4_STATE_FC_COMMITTING);
                while (atomic_read(&ei->i_fc_updates)) {
                        DEFINE_WAIT(wait);
@@ -978,17 +981,15 @@ __releases(&sbi->s_fc_lock)
 {
        struct super_block *sb = (struct super_block *)(journal->j_private);
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_fc_dentry_update *fc_dentry;
+       struct ext4_fc_dentry_update *fc_dentry, *fc_dentry_n;
        struct inode *inode;
-       struct list_head *pos, *n, *fcd_pos, *fcd_n;
-       struct ext4_inode_info *ei;
+       struct ext4_inode_info *ei, *ei_n;
        int ret;
 
        if (list_empty(&sbi->s_fc_dentry_q[FC_Q_MAIN]))
                return 0;
-       list_for_each_safe(fcd_pos, fcd_n, &sbi->s_fc_dentry_q[FC_Q_MAIN]) {
-               fc_dentry = list_entry(fcd_pos, struct ext4_fc_dentry_update,
-                                       fcd_list);
+       list_for_each_entry_safe(fc_dentry, fc_dentry_n,
+                                &sbi->s_fc_dentry_q[FC_Q_MAIN], fcd_list) {
                if (fc_dentry->fcd_op != EXT4_FC_TAG_CREAT) {
                        spin_unlock(&sbi->s_fc_lock);
                        if (!ext4_fc_add_dentry_tlv(
@@ -1004,8 +1005,8 @@ __releases(&sbi->s_fc_lock)
                }
 
                inode = NULL;
-               list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
-                       ei = list_entry(pos, struct ext4_inode_info, i_fc_list);
+               list_for_each_entry_safe(ei, ei_n, &sbi->s_fc_q[FC_Q_MAIN],
+                                        i_fc_list) {
                        if (ei->vfs_inode.i_ino == fc_dentry->fcd_ino) {
                                inode = &ei->vfs_inode;
                                break;
@@ -1057,7 +1058,6 @@ static int ext4_fc_perform_commit(journal_t *journal)
        struct ext4_sb_info *sbi = EXT4_SB(sb);
        struct ext4_inode_info *iter;
        struct ext4_fc_head head;
-       struct list_head *pos;
        struct inode *inode;
        struct blk_plug plug;
        int ret = 0;
@@ -1099,8 +1099,7 @@ static int ext4_fc_perform_commit(journal_t *journal)
                goto out;
        }
 
-       list_for_each(pos, &sbi->s_fc_q[FC_Q_MAIN]) {
-               iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
+       list_for_each_entry(iter, &sbi->s_fc_q[FC_Q_MAIN], i_fc_list) {
                inode = &iter->vfs_inode;
                if (!ext4_test_inode_state(inode, EXT4_STATE_FC_COMMITTING))
                        continue;
@@ -1226,9 +1225,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
 {
        struct super_block *sb = journal->j_private;
        struct ext4_sb_info *sbi = EXT4_SB(sb);
-       struct ext4_inode_info *iter;
+       struct ext4_inode_info *iter, *iter_n;
        struct ext4_fc_dentry_update *fc_dentry;
-       struct list_head *pos, *n;
 
        if (full && sbi->s_fc_bh)
                sbi->s_fc_bh = NULL;
@@ -1236,8 +1234,8 @@ static void ext4_fc_cleanup(journal_t *journal, int full)
        jbd2_fc_release_bufs(journal);
 
        spin_lock(&sbi->s_fc_lock);
-       list_for_each_safe(pos, n, &sbi->s_fc_q[FC_Q_MAIN]) {
-               iter = list_entry(pos, struct ext4_inode_info, i_fc_list);
+       list_for_each_entry_safe(iter, iter_n, &sbi->s_fc_q[FC_Q_MAIN],
+                                i_fc_list) {
                list_del_init(&iter->i_fc_list);
                ext4_clear_inode_state(&iter->vfs_inode,
                                       EXT4_STATE_FC_COMMITTING);
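
The fast-commit hunks are a mechanical conversion from list_for_each() plus list_entry() to the typed list_for_each_entry() iterators, dropping the intermediate struct list_head cursor variables. The general shape, as a self-contained sketch:

        #include <linux/list.h>
        #include <linux/printk.h>

        struct item {
                int v;
                struct list_head node;
        };

        static void walk_and_clear(struct list_head *items)
        {
                struct item *it, *tmp;

                list_for_each_entry(it, items, node)
                        pr_info("%d\n", it->v);         /* read-only traversal */

                list_for_each_entry_safe(it, tmp, items, node)
                        list_del(&it->node);            /* _safe permits deleting the cursor */
        }
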
index 20f2fcb..633ae7b 100644 (file)
@@ -919,7 +919,8 @@ static int ext4_xattr_credits_for_new_inode(struct inode *dir, mode_t mode,
  * For other inodes, search forward from the parent directory's block
  * group to find a free inode.
  */
-struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
+struct inode *__ext4_new_inode(struct user_namespace *mnt_userns,
+                              handle_t *handle, struct inode *dir,
                               umode_t mode, const struct qstr *qstr,
                               __u32 goal, uid_t *owner, __u32 i_flags,
                               int handle_type, unsigned int line_no,
@@ -969,10 +970,10 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
                i_gid_write(inode, owner[1]);
        } else if (test_opt(sb, GRPID)) {
                inode->i_mode = mode;
-               inode->i_uid = current_fsuid();
+               inode->i_uid = fsuid_into_mnt(mnt_userns);
                inode->i_gid = dir->i_gid;
        } else
-               inode_init_owner(inode, dir, mode);
+               inode_init_owner(mnt_userns, inode, dir, mode);
 
        if (ext4_has_feature_project(sb) &&
            ext4_test_inode_flag(dir, EXT4_INODE_PROJINHERIT))
index de79052..0948a43 100644 (file)
@@ -20,6 +20,7 @@
  */
 
 #include <linux/fs.h>
+#include <linux/mount.h>
 #include <linux/time.h>
 #include <linux/highuid.h>
 #include <linux/pagemap.h>
@@ -1937,13 +1938,13 @@ static int __ext4_journalled_writepage(struct page *page,
        if (!ret)
                ret = err;
 
-       if (!ext4_has_inline_data(inode))
-               ext4_walk_page_buffers(NULL, page_bufs, 0, len,
-                                      NULL, bput_one);
        ext4_set_inode_state(inode, EXT4_STATE_JDATA);
 out:
        unlock_page(page);
 out_no_pagelock:
+       if (!inline_data && page_bufs)
+               ext4_walk_page_buffers(NULL, page_bufs, 0, len,
+                                      NULL, bput_one);
        brelse(inode_bh);
        return ret;
 }
@@ -5025,7 +5026,7 @@ static int ext4_do_update_inode(handle_t *handle,
        struct ext4_inode_info *ei = EXT4_I(inode);
        struct buffer_head *bh = iloc->bh;
        struct super_block *sb = inode->i_sb;
-       int err = 0, rc, block;
+       int err = 0, block;
        int need_datasync = 0, set_large_file = 0;
        uid_t i_uid;
        gid_t i_gid;
@@ -5137,9 +5138,9 @@ static int ext4_do_update_inode(handle_t *handle,
                                              bh->b_data);
 
        BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
-       rc = ext4_handle_dirty_metadata(handle, NULL, bh);
-       if (!err)
-               err = rc;
+       err = ext4_handle_dirty_metadata(handle, NULL, bh);
+       if (err)
+               goto out_brelse;
        ext4_clear_inode_state(inode, EXT4_STATE_NEW);
        if (set_large_file) {
                BUFFER_TRACE(EXT4_SB(sb)->s_sbh, "get write access");
@@ -5315,7 +5316,8 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
  *
  * Called with inode->i_mutex down.
  */
-int ext4_setattr(struct dentry *dentry, struct iattr *attr)
+int ext4_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error, rc = 0;
@@ -5333,7 +5335,7 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                                  ATTR_GID | ATTR_TIMES_SET))))
                return -EPERM;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(mnt_userns, dentry, attr);
        if (error)
                return error;
 
@@ -5385,8 +5387,10 @@ int ext4_setattr(struct dentry *dentry, struct iattr *attr)
                        inode->i_gid = attr->ia_gid;
                error = ext4_mark_inode_dirty(handle, inode);
                ext4_journal_stop(handle);
-               if (unlikely(error))
+               if (unlikely(error)) {
+                       ext4_fc_stop_update(inode);
                        return error;
+               }
        }
 
        if (attr->ia_valid & ATTR_SIZE) {
@@ -5508,7 +5512,7 @@ out_mmap_sem:
        }
 
        if (!error) {
-               setattr_copy(inode, attr);
+               setattr_copy(mnt_userns, inode, attr);
                mark_inode_dirty(inode);
        }
 
@@ -5520,7 +5524,7 @@ out_mmap_sem:
                ext4_orphan_del(NULL, inode);
 
        if (!error && (ia_valid & ATTR_MODE))
-               rc = posix_acl_chmod(inode, inode->i_mode);
+               rc = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
 
 err_out:
        if  (error)
@@ -5531,8 +5535,8 @@ err_out:
        return error;
 }
 
-int ext4_getattr(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int query_flags)
+int ext4_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct ext4_inode *raw_inode;
@@ -5567,17 +5571,18 @@ int ext4_getattr(const struct path *path, struct kstat *stat,
                                  STATX_ATTR_NODUMP |
                                  STATX_ATTR_VERITY);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(mnt_userns, inode, stat);
        return 0;
 }
 
-int ext4_file_getattr(const struct path *path, struct kstat *stat,
+int ext4_file_getattr(struct user_namespace *mnt_userns,
+                     const struct path *path, struct kstat *stat,
                      u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        u64 delalloc_blocks;
 
-       ext4_getattr(path, stat, request_mask, query_flags);
+       ext4_getattr(mnt_userns, path, stat, request_mask, query_flags);
 
        /*
         * If there is inline data in the inode, the inode will normally not
index 713b1ae..a2cf350 100644 (file)
@@ -107,10 +107,12 @@ void ext4_reset_inode_seed(struct inode *inode)
  * important fields of the inodes.
  *
  * @sb:         the super block of the filesystem
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode:      the inode to swap with EXT4_BOOT_LOADER_INO
  *
  */
 static long swap_inode_boot_loader(struct super_block *sb,
+                               struct user_namespace *mnt_userns,
                                struct inode *inode)
 {
        handle_t *handle;
@@ -139,7 +141,8 @@ static long swap_inode_boot_loader(struct super_block *sb,
        }
 
        if (IS_RDONLY(inode) || IS_APPEND(inode) || IS_IMMUTABLE(inode) ||
-           !inode_owner_or_capable(inode) || !capable(CAP_SYS_ADMIN)) {
+           !inode_owner_or_capable(mnt_userns, inode) ||
+           !capable(CAP_SYS_ADMIN)) {
                err = -EPERM;
                goto journal_err_out;
        }
@@ -814,6 +817,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        struct inode *inode = file_inode(filp);
        struct super_block *sb = inode->i_sb;
        struct ext4_inode_info *ei = EXT4_I(inode);
+       struct user_namespace *mnt_userns = file_mnt_user_ns(filp);
        unsigned int flags;
 
        ext4_debug("cmd = %u, arg = %lu\n", cmd, arg);
@@ -829,7 +833,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        case FS_IOC_SETFLAGS: {
                int err;
 
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EACCES;
 
                if (get_user(flags, (int __user *) arg))
@@ -871,7 +875,7 @@ static long __ext4_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                __u32 generation;
                int err;
 
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EPERM;
 
                if (ext4_has_metadata_csum(inode->i_sb)) {
@@ -1010,7 +1014,7 @@ mext_out:
        case EXT4_IOC_MIGRATE:
        {
                int err;
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EACCES;
 
                err = mnt_want_write_file(filp);
@@ -1032,7 +1036,7 @@ mext_out:
        case EXT4_IOC_ALLOC_DA_BLKS:
        {
                int err;
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EACCES;
 
                err = mnt_want_write_file(filp);
@@ -1051,7 +1055,7 @@ mext_out:
                err = mnt_want_write_file(filp);
                if (err)
                        return err;
-               err = swap_inode_boot_loader(sb, inode);
+               err = swap_inode_boot_loader(sb, mnt_userns, inode);
                mnt_drop_write_file(filp);
                return err;
        }
@@ -1217,7 +1221,7 @@ resizefs_out:
 
        case EXT4_IOC_CLEAR_ES_CACHE:
        {
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EACCES;
                ext4_clear_inode_es(inode);
                return 0;
@@ -1263,7 +1267,7 @@ resizefs_out:
                        return -EFAULT;
 
                /* Make sure caller has proper permission */
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(mnt_userns, inode))
                        return -EACCES;
 
                if (fa.fsx_xflags & ~EXT4_SUPPORTED_FS_XFLAGS)
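
Throughout __ext4_ioctl() the namespace is derived once from the file and reused for every inode_owner_or_capable() check. file_mnt_user_ns() is presumably a thin accessor (a sketch of the 5.12 definition):

        static inline struct user_namespace *file_mnt_user_ns(struct file *file)
        {
                /* the idmapping belongs to the vfsmount the file was opened through */
                return mnt_user_ns(file->f_path.mnt);
        }
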
index 99bf091..a02fadf 100644 (file)
@@ -2709,8 +2709,15 @@ static int ext4_mb_init_backend(struct super_block *sb)
        }
 
        if (ext4_has_feature_flex_bg(sb)) {
-               /* a single flex group is supposed to be read by a single IO */
-               sbi->s_mb_prefetch = min(1 << sbi->s_es->s_log_groups_per_flex,
+               /* a single flex group is supposed to be read by a single IO.
+                * s_mb_prefetch is a 32-bit unsigned integer, so shifting
+                * 1 by s_log_groups_per_flex >= 32 would overflow it.
+                */
+               if (sbi->s_es->s_log_groups_per_flex >= 32) {
+                       ext4_msg(sb, KERN_ERR, "too many log groups per flexible block group");
+                       goto err_freesgi;
+               }
+               sbi->s_mb_prefetch = min_t(uint, 1 << sbi->s_es->s_log_groups_per_flex,
                        BLK_MAX_SEGMENT_SIZE >> (sb->s_blocksize_bits - 9));
                sbi->s_mb_prefetch *= 8; /* 8 prefetch IOs in flight at most */
        } else {
index cf652ba..883e2a7 100644 (file)
@@ -731,6 +731,29 @@ struct stats dx_show_entries(struct dx_hash_info *hinfo, struct inode *dir,
                       (space/bcount)*100/blocksize);
        return (struct stats) { names, space, bcount};
 }
+
+/*
+ * Linear search cross check
+ */
+static inline void htree_rep_invariant_check(struct dx_entry *at,
+                                            struct dx_entry *target,
+                                            u32 hash, unsigned int n)
+{
+       while (n--) {
+               dxtrace(printk(KERN_CONT ","));
+               if (dx_get_hash(++at) > hash) {
+                       at--;
+                       break;
+               }
+       }
+       ASSERT(at == target - 1);
+}
+#else /* DX_DEBUG */
+static inline void htree_rep_invariant_check(struct dx_entry *at,
+                                            struct dx_entry *target,
+                                            u32 hash, unsigned int n)
+{
+}
 #endif /* DX_DEBUG */
 
 /*
@@ -827,20 +850,7 @@ dx_probe(struct ext4_filename *fname, struct inode *dir,
                                p = m + 1;
                }
 
-               if (0) { // linear search cross check
-                       unsigned n = count - 1;
-                       at = entries;
-                       while (n--)
-                       {
-                               dxtrace(printk(KERN_CONT ","));
-                               if (dx_get_hash(++at) > hash)
-                               {
-                                       at--;
-                                       break;
-                               }
-                       }
-                       ASSERT(at == p - 1);
-               }
+               htree_rep_invariant_check(entries, p, hash, count - 1);
 
                at = p - 1;
                dxtrace(printk(KERN_CONT " %x->%u\n",
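
This hunk is a common kernel refactor: dead `if (0)` debug code becomes a
helper that does real work only under the debug ifdef, paired with an empty
inline stub so the call site needs no conditional. A generic sketch of the
shape (MY_DEBUG and the names are placeholders):

    #include <assert.h>

    #ifdef MY_DEBUG
    static inline void invariant_check(int seen, int expected)
    {
            assert(seen == expected);       /* real check, debug builds only */
    }
    #else
    static inline void invariant_check(int seen, int expected)
    {
            (void)seen;                     /* empty stub; compiles away */
            (void)expected;
    }
    #endif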
@@ -2401,11 +2411,10 @@ again:
                                                   (frame - 1)->bh);
                        if (err)
                                goto journal_error;
-                       if (restart) {
-                               err = ext4_handle_dirty_dx_node(handle, dir,
-                                                          frame->bh);
+                       err = ext4_handle_dirty_dx_node(handle, dir,
+                                                       frame->bh);
+                       if (err)
                                goto journal_error;
-                       }
                } else {
                        struct dx_root *dxroot;
                        memcpy((char *) entries2, (char *) entries,
@@ -2596,8 +2605,8 @@ static int ext4_add_nondir(handle_t *handle,
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                      bool excl)
+static int ext4_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
        handle_t *handle;
        struct inode *inode;
@@ -2610,8 +2619,8 @@ static int ext4_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
 retry:
-       inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
-                                           NULL, EXT4_HT_DIR, credits);
+       inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+                                           0, NULL, EXT4_HT_DIR, credits);
        handle = ext4_journal_current_handle();
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
@@ -2631,8 +2640,8 @@ retry:
        return err;
 }
 
-static int ext4_mknod(struct inode *dir, struct dentry *dentry,
-                     umode_t mode, dev_t rdev)
+static int ext4_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        handle_t *handle;
        struct inode *inode;
@@ -2645,8 +2654,8 @@ static int ext4_mknod(struct inode *dir, struct dentry *dentry,
        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
 retry:
-       inode = ext4_new_inode_start_handle(dir, mode, &dentry->d_name, 0,
-                                           NULL, EXT4_HT_DIR, credits);
+       inode = ext4_new_inode_start_handle(mnt_userns, dir, mode, &dentry->d_name,
+                                           0, NULL, EXT4_HT_DIR, credits);
        handle = ext4_journal_current_handle();
        err = PTR_ERR(inode);
        if (!IS_ERR(inode)) {
@@ -2665,7 +2674,8 @@ retry:
        return err;
 }
 
-static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext4_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode)
 {
        handle_t *handle;
        struct inode *inode;
@@ -2676,7 +2686,7 @@ static int ext4_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
                return err;
 
 retry:
-       inode = ext4_new_inode_start_handle(dir, mode,
+       inode = ext4_new_inode_start_handle(mnt_userns, dir, mode,
                                            NULL, 0, NULL,
                                            EXT4_HT_DIR,
                        EXT4_MAXQUOTAS_INIT_BLOCKS(dir->i_sb) +
@@ -2774,7 +2784,8 @@ out:
        return err;
 }
 
-static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ext4_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        handle_t *handle;
        struct inode *inode;
@@ -2790,7 +2801,7 @@ static int ext4_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        credits = (EXT4_DATA_TRANS_BLOCKS(dir->i_sb) +
                   EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3);
 retry:
-       inode = ext4_new_inode_start_handle(dir, S_IFDIR | mode,
+       inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFDIR | mode,
                                            &dentry->d_name,
                                            0, NULL, EXT4_HT_DIR, credits);
        handle = ext4_journal_current_handle();
@@ -3292,7 +3303,7 @@ out_trace:
        return retval;
 }
 
-static int ext4_symlink(struct inode *dir,
+static int ext4_symlink(struct user_namespace *mnt_userns, struct inode *dir,
                        struct dentry *dentry, const char *symname)
 {
        handle_t *handle;
@@ -3333,7 +3344,7 @@ static int ext4_symlink(struct inode *dir,
                          EXT4_INDEX_EXTRA_TRANS_BLOCKS + 3;
        }
 
-       inode = ext4_new_inode_start_handle(dir, S_IFLNK|S_IRWXUGO,
+       inode = ext4_new_inode_start_handle(mnt_userns, dir, S_IFLNK|S_IRWXUGO,
                                            &dentry->d_name, 0, NULL,
                                            EXT4_HT_DIR, credits);
        handle = ext4_journal_current_handle();
@@ -3602,6 +3613,31 @@ static int ext4_setent(handle_t *handle, struct ext4_renament *ent,
        return retval;
 }
 
+static void ext4_resetent(handle_t *handle, struct ext4_renament *ent,
+                         unsigned ino, unsigned file_type)
+{
+       struct ext4_renament old = *ent;
+       int retval = 0;
+
+       /*
+        * old->de could have moved from under us while the directory was
+        * being made indexed, so old->de may no longer be valid and we
+        * need to look it up again before resetting the old inode info.
+        */
+       old.bh = ext4_find_entry(old.dir, &old.dentry->d_name, &old.de, NULL);
+       if (IS_ERR(old.bh))
+               retval = PTR_ERR(old.bh);
+       if (!old.bh)
+               retval = -ENOENT;
+       if (retval) {
+               ext4_std_error(old.dir->i_sb, retval);
+               return;
+       }
+
+       ext4_setent(handle, &old, ino, file_type);
+       brelse(old.bh);
+}
+
 static int ext4_find_delete_entry(handle_t *handle, struct inode *dir,
                                  const struct qstr *d_name)
 {
@@ -3662,7 +3698,8 @@ static void ext4_update_dir_count(handle_t *handle, struct ext4_renament *ent)
        }
 }
 
-static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
+static struct inode *ext4_whiteout_for_rename(struct user_namespace *mnt_userns,
+                                             struct ext4_renament *ent,
                                              int credits, handle_t **h)
 {
        struct inode *wh;
@@ -3676,7 +3713,8 @@ static struct inode *ext4_whiteout_for_rename(struct ext4_renament *ent,
        credits += (EXT4_MAXQUOTAS_TRANS_BLOCKS(ent->dir->i_sb) +
                    EXT4_XATTR_TRANS_BLOCKS + 4);
 retry:
-       wh = ext4_new_inode_start_handle(ent->dir, S_IFCHR | WHITEOUT_MODE,
+       wh = ext4_new_inode_start_handle(mnt_userns, ent->dir,
+                                        S_IFCHR | WHITEOUT_MODE,
                                         &ent->dentry->d_name, 0, NULL,
                                         EXT4_HT_DIR, credits);
 
@@ -3703,9 +3741,9 @@ retry:
  * while new_{dentry,inode} refers to the destination dentry/inode
  * This comes from rename(const char *oldpath, const char *newpath)
  */
-static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
-                      struct inode *new_dir, struct dentry *new_dentry,
-                      unsigned int flags)
+static int ext4_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                      struct dentry *old_dentry, struct inode *new_dir,
+                      struct dentry *new_dentry, unsigned int flags)
 {
        handle_t *handle = NULL;
        struct ext4_renament old = {
@@ -3761,14 +3799,14 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
         */
        retval = -ENOENT;
        if (!old.bh || le32_to_cpu(old.de->inode) != old.inode->i_ino)
-               goto end_rename;
+               goto release_bh;
 
        new.bh = ext4_find_entry(new.dir, &new.dentry->d_name,
                                 &new.de, &new.inlined);
        if (IS_ERR(new.bh)) {
                retval = PTR_ERR(new.bh);
                new.bh = NULL;
-               goto end_rename;
+               goto release_bh;
        }
        if (new.bh) {
                if (!new.inode) {
@@ -3785,15 +3823,13 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                handle = ext4_journal_start(old.dir, EXT4_HT_DIR, credits);
                if (IS_ERR(handle)) {
                        retval = PTR_ERR(handle);
-                       handle = NULL;
-                       goto end_rename;
+                       goto release_bh;
                }
        } else {
-               whiteout = ext4_whiteout_for_rename(&old, credits, &handle);
+               whiteout = ext4_whiteout_for_rename(mnt_userns, &old, credits, &handle);
                if (IS_ERR(whiteout)) {
                        retval = PTR_ERR(whiteout);
-                       whiteout = NULL;
-                       goto end_rename;
+                       goto release_bh;
                }
        }
 
@@ -3837,6 +3873,7 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                retval = ext4_mark_inode_dirty(handle, whiteout);
                if (unlikely(retval))
                        goto end_rename;
+
        }
        if (!new.bh) {
                retval = ext4_add_entry(handle, new.dentry, old.inode);
@@ -3910,6 +3947,8 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
                        ext4_fc_track_unlink(handle, new.dentry);
                __ext4_fc_track_link(handle, old.inode, new.dentry);
                __ext4_fc_track_unlink(handle, old.inode, old.dentry);
+               if (whiteout)
+                       __ext4_fc_track_create(handle, whiteout, old.dentry);
        }
 
        if (new.inode) {
@@ -3924,19 +3963,21 @@ static int ext4_rename(struct inode *old_dir, struct dentry *old_dentry,
 end_rename:
        if (whiteout) {
                if (retval) {
-                       ext4_setent(handle, &old,
-                               old.inode->i_ino, old_file_type);
+                       ext4_resetent(handle, &old,
+                                     old.inode->i_ino, old_file_type);
                        drop_nlink(whiteout);
+                       ext4_orphan_add(handle, whiteout);
                }
                unlock_new_inode(whiteout);
+               ext4_journal_stop(handle);
                iput(whiteout);
-
+       } else {
+               ext4_journal_stop(handle);
        }
+release_bh:
        brelse(old.dir_bh);
        brelse(old.bh);
        brelse(new.bh);
-       if (handle)
-               ext4_journal_stop(handle);
        return retval;
 }
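
The reworked error paths follow a layered-goto discipline: jump to
release_bh while only buffer heads are held, and reach end_rename only once
a journal handle exists, so the handle is stopped exactly once and the
`if (handle)` guard disappears. A self-contained sketch of that shape (all
helper names here are hypothetical stand-ins):

    #include <errno.h>
    #include <stdlib.h>

    /* hypothetical stand-ins for buffer heads and a journal handle */
    static void *acquire_buffers(void)   { return malloc(1); }
    static void release_buffers(void *b) { free(b); }
    static void *start_transaction(void) { return malloc(1); }
    static void stop_transaction(void *h) { free(h); }
    static int journaled_work(void *h)   { (void)h; return 0; }

    static int do_op(void)
    {
            void *bufs, *handle;
            int ret;

            bufs = acquire_buffers();
            if (!bufs)
                    return -ENOMEM;

            handle = start_transaction();
            if (!handle) {
                    ret = -EIO;
                    goto release_bufs;      /* journal never started */
            }

            ret = journaled_work(handle);
            stop_transaction(handle);       /* stopped exactly once */
    release_bufs:
            release_buffers(bufs);
            return ret;
    }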
 
@@ -4085,7 +4126,8 @@ end_rename:
        return retval;
 }
 
-static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int ext4_rename2(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
@@ -4107,7 +4149,7 @@ static int ext4_rename2(struct inode *old_dir, struct dentry *old_dentry,
                                         new_dir, new_dentry);
        }
 
-       return ext4_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
+       return ext4_rename(mnt_userns, old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 
 /*
index 03a44a0..f038d57 100644 (file)
@@ -398,7 +398,7 @@ static void io_submit_init_bio(struct ext4_io_submit *io,
         * bio_alloc will _always_ be able to allocate a bio if
         * __GFP_DIRECT_RECLAIM is set, see comments for bio_alloc_bioset().
         */
-       bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+       bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
        fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO);
        bio->bi_iter.bi_sector = bh->b_blocknr * (bh->b_size >> 9);
        bio_set_dev(bio, bh->b_bdev);
index f014c5e..3db9234 100644 (file)
@@ -371,8 +371,7 @@ int ext4_mpage_readpages(struct inode *inode,
                         * bio_alloc will _always_ be able to allocate a bio if
                         * __GFP_DIRECT_RECLAIM is set, see bio_alloc_bioset().
                         */
-                       bio = bio_alloc(GFP_KERNEL,
-                               min_t(int, nr_pages, BIO_MAX_PAGES));
+                       bio = bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages));
                        fscrypt_set_bio_crypt_ctx(bio, inode, next_block,
                                                  GFP_KERNEL);
                        ext4_set_bio_post_read_ctx(bio, inode, page->index);
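
BIO_MAX_PAGES was renamed to BIO_MAX_VECS in this cycle, and the open-coded
min_t() clamps were replaced by the new bio_max_segs() helper. Functionally
the helper is just a clamp; a sketch of its effect:

    /* sketch: bio_max_segs(n) behaves like this clamp to BIO_MAX_VECS */
    static inline unsigned int clamp_segs(unsigned int nr_segs)
    {
            return nr_segs < BIO_MAX_VECS ? nr_segs : BIO_MAX_VECS;
    }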
index fb59851..b969368 100644 (file)
@@ -59,7 +59,7 @@
 #include <trace/events/ext4.h>
 
 static struct ext4_lazy_init *ext4_li_info;
-static struct mutex ext4_li_mtx;
+static DEFINE_MUTEX(ext4_li_mtx);
 static struct ratelimit_state ext4_mount_msg_ratelimit;
 
 static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
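
Replacing the bare `struct mutex` plus a later mutex_init() call with
DEFINE_MUTEX() makes the lock valid from link time onward and removes the
window where the mutex exists but is uninitialized. The two forms, side by
side (illustrative names):

    /* before: invalid until some init function runs mutex_init() */
    static struct mutex demo_mtx;
    /* ... mutex_init(&demo_mtx) somewhere in module init ... */

    /* after: statically initialized, safe from the first use */
    static DEFINE_MUTEX(demo_mtx2);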
@@ -1210,6 +1210,7 @@ static void ext4_put_super(struct super_block *sb)
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+       percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
        percpu_free_rwsem(&sbi->s_writepages_rwsem);
 #ifdef CONFIG_QUOTA
        for (i = 0; i < EXT4_MAXQUOTAS; i++)
@@ -4875,7 +4876,6 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
 
        set_task_ioprio(sbi->s_journal->j_task, journal_ioprio);
 
-       sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
        sbi->s_journal->j_submit_inode_data_buffers =
                ext4_journal_submit_inode_data_buffers;
        sbi->s_journal->j_finish_inode_data_buffers =
@@ -4987,6 +4987,14 @@ no_journal:
                goto failed_mount5;
        }
 
+       /*
+        * We can only set up the journal commit callback once
+        * mballoc is initialized
+        */
+       if (sbi->s_journal)
+               sbi->s_journal->j_commit_callback =
+                       ext4_journal_commit_callback;
+
        block = ext4_count_free_clusters(sb);
        ext4_free_blocks_count_set(sbi->s_es, 
                                   EXT4_C2B(sbi, block));
@@ -5005,6 +5013,9 @@ no_journal:
                err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0,
                                          GFP_KERNEL);
        if (!err)
+               err = percpu_counter_init(&sbi->s_sra_exceeded_retry_limit, 0,
+                                         GFP_KERNEL);
+       if (!err)
                err = percpu_init_rwsem(&sbi->s_writepages_rwsem);
 
        if (err) {
@@ -5117,6 +5128,7 @@ failed_mount6:
        percpu_counter_destroy(&sbi->s_freeinodes_counter);
        percpu_counter_destroy(&sbi->s_dirs_counter);
        percpu_counter_destroy(&sbi->s_dirtyclusters_counter);
+       percpu_counter_destroy(&sbi->s_sra_exceeded_retry_limit);
        percpu_free_rwsem(&sbi->s_writepages_rwsem);
 failed_mount5:
        ext4_ext_release(sb);
@@ -5142,8 +5154,8 @@ failed_mount_wq:
 failed_mount3a:
        ext4_es_unregister_shrinker(sbi);
 failed_mount3:
-       del_timer_sync(&sbi->s_err_report);
        flush_work(&sbi->s_error_work);
+       del_timer_sync(&sbi->s_err_report);
        if (sbi->s_mmp_tsk)
                kthread_stop(sbi->s_mmp_tsk);
 failed_mount2:
@@ -6654,7 +6666,7 @@ static struct file_system_type ext4_fs_type = {
        .name           = "ext4",
        .mount          = ext4_mount,
        .kill_sb        = kill_block_super,
-       .fs_flags       = FS_REQUIRES_DEV,
+       .fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("ext4");
 
@@ -6667,7 +6679,6 @@ static int __init ext4_init_fs(void)
 
        ratelimit_state_init(&ext4_mount_msg_ratelimit, 30 * HZ, 64);
        ext4_li_info = NULL;
-       mutex_init(&ext4_li_mtx);
 
        /* Build-time check for flags consistency */
        ext4_check_flag_values();
index 075aa3a..a3d0827 100644 (file)
@@ -24,6 +24,7 @@ typedef enum {
        attr_session_write_kbytes,
        attr_lifetime_write_kbytes,
        attr_reserved_clusters,
+       attr_sra_exceeded_retry_limit,
        attr_inode_readahead,
        attr_trigger_test_error,
        attr_first_error_time,
@@ -202,6 +203,7 @@ EXT4_ATTR_FUNC(delayed_allocation_blocks, 0444);
 EXT4_ATTR_FUNC(session_write_kbytes, 0444);
 EXT4_ATTR_FUNC(lifetime_write_kbytes, 0444);
 EXT4_ATTR_FUNC(reserved_clusters, 0644);
+EXT4_ATTR_FUNC(sra_exceeded_retry_limit, 0444);
 
 EXT4_ATTR_OFFSET(inode_readahead_blks, 0644, inode_readahead,
                 ext4_sb_info, s_inode_readahead_blks);
@@ -251,6 +253,7 @@ static struct attribute *ext4_attrs[] = {
        ATTR_LIST(session_write_kbytes),
        ATTR_LIST(lifetime_write_kbytes),
        ATTR_LIST(reserved_clusters),
+       ATTR_LIST(sra_exceeded_retry_limit),
        ATTR_LIST(inode_readahead_blks),
        ATTR_LIST(inode_goal),
        ATTR_LIST(mb_stats),
@@ -374,6 +377,10 @@ static ssize_t ext4_attr_show(struct kobject *kobj,
                return snprintf(buf, PAGE_SIZE, "%llu\n",
                                (unsigned long long)
                                atomic64_read(&sbi->s_resv_clusters));
+       case attr_sra_exceeded_retry_limit:
+               return snprintf(buf, PAGE_SIZE, "%llu\n",
+                               (unsigned long long)
+                       percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit));
        case attr_inode_readahead:
        case attr_pointer_ui:
                if (!ptr)
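
The new sra_exceeded_retry_limit attribute is backed by a percpu counter,
which is cheap to bump on hot paths and only summed across CPUs when
userspace reads the sysfs file. Condensed, the read side amounts to this
sketch (error handling elided):

    /* sketch: summing a percpu counter for a sysfs show callback */
    static ssize_t counter_show(struct ext4_sb_info *sbi, char *buf)
    {
            s64 val = percpu_counter_sum(&sbi->s_sra_exceeded_retry_limit);

            return snprintf(buf, PAGE_SIZE, "%lld\n", (long long)val);
    }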
index 5b7ba8f..00e3cbd 100644 (file)
@@ -201,55 +201,76 @@ static int ext4_end_enable_verity(struct file *filp, const void *desc,
        struct inode *inode = file_inode(filp);
        const int credits = 2; /* superblock and inode for ext4_orphan_del() */
        handle_t *handle;
+       struct ext4_iloc iloc;
        int err = 0;
-       int err2;
 
-       if (desc != NULL) {
-               /* Succeeded; write the verity descriptor. */
-               err = ext4_write_verity_descriptor(inode, desc, desc_size,
-                                                  merkle_tree_size);
-
-               /* Write all pages before clearing VERITY_IN_PROGRESS. */
-               if (!err)
-                       err = filemap_write_and_wait(inode->i_mapping);
-       }
+       /*
+        * If an error already occurred (which fs/verity/ signals by passing
+        * desc == NULL), then only clean-up is needed.
+        */
+       if (desc == NULL)
+               goto cleanup;
 
-       /* If we failed, truncate anything we wrote past i_size. */
-       if (desc == NULL || err)
-               ext4_truncate(inode);
+       /* Append the verity descriptor. */
+       err = ext4_write_verity_descriptor(inode, desc, desc_size,
+                                          merkle_tree_size);
+       if (err)
+               goto cleanup;
 
        /*
-        * We must always clean up by clearing EXT4_STATE_VERITY_IN_PROGRESS and
-        * deleting the inode from the orphan list, even if something failed.
-        * If everything succeeded, we'll also set the verity bit in the same
-        * transaction.
+        * Write all pages (both data and verity metadata).  Note that this must
+        * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
+        * beyond i_size won't be written properly.  For crash consistency, this
+        * also must happen before the verity inode flag gets persisted.
         */
+       err = filemap_write_and_wait(inode->i_mapping);
+       if (err)
+               goto cleanup;
 
-       ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+       /*
+        * Finally, set the verity inode flag and remove the inode from the
+        * orphan list (in a single transaction).
+        */
 
        handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
        if (IS_ERR(handle)) {
-               ext4_orphan_del(NULL, inode);
-               return PTR_ERR(handle);
+               err = PTR_ERR(handle);
+               goto cleanup;
        }
 
-       err2 = ext4_orphan_del(handle, inode);
-       if (err2)
-               goto out_stop;
+       err = ext4_orphan_del(handle, inode);
+       if (err)
+               goto stop_and_cleanup;
 
-       if (desc != NULL && !err) {
-               struct ext4_iloc iloc;
+       err = ext4_reserve_inode_write(handle, inode, &iloc);
+       if (err)
+               goto stop_and_cleanup;
 
-               err = ext4_reserve_inode_write(handle, inode, &iloc);
-               if (err)
-                       goto out_stop;
-               ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
-               ext4_set_inode_flags(inode, false);
-               err = ext4_mark_iloc_dirty(handle, inode, &iloc);
-       }
-out_stop:
+       ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
+       ext4_set_inode_flags(inode, false);
+       err = ext4_mark_iloc_dirty(handle, inode, &iloc);
+       if (err)
+               goto stop_and_cleanup;
+
+       ext4_journal_stop(handle);
+
+       ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+       return 0;
+
+stop_and_cleanup:
        ext4_journal_stop(handle);
-       return err ?: err2;
+cleanup:
+       /*
+        * Verity failed to be enabled, so clean up by truncating any verity
+        * metadata that was written beyond i_size (both from cache and from
+        * disk), removing the inode from the orphan list (if it wasn't done
+        * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
+        */
+       truncate_inode_pages(inode->i_mapping, inode->i_size);
+       ext4_truncate(inode);
+       ext4_orphan_del(NULL, inode);
+       ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
+       return err;
 }
 
 static int ext4_get_verity_descriptor_location(struct inode *inode,
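
The rewritten ext4_end_enable_verity() enforces a strict ordering and
funnels every failure into one cleanup path. Stripped of error handling,
the success path is (wrapper names abbreviated for the sketch):

    write_verity_descriptor();          /* 1. metadata beyond i_size      */
    filemap_write_and_wait(mapping);    /* 2. data + metadata on disk     */
    handle = journal_start();           /* 3. one transaction:            */
    orphan_del(handle);                 /*    leave the orphan list       */
    set_verity_flag(handle);            /*    persist the verity bit      */
    journal_stop(handle);
    clear_verity_in_progress();         /* 4. only after all of the above */

Any failure before step 4 truncates the partially written metadata, drops
the orphan link, and clears the in-progress state.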
index 3722085..6c10182 100644 (file)
@@ -1462,6 +1462,9 @@ ext4_xattr_inode_cache_find(struct inode *inode, const void *value,
        if (!ce)
                return NULL;
 
+       WARN_ON_ONCE(ext4_handle_valid(journal_current_handle()) &&
+                    !(current->flags & PF_MEMALLOC_NOFS));
+
        ea_data = kvmalloc(value_len, GFP_KERNEL);
        if (!ea_data) {
                mb_cache_entry_put(ea_inode_cache, ce);
@@ -2327,6 +2330,7 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
                        error = -ENOSPC;
                        goto cleanup;
                }
+               WARN_ON_ONCE(!(current->flags & PF_MEMALLOC_NOFS));
        }
 
        error = ext4_reserve_inode_write(handle, inode, &is.iloc);
@@ -2400,7 +2404,7 @@ retry_inode:
                                 * external inode if possible.
                                 */
                                if (ext4_has_feature_ea_inode(inode->i_sb) &&
-                                   !i.in_inode) {
+                                   i.value_len && !i.in_inode) {
                                        i.in_inode = 1;
                                        goto retry_inode;
                                }
index 8cfa74a..c78df57 100644 (file)
@@ -32,6 +32,7 @@ ext4_xattr_hurd_get(const struct xattr_handler *handler,
 
 static int
 ext4_xattr_hurd_set(const struct xattr_handler *handler,
+                   struct user_namespace *mnt_userns,
                    struct dentry *unused, struct inode *inode,
                    const char *name, const void *value,
                    size_t size, int flags)
index 197a9d8..8213f66 100644 (file)
@@ -23,6 +23,7 @@ ext4_xattr_security_get(const struct xattr_handler *handler,
 
 static int
 ext4_xattr_security_set(const struct xattr_handler *handler,
+                       struct user_namespace *mnt_userns,
                        struct dentry *unused, struct inode *inode,
                        const char *name, const void *value,
                        size_t size, int flags)
index e9389e5..7c21ffb 100644 (file)
@@ -30,6 +30,7 @@ ext4_xattr_trusted_get(const struct xattr_handler *handler,
 
 static int
 ext4_xattr_trusted_set(const struct xattr_handler *handler,
+                      struct user_namespace *mnt_userns,
                       struct dentry *unused, struct inode *inode,
                       const char *name, const void *value,
                       size_t size, int flags)
index d454618..2fe7ff0 100644 (file)
@@ -31,6 +31,7 @@ ext4_xattr_user_get(const struct xattr_handler *handler,
 
 static int
 ext4_xattr_user_set(const struct xattr_handler *handler,
+                   struct user_namespace *mnt_userns,
                    struct dentry *unused, struct inode *inode,
                    const char *name, const void *value,
                    size_t size, int flags)
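
All four ext4 xattr handlers (and f2fs's below) change in lockstep because
struct xattr_handler's ->set() gained the user-namespace parameter in
v5.12. A conforming stub looks like this (the handler body is a
placeholder):

    static int example_xattr_set(const struct xattr_handler *handler,
                                 struct user_namespace *mnt_userns,
                                 struct dentry *unused, struct inode *inode,
                                 const char *name, const void *value,
                                 size_t size, int flags)
    {
            /* mnt_userns is available for idmapping-aware checks */
            return -EOPNOTSUPP;
    }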
index 732ec10..965037a 100644 (file)
@@ -214,8 +214,8 @@ static int f2fs_acl_update_mode(struct inode *inode, umode_t *mode_p,
                return error;
        if (error == 0)
                *acl = NULL;
-       if (!in_group_p(inode->i_gid) &&
-           !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+       if (!in_group_p(i_gid_into_mnt(&init_user_ns, inode)) &&
+           !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID))
                mode &= ~S_ISGID;
        *mode_p = mode;
        return 0;
@@ -269,7 +269,8 @@ static int __f2fs_set_acl(struct inode *inode, int type,
        return error;
 }
 
-int f2fs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int f2fs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type)
 {
        if (unlikely(f2fs_cp_error(F2FS_I_SB(inode))))
                return -EIO;
index 124868c..986fd1b 100644 (file)
@@ -34,7 +34,8 @@ struct f2fs_acl_header {
 #ifdef CONFIG_F2FS_FS_POSIX_ACL
 
 extern struct posix_acl *f2fs_get_acl(struct inode *, int);
-extern int f2fs_set_acl(struct inode *, struct posix_acl *, int);
+extern int f2fs_set_acl(struct user_namespace *, struct inode *,
+                       struct posix_acl *, int);
 extern int f2fs_init_acl(struct inode *, struct inode *, struct page *,
                                                        struct page *);
 #else
index 174a081..be5415a 100644 (file)
@@ -292,7 +292,7 @@ void f2fs_ra_meta_pages_cond(struct f2fs_sb_info *sbi, pgoff_t index)
        f2fs_put_page(page, 0);
 
        if (readahead)
-               f2fs_ra_meta_pages(sbi, index, BIO_MAX_PAGES, META_POR, true);
+               f2fs_ra_meta_pages(sbi, index, BIO_MAX_VECS, META_POR, true);
 }
 
 static int __f2fs_write_meta_page(struct page *page,
index b9721c8..4e5257c 100644 (file)
@@ -857,7 +857,7 @@ int f2fs_merge_page_bio(struct f2fs_io_info *fio)
                f2fs_submit_merged_ipu_write(fio->sbi, &bio, NULL);
 alloc_new:
        if (!bio) {
-               bio = __bio_alloc(fio, BIO_MAX_PAGES);
+               bio = __bio_alloc(fio, BIO_MAX_VECS);
                __attach_io_flag(fio);
                f2fs_set_bio_crypt_ctx(bio, fio->page->mapping->host,
                                       fio->page->index, fio, GFP_NOIO);
@@ -932,7 +932,7 @@ alloc_new:
                        fio->retry = true;
                        goto skip;
                }
-               io->bio = __bio_alloc(fio, BIO_MAX_PAGES);
+               io->bio = __bio_alloc(fio, BIO_MAX_VECS);
                f2fs_set_bio_crypt_ctx(io->bio, fio->page->mapping->host,
                                       bio_page->index, fio, GFP_NOIO);
                io->fio = *fio;
@@ -969,8 +969,7 @@ static struct bio *f2fs_grab_read_bio(struct inode *inode, block_t blkaddr,
        unsigned int post_read_steps = 0;
 
        bio = bio_alloc_bioset(for_write ? GFP_NOIO : GFP_KERNEL,
-                              min_t(int, nr_pages, BIO_MAX_PAGES),
-                              &f2fs_bioset);
+                              bio_max_segs(nr_pages), &f2fs_bioset);
        if (!bio)
                return ERR_PTR(-ENOMEM);
 
index 506c801..e2d302a 100644 (file)
@@ -3187,9 +3187,10 @@ void f2fs_truncate_data_blocks(struct dnode_of_data *dn);
 int f2fs_do_truncate_blocks(struct inode *inode, u64 from, bool lock);
 int f2fs_truncate_blocks(struct inode *inode, u64 from, bool lock);
 int f2fs_truncate(struct inode *inode);
-int f2fs_getattr(const struct path *path, struct kstat *stat,
-                       u32 request_mask, unsigned int flags);
-int f2fs_setattr(struct dentry *dentry, struct iattr *attr);
+int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int flags);
+int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *attr);
 int f2fs_truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end);
 void f2fs_truncate_data_blocks_range(struct dnode_of_data *dn, int count);
 int f2fs_precache_extents(struct inode *inode);
index 471a6ff..d26ff2a 100644 (file)
@@ -789,8 +789,8 @@ int f2fs_truncate(struct inode *inode)
        return 0;
 }
 
-int f2fs_getattr(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int query_flags)
+int f2fs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct f2fs_inode_info *fi = F2FS_I(inode);
@@ -826,7 +826,7 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
                                  STATX_ATTR_NODUMP |
                                  STATX_ATTR_VERITY);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
 
        /* we need to show initial sectors used for inline_data/dentries */
        if ((S_ISREG(inode->i_mode) && f2fs_has_inline_data(inode)) ||
@@ -837,7 +837,8 @@ int f2fs_getattr(const struct path *path, struct kstat *stat,
 }
 
 #ifdef CONFIG_F2FS_FS_POSIX_ACL
-static void __setattr_copy(struct inode *inode, const struct iattr *attr)
+static void __setattr_copy(struct user_namespace *mnt_userns,
+                          struct inode *inode, const struct iattr *attr)
 {
        unsigned int ia_valid = attr->ia_valid;
 
@@ -853,9 +854,9 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
                inode->i_ctime = attr->ia_ctime;
        if (ia_valid & ATTR_MODE) {
                umode_t mode = attr->ia_mode;
+               kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
 
-               if (!in_group_p(inode->i_gid) &&
-                       !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+               if (!in_group_p(kgid) && !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
                        mode &= ~S_ISGID;
                set_acl_inode(inode, mode);
        }
@@ -864,7 +865,8 @@ static void __setattr_copy(struct inode *inode, const struct iattr *attr)
 #define __setattr_copy setattr_copy
 #endif
 
-int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
+int f2fs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int err;
@@ -884,7 +886,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                !f2fs_is_compress_backend_ready(inode))
                return -EOPNOTSUPP;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                return err;
 
@@ -960,10 +962,10 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
                spin_unlock(&F2FS_I(inode)->i_size_lock);
        }
 
-       __setattr_copy(inode, attr);
+       __setattr_copy(&init_user_ns, inode, attr);
 
        if (attr->ia_valid & ATTR_MODE) {
-               err = posix_acl_chmod(inode, f2fs_get_inode_mode(inode));
+               err = posix_acl_chmod(&init_user_ns, inode, f2fs_get_inode_mode(inode));
 
                if (is_inode_flag_set(inode, FI_ACL_MODE)) {
                        if (!err)
@@ -1978,7 +1980,7 @@ static int f2fs_ioc_setflags(struct file *filp, unsigned long arg)
        u32 iflags;
        int ret;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        if (get_user(fsflags, (int __user *)arg))
@@ -2025,7 +2027,7 @@ static int f2fs_ioc_start_atomic_write(struct file *filp)
        struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
        int ret;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        if (!S_ISREG(inode->i_mode))
@@ -2092,7 +2094,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
        struct inode *inode = file_inode(filp);
        int ret;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        ret = mnt_want_write_file(filp);
@@ -2134,7 +2136,7 @@ static int f2fs_ioc_start_volatile_write(struct file *filp)
        struct inode *inode = file_inode(filp);
        int ret;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        if (!S_ISREG(inode->i_mode))
@@ -2169,7 +2171,7 @@ static int f2fs_ioc_release_volatile_write(struct file *filp)
        struct inode *inode = file_inode(filp);
        int ret;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        ret = mnt_want_write_file(filp);
@@ -2198,7 +2200,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
        struct inode *inode = file_inode(filp);
        int ret;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        ret = mnt_want_write_file(filp);
@@ -3175,7 +3177,7 @@ static int f2fs_ioc_fssetxattr(struct file *filp, unsigned long arg)
                return -EFAULT;
 
        /* Make sure caller has proper permission */
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        if (fa.fsx_xflags & ~F2FS_SUPPORTED_XFLAGS)
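
Note the difference from the ext4 hunks above: f2fs adapts to the new
prototypes but passes &init_user_ns everywhere instead of a real
mnt_userns, and does not set FS_ALLOW_IDMAP. With the initial namespace
the idmapping helpers are identity mappings, so behavior is unchanged
until f2fs opts in:

    /* sketch: with &init_user_ns these are identity mappings */
    kuid_t uid = i_uid_into_mnt(&init_user_ns, inode);  /* == inode->i_uid */
    kgid_t gid = i_gid_into_mnt(&init_user_ns, inode);  /* == inode->i_gid */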
index 8878049..17bd072 100644 (file)
@@ -46,7 +46,7 @@ static struct inode *f2fs_new_inode(struct inode *dir, umode_t mode)
 
        nid_free = true;
 
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
 
        inode->i_ino = ino;
        inode->i_blocks = 0;
@@ -314,8 +314,8 @@ static void set_compress_inode(struct f2fs_sb_info *sbi, struct inode *inode,
        }
 }
 
-static int f2fs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                                               bool excl)
+static int f2fs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
        struct inode *inode;
@@ -637,8 +637,8 @@ static const char *f2fs_get_link(struct dentry *dentry,
        return link;
 }
 
-static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
-                                       const char *symname)
+static int f2fs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, const char *symname)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
        struct inode *inode;
@@ -717,7 +717,8 @@ out_free_encrypted_link:
        return err;
 }
 
-static int f2fs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int f2fs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
        struct inode *inode;
@@ -770,8 +771,8 @@ static int f2fs_rmdir(struct inode *dir, struct dentry *dentry)
        return -ENOTEMPTY;
 }
 
-static int f2fs_mknod(struct inode *dir, struct dentry *dentry,
-                               umode_t mode, dev_t rdev)
+static int f2fs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
        struct inode *inode;
@@ -878,7 +879,8 @@ out:
        return err;
 }
 
-static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int f2fs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode)
 {
        struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
 
@@ -1255,7 +1257,8 @@ out:
        return err;
 }
 
-static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int f2fs_rename2(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
index a8a0fb8..4b0e2e3 100644 (file)
@@ -2747,7 +2747,7 @@ int f2fs_restore_node_summary(struct f2fs_sb_info *sbi,
        sum_entry = &sum->entries[0];
 
        for (i = 0; i < last_offset; i += nrpages, addr += nrpages) {
-               nrpages = min(last_offset - i, BIO_MAX_PAGES);
+               nrpages = bio_max_segs(last_offset - i);
 
                /* readahead node pages */
                f2fs_ra_meta_pages(sbi, addr, nrpages, META_POR, true);
index 993004f..c286656 100644 (file)
@@ -4381,7 +4381,7 @@ static int build_sit_entries(struct f2fs_sb_info *sbi)
        block_t total_node_blocks = 0;
 
        do {
-               readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_PAGES,
+               readed = f2fs_ra_meta_pages(sbi, start_blk, BIO_MAX_VECS,
                                                        META_SIT, true);
 
                start = start_blk * sit_i->sents_per_block;
index 229814b..e9a7a63 100644 (file)
@@ -851,7 +851,7 @@ static inline int nr_pages_to_skip(struct f2fs_sb_info *sbi, int type)
        else if (type == NODE)
                return 8 * sbi->blocks_per_seg;
        else if (type == META)
-               return 8 * BIO_MAX_PAGES;
+               return 8 * BIO_MAX_VECS;
        else
                return 0;
 }
@@ -868,7 +868,7 @@ static inline long nr_pages_to_write(struct f2fs_sb_info *sbi, int type,
                return 0;
 
        nr_to_write = wbc->nr_to_write;
-       desired = BIO_MAX_PAGES;
+       desired = BIO_MAX_VECS;
        if (type == NODE)
                desired <<= 1;
 
index 7069793..82592b1 100644 (file)
@@ -753,9 +753,9 @@ static int parse_options(struct super_block *sb, char *options, bool is_remount)
                case Opt_io_size_bits:
                        if (args->from && match_int(args, &arg))
                                return -EINVAL;
-                       if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_PAGES)) {
+                       if (arg <= 0 || arg > __ilog2_u32(BIO_MAX_VECS)) {
                                f2fs_warn(sbi, "Not support %d, larger than %d",
-                                         1 << arg, BIO_MAX_PAGES);
+                                         1 << arg, BIO_MAX_VECS);
                                return -EINVAL;
                        }
                        F2FS_OPTION(sbi).write_io_size_bits = arg;
index 8159fae..490f843 100644 (file)
@@ -64,6 +64,7 @@ static int f2fs_xattr_generic_get(const struct xattr_handler *handler,
 }
 
 static int f2fs_xattr_generic_set(const struct xattr_handler *handler,
+               struct user_namespace *mnt_userns,
                struct dentry *unused, struct inode *inode,
                const char *name, const void *value,
                size_t size, int flags)
@@ -107,6 +108,7 @@ static int f2fs_xattr_advise_get(const struct xattr_handler *handler,
 }
 
 static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
+               struct user_namespace *mnt_userns,
                struct dentry *unused, struct inode *inode,
                const char *name, const void *value,
                size_t size, int flags)
@@ -114,7 +116,7 @@ static int f2fs_xattr_advise_set(const struct xattr_handler *handler,
        unsigned char old_advise = F2FS_I(inode)->i_advise;
        unsigned char new_advise;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EPERM;
        if (value == NULL)
                return -EINVAL;
index 922a0c6..02d4d42 100644 (file)
@@ -397,9 +397,11 @@ extern long fat_generic_ioctl(struct file *filp, unsigned int cmd,
                              unsigned long arg);
 extern const struct file_operations fat_file_operations;
 extern const struct inode_operations fat_file_inode_operations;
-extern int fat_setattr(struct dentry *dentry, struct iattr *attr);
+extern int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                      struct iattr *attr);
 extern void fat_truncate_blocks(struct inode *inode, loff_t offset);
-extern int fat_getattr(const struct path *path, struct kstat *stat,
+extern int fat_getattr(struct user_namespace *mnt_userns,
+                      const struct path *path, struct kstat *stat,
                       u32 request_mask, unsigned int flags);
 extern int fat_file_fsync(struct file *file, loff_t start, loff_t end,
                          int datasync);
index 5fee74f..13855ba 100644 (file)
@@ -95,7 +95,7 @@ static int fat_ioctl_set_attributes(struct file *file, u32 __user *user_attr)
                goto out_unlock_inode;
 
        /* This MUST be done before doing anything irreversible... */
-       err = fat_setattr(file->f_path.dentry, &ia);
+       err = fat_setattr(file_mnt_user_ns(file), file->f_path.dentry, &ia);
        if (err)
                goto out_unlock_inode;
 
@@ -394,11 +394,11 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset)
        fat_flush_inodes(inode->i_sb, inode, NULL);
 }
 
-int fat_getattr(const struct path *path, struct kstat *stat,
-               u32 request_mask, unsigned int flags)
+int fat_getattr(struct user_namespace *mnt_userns, const struct path *path,
+               struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct inode *inode = d_inode(path->dentry);
-       generic_fillattr(inode, stat);
+       generic_fillattr(mnt_userns, inode, stat);
        stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size;
 
        if (MSDOS_SB(inode->i_sb)->options.nfs == FAT_NFS_NOSTALE_RO) {
@@ -447,12 +447,13 @@ static int fat_sanitize_mode(const struct msdos_sb_info *sbi,
        return 0;
 }
 
-static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
+static int fat_allow_set_time(struct user_namespace *mnt_userns,
+                             struct msdos_sb_info *sbi, struct inode *inode)
 {
        umode_t allow_utime = sbi->options.allow_utime;
 
-       if (!uid_eq(current_fsuid(), inode->i_uid)) {
-               if (in_group_p(inode->i_gid))
+       if (!uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode))) {
+               if (in_group_p(i_gid_into_mnt(mnt_userns, inode)))
                        allow_utime >>= 3;
                if (allow_utime & MAY_WRITE)
                        return 1;
@@ -466,7 +467,8 @@ static int fat_allow_set_time(struct msdos_sb_info *sbi, struct inode *inode)
 /* valid file mode bits */
 #define FAT_VALID_MODE (S_IFREG | S_IFDIR | S_IRWXUGO)
 
-int fat_setattr(struct dentry *dentry, struct iattr *attr)
+int fat_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct iattr *attr)
 {
        struct msdos_sb_info *sbi = MSDOS_SB(dentry->d_sb);
        struct inode *inode = d_inode(dentry);
@@ -476,11 +478,11 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
        /* Check for setting the inode time. */
        ia_valid = attr->ia_valid;
        if (ia_valid & TIMES_SET_FLAGS) {
-               if (fat_allow_set_time(sbi, inode))
+               if (fat_allow_set_time(mnt_userns, sbi, inode))
                        attr->ia_valid &= ~TIMES_SET_FLAGS;
        }
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(mnt_userns, dentry, attr);
        attr->ia_valid = ia_valid;
        if (error) {
                if (sbi->options.quiet)
@@ -550,7 +552,7 @@ int fat_setattr(struct dentry *dentry, struct iattr *attr)
                fat_truncate_time(inode, &attr->ia_mtime, S_MTIME);
        attr->ia_valid &= ~(ATTR_ATIME|ATTR_CTIME|ATTR_MTIME);
 
-       setattr_copy(inode, attr);
+       setattr_copy(mnt_userns, inode, attr);
        mark_inode_dirty(inode);
 out:
        return error;
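
fat, by contrast, does opt into idmapped mounts (FS_ALLOW_IDMAP is added
below), so its ownership tests must map the inode's ids through mnt_userns
before comparing them with the caller's credentials. The core of the
utimes rule, condensed into a sketch (the real fat_allow_set_time() also
consults the allow_utime mount option bits):

    static bool may_set_time(struct user_namespace *mnt_userns,
                             struct inode *inode)
    {
            return uid_eq(current_fsuid(),
                          i_uid_into_mnt(mnt_userns, inode)) ||
                   in_group_p(i_gid_into_mnt(mnt_userns, inode));
    }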
index 9d06288..efba301 100644 (file)
@@ -261,8 +261,8 @@ static int msdos_add_entry(struct inode *dir, const unsigned char *name,
 }
 
 /***** Create a file */
-static int msdos_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                       bool excl)
+static int msdos_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode = NULL;
@@ -339,7 +339,8 @@ out:
 }
 
 /***** Make a directory */
-static int msdos_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int msdos_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct fat_slot_info sinfo;
@@ -593,7 +594,8 @@ error_inode:
 }
 
 /***** Rename, a wrapper for rename_same_dir & rename_diff_dir */
-static int msdos_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int msdos_rename(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
@@ -665,7 +667,7 @@ static struct file_system_type msdos_fs_type = {
        .name           = "msdos",
        .mount          = msdos_mount,
        .kill_sb        = kill_block_super,
-       .fs_flags       = FS_REQUIRES_DEV,
+       .fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("msdos");
 
index 0cdd0fb..5369d82 100644 (file)
@@ -756,8 +756,8 @@ error:
        return ERR_PTR(err);
 }
 
-static int vfat_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                      bool excl)
+static int vfat_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode;
@@ -846,7 +846,8 @@ out:
        return err;
 }
 
-static int vfat_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int vfat_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        struct super_block *sb = dir->i_sb;
        struct inode *inode;
@@ -892,9 +893,9 @@ out:
        return err;
 }
 
-static int vfat_rename(struct inode *old_dir, struct dentry *old_dentry,
-                      struct inode *new_dir, struct dentry *new_dentry,
-                      unsigned int flags)
+static int vfat_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                      struct dentry *old_dentry, struct inode *new_dir,
+                      struct dentry *new_dentry, unsigned int flags)
 {
        struct buffer_head *dotdot_bh;
        struct msdos_dir_entry *dotdot_de;
@@ -1062,7 +1063,7 @@ static struct file_system_type vfat_fs_type = {
        .name           = "vfat",
        .mount          = vfat_mount,
        .kill_sb        = kill_block_super,
-       .fs_flags       = FS_REQUIRES_DEV,
+       .fs_flags       = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("vfat");
 
index 483ef88..dfc72f1 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/user_namespace.h>
 #include <linux/memfd.h>
 #include <linux/compat.h>
+#include <linux/mount.h>
 
 #include <linux/poll.h>
 #include <asm/siginfo.h>
@@ -46,7 +47,7 @@ static int setfl(int fd, struct file * filp, unsigned long arg)
 
        /* O_NOATIME can only be set by the owner or superuser */
        if ((arg & O_NOATIME) && !(filp->f_flags & O_NOATIME))
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(file_mnt_user_ns(filp), inode))
                        return -EPERM;
 
        /* required for strict SunOS emulation */
index 01263ff..ec6feec 100644 (file)
@@ -173,7 +173,7 @@ static int handle_to_path(int mountdirfd, struct file_handle __user *ufh,
 
        /*
         * With handle we don't look at the execute bit on the
-        * the directory. Ideally we would like CAP_DAC_SEARCH.
+        * directory. Ideally we would like CAP_DAC_SEARCH.
         * But we don't have that
         */
        if (!capable(CAP_DAC_READ_SEARCH)) {
index f3a4bac..f633348 100644 (file)
--- a/fs/file.c
+++ b/fs/file.c
@@ -629,17 +629,30 @@ int close_fd(unsigned fd)
 }
 EXPORT_SYMBOL(close_fd); /* for ksys_close() */
 
+/**
+ * last_fd - return last valid index into fd table
+ * @fdt: file descriptor table
+ *
+ * Context: Either rcu read lock or files->file_lock must be held.
+ *
+ * Returns: Last valid index into fdtable.
+ */
+static inline unsigned last_fd(struct fdtable *fdt)
+{
+       return fdt->max_fds - 1;
+}
+
 static inline void __range_cloexec(struct files_struct *cur_fds,
                                   unsigned int fd, unsigned int max_fd)
 {
        struct fdtable *fdt;
 
-       if (fd > max_fd)
-               return;
-
+       /* make sure we're using the correct maximum value */
        spin_lock(&cur_fds->file_lock);
        fdt = files_fdtable(cur_fds);
-       bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
+       max_fd = min(last_fd(fdt), max_fd);
+       if (fd <= max_fd)
+               bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1);
        spin_unlock(&cur_fds->file_lock);
 }
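
The close_range() fix clamps max_fd to the last valid slot of the current
fdtable, under file_lock, before touching the bitmap; otherwise a caller
passing a huge max_fd would make bitmap_set() write past the allocation. A
toy user-space model of the clamp:

    #include <stdio.h>

    #define NBITS 64                        /* stand-in for fdt->max_fds */
    static unsigned char map[NBITS / 8];

    static void set_range(unsigned int from, unsigned int to)
    {
            unsigned int last = NBITS - 1;  /* like last_fd(fdt) */

            if (to > last)
                    to = last;              /* the new clamp */
            if (from > to)
                    return;
            for (unsigned int i = from; i <= to; i++)
                    map[i / 8] |= 1u << (i % 8);
    }

    int main(void)
    {
            set_range(3, ~0u);              /* caller passed UINT_MAX */
            printf("last byte: %#x\n", map[NBITS / 8 - 1]);
            return 0;
    }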
 
index f529075..e9c0f91 100644 (file)
@@ -50,7 +50,8 @@ struct posix_acl *fuse_get_acl(struct inode *inode, int type)
        return acl;
 }
 
-int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        const char *name;
index 588f8d1..c0fee83 100644 (file)
@@ -844,11 +844,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
        if (WARN_ON(PageMlocked(oldpage)))
                goto out_fallback_unlock;
 
-       err = replace_page_cache_page(oldpage, newpage, GFP_KERNEL);
-       if (err) {
-               unlock_page(newpage);
-               goto out_put_old;
-       }
+       replace_page_cache_page(oldpage, newpage);
 
        get_page(newpage);
 
@@ -2233,19 +2229,21 @@ static int fuse_device_clone(struct fuse_conn *fc, struct file *new)
 static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
                           unsigned long arg)
 {
-       int err = -ENOTTY;
+       int res;
+       int oldfd;
+       struct fuse_dev *fud = NULL;
 
-       if (cmd == FUSE_DEV_IOC_CLONE) {
-               int oldfd;
+       if (_IOC_TYPE(cmd) != FUSE_DEV_IOC_MAGIC)
+               return -ENOTTY;
 
-               err = -EFAULT;
-               if (!get_user(oldfd, (__u32 __user *) arg)) {
+       switch (_IOC_NR(cmd)) {
+       case _IOC_NR(FUSE_DEV_IOC_CLONE):
+               res = -EFAULT;
+               if (!get_user(oldfd, (__u32 __user *)arg)) {
                        struct file *old = fget(oldfd);
 
-                       err = -EINVAL;
+                       res = -EINVAL;
                        if (old) {
-                               struct fuse_dev *fud = NULL;
-
                                /*
                                 * Check against file->f_op because CUSE
                                 * uses the same ioctl handler.
@@ -2256,14 +2254,18 @@ static long fuse_dev_ioctl(struct file *file, unsigned int cmd,
 
                                if (fud) {
                                        mutex_lock(&fuse_mutex);
-                                       err = fuse_device_clone(fud->fc, file);
+                                       res = fuse_device_clone(fud->fc, file);
                                        mutex_unlock(&fuse_mutex);
                                }
                                fput(old);
                        }
                }
+               break;
+       default:
+               res = -ENOTTY;
+               break;
        }
-       return err;
+       return res;
 }
 
 const struct file_operations fuse_dev_operations = {
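
fuse_dev_ioctl() now rejects foreign commands up front by checking
_IOC_TYPE() against FUSE_DEV_IOC_MAGIC and then switches on _IOC_NR(),
which scales to more than one ioctl. The decoding macros come from
<linux/ioctl.h>; a minimal sketch of the dispatch shape (magic value and
command are made up for the example):

    #include <errno.h>
    #include <linux/ioctl.h>

    #define EX_IOC_MAGIC  0xE5
    #define EX_IOC_CLONE  _IOW(EX_IOC_MAGIC, 0, unsigned int)

    static long ex_dispatch(unsigned int cmd)
    {
            if (_IOC_TYPE(cmd) != EX_IOC_MAGIC)
                    return -ENOTTY;         /* not our ioctl space */

            switch (_IOC_NR(cmd)) {
            case _IOC_NR(EX_IOC_CLONE):
                    return 0;               /* handle the clone request */
            default:
                    return -ENOTTY;         /* ours, but unknown number */
            }
    }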
index 78f9f20..06a1870 100644 (file)
@@ -605,7 +605,8 @@ out_err:
        return err;
 }
 
-static int fuse_mknod(struct inode *, struct dentry *, umode_t, dev_t);
+static int fuse_mknod(struct user_namespace *, struct inode *, struct dentry *,
+                     umode_t, dev_t);
 static int fuse_atomic_open(struct inode *dir, struct dentry *entry,
                            struct file *file, unsigned flags,
                            umode_t mode)
@@ -645,7 +646,7 @@ out_dput:
        return err;
 
 mknod:
-       err = fuse_mknod(dir, entry, mode, 0);
+       err = fuse_mknod(&init_user_ns, dir, entry, mode, 0);
        if (err)
                goto out_dput;
 no_open:
@@ -715,8 +716,8 @@ static int create_new_entry(struct fuse_mount *fm, struct fuse_args *args,
        return err;
 }
 
-static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
-                     dev_t rdev)
+static int fuse_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *entry, umode_t mode, dev_t rdev)
 {
        struct fuse_mknod_in inarg;
        struct fuse_mount *fm = get_fuse_mount(dir);
@@ -738,13 +739,14 @@ static int fuse_mknod(struct inode *dir, struct dentry *entry, umode_t mode,
        return create_new_entry(fm, &args, dir, entry, mode);
 }
 
-static int fuse_create(struct inode *dir, struct dentry *entry, umode_t mode,
-                      bool excl)
+static int fuse_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *entry, umode_t mode, bool excl)
 {
-       return fuse_mknod(dir, entry, mode, 0);
+       return fuse_mknod(&init_user_ns, dir, entry, mode, 0);
 }
 
-static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
+static int fuse_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *entry, umode_t mode)
 {
        struct fuse_mkdir_in inarg;
        struct fuse_mount *fm = get_fuse_mount(dir);
@@ -765,8 +767,8 @@ static int fuse_mkdir(struct inode *dir, struct dentry *entry, umode_t mode)
        return create_new_entry(fm, &args, dir, entry, S_IFDIR);
 }
 
-static int fuse_symlink(struct inode *dir, struct dentry *entry,
-                       const char *link)
+static int fuse_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *entry, const char *link)
 {
        struct fuse_mount *fm = get_fuse_mount(dir);
        unsigned len = strlen(link) + 1;
@@ -908,9 +910,9 @@ static int fuse_rename_common(struct inode *olddir, struct dentry *oldent,
        return err;
 }
 
-static int fuse_rename2(struct inode *olddir, struct dentry *oldent,
-                       struct inode *newdir, struct dentry *newent,
-                       unsigned int flags)
+static int fuse_rename2(struct user_namespace *mnt_userns, struct inode *olddir,
+                       struct dentry *oldent, struct inode *newdir,
+                       struct dentry *newent, unsigned int flags)
 {
        struct fuse_conn *fc = get_fuse_conn(olddir);
        int err;
@@ -1087,7 +1089,7 @@ static int fuse_update_get_attr(struct inode *inode, struct file *file,
                forget_all_cached_acls(inode);
                err = fuse_do_getattr(inode, stat, file);
        } else if (stat) {
-               generic_fillattr(inode, stat);
+               generic_fillattr(&init_user_ns, inode, stat);
                stat->mode = fi->orig_i_mode;
                stat->ino = fi->orig_ino;
        }
@@ -1249,7 +1251,8 @@ static int fuse_perm_getattr(struct inode *inode, int mask)
  * access request is sent.  Execute permission is still checked
  * locally based on file mode.
  */
-static int fuse_permission(struct inode *inode, int mask)
+static int fuse_permission(struct user_namespace *mnt_userns,
+                          struct inode *inode, int mask)
 {
        struct fuse_conn *fc = get_fuse_conn(inode);
        bool refreshed = false;
@@ -1280,7 +1283,7 @@ static int fuse_permission(struct inode *inode, int mask)
        }
 
        if (fc->default_permissions) {
-               err = generic_permission(inode, mask);
+               err = generic_permission(&init_user_ns, inode, mask);
 
                /* If permission is denied, try to refresh file
                   attributes.  This is also needed, because the root
@@ -1288,7 +1291,8 @@ static int fuse_permission(struct inode *inode, int mask)
                if (err == -EACCES && !refreshed) {
                        err = fuse_perm_getattr(inode, mask);
                        if (!err)
-                               err = generic_permission(inode, mask);
+                               err = generic_permission(&init_user_ns,
+                                                        inode, mask);
                }
 
                /* Note: the opposite of the above test does not
@@ -1610,7 +1614,7 @@ int fuse_do_setattr(struct dentry *dentry, struct iattr *attr,
        if (!fc->default_permissions)
                attr->ia_valid |= ATTR_FORCE;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                return err;
 
@@ -1756,7 +1760,8 @@ error:
        return err;
 }
 
-static int fuse_setattr(struct dentry *entry, struct iattr *attr)
+static int fuse_setattr(struct user_namespace *mnt_userns, struct dentry *entry,
+                       struct iattr *attr)
 {
        struct inode *inode = d_inode(entry);
        struct fuse_conn *fc = get_fuse_conn(inode);
@@ -1818,7 +1823,8 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr)
        return ret;
 }
 
-static int fuse_getattr(const struct path *path, struct kstat *stat,
+static int fuse_getattr(struct user_namespace *mnt_userns,
+                       const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int flags)
 {
        struct inode *inode = d_inode(path->dentry);
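
The fuse hunks above all follow the same v5.12 idmapped-mounts conversion: each inode operation gains a struct user_namespace *mnt_userns first argument, and a filesystem that does not yet support idmapped mounts keeps passing &init_user_ns to the generic VFS helpers. A minimal sketch of that pattern (the example_ name is hypothetical; the helper signatures match the calls in the hunks above):

    #include <linux/fs.h>
    #include <linux/user_namespace.h>

    static int example_setattr(struct user_namespace *mnt_userns,
                               struct dentry *dentry, struct iattr *attr)
    {
            struct inode *inode = d_inode(dentry);
            int err;

            /* mnt_userns is accepted but ignored; use the initial namespace */
            err = setattr_prepare(&init_user_ns, dentry, attr);
            if (err)
                    return err;

            setattr_copy(&init_user_ns, inode, attr);
            mark_inode_dirty(inode);
            return 0;
    }
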
index 7c4b8cb..63d97a1 100644 (file)
@@ -863,6 +863,7 @@ static inline u64 fuse_get_attr_version(struct fuse_conn *fc)
 
 static inline void fuse_make_bad(struct inode *inode)
 {
+       remove_inode_hash(inode);
        set_bit(FUSE_I_BAD, &get_fuse_inode(inode)->state);
 }
 
@@ -1180,8 +1181,8 @@ extern const struct xattr_handler *fuse_no_acl_xattr_handlers[];
 
 struct posix_acl;
 struct posix_acl *fuse_get_acl(struct inode *inode, int type);
-int fuse_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-
+int fuse_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type);
 
 /* readdir.c */
 int fuse_readdir(struct file *file, struct dir_context *ctx);
index 8868ac3..4ee6f73 100644 (file)
@@ -1324,8 +1324,15 @@ static int virtio_fs_fill_super(struct super_block *sb, struct fs_context *fsc)
 
        /* virtiofs allocates and installs its own fuse devices */
        ctx->fudptr = NULL;
-       if (ctx->dax)
+       if (ctx->dax) {
+               if (!fs->dax_dev) {
+                       err = -EINVAL;
+                       pr_err("virtio-fs: dax can't be enabled as the filesystem device does not support it.\n");

+                       goto err_free_fuse_devs;
+               }
                ctx->dax_dev = fs->dax_dev;
+       }
        err = fuse_fill_super_common(sb, ctx);
        if (err < 0)
                goto err_free_fuse_devs;
index cdea18d..1a7d7ac 100644 (file)
@@ -188,6 +188,7 @@ static int fuse_xattr_get(const struct xattr_handler *handler,
 }
 
 static int fuse_xattr_set(const struct xattr_handler *handler,
+                         struct user_namespace *mnt_userns,
                          struct dentry *dentry, struct inode *inode,
                          const char *name, const void *value, size_t size,
                          int flags)
@@ -214,6 +215,7 @@ static int no_xattr_get(const struct xattr_handler *handler,
 }
 
 static int no_xattr_set(const struct xattr_handler *handler,
+                       struct user_namespace *mnt_userns,
                        struct dentry *dentry, struct inode *inode,
                        const char *name, const void *value,
                        size_t size, int flags)
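
The xattr_handler callbacks get the same treatment: ->set() grows a mnt_userns argument. A sketch of a handler built against the new signature, with hypothetical example_ names, refusing all writes the way a no-op handler might:

    static int example_xattr_set(const struct xattr_handler *handler,
                                 struct user_namespace *mnt_userns,
                                 struct dentry *dentry, struct inode *inode,
                                 const char *name, const void *value,
                                 size_t size, int flags)
    {
            return -EOPNOTSUPP;     /* refuse all writes */
    }

    static const struct xattr_handler example_xattr_handler = {
            .prefix = "user.",
            .set    = example_xattr_set,
    };
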
index 2e939f5..9165d70 100644 (file)
@@ -106,7 +106,8 @@ out:
        return error;
 }
 
-int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type)
 {
        struct gfs2_inode *ip = GFS2_I(inode);
        struct gfs2_holder gh;
@@ -130,7 +131,7 @@ int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
 
        mode = inode->i_mode;
        if (type == ACL_TYPE_ACCESS && acl) {
-               ret = posix_acl_update_mode(inode, &mode, &acl);
+               ret = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl);
                if (ret)
                        goto unlock;
        }
index 61353a1..eccc6a4 100644 (file)
@@ -13,6 +13,7 @@
 
 extern struct posix_acl *gfs2_get_acl(struct inode *inode, int type);
 extern int __gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
-extern int gfs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int gfs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                       struct posix_acl *acl, int type);
 
 #endif /* __ACL_DOT_H__ */
index 62d9081..7a358ae 100644 (file)
@@ -1230,6 +1230,9 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
 
        gfs2_inplace_release(ip);
 
+       if (ip->i_qadata && ip->i_qadata->qa_qd_num)
+               gfs2_quota_unlock(ip);
+
        if (length != written && (iomap->flags & IOMAP_F_NEW)) {
                /* Deallocate blocks that were just allocated. */
                loff_t blockmask = i_blocksize(inode) - 1;
@@ -1242,9 +1245,6 @@ static int gfs2_iomap_end(struct inode *inode, loff_t pos, loff_t length,
                }
        }
 
-       if (ip->i_qadata && ip->i_qadata->qa_qd_num)
-               gfs2_quota_unlock(ip);
-
        if (unlikely(!written))
                goto out_unlock;
 
@@ -1538,13 +1538,13 @@ more_rgrps:
                                goto out;
                        }
                        ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
-                                                0, rd_gh);
+                                                LM_FLAG_NODE_SCOPE, rd_gh);
                        if (ret)
                                goto out;
 
                        /* Must be done with the rgrp glock held: */
                        if (gfs2_rs_active(&ip->i_res) &&
-                           rgd == ip->i_res.rs_rbm.rgd)
+                           rgd == ip->i_res.rs_rgd)
                                gfs2_rs_deltree(&ip->i_res);
                }
 
index 07f49e5..2d500f9 100644 (file)
@@ -238,7 +238,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
                goto out;
 
        error = -EACCES;
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                goto out;
 
        error = 0;
@@ -256,7 +256,7 @@ static int do_gfs2_set_flags(struct file *filp, u32 reqflags, u32 mask,
            !capable(CAP_LINUX_IMMUTABLE))
                goto out;
        if (!IS_IMMUTABLE(inode)) {
-               error = gfs2_permission(inode, MAY_WRITE);
+               error = gfs2_permission(&init_user_ns, inode, MAY_WRITE);
                if (error)
                        goto out;
        }
@@ -716,10 +716,10 @@ static int gfs2_release(struct inode *inode, struct file *file)
        kfree(file->private_data);
        file->private_data = NULL;
 
-       if (file->f_mode & FMODE_WRITE) {
+       if (gfs2_rs_active(&ip->i_res))
                gfs2_rs_delete(ip, &inode->i_writecount);
+       if (file->f_mode & FMODE_WRITE)
                gfs2_qa_put(ip);
-       }
        return 0;
 }
 
@@ -1112,8 +1112,8 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
                        goto out_qunlock;
 
                /* check if the selected rgrp limits our max_blks further */
-               if (ap.allowed && ap.allowed < max_blks)
-                       max_blks = ap.allowed;
+               if (ip->i_res.rs_reserved < max_blks)
+                       max_blks = ip->i_res.rs_reserved;
 
                /* Almost done. Calculate bytes that can be written using
                 * max_blks. We also recompute max_bytes, data_blocks and
index d87a5bc..9567520 100644 (file)
@@ -313,9 +313,23 @@ void gfs2_glock_put(struct gfs2_glock *gl)
 static inline int may_grant(const struct gfs2_glock *gl, const struct gfs2_holder *gh)
 {
        const struct gfs2_holder *gh_head = list_first_entry(&gl->gl_holders, const struct gfs2_holder, gh_list);
-       if ((gh->gh_state == LM_ST_EXCLUSIVE ||
-            gh_head->gh_state == LM_ST_EXCLUSIVE) && gh != gh_head)
-               return 0;
+
+       if (gh != gh_head) {
+               /*
+                * Here we make a special exception to grant holders who agree
+                * to share the EX lock with other holders that also have the
+                * LM_FLAG_NODE_SCOPE bit set. If the original holder has the
+                * bit set, we grant more holders with the bit set.
+                */
+               if (gh_head->gh_state == LM_ST_EXCLUSIVE &&
+                   (gh_head->gh_flags & LM_FLAG_NODE_SCOPE) &&
+                   gh->gh_state == LM_ST_EXCLUSIVE &&
+                   (gh->gh_flags & LM_FLAG_NODE_SCOPE))
+                       return 1;
+               if (gh->gh_state == LM_ST_EXCLUSIVE ||
+                   gh_head->gh_state == LM_ST_EXCLUSIVE)
+                       return 0;
+       }
        if (gl->gl_state == gh->gh_state)
                return 1;
        if (gh->gh_flags & GL_EXACT)
@@ -2030,6 +2044,8 @@ static const char *hflags2str(char *buf, u16 flags, unsigned long iflags)
                *p++ = 'A';
        if (flags & LM_FLAG_PRIORITY)
                *p++ = 'p';
+       if (flags & LM_FLAG_NODE_SCOPE)
+               *p++ = 'n';
        if (flags & GL_ASYNC)
                *p++ = 'a';
        if (flags & GL_EXACT)
index 5381336..31a8f2f 100644 (file)
@@ -75,6 +75,11 @@ enum {
  * request and directly join the other shared lock.  A shared lock request
  * without the priority flag might be forced to wait until the deferred
 * request had acquired and released the lock.
+ *
+ * LM_FLAG_NODE_SCOPE
+ * This holder agrees to share the lock within this node. In other words,
+ * the glock is held in EX mode according to DLM, but local holders on the
+ * same node can share it.
  */
 
 #define LM_FLAG_TRY            0x0001
@@ -82,6 +87,7 @@ enum {
 #define LM_FLAG_NOEXP          0x0004
 #define LM_FLAG_ANY            0x0008
 #define LM_FLAG_PRIORITY       0x0010
+#define LM_FLAG_NODE_SCOPE     0x0020
 #define GL_ASYNC               0x0040
 #define GL_EXACT               0x0080
 #define GL_SKIP                        0x0100
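
A sketch of how a caller opts in, matching the rgrp-glock call sites converted elsewhere in this series: two holders that both request EX with LM_FLAG_NODE_SCOPE can now be granted concurrently on one node, while DLM still sees a single cluster-wide EX lock.

    struct gfs2_holder rgd_gh;
    int error;

    /* EX cluster-wide, but shareable among local holders that set the flag */
    error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
                               LM_FLAG_NODE_SCOPE, &rgd_gh);
    if (!error) {
            /* ... resource-group work ... */
            gfs2_glock_dq_uninit(&rgd_gh);
    }
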
index 3faa421..8e32d56 100644 (file)
@@ -86,16 +86,12 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
 {
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        struct gfs2_trans tr;
+       unsigned int revokes;
        int ret;
 
-       memset(&tr, 0, sizeof(tr));
-       INIT_LIST_HEAD(&tr.tr_buf);
-       INIT_LIST_HEAD(&tr.tr_databuf);
-       INIT_LIST_HEAD(&tr.tr_ail1_list);
-       INIT_LIST_HEAD(&tr.tr_ail2_list);
-       tr.tr_revokes = atomic_read(&gl->gl_ail_count);
+       revokes = atomic_read(&gl->gl_ail_count);
 
-       if (!tr.tr_revokes) {
+       if (!revokes) {
                bool have_revokes;
                bool log_in_flight;
 
@@ -122,20 +118,14 @@ static int gfs2_ail_empty_gl(struct gfs2_glock *gl)
                return 0;
        }
 
-       /* A shortened, inline version of gfs2_trans_begin()
-         * tr->alloced is not set since the transaction structure is
-         * on the stack */
-       tr.tr_reserved = 1 + gfs2_struct2blk(sdp, tr.tr_revokes);
-       tr.tr_ip = _RET_IP_;
-       ret = gfs2_log_reserve(sdp, tr.tr_reserved);
-       if (ret < 0)
-               return ret;
-       WARN_ON_ONCE(current->journal_info);
-       current->journal_info = &tr;
-
-       __gfs2_ail_flush(gl, 0, tr.tr_revokes);
-
+       memset(&tr, 0, sizeof(tr));
+       set_bit(TR_ONSTACK, &tr.tr_flags);
+       ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_);
+       if (ret)
+               goto flush;
+       __gfs2_ail_flush(gl, 0, revokes);
        gfs2_trans_end(sdp);
+
 flush:
        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                       GFS2_LFC_AIL_EMPTY_GL);
@@ -146,19 +136,15 @@ void gfs2_ail_flush(struct gfs2_glock *gl, bool fsync)
 {
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        unsigned int revokes = atomic_read(&gl->gl_ail_count);
-       unsigned int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
        int ret;
 
        if (!revokes)
                return;
 
-       while (revokes > max_revokes)
-               max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
-
-       ret = gfs2_trans_begin(sdp, 0, max_revokes);
+       ret = gfs2_trans_begin(sdp, 0, revokes);
        if (ret)
                return;
-       __gfs2_ail_flush(gl, fsync, max_revokes);
+       __gfs2_ail_flush(gl, fsync, revokes);
        gfs2_trans_end(sdp);
        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                       GFS2_LFC_AIL_FLUSH);
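
The rewritten gfs2_ail_empty_gl() above is the first user of the TR_ONSTACK flag added in this series: instead of hand-rolling a shortened gfs2_trans_begin(), it builds the transaction on the stack and lets __gfs2_trans_begin() know not to free it. The pattern, in brief (a sketch condensed from the hunk above):

    struct gfs2_trans tr;
    int ret;

    memset(&tr, 0, sizeof(tr));
    set_bit(TR_ONSTACK, &tr.tr_flags);  /* tr must not be freed on gfs2_trans_end() */
    ret = __gfs2_trans_begin(&tr, sdp, 0, revokes, _RET_IP_);
    if (!ret) {
            /* ... journaled work ... */
            gfs2_trans_end(sdp);
    }
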
index 8e1ab8e..0957119 100644 (file)
@@ -20,6 +20,7 @@
 #include <linux/percpu.h>
 #include <linux/lockref.h>
 #include <linux/rhashtable.h>
+#include <linux/mutex.h>
 
 #define DIO_WAIT       0x00000010
 #define DIO_METADATA   0x00000020
@@ -106,7 +107,8 @@ struct gfs2_rgrpd {
        u32 rd_data;                    /* num of data blocks in rgrp */
        u32 rd_bitbytes;                /* number of bytes in data bitmaps */
        u32 rd_free;
-       u32 rd_reserved;                /* number of blocks reserved */
+       u32 rd_requested;               /* number of blocks in rd_rstree */
+       u32 rd_reserved;                /* number of reserved blocks */
        u32 rd_free_clone;
        u32 rd_dinodes;
        u64 rd_igeneration;
@@ -122,34 +124,10 @@ struct gfs2_rgrpd {
 #define GFS2_RDF_PREFERRED     0x80000000 /* This rgrp is preferred */
 #define GFS2_RDF_MASK          0xf0000000 /* mask for internal flags */
        spinlock_t rd_rsspin;           /* protects reservation related vars */
+       struct mutex rd_mutex;
        struct rb_root rd_rstree;       /* multi-block reservation tree */
 };
 
-struct gfs2_rbm {
-       struct gfs2_rgrpd *rgd;
-       u32 offset;             /* The offset is bitmap relative */
-       int bii;                /* Bitmap index */
-};
-
-static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
-{
-       return rbm->rgd->rd_bits + rbm->bii;
-}
-
-static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
-{
-       BUG_ON(rbm->offset >= rbm->rgd->rd_data);
-       return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
-               rbm->offset;
-}
-
-static inline bool gfs2_rbm_eq(const struct gfs2_rbm *rbm1,
-                              const struct gfs2_rbm *rbm2)
-{
-       return (rbm1->rgd == rbm2->rgd) && (rbm1->bii == rbm2->bii) &&
-              (rbm1->offset == rbm2->offset);
-}
-
 enum gfs2_state_bits {
        BH_Pinned = BH_PrivateStart,
        BH_Escaped = BH_PrivateStart + 1,
@@ -313,9 +291,11 @@ struct gfs2_qadata { /* quota allocation data */
 */
 
 struct gfs2_blkreserv {
-       struct rb_node rs_node;       /* link to other block reservations */
-       struct gfs2_rbm rs_rbm;       /* Start of reservation */
-       u32 rs_free;                  /* how many blocks are still free */
+       struct rb_node rs_node;       /* node within rd_rstree */
+       struct gfs2_rgrpd *rs_rgd;
+       u64 rs_start;
+       u32 rs_requested;
+       u32 rs_reserved;              /* number of reserved blocks */
 };
 
 /*
@@ -490,7 +470,7 @@ struct gfs2_quota_data {
 enum {
        TR_TOUCHED = 1,
        TR_ATTACHED = 2,
-       TR_ALLOCED = 3,
+       TR_ONSTACK = 3,
 };
 
 struct gfs2_trans {
@@ -506,7 +486,6 @@ struct gfs2_trans {
        unsigned int tr_num_buf_rm;
        unsigned int tr_num_databuf_rm;
        unsigned int tr_num_revoke;
-       unsigned int tr_num_revoke_rm;
 
        struct list_head tr_list;
        struct list_head tr_databuf;
@@ -531,6 +510,7 @@ struct gfs2_jdesc {
        unsigned int nr_extents;
        struct work_struct jd_work;
        struct inode *jd_inode;
+       struct bio *jd_log_bio;
        unsigned long jd_flags;
 #define JDF_RECOVERY 1
        unsigned int jd_jid;
@@ -585,6 +565,7 @@ struct gfs2_args {
        unsigned int ar_errors:2;               /* errors=withdraw | panic */
        unsigned int ar_nobarrier:1;            /* do not send barriers */
        unsigned int ar_rgrplvb:1;              /* use lvbs for rgrp info */
+       unsigned int ar_got_rgrplvb:1;          /* Was the rgrplvb opt given? */
        unsigned int ar_loccookie:1;            /* use location based readdir
                                                   cookies */
        s32 ar_commit;                          /* Commit interval */
@@ -821,7 +802,6 @@ struct gfs2_sbd {
 
        struct gfs2_trans *sd_log_tr;
        unsigned int sd_log_blks_reserved;
-       int sd_log_committed_revoke;
 
        atomic_t sd_log_pinned;
        unsigned int sd_log_num_revoke;
@@ -834,24 +814,22 @@ struct gfs2_sbd {
        atomic_t sd_log_thresh2;
        atomic_t sd_log_blks_free;
        atomic_t sd_log_blks_needed;
+       atomic_t sd_log_revokes_available;
        wait_queue_head_t sd_log_waitq;
        wait_queue_head_t sd_logd_waitq;
 
        u64 sd_log_sequence;
-       unsigned int sd_log_head;
-       unsigned int sd_log_tail;
        int sd_log_idle;
 
        struct rw_semaphore sd_log_flush_lock;
        atomic_t sd_log_in_flight;
-       struct bio *sd_log_bio;
        wait_queue_head_t sd_log_flush_wait;
        int sd_log_error; /* First log error */
        wait_queue_head_t sd_withdraw_wait;
 
-       atomic_t sd_reserving_log;
-       wait_queue_head_t sd_reserving_log_wait;
-
+       unsigned int sd_log_tail;
+       unsigned int sd_log_flush_tail;
+       unsigned int sd_log_head;
        unsigned int sd_log_flush_head;
 
        spinlock_t sd_ail_lock;
index c1b77e8..c9775d5 100644 (file)
@@ -325,7 +325,7 @@ struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
        }
 
        if (!is_root) {
-               error = gfs2_permission(dir, MAY_EXEC);
+               error = gfs2_permission(&init_user_ns, dir, MAY_EXEC);
                if (error)
                        goto out;
        }
@@ -355,7 +355,8 @@ static int create_ok(struct gfs2_inode *dip, const struct qstr *name,
 {
        int error;
 
-       error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+       error = gfs2_permission(&init_user_ns, &dip->i_inode,
+                               MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
 
@@ -490,8 +491,8 @@ static void init_dinode(struct gfs2_inode *dip, struct gfs2_inode *ip,
        di = (struct gfs2_dinode *)dibh->b_data;
        gfs2_dinode_out(ip, di);
 
-       di->di_major = cpu_to_be32(MAJOR(ip->i_inode.i_rdev));
-       di->di_minor = cpu_to_be32(MINOR(ip->i_inode.i_rdev));
+       di->di_major = cpu_to_be32(imajor(&ip->i_inode));
+       di->di_minor = cpu_to_be32(iminor(&ip->i_inode));
        di->__pad1 = 0;
        di->__pad2 = 0;
        di->__pad3 = 0;
@@ -843,8 +844,8 @@ fail:
  * Returns: errno
  */
 
-static int gfs2_create(struct inode *dir, struct dentry *dentry,
-                      umode_t mode, bool excl)
+static int gfs2_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
        return gfs2_create_inode(dir, dentry, NULL, S_IFREG | mode, 0, NULL, 0, excl);
 }
@@ -951,7 +952,7 @@ static int gfs2_link(struct dentry *old_dentry, struct inode *dir,
        if (inode->i_nlink == 0)
                goto out_gunlock;
 
-       error = gfs2_permission(dir, MAY_WRITE | MAY_EXEC);
+       error = gfs2_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
        if (error)
                goto out_gunlock;
 
@@ -1068,7 +1069,8 @@ static int gfs2_unlink_ok(struct gfs2_inode *dip, const struct qstr *name,
        if (IS_APPEND(&dip->i_inode))
                return -EPERM;
 
-       error = gfs2_permission(&dip->i_inode, MAY_WRITE | MAY_EXEC);
+       error = gfs2_permission(&init_user_ns, &dip->i_inode,
+                               MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
 
@@ -1145,7 +1147,7 @@ static int gfs2_unlink(struct inode *dir, struct dentry *dentry)
        if (!rgd)
                goto out_inodes;
 
-       gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, ghs + 2);
+       gfs2_holder_init(rgd->rd_gl, LM_ST_EXCLUSIVE, LM_FLAG_NODE_SCOPE, ghs + 2);
 
 
        error = gfs2_glock_nq(ghs); /* parent */
@@ -1204,8 +1206,8 @@ out_inodes:
  * Returns: errno
  */
 
-static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
-                       const char *symname)
+static int gfs2_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, const char *symname)
 {
        unsigned int size;
 
@@ -1225,7 +1227,8 @@ static int gfs2_symlink(struct inode *dir, struct dentry *dentry,
  * Returns: errno
  */
 
-static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int gfs2_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        unsigned dsize = gfs2_max_stuffed_size(GFS2_I(dir));
        return gfs2_create_inode(dir, dentry, NULL, S_IFDIR | mode, 0, NULL, dsize, 0);
@@ -1240,8 +1243,8 @@ static int gfs2_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
  *
  */
 
-static int gfs2_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
-                     dev_t dev)
+static int gfs2_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, dev_t dev)
 {
        return gfs2_create_inode(dir, dentry, NULL, mode, dev, NULL, 0, 0);
 }
@@ -1450,8 +1453,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                        error = -ENOENT;
                        goto out_gunlock;
                }
-               error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE, 0,
-                                          &rd_gh);
+               error = gfs2_glock_nq_init(nrgd->rd_gl, LM_ST_EXCLUSIVE,
+                                          LM_FLAG_NODE_SCOPE, &rd_gh);
                if (error)
                        goto out_gunlock;
        }
@@ -1490,7 +1493,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
                        }
                }
        } else {
-               error = gfs2_permission(ndir, MAY_WRITE | MAY_EXEC);
+               error = gfs2_permission(&init_user_ns, ndir,
+                                       MAY_WRITE | MAY_EXEC);
                if (error)
                        goto out_gunlock;
 
@@ -1525,7 +1529,8 @@ static int gfs2_rename(struct inode *odir, struct dentry *odentry,
        /* Check out the dir to be renamed */
 
        if (dir_rename) {
-               error = gfs2_permission(d_inode(odentry), MAY_WRITE);
+               error = gfs2_permission(&init_user_ns, d_inode(odentry),
+                                       MAY_WRITE);
                if (error)
                        goto out_gunlock;
        }
@@ -1688,12 +1693,14 @@ static int gfs2_exchange(struct inode *odir, struct dentry *odentry,
                goto out_gunlock;
 
        if (S_ISDIR(old_mode)) {
-               error = gfs2_permission(odentry->d_inode, MAY_WRITE);
+               error = gfs2_permission(&init_user_ns, odentry->d_inode,
+                                       MAY_WRITE);
                if (error)
                        goto out_gunlock;
        }
        if (S_ISDIR(new_mode)) {
-               error = gfs2_permission(ndentry->d_inode, MAY_WRITE);
+               error = gfs2_permission(&init_user_ns, ndentry->d_inode,
+                                       MAY_WRITE);
                if (error)
                        goto out_gunlock;
        }
@@ -1747,9 +1754,9 @@ out:
        return error;
 }
 
-static int gfs2_rename2(struct inode *odir, struct dentry *odentry,
-                       struct inode *ndir, struct dentry *ndentry,
-                       unsigned int flags)
+static int gfs2_rename2(struct user_namespace *mnt_userns, struct inode *odir,
+                       struct dentry *odentry, struct inode *ndir,
+                       struct dentry *ndentry, unsigned int flags)
 {
        flags &= ~RENAME_NOREPLACE;
 
@@ -1833,7 +1840,8 @@ out:
  * Returns: errno
  */
 
-int gfs2_permission(struct inode *inode, int mask)
+int gfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                   int mask)
 {
        struct gfs2_inode *ip;
        struct gfs2_holder i_gh;
@@ -1852,7 +1860,7 @@ int gfs2_permission(struct inode *inode, int mask)
        if ((mask & MAY_WRITE) && IS_IMMUTABLE(inode))
                error = -EPERM;
        else
-               error = generic_permission(inode, mask);
+               error = generic_permission(&init_user_ns, inode, mask);
        if (gfs2_holder_initialized(&i_gh))
                gfs2_glock_dq_uninit(&i_gh);
 
@@ -1861,7 +1869,7 @@ int gfs2_permission(struct inode *inode, int mask)
 
 static int __gfs2_setattr_simple(struct inode *inode, struct iattr *attr)
 {
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
@@ -1963,7 +1971,8 @@ out:
  * Returns: errno
  */
 
-static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
+static int gfs2_setattr(struct user_namespace *mnt_userns,
+                       struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct gfs2_inode *ip = GFS2_I(inode);
@@ -1982,7 +1991,7 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
        if (IS_IMMUTABLE(inode) || IS_APPEND(inode))
                goto error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                goto error;
 
@@ -1993,7 +2002,8 @@ static int gfs2_setattr(struct dentry *dentry, struct iattr *attr)
        else {
                error = gfs2_setattr_simple(inode, attr);
                if (!error && attr->ia_valid & ATTR_MODE)
-                       error = posix_acl_chmod(inode, inode->i_mode);
+                       error = posix_acl_chmod(&init_user_ns, inode,
+                                               inode->i_mode);
        }
 
 error:
@@ -2007,6 +2017,7 @@ out:
 
 /**
  * gfs2_getattr - Read out an inode's attributes
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @path: Object to query
  * @stat: The inode's stats
  * @request_mask: Mask of STATX_xxx flags indicating the caller's interests
@@ -2021,7 +2032,8 @@ out:
  * Returns: errno
  */
 
-static int gfs2_getattr(const struct path *path, struct kstat *stat,
+static int gfs2_getattr(struct user_namespace *mnt_userns,
+                       const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int flags)
 {
        struct inode *inode = d_inode(path->dentry);
@@ -2049,7 +2061,7 @@ static int gfs2_getattr(const struct path *path, struct kstat *stat,
                                  STATX_ATTR_IMMUTABLE |
                                  STATX_ATTR_NODUMP);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
 
        if (gfs2_holder_initialized(&gh))
                gfs2_glock_dq_uninit(&gh);
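
The init_dinode() hunk above swaps the open-coded MAJOR(ip->i_inode.i_rdev)/MINOR(...) for the <linux/fs.h> helpers; for reference, those helpers are (quoted from memory, not part of this patch) simply:

    static inline unsigned imajor(const struct inode *inode)
    {
            return MAJOR(inode->i_rdev);
    }

    static inline unsigned iminor(const struct inode *inode)
    {
            return MINOR(inode->i_rdev);
    }
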
index 8073b8d..c447bd5 100644 (file)
@@ -99,7 +99,8 @@ extern int gfs2_inode_refresh(struct gfs2_inode *ip);
 
 extern struct inode *gfs2_lookupi(struct inode *dir, const struct qstr *name,
                                  int is_root);
-extern int gfs2_permission(struct inode *inode, int mask);
+extern int gfs2_permission(struct user_namespace *mnt_userns,
+                          struct inode *inode, int mask);
 extern int gfs2_setattr_simple(struct inode *inode, struct iattr *attr);
 extern struct inode *gfs2_lookup_simple(struct inode *dip, const char *name);
 extern void gfs2_dinode_out(const struct gfs2_inode *ip, void *buf);
index 9f2b560..153272f 100644 (file)
@@ -284,7 +284,6 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
 {
        struct gfs2_sbd *sdp = gl->gl_name.ln_sbd;
        struct lm_lockstruct *ls = &sdp->sd_lockstruct;
-       int lvb_needs_unlock = 0;
        int error;
 
        if (gl->gl_lksb.sb_lkid == 0) {
@@ -297,13 +296,10 @@ static void gdlm_put_lock(struct gfs2_glock *gl)
        gfs2_sbstats_inc(gl, GFS2_LKS_DCOUNT);
        gfs2_update_request_times(gl);
 
-       /* don't want to skip dlm_unlock writing the lvb when lock is ex */
-
-       if (gl->gl_lksb.sb_lvbptr && (gl->gl_state == LM_ST_EXCLUSIVE))
-               lvb_needs_unlock = 1;
+       /* don't want to skip dlm_unlock writing the lvb when lock has one */
 
        if (test_bit(SDF_SKIP_DLM_UNLOCK, &sdp->sd_flags) &&
-           !lvb_needs_unlock) {
+           !gl->gl_lksb.sb_lvbptr) {
                gfs2_glock_free(gl);
                return;
        }
index 2e93140..6410281 100644 (file)
@@ -50,10 +50,12 @@ unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct)
        unsigned int blks;
        unsigned int first, second;
 
+       /* The initial struct gfs2_log_descriptor block */
        blks = 1;
        first = sdp->sd_ldptrs;
 
        if (nstruct > first) {
+               /* Subsequent struct gfs2_meta_header blocks */
                second = sdp->sd_inptrs;
                blks += DIV_ROUND_UP(nstruct - first, second);
        }
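
A hedged worked example of the formula the new comments describe, assuming 4 KiB blocks where roughly sd_ldptrs = 503 structures fit behind the initial log descriptor and sd_inptrs = 509 behind each subsequent meta header: packing nstruct = 1200 revokes costs 1 + DIV_ROUND_UP(1200 - 503, 509) = 3 blocks.
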
@@ -89,7 +91,7 @@ void gfs2_remove_from_ail(struct gfs2_bufdata *bd)
 
 static int gfs2_ail1_start_one(struct gfs2_sbd *sdp,
                               struct writeback_control *wbc,
-                              struct gfs2_trans *tr)
+                              struct gfs2_trans *tr, struct blk_plug *plug)
 __releases(&sdp->sd_ail_lock)
 __acquires(&sdp->sd_ail_lock)
 {
@@ -131,6 +133,11 @@ __acquires(&sdp->sd_ail_lock)
                        continue;
                spin_unlock(&sdp->sd_ail_lock);
                ret = generic_writepages(mapping, wbc);
+               if (need_resched()) {
+                       blk_finish_plug(plug);
+                       cond_resched();
+                       blk_start_plug(plug);
+               }
                spin_lock(&sdp->sd_ail_lock);
                if (ret == -ENODATA) /* if a jdata write into a new hole */
                        ret = 0; /* ignore it */
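
A minimal sketch of the unplug/reschedule/replug dance this hunk adds (the example_ helper is hypothetical): with a blk_plug held across many generic_writepages() calls, briefly finishing the plug before cond_resched() lets the queued bios be issued instead of sitting idle while we sleep.

    #include <linux/blkdev.h>
    #include <linux/sched.h>
    #include <linux/writeback.h>

    static void example_writepages_batch(struct address_space *mapping,
                                         struct writeback_control *wbc,
                                         struct blk_plug *plug)
    {
            generic_writepages(mapping, wbc);
            if (need_resched()) {
                    blk_finish_plug(plug);  /* flush queued bios first */
                    cond_resched();
                    blk_start_plug(plug);   /* resume batching */
            }
    }
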
@@ -205,7 +212,7 @@ restart:
        list_for_each_entry_reverse(tr, head, tr_list) {
                if (wbc->nr_to_write <= 0)
                        break;
-               ret = gfs2_ail1_start_one(sdp, wbc, tr);
+               ret = gfs2_ail1_start_one(sdp, wbc, tr, &plug);
                if (ret) {
                        if (ret == -EBUSY)
                                goto restart;
@@ -240,6 +247,45 @@ static void gfs2_ail1_start(struct gfs2_sbd *sdp)
        return gfs2_ail1_flush(sdp, &wbc);
 }
 
+static void gfs2_log_update_flush_tail(struct gfs2_sbd *sdp)
+{
+       unsigned int new_flush_tail = sdp->sd_log_head;
+       struct gfs2_trans *tr;
+
+       if (!list_empty(&sdp->sd_ail1_list)) {
+               tr = list_last_entry(&sdp->sd_ail1_list,
+                                    struct gfs2_trans, tr_list);
+               new_flush_tail = tr->tr_first;
+       }
+       sdp->sd_log_flush_tail = new_flush_tail;
+}
+
+static void gfs2_log_update_head(struct gfs2_sbd *sdp)
+{
+       unsigned int new_head = sdp->sd_log_flush_head;
+
+       if (sdp->sd_log_flush_tail == sdp->sd_log_head)
+               sdp->sd_log_flush_tail = new_head;
+       sdp->sd_log_head = new_head;
+}
+
+/**
+ * gfs2_ail_empty_tr - empty one of the ail lists of a transaction
+ * @sdp: The GFS2 superblock
+ * @tr: The transaction
+ * @head: The list to empty (tr->tr_ail1_list or tr->tr_ail2_list)
+ */
+
+static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+                             struct list_head *head)
+{
+       struct gfs2_bufdata *bd;
+
+       while (!list_empty(head)) {
+               bd = list_first_entry(head, struct gfs2_bufdata,
+                                     bd_ail_st_list);
+               gfs2_assert(sdp, bd->bd_tr == tr);
+               gfs2_remove_from_ail(bd);
+       }
+}
+
 /**
  * gfs2_ail1_empty_one - Check whether or not a trans in the AIL has been synced
  * @sdp: the filesystem
@@ -315,6 +361,7 @@ static int gfs2_ail1_empty(struct gfs2_sbd *sdp, int max_revokes)
                else
                        oldest_tr = 0;
        }
+       gfs2_log_update_flush_tail(sdp);
        ret = list_empty(&sdp->sd_ail1_list);
        spin_unlock(&sdp->sd_ail_lock);
 
@@ -348,47 +395,69 @@ static void gfs2_ail1_wait(struct gfs2_sbd *sdp)
        spin_unlock(&sdp->sd_ail_lock);
 }
 
-/**
- * gfs2_ail_empty_tr - empty one of the ail lists for a transaction
- */
-
-static void gfs2_ail_empty_tr(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
-                             struct list_head *head)
+static void __ail2_empty(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
 {
-       struct gfs2_bufdata *bd;
-
-       while (!list_empty(head)) {
-               bd = list_first_entry(head, struct gfs2_bufdata,
-                                     bd_ail_st_list);
-               gfs2_assert(sdp, bd->bd_tr == tr);
-               gfs2_remove_from_ail(bd);
-       }
+       gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
+       list_del(&tr->tr_list);
+       gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
+       gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
+       gfs2_trans_free(sdp, tr);
 }
 
 static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 {
-       struct gfs2_trans *tr, *safe;
+       struct list_head *ail2_list = &sdp->sd_ail2_list;
        unsigned int old_tail = sdp->sd_log_tail;
-       int wrap = (new_tail < old_tail);
-       int a, b, rm;
+       struct gfs2_trans *tr, *safe;
 
        spin_lock(&sdp->sd_ail_lock);
+       if (old_tail <= new_tail) {
+               list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
+                       if (old_tail <= tr->tr_first && tr->tr_first < new_tail)
+                               __ail2_empty(sdp, tr);
+               }
+       } else {
+               list_for_each_entry_safe(tr, safe, ail2_list, tr_list) {
+                       if (old_tail <= tr->tr_first || tr->tr_first < new_tail)
+                               __ail2_empty(sdp, tr);
+               }
+       }
+       spin_unlock(&sdp->sd_ail_lock);
+}
 
-       list_for_each_entry_safe(tr, safe, &sdp->sd_ail2_list, tr_list) {
-               a = (old_tail <= tr->tr_first);
-               b = (tr->tr_first < new_tail);
-               rm = (wrap) ? (a || b) : (a && b);
-               if (!rm)
-                       continue;
+/**
+ * gfs2_log_is_empty - Check if the log is empty
+ * @sdp: The GFS2 superblock
+ */
 
-               gfs2_ail_empty_tr(sdp, tr, &tr->tr_ail2_list);
-               list_del(&tr->tr_list);
-               gfs2_assert_warn(sdp, list_empty(&tr->tr_ail1_list));
-               gfs2_assert_warn(sdp, list_empty(&tr->tr_ail2_list));
-               gfs2_trans_free(sdp, tr);
+bool gfs2_log_is_empty(struct gfs2_sbd *sdp)
+{
+       return atomic_read(&sdp->sd_log_blks_free) == sdp->sd_jdesc->jd_blocks;
+}
+
+static bool __gfs2_log_try_reserve_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
+{
+       unsigned int available;
+
+       available = atomic_read(&sdp->sd_log_revokes_available);
+       while (available >= revokes) {
+               if (atomic_try_cmpxchg(&sdp->sd_log_revokes_available,
+                                      &available, available - revokes))
+                       return true;
        }
+       return false;
+}
 
-       spin_unlock(&sdp->sd_ail_lock);
+/**
+ * gfs2_log_release_revokes - Release a given number of revokes
+ * @sdp: The GFS2 superblock
+ * @revokes: The number of revokes to release
+ *
+ * sdp->sd_log_flush_lock must be held.
+ */
+void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes)
+{
+       if (revokes)
+               atomic_add(revokes, &sdp->sd_log_revokes_available);
 }
 
 /**
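
Both __gfs2_log_try_reserve_revokes() above and __gfs2_log_try_reserve() below use the same lock-free claim loop. A self-contained sketch of the idiom (hypothetical example_ name): atomic_try_cmpxchg() reloads the observed value into "available" on failure, so the loop re-tests the bound without an extra atomic_read() and gives up as soon as too little is left.

    #include <linux/atomic.h>

    static bool example_try_claim(atomic_t *counter, unsigned int want)
    {
            unsigned int available = atomic_read(counter);

            while (available >= want) {
                    if (atomic_try_cmpxchg(counter, &available,
                                           available - want))
                            return true;    /* claimed */
                    /* lost a race; "available" now holds the fresh value */
            }
            return false;                   /* caller must wait or fail */
    }
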
@@ -400,86 +469,141 @@ static void ail2_empty(struct gfs2_sbd *sdp, unsigned int new_tail)
 
 void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks)
 {
-
        atomic_add(blks, &sdp->sd_log_blks_free);
        trace_gfs2_log_blocks(sdp, blks);
        gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
                                  sdp->sd_jdesc->jd_blocks);
-       up_read(&sdp->sd_log_flush_lock);
+       if (atomic_read(&sdp->sd_log_blks_needed))
+               wake_up(&sdp->sd_log_waitq);
 }
 
 /**
- * gfs2_log_reserve - Make a log reservation
+ * __gfs2_log_try_reserve - Try to make a log reservation
  * @sdp: The GFS2 superblock
  * @blks: The number of blocks to reserve
+ * @taboo_blks: The number of blocks to leave free
  *
- * Note that we never give out the last few blocks of the journal. Thats
- * due to the fact that there is a small number of header blocks
- * associated with each log flush. The exact number can't be known until
- * flush time, so we ensure that we have just enough free blocks at all
- * times to avoid running out during a log flush.
+ * Try to do the same as __gfs2_log_reserve(), but fail if no more log
+ * space is immediately available.
+ */
+static bool __gfs2_log_try_reserve(struct gfs2_sbd *sdp, unsigned int blks,
+                                  unsigned int taboo_blks)
+{
+       unsigned wanted = blks + taboo_blks;
+       unsigned int free_blocks;
+
+       free_blocks = atomic_read(&sdp->sd_log_blks_free);
+       while (free_blocks >= wanted) {
+               if (atomic_try_cmpxchg(&sdp->sd_log_blks_free, &free_blocks,
+                                      free_blocks - blks)) {
+                       trace_gfs2_log_blocks(sdp, -blks);
+                       return true;
+               }
+       }
+       return false;
+}
+
+/**
+ * __gfs2_log_reserve - Make a log reservation
+ * @sdp: The GFS2 superblock
+ * @blks: The number of blocks to reserve
+ * @taboo_blks: The number of blocks to leave free
+ *
+ * @taboo_blks is set to 0 for logd, and to GFS2_LOG_FLUSH_MIN_BLOCKS
+ * for all other processes.  This ensures that when the log is almost full,
+ * logd will still be able to call gfs2_log_flush one more time without
+ * blocking, which will advance the tail and make some more log space
+ * available.
  *
  * We no longer flush the log here, instead we wake up logd to do that
  * for us. To avoid the thundering herd and to ensure that we deal fairly
  * with queued waiters, we use an exclusive wait. This means that when we
  * get woken with enough journal space to get our reservation, we need to
  * wake the next waiter on the list.
- *
- * Returns: errno
  */
 
-int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks)
+static void __gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks,
+                              unsigned int taboo_blks)
 {
-       int ret = 0;
-       unsigned reserved_blks = 7 * (4096 / sdp->sd_vfs->s_blocksize);
-       unsigned wanted = blks + reserved_blks;
-       DEFINE_WAIT(wait);
-       int did_wait = 0;
+       unsigned wanted = blks + taboo_blks;
        unsigned int free_blocks;
 
-       if (gfs2_assert_warn(sdp, blks) ||
-           gfs2_assert_warn(sdp, blks <= sdp->sd_jdesc->jd_blocks))
-               return -EINVAL;
        atomic_add(blks, &sdp->sd_log_blks_needed);
-retry:
-       free_blocks = atomic_read(&sdp->sd_log_blks_free);
-       if (unlikely(free_blocks <= wanted)) {
-               do {
-                       prepare_to_wait_exclusive(&sdp->sd_log_waitq, &wait,
-                                       TASK_UNINTERRUPTIBLE);
+       for (;;) {
+               if (current != sdp->sd_logd_process)
                        wake_up(&sdp->sd_logd_waitq);
-                       did_wait = 1;
-                       if (atomic_read(&sdp->sd_log_blks_free) <= wanted)
-                               io_schedule();
-                       free_blocks = atomic_read(&sdp->sd_log_blks_free);
-               } while(free_blocks <= wanted);
-               finish_wait(&sdp->sd_log_waitq, &wait);
-       }
-       atomic_inc(&sdp->sd_reserving_log);
-       if (atomic_cmpxchg(&sdp->sd_log_blks_free, free_blocks,
-                               free_blocks - blks) != free_blocks) {
-               if (atomic_dec_and_test(&sdp->sd_reserving_log))
-                       wake_up(&sdp->sd_reserving_log_wait);
-               goto retry;
+               io_wait_event(sdp->sd_log_waitq,
+                       (free_blocks = atomic_read(&sdp->sd_log_blks_free),
+                        free_blocks >= wanted));
+               do {
+                       if (atomic_try_cmpxchg(&sdp->sd_log_blks_free,
+                                              &free_blocks,
+                                              free_blocks - blks))
+                               goto reserved;
+               } while (free_blocks >= wanted);
        }
-       atomic_sub(blks, &sdp->sd_log_blks_needed);
-       trace_gfs2_log_blocks(sdp, -blks);
 
-       /*
-        * If we waited, then so might others, wake them up _after_ we get
-        * our share of the log.
-        */
-       if (unlikely(did_wait))
+reserved:
+       trace_gfs2_log_blocks(sdp, -blks);
+       if (atomic_sub_return(blks, &sdp->sd_log_blks_needed))
                wake_up(&sdp->sd_log_waitq);
+}
+
+/**
+ * gfs2_log_try_reserve - Try to make a log reservation
+ * @sdp: The GFS2 superblock
+ * @tr: The transaction
+ * @extra_revokes: The number of additional revokes reserved (output)
+ *
+ * This is similar to gfs2_log_reserve, but sdp->sd_log_flush_lock must be
+ * held for correct revoke accounting.
+ */
+
+bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+                         unsigned int *extra_revokes)
+{
+       unsigned int blks = tr->tr_reserved;
+       unsigned int revokes = tr->tr_revokes;
+       unsigned int revoke_blks = 0;
+
+       *extra_revokes = 0;
+       if (revokes && !__gfs2_log_try_reserve_revokes(sdp, revokes)) {
+               revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
+               *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
+               blks += revoke_blks;
+       }
+       if (!blks)
+               return true;
+       if (__gfs2_log_try_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS))
+               return true;
+       if (!revoke_blks)
+               gfs2_log_release_revokes(sdp, revokes);
+       return false;
+}
 
-       down_read(&sdp->sd_log_flush_lock);
-       if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
-               gfs2_log_release(sdp, blks);
-               ret = -EROFS;
+/**
+ * gfs2_log_reserve - Make a log reservation
+ * @sdp: The GFS2 superblock
+ * @tr: The transaction
+ * @extra_revokes: The number of additional revokes reserved (output)
+ *
+ * sdp->sd_log_flush_lock must not be held.
+ */
+
+void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+                     unsigned int *extra_revokes)
+{
+       unsigned int blks = tr->tr_reserved;
+       unsigned int revokes = tr->tr_revokes;
+       unsigned int revoke_blks = 0;
+
+       *extra_revokes = 0;
+       if (revokes) {
+               revoke_blks = DIV_ROUND_UP(revokes, sdp->sd_inptrs);
+               *extra_revokes = revoke_blks * sdp->sd_inptrs - revokes;
+               blks += revoke_blks;
        }
-       if (atomic_dec_and_test(&sdp->sd_reserving_log))
-               wake_up(&sdp->sd_reserving_log_wait);
-       return ret;
+       __gfs2_log_reserve(sdp, blks, GFS2_LOG_FLUSH_MIN_BLOCKS);
 }
 
 /**
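
A worked example of the revoke accounting in gfs2_log_try_reserve() and gfs2_log_reserve(), assuming 4 KiB blocks where sd_inptrs is roughly 509 revokes per block: a transaction carrying tr_revokes = 1000 reserves DIV_ROUND_UP(1000, 509) = 2 extra blocks and reports 2 * 509 - 1000 = 18 surplus revokes back through *extra_revokes.
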
@@ -507,24 +631,20 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
 }
 
 /**
- * calc_reserved - Calculate the number of blocks to reserve when
- *                 refunding a transaction's unused buffers.
+ * calc_reserved - Calculate the number of blocks to keep reserved
  * @sdp: The GFS2 superblock
  *
  * This is complex.  We need to reserve room for all our currently used
- * metadata buffers (e.g. normal file I/O rewriting file time stamps) and 
- * all our journaled data buffers for journaled files (e.g. files in the 
+ * metadata blocks (e.g. normal file I/O rewriting file time stamps) and
+ * all our journaled data blocks for journaled files (e.g. files in the
  * meta_fs like rindex, or files for which chattr +j was done.)
- * If we don't reserve enough space, gfs2_log_refund and gfs2_log_flush
- * will count it as free space (sd_log_blks_free) and corruption will follow.
+ * If we don't reserve enough space, corruption will follow.
  *
- * We can have metadata bufs and jdata bufs in the same journal.  So each
- * type gets its own log header, for which we need to reserve a block.
- * In fact, each type has the potential for needing more than one header 
- * in cases where we have more buffers than will fit on a journal page.
+ * We can have metadata blocks and jdata blocks in the same journal.  Each
+ * type gets its own log descriptor, for which we need to reserve a block.
+ * In fact, each type has the potential for needing more than one log descriptor
+ * in cases where we have more blocks than will fit in a log descriptor.
  * Metadata journal entries take up half the space of journaled buffer entries.
- * Thus, metadata entries have buf_limit (502) and journaled buffers have
- * databuf_limit (251) before they cause a wrap around.
  *
  * Also, we need to reserve blocks for revoke journal entries and one for an
  * overall header for the lot.
@@ -533,59 +653,29 @@ static inline unsigned int log_distance(struct gfs2_sbd *sdp, unsigned int newer
  */
 static unsigned int calc_reserved(struct gfs2_sbd *sdp)
 {
-       unsigned int reserved = 0;
-       unsigned int mbuf;
-       unsigned int dbuf;
+       unsigned int reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
+       unsigned int blocks;
        struct gfs2_trans *tr = sdp->sd_log_tr;
 
        if (tr) {
-               mbuf = tr->tr_num_buf_new - tr->tr_num_buf_rm;
-               dbuf = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
-               reserved = mbuf + dbuf;
-               /* Account for header blocks */
-               reserved += DIV_ROUND_UP(mbuf, buf_limit(sdp));
-               reserved += DIV_ROUND_UP(dbuf, databuf_limit(sdp));
+               blocks = tr->tr_num_buf_new - tr->tr_num_buf_rm;
+               reserved += blocks + DIV_ROUND_UP(blocks, buf_limit(sdp));
+               blocks = tr->tr_num_databuf_new - tr->tr_num_databuf_rm;
+               reserved += blocks + DIV_ROUND_UP(blocks, databuf_limit(sdp));
        }
-
-       if (sdp->sd_log_committed_revoke > 0)
-               reserved += gfs2_struct2blk(sdp, sdp->sd_log_committed_revoke);
-       /* One for the overall header */
-       if (reserved)
-               reserved++;
        return reserved;
 }
 
-static unsigned int current_tail(struct gfs2_sbd *sdp)
-{
-       struct gfs2_trans *tr;
-       unsigned int tail;
-
-       spin_lock(&sdp->sd_ail_lock);
-
-       if (list_empty(&sdp->sd_ail1_list)) {
-               tail = sdp->sd_log_head;
-       } else {
-               tr = list_last_entry(&sdp->sd_ail1_list, struct gfs2_trans,
-                               tr_list);
-               tail = tr->tr_first;
-       }
-
-       spin_unlock(&sdp->sd_ail_lock);
-
-       return tail;
-}
-
-static void log_pull_tail(struct gfs2_sbd *sdp, unsigned int new_tail)
+static void log_pull_tail(struct gfs2_sbd *sdp)
 {
-       unsigned int dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
+       unsigned int new_tail = sdp->sd_log_flush_tail;
+       unsigned int dist;
 
+       if (new_tail == sdp->sd_log_tail)
+               return;
+       dist = log_distance(sdp, new_tail, sdp->sd_log_tail);
        ail2_empty(sdp, new_tail);
-
-       atomic_add(dist, &sdp->sd_log_blks_free);
-       trace_gfs2_log_blocks(sdp, dist);
-       gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
-                            sdp->sd_jdesc->jd_blocks);
-
+       gfs2_log_release(sdp, dist);
        sdp->sd_log_tail = new_tail;
 }
 
@@ -698,7 +788,7 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
 }
 
 /**
- * gfs2_write_revokes - Add as many revokes to the system transaction as we can
+ * gfs2_flush_revokes - Add as many revokes to the system transaction as we can
  * @sdp: The GFS2 superblock
  *
  * Our usual strategy is to defer writing revokes as much as we can in the hope
@@ -709,38 +799,14 @@ void gfs2_glock_remove_revoke(struct gfs2_glock *gl)
  * been written back.  This will basically come at no cost now, and will save
  * us from having to keep track of those blocks on the AIL2 list later.
  */
-void gfs2_write_revokes(struct gfs2_sbd *sdp)
+void gfs2_flush_revokes(struct gfs2_sbd *sdp)
 {
        /* number of revokes we still have room for */
-       int max_revokes = (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_log_descriptor)) / sizeof(u64);
+       unsigned int max_revokes = atomic_read(&sdp->sd_log_revokes_available);
 
        gfs2_log_lock(sdp);
-       while (sdp->sd_log_num_revoke > max_revokes)
-               max_revokes += (sdp->sd_sb.sb_bsize - sizeof(struct gfs2_meta_header)) / sizeof(u64);
-       max_revokes -= sdp->sd_log_num_revoke;
-       if (!sdp->sd_log_num_revoke) {
-               atomic_dec(&sdp->sd_log_blks_free);
-               /* If no blocks have been reserved, we need to also
-                * reserve a block for the header */
-               if (!sdp->sd_log_blks_reserved) {
-                       atomic_dec(&sdp->sd_log_blks_free);
-                       trace_gfs2_log_blocks(sdp, -2);
-               } else {
-                       trace_gfs2_log_blocks(sdp, -1);
-               }
-       }
        gfs2_ail1_empty(sdp, max_revokes);
        gfs2_log_unlock(sdp);
-
-       if (!sdp->sd_log_num_revoke) {
-               atomic_inc(&sdp->sd_log_blks_free);
-               if (!sdp->sd_log_blks_reserved) {
-                       atomic_inc(&sdp->sd_log_blks_free);
-                       trace_gfs2_log_blocks(sdp, 2);
-               } else {
-                       trace_gfs2_log_blocks(sdp, 1);
-               }
-       }
 }
 
 /**
@@ -769,7 +835,7 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
        u64 dblock;
 
        if (gfs2_withdrawn(sdp))
-               goto out;
+               return;
 
        page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
        lh = page_address(page);
@@ -822,10 +888,8 @@ void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
                     sb->s_blocksize - LH_V1_SIZE - 4);
        lh->lh_crc = cpu_to_be32(crc);
 
-       gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
-       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE | op_flags);
-out:
-       log_flush_wait(sdp);
+       gfs2_log_write(sdp, jd, page, sb->s_blocksize, 0, dblock);
+       gfs2_log_submit_bio(&jd->jd_log_bio, REQ_OP_WRITE | op_flags);
 }
 
 /**
@@ -838,25 +902,24 @@ out:
 
 static void log_write_header(struct gfs2_sbd *sdp, u32 flags)
 {
-       unsigned int tail;
        int op_flags = REQ_PREFLUSH | REQ_FUA | REQ_META | REQ_SYNC;
        enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
 
        gfs2_assert_withdraw(sdp, (state != SFS_FROZEN));
-       tail = current_tail(sdp);
 
        if (test_bit(SDF_NOBARRIERS, &sdp->sd_flags)) {
                gfs2_ordered_wait(sdp);
                log_flush_wait(sdp);
                op_flags = REQ_SYNC | REQ_META | REQ_PRIO;
        }
-       sdp->sd_log_idle = (tail == sdp->sd_log_flush_head);
-       gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++, tail,
-                             sdp->sd_log_flush_head, flags, op_flags);
+       sdp->sd_log_idle = (sdp->sd_log_flush_tail == sdp->sd_log_flush_head);
+       gfs2_write_log_header(sdp, sdp->sd_jdesc, sdp->sd_log_sequence++,
+                             sdp->sd_log_flush_tail, sdp->sd_log_flush_head,
+                             flags, op_flags);
        gfs2_log_incr_head(sdp);
-
-       if (sdp->sd_log_tail != tail)
-               log_pull_tail(sdp, tail);
+       log_flush_wait(sdp);
+       log_pull_tail(sdp);
+       gfs2_log_update_head(sdp);
 }
 
 /**
@@ -935,12 +998,16 @@ static void trans_drain(struct gfs2_trans *tr)
        while (!list_empty(head)) {
                bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
                list_del_init(&bd->bd_list);
+               if (!list_empty(&bd->bd_ail_st_list))
+                       gfs2_remove_from_ail(bd);
                kmem_cache_free(gfs2_bufdata_cachep, bd);
        }
        head = &tr->tr_databuf;
        while (!list_empty(head)) {
                bd = list_first_entry(head, struct gfs2_bufdata, bd_list);
                list_del_init(&bd->bd_list);
+               if (!list_empty(&bd->bd_ail_st_list))
+                       gfs2_remove_from_ail(bd);
                kmem_cache_free(gfs2_bufdata_cachep, bd);
        }
 }
@@ -956,42 +1023,66 @@ static void trans_drain(struct gfs2_trans *tr)
 void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
 {
        struct gfs2_trans *tr = NULL;
+       unsigned int reserved_blocks = 0, used_blocks = 0;
        enum gfs2_freeze_state state = atomic_read(&sdp->sd_freeze_state);
+       unsigned int first_log_head;
+       unsigned int reserved_revokes = 0;
 
        down_write(&sdp->sd_log_flush_lock);
+       trace_gfs2_log_flush(sdp, 1, flags);
 
+repeat:
        /*
         * Do this check while holding the log_flush_lock to prevent new
         * buffers from being added to the ail via gfs2_pin()
         */
-       if (gfs2_withdrawn(sdp))
+       if (gfs2_withdrawn(sdp) || !test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
                goto out;
 
        /* Log might have been flushed while we waited for the flush lock */
        if (gl && !test_bit(GLF_LFLUSH, &gl->gl_flags))
                goto out;
-       trace_gfs2_log_flush(sdp, 1, flags);
 
-       if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
-               clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+       first_log_head = sdp->sd_log_head;
+       sdp->sd_log_flush_head = first_log_head;
 
-       sdp->sd_log_flush_head = sdp->sd_log_head;
        tr = sdp->sd_log_tr;
-       if (tr) {
-               sdp->sd_log_tr = NULL;
-               tr->tr_first = sdp->sd_log_flush_head;
-               if (unlikely (state == SFS_FROZEN))
-                       if (gfs2_assert_withdraw_delayed(sdp,
-                              !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
-                               goto out_withdraw;
+       if (tr || sdp->sd_log_num_revoke) {
+               if (reserved_blocks)
+                       gfs2_log_release(sdp, reserved_blocks);
+               reserved_blocks = sdp->sd_log_blks_reserved;
+               reserved_revokes = sdp->sd_log_num_revoke;
+               if (tr) {
+                       sdp->sd_log_tr = NULL;
+                       tr->tr_first = first_log_head;
+                       if (unlikely (state == SFS_FROZEN)) {
+                               if (gfs2_assert_withdraw_delayed(sdp,
+                                      !tr->tr_num_buf_new && !tr->tr_num_databuf_new))
+                                       goto out_withdraw;
+                       }
+               }
+       } else if (!reserved_blocks) {
+               unsigned int taboo_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
+
+               reserved_blocks = GFS2_LOG_FLUSH_MIN_BLOCKS;
+               if (current == sdp->sd_logd_process)
+                       taboo_blocks = 0;
+
+               if (!__gfs2_log_try_reserve(sdp, reserved_blocks, taboo_blocks)) {
+                       up_write(&sdp->sd_log_flush_lock);
+                       __gfs2_log_reserve(sdp, reserved_blocks, taboo_blocks);
+                       down_write(&sdp->sd_log_flush_lock);
+                       goto repeat;
+               }
+               BUG_ON(sdp->sd_log_num_revoke);
        }
 
+       if (flags & GFS2_LOG_HEAD_FLUSH_SHUTDOWN)
+               clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
+
        if (unlikely(state == SFS_FROZEN))
-               if (gfs2_assert_withdraw_delayed(sdp, !sdp->sd_log_num_revoke))
+               if (gfs2_assert_withdraw_delayed(sdp, !reserved_revokes))
                        goto out_withdraw;
-       if (gfs2_assert_withdraw_delayed(sdp,
-                       sdp->sd_log_num_revoke == sdp->sd_log_committed_revoke))
-               goto out_withdraw;
 
        gfs2_ordered_write(sdp);
        if (gfs2_withdrawn(sdp))
@@ -999,16 +1090,13 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
        lops_before_commit(sdp, tr);
        if (gfs2_withdrawn(sdp))
                goto out_withdraw;
-       gfs2_log_submit_bio(&sdp->sd_log_bio, REQ_OP_WRITE);
+       gfs2_log_submit_bio(&sdp->sd_jdesc->jd_log_bio, REQ_OP_WRITE);
        if (gfs2_withdrawn(sdp))
                goto out_withdraw;
 
        if (sdp->sd_log_head != sdp->sd_log_flush_head) {
-               log_flush_wait(sdp);
                log_write_header(sdp, flags);
-       } else if (sdp->sd_log_tail != current_tail(sdp) && !sdp->sd_log_idle){
-               atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
-               trace_gfs2_log_blocks(sdp, -1);
+       } else if (sdp->sd_log_tail != sdp->sd_log_flush_tail && !sdp->sd_log_idle) {
                log_write_header(sdp, flags);
        }
        if (gfs2_withdrawn(sdp))
@@ -1016,9 +1104,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
        lops_after_commit(sdp, tr);
 
        gfs2_log_lock(sdp);
-       sdp->sd_log_head = sdp->sd_log_flush_head;
        sdp->sd_log_blks_reserved = 0;
-       sdp->sd_log_committed_revoke = 0;
 
        spin_lock(&sdp->sd_ail_lock);
        if (tr && !list_empty(&tr->tr_ail1_list)) {
@@ -1033,10 +1119,7 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
                        empty_ail1_list(sdp);
                        if (gfs2_withdrawn(sdp))
                                goto out_withdraw;
-                       atomic_dec(&sdp->sd_log_blks_free); /* Adjust for unreserved buffer */
-                       trace_gfs2_log_blocks(sdp, -1);
                        log_write_header(sdp, flags);
-                       sdp->sd_log_head = sdp->sd_log_flush_head;
                }
                if (flags & (GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
                             GFS2_LOG_HEAD_FLUSH_FREEZE))
@@ -1046,12 +1129,22 @@ void gfs2_log_flush(struct gfs2_sbd *sdp, struct gfs2_glock *gl, u32 flags)
        }
 
 out_end:
-       trace_gfs2_log_flush(sdp, 0, flags);
+       used_blocks = log_distance(sdp, sdp->sd_log_flush_head, first_log_head);
+       reserved_revokes += atomic_read(&sdp->sd_log_revokes_available);
+       atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
+       gfs2_assert_withdraw(sdp, reserved_revokes % sdp->sd_inptrs == sdp->sd_ldptrs);
+       if (reserved_revokes > sdp->sd_ldptrs)
+               reserved_blocks += (reserved_revokes - sdp->sd_ldptrs) / sdp->sd_inptrs;
 out:
+       if (used_blocks != reserved_blocks) {
+               gfs2_assert_withdraw_delayed(sdp, used_blocks < reserved_blocks);
+               gfs2_log_release(sdp, reserved_blocks - used_blocks);
+       }
        up_write(&sdp->sd_log_flush_lock);
        gfs2_trans_free(sdp, tr);
        if (gfs2_withdrawing(sdp))
                gfs2_withdraw(sdp);
+       trace_gfs2_log_flush(sdp, 0, flags);
        return;
 
 out_withdraw:
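
The new out_end/out code accounts for what the flush actually consumed and returns any surplus. The distance calculation has to respect journal wrap-around; a hedged sketch of log_distance(), which is defined outside this hunk (presumed shape):

	static inline unsigned int log_distance(struct gfs2_sbd *sdp,
						unsigned int newer,
						unsigned int older)
	{
		int dist = newer - older;

		if (dist < 0)	/* the head wrapped past the journal end */
			dist += sdp->sd_jdesc->jd_blocks;
		return dist;
	}

With used_blocks known, any surplus flows back through gfs2_log_release(sdp, reserved_blocks - used_blocks), and the assertion that used_blocks < reserved_blocks guards against having written more than was reserved.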
@@ -1087,8 +1180,8 @@ static void gfs2_merge_trans(struct gfs2_sbd *sdp, struct gfs2_trans *new)
        old->tr_num_databuf_new += new->tr_num_databuf_new;
        old->tr_num_buf_rm      += new->tr_num_buf_rm;
        old->tr_num_databuf_rm  += new->tr_num_databuf_rm;
+       old->tr_revokes         += new->tr_revokes;
        old->tr_num_revoke      += new->tr_num_revoke;
-       old->tr_num_revoke_rm   += new->tr_num_revoke_rm;
 
        list_splice_tail_init(&new->tr_databuf, &old->tr_databuf);
        list_splice_tail_init(&new->tr_buf, &old->tr_buf);
@@ -1110,20 +1203,17 @@ static void log_refund(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
        if (sdp->sd_log_tr) {
                gfs2_merge_trans(sdp, tr);
        } else if (tr->tr_num_buf_new || tr->tr_num_databuf_new) {
-               gfs2_assert_withdraw(sdp, test_bit(TR_ALLOCED, &tr->tr_flags));
+               gfs2_assert_withdraw(sdp, !test_bit(TR_ONSTACK, &tr->tr_flags));
                sdp->sd_log_tr = tr;
                set_bit(TR_ATTACHED, &tr->tr_flags);
        }
 
-       sdp->sd_log_committed_revoke += tr->tr_num_revoke - tr->tr_num_revoke_rm;
        reserved = calc_reserved(sdp);
        maxres = sdp->sd_log_blks_reserved + tr->tr_reserved;
        gfs2_assert_withdraw(sdp, maxres >= reserved);
        unused = maxres - reserved;
-       atomic_add(unused, &sdp->sd_log_blks_free);
-       trace_gfs2_log_blocks(sdp, unused);
-       gfs2_assert_withdraw(sdp, atomic_read(&sdp->sd_log_blks_free) <=
-                            sdp->sd_jdesc->jd_blocks);
+       if (unused)
+               gfs2_log_release(sdp, unused);
        sdp->sd_log_blks_reserved = reserved;
 
        gfs2_log_unlock(sdp);
@@ -1166,15 +1256,11 @@ static void gfs2_log_shutdown(struct gfs2_sbd *sdp)
        gfs2_assert_withdraw(sdp, !sdp->sd_log_num_revoke);
        gfs2_assert_withdraw(sdp, list_empty(&sdp->sd_ail1_list));
 
-       sdp->sd_log_flush_head = sdp->sd_log_head;
-
        log_write_header(sdp, GFS2_LOG_HEAD_UNMOUNT | GFS2_LFC_SHUTDOWN);
+       log_pull_tail(sdp);
 
        gfs2_assert_warn(sdp, sdp->sd_log_head == sdp->sd_log_tail);
        gfs2_assert_warn(sdp, list_empty(&sdp->sd_ail2_list));
-
-       sdp->sd_log_head = sdp->sd_log_flush_head;
-       sdp->sd_log_tail = sdp->sd_log_head;
 }
 
 static inline int gfs2_jrnl_flush_reqd(struct gfs2_sbd *sdp)
@@ -1208,7 +1294,6 @@ int gfs2_logd(void *data)
        struct gfs2_sbd *sdp = data;
        unsigned long t = 1;
        DEFINE_WAIT(wait);
-       bool did_flush;
 
        while (!kthread_should_stop()) {
 
@@ -1227,12 +1312,10 @@ int gfs2_logd(void *data)
                        continue;
                }
 
-               did_flush = false;
                if (gfs2_jrnl_flush_reqd(sdp) || t == 0) {
                        gfs2_ail1_empty(sdp, 0);
                        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
-                                      GFS2_LFC_LOGD_JFLUSH_REQD);
-                       did_flush = true;
+                                                 GFS2_LFC_LOGD_JFLUSH_REQD);
                }
 
                if (gfs2_ail_flush_reqd(sdp)) {
@@ -1240,13 +1323,9 @@ int gfs2_logd(void *data)
                        gfs2_ail1_wait(sdp);
                        gfs2_ail1_empty(sdp, 0);
                        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
-                                      GFS2_LFC_LOGD_AIL_FLUSH_REQD);
-                       did_flush = true;
+                                                 GFS2_LFC_LOGD_AIL_FLUSH_REQD);
                }
 
-               if (!gfs2_ail_flush_reqd(sdp) || did_flush)
-                       wake_up(&sdp->sd_log_waitq);
-
                t = gfs2_tune_get(sdp, gt_logd_secs) * HZ;
 
                try_to_freeze();
index 79f9729..eea5801 100644 (file)
 #include "incore.h"
 #include "inode.h"
 
+/*
+ * The minimum amount of log space required for a log flush is one block for
+ * revokes and one block for the log header.  Log flushes other than
+ * GFS2_LOG_HEAD_FLUSH_NORMAL may write one or two more log headers.
+ */
+#define GFS2_LOG_FLUSH_MIN_BLOCKS 4
+
 /**
  * gfs2_log_lock - acquire the right to mess with the log manager
  * @sdp: the filesystem
@@ -43,7 +50,9 @@ static inline void gfs2_log_pointers_init(struct gfs2_sbd *sdp,
        if (++value == sdp->sd_jdesc->jd_blocks) {
                value = 0;
        }
-       sdp->sd_log_head = sdp->sd_log_tail = value;
+       sdp->sd_log_tail = value;
+       sdp->sd_log_flush_tail = value;
+       sdp->sd_log_head = value;
 }
 
 static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
@@ -64,8 +73,13 @@ static inline void gfs2_ordered_add_inode(struct gfs2_inode *ip)
 extern void gfs2_ordered_del_inode(struct gfs2_inode *ip);
 extern unsigned int gfs2_struct2blk(struct gfs2_sbd *sdp, unsigned int nstruct);
 extern void gfs2_remove_from_ail(struct gfs2_bufdata *bd);
+extern bool gfs2_log_is_empty(struct gfs2_sbd *sdp);
+extern void gfs2_log_release_revokes(struct gfs2_sbd *sdp, unsigned int revokes);
 extern void gfs2_log_release(struct gfs2_sbd *sdp, unsigned int blks);
-extern int gfs2_log_reserve(struct gfs2_sbd *sdp, unsigned int blks);
+extern bool gfs2_log_try_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+                                unsigned int *extra_revokes);
+extern void gfs2_log_reserve(struct gfs2_sbd *sdp, struct gfs2_trans *tr,
+                            unsigned int *extra_revokes);
 extern void gfs2_write_log_header(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
                                  u64 seq, u32 tail, u32 lblock, u32 flags,
                                  int op_flags);
@@ -78,6 +92,6 @@ extern void log_flush_wait(struct gfs2_sbd *sdp);
 extern int gfs2_logd(void *data);
 extern void gfs2_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd);
 extern void gfs2_glock_remove_revoke(struct gfs2_glock *gl);
-extern void gfs2_write_revokes(struct gfs2_sbd *sdp);
+extern void gfs2_flush_revokes(struct gfs2_sbd *sdp);
 
 #endif /* __LOG_DOT_H__ */
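
The header now exports a try/sleep pair instead of the old single blocking gfs2_log_reserve(). A hedged sketch of the presumed caller pattern (the converted call sites are outside this hunk):

	static void trans_reserve_sketch(struct gfs2_sbd *sdp,
					 struct gfs2_trans *tr)
	{
		unsigned int extra_revokes;

		/* Fast path: succeeds without sleeping when journal space
		 * and revoke slots are already available. */
		if (!gfs2_log_try_reserve(sdp, tr, &extra_revokes))
			/* Slow path: may block until log space frees up. */
			gfs2_log_reserve(sdp, tr, &extra_revokes);

		/* extra_revokes reports revoke slots granted beyond what
		 * tr asked for; surplus slots would go back through
		 * gfs2_log_release_revokes(). */
	}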
index 3922b26..a82f474 100644 (file)
@@ -76,15 +76,20 @@ static void maybe_release_space(struct gfs2_bufdata *bd)
        unsigned int index = bd->bd_bh->b_blocknr - gl->gl_name.ln_number;
        struct gfs2_bitmap *bi = rgd->rd_bits + index;
 
+       rgrp_lock_local(rgd);
        if (bi->bi_clone == NULL)
-               return;
+               goto out;
        if (sdp->sd_args.ar_discard)
                gfs2_rgrp_send_discards(sdp, rgd->rd_data0, bd->bd_bh, bi, 1, NULL);
        memcpy(bi->bi_clone + bi->bi_offset,
               bd->bd_bh->b_data + bi->bi_offset, bi->bi_bytes);
        clear_bit(GBF_FULL, &bi->bi_flags);
        rgd->rd_free_clone = rgd->rd_free;
+       BUG_ON(rgd->rd_free_clone < rgd->rd_reserved);
        rgd->rd_extfail_pt = rgd->rd_free;
+
+out:
+       rgrp_unlock_local(rgd);
 }
 
 /**
@@ -262,7 +267,7 @@ static struct bio *gfs2_log_alloc_bio(struct gfs2_sbd *sdp, u64 blkno,
                                      bio_end_io_t *end_io)
 {
        struct super_block *sb = sdp->sd_vfs;
-       struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_PAGES);
+       struct bio *bio = bio_alloc(GFP_NOIO, BIO_MAX_VECS);
 
        bio->bi_iter.bi_sector = blkno << sdp->sd_fsb2bb_shift;
        bio_set_dev(bio, sb->s_bdev);
@@ -322,17 +327,18 @@ static struct bio *gfs2_log_get_bio(struct gfs2_sbd *sdp, u64 blkno,
  * then add the page segment to that.
  */
 
-void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
-                   unsigned size, unsigned offset, u64 blkno)
+void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+                   struct page *page, unsigned size, unsigned offset,
+                   u64 blkno)
 {
        struct bio *bio;
        int ret;
 
-       bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio, REQ_OP_WRITE,
+       bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio, REQ_OP_WRITE,
                               gfs2_end_log_write, false);
        ret = bio_add_page(bio, page, size, offset);
        if (ret == 0) {
-               bio = gfs2_log_get_bio(sdp, blkno, &sdp->sd_log_bio,
+               bio = gfs2_log_get_bio(sdp, blkno, &jd->jd_log_bio,
                                       REQ_OP_WRITE, gfs2_end_log_write, true);
                ret = bio_add_page(bio, page, size, offset);
                WARN_ON(ret == 0);
@@ -355,7 +361,8 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
 
        dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
        gfs2_log_incr_head(sdp);
-       gfs2_log_write(sdp, bh->b_page, bh->b_size, bh_offset(bh), dblock);
+       gfs2_log_write(sdp, sdp->sd_jdesc, bh->b_page, bh->b_size,
+                      bh_offset(bh), dblock);
 }
 
 /**
@@ -369,14 +376,14 @@ static void gfs2_log_write_bh(struct gfs2_sbd *sdp, struct buffer_head *bh)
  * the page may be freed at any time.
  */
 
-void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
+static void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page)
 {
        struct super_block *sb = sdp->sd_vfs;
        u64 dblock;
 
        dblock = gfs2_log_bmap(sdp->sd_jdesc, sdp->sd_log_flush_head);
        gfs2_log_incr_head(sdp);
-       gfs2_log_write(sdp, page, sb->s_blocksize, 0, dblock);
+       gfs2_log_write(sdp, sdp->sd_jdesc, page, sb->s_blocksize, 0, dblock);
 }
 
 /**
@@ -845,7 +852,7 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
        struct page *page;
        unsigned int length;
 
-       gfs2_write_revokes(sdp);
+       gfs2_flush_revokes(sdp);
        if (!sdp->sd_log_num_revoke)
                return;
 
@@ -857,7 +864,6 @@ static void revoke_lo_before_commit(struct gfs2_sbd *sdp, struct gfs2_trans *tr)
                sdp->sd_log_num_revoke--;
 
                if (offset + sizeof(u64) > sdp->sd_sb.sb_bsize) {
-
                        gfs2_log_write_page(sdp, page);
                        page = mempool_alloc(gfs2_page_pool, GFP_NOIO);
                        mh = page_address(page);
index fbdbb08..31b6dd0 100644 (file)
 #include <linux/list.h>
 #include "incore.h"
 
-#define BUF_OFFSET \
-       ((sizeof(struct gfs2_log_descriptor) + sizeof(__be64) - 1) & \
-        ~(sizeof(__be64) - 1))
-#define DATABUF_OFFSET \
-       ((sizeof(struct gfs2_log_descriptor) + (2 * sizeof(__be64) - 1)) & \
-        ~(2 * sizeof(__be64) - 1))
-
 extern const struct gfs2_log_operations *gfs2_log_ops[];
 extern void gfs2_log_incr_head(struct gfs2_sbd *sdp);
 extern u64 gfs2_log_bmap(struct gfs2_jdesc *jd, unsigned int lbn);
-extern void gfs2_log_write(struct gfs2_sbd *sdp, struct page *page,
-                          unsigned size, unsigned offset, u64 blkno);
-extern void gfs2_log_write_page(struct gfs2_sbd *sdp, struct page *page);
+extern void gfs2_log_write(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
+                          struct page *page, unsigned size, unsigned offset,
+                          u64 blkno);
 extern void gfs2_log_submit_bio(struct bio **biop, int opf);
 extern void gfs2_pin(struct gfs2_sbd *sdp, struct buffer_head *bh);
 extern int gfs2_find_jhead(struct gfs2_jdesc *jd,
                           struct gfs2_log_header_host *head, bool keep_cache);
 static inline unsigned int buf_limit(struct gfs2_sbd *sdp)
 {
-       unsigned int limit;
-
-       limit = (sdp->sd_sb.sb_bsize - BUF_OFFSET) / sizeof(__be64);
-       return limit;
+       return sdp->sd_ldptrs;
 }
 
 static inline unsigned int databuf_limit(struct gfs2_sbd *sdp)
 {
-       unsigned int limit;
-
-       limit = (sdp->sd_sb.sb_bsize - DATABUF_OFFSET) / (2 * sizeof(__be64));
-       return limit;
+       return sdp->sd_ldptrs / 2;
 }
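
buf_limit() and databuf_limit() now return values precomputed at mount time. A sketch of the presumed precomputation, reconstructed from the removed BUF_OFFSET/DATABUF_OFFSET macros (the sd_ldptrs assignment itself is outside this hunk):

	/* In gfs2_read_sb() (sketch): __be64 block pointers per
	 * log-descriptor block.  BUF_OFFSET rounded the descriptor size up
	 * to __be64 alignment; sizeof(struct gfs2_log_descriptor) is
	 * already a multiple of 8, so the simple form is equivalent: */
	sdp->sd_ldptrs = (sdp->sd_sb.sb_bsize -
			  sizeof(struct gfs2_log_descriptor)) / sizeof(__be64);

Data buffers are journaled as pairs of 64-bit words (block pointer plus escape word, per the removed DATABUF_OFFSET), which is why databuf_limit() is simply sd_ldptrs / 2.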
 
 static inline void lops_before_commit(struct gfs2_sbd *sdp,
index c7393ee..28d0eb2 100644 (file)
@@ -98,7 +98,7 @@ static int __init init_gfs2_fs(void)
        error = -ENOMEM;
        gfs2_glock_cachep = kmem_cache_create("gfs2_glock",
                                              sizeof(struct gfs2_glock),
-                                             0, 0,
+                                             0, SLAB_RECLAIM_ACCOUNT,
                                              gfs2_init_glock_once);
        if (!gfs2_glock_cachep)
                goto fail_cachep1;
@@ -134,7 +134,7 @@ static int __init init_gfs2_fs(void)
 
        gfs2_quotad_cachep = kmem_cache_create("gfs2_quotad",
                                               sizeof(struct gfs2_quota_data),
-                                              0, 0, NULL);
+                                              0, SLAB_RECLAIM_ACCOUNT, NULL);
        if (!gfs2_quotad_cachep)
                goto fail_cachep6;
 
index 61fce59..aa41360 100644 (file)
@@ -136,8 +136,6 @@ static struct gfs2_sbd *init_sbd(struct super_block *sb)
 
        init_rwsem(&sdp->sd_log_flush_lock);
        atomic_set(&sdp->sd_log_in_flight, 0);
-       atomic_set(&sdp->sd_reserving_log, 0);
-       init_waitqueue_head(&sdp->sd_reserving_log_wait);
        init_waitqueue_head(&sdp->sd_log_flush_wait);
        atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
        mutex_init(&sdp->sd_freeze_mutex);
@@ -171,7 +169,8 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
                return -EINVAL;
        }
 
-       if (sb->sb_fs_format != GFS2_FORMAT_FS ||
+       if (sb->sb_fs_format < GFS2_FS_FORMAT_MIN ||
+           sb->sb_fs_format > GFS2_FS_FORMAT_MAX ||
            sb->sb_multihost_format != GFS2_FORMAT_MULTI) {
                fs_warn(sdp, "Unknown on-disk format, unable to mount\n");
                return -EINVAL;
@@ -179,7 +178,7 @@ static int gfs2_check_sb(struct gfs2_sbd *sdp, int silent)
 
        if (sb->sb_bsize < 512 || sb->sb_bsize > PAGE_SIZE ||
            (sb->sb_bsize & (sb->sb_bsize - 1))) {
-               pr_warn("Invalid superblock size\n");
+               pr_warn("Invalid block size\n");
                return -EINVAL;
        }
 
@@ -317,6 +316,13 @@ static int gfs2_read_sb(struct gfs2_sbd *sdp, int silent)
                                     sizeof(struct gfs2_meta_header))
                * GFS2_NBBY; /* not the rgrp bitmap, subsequent bitmaps only */
 
+       /*
+        * We always keep at least one block reserved for revokes in
+        * transactions.  This greatly simplifies allocating additional
+        * revoke blocks.
+        */
+       atomic_set(&sdp->sd_log_revokes_available, sdp->sd_ldptrs);
+
        /* Compute maximum reservation required to add an entry to a directory */
 
        hash_blocks = DIV_ROUND_UP(sizeof(u64) * BIT(GFS2_DIR_MAX_DEPTH),
@@ -488,6 +494,19 @@ static int init_sb(struct gfs2_sbd *sdp, int silent)
                goto out;
        }
 
+       switch (sdp->sd_sb.sb_fs_format) {
+       case GFS2_FS_FORMAT_MAX:
+               sb->s_xattr = gfs2_xattr_handlers_max;
+               break;
+
+       case GFS2_FS_FORMAT_MIN:
+               sb->s_xattr = gfs2_xattr_handlers_min;
+               break;
+
+       default:
+               BUG();
+       }
+
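
Selecting sb->s_xattr by on-disk format ties feature availability to the format number, and the default: BUG() is unreachable because gfs2_check_sb() has already rejected formats outside [GFS2_FS_FORMAT_MIN, GFS2_FS_FORMAT_MAX]. A hedged sketch of the assumed table shapes (the tables are defined in the xattr code, outside this hunk):

	/* Assumed shape (sketch): the _max table presumably extends the
	 * _min table with handlers that only newer-format filesystems
	 * may expose. */
	extern const struct xattr_handler *gfs2_xattr_handlers_min[];
	extern const struct xattr_handler *gfs2_xattr_handlers_max[];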
        /* Set up the buffer cache and SB for real */
        if (sdp->sd_sb.sb_bsize < bdev_logical_block_size(sb->s_bdev)) {
                ret = -EINVAL;
@@ -1032,13 +1051,14 @@ hostdata_error:
        }
 
        if (lm->lm_mount == NULL) {
-               fs_info(sdp, "Now mounting FS...\n");
+               fs_info(sdp, "Now mounting FS (format %u)...\n", sdp->sd_sb.sb_fs_format);
                complete_all(&sdp->sd_locking_init);
                return 0;
        }
        ret = lm->lm_mount(sdp, table);
        if (ret == 0)
-               fs_info(sdp, "Joined cluster. Now mounting FS...\n");
+               fs_info(sdp, "Joined cluster. Now mounting FS (format %u)...\n",
+                       sdp->sd_sb.sb_fs_format);
        complete_all(&sdp->sd_locking_init);
        return ret;
 }
@@ -1084,6 +1104,7 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
        int silent = fc->sb_flags & SB_SILENT;
        struct gfs2_sbd *sdp;
        struct gfs2_holder mount_gh;
+       struct gfs2_holder freeze_gh;
        int error;
 
        sdp = init_sbd(sb);
@@ -1107,7 +1128,6 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
        sb->s_op = &gfs2_super_ops;
        sb->s_d_op = &gfs2_dops;
        sb->s_export_op = &gfs2_export_ops;
-       sb->s_xattr = gfs2_xattr_handlers;
        sb->s_qcop = &gfs2_quotactl_ops;
        sb->s_quota_types = QTYPE_MASK_USR | QTYPE_MASK_GRP;
        sb_dqopt(sb)->flags |= DQUOT_QUOTA_SYS_FILE;
@@ -1156,6 +1176,10 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
        if (error)
                goto fail_locking;
 
+       /* Turn rgrplvb on by default if fs format is recent enough */
+       if (!sdp->sd_args.ar_got_rgrplvb && sdp->sd_sb.sb_fs_format > 1801)
+               sdp->sd_args.ar_rgrplvb = 1;
+
        error = wait_on_journal(sdp);
        if (error)
                goto fail_sb;
@@ -1195,25 +1219,18 @@ static int gfs2_fill_super(struct super_block *sb, struct fs_context *fc)
                goto fail_per_node;
        }
 
-       if (sb_rdonly(sb)) {
-               struct gfs2_holder freeze_gh;
+       error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
+       if (error)
+               goto fail_per_node;
 
-               error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
-                                          LM_FLAG_NOEXP | GL_EXACT,
-                                          &freeze_gh);
-               if (error) {
-                       fs_err(sdp, "can't make FS RO: %d\n", error);
-                       goto fail_per_node;
-               }
-               gfs2_glock_dq_uninit(&freeze_gh);
-       } else {
+       if (!sb_rdonly(sb))
                error = gfs2_make_fs_rw(sdp);
-               if (error) {
-                       fs_err(sdp, "can't make FS RW: %d\n", error);
-                       goto fail_per_node;
-               }
-       }
 
+       gfs2_freeze_unlock(&freeze_gh);
+       if (error) {
+               fs_err(sdp, "can't make FS RW: %d\n", error);
+               goto fail_per_node;
+       }
        gfs2_glock_dq_uninit(&mount_gh);
        gfs2_online_uevent(sdp);
        return 0;
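
gfs2_freeze_lock() and gfs2_freeze_unlock() replace open-coded freeze-glock handling here and in the recovery path below. A sketch inferred from the call sites this series converts (the real helpers are defined elsewhere in the series):

	int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
			     int flags)
	{
		int error;

		/* Shared hold on the freeze glock; callers may OR in extra
		 * flags such as LM_FLAG_PRIORITY (recovery). */
		error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
					   LM_FLAG_NOEXP | GL_EXACT | flags,
					   freeze_gh);
		if (error)
			fs_err(sdp, "can't lock the freeze lock: %d\n", error);
		return error;
	}

	void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
	{
		if (gfs2_holder_initialized(freeze_gh))
			gfs2_glock_dq_uninit(freeze_gh);
	}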
@@ -1456,6 +1473,7 @@ static int gfs2_parse_param(struct fs_context *fc, struct fs_parameter *param)
                break;
        case Opt_rgrplvb:
                args->ar_rgrplvb = result.boolean;
+               args->ar_got_rgrplvb = 1;
                break;
        case Opt_loccookie:
                args->ar_loccookie = result.boolean;
@@ -1514,15 +1532,20 @@ static int gfs2_reconfigure(struct fs_context *fc)
                fc->sb_flags |= SB_RDONLY;
 
        if ((sb->s_flags ^ fc->sb_flags) & SB_RDONLY) {
+               struct gfs2_holder freeze_gh;
+
+               error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
+               if (error)
+                       return -EINVAL;
+
                if (fc->sb_flags & SB_RDONLY) {
-                       error = gfs2_make_fs_ro(sdp);
-                       if (error)
-                               errorfc(fc, "unable to remount read-only");
+                       gfs2_make_fs_ro(sdp);
                } else {
                        error = gfs2_make_fs_rw(sdp);
                        if (error)
                                errorfc(fc, "unable to remount read-write");
                }
+               gfs2_freeze_unlock(&freeze_gh);
        }
        sdp->sd_args = *newargs;
 
index c26c68e..2821737 100644 (file)
@@ -470,9 +470,7 @@ void gfs2_recover_func(struct work_struct *work)
 
                /* Acquire a shared hold on the freeze lock */
 
-               error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
-                                          LM_FLAG_NOEXP | LM_FLAG_PRIORITY |
-                                          GL_EXACT, &thaw_gh);
+               error = gfs2_freeze_lock(sdp, &thaw_gh, LM_FLAG_PRIORITY);
                if (error)
                        goto fail_gunlock_ji;
 
@@ -507,22 +505,24 @@ void gfs2_recover_func(struct work_struct *work)
 
                /* We take the sd_log_flush_lock here primarily to prevent log
                 * flushes and simultaneous journal replays from stomping on
-                * each other wrt sd_log_bio. */
+                * each other wrt jd_log_bio. */
                down_read(&sdp->sd_log_flush_lock);
                for (pass = 0; pass < 2; pass++) {
                        lops_before_scan(jd, &head, pass);
                        error = foreach_descriptor(jd, head.lh_tail,
                                                   head.lh_blkno, pass);
                        lops_after_scan(jd, error, pass);
-                       if (error)
+                       if (error) {
+                               up_read(&sdp->sd_log_flush_lock);
                                goto fail_gunlock_thaw;
+                       }
                }
 
                recover_local_statfs(jd, &head);
                clean_journal(jd, &head);
                up_read(&sdp->sd_log_flush_lock);
 
-               gfs2_glock_dq_uninit(&thaw_gh);
+               gfs2_freeze_unlock(&thaw_gh);
                t_rep = ktime_get();
                fs_info(sdp, "jid=%u: Journal replayed in %lldms [jlck:%lldms, "
                        "jhead:%lldms, tlck:%lldms, replay:%lldms]\n",
@@ -544,7 +544,7 @@ void gfs2_recover_func(struct work_struct *work)
        goto done;
 
 fail_gunlock_thaw:
-       gfs2_glock_dq_uninit(&thaw_gh);
+       gfs2_freeze_unlock(&thaw_gh);
 fail_gunlock_ji:
        if (jlocked) {
                gfs2_glock_dq_uninit(&ji_gh);
index 5e8eef9..89c37a8 100644 (file)
 #define BFITNOENT ((u32)~0)
 #define NO_BLOCK ((u64)~0)
 
+struct gfs2_rbm {
+       struct gfs2_rgrpd *rgd;
+       u32 offset;             /* The offset is bitmap relative */
+       int bii;                /* Bitmap index */
+};
+
+static inline struct gfs2_bitmap *rbm_bi(const struct gfs2_rbm *rbm)
+{
+       return rbm->rgd->rd_bits + rbm->bii;
+}
+
+static inline u64 gfs2_rbm_to_block(const struct gfs2_rbm *rbm)
+{
+       BUG_ON(rbm->offset >= rbm->rgd->rd_data);
+       return rbm->rgd->rd_data0 + (rbm_bi(rbm)->bi_start * GFS2_NBBY) +
+               rbm->offset;
+}
+
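
struct gfs2_rbm is now private to rgrp.c. For orientation, a hedged worked example of the mapping above (assuming 4 KiB blocks and the 24-byte metadata header, so one bitmap byte covers GFS2_NBBY == 4 blocks):

	/* Worked example (sketch, not kernel code): a full bitmap block
	 * carries 4096 - 24 = 4072 data bytes, i.e. 4072 * GFS2_NBBY =
	 * 16288 two-bit allocation states.  For
	 *	rbm = { .rgd = rgd, .bii = 2, .offset = 100 }
	 * gfs2_rbm_to_block() yields
	 *	rgd->rd_data0 + rgd->rd_bits[2].bi_start * 4 + 100
	 * where bi_start is the cumulative byte offset of bitmap 2 (used
	 * instead of bii * 4072 because the first bitmap shares its block
	 * with the rgrp header and is smaller). */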
 /*
  * These routines are used by the resource group routines (rgrp.c)
  * to keep track of block allocation.  Each block is represented by two
@@ -61,7 +79,7 @@ static const char valid_change[16] = {
 };
 
 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
-                        const struct gfs2_inode *ip, bool nowrap);
+                        struct gfs2_blkreserv *rs, bool nowrap);
 
 
 /**
@@ -175,7 +193,7 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
 
 /**
  * rs_cmp - multi-block reservation range compare
- * @blk: absolute file system block number of the new reservation
+ * @start: start of the new reservation
  * @len: number of blocks in the new reservation
  * @rs: existing reservation to compare against
  *
@@ -183,13 +201,11 @@ static inline u64 gfs2_bit_search(const __le64 *ptr, u64 mask, u8 state)
  *         -1 if the block range is before the start of the reservation
  *          0 if the block range overlaps with the reservation
  */
-static inline int rs_cmp(u64 blk, u32 len, struct gfs2_blkreserv *rs)
+static inline int rs_cmp(u64 start, u32 len, struct gfs2_blkreserv *rs)
 {
-       u64 startblk = gfs2_rbm_to_block(&rs->rs_rbm);
-
-       if (blk >= startblk + rs->rs_free)
+       if (start >= rs->rs_start + rs->rs_requested)
                return 1;
-       if (blk + len - 1 < startblk)
+       if (rs->rs_start >= start + len)
                return -1;
        return 0;
 }
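
rs_cmp() now compares half-open block ranges directly against rs_start and rs_requested. A few hedged examples of the semantics:

	/* Examples (sketch): for rs = { .rs_start = 110, .rs_requested = 5 },
	 * i.e. the half-open range [110, 115):
	 *
	 *	rs_cmp(100, 10, rs) == -1	[100, 110) lies before it
	 *	rs_cmp(112, 10, rs) ==  0	[112, 122) overlaps it
	 *	rs_cmp(115, 10, rs) ==  1	[115, 125) lies after it
	 */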
@@ -277,29 +293,38 @@ static int gfs2_rbm_from_block(struct gfs2_rbm *rbm, u64 block)
 }
 
 /**
- * gfs2_rbm_incr - increment an rbm structure
+ * gfs2_rbm_add - add a number of blocks to an rbm
  * @rbm: The rbm with rgd already set correctly
+ * @blocks: The number of blocks to add to rbm
  *
- * This function takes an existing rbm structure and increments it to the next
- * viable block offset.
- *
- * Returns: If incrementing the offset would cause the rbm to go past the
- *          end of the rgrp, true is returned, otherwise false.
+ * This function takes an existing rbm structure and adds a number of blocks to
+ * it.
  *
+ * Returns: True if the new rbm would point past the end of the rgrp.
  */
 
-static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
+static bool gfs2_rbm_add(struct gfs2_rbm *rbm, u32 blocks)
 {
-       if (rbm->offset + 1 < rbm_bi(rbm)->bi_blocks) { /* in the same bitmap */
-               rbm->offset++;
+       struct gfs2_rgrpd *rgd = rbm->rgd;
+       struct gfs2_bitmap *bi = rgd->rd_bits + rbm->bii;
+
+       if (rbm->offset + blocks < bi->bi_blocks) {
+               rbm->offset += blocks;
                return false;
        }
-       if (rbm->bii == rbm->rgd->rd_length - 1) /* at the last bitmap */
-               return true;
+       blocks -= bi->bi_blocks - rbm->offset;
 
-       rbm->offset = 0;
-       rbm->bii++;
-       return false;
+       for (;;) {
+               bi++;
+               if (bi == rgd->rd_bits + rgd->rd_length)
+                       return true;
+               if (blocks < bi->bi_blocks) {
+                       rbm->offset = blocks;
+                       rbm->bii = bi - rgd->rd_bits;
+                       return false;
+               }
+               blocks -= bi->bi_blocks;
+       }
 }
 
 /**
@@ -308,7 +333,8 @@ static bool gfs2_rbm_incr(struct gfs2_rbm *rbm)
  * @n_unaligned: Number of unaligned blocks to check
  * @len: Decremented for each block found (terminate on zero)
  *
- * Returns: true if a non-free block is encountered
+ * Returns: true if a non-free block is encountered or the end of the resource
+ *         group is reached.
  */
 
 static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *len)
@@ -323,7 +349,7 @@ static bool gfs2_unaligned_extlen(struct gfs2_rbm *rbm, u32 n_unaligned, u32 *le
                (*len)--;
                if (*len == 0)
                        return true;
-               if (gfs2_rbm_incr(rbm))
+               if (gfs2_rbm_add(rbm, 1))
                        return true;
        }
 
@@ -595,10 +621,11 @@ static void dump_rs(struct seq_file *seq, const struct gfs2_blkreserv *rs,
 {
        struct gfs2_inode *ip = container_of(rs, struct gfs2_inode, i_res);
 
-       gfs2_print_dbg(seq, "%s  B: n:%llu s:%llu b:%u f:%u\n", fs_id_buf,
+       gfs2_print_dbg(seq, "%s  B: n:%llu s:%llu f:%u\n",
+                      fs_id_buf,
                       (unsigned long long)ip->i_no_addr,
-                      (unsigned long long)gfs2_rbm_to_block(&rs->rs_rbm),
-                      rs->rs_rbm.offset, rs->rs_free);
+                      (unsigned long long)rs->rs_start,
+                      rs->rs_requested);
 }
 
 /**
@@ -613,33 +640,22 @@ static void __rs_deltree(struct gfs2_blkreserv *rs)
        if (!gfs2_rs_active(rs))
                return;
 
-       rgd = rs->rs_rbm.rgd;
+       rgd = rs->rs_rgd;
        trace_gfs2_rs(rs, TRACE_RS_TREEDEL);
        rb_erase(&rs->rs_node, &rgd->rd_rstree);
        RB_CLEAR_NODE(&rs->rs_node);
 
-       if (rs->rs_free) {
-               u64 last_block = gfs2_rbm_to_block(&rs->rs_rbm) +
-                                rs->rs_free - 1;
-               struct gfs2_rbm last_rbm = { .rgd = rs->rs_rbm.rgd, };
-               struct gfs2_bitmap *start, *last;
+       if (rs->rs_requested) {
+               /* return requested blocks to the rgrp */
+               BUG_ON(rs->rs_rgd->rd_requested < rs->rs_requested);
+               rs->rs_rgd->rd_requested -= rs->rs_requested;
 
-               /* return reserved blocks to the rgrp */
-               BUG_ON(rs->rs_rbm.rgd->rd_reserved < rs->rs_free);
-               rs->rs_rbm.rgd->rd_reserved -= rs->rs_free;
                /* The rgrp extent failure point is likely not to increase;
                   it will only do so if the freed blocks are somehow
                   contiguous with a span of free blocks that follows. Still,
                   it will force the number to be recalculated later. */
-               rgd->rd_extfail_pt += rs->rs_free;
-               rs->rs_free = 0;
-               if (gfs2_rbm_from_block(&last_rbm, last_block))
-                       return;
-               start = rbm_bi(&rs->rs_rbm);
-               last = rbm_bi(&last_rbm);
-               do
-                       clear_bit(GBF_FULL, &start->bi_flags);
-               while (start++ != last);
+               rgd->rd_extfail_pt += rs->rs_requested;
+               rs->rs_requested = 0;
        }
 }
 
@@ -652,11 +668,11 @@ void gfs2_rs_deltree(struct gfs2_blkreserv *rs)
 {
        struct gfs2_rgrpd *rgd;
 
-       rgd = rs->rs_rbm.rgd;
+       rgd = rs->rs_rgd;
        if (rgd) {
                spin_lock(&rgd->rd_rsspin);
                __rs_deltree(rs);
-               BUG_ON(rs->rs_free);
+               BUG_ON(rs->rs_requested);
                spin_unlock(&rgd->rd_rsspin);
        }
 }
@@ -904,6 +920,7 @@ static int read_rindex_entry(struct gfs2_inode *ip)
        rgd->rd_data = be32_to_cpu(buf.ri_data);
        rgd->rd_bitbytes = be32_to_cpu(buf.ri_bitbytes);
        spin_lock_init(&rgd->rd_rsspin);
+       mutex_init(&rgd->rd_mutex);
 
        error = compute_bitstructs(rgd);
        if (error)
@@ -1149,6 +1166,23 @@ static u32 count_unlinked(struct gfs2_rgrpd *rgd)
        return count;
 }
 
+static void rgrp_set_bitmap_flags(struct gfs2_rgrpd *rgd)
+{
+       struct gfs2_bitmap *bi;
+       int x;
+
+       if (rgd->rd_free) {
+               for (x = 0; x < rgd->rd_length; x++) {
+                       bi = rgd->rd_bits + x;
+                       clear_bit(GBF_FULL, &bi->bi_flags);
+               }
+       } else {
+               for (x = 0; x < rgd->rd_length; x++) {
+                       bi = rgd->rd_bits + x;
+                       set_bit(GBF_FULL, &bi->bi_flags);
+               }
+       }
+}
 
 /**
  * gfs2_rgrp_bh_get - Read in a RG's header and bitmaps
@@ -1192,11 +1226,11 @@ static int gfs2_rgrp_bh_get(struct gfs2_rgrpd *rgd)
        }
 
        if (!(rgd->rd_flags & GFS2_RDF_UPTODATE)) {
-               for (x = 0; x < length; x++)
-                       clear_bit(GBF_FULL, &rgd->rd_bits[x].bi_flags);
                gfs2_rgrp_in(rgd, (rgd->rd_bits[0].bi_bh)->b_data);
+               rgrp_set_bitmap_flags(rgd);
                rgd->rd_flags |= (GFS2_RDF_UPTODATE | GFS2_RDF_CHECK);
                rgd->rd_free_clone = rgd->rd_free;
+               BUG_ON(rgd->rd_reserved);
                /* max out the rgrp allocation failure point */
                rgd->rd_extfail_pt = rgd->rd_free;
        }
@@ -1244,7 +1278,11 @@ static int update_rgrp_lvb(struct gfs2_rgrpd *rgd)
        if (rgd->rd_rgl->rl_unlinked == 0)
                rgd->rd_flags &= ~GFS2_RDF_CHECK;
        rgd->rd_free = be32_to_cpu(rgd->rd_rgl->rl_free);
+       rgrp_set_bitmap_flags(rgd);
        rgd->rd_free_clone = rgd->rd_free;
+       BUG_ON(rgd->rd_reserved);
+       /* max out the rgrp allocation failure point */
+       rgd->rd_extfail_pt = rgd->rd_free;
        rgd->rd_dinodes = be32_to_cpu(rgd->rd_rgl->rl_dinodes);
        rgd->rd_igeneration = be64_to_cpu(rgd->rd_rgl->rl_igeneration);
        return 0;
@@ -1404,7 +1442,8 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
 
        while (1) {
 
-               ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+               ret = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+                                        LM_FLAG_NODE_SCOPE, &gh);
                if (ret)
                        goto out;
 
@@ -1412,9 +1451,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
                        /* Trim each bitmap in the rgrp */
                        for (x = 0; x < rgd->rd_length; x++) {
                                struct gfs2_bitmap *bi = rgd->rd_bits + x;
+                               rgrp_lock_local(rgd);
                                ret = gfs2_rgrp_send_discards(sdp,
                                                rgd->rd_data0, NULL, bi, minlen,
                                                &amt);
+                               rgrp_unlock_local(rgd);
                                if (ret) {
                                        gfs2_glock_dq_uninit(&gh);
                                        goto out;
@@ -1426,9 +1467,11 @@ int gfs2_fitrim(struct file *filp, void __user *argp)
                        ret = gfs2_trans_begin(sdp, RES_RG_HDR, 0);
                        if (ret == 0) {
                                bh = rgd->rd_bits[0].bi_bh;
+                               rgrp_lock_local(rgd);
                                rgd->rd_flags |= GFS2_RGF_TRIMMED;
                                gfs2_trans_add_meta(rgd->rd_gl, bh);
                                gfs2_rgrp_out(rgd, bh->b_data);
+                               rgrp_unlock_local(rgd);
                                gfs2_trans_end(sdp);
                        }
                }
@@ -1458,8 +1501,7 @@ static void rs_insert(struct gfs2_inode *ip)
        struct rb_node **newn, *parent = NULL;
        int rc;
        struct gfs2_blkreserv *rs = &ip->i_res;
-       struct gfs2_rgrpd *rgd = rs->rs_rbm.rgd;
-       u64 fsblock = gfs2_rbm_to_block(&rs->rs_rbm);
+       struct gfs2_rgrpd *rgd = rs->rs_rgd;
 
        BUG_ON(gfs2_rs_active(rs));
 
@@ -1470,7 +1512,7 @@ static void rs_insert(struct gfs2_inode *ip)
                        rb_entry(*newn, struct gfs2_blkreserv, rs_node);
 
                parent = *newn;
-               rc = rs_cmp(fsblock, rs->rs_free, cur);
+               rc = rs_cmp(rs->rs_start, rs->rs_requested, cur);
                if (rc > 0)
                        newn = &((*newn)->rb_right);
                else if (rc < 0)
@@ -1486,7 +1528,7 @@ static void rs_insert(struct gfs2_inode *ip)
        rb_insert_color(&rs->rs_node, &rgd->rd_rstree);
 
        /* Do our rgrp accounting for the reservation */
-       rgd->rd_reserved += rs->rs_free; /* blocks reserved */
+       rgd->rd_requested += rs->rs_requested; /* blocks requested */
        spin_unlock(&rgd->rd_rsspin);
        trace_gfs2_rs(rs, TRACE_RS_INSERT);
 }
@@ -1507,9 +1549,9 @@ static inline u32 rgd_free(struct gfs2_rgrpd *rgd, struct gfs2_blkreserv *rs)
 {
        u32 tot_reserved, tot_free;
 
-       if (WARN_ON_ONCE(rgd->rd_reserved < rs->rs_free))
+       if (WARN_ON_ONCE(rgd->rd_requested < rs->rs_requested))
                return 0;
-       tot_reserved = rgd->rd_reserved - rs->rs_free;
+       tot_reserved = rgd->rd_requested - rs->rs_requested;
 
        if (rgd->rd_free_clone < tot_reserved)
                tot_reserved = 0;
@@ -1534,17 +1576,26 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
        u64 goal;
        struct gfs2_blkreserv *rs = &ip->i_res;
        u32 extlen;
-       u32 free_blocks = rgd_free(rgd, rs);
+       u32 free_blocks, blocks_available;
        int ret;
        struct inode *inode = &ip->i_inode;
 
+       spin_lock(&rgd->rd_rsspin);
+       free_blocks = rgd_free(rgd, rs);
+       if (rgd->rd_free_clone < rgd->rd_requested)
+               free_blocks = 0;
+       blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+       if (rgd == rs->rs_rgd)
+               blocks_available += rs->rs_reserved;
+       spin_unlock(&rgd->rd_rsspin);
+
        if (S_ISDIR(inode->i_mode))
                extlen = 1;
        else {
                extlen = max_t(u32, atomic_read(&ip->i_sizehint), ap->target);
                extlen = clamp(extlen, (u32)RGRP_RSRV_MINBLKS, free_blocks);
        }
-       if ((rgd->rd_free_clone < rgd->rd_reserved) || (free_blocks < extlen))
+       if (free_blocks < extlen || blocks_available < extlen)
                return;
 
        /* Find bitmap block that contains bits for goal block */
@@ -1556,10 +1607,10 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
        if (WARN_ON(gfs2_rbm_from_block(&rbm, goal)))
                return;
 
-       ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, ip, true);
+       ret = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &extlen, &ip->i_res, true);
        if (ret == 0) {
-               rs->rs_rbm = rbm;
-               rs->rs_free = extlen;
+               rs->rs_start = gfs2_rbm_to_block(&rbm);
+               rs->rs_requested = extlen;
                rs_insert(ip);
        } else {
                if (goal == rgd->rd_last_alloc + rgd->rd_data0)
@@ -1572,7 +1623,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
  * @rgd: The resource group
  * @block: The starting block
  * @length: The required length
- * @ip: Ignore any reservations for this inode
+ * @ignore_rs: Reservation to ignore
  *
  * If the block does not appear in any reservation, then return the
  * block number unchanged. If it does appear in the reservation, then
@@ -1582,7 +1633,7 @@ static void rg_mblk_search(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip,
 
 static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
                                      u32 length,
-                                     const struct gfs2_inode *ip)
+                                     struct gfs2_blkreserv *ignore_rs)
 {
        struct gfs2_blkreserv *rs;
        struct rb_node *n;
@@ -1602,8 +1653,8 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
        }
 
        if (n) {
-               while ((rs_cmp(block, length, rs) == 0) && (&ip->i_res != rs)) {
-                       block = gfs2_rbm_to_block(&rs->rs_rbm) + rs->rs_free;
+               while (rs_cmp(block, length, rs) == 0 && rs != ignore_rs) {
+                       block = rs->rs_start + rs->rs_requested;
                        n = n->rb_right;
                        if (n == NULL)
                                break;
@@ -1618,7 +1669,7 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
 /**
  * gfs2_reservation_check_and_update - Check for reservations during block alloc
  * @rbm: The current position in the resource group
- * @ip: The inode for which we are searching for blocks
+ * @rs: Our own reservation
  * @minext: The minimum extent length
  * @maxext: A pointer to the maximum extent structure
  *
@@ -1632,20 +1683,19 @@ static u64 gfs2_next_unreserved_block(struct gfs2_rgrpd *rgd, u64 block,
  */
 
 static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
-                                            const struct gfs2_inode *ip,
+                                            struct gfs2_blkreserv *rs,
                                             u32 minext,
                                             struct gfs2_extent *maxext)
 {
        u64 block = gfs2_rbm_to_block(rbm);
        u32 extlen = 1;
        u64 nblock;
-       int ret;
 
        /*
         * If we have a minimum extent length, then skip over any extent
         * which is less than the min extent length in size.
         */
-       if (minext) {
+       if (minext > 1) {
                extlen = gfs2_free_extlen(rbm, minext);
                if (extlen <= maxext->len)
                        goto fail;
@@ -1655,7 +1705,7 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
         * Check the extent which has been found against the reservations
         * and skip if parts of it are already reserved
         */
-       nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, ip);
+       nblock = gfs2_next_unreserved_block(rbm->rgd, block, extlen, rs);
        if (nblock == block) {
                if (!minext || extlen >= minext)
                        return 0;
@@ -1664,12 +1714,15 @@ static int gfs2_reservation_check_and_update(struct gfs2_rbm *rbm,
                        maxext->len = extlen;
                        maxext->rbm = *rbm;
                }
-fail:
-               nblock = block + extlen;
+       } else {
+               u64 len = nblock - block;
+               if (len >= (u64)1 << 32)
+                       return -E2BIG;
+               extlen = len;
        }
-       ret = gfs2_rbm_from_block(rbm, nblock);
-       if (ret < 0)
-               return ret;
+fail:
+       if (gfs2_rbm_add(rbm, extlen))
+               return -E2BIG;
        return 1;
 }
 
@@ -1677,9 +1730,9 @@ fail:
  * gfs2_rbm_find - Look for blocks of a particular state
  * @rbm: Value/result starting position and final position
  * @state: The state which we want to find
- * @minext: Pointer to the requested extent length (NULL for a single block)
+ * @minext: Pointer to the requested extent length
  *          This is updated to be the actual reservation size.
- * @ip: If set, check for reservations
+ * @rs: Our own reservation (NULL to skip checking for reservations)
  * @nowrap: Stop looking at the end of the rgrp, rather than wrapping
  *          around until we've reached the starting point.
  *
@@ -1693,7 +1746,7 @@ fail:
  */
 
 static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
-                        const struct gfs2_inode *ip, bool nowrap)
+                        struct gfs2_blkreserv *rs, bool nowrap)
 {
        bool scan_from_start = rbm->bii == 0 && rbm->offset == 0;
        struct buffer_head *bh;
@@ -1714,8 +1767,7 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
 
        while(1) {
                bi = rbm_bi(rbm);
-               if ((ip == NULL || !gfs2_rs_active(&ip->i_res)) &&
-                   test_bit(GBF_FULL, &bi->bi_flags) &&
+               if (test_bit(GBF_FULL, &bi->bi_flags) &&
                    (state == GFS2_BLKST_FREE))
                        goto next_bitmap;
 
@@ -1731,11 +1783,10 @@ static int gfs2_rbm_find(struct gfs2_rbm *rbm, u8 state, u32 *minext,
                        goto next_bitmap;
                }
                rbm->offset = offset;
-               if (ip == NULL)
+               if (!rs)
                        return 0;
 
-               ret = gfs2_reservation_check_and_update(rbm, ip,
-                                                       minext ? *minext : 0,
+               ret = gfs2_reservation_check_and_update(rbm, rs, *minext,
                                                        &maxext);
                if (ret == 0)
                        return 0;
@@ -1767,7 +1818,7 @@ next_iter:
                        break;
        }
 
-       if (minext == NULL || state != GFS2_BLKST_FREE)
+       if (state != GFS2_BLKST_FREE)
                return -ENOSPC;
 
        /* If the extent was too small, and it's smaller than the smallest
@@ -1775,7 +1826,7 @@ next_iter:
           useless to search this rgrp again for this amount or more. */
        if (wrapped && (scan_from_start || rbm->bii > last_bii) &&
            *minext < rbm->rgd->rd_extfail_pt)
-               rbm->rgd->rd_extfail_pt = *minext;
+               rbm->rgd->rd_extfail_pt = *minext - 1;
 
        /* If the maximum extent we found is big enough to fulfill the
           minimum requirements, use it anyway. */
@@ -1938,7 +1989,7 @@ static bool gfs2_rgrp_used_recently(const struct gfs2_blkreserv *rs,
        u64 tdiff;
 
        tdiff = ktime_to_ns(ktime_sub(ktime_get_real(),
-                            rs->rs_rbm.rgd->rd_gl->gl_dstamp));
+                            rs->rs_rgd->rd_gl->gl_dstamp));
 
        return tdiff > (msecs * 1000 * 1000);
 }
@@ -1993,8 +2044,7 @@ static inline int fast_to_acquire(struct gfs2_rgrpd *rgd)
  * We try our best to find an rgrp that has at least ap->target blocks
  * available. After a couple of passes (loops == 2), the prospects of finding
  * such an rgrp diminish. At this stage, we return the first rgrp that has
- * at least ap->min_target blocks available. Either way, we set ap->allowed to
- * the number of blocks available in the chosen rgrp.
+ * at least ap->min_target blocks available.
  *
  * Returns: 0 on success,
  *          -ENOMEM if a suitable rgrp can't be found
@@ -2006,56 +2056,64 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_rgrpd *begin = NULL;
        struct gfs2_blkreserv *rs = &ip->i_res;
-       int error = 0, rg_locked, flags = 0;
+       int error = 0, flags = LM_FLAG_NODE_SCOPE;
+       bool rg_locked;
        u64 last_unlinked = NO_BLOCK;
+       u32 target = ap->target;
        int loops = 0;
-       u32 free_blocks, skip = 0;
+       u32 free_blocks, blocks_available, skip = 0;
+
+       BUG_ON(rs->rs_reserved);
 
        if (sdp->sd_args.ar_rgrplvb)
                flags |= GL_SKIP;
-       if (gfs2_assert_warn(sdp, ap->target))
+       if (gfs2_assert_warn(sdp, target))
                return -EINVAL;
        if (gfs2_rs_active(rs)) {
-               begin = rs->rs_rbm.rgd;
-       } else if (rs->rs_rbm.rgd &&
-                  rgrp_contains_block(rs->rs_rbm.rgd, ip->i_goal)) {
-               begin = rs->rs_rbm.rgd;
+               begin = rs->rs_rgd;
+       } else if (rs->rs_rgd &&
+                  rgrp_contains_block(rs->rs_rgd, ip->i_goal)) {
+               begin = rs->rs_rgd;
        } else {
                check_and_update_goal(ip);
-               rs->rs_rbm.rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
+               rs->rs_rgd = begin = gfs2_blk2rgrpd(sdp, ip->i_goal, 1);
        }
        if (S_ISDIR(ip->i_inode.i_mode) && (ap->aflags & GFS2_AF_ORLOV))
                skip = gfs2_orlov_skip(ip);
-       if (rs->rs_rbm.rgd == NULL)
+       if (rs->rs_rgd == NULL)
                return -EBADSLT;
 
        while (loops < 3) {
-               rg_locked = 1;
+               struct gfs2_rgrpd *rgd;
 
-               if (!gfs2_glock_is_locked_by_me(rs->rs_rbm.rgd->rd_gl)) {
-                       rg_locked = 0;
+               rg_locked = gfs2_glock_is_locked_by_me(rs->rs_rgd->rd_gl);
+               if (rg_locked) {
+                       rgrp_lock_local(rs->rs_rgd);
+               } else {
                        if (skip && skip--)
                                goto next_rgrp;
                        if (!gfs2_rs_active(rs)) {
                                if (loops == 0 &&
-                                   !fast_to_acquire(rs->rs_rbm.rgd))
+                                   !fast_to_acquire(rs->rs_rgd))
                                        goto next_rgrp;
                                if ((loops < 2) &&
                                    gfs2_rgrp_used_recently(rs, 1000) &&
-                                   gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+                                   gfs2_rgrp_congested(rs->rs_rgd, loops))
                                        goto next_rgrp;
                        }
-                       error = gfs2_glock_nq_init(rs->rs_rbm.rgd->rd_gl,
+                       error = gfs2_glock_nq_init(rs->rs_rgd->rd_gl,
                                                   LM_ST_EXCLUSIVE, flags,
                                                   &ip->i_rgd_gh);
                        if (unlikely(error))
                                return error;
+                       rgrp_lock_local(rs->rs_rgd);
                        if (!gfs2_rs_active(rs) && (loops < 2) &&
-                           gfs2_rgrp_congested(rs->rs_rbm.rgd, loops))
+                           gfs2_rgrp_congested(rs->rs_rgd, loops))
                                goto skip_rgrp;
                        if (sdp->sd_args.ar_rgrplvb) {
-                               error = update_rgrp_lvb(rs->rs_rbm.rgd);
+                               error = update_rgrp_lvb(rs->rs_rgd);
                                if (unlikely(error)) {
+                                       rgrp_unlock_local(rs->rs_rgd);
                                        gfs2_glock_dq_uninit(&ip->i_rgd_gh);
                                        return error;
                                }
@@ -2063,36 +2121,46 @@ int gfs2_inplace_reserve(struct gfs2_inode *ip, struct gfs2_alloc_parms *ap)
                }
 
                /* Skip unusable resource groups */
-               if ((rs->rs_rbm.rgd->rd_flags & (GFS2_RGF_NOALLOC |
+               if ((rs->rs_rgd->rd_flags & (GFS2_RGF_NOALLOC |
                                                 GFS2_RDF_ERROR)) ||
-                   (loops == 0 && ap->target > rs->rs_rbm.rgd->rd_extfail_pt))
+                   (loops == 0 && target > rs->rs_rgd->rd_extfail_pt))
                        goto skip_rgrp;
 
                if (sdp->sd_args.ar_rgrplvb)
-                       gfs2_rgrp_bh_get(rs->rs_rbm.rgd);
+                       gfs2_rgrp_bh_get(rs->rs_rgd);
 
                /* Get a reservation if we don't already have one */
                if (!gfs2_rs_active(rs))
-                       rg_mblk_search(rs->rs_rbm.rgd, ip, ap);
+                       rg_mblk_search(rs->rs_rgd, ip, ap);
 
                /* Skip rgrps when we can't get a reservation on first pass */
                if (!gfs2_rs_active(rs) && (loops < 1))
                        goto check_rgrp;
 
                /* If rgrp has enough free space, use it */
-               free_blocks = rgd_free(rs->rs_rbm.rgd, rs);
-               if (free_blocks >= ap->target ||
-                   (loops == 2 && ap->min_target &&
-                    free_blocks >= ap->min_target)) {
-                       ap->allowed = free_blocks;
-                       return 0;
+               rgd = rs->rs_rgd;
+               spin_lock(&rgd->rd_rsspin);
+               free_blocks = rgd_free(rgd, rs);
+               blocks_available = rgd->rd_free_clone - rgd->rd_reserved;
+               if (free_blocks < target || blocks_available < target) {
+                       spin_unlock(&rgd->rd_rsspin);
+                       goto check_rgrp;
                }
+               rs->rs_reserved = ap->target;
+               if (rs->rs_reserved > blocks_available)
+                       rs->rs_reserved = blocks_available;
+               rgd->rd_reserved += rs->rs_reserved;
+               spin_unlock(&rgd->rd_rsspin);
+               rgrp_unlock_local(rs->rs_rgd);
+               return 0;
 check_rgrp:
                /* Check for unlinked inodes which can be reclaimed */
-               if (rs->rs_rbm.rgd->rd_flags & GFS2_RDF_CHECK)
-                       try_rgrp_unlink(rs->rs_rbm.rgd, &last_unlinked,
+               if (rs->rs_rgd->rd_flags & GFS2_RDF_CHECK)
+                       try_rgrp_unlink(rs->rs_rgd, &last_unlinked,
                                        ip->i_no_addr);
 skip_rgrp:
+               rgrp_unlock_local(rs->rs_rgd);
+
                /* Drop reservation, if we couldn't use reserved rgrp */
                if (gfs2_rs_active(rs))
                        gfs2_rs_deltree(rs);
@@ -2102,7 +2170,7 @@ skip_rgrp:
                        gfs2_glock_dq_uninit(&ip->i_rgd_gh);
 next_rgrp:
                /* Find the next rgrp, and continue looking */
-               if (gfs2_select_rgrp(&rs->rs_rbm.rgd, begin))
+               if (gfs2_select_rgrp(&rs->rs_rgd, begin))
                        continue;
                if (skip)
                        continue;
@@ -2119,9 +2187,12 @@ next_rgrp:
                                return error;
                }
                /* Flushing the log may release space */
-               if (loops == 2)
+               if (loops == 2) {
+                       if (ap->min_target)
+                               target = ap->min_target;
                        gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                                       GFS2_LFC_INPLACE_RESERVE);
+               }
        }
 
        return -ENOSPC;
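
The hunks above change the allocation loop from an optimistic free-space check to an up-front claim: under rd_rsspin the code computes what is genuinely available (rd_free_clone - rd_reserved), skips the resource group if that cannot cover the target, and otherwise clamps the claim to what is available and adds it to rd_reserved. A minimal userspace model of that claim step, with a mutex standing in for the spinlock (the names below are illustrative, not the kernel API):

    #include <pthread.h>
    #include <stdint.h>

    /* Toy model of one resource group's counters. */
    struct rgrp {
            pthread_mutex_t lock;   /* stands in for rd_rsspin */
            uint32_t free_clone;    /* stands in for rd_free_clone */
            uint32_t reserved;      /* stands in for rd_reserved */
    };

    /* Claim up to @wanted blocks if at least @target are available.
     * Returns the number claimed, or 0 to tell the caller to try the
     * next resource group (the "goto check_rgrp" case above). */
    static uint32_t claim(struct rgrp *rgd, uint32_t target, uint32_t wanted)
    {
            uint32_t available, claimed = 0;

            pthread_mutex_lock(&rgd->lock);
            available = rgd->free_clone - rgd->reserved;
            if (available >= target) {
                    claimed = wanted < available ? wanted : available;
                    rgd->reserved += claimed;
            }
            pthread_mutex_unlock(&rgd->lock);
            return claimed;
    }

The matching decrement is gfs2_inplace_release() in the next hunk, which is why it can assert that rgd->rd_reserved never drops below rs->rs_reserved before subtracting.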
@@ -2136,6 +2207,17 @@ next_rgrp:
 
 void gfs2_inplace_release(struct gfs2_inode *ip)
 {
+       struct gfs2_blkreserv *rs = &ip->i_res;
+
+       if (rs->rs_reserved) {
+               struct gfs2_rgrpd *rgd = rs->rs_rgd;
+
+               spin_lock(&rgd->rd_rsspin);
+               BUG_ON(rgd->rd_reserved < rs->rs_reserved);
+               rgd->rd_reserved -= rs->rs_reserved;
+               spin_unlock(&rgd->rd_rsspin);
+               rs->rs_reserved = 0;
+       }
        if (gfs2_holder_initialized(&ip->i_rgd_gh))
                gfs2_glock_dq_uninit(&ip->i_rgd_gh);
 }
@@ -2205,7 +2287,7 @@ static void rgblk_free(struct gfs2_sbd *sdp, struct gfs2_rgrpd *rgd,
                        bi_prev = bi;
                }
                gfs2_setbit(&rbm, false, new_state);
-               gfs2_rbm_incr(&rbm);
+               gfs2_rbm_add(&rbm, 1);
        }
 }
 
@@ -2223,11 +2305,12 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
        struct gfs2_blkreserv *trs;
        const struct rb_node *n;
 
-       gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u r:%u e:%u\n",
+       spin_lock(&rgd->rd_rsspin);
+       gfs2_print_dbg(seq, "%s R: n:%llu f:%02x b:%u/%u i:%u q:%u r:%u e:%u\n",
                       fs_id_buf,
                       (unsigned long long)rgd->rd_addr, rgd->rd_flags,
                       rgd->rd_free, rgd->rd_free_clone, rgd->rd_dinodes,
-                      rgd->rd_reserved, rgd->rd_extfail_pt);
+                      rgd->rd_requested, rgd->rd_reserved, rgd->rd_extfail_pt);
        if (rgd->rd_sbd->sd_args.ar_rgrplvb) {
                struct gfs2_rgrp_lvb *rgl = rgd->rd_rgl;
 
@@ -2236,7 +2319,6 @@ void gfs2_rgrp_dump(struct seq_file *seq, struct gfs2_rgrpd *rgd,
                               be32_to_cpu(rgl->rl_free),
                               be32_to_cpu(rgl->rl_dinodes));
        }
-       spin_lock(&rgd->rd_rsspin);
        for (n = rb_first(&rgd->rd_rstree); n; n = rb_next(&trs->rs_node)) {
                trs = rb_entry(n, struct gfs2_blkreserv, rs_node);
                dump_rs(seq, trs, fs_id_buf);
@@ -2273,29 +2355,29 @@ static void gfs2_adjust_reservation(struct gfs2_inode *ip,
 {
        struct gfs2_blkreserv *rs = &ip->i_res;
        struct gfs2_rgrpd *rgd = rbm->rgd;
-       unsigned rlen;
-       u64 block;
-       int ret;
 
-       spin_lock(&rgd->rd_rsspin);
+       BUG_ON(rs->rs_reserved < len);
+       rs->rs_reserved -= len;
        if (gfs2_rs_active(rs)) {
-               if (gfs2_rbm_eq(&rs->rs_rbm, rbm)) {
-                       block = gfs2_rbm_to_block(rbm);
-                       ret = gfs2_rbm_from_block(&rs->rs_rbm, block + len);
-                       rlen = min(rs->rs_free, len);
-                       rs->rs_free -= rlen;
-                       rgd->rd_reserved -= rlen;
+               u64 start = gfs2_rbm_to_block(rbm);
+
+               if (rs->rs_start == start) {
+                       unsigned int rlen;
+
+                       rs->rs_start += len;
+                       rlen = min(rs->rs_requested, len);
+                       rs->rs_requested -= rlen;
+                       rgd->rd_requested -= rlen;
                        trace_gfs2_rs(rs, TRACE_RS_CLAIM);
-                       if (rs->rs_free && !ret)
-                               goto out;
+                       if (rs->rs_start < rgd->rd_data0 + rgd->rd_data &&
+                           rs->rs_requested)
+                               return;
                        /* We used up our block reservation, so we should
                           reserve more blocks next time. */
                        atomic_add(RGRP_RSRV_ADDBLKS, &ip->i_sizehint);
                }
                __rs_deltree(rs);
        }
-out:
-       spin_unlock(&rgd->rd_rsspin);
 }
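
The rewritten gfs2_adjust_reservation() consumes a reservation with plain arithmetic on rs_start and rs_requested instead of recomputing a bitmap position, and the caller is now expected to hold rd_rsspin (the lock/unlock pair moves into gfs2_alloc_blocks below). A small self-contained check of the bookkeeping, with field names mirroring the patch (a model, not the kernel code):

    #include <assert.h>
    #include <stdint.h>

    struct rs { uint64_t rs_start; uint32_t rs_requested; };

    /* Consume @len blocks from the front of a reservation, as in the
     * hunk above: advance the start, shrink the requested count. */
    static void adjust(struct rs *rs, uint32_t len)
    {
            uint32_t rlen = len < rs->rs_requested ? len : rs->rs_requested;

            rs->rs_start += len;
            rs->rs_requested -= rlen;
    }

    int main(void)
    {
            struct rs rs = { .rs_start = 100, .rs_requested = 10 };

            adjust(&rs, 4); /* blocks 100..103 were just allocated */
            assert(rs.rs_start == 104 && rs.rs_requested == 6);
            return 0;
    }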
 
 /**
@@ -2315,15 +2397,13 @@ static void gfs2_set_alloc_start(struct gfs2_rbm *rbm,
        u64 goal;
 
        if (gfs2_rs_active(&ip->i_res)) {
-               *rbm = ip->i_res.rs_rbm;
-               return;
+               goal = ip->i_res.rs_start;
+       } else {
+               if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
+                       goal = ip->i_goal;
+               else
+                       goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
        }
-
-       if (!dinode && rgrp_contains_block(rbm->rgd, ip->i_goal))
-               goal = ip->i_goal;
-       else
-               goal = rbm->rgd->rd_last_alloc + rbm->rgd->rd_data0;
-
        if (WARN_ON_ONCE(gfs2_rbm_from_block(rbm, goal))) {
                rbm->bii = 0;
                rbm->offset = 0;
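
After this hunk, gfs2_set_alloc_start() picks the search goal in a fixed priority order: the start of an active reservation, then the inode's i_goal when it lies inside this resource group (data allocations only), then the group's last allocation point. A one-function sketch of that fallback chain (parameter names are mine, not GFS2's):

    #include <stdbool.h>
    #include <stdint.h>

    static uint64_t pick_goal(bool rs_active, uint64_t rs_start,
                              bool i_goal_usable, uint64_t i_goal,
                              uint64_t last_alloc_block)
    {
            if (rs_active)
                    return rs_start;
            return i_goal_usable ? i_goal : last_alloc_block;
    }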
@@ -2346,17 +2426,21 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct buffer_head *dibh;
-       struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rbm.rgd, };
-       unsigned int ndata;
+       struct gfs2_rbm rbm = { .rgd = ip->i_res.rs_rgd, };
        u64 block; /* block, within the file system scope */
-       int error;
+       u32 minext = 1;
+       int error = -ENOSPC;
 
-       gfs2_set_alloc_start(&rbm, ip, dinode);
-       error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, ip, false);
+       BUG_ON(ip->i_res.rs_reserved < *nblocks);
 
+       rgrp_lock_local(rbm.rgd);
+       if (gfs2_rs_active(&ip->i_res)) {
+               gfs2_set_alloc_start(&rbm, ip, dinode);
+               error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, &ip->i_res, false);
+       }
        if (error == -ENOSPC) {
                gfs2_set_alloc_start(&rbm, ip, dinode);
-               error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, NULL, NULL, false);
+               error = gfs2_rbm_find(&rbm, GFS2_BLKST_FREE, &minext, NULL, false);
        }
 
        /* Since all blocks are reserved in advance, this shouldn't happen */
@@ -2371,14 +2455,8 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
        gfs2_alloc_extent(&rbm, dinode, nblocks);
        block = gfs2_rbm_to_block(&rbm);
        rbm.rgd->rd_last_alloc = block - rbm.rgd->rd_data0;
-       if (gfs2_rs_active(&ip->i_res))
-               gfs2_adjust_reservation(ip, &rbm, *nblocks);
-       ndata = *nblocks;
-       if (dinode)
-               ndata--;
-
        if (!dinode) {
-               ip->i_goal = block + ndata - 1;
+               ip->i_goal = block + *nblocks - 1;
                error = gfs2_meta_inode_buffer(ip, &dibh);
                if (error == 0) {
                        struct gfs2_dinode *di =
@@ -2389,12 +2467,20 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
                        brelse(dibh);
                }
        }
-       if (rbm.rgd->rd_free < *nblocks) {
+       spin_lock(&rbm.rgd->rd_rsspin);
+       gfs2_adjust_reservation(ip, &rbm, *nblocks);
+       if (rbm.rgd->rd_free < *nblocks || rbm.rgd->rd_reserved < *nblocks) {
                fs_warn(sdp, "nblocks=%u\n", *nblocks);
+               spin_unlock(&rbm.rgd->rd_rsspin);
                goto rgrp_error;
        }
-
+       BUG_ON(rbm.rgd->rd_reserved < *nblocks);
+       BUG_ON(rbm.rgd->rd_free_clone < *nblocks);
+       BUG_ON(rbm.rgd->rd_free < *nblocks);
+       rbm.rgd->rd_reserved -= *nblocks;
+       rbm.rgd->rd_free_clone -= *nblocks;
        rbm.rgd->rd_free -= *nblocks;
+       spin_unlock(&rbm.rgd->rd_rsspin);
        if (dinode) {
                rbm.rgd->rd_dinodes++;
                *generation = rbm.rgd->rd_igeneration++;
@@ -2404,6 +2490,7 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
        gfs2_trans_add_meta(rbm.rgd->rd_gl, rbm.rgd->rd_bits[0].bi_bh);
        gfs2_rgrp_out(rbm.rgd, rbm.rgd->rd_bits[0].bi_bh->b_data);
+       rgrp_unlock_local(rbm.rgd);
 
        gfs2_statfs_change(sdp, 0, -(s64)*nblocks, dinode ? 1 : 0);
        if (dinode)
@@ -2411,13 +2498,13 @@ int gfs2_alloc_blocks(struct gfs2_inode *ip, u64 *bn, unsigned int *nblocks,
 
        gfs2_quota_change(ip, *nblocks, ip->i_inode.i_uid, ip->i_inode.i_gid);
 
-       rbm.rgd->rd_free_clone -= *nblocks;
        trace_gfs2_block_alloc(ip, rbm.rgd, block, *nblocks,
                               dinode ? GFS2_BLKST_DINODE : GFS2_BLKST_USED);
        *bn = block;
        return 0;
 
 rgrp_error:
+       rgrp_unlock_local(rbm.rgd);
        gfs2_rgrp_error(rbm.rgd);
        return -EIO;
 }
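
After this change, gfs2_alloc_blocks() holds the new local resource-group lock for the whole allocation, with rd_rsspin nested inside it for the counter updates. A sketch of the nesting, derived from the hunks above:

    /*
     * Lock nesting in gfs2_alloc_blocks() after this patch (sketch):
     *
     *   rgrp_lock_local(rbm.rgd)            rd_mutex, serializes the rgrp
     *     spin_lock(&rbm.rgd->rd_rsspin)    counters + reservation tree:
     *                                       gfs2_adjust_reservation() and the
     *                                       rd_reserved / rd_free_clone /
     *                                       rd_free decrements
     *     spin_unlock(&rbm.rgd->rd_rsspin)
     *   rgrp_unlock_local(rbm.rgd)          on success and rgrp_error alike
     */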
@@ -2437,12 +2524,14 @@ void __gfs2_free_blocks(struct gfs2_inode *ip, struct gfs2_rgrpd *rgd,
 {
        struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
 
+       rgrp_lock_local(rgd);
        rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
        trace_gfs2_block_alloc(ip, rgd, bstart, blen, GFS2_BLKST_FREE);
        rgd->rd_free += blen;
        rgd->rd_flags &= ~GFS2_RGF_TRIMMED;
        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+       rgrp_unlock_local(rgd);
 
        /* Directories keep their data in the metadata address space */
        if (meta || ip->i_depth || gfs2_is_jdata(ip))
@@ -2478,17 +2567,20 @@ void gfs2_unlink_di(struct inode *inode)
        rgd = gfs2_blk2rgrpd(sdp, blkno, true);
        if (!rgd)
                return;
+       rgrp_lock_local(rgd);
        rgblk_free(sdp, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
        trace_gfs2_block_alloc(ip, rgd, blkno, 1, GFS2_BLKST_UNLINKED);
        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
        be32_add_cpu(&rgd->rd_rgl->rl_unlinked, 1);
+       rgrp_unlock_local(rgd);
 }
 
 void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 {
        struct gfs2_sbd *sdp = rgd->rd_sbd;
 
+       rgrp_lock_local(rgd);
        rgblk_free(sdp, rgd, ip->i_no_addr, 1, GFS2_BLKST_FREE);
        if (!rgd->rd_dinodes)
                gfs2_consist_rgrpd(rgd);
@@ -2497,6 +2589,7 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
 
        gfs2_trans_add_meta(rgd->rd_gl, rgd->rd_bits[0].bi_bh);
        gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
+       rgrp_unlock_local(rgd);
        be32_add_cpu(&rgd->rd_rgl->rl_unlinked, -1);
 
        gfs2_statfs_change(sdp, 0, +1, -1);
@@ -2511,6 +2604,10 @@ void gfs2_free_di(struct gfs2_rgrpd *rgd, struct gfs2_inode *ip)
  * @no_addr: The block number to check
  * @type: The block type we are looking for
  *
+ * The inode glock of @no_addr must be held.  The @type to check for is either
+ * GFS2_BLKST_DINODE or GFS2_BLKST_UNLINKED; checking for type GFS2_BLKST_FREE
+ * or GFS2_BLKST_USED would make no sense.
+ *
  * Returns: 0 if the block type matches the expected type
  *          -ESTALE if it doesn't match
  *          or -ve errno if something went wrong while checking
@@ -2534,6 +2631,13 @@ int gfs2_check_blk_type(struct gfs2_sbd *sdp, u64 no_addr, unsigned int type)
        rbm.rgd = rgd;
        error = gfs2_rbm_from_block(&rbm, no_addr);
        if (!WARN_ON_ONCE(error)) {
+               /*
+                * No need to take the local resource group lock here; the
+                * inode glock of @no_addr provides the necessary
+                * synchronization in case the block is an inode.  (In case
+                * the block is not an inode, the block type will not match
+                * the @type we are looking for.)
+                */
                if (gfs2_testbit(&rbm, false) != type)
                        error = -ESTALE;
        }
@@ -2578,7 +2682,7 @@ void gfs2_rlist_add(struct gfs2_inode *ip, struct gfs2_rgrp_list *rlist,
                        return;
                rgd = gfs2_blk2rgrpd(sdp, block, 1);
        } else {
-               rgd = ip->i_res.rs_rbm.rgd;
+               rgd = ip->i_res.rs_rgd;
                if (!rgd || !rgrp_contains_block(rgd, block))
                        rgd = gfs2_blk2rgrpd(sdp, block, 1);
        }
@@ -2633,9 +2737,8 @@ void gfs2_rlist_alloc(struct gfs2_rgrp_list *rlist)
                                      sizeof(struct gfs2_holder),
                                      GFP_NOFS | __GFP_NOFAIL);
        for (x = 0; x < rlist->rl_rgrps; x++)
-               gfs2_holder_init(rlist->rl_rgd[x]->rd_gl,
-                               LM_ST_EXCLUSIVE, 0,
-                               &rlist->rl_ghs[x]);
+               gfs2_holder_init(rlist->rl_rgd[x]->rd_gl, LM_ST_EXCLUSIVE,
+                                LM_FLAG_NODE_SCOPE, &rlist->rl_ghs[x]);
 }
 
 /**
@@ -2658,3 +2761,14 @@ void gfs2_rlist_free(struct gfs2_rgrp_list *rlist)
        }
 }
 
+void rgrp_lock_local(struct gfs2_rgrpd *rgd)
+{
+       BUG_ON(!gfs2_glock_is_held_excl(rgd->rd_gl) &&
+              !test_bit(SDF_NORECOVERY, &rgd->rd_sbd->sd_flags));
+       mutex_lock(&rgd->rd_mutex);
+}
+
+void rgrp_unlock_local(struct gfs2_rgrpd *rgd)
+{
+       mutex_unlock(&rgd->rd_mutex);
+}
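
rgrp_lock_local() and rgrp_unlock_local() wrap the per-rgrp rd_mutex, and the BUG_ON encodes the locking rule: the resource group glock must already be held exclusively, except during recovery (SDF_NORECOVERY). The typical call pattern, as in the __gfs2_free_blocks() hunk above:

    rgrp_lock_local(rgd);
    rgblk_free(sdp, rgd, bstart, blen, GFS2_BLKST_FREE);
    /* ... adjust counters, journal and write back the rgrp header ... */
    gfs2_rgrp_out(rgd, rgd->rd_bits[0].bi_bh->b_data);
    rgrp_unlock_local(rgd);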
index 9a587ad..a6855fd 100644
@@ -77,7 +77,7 @@ extern int gfs2_fitrim(struct file *filp, void __user *argp);
 /* This is how to tell if a reservation is in the rgrp tree: */
 static inline bool gfs2_rs_active(const struct gfs2_blkreserv *rs)
 {
-       return rs && !RB_EMPTY_NODE(&rs->rs_node);
+       return !RB_EMPTY_NODE(&rs->rs_node);
 }
 
 static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
@@ -88,4 +88,8 @@ static inline int rgrp_contains_block(struct gfs2_rgrpd *rgd, u64 block)
 }
 
 extern void check_and_update_goal(struct gfs2_inode *ip);
+
+extern void rgrp_lock_local(struct gfs2_rgrpd *rgd);
+extern void rgrp_unlock_local(struct gfs2_rgrpd *rgd);
+
 #endif /* __RGRP_DOT_H__ */
index 042b942..8fb9602 100644
@@ -81,19 +81,12 @@ void gfs2_jindex_free(struct gfs2_sbd *sdp)
 static struct gfs2_jdesc *jdesc_find_i(struct list_head *head, unsigned int jid)
 {
        struct gfs2_jdesc *jd;
-       int found = 0;
 
        list_for_each_entry(jd, head, jd_list) {
-               if (jd->jd_jid == jid) {
-                       found = 1;
-                       break;
-               }
+               if (jd->jd_jid == jid)
+                       return jd;
        }
-
-       if (!found)
-               jd = NULL;
-
-       return jd;
+       return NULL;
 }
 
 struct gfs2_jdesc *gfs2_jdesc_find(struct gfs2_sbd *sdp, unsigned int jid)
@@ -165,19 +158,14 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 {
        struct gfs2_inode *ip = GFS2_I(sdp->sd_jdesc->jd_inode);
        struct gfs2_glock *j_gl = ip->i_gl;
-       struct gfs2_holder freeze_gh;
        struct gfs2_log_header_host head;
        int error;
 
        error = init_threads(sdp);
-       if (error)
+       if (error) {
+               gfs2_withdraw_delayed(sdp);
                return error;
-
-       error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
-                                  LM_FLAG_NOEXP | GL_EXACT,
-                                  &freeze_gh);
-       if (error)
-               goto fail_threads;
+       }
 
        j_gl->gl_ops->go_inval(j_gl, DIO_METADATA);
        if (gfs2_withdrawn(sdp)) {
@@ -205,13 +193,9 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp)
 
        set_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
-       gfs2_glock_dq_uninit(&freeze_gh);
-
        return 0;
 
 fail:
-       gfs2_glock_dq_uninit(&freeze_gh);
-fail_threads:
        if (sdp->sd_quotad_process)
                kthread_stop(sdp->sd_quotad_process);
        sdp->sd_quotad_process = NULL;
@@ -452,7 +436,7 @@ static int gfs2_lock_fs_check_clean(struct gfs2_sbd *sdp)
        }
 
        if (error)
-               gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+               gfs2_freeze_unlock(&sdp->sd_freeze_gh);
 
 out:
        while (!list_empty(&list)) {
@@ -605,32 +589,10 @@ out:
  * Returns: errno
  */
 
-int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
+void gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 {
-       struct gfs2_holder freeze_gh;
-       int error = 0;
        int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
 
-       gfs2_holder_mark_uninitialized(&freeze_gh);
-       if (sdp->sd_freeze_gl &&
-           !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
-               if (!log_write_allowed) {
-                       error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
-                                                  LM_ST_SHARED, LM_FLAG_TRY |
-                                                  LM_FLAG_NOEXP | GL_EXACT,
-                                                  &freeze_gh);
-                       if (error == GLR_TRYFAILED)
-                               error = 0;
-               } else {
-                       error = gfs2_glock_nq_init(sdp->sd_freeze_gl,
-                                                  LM_ST_SHARED,
-                                                  LM_FLAG_NOEXP | GL_EXACT,
-                                                  &freeze_gh);
-                       if (error && !gfs2_withdrawn(sdp))
-                               return error;
-               }
-       }
-
        gfs2_flush_delete_work(sdp);
        if (!log_write_allowed && current == sdp->sd_quotad_process)
                fs_warn(sdp, "The quotad daemon is withdrawing.\n");
@@ -650,24 +612,19 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_SHUTDOWN |
                               GFS2_LFC_MAKE_FS_RO);
-               wait_event(sdp->sd_reserving_log_wait,
-                          atomic_read(&sdp->sd_reserving_log) == 0);
-               gfs2_assert_warn(sdp, atomic_read(&sdp->sd_log_blks_free) ==
-                                sdp->sd_jdesc->jd_blocks);
+               wait_event_timeout(sdp->sd_log_waitq,
+                                  gfs2_log_is_empty(sdp),
+                                  HZ * 5);
+               gfs2_assert_warn(sdp, gfs2_log_is_empty(sdp));
        } else {
-               wait_event_timeout(sdp->sd_reserving_log_wait,
-                                  atomic_read(&sdp->sd_reserving_log) == 0,
+               wait_event_timeout(sdp->sd_log_waitq,
+                                  gfs2_log_is_empty(sdp),
                                   HZ * 5);
        }
-       if (gfs2_holder_initialized(&freeze_gh))
-               gfs2_glock_dq_uninit(&freeze_gh);
-
        gfs2_quota_cleanup(sdp);
 
        if (!log_write_allowed)
                sdp->sd_vfs->s_flags |= SB_RDONLY;
-
-       return error;
 }
 
 /**
@@ -679,7 +636,6 @@ int gfs2_make_fs_ro(struct gfs2_sbd *sdp)
 static void gfs2_put_super(struct super_block *sb)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
-       int error;
        struct gfs2_jdesc *jd;
 
        /* No more recovery requests */
@@ -700,9 +656,7 @@ restart:
        spin_unlock(&sdp->sd_jindex_spin);
 
        if (!sb_rdonly(sb)) {
-               error = gfs2_make_fs_ro(sdp);
-               if (error)
-                       gfs2_io_error(sdp);
+               gfs2_make_fs_ro(sdp);
        }
        WARN_ON(gfs2_withdrawing(sdp));
 
@@ -770,10 +724,8 @@ void gfs2_freeze_func(struct work_struct *work)
        struct super_block *sb = sdp->sd_vfs;
 
        atomic_inc(&sb->s_active);
-       error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED,
-                                  LM_FLAG_NOEXP | GL_EXACT, &freeze_gh);
+       error = gfs2_freeze_lock(sdp, &freeze_gh, 0);
        if (error) {
-               fs_info(sdp, "GFS2: couldn't get freeze lock : %d\n", error);
                gfs2_assert_withdraw(sdp, 0);
        } else {
                atomic_set(&sdp->sd_freeze_state, SFS_UNFROZEN);
@@ -783,7 +735,7 @@ void gfs2_freeze_func(struct work_struct *work)
                                error);
                        gfs2_assert_withdraw(sdp, 0);
                }
-               gfs2_glock_dq_uninit(&freeze_gh);
+               gfs2_freeze_unlock(&freeze_gh);
        }
        deactivate_super(sb);
        clear_bit_unlock(SDF_FS_FROZEN, &sdp->sd_flags);
@@ -800,11 +752,13 @@ void gfs2_freeze_func(struct work_struct *work)
 static int gfs2_freeze(struct super_block *sb)
 {
        struct gfs2_sbd *sdp = sb->s_fs_info;
-       int error = 0;
+       int error;
 
        mutex_lock(&sdp->sd_freeze_mutex);
-       if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN)
+       if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) {
+               error = -EBUSY;
                goto out;
+       }
 
        for (;;) {
                if (gfs2_withdrawn(sdp)) {
@@ -845,13 +799,13 @@ static int gfs2_unfreeze(struct super_block *sb)
        struct gfs2_sbd *sdp = sb->s_fs_info;
 
        mutex_lock(&sdp->sd_freeze_mutex);
-        if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
+       if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN ||
            !gfs2_holder_initialized(&sdp->sd_freeze_gh)) {
                mutex_unlock(&sdp->sd_freeze_mutex);
-                return 0;
+               return -EINVAL;
        }
 
-       gfs2_glock_dq_uninit(&sdp->sd_freeze_gh);
+       gfs2_freeze_unlock(&sdp->sd_freeze_gh);
        mutex_unlock(&sdp->sd_freeze_mutex);
        return wait_on_bit(&sdp->sd_flags, SDF_FS_FROZEN, TASK_INTERRUPTIBLE);
 }
@@ -1227,7 +1181,8 @@ static int gfs2_dinode_dealloc(struct gfs2_inode *ip)
                goto out_qs;
        }
 
-       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+                                  LM_FLAG_NODE_SCOPE, &gh);
        if (error)
                goto out_qs;
 
index c9fb2a6..ec4affb 100644
 #include <linux/dcache.h>
 #include "incore.h"
 
+/* Supported fs format version range */
+#define GFS2_FS_FORMAT_MIN (1801)
+#define GFS2_FS_FORMAT_MAX (1802)
+
 extern void gfs2_lm_unmount(struct gfs2_sbd *sdp);
 
 static inline unsigned int gfs2_jindex_size(struct gfs2_sbd *sdp)
@@ -30,7 +34,7 @@ extern int gfs2_lookup_in_master_dir(struct gfs2_sbd *sdp, char *filename,
                                     struct gfs2_inode **ipp);
 
 extern int gfs2_make_fs_rw(struct gfs2_sbd *sdp);
-extern int gfs2_make_fs_ro(struct gfs2_sbd *sdp);
+extern void gfs2_make_fs_ro(struct gfs2_sbd *sdp);
 extern void gfs2_online_uevent(struct gfs2_sbd *sdp);
 extern int gfs2_statfs_init(struct gfs2_sbd *sdp);
 extern void gfs2_statfs_change(struct gfs2_sbd *sdp, s64 total, s64 free,
@@ -54,7 +58,9 @@ extern struct file_system_type gfs2meta_fs_type;
 extern const struct export_operations gfs2_export_ops;
 extern const struct super_operations gfs2_super_ops;
 extern const struct dentry_operations gfs2_dops;
-extern const struct xattr_handler *gfs2_xattr_handlers[];
+
+extern const struct xattr_handler *gfs2_xattr_handlers_max[];
+extern const struct xattr_handler **gfs2_xattr_handlers_min;
 
 #endif /* __SUPER_DOT_H__ */
 
index 0b2f858..bd6c8e9 100644
@@ -560,6 +560,7 @@ TRACE_EVENT(gfs2_block_alloc,
                __field(        u8,     block_state             )
                __field(        u64,    rd_addr                 )
                __field(        u32,    rd_free_clone           )
+               __field(        u32,    rd_requested            )
                __field(        u32,    rd_reserved             )
        ),
 
@@ -571,17 +572,20 @@ TRACE_EVENT(gfs2_block_alloc,
                __entry->block_state    = block_state;
                __entry->rd_addr        = rgd->rd_addr;
                __entry->rd_free_clone  = rgd->rd_free_clone;
+               __entry->rd_requested   = rgd->rd_requested;
                __entry->rd_reserved    = rgd->rd_reserved;
        ),
 
-       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rr:%lu",
+       TP_printk("%u,%u bmap %llu alloc %llu/%lu %s rg:%llu rf:%u rq:%u rr:%u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->inum,
                  (unsigned long long)__entry->start,
                  (unsigned long)__entry->len,
                  block_state_name(__entry->block_state),
                  (unsigned long long)__entry->rd_addr,
-                 __entry->rd_free_clone, (unsigned long)__entry->rd_reserved)
+                 __entry->rd_free_clone,
+                 __entry->rd_requested,
+                 __entry->rd_reserved)
 );
 
 /* Keep track of multi-block reservations as they are allocated/freed */
@@ -595,33 +599,40 @@ TRACE_EVENT(gfs2_rs,
                __field(        dev_t,  dev                     )
                __field(        u64,    rd_addr                 )
                __field(        u32,    rd_free_clone           )
+               __field(        u32,    rd_requested            )
                __field(        u32,    rd_reserved             )
                __field(        u64,    inum                    )
                __field(        u64,    start                   )
-               __field(        u32,    free                    )
+               __field(        u32,    requested               )
+               __field(        u32,    reserved                )
                __field(        u8,     func                    )
        ),
 
        TP_fast_assign(
-               __entry->dev            = rs->rs_rbm.rgd->rd_sbd->sd_vfs->s_dev;
-               __entry->rd_addr        = rs->rs_rbm.rgd->rd_addr;
-               __entry->rd_free_clone  = rs->rs_rbm.rgd->rd_free_clone;
-               __entry->rd_reserved    = rs->rs_rbm.rgd->rd_reserved;
+               __entry->dev            = rs->rs_rgd->rd_sbd->sd_vfs->s_dev;
+               __entry->rd_addr        = rs->rs_rgd->rd_addr;
+               __entry->rd_free_clone  = rs->rs_rgd->rd_free_clone;
+               __entry->rd_requested   = rs->rs_rgd->rd_requested;
+               __entry->rd_reserved    = rs->rs_rgd->rd_reserved;
                __entry->inum           = container_of(rs, struct gfs2_inode,
                                                       i_res)->i_no_addr;
-               __entry->start          = gfs2_rbm_to_block(&rs->rs_rbm);
-               __entry->free           = rs->rs_free;
+               __entry->start          = rs->rs_start;
+               __entry->requested      = rs->rs_requested;
+               __entry->reserved       = rs->rs_reserved;
                __entry->func           = func;
        ),
 
-       TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%lu rr:%lu %s f:%lu",
+       TP_printk("%u,%u bmap %llu resrv %llu rg:%llu rf:%u rq:%u rr:%u %s q:%u r:%u",
                  MAJOR(__entry->dev), MINOR(__entry->dev),
                  (unsigned long long)__entry->inum,
                  (unsigned long long)__entry->start,
                  (unsigned long long)__entry->rd_addr,
-                 (unsigned long)__entry->rd_free_clone,
-                 (unsigned long)__entry->rd_reserved,
-                 rs_func_name(__entry->func), (unsigned long)__entry->free)
+                 __entry->rd_free_clone,
+                 __entry->rd_requested,
+                 __entry->rd_reserved,
+                 rs_func_name(__entry->func),
+                 __entry->requested,
+                 __entry->reserved)
 );
 
 #endif /* _TRACE_GFS2_H */
index 6d4bf7e..63fec11 100644
@@ -31,17 +31,17 @@ static void gfs2_print_trans(struct gfs2_sbd *sdp, const struct gfs2_trans *tr)
        fs_warn(sdp, "blocks=%u revokes=%u reserved=%u touched=%u\n",
                tr->tr_blocks, tr->tr_revokes, tr->tr_reserved,
                test_bit(TR_TOUCHED, &tr->tr_flags));
-       fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u/%u\n",
+       fs_warn(sdp, "Buf %u/%u Databuf %u/%u Revoke %u\n",
                tr->tr_num_buf_new, tr->tr_num_buf_rm,
                tr->tr_num_databuf_new, tr->tr_num_databuf_rm,
-               tr->tr_num_revoke, tr->tr_num_revoke_rm);
+               tr->tr_num_revoke);
 }
 
-int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
-                    unsigned int revokes)
+int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
+                      unsigned int blocks, unsigned int revokes,
+                      unsigned long ip)
 {
-       struct gfs2_trans *tr;
-       int error;
+       unsigned int extra_revokes;
 
        if (current->journal_info) {
                gfs2_print_trans(sdp, current->journal_info);
@@ -52,39 +52,72 @@ int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
        if (!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))
                return -EROFS;
 
-       tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
-       if (!tr)
-               return -ENOMEM;
-
-       tr->tr_ip = _RET_IP_;
+       tr->tr_ip = ip;
        tr->tr_blocks = blocks;
        tr->tr_revokes = revokes;
-       tr->tr_reserved = 1;
-       set_bit(TR_ALLOCED, &tr->tr_flags);
-       if (blocks)
-               tr->tr_reserved += 6 + blocks;
-       if (revokes)
-               tr->tr_reserved += gfs2_struct2blk(sdp, revokes);
+       tr->tr_reserved = GFS2_LOG_FLUSH_MIN_BLOCKS;
+       if (blocks) {
+               /*
+                * The reserved blocks are either used for data or metadata.
+                * We can have mixed data and metadata, each with its own log
+                * descriptor block; see calc_reserved().
+                */
+               tr->tr_reserved += blocks + 1 + DIV_ROUND_UP(blocks - 1, databuf_limit(sdp));
+       }
        INIT_LIST_HEAD(&tr->tr_databuf);
        INIT_LIST_HEAD(&tr->tr_buf);
        INIT_LIST_HEAD(&tr->tr_list);
        INIT_LIST_HEAD(&tr->tr_ail1_list);
        INIT_LIST_HEAD(&tr->tr_ail2_list);
 
+       if (gfs2_assert_warn(sdp, tr->tr_reserved <= sdp->sd_jdesc->jd_blocks))
+               return -EINVAL;
+
        sb_start_intwrite(sdp->sd_vfs);
 
-       error = gfs2_log_reserve(sdp, tr->tr_reserved);
-       if (error)
-               goto fail;
+       /*
+        * Try the reservations under sd_log_flush_lock to prevent log flushes
+        * from creating inconsistencies between the number of allocated and
+        * reserved revokes.  If that fails, do a full-block allocation outside
+        * of the lock to avoid stalling log flushes.  Then, allot the
+        * appropriate number of blocks to revokes, use as many revokes locally
+        * as needed, and "release" the surplus into the revokes pool.
+        */
+
+       down_read(&sdp->sd_log_flush_lock);
+       if (gfs2_log_try_reserve(sdp, tr, &extra_revokes))
+               goto reserved;
+       up_read(&sdp->sd_log_flush_lock);
+       gfs2_log_reserve(sdp, tr, &extra_revokes);
+       down_read(&sdp->sd_log_flush_lock);
+
+reserved:
+       gfs2_log_release_revokes(sdp, extra_revokes);
+       if (unlikely(!test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags))) {
+               gfs2_log_release_revokes(sdp, tr->tr_revokes);
+               up_read(&sdp->sd_log_flush_lock);
+               gfs2_log_release(sdp, tr->tr_reserved);
+               sb_end_intwrite(sdp->sd_vfs);
+               return -EROFS;
+       }
 
        current->journal_info = tr;
 
        return 0;
+}
 
-fail:
-       sb_end_intwrite(sdp->sd_vfs);
-       kmem_cache_free(gfs2_trans_cachep, tr);
+int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
+                    unsigned int revokes)
+{
+       struct gfs2_trans *tr;
+       int error;
 
+       tr = kmem_cache_zalloc(gfs2_trans_cachep, GFP_NOFS);
+       if (!tr)
+               return -ENOMEM;
+       error = __gfs2_trans_begin(tr, sdp, blocks, revokes, _RET_IP_);
+       if (error)
+               kmem_cache_free(gfs2_trans_cachep, tr);
        return error;
 }
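
The comment in __gfs2_trans_begin() describes a two-phase reservation: try cheaply while already holding sd_log_flush_lock for reading, and only if that fails drop the lock, block until the reservation fits, and retake the lock. A compilable userspace model of the control flow, with a pthread rwlock standing in for sd_log_flush_lock (the stubs and names are illustrative):

    #include <pthread.h>
    #include <stdbool.h>

    static pthread_rwlock_t flush_lock = PTHREAD_RWLOCK_INITIALIZER;

    /* Fast path: reserve only if it fits right now (stub: log full). */
    static bool try_reserve(unsigned int blocks) { (void)blocks; return false; }
    /* Slow path: may sleep, e.g. waiting for a log flush (stub). */
    static void reserve_blocking(unsigned int blocks) { (void)blocks; }

    static void trans_begin(unsigned int blocks)
    {
            pthread_rwlock_rdlock(&flush_lock);
            if (try_reserve(blocks))
                    return;                 /* reserved, lock still held */
            pthread_rwlock_unlock(&flush_lock);
            reserve_blocking(blocks);       /* outside the lock: no stalls */
            pthread_rwlock_rdlock(&flush_lock);
    }

Holding the read side across the transaction is also what lets gfs2_trans_end() in the next hunk drop it with up_read() on both the touched and untouched paths.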
 
@@ -92,37 +125,39 @@ void gfs2_trans_end(struct gfs2_sbd *sdp)
 {
        struct gfs2_trans *tr = current->journal_info;
        s64 nbuf;
-       int alloced = test_bit(TR_ALLOCED, &tr->tr_flags);
 
        current->journal_info = NULL;
 
        if (!test_bit(TR_TOUCHED, &tr->tr_flags)) {
+               gfs2_log_release_revokes(sdp, tr->tr_revokes);
+               up_read(&sdp->sd_log_flush_lock);
                gfs2_log_release(sdp, tr->tr_reserved);
-               if (alloced) {
+               if (!test_bit(TR_ONSTACK, &tr->tr_flags))
                        gfs2_trans_free(sdp, tr);
-                       sb_end_intwrite(sdp->sd_vfs);
-               }
+               sb_end_intwrite(sdp->sd_vfs);
                return;
        }
 
+       gfs2_log_release_revokes(sdp, tr->tr_revokes - tr->tr_num_revoke);
+
        nbuf = tr->tr_num_buf_new + tr->tr_num_databuf_new;
        nbuf -= tr->tr_num_buf_rm;
        nbuf -= tr->tr_num_databuf_rm;
 
-       if (gfs2_assert_withdraw(sdp, (nbuf <= tr->tr_blocks) &&
-                                      (tr->tr_num_revoke <= tr->tr_revokes)))
+       if (gfs2_assert_withdraw(sdp, nbuf <= tr->tr_blocks) ||
+           gfs2_assert_withdraw(sdp, tr->tr_num_revoke <= tr->tr_revokes))
                gfs2_print_trans(sdp, tr);
 
        gfs2_log_commit(sdp, tr);
-       if (alloced && !test_bit(TR_ATTACHED, &tr->tr_flags))
+       if (!test_bit(TR_ONSTACK, &tr->tr_flags) &&
+           !test_bit(TR_ATTACHED, &tr->tr_flags))
                gfs2_trans_free(sdp, tr);
        up_read(&sdp->sd_log_flush_lock);
 
        if (sdp->sd_vfs->s_flags & SB_SYNCHRONOUS)
                gfs2_log_flush(sdp, NULL, GFS2_LOG_HEAD_FLUSH_NORMAL |
                               GFS2_LFC_TRANS_END);
-       if (alloced)
-               sb_end_intwrite(sdp->sd_vfs);
+       sb_end_intwrite(sdp->sd_vfs);
 }
 
 static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
@@ -134,6 +169,8 @@ static struct gfs2_bufdata *gfs2_alloc_bufdata(struct gfs2_glock *gl,
        bd->bd_bh = bh;
        bd->bd_gl = gl;
        INIT_LIST_HEAD(&bd->bd_list);
+       INIT_LIST_HEAD(&bd->bd_ail_st_list);
+       INIT_LIST_HEAD(&bd->bd_ail_gl_list);
        bh->b_private = bd;
        return bd;
 }
@@ -262,7 +299,6 @@ void gfs2_trans_add_revoke(struct gfs2_sbd *sdp, struct gfs2_bufdata *bd)
 void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
 {
        struct gfs2_bufdata *bd, *tmp;
-       struct gfs2_trans *tr = current->journal_info;
        unsigned int n = len;
 
        gfs2_log_lock(sdp);
@@ -274,7 +310,7 @@ void gfs2_trans_remove_revoke(struct gfs2_sbd *sdp, u64 blkno, unsigned int len)
                        if (bd->bd_gl)
                                gfs2_glock_remove_revoke(bd->bd_gl);
                        kmem_cache_free(gfs2_bufdata_cachep, bd);
-                       tr->tr_num_revoke_rm++;
+                       gfs2_log_release_revokes(sdp, 1);
                        if (--n == 0)
                                break;
                }
index 83199ce..c76ad9a 100644
@@ -27,13 +27,16 @@ struct gfs2_glock;
  * block, or all of the blocks in the rg, whichever is smaller */
 static inline unsigned int gfs2_rg_blocks(const struct gfs2_inode *ip, unsigned requested)
 {
-       struct gfs2_rgrpd *rgd = ip->i_res.rs_rbm.rgd;
+       struct gfs2_rgrpd *rgd = ip->i_res.rs_rgd;
 
        if (requested < rgd->rd_length)
                return requested + 1;
        return rgd->rd_length;
 }
 
+extern int __gfs2_trans_begin(struct gfs2_trans *tr, struct gfs2_sbd *sdp,
+                             unsigned int blocks, unsigned int revokes,
+                             unsigned long ip);
 extern int gfs2_trans_begin(struct gfs2_sbd *sdp, unsigned int blocks,
                            unsigned int revokes);
 
index a374397..4f034b8 100644
@@ -91,18 +91,50 @@ out_unlock:
        return error;
 }
 
+/**
+ * gfs2_freeze_lock - hold the freeze glock
+ * @sdp: the superblock
+ * @freeze_gh: pointer to the requested holder
+ * @caller_flags: any additional flags needed by the caller
+ */
+int gfs2_freeze_lock(struct gfs2_sbd *sdp, struct gfs2_holder *freeze_gh,
+                    int caller_flags)
+{
+       int flags = LM_FLAG_NOEXP | GL_EXACT | caller_flags;
+       int error;
+
+       error = gfs2_glock_nq_init(sdp->sd_freeze_gl, LM_ST_SHARED, flags,
+                                  freeze_gh);
+       if (error && error != GLR_TRYFAILED)
+               fs_err(sdp, "can't lock the freeze lock: %d\n", error);
+       return error;
+}
+
+void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh)
+{
+       if (gfs2_holder_initialized(freeze_gh))
+               gfs2_glock_dq_uninit(freeze_gh);
+}
+
 static void signal_our_withdraw(struct gfs2_sbd *sdp)
 {
-       struct gfs2_glock *gl = sdp->sd_live_gh.gh_gl;
-       struct inode *inode = sdp->sd_jdesc->jd_inode;
-       struct gfs2_inode *ip = GFS2_I(inode);
-       u64 no_formal_ino = ip->i_no_formal_ino;
+       struct gfs2_glock *live_gl = sdp->sd_live_gh.gh_gl;
+       struct inode *inode;
+       struct gfs2_inode *ip;
+       struct gfs2_glock *i_gl;
+       u64 no_formal_ino;
+       int log_write_allowed = test_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
        int ret = 0;
        int tries;
 
-       if (test_bit(SDF_NORECOVERY, &sdp->sd_flags))
+       if (test_bit(SDF_NORECOVERY, &sdp->sd_flags) || !sdp->sd_jdesc)
                return;
 
+       inode = sdp->sd_jdesc->jd_inode;
+       ip = GFS2_I(inode);
+       i_gl = ip->i_gl;
+       no_formal_ino = ip->i_no_formal_ino;
+
        /* Prevent any glock dq until withdraw recovery is complete */
        set_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
        /*
@@ -117,8 +149,21 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
         * therefore we need to clear SDF_JOURNAL_LIVE manually.
         */
        clear_bit(SDF_JOURNAL_LIVE, &sdp->sd_flags);
-       if (!sb_rdonly(sdp->sd_vfs))
-               ret = gfs2_make_fs_ro(sdp);
+       if (!sb_rdonly(sdp->sd_vfs)) {
+               struct gfs2_holder freeze_gh;
+
+               gfs2_holder_mark_uninitialized(&freeze_gh);
+               if (sdp->sd_freeze_gl &&
+                   !gfs2_glock_is_locked_by_me(sdp->sd_freeze_gl)) {
+                       ret = gfs2_freeze_lock(sdp, &freeze_gh,
+                                      log_write_allowed ? 0 : LM_FLAG_TRY);
+                       if (ret == GLR_TRYFAILED)
+                               ret = 0;
+               }
+               if (!ret)
+                       gfs2_make_fs_ro(sdp);
+               gfs2_freeze_unlock(&freeze_gh);
+       }
 
        if (sdp->sd_lockstruct.ls_ops->lm_lock == NULL) { /* lock_nolock */
                if (!ret)
@@ -141,7 +186,8 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
                atomic_set(&sdp->sd_freeze_state, SFS_FROZEN);
                thaw_super(sdp->sd_vfs);
        } else {
-               wait_on_bit(&gl->gl_flags, GLF_DEMOTE, TASK_UNINTERRUPTIBLE);
+               wait_on_bit(&i_gl->gl_flags, GLF_DEMOTE,
+                           TASK_UNINTERRUPTIBLE);
        }
 
        /*
@@ -161,15 +207,15 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
         * on other nodes to be successful, otherwise we remain the owner of
         * the glock as far as dlm is concerned.
         */
-       if (gl->gl_ops->go_free) {
-               set_bit(GLF_FREEING, &gl->gl_flags);
-               wait_on_bit(&gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
+       if (i_gl->gl_ops->go_free) {
+               set_bit(GLF_FREEING, &i_gl->gl_flags);
+               wait_on_bit(&i_gl->gl_flags, GLF_FREEING, TASK_UNINTERRUPTIBLE);
        }
 
        /*
         * Dequeue the "live" glock, but keep a reference so it's never freed.
         */
-       gfs2_glock_hold(gl);
+       gfs2_glock_hold(live_gl);
        gfs2_glock_dq_wait(&sdp->sd_live_gh);
        /*
         * We enqueue the "live" glock in EX so that all other nodes
@@ -208,7 +254,7 @@ static void signal_our_withdraw(struct gfs2_sbd *sdp)
                gfs2_glock_nq(&sdp->sd_live_gh);
        }
 
-       gfs2_glock_queue_put(gl); /* drop the extra reference we acquired */
+       gfs2_glock_queue_put(live_gl); /* drop extra reference we acquired */
        clear_bit(SDF_WITHDRAW_RECOVERY, &sdp->sd_flags);
 
        /*
index a4443dd..69e1a0a 100644
@@ -149,6 +149,9 @@ int gfs2_io_error_i(struct gfs2_sbd *sdp, const char *function,
 
 extern int check_journal_clean(struct gfs2_sbd *sdp, struct gfs2_jdesc *jd,
                               bool verbose);
+extern int gfs2_freeze_lock(struct gfs2_sbd *sdp,
+                           struct gfs2_holder *freeze_gh, int caller_flags);
+extern void gfs2_freeze_unlock(struct gfs2_holder *freeze_gh);
 
 #define gfs2_io_error(sdp) \
 gfs2_io_error_i((sdp), __func__, __FILE__, __LINE__)
index 9d7667b..124b3d5 100644
@@ -70,6 +70,20 @@ static int ea_check_size(struct gfs2_sbd *sdp, unsigned int nsize, size_t dsize)
        return 0;
 }
 
+static bool gfs2_eatype_valid(struct gfs2_sbd *sdp, u8 type)
+{
+       switch (sdp->sd_sb.sb_fs_format) {
+       case GFS2_FS_FORMAT_MAX:
+               return true;
+
+       case GFS2_FS_FORMAT_MIN:
+               return type <= GFS2_EATYPE_SECURITY;
+
+       default:
+               return false;
+       }
+}
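
gfs2_eatype_valid() gates the new trusted xattr type on the on-disk format: format 1802 accepts every type, while format 1801 accepts only the types up to GFS2_EATYPE_SECURITY. A standalone model of the gate; the numeric type values are assumptions based on gfs2_ondisk.h, where GFS2_EATYPE_SECURITY is 3 and the new GFS2_EATYPE_TRUSTED is 4:

    #include <assert.h>
    #include <stdbool.h>

    enum { FMT_MIN = 1801, FMT_MAX = 1802 };        /* see the super.h hunk */
    enum { EA_SECURITY = 3, EA_TRUSTED = 4 };       /* assumed values */

    static bool eatype_valid(unsigned int fmt, unsigned int type)
    {
            switch (fmt) {
            case FMT_MAX: return true;
            case FMT_MIN: return type <= EA_SECURITY;
            default:      return false;
            }
    }

    int main(void)
    {
            assert(eatype_valid(FMT_MAX, EA_TRUSTED));
            assert(!eatype_valid(FMT_MIN, EA_TRUSTED));
            return 0;
    }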
+
 typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
                          struct gfs2_ea_header *ea,
                          struct gfs2_ea_header *prev, void *private);
@@ -77,6 +91,7 @@ typedef int (*ea_call_t) (struct gfs2_inode *ip, struct buffer_head *bh,
 static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
                        ea_call_t ea_call, void *data)
 {
+       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct gfs2_ea_header *ea, *prev = NULL;
        int error = 0;
 
@@ -89,9 +104,8 @@ static int ea_foreach_i(struct gfs2_inode *ip, struct buffer_head *bh,
                if (!(bh->b_data <= (char *)ea && (char *)GFS2_EA2NEXT(ea) <=
                                                  bh->b_data + bh->b_size))
                        goto fail;
-               if (!GFS2_EATYPE_VALID(ea->ea_type))
+               if (!gfs2_eatype_valid(sdp, ea->ea_type))
                        goto fail;
-
                error = ea_call(ip, bh, ea, prev, data);
                if (error)
                        return error;
@@ -259,7 +273,8 @@ static int ea_dealloc_unstuffed(struct gfs2_inode *ip, struct buffer_head *bh,
                return -EIO;
        }
 
-       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &rg_gh);
+       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+                                  LM_FLAG_NODE_SCOPE, &rg_gh);
        if (error)
                return error;
 
@@ -344,6 +359,7 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
                     struct gfs2_ea_header *ea, struct gfs2_ea_header *prev,
                     void *private)
 {
+       struct gfs2_sbd *sdp = GFS2_SB(&ip->i_inode);
        struct ea_list *ei = private;
        struct gfs2_ea_request *er = ei->ei_er;
        unsigned int ea_size;
@@ -353,6 +369,8 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
        if (ea->ea_type == GFS2_EATYPE_UNUSED)
                return 0;
 
+       BUG_ON(ea->ea_type > GFS2_EATYPE_SECURITY &&
+              sdp->sd_sb.sb_fs_format == GFS2_FS_FORMAT_MIN);
        switch (ea->ea_type) {
        case GFS2_EATYPE_USR:
                prefix = "user.";
@@ -366,8 +384,12 @@ static int ea_list_i(struct gfs2_inode *ip, struct buffer_head *bh,
                prefix = "security.";
                l = 9;
                break;
+       case GFS2_EATYPE_TRUSTED:
+               prefix = "trusted.";
+               l = 8;
+               break;
        default:
-               BUG();
+               return 0;
        }
 
        ea_size = l + ea->ea_name_len + 1;
@@ -1214,6 +1236,7 @@ int __gfs2_xattr_set(struct inode *inode, const char *name,
 }
 
 static int gfs2_xattr_set(const struct xattr_handler *handler,
+                         struct user_namespace *mnt_userns,
                          struct dentry *unused, struct inode *inode,
                          const char *name, const void *value,
                          size_t size, int flags)
@@ -1385,7 +1408,8 @@ static int ea_dealloc_block(struct gfs2_inode *ip)
                return -EIO;
        }
 
-       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE, 0, &gh);
+       error = gfs2_glock_nq_init(rgd->rd_gl, LM_ST_EXCLUSIVE,
+                                  LM_FLAG_NODE_SCOPE, &gh);
        if (error)
                return error;
 
@@ -1463,7 +1487,25 @@ static const struct xattr_handler gfs2_xattr_security_handler = {
        .set    = gfs2_xattr_set,
 };
 
-const struct xattr_handler *gfs2_xattr_handlers[] = {
+static bool
+gfs2_xattr_trusted_list(struct dentry *dentry)
+{
+       return capable(CAP_SYS_ADMIN);
+}
+
+static const struct xattr_handler gfs2_xattr_trusted_handler = {
+       .prefix = XATTR_TRUSTED_PREFIX,
+       .flags  = GFS2_EATYPE_TRUSTED,
+       .list   = gfs2_xattr_trusted_list,
+       .get    = gfs2_xattr_get,
+       .set    = gfs2_xattr_set,
+};
+
+const struct xattr_handler *gfs2_xattr_handlers_max[] = {
+       /* GFS2_FS_FORMAT_MAX */
+       &gfs2_xattr_trusted_handler,
+
+       /* GFS2_FS_FORMAT_MIN */
        &gfs2_xattr_user_handler,
        &gfs2_xattr_security_handler,
        &posix_acl_access_xattr_handler,
@@ -1471,3 +1513,4 @@ const struct xattr_handler *gfs2_xattr_handlers[] = {
        NULL,
 };
 
+const struct xattr_handler **gfs2_xattr_handlers_min = gfs2_xattr_handlers_max + 1;
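
The final line relies on plain pointer arithmetic: gfs2_xattr_handlers_min points one element into gfs2_xattr_handlers_max, so old-format filesystems see the same NULL-terminated handler list minus the leading trusted handler. The idiom in isolation (the names below are mine, not GFS2's):

    #include <stdio.h>

    static const char *handlers_max[] = { "trusted", "user", "security", NULL };
    static const char **handlers_min = handlers_max + 1;   /* skips "trusted" */

    int main(void)
    {
            for (const char **h = handlers_min; *h; h++)
                    printf("%s\n", *h);     /* prints "user", then "security" */
            return 0;
    }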
index 74fa626..2bd54ef 100644
@@ -121,6 +121,7 @@ static int hfs_xattr_get(const struct xattr_handler *handler,
 }
 
 static int hfs_xattr_set(const struct xattr_handler *handler,
+                        struct user_namespace *mnt_userns,
                         struct dentry *unused, struct inode *inode,
                         const char *name, const void *value, size_t size,
                         int flags)
index 3bf2ae0..527f6e4 100644
@@ -189,8 +189,8 @@ static int hfs_dir_release(struct inode *inode, struct file *file)
  * a directory and return a corresponding inode, given the inode for
  * the directory and the name (and its length) of the new file.
  */
-static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                     bool excl)
+static int hfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, bool excl)
 {
        struct inode *inode;
        int res;
@@ -219,7 +219,8 @@ static int hfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
  * in a directory, given the inode for the parent directory and the
  * name (and its length) of the new directory.
  */
-static int hfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        int res;
@@ -279,9 +280,9 @@ static int hfs_remove(struct inode *dir, struct dentry *dentry)
  * new file/directory.
  * XXX: how do you handle must_be dir?
  */
-static int hfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                     struct inode *new_dir, struct dentry *new_dentry,
-                     unsigned int flags)
+static int hfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                     struct dentry *old_dentry, struct inode *new_dir,
+                     struct dentry *new_dentry, unsigned int flags)
 {
        int res;
 
index f71c384..b8eb032 100644
@@ -204,7 +204,8 @@ extern const struct address_space_operations hfs_btree_aops;
 extern struct inode *hfs_new_inode(struct inode *, const struct qstr *, umode_t);
 extern void hfs_inode_write_fork(struct inode *, struct hfs_extent *, __be32 *, __be32 *);
 extern int hfs_write_inode(struct inode *, struct writeback_control *);
-extern int hfs_inode_setattr(struct dentry *, struct iattr *);
+extern int hfs_inode_setattr(struct user_namespace *, struct dentry *,
+                            struct iattr *);
 extern void hfs_inode_read_fork(struct inode *inode, struct hfs_extent *ext,
                        __be32 log_size, __be32 phys_size, u32 clump_size);
 extern struct inode *hfs_iget(struct super_block *, struct hfs_cat_key *, hfs_cat_rec *);
index f35a37c..3fc5cb3 100644
@@ -602,13 +602,15 @@ static int hfs_file_release(struct inode *inode, struct file *file)
  *     correspond to the same HFS file.
  */
 
-int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
+int hfs_inode_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                     struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct hfs_sb_info *hsb = HFS_SB(inode->i_sb);
        int error;
 
-       error = setattr_prepare(dentry, attr); /* basic permission checks */
+       error = setattr_prepare(&init_user_ns, dentry,
+                               attr); /* basic permission checks */
        if (error)
                return error;
 
@@ -647,7 +649,7 @@ int hfs_inode_setattr(struct dentry *dentry, struct iattr * attr)
                                                  current_time(inode);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
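
The hfs, hfsplus, and hostfs hunks in the rest of this section are all part of the same v5.12 API change for idmapped mounts: every inode operation and the related VFS helpers gain a struct user_namespace *mnt_userns parameter, and filesystems that have not been converted to honor idmapped mounts pass &init_user_ns to the helpers. The pattern, condensed from the hfs_inode_setattr() hunk above (kernel context assumed; foo_setattr is a placeholder name):

    static int foo_setattr(struct user_namespace *mnt_userns,
                           struct dentry *dentry, struct iattr *attr)
    {
            struct inode *inode = d_inode(dentry);
            int error;

            /* mnt_userns is accepted but not used: this filesystem does not
             * support idmapped mounts, so permission checks and attribute
             * copies run against the initial user namespace. */
            error = setattr_prepare(&init_user_ns, dentry, attr);
            if (error)
                    return error;
            setattr_copy(&init_user_ns, inode, attr);
            mark_inode_dirty(inode);
            return 0;
    }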
index 29a9dcf..03e6c04 100644
@@ -434,8 +434,8 @@ out:
        return res;
 }
 
-static int hfsplus_symlink(struct inode *dir, struct dentry *dentry,
-                          const char *symname)
+static int hfsplus_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                          struct dentry *dentry, const char *symname)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
        struct inode *inode;
@@ -476,8 +476,8 @@ out:
        return res;
 }
 
-static int hfsplus_mknod(struct inode *dir, struct dentry *dentry,
-                        umode_t mode, dev_t rdev)
+static int hfsplus_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct hfsplus_sb_info *sbi = HFSPLUS_SB(dir->i_sb);
        struct inode *inode;
@@ -517,18 +517,20 @@ out:
        return res;
 }
 
-static int hfsplus_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                         bool excl)
+static int hfsplus_create(struct user_namespace *mnt_userns, struct inode *dir,
+                         struct dentry *dentry, umode_t mode, bool excl)
 {
-       return hfsplus_mknod(dir, dentry, mode, 0);
+       return hfsplus_mknod(&init_user_ns, dir, dentry, mode, 0);
 }
 
-static int hfsplus_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hfsplus_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode)
 {
-       return hfsplus_mknod(dir, dentry, mode | S_IFDIR, 0);
+       return hfsplus_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
 }
 
-static int hfsplus_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int hfsplus_rename(struct user_namespace *mnt_userns,
+                         struct inode *old_dir, struct dentry *old_dentry,
                          struct inode *new_dir, struct dentry *new_dentry,
                          unsigned int flags)
 {
index a92de51..12b2047 100644
@@ -488,8 +488,9 @@ void hfsplus_inode_write_fork(struct inode *inode,
                              struct hfsplus_fork_raw *fork);
 int hfsplus_cat_read_inode(struct inode *inode, struct hfs_find_data *fd);
 int hfsplus_cat_write_inode(struct inode *inode);
-int hfsplus_getattr(const struct path *path, struct kstat *stat,
-                   u32 request_mask, unsigned int query_flags);
+int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                   struct kstat *stat, u32 request_mask,
+                   unsigned int query_flags);
 int hfsplus_file_fsync(struct file *file, loff_t start, loff_t end,
                       int datasync);
 
index ca46432..078c5c8 100644
@@ -241,12 +241,13 @@ static int hfsplus_file_release(struct inode *inode, struct file *file)
        return 0;
 }
 
-static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
+static int hfsplus_setattr(struct user_namespace *mnt_userns,
+                          struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -264,14 +265,15 @@ static int hfsplus_setattr(struct dentry *dentry, struct iattr *attr)
                inode->i_mtime = inode->i_ctime = current_time(inode);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
 
        return 0;
 }
 
-int hfsplus_getattr(const struct path *path, struct kstat *stat,
-                   u32 request_mask, unsigned int query_flags)
+int hfsplus_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                   struct kstat *stat, u32 request_mask,
+                   unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct hfsplus_inode_info *hip = HFSPLUS_I(inode);
@@ -286,7 +288,7 @@ int hfsplus_getattr(const struct path *path, struct kstat *stat,
        stat->attributes_mask |= STATX_ATTR_APPEND | STATX_ATTR_IMMUTABLE |
                                 STATX_ATTR_NODUMP;
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        return 0;
 }
 
@@ -376,7 +378,7 @@ struct inode *hfsplus_new_inode(struct super_block *sb, struct inode *dir,
                return NULL;
 
        inode->i_ino = sbi->next_cnid++;
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        set_nlink(inode, 1);
        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 
index ce15b94..3edb192 100644
@@ -91,7 +91,7 @@ static int hfsplus_ioctl_setflags(struct file *file, int __user *user_flags)
        if (err)
                goto out;
 
-       if (!inode_owner_or_capable(inode)) {
+       if (!inode_owner_or_capable(&init_user_ns, inode)) {
                err = -EACCES;
                goto out_drop_write;
        }
index bb0b27d..4d169c5 100644
@@ -858,6 +858,7 @@ static int hfsplus_osx_getxattr(const struct xattr_handler *handler,
 }
 
 static int hfsplus_osx_setxattr(const struct xattr_handler *handler,
+                               struct user_namespace *mnt_userns,
                                struct dentry *unused, struct inode *inode,
                                const char *name, const void *buffer,
                                size_t size, int flags)
index cfbe6a3..c1c7a16 100644
@@ -23,6 +23,7 @@ static int hfsplus_security_getxattr(const struct xattr_handler *handler,
 }
 
 static int hfsplus_security_setxattr(const struct xattr_handler *handler,
+                                    struct user_namespace *mnt_userns,
                                     struct dentry *unused, struct inode *inode,
                                     const char *name, const void *buffer,
                                     size_t size, int flags)
index fbad91e..e150372 100644
@@ -22,6 +22,7 @@ static int hfsplus_trusted_getxattr(const struct xattr_handler *handler,
 }
 
 static int hfsplus_trusted_setxattr(const struct xattr_handler *handler,
+                                   struct user_namespace *mnt_userns,
                                    struct dentry *unused, struct inode *inode,
                                    const char *name, const void *buffer,
                                    size_t size, int flags)
index 74d19fa..a6b60b1 100644
@@ -22,6 +22,7 @@ static int hfsplus_user_getxattr(const struct xattr_handler *handler,
 }
 
 static int hfsplus_user_setxattr(const struct xattr_handler *handler,
+                                struct user_namespace *mnt_userns,
                                 struct dentry *unused, struct inode *inode,
                                 const char *name, const void *buffer,
                                 size_t size, int flags)
index 4a5beca..743a005 100644
@@ -144,7 +144,7 @@ static char *follow_link(char *link)
        char *name, *resolved, *end;
        int n;
 
-       name = __getname();
+       name = kmalloc(PATH_MAX, GFP_KERNEL);
        if (!name) {
                n = -ENOMEM;
                goto out_free;
@@ -173,12 +173,11 @@ static char *follow_link(char *link)
                goto out_free;
        }
 
-       __putname(name);
-       kfree(link);
+       kfree(name);
        return resolved;
 
  out_free:
-       __putname(name);
+       kfree(name);
        return ERR_PTR(n);
 }
 
@@ -557,8 +556,8 @@ static int read_name(struct inode *ino, char *name)
        return 0;
 }
 
-static int hostfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                        bool excl)
+static int hostfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode, bool excl)
 {
        struct inode *inode;
        char *name;
@@ -656,8 +655,8 @@ static int hostfs_unlink(struct inode *ino, struct dentry *dentry)
        return err;
 }
 
-static int hostfs_symlink(struct inode *ino, struct dentry *dentry,
-                         const char *to)
+static int hostfs_symlink(struct user_namespace *mnt_userns, struct inode *ino,
+                         struct dentry *dentry, const char *to)
 {
        char *file;
        int err;
@@ -669,7 +668,8 @@ static int hostfs_symlink(struct inode *ino, struct dentry *dentry,
        return err;
 }
 
-static int hostfs_mkdir(struct inode *ino, struct dentry *dentry, umode_t mode)
+static int hostfs_mkdir(struct user_namespace *mnt_userns, struct inode *ino,
+                       struct dentry *dentry, umode_t mode)
 {
        char *file;
        int err;
@@ -693,7 +693,8 @@ static int hostfs_rmdir(struct inode *ino, struct dentry *dentry)
        return err;
 }
 
-static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+static int hostfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode *inode;
        char *name;
@@ -731,7 +732,8 @@ static int hostfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
        return err;
 }
 
-static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
+static int hostfs_rename2(struct user_namespace *mnt_userns,
+                         struct inode *old_dir, struct dentry *old_dentry,
                          struct inode *new_dir, struct dentry *new_dentry,
                          unsigned int flags)
 {
@@ -759,7 +761,8 @@ static int hostfs_rename2(struct inode *old_dir, struct dentry *old_dentry,
        return err;
 }
 
-static int hostfs_permission(struct inode *ino, int desired)
+static int hostfs_permission(struct user_namespace *mnt_userns,
+                            struct inode *ino, int desired)
 {
        char *name;
        int r = 0, w = 0, x = 0, err;
@@ -781,11 +784,12 @@ static int hostfs_permission(struct inode *ino, int desired)
                err = access_file(name, r, w, x);
        __putname(name);
        if (!err)
-               err = generic_permission(ino, desired);
+               err = generic_permission(&init_user_ns, ino, desired);
        return err;
 }
 
-static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int hostfs_setattr(struct user_namespace *mnt_userns,
+                         struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct hostfs_iattr attrs;
@@ -794,7 +798,7 @@ static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
 
        int fd = HOSTFS_I(inode)->fd;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                return err;
 
@@ -851,7 +855,7 @@ static int hostfs_setattr(struct dentry *dentry, struct iattr *attr)
            attr->ia_size != i_size_read(inode))
                truncate_setsize(inode, attr->ia_size);
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
index 1cca832..167ec68 100644 (file)
@@ -280,7 +280,7 @@ void hpfs_init_inode(struct inode *);
 void hpfs_read_inode(struct inode *);
 void hpfs_write_inode(struct inode *);
 void hpfs_write_inode_nolock(struct inode *);
-int hpfs_setattr(struct dentry *, struct iattr *);
+int hpfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 void hpfs_write_if_changed(struct inode *);
 void hpfs_evict_inode(struct inode *);
 
index eb8b4ba..82208cc 100644 (file)
@@ -257,7 +257,8 @@ void hpfs_write_inode_nolock(struct inode *i)
        brelse(bh);
 }
 
-int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
+int hpfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error = -EINVAL;
@@ -274,7 +275,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
        if ((attr->ia_valid & ATTR_SIZE) && attr->ia_size > inode->i_size)
                goto out_unlock;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                goto out_unlock;
 
@@ -288,7 +289,7 @@ int hpfs_setattr(struct dentry *dentry, struct iattr *attr)
                hpfs_truncate(inode);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
 
        hpfs_write_inode(inode);
 
index 1aee391..d73f8a6 100644 (file)
@@ -20,7 +20,8 @@ static void hpfs_update_directory_times(struct inode *dir)
        hpfs_write_inode_nolock(dir);
 }
 
-static int hpfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hpfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
@@ -128,7 +129,8 @@ bail:
        return err;
 }
 
-static int hpfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+static int hpfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
@@ -215,7 +217,8 @@ bail:
        return err;
 }
 
-static int hpfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int hpfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
@@ -289,7 +292,8 @@ bail:
        return err;
 }
 
-static int hpfs_symlink(struct inode *dir, struct dentry *dentry, const char *symlink)
+static int hpfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, const char *symlink)
 {
        const unsigned char *name = dentry->d_name.name;
        unsigned len = dentry->d_name.len;
@@ -506,10 +510,10 @@ fail:
 const struct address_space_operations hpfs_symlink_aops = {
        .readpage       = hpfs_symlink_readpage
 };
-       
-static int hpfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                      struct inode *new_dir, struct dentry *new_dentry,
-                      unsigned int flags)
+
+static int hpfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                      struct dentry *old_dentry, struct inode *new_dir,
+                      struct dentry *new_dentry, unsigned int flags)
 {
        const unsigned char *old_name = old_dentry->d_name.name;
        unsigned old_len = old_dentry->d_name.len;
index 21c20fd..701c82c 100644 (file)
@@ -171,7 +171,7 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
        file_accessed(file);
 
        ret = -ENOMEM;
-       if (hugetlb_reserve_pages(inode,
+       if (!hugetlb_reserve_pages(inode,
                                vma->vm_pgoff >> huge_page_order(h),
                                len >> huge_page_shift(h), vma,
                                vma->vm_flags))
@@ -310,7 +310,7 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
 
 /*
  * Support for read() - Find the page attached to f_mapping and copy out the
- * data. Its *very* similar to do_generic_mapping_read(), we can't use that
+ * data. It's *very* similar to generic_file_buffered_read(), we can't use that
  * since it has PAGE_SIZE assumptions.
  */
 static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
@@ -442,15 +442,15 @@ hugetlb_vmdelete_list(struct rb_root_cached *root, pgoff_t start, pgoff_t end)
  *
  * truncation is indicated by end of range being LLONG_MAX
  *     In this case, we first scan the range and release found pages.
- *     After releasing pages, hugetlb_unreserve_pages cleans up region/reserv
+ *     After releasing pages, hugetlb_unreserve_pages cleans up region/reserve
  *     maps and global counts.  Page faults can not race with truncation
  *     in this routine.  hugetlb_no_page() holds i_mmap_rwsem and prevents
  *     page faults in the truncated range by checking i_size.  i_size is
  *     modified while holding i_mmap_rwsem.
  * hole punch is indicated if end is not LLONG_MAX
  *     In the hole punch case we scan the range and release found pages.
- *     Only when releasing a page is the associated region/reserv map
- *     deleted.  The region/reserv map for ranges without associated
+ *     Only when releasing a page is the associated region/reserve map
+ *     deleted.  The region/reserve map for ranges without associated
  *     pages are not modified.  Page faults can race with hole punch.
  *     This is indicated if we find a mapped page.
  * Note: If the passed end of range value is beyond the end of file, but
@@ -567,7 +567,7 @@ static void hugetlbfs_evict_inode(struct inode *inode)
        clear_inode(inode);
 }
 
-static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
+static void hugetlb_vmtruncate(struct inode *inode, loff_t offset)
 {
        pgoff_t pgoff;
        struct address_space *mapping = inode->i_mapping;
@@ -582,7 +582,6 @@ static int hugetlb_vmtruncate(struct inode *inode, loff_t offset)
                hugetlb_vmdelete_list(&mapping->i_mmap, pgoff, 0);
        i_mmap_unlock_write(mapping);
        remove_inode_hugepages(inode, offset, LLONG_MAX);
-       return 0;
 }
 
 static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
@@ -604,7 +603,7 @@ static long hugetlbfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
 
                inode_lock(inode);
 
-               /* protected by i_mutex */
+               /* protected by i_rwsem */
                if (info->seals & (F_SEAL_WRITE | F_SEAL_FUTURE_WRITE)) {
                        inode_unlock(inode);
                        return -EPERM;
@@ -680,7 +679,6 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                 */
                struct page *page;
                unsigned long addr;
-               int avoid_reserve = 0;
 
                cond_resched();
 
@@ -716,8 +714,15 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
                        continue;
                }
 
-               /* Allocate page and add to page cache */
-               page = alloc_huge_page(&pseudo_vma, addr, avoid_reserve);
+               /*
+                * Allocate page without setting the avoid_reserve argument.
+                * There certainly are no reserves associated with the
+                * pseudo_vma.  However, there could be shared mappings with
+                * reserves for the file at the inode level.  If we fallocate
+                * pages in these areas, we need to consume the reserves
+                * to keep reservation accounting consistent.
+                */
+               page = alloc_huge_page(&pseudo_vma, addr, 0);
                hugetlb_drop_vma_policy(&pseudo_vma);
                if (IS_ERR(page)) {
                        mutex_unlock(&hugetlb_fault_mutex_table[hash]);
@@ -735,7 +740,7 @@ static long hugetlbfs_fallocate(struct file *file, int mode, loff_t offset,
 
                mutex_unlock(&hugetlb_fault_mutex_table[hash]);
 
-               set_page_huge_active(page);
+               SetHPageMigratable(page);
                /*
                 * unlock_page because locked by add_to_page_cache()
                 * put_page() due to reference from alloc_huge_page()
@@ -752,7 +757,8 @@ out:
        return error;
 }
 
-static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int hugetlbfs_setattr(struct user_namespace *mnt_userns,
+                            struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct hstate *h = hstate_inode(inode);
@@ -760,9 +766,7 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
        unsigned int ia_valid = attr->ia_valid;
        struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 
-       BUG_ON(!inode);
-
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -772,16 +776,14 @@ static int hugetlbfs_setattr(struct dentry *dentry, struct iattr *attr)
 
                if (newsize & ~huge_page_mask(h))
                        return -EINVAL;
-               /* protected by i_mutex */
+               /* protected by i_rwsem */
                if ((newsize < oldsize && (info->seals & F_SEAL_SHRINK)) ||
                    (newsize > oldsize && (info->seals & F_SEAL_GROW)))
                        return -EPERM;
-               error = hugetlb_vmtruncate(inode, newsize);
-               if (error)
-                       return error;
+               hugetlb_vmtruncate(inode, newsize);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
@@ -837,7 +839,7 @@ static struct inode *hugetlbfs_get_inode(struct super_block *sb,
                struct hugetlbfs_inode_info *info = HUGETLBFS_I(inode);
 
                inode->i_ino = get_next_ino();
-               inode_init_owner(inode, dir, mode);
+               inode_init_owner(&init_user_ns, inode, dir, mode);
                lockdep_set_class(&inode->i_mapping->i_mmap_rwsem,
                                &hugetlbfs_i_mmap_rwsem_key);
                inode->i_mapping->a_ops = &hugetlbfs_aops;
@@ -899,33 +901,39 @@ static int do_hugetlbfs_mknod(struct inode *dir,
        return error;
 }
 
-static int hugetlbfs_mknod(struct inode *dir,
-                       struct dentry *dentry, umode_t mode, dev_t dev)
+static int hugetlbfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                          struct dentry *dentry, umode_t mode, dev_t dev)
 {
        return do_hugetlbfs_mknod(dir, dentry, mode, dev, false);
 }
 
-static int hugetlbfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int hugetlbfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                          struct dentry *dentry, umode_t mode)
 {
-       int retval = hugetlbfs_mknod(dir, dentry, mode | S_IFDIR, 0);
+       int retval = hugetlbfs_mknod(&init_user_ns, dir, dentry,
+                                    mode | S_IFDIR, 0);
        if (!retval)
                inc_nlink(dir);
        return retval;
 }
 
-static int hugetlbfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+static int hugetlbfs_create(struct user_namespace *mnt_userns,
+                           struct inode *dir, struct dentry *dentry,
+                           umode_t mode, bool excl)
 {
-       return hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0);
+       return hugetlbfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
 }
 
-static int hugetlbfs_tmpfile(struct inode *dir,
-                       struct dentry *dentry, umode_t mode)
+static int hugetlbfs_tmpfile(struct user_namespace *mnt_userns,
+                            struct inode *dir, struct dentry *dentry,
+                            umode_t mode)
 {
        return do_hugetlbfs_mknod(dir, dentry, mode | S_IFREG, 0, true);
 }
 
-static int hugetlbfs_symlink(struct inode *dir,
-                       struct dentry *dentry, const char *symname)
+static int hugetlbfs_symlink(struct user_namespace *mnt_userns,
+                            struct inode *dir, struct dentry *dentry,
+                            const char *symname)
 {
        struct inode *inode;
        int error = -ENOSPC;
@@ -945,17 +953,6 @@ static int hugetlbfs_symlink(struct inode *dir,
        return error;
 }
 
-/*
- * mark the head page dirty
- */
-static int hugetlbfs_set_page_dirty(struct page *page)
-{
-       struct page *head = compound_head(page);
-
-       SetPageDirty(head);
-       return 0;
-}
-
 static int hugetlbfs_migrate_page(struct address_space *mapping,
                                struct page *newpage, struct page *page,
                                enum migrate_mode mode)
@@ -966,15 +963,9 @@ static int hugetlbfs_migrate_page(struct address_space *mapping,
        if (rc != MIGRATEPAGE_SUCCESS)
                return rc;
 
-       /*
-        * page_private is subpool pointer in hugetlb pages.  Transfer to
-        * new page.  PagePrivate is not associated with page_private for
-        * hugetlb pages and can not be set here as only page_huge_active
-        * pages can be migrated.
-        */
-       if (page_private(page)) {
-               set_page_private(newpage, page_private(page));
-               set_page_private(page, 0);
+       if (hugetlb_page_subpool(page)) {
+               hugetlb_set_page_subpool(newpage, hugetlb_page_subpool(page));
+               hugetlb_set_page_subpool(page, NULL);
        }
 
        if (mode != MIGRATE_SYNC_NO_COPY)
@@ -1149,7 +1140,7 @@ static void hugetlbfs_destroy_inode(struct inode *inode)
 static const struct address_space_operations hugetlbfs_aops = {
        .write_begin    = hugetlbfs_write_begin,
        .write_end      = hugetlbfs_write_end,
-       .set_page_dirty = hugetlbfs_set_page_dirty,
+       .set_page_dirty =  __set_page_dirty_no_writeback,
        .migratepage    = hugetlbfs_migrate_page,
        .error_remove_page      = hugetlbfs_error_remove_page,
 };
@@ -1349,7 +1340,7 @@ hugetlbfs_fill_super(struct super_block *sb, struct fs_context *fc)
 
        /*
         * Allocate and initialize subpool if maximum or minimum size is
-        * specified.  Any needed reservations (for minimim size) are taken
+        * specified.  Any needed reservations (for minimum size) are
         * taken when the subpool is created.
         */
        if (ctx->max_hpages != -1 || ctx->min_hpages != -1) {
@@ -1492,7 +1483,7 @@ struct file *hugetlb_file_setup(const char *name, size_t size,
        inode->i_size = size;
        clear_nlink(inode);
 
-       if (hugetlb_reserve_pages(inode, 0,
+       if (!hugetlb_reserve_pages(inode, 0,
                        size >> huge_page_shift(hstate_inode(inode)), NULL,
                        acctflag))
                file = ERR_PTR(-ENOMEM);
@@ -1526,8 +1517,8 @@ static struct vfsmount *__init mount_one_hugetlbfs(struct hstate *h)
                put_fs_context(fc);
        }
        if (IS_ERR(mnt))
-               pr_err("Cannot mount internal hugetlbfs for page size %uK",
-                      1U << (h->order + PAGE_SHIFT - 10));
+               pr_err("Cannot mount internal hugetlbfs for page size %luK",
+                      huge_page_size(h) >> 10);
        return mnt;
 }
 
@@ -1555,7 +1546,7 @@ static int __init init_hugetlbfs_fs(void)
                goto out_free;
 
        /* default hstate mount is required */
-       mnt = mount_one_hugetlbfs(&hstates[default_hstate_idx]);
+       mnt = mount_one_hugetlbfs(&default_hstate);
        if (IS_ERR(mnt)) {
                error = PTR_ERR(mnt);
                goto out_unreg;
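
Both hugetlbfs call sites invert their tests because hugetlb_reserve_pages() was converted in this series from returning an errno to returning bool, true on success. A sketch of the resulting caller pattern; foofs_reserve is illustrative:

#include <linux/hugetlb.h>

/* Sketch: the callee no longer picks the error code, so each caller
 * supplies its own; both hunks above use -ENOMEM. */
static long foofs_reserve(struct inode *inode, long from, long to,
			  struct vm_area_struct *vma, vm_flags_t vm_flags)
{
	if (!hugetlb_reserve_pages(inode, from, to, vma, vm_flags))
		return -ENOMEM;
	return 0;
}
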
index e9c320a..5c36ada 100644 (file)
--- a/fs/init.c
+++ b/fs/init.c
@@ -49,7 +49,7 @@ int __init init_chdir(const char *filename)
        error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
        if (error)
                return error;
-       error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+       error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
        if (!error)
                set_fs_pwd(current->fs, &path);
        path_put(&path);
@@ -64,7 +64,7 @@ int __init init_chroot(const char *filename)
        error = kern_path(filename, LOOKUP_FOLLOW | LOOKUP_DIRECTORY, &path);
        if (error)
                return error;
-       error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+       error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
        if (error)
                goto dput_and_out;
        error = -EPERM;
@@ -118,7 +118,7 @@ int __init init_eaccess(const char *filename)
        error = kern_path(filename, LOOKUP_FOLLOW, &path);
        if (error)
                return error;
-       error = inode_permission(d_inode(path.dentry), MAY_ACCESS);
+       error = path_permission(&path, MAY_ACCESS);
        path_put(&path);
        return error;
 }
@@ -157,8 +157,8 @@ int __init init_mknod(const char *filename, umode_t mode, unsigned int dev)
                mode &= ~current_umask();
        error = security_path_mknod(&path, dentry, mode, dev);
        if (!error)
-               error = vfs_mknod(path.dentry->d_inode, dentry, mode,
-                                 new_decode_dev(dev));
+               error = vfs_mknod(mnt_user_ns(path.mnt), path.dentry->d_inode,
+                                 dentry, mode, new_decode_dev(dev));
        done_path_create(&path, dentry);
        return error;
 }
@@ -167,6 +167,7 @@ int __init init_link(const char *oldname, const char *newname)
 {
        struct dentry *new_dentry;
        struct path old_path, new_path;
+       struct user_namespace *mnt_userns;
        int error;
 
        error = kern_path(oldname, 0, &old_path);
@@ -181,14 +182,15 @@ int __init init_link(const char *oldname, const char *newname)
        error = -EXDEV;
        if (old_path.mnt != new_path.mnt)
                goto out_dput;
-       error = may_linkat(&old_path);
+       mnt_userns = mnt_user_ns(new_path.mnt);
+       error = may_linkat(mnt_userns, &old_path);
        if (unlikely(error))
                goto out_dput;
        error = security_path_link(old_path.dentry, &new_path, new_dentry);
        if (error)
                goto out_dput;
-       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry,
-                        NULL);
+       error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode,
+                        new_dentry, NULL);
 out_dput:
        done_path_create(&new_path, new_dentry);
 out:
@@ -207,7 +209,8 @@ int __init init_symlink(const char *oldname, const char *newname)
                return PTR_ERR(dentry);
        error = security_path_symlink(&path, dentry, oldname);
        if (!error)
-               error = vfs_symlink(path.dentry->d_inode, dentry, oldname);
+               error = vfs_symlink(mnt_user_ns(path.mnt), path.dentry->d_inode,
+                                   dentry, oldname);
        done_path_create(&path, dentry);
        return error;
 }
@@ -230,7 +233,8 @@ int __init init_mkdir(const char *pathname, umode_t mode)
                mode &= ~current_umask();
        error = security_path_mkdir(&path, dentry, mode);
        if (!error)
-               error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+               error = vfs_mkdir(mnt_user_ns(path.mnt), path.dentry->d_inode,
+                                 dentry, mode);
        done_path_create(&path, dentry);
        return error;
 }
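
The init_* helpers switch from inode_permission() to path_permission(), a wrapper introduced by this series that supplies the user namespace of the mount instead of assuming the identity mapping. Judging from the conversions above, it amounts to roughly this sketch:

#include <linux/fs.h>
#include <linux/mount.h>

/* Sketch of the likely definition, not a copy of the fs/namei.c hunk */
static inline int path_permission_sketch(const struct path *path, int mask)
{
	return inode_permission(mnt_user_ns(path->mnt),
				d_inode(path->dentry), mask);
}
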
index 8742421..a047ab3 100644 (file)
@@ -142,6 +142,7 @@ int inode_init_always(struct super_block *sb, struct inode *inode)
        atomic_set(&inode->i_count, 1);
        inode->i_op = &empty_iops;
        inode->i_fop = &no_open_fops;
+       inode->i_ino = 0;
        inode->__i_nlink = 1;
        inode->i_opflags = 0;
        if (sb->s_xattr)
@@ -1798,7 +1799,7 @@ bool atime_needs_update(const struct path *path, struct inode *inode)
        /* Atime updates will likely cause i_uid and i_gid to be written
         * back improperly if their true value is unknown to the vfs.
         */
-       if (HAS_UNMAPPED_ID(inode))
+       if (HAS_UNMAPPED_ID(mnt_user_ns(mnt), inode))
                return false;
 
        if (IS_NOATIME(inode))
@@ -1905,7 +1906,8 @@ int dentry_needs_remove_privs(struct dentry *dentry)
        return mask;
 }
 
-static int __remove_privs(struct dentry *dentry, int kill)
+static int __remove_privs(struct user_namespace *mnt_userns,
+                         struct dentry *dentry, int kill)
 {
        struct iattr newattrs;
 
@@ -1914,7 +1916,7 @@ static int __remove_privs(struct dentry *dentry, int kill)
         * Note we call this on write, so notify_change will not
         * encounter any conflicting delegations:
         */
-       return notify_change(dentry, &newattrs, NULL);
+       return notify_change(mnt_userns, dentry, &newattrs, NULL);
 }
 
 /*
@@ -1941,7 +1943,7 @@ int file_remove_privs(struct file *file)
        if (kill < 0)
                return kill;
        if (kill)
-               error = __remove_privs(dentry, kill);
+               error = __remove_privs(file_mnt_user_ns(file), dentry, kill);
        if (!error)
                inode_has_no_xattr(inode);
 
@@ -2132,14 +2134,21 @@ EXPORT_SYMBOL(init_special_inode);
 
 /**
  * inode_init_owner - Init uid,gid,mode for new inode according to posix standards
+ * @mnt_userns:        User namespace of the mount the inode was created from
  * @inode: New inode
  * @dir: Directory inode
  * @mode: mode of the new inode
+ *
+ * If the inode has been created through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions
+ * and initializing i_uid and i_gid. On non-idmapped mounts or if permission
+ * checking is to be performed on the raw inode, simply pass init_user_ns.
  */
-void inode_init_owner(struct inode *inode, const struct inode *dir,
-                       umode_t mode)
+void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
+                     const struct inode *dir, umode_t mode)
 {
-       inode->i_uid = current_fsuid();
+       inode->i_uid = fsuid_into_mnt(mnt_userns);
        if (dir && dir->i_mode & S_ISGID) {
                inode->i_gid = dir->i_gid;
 
@@ -2147,31 +2156,41 @@ void inode_init_owner(struct inode *inode, const struct inode *dir,
                if (S_ISDIR(mode))
                        mode |= S_ISGID;
                else if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP) &&
-                        !in_group_p(inode->i_gid) &&
-                        !capable_wrt_inode_uidgid(dir, CAP_FSETID))
+                        !in_group_p(i_gid_into_mnt(mnt_userns, dir)) &&
+                        !capable_wrt_inode_uidgid(mnt_userns, dir, CAP_FSETID))
                        mode &= ~S_ISGID;
        } else
-               inode->i_gid = current_fsgid();
+               inode->i_gid = fsgid_into_mnt(mnt_userns);
        inode->i_mode = mode;
 }
 EXPORT_SYMBOL(inode_init_owner);
 
 /**
  * inode_owner_or_capable - check current task permissions to inode
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode: inode being checked
  *
  * Return true if current either has CAP_FOWNER in a namespace with the
  * inode owner uid mapped, or owns the file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
  */
-bool inode_owner_or_capable(const struct inode *inode)
+bool inode_owner_or_capable(struct user_namespace *mnt_userns,
+                           const struct inode *inode)
 {
+       kuid_t i_uid;
        struct user_namespace *ns;
 
-       if (uid_eq(current_fsuid(), inode->i_uid))
+       i_uid = i_uid_into_mnt(mnt_userns, inode);
+       if (uid_eq(current_fsuid(), i_uid))
                return true;
 
        ns = current_user_ns();
-       if (kuid_has_mapping(ns, inode->i_uid) && ns_capable(ns, CAP_FOWNER))
+       if (kuid_has_mapping(ns, i_uid) && ns_capable(ns, CAP_FOWNER))
                return true;
        return false;
 }
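
As the updated kernel-doc spells out, ownership is now checked against the uid as mapped through the vfsmount's user namespace. A sketch of a converted call site, mirroring the hfsplus ioctl hunk earlier in this diff; foofs is illustrative:

#include <linux/fs.h>

static int foofs_check_owner(struct file *file)
{
	struct inode *inode = file_inode(file);

	/* an unconverted filesystem passes &init_user_ns; one supporting
	 * idmapped mounts would pass file_mnt_user_ns(file) instead */
	if (!inode_owner_or_capable(&init_user_ns, inode))
		return -EACCES;
	return 0;
}
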
index 49bfb37..6aeae7e 100644 (file)
@@ -74,7 +74,7 @@ extern int vfs_path_lookup(struct dentry *, struct vfsmount *,
                           const char *, unsigned int, struct path *);
 long do_rmdir(int dfd, struct filename *name);
 long do_unlinkat(int dfd, struct filename *name);
-int may_linkat(struct path *link);
+int may_linkat(struct user_namespace *mnt_userns, struct path *link);
 int do_renameat2(int olddfd, struct filename *oldname, int newdfd,
                 struct filename *newname, unsigned int flags);
 
index c36bbcd..433c4d3 100644 (file)
 #include <linux/sched/mm.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
-#include <linux/kthread.h>
 #include <linux/rculist_nulls.h>
-#include <linux/fs_struct.h>
-#include <linux/task_work.h>
-#include <linux/blk-cgroup.h>
-#include <linux/audit.h>
 #include <linux/cpu.h>
+#include <linux/tracehook.h>
 
 #include "../kernel/sched/sched.h"
 #include "io-wq.h"
@@ -36,7 +32,6 @@ enum {
 
 enum {
        IO_WQ_BIT_EXIT          = 0,    /* wq exiting */
-       IO_WQ_BIT_ERROR         = 1,    /* error on setup */
 };
 
 enum {
@@ -57,14 +52,9 @@ struct io_worker {
        struct io_wq_work *cur_work;
        spinlock_t lock;
 
+       struct completion ref_done;
+
        struct rcu_head rcu;
-       struct mm_struct *mm;
-#ifdef CONFIG_BLK_CGROUP
-       struct cgroup_subsys_state *blkcg_css;
-#endif
-       const struct cred *cur_creds;
-       const struct cred *saved_creds;
-       struct nsproxy *restore_nsproxy;
 };
 
 #if BITS_PER_LONG == 64
@@ -93,7 +83,6 @@ struct io_wqe {
        struct {
                raw_spinlock_t lock;
                struct io_wq_work_list work_list;
-               unsigned long hash_map;
                unsigned flags;
        } ____cacheline_aligned_in_smp;
 
@@ -103,6 +92,8 @@ struct io_wqe {
        struct hlist_nulls_head free_list;
        struct list_head all_list;
 
+       struct wait_queue_entry wait;
+
        struct io_wq *wq;
        struct io_wq_work *hash_tail[IO_WQ_NR_HASH_BUCKETS];
 };
@@ -118,17 +109,33 @@ struct io_wq {
        io_wq_work_fn *do_work;
 
        struct task_struct *manager;
-       struct user_struct *user;
+
+       struct io_wq_hash *hash;
+
        refcount_t refs;
-       struct completion done;
+       struct completion exited;
+
+       atomic_t worker_refs;
+       struct completion worker_done;
 
        struct hlist_node cpuhp_node;
 
-       refcount_t use_refs;
+       pid_t task_pid;
 };
 
 static enum cpuhp_state io_wq_online;
 
+struct io_cb_cancel_data {
+       work_cancel_fn *fn;
+       void *data;
+       int nr_running;
+       int nr_pending;
+       bool cancel_all;
+};
+
+static void io_wqe_cancel_pending_work(struct io_wqe *wqe,
+                                      struct io_cb_cancel_data *match);
+
 static bool io_worker_get(struct io_worker *worker)
 {
        return refcount_inc_not_zero(&worker->ref);
@@ -137,62 +144,7 @@ static bool io_worker_get(struct io_worker *worker)
 static void io_worker_release(struct io_worker *worker)
 {
        if (refcount_dec_and_test(&worker->ref))
-               wake_up_process(worker->task);
-}
-
-/*
- * Note: drops the wqe->lock if returning true! The caller must re-acquire
- * the lock in that case. Some callers need to restart handling if this
- * happens, so we can't just re-acquire the lock on behalf of the caller.
- */
-static bool __io_worker_unuse(struct io_wqe *wqe, struct io_worker *worker)
-{
-       bool dropped_lock = false;
-
-       if (worker->saved_creds) {
-               revert_creds(worker->saved_creds);
-               worker->cur_creds = worker->saved_creds = NULL;
-       }
-
-       if (current->files) {
-               __acquire(&wqe->lock);
-               raw_spin_unlock_irq(&wqe->lock);
-               dropped_lock = true;
-
-               task_lock(current);
-               current->files = NULL;
-               current->nsproxy = worker->restore_nsproxy;
-               task_unlock(current);
-       }
-
-       if (current->fs)
-               current->fs = NULL;
-
-       /*
-        * If we have an active mm, we need to drop the wq lock before unusing
-        * it. If we do, return true and let the caller retry the idle loop.
-        */
-       if (worker->mm) {
-               if (!dropped_lock) {
-                       __acquire(&wqe->lock);
-                       raw_spin_unlock_irq(&wqe->lock);
-                       dropped_lock = true;
-               }
-               __set_current_state(TASK_RUNNING);
-               kthread_unuse_mm(worker->mm);
-               mmput(worker->mm);
-               worker->mm = NULL;
-       }
-
-#ifdef CONFIG_BLK_CGROUP
-       if (worker->blkcg_css) {
-               kthread_associate_blkcg(NULL);
-               worker->blkcg_css = NULL;
-       }
-#endif
-       if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
-               current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
-       return dropped_lock;
+               complete(&worker->ref_done);
 }
 
 static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
@@ -204,9 +156,10 @@ static inline struct io_wqe_acct *io_work_get_acct(struct io_wqe *wqe,
        return &wqe->acct[IO_WQ_ACCT_BOUND];
 }
 
-static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
-                                                 struct io_worker *worker)
+static inline struct io_wqe_acct *io_wqe_get_acct(struct io_worker *worker)
 {
+       struct io_wqe *wqe = worker->wqe;
+
        if (worker->flags & IO_WORKER_F_BOUND)
                return &wqe->acct[IO_WQ_ACCT_BOUND];
 
@@ -216,39 +169,33 @@ static inline struct io_wqe_acct *io_wqe_get_acct(struct io_wqe *wqe,
 static void io_worker_exit(struct io_worker *worker)
 {
        struct io_wqe *wqe = worker->wqe;
-       struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+       struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+       unsigned flags;
 
-       /*
-        * If we're not at zero, someone else is holding a brief reference
-        * to the worker. Wait for that to go away.
-        */
-       set_current_state(TASK_INTERRUPTIBLE);
-       if (!refcount_dec_and_test(&worker->ref))
-               schedule();
-       __set_current_state(TASK_RUNNING);
+       if (refcount_dec_and_test(&worker->ref))
+               complete(&worker->ref_done);
+       wait_for_completion(&worker->ref_done);
 
        preempt_disable();
        current->flags &= ~PF_IO_WORKER;
-       if (worker->flags & IO_WORKER_F_RUNNING)
+       flags = worker->flags;
+       worker->flags = 0;
+       if (flags & IO_WORKER_F_RUNNING)
                atomic_dec(&acct->nr_running);
-       if (!(worker->flags & IO_WORKER_F_BOUND))
-               atomic_dec(&wqe->wq->user->processes);
        worker->flags = 0;
        preempt_enable();
 
        raw_spin_lock_irq(&wqe->lock);
-       hlist_nulls_del_rcu(&worker->nulls_node);
+       if (flags & IO_WORKER_F_FREE)
+               hlist_nulls_del_rcu(&worker->nulls_node);
        list_del_rcu(&worker->all_list);
-       if (__io_worker_unuse(wqe, worker)) {
-               __release(&wqe->lock);
-               raw_spin_lock_irq(&wqe->lock);
-       }
        acct->nr_workers--;
        raw_spin_unlock_irq(&wqe->lock);
 
        kfree_rcu(worker, rcu);
-       if (refcount_dec_and_test(&wqe->wq->refs))
-               complete(&wqe->wq->done);
+       if (atomic_dec_and_test(&wqe->wq->worker_refs))
+               complete(&wqe->wq->worker_done);
+       do_exit(0);
 }
 
 static inline bool io_wqe_run_queue(struct io_wqe *wqe)
@@ -306,35 +253,23 @@ static void io_wqe_wake_worker(struct io_wqe *wqe, struct io_wqe_acct *acct)
                wake_up_process(wqe->wq->manager);
 }
 
-static void io_wqe_inc_running(struct io_wqe *wqe, struct io_worker *worker)
+static void io_wqe_inc_running(struct io_worker *worker)
 {
-       struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+       struct io_wqe_acct *acct = io_wqe_get_acct(worker);
 
        atomic_inc(&acct->nr_running);
 }
 
-static void io_wqe_dec_running(struct io_wqe *wqe, struct io_worker *worker)
+static void io_wqe_dec_running(struct io_worker *worker)
        __must_hold(wqe->lock)
 {
-       struct io_wqe_acct *acct = io_wqe_get_acct(wqe, worker);
+       struct io_wqe_acct *acct = io_wqe_get_acct(worker);
+       struct io_wqe *wqe = worker->wqe;
 
        if (atomic_dec_and_test(&acct->nr_running) && io_wqe_run_queue(wqe))
                io_wqe_wake_worker(wqe, acct);
 }
 
-static void io_worker_start(struct io_wqe *wqe, struct io_worker *worker)
-{
-       allow_kernel_signal(SIGINT);
-
-       current->flags |= PF_IO_WORKER;
-       current->fs = NULL;
-       current->files = NULL;
-
-       worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
-       worker->restore_nsproxy = current->nsproxy;
-       io_wqe_inc_running(wqe, worker);
-}
-
 /*
  * Worker will start processing some work. Move it to the busy list, if
  * it's currently on the freelist
@@ -357,19 +292,17 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
        worker_bound = (worker->flags & IO_WORKER_F_BOUND) != 0;
        work_bound = (work->flags & IO_WQ_WORK_UNBOUND) == 0;
        if (worker_bound != work_bound) {
-               io_wqe_dec_running(wqe, worker);
+               io_wqe_dec_running(worker);
                if (work_bound) {
                        worker->flags |= IO_WORKER_F_BOUND;
                        wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers--;
                        wqe->acct[IO_WQ_ACCT_BOUND].nr_workers++;
-                       atomic_dec(&wqe->wq->user->processes);
                } else {
                        worker->flags &= ~IO_WORKER_F_BOUND;
                        wqe->acct[IO_WQ_ACCT_UNBOUND].nr_workers++;
                        wqe->acct[IO_WQ_ACCT_BOUND].nr_workers--;
-                       atomic_inc(&wqe->wq->user->processes);
                }
-               io_wqe_inc_running(wqe, worker);
+               io_wqe_inc_running(worker);
         }
 }
 
@@ -380,15 +313,13 @@ static void __io_worker_busy(struct io_wqe *wqe, struct io_worker *worker,
  * retry the loop in that case (we changed task state), we don't regrab
  * the lock if we return success.
  */
-static bool __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
+static void __io_worker_idle(struct io_wqe *wqe, struct io_worker *worker)
        __must_hold(wqe->lock)
 {
        if (!(worker->flags & IO_WORKER_F_FREE)) {
                worker->flags |= IO_WORKER_F_FREE;
                hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
        }
-
-       return __io_worker_unuse(wqe, worker);
 }
 
 static inline unsigned int io_get_work_hash(struct io_wq_work *work)
@@ -396,14 +327,31 @@ static inline unsigned int io_get_work_hash(struct io_wq_work *work)
        return work->flags >> IO_WQ_HASH_SHIFT;
 }
 
+static void io_wait_on_hash(struct io_wqe *wqe, unsigned int hash)
+{
+       struct io_wq *wq = wqe->wq;
+
+       spin_lock(&wq->hash->wait.lock);
+       if (list_empty(&wqe->wait.entry)) {
+               __add_wait_queue(&wq->hash->wait, &wqe->wait);
+               if (!test_bit(hash, &wq->hash->map)) {
+                       __set_current_state(TASK_RUNNING);
+                       list_del_init(&wqe->wait.entry);
+               }
+       }
+       spin_unlock(&wq->hash->wait.lock);
+}
+
 static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
        __must_hold(wqe->lock)
 {
        struct io_wq_work_node *node, *prev;
        struct io_wq_work *work, *tail;
-       unsigned int hash;
+       unsigned int stall_hash = -1U;
 
        wq_list_for_each(node, prev, &wqe->work_list) {
+               unsigned int hash;
+
                work = container_of(node, struct io_wq_work, list);
 
                /* not hashed, can run anytime */
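
io_wait_on_hash() replaces the old per-wqe hash_map bitmap: hashed work is now serialised through a wq-wide io_wq_hash, so a wqe that finds a bucket busy parks on wq->hash->wait and is woken once the owning worker clears the bit (see the clear_bit()/wake_up() pairing in the io_worker_handle_work hunk further down). A condensed sketch of that protocol, assuming the io_wq_hash layout used here (an unsigned long bitmap plus a waitqueue, from this series' io-wq.h):

#include <linux/wait.h>
#include <linux/bitops.h>

/* Sketch: one bit per hash bucket serialises hashed work wq-wide */
static bool claim_bucket(struct io_wq_hash *hash, unsigned int bit)
{
	return !test_and_set_bit(bit, &hash->map);	/* true: we own it */
}

static void release_bucket(struct io_wq_hash *hash, unsigned int bit)
{
	clear_bit(bit, &hash->map);
	if (wq_has_sleeper(&hash->wait))	/* wake any stalled wqe */
		wake_up(&hash->wait);
}
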
@@ -412,111 +360,49 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe)
                        return work;
                }
 
-               /* hashed, can run if not already running */
                hash = io_get_work_hash(work);
-               if (!(wqe->hash_map & BIT(hash))) {
-                       wqe->hash_map |= BIT(hash);
-                       /* all items with this hash lie in [work, tail] */
-                       tail = wqe->hash_tail[hash];
+               /* all items with this hash lie in [work, tail] */
+               tail = wqe->hash_tail[hash];
+
+               /* hashed, can run if not already running */
+               if (!test_and_set_bit(hash, &wqe->wq->hash->map)) {
                        wqe->hash_tail[hash] = NULL;
                        wq_list_cut(&wqe->work_list, &tail->list, prev);
                        return work;
                }
+               if (stall_hash == -1U)
+                       stall_hash = hash;
+               /* fast forward to a next hash, for-each will fix up @prev */
+               node = &tail->list;
        }
 
-       return NULL;
-}
-
-static void io_wq_switch_mm(struct io_worker *worker, struct io_wq_work *work)
-{
-       if (worker->mm) {
-               kthread_unuse_mm(worker->mm);
-               mmput(worker->mm);
-               worker->mm = NULL;
-       }
-
-       if (mmget_not_zero(work->identity->mm)) {
-               kthread_use_mm(work->identity->mm);
-               worker->mm = work->identity->mm;
-               return;
+       if (stall_hash != -1U) {
+               raw_spin_unlock(&wqe->lock);
+               io_wait_on_hash(wqe, stall_hash);
+               raw_spin_lock(&wqe->lock);
        }
 
-       /* failed grabbing mm, ensure work gets cancelled */
-       work->flags |= IO_WQ_WORK_CANCEL;
-}
-
-static inline void io_wq_switch_blkcg(struct io_worker *worker,
-                                     struct io_wq_work *work)
-{
-#ifdef CONFIG_BLK_CGROUP
-       if (!(work->flags & IO_WQ_WORK_BLKCG))
-               return;
-       if (work->identity->blkcg_css != worker->blkcg_css) {
-               kthread_associate_blkcg(work->identity->blkcg_css);
-               worker->blkcg_css = work->identity->blkcg_css;
-       }
-#endif
-}
-
-static void io_wq_switch_creds(struct io_worker *worker,
-                              struct io_wq_work *work)
-{
-       const struct cred *old_creds = override_creds(work->identity->creds);
-
-       worker->cur_creds = work->identity->creds;
-       if (worker->saved_creds)
-               put_cred(old_creds); /* creds set by previous switch */
-       else
-               worker->saved_creds = old_creds;
+       return NULL;
 }
 
-static void io_impersonate_work(struct io_worker *worker,
-                               struct io_wq_work *work)
+static bool io_flush_signals(void)
 {
-       if ((work->flags & IO_WQ_WORK_FILES) &&
-           current->files != work->identity->files) {
-               task_lock(current);
-               current->files = work->identity->files;
-               current->nsproxy = work->identity->nsproxy;
-               task_unlock(current);
-               if (!work->identity->files) {
-                       /* failed grabbing files, ensure work gets cancelled */
-                       work->flags |= IO_WQ_WORK_CANCEL;
-               }
+       if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) {
+               __set_current_state(TASK_RUNNING);
+               tracehook_notify_signal();
+               return true;
        }
-       if ((work->flags & IO_WQ_WORK_FS) && current->fs != work->identity->fs)
-               current->fs = work->identity->fs;
-       if ((work->flags & IO_WQ_WORK_MM) && work->identity->mm != worker->mm)
-               io_wq_switch_mm(worker, work);
-       if ((work->flags & IO_WQ_WORK_CREDS) &&
-           worker->cur_creds != work->identity->creds)
-               io_wq_switch_creds(worker, work);
-       if (work->flags & IO_WQ_WORK_FSIZE)
-               current->signal->rlim[RLIMIT_FSIZE].rlim_cur = work->identity->fsize;
-       else if (current->signal->rlim[RLIMIT_FSIZE].rlim_cur != RLIM_INFINITY)
-               current->signal->rlim[RLIMIT_FSIZE].rlim_cur = RLIM_INFINITY;
-       io_wq_switch_blkcg(worker, work);
-#ifdef CONFIG_AUDIT
-       current->loginuid = work->identity->loginuid;
-       current->sessionid = work->identity->sessionid;
-#endif
+       return false;
 }
 
 static void io_assign_current_work(struct io_worker *worker,
                                   struct io_wq_work *work)
 {
        if (work) {
-               /* flush pending signals before assigning new work */
-               if (signal_pending(current))
-                       flush_signals(current);
+               io_flush_signals();
                cond_resched();
        }
 
-#ifdef CONFIG_AUDIT
-       current->loginuid = KUIDT_INIT(AUDIT_UID_UNSET);
-       current->sessionid = AUDIT_SID_UNSET;
-#endif
-
        spin_lock_irq(&worker->lock);
        worker->cur_work = work;
        spin_unlock_irq(&worker->lock);
@@ -550,6 +436,7 @@ get_next:
                if (!work)
                        break;
                io_assign_current_work(worker, work);
+               __set_current_state(TASK_RUNNING);
 
                /* handle a whole dependent link */
                do {
@@ -557,7 +444,6 @@ get_next:
                        unsigned int hash = io_get_work_hash(work);
 
                        next_hashed = wq_next_work(work);
-                       io_impersonate_work(worker, work);
                        wq->do_work(work);
                        io_assign_current_work(worker, NULL);
 
@@ -572,8 +458,10 @@ get_next:
                                io_wqe_enqueue(wqe, linked);
 
                        if (hash != -1U && !next_hashed) {
+                               clear_bit(hash, &wq->hash->map);
+                               if (wq_has_sleeper(&wq->hash->wait))
+                                       wake_up(&wq->hash->wait);
                                raw_spin_lock_irq(&wqe->lock);
-                               wqe->hash_map &= ~BIT_ULL(hash);
                                wqe->flags &= ~IO_WQE_FLAG_STALLED;
                                /* skip unnecessary unlock-lock wqe->lock */
                                if (!work)
@@ -591,27 +479,37 @@ static int io_wqe_worker(void *data)
        struct io_worker *worker = data;
        struct io_wqe *wqe = worker->wqe;
        struct io_wq *wq = wqe->wq;
+       char buf[TASK_COMM_LEN];
 
-       io_worker_start(wqe, worker);
+       worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING);
+       io_wqe_inc_running(worker);
+
+       snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task_pid);
+       set_task_comm(current, buf);
 
        while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) {
+               long ret;
+
                set_current_state(TASK_INTERRUPTIBLE);
 loop:
                raw_spin_lock_irq(&wqe->lock);
                if (io_wqe_run_queue(wqe)) {
-                       __set_current_state(TASK_RUNNING);
                        io_worker_handle_work(worker);
                        goto loop;
                }
-               /* drops the lock on success, retry */
-               if (__io_worker_idle(wqe, worker)) {
-                       __release(&wqe->lock);
-                       goto loop;
-               }
+               __io_worker_idle(wqe, worker);
                raw_spin_unlock_irq(&wqe->lock);
-               if (signal_pending(current))
-                       flush_signals(current);
-               if (schedule_timeout(WORKER_IDLE_TIMEOUT))
+               if (io_flush_signals())
+                       continue;
+               ret = schedule_timeout(WORKER_IDLE_TIMEOUT);
+               if (signal_pending(current)) {
+                       struct ksignal ksig;
+
+                       if (!get_signal(&ksig))
+                               continue;
+                       break;
+               }
+               if (ret)
                        continue;
                /* timed out, exit unless we're the fixed worker */
                if (test_bit(IO_WQ_BIT_EXIT, &wq->state) ||
@@ -636,15 +534,16 @@ loop:
  */
 void io_wq_worker_running(struct task_struct *tsk)
 {
-       struct io_worker *worker = kthread_data(tsk);
-       struct io_wqe *wqe = worker->wqe;
+       struct io_worker *worker = tsk->pf_io_worker;
 
+       if (!worker)
+               return;
        if (!(worker->flags & IO_WORKER_F_UP))
                return;
        if (worker->flags & IO_WORKER_F_RUNNING)
                return;
        worker->flags |= IO_WORKER_F_RUNNING;
-       io_wqe_inc_running(wqe, worker);
+       io_wqe_inc_running(worker);
 }
 
 /*
@@ -654,9 +553,10 @@ void io_wq_worker_running(struct task_struct *tsk)
  */
 void io_wq_worker_sleeping(struct task_struct *tsk)
 {
-       struct io_worker *worker = kthread_data(tsk);
-       struct io_wqe *wqe = worker->wqe;
+       struct io_worker *worker = tsk->pf_io_worker;
 
+       if (!worker)
+               return;
        if (!(worker->flags & IO_WORKER_F_UP))
                return;
        if (!(worker->flags & IO_WORKER_F_RUNNING))
@@ -664,15 +564,18 @@ void io_wq_worker_sleeping(struct task_struct *tsk)
 
        worker->flags &= ~IO_WORKER_F_RUNNING;
 
-       raw_spin_lock_irq(&wqe->lock);
-       io_wqe_dec_running(wqe, worker);
-       raw_spin_unlock_irq(&wqe->lock);
+       raw_spin_lock_irq(&worker->wqe->lock);
+       io_wqe_dec_running(worker);
+       raw_spin_unlock_irq(&worker->wqe->lock);
 }
 
 static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
 {
        struct io_wqe_acct *acct = &wqe->acct[index];
        struct io_worker *worker;
+       struct task_struct *tsk;
+
+       __set_current_state(TASK_RUNNING);
 
        worker = kzalloc_node(sizeof(*worker), GFP_KERNEL, wqe->node);
        if (!worker)
@@ -682,14 +585,22 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
        worker->nulls_node.pprev = NULL;
        worker->wqe = wqe;
        spin_lock_init(&worker->lock);
+       init_completion(&worker->ref_done);
+
+       atomic_inc(&wq->worker_refs);
 
-       worker->task = kthread_create_on_node(io_wqe_worker, worker, wqe->node,
-                               "io_wqe_worker-%d/%d", index, wqe->node);
-       if (IS_ERR(worker->task)) {
+       tsk = create_io_thread(io_wqe_worker, worker, wqe->node);
+       if (IS_ERR(tsk)) {
+               if (atomic_dec_and_test(&wq->worker_refs))
+                       complete(&wq->worker_done);
                kfree(worker);
                return false;
        }
-       kthread_bind_mask(worker->task, cpumask_of_node(wqe->node));
+
+       tsk->pf_io_worker = worker;
+       worker->task = tsk;
+       set_cpus_allowed_ptr(tsk, cpumask_of_node(wqe->node));
+       tsk->flags |= PF_NO_SETAFFINITY;
 
        raw_spin_lock_irq(&wqe->lock);
        hlist_nulls_add_head_rcu(&worker->nulls_node, &wqe->free_list);
@@ -701,12 +612,7 @@ static bool create_io_worker(struct io_wq *wq, struct io_wqe *wqe, int index)
                worker->flags |= IO_WORKER_F_FIXED;
        acct->nr_workers++;
        raw_spin_unlock_irq(&wqe->lock);
-
-       if (index == IO_WQ_ACCT_UNBOUND)
-               atomic_inc(&wq->user->processes);
-
-       refcount_inc(&wq->refs);
-       wake_up_process(worker->task);
+       wake_up_new_task(tsk);
        return true;
 }
 
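
Workers are now spawned with create_io_thread() (added elsewhere in this series, in kernel/fork.c) as PF_IO_WORKER threads of the submitting task, so they share mm, files, and creds by construction; that is what allows the patch to delete the io_impersonate_work()/io_wq_switch_*() machinery wholesale. The tsk->pf_io_worker back-pointer installed above is what the scheduler hooks below rely on; a one-line sketch:

#include <linux/sched.h>

/* Sketch: replaces kthread_data() now that workers are ordinary threads;
 * NULL for any task that is not an io_uring worker */
static struct io_worker *worker_from_task(struct task_struct *tsk)
{
	return tsk->pf_io_worker;
}
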
@@ -715,6 +621,8 @@ static inline bool io_wqe_need_worker(struct io_wqe *wqe, int index)
 {
        struct io_wqe_acct *acct = &wqe->acct[index];
 
+       if (acct->nr_workers && test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state))
+               return false;
        /* if we have available workers or no work, no need */
        if (!hlist_nulls_empty(&wqe->free_list) || !io_wqe_run_queue(wqe))
                return false;
@@ -748,97 +656,96 @@ static bool io_wq_for_each_worker(struct io_wqe *wqe,
 
 static bool io_wq_worker_wake(struct io_worker *worker, void *data)
 {
+       set_notify_signal(worker->task);
        wake_up_process(worker->task);
        return false;
 }
 
-/*
- * Manager thread. Tasked with creating new workers, if we need them.
- */
-static int io_wq_manager(void *data)
+static void io_wq_check_workers(struct io_wq *wq)
 {
-       struct io_wq *wq = data;
        int node;
 
-       /* create fixed workers */
-       refcount_set(&wq->refs, 1);
        for_each_node(node) {
+               struct io_wqe *wqe = wq->wqes[node];
+               bool fork_worker[2] = { false, false };
+
                if (!node_online(node))
                        continue;
-               if (create_io_worker(wq, wq->wqes[node], IO_WQ_ACCT_BOUND))
-                       continue;
-               set_bit(IO_WQ_BIT_ERROR, &wq->state);
-               set_bit(IO_WQ_BIT_EXIT, &wq->state);
-               goto out;
+
+               raw_spin_lock_irq(&wqe->lock);
+               if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
+                       fork_worker[IO_WQ_ACCT_BOUND] = true;
+               if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
+                       fork_worker[IO_WQ_ACCT_UNBOUND] = true;
+               raw_spin_unlock_irq(&wqe->lock);
+               if (fork_worker[IO_WQ_ACCT_BOUND])
+                       create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
+               if (fork_worker[IO_WQ_ACCT_UNBOUND])
+                       create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
        }
+}
 
-       complete(&wq->done);
+static bool io_wq_work_match_all(struct io_wq_work *work, void *data)
+{
+       return true;
+}
 
-       while (!kthread_should_stop()) {
-               if (current->task_works)
-                       task_work_run();
+static void io_wq_cancel_pending(struct io_wq *wq)
+{
+       struct io_cb_cancel_data match = {
+               .fn             = io_wq_work_match_all,
+               .cancel_all     = true,
+       };
+       int node;
 
-               for_each_node(node) {
-                       struct io_wqe *wqe = wq->wqes[node];
-                       bool fork_worker[2] = { false, false };
+       for_each_node(node)
+               io_wqe_cancel_pending_work(wq->wqes[node], &match);
+}
 
-                       if (!node_online(node))
-                               continue;
+/*
+ * Manager thread. Tasked with creating new workers, if we need them.
+ */
+static int io_wq_manager(void *data)
+{
+       struct io_wq *wq = data;
+       char buf[TASK_COMM_LEN];
+       int node;
 
-                       raw_spin_lock_irq(&wqe->lock);
-                       if (io_wqe_need_worker(wqe, IO_WQ_ACCT_BOUND))
-                               fork_worker[IO_WQ_ACCT_BOUND] = true;
-                       if (io_wqe_need_worker(wqe, IO_WQ_ACCT_UNBOUND))
-                               fork_worker[IO_WQ_ACCT_UNBOUND] = true;
-                       raw_spin_unlock_irq(&wqe->lock);
-                       if (fork_worker[IO_WQ_ACCT_BOUND])
-                               create_io_worker(wq, wqe, IO_WQ_ACCT_BOUND);
-                       if (fork_worker[IO_WQ_ACCT_UNBOUND])
-                               create_io_worker(wq, wqe, IO_WQ_ACCT_UNBOUND);
-               }
+       snprintf(buf, sizeof(buf), "iou-mgr-%d", wq->task_pid);
+       set_task_comm(current, buf);
+
+       do {
                set_current_state(TASK_INTERRUPTIBLE);
+               io_wq_check_workers(wq);
                schedule_timeout(HZ);
-       }
-
-       if (current->task_works)
-               task_work_run();
-
-out:
-       if (refcount_dec_and_test(&wq->refs)) {
-               complete(&wq->done);
-               return 0;
-       }
-       /* if ERROR is set and we get here, we have workers to wake */
-       if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
-               rcu_read_lock();
-               for_each_node(node)
-                       io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
-               rcu_read_unlock();
-       }
-       return 0;
-}
+               if (signal_pending(current)) {
+                       struct ksignal ksig;
 
-static bool io_wq_can_queue(struct io_wqe *wqe, struct io_wqe_acct *acct,
-                           struct io_wq_work *work)
-{
-       bool free_worker;
+                       if (!get_signal(&ksig))
+                               continue;
+                       set_bit(IO_WQ_BIT_EXIT, &wq->state);
+               }
+       } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state));
 
-       if (!(work->flags & IO_WQ_WORK_UNBOUND))
-               return true;
-       if (atomic_read(&acct->nr_running))
-               return true;
+       io_wq_check_workers(wq);
 
        rcu_read_lock();
-       free_worker = !hlist_nulls_empty(&wqe->free_list);
+       for_each_node(node)
+               io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
        rcu_read_unlock();
-       if (free_worker)
-               return true;
 
-       if (atomic_read(&wqe->wq->user->processes) >= acct->max_workers &&
-           !(capable(CAP_SYS_RESOURCE) || capable(CAP_SYS_ADMIN)))
-               return false;
+       if (atomic_dec_and_test(&wq->worker_refs))
+               complete(&wq->worker_done);
+       wait_for_completion(&wq->worker_done);
 
-       return true;
+       spin_lock_irq(&wq->hash->wait.lock);
+       for_each_node(node)
+               list_del_init(&wq->wqes[node]->wait.entry);
+       spin_unlock_irq(&wq->hash->wait.lock);
+
+       io_wq_cancel_pending(wq);
+       complete(&wq->exited);
+       do_exit(0);
 }
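
The manager is an io thread now rather than a kthread, but its control flow is still a periodic check-and-sleep loop that a signal or IO_WQ_BIT_EXIT breaks out of, with one final worker check on the way down. A minimal userspace sketch of that shape (should_exit, check_workers and the thread entry point are illustrative names, not kernel API):

#include <stdatomic.h>
#include <unistd.h>

static atomic_bool should_exit;         /* stands in for IO_WQ_BIT_EXIT */

static void check_workers(void)
{
        /* fork bound/unbound workers here when the queues need them */
}

static void *manager(void *arg)
{
        (void)arg;
        while (!atomic_load(&should_exit)) {
                check_workers();
                sleep(1);               /* roughly schedule_timeout(HZ) */
        }
        check_workers();                /* one last pass on the way out */
        return NULL;
}
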
 
 static void io_run_cancel(struct io_wq_work *work, struct io_wqe *wqe)
@@ -872,19 +779,39 @@ append:
        wq_list_add_after(&work->list, &tail->list, &wqe->work_list);
 }
 
+static int io_wq_fork_manager(struct io_wq *wq)
+{
+       struct task_struct *tsk;
+
+       if (wq->manager)
+               return 0;
+
+       WARN_ON_ONCE(test_bit(IO_WQ_BIT_EXIT, &wq->state));
+
+       init_completion(&wq->worker_done);
+       atomic_set(&wq->worker_refs, 1);
+       tsk = create_io_thread(io_wq_manager, wq, NUMA_NO_NODE);
+       if (!IS_ERR(tsk)) {
+               wq->manager = get_task_struct(tsk);
+               wake_up_new_task(tsk);
+               return 0;
+       }
+
+       if (atomic_dec_and_test(&wq->worker_refs))
+               complete(&wq->worker_done);
+
+       return PTR_ERR(tsk);
+}
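
io_wq_fork_manager() is idempotent: it returns immediately if a manager already exists, otherwise it takes the initial worker reference, creates the io thread, and undoes the reference if creation fails. A compilable userspace analogue of that create-on-first-use pattern (struct wq, fork_manager and manager_fn are assumed names, not the kernel's):

#include <errno.h>
#include <pthread.h>
#include <stdatomic.h>

struct wq {
        pthread_t  manager;
        atomic_int have_manager;
        atomic_int worker_refs;
};

void *manager_fn(void *arg);            /* e.g. the loop sketched above */

/* Start the manager on first use; it runs holding one worker ref. */
static int fork_manager(struct wq *wq)
{
        if (atomic_load(&wq->have_manager))
                return 0;

        atomic_store(&wq->worker_refs, 1);
        if (pthread_create(&wq->manager, NULL, manager_fn, wq) == 0) {
                atomic_store(&wq->have_manager, 1);
                return 0;
        }
        atomic_fetch_sub(&wq->worker_refs, 1); /* undo the ref on failure */
        return -ENOMEM;
}

As in the kernel version, the reference taken before thread creation is the one the manager itself eventually drops, so the worker_done-style completion can only fire once the manager and all workers are gone.
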
+
 static void io_wqe_enqueue(struct io_wqe *wqe, struct io_wq_work *work)
 {
        struct io_wqe_acct *acct = io_work_get_acct(wqe, work);
        int work_flags;
        unsigned long flags;
 
-       /*
-        * Do early check to see if we need a new unbound worker, and if we do,
-        * if we're allowed to do so. This isn't 100% accurate as there's a
-        * gap between this check and incrementing the value, but that's OK.
-        * It's close enough to not be an issue, fork() has the same delay.
-        */
-       if (unlikely(!io_wq_can_queue(wqe, acct, work))) {
+       /* Can only happen if manager creation fails after exec */
+       if (io_wq_fork_manager(wqe->wq) ||
+           test_bit(IO_WQ_BIT_EXIT, &wqe->wq->state)) {
                io_run_cancel(work, wqe);
                return;
        }
@@ -919,14 +846,6 @@ void io_wq_hash_work(struct io_wq_work *work, void *val)
        work->flags |= (IO_WQ_WORK_HASHED | (bit << IO_WQ_HASH_SHIFT));
 }
 
-struct io_cb_cancel_data {
-       work_cancel_fn *fn;
-       void *data;
-       int nr_running;
-       int nr_pending;
-       bool cancel_all;
-};
-
 static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
 {
        struct io_cb_cancel_data *match = data;
@@ -939,7 +858,7 @@ static bool io_wq_worker_cancel(struct io_worker *worker, void *data)
        spin_lock_irqsave(&worker->lock, flags);
        if (worker->cur_work &&
            match->fn(worker->cur_work, match->data)) {
-               send_sig(SIGINT, worker->task, 1);
+               set_notify_signal(worker->task);
                match->nr_running++;
        }
        spin_unlock_irqrestore(&worker->lock, flags);
@@ -1043,6 +962,24 @@ enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
        return IO_WQ_CANCEL_NOTFOUND;
 }
 
+static int io_wqe_hash_wake(struct wait_queue_entry *wait, unsigned mode,
+                           int sync, void *key)
+{
+       struct io_wqe *wqe = container_of(wait, struct io_wqe, wait);
+       int ret;
+
+       list_del_init(&wait->entry);
+
+       rcu_read_lock();
+       ret = io_wqe_activate_free_worker(wqe);
+       rcu_read_unlock();
+
+       if (!ret)
+               wake_up_process(wqe->wq->manager);
+
+       return 1;
+}
+
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
 {
        int ret = -ENOMEM, node;
@@ -1063,12 +1000,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
        if (ret)
                goto err_wqes;
 
+       refcount_inc(&data->hash->refs);
+       wq->hash = data->hash;
        wq->free_work = data->free_work;
        wq->do_work = data->do_work;
 
-       /* caller must already hold a reference to this */
-       wq->user = data->user;
-
        ret = -ENOMEM;
        for_each_node(node) {
                struct io_wqe *wqe;
@@ -1083,11 +1019,11 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
                wqe->node = alloc_node;
                wqe->acct[IO_WQ_ACCT_BOUND].max_workers = bounded;
                atomic_set(&wqe->acct[IO_WQ_ACCT_BOUND].nr_running, 0);
-               if (wq->user) {
-                       wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
+               wqe->acct[IO_WQ_ACCT_UNBOUND].max_workers =
                                        task_rlimit(current, RLIMIT_NPROC);
-               }
                atomic_set(&wqe->acct[IO_WQ_ACCT_UNBOUND].nr_running, 0);
+               wqe->wait.func = io_wqe_hash_wake;
+               INIT_LIST_HEAD(&wqe->wait.entry);
                wqe->wq = wq;
                raw_spin_lock_init(&wqe->lock);
                INIT_WQ_LIST(&wqe->work_list);
@@ -1095,24 +1031,15 @@ struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data)
                INIT_LIST_HEAD(&wqe->all_list);
        }
 
-       init_completion(&wq->done);
+       wq->task_pid = current->pid;
+       init_completion(&wq->exited);
+       refcount_set(&wq->refs, 1);
 
-       wq->manager = kthread_create(io_wq_manager, wq, "io_wq_manager");
-       if (!IS_ERR(wq->manager)) {
-               wake_up_process(wq->manager);
-               wait_for_completion(&wq->done);
-               if (test_bit(IO_WQ_BIT_ERROR, &wq->state)) {
-                       ret = -ENOMEM;
-                       goto err;
-               }
-               refcount_set(&wq->use_refs, 1);
-               reinit_completion(&wq->done);
+       ret = io_wq_fork_manager(wq);
+       if (!ret)
                return wq;
-       }
-
-       ret = PTR_ERR(wq->manager);
-       complete(&wq->done);
 err:
+       io_wq_put_hash(data->hash);
        cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
        for_each_node(node)
                kfree(wq->wqes[node]);
@@ -1123,46 +1050,50 @@ err_wq:
        return ERR_PTR(ret);
 }
 
-bool io_wq_get(struct io_wq *wq, struct io_wq_data *data)
+static void io_wq_destroy_manager(struct io_wq *wq)
 {
-       if (data->free_work != wq->free_work || data->do_work != wq->do_work)
-               return false;
-
-       return refcount_inc_not_zero(&wq->use_refs);
+       if (wq->manager) {
+               wake_up_process(wq->manager);
+               wait_for_completion(&wq->exited);
+               put_task_struct(wq->manager);
+               wq->manager = NULL;
+       }
 }
 
-static void __io_wq_destroy(struct io_wq *wq)
+static void io_wq_destroy(struct io_wq *wq)
 {
        int node;
 
        cpuhp_state_remove_instance_nocalls(io_wq_online, &wq->cpuhp_node);
 
        set_bit(IO_WQ_BIT_EXIT, &wq->state);
-       if (wq->manager)
-               kthread_stop(wq->manager);
-
-       rcu_read_lock();
-       for_each_node(node)
-               io_wq_for_each_worker(wq->wqes[node], io_wq_worker_wake, NULL);
-       rcu_read_unlock();
-
-       wait_for_completion(&wq->done);
+       io_wq_destroy_manager(wq);
 
-       for_each_node(node)
-               kfree(wq->wqes[node]);
+       for_each_node(node) {
+               struct io_wqe *wqe = wq->wqes[node];
+               struct io_cb_cancel_data match = {
+                       .fn             = io_wq_work_match_all,
+                       .cancel_all     = true,
+               };
+               io_wqe_cancel_pending_work(wqe, &match);
+               kfree(wqe);
+       }
+       io_wq_put_hash(wq->hash);
        kfree(wq->wqes);
        kfree(wq);
 }
 
-void io_wq_destroy(struct io_wq *wq)
+void io_wq_put(struct io_wq *wq)
 {
-       if (refcount_dec_and_test(&wq->use_refs))
-               __io_wq_destroy(wq);
+       if (refcount_dec_and_test(&wq->refs))
+               io_wq_destroy(wq);
 }
 
-struct task_struct *io_wq_get_task(struct io_wq *wq)
+void io_wq_put_and_exit(struct io_wq *wq)
 {
-       return wq->manager;
+       set_bit(IO_WQ_BIT_EXIT, &wq->state);
+       io_wq_destroy_manager(wq);
+       io_wq_put(wq);
 }
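
io_wq_put_and_exit() encodes the teardown ordering: set IO_WQ_BIT_EXIT first so nothing re-forks the manager, synchronously stop the manager, then drop the caller's reference. A self-contained sketch of the same ordering with pthreads (all names, including wake_manager, are assumptions for illustration):

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdlib.h>

struct wq {
        pthread_t   manager;
        atomic_bool exiting;
        atomic_int  refs;
};

void wake_manager(struct wq *wq);       /* assumed: kicks the sleeping loop */

/* Flag exit before joining so the manager's next check sees it; drop
 * the reference last, freeing on zero (sketch only). */
static void wq_put_and_exit(struct wq *wq)
{
        atomic_store(&wq->exiting, true);
        wake_manager(wq);
        pthread_join(wq->manager, NULL);
        if (atomic_fetch_sub(&wq->refs, 1) == 1)
                free(wq);
}
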
 
 static bool io_wq_worker_affinity(struct io_worker *worker, void *data)
index 096f102..80d5905 100644 (file)
@@ -1,7 +1,7 @@
 #ifndef INTERNAL_IO_WQ_H
 #define INTERNAL_IO_WQ_H
 
-#include <linux/io_uring.h>
+#include <linux/refcount.h>
 
 struct io_wq;
 
@@ -11,13 +11,6 @@ enum {
        IO_WQ_WORK_UNBOUND      = 4,
        IO_WQ_WORK_CONCURRENT   = 16,
 
-       IO_WQ_WORK_FILES        = 32,
-       IO_WQ_WORK_FS           = 64,
-       IO_WQ_WORK_MM           = 128,
-       IO_WQ_WORK_CREDS        = 256,
-       IO_WQ_WORK_BLKCG        = 512,
-       IO_WQ_WORK_FSIZE        = 1024,
-
        IO_WQ_HASH_SHIFT        = 24,   /* upper 8 bits are used for hash key */
 };
 
@@ -27,6 +20,15 @@ enum io_wq_cancel {
        IO_WQ_CANCEL_NOTFOUND,  /* work not found */
 };
 
+struct io_wq_work_node {
+       struct io_wq_work_node *next;
+};
+
+struct io_wq_work_list {
+       struct io_wq_work_node *first;
+       struct io_wq_work_node *last;
+};
+
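
These two structs move the intrusive singly linked work list into the header: each node embeds its own next pointer and the list tracks both first and last, so a tail append is O(1) with no allocation. A standalone sketch of the append that wq_list_add_after() and friends build on (illustrative names, not the kernel helpers):

#include <stddef.h>

struct work_node {
        struct work_node *next;         /* embedded in the work item */
};

struct work_list {
        struct work_node *first, *last;
};

/* O(1) tail append onto the intrusive list (sketch). */
static void list_append(struct work_list *l, struct work_node *n)
{
        n->next = NULL;
        if (l->last)
                l->last->next = n;
        else
                l->first = n;
        l->last = n;
}
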
 static inline void wq_list_add_after(struct io_wq_work_node *node,
                                     struct io_wq_work_node *pos,
                                     struct io_wq_work_list *list)
@@ -85,7 +87,7 @@ static inline void wq_list_del(struct io_wq_work_list *list,
 
 struct io_wq_work {
        struct io_wq_work_node list;
-       struct io_identity *identity;
+       const struct cred *creds;
        unsigned flags;
 };
 
@@ -100,16 +102,27 @@ static inline struct io_wq_work *wq_next_work(struct io_wq_work *work)
 typedef struct io_wq_work *(free_work_fn)(struct io_wq_work *);
 typedef void (io_wq_work_fn)(struct io_wq_work *);
 
-struct io_wq_data {
-       struct user_struct *user;
+struct io_wq_hash {
+       refcount_t refs;
+       unsigned long map;
+       struct wait_queue_head wait;
+};
+
+static inline void io_wq_put_hash(struct io_wq_hash *hash)
+{
+       if (refcount_dec_and_test(&hash->refs))
+               kfree(hash);
+}
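
io_wq_put_hash() is the usual free-on-last-put helper. The same shape in portable C11, with hash_map and hash_put as stand-ins for the kernel types:

#include <stdatomic.h>
#include <stdlib.h>

struct hash_map {
        atomic_int refs;
        /* ... shared hash state ... */
};

/* Free on the last put, mirroring io_wq_put_hash() (userspace sketch). */
static void hash_put(struct hash_map *h)
{
        if (atomic_fetch_sub(&h->refs, 1) == 1)
                free(h);
}
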
 
+struct io_wq_data {
+       struct io_wq_hash *hash;
        io_wq_work_fn *do_work;
        free_work_fn *free_work;
 };
 
 struct io_wq *io_wq_create(unsigned bounded, struct io_wq_data *data);
-bool io_wq_get(struct io_wq *wq, struct io_wq_data *data);
-void io_wq_destroy(struct io_wq *wq);
+void io_wq_put(struct io_wq *wq);
+void io_wq_put_and_exit(struct io_wq *wq);
 
 void io_wq_enqueue(struct io_wq *wq, struct io_wq_work *work);
 void io_wq_hash_work(struct io_wq_work *work, void *val);
@@ -124,8 +137,6 @@ typedef bool (work_cancel_fn)(struct io_wq_work *, void *);
 enum io_wq_cancel io_wq_cancel_cb(struct io_wq *wq, work_cancel_fn *cancel,
                                        void *data, bool cancel_all);
 
-struct task_struct *io_wq_get_task(struct io_wq *wq);
-
 #if defined(CONFIG_IO_WQ)
 extern void io_wq_worker_sleeping(struct task_struct *);
 extern void io_wq_worker_running(struct task_struct *);
@@ -140,6 +151,7 @@ static inline void io_wq_worker_running(struct task_struct *tsk)
 
 static inline bool io_wq_current_is_worker(void)
 {
-       return in_task() && (current->flags & PF_IO_WORKER);
+       return in_task() && (current->flags & PF_IO_WORKER) &&
+               current->pf_io_worker;
 }
 #endif
index 14ce789..65a17d5 100644 (file)
@@ -57,7 +57,6 @@
 #include <linux/mman.h>
 #include <linux/percpu.h>
 #include <linux/slab.h>
-#include <linux/kthread.h>
 #include <linux/blkdev.h>
 #include <linux/bvec.h>
 #include <linux/net.h>
 #include <linux/fsnotify.h>
 #include <linux/fadvise.h>
 #include <linux/eventpoll.h>
-#include <linux/fs_struct.h>
 #include <linux/splice.h>
 #include <linux/task_work.h>
 #include <linux/pagemap.h>
 #include <linux/io_uring.h>
-#include <linux/blk-cgroup.h>
-#include <linux/audit.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/io_uring.h>
 #define IORING_MAX_RESTRICTIONS        (IORING_RESTRICTION_LAST + \
                                 IORING_REGISTER_LAST + IORING_OP_LAST)
 
+#define SQE_VALID_FLAGS        (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
+                               IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
+                               IOSQE_BUFFER_SELECT)
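
Gathering every flag userspace may set on an SQE into one mask lets submission reject malformed entries with a single test. A self-contained sketch of such a check against the uapi flag definitions (check_sqe_flags is an invented name, not an io_uring function):

#include <errno.h>
#include <linux/io_uring.h>             /* IOSQE_* uapi flag bits */

#define SQE_VALID_FLAGS (IOSQE_FIXED_FILE | IOSQE_IO_DRAIN | IOSQE_IO_LINK | \
                         IOSQE_IO_HARDLINK | IOSQE_ASYNC | IOSQE_BUFFER_SELECT)

/* Reject any submission carrying a flag outside the accepted mask. */
static int check_sqe_flags(unsigned int sqe_flags)
{
        return (sqe_flags & ~SQE_VALID_FLAGS) ? -EINVAL : 0;
}
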
+
 struct io_uring {
        u32 head ____cacheline_aligned_in_smp;
        u32 tail ____cacheline_aligned_in_smp;
@@ -232,6 +232,7 @@ struct fixed_rsrc_data {
        struct fixed_rsrc_ref_node      *node;
        struct percpu_ref               refs;
        struct completion               done;
+       bool                            quiesce;
 };
 
 struct io_buffer {
@@ -249,19 +250,30 @@ struct io_restriction {
        bool registered;
 };
 
+enum {
+       IO_SQ_THREAD_SHOULD_STOP = 0,
+       IO_SQ_THREAD_SHOULD_PARK,
+};
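
These values index bits in the sqd state word (the unsigned long state member below), manipulated with set_bit()/test_bit() so stop and park requests can be posted without a lock. The same bit protocol in userspace C11 (names assumed):

#include <stdatomic.h>
#include <stdbool.h>

enum { SHOULD_STOP, SHOULD_PARK };      /* bit numbers, as above */

static inline void set_state(atomic_ulong *state, int bit)
{
        atomic_fetch_or(state, 1UL << bit);
}

static inline bool test_state(atomic_ulong *state, int bit)
{
        return atomic_load(state) & (1UL << bit);
}
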
+
 struct io_sq_data {
        refcount_t              refs;
+       atomic_t                park_pending;
        struct mutex            lock;
 
        /* ctx's that are using this sqd */
        struct list_head        ctx_list;
-       struct list_head        ctx_new_list;
-       struct mutex            ctx_lock;
 
        struct task_struct      *thread;
        struct wait_queue_head  wait;
 
        unsigned                sq_thread_idle;
+       int                     sq_cpu;
+       pid_t                   task_pid;
+       pid_t                   task_tgid;
+
+       unsigned long           state;
+       struct completion       exited;
+       struct callback_head    *park_task_work;
 };
 
 #define IO_IOPOLL_BATCH                        8
@@ -279,8 +291,14 @@ struct io_comp_state {
        struct list_head        locked_free_list;
 };
 
+struct io_submit_link {
+       struct io_kiocb         *head;
+       struct io_kiocb         *last;
+};
+
 struct io_submit_state {
        struct blk_plug         plug;
+       struct io_submit_link   link;
 
        /*
         * io_kiocb alloc cache
@@ -312,12 +330,10 @@ struct io_ring_ctx {
        struct {
                unsigned int            flags;
                unsigned int            compat: 1;
-               unsigned int            limit_mem: 1;
                unsigned int            cq_overflow_flushed: 1;
                unsigned int            drain_next: 1;
                unsigned int            eventfd_async: 1;
                unsigned int            restricted: 1;
-               unsigned int            sqo_dead: 1;
 
                /*
                 * Ring buffer of indices into array of io_uring_sqe, which is
@@ -339,6 +355,9 @@ struct io_ring_ctx {
                unsigned                cached_cq_overflow;
                unsigned long           sq_check_overflow;
 
+               /* hashed buffered write serialization */
+               struct io_wq_hash       *hash_map;
+
                struct list_head        defer_list;
                struct list_head        timeout_list;
                struct list_head        cq_overflow_list;
@@ -355,22 +374,10 @@ struct io_ring_ctx {
 
        struct io_rings *rings;
 
-       /* IO offload */
-       struct io_wq            *io_wq;
-
-       /*
-        * For SQPOLL usage - we hold a reference to the parent task, so we
-        * have access to the ->files
-        */
-       struct task_struct      *sqo_task;
-
        /* Only used for accounting purposes */
        struct mm_struct        *mm_account;
 
-#ifdef CONFIG_BLK_CGROUP
-       struct cgroup_subsys_state      *sqo_blkcg_css;
-#endif
-
+       const struct cred       *sq_creds;      /* cred used for __io_sq_thread() */
        struct io_sq_data       *sq_data;       /* if using sq thread polling */
 
        struct wait_queue_head  sqo_sq_wait;
@@ -390,23 +397,16 @@ struct io_ring_ctx {
 
        struct user_struct      *user;
 
-       const struct cred       *creds;
-
-#ifdef CONFIG_AUDIT
-       kuid_t                  loginuid;
-       unsigned int            sessionid;
-#endif
-
        struct completion       ref_comp;
-       struct completion       sq_thread_comp;
 
 #if defined(CONFIG_UNIX)
        struct socket           *ring_sock;
 #endif
 
-       struct idr              io_buffer_idr;
+       struct xarray           io_buffers;
 
-       struct idr              personality_idr;
+       struct xarray           personalities;
+       u32                     pers_next;
 
        struct {
                unsigned                cached_cq_tail;
@@ -445,8 +445,30 @@ struct io_ring_ctx {
 
        struct io_restriction           restrictions;
 
+       /* exit task_work */
+       struct callback_head            *exit_task_work;
+
+       struct wait_queue_head          hash_wait;
+
        /* Keep this last, we don't need it for the fast path */
        struct work_struct              exit_work;
+       struct list_head                tctx_list;
+};
+
+struct io_uring_task {
+       /* submission side */
+       struct xarray           xa;
+       struct wait_queue_head  wait;
+       const struct io_ring_ctx *last;
+       struct io_wq            *io_wq;
+       struct percpu_counter   inflight;
+       atomic_t                in_idle;
+       bool                    sqpoll;
+
+       spinlock_t              task_lock;
+       struct io_wq_work_list  task_list;
+       unsigned long           task_state;
+       struct callback_head    task_work;
 };
 
 /*
@@ -673,9 +695,9 @@ enum {
        REQ_F_POLLED_BIT,
        REQ_F_BUFFER_SELECTED_BIT,
        REQ_F_NO_FILE_TABLE_BIT,
-       REQ_F_WORK_INITIALIZED_BIT,
        REQ_F_LTIMEOUT_ACTIVE_BIT,
        REQ_F_COMPLETE_INLINE_BIT,
+       REQ_F_REISSUE_BIT,
 
        /* not a real bit, just to check we're not overflowing the space */
        __REQ_F_LAST_BIT,
@@ -697,7 +719,7 @@ enum {
 
        /* fail rest of links */
        REQ_F_FAIL_LINK         = BIT(REQ_F_FAIL_LINK_BIT),
-       /* on inflight list */
+       /* on inflight list; should be cancelled and waited on reliably at exit */
        REQ_F_INFLIGHT          = BIT(REQ_F_INFLIGHT_BIT),
        /* read/write uses file position */
        REQ_F_CUR_POS           = BIT(REQ_F_CUR_POS_BIT),
@@ -715,12 +737,12 @@ enum {
        REQ_F_BUFFER_SELECTED   = BIT(REQ_F_BUFFER_SELECTED_BIT),
        /* doesn't need file table for this request */
        REQ_F_NO_FILE_TABLE     = BIT(REQ_F_NO_FILE_TABLE_BIT),
-       /* io_wq_work is initialized */
-       REQ_F_WORK_INITIALIZED  = BIT(REQ_F_WORK_INITIALIZED_BIT),
        /* linked timeout is active, i.e. prepared by link's head */
        REQ_F_LTIMEOUT_ACTIVE   = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT),
        /* completion is deferred through io_comp_state */
        REQ_F_COMPLETE_INLINE   = BIT(REQ_F_COMPLETE_INLINE_BIT),
+       /* caller should reissue async */
+       REQ_F_REISSUE           = BIT(REQ_F_REISSUE_BIT),
 };
 
 struct async_poll {
@@ -801,6 +823,12 @@ struct io_kiocb {
        struct io_wq_work               work;
 };
 
+struct io_tctx_node {
+       struct list_head        ctx_node;
+       struct task_struct      *task;
+       struct io_ring_ctx      *ctx;
+};
+
 struct io_defer_entry {
        struct list_head        list;
        struct io_kiocb         *req;
@@ -827,7 +855,6 @@ struct io_op_def {
        unsigned                plug : 1;
        /* size of async data needed, if any */
        unsigned short          async_size;
-       unsigned                work_flags;
 };
 
 static const struct io_op_def io_op_defs[] = {
@@ -840,7 +867,6 @@ static const struct io_op_def io_op_defs[] = {
                .needs_async_data       = 1,
                .plug                   = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
        },
        [IORING_OP_WRITEV] = {
                .needs_file             = 1,
@@ -850,12 +876,9 @@ static const struct io_op_def io_op_defs[] = {
                .needs_async_data       = 1,
                .plug                   = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-                                               IO_WQ_WORK_FSIZE,
        },
        [IORING_OP_FSYNC] = {
                .needs_file             = 1,
-               .work_flags             = IO_WQ_WORK_BLKCG,
        },
        [IORING_OP_READ_FIXED] = {
                .needs_file             = 1,
@@ -863,7 +886,6 @@ static const struct io_op_def io_op_defs[] = {
                .pollin                 = 1,
                .plug                   = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
        },
        [IORING_OP_WRITE_FIXED] = {
                .needs_file             = 1,
@@ -872,8 +894,6 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .plug                   = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE |
-                                               IO_WQ_WORK_MM,
        },
        [IORING_OP_POLL_ADD] = {
                .needs_file             = 1,
@@ -882,7 +902,6 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_POLL_REMOVE] = {},
        [IORING_OP_SYNC_FILE_RANGE] = {
                .needs_file             = 1,
-               .work_flags             = IO_WQ_WORK_BLKCG,
        },
        [IORING_OP_SENDMSG] = {
                .needs_file             = 1,
@@ -890,8 +909,6 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .needs_async_data       = 1,
                .async_size             = sizeof(struct io_async_msghdr),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-                                               IO_WQ_WORK_FS,
        },
        [IORING_OP_RECVMSG] = {
                .needs_file             = 1,
@@ -900,29 +917,23 @@ static const struct io_op_def io_op_defs[] = {
                .buffer_select          = 1,
                .needs_async_data       = 1,
                .async_size             = sizeof(struct io_async_msghdr),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-                                               IO_WQ_WORK_FS,
        },
        [IORING_OP_TIMEOUT] = {
                .needs_async_data       = 1,
                .async_size             = sizeof(struct io_timeout_data),
-               .work_flags             = IO_WQ_WORK_MM,
        },
        [IORING_OP_TIMEOUT_REMOVE] = {
                /* used by timeout updates' prep() */
-               .work_flags             = IO_WQ_WORK_MM,
        },
        [IORING_OP_ACCEPT] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
                .pollin                 = 1,
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_FILES,
        },
        [IORING_OP_ASYNC_CANCEL] = {},
        [IORING_OP_LINK_TIMEOUT] = {
                .needs_async_data       = 1,
                .async_size             = sizeof(struct io_timeout_data),
-               .work_flags             = IO_WQ_WORK_MM,
        },
        [IORING_OP_CONNECT] = {
                .needs_file             = 1,
@@ -930,26 +941,14 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .needs_async_data       = 1,
                .async_size             = sizeof(struct io_async_connect),
-               .work_flags             = IO_WQ_WORK_MM,
        },
        [IORING_OP_FALLOCATE] = {
                .needs_file             = 1,
-               .work_flags             = IO_WQ_WORK_BLKCG | IO_WQ_WORK_FSIZE,
-       },
-       [IORING_OP_OPENAT] = {
-               .work_flags             = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG |
-                                               IO_WQ_WORK_FS | IO_WQ_WORK_MM,
-       },
-       [IORING_OP_CLOSE] = {
-               .work_flags             = IO_WQ_WORK_FILES | IO_WQ_WORK_BLKCG,
-       },
-       [IORING_OP_FILES_UPDATE] = {
-               .work_flags             = IO_WQ_WORK_FILES | IO_WQ_WORK_MM,
-       },
-       [IORING_OP_STATX] = {
-               .work_flags             = IO_WQ_WORK_FILES | IO_WQ_WORK_MM |
-                                               IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
        },
+       [IORING_OP_OPENAT] = {},
+       [IORING_OP_CLOSE] = {},
+       [IORING_OP_FILES_UPDATE] = {},
+       [IORING_OP_STATX] = {},
        [IORING_OP_READ] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
@@ -957,7 +956,6 @@ static const struct io_op_def io_op_defs[] = {
                .buffer_select          = 1,
                .plug                   = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
        },
        [IORING_OP_WRITE] = {
                .needs_file             = 1,
@@ -965,42 +963,31 @@ static const struct io_op_def io_op_defs[] = {
                .pollout                = 1,
                .plug                   = 1,
                .async_size             = sizeof(struct io_async_rw),
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG |
-                                               IO_WQ_WORK_FSIZE,
        },
        [IORING_OP_FADVISE] = {
                .needs_file             = 1,
-               .work_flags             = IO_WQ_WORK_BLKCG,
-       },
-       [IORING_OP_MADVISE] = {
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
        },
+       [IORING_OP_MADVISE] = {},
        [IORING_OP_SEND] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
                .pollout                = 1,
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
        },
        [IORING_OP_RECV] = {
                .needs_file             = 1,
                .unbound_nonreg_file    = 1,
                .pollin                 = 1,
                .buffer_select          = 1,
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG,
        },
        [IORING_OP_OPENAT2] = {
-               .work_flags             = IO_WQ_WORK_FILES | IO_WQ_WORK_FS |
-                                               IO_WQ_WORK_BLKCG | IO_WQ_WORK_MM,
        },
        [IORING_OP_EPOLL_CTL] = {
                .unbound_nonreg_file    = 1,
-               .work_flags             = IO_WQ_WORK_FILES,
        },
        [IORING_OP_SPLICE] = {
                .needs_file             = 1,
                .hash_reg_file          = 1,
                .unbound_nonreg_file    = 1,
-               .work_flags             = IO_WQ_WORK_BLKCG,
        },
        [IORING_OP_PROVIDE_BUFFERS] = {},
        [IORING_OP_REMOVE_BUFFERS] = {},
@@ -1012,24 +999,20 @@ static const struct io_op_def io_op_defs[] = {
        [IORING_OP_SHUTDOWN] = {
                .needs_file             = 1,
        },
-       [IORING_OP_RENAMEAT] = {
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
-                                               IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
-       },
-       [IORING_OP_UNLINKAT] = {
-               .work_flags             = IO_WQ_WORK_MM | IO_WQ_WORK_FILES |
-                                               IO_WQ_WORK_FS | IO_WQ_WORK_BLKCG,
-       },
+       [IORING_OP_RENAMEAT] = {},
+       [IORING_OP_UNLINKAT] = {},
 };
 
+static bool io_disarm_next(struct io_kiocb *req);
+static void io_uring_del_task_file(unsigned long index);
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                         struct task_struct *task,
                                         struct files_struct *files);
+static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx);
 static void destroy_fixed_rsrc_ref_node(struct fixed_rsrc_ref_node *ref_node);
 static struct fixed_rsrc_ref_node *alloc_fixed_rsrc_ref_node(
                        struct io_ring_ctx *ctx);
-static void init_fixed_file_ref_node(struct io_ring_ctx *ctx,
-                                    struct fixed_rsrc_ref_node *ref_node);
+static void io_ring_file_put(struct io_ring_ctx *ctx, struct io_rsrc_put *prsrc);
 
 static bool io_rw_reissue(struct io_kiocb *req);
 static void io_cqring_fill_event(struct io_kiocb *req, long res);
@@ -1112,190 +1095,18 @@ static bool io_match_task(struct io_kiocb *head,
                return true;
 
        io_for_each_link(req, head) {
-               if (!(req->flags & REQ_F_WORK_INITIALIZED))
-                       continue;
-               if (req->file && req->file->f_op == &io_uring_fops)
-                       return true;
-               if ((req->work.flags & IO_WQ_WORK_FILES) &&
-                   req->work.identity->files == files)
+               if (req->flags & REQ_F_INFLIGHT)
                        return true;
        }
        return false;
 }
 
-static void io_sq_thread_drop_mm_files(void)
-{
-       struct files_struct *files = current->files;
-       struct mm_struct *mm = current->mm;
-
-       if (mm) {
-               kthread_unuse_mm(mm);
-               mmput(mm);
-               current->mm = NULL;
-       }
-       if (files) {
-               struct nsproxy *nsproxy = current->nsproxy;
-
-               task_lock(current);
-               current->files = NULL;
-               current->nsproxy = NULL;
-               task_unlock(current);
-               put_files_struct(files);
-               put_nsproxy(nsproxy);
-       }
-}
-
-static int __io_sq_thread_acquire_files(struct io_ring_ctx *ctx)
-{
-       if (!current->files) {
-               struct files_struct *files;
-               struct nsproxy *nsproxy;
-
-               task_lock(ctx->sqo_task);
-               files = ctx->sqo_task->files;
-               if (!files) {
-                       task_unlock(ctx->sqo_task);
-                       return -EOWNERDEAD;
-               }
-               atomic_inc(&files->count);
-               get_nsproxy(ctx->sqo_task->nsproxy);
-               nsproxy = ctx->sqo_task->nsproxy;
-               task_unlock(ctx->sqo_task);
-
-               task_lock(current);
-               current->files = files;
-               current->nsproxy = nsproxy;
-               task_unlock(current);
-       }
-       return 0;
-}
-
-static int __io_sq_thread_acquire_mm(struct io_ring_ctx *ctx)
-{
-       struct mm_struct *mm;
-
-       if (current->mm)
-               return 0;
-
-       task_lock(ctx->sqo_task);
-       mm = ctx->sqo_task->mm;
-       if (unlikely(!mm || !mmget_not_zero(mm)))
-               mm = NULL;
-       task_unlock(ctx->sqo_task);
-
-       if (mm) {
-               kthread_use_mm(mm);
-               return 0;
-       }
-
-       return -EFAULT;
-}
-
-static int __io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx,
-                                          struct io_kiocb *req)
-{
-       const struct io_op_def *def = &io_op_defs[req->opcode];
-       int ret;
-
-       if (def->work_flags & IO_WQ_WORK_MM) {
-               ret = __io_sq_thread_acquire_mm(ctx);
-               if (unlikely(ret))
-                       return ret;
-       }
-
-       if (def->needs_file || (def->work_flags & IO_WQ_WORK_FILES)) {
-               ret = __io_sq_thread_acquire_files(ctx);
-               if (unlikely(ret))
-                       return ret;
-       }
-
-       return 0;
-}
-
-static inline int io_sq_thread_acquire_mm_files(struct io_ring_ctx *ctx,
-                                               struct io_kiocb *req)
-{
-       if (!(ctx->flags & IORING_SETUP_SQPOLL))
-               return 0;
-       return __io_sq_thread_acquire_mm_files(ctx, req);
-}
-
-static void io_sq_thread_associate_blkcg(struct io_ring_ctx *ctx,
-                                        struct cgroup_subsys_state **cur_css)
-
-{
-#ifdef CONFIG_BLK_CGROUP
-       /* puts the old one when swapping */
-       if (*cur_css != ctx->sqo_blkcg_css) {
-               kthread_associate_blkcg(ctx->sqo_blkcg_css);
-               *cur_css = ctx->sqo_blkcg_css;
-       }
-#endif
-}
-
-static void io_sq_thread_unassociate_blkcg(void)
-{
-#ifdef CONFIG_BLK_CGROUP
-       kthread_associate_blkcg(NULL);
-#endif
-}
-
 static inline void req_set_fail_links(struct io_kiocb *req)
 {
        if ((req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) == REQ_F_LINK)
                req->flags |= REQ_F_FAIL_LINK;
 }
 
-/*
- * None of these are dereferenced, they are simply used to check if any of
- * them have changed. If we're under current and check they are still the
- * same, we're fine to grab references to them for actual out-of-line use.
- */
-static void io_init_identity(struct io_identity *id)
-{
-       id->files = current->files;
-       id->mm = current->mm;
-#ifdef CONFIG_BLK_CGROUP
-       rcu_read_lock();
-       id->blkcg_css = blkcg_css();
-       rcu_read_unlock();
-#endif
-       id->creds = current_cred();
-       id->nsproxy = current->nsproxy;
-       id->fs = current->fs;
-       id->fsize = rlimit(RLIMIT_FSIZE);
-#ifdef CONFIG_AUDIT
-       id->loginuid = current->loginuid;
-       id->sessionid = current->sessionid;
-#endif
-       refcount_set(&id->count, 1);
-}
-
-static inline void __io_req_init_async(struct io_kiocb *req)
-{
-       memset(&req->work, 0, sizeof(req->work));
-       req->flags |= REQ_F_WORK_INITIALIZED;
-}
-
-/*
- * Note: must call io_req_init_async() for the first time you
- * touch any members of io_wq_work.
- */
-static inline void io_req_init_async(struct io_kiocb *req)
-{
-       struct io_uring_task *tctx = current->io_uring;
-
-       if (req->flags & REQ_F_WORK_INITIALIZED)
-               return;
-
-       __io_req_init_async(req);
-
-       /* Grab a ref if this isn't our static identity */
-       req->work.identity = tctx->identity;
-       if (tctx->identity != &tctx->__identity)
-               refcount_inc(&req->work.identity->count);
-}
-
 static void io_ring_ctx_ref_free(struct percpu_ref *ref)
 {
        struct io_ring_ctx *ctx = container_of(ref, struct io_ring_ctx, refs);
@@ -1342,9 +1153,8 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        init_waitqueue_head(&ctx->cq_wait);
        INIT_LIST_HEAD(&ctx->cq_overflow_list);
        init_completion(&ctx->ref_comp);
-       init_completion(&ctx->sq_thread_comp);
-       idr_init(&ctx->io_buffer_idr);
-       idr_init(&ctx->personality_idr);
+       xa_init_flags(&ctx->io_buffers, XA_FLAGS_ALLOC1);
+       xa_init_flags(&ctx->personalities, XA_FLAGS_ALLOC1);
        mutex_init(&ctx->uring_lock);
        init_waitqueue_head(&ctx->wait);
        spin_lock_init(&ctx->completion_lock);
@@ -1357,6 +1167,7 @@ static struct io_ring_ctx *io_ring_ctx_alloc(struct io_uring_params *p)
        INIT_LIST_HEAD(&ctx->rsrc_ref_list);
        INIT_DELAYED_WORK(&ctx->rsrc_put_work, io_rsrc_put_work);
        init_llist_head(&ctx->rsrc_put_llist);
+       INIT_LIST_HEAD(&ctx->tctx_list);
        INIT_LIST_HEAD(&ctx->submit_state.comp.free_list);
        INIT_LIST_HEAD(&ctx->submit_state.comp.locked_free_list);
        return ctx;
@@ -1378,111 +1189,11 @@ static bool req_need_defer(struct io_kiocb *req, u32 seq)
        return false;
 }
 
-static void io_put_identity(struct io_uring_task *tctx, struct io_kiocb *req)
-{
-       if (req->work.identity == &tctx->__identity)
-               return;
-       if (refcount_dec_and_test(&req->work.identity->count))
-               kfree(req->work.identity);
-}
-
-static void io_req_clean_work(struct io_kiocb *req)
-{
-       if (!(req->flags & REQ_F_WORK_INITIALIZED))
-               return;
-
-       if (req->work.flags & IO_WQ_WORK_MM)
-               mmdrop(req->work.identity->mm);
-#ifdef CONFIG_BLK_CGROUP
-       if (req->work.flags & IO_WQ_WORK_BLKCG)
-               css_put(req->work.identity->blkcg_css);
-#endif
-       if (req->work.flags & IO_WQ_WORK_CREDS)
-               put_cred(req->work.identity->creds);
-       if (req->work.flags & IO_WQ_WORK_FS) {
-               struct fs_struct *fs = req->work.identity->fs;
-
-               spin_lock(&req->work.identity->fs->lock);
-               if (--fs->users)
-                       fs = NULL;
-               spin_unlock(&req->work.identity->fs->lock);
-               if (fs)
-                       free_fs_struct(fs);
-       }
-       if (req->work.flags & IO_WQ_WORK_FILES) {
-               put_files_struct(req->work.identity->files);
-               put_nsproxy(req->work.identity->nsproxy);
-       }
-       if (req->flags & REQ_F_INFLIGHT) {
-               struct io_ring_ctx *ctx = req->ctx;
-               struct io_uring_task *tctx = req->task->io_uring;
-               unsigned long flags;
-
-               spin_lock_irqsave(&ctx->inflight_lock, flags);
-               list_del(&req->inflight_entry);
-               spin_unlock_irqrestore(&ctx->inflight_lock, flags);
-               req->flags &= ~REQ_F_INFLIGHT;
-               if (atomic_read(&tctx->in_idle))
-                       wake_up(&tctx->wait);
-       }
-
-       req->flags &= ~REQ_F_WORK_INITIALIZED;
-       req->work.flags &= ~(IO_WQ_WORK_MM | IO_WQ_WORK_BLKCG | IO_WQ_WORK_FS |
-                            IO_WQ_WORK_CREDS | IO_WQ_WORK_FILES);
-       io_put_identity(req->task->io_uring, req);
-}
-
-/*
- * Create a private copy of io_identity, since some fields don't match
- * the current context.
- */
-static bool io_identity_cow(struct io_kiocb *req)
-{
-       struct io_uring_task *tctx = current->io_uring;
-       const struct cred *creds = NULL;
-       struct io_identity *id;
-
-       if (req->work.flags & IO_WQ_WORK_CREDS)
-               creds = req->work.identity->creds;
-
-       id = kmemdup(req->work.identity, sizeof(*id), GFP_KERNEL);
-       if (unlikely(!id)) {
-               req->work.flags |= IO_WQ_WORK_CANCEL;
-               return false;
-       }
-
-       /*
-        * We can safely just re-init the creds we copied  Either the field
-        * matches the current one, or we haven't grabbed it yet. The only
-        * exception is ->creds, through registered personalities, so handle
-        * that one separately.
-        */
-       io_init_identity(id);
-       if (creds)
-               id->creds = creds;
-
-       /* add one for this request */
-       refcount_inc(&id->count);
-
-       /* drop tctx and req identity references, if needed */
-       if (tctx->identity != &tctx->__identity &&
-           refcount_dec_and_test(&tctx->identity->count))
-               kfree(tctx->identity);
-       if (req->work.identity != &tctx->__identity &&
-           refcount_dec_and_test(&req->work.identity->count))
-               kfree(req->work.identity);
-
-       req->work.identity = id;
-       tctx->identity = id;
-       return true;
-}
-
 static void io_req_track_inflight(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
        if (!(req->flags & REQ_F_INFLIGHT)) {
-               io_req_init_async(req);
                req->flags |= REQ_F_INFLIGHT;
 
                spin_lock_irq(&ctx->inflight_lock);
@@ -1491,85 +1202,13 @@ static void io_req_track_inflight(struct io_kiocb *req)
        }
 }
 
-static bool io_grab_identity(struct io_kiocb *req)
-{
-       const struct io_op_def *def = &io_op_defs[req->opcode];
-       struct io_identity *id = req->work.identity;
-
-       if (def->work_flags & IO_WQ_WORK_FSIZE) {
-               if (id->fsize != rlimit(RLIMIT_FSIZE))
-                       return false;
-               req->work.flags |= IO_WQ_WORK_FSIZE;
-       }
-#ifdef CONFIG_BLK_CGROUP
-       if (!(req->work.flags & IO_WQ_WORK_BLKCG) &&
-           (def->work_flags & IO_WQ_WORK_BLKCG)) {
-               rcu_read_lock();
-               if (id->blkcg_css != blkcg_css()) {
-                       rcu_read_unlock();
-                       return false;
-               }
-               /*
-                * This should be rare, either the cgroup is dying or the task
-                * is moving cgroups. Just punt to root for the handful of ios.
-                */
-               if (css_tryget_online(id->blkcg_css))
-                       req->work.flags |= IO_WQ_WORK_BLKCG;
-               rcu_read_unlock();
-       }
-#endif
-       if (!(req->work.flags & IO_WQ_WORK_CREDS)) {
-               if (id->creds != current_cred())
-                       return false;
-               get_cred(id->creds);
-               req->work.flags |= IO_WQ_WORK_CREDS;
-       }
-#ifdef CONFIG_AUDIT
-       if (!uid_eq(current->loginuid, id->loginuid) ||
-           current->sessionid != id->sessionid)
-               return false;
-#endif
-       if (!(req->work.flags & IO_WQ_WORK_FS) &&
-           (def->work_flags & IO_WQ_WORK_FS)) {
-               if (current->fs != id->fs)
-                       return false;
-               spin_lock(&id->fs->lock);
-               if (!id->fs->in_exec) {
-                       id->fs->users++;
-                       req->work.flags |= IO_WQ_WORK_FS;
-               } else {
-                       req->work.flags |= IO_WQ_WORK_CANCEL;
-               }
-               spin_unlock(&current->fs->lock);
-       }
-       if (!(req->work.flags & IO_WQ_WORK_FILES) &&
-           (def->work_flags & IO_WQ_WORK_FILES) &&
-           !(req->flags & REQ_F_NO_FILE_TABLE)) {
-               if (id->files != current->files ||
-                   id->nsproxy != current->nsproxy)
-                       return false;
-               atomic_inc(&id->files->count);
-               get_nsproxy(id->nsproxy);
-               req->work.flags |= IO_WQ_WORK_FILES;
-               io_req_track_inflight(req);
-       }
-       if (!(req->work.flags & IO_WQ_WORK_MM) &&
-           (def->work_flags & IO_WQ_WORK_MM)) {
-               if (id->mm != current->mm)
-                       return false;
-               mmgrab(id->mm);
-               req->work.flags |= IO_WQ_WORK_MM;
-       }
-
-       return true;
-}
-
 static void io_prep_async_work(struct io_kiocb *req)
 {
        const struct io_op_def *def = &io_op_defs[req->opcode];
        struct io_ring_ctx *ctx = req->ctx;
 
-       io_req_init_async(req);
+       if (!req->work.creds)
+               req->work.creds = get_current_cred();
 
        if (req->flags & REQ_F_FORCE_ASYNC)
                req->work.flags |= IO_WQ_WORK_CONCURRENT;
@@ -1577,21 +1216,10 @@ static void io_prep_async_work(struct io_kiocb *req)
        if (req->flags & REQ_F_ISREG) {
                if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL))
                        io_wq_hash_work(&req->work, file_inode(req->file));
-       } else {
+       } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) {
                if (def->unbound_nonreg_file)
                        req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
-
-       /* if we fail grabbing identity, we must COW, regrab, and retry */
-       if (io_grab_identity(req))
-               return;
-
-       if (!io_identity_cow(req))
-               return;
-
-       /* can't fail at this point */
-       if (!io_grab_identity(req))
-               WARN_ON(1);
 }
 
 static void io_prep_async_link(struct io_kiocb *req)
@@ -1602,30 +1230,25 @@ static void io_prep_async_link(struct io_kiocb *req)
                io_prep_async_work(cur);
 }
 
-static struct io_kiocb *__io_queue_async_work(struct io_kiocb *req)
+static void io_queue_async_work(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
        struct io_kiocb *link = io_prep_linked_timeout(req);
+       struct io_uring_task *tctx = req->task->io_uring;
 
-       trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
-                                       &req->work, req->flags);
-       io_wq_enqueue(ctx->io_wq, &req->work);
-       return link;
-}
-
-static void io_queue_async_work(struct io_kiocb *req)
-{
-       struct io_kiocb *link;
+       BUG_ON(!tctx);
+       BUG_ON(!tctx->io_wq);
 
        /* init ->work of the whole link before punting */
        io_prep_async_link(req);
-       link = __io_queue_async_work(req);
-
+       trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req,
+                                       &req->work, req->flags);
+       io_wq_enqueue(tctx->io_wq, &req->work);
        if (link)
                io_queue_linked_timeout(link);
 }
 
-static void io_kill_timeout(struct io_kiocb *req)
+static void io_kill_timeout(struct io_kiocb *req, int status)
 {
        struct io_timeout_data *io = req->async_data;
        int ret;
@@ -1635,31 +1258,11 @@ static void io_kill_timeout(struct io_kiocb *req)
                atomic_set(&req->ctx->cq_timeouts,
                        atomic_read(&req->ctx->cq_timeouts) + 1);
                list_del_init(&req->timeout.list);
-               io_cqring_fill_event(req, 0);
+               io_cqring_fill_event(req, status);
                io_put_req_deferred(req, 1);
        }
 }
 
-/*
- * Returns true if we found and killed one or more timeouts
- */
-static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
-                            struct files_struct *files)
-{
-       struct io_kiocb *req, *tmp;
-       int canceled = 0;
-
-       spin_lock_irq(&ctx->completion_lock);
-       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
-               if (io_match_task(req, tsk, files)) {
-                       io_kill_timeout(req);
-                       canceled++;
-               }
-       }
-       spin_unlock_irq(&ctx->completion_lock);
-       return canceled != 0;
-}
-
 static void __io_queue_deferred(struct io_ring_ctx *ctx)
 {
        do {
@@ -1704,7 +1307,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx)
                        break;
 
                list_del_init(&req->timeout.list);
-               io_kill_timeout(req);
+               io_kill_timeout(req, 0);
        } while (!list_empty(&ctx->timeout_list));
 
        ctx->cq_last_tm_flush = seq;
@@ -1855,18 +1458,22 @@ static bool __io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
        return all_flushed;
 }
 
-static void io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
+static bool io_cqring_overflow_flush(struct io_ring_ctx *ctx, bool force,
                                     struct task_struct *tsk,
                                     struct files_struct *files)
 {
+       bool ret = true;
+
        if (test_bit(0, &ctx->cq_check_overflow)) {
                /* iopoll syncs against uring_lock, not completion_lock */
                if (ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_lock(&ctx->uring_lock);
-               __io_cqring_overflow_flush(ctx, force, tsk, files);
+               ret = __io_cqring_overflow_flush(ctx, force, tsk, files);
                if (ctx->flags & IORING_SETUP_IOPOLL)
                        mutex_unlock(&ctx->uring_lock);
        }
+
+       return ret;
 }
 
 static void __io_cqring_fill_event(struct io_kiocb *req, long res, long cflags)
@@ -1914,15 +1521,14 @@ static void io_cqring_fill_event(struct io_kiocb *req, long res)
        __io_cqring_fill_event(req, res, 0);
 }
 
-static inline void io_req_complete_post(struct io_kiocb *req, long res,
-                                       unsigned int cflags)
+static void io_req_complete_post(struct io_kiocb *req, long res,
+                                unsigned int cflags)
 {
        struct io_ring_ctx *ctx = req->ctx;
        unsigned long flags;
 
        spin_lock_irqsave(&ctx->completion_lock, flags);
        __io_cqring_fill_event(req, res, cflags);
-       io_commit_cqring(ctx);
        /*
         * If we're the last reference to this request, add to our locked
         * free_list cache.
@@ -1930,17 +1536,27 @@ static inline void io_req_complete_post(struct io_kiocb *req, long res,
        if (refcount_dec_and_test(&req->refs)) {
                struct io_comp_state *cs = &ctx->submit_state.comp;
 
+               if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
+                       if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL_LINK))
+                               io_disarm_next(req);
+                       if (req->link) {
+                               io_req_task_queue(req->link);
+                               req->link = NULL;
+                       }
+               }
                io_dismantle_req(req);
                io_put_task(req->task, 1);
                list_add(&req->compl.list, &cs->locked_free_list);
                cs->locked_free_nr++;
-       } else
-               req = NULL;
+       } else {
+               if (!percpu_ref_tryget(&ctx->refs))
+                       req = NULL;
+       }
+       io_commit_cqring(ctx);
        spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
-       io_cqring_ev_posted(ctx);
        if (req) {
-               io_queue_next(req);
+               io_cqring_ev_posted(ctx);
                percpu_ref_put(&ctx->refs);
        }
 }
@@ -2048,9 +1664,23 @@ static void io_dismantle_req(struct io_kiocb *req)
                io_put_file(req, req->file, (req->flags & REQ_F_FIXED_FILE));
        if (req->fixed_rsrc_refs)
                percpu_ref_put(req->fixed_rsrc_refs);
-       io_req_clean_work(req);
+       if (req->work.creds) {
+               put_cred(req->work.creds);
+               req->work.creds = NULL;
+       }
+
+       if (req->flags & REQ_F_INFLIGHT) {
+               struct io_ring_ctx *ctx = req->ctx;
+               unsigned long flags;
+
+               spin_lock_irqsave(&ctx->inflight_lock, flags);
+               list_del(&req->inflight_entry);
+               spin_unlock_irqrestore(&ctx->inflight_lock, flags);
+               req->flags &= ~REQ_F_INFLIGHT;
+       }
 }
 
+/* must be called shortly after putting a request */

 static inline void io_put_task(struct task_struct *task, int nr)
 {
        struct io_uring_task *tctx = task->io_uring;
@@ -2080,15 +1710,11 @@ static inline void io_remove_next_linked(struct io_kiocb *req)
        nxt->link = NULL;
 }
 
-static void io_kill_linked_timeout(struct io_kiocb *req)
+static bool io_kill_linked_timeout(struct io_kiocb *req)
+       __must_hold(&req->ctx->completion_lock)
 {
-       struct io_ring_ctx *ctx = req->ctx;
-       struct io_kiocb *link;
+       struct io_kiocb *link = req->link;
        bool cancelled = false;
-       unsigned long flags;
-
-       spin_lock_irqsave(&ctx->completion_lock, flags);
-       link = req->link;
 
        /*
         * Can happen if a linked timeout fired and link had been like
@@ -2103,73 +1729,71 @@ static void io_kill_linked_timeout(struct io_kiocb *req)
                ret = hrtimer_try_to_cancel(&io->timer);
                if (ret != -1) {
                        io_cqring_fill_event(link, -ECANCELED);
-                       io_commit_cqring(ctx);
+                       io_put_req_deferred(link, 1);
                        cancelled = true;
                }
        }
        req->flags &= ~REQ_F_LINK_TIMEOUT;
-       spin_unlock_irqrestore(&ctx->completion_lock, flags);
-
-       if (cancelled) {
-               io_cqring_ev_posted(ctx);
-               io_put_req(link);
-       }
+       return cancelled;
 }
 
-
 static void io_fail_links(struct io_kiocb *req)
+       __must_hold(&req->ctx->completion_lock)
 {
-       struct io_kiocb *link, *nxt;
-       struct io_ring_ctx *ctx = req->ctx;
-       unsigned long flags;
+       struct io_kiocb *nxt, *link = req->link;
 
-       spin_lock_irqsave(&ctx->completion_lock, flags);
-       link = req->link;
        req->link = NULL;
-
        while (link) {
                nxt = link->link;
                link->link = NULL;
 
                trace_io_uring_fail_link(req, link);
                io_cqring_fill_event(link, -ECANCELED);
-
-               /*
-                * It's ok to free under spinlock as they're not linked anymore,
-                * but avoid REQ_F_WORK_INITIALIZED because it may deadlock on
-                * work.fs->lock.
-                */
-               if (link->flags & REQ_F_WORK_INITIALIZED)
-                       io_put_req_deferred(link, 2);
-               else
-                       io_double_put_req(link);
+               io_put_req_deferred(link, 2);
                link = nxt;
        }
-       io_commit_cqring(ctx);
-       spin_unlock_irqrestore(&ctx->completion_lock, flags);
-
-       io_cqring_ev_posted(ctx);
 }
 
-static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
+static bool io_disarm_next(struct io_kiocb *req)
+       __must_hold(&req->ctx->completion_lock)
 {
-       if (req->flags & REQ_F_LINK_TIMEOUT)
-               io_kill_linked_timeout(req);
+       bool posted = false;
 
-       /*
-        * If LINK is set, we have dependent requests in this chain. If we
+       if (likely(req->flags & REQ_F_LINK_TIMEOUT))
+               posted = io_kill_linked_timeout(req);
+       if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
+               posted |= (req->link != NULL);
+               io_fail_links(req);
+       }
+       return posted;
+}
+
+static struct io_kiocb *__io_req_find_next(struct io_kiocb *req)
+{
+       struct io_kiocb *nxt;
+
+       /*
+        * If LINK is set, we have dependent requests in this chain. If we
         * didn't fail this request, queue the first one up, moving any other
         * dependencies to the next request. In case of failure, fail the rest
         * of the chain.
         */
-       if (likely(!(req->flags & REQ_F_FAIL_LINK))) {
-               struct io_kiocb *nxt = req->link;
+       if (req->flags & (REQ_F_LINK_TIMEOUT | REQ_F_FAIL_LINK)) {
+               struct io_ring_ctx *ctx = req->ctx;
+               unsigned long flags;
+               bool posted;
 
-               req->link = NULL;
-               return nxt;
+               spin_lock_irqsave(&ctx->completion_lock, flags);
+               posted = io_disarm_next(req);
+               if (posted)
+                       io_commit_cqring(req->ctx);
+               spin_unlock_irqrestore(&ctx->completion_lock, flags);
+               if (posted)
+                       io_cqring_ev_posted(ctx);
        }
-       io_fail_links(req);
-       return NULL;
+       nxt = req->link;
+       req->link = NULL;
+       return nxt;
 }
 
 static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
@@ -2179,6 +1803,18 @@ static inline struct io_kiocb *io_req_find_next(struct io_kiocb *req)
        return __io_req_find_next(req);
 }
 
+static void ctx_flush_and_put(struct io_ring_ctx *ctx)
+{
+       if (!ctx)
+               return;
+       if (ctx->submit_state.comp.nr) {
+               mutex_lock(&ctx->uring_lock);
+               io_submit_flush_completions(&ctx->submit_state.comp, ctx);
+               mutex_unlock(&ctx->uring_lock);
+       }
+       percpu_ref_put(&ctx->refs);
+}
+
 static bool __tctx_task_work(struct io_uring_task *tctx)
 {
        struct io_ring_ctx *ctx = NULL;
@@ -2196,30 +1832,20 @@ static bool __tctx_task_work(struct io_uring_task *tctx)
        node = list.first;
        while (node) {
                struct io_wq_work_node *next = node->next;
-               struct io_ring_ctx *this_ctx;
                struct io_kiocb *req;
 
                req = container_of(node, struct io_kiocb, io_task_work.node);
-               this_ctx = req->ctx;
-               req->task_work.func(&req->task_work);
-               node = next;
-
-               if (!ctx) {
-                       ctx = this_ctx;
-               } else if (ctx != this_ctx) {
-                       mutex_lock(&ctx->uring_lock);
-                       io_submit_flush_completions(&ctx->submit_state.comp, ctx);
-                       mutex_unlock(&ctx->uring_lock);
-                       ctx = this_ctx;
+               if (req->ctx != ctx) {
+                       ctx_flush_and_put(ctx);
+                       ctx = req->ctx;
+                       percpu_ref_get(&ctx->refs);
                }
-       }
 
-       if (ctx && ctx->submit_state.comp.nr) {
-               mutex_lock(&ctx->uring_lock);
-               io_submit_flush_completions(&ctx->submit_state.comp, ctx);
-               mutex_unlock(&ctx->uring_lock);
+               req->task_work.func(&req->task_work);
+               node = next;
        }
 
+       ctx_flush_and_put(ctx);
        return list.first != NULL;
 }
 
@@ -2227,10 +1853,10 @@ static void tctx_task_work(struct callback_head *cb)
 {
        struct io_uring_task *tctx = container_of(cb, struct io_uring_task, task_work);
 
+       clear_bit(0, &tctx->task_state);
+
        while (__tctx_task_work(tctx))
                cond_resched();
-
-       clear_bit(0, &tctx->task_state);
 }
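
Moving clear_bit(0, &tctx->task_state) ahead of the drain loop closes a race: with the bit cleared first, any io_req_task_work_add() that runs while the handler is still draining sees the bit unset and schedules a fresh task_work run, instead of assuming the current run will pick its entry up. A self-contained C11 sketch of the flag-then-drain pattern (all names illustrative):

    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    static atomic_bool scheduled;  /* stand-in for tctx->task_state bit 0 */
    static atomic_int  pending;    /* stand-in for the work list */

    /* Producer: queue work; wake the worker only if it was not
     * already scheduled (test-and-set semantics). */
    static void add_work(void)
    {
        atomic_fetch_add(&pending, 1);
        if (!atomic_exchange(&scheduled, true))
            puts("wake worker");
    }

    /* Worker: clear the flag *before* draining, so work queued
     * while we drain re-arms the wakeup instead of being lost. */
    static void worker(void)
    {
        int n;

        atomic_store(&scheduled, false);
        while ((n = atomic_exchange(&pending, 0)) != 0)
            printf("processed %d item(s)\n", n);
    }

    int main(void)
    {
        add_work();  /* prints "wake worker" */
        add_work();  /* already scheduled, no extra wakeup */
        worker();    /* drains both */
        return 0;
    }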
 
 static int io_task_work_add(struct task_struct *tsk, struct io_kiocb *req,
@@ -2300,14 +1926,44 @@ static int io_req_task_work_add(struct io_kiocb *req)
        return ret;
 }
 
+static bool io_run_task_work_head(struct callback_head **work_head)
+{
+       struct callback_head *work, *next;
+       bool executed = false;
+
+       do {
+               work = xchg(work_head, NULL);
+               if (!work)
+                       break;
+
+               do {
+                       next = work->next;
+                       work->func(work);
+                       work = next;
+                       cond_resched();
+               } while (work);
+               executed = true;
+       } while (1);
+
+       return executed;
+}
+
+static void io_task_work_add_head(struct callback_head **work_head,
+                                 struct callback_head *task_work)
+{
+       struct callback_head *head;
+
+       do {
+               head = READ_ONCE(*work_head);
+               task_work->next = head;
+       } while (cmpxchg(work_head, head, task_work) != head);
+}
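
io_task_work_add_head() is a classic lock-free stack push (read the head, point the new node at it, publish with cmpxchg, retry on contention), and io_run_task_work_head() drains it by swapping the whole list out with a single xchg. These back the exit-time fallback path (ctx->exit_task_work, used just below) instead of waking an io-wq task. A standalone C11 analogue, with illustrative names:

    #include <stdatomic.h>
    #include <stddef.h>
    #include <stdio.h>

    struct cb {
        void (*func)(struct cb *);
        struct cb *next;
    };

    static _Atomic(struct cb *) work_head;

    /* Push: point the new node at the current head and publish it
     * with a compare-and-swap, retrying if someone beat us to it. */
    static void add_head(struct cb *work)
    {
        struct cb *head = atomic_load(&work_head);

        do {
            work->next = head;
        } while (!atomic_compare_exchange_weak(&work_head, &head, work));
    }

    /* Drain: claim the entire list with one atomic swap, then walk it. */
    static void run_head(void)
    {
        struct cb *work = atomic_exchange(&work_head, NULL);

        while (work) {
            struct cb *next = work->next;

            work->func(work);
            work = next;
        }
    }

    static void hello(struct cb *w) { (void)w; puts("ran one callback"); }

    int main(void)
    {
        struct cb a = { hello, NULL }, b = { hello, NULL };

        add_head(&a);
        add_head(&b);
        run_head();  /* runs b, then a (LIFO) */
        return 0;
    }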
+
 static void io_req_task_work_add_fallback(struct io_kiocb *req,
                                          task_work_func_t cb)
 {
-       struct task_struct *tsk = io_wq_get_task(req->ctx->io_wq);
-
        init_task_work(&req->task_work, cb);
-       task_work_add(tsk, &req->task_work, TWA_NONE);
-       wake_up_process(tsk);
+       io_task_work_add_head(&req->ctx->exit_task_work, &req->task_work);
 }
 
 static void __io_req_task_cancel(struct io_kiocb *req, int error)
@@ -2329,7 +1985,9 @@ static void io_req_task_cancel(struct callback_head *cb)
        struct io_kiocb *req = container_of(cb, struct io_kiocb, task_work);
        struct io_ring_ctx *ctx = req->ctx;
 
-       __io_req_task_cancel(req, -ECANCELED);
+       mutex_lock(&ctx->uring_lock);
+       __io_req_task_cancel(req, req->result);
+       mutex_unlock(&ctx->uring_lock);
        percpu_ref_put(&ctx->refs);
 }
 
@@ -2339,15 +1997,11 @@ static void __io_req_task_submit(struct io_kiocb *req)
 
        /* ctx stays valid until unlock, even if we drop all our ctx->refs */
        mutex_lock(&ctx->uring_lock);
-       if (!ctx->sqo_dead && !(current->flags & PF_EXITING) &&
-           !io_sq_thread_acquire_mm_files(ctx, req))
+       if (!(current->flags & PF_EXITING) && !current->in_execve)
                __io_queue_sqe(req);
        else
                __io_req_task_cancel(req, -EFAULT);
        mutex_unlock(&ctx->uring_lock);
-
-       if (ctx->flags & IORING_SETUP_SQPOLL)
-               io_sq_thread_drop_mm_files();
 }
 
 static void io_req_task_submit(struct callback_head *cb)
@@ -2364,11 +2018,22 @@ static void io_req_task_queue(struct io_kiocb *req)
        req->task_work.func = io_req_task_submit;
        ret = io_req_task_work_add(req);
        if (unlikely(ret)) {
+               req->result = -ECANCELED;
                percpu_ref_get(&req->ctx->refs);
                io_req_task_work_add_fallback(req, io_req_task_cancel);
        }
 }
 
+static void io_req_task_queue_fail(struct io_kiocb *req, int ret)
+{
+       percpu_ref_get(&req->ctx->refs);
+       req->result = ret;
+       req->task_work.func = io_req_task_cancel;
+
+       if (unlikely(io_req_task_work_add(req)))
+               io_req_task_work_add_fallback(req, io_req_task_cancel);
+}
+
 static inline void io_queue_next(struct io_kiocb *req)
 {
        struct io_kiocb *nxt = io_req_find_next(req);
@@ -2794,24 +2459,42 @@ static bool io_resubmit_prep(struct io_kiocb *req)
                return false;
        return !io_setup_async_rw(req, iovec, inline_vecs, &iter, false);
 }
-#endif
 
-static bool io_rw_reissue(struct io_kiocb *req)
+static bool io_rw_should_reissue(struct io_kiocb *req)
 {
-#ifdef CONFIG_BLOCK
        umode_t mode = file_inode(req->file)->i_mode;
-       int ret;
+       struct io_ring_ctx *ctx = req->ctx;
 
        if (!S_ISBLK(mode) && !S_ISREG(mode))
                return false;
-       if ((req->flags & REQ_F_NOWAIT) || io_wq_current_is_worker())
+       if ((req->flags & REQ_F_NOWAIT) || (io_wq_current_is_worker() &&
+           !(ctx->flags & IORING_SETUP_IOPOLL)))
+               return false;
+       /*
+        * If ref is dying, we might be running poll reap from the exit work.
+        * Don't attempt to reissue from that path, just let it fail with
+        * -EAGAIN.
+        */
+       if (percpu_ref_is_dying(&ctx->refs))
+               return false;
+       return true;
+}
+#else
+static bool io_rw_should_reissue(struct io_kiocb *req)
+{
+       return false;
+}
+#endif
+
+static bool io_rw_reissue(struct io_kiocb *req)
+{
+#ifdef CONFIG_BLOCK
+       if (!io_rw_should_reissue(req))
                return false;
 
        lockdep_assert_held(&req->ctx->uring_lock);
 
-       ret = io_sq_thread_acquire_mm_files(req->ctx, req);
-
-       if (!ret && io_resubmit_prep(req)) {
+       if (io_resubmit_prep(req)) {
                refcount_inc(&req->refs);
                io_queue_async_work(req);
                return true;
@@ -2826,13 +2509,14 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2,
 {
        int cflags = 0;
 
-       if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req))
+       if (req->rw.kiocb.ki_flags & IOCB_WRITE)
+               kiocb_end_write(req);
+       if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) {
+               req->flags |= REQ_F_REISSUE;
                return;
+       }
        if (res != req->result)
                req_set_fail_links(req);
-
-       if (req->rw.kiocb.ki_flags & IOCB_WRITE)
-               kiocb_end_write(req);
        if (req->flags & REQ_F_BUFFER_SELECTED)
                cflags = io_put_rw_kbuf(req);
        __io_req_complete(req, issue_flags, res, cflags);
@@ -2849,6 +2533,19 @@ static void io_complete_rw_iopoll(struct kiocb *kiocb, long res, long res2)
 {
        struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb);
 
+#ifdef CONFIG_BLOCK
+       /* Rewind iter, if we have one. iopoll path resubmits as usual */
+       if (res == -EAGAIN && io_rw_should_reissue(req)) {
+               struct io_async_rw *rw = req->async_data;
+
+               if (rw)
+                       iov_iter_revert(&rw->iter,
+                                       req->result - iov_iter_count(&rw->iter));
+               else if (!io_resubmit_prep(req))
+                       res = -EIO;
+       }
+#endif
+
        if (kiocb->ki_flags & IOCB_WRITE)
                kiocb_end_write(req);
 
@@ -3180,7 +2877,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
 
        lockdep_assert_held(&req->ctx->uring_lock);
 
-       head = idr_find(&req->ctx->io_buffer_idr, bgid);
+       head = xa_load(&req->ctx->io_buffers, bgid);
        if (head) {
                if (!list_empty(&head->list)) {
                        kbuf = list_last_entry(&head->list, struct io_buffer,
@@ -3188,7 +2885,7 @@ static struct io_buffer *io_buffer_select(struct io_kiocb *req, size_t *len,
                        list_del(&kbuf->list);
                } else {
                        kbuf = head;
-                       idr_remove(&req->ctx->io_buffer_idr, bgid);
+                       xa_erase(&req->ctx->io_buffers, bgid);
                }
                if (*len > kbuf->len)
                        *len = kbuf->len;
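
This hunk is part of converting the buffer-group registry from an IDR (ctx->io_buffer_idr) to an XArray (ctx->io_buffers) keyed by bgid; the matching xa_erase()/xa_insert() conversions appear in the remove-buffers and provide-buffers hunks further down. A toy kernel-module sketch of the same three calls, purely illustrative (it only builds against a kernel tree):

    #include <linux/module.h>
    #include <linux/xarray.h>

    static DEFINE_XARRAY(groups);

    static int __init xademo_init(void)
    {
        static int payload = 42;    /* stand-in for a buffer-list head */
        int ret;

        ret = xa_insert(&groups, 7, &payload, GFP_KERNEL);
        if (ret)    /* -EBUSY if index 7 was already occupied */
            return ret;
        pr_info("bgid 7 -> %p\n", xa_load(&groups, 7));
        xa_erase(&groups, 7);
        return 0;
    }

    static void __exit xademo_exit(void) { }

    module_init(xademo_init);
    module_exit(xademo_exit);
    MODULE_LICENSE("GPL");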
@@ -3467,19 +3164,9 @@ static inline int io_rw_prep_async(struct io_kiocb *req, int rw)
 
 static int io_read_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       ssize_t ret;
-
-       ret = io_prep_rw(req, sqe);
-       if (ret)
-               return ret;
-
        if (unlikely(!(req->file->f_mode & FMODE_READ)))
                return -EBADF;
-
-       /* either don't need iovec imported or already have it */
-       if (!req->async_data)
-               return 0;
-       return io_rw_prep_async(req, READ);
+       return io_prep_rw(req, sqe);
 }
 
 /*
@@ -3606,12 +3293,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
 
        ret = io_iter_do_read(req, iter);
 
-       if (ret == -EIOCBQUEUED) {
-               /* it's faster to check here then delegate to kfree */
-               if (iovec)
-                       kfree(iovec);
-               return 0;
-       } else if (ret == -EAGAIN) {
+       if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) {
                /* IOPOLL retry should happen for io-wq threads */
                if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL))
                        goto done;
@@ -3621,6 +3303,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                /* some cases will consume bytes even on error returns */
                iov_iter_revert(iter, io_size - iov_iter_count(iter));
                ret = 0;
+       } else if (ret == -EIOCBQUEUED) {
+               goto out_free;
        } else if (ret <= 0 || ret == io_size || !force_nonblock ||
                   (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) {
                /* read all, failed, already did sync or don't want to retry */
@@ -3631,6 +3315,7 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
        if (ret2)
                return ret2;
 
+       iovec = NULL;
        rw = req->async_data;
        /* now use our persistent iterator, if we aren't already */
        iter = &rw->iter;
@@ -3654,27 +3339,22 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags)
                if (ret == -EIOCBQUEUED)
                        return 0;
                /* we got some bytes, but not all. retry. */
+               kiocb->ki_flags &= ~IOCB_WAITQ;
        } while (ret > 0 && ret < io_size);
 done:
        kiocb_done(kiocb, ret, issue_flags);
+out_free:
+       /* it's faster to check here than delegate to kfree */
+       if (iovec)
+               kfree(iovec);
        return 0;
 }
 
 static int io_write_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       ssize_t ret;
-
-       ret = io_prep_rw(req, sqe);
-       if (ret)
-               return ret;
-
        if (unlikely(!(req->file->f_mode & FMODE_WRITE)))
                return -EBADF;
-
-       /* either don't need iovec imported or already have it */
-       if (!req->async_data)
-               return 0;
-       return io_rw_prep_async(req, WRITE);
+       return io_prep_rw(req, sqe);
 }
 
 static int io_write(struct io_kiocb *req, unsigned int issue_flags)
@@ -3737,6 +3417,9 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags)
        else
                ret2 = -EINVAL;
 
+       if (req->flags & REQ_F_REISSUE)
+               ret2 = -EAGAIN;
+
        /*
         * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just
         * retry them without IOCB_NOWAIT.
@@ -3924,7 +3607,6 @@ static int __io_splice_prep(struct io_kiocb *req,
                 * Splice operation will be punted async, and here we need to
                 * modify io_wq_work.flags, so initialize io_wq_work first.
                 */
-               io_req_init_async(req);
                req->work.flags |= IO_WQ_WORK_UNBOUND;
        }
 
@@ -4011,7 +3693,7 @@ static int io_nop(struct io_kiocb *req, unsigned int issue_flags)
        return 0;
 }
 
-static int io_prep_fsync(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_fsync_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
@@ -4200,7 +3882,7 @@ err:
 
 static int io_openat(struct io_kiocb *req, unsigned int issue_flags)
 {
-       return io_openat2(req, issue_flags & IO_URING_F_NONBLOCK);
+       return io_openat2(req, issue_flags);
 }
 
 static int io_remove_buffers_prep(struct io_kiocb *req,
@@ -4243,7 +3925,7 @@ static int __io_remove_buffers(struct io_ring_ctx *ctx, struct io_buffer *buf,
        }
        i++;
        kfree(buf);
-       idr_remove(&ctx->io_buffer_idr, bgid);
+       xa_erase(&ctx->io_buffers, bgid);
 
        return i;
 }
@@ -4261,7 +3943,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
        lockdep_assert_held(&ctx->uring_lock);
 
        ret = -ENOENT;
-       head = idr_find(&ctx->io_buffer_idr, p->bgid);
+       head = xa_load(&ctx->io_buffers, p->bgid);
        if (head)
                ret = __io_remove_buffers(ctx, head, p->bgid, p->nbufs);
        if (ret < 0)
@@ -4281,6 +3963,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags)
 static int io_provide_buffers_prep(struct io_kiocb *req,
                                   const struct io_uring_sqe *sqe)
 {
+       unsigned long size;
        struct io_provide_buf *p = &req->pbuf;
        u64 tmp;
 
@@ -4294,7 +3977,8 @@ static int io_provide_buffers_prep(struct io_kiocb *req,
        p->addr = READ_ONCE(sqe->addr);
        p->len = READ_ONCE(sqe->len);
 
-       if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs)))
+       size = (unsigned long)p->len * p->nbufs;
+       if (!access_ok(u64_to_user_ptr(p->addr), size))
                return -EFAULT;
 
        p->bgid = READ_ONCE(sqe->buf_group);
@@ -4344,21 +4028,14 @@ static int io_provide_buffers(struct io_kiocb *req, unsigned int issue_flags)
 
        lockdep_assert_held(&ctx->uring_lock);
 
-       list = head = idr_find(&ctx->io_buffer_idr, p->bgid);
+       list = head = xa_load(&ctx->io_buffers, p->bgid);
 
        ret = io_add_buffers(p, &head);
-       if (ret < 0)
-               goto out;
-
-       if (!list) {
-               ret = idr_alloc(&ctx->io_buffer_idr, head, p->bgid, p->bgid + 1,
-                                       GFP_KERNEL);
-               if (ret < 0) {
+       if (ret >= 0 && !list) {
+               ret = xa_insert(&ctx->io_buffers, p->bgid, head, GFP_KERNEL);
+               if (ret < 0)
                        __io_remove_buffers(ctx, head, p->bgid, -1U);
-                       goto out;
-               }
        }
-out:
        if (ret < 0)
                req_set_fail_links(req);
 
@@ -4598,13 +4275,10 @@ err:
        return 0;
 }
 
-static int io_prep_sfr(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_sfr_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_ring_ctx *ctx = req->ctx;
 
-       if (!req->file)
-               return -EBADF;
-
        if (unlikely(ctx->flags & IORING_SETUP_IOPOLL))
                return -EINVAL;
        if (unlikely(sqe->addr || sqe->ioprio || sqe->buf_index))
@@ -4664,11 +4338,21 @@ static int io_sendmsg_copy_hdr(struct io_kiocb *req,
                                   req->sr_msg.msg_flags, &iomsg->free_iov);
 }
 
+static int io_sendmsg_prep_async(struct io_kiocb *req)
+{
+       int ret;
+
+       if (!io_op_defs[req->opcode].needs_async_data)
+               return 0;
+       ret = io_sendmsg_copy_hdr(req, req->async_data);
+       if (!ret)
+               req->flags |= REQ_F_NEED_CLEANUP;
+       return ret;
+}
+
 static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
-       struct io_async_msghdr *async_msg = req->async_data;
        struct io_sr_msg *sr = &req->sr_msg;
-       int ret;
 
        if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                return -EINVAL;
@@ -4681,13 +4365,7 @@ static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (req->ctx->compat)
                sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
-
-       if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
-               return 0;
-       ret = io_sendmsg_copy_hdr(req, async_msg);
-       if (!ret)
-               req->flags |= REQ_F_NEED_CLEANUP;
-       return ret;
+       return 0;
 }
 
 static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
@@ -4695,6 +4373,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
        struct io_async_msghdr iomsg, *kmsg;
        struct socket *sock;
        unsigned flags;
+       int min_ret = 0;
        int ret;
 
        sock = sock_from_file(req->file);
@@ -4709,12 +4388,15 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
                kmsg = &iomsg;
        }
 
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
        if (flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
        else if (issue_flags & IO_URING_F_NONBLOCK)
                flags |= MSG_DONTWAIT;
 
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
        ret = __sys_sendmsg_sock(sock, &kmsg->msg, flags);
        if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
                return io_setup_async_msg(req, kmsg);
@@ -4725,7 +4407,7 @@ static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
        if (kmsg->free_iov)
                kfree(kmsg->free_iov);
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
+       if (ret < min_ret)
                req_set_fail_links(req);
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
@@ -4738,6 +4420,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
        struct iovec iov;
        struct socket *sock;
        unsigned flags;
+       int min_ret = 0;
        int ret;
 
        sock = sock_from_file(req->file);
@@ -4753,12 +4436,15 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
        msg.msg_controllen = 0;
        msg.msg_namelen = 0;
 
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
        if (flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
        else if (issue_flags & IO_URING_F_NONBLOCK)
                flags |= MSG_DONTWAIT;
 
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
+
        msg.msg_flags = flags;
        ret = sock_sendmsg(sock, &msg);
        if ((issue_flags & IO_URING_F_NONBLOCK) && ret == -EAGAIN)
@@ -4766,7 +4452,7 @@ static int io_send(struct io_kiocb *req, unsigned int issue_flags)
        if (ret == -ERESTARTSYS)
                ret = -EINTR;
 
-       if (ret < 0)
+       if (ret < min_ret)
                req_set_fail_links(req);
        __io_req_complete(req, issue_flags, ret, 0);
        return 0;
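
With MSG_WAITALL set, the success threshold becomes the full iterator length rather than "any non-negative result", so a short send now fails the request; the recv-side hunks below apply the same rule and additionally fail on MSG_TRUNC/MSG_CTRUNC. (MSG_NOSIGNAL is also forced on, so a dead peer yields EPIPE instead of SIGPIPE.) A small sketch of the threshold logic, illustrative only:

    #include <stdio.h>
    #include <sys/socket.h>

    /* Without MSG_WAITALL any non-negative result is a success; with
     * it, anything short of the full requested length is a failure. */
    static int should_fail(int flags, long ret, long requested)
    {
        long min_ret = (flags & MSG_WAITALL) ? requested : 0;

        return ret < min_ret;
    }

    int main(void)
    {
        printf("%d\n", should_fail(0, 100, 4096));            /* 0: ok */
        printf("%d\n", should_fail(MSG_WAITALL, 100, 4096));  /* 1: short */
        printf("%d\n", should_fail(MSG_WAITALL, 4096, 4096)); /* 0: full */
        return 0;
    }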
@@ -4881,13 +4567,22 @@ static inline unsigned int io_put_recv_kbuf(struct io_kiocb *req)
        return io_put_kbuf(req, req->sr_msg.kbuf);
 }
 
-static int io_recvmsg_prep(struct io_kiocb *req,
-                          const struct io_uring_sqe *sqe)
+static int io_recvmsg_prep_async(struct io_kiocb *req)
 {
-       struct io_async_msghdr *async_msg = req->async_data;
-       struct io_sr_msg *sr = &req->sr_msg;
        int ret;
 
+       if (!io_op_defs[req->opcode].needs_async_data)
+               return 0;
+       ret = io_recvmsg_copy_hdr(req, req->async_data);
+       if (!ret)
+               req->flags |= REQ_F_NEED_CLEANUP;
+       return ret;
+}
+
+static int io_recvmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+{
+       struct io_sr_msg *sr = &req->sr_msg;
+
        if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                return -EINVAL;
 
@@ -4900,13 +4595,7 @@ static int io_recvmsg_prep(struct io_kiocb *req,
        if (req->ctx->compat)
                sr->msg_flags |= MSG_CMSG_COMPAT;
 #endif
-
-       if (!async_msg || !io_op_defs[req->opcode].needs_async_data)
-               return 0;
-       ret = io_recvmsg_copy_hdr(req, async_msg);
-       if (!ret)
-               req->flags |= REQ_F_NEED_CLEANUP;
-       return ret;
+       return 0;
 }
 
 static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
@@ -4915,6 +4604,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
        struct socket *sock;
        struct io_buffer *kbuf;
        unsigned flags;
+       int min_ret = 0;
        int ret, cflags = 0;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
@@ -4940,12 +4630,15 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
                                1, req->sr_msg.len);
        }
 
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
        if (flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
        else if (force_nonblock)
                flags |= MSG_DONTWAIT;
 
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&kmsg->msg.msg_iter);
+
        ret = __sys_recvmsg_sock(sock, &kmsg->msg, req->sr_msg.umsg,
                                        kmsg->uaddr, flags);
        if (force_nonblock && ret == -EAGAIN)
@@ -4959,7 +4652,7 @@ static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
        if (kmsg->free_iov)
                kfree(kmsg->free_iov);
        req->flags &= ~REQ_F_NEED_CLEANUP;
-       if (ret < 0)
+       if (ret < min_ret || ((flags & MSG_WAITALL) && (kmsg->msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
                req_set_fail_links(req);
        __io_req_complete(req, issue_flags, ret, cflags);
        return 0;
@@ -4974,6 +4667,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
        struct socket *sock;
        struct iovec iov;
        unsigned flags;
+       int min_ret = 0;
        int ret, cflags = 0;
        bool force_nonblock = issue_flags & IO_URING_F_NONBLOCK;
 
@@ -4999,12 +4693,15 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
        msg.msg_iocb = NULL;
        msg.msg_flags = 0;
 
-       flags = req->sr_msg.msg_flags;
+       flags = req->sr_msg.msg_flags | MSG_NOSIGNAL;
        if (flags & MSG_DONTWAIT)
                req->flags |= REQ_F_NOWAIT;
        else if (force_nonblock)
                flags |= MSG_DONTWAIT;
 
+       if (flags & MSG_WAITALL)
+               min_ret = iov_iter_count(&msg.msg_iter);
+
        ret = sock_recvmsg(sock, &msg, flags);
        if (force_nonblock && ret == -EAGAIN)
                return -EAGAIN;
@@ -5013,7 +4710,7 @@ static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
 out_free:
        if (req->flags & REQ_F_BUFFER_SELECTED)
                cflags = io_put_recv_kbuf(req);
-       if (ret < 0)
+       if (ret < min_ret || ((flags & MSG_WAITALL) && (msg.msg_flags & (MSG_TRUNC | MSG_CTRUNC))))
                req_set_fail_links(req);
        __io_req_complete(req, issue_flags, ret, cflags);
        return 0;
@@ -5059,10 +4756,17 @@ static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
        return 0;
 }
 
+static int io_connect_prep_async(struct io_kiocb *req)
+{
+       struct io_async_connect *io = req->async_data;
+       struct io_connect *conn = &req->connect;
+
+       return move_addr_to_kernel(conn->addr, conn->addr_len, &io->address);
+}
+
 static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 {
        struct io_connect *conn = &req->connect;
-       struct io_async_connect *io = req->async_data;
 
        if (unlikely(req->ctx->flags & IORING_SETUP_IOPOLL))
                return -EINVAL;
@@ -5071,12 +4775,7 @@ static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
 
        conn->addr = u64_to_user_ptr(READ_ONCE(sqe->addr));
        conn->addr_len =  READ_ONCE(sqe->addr2);
-
-       if (!io)
-               return 0;
-
-       return move_addr_to_kernel(conn->addr, conn->addr_len,
-                                       &io->address);
+       return 0;
 }
 
 static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
@@ -5108,7 +4807,6 @@ static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
                        ret = -ENOMEM;
                        goto out;
                }
-               io = req->async_data;
                memcpy(req->async_data, &__io, sizeof(__io));
                return -EAGAIN;
        }
@@ -5121,56 +4819,32 @@ out:
        return 0;
 }
 #else /* !CONFIG_NET */
-static int io_sendmsg_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_sendmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_send(struct io_kiocb *req, unsigned int issue_flags)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_recvmsg_prep(struct io_kiocb *req,
-                          const struct io_uring_sqe *sqe)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_recvmsg(struct io_kiocb *req, unsigned int issue_flags)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_recv(struct io_kiocb *req, unsigned int issue_flags)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_accept_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_accept(struct io_kiocb *req, unsigned int issue_flags)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_connect_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
-{
-       return -EOPNOTSUPP;
-}
-
-static int io_connect(struct io_kiocb *req, unsigned int issue_flags)
-{
-       return -EOPNOTSUPP;
-}
+#define IO_NETOP_FN(op)                                                        \
+static int io_##op(struct io_kiocb *req, unsigned int issue_flags)     \
+{                                                                      \
+       return -EOPNOTSUPP;                                             \
+}
+
+#define IO_NETOP_PREP(op)                                              \
+IO_NETOP_FN(op)                                                                \
+static int io_##op##_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) \
+{                                                                      \
+       return -EOPNOTSUPP;                                             \
+}                                                                      \
+
+#define IO_NETOP_PREP_ASYNC(op)                                                \
+IO_NETOP_PREP(op)                                                      \
+static int io_##op##_prep_async(struct io_kiocb *req)                  \
+{                                                                      \
+       return -EOPNOTSUPP;                                             \
+}
+
+IO_NETOP_PREP_ASYNC(sendmsg);
+IO_NETOP_PREP_ASYNC(recvmsg);
+IO_NETOP_PREP_ASYNC(connect);
+IO_NETOP_PREP(accept);
+IO_NETOP_FN(send);
+IO_NETOP_FN(recv);
 #endif /* CONFIG_NET */
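
The hand-rolled !CONFIG_NET stubs collapse into three macros that stamp out -EOPNOTSUPP bodies via token pasting. The same trick works in any C project; a standalone sketch:

    #include <errno.h>
    #include <stdio.h>

    /* Stamp out a family of "not supported" stubs with ## pasting. */
    #define STUB_FN(op)                 \
    static int do_##op(void)            \
    {                                   \
        return -EOPNOTSUPP;             \
    }

    STUB_FN(sendmsg)
    STUB_FN(recvmsg)

    int main(void)
    {
        /* prints "-95 -95" on Linux, where EOPNOTSUPP == 95 */
        printf("%d %d\n", do_sendmsg(), do_recvmsg());
        return 0;
    }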
 
 struct io_poll_table {
@@ -5357,6 +5031,9 @@ static void __io_queue_proc(struct io_poll_iocb *poll, struct io_poll_table *pt,
                        pt->error = -EINVAL;
                        return;
                }
+               /* double add on the same waitqueue head, ignore */
+               if (poll->head == head)
+                       return;
                poll = kmalloc(sizeof(*poll), GFP_ATOMIC);
                if (!poll) {
                        pt->error = -ENOMEM;
@@ -5892,6 +5569,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 
        data->mode = io_translate_timeout_mode(flags);
        hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode);
+       if (is_timeout_link)
+               io_req_track_inflight(req);
        return 0;
 }
 
@@ -5945,19 +5624,30 @@ add:
        return 0;
 }
 
+struct io_cancel_data {
+       struct io_ring_ctx *ctx;
+       u64 user_data;
+};
+
 static bool io_cancel_cb(struct io_wq_work *work, void *data)
 {
        struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+       struct io_cancel_data *cd = data;
 
-       return req->user_data == (unsigned long) data;
+       return req->ctx == cd->ctx && req->user_data == cd->user_data;
 }
 
-static int io_async_cancel_one(struct io_ring_ctx *ctx, void *sqe_addr)
+static int io_async_cancel_one(struct io_uring_task *tctx, u64 user_data,
+                              struct io_ring_ctx *ctx)
 {
+       struct io_cancel_data data = { .ctx = ctx, .user_data = user_data, };
        enum io_wq_cancel cancel_ret;
        int ret = 0;
 
-       cancel_ret = io_wq_cancel_cb(ctx->io_wq, io_cancel_cb, sqe_addr, false);
+       if (!tctx || !tctx->io_wq)
+               return -ENOENT;
+
+       cancel_ret = io_wq_cancel_cb(tctx->io_wq, io_cancel_cb, &data, false);
        switch (cancel_ret) {
        case IO_WQ_CANCEL_OK:
                ret = 0;
@@ -5980,7 +5670,7 @@ static void io_async_find_and_cancel(struct io_ring_ctx *ctx,
        unsigned long flags;
        int ret;
 
-       ret = io_async_cancel_one(ctx, (void *) (unsigned long) sqe_addr);
+       ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
        if (ret != -ENOENT) {
                spin_lock_irqsave(&ctx->completion_lock, flags);
                goto done;
@@ -6021,19 +5711,58 @@ static int io_async_cancel_prep(struct io_kiocb *req,
 static int io_async_cancel(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_ring_ctx *ctx = req->ctx;
+       u64 sqe_addr = req->cancel.addr;
+       struct io_tctx_node *node;
+       int ret;
 
-       io_async_find_and_cancel(ctx, req, req->cancel.addr, 0);
-       return 0;
-}
-
-static int io_rsrc_update_prep(struct io_kiocb *req,
-                               const struct io_uring_sqe *sqe)
-{
-       if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
-               return -EINVAL;
-       if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
-               return -EINVAL;
-       if (sqe->ioprio || sqe->rw_flags)
+       /* tasks should wait for their io-wq threads, so safe w/o sync */
+       ret = io_async_cancel_one(req->task->io_uring, sqe_addr, ctx);
+       spin_lock_irq(&ctx->completion_lock);
+       if (ret != -ENOENT)
+               goto done;
+       ret = io_timeout_cancel(ctx, sqe_addr);
+       if (ret != -ENOENT)
+               goto done;
+       ret = io_poll_cancel(ctx, sqe_addr);
+       if (ret != -ENOENT)
+               goto done;
+       spin_unlock_irq(&ctx->completion_lock);
+
+       /* slow path, try all io-wq's */
+       io_ring_submit_lock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+       ret = -ENOENT;
+       list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+               struct io_uring_task *tctx = node->task->io_uring;
+
+               if (!tctx || !tctx->io_wq)
+                       continue;
+               ret = io_async_cancel_one(tctx, req->cancel.addr, ctx);
+               if (ret != -ENOENT)
+                       break;
+       }
+       io_ring_submit_unlock(ctx, !(issue_flags & IO_URING_F_NONBLOCK));
+
+       spin_lock_irq(&ctx->completion_lock);
+done:
+       io_cqring_fill_event(req, ret);
+       io_commit_cqring(ctx);
+       spin_unlock_irq(&ctx->completion_lock);
+       io_cqring_ev_posted(ctx);
+
+       if (ret < 0)
+               req_set_fail_links(req);
+       io_put_req(req);
+       return 0;
+}
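
io_async_cancel() now cascades through progressively wider scopes: the submitter's own io-wq, then pending timeouts, then pending polls (the latter two under completion_lock), and finally every task's io-wq attached to the ring via ctx->tctx_list, stopping at the first result that is not -ENOENT. A shape-only sketch of that cascade, with illustrative names:

    #include <errno.h>
    #include <stdio.h>

    /* Try each cancellation scope in order; -ENOENT means "not found
     * here, keep looking", anything else ends the search. */
    static int cancel_cascade(int (*scopes[])(void), int n)
    {
        int ret = -ENOENT;

        for (int i = 0; i < n && ret == -ENOENT; i++)
            ret = scopes[i]();
        return ret;
    }

    static int miss(void) { return -ENOENT; }
    static int hit(void)  { return 0; }

    int main(void)
    {
        int (*scopes[])(void) = { miss, miss, hit };

        printf("cancel: %d\n", cancel_cascade(scopes, 3)); /* 0 */
        return 0;
    }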
+
+static int io_rsrc_update_prep(struct io_kiocb *req,
+                               const struct io_uring_sqe *sqe)
+{
+       if (unlikely(req->ctx->flags & IORING_SETUP_SQPOLL))
+               return -EINVAL;
+       if (unlikely(req->flags & (REQ_F_FIXED_FILE | REQ_F_BUFFER_SELECT)))
+               return -EINVAL;
+       if (sqe->ioprio || sqe->rw_flags)
                return -EINVAL;
 
        req->rsrc_update.offset = READ_ONCE(sqe->off);
@@ -6084,9 +5813,9 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        case IORING_OP_POLL_REMOVE:
                return io_poll_remove_prep(req, sqe);
        case IORING_OP_FSYNC:
-               return io_prep_fsync(req, sqe);
+               return io_fsync_prep(req, sqe);
        case IORING_OP_SYNC_FILE_RANGE:
-               return io_prep_sfr(req, sqe);
+               return io_sfr_prep(req, sqe);
        case IORING_OP_SENDMSG:
        case IORING_OP_SEND:
                return io_sendmsg_prep(req, sqe);
@@ -6144,14 +5873,39 @@ static int io_req_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        return -EINVAL;
 }
 
-static int io_req_defer_prep(struct io_kiocb *req,
-                            const struct io_uring_sqe *sqe)
+static int io_req_prep_async(struct io_kiocb *req)
+{
+       switch (req->opcode) {
+       case IORING_OP_READV:
+       case IORING_OP_READ_FIXED:
+       case IORING_OP_READ:
+               return io_rw_prep_async(req, READ);
+       case IORING_OP_WRITEV:
+       case IORING_OP_WRITE_FIXED:
+       case IORING_OP_WRITE:
+               return io_rw_prep_async(req, WRITE);
+       case IORING_OP_SENDMSG:
+       case IORING_OP_SEND:
+               return io_sendmsg_prep_async(req);
+       case IORING_OP_RECVMSG:
+       case IORING_OP_RECV:
+               return io_recvmsg_prep_async(req);
+       case IORING_OP_CONNECT:
+               return io_connect_prep_async(req);
+       }
+       return 0;
+}
+
+static int io_req_defer_prep(struct io_kiocb *req)
 {
-       if (!sqe)
+       if (!io_op_defs[req->opcode].needs_async_data)
                return 0;
-       if (io_alloc_async_data(req))
+       /* some opcodes init it during the initial prep */
+       if (req->async_data)
+               return 0;
+       if (__io_alloc_async_data(req))
                return -EAGAIN;
-       return io_req_prep(req, sqe);
+       return io_req_prep_async(req);
 }
 
 static u32 io_get_sequence(struct io_kiocb *req)
@@ -6167,7 +5921,7 @@ static u32 io_get_sequence(struct io_kiocb *req)
        return total_submitted - nr_reqs;
 }
 
-static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static int io_req_defer(struct io_kiocb *req)
 {
        struct io_ring_ctx *ctx = req->ctx;
        struct io_defer_entry *de;
@@ -6184,11 +5938,9 @@ static int io_req_defer(struct io_kiocb *req, const struct io_uring_sqe *sqe)
        if (!req_need_defer(req, seq) && list_empty_careful(&ctx->defer_list))
                return 0;
 
-       if (!req->async_data) {
-               ret = io_req_defer_prep(req, sqe);
-               if (ret)
-                       return ret;
-       }
+       ret = io_req_defer_prep(req);
+       if (ret)
+               return ret;
        io_prep_async_link(req);
        de = kmalloc(sizeof(*de), GFP_KERNEL);
        if (!de)
@@ -6272,8 +6024,12 @@ static void __io_clean_op(struct io_kiocb *req)
 static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
 {
        struct io_ring_ctx *ctx = req->ctx;
+       const struct cred *creds = NULL;
        int ret;
 
+       if (req->work.creds && req->work.creds != current_cred())
+               creds = override_creds(req->work.creds);
+
        switch (req->opcode) {
        case IORING_OP_NOP:
                ret = io_nop(req, issue_flags);
@@ -6380,6 +6136,9 @@ static int io_issue_sqe(struct io_kiocb *req, unsigned int issue_flags)
                break;
        }
 
+       if (creds)
+               revert_creds(creds);
+
        if (ret)
                return ret;
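
Personality credentials are now plain struct cred pointers: io_init_req() (further down) looks them up with xa_load(&ctx->personalities) and takes a reference, and io_issue_sqe() brackets the issue with override_creds()/revert_creds() only when they differ from current_cred(). A toy userspace analogue of that save/override/revert shape (everything here is illustrative):

    #include <stdio.h>

    struct cred { const char *name; };

    static const struct cred root = { "root" }, user = { "user" };
    static const struct cred *cur = &user;

    /* Swap in new credentials, returning the old ones so the caller
     * can restore them -- the override/revert pairing used above. */
    static const struct cred *override(const struct cred *new)
    {
        const struct cred *old = cur;

        cur = new;
        return old;
    }

    static void revert(const struct cred *old) { cur = old; }

    int main(void)
    {
        const struct cred *old = NULL;

        if (cur != &root)
            old = override(&root);
        printf("issuing as %s\n", cur->name);
        if (old)
            revert(old);
        printf("back to %s\n", cur->name);
        return 0;
    }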
 
@@ -6414,6 +6173,7 @@ static void io_wq_submit_work(struct io_wq_work *work)
                ret = -ECANCELED;
 
        if (!ret) {
+               req->flags &= ~REQ_F_REISSUE;
                do {
                        ret = io_issue_sqe(req, 0);
                        /*
@@ -6427,29 +6187,11 @@ static void io_wq_submit_work(struct io_wq_work *work)
                } while (1);
        }
 
+       /* avoid locking problems by failing it from a clean context */
        if (ret) {
-               struct io_ring_ctx *lock_ctx = NULL;
-
-               if (req->ctx->flags & IORING_SETUP_IOPOLL)
-                       lock_ctx = req->ctx;
-
-               /*
-                * io_iopoll_complete() does not hold completion_lock to
-                * complete polled io, so here for polled io, we can not call
-                * io_req_complete() directly, otherwise there maybe concurrent
-                * access to cqring, defer_list, etc, which is not safe. Given
-                * that io_iopoll_complete() is always called under uring_lock,
-                * so here for polled io, we also get uring_lock to complete
-                * it.
-                */
-               if (lock_ctx)
-                       mutex_lock(&lock_ctx->uring_lock);
-
-               req_set_fail_links(req);
-               io_req_complete(req, ret);
-
-               if (lock_ctx)
-                       mutex_unlock(&lock_ctx->uring_lock);
+               /* io-wq is going to take one down */
+               refcount_inc(&req->refs);
+               io_req_task_queue_fail(req, ret);
        }
 }
 
@@ -6507,7 +6249,6 @@ static enum hrtimer_restart io_link_timeout_fn(struct hrtimer *timer)
        spin_unlock_irqrestore(&ctx->completion_lock, flags);
 
        if (prev) {
-               req_set_fail_links(prev);
                io_async_find_and_cancel(ctx, req, prev->user_data, -ETIME);
                io_put_req_deferred(prev, 1);
        } else {
@@ -6561,19 +6302,10 @@ static struct io_kiocb *io_prep_linked_timeout(struct io_kiocb *req)
 static void __io_queue_sqe(struct io_kiocb *req)
 {
        struct io_kiocb *linked_timeout = io_prep_linked_timeout(req);
-       const struct cred *old_creds = NULL;
        int ret;
 
-       if ((req->flags & REQ_F_WORK_INITIALIZED) &&
-           (req->work.flags & IO_WQ_WORK_CREDS) &&
-           req->work.identity->creds != current_cred())
-               old_creds = override_creds(req->work.identity->creds);
-
        ret = io_issue_sqe(req, IO_URING_F_NONBLOCK|IO_URING_F_COMPLETE_DEFER);
 
-       if (old_creds)
-               revert_creds(old_creds);
-
        /*
         * We async punt it if the file wasn't marked NOWAIT, or if the file
         * doesn't support non-blocking read/write attempts
@@ -6607,11 +6339,11 @@ static void __io_queue_sqe(struct io_kiocb *req)
                io_queue_linked_timeout(linked_timeout);
 }
 
-static void io_queue_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe)
+static void io_queue_sqe(struct io_kiocb *req)
 {
        int ret;
 
-       ret = io_req_defer(req, sqe);
+       ret = io_req_defer(req);
        if (ret) {
                if (ret != -EIOCBQUEUED) {
 fail_req:
@@ -6620,42 +6352,140 @@ fail_req:
                        io_req_complete(req, ret);
                }
        } else if (req->flags & REQ_F_FORCE_ASYNC) {
-               if (!req->async_data) {
-                       ret = io_req_defer_prep(req, sqe);
-                       if (unlikely(ret))
-                               goto fail_req;
-               }
+               ret = io_req_defer_prep(req);
+               if (unlikely(ret))
+                       goto fail_req;
                io_queue_async_work(req);
        } else {
-               if (sqe) {
-                       ret = io_req_prep(req, sqe);
-                       if (unlikely(ret))
-                               goto fail_req;
-               }
                __io_queue_sqe(req);
        }
 }
 
-static inline void io_queue_link_head(struct io_kiocb *req)
+/*
+ * Check SQE restrictions (opcode and flags).
+ *
+ * Returns 'true' if SQE is allowed, 'false' otherwise.
+ */
+static inline bool io_check_restriction(struct io_ring_ctx *ctx,
+                                       struct io_kiocb *req,
+                                       unsigned int sqe_flags)
 {
-       if (unlikely(req->flags & REQ_F_FAIL_LINK)) {
-               io_put_req(req);
-               io_req_complete(req, -ECANCELED);
-       } else
-               io_queue_sqe(req, NULL);
+       if (!ctx->restricted)
+               return true;
+
+       if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
+               return false;
+
+       if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
+           ctx->restrictions.sqe_flags_required)
+               return false;
+
+       if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
+                         ctx->restrictions.sqe_flags_required))
+               return false;
+
+       return true;
 }
 
-struct io_submit_link {
-       struct io_kiocb *head;
-       struct io_kiocb *last;
-};
+static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
+                      const struct io_uring_sqe *sqe)
+{
+       struct io_submit_state *state;
+       unsigned int sqe_flags;
+       int personality, ret = 0;
 
-static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
-                        struct io_submit_link *link)
+       req->opcode = READ_ONCE(sqe->opcode);
+       /* same numerical values as the corresponding REQ_F_*, safe to copy */
+       req->flags = sqe_flags = READ_ONCE(sqe->flags);
+       req->user_data = READ_ONCE(sqe->user_data);
+       req->async_data = NULL;
+       req->file = NULL;
+       req->ctx = ctx;
+       req->link = NULL;
+       req->fixed_rsrc_refs = NULL;
+       /* one is dropped after submission, the other at completion */
+       refcount_set(&req->refs, 2);
+       req->task = current;
+       req->result = 0;
+       req->work.list.next = NULL;
+       req->work.creds = NULL;
+       req->work.flags = 0;
+
+       /* enforce forwards compatibility on users */
+       if (unlikely(sqe_flags & ~SQE_VALID_FLAGS)) {
+               req->flags = 0;
+               return -EINVAL;
+       }
+
+       if (unlikely(req->opcode >= IORING_OP_LAST))
+               return -EINVAL;
+
+       if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
+               return -EACCES;
+
+       if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
+           !io_op_defs[req->opcode].buffer_select)
+               return -EOPNOTSUPP;
+
+       personality = READ_ONCE(sqe->personality);
+       if (personality) {
+               req->work.creds = xa_load(&ctx->personalities, personality);
+               if (!req->work.creds)
+                       return -EINVAL;
+               get_cred(req->work.creds);
+       }
+       state = &ctx->submit_state;
+
+       /*
+        * Plug now if we have more than 1 IO left after this, and the target
+        * is potentially a read/write to block based storage.
+        */
+       if (!state->plug_started && state->ios_left > 1 &&
+           io_op_defs[req->opcode].plug) {
+               blk_start_plug(&state->plug);
+               state->plug_started = true;
+       }
+
+       if (io_op_defs[req->opcode].needs_file) {
+               bool fixed = req->flags & REQ_F_FIXED_FILE;
+
+               req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
+               if (unlikely(!req->file))
+                       ret = -EBADF;
+       }
+
+       state->ios_left--;
+       return ret;
+}
+
+static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req,
+                        const struct io_uring_sqe *sqe)
 {
-       struct io_ring_ctx *ctx = req->ctx;
+       struct io_submit_link *link = &ctx->submit_state.link;
        int ret;
 
+       ret = io_init_req(ctx, req, sqe);
+       if (unlikely(ret)) {
+fail_req:
+               if (link->head) {
+                       /* fail even hard links since we don't submit */
+                       link->head->flags |= REQ_F_FAIL_LINK;
+                       io_put_req(link->head);
+                       io_req_complete(link->head, -ECANCELED);
+                       link->head = NULL;
+               }
+               io_put_req(req);
+               io_req_complete(req, ret);
+               return ret;
+       }
+       ret = io_req_prep(req, sqe);
+       if (unlikely(ret))
+               goto fail_req;
+
+       /* don't need @sqe from now on */
+       trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
+                               true, ctx->flags & IORING_SETUP_SQPOLL);
+
        /*
         * If we already have a head request, queue this one for async
         * submittal once the head completes. If we don't have a head but
@@ -6677,19 +6507,16 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        head->flags |= REQ_F_IO_DRAIN;
                        ctx->drain_next = 1;
                }
-               ret = io_req_defer_prep(req, sqe);
-               if (unlikely(ret)) {
-                       /* fail even hard links since we don't submit */
-                       head->flags |= REQ_F_FAIL_LINK;
-                       return ret;
-               }
+               ret = io_req_defer_prep(req);
+               if (unlikely(ret))
+                       goto fail_req;
                trace_io_uring_link(ctx, req, head);
                link->last->link = req;
                link->last = req;
 
                /* last request of a link, enqueue the link */
                if (!(req->flags & (REQ_F_LINK | REQ_F_HARDLINK))) {
-                       io_queue_link_head(head);
+                       io_queue_sqe(head);
                        link->head = NULL;
                }
        } else {
@@ -6698,13 +6525,10 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
                        ctx->drain_next = 0;
                }
                if (req->flags & (REQ_F_LINK | REQ_F_HARDLINK)) {
-                       ret = io_req_defer_prep(req, sqe);
-                       if (unlikely(ret))
-                               req->flags |= REQ_F_FAIL_LINK;
                        link->head = req;
                        link->last = req;
                } else {
-                       io_queue_sqe(req, sqe);
+                       io_queue_sqe(req);
                }
        }
 
@@ -6717,6 +6541,8 @@ static int io_submit_sqe(struct io_kiocb *req, const struct io_uring_sqe *sqe,
 static void io_submit_state_end(struct io_submit_state *state,
                                struct io_ring_ctx *ctx)
 {
+       if (state->link.head)
+               io_queue_sqe(state->link.head);
        if (state->comp.nr)
                io_submit_flush_completions(&state->comp, ctx);
        if (state->plug_started)
@@ -6732,6 +6558,8 @@ static void io_submit_state_start(struct io_submit_state *state,
 {
        state->plug_started = false;
        state->ios_left = max_ios;
+       /* set only head, no need to init link_last in advance */
+       state->link.head = NULL;
 }
 
 static void io_commit_sqring(struct io_ring_ctx *ctx)
@@ -6777,117 +6605,9 @@ static const struct io_uring_sqe *io_get_sqe(struct io_ring_ctx *ctx)
        return NULL;
 }
 
-/*
- * Check SQE restrictions (opcode and flags).
- *
- * Returns 'true' if SQE is allowed, 'false' otherwise.
- */
-static inline bool io_check_restriction(struct io_ring_ctx *ctx,
-                                       struct io_kiocb *req,
-                                       unsigned int sqe_flags)
-{
-       if (!ctx->restricted)
-               return true;
-
-       if (!test_bit(req->opcode, ctx->restrictions.sqe_op))
-               return false;
-
-       if ((sqe_flags & ctx->restrictions.sqe_flags_required) !=
-           ctx->restrictions.sqe_flags_required)
-               return false;
-
-       if (sqe_flags & ~(ctx->restrictions.sqe_flags_allowed |
-                         ctx->restrictions.sqe_flags_required))
-               return false;
-
-       return true;
-}
-
-#define SQE_VALID_FLAGS        (IOSQE_FIXED_FILE|IOSQE_IO_DRAIN|IOSQE_IO_LINK| \
-                               IOSQE_IO_HARDLINK | IOSQE_ASYNC | \
-                               IOSQE_BUFFER_SELECT)
-
-static int io_init_req(struct io_ring_ctx *ctx, struct io_kiocb *req,
-                      const struct io_uring_sqe *sqe)
-{
-       struct io_submit_state *state;
-       unsigned int sqe_flags;
-       int id, ret = 0;
-
-       req->opcode = READ_ONCE(sqe->opcode);
-       /* same numerical values with corresponding REQ_F_*, safe to copy */
-       req->flags = sqe_flags = READ_ONCE(sqe->flags);
-       req->user_data = READ_ONCE(sqe->user_data);
-       req->async_data = NULL;
-       req->file = NULL;
-       req->ctx = ctx;
-       req->link = NULL;
-       req->fixed_rsrc_refs = NULL;
-       /* one is dropped after submission, the other at completion */
-       refcount_set(&req->refs, 2);
-       req->task = current;
-       req->result = 0;
-
-       /* enforce forwards compatibility on users */
-       if (unlikely(sqe_flags & ~SQE_VALID_FLAGS))
-               return -EINVAL;
-
-       if (unlikely(req->opcode >= IORING_OP_LAST))
-               return -EINVAL;
-
-       if (unlikely(io_sq_thread_acquire_mm_files(ctx, req)))
-               return -EFAULT;
-
-       if (unlikely(!io_check_restriction(ctx, req, sqe_flags)))
-               return -EACCES;
-
-       if ((sqe_flags & IOSQE_BUFFER_SELECT) &&
-           !io_op_defs[req->opcode].buffer_select)
-               return -EOPNOTSUPP;
-
-       id = READ_ONCE(sqe->personality);
-       if (id) {
-               struct io_identity *iod;
-
-               iod = idr_find(&ctx->personality_idr, id);
-               if (unlikely(!iod))
-                       return -EINVAL;
-               refcount_inc(&iod->count);
-
-               __io_req_init_async(req);
-               get_cred(iod->creds);
-               req->work.identity = iod;
-               req->work.flags |= IO_WQ_WORK_CREDS;
-       }
-
-       state = &ctx->submit_state;
-
-       /*
-        * Plug now if we have more than 1 IO left after this, and the target
-        * is potentially a read/write to block based storage.
-        */
-       if (!state->plug_started && state->ios_left > 1 &&
-           io_op_defs[req->opcode].plug) {
-               blk_start_plug(&state->plug);
-               state->plug_started = true;
-       }
-
-       if (io_op_defs[req->opcode].needs_file) {
-               bool fixed = req->flags & REQ_F_FIXED_FILE;
-
-               req->file = io_file_get(state, req, READ_ONCE(sqe->fd), fixed);
-               if (unlikely(!req->file))
-                       ret = -EBADF;
-       }
-
-       state->ios_left--;
-       return ret;
-}
-
 static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 {
-       struct io_submit_link link;
-       int i, submitted = 0;
+       int submitted = 0;
 
        /* if we have a backlog and couldn't flush it all, return BUSY */
        if (test_bit(0, &ctx->sq_check_overflow)) {
@@ -6903,14 +6623,11 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
 
        percpu_counter_add(&current->io_uring->inflight, nr);
        refcount_add(nr, &current->usage);
-
        io_submit_state_start(&ctx->submit_state, nr);
-       link.head = NULL;
 
-       for (i = 0; i < nr; i++) {
+       while (submitted < nr) {
                const struct io_uring_sqe *sqe;
                struct io_kiocb *req;
-               int err;
 
                req = io_alloc_req(ctx);
                if (unlikely(!req)) {
@@ -6925,20 +6642,8 @@ static int io_submit_sqes(struct io_ring_ctx *ctx, unsigned int nr)
                }
                /* will complete beyond this point, count as submitted */
                submitted++;
-
-               err = io_init_req(ctx, req, sqe);
-               if (unlikely(err)) {
-fail_req:
-                       io_put_req(req);
-                       io_req_complete(req, err);
+               if (io_submit_sqe(ctx, req, sqe))
                        break;
-               }
-
-               trace_io_uring_submit_sqe(ctx, req->opcode, req->user_data,
-                                       true, ctx->flags & IORING_SETUP_SQPOLL);
-               err = io_submit_sqe(req, sqe, &link);
-               if (err)
-                       goto fail_req;
        }
 
        if (unlikely(submitted != nr)) {
@@ -6950,10 +6655,8 @@ fail_req:
                percpu_counter_sub(&tctx->inflight, unused);
                put_task_struct_many(current, unused);
        }
-       if (link.head)
-               io_queue_link_head(link.head);
-       io_submit_state_end(&ctx->submit_state, ctx);
 
+       io_submit_state_end(&ctx->submit_state, ctx);
         /* Commit SQ ring head once we've consumed and submitted all SQEs */
        io_commit_sqring(ctx);
 
@@ -6992,8 +6695,8 @@ static int __io_sq_thread(struct io_ring_ctx *ctx, bool cap_entries)
                if (!list_empty(&ctx->iopoll_list))
                        io_do_iopoll(ctx, &nr_events, 0);
 
-               if (to_submit && !ctx->sqo_dead &&
-                   likely(!percpu_ref_is_dying(&ctx->refs)))
+               if (to_submit && likely(!percpu_ref_is_dying(&ctx->refs)) &&
+                   !(ctx->flags & IORING_SETUP_R_DISABLED))
                        ret = io_submit_sqes(ctx, to_submit);
                mutex_unlock(&ctx->uring_lock);
        }
@@ -7017,84 +6720,64 @@ static void io_sqd_update_thread_idle(struct io_sq_data *sqd)
        sqd->sq_thread_idle = sq_thread_idle;
 }
 
-static void io_sqd_init_new(struct io_sq_data *sqd)
-{
-       struct io_ring_ctx *ctx;
-
-       while (!list_empty(&sqd->ctx_new_list)) {
-               ctx = list_first_entry(&sqd->ctx_new_list, struct io_ring_ctx, sqd_list);
-               list_move_tail(&ctx->sqd_list, &sqd->ctx_list);
-               complete(&ctx->sq_thread_comp);
-       }
-
-       io_sqd_update_thread_idle(sqd);
-}
-
 static int io_sq_thread(void *data)
 {
-       struct cgroup_subsys_state *cur_css = NULL;
-       struct files_struct *old_files = current->files;
-       struct nsproxy *old_nsproxy = current->nsproxy;
-       const struct cred *old_cred = NULL;
        struct io_sq_data *sqd = data;
        struct io_ring_ctx *ctx;
        unsigned long timeout = 0;
+       char buf[TASK_COMM_LEN];
        DEFINE_WAIT(wait);
 
-       task_lock(current);
-       current->files = NULL;
-       current->nsproxy = NULL;
-       task_unlock(current);
+       snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid);
+       set_task_comm(current, buf);
+       current->pf_io_worker = NULL;
+
+       if (sqd->sq_cpu != -1)
+               set_cpus_allowed_ptr(current, cpumask_of(sqd->sq_cpu));
+       else
+               set_cpus_allowed_ptr(current, cpu_online_mask);
+       current->flags |= PF_NO_SETAFFINITY;
 
-       while (!kthread_should_stop()) {
+       mutex_lock(&sqd->lock);
+       while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) {
                int ret;
                bool cap_entries, sqt_spin, needs_sched;
 
-               /*
-                * Any changes to the sqd lists are synchronized through the
-                * kthread parking. This synchronizes the thread vs users,
-                * the users are synchronized on the sqd->ctx_lock.
-                */
-               if (kthread_should_park()) {
-                       kthread_parkme();
-                       /*
-                        * When sq thread is unparked, in case the previous park operation
-                        * comes from io_put_sq_data(), which means that sq thread is going
-                        * to be stopped, so here needs to have a check.
-                        */
-                       if (kthread_should_stop())
-                               break;
-               }
+               if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) ||
+                   signal_pending(current)) {
+                       bool did_sig = false;
 
-               if (unlikely(!list_empty(&sqd->ctx_new_list))) {
-                       io_sqd_init_new(sqd);
+                       mutex_unlock(&sqd->lock);
+                       if (signal_pending(current)) {
+                               struct ksignal ksig;
+
+                               did_sig = get_signal(&ksig);
+                       }
+                       cond_resched();
+                       mutex_lock(&sqd->lock);
+                       if (did_sig)
+                               break;
+                       io_run_task_work();
+                       io_run_task_work_head(&sqd->park_task_work);
                        timeout = jiffies + sqd->sq_thread_idle;
+                       continue;
                }
-
                sqt_spin = false;
                cap_entries = !list_is_singular(&sqd->ctx_list);
                list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) {
-                       if (current->cred != ctx->creds) {
-                               if (old_cred)
-                                       revert_creds(old_cred);
-                               old_cred = override_creds(ctx->creds);
-                       }
-                       io_sq_thread_associate_blkcg(ctx, &cur_css);
-#ifdef CONFIG_AUDIT
-                       current->loginuid = ctx->loginuid;
-                       current->sessionid = ctx->sessionid;
-#endif
+                       const struct cred *creds = NULL;
 
+                       if (ctx->sq_creds != current_cred())
+                               creds = override_creds(ctx->sq_creds);
                        ret = __io_sq_thread(ctx, cap_entries);
+                       if (creds)
+                               revert_creds(creds);
                        if (!sqt_spin && (ret > 0 || !list_empty(&ctx->iopoll_list)))
                                sqt_spin = true;
-
-                       io_sq_thread_drop_mm_files();
                }
 
                if (sqt_spin || !time_after(jiffies, timeout)) {
                        io_run_task_work();
-                       io_sq_thread_drop_mm_files();
                        cond_resched();
                        if (sqt_spin)
                                timeout = jiffies + sqd->sq_thread_idle;
@@ -7115,35 +6798,33 @@ static int io_sq_thread(void *data)
                        }
                }
 
-               if (needs_sched && !kthread_should_park()) {
+               if (needs_sched && !test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) {
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                io_ring_set_wakeup_flag(ctx);
 
+                       mutex_unlock(&sqd->lock);
                        schedule();
+                       mutex_lock(&sqd->lock);
                        list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
                                io_ring_clear_wakeup_flag(ctx);
                }
 
                finish_wait(&sqd->wait, &wait);
+               io_run_task_work_head(&sqd->park_task_work);
                timeout = jiffies + sqd->sq_thread_idle;
        }
 
-       io_run_task_work();
-       io_sq_thread_drop_mm_files();
-
-       if (cur_css)
-               io_sq_thread_unassociate_blkcg();
-       if (old_cred)
-               revert_creds(old_cred);
-
-       task_lock(current);
-       current->files = old_files;
-       current->nsproxy = old_nsproxy;
-       task_unlock(current);
-
-       kthread_parkme();
+       list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+               io_uring_cancel_sqpoll(ctx);
+       sqd->thread = NULL;
+       list_for_each_entry(ctx, &sqd->ctx_list, sqd_list)
+               io_ring_set_wakeup_flag(ctx);
+       mutex_unlock(&sqd->lock);
 
-       return 0;
+       io_run_task_work();
+       io_run_task_work_head(&sqd->park_task_work);
+       complete(&sqd->exited);
+       do_exit(0);
 }
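
/*
 * Illustrative note, not part of this patch: unlike the kthread it
 * replaces, this io thread never parks and never returns to a kthread
 * runner.  It signals sqd->exited for io_sq_thread_stop() and terminates
 * itself via do_exit(0), which is why sqd->thread is cleared under
 * sqd->lock before the completion fires.
 */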
 
 struct io_wait_queue {
@@ -7187,7 +6868,7 @@ static int io_run_task_work_sig(void)
                return 1;
        if (!signal_pending(current))
                return 0;
-       if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))
+       if (test_thread_flag(TIF_NOTIFY_SIGNAL))
                return -ERESTARTSYS;
        return -EINTR;
 }
@@ -7264,11 +6945,16 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        iowq.nr_timeouts = atomic_read(&ctx->cq_timeouts);
        trace_io_uring_cqring_wait(ctx, min_events);
        do {
-               io_cqring_overflow_flush(ctx, false, NULL, NULL);
+               /* if we can't even flush overflow, don't wait for more */
+               if (!io_cqring_overflow_flush(ctx, false, NULL, NULL)) {
+                       ret = -EBUSY;
+                       break;
+               }
                prepare_to_wait_exclusive(&ctx->wait, &iowq.wq,
                                                TASK_INTERRUPTIBLE);
                ret = io_cqring_wait_schedule(ctx, &iowq, &timeout);
                finish_wait(&ctx->wait, &iowq.wq);
+               cond_resched();
        } while (ret > 0);
 
        restore_saved_sigmask_unless(ret == -EINTR);
@@ -7328,38 +7014,59 @@ static void io_sqe_rsrc_set_node(struct io_ring_ctx *ctx,
        percpu_ref_get(&rsrc_data->refs);
 }
 
-static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
-                              struct io_ring_ctx *ctx,
-                              struct fixed_rsrc_ref_node *backup_node)
+static void io_sqe_rsrc_kill_node(struct io_ring_ctx *ctx, struct fixed_rsrc_data *data)
 {
-       struct fixed_rsrc_ref_node *ref_node;
-       int ret;
+       struct fixed_rsrc_ref_node *ref_node = NULL;
 
        io_rsrc_ref_lock(ctx);
        ref_node = data->node;
+       data->node = NULL;
        io_rsrc_ref_unlock(ctx);
        if (ref_node)
                percpu_ref_kill(&ref_node->refs);
+}
 
-       percpu_ref_kill(&data->refs);
+static int io_rsrc_ref_quiesce(struct fixed_rsrc_data *data,
+                              struct io_ring_ctx *ctx,
+                              void (*rsrc_put)(struct io_ring_ctx *ctx,
+                                               struct io_rsrc_put *prsrc))
+{
+       struct fixed_rsrc_ref_node *backup_node;
+       int ret;
+
+       if (data->quiesce)
+               return -ENXIO;
 
-       /* wait for all refs nodes to complete */
-       flush_delayed_work(&ctx->rsrc_put_work);
+       data->quiesce = true;
        do {
+               ret = -ENOMEM;
+               backup_node = alloc_fixed_rsrc_ref_node(ctx);
+               if (!backup_node)
+                       break;
+               backup_node->rsrc_data = data;
+               backup_node->rsrc_put = rsrc_put;
+
+               io_sqe_rsrc_kill_node(ctx, data);
+               percpu_ref_kill(&data->refs);
+               flush_delayed_work(&ctx->rsrc_put_work);
+
                ret = wait_for_completion_interruptible(&data->done);
                if (!ret)
                        break;
+
+               percpu_ref_resurrect(&data->refs);
+               io_sqe_rsrc_set_node(ctx, data, backup_node);
+               backup_node = NULL;
+               reinit_completion(&data->done);
+               mutex_unlock(&ctx->uring_lock);
                ret = io_run_task_work_sig();
-               if (ret < 0) {
-                       percpu_ref_resurrect(&data->refs);
-                       reinit_completion(&data->done);
-                       io_sqe_rsrc_set_node(ctx, data, backup_node);
-                       return ret;
-               }
-       } while (1);
+               mutex_lock(&ctx->uring_lock);
+       } while (ret >= 0);
+       data->quiesce = false;
 
-       destroy_fixed_rsrc_ref_node(backup_node);
-       return 0;
+       if (backup_node)
+               destroy_fixed_rsrc_ref_node(backup_node);
+       return ret;
 }
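
/*
 * Illustrative sketch, not part of this patch (example_files_unregister()
 * is hypothetical): callers invoke the quiesce helper under uring_lock and
 * must tolerate failure, since the loop above drops the lock to run task
 * work and resurrects data->refs when interrupted.
 */
static int example_files_unregister(struct io_ring_ctx *ctx)
{
	struct fixed_rsrc_data *data = ctx->file_data;
	int ret;

	ret = io_rsrc_ref_quiesce(data, ctx, io_ring_file_put);
	if (ret)
		return ret;	/* refs resurrected, table still usable */
	/* ... free tables and clear ctx->file_data ... */
	return 0;
}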
 
 static struct fixed_rsrc_data *alloc_fixed_rsrc_data(struct io_ring_ctx *ctx)
@@ -7390,18 +7097,17 @@ static void free_fixed_rsrc_data(struct fixed_rsrc_data *data)
 static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
 {
        struct fixed_rsrc_data *data = ctx->file_data;
-       struct fixed_rsrc_ref_node *backup_node;
        unsigned nr_tables, i;
        int ret;
 
-       if (!data)
+       /*
+        * percpu_ref_is_dying() stops a parallel files unregister, since
+        * we may drop the uring lock later in this function to run task
+        * work.
+        */
+       if (!data || percpu_ref_is_dying(&data->refs))
                return -ENXIO;
-       backup_node = alloc_fixed_rsrc_ref_node(ctx);
-       if (!backup_node)
-               return -ENOMEM;
-       init_fixed_file_ref_node(ctx, backup_node);
-
-       ret = io_rsrc_ref_quiesce(data, ctx, backup_node);
+       ret = io_rsrc_ref_quiesce(data, ctx, io_ring_file_put);
        if (ret)
                return ret;
 
@@ -7415,23 +7121,72 @@ static int io_sqe_files_unregister(struct io_ring_ctx *ctx)
        return 0;
 }
 
+static void io_sq_thread_unpark(struct io_sq_data *sqd)
+       __releases(&sqd->lock)
+{
+       WARN_ON_ONCE(sqd->thread == current);
+
+       /*
+        * Do an unconditional clear_bit() followed by a conditional re-set
+        * instead of a single conditional clear_bit(), because the latter
+        * would race with other threads incrementing park_pending and
+        * setting the bit.
+        */
+       clear_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+       if (atomic_dec_return(&sqd->park_pending))
+               set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+       mutex_unlock(&sqd->lock);
+}
+
+static void io_sq_thread_park(struct io_sq_data *sqd)
+       __acquires(&sqd->lock)
+{
+       WARN_ON_ONCE(sqd->thread == current);
+
+       atomic_inc(&sqd->park_pending);
+       set_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state);
+       mutex_lock(&sqd->lock);
+       if (sqd->thread)
+               wake_up_process(sqd->thread);
+}
+
+static void io_sq_thread_stop(struct io_sq_data *sqd)
+{
+       WARN_ON_ONCE(sqd->thread == current);
+
+       mutex_lock(&sqd->lock);
+       set_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state);
+       if (sqd->thread)
+               wake_up_process(sqd->thread);
+       mutex_unlock(&sqd->lock);
+       wait_for_completion(&sqd->exited);
+}
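
/*
 * Illustrative sketch, not part of this patch (example_detach_ctx() is
 * hypothetical): any caller that mutates sqd->ctx_list brackets the access
 * with the park/unpark pair above; park_pending counts nested parkers so
 * unpark only clears IO_SQ_THREAD_SHOULD_PARK once the last one is done.
 */
static void example_detach_ctx(struct io_ring_ctx *ctx, struct io_sq_data *sqd)
{
	io_sq_thread_park(sqd);		/* sets SHOULD_PARK, takes sqd->lock */
	list_del_init(&ctx->sqd_list);	/* safe: io_sq_thread() can't hold sqd->lock here */
	io_sqd_update_thread_idle(sqd);
	io_sq_thread_unpark(sqd);	/* drops sqd->lock */
}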
+
 static void io_put_sq_data(struct io_sq_data *sqd)
 {
        if (refcount_dec_and_test(&sqd->refs)) {
-               /*
-                * The park is a bit of a work-around, without it we get
-                * warning spews on shutdown with SQPOLL set and affinity
-                * set to a single CPU.
-                */
-               if (sqd->thread) {
-                       kthread_park(sqd->thread);
-                       kthread_stop(sqd->thread);
-               }
+               WARN_ON_ONCE(atomic_read(&sqd->park_pending));
 
+               io_sq_thread_stop(sqd);
                kfree(sqd);
        }
 }
 
+static void io_sq_thread_finish(struct io_ring_ctx *ctx)
+{
+       struct io_sq_data *sqd = ctx->sq_data;
+
+       if (sqd) {
+               io_sq_thread_park(sqd);
+               list_del_init(&ctx->sqd_list);
+               io_sqd_update_thread_idle(sqd);
+               io_sq_thread_unpark(sqd);
+
+               io_put_sq_data(sqd);
+               ctx->sq_data = NULL;
+               if (ctx->sq_creds)
+                       put_cred(ctx->sq_creds);
+       }
+}
+
 static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
 {
        struct io_ring_ctx *ctx_attach;
@@ -7452,91 +7207,46 @@ static struct io_sq_data *io_attach_sq_data(struct io_uring_params *p)
                fdput(f);
                return ERR_PTR(-EINVAL);
        }
+       if (sqd->task_tgid != current->tgid) {
+               fdput(f);
+               return ERR_PTR(-EPERM);
+       }
 
        refcount_inc(&sqd->refs);
        fdput(f);
        return sqd;
 }
 
-static struct io_sq_data *io_get_sq_data(struct io_uring_params *p)
+static struct io_sq_data *io_get_sq_data(struct io_uring_params *p,
+                                        bool *attached)
 {
        struct io_sq_data *sqd;
 
-       if (p->flags & IORING_SETUP_ATTACH_WQ)
-               return io_attach_sq_data(p);
+       *attached = false;
+       if (p->flags & IORING_SETUP_ATTACH_WQ) {
+               sqd = io_attach_sq_data(p);
+               if (!IS_ERR(sqd)) {
+                       *attached = true;
+                       return sqd;
+               }
+               /* fall through for the EPERM case and set up a new sqd/task */
+               if (PTR_ERR(sqd) != -EPERM)
+                       return sqd;
+       }
 
        sqd = kzalloc(sizeof(*sqd), GFP_KERNEL);
        if (!sqd)
                return ERR_PTR(-ENOMEM);
 
+       atomic_set(&sqd->park_pending, 0);
        refcount_set(&sqd->refs, 1);
        INIT_LIST_HEAD(&sqd->ctx_list);
-       INIT_LIST_HEAD(&sqd->ctx_new_list);
-       mutex_init(&sqd->ctx_lock);
        mutex_init(&sqd->lock);
        init_waitqueue_head(&sqd->wait);
+       init_completion(&sqd->exited);
        return sqd;
 }
 
-static void io_sq_thread_unpark(struct io_sq_data *sqd)
-       __releases(&sqd->lock)
-{
-       if (!sqd->thread)
-               return;
-       kthread_unpark(sqd->thread);
-       mutex_unlock(&sqd->lock);
-}
-
-static void io_sq_thread_park(struct io_sq_data *sqd)
-       __acquires(&sqd->lock)
-{
-       if (!sqd->thread)
-               return;
-       mutex_lock(&sqd->lock);
-       kthread_park(sqd->thread);
-}
-
-static void io_sq_thread_stop(struct io_ring_ctx *ctx)
-{
-       struct io_sq_data *sqd = ctx->sq_data;
-
-       if (sqd) {
-               if (sqd->thread) {
-                       /*
-                        * We may arrive here from the error branch in
-                        * io_sq_offload_create() where the kthread is created
-                        * without being waked up, thus wake it up now to make
-                        * sure the wait will complete.
-                        */
-                       wake_up_process(sqd->thread);
-                       wait_for_completion(&ctx->sq_thread_comp);
-
-                       io_sq_thread_park(sqd);
-               }
-
-               mutex_lock(&sqd->ctx_lock);
-               list_del(&ctx->sqd_list);
-               io_sqd_update_thread_idle(sqd);
-               mutex_unlock(&sqd->ctx_lock);
-
-               if (sqd->thread)
-                       io_sq_thread_unpark(sqd);
-
-               io_put_sq_data(sqd);
-               ctx->sq_data = NULL;
-       }
-}
-
-static void io_finish_async(struct io_ring_ctx *ctx)
-{
-       io_sq_thread_stop(ctx);
-
-       if (ctx->io_wq) {
-               io_wq_destroy(ctx->io_wq);
-               ctx->io_wq = NULL;
-       }
-}
-
 #if defined(CONFIG_UNIX)
 /*
  * Ensure the UNIX gc is aware of our file set, so we are certain that
@@ -7563,7 +7273,7 @@ static int __io_sqe_files_scm(struct io_ring_ctx *ctx, int nr, int offset)
        skb->sk = sk;
 
        nr_files = 0;
-       fpl->user = get_uid(ctx->user);
+       fpl->user = get_uid(current_user());
        for (i = 0; i < nr; i++) {
                struct file *file = io_file_from_index(ctx, i + offset);
 
@@ -8095,54 +7805,34 @@ static struct io_wq_work *io_free_work(struct io_wq_work *work)
        return req ? &req->work : NULL;
 }
 
-static int io_init_wq_offload(struct io_ring_ctx *ctx,
-                             struct io_uring_params *p)
+static struct io_wq *io_init_wq_offload(struct io_ring_ctx *ctx)
 {
+       struct io_wq_hash *hash;
        struct io_wq_data data;
-       struct fd f;
-       struct io_ring_ctx *ctx_attach;
        unsigned int concurrency;
-       int ret = 0;
-
-       data.user = ctx->user;
-       data.free_work = io_free_work;
-       data.do_work = io_wq_submit_work;
 
-       if (!(p->flags & IORING_SETUP_ATTACH_WQ)) {
-               /* Do QD, or 4 * CPUS, whatever is smallest */
-               concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
-
-               ctx->io_wq = io_wq_create(concurrency, &data);
-               if (IS_ERR(ctx->io_wq)) {
-                       ret = PTR_ERR(ctx->io_wq);
-                       ctx->io_wq = NULL;
-               }
-               return ret;
+       hash = ctx->hash_map;
+       if (!hash) {
+               hash = kzalloc(sizeof(*hash), GFP_KERNEL);
+               if (!hash)
+                       return ERR_PTR(-ENOMEM);
+               refcount_set(&hash->refs, 1);
+               init_waitqueue_head(&hash->wait);
+               ctx->hash_map = hash;
        }
 
-       f = fdget(p->wq_fd);
-       if (!f.file)
-               return -EBADF;
-
-       if (f.file->f_op != &io_uring_fops) {
-               ret = -EINVAL;
-               goto out_fput;
-       }
+       data.hash = hash;
+       data.free_work = io_free_work;
+       data.do_work = io_wq_submit_work;
 
-       ctx_attach = f.file->private_data;
-       /* @io_wq is protected by holding the fd */
-       if (!io_wq_get(ctx_attach->io_wq, &data)) {
-               ret = -EINVAL;
-               goto out_fput;
-       }
+       /* Do QD, or 4 * CPUS, whatever is smallest */
+       concurrency = min(ctx->sq_entries, 4 * num_online_cpus());
 
-       ctx->io_wq = ctx_attach->io_wq;
-out_fput:
-       fdput(f);
-       return ret;
+       return io_wq_create(concurrency, &data);
 }
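
/*
 * Illustrative note, not part of this patch: ctx->hash_map is refcounted
 * and shared by every io-wq created for this ring, so hashed work (e.g.
 * buffered writes against one inode) stays serialised across all
 * participating tasks; io_ring_ctx_free() drops the ref via
 * io_wq_put_hash().
 */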
 
-static int io_uring_alloc_task_context(struct task_struct *task)
+static int io_uring_alloc_task_context(struct task_struct *task,
+                                      struct io_ring_ctx *ctx)
 {
        struct io_uring_task *tctx;
        int ret;
@@ -8151,8 +7841,16 @@ static int io_uring_alloc_task_context(struct task_struct *task)
        if (unlikely(!tctx))
                return -ENOMEM;
 
-       ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
-       if (unlikely(ret)) {
+       ret = percpu_counter_init(&tctx->inflight, 0, GFP_KERNEL);
+       if (unlikely(ret)) {
+               kfree(tctx);
+               return ret;
+       }
+
+       tctx->io_wq = io_init_wq_offload(ctx);
+       if (IS_ERR(tctx->io_wq)) {
+               ret = PTR_ERR(tctx->io_wq);
+               percpu_counter_destroy(&tctx->inflight);
                kfree(tctx);
                return ret;
        }
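
/*
 * Illustrative note, not part of this patch: the io-wq is now created per
 * task rather than per ring; io_uring_clean_tctx() tears it down with
 * io_wq_put_and_exit() once the task has dropped its last ring.
 */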
@@ -8161,9 +7859,6 @@ static int io_uring_alloc_task_context(struct task_struct *task)
        init_waitqueue_head(&tctx->wait);
        tctx->last = NULL;
        atomic_set(&tctx->in_idle, 0);
-       tctx->sqpoll = false;
-       io_init_identity(&tctx->__identity);
-       tctx->identity = &tctx->__identity;
        task->io_uring = tctx;
        spin_lock_init(&tctx->task_lock);
        INIT_WQ_LIST(&tctx->task_list);
@@ -8177,9 +7872,8 @@ void __io_uring_free(struct task_struct *tsk)
        struct io_uring_task *tctx = tsk->io_uring;
 
        WARN_ON_ONCE(!xa_empty(&tctx->xa));
-       WARN_ON_ONCE(refcount_read(&tctx->identity->count) != 1);
-       if (tctx->identity != &tctx->__identity)
-               kfree(tctx->identity);
+       WARN_ON_ONCE(tctx->io_wq);
+
        percpu_counter_destroy(&tctx->inflight);
        kfree(tctx);
        tsk->io_uring = NULL;
@@ -8190,54 +7884,80 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
 {
        int ret;
 
+       /* Retain compatibility with failing for an invalid attach attempt */
+       if ((ctx->flags & (IORING_SETUP_ATTACH_WQ | IORING_SETUP_SQPOLL)) ==
+                               IORING_SETUP_ATTACH_WQ) {
+               struct fd f;
+
+               f = fdget(p->wq_fd);
+               if (!f.file)
+                       return -ENXIO;
+               if (f.file->f_op != &io_uring_fops) {
+                       fdput(f);
+                       return -EINVAL;
+               }
+               fdput(f);
+       }
        if (ctx->flags & IORING_SETUP_SQPOLL) {
+               struct task_struct *tsk;
                struct io_sq_data *sqd;
+               bool attached;
 
                ret = -EPERM;
                if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_NICE))
                        goto err;
 
-               sqd = io_get_sq_data(p);
+               sqd = io_get_sq_data(p, &attached);
                if (IS_ERR(sqd)) {
                        ret = PTR_ERR(sqd);
                        goto err;
                }
 
+               ctx->sq_creds = get_current_cred();
                ctx->sq_data = sqd;
-               io_sq_thread_park(sqd);
-               mutex_lock(&sqd->ctx_lock);
-               list_add(&ctx->sqd_list, &sqd->ctx_new_list);
-               mutex_unlock(&sqd->ctx_lock);
-               io_sq_thread_unpark(sqd);
-
                ctx->sq_thread_idle = msecs_to_jiffies(p->sq_thread_idle);
                if (!ctx->sq_thread_idle)
                        ctx->sq_thread_idle = HZ;
 
-               if (sqd->thread)
-                       goto done;
+               ret = 0;
+               io_sq_thread_park(sqd);
+               list_add(&ctx->sqd_list, &sqd->ctx_list);
+               io_sqd_update_thread_idle(sqd);
+               /* don't attach to a dying SQPOLL thread, would be racy */
+               if (attached && !sqd->thread)
+                       ret = -ENXIO;
+               io_sq_thread_unpark(sqd);
+
+               if (ret < 0)
+                       goto err;
+               if (attached)
+                       return 0;
 
                if (p->flags & IORING_SETUP_SQ_AFF) {
                        int cpu = p->sq_thread_cpu;
 
                        ret = -EINVAL;
                        if (cpu >= nr_cpu_ids)
-                               goto err;
+                               goto err_sqpoll;
                        if (!cpu_online(cpu))
-                               goto err;
+                               goto err_sqpoll;
 
-                       sqd->thread = kthread_create_on_cpu(io_sq_thread, sqd,
-                                                       cpu, "io_uring-sq");
+                       sqd->sq_cpu = cpu;
                } else {
-                       sqd->thread = kthread_create(io_sq_thread, sqd,
-                                                       "io_uring-sq");
+                       sqd->sq_cpu = -1;
                }
-               if (IS_ERR(sqd->thread)) {
-                       ret = PTR_ERR(sqd->thread);
-                       sqd->thread = NULL;
-                       goto err;
+
+               sqd->task_pid = current->pid;
+               sqd->task_tgid = current->tgid;
+               tsk = create_io_thread(io_sq_thread, sqd, NUMA_NO_NODE);
+               if (IS_ERR(tsk)) {
+                       ret = PTR_ERR(tsk);
+                       goto err_sqpoll;
                }
-               ret = io_uring_alloc_task_context(sqd->thread);
+
+               sqd->thread = tsk;
+               ret = io_uring_alloc_task_context(tsk, ctx);
+               wake_up_new_task(tsk);
                if (ret)
                        goto err;
        } else if (p->flags & IORING_SETUP_SQ_AFF) {
@@ -8246,23 +7966,13 @@ static int io_sq_offload_create(struct io_ring_ctx *ctx,
                goto err;
        }
 
-done:
-       ret = io_init_wq_offload(ctx, p);
-       if (ret)
-               goto err;
-
        return 0;
 err:
-       io_finish_async(ctx);
+       io_sq_thread_finish(ctx);
        return ret;
-}
-
-static void io_sq_offload_start(struct io_ring_ctx *ctx)
-{
-       struct io_sq_data *sqd = ctx->sq_data;
-
-       if ((ctx->flags & IORING_SETUP_SQPOLL) && sqd->thread)
-               wake_up_process(sqd->thread);
+err_sqpoll:
+       complete(&ctx->sq_data->exited);
+       goto err;
 }
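
/*
 * Illustrative note, not part of this patch: err_sqpoll completes
 * sqd->exited by hand because no io_sq_thread() exists to do it; otherwise
 * io_sq_thread_stop(), reached via io_sq_thread_finish() on the err path,
 * would block on that completion forever.
 */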
 
 static inline void __io_unaccount_mem(struct user_struct *user,
@@ -8292,7 +8002,7 @@ static inline int __io_account_mem(struct user_struct *user,
 
 static void io_unaccount_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
 {
-       if (ctx->limit_mem)
+       if (ctx->user)
                __io_unaccount_mem(ctx->user, nr_pages);
 
        if (ctx->mm_account)
@@ -8303,7 +8013,7 @@ static int io_account_mem(struct io_ring_ctx *ctx, unsigned long nr_pages)
 {
        int ret;
 
-       if (ctx->limit_mem) {
+       if (ctx->user) {
                ret = __io_account_mem(ctx->user, nr_pages);
                if (ret)
                        return ret;
@@ -8672,19 +8382,13 @@ static int io_eventfd_unregister(struct io_ring_ctx *ctx)
        return -ENXIO;
 }
 
-static int __io_destroy_buffers(int id, void *p, void *data)
-{
-       struct io_ring_ctx *ctx = data;
-       struct io_buffer *buf = p;
-
-       __io_remove_buffers(ctx, buf, id, -1U);
-       return 0;
-}
-
 static void io_destroy_buffers(struct io_ring_ctx *ctx)
 {
-       idr_for_each(&ctx->io_buffer_idr, __io_destroy_buffers, ctx);
-       idr_destroy(&ctx->io_buffer_idr);
+       struct io_buffer *buf;
+       unsigned long index;
+
+       xa_for_each(&ctx->io_buffers, index, buf)
+               __io_remove_buffers(ctx, buf, index, -1U);
 }
 
 static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
@@ -8699,22 +8403,26 @@ static void io_req_cache_free(struct list_head *list, struct task_struct *tsk)
        }
 }
 
-static void io_req_caches_free(struct io_ring_ctx *ctx, struct task_struct *tsk)
+static void io_req_caches_free(struct io_ring_ctx *ctx)
 {
        struct io_submit_state *submit_state = &ctx->submit_state;
+       struct io_comp_state *cs = &ctx->submit_state.comp;
 
        mutex_lock(&ctx->uring_lock);
 
-       if (submit_state->free_reqs)
+       if (submit_state->free_reqs) {
                kmem_cache_free_bulk(req_cachep, submit_state->free_reqs,
                                     submit_state->reqs);
-
-       io_req_cache_free(&submit_state->comp.free_list, NULL);
+               submit_state->free_reqs = 0;
+       }
 
        spin_lock_irq(&ctx->completion_lock);
-       io_req_cache_free(&submit_state->comp.locked_free_list, NULL);
+       list_splice_init(&cs->locked_free_list, &cs->free_list);
+       cs->locked_free_nr = 0;
        spin_unlock_irq(&ctx->completion_lock);
 
+       io_req_cache_free(&cs->free_list, NULL);
+
        mutex_unlock(&ctx->uring_lock);
 }
 
@@ -8722,31 +8430,27 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 {
        /*
         * Some may use context even when all refs and requests have been put,
-        * and they are free to do so while still holding uring_lock, see
-        * __io_req_task_submit(). Wait for them to finish.
+        * and they are free to do so while still holding uring_lock or
+        * completion_lock, see __io_req_task_submit(). Wait for them to finish.
         */
        mutex_lock(&ctx->uring_lock);
        mutex_unlock(&ctx->uring_lock);
+       spin_lock_irq(&ctx->completion_lock);
+       spin_unlock_irq(&ctx->completion_lock);
 
-       io_finish_async(ctx);
+       io_sq_thread_finish(ctx);
        io_sqe_buffers_unregister(ctx);
 
-       if (ctx->sqo_task) {
-               put_task_struct(ctx->sqo_task);
-               ctx->sqo_task = NULL;
+       if (ctx->mm_account) {
                mmdrop(ctx->mm_account);
                ctx->mm_account = NULL;
        }
 
-#ifdef CONFIG_BLK_CGROUP
-       if (ctx->sqo_blkcg_css)
-               css_put(ctx->sqo_blkcg_css);
-#endif
-
+       mutex_lock(&ctx->uring_lock);
        io_sqe_files_unregister(ctx);
+       mutex_unlock(&ctx->uring_lock);
        io_eventfd_unregister(ctx);
        io_destroy_buffers(ctx);
-       idr_destroy(&ctx->personality_idr);
 
 #if defined(CONFIG_UNIX)
        if (ctx->ring_sock) {
@@ -8760,8 +8464,9 @@ static void io_ring_ctx_free(struct io_ring_ctx *ctx)
 
        percpu_ref_exit(&ctx->refs);
        free_uid(ctx->user);
-       put_cred(ctx->creds);
-       io_req_caches_free(ctx, NULL);
+       io_req_caches_free(ctx);
+       if (ctx->hash_map)
+               io_wq_put_hash(ctx->hash_map);
        kfree(ctx->cancel_hash);
        kfree(ctx);
 }
@@ -8808,31 +8513,58 @@ static int io_uring_fasync(int fd, struct file *file, int on)
 
 static int io_unregister_personality(struct io_ring_ctx *ctx, unsigned id)
 {
-       struct io_identity *iod;
+       const struct cred *creds;
 
-       iod = idr_remove(&ctx->personality_idr, id);
-       if (iod) {
-               put_cred(iod->creds);
-               if (refcount_dec_and_test(&iod->count))
-                       kfree(iod);
+       creds = xa_erase(&ctx->personalities, id);
+       if (creds) {
+               put_cred(creds);
                return 0;
        }
 
        return -EINVAL;
 }
 
-static int io_remove_personalities(int id, void *p, void *data)
+static inline bool io_run_ctx_fallback(struct io_ring_ctx *ctx)
 {
-       struct io_ring_ctx *ctx = data;
+       return io_run_task_work_head(&ctx->exit_task_work);
+}
 
-       io_unregister_personality(ctx, id);
-       return 0;
+struct io_tctx_exit {
+       struct callback_head            task_work;
+       struct completion               completion;
+       struct io_ring_ctx              *ctx;
+};
+
+static void io_tctx_exit_cb(struct callback_head *cb)
+{
+       struct io_uring_task *tctx = current->io_uring;
+       struct io_tctx_exit *work;
+
+       work = container_of(cb, struct io_tctx_exit, task_work);
+       /*
+        * When @in_idle, we're in cancellation and it's racy to remove the
+        * node; it will be removed by the end of cancellation, so just
+        * ignore it here.
+        */
+       if (!atomic_read(&tctx->in_idle))
+               io_uring_del_task_file((unsigned long)work->ctx);
+       complete(&work->completion);
 }
 
 static void io_ring_exit_work(struct work_struct *work)
 {
-       struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx,
-                                              exit_work);
+       struct io_ring_ctx *ctx = container_of(work, struct io_ring_ctx, exit_work);
+       unsigned long timeout = jiffies + HZ * 60 * 5;
+       struct io_tctx_exit exit;
+       struct io_tctx_node *node;
+       int ret;
+
+       /* prevent SQPOLL from submitting new requests */
+       if (ctx->sq_data) {
+               io_sq_thread_park(ctx->sq_data);
+               list_del_init(&ctx->sqd_list);
+               io_sqd_update_thread_idle(ctx->sq_data);
+               io_sq_thread_unpark(ctx->sq_data);
+       }
 
        /*
         * If we're doing polled IO and end up having requests being
@@ -8842,38 +8574,74 @@ static void io_ring_exit_work(struct work_struct *work)
         */
        do {
                io_uring_try_cancel_requests(ctx, NULL, NULL);
+
+               WARN_ON_ONCE(time_after(jiffies, timeout));
        } while (!wait_for_completion_timeout(&ctx->ref_comp, HZ/20));
+
+       mutex_lock(&ctx->uring_lock);
+       while (!list_empty(&ctx->tctx_list)) {
+               WARN_ON_ONCE(time_after(jiffies, timeout));
+
+               node = list_first_entry(&ctx->tctx_list, struct io_tctx_node,
+                                       ctx_node);
+               exit.ctx = ctx;
+               init_completion(&exit.completion);
+               init_task_work(&exit.task_work, io_tctx_exit_cb);
+               ret = task_work_add(node->task, &exit.task_work, TWA_SIGNAL);
+               if (WARN_ON_ONCE(ret))
+                       continue;
+               wake_up_process(node->task);
+
+               mutex_unlock(&ctx->uring_lock);
+               wait_for_completion(&exit.completion);
+               cond_resched();
+               mutex_lock(&ctx->uring_lock);
+       }
+       mutex_unlock(&ctx->uring_lock);
+
        io_ring_ctx_free(ctx);
 }
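
/*
 * Illustrative note, not part of this patch: io_tctx_exit_cb() runs via
 * task_work on each task found on ctx->tctx_list, so the node is removed
 * by its owning task (io_uring_del_task_file() insists on that); the exit
 * worker itself only waits on exit.completion.
 */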
 
-static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
+/* Returns true if we found and killed one or more timeouts */
+static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk,
+                            struct files_struct *files)
 {
-       struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+       struct io_kiocb *req, *tmp;
+       int canceled = 0;
 
-       return req->ctx == data;
+       spin_lock_irq(&ctx->completion_lock);
+       list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) {
+               if (io_match_task(req, tsk, files)) {
+                       io_kill_timeout(req, -ECANCELED);
+                       canceled++;
+               }
+       }
+       if (canceled != 0)
+               io_commit_cqring(ctx);
+       spin_unlock_irq(&ctx->completion_lock);
+       if (canceled != 0)
+               io_cqring_ev_posted(ctx);
+       return canceled != 0;
 }
 
 static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx)
 {
+       unsigned long index;
+       const struct cred *creds;
+
        mutex_lock(&ctx->uring_lock);
        percpu_ref_kill(&ctx->refs);
-
-       if (WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) && !ctx->sqo_dead))
-               ctx->sqo_dead = 1;
-
        /* if force is set, the ring is going away. always drop after that */
        ctx->cq_overflow_flushed = 1;
        if (ctx->rings)
                __io_cqring_overflow_flush(ctx, true, NULL, NULL);
-       idr_for_each(&ctx->personality_idr, io_remove_personalities, ctx);
+       xa_for_each(&ctx->personalities, index, creds)
+               io_unregister_personality(ctx, index);
        mutex_unlock(&ctx->uring_lock);
 
        io_kill_timeouts(ctx, NULL, NULL);
        io_poll_remove_all(ctx, NULL, NULL);
 
-       if (ctx->io_wq)
-               io_wq_cancel_cb(ctx->io_wq, io_cancel_ctx_cb, ctx, true);
-
        /* if we failed setting up the ctx, we might not have any rings */
        io_iopoll_try_reap_events(ctx);
 
@@ -8921,11 +8689,11 @@ static bool io_cancel_task_cb(struct io_wq_work *work, void *data)
        return ret;
 }
 
-static void io_cancel_defer_files(struct io_ring_ctx *ctx,
+static bool io_cancel_defer_files(struct io_ring_ctx *ctx,
                                  struct task_struct *task,
                                  struct files_struct *files)
 {
-       struct io_defer_entry *de = NULL;
+       struct io_defer_entry *de;
        LIST_HEAD(list);
 
        spin_lock_irq(&ctx->completion_lock);
@@ -8936,6 +8704,8 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
                }
        }
        spin_unlock_irq(&ctx->completion_lock);
+       if (list_empty(&list))
+               return false;
 
        while (!list_empty(&list)) {
                de = list_first_entry(&list, struct io_defer_entry, list);
@@ -8945,6 +8715,38 @@ static void io_cancel_defer_files(struct io_ring_ctx *ctx,
                io_req_complete(de->req, -ECANCELED);
                kfree(de);
        }
+       return true;
+}
+
+static bool io_cancel_ctx_cb(struct io_wq_work *work, void *data)
+{
+       struct io_kiocb *req = container_of(work, struct io_kiocb, work);
+
+       return req->ctx == data;
+}
+
+static bool io_uring_try_cancel_iowq(struct io_ring_ctx *ctx)
+{
+       struct io_tctx_node *node;
+       enum io_wq_cancel cret;
+       bool ret = false;
+
+       mutex_lock(&ctx->uring_lock);
+       list_for_each_entry(node, &ctx->tctx_list, ctx_node) {
+               struct io_uring_task *tctx = node->task->io_uring;
+
+               /*
+                * io_wq will stay alive while we hold uring_lock, because it's
+                * killed after ctx nodes, which requires taking the lock.
+                */
+               if (!tctx || !tctx->io_wq)
+                       continue;
+               cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_ctx_cb, ctx, true);
+               ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
+       }
+       mutex_unlock(&ctx->uring_lock);
+
+       return ret;
 }
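
/*
 * Illustrative note, not part of this patch: with io-wq hanging off struct
 * io_uring_task instead of the ctx, ring-wide cancellation must walk every
 * attached task's tctx->io_wq, which is what the loop above does while
 * holding uring_lock to keep those io-wqs alive.
 */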
 
 static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
@@ -8952,28 +8754,38 @@ static void io_uring_try_cancel_requests(struct io_ring_ctx *ctx,
                                         struct files_struct *files)
 {
        struct io_task_cancel cancel = { .task = task, .files = files, };
+       struct io_uring_task *tctx = task ? task->io_uring : NULL;
 
        while (1) {
                enum io_wq_cancel cret;
                bool ret = false;
 
-               if (ctx->io_wq) {
-                       cret = io_wq_cancel_cb(ctx->io_wq, io_cancel_task_cb,
+               if (!task) {
+                       ret |= io_uring_try_cancel_iowq(ctx);
+               } else if (tctx && tctx->io_wq) {
+                       /*
+                        * Cancels requests of all rings, not only @ctx, but
+                        * it's fine as the task is in exit/exec.
+                        */
+                       cret = io_wq_cancel_cb(tctx->io_wq, io_cancel_task_cb,
                                               &cancel, true);
                        ret |= (cret != IO_WQ_CANCEL_NOTFOUND);
                }
 
                /* SQPOLL thread does its own polling */
-               if (!(ctx->flags & IORING_SETUP_SQPOLL) && !files) {
+               if ((!(ctx->flags & IORING_SETUP_SQPOLL) && !files) ||
+                   (ctx->sq_data && ctx->sq_data->thread == current)) {
                        while (!list_empty_careful(&ctx->iopoll_list)) {
                                io_iopoll_try_reap_events(ctx);
                                ret = true;
                        }
                }
 
+               ret |= io_cancel_defer_files(ctx, task, files);
                ret |= io_poll_remove_all(ctx, task, files);
                ret |= io_kill_timeouts(ctx, task, files);
                ret |= io_run_task_work();
+               ret |= io_run_ctx_fallback(ctx);
                io_cqring_overflow_flush(ctx, true, task, files);
                if (!ret)
                        break;
@@ -9009,156 +8821,166 @@ static void io_uring_cancel_files(struct io_ring_ctx *ctx,
 
                io_uring_try_cancel_requests(ctx, task, files);
 
-               if (ctx->sq_data)
-                       io_sq_thread_unpark(ctx->sq_data);
                prepare_to_wait(&task->io_uring->wait, &wait,
                                TASK_UNINTERRUPTIBLE);
                if (inflight == io_uring_count_inflight(ctx, task, files))
                        schedule();
                finish_wait(&task->io_uring->wait, &wait);
-               if (ctx->sq_data)
-                       io_sq_thread_park(ctx->sq_data);
-       }
-}
-
-static void io_disable_sqo_submit(struct io_ring_ctx *ctx)
-{
-       mutex_lock(&ctx->uring_lock);
-       ctx->sqo_dead = 1;
-       mutex_unlock(&ctx->uring_lock);
-
-       /* make sure callers enter the ring to get error */
-       if (ctx->rings)
-               io_ring_set_wakeup_flag(ctx);
-}
-
-/*
- * We need to iteratively cancel requests, in case a request has dependent
- * hard links. These persist even for failure of cancelations, hence keep
- * looping until none are found.
- */
-static void io_uring_cancel_task_requests(struct io_ring_ctx *ctx,
-                                         struct files_struct *files)
-{
-       struct task_struct *task = current;
-
-       if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
-               io_disable_sqo_submit(ctx);
-               task = ctx->sq_data->thread;
-               atomic_inc(&task->io_uring->in_idle);
-               io_sq_thread_park(ctx->sq_data);
-       }
-
-       io_cancel_defer_files(ctx, task, files);
-
-       io_uring_cancel_files(ctx, task, files);
-       if (!files)
-               io_uring_try_cancel_requests(ctx, task, NULL);
-
-       if ((ctx->flags & IORING_SETUP_SQPOLL) && ctx->sq_data) {
-               atomic_dec(&task->io_uring->in_idle);
-               io_sq_thread_unpark(ctx->sq_data);
        }
 }
 
 /*
  * Note that this task has used io_uring. We use it for cancelation purposes.
  */
-static int io_uring_add_task_file(struct io_ring_ctx *ctx, struct file *file)
+static int io_uring_add_task_file(struct io_ring_ctx *ctx)
 {
        struct io_uring_task *tctx = current->io_uring;
+       struct io_tctx_node *node;
        int ret;
 
        if (unlikely(!tctx)) {
-               ret = io_uring_alloc_task_context(current);
+               ret = io_uring_alloc_task_context(current, ctx);
                if (unlikely(ret))
                        return ret;
                tctx = current->io_uring;
        }
-       if (tctx->last != file) {
-               void *old = xa_load(&tctx->xa, (unsigned long)file);
+       if (tctx->last != ctx) {
+               void *old = xa_load(&tctx->xa, (unsigned long)ctx);
 
                if (!old) {
-                       get_file(file);
-                       ret = xa_err(xa_store(&tctx->xa, (unsigned long)file,
-                                               file, GFP_KERNEL));
+                       node = kmalloc(sizeof(*node), GFP_KERNEL);
+                       if (!node)
+                               return -ENOMEM;
+                       node->ctx = ctx;
+                       node->task = current;
+
+                       ret = xa_err(xa_store(&tctx->xa, (unsigned long)ctx,
+                                               node, GFP_KERNEL));
                        if (ret) {
-                               fput(file);
+                               kfree(node);
                                return ret;
                        }
 
-                       /* one and only SQPOLL file note, held by sqo_task */
-                       WARN_ON_ONCE((ctx->flags & IORING_SETUP_SQPOLL) &&
-                                    current != ctx->sqo_task);
+                       mutex_lock(&ctx->uring_lock);
+                       list_add(&node->ctx_node, &ctx->tctx_list);
+                       mutex_unlock(&ctx->uring_lock);
                }
-               tctx->last = file;
+               tctx->last = ctx;
        }
-
-       /*
-        * This is race safe in that the task itself is doing this, hence it
-        * cannot be going through the exit/cancel paths at the same time.
-        * This cannot be modified while exit/cancel is running.
-        */
-       if (!tctx->sqpoll && (ctx->flags & IORING_SETUP_SQPOLL))
-               tctx->sqpoll = true;
-
        return 0;
 }
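
/*
 * Illustrative note, not part of this patch: each node is indexed twice on
 * purpose, in tctx->xa keyed by the ctx pointer for the task's own fast
 * lookups, and on ctx->tctx_list so io_ring_exit_work() can reach every
 * participating task at ring teardown.
 */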
 
 /*
  * Remove this io_uring_file -> task mapping.
  */
-static void io_uring_del_task_file(struct file *file)
+static void io_uring_del_task_file(unsigned long index)
 {
        struct io_uring_task *tctx = current->io_uring;
+       struct io_tctx_node *node;
+
+       if (!tctx)
+               return;
+       node = xa_erase(&tctx->xa, index);
+       if (!node)
+               return;
+
+       WARN_ON_ONCE(current != node->task);
+       WARN_ON_ONCE(list_empty(&node->ctx_node));
 
-       if (tctx->last == file)
+       mutex_lock(&node->ctx->uring_lock);
+       list_del(&node->ctx_node);
+       mutex_unlock(&node->ctx->uring_lock);
+
+       if (tctx->last == node->ctx)
                tctx->last = NULL;
-       file = xa_erase(&tctx->xa, (unsigned long)file);
-       if (file)
-               fput(file);
+       kfree(node);
 }
 
-static void io_uring_remove_task_files(struct io_uring_task *tctx)
+static void io_uring_clean_tctx(struct io_uring_task *tctx)
 {
-       struct file *file;
+       struct io_tctx_node *node;
        unsigned long index;
 
-       xa_for_each(&tctx->xa, index, file)
-               io_uring_del_task_file(file);
+       xa_for_each(&tctx->xa, index, node)
+               io_uring_del_task_file(index);
+       if (tctx->io_wq) {
+               io_wq_put_and_exit(tctx->io_wq);
+               tctx->io_wq = NULL;
+       }
+}
+
+static s64 tctx_inflight(struct io_uring_task *tctx)
+{
+       return percpu_counter_sum(&tctx->inflight);
+}
+
+static void io_sqpoll_cancel_cb(struct callback_head *cb)
+{
+       struct io_tctx_exit *work = container_of(cb, struct io_tctx_exit, task_work);
+       struct io_ring_ctx *ctx = work->ctx;
+       struct io_sq_data *sqd = ctx->sq_data;
+
+       if (sqd->thread)
+               io_uring_cancel_sqpoll(ctx);
+       complete(&work->completion);
+}
+
+static void io_sqpoll_cancel_sync(struct io_ring_ctx *ctx)
+{
+       struct io_sq_data *sqd = ctx->sq_data;
+       struct io_tctx_exit work = { .ctx = ctx, };
+       struct task_struct *task;
+
+       io_sq_thread_park(sqd);
+       list_del_init(&ctx->sqd_list);
+       io_sqd_update_thread_idle(sqd);
+       task = sqd->thread;
+       if (task) {
+               init_completion(&work.completion);
+               init_task_work(&work.task_work, io_sqpoll_cancel_cb);
+               io_task_work_add_head(&sqd->park_task_work, &work.task_work);
+               wake_up_process(task);
+       }
+       io_sq_thread_unpark(sqd);
+
+       if (task)
+               wait_for_completion(&work.completion);
 }
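
/*
 * Illustrative note, not part of this patch: the work queued on
 * sqd->park_task_work runs from io_sq_thread() itself (see the
 * io_run_task_work_head() calls in its loop), so the completion above is
 * fired by the parked SQPOLL task, not by a third party.
 */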
 
 void __io_uring_files_cancel(struct files_struct *files)
 {
        struct io_uring_task *tctx = current->io_uring;
-       struct file *file;
+       struct io_tctx_node *node;
        unsigned long index;
 
        /* make sure overflow events are dropped */
        atomic_inc(&tctx->in_idle);
-       xa_for_each(&tctx->xa, index, file)
-               io_uring_cancel_task_requests(file->private_data, files);
+       xa_for_each(&tctx->xa, index, node) {
+               struct io_ring_ctx *ctx = node->ctx;
+
+               if (ctx->sq_data) {
+                       io_sqpoll_cancel_sync(ctx);
+                       continue;
+               }
+               io_uring_cancel_files(ctx, current, files);
+               if (!files)
+                       io_uring_try_cancel_requests(ctx, current, NULL);
+       }
        atomic_dec(&tctx->in_idle);
 
        if (files)
-               io_uring_remove_task_files(tctx);
-}
-
-static s64 tctx_inflight(struct io_uring_task *tctx)
-{
-       return percpu_counter_sum(&tctx->inflight);
+               io_uring_clean_tctx(tctx);
 }
 
+/* should only be called by SQPOLL task */
 static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
 {
-       struct io_uring_task *tctx;
+       struct io_sq_data *sqd = ctx->sq_data;
+       struct io_uring_task *tctx = current->io_uring;
        s64 inflight;
        DEFINE_WAIT(wait);
 
-       if (!ctx->sq_data)
-               return;
-       tctx = ctx->sq_data->thread->io_uring;
-       io_disable_sqo_submit(ctx);
+       WARN_ON_ONCE(!sqd || ctx->sq_data->thread != current);
 
        atomic_inc(&tctx->in_idle);
        do {
@@ -9166,7 +8988,7 @@ static void io_uring_cancel_sqpoll(struct io_ring_ctx *ctx)
                inflight = tctx_inflight(tctx);
                if (!inflight)
                        break;
-               io_uring_cancel_task_requests(ctx, NULL);
+               io_uring_try_cancel_requests(ctx, current, NULL);
 
                prepare_to_wait(&tctx->wait, &wait, TASK_UNINTERRUPTIBLE);
                /*
@@ -9193,15 +9015,7 @@ void __io_uring_task_cancel(void)
 
        /* make sure overflow events are dropped */
        atomic_inc(&tctx->in_idle);
-
-       /* trigger io_disable_sqo_submit() */
-       if (tctx->sqpoll) {
-               struct file *file;
-               unsigned long index;
-
-               xa_for_each(&tctx->xa, index, file)
-                       io_uring_cancel_sqpoll(file->private_data);
-       }
+       __io_uring_files_cancel(NULL);
 
        do {
                /* read completions before cancelations */
@@ -9224,47 +9038,9 @@ void __io_uring_task_cancel(void)
 
        atomic_dec(&tctx->in_idle);
 
-       io_uring_remove_task_files(tctx);
-}
-
-static int io_uring_flush(struct file *file, void *data)
-{
-       struct io_uring_task *tctx = current->io_uring;
-       struct io_ring_ctx *ctx = file->private_data;
-
-       if (fatal_signal_pending(current) || (current->flags & PF_EXITING)) {
-               io_uring_cancel_task_requests(ctx, NULL);
-               io_req_caches_free(ctx, current);
-       }
-
-       if (!tctx)
-               return 0;
-
-       /* we should have cancelled and erased it before PF_EXITING */
-       WARN_ON_ONCE((current->flags & PF_EXITING) &&
-                    xa_load(&tctx->xa, (unsigned long)file));
-
-       /*
-        * fput() is pending, will be 2 if the only other ref is our potential
-        * task file note. If the task is exiting, drop regardless of count.
-        */
-       if (atomic_long_read(&file->f_count) != 2)
-               return 0;
-
-       if (ctx->flags & IORING_SETUP_SQPOLL) {
-               /* there is only one file note, which is owned by sqo_task */
-               WARN_ON_ONCE(ctx->sqo_task != current &&
-                            xa_load(&tctx->xa, (unsigned long)file));
-               /* sqo_dead check is for when this happens after cancellation */
-               WARN_ON_ONCE(ctx->sqo_task == current && !ctx->sqo_dead &&
-                            !xa_load(&tctx->xa, (unsigned long)file));
-
-               io_disable_sqo_submit(ctx);
-       }
-
-       if (!(ctx->flags & IORING_SETUP_SQPOLL) || ctx->sqo_task == current)
-               io_uring_del_task_file(file);
-       return 0;
+       io_uring_clean_tctx(tctx);
+       /* all of current's requests should be gone, we can kill tctx */
+       __io_uring_free(current);
 }
 
 static void *io_uring_validate_mmap_request(struct file *file,
@@ -9339,29 +9115,20 @@ static unsigned long io_uring_nommu_get_unmapped_area(struct file *file,
 
 static int io_sqpoll_wait_sq(struct io_ring_ctx *ctx)
 {
-       int ret = 0;
        DEFINE_WAIT(wait);
 
        do {
                if (!io_sqring_full(ctx))
                        break;
-
                prepare_to_wait(&ctx->sqo_sq_wait, &wait, TASK_INTERRUPTIBLE);
 
-               if (unlikely(ctx->sqo_dead)) {
-                       ret = -EOWNERDEAD;
-                       goto out;
-               }
-
                if (!io_sqring_full(ctx))
                        break;
-
                schedule();
        } while (!signal_pending(current));
 
        finish_wait(&ctx->sqo_sq_wait, &wait);
-out:
-       return ret;
+       return 0;
 }
 
 static int io_get_ext_arg(unsigned flags, const void __user *argp, size_t *argsz,
@@ -9436,8 +9203,9 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                io_cqring_overflow_flush(ctx, false, NULL, NULL);
 
                ret = -EOWNERDEAD;
-               if (unlikely(ctx->sqo_dead))
+               if (unlikely(ctx->sq_data->thread == NULL))
                        goto out;
                if (flags & IORING_ENTER_SQ_WAKEUP)
                        wake_up(&ctx->sq_data->wait);
                if (flags & IORING_ENTER_SQ_WAIT) {
@@ -9447,7 +9215,7 @@ SYSCALL_DEFINE6(io_uring_enter, unsigned int, fd, u32, to_submit,
                }
                submitted = to_submit;
        } else if (to_submit) {
-               ret = io_uring_add_task_file(ctx, f.file);
+               ret = io_uring_add_task_file(ctx);
                if (unlikely(ret))
                        goto out;
                mutex_lock(&ctx->uring_lock);
@@ -9489,11 +9257,9 @@ out_fput:
 }
 
 #ifdef CONFIG_PROC_FS
-static int io_uring_show_cred(int id, void *p, void *data)
+static int io_uring_show_cred(struct seq_file *m, unsigned int id,
+               const struct cred *cred)
 {
-       struct io_identity *iod = p;
-       const struct cred *cred = iod->creds;
-       struct seq_file *m = data;
        struct user_namespace *uns = seq_user_ns(m);
        struct group_info *gi;
        kernel_cap_t cap;
@@ -9537,8 +9303,11 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
         */
        has_lock = mutex_trylock(&ctx->uring_lock);
 
-       if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL))
+       if (has_lock && (ctx->flags & IORING_SETUP_SQPOLL)) {
                sq = ctx->sq_data;
+               if (!sq->thread)
+                       sq = NULL;
+       }
 
        seq_printf(m, "SqThread:\t%d\n", sq ? task_pid_nr(sq->thread) : -1);
        seq_printf(m, "SqThreadCpu:\t%d\n", sq ? task_cpu(sq->thread) : -1);
@@ -9558,9 +9327,13 @@ static void __io_uring_show_fdinfo(struct io_ring_ctx *ctx, struct seq_file *m)
                seq_printf(m, "%5u: 0x%llx/%u\n", i, buf->ubuf,
                                                (unsigned int) buf->len);
        }
-       if (has_lock && !idr_is_empty(&ctx->personality_idr)) {
+       if (has_lock && !xa_empty(&ctx->personalities)) {
+               unsigned long index;
+               const struct cred *cred;
+
                seq_printf(m, "Personalities:\n");
-               idr_for_each(&ctx->personality_idr, io_uring_show_cred, m);
+               xa_for_each(&ctx->personalities, index, cred)
+                       io_uring_show_cred(m, index, cred);
        }
        seq_printf(m, "PollList:\n");
        spin_lock_irq(&ctx->completion_lock);
@@ -9590,7 +9363,6 @@ static void io_uring_show_fdinfo(struct seq_file *m, struct file *f)
 
 static const struct file_operations io_uring_fops = {
        .release        = io_uring_release,
-       .flush          = io_uring_flush,
        .mmap           = io_uring_mmap,
 #ifndef CONFIG_MMU
        .get_unmapped_area = io_uring_nommu_get_unmapped_area,
@@ -9655,7 +9427,7 @@ static int io_uring_install_fd(struct io_ring_ctx *ctx, struct file *file)
        if (fd < 0)
                return fd;
 
-       ret = io_uring_add_task_file(ctx, file);
+       ret = io_uring_add_task_file(ctx);
        if (ret) {
                put_unused_fd(fd);
                return ret;
@@ -9698,7 +9470,6 @@ static struct file *io_uring_get_file(struct io_ring_ctx *ctx)
 static int io_uring_create(unsigned entries, struct io_uring_params *p,
                           struct io_uring_params __user *params)
 {
-       struct user_struct *user = NULL;
        struct io_ring_ctx *ctx;
        struct file *file;
        int ret;
@@ -9740,22 +9511,12 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
                p->cq_entries = 2 * p->sq_entries;
        }
 
-       user = get_uid(current_user());
-
        ctx = io_ring_ctx_alloc(p);
-       if (!ctx) {
-               free_uid(user);
+       if (!ctx)
                return -ENOMEM;
-       }
        ctx->compat = in_compat_syscall();
-       ctx->limit_mem = !capable(CAP_IPC_LOCK);
-       ctx->user = user;
-       ctx->creds = get_current_cred();
-#ifdef CONFIG_AUDIT
-       ctx->loginuid = current->loginuid;
-       ctx->sessionid = current->sessionid;
-#endif
-       ctx->sqo_task = get_task_struct(current);
+       if (!capable(CAP_IPC_LOCK))
+               ctx->user = get_uid(current_user());
 
        /*
         * This is just grabbed for accounting purposes. When a process exits,
@@ -9766,24 +9527,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
        mmgrab(current->mm);
        ctx->mm_account = current->mm;
 
-#ifdef CONFIG_BLK_CGROUP
-       /*
-        * The sq thread will belong to the original cgroup it was inited in.
-        * If the cgroup goes offline (e.g. disabling the io controller), then
-        * issued bios will be associated with the closest cgroup later in the
-        * block layer.
-        */
-       rcu_read_lock();
-       ctx->sqo_blkcg_css = blkcg_css();
-       ret = css_tryget_online(ctx->sqo_blkcg_css);
-       rcu_read_unlock();
-       if (!ret) {
-               /* don't init against a dying cgroup, have the user try again */
-               ctx->sqo_blkcg_css = NULL;
-               ret = -ENODEV;
-               goto err;
-       }
-#endif
        ret = io_allocate_scq_urings(ctx, p);
        if (ret)
                goto err;
@@ -9792,9 +9535,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
        if (ret)
                goto err;
 
-       if (!(p->flags & IORING_SETUP_R_DISABLED))
-               io_sq_offload_start(ctx);
-
        memset(&p->sq_off, 0, sizeof(p->sq_off));
        p->sq_off.head = offsetof(struct io_rings, sq.head);
        p->sq_off.tail = offsetof(struct io_rings, sq.tail);
@@ -9817,7 +9557,7 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
                        IORING_FEAT_SUBMIT_STABLE | IORING_FEAT_RW_CUR_POS |
                        IORING_FEAT_CUR_PERSONALITY | IORING_FEAT_FAST_POLL |
                        IORING_FEAT_POLL_32BITS | IORING_FEAT_SQPOLL_NONFIXED |
-                       IORING_FEAT_EXT_ARG;
+                       IORING_FEAT_EXT_ARG | IORING_FEAT_NATIVE_WORKERS;
 
        if (copy_to_user(params, p, sizeof(*p))) {
                ret = -EFAULT;
@@ -9836,7 +9576,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
         */
        ret = io_uring_install_fd(ctx, file);
        if (ret < 0) {
-               io_disable_sqo_submit(ctx);
                /* fput will clean it up */
                fput(file);
                return ret;
@@ -9845,7 +9584,6 @@ static int io_uring_create(unsigned entries, struct io_uring_params *p,
        trace_io_uring_create(ret, ctx, p->sq_entries, p->cq_entries, p->flags);
        return ret;
 err:
-       io_disable_sqo_submit(ctx);
        io_ring_ctx_wait_and_kill(ctx);
        return ret;
 }
@@ -9923,21 +9661,17 @@ out:
 
 static int io_register_personality(struct io_ring_ctx *ctx)
 {
-       struct io_identity *id;
+       const struct cred *creds;
+       u32 id;
        int ret;
 
-       id = kmalloc(sizeof(*id), GFP_KERNEL);
-       if (unlikely(!id))
-               return -ENOMEM;
-
-       io_init_identity(id);
-       id->creds = get_current_cred();
+       creds = get_current_cred();
 
-       ret = idr_alloc_cyclic(&ctx->personality_idr, id, 1, USHRT_MAX, GFP_KERNEL);
-       if (ret < 0) {
-               put_cred(id->creds);
-               kfree(id);
-       }
+       ret = xa_alloc_cyclic(&ctx->personalities, &id, (void *)creds,
+                       XA_LIMIT(0, USHRT_MAX), &ctx->pers_next, GFP_KERNEL);
+       if (!ret)
+               return id;
+       put_cred(creds);
        return ret;
 }
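
io_register_personality() now stores bare creds in an XArray keyed by a cyclically allocated id instead of a kmalloc'd identity in an IDR. The cyclic allocator keeps advancing a cursor so freed ids are not immediately reused; a userspace sketch of that wrap-around search (illustrative only, the in-kernel API is xa_alloc_cyclic()):

    #include <stdio.h>

    #define LIMIT 8  /* stand-in for XA_LIMIT(0, USHRT_MAX) */

    static void *slots[LIMIT];
    static unsigned int next_id;   /* cursor, like ctx->pers_next */

    /* Find the next free slot at or after the cursor, wrapping once. */
    static int alloc_cyclic(void *entry, unsigned int *id)
    {
            for (unsigned int i = 0; i < LIMIT; i++) {
                    unsigned int cand = (next_id + i) % LIMIT;
                    if (!slots[cand]) {
                            slots[cand] = entry;
                            *id = cand;
                            next_id = cand + 1; /* advance past the grant */
                            return 0;
                    }
            }
            return -1;  /* table full, like -EBUSY from xa_alloc_cyclic() */
    }

    int main(void)
    {
            unsigned int id;
            int dummy;

            for (int i = 0; i < 3; i++)
                    if (alloc_cyclic(&dummy, &id) == 0)
                            printf("personality id %u\n", id);
            return 0;
    }
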
 
@@ -10020,9 +9754,8 @@ static int io_register_enable_rings(struct io_ring_ctx *ctx)
                ctx->restricted = 1;
 
        ctx->flags &= ~IORING_SETUP_R_DISABLED;
-
-       io_sq_offload_start(ctx);
-
+       if (ctx->sq_data && wq_has_sleeper(&ctx->sq_data->wait))
+               wake_up(&ctx->sq_data->wait);
        return 0;
 }
 
@@ -10196,6 +9929,8 @@ SYSCALL_DEFINE4(io_uring_register, unsigned int, fd, unsigned int, opcode,
 
        ctx = f.file->private_data;
 
+       io_run_task_work();
+
        mutex_lock(&ctx->uring_lock);
        ret = __io_uring_register(ctx, opcode, arg, nr_args);
        mutex_unlock(&ctx->uring_lock);
index 16a1e82..414769a 100644 (file)
@@ -278,14 +278,14 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
        if (!is_contig || bio_full(ctx->bio, plen)) {
                gfp_t gfp = mapping_gfp_constraint(page->mapping, GFP_KERNEL);
                gfp_t orig_gfp = gfp;
-               int nr_vecs = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
+               unsigned int nr_vecs = DIV_ROUND_UP(length, PAGE_SIZE);
 
                if (ctx->bio)
                        submit_bio(ctx->bio);
 
                if (ctx->rac) /* same as readahead_gfp_mask */
                        gfp |= __GFP_NORETRY | __GFP_NOWARN;
-               ctx->bio = bio_alloc(gfp, min(BIO_MAX_PAGES, nr_vecs));
+               ctx->bio = bio_alloc(gfp, bio_max_segs(nr_vecs));
                /*
                 * If the bio_alloc fails, try it again for a single page to
                 * avoid having to deal with partial page reads.  This emulates
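
Two cleanups in this hunk: the open-coded round-up shift becomes DIV_ROUND_UP(), and the min() against the renamed bio vector limit becomes bio_max_segs(). Both reduce to trivial arithmetic; a self-contained demo with the helpers re-declared locally (BIO_MAX_VECS is 256 in this kernel, restated here as an assumption):

    #include <stdio.h>

    /* Local copies of the kernel helpers, for illustration only. */
    #define DIV_ROUND_UP(n, d)  (((n) + (d) - 1) / (d))
    #define BIO_MAX_VECS        256U   /* the renamed BIO_MAX_PAGES */

    static unsigned int bio_max_segs(unsigned int nr_segs)
    {
            return nr_segs < BIO_MAX_VECS ? nr_segs : BIO_MAX_VECS;
    }

    int main(void)
    {
            unsigned long length = 4096UL * 300 + 1; /* 300 pages + 1 byte */
            unsigned int nr_vecs = DIV_ROUND_UP(length, 4096);

            /* 301 pages requested, clamped to the 256-vector bio limit. */
            printf("nr_vecs=%u capped=%u\n", nr_vecs, bio_max_segs(nr_vecs));
            return 0;
    }
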
@@ -1221,7 +1221,7 @@ iomap_alloc_ioend(struct inode *inode, struct iomap_writepage_ctx *wpc,
        struct iomap_ioend *ioend;
        struct bio *bio;
 
-       bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_PAGES, &iomap_ioend_bioset);
+       bio = bio_alloc_bioset(GFP_NOFS, BIO_MAX_VECS, &iomap_ioend_bioset);
        bio_set_dev(bio, wpc->iomap.bdev);
        bio->bi_iter.bi_sector = sector;
        bio->bi_opf = REQ_OP_WRITE | wbc_to_write_flags(wbc);
@@ -1252,7 +1252,7 @@ iomap_chain_bio(struct bio *prev)
 {
        struct bio *new;
 
-       new = bio_alloc(GFP_NOFS, BIO_MAX_PAGES);
+       new = bio_alloc(GFP_NOFS, BIO_MAX_VECS);
        bio_copy_dev(new, prev);/* also copies over blkcg information */
        new->bi_iter.bi_sector = bio_end_sector(prev);
        new->bi_opf = prev->bi_opf;
@@ -1459,13 +1459,6 @@ iomap_do_writepage(struct page *page, struct writeback_control *wbc, void *data)
                goto redirty;
 
        /*
-        * Given that we do not allow direct reclaim to call us, we should
-        * never be called in a recursive filesystem reclaim context.
-        */
-       if (WARN_ON_ONCE(current->flags & PF_MEMALLOC_NOFS))
-               goto redirty;
-
-       /*
         * Is this page beyond the end of the file?
         *
         * The page index is less than the end_index, adjust the end_offset
index e2c4991..bdd0d89 100644 (file)
@@ -296,7 +296,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
         */
        bio_opf = iomap_dio_bio_opflags(dio, iomap, use_fua);
 
-       nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_PAGES);
+       nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter, BIO_MAX_VECS);
        do {
                size_t n;
                if (dio->error) {
@@ -338,7 +338,7 @@ iomap_dio_bio_actor(struct inode *inode, loff_t pos, loff_t length,
                copied += n;
 
                nr_pages = bio_iov_vecs_to_alloc(dio->submit.iter,
-                                                BIO_MAX_PAGES);
+                                                BIO_MAX_VECS);
                iomap_dio_submit_bio(dio, iomap, bio, pos);
                pos += n;
        } while (nr_pages);
index 107ee80..dab1b02 100644 (file)
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 
-/*
- * Seek for SEEK_DATA / SEEK_HOLE within @page, starting at @lastoff.
- * Returns true if found and updates @lastoff to the offset in file.
- */
-static bool
-page_seek_hole_data(struct inode *inode, struct page *page, loff_t *lastoff,
-               int whence)
-{
-       const struct address_space_operations *ops = inode->i_mapping->a_ops;
-       unsigned int bsize = i_blocksize(inode), off;
-       bool seek_data = whence == SEEK_DATA;
-       loff_t poff = page_offset(page);
-
-       if (WARN_ON_ONCE(*lastoff >= poff + PAGE_SIZE))
-               return false;
-
-       if (*lastoff < poff) {
-               /*
-                * Last offset smaller than the start of the page means we found
-                * a hole:
-                */
-               if (whence == SEEK_HOLE)
-                       return true;
-               *lastoff = poff;
-       }
-
-       /*
-        * Just check the page unless we can and should check block ranges:
-        */
-       if (bsize == PAGE_SIZE || !ops->is_partially_uptodate)
-               return PageUptodate(page) == seek_data;
-
-       lock_page(page);
-       if (unlikely(page->mapping != inode->i_mapping))
-               goto out_unlock_not_found;
-
-       for (off = 0; off < PAGE_SIZE; off += bsize) {
-               if (offset_in_page(*lastoff) >= off + bsize)
-                       continue;
-               if (ops->is_partially_uptodate(page, off, bsize) == seek_data) {
-                       unlock_page(page);
-                       return true;
-               }
-               *lastoff = poff + off + bsize;
-       }
-
-out_unlock_not_found:
-       unlock_page(page);
-       return false;
-}
-
-/*
- * Seek for SEEK_DATA / SEEK_HOLE in the page cache.
- *
- * Within unwritten extents, the page cache determines which parts are holes
- * and which are data: uptodate buffer heads count as data; everything else
- * counts as a hole.
- *
- * Returns the resulting offset on successs, and -ENOENT otherwise.
- */
 static loff_t
-page_cache_seek_hole_data(struct inode *inode, loff_t offset, loff_t length,
-               int whence)
-{
-       pgoff_t index = offset >> PAGE_SHIFT;
-       pgoff_t end = DIV_ROUND_UP(offset + length, PAGE_SIZE);
-       loff_t lastoff = offset;
-       struct pagevec pvec;
-
-       if (length <= 0)
-               return -ENOENT;
-
-       pagevec_init(&pvec);
-
-       do {
-               unsigned nr_pages, i;
-
-               nr_pages = pagevec_lookup_range(&pvec, inode->i_mapping, &index,
-                                               end - 1);
-               if (nr_pages == 0)
-                       break;
-
-               for (i = 0; i < nr_pages; i++) {
-                       struct page *page = pvec.pages[i];
-
-                       if (page_seek_hole_data(inode, page, &lastoff, whence))
-                               goto check_range;
-                       lastoff = page_offset(page) + PAGE_SIZE;
-               }
-               pagevec_release(&pvec);
-       } while (index < end);
-
-       /* When no page at lastoff and we are not done, we found a hole. */
-       if (whence != SEEK_HOLE)
-               goto not_found;
-
-check_range:
-       if (lastoff < offset + length)
-               goto out;
-not_found:
-       lastoff = -ENOENT;
-out:
-       pagevec_release(&pvec);
-       return lastoff;
-}
-
-
-static loff_t
-iomap_seek_hole_actor(struct inode *inode, loff_t offset, loff_t length,
+iomap_seek_hole_actor(struct inode *inode, loff_t start, loff_t length,
                      void *data, struct iomap *iomap, struct iomap *srcmap)
 {
+       loff_t offset = start;
+
        switch (iomap->type) {
        case IOMAP_UNWRITTEN:
-               offset = page_cache_seek_hole_data(inode, offset, length,
-                                                  SEEK_HOLE);
-               if (offset < 0)
+               offset = mapping_seek_hole_data(inode->i_mapping, start,
+                               start + length, SEEK_HOLE);
+               if (offset == start + length)
                        return length;
                fallthrough;
        case IOMAP_HOLE:
@@ -164,15 +59,17 @@ iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
 EXPORT_SYMBOL_GPL(iomap_seek_hole);
 
 static loff_t
-iomap_seek_data_actor(struct inode *inode, loff_t offset, loff_t length,
+iomap_seek_data_actor(struct inode *inode, loff_t start, loff_t length,
                      void *data, struct iomap *iomap, struct iomap *srcmap)
 {
+       loff_t offset = start;
+
        switch (iomap->type) {
        case IOMAP_HOLE:
                return length;
        case IOMAP_UNWRITTEN:
-               offset = page_cache_seek_hole_data(inode, offset, length,
-                                                  SEEK_DATA);
+               offset = mapping_seek_hole_data(inode->i_mapping, start,
+                               start + length, SEEK_DATA);
                if (offset < 0)
                        return length;
                fallthrough;
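
Both actors now defer to the generic mapping_seek_hole_data(), which signals "nothing found" by returning the end offset (for SEEK_HOLE) or a negative value, instead of the old page-cache walker's -ENOENT. The same semantics are visible from userspace through lseek(2); a quick probe for any sparse file:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <unistd.h>

    int main(int argc, char **argv)
    {
            if (argc < 2) {
                    fprintf(stderr, "usage: %s <file>\n", argv[0]);
                    return 1;
            }
            int fd = open(argv[1], O_RDONLY);
            if (fd < 0) {
                    perror("open");
                    return 1;
            }
            off_t data = lseek(fd, 0, SEEK_DATA); /* first data >= 0 */
            off_t hole = lseek(fd, 0, SEEK_HOLE); /* first hole >= 0 */
            printf("first data at %lld, first hole at %lld\n",
                   (long long)data, (long long)hole);
            close(fd);
            return 0;
    }
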
index a648dbf..a5e478d 100644 (file)
@@ -170,6 +170,16 @@ int iomap_swapfile_activate(struct swap_info_struct *sis,
                        return ret;
        }
 
+       /*
+        * If this swapfile doesn't contain even a single page-aligned
+        * contiguous range of blocks, reject this useless swapfile to
+        * prevent confusion later on.
+        */
+       if (isi.nr_pages == 0) {
+               pr_warn("swapon: Cannot find a single usable page in file.\n");
+               return -EINVAL;
+       }
+
        *pagespan = 1 + isi.highest_ppage - isi.lowest_ppage;
        sis->max = isi.nr_pages;
        sis->pages = isi.nr_pages - 1;
index 093ffbd..55a79df 100644 (file)
@@ -226,7 +226,8 @@ static int __jffs2_set_acl(struct inode *inode, int xprefix, struct posix_acl *a
        return rc;
 }
 
-int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                 struct posix_acl *acl, int type)
 {
        int rc, xprefix;
 
@@ -236,7 +237,8 @@ int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type)
                if (acl) {
                        umode_t mode;
 
-                       rc = posix_acl_update_mode(inode, &mode, &acl);
+                       rc = posix_acl_update_mode(&init_user_ns, inode, &mode,
+                                                  &acl);
                        if (rc)
                                return rc;
                        if (inode->i_mode != mode) {
index 12d0271..62c50da 100644 (file)
@@ -28,7 +28,8 @@ struct jffs2_acl_header {
 #ifdef CONFIG_JFFS2_FS_POSIX_ACL
 
 struct posix_acl *jffs2_get_acl(struct inode *inode, int type);
-int jffs2_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int jffs2_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                 struct posix_acl *acl, int type);
 extern int jffs2_init_acl_pre(struct inode *, struct inode *, umode_t *);
 extern int jffs2_init_acl_post(struct inode *);
 
index 7764937..c0aabbc 100644 (file)
 
 static int jffs2_readdir (struct file *, struct dir_context *);
 
-static int jffs2_create (struct inode *,struct dentry *,umode_t,
-                        bool);
+static int jffs2_create (struct user_namespace *, struct inode *,
+                        struct dentry *, umode_t, bool);
 static struct dentry *jffs2_lookup (struct inode *,struct dentry *,
                                    unsigned int);
 static int jffs2_link (struct dentry *,struct inode *,struct dentry *);
 static int jffs2_unlink (struct inode *,struct dentry *);
-static int jffs2_symlink (struct inode *,struct dentry *,const char *);
-static int jffs2_mkdir (struct inode *,struct dentry *,umode_t);
+static int jffs2_symlink (struct user_namespace *, struct inode *,
+                         struct dentry *, const char *);
+static int jffs2_mkdir (struct user_namespace *, struct inode *,struct dentry *,
+                       umode_t);
 static int jffs2_rmdir (struct inode *,struct dentry *);
-static int jffs2_mknod (struct inode *,struct dentry *,umode_t,dev_t);
-static int jffs2_rename (struct inode *, struct dentry *,
-                        struct inode *, struct dentry *,
+static int jffs2_mknod (struct user_namespace *, struct inode *,struct dentry *,
+                       umode_t,dev_t);
+static int jffs2_rename (struct user_namespace *, struct inode *,
+                        struct dentry *, struct inode *, struct dentry *,
                         unsigned int);
 
 const struct file_operations jffs2_dir_operations =
@@ -157,8 +160,8 @@ static int jffs2_readdir(struct file *file, struct dir_context *ctx)
 /***********************************************************************/
 
 
-static int jffs2_create(struct inode *dir_i, struct dentry *dentry,
-                       umode_t mode, bool excl)
+static int jffs2_create(struct user_namespace *mnt_userns, struct inode *dir_i,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
        struct jffs2_raw_inode *ri;
        struct jffs2_inode_info *f, *dir_f;
@@ -276,7 +279,8 @@ static int jffs2_link (struct dentry *old_dentry, struct inode *dir_i, struct de
 
 /***********************************************************************/
 
-static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char *target)
+static int jffs2_symlink (struct user_namespace *mnt_userns, struct inode *dir_i,
+                         struct dentry *dentry, const char *target)
 {
        struct jffs2_inode_info *f, *dir_f;
        struct jffs2_sb_info *c;
@@ -438,7 +442,8 @@ static int jffs2_symlink (struct inode *dir_i, struct dentry *dentry, const char
 }
 
 
-static int jffs2_mkdir (struct inode *dir_i, struct dentry *dentry, umode_t mode)
+static int jffs2_mkdir (struct user_namespace *mnt_userns, struct inode *dir_i,
+                       struct dentry *dentry, umode_t mode)
 {
        struct jffs2_inode_info *f, *dir_f;
        struct jffs2_sb_info *c;
@@ -609,7 +614,8 @@ static int jffs2_rmdir (struct inode *dir_i, struct dentry *dentry)
        return ret;
 }
 
-static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int jffs2_mknod (struct user_namespace *mnt_userns, struct inode *dir_i,
+                       struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct jffs2_inode_info *f, *dir_f;
        struct jffs2_sb_info *c;
@@ -756,7 +762,8 @@ static int jffs2_mknod (struct inode *dir_i, struct dentry *dentry, umode_t mode
        return ret;
 }
 
-static int jffs2_rename (struct inode *old_dir_i, struct dentry *old_dentry,
+static int jffs2_rename (struct user_namespace *mnt_userns,
+                        struct inode *old_dir_i, struct dentry *old_dentry,
                         struct inode *new_dir_i, struct dentry *new_dentry,
                         unsigned int flags)
 {
index 78858f6..2ac4104 100644 (file)
@@ -190,18 +190,19 @@ int jffs2_do_setattr (struct inode *inode, struct iattr *iattr)
        return 0;
 }
 
-int jffs2_setattr(struct dentry *dentry, struct iattr *iattr)
+int jffs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *iattr)
 {
        struct inode *inode = d_inode(dentry);
        int rc;
 
-       rc = setattr_prepare(dentry, iattr);
+       rc = setattr_prepare(&init_user_ns, dentry, iattr);
        if (rc)
                return rc;
 
        rc = jffs2_do_setattr(inode, iattr);
        if (!rc && (iattr->ia_valid & ATTR_MODE))
-               rc = posix_acl_chmod(inode, inode->i_mode);
+               rc = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
 
        return rc;
 }
index ef1cfa6..173ecca 100644 (file)
@@ -164,7 +164,7 @@ long jffs2_ioctl(struct file *, unsigned int, unsigned long);
 extern const struct inode_operations jffs2_symlink_inode_operations;
 
 /* fs.c */
-int jffs2_setattr (struct dentry *, struct iattr *);
+int jffs2_setattr (struct user_namespace *, struct dentry *, struct iattr *);
 int jffs2_do_setattr (struct inode *, struct iattr *);
 struct inode *jffs2_iget(struct super_block *, unsigned long);
 void jffs2_evict_inode (struct inode *);
index c2332e3..aef5522 100644 (file)
@@ -57,6 +57,7 @@ static int jffs2_security_getxattr(const struct xattr_handler *handler,
 }
 
 static int jffs2_security_setxattr(const struct xattr_handler *handler,
+                                  struct user_namespace *mnt_userns,
                                   struct dentry *unused, struct inode *inode,
                                   const char *name, const void *buffer,
                                   size_t size, int flags)
index 5d60308..cc3f248 100644 (file)
@@ -25,6 +25,7 @@ static int jffs2_trusted_getxattr(const struct xattr_handler *handler,
 }
 
 static int jffs2_trusted_setxattr(const struct xattr_handler *handler,
+                                 struct user_namespace *mnt_userns,
                                  struct dentry *unused, struct inode *inode,
                                  const char *name, const void *buffer,
                                  size_t size, int flags)
index 9d027b4..fb94597 100644 (file)
@@ -25,6 +25,7 @@ static int jffs2_user_getxattr(const struct xattr_handler *handler,
 }
 
 static int jffs2_user_setxattr(const struct xattr_handler *handler,
+                              struct user_namespace *mnt_userns,
                               struct dentry *unused, struct inode *inode,
                               const char *name, const void *buffer,
                               size_t size, int flags)
index 92cc0ac..43c285c 100644 (file)
@@ -91,7 +91,8 @@ out:
        return rc;
 }
 
-int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+               struct posix_acl *acl, int type)
 {
        int rc;
        tid_t tid;
@@ -101,7 +102,7 @@ int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        tid = txBegin(inode->i_sb, 0);
        mutex_lock(&JFS_IP(inode)->commit_mutex);
        if (type == ACL_TYPE_ACCESS && acl) {
-               rc = posix_acl_update_mode(inode, &mode, &acl);
+               rc = posix_acl_update_mode(&init_user_ns, inode, &mode, &acl);
                if (rc)
                        goto end_tx;
                if (mode != inode->i_mode)
index 930d270..28b70e7 100644 (file)
@@ -85,12 +85,13 @@ static int jfs_release(struct inode *inode, struct file *file)
        return 0;
 }
 
-int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
+int jfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct iattr *iattr)
 {
        struct inode *inode = d_inode(dentry);
        int rc;
 
-       rc = setattr_prepare(dentry, iattr);
+       rc = setattr_prepare(&init_user_ns, dentry, iattr);
        if (rc)
                return rc;
 
@@ -118,11 +119,11 @@ int jfs_setattr(struct dentry *dentry, struct iattr *iattr)
                jfs_truncate(inode);
        }
 
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
        mark_inode_dirty(inode);
 
        if (iattr->ia_valid & ATTR_MODE)
-               rc = posix_acl_chmod(inode, inode->i_mode);
+               rc = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
        return rc;
 }
 
index 10ee0ec..2581d4d 100644 (file)
@@ -76,7 +76,7 @@ long jfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                if (err)
                        return err;
 
-               if (!inode_owner_or_capable(inode)) {
+               if (!inode_owner_or_capable(&init_user_ns, inode)) {
                        err = -EACCES;
                        goto setflags_out;
                }
index 9f8f92d..7ae389a 100644 (file)
@@ -8,7 +8,8 @@
 #ifdef CONFIG_JFS_POSIX_ACL
 
 struct posix_acl *jfs_get_acl(struct inode *inode, int type);
-int jfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int jfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+               struct posix_acl *acl, int type);
 int jfs_init_acl(tid_t, struct inode *, struct inode *);
 
 #else
index 4cef170..5937908 100644 (file)
@@ -64,7 +64,7 @@ struct inode *ialloc(struct inode *parent, umode_t mode)
                goto fail_put;
        }
 
-       inode_init_owner(inode, parent, mode);
+       inode_init_owner(&init_user_ns, inode, parent, mode);
        /*
         * New inodes need to save sane values on disk when
         * uid & gid mount options are used
index 70a0d12..01daa0c 100644 (file)
@@ -26,7 +26,7 @@ extern struct dentry *jfs_fh_to_parent(struct super_block *sb, struct fid *fid,
        int fh_len, int fh_type);
 extern void jfs_set_inode_flags(struct inode *);
 extern int jfs_get_block(struct inode *, sector_t, struct buffer_head *, int);
-extern int jfs_setattr(struct dentry *, struct iattr *);
+extern int jfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 
 extern const struct address_space_operations jfs_aops;
 extern const struct inode_operations jfs_dir_inode_operations;
index 7a55d14..9abed0d 100644 (file)
@@ -59,8 +59,8 @@ static inline void free_ea_wmap(struct inode *inode)
  * RETURN:     Errors from subroutines
  *
  */
-static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
-               bool excl)
+static int jfs_create(struct user_namespace *mnt_userns, struct inode *dip,
+                     struct dentry *dentry, umode_t mode, bool excl)
 {
        int rc = 0;
        tid_t tid;              /* transaction id */
@@ -192,7 +192,8 @@ static int jfs_create(struct inode *dip, struct dentry *dentry, umode_t mode,
  * note:
  * EACCES: user needs search+write permission on the parent directory
  */
-static int jfs_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
+static int jfs_mkdir(struct user_namespace *mnt_userns, struct inode *dip,
+                    struct dentry *dentry, umode_t mode)
 {
        int rc = 0;
        tid_t tid;              /* transaction id */
@@ -868,8 +869,8 @@ static int jfs_link(struct dentry *old_dentry,
  * an intermediate result whose length exceeds PATH_MAX [XPG4.2]
 */
 
-static int jfs_symlink(struct inode *dip, struct dentry *dentry,
-               const char *name)
+static int jfs_symlink(struct user_namespace *mnt_userns, struct inode *dip,
+                      struct dentry *dentry, const char *name)
 {
        int rc;
        tid_t tid;
@@ -1058,9 +1059,9 @@ static int jfs_symlink(struct inode *dip, struct dentry *dentry,
  *
  * FUNCTION:   rename a file or directory
  */
-static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                     struct inode *new_dir, struct dentry *new_dentry,
-                     unsigned int flags)
+static int jfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                     struct dentry *old_dentry, struct inode *new_dir,
+                     struct dentry *new_dentry, unsigned int flags)
 {
        struct btstack btstack;
        ino_t ino;
@@ -1344,8 +1345,8 @@ static int jfs_rename(struct inode *old_dir, struct dentry *old_dentry,
  *
  * FUNCTION:   Create a special file (device)
  */
-static int jfs_mknod(struct inode *dir, struct dentry *dentry,
-               umode_t mode, dev_t rdev)
+static int jfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct jfs_inode_info *jfs_ip;
        struct btstack btstack;
index b2dc4d1..1f0ffab 100644 (file)
@@ -551,7 +551,6 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
                ret = -ENOMEM;
                goto out_unload;
        }
-       inode->i_ino = 0;
        inode->i_size = i_size_read(sb->s_bdev->bd_inode);
        inode->i_mapping->a_ops = &jfs_metapage_aops;
        inode_fake_hash(inode);
index db41e78..f9273f6 100644 (file)
@@ -932,6 +932,7 @@ static int jfs_xattr_get(const struct xattr_handler *handler,
 }
 
 static int jfs_xattr_set(const struct xattr_handler *handler,
+                        struct user_namespace *mnt_userns,
                         struct dentry *unused, struct inode *inode,
                         const char *name, const void *value,
                         size_t size, int flags)
@@ -950,6 +951,7 @@ static int jfs_xattr_get_os2(const struct xattr_handler *handler,
 }
 
 static int jfs_xattr_set_os2(const struct xattr_handler *handler,
+                            struct user_namespace *mnt_userns,
                             struct dentry *unused, struct inode *inode,
                             const char *name, const void *value,
                             size_t size, int flags)
index 7a53eed..7e0e62d 100644 (file)
@@ -1110,7 +1110,8 @@ static struct dentry *kernfs_iop_lookup(struct inode *dir,
        return ret;
 }
 
-static int kernfs_iop_mkdir(struct inode *dir, struct dentry *dentry,
+static int kernfs_iop_mkdir(struct user_namespace *mnt_userns,
+                           struct inode *dir, struct dentry *dentry,
                            umode_t mode)
 {
        struct kernfs_node *parent = dir->i_private;
@@ -1147,7 +1148,8 @@ static int kernfs_iop_rmdir(struct inode *dir, struct dentry *dentry)
        return ret;
 }
 
-static int kernfs_iop_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int kernfs_iop_rename(struct user_namespace *mnt_userns,
+                            struct inode *old_dir, struct dentry *old_dentry,
                             struct inode *new_dir, struct dentry *new_dentry,
                             unsigned int flags)
 {
index fc2469a..d73950f 100644 (file)
@@ -112,7 +112,8 @@ int kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr)
        return ret;
 }
 
-int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
+int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                      struct iattr *iattr)
 {
        struct inode *inode = d_inode(dentry);
        struct kernfs_node *kn = inode->i_private;
@@ -122,7 +123,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
                return -EINVAL;
 
        mutex_lock(&kernfs_mutex);
-       error = setattr_prepare(dentry, iattr);
+       error = setattr_prepare(&init_user_ns, dentry, iattr);
        if (error)
                goto out;
 
@@ -131,7 +132,7 @@ int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr)
                goto out;
 
        /* this ignores size changes */
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
 
 out:
        mutex_unlock(&kernfs_mutex);
@@ -183,7 +184,8 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode)
                set_nlink(inode, kn->dir.subdirs + 2);
 }
 
-int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
+int kernfs_iop_getattr(struct user_namespace *mnt_userns,
+                      const struct path *path, struct kstat *stat,
                       u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
@@ -193,7 +195,7 @@ int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
        kernfs_refresh_inode(kn, inode);
        mutex_unlock(&kernfs_mutex);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        return 0;
 }
 
@@ -272,7 +274,8 @@ void kernfs_evict_inode(struct inode *inode)
        kernfs_put(kn);
 }
 
-int kernfs_iop_permission(struct inode *inode, int mask)
+int kernfs_iop_permission(struct user_namespace *mnt_userns,
+                         struct inode *inode, int mask)
 {
        struct kernfs_node *kn;
 
@@ -285,7 +288,7 @@ int kernfs_iop_permission(struct inode *inode, int mask)
        kernfs_refresh_inode(kn, inode);
        mutex_unlock(&kernfs_mutex);
 
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
 int kernfs_xattr_get(struct kernfs_node *kn, const char *name,
@@ -319,6 +322,7 @@ static int kernfs_vfs_xattr_get(const struct xattr_handler *handler,
 }
 
 static int kernfs_vfs_xattr_set(const struct xattr_handler *handler,
+                               struct user_namespace *mnt_userns,
                                struct dentry *unused, struct inode *inode,
                                const char *suffix, const void *value,
                                size_t size, int flags)
@@ -385,6 +389,7 @@ static int kernfs_vfs_user_xattr_rm(struct kernfs_node *kn,
 }
 
 static int kernfs_vfs_user_xattr_set(const struct xattr_handler *handler,
+                                    struct user_namespace *mnt_userns,
                                     struct dentry *unused, struct inode *inode,
                                     const char *suffix, const void *value,
                                     size_t size, int flags)
index 7ee97ef..ccc3b44 100644 (file)
@@ -89,9 +89,12 @@ extern struct kmem_cache *kernfs_node_cache, *kernfs_iattrs_cache;
  */
 extern const struct xattr_handler *kernfs_xattr_handlers[];
 void kernfs_evict_inode(struct inode *inode);
-int kernfs_iop_permission(struct inode *inode, int mask);
-int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr);
-int kernfs_iop_getattr(const struct path *path, struct kstat *stat,
+int kernfs_iop_permission(struct user_namespace *mnt_userns,
+                         struct inode *inode, int mask);
+int kernfs_iop_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                      struct iattr *iattr);
+int kernfs_iop_getattr(struct user_namespace *mnt_userns,
+                      const struct path *path, struct kstat *stat,
                       u32 request_mask, unsigned int query_flags);
 ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size);
 int __kernfs_setattr(struct kernfs_node *kn, const struct iattr *iattr);
index 1e55176..e2de540 100644 (file)
 
 #include "internal.h"
 
-int simple_getattr(const struct path *path, struct kstat *stat,
-                  u32 request_mask, unsigned int query_flags)
+int simple_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                  struct kstat *stat, u32 request_mask,
+                  unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
        return 0;
 }
@@ -447,9 +448,9 @@ int simple_rmdir(struct inode *dir, struct dentry *dentry)
 }
 EXPORT_SYMBOL(simple_rmdir);
 
-int simple_rename(struct inode *old_dir, struct dentry *old_dentry,
-                 struct inode *new_dir, struct dentry *new_dentry,
-                 unsigned int flags)
+int simple_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                 struct dentry *old_dentry, struct inode *new_dir,
+                 struct dentry *new_dentry, unsigned int flags)
 {
        struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = d_is_dir(old_dentry);
@@ -492,18 +493,19 @@ EXPORT_SYMBOL(simple_rename);
  * on simple regular filesystems.  Anything that needs to change on-disk
  * or wire state on size changes needs its own setattr method.
  */
-int simple_setattr(struct dentry *dentry, struct iattr *iattr)
+int simple_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                  struct iattr *iattr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
 
-       error = setattr_prepare(dentry, iattr);
+       error = setattr_prepare(mnt_userns, dentry, iattr);
        if (error)
                return error;
 
        if (iattr->ia_valid & ATTR_SIZE)
                truncate_setsize(inode, iattr->ia_size);
-       setattr_copy(inode, iattr);
+       setattr_copy(mnt_userns, inode, iattr);
        mark_inode_dirty(inode);
        return 0;
 }
@@ -1295,15 +1297,17 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry,
        return ERR_PTR(-ENOENT);
 }
 
-static int empty_dir_getattr(const struct path *path, struct kstat *stat,
+static int empty_dir_getattr(struct user_namespace *mnt_userns,
+                            const struct path *path, struct kstat *stat,
                             u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        return 0;
 }
 
-static int empty_dir_setattr(struct dentry *dentry, struct iattr *attr)
+static int empty_dir_setattr(struct user_namespace *mnt_userns,
+                            struct dentry *dentry, struct iattr *attr)
 {
        return -EPERM;
 }
index 99ca97e..6125d2d 100644 (file)
@@ -1808,9 +1808,6 @@ check_conflicting_open(struct file *filp, const long arg, int flags)
 
        if (flags & FL_LAYOUT)
                return 0;
-       if (flags & FL_DELEG)
-               /* We leave these checks to the caller. */
-               return 0;
 
        if (arg == F_RDLCK)
                return inode_is_open_for_write(inode) ? -EAGAIN : 0;
index f4e5e51..9115948 100644 (file)
@@ -252,7 +252,7 @@ struct inode *minix_new_inode(const struct inode *dir, umode_t mode, int *error)
                iput(inode);
                return NULL;
        }
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode->i_ino = j;
        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
        inode->i_blocks = 0;
index c50b0a2..6a7bd2d 100644 (file)
@@ -22,12 +22,13 @@ const struct file_operations minix_file_operations = {
        .splice_read    = generic_file_splice_read,
 };
 
-static int minix_setattr(struct dentry *dentry, struct iattr *attr)
+static int minix_setattr(struct user_namespace *mnt_userns,
+                        struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -41,7 +42,7 @@ static int minix_setattr(struct dentry *dentry, struct iattr *attr)
                minix_truncate(inode);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
index 34f5464..a532a99 100644 (file)
@@ -652,13 +652,13 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc)
        return err;
 }
 
-int minix_getattr(const struct path *path, struct kstat *stat,
-                 u32 request_mask, unsigned int flags)
+int minix_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct super_block *sb = path->dentry->d_sb;
        struct inode *inode = d_inode(path->dentry);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        if (INODE_VERSION(inode) == MINIX_V1)
                stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb);
        else
index 168d45d..2021733 100644 (file)
@@ -51,7 +51,8 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb);
 extern int minix_new_block(struct inode * inode);
 extern void minix_free_block(struct inode *inode, unsigned long block);
 extern unsigned long minix_count_free_blocks(struct super_block *sb);
-extern int minix_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int minix_getattr(struct user_namespace *, const struct path *,
+                        struct kstat *, u32, unsigned int);
 extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len);
 
 extern void V1_minix_truncate(struct inode *);
index 1a6084d..937fa5f 100644 (file)
@@ -33,7 +33,8 @@ static struct dentry *minix_lookup(struct inode * dir, struct dentry *dentry, un
        return d_splice_alias(inode, dentry);
 }
 
-static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int minix_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        int error;
        struct inode *inode;
@@ -51,7 +52,8 @@ static int minix_mknod(struct inode * dir, struct dentry *dentry, umode_t mode,
        return error;
 }
 
-static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int minix_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode)
 {
        int error;
        struct inode *inode = minix_new_inode(dir, mode, &error);
@@ -63,14 +65,14 @@ static int minix_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        return error;
 }
 
-static int minix_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-               bool excl)
+static int minix_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
-       return minix_mknod(dir, dentry, mode, 0);
+       return minix_mknod(mnt_userns, dir, dentry, mode, 0);
 }
 
-static int minix_symlink(struct inode * dir, struct dentry *dentry,
-         const char * symname)
+static int minix_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        int err = -ENAMETOOLONG;
        int i = strlen(symname)+1;
@@ -109,7 +111,8 @@ static int minix_link(struct dentry * old_dentry, struct inode * dir,
        return add_nondir(dentry, inode);
 }
 
-static int minix_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
+static int minix_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct inode * inode;
        int err;
@@ -181,8 +184,9 @@ static int minix_rmdir(struct inode * dir, struct dentry *dentry)
        return err;
 }
 
-static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
-                       struct inode * new_dir, struct dentry *new_dentry,
+static int minix_rename(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
+                       struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
        struct inode * old_inode = d_inode(old_dentry);
index ce6c376..0b6e08c 100644 (file)
@@ -124,16 +124,6 @@ static inline void get_mnt_ns(struct mnt_namespace *ns)
 
 extern seqlock_t mount_lock;
 
-static inline void lock_mount_hash(void)
-{
-       write_seqlock(&mount_lock);
-}
-
-static inline void unlock_mount_hash(void)
-{
-       write_sequnlock(&mount_lock);
-}
-
 struct proc_mounts {
        struct mnt_namespace *ns;
        struct path root;
index 830e6cc..334e7d0 100644 (file)
@@ -304,9 +304,7 @@ alloc_new:
                                goto out;
                }
                args->bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-                                       min_t(int, args->nr_pages,
-                                             BIO_MAX_PAGES),
-                                       gfp);
+                                       bio_max_segs(args->nr_pages), gfp);
                if (args->bio == NULL)
                        goto confused;
        }
@@ -618,7 +616,7 @@ alloc_new:
                                goto out;
                }
                bio = mpage_alloc(bdev, blocks[0] << (blkbits - 9),
-                               BIO_MAX_PAGES, GFP_NOFS|__GFP_HIGH);
+                               BIO_MAX_VECS, GFP_NOFS|__GFP_HIGH);
                if (bio == NULL)
                        goto confused;
 
index de74ad2..48a2f28 100644 (file)
@@ -259,7 +259,24 @@ void putname(struct filename *name)
                __putname(name);
 }
 
-static int check_acl(struct inode *inode, int mask)
+/**
+ * check_acl - perform ACL permission checking
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @inode:     inode to check permissions on
+ * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
+ * This function performs the ACL permission checking. Since this function
+ * retrieves POSIX acls it needs to know whether it is called from a blocking or
+ * non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
+ */
+static int check_acl(struct user_namespace *mnt_userns,
+                    struct inode *inode, int mask)
 {
 #ifdef CONFIG_FS_POSIX_ACL
        struct posix_acl *acl;
@@ -271,14 +288,14 @@ static int check_acl(struct inode *inode, int mask)
                /* no ->get_acl() calls in RCU mode... */
                if (is_uncached_acl(acl))
                        return -ECHILD;
-               return posix_acl_permission(inode, acl, mask);
+               return posix_acl_permission(mnt_userns, inode, acl, mask);
        }
 
        acl = get_acl(inode, ACL_TYPE_ACCESS);
        if (IS_ERR(acl))
                return PTR_ERR(acl);
        if (acl) {
-               int error = posix_acl_permission(inode, acl, mask);
+               int error = posix_acl_permission(mnt_userns, inode, acl, mask);
                posix_acl_release(acl);
                return error;
        }
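
check_acl() and friends now receive the mount's user namespace so ownership can be translated into the caller's view before any permission math. The translation itself is the familiar range remapping used by uid_map; a toy, single-range version (purely illustrative, not the kernel's kuid machinery):

    #include <stdio.h>

    /* One uid_map-style entry: map [lower, lower+count) to [mapped, ...). */
    struct id_map {
            unsigned int mapped;  /* id inside the namespace */
            unsigned int lower;   /* id outside (e.g. on disk) */
            unsigned int count;
    };

    /* Translate an on-disk uid into the mount's view, -1 if unmapped. */
    static long uid_into_mnt(const struct id_map *map, unsigned int i_uid)
    {
            if (i_uid >= map->lower && i_uid - map->lower < map->count)
                    return map->mapped + (i_uid - map->lower);
            return -1;
    }

    int main(void)
    {
            /* Files owned by uids 1000..1999 appear as 0..999 here. */
            struct id_map map = { .mapped = 0, .lower = 1000, .count = 1000 };

            printf("on-disk 1000 -> %ld\n", uid_into_mnt(&map, 1000)); /* 0 */
            printf("on-disk 5000 -> %ld\n", uid_into_mnt(&map, 5000)); /* -1 */
            return 0;
    }
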
@@ -287,18 +304,31 @@ static int check_acl(struct inode *inode, int mask)
        return -EAGAIN;
 }
 
-/*
- * This does the basic UNIX permission checking.
+/**
+ * acl_permission_check - perform basic UNIX permission checking
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @inode:     inode to check permissions on
+ * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
+ * This function performs the basic UNIX permission checking. Since this
+ * function may retrieve POSIX acls it needs to know whether it is called from a
+ * blocking or non-blocking context and thus cares about the MAY_NOT_BLOCK bit.
  *
- * Note that the POSIX ACL check cares about the MAY_NOT_BLOCK bit,
- * for RCU walking.
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
  */
-static int acl_permission_check(struct inode *inode, int mask)
+static int acl_permission_check(struct user_namespace *mnt_userns,
+                               struct inode *inode, int mask)
 {
        unsigned int mode = inode->i_mode;
+       kuid_t i_uid;
 
        /* Are we the owner? If so, ACL's don't matter */
-       if (likely(uid_eq(current_fsuid(), inode->i_uid))) {
+       i_uid = i_uid_into_mnt(mnt_userns, inode);
+       if (likely(uid_eq(current_fsuid(), i_uid))) {
                mask &= 7;
                mode >>= 6;
                return (mask & ~mode) ? -EACCES : 0;
@@ -306,7 +336,7 @@ static int acl_permission_check(struct inode *inode, int mask)
 
        /* Do we have ACL's? */
        if (IS_POSIXACL(inode) && (mode & S_IRWXG)) {
-               int error = check_acl(inode, mask);
+               int error = check_acl(mnt_userns, inode, mask);
                if (error != -EAGAIN)
                        return error;
        }
@@ -320,7 +350,8 @@ static int acl_permission_check(struct inode *inode, int mask)
         * about? Need to check group ownership if so.
         */
        if (mask & (mode ^ (mode >> 3))) {
-               if (in_group_p(inode->i_gid))
+               kgid_t kgid = i_gid_into_mnt(mnt_userns, inode);
+               if (in_group_p(kgid))
                        mode >>= 3;
        }
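
After the id mapping, acl_permission_check() is still the classic UNIX triad walk: shift i_mode right by 6 for the owner or 3 for the group, then demand every requested bit. A freestanding rendition of that core logic:

    #include <stdio.h>

    #define MAY_EXEC  1
    #define MAY_WRITE 2
    #define MAY_READ  4

    /* Classic UNIX check: shift the owner (6) or group (3) triad into
     * the low bits, then every requested bit must be granted. */
    static int unix_permission(unsigned int mode, unsigned int uid,
                               unsigned int gid, unsigned int fsuid,
                               unsigned int fsgid, int mask)
    {
            if (fsuid == uid)
                    mode >>= 6;
            else if (fsgid == gid)
                    mode >>= 3;
            return (mask & ~mode & 7) ? -1 : 0; /* -EACCES analogue */
    }

    int main(void)
    {
            /* 0640: owner rw-, group r--, other ---. */
            printf("owner write: %d\n",
                   unix_permission(0640, 1000, 100, 1000, 100, MAY_WRITE));
            printf("group write: %d\n",
                   unix_permission(0640, 1000, 100, 2000, 100, MAY_WRITE));
            return 0;
    }
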
 
@@ -330,6 +361,7 @@ static int acl_permission_check(struct inode *inode, int mask)
 
 /**
  * generic_permission -  check for access rights on a Posix-like filesystem
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode:     inode to check access rights for
  * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC,
  *             %MAY_NOT_BLOCK ...)
@@ -342,25 +374,33 @@ static int acl_permission_check(struct inode *inode, int mask)
  * generic_permission is rcu-walk aware. It returns -ECHILD in case an rcu-walk
  * request cannot be satisfied (eg. requires blocking or too much complexity).
  * It would then be called again in ref-walk mode.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
  */
-int generic_permission(struct inode *inode, int mask)
+int generic_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                      int mask)
 {
        int ret;
 
        /*
         * Do the basic permission checks.
         */
-       ret = acl_permission_check(inode, mask);
+       ret = acl_permission_check(mnt_userns, inode, mask);
        if (ret != -EACCES)
                return ret;
 
        if (S_ISDIR(inode->i_mode)) {
                /* DACs are overridable for directories */
                if (!(mask & MAY_WRITE))
-                       if (capable_wrt_inode_uidgid(inode,
+                       if (capable_wrt_inode_uidgid(mnt_userns, inode,
                                                     CAP_DAC_READ_SEARCH))
                                return 0;
-               if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
+               if (capable_wrt_inode_uidgid(mnt_userns, inode,
+                                            CAP_DAC_OVERRIDE))
                        return 0;
                return -EACCES;
        }
@@ -370,7 +410,8 @@ int generic_permission(struct inode *inode, int mask)
         */
        mask &= MAY_READ | MAY_WRITE | MAY_EXEC;
        if (mask == MAY_READ)
-               if (capable_wrt_inode_uidgid(inode, CAP_DAC_READ_SEARCH))
+               if (capable_wrt_inode_uidgid(mnt_userns, inode,
+                                            CAP_DAC_READ_SEARCH))
                        return 0;
        /*
         * Read/write DACs are always overridable.
@@ -378,31 +419,38 @@ int generic_permission(struct inode *inode, int mask)
         * at least one exec bit set.
         */
        if (!(mask & MAY_EXEC) || (inode->i_mode & S_IXUGO))
-               if (capable_wrt_inode_uidgid(inode, CAP_DAC_OVERRIDE))
+               if (capable_wrt_inode_uidgid(mnt_userns, inode,
+                                            CAP_DAC_OVERRIDE))
                        return 0;
 
        return -EACCES;
 }
 EXPORT_SYMBOL(generic_permission);
 
-/*
+/**
+ * do_inode_permission - UNIX permission checking
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @inode:     inode to check permissions on
+ * @mask:      right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC ...)
+ *
  * We _really_ want to just do "generic_permission()" without
  * even looking at the inode->i_op values. So we keep a cache
  * flag in inode->i_opflags, that says "this has not special
  * permission function, use the fast case".
  */
-static inline int do_inode_permission(struct inode *inode, int mask)
+static inline int do_inode_permission(struct user_namespace *mnt_userns,
+                                     struct inode *inode, int mask)
 {
        if (unlikely(!(inode->i_opflags & IOP_FASTPERM))) {
                if (likely(inode->i_op->permission))
-                       return inode->i_op->permission(inode, mask);
+                       return inode->i_op->permission(mnt_userns, inode, mask);
 
                /* This gets set once for the inode lifetime */
                spin_lock(&inode->i_lock);
                inode->i_opflags |= IOP_FASTPERM;
                spin_unlock(&inode->i_lock);
        }
-       return generic_permission(inode, mask);
+       return generic_permission(mnt_userns, inode, mask);
 }
 
 /**
@@ -427,8 +475,9 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
 
 /**
  * inode_permission - Check for access rights to a given inode
- * @inode: Inode to check permission on
- * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
+ * @mnt_userns:        User namespace of the mount the inode was found from
+ * @inode:     Inode to check permission on
+ * @mask:      Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC)
  *
  * Check for read/write/execute permissions on an inode.  We use fs[ug]id for
  * this, letting us set arbitrary permissions for filesystem access without
@@ -436,7 +485,8 @@ static int sb_permission(struct super_block *sb, struct inode *inode, int mask)
  *
  * When checking for MAY_APPEND, MAY_WRITE must also be set in @mask.
  */
-int inode_permission(struct inode *inode, int mask)
+int inode_permission(struct user_namespace *mnt_userns,
+                    struct inode *inode, int mask)
 {
        int retval;
 
@@ -456,11 +506,11 @@ int inode_permission(struct inode *inode, int mask)
                 * written back improperly if their true value is unknown
                 * to the vfs.
                 */
-               if (HAS_UNMAPPED_ID(inode))
+               if (HAS_UNMAPPED_ID(mnt_userns, inode))
                        return -EACCES;
        }
 
-       retval = do_inode_permission(inode, mask);
+       retval = do_inode_permission(mnt_userns, inode, mask);
        if (retval)
                return retval;
 
@@ -529,6 +579,8 @@ static void set_nameidata(struct nameidata *p, int dfd, struct filename *name)
        p->stack = p->internal;
        p->dfd = dfd;
        p->name = name;
+       p->path.mnt = NULL;
+       p->path.dentry = NULL;
        p->total_link_count = old ? old->total_link_count : 0;
        p->saved = old;
        current->nameidata = p;
@@ -602,6 +654,8 @@ static void terminate_walk(struct nameidata *nd)
                rcu_read_unlock();
        }
        nd->depth = 0;
+       nd->path.mnt = NULL;
+       nd->path.dentry = NULL;
 }
 
 /* path_put is needed afterwards regardless of success or failure */
@@ -960,11 +1014,16 @@ int sysctl_protected_regular __read_mostly;
  */
 static inline int may_follow_link(struct nameidata *nd, const struct inode *inode)
 {
+       struct user_namespace *mnt_userns;
+       kuid_t i_uid;
+
        if (!sysctl_protected_symlinks)
                return 0;
 
+       mnt_userns = mnt_user_ns(nd->path.mnt);
+       i_uid = i_uid_into_mnt(mnt_userns, inode);
        /* Allowed if owner and follower match. */
-       if (uid_eq(current_cred()->fsuid, inode->i_uid))
+       if (uid_eq(current_cred()->fsuid, i_uid))
                return 0;
 
        /* Allowed if parent directory not sticky and world-writable. */
@@ -972,7 +1031,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
                return 0;
 
        /* Allowed if parent directory and link owner match. */
-       if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, inode->i_uid))
+       if (uid_valid(nd->dir_uid) && uid_eq(nd->dir_uid, i_uid))
                return 0;
 
        if (nd->flags & LOOKUP_RCU)
@@ -985,6 +1044,7 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
 
 /**
  * safe_hardlink_source - Check for safe hardlink conditions
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode: the source inode to hardlink from
  *
  * Return false if at least one of the following conditions:
@@ -995,7 +1055,8 @@ static inline int may_follow_link(struct nameidata *nd, const struct inode *inod
  *
  * Otherwise returns true.
  */
-static bool safe_hardlink_source(struct inode *inode)
+static bool safe_hardlink_source(struct user_namespace *mnt_userns,
+                                struct inode *inode)
 {
        umode_t mode = inode->i_mode;
 
@@ -1012,7 +1073,7 @@ static bool safe_hardlink_source(struct inode *inode)
                return false;
 
        /* Hardlinking to unreadable or unwritable sources is dangerous. */
-       if (inode_permission(inode, MAY_READ | MAY_WRITE))
+       if (inode_permission(mnt_userns, inode, MAY_READ | MAY_WRITE))
                return false;
 
        return true;
@@ -1020,6 +1081,7 @@ static bool safe_hardlink_source(struct inode *inode)
 
 /**
  * may_linkat - Check permissions for creating a hardlink
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @link: the source to hardlink from
  *
  * Block hardlink when all of:
@@ -1028,14 +1090,21 @@ static bool safe_hardlink_source(struct inode *inode)
  *  - hardlink source is unsafe (see safe_hardlink_source() above)
  *  - not CAP_FOWNER in a namespace with the inode owner uid mapped
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
+ *
  * Returns 0 if successful, -ve on error.
  */
-int may_linkat(struct path *link)
+int may_linkat(struct user_namespace *mnt_userns, struct path *link)
 {
        struct inode *inode = link->dentry->d_inode;
 
        /* Inode writeback is not safe when the uid or gid are invalid. */
-       if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+       if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
+           !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
                return -EOVERFLOW;
 
        if (!sysctl_protected_hardlinks)
@@ -1044,7 +1113,8 @@ int may_linkat(struct path *link)
        /* Source inode owner (or CAP_FOWNER) can hardlink all they like,
         * otherwise, it must be a safe source.
         */
-       if (safe_hardlink_source(inode) || inode_owner_or_capable(inode))
+       if (safe_hardlink_source(mnt_userns, inode) ||
+           inode_owner_or_capable(mnt_userns, inode))
                return 0;
 
        audit_log_path_denied(AUDIT_ANOM_LINK, "linkat");
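
Restated, may_linkat() with fs.protected_hardlinks enabled refuses a link unless the caller owns the source (or holds CAP_FOWNER) or the source is a regular, setuid/setgid-free file the caller could open read-write. A stat(2)-based approximation of that predicate (the kernel, of course, consults the mapped inode ids and capabilities):

    #include <stdbool.h>
    #include <stdio.h>
    #include <sys/stat.h>
    #include <unistd.h>

    /* Roughly mirrors safe_hardlink_source(): regular file, no setuid,
     * no setgid-with-group-exec, and readable+writable by us. */
    static bool safe_source(const char *path)
    {
            struct stat st;

            if (stat(path, &st) != 0 || !S_ISREG(st.st_mode))
                    return false;
            if (st.st_mode & S_ISUID)
                    return false;
            if ((st.st_mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
                    return false;
            return access(path, R_OK | W_OK) == 0;
    }

    int main(int argc, char **argv)
    {
            struct stat st;

            if (argc < 2 || stat(argv[1], &st) != 0)
                    return 1;
            /* may_linkat() analogue: the owner may always link; others
             * need a safe source when protected_hardlinks is on. */
            bool ok = (st.st_uid == getuid()) || safe_source(argv[1]);
            printf("%s: hardlink %s\n", argv[1], ok ? "allowed" : "refused");
            return 0;
    }
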
@@ -1055,6 +1125,7 @@ int may_linkat(struct path *link)
  * may_create_in_sticky - Check whether an O_CREAT open in a sticky directory
  *                       should be allowed, or not, on files that already
  *                       exist.
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dir_mode: mode bits of directory
  * @dir_uid: owner of directory
  * @inode: the inode of the file to open
@@ -1070,16 +1141,25 @@ int may_linkat(struct path *link)
  * the directory doesn't have to be world writable: being group writable will
  * be enough.
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode simply pass init_user_ns.
+ *
  * Returns 0 if the open is allowed, -ve on error.
  */
-static int may_create_in_sticky(umode_t dir_mode, kuid_t dir_uid,
-                               struct inode * const inode)
+static int may_create_in_sticky(struct user_namespace *mnt_userns,
+                               struct nameidata *nd, struct inode *const inode)
 {
+       umode_t dir_mode = nd->dir_mode;
+       kuid_t dir_uid = nd->dir_uid;
+
        if ((!sysctl_protected_fifos && S_ISFIFO(inode->i_mode)) ||
            (!sysctl_protected_regular && S_ISREG(inode->i_mode)) ||
            likely(!(dir_mode & S_ISVTX)) ||
-           uid_eq(inode->i_uid, dir_uid) ||
-           uid_eq(current_fsuid(), inode->i_uid))
+           uid_eq(i_uid_into_mnt(mnt_userns, inode), dir_uid) ||
+           uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)))
                return 0;
 
        if (likely(dir_mode & 0002) ||
@@ -1569,14 +1649,15 @@ static struct dentry *lookup_slow(const struct qstr *name,
        return res;
 }
 
-static inline int may_lookup(struct nameidata *nd)
+static inline int may_lookup(struct user_namespace *mnt_userns,
+                            struct nameidata *nd)
 {
        if (nd->flags & LOOKUP_RCU) {
-               int err = inode_permission(nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
+               int err = inode_permission(mnt_userns, nd->inode, MAY_EXEC|MAY_NOT_BLOCK);
                if (err != -ECHILD || !try_to_unlazy(nd))
                        return err;
        }
-       return inode_permission(nd->inode, MAY_EXEC);
+       return inode_permission(mnt_userns, nd->inode, MAY_EXEC);
 }
 
 static int reserve_stack(struct nameidata *nd, struct path *link, unsigned seq)
@@ -2122,11 +2203,13 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 
        /* At this point we know we have a real path component. */
        for(;;) {
+               struct user_namespace *mnt_userns;
                const char *link;
                u64 hash_len;
                int type;
 
-               err = may_lookup(nd);
+               mnt_userns = mnt_user_ns(nd->path.mnt);
+               err = may_lookup(mnt_userns, nd);
                if (err)
                        return err;
 
@@ -2174,7 +2257,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 OK:
                        /* pathname or trailing symlink, done */
                        if (!depth) {
-                               nd->dir_uid = nd->inode->i_uid;
+                               nd->dir_uid = i_uid_into_mnt(mnt_userns, nd->inode);
                                nd->dir_mode = nd->inode->i_mode;
                                nd->flags &= ~LOOKUP_PARENT;
                                return 0;
@@ -2243,8 +2326,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags)
        }
 
        nd->root.mnt = NULL;
-       nd->path.mnt = NULL;
-       nd->path.dentry = NULL;
 
        /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */
        if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) {
@@ -2340,16 +2421,16 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path
        while (!(err = link_path_walk(s, nd)) &&
               (s = lookup_last(nd)) != NULL)
                ;
+       if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
+               err = handle_lookup_down(nd);
+               nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please...
+       }
        if (!err)
                err = complete_walk(nd);
 
        if (!err && nd->flags & LOOKUP_DIRECTORY)
                if (!d_can_lookup(nd->path.dentry))
                        err = -ENOTDIR;
-       if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) {
-               err = handle_lookup_down(nd);
-               nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please...
-       }
        if (!err) {
                *path = nd->path;
                nd->path.mnt = NULL;
@@ -2511,7 +2592,7 @@ static int lookup_one_len_common(const char *name, struct dentry *base,
                        return err;
        }
 
-       return inode_permission(base->d_inode, MAY_EXEC);
+       return inode_permission(&init_user_ns, base->d_inode, MAY_EXEC);
 }
 
 /**
@@ -2656,15 +2737,16 @@ int user_path_at_empty(int dfd, const char __user *name, unsigned flags,
 }
 EXPORT_SYMBOL(user_path_at_empty);
 
-int __check_sticky(struct inode *dir, struct inode *inode)
+int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
+                  struct inode *inode)
 {
        kuid_t fsuid = current_fsuid();
 
-       if (uid_eq(inode->i_uid, fsuid))
+       if (uid_eq(i_uid_into_mnt(mnt_userns, inode), fsuid))
                return 0;
-       if (uid_eq(dir->i_uid, fsuid))
+       if (uid_eq(i_uid_into_mnt(mnt_userns, dir), fsuid))
                return 0;
-       return !capable_wrt_inode_uidgid(inode, CAP_FOWNER);
+       return !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FOWNER);
 }
 EXPORT_SYMBOL(__check_sticky);
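
check_sticky() as called from may_delete() below is a thin inline wrapper over
__check_sticky(); a sketch, assuming the wrapper keeps its pre-series
short-circuit on the sticky bit:

    static inline int check_sticky(struct user_namespace *mnt_userns,
                                   struct inode *dir, struct inode *inode)
    {
            if (!(dir->i_mode & S_ISVTX))
                    return 0;       /* no sticky bit, nothing to enforce */
            return __check_sticky(mnt_userns, dir, inode);
    }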
 
@@ -2688,7 +2770,8 @@ EXPORT_SYMBOL(__check_sticky);
  * 11. We don't allow removal of NFS sillyrenamed files; it's handled by
  *     nfs_async_unlink().
  */
-static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
+static int may_delete(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *victim, bool isdir)
 {
        struct inode *inode = d_backing_inode(victim);
        int error;
@@ -2700,19 +2783,21 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
        BUG_ON(victim->d_parent->d_inode != dir);
 
        /* Inode writeback is not safe when the uid or gid are invalid. */
-       if (!uid_valid(inode->i_uid) || !gid_valid(inode->i_gid))
+       if (!uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
+           !gid_valid(i_gid_into_mnt(mnt_userns, inode)))
                return -EOVERFLOW;
 
        audit_inode_child(dir, victim, AUDIT_TYPE_CHILD_DELETE);
 
-       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
        if (IS_APPEND(dir))
                return -EPERM;
 
-       if (check_sticky(dir, inode) || IS_APPEND(inode) ||
-           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) || HAS_UNMAPPED_ID(inode))
+       if (check_sticky(mnt_userns, dir, inode) || IS_APPEND(inode) ||
+           IS_IMMUTABLE(inode) || IS_SWAPFILE(inode) ||
+           HAS_UNMAPPED_ID(mnt_userns, inode))
                return -EPERM;
        if (isdir) {
                if (!d_is_dir(victim))
@@ -2737,7 +2822,8 @@ static int may_delete(struct inode *dir, struct dentry *victim, bool isdir)
  *  4. We should have write and exec permissions on dir
  *  5. We can't do it if dir is immutable (done in permission())
  */
-static inline int may_create(struct inode *dir, struct dentry *child)
+static inline int may_create(struct user_namespace *mnt_userns,
+                            struct inode *dir, struct dentry *child)
 {
        struct user_namespace *s_user_ns;
        audit_inode_child(dir, child, AUDIT_TYPE_CHILD_CREATE);
@@ -2746,10 +2832,10 @@ static inline int may_create(struct inode *dir, struct dentry *child)
        if (IS_DEADDIR(dir))
                return -ENOENT;
        s_user_ns = dir->i_sb->s_user_ns;
-       if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-           !kgid_has_mapping(s_user_ns, current_fsgid()))
+       if (!kuid_has_mapping(s_user_ns, fsuid_into_mnt(mnt_userns)) ||
+           !kgid_has_mapping(s_user_ns, fsgid_into_mnt(mnt_userns)))
                return -EOVERFLOW;
-       return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       return inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
 }
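
fsuid_into_mnt()/fsgid_into_mnt() translate in the opposite direction from
i_uid_into_mnt(): they express the caller's fsuid/fsgid as seen from the
mount's idmapping so the result can be checked against the superblock's
s_user_ns. A hedged sketch; the in-tree helpers live in include/linux/fs.h:

    static inline kuid_t fsuid_into_mnt(struct user_namespace *mnt_userns)
    {
            /* Identity for init_user_ns, i.e. for non-idmapped mounts; an
             * unmapped fsuid yields INVALID_UID, and may_create() above
             * then fails the kuid_has_mapping() check with -EOVERFLOW. */
            return KUIDT_INIT(from_kuid(mnt_userns, current_fsuid()));
    }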
 
 /*
@@ -2796,10 +2882,26 @@ void unlock_rename(struct dentry *p1, struct dentry *p2)
 }
 EXPORT_SYMBOL(unlock_rename);
 
-int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-               bool want_excl)
+/**
+ * vfs_create - create new file
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dir:       inode of the parent directory
+ * @dentry:    dentry of the file to create
+ * @mode:      mode of the new file
+ * @want_excl: whether the file must not yet exist
+ *
+ * Create a new file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
+ */
+int vfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+              struct dentry *dentry, umode_t mode, bool want_excl)
 {
-       int error = may_create(dir, dentry);
+       int error = may_create(mnt_userns, dir, dentry);
        if (error)
                return error;
 
@@ -2810,7 +2912,7 @@ int vfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        error = security_inode_create(dir, dentry, mode);
        if (error)
                return error;
-       error = dir->i_op->create(dir, dentry, mode, want_excl);
+       error = dir->i_op->create(mnt_userns, dir, dentry, mode, want_excl);
        if (!error)
                fsnotify_create(dir, dentry);
        return error;
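
For filesystem implementers the visible change is the extra argument to the
->create() inode operation. A minimal sketch of an updated method for a
hypothetical foofs (foofs_new_inode() is made up for illustration;
inode_init_owner() gains the same argument in this series):

    static int foofs_create(struct user_namespace *mnt_userns, struct inode *dir,
                            struct dentry *dentry, umode_t mode, bool excl)
    {
            struct inode *inode = foofs_new_inode(dir->i_sb); /* hypothetical */

            if (!inode)
                    return -ENOSPC;
            /* Derive i_uid/i_gid from the caller's creds as seen through the
             * mount's idmapping rather than from the raw fsuid/fsgid. */
            inode_init_owner(mnt_userns, inode, dir, mode);
            d_instantiate(dentry, inode);
            return 0;
    }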
@@ -2822,7 +2924,7 @@ int vfs_mkobj(struct dentry *dentry, umode_t mode,
                void *arg)
 {
        struct inode *dir = dentry->d_parent->d_inode;
-       int error = may_create(dir, dentry);
+       int error = may_create(&init_user_ns, dir, dentry);
        if (error)
                return error;
 
@@ -2844,7 +2946,8 @@ bool may_open_dev(const struct path *path)
                !(path->mnt->mnt_sb->s_iflags & SB_I_NODEV);
 }
 
-static int may_open(const struct path *path, int acc_mode, int flag)
+static int may_open(struct user_namespace *mnt_userns, const struct path *path,
+                   int acc_mode, int flag)
 {
        struct dentry *dentry = path->dentry;
        struct inode *inode = dentry->d_inode;
@@ -2879,7 +2982,7 @@ static int may_open(const struct path *path, int acc_mode, int flag)
                break;
        }
 
-       error = inode_permission(inode, MAY_OPEN | acc_mode);
+       error = inode_permission(mnt_userns, inode, MAY_OPEN | acc_mode);
        if (error)
                return error;
 
@@ -2894,13 +2997,13 @@ static int may_open(const struct path *path, int acc_mode, int flag)
        }
 
        /* O_NOATIME can only be set by the owner or superuser */
-       if (flag & O_NOATIME && !inode_owner_or_capable(inode))
+       if (flag & O_NOATIME && !inode_owner_or_capable(mnt_userns, inode))
                return -EPERM;
 
        return 0;
 }
 
-static int handle_truncate(struct file *filp)
+static int handle_truncate(struct user_namespace *mnt_userns, struct file *filp)
 {
        const struct path *path = &filp->f_path;
        struct inode *inode = path->dentry->d_inode;
@@ -2914,7 +3017,7 @@ static int handle_truncate(struct file *filp)
        if (!error)
                error = security_path_truncate(path);
        if (!error) {
-               error = do_truncate(path->dentry, 0,
+               error = do_truncate(mnt_userns, path->dentry, 0,
                                    ATTR_MTIME|ATTR_CTIME|ATTR_OPEN,
                                    filp);
        }
@@ -2929,7 +3032,9 @@ static inline int open_to_namei_flags(int flag)
        return flag;
 }
 
-static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t mode)
+static int may_o_create(struct user_namespace *mnt_userns,
+                       const struct path *dir, struct dentry *dentry,
+                       umode_t mode)
 {
        struct user_namespace *s_user_ns;
        int error = security_path_mknod(dir, dentry, mode, 0);
@@ -2937,11 +3042,12 @@ static int may_o_create(const struct path *dir, struct dentry *dentry, umode_t m
                return error;
 
        s_user_ns = dir->dentry->d_sb->s_user_ns;
-       if (!kuid_has_mapping(s_user_ns, current_fsuid()) ||
-           !kgid_has_mapping(s_user_ns, current_fsgid()))
+       if (!kuid_has_mapping(s_user_ns, fsuid_into_mnt(mnt_userns)) ||
+           !kgid_has_mapping(s_user_ns, fsgid_into_mnt(mnt_userns)))
                return -EOVERFLOW;
 
-       error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(mnt_userns, dir->dentry->d_inode,
+                                MAY_WRITE | MAY_EXEC);
        if (error)
                return error;
 
@@ -3020,6 +3126,7 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
                                  const struct open_flags *op,
                                  bool got_write)
 {
+       struct user_namespace *mnt_userns;
        struct dentry *dir = nd->path.dentry;
        struct inode *dir_inode = dir->d_inode;
        int open_flag = op->open_flag;
@@ -3067,13 +3174,15 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
         */
        if (unlikely(!got_write))
                open_flag &= ~O_TRUNC;
+       mnt_userns = mnt_user_ns(nd->path.mnt);
        if (open_flag & O_CREAT) {
                if (open_flag & O_EXCL)
                        open_flag &= ~O_TRUNC;
                if (!IS_POSIXACL(dir->d_inode))
                        mode &= ~current_umask();
                if (likely(got_write))
-                       create_error = may_o_create(&nd->path, dentry, mode);
+                       create_error = may_o_create(mnt_userns, &nd->path,
+                                                   dentry, mode);
                else
                        create_error = -EROFS;
        }
@@ -3108,8 +3217,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
                        error = -EACCES;
                        goto out_dput;
                }
-               error = dir_inode->i_op->create(dir_inode, dentry, mode,
-                                               open_flag & O_EXCL);
+
+               error = dir_inode->i_op->create(mnt_userns, dir_inode, dentry,
+                                               mode, open_flag & O_EXCL);
                if (error)
                        goto out_dput;
        }
@@ -3213,6 +3323,7 @@ finish_lookup:
 static int do_open(struct nameidata *nd,
                   struct file *file, const struct open_flags *op)
 {
+       struct user_namespace *mnt_userns;
        int open_flag = op->open_flag;
        bool do_truncate;
        int acc_mode;
@@ -3225,12 +3336,13 @@ static int do_open(struct nameidata *nd,
        }
        if (!(file->f_mode & FMODE_CREATED))
                audit_inode(nd->name, nd->path.dentry, 0);
+       mnt_userns = mnt_user_ns(nd->path.mnt);
        if (open_flag & O_CREAT) {
                if ((open_flag & O_EXCL) && !(file->f_mode & FMODE_CREATED))
                        return -EEXIST;
                if (d_is_dir(nd->path.dentry))
                        return -EISDIR;
-               error = may_create_in_sticky(nd->dir_mode, nd->dir_uid,
+               error = may_create_in_sticky(mnt_userns, nd,
                                             d_backing_inode(nd->path.dentry));
                if (unlikely(error))
                        return error;
@@ -3250,13 +3362,13 @@ static int do_open(struct nameidata *nd,
                        return error;
                do_truncate = true;
        }
-       error = may_open(&nd->path, acc_mode, open_flag);
+       error = may_open(mnt_userns, &nd->path, acc_mode, open_flag);
        if (!error && !(file->f_mode & FMODE_OPENED))
                error = vfs_open(&nd->path, file);
        if (!error)
                error = ima_file_check(file, op->acc_mode);
        if (!error && do_truncate)
-               error = handle_truncate(file);
+               error = handle_truncate(mnt_userns, file);
        if (unlikely(error > 0)) {
                WARN_ON(1);
                error = -EINVAL;
@@ -3266,7 +3378,23 @@ static int do_open(struct nameidata *nd,
        return error;
 }
 
-struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
+/**
+ * vfs_tmpfile - create tmpfile
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dentry:    pointer to dentry of the base directory
+ * @mode:      mode of the new tmpfile
+ * @open_flag: flags
+ *
+ * Create a temporary file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
+ */
+struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
+                          struct dentry *dentry, umode_t mode, int open_flag)
 {
        struct dentry *child = NULL;
        struct inode *dir = dentry->d_inode;
@@ -3274,7 +3402,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
        int error;
 
        /* we want directory to be writable */
-       error = inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       error = inode_permission(mnt_userns, dir, MAY_WRITE | MAY_EXEC);
        if (error)
                goto out_err;
        error = -EOPNOTSUPP;
@@ -3284,7 +3412,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
        child = d_alloc(dentry, &slash_name);
        if (unlikely(!child))
                goto out_err;
-       error = dir->i_op->tmpfile(dir, child, mode);
+       error = dir->i_op->tmpfile(mnt_userns, dir, child, mode);
        if (error)
                goto out_err;
        error = -ENOENT;
@@ -3296,7 +3424,7 @@ struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode, int open_flag)
                inode->i_state |= I_LINKABLE;
                spin_unlock(&inode->i_lock);
        }
-       ima_post_create_tmpfile(inode);
+       ima_post_create_tmpfile(mnt_userns, inode);
        return child;
 
 out_err:
@@ -3309,6 +3437,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
                const struct open_flags *op,
                struct file *file)
 {
+       struct user_namespace *mnt_userns;
        struct dentry *child;
        struct path path;
        int error = path_lookupat(nd, flags | LOOKUP_DIRECTORY, &path);
@@ -3317,7 +3446,8 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
        error = mnt_want_write(path.mnt);
        if (unlikely(error))
                goto out;
-       child = vfs_tmpfile(path.dentry, op->mode, op->open_flag);
+       mnt_userns = mnt_user_ns(path.mnt);
+       child = vfs_tmpfile(mnt_userns, path.dentry, op->mode, op->open_flag);
        error = PTR_ERR(child);
        if (IS_ERR(child))
                goto out2;
@@ -3325,7 +3455,7 @@ static int do_tmpfile(struct nameidata *nd, unsigned flags,
        path.dentry = child;
        audit_inode(nd->name, child, 0);
        /* Don't check for other permissions, the inode was just created */
-       error = may_open(&path, 0, op->open_flag);
+       error = may_open(mnt_userns, &path, 0, op->open_flag);
        if (!error)
                error = vfs_open(&path, file);
 out2:
@@ -3527,10 +3657,27 @@ inline struct dentry *user_path_create(int dfd, const char __user *pathname,
 }
 EXPORT_SYMBOL(user_path_create);
 
-int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+/**
+ * vfs_mknod - create device node or file
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dir:       inode of the parent directory
+ * @dentry:    dentry of the device node or file to create
+ * @mode:      mode of the new device node or file
+ * @dev:       device number of device to create
+ *
+ * Create a device node or file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
+ */
+int vfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+             struct dentry *dentry, umode_t mode, dev_t dev)
 {
        bool is_whiteout = S_ISCHR(mode) && dev == WHITEOUT_DEV;
-       int error = may_create(dir, dentry);
+       int error = may_create(mnt_userns, dir, dentry);
 
        if (error)
                return error;
@@ -3550,7 +3697,7 @@ int vfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
        if (error)
                return error;
 
-       error = dir->i_op->mknod(dir, dentry, mode, dev);
+       error = dir->i_op->mknod(mnt_userns, dir, dentry, mode, dev);
        if (!error)
                fsnotify_create(dir, dentry);
        return error;
@@ -3577,6 +3724,7 @@ static int may_mknod(umode_t mode)
 static long do_mknodat(int dfd, const char __user *filename, umode_t mode,
                unsigned int dev)
 {
+       struct user_namespace *mnt_userns;
        struct dentry *dentry;
        struct path path;
        int error;
@@ -3595,18 +3743,22 @@ retry:
        error = security_path_mknod(&path, dentry, mode, dev);
        if (error)
                goto out;
+
+       mnt_userns = mnt_user_ns(path.mnt);
        switch (mode & S_IFMT) {
                case 0: case S_IFREG:
-                       error = vfs_create(path.dentry->d_inode,dentry,mode,true);
+                       error = vfs_create(mnt_userns, path.dentry->d_inode,
+                                          dentry, mode, true);
                        if (!error)
-                               ima_post_path_mknod(dentry);
+                               ima_post_path_mknod(mnt_userns, dentry);
                        break;
                case S_IFCHR: case S_IFBLK:
-                       error = vfs_mknod(path.dentry->d_inode,dentry,mode,
-                                       new_decode_dev(dev));
+                       error = vfs_mknod(mnt_userns, path.dentry->d_inode,
+                                         dentry, mode, new_decode_dev(dev));
                        break;
                case S_IFIFO: case S_IFSOCK:
-                       error = vfs_mknod(path.dentry->d_inode,dentry,mode,0);
+                       error = vfs_mknod(mnt_userns, path.dentry->d_inode,
+                                         dentry, mode, 0);
                        break;
        }
 out:
@@ -3629,9 +3781,25 @@ SYSCALL_DEFINE3(mknod, const char __user *, filename, umode_t, mode, unsigned, d
        return do_mknodat(AT_FDCWD, filename, mode, dev);
 }
 
-int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+/**
+ * vfs_mkdir - create directory
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dir:       inode of the parent directory
+ * @dentry:    dentry of the directory to create
+ * @mode:      mode of the new directory
+ *
+ * Create a directory.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
+ */
+int vfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+             struct dentry *dentry, umode_t mode)
 {
-       int error = may_create(dir, dentry);
+       int error = may_create(mnt_userns, dir, dentry);
        unsigned max_links = dir->i_sb->s_max_links;
 
        if (error)
@@ -3648,7 +3816,7 @@ int vfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
        if (max_links && dir->i_nlink >= max_links)
                return -EMLINK;
 
-       error = dir->i_op->mkdir(dir, dentry, mode);
+       error = dir->i_op->mkdir(mnt_userns, dir, dentry, mode);
        if (!error)
                fsnotify_mkdir(dir, dentry);
        return error;
@@ -3670,8 +3838,12 @@ retry:
        if (!IS_POSIXACL(path.dentry->d_inode))
                mode &= ~current_umask();
        error = security_path_mkdir(&path, dentry, mode);
-       if (!error)
-               error = vfs_mkdir(path.dentry->d_inode, dentry, mode);
+       if (!error) {
+               struct user_namespace *mnt_userns;
+               mnt_userns = mnt_user_ns(path.mnt);
+               error = vfs_mkdir(mnt_userns, path.dentry->d_inode, dentry,
+                                 mode);
+       }
        done_path_create(&path, dentry);
        if (retry_estale(error, lookup_flags)) {
                lookup_flags |= LOOKUP_REVAL;
@@ -3690,9 +3862,24 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
        return do_mkdirat(AT_FDCWD, pathname, mode);
 }
 
-int vfs_rmdir(struct inode *dir, struct dentry *dentry)
+/**
+ * vfs_rmdir - remove directory
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dir:       inode of the parent directory
+ * @dentry:    dentry of the directory to remove
+ *
+ * Remove a directory.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
+ */
+int vfs_rmdir(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry)
 {
-       int error = may_delete(dir, dentry, 1);
+       int error = may_delete(mnt_userns, dir, dentry, 1);
 
        if (error)
                return error;
@@ -3732,6 +3919,7 @@ EXPORT_SYMBOL(vfs_rmdir);
 
 long do_rmdir(int dfd, struct filename *name)
 {
+       struct user_namespace *mnt_userns;
        int error = 0;
        struct dentry *dentry;
        struct path path;
@@ -3772,7 +3960,8 @@ retry:
        error = security_path_rmdir(&path, dentry);
        if (error)
                goto exit3;
-       error = vfs_rmdir(path.dentry->d_inode, dentry);
+       mnt_userns = mnt_user_ns(path.mnt);
+       error = vfs_rmdir(mnt_userns, path.dentry->d_inode, dentry);
 exit3:
        dput(dentry);
 exit2:
@@ -3795,6 +3984,7 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
 
 /**
  * vfs_unlink - unlink a filesystem object
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dir:       parent directory
  * @dentry:    victim
  * @delegated_inode: returns victim inode, if the inode is delegated.
@@ -3810,11 +4000,18 @@ SYSCALL_DEFINE1(rmdir, const char __user *, pathname)
  * Alternatively, a caller may pass NULL for delegated_inode.  This may
  * be appropriate for callers that expect the underlying filesystem not
  * to be NFS exported.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
  */
-int vfs_unlink(struct inode *dir, struct dentry *dentry, struct inode **delegated_inode)
+int vfs_unlink(struct user_namespace *mnt_userns, struct inode *dir,
+              struct dentry *dentry, struct inode **delegated_inode)
 {
        struct inode *target = dentry->d_inode;
-       int error = may_delete(dir, dentry, 0);
+       int error = may_delete(mnt_userns, dir, dentry, 0);
 
        if (error)
                return error;
@@ -3885,6 +4082,8 @@ retry_deleg:
        dentry = __lookup_hash(&last, path.dentry, lookup_flags);
        error = PTR_ERR(dentry);
        if (!IS_ERR(dentry)) {
+               struct user_namespace *mnt_userns;
+
                /* Why not before? Because we want correct error value */
                if (last.name[last.len])
                        goto slashes;
@@ -3895,7 +4094,9 @@ retry_deleg:
                error = security_path_unlink(&path, dentry);
                if (error)
                        goto exit2;
-               error = vfs_unlink(path.dentry->d_inode, dentry, &delegated_inode);
+               mnt_userns = mnt_user_ns(path.mnt);
+               error = vfs_unlink(mnt_userns, path.dentry->d_inode, dentry,
+                                  &delegated_inode);
 exit2:
                dput(dentry);
        }
@@ -3944,9 +4145,25 @@ SYSCALL_DEFINE1(unlink, const char __user *, pathname)
        return do_unlinkat(AT_FDCWD, getname(pathname));
 }
 
-int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
+/**
+ * vfs_symlink - create symlink
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dir:       inode of the parent directory
+ * @dentry:    dentry of the symlink to create
+ * @oldname:   name of the file to link to
+ *
+ * Create a symlink.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
+ */
+int vfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+               struct dentry *dentry, const char *oldname)
 {
-       int error = may_create(dir, dentry);
+       int error = may_create(mnt_userns, dir, dentry);
 
        if (error)
                return error;
@@ -3958,7 +4175,7 @@ int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
        if (error)
                return error;
 
-       error = dir->i_op->symlink(dir, dentry, oldname);
+       error = dir->i_op->symlink(mnt_userns, dir, dentry, oldname);
        if (!error)
                fsnotify_create(dir, dentry);
        return error;
@@ -3984,8 +4201,13 @@ retry:
                goto out_putname;
 
        error = security_path_symlink(&path, dentry, from->name);
-       if (!error)
-               error = vfs_symlink(path.dentry->d_inode, dentry, from->name);
+       if (!error) {
+               struct user_namespace *mnt_userns;
+
+               mnt_userns = mnt_user_ns(path.mnt);
+               error = vfs_symlink(mnt_userns, path.dentry->d_inode, dentry,
+                                   from->name);
+       }
        done_path_create(&path, dentry);
        if (retry_estale(error, lookup_flags)) {
                lookup_flags |= LOOKUP_REVAL;
@@ -4010,6 +4232,7 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
 /**
  * vfs_link - create a new link
  * @old_dentry:        object to be linked
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dir:       new parent
  * @new_dentry:        where to create the new link
  * @delegated_inode: returns inode needing a delegation break
@@ -4025,8 +4248,16 @@ SYSCALL_DEFINE2(symlink, const char __user *, oldname, const char __user *, newn
  * Alternatively, a caller may pass NULL for delegated_inode.  This may
  * be appropriate for callers that expect the underlying filesystem not
  * to be NFS exported.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then take
+ * care to map the inode according to @mnt_userns before checking permissions.
+ * On non-idmapped mounts or if permission checking is to be performed on the
+ * raw inode, simply pass init_user_ns.
  */
-int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry, struct inode **delegated_inode)
+int vfs_link(struct dentry *old_dentry, struct user_namespace *mnt_userns,
+            struct inode *dir, struct dentry *new_dentry,
+            struct inode **delegated_inode)
 {
        struct inode *inode = old_dentry->d_inode;
        unsigned max_links = dir->i_sb->s_max_links;
@@ -4035,7 +4266,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
        if (!inode)
                return -ENOENT;
 
-       error = may_create(dir, new_dentry);
+       error = may_create(mnt_userns, dir, new_dentry);
        if (error)
                return error;
 
@@ -4052,7 +4283,7 @@ int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_de
         * be written back improperly if their true value is unknown to
         * the vfs.
         */
-       if (HAS_UNMAPPED_ID(inode))
+       if (HAS_UNMAPPED_ID(mnt_userns, inode))
                return -EPERM;
        if (!dir->i_op->link)
                return -EPERM;
@@ -4099,6 +4330,7 @@ EXPORT_SYMBOL(vfs_link);
 static int do_linkat(int olddfd, const char __user *oldname, int newdfd,
              const char __user *newname, int flags)
 {
+       struct user_namespace *mnt_userns;
        struct dentry *new_dentry;
        struct path old_path, new_path;
        struct inode *delegated_inode = NULL;
@@ -4134,13 +4366,15 @@ retry:
        error = -EXDEV;
        if (old_path.mnt != new_path.mnt)
                goto out_dput;
-       error = may_linkat(&old_path);
+       mnt_userns = mnt_user_ns(new_path.mnt);
+       error = may_linkat(mnt_userns, &old_path);
        if (unlikely(error))
                goto out_dput;
        error = security_path_link(old_path.dentry, &new_path, new_dentry);
        if (error)
                goto out_dput;
-       error = vfs_link(old_path.dentry, new_path.dentry->d_inode, new_dentry, &delegated_inode);
+       error = vfs_link(old_path.dentry, mnt_userns, new_path.dentry->d_inode,
+                        new_dentry, &delegated_inode);
 out_dput:
        done_path_create(&new_path, new_dentry);
        if (delegated_inode) {
@@ -4174,12 +4408,14 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
 
 /**
  * vfs_rename - rename a filesystem object
- * @old_dir:   parent of source
- * @old_dentry:        source
- * @new_dir:   parent of destination
- * @new_dentry:        destination
- * @delegated_inode: returns an inode needing a delegation break
- * @flags:     rename flags
+ * @old_mnt_userns:    user namespace of the mount @old_dentry was found from
+ * @old_dir:           parent of source
+ * @old_dentry:                source
+ * @new_mnt_userns:    user namespace of the mount @new_dentry was found from
+ * @new_dir:           parent of destination
+ * @new_dentry:                destination
+ * @delegated_inode:   returns an inode needing a delegation break
+ * @flags:             rename flags
  *
  * The caller must hold multiple mutexes--see lock_rename().
  *
@@ -4222,11 +4458,14 @@ SYSCALL_DEFINE2(link, const char __user *, oldname, const char __user *, newname
  *        ->i_mutex on parents, which works but leads to some truly excessive
  *        locking].
  */
-int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-              struct inode *new_dir, struct dentry *new_dentry,
-              struct inode **delegated_inode, unsigned int flags)
+int vfs_rename(struct renamedata *rd)
 {
        int error;
+       struct inode *old_dir = rd->old_dir, *new_dir = rd->new_dir;
+       struct dentry *old_dentry = rd->old_dentry;
+       struct dentry *new_dentry = rd->new_dentry;
+       struct inode **delegated_inode = rd->delegated_inode;
+       unsigned int flags = rd->flags;
        bool is_dir = d_is_dir(old_dentry);
        struct inode *source = old_dentry->d_inode;
        struct inode *target = new_dentry->d_inode;
@@ -4237,19 +4476,21 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (source == target)
                return 0;
 
-       error = may_delete(old_dir, old_dentry, is_dir);
+       error = may_delete(rd->old_mnt_userns, old_dir, old_dentry, is_dir);
        if (error)
                return error;
 
        if (!target) {
-               error = may_create(new_dir, new_dentry);
+               error = may_create(rd->new_mnt_userns, new_dir, new_dentry);
        } else {
                new_is_dir = d_is_dir(new_dentry);
 
                if (!(flags & RENAME_EXCHANGE))
-                       error = may_delete(new_dir, new_dentry, is_dir);
+                       error = may_delete(rd->new_mnt_userns, new_dir,
+                                          new_dentry, is_dir);
                else
-                       error = may_delete(new_dir, new_dentry, new_is_dir);
+                       error = may_delete(rd->new_mnt_userns, new_dir,
+                                          new_dentry, new_is_dir);
        }
        if (error)
                return error;
@@ -4263,12 +4504,14 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
         */
        if (new_dir != old_dir) {
                if (is_dir) {
-                       error = inode_permission(source, MAY_WRITE);
+                       error = inode_permission(rd->old_mnt_userns, source,
+                                                MAY_WRITE);
                        if (error)
                                return error;
                }
                if ((flags & RENAME_EXCHANGE) && new_is_dir) {
-                       error = inode_permission(target, MAY_WRITE);
+                       error = inode_permission(rd->new_mnt_userns, target,
+                                                MAY_WRITE);
                        if (error)
                                return error;
                }
@@ -4308,8 +4551,8 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
                if (error)
                        goto out;
        }
-       error = old_dir->i_op->rename(old_dir, old_dentry,
-                                      new_dir, new_dentry, flags);
+       error = old_dir->i_op->rename(rd->new_mnt_userns, old_dir, old_dentry,
+                                     new_dir, new_dentry, flags);
        if (error)
                goto out;
 
@@ -4350,6 +4593,7 @@ EXPORT_SYMBOL(vfs_rename);
 int do_renameat2(int olddfd, struct filename *from, int newdfd,
                 struct filename *to, unsigned int flags)
 {
+       struct renamedata rd;
        struct dentry *old_dentry, *new_dentry;
        struct dentry *trap;
        struct path old_path, new_path;
@@ -4453,9 +4697,16 @@ retry_deleg:
                                     &new_path, new_dentry, flags);
        if (error)
                goto exit5;
-       error = vfs_rename(old_path.dentry->d_inode, old_dentry,
-                          new_path.dentry->d_inode, new_dentry,
-                          &delegated_inode, flags);
+
+       rd.old_dir         = old_path.dentry->d_inode;
+       rd.old_dentry      = old_dentry;
+       rd.old_mnt_userns  = mnt_user_ns(old_path.mnt);
+       rd.new_dir         = new_path.dentry->d_inode;
+       rd.new_dentry      = new_dentry;
+       rd.new_mnt_userns  = mnt_user_ns(new_path.mnt);
+       rd.delegated_inode = &delegated_inode;
+       rd.flags           = flags;
+       error = vfs_rename(&rd);
 exit5:
        dput(new_dentry);
 exit4:
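
Callers elsewhere in the tree follow the same pattern as do_renameat2() above:
bundle the arguments in struct renamedata and hand it to vfs_rename(). An
illustrative fragment for a stacking filesystem acting on its lower layer
(the lower_* identifiers are hypothetical):

    struct renamedata rd = {
            .old_mnt_userns  = &init_user_ns,
            .old_dir         = d_inode(lower_old_dir),
            .old_dentry      = lower_old_dentry,
            .new_mnt_userns  = &init_user_ns,
            .new_dir         = d_inode(lower_new_dir),
            .new_dentry      = lower_new_dentry,
            .delegated_inode = NULL, /* see the delegation note above */
            .flags           = 0,
    };
    int err = vfs_rename(&rd);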
index 9d33909..56bb5a5 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/proc_ns.h>
 #include <linux/magic.h>
 #include <linux/memblock.h>
+#include <linux/proc_fs.h>
 #include <linux/task_work.h>
 #include <linux/sched/task.h>
 #include <uapi/linux/mount.h>
@@ -73,6 +74,15 @@ static DECLARE_RWSEM(namespace_sem);
 static HLIST_HEAD(unmounted);  /* protected by namespace_sem */
 static LIST_HEAD(ex_mountpoints); /* protected by namespace_sem */
 
+struct mount_kattr {
+       unsigned int attr_set;
+       unsigned int attr_clr;
+       unsigned int propagation;
+       unsigned int lookup_flags;
+       bool recurse;
+       struct user_namespace *mnt_userns;
+};
+
 /* /sys/fs */
 struct kobject *fs_kobj;
 EXPORT_SYMBOL_GPL(fs_kobj);
@@ -87,6 +97,16 @@ EXPORT_SYMBOL_GPL(fs_kobj);
  */
 __cacheline_aligned_in_smp DEFINE_SEQLOCK(mount_lock);
 
+static inline void lock_mount_hash(void)
+{
+       write_seqlock(&mount_lock);
+}
+
+static inline void unlock_mount_hash(void)
+{
+       write_sequnlock(&mount_lock);
+}
+
 static inline struct hlist_head *m_hash(struct vfsmount *mnt, struct dentry *dentry)
 {
        unsigned long tmp = ((unsigned long)mnt / L1_CACHE_BYTES);
@@ -210,6 +230,7 @@ static struct mount *alloc_vfsmnt(const char *name)
                INIT_HLIST_NODE(&mnt->mnt_mp_list);
                INIT_LIST_HEAD(&mnt->mnt_umounting);
                INIT_HLIST_HEAD(&mnt->mnt_stuck_children);
+               mnt->mnt.mnt_userns = &init_user_ns;
        }
        return mnt;
 
@@ -360,50 +381,36 @@ int mnt_want_write(struct vfsmount *m)
 EXPORT_SYMBOL_GPL(mnt_want_write);
 
 /**
- * mnt_clone_write - get write access to a mount
- * @mnt: the mount on which to take a write
- *
- * This is effectively like mnt_want_write, except
- * it must only be used to take an extra write reference
- * on a mountpoint that we already know has a write reference
- * on it. This allows some optimisation.
- *
- * After finished, mnt_drop_write must be called as usual to
- * drop the reference.
- */
-int mnt_clone_write(struct vfsmount *mnt)
-{
-       /* superblock may be r/o */
-       if (__mnt_is_readonly(mnt))
-               return -EROFS;
-       preempt_disable();
-       mnt_inc_writers(real_mount(mnt));
-       preempt_enable();
-       return 0;
-}
-EXPORT_SYMBOL_GPL(mnt_clone_write);
-
-/**
  * __mnt_want_write_file - get write access to a file's mount
  * @file: the file whose mount to get write access to
  *
- * This is like __mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like __mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the check for emergency r/o remounts.  This must be
+ * paired with __mnt_drop_write_file.
  */
 int __mnt_want_write_file(struct file *file)
 {
-       if (!(file->f_mode & FMODE_WRITER))
-               return __mnt_want_write(file->f_path.mnt);
-       else
-               return mnt_clone_write(file->f_path.mnt);
+       if (file->f_mode & FMODE_WRITER) {
+               /*
+                * Superblock may have become readonly while there are still
+                * writable fd's, e.g. due to a fs error with errors=remount-ro
+                */
+               if (__mnt_is_readonly(file->f_path.mnt))
+                       return -EROFS;
+               return 0;
+       }
+       return __mnt_want_write(file->f_path.mnt);
 }
 
 /**
  * mnt_want_write_file - get write access to a file's mount
  * @file: the file whose mount to get write access to
  *
- * This is like mnt_want_write, but it takes a file and can
- * do some optimisations if the file is open for write already
+ * This is like mnt_want_write, but if the file is already open for writing it
+ * skips incrementing mnt_writers (since the open file already has a reference)
+ * and instead only does the freeze protection and the check for emergency r/o
+ * remounts.  This must be paired with mnt_drop_write_file.
  */
 int mnt_want_write_file(struct file *file)
 {
@@ -449,7 +456,8 @@ EXPORT_SYMBOL_GPL(mnt_drop_write);
 
 void __mnt_drop_write_file(struct file *file)
 {
-       __mnt_drop_write(file->f_path.mnt);
+       if (!(file->f_mode & FMODE_WRITER))
+               __mnt_drop_write(file->f_path.mnt);
 }
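
With both helpers now keyed off FMODE_WRITER, the pair stays balanced for
every kind of file; the calling convention is unchanged:

    int err = __mnt_want_write_file(file);
    if (err)
            return err;
    /* ... modify the file ... */
    __mnt_drop_write_file(file);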
 
 void mnt_drop_write_file(struct file *file)
@@ -459,11 +467,8 @@ void mnt_drop_write_file(struct file *file)
 }
 EXPORT_SYMBOL(mnt_drop_write_file);
 
-static int mnt_make_readonly(struct mount *mnt)
+static inline int mnt_hold_writers(struct mount *mnt)
 {
-       int ret = 0;
-
-       lock_mount_hash();
        mnt->mnt.mnt_flags |= MNT_WRITE_HOLD;
        /*
         * After storing MNT_WRITE_HOLD, we'll read the counters. This store
@@ -488,25 +493,30 @@ static int mnt_make_readonly(struct mount *mnt)
         * we're counting up here.
         */
        if (mnt_get_writers(mnt) > 0)
-               ret = -EBUSY;
-       else
-               mnt->mnt.mnt_flags |= MNT_READONLY;
+               return -EBUSY;
+
+       return 0;
+}
+
+static inline void mnt_unhold_writers(struct mount *mnt)
+{
        /*
         * MNT_READONLY must become visible before ~MNT_WRITE_HOLD, so writers
         * that become unheld will see MNT_READONLY.
         */
        smp_wmb();
        mnt->mnt.mnt_flags &= ~MNT_WRITE_HOLD;
-       unlock_mount_hash();
-       return ret;
 }
 
-static int __mnt_unmake_readonly(struct mount *mnt)
+static int mnt_make_readonly(struct mount *mnt)
 {
-       lock_mount_hash();
-       mnt->mnt.mnt_flags &= ~MNT_READONLY;
-       unlock_mount_hash();
-       return 0;
+       int ret;
+
+       ret = mnt_hold_writers(mnt);
+       if (!ret)
+               mnt->mnt.mnt_flags |= MNT_READONLY;
+       mnt_unhold_writers(mnt);
+       return ret;
 }
 
 int sb_prepare_remount_readonly(struct super_block *sb)
@@ -547,6 +557,11 @@ int sb_prepare_remount_readonly(struct super_block *sb)
 
 static void free_vfsmnt(struct mount *mnt)
 {
+       struct user_namespace *mnt_userns;
+
+       mnt_userns = mnt_user_ns(&mnt->mnt);
+       if (mnt_userns != &init_user_ns)
+               put_user_ns(mnt_userns);
        kfree_const(mnt->mnt_devname);
 #ifdef CONFIG_SMP
        free_percpu(mnt->mnt_pcp);
@@ -1055,6 +1070,9 @@ static struct mount *clone_mnt(struct mount *old, struct dentry *root,
        mnt->mnt.mnt_flags &= ~(MNT_WRITE_HOLD|MNT_MARKED|MNT_INTERNAL);
 
        atomic_inc(&sb->s_active);
+       mnt->mnt.mnt_userns = mnt_user_ns(&old->mnt);
+       if (mnt->mnt.mnt_userns != &init_user_ns)
+               mnt->mnt.mnt_userns = get_user_ns(mnt->mnt.mnt_userns);
        mnt->mnt.mnt_sb = sb;
        mnt->mnt.mnt_root = dget(root);
        mnt->mnt_mountpoint = mnt->mnt.mnt_root;
@@ -2514,20 +2532,15 @@ static int change_mount_ro_state(struct mount *mnt, unsigned int mnt_flags)
        if (readonly_request)
                return mnt_make_readonly(mnt);
 
-       return __mnt_unmake_readonly(mnt);
+       mnt->mnt.mnt_flags &= ~MNT_READONLY;
+       return 0;
 }
 
-/*
- * Update the user-settable attributes on a mount.  The caller must hold
- * sb->s_umount for writing.
- */
 static void set_mount_attributes(struct mount *mnt, unsigned int mnt_flags)
 {
-       lock_mount_hash();
        mnt_flags |= mnt->mnt.mnt_flags & ~MNT_USER_SETTABLE_MASK;
        mnt->mnt.mnt_flags = mnt_flags;
        touch_mnt_namespace(mnt->mnt_ns);
-       unlock_mount_hash();
 }
 
 static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *mnt)
@@ -2572,11 +2585,17 @@ static int do_reconfigure_mnt(struct path *path, unsigned int mnt_flags)
        if (!can_change_locked_flags(mnt, mnt_flags))
                return -EPERM;
 
-       down_write(&sb->s_umount);
+       /*
+        * We're only checking whether the superblock is read-only, not
+        * changing it, so only take down_read(&sb->s_umount).
+        */
+       down_read(&sb->s_umount);
+       lock_mount_hash();
        ret = change_mount_ro_state(mnt, mnt_flags);
        if (ret == 0)
                set_mount_attributes(mnt, mnt_flags);
-       up_write(&sb->s_umount);
+       unlock_mount_hash();
+       up_read(&sb->s_umount);
 
        mnt_warn_timestamp_expiry(path, &mnt->mnt);
 
@@ -2616,8 +2635,11 @@ static int do_remount(struct path *path, int ms_flags, int sb_flags,
                err = -EPERM;
                if (ns_capable(sb->s_user_ns, CAP_SYS_ADMIN)) {
                        err = reconfigure_super(fc);
-                       if (!err)
+                       if (!err) {
+                               lock_mount_hash();
                                set_mount_attributes(mnt, mnt_flags);
+                               unlock_mount_hash();
+                       }
                }
                up_write(&sb->s_umount);
        }
@@ -3440,6 +3462,33 @@ out_type:
        return ret;
 }
 
+#define FSMOUNT_VALID_FLAGS \
+       (MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NODEV | \
+        MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME | MOUNT_ATTR_NODIRATIME)
+
+#define MOUNT_SETATTR_VALID_FLAGS (FSMOUNT_VALID_FLAGS | MOUNT_ATTR_IDMAP)
+
+#define MOUNT_SETATTR_PROPAGATION_FLAGS \
+       (MS_UNBINDABLE | MS_PRIVATE | MS_SLAVE | MS_SHARED)
+
+static unsigned int attr_flags_to_mnt_flags(u64 attr_flags)
+{
+       unsigned int mnt_flags = 0;
+
+       if (attr_flags & MOUNT_ATTR_RDONLY)
+               mnt_flags |= MNT_READONLY;
+       if (attr_flags & MOUNT_ATTR_NOSUID)
+               mnt_flags |= MNT_NOSUID;
+       if (attr_flags & MOUNT_ATTR_NODEV)
+               mnt_flags |= MNT_NODEV;
+       if (attr_flags & MOUNT_ATTR_NOEXEC)
+               mnt_flags |= MNT_NOEXEC;
+       if (attr_flags & MOUNT_ATTR_NODIRATIME)
+               mnt_flags |= MNT_NODIRATIME;
+
+       return mnt_flags;
+}
+
 /*
  * Create a kernel mount representation for a new, prepared superblock
  * (specified by fs_fd) and attach to an open_tree-like file descriptor.
@@ -3462,24 +3511,10 @@ SYSCALL_DEFINE3(fsmount, int, fs_fd, unsigned int, flags,
        if ((flags & ~(FSMOUNT_CLOEXEC)) != 0)
                return -EINVAL;
 
-       if (attr_flags & ~(MOUNT_ATTR_RDONLY |
-                          MOUNT_ATTR_NOSUID |
-                          MOUNT_ATTR_NODEV |
-                          MOUNT_ATTR_NOEXEC |
-                          MOUNT_ATTR__ATIME |
-                          MOUNT_ATTR_NODIRATIME))
+       if (attr_flags & ~FSMOUNT_VALID_FLAGS)
                return -EINVAL;
 
-       if (attr_flags & MOUNT_ATTR_RDONLY)
-               mnt_flags |= MNT_READONLY;
-       if (attr_flags & MOUNT_ATTR_NOSUID)
-               mnt_flags |= MNT_NOSUID;
-       if (attr_flags & MOUNT_ATTR_NODEV)
-               mnt_flags |= MNT_NODEV;
-       if (attr_flags & MOUNT_ATTR_NOEXEC)
-               mnt_flags |= MNT_NOEXEC;
-       if (attr_flags & MOUNT_ATTR_NODIRATIME)
-               mnt_flags |= MNT_NODIRATIME;
+       mnt_flags = attr_flags_to_mnt_flags(attr_flags);
 
        switch (attr_flags & MOUNT_ATTR__ATIME) {
        case MOUNT_ATTR_STRICTATIME:
@@ -3787,6 +3822,362 @@ out0:
        return error;
 }
 
+static unsigned int recalc_flags(struct mount_kattr *kattr, struct mount *mnt)
+{
+       unsigned int flags = mnt->mnt.mnt_flags;
+
+       /*  flags to clear */
+       flags &= ~kattr->attr_clr;
+       /* flags to raise */
+       flags |= kattr->attr_set;
+
+       return flags;
+}
+
+static int can_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
+{
+       struct vfsmount *m = &mnt->mnt;
+
+       if (!kattr->mnt_userns)
+               return 0;
+
+       /*
+        * Once a mount has been idmapped we don't allow it to change its
+        * mapping. It makes things simpler and callers can just create
+        * another bind-mount they can idmap if they want to.
+        */
+       if (mnt_user_ns(m) != &init_user_ns)
+               return -EPERM;
+
+       /* The underlying filesystem doesn't support idmapped mounts yet. */
+       if (!(m->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
+               return -EINVAL;
+
+       /* We're not controlling the superblock. */
+       if (!ns_capable(m->mnt_sb->s_user_ns, CAP_SYS_ADMIN))
+               return -EPERM;
+
+       /* Mount has already been visible in the filesystem hierarchy. */
+       if (!is_anon_ns(mnt->mnt_ns))
+               return -EINVAL;
+
+       return 0;
+}
+
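
Taken together, these checks mean userspace sets up an idmapped mount on a
freshly cloned, still-detached tree and only then attaches it. A sketch,
assuming kernel headers that already carry the mount_setattr(2), open_tree(2)
and move_mount(2) syscall numbers (error handling trimmed; userns_fd must
refer to a non-initial user namespace, e.g. one opened via /proc/<pid>/ns/user):

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <linux/mount.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int idmap_mount(const char *src, const char *dst, int userns_fd)
    {
            struct mount_attr attr = {
                    .attr_set  = MOUNT_ATTR_IDMAP,
                    .userns_fd = userns_fd,
            };
            int fd_tree;

            /* Detached copy of the tree: is_anon_ns() holds for it. */
            fd_tree = syscall(SYS_open_tree, AT_FDCWD, src,
                              OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);
            if (fd_tree < 0)
                    return -1;
            if (syscall(SYS_mount_setattr, fd_tree, "", AT_EMPTY_PATH,
                        &attr, sizeof(attr)) < 0)
                    return -1;
            /* The idmapped mount becomes visible only now. */
            return syscall(SYS_move_mount, fd_tree, "", AT_FDCWD, dst,
                           MOVE_MOUNT_F_EMPTY_PATH);
    }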
+static struct mount *mount_setattr_prepare(struct mount_kattr *kattr,
+                                          struct mount *mnt, int *err)
+{
+       struct mount *m = mnt, *last = NULL;
+
+       if (!is_mounted(&m->mnt)) {
+               *err = -EINVAL;
+               goto out;
+       }
+
+       if (!(mnt_has_parent(m) ? check_mnt(m) : is_anon_ns(m->mnt_ns))) {
+               *err = -EINVAL;
+               goto out;
+       }
+
+       do {
+               unsigned int flags;
+
+               flags = recalc_flags(kattr, m);
+               if (!can_change_locked_flags(m, flags)) {
+                       *err = -EPERM;
+                       goto out;
+               }
+
+               *err = can_idmap_mount(kattr, m);
+               if (*err)
+                       goto out;
+
+               last = m;
+
+               if ((kattr->attr_set & MNT_READONLY) &&
+                   !(m->mnt.mnt_flags & MNT_READONLY)) {
+                       *err = mnt_hold_writers(m);
+                       if (*err)
+                               goto out;
+               }
+       } while (kattr->recurse && (m = next_mnt(m, mnt)));
+
+out:
+       return last;
+}
+
+static void do_idmap_mount(const struct mount_kattr *kattr, struct mount *mnt)
+{
+       struct user_namespace *mnt_userns;
+
+       if (!kattr->mnt_userns)
+               return;
+
+       mnt_userns = get_user_ns(kattr->mnt_userns);
+       /* Pairs with smp_load_acquire() in mnt_user_ns(). */
+       smp_store_release(&mnt->mnt.mnt_userns, mnt_userns);
+}
+
+static void mount_setattr_commit(struct mount_kattr *kattr,
+                                struct mount *mnt, struct mount *last,
+                                int err)
+{
+       struct mount *m = mnt;
+
+       do {
+               if (!err) {
+                       unsigned int flags;
+
+                       do_idmap_mount(kattr, m);
+                       flags = recalc_flags(kattr, m);
+                       WRITE_ONCE(m->mnt.mnt_flags, flags);
+               }
+
+               /*
+                * Either we set MNT_READONLY above, which must become
+                * visible before ~MNT_WRITE_HOLD, or we failed to
+                * recursively apply mount options.
+                */
+               if ((kattr->attr_set & MNT_READONLY) &&
+                   (m->mnt.mnt_flags & MNT_WRITE_HOLD))
+                       mnt_unhold_writers(m);
+
+               if (!err && kattr->propagation)
+                       change_mnt_propagation(m, kattr->propagation);
+
+               /*
+                * On failure, only clean up until we reach the first mount
+                * we failed to handle.
+                */
+               if (err && m == last)
+                       break;
+       } while (kattr->recurse && (m = next_mnt(m, mnt)));
+
+       if (!err)
+               touch_mnt_namespace(mnt->mnt_ns);
+}
+
+static int do_mount_setattr(struct path *path, struct mount_kattr *kattr)
+{
+       struct mount *mnt = real_mount(path->mnt), *last = NULL;
+       int err = 0;
+
+       if (path->dentry != mnt->mnt.mnt_root)
+               return -EINVAL;
+
+       if (kattr->propagation) {
+               /*
+                * Only take namespace_lock() if we're actually changing
+                * propagation.
+                */
+               namespace_lock();
+               if (kattr->propagation == MS_SHARED) {
+                       err = invent_group_ids(mnt, kattr->recurse);
+                       if (err) {
+                               namespace_unlock();
+                               return err;
+                       }
+               }
+       }
+
+       lock_mount_hash();
+
+       /*
+        * Get the mount tree in a shape where we can change mount
+        * properties without failure.
+        */
+       last = mount_setattr_prepare(kattr, mnt, &err);
+       if (last) /* Commit all changes or revert to the old state. */
+               mount_setattr_commit(kattr, mnt, last, err);
+
+       unlock_mount_hash();
+
+       if (kattr->propagation) {
+               namespace_unlock();
+               if (err)
+                       cleanup_group_ids(mnt, NULL);
+       }
+
+       return err;
+}
+
+static int build_mount_idmapped(const struct mount_attr *attr, size_t usize,
+                               struct mount_kattr *kattr, unsigned int flags)
+{
+       int err = 0;
+       struct ns_common *ns;
+       struct user_namespace *mnt_userns;
+       struct file *file;
+
+       if (!((attr->attr_set | attr->attr_clr) & MOUNT_ATTR_IDMAP))
+               return 0;
+
+       /*
+        * We currently do not support clearing an idmapped mount. If this
+        * ever becomes a use-case we can revisit it, but for now keep it
+        * simple and disallow it.
+        */
+       if (attr->attr_clr & MOUNT_ATTR_IDMAP)
+               return -EINVAL;
+
+       if (attr->userns_fd > INT_MAX)
+               return -EINVAL;
+
+       file = fget(attr->userns_fd);
+       if (!file)
+               return -EBADF;
+
+       if (!proc_ns_file(file)) {
+               err = -EINVAL;
+               goto out_fput;
+       }
+
+       ns = get_proc_ns(file_inode(file));
+       if (ns->ops->type != CLONE_NEWUSER) {
+               err = -EINVAL;
+               goto out_fput;
+       }
+
+       /*
+        * The init_user_ns is used to indicate that a vfsmount is not idmapped.
+        * This is simpler than having to treat NULL as unmapped. Users
+        * wanting to idmap a mount to init_user_ns can just use a namespace
+        * with an identity mapping.
+        */
+       mnt_userns = container_of(ns, struct user_namespace, ns);
+       if (mnt_userns == &init_user_ns) {
+               err = -EPERM;
+               goto out_fput;
+       }
+       kattr->mnt_userns = get_user_ns(mnt_userns);
+
+out_fput:
+       fput(file);
+       return err;
+}
+
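
A usage sketch for the @userns_fd interface validated above (not part of
this series): the fd is simply an open user-namespace file of a process
that carries the desired idmapping, and since in this series an idmapping
can only be attached to a detached mount, the tree is first cloned with
open_tree(OPEN_TREE_CLONE) and re-attached with move_mount() afterwards.
The /srv/data and /mnt paths and the id range are hypothetical; it assumes
root, a filesystem with idmapped-mount support, and UAPI headers new
enough to provide struct mount_attr and __NR_mount_setattr.

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <sched.h>
    #include <signal.h>
    #include <stdio.h>
    #include <string.h>
    #include <sys/syscall.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <linux/mount.h>

    static void write_file(const char *path, const char *buf)
    {
            int fd = open(path, O_WRONLY);

            if (fd < 0 || write(fd, buf, strlen(buf)) != (ssize_t)strlen(buf))
                    perror(path);
            if (fd >= 0)
                    close(fd);
    }

    int main(void)
    {
            char path[64], c;
            int sync[2], fd_userns, fd_tree;
            pid_t pid;

            /* Child: create a fresh user namespace, then idle in it. */
            pipe(sync);
            pid = fork();
            if (pid == 0) {
                    close(sync[0]);
                    unshare(CLONE_NEWUSER);
                    write(sync[1], "x", 1); /* namespace now exists */
                    pause();
                    _exit(0);
            }
            close(sync[1]);
            read(sync[0], &c, 1);

            /* Map ids 0..999 in the new namespace to 10000..10999. */
            snprintf(path, sizeof(path), "/proc/%d/uid_map", pid);
            write_file(path, "0 10000 1000");
            snprintf(path, sizeof(path), "/proc/%d/gid_map", pid);
            write_file(path, "0 10000 1000");

            /* This fd is what ends up in mount_attr.userns_fd. */
            snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
            fd_userns = open(path, O_RDONLY | O_CLOEXEC);

            /* Idmapping applies to detached mounts only: clone the tree. */
            fd_tree = syscall(SYS_open_tree, AT_FDCWD, "/srv/data",
                              OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC);

            struct mount_attr attr = {
                    .attr_set  = MOUNT_ATTR_IDMAP,
                    .userns_fd = fd_userns,
            };
            if (syscall(SYS_mount_setattr, fd_tree, "", AT_EMPTY_PATH,
                        &attr, sizeof(attr)) < 0)
                    perror("mount_setattr");

            /* Attach the now-idmapped tree at /mnt. */
            syscall(SYS_move_mount, fd_tree, "", AT_FDCWD, "/mnt",
                    MOVE_MOUNT_F_EMPTY_PATH);

            kill(pid, SIGKILL);
            waitpid(pid, NULL, 0);
            return 0;
    }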
+static int build_mount_kattr(const struct mount_attr *attr, size_t usize,
+                            struct mount_kattr *kattr, unsigned int flags)
+{
+       unsigned int lookup_flags = LOOKUP_AUTOMOUNT | LOOKUP_FOLLOW;
+
+       if (flags & AT_NO_AUTOMOUNT)
+               lookup_flags &= ~LOOKUP_AUTOMOUNT;
+       if (flags & AT_SYMLINK_NOFOLLOW)
+               lookup_flags &= ~LOOKUP_FOLLOW;
+       if (flags & AT_EMPTY_PATH)
+               lookup_flags |= LOOKUP_EMPTY;
+
+       *kattr = (struct mount_kattr) {
+               .lookup_flags   = lookup_flags,
+               .recurse        = !!(flags & AT_RECURSIVE),
+       };
+
+       if (attr->propagation & ~MOUNT_SETATTR_PROPAGATION_FLAGS)
+               return -EINVAL;
+       if (hweight32(attr->propagation & MOUNT_SETATTR_PROPAGATION_FLAGS) > 1)
+               return -EINVAL;
+       kattr->propagation = attr->propagation;
+
+       if ((attr->attr_set | attr->attr_clr) & ~MOUNT_SETATTR_VALID_FLAGS)
+               return -EINVAL;
+
+       kattr->attr_set = attr_flags_to_mnt_flags(attr->attr_set);
+       kattr->attr_clr = attr_flags_to_mnt_flags(attr->attr_clr);
+
+       /*
+        * Since the MOUNT_ATTR_<atime> values are an enum, not a bitmap,
+        * users wanting to transition to a different atime setting cannot
+        * simply specify the atime setting in @attr_set, but must also
+        * specify MOUNT_ATTR__ATIME in the @attr_clr field.
+        * So ensure that MOUNT_ATTR__ATIME can't be partially set in
+        * @attr_clr and that @attr_set can't have any atime bits set if
+        * MOUNT_ATTR__ATIME isn't set in @attr_clr.
+        */
+       if (attr->attr_clr & MOUNT_ATTR__ATIME) {
+               if ((attr->attr_clr & MOUNT_ATTR__ATIME) != MOUNT_ATTR__ATIME)
+                       return -EINVAL;
+
+               /*
+                * Clear all previous time settings as they are mutually
+                * exclusive.
+                */
+               kattr->attr_clr |= MNT_RELATIME | MNT_NOATIME;
+               switch (attr->attr_set & MOUNT_ATTR__ATIME) {
+               case MOUNT_ATTR_RELATIME:
+                       kattr->attr_set |= MNT_RELATIME;
+                       break;
+               case MOUNT_ATTR_NOATIME:
+                       kattr->attr_set |= MNT_NOATIME;
+                       break;
+               case MOUNT_ATTR_STRICTATIME:
+                       break;
+               default:
+                       return -EINVAL;
+               }
+       } else {
+               if (attr->attr_set & MOUNT_ATTR__ATIME)
+                       return -EINVAL;
+       }
+
+       return build_mount_idmapped(attr, usize, kattr, flags);
+}
+
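
To illustrate the atime rule enforced above: switching the atime mode
requires naming the whole MOUNT_ATTR__ATIME field in @attr_clr. A
fragment (it plugs into a mount_setattr() call like the one sketched
after the syscall definition below):

    struct mount_attr attr = {
            /* Clear the current atime mode, whatever it is... */
            .attr_clr = MOUNT_ATTR__ATIME,
            /* ...then select exactly one replacement. */
            .attr_set = MOUNT_ATTR_NOATIME,
    };

Clearing only part of MOUNT_ATTR__ATIME, or setting an atime bit without
clearing the field, fails with -EINVAL. Note that MOUNT_ATTR_RELATIME is
defined as 0 in the UAPI, so it is the mode selected when @attr_set names
no atime bit at all.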
+static void finish_mount_kattr(struct mount_kattr *kattr)
+{
+       put_user_ns(kattr->mnt_userns);
+       kattr->mnt_userns = NULL;
+}
+
+SYSCALL_DEFINE5(mount_setattr, int, dfd, const char __user *, path,
+               unsigned int, flags, struct mount_attr __user *, uattr,
+               size_t, usize)
+{
+       int err;
+       struct path target;
+       struct mount_attr attr;
+       struct mount_kattr kattr;
+
+       BUILD_BUG_ON(sizeof(struct mount_attr) != MOUNT_ATTR_SIZE_VER0);
+
+       if (flags & ~(AT_EMPTY_PATH |
+                     AT_RECURSIVE |
+                     AT_SYMLINK_NOFOLLOW |
+                     AT_NO_AUTOMOUNT))
+               return -EINVAL;
+
+       if (unlikely(usize > PAGE_SIZE))
+               return -E2BIG;
+       if (unlikely(usize < MOUNT_ATTR_SIZE_VER0))
+               return -EINVAL;
+
+       if (!may_mount())
+               return -EPERM;
+
+       err = copy_struct_from_user(&attr, sizeof(attr), uattr, usize);
+       if (err)
+               return err;
+
+       /* Don't bother walking through the mounts if this is a nop. */
+       if (attr.attr_set == 0 &&
+           attr.attr_clr == 0 &&
+           attr.propagation == 0)
+               return 0;
+
+       err = build_mount_kattr(&attr, usize, &kattr, flags);
+       if (err)
+               return err;
+
+       err = user_path_at(dfd, path, kattr.lookup_flags, &target);
+       if (err)
+               return err;
+
+       err = do_mount_setattr(&target, &kattr);
+       finish_mount_kattr(&kattr);
+       path_put(&target);
+       return err;
+}
+
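
For reference, a minimal userspace sketch of the syscall defined above
(the /mnt path is a placeholder, and it assumes UAPI headers new enough
to provide struct mount_attr and __NR_mount_setattr). It makes a whole
mount tree read-only and nosuid in one call; the prepare/commit split
above guarantees the change is applied to every mount in the tree or
reverted entirely:

    #define _GNU_SOURCE
    #include <fcntl.h>
    #include <stdio.h>
    #include <sys/syscall.h>
    #include <unistd.h>
    #include <linux/mount.h>        /* struct mount_attr, MOUNT_ATTR_* */

    #ifndef AT_RECURSIVE
    #define AT_RECURSIVE 0x8000     /* from linux/fcntl.h */
    #endif

    int main(void)
    {
            struct mount_attr attr = {
                    .attr_set = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
            };

            if (syscall(SYS_mount_setattr, AT_FDCWD, "/mnt", AT_RECURSIVE,
                        &attr, sizeof(attr)) < 0) {
                    perror("mount_setattr");
                    return 1;
            }
            return 0;
    }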
 static void __init init_mount_tree(void)
 {
        struct vfsmount *mnt;
index e2a488d..14a7222 100644 (file)
@@ -127,7 +127,7 @@ config PNFS_BLOCK
 config PNFS_FLEXFILE_LAYOUT
        tristate
        depends on NFS_V4_1 && NFS_V3
-       default m
+       default NFS_V4
 
 config NFS_V4_1_IMPLEMENTATION_ID_DOMAIN
        string "NFSv4.1 Implementation ID Domain"
index 1a96ce2..fe860c5 100644 (file)
@@ -115,13 +115,13 @@ bl_submit_bio(struct bio *bio)
        return NULL;
 }
 
-static struct bio *
-bl_alloc_init_bio(int npg, struct block_device *bdev, sector_t disk_sector,
+static struct bio *bl_alloc_init_bio(unsigned int npg,
+               struct block_device *bdev, sector_t disk_sector,
                bio_end_io_t end_io, struct parallel_io *par)
 {
        struct bio *bio;
 
-       npg = min(npg, BIO_MAX_PAGES);
+       npg = bio_max_segs(npg);
        bio = bio_alloc(GFP_NOIO, npg);
        if (bio) {
                bio->bi_iter.bi_sector = disk_sector;
index ef827ae..fc4f490 100644 (file)
@@ -81,8 +81,9 @@ static struct nfs_open_dir_context *alloc_nfs_open_dir_context(struct inode *dir
                spin_lock(&dir->i_lock);
                if (list_empty(&nfsi->open_files) &&
                    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
-                       nfsi->cache_validity |= NFS_INO_INVALID_DATA |
-                               NFS_INO_REVAL_FORCED;
+                       nfs_set_cache_invalid(dir,
+                                             NFS_INO_INVALID_DATA |
+                                                     NFS_INO_REVAL_FORCED);
                list_add(&ctx->list, &nfsi->open_files);
                spin_unlock(&dir->i_lock);
                return ctx;
@@ -1401,6 +1402,13 @@ out_force:
        goto out;
 }
 
+static void nfs_mark_dir_for_revalidate(struct inode *inode)
+{
+       spin_lock(&inode->i_lock);
+       nfs_set_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE);
+       spin_unlock(&inode->i_lock);
+}
+
 /*
  * We judge how long we want to trust negative
  * dentries by looking at the parent inode mtime.
@@ -1435,19 +1443,14 @@ nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
                        __func__, dentry);
                return 1;
        case 0:
-               nfs_mark_for_revalidate(dir);
-               if (inode && S_ISDIR(inode->i_mode)) {
-                       /* Purge readdir caches. */
-                       nfs_zap_caches(inode);
-                       /*
-                        * We can't d_drop the root of a disconnected tree:
-                        * its d_hash is on the s_anon list and d_drop() would hide
-                        * it from shrink_dcache_for_unmount(), leading to busy
-                        * inodes on unmount and further oopses.
-                        */
-                       if (IS_ROOT(dentry))
-                               return 1;
-               }
+               /*
+                * We can't d_drop the root of a disconnected tree:
+                * its d_hash is on the s_anon list and d_drop() would hide
+                * it from shrink_dcache_for_unmount(), leading to busy
+                * inodes on unmount and further oopses.
+                */
+               if (inode && IS_ROOT(dentry))
+                       return 1;
                dfprintk(LOOKUPCACHE, "NFS: %s(%pd2) is invalid\n",
                                __func__, dentry);
                return 0;
@@ -1525,6 +1528,13 @@ out:
        nfs_free_fattr(fattr);
        nfs_free_fhandle(fhandle);
        nfs4_label_free(label);
+
+       /*
+        * If the lookup failed despite the dentry change attribute being
+        * a match, then we should revalidate the directory cache.
+        */
+       if (!ret && nfs_verify_change_attribute(dir, dentry->d_time))
+               nfs_mark_dir_for_revalidate(dir);
        return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
 }
 
@@ -1567,7 +1577,7 @@ nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
                error = nfs_lookup_verify_inode(inode, flags);
                if (error) {
                        if (error == -ESTALE)
-                               nfs_zap_caches(dir);
+                               nfs_mark_dir_for_revalidate(dir);
                        goto out_bad;
                }
                nfs_advise_use_readdirplus(dir);
@@ -1691,10 +1701,9 @@ static void nfs_drop_nlink(struct inode *inode)
        if (inode->i_nlink > 0)
                drop_nlink(inode);
        NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
-       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
-               | NFS_INO_INVALID_CTIME
-               | NFS_INO_INVALID_OTHER
-               | NFS_INO_REVAL_FORCED;
+       nfs_set_cache_invalid(
+               inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
+                              NFS_INO_INVALID_OTHER | NFS_INO_REVAL_FORCED);
        spin_unlock(&inode->i_lock);
 }
 
@@ -1706,7 +1715,7 @@ static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
 {
        if (S_ISDIR(inode->i_mode))
                /* drop any readdir cache as it could easily be old */
-               NFS_I(inode)->cache_validity |= NFS_INO_INVALID_DATA;
+               nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA);
 
        if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
                nfs_complete_unlink(dentry, inode);
@@ -2064,7 +2073,6 @@ out:
        dput(parent);
        return d;
 out_error:
-       nfs_mark_for_revalidate(dir);
        d = ERR_PTR(error);
        goto out;
 }
@@ -2095,8 +2103,8 @@ EXPORT_SYMBOL_GPL(nfs_instantiate);
  * that the operation succeeded on the server, but an error in the
  * reply path made it appear to have failed.
  */
-int nfs_create(struct inode *dir, struct dentry *dentry,
-               umode_t mode, bool excl)
+int nfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+              struct dentry *dentry, umode_t mode, bool excl)
 {
        struct iattr attr;
        int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
@@ -2124,7 +2132,8 @@ EXPORT_SYMBOL_GPL(nfs_create);
  * See comments for nfs_proc_create regarding failed operations.
  */
 int
-nfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+nfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+         struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct iattr attr;
        int status;
@@ -2150,7 +2159,8 @@ EXPORT_SYMBOL_GPL(nfs_mknod);
 /*
  * See comments for nfs_proc_create regarding failed operations.
  */
-int nfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+int nfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+             struct dentry *dentry, umode_t mode)
 {
        struct iattr attr;
        int error;
@@ -2295,7 +2305,8 @@ EXPORT_SYMBOL_GPL(nfs_unlink);
  * now have a new file handle and can instantiate an in-core NFS inode
  * and move the raw page into its mapping.
  */
-int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+int nfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+               struct dentry *dentry, const char *symname)
 {
        struct page *page;
        char *kaddr;
@@ -2398,9 +2409,9 @@ EXPORT_SYMBOL_GPL(nfs_link);
  * If these conditions are met, we can drop the dentries before doing
  * the rename.
  */
-int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-              struct inode *new_dir, struct dentry *new_dentry,
-              unsigned int flags)
+int nfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+              struct dentry *old_dentry, struct inode *new_dir,
+              struct dentry *new_dentry, unsigned int flags)
 {
        struct inode *old_inode = d_inode(old_dentry);
        struct inode *new_inode = d_inode(new_dentry);
@@ -2470,9 +2481,9 @@ int nfs_rename(struct inode *old_dir, struct dentry *old_dentry,
        if (error == 0) {
                spin_lock(&old_inode->i_lock);
                NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
-               NFS_I(old_inode)->cache_validity |= NFS_INO_INVALID_CHANGE
-                       | NFS_INO_INVALID_CTIME
-                       | NFS_INO_REVAL_FORCED;
+               nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE |
+                                                        NFS_INO_INVALID_CTIME |
+                                                        NFS_INO_REVAL_FORCED);
                spin_unlock(&old_inode->i_lock);
        }
 out:
@@ -2939,7 +2950,9 @@ static int nfs_execute_ok(struct inode *inode, int mask)
        return ret;
 }
 
-int nfs_permission(struct inode *inode, int mask)
+int nfs_permission(struct user_namespace *mnt_userns,
+                  struct inode *inode,
+                  int mask)
 {
        const struct cred *cred = current_cred();
        int res = 0;
@@ -2987,7 +3000,7 @@ out_notsup:
 
        res = nfs_revalidate_inode(NFS_SERVER(inode), inode);
        if (res == 0)
-               res = generic_permission(inode, mask);
+               res = generic_permission(&init_user_ns, inode, mask);
        goto out;
 }
 EXPORT_SYMBOL_GPL(nfs_permission);
index 63940a7..16ad505 100644 (file)
@@ -89,7 +89,7 @@ nfs_file_release(struct inode *inode, struct file *filp)
 EXPORT_SYMBOL_GPL(nfs_file_release);
 
 /**
- * nfs_revalidate_size - Revalidate the file size
+ * nfs_revalidate_file_size - Revalidate the file size
  * @inode: pointer to inode struct
  * @filp: pointer to struct file
  *
@@ -606,8 +606,8 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 {
        struct file *file = iocb->ki_filp;
        struct inode *inode = file_inode(file);
-       unsigned long written = 0;
-       ssize_t result;
+       unsigned int mntflags = NFS_SERVER(inode)->flags;
+       ssize_t result, written;
        errseq_t since;
        int error;
 
@@ -626,13 +626,13 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
        /*
         * O_APPEND implies that we must revalidate the file length.
         */
-       if (iocb->ki_flags & IOCB_APPEND) {
+       if (iocb->ki_flags & IOCB_APPEND || iocb->ki_pos > i_size_read(inode)) {
                result = nfs_revalidate_file_size(inode, file);
                if (result)
                        goto out;
        }
-       if (iocb->ki_pos > i_size_read(inode))
-               nfs_revalidate_mapping(inode, file->f_mapping);
+
+       nfs_clear_invalid_mapping(file->f_mapping);
 
        since = filemap_sample_wb_err(file->f_mapping);
        nfs_start_io_write(inode);
@@ -648,6 +648,21 @@ ssize_t nfs_file_write(struct kiocb *iocb, struct iov_iter *from)
 
        written = result;
        iocb->ki_pos += written;
+
+       if (mntflags & NFS_MOUNT_WRITE_EAGER) {
+               result = filemap_fdatawrite_range(file->f_mapping,
+                                                 iocb->ki_pos - written,
+                                                 iocb->ki_pos - 1);
+               if (result < 0)
+                       goto out;
+       }
+       if (mntflags & NFS_MOUNT_WRITE_WAIT) {
+               result = filemap_fdatawait_range(file->f_mapping,
+                                                iocb->ki_pos - written,
+                                                iocb->ki_pos - 1);
+               if (result < 0)
+                       goto out;
+       }
        result = generic_write_sync(iocb, written);
        if (result < 0)
                goto out;
index 06894bc..971a925 100644 (file)
@@ -82,6 +82,7 @@ enum nfs_param {
        Opt_v,
        Opt_vers,
        Opt_wsize,
+       Opt_write,
 };
 
 enum {
@@ -113,6 +114,19 @@ static const struct constant_table nfs_param_enums_lookupcache[] = {
        {}
 };
 
+enum {
+       Opt_write_lazy,
+       Opt_write_eager,
+       Opt_write_wait,
+};
+
+static const struct constant_table nfs_param_enums_write[] = {
+       { "lazy",               Opt_write_lazy },
+       { "eager",              Opt_write_eager },
+       { "wait",               Opt_write_wait },
+       {}
+};
+
 static const struct fs_parameter_spec nfs_fs_parameters[] = {
        fsparam_flag_no("ac",           Opt_ac),
        fsparam_u32   ("acdirmax",      Opt_acdirmax),
@@ -171,6 +185,7 @@ static const struct fs_parameter_spec nfs_fs_parameters[] = {
        fsparam_flag  ("v4.1",          Opt_v),
        fsparam_flag  ("v4.2",          Opt_v),
        fsparam_string("vers",          Opt_vers),
+       fsparam_enum  ("write",         Opt_write, nfs_param_enums_write),
        fsparam_u32   ("wsize",         Opt_wsize),
        {}
 };
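
The table entry above wires up a new tristate mount option; with a
hypothetical server and export it is used as, e.g.:

    mount -t nfs -o vers=4.2,write=eager server:/export /mnt

Per the parsing in the next hunk, write=lazy (the default) clears both
flags, write=eager sets NFS_MOUNT_WRITE_EAGER so nfs_file_write() starts
writeback as soon as the data is copied, and write=wait also sets
NFS_MOUNT_WRITE_WAIT so the write then waits for that writeback to
complete (see the nfs_file_write() hunk above).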
@@ -770,6 +785,24 @@ static int nfs_fs_context_parse_param(struct fs_context *fc,
                        goto out_invalid_value;
                }
                break;
+       case Opt_write:
+               switch (result.uint_32) {
+               case Opt_write_lazy:
+                       ctx->flags &=
+                               ~(NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT);
+                       break;
+               case Opt_write_eager:
+                       ctx->flags |= NFS_MOUNT_WRITE_EAGER;
+                       ctx->flags &= ~NFS_MOUNT_WRITE_WAIT;
+                       break;
+               case Opt_write_wait:
+                       ctx->flags |=
+                               NFS_MOUNT_WRITE_EAGER | NFS_MOUNT_WRITE_WAIT;
+                       break;
+               default:
+                       goto out_invalid_value;
+               }
+               break;
 
                /*
                 * Special options
@@ -1479,6 +1512,8 @@ static int nfs_init_fs_context(struct fs_context *fc)
                ctx->selected_flavor    = RPC_AUTH_MAXFLAVOR;
                ctx->minorversion       = 0;
                ctx->need_mount         = true;
+
+               fc->s_iflags            |= SB_I_STABLE_WRITES;
        }
        fc->fs_private = ctx;
        fc->ops = &nfs_fs_context_ops;
index a60df88..c4c021c 100644 (file)
@@ -390,10 +390,6 @@ static void nfs_readpage_from_fscache_complete(struct page *page,
        if (!error) {
                SetPageUptodate(page);
                unlock_page(page);
-       } else {
-               error = nfs_readpage_async(context, page->mapping->host, page);
-               if (error)
-                       unlock_page(page);
        }
 }
 
index 522aa10..a7fb076 100644 (file)
@@ -195,7 +195,19 @@ bool nfs_check_cache_invalid(struct inode *inode, unsigned long flags)
 }
 EXPORT_SYMBOL_GPL(nfs_check_cache_invalid);
 
-static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
+#ifdef CONFIG_NFS_V4_2
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+       return nfsi->xattr_cache != NULL;
+}
+#else
+static bool nfs_has_xattr_cache(const struct nfs_inode *nfsi)
+{
+       return false;
+}
+#endif
+
+void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
        bool have_delegation = NFS_PROTO(inode)->have_delegation(inode, FMODE_READ);
@@ -209,12 +221,15 @@ static void nfs_set_cache_invalid(struct inode *inode, unsigned long flags)
                                | NFS_INO_INVALID_XATTR);
        }
 
+       if (!nfs_has_xattr_cache(nfsi))
+               flags &= ~NFS_INO_INVALID_XATTR;
        if (inode->i_mapping->nrpages == 0)
                flags &= ~(NFS_INO_INVALID_DATA|NFS_INO_DATA_INVAL_DEFER);
        nfsi->cache_validity |= flags;
        if (flags & NFS_INO_INVALID_DATA)
                nfs_fscache_invalidate(inode);
 }
+EXPORT_SYMBOL_GPL(nfs_set_cache_invalid);
 
 /*
  * Invalidate the local caches
@@ -594,7 +609,8 @@ EXPORT_SYMBOL_GPL(nfs_fhget);
 #define NFS_VALID_ATTRS (ATTR_MODE|ATTR_UID|ATTR_GID|ATTR_SIZE|ATTR_ATIME|ATTR_ATIME_SET|ATTR_MTIME|ATTR_MTIME_SET|ATTR_FILE|ATTR_OPEN)
 
 int
-nfs_setattr(struct dentry *dentry, struct iattr *attr)
+nfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+           struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct nfs_fattr *fattr;
@@ -787,8 +803,8 @@ static bool nfs_need_revalidate_inode(struct inode *inode)
        return false;
 }
 
-int nfs_getattr(const struct path *path, struct kstat *stat,
-               u32 request_mask, unsigned int query_flags)
+int nfs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+               struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct nfs_server *server = NFS_SERVER(inode);
@@ -857,7 +873,7 @@ out_no_revalidate:
        /* Only return attributes that were revalidated. */
        stat->result_mask &= request_mask;
 out_no_update:
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode));
        if (S_ISDIR(inode->i_mode))
                stat->blksize = NFS_SERVER(inode)->dtsize;
@@ -1052,8 +1068,8 @@ void nfs_inode_attach_open_context(struct nfs_open_context *ctx)
        spin_lock(&inode->i_lock);
        if (list_empty(&nfsi->open_files) &&
            (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
-               nfsi->cache_validity |= NFS_INO_INVALID_DATA |
-                       NFS_INO_REVAL_FORCED;
+               nfs_set_cache_invalid(inode, NFS_INO_INVALID_DATA |
+                                                    NFS_INO_REVAL_FORCED);
        list_add_tail_rcu(&ctx->list, &nfsi->open_files);
        spin_unlock(&inode->i_lock);
 }
@@ -1257,55 +1273,19 @@ static int nfs_invalidate_mapping(struct inode *inode, struct address_space *map
        return 0;
 }
 
-bool nfs_mapping_need_revalidate_inode(struct inode *inode)
-{
-       return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
-               NFS_STALE(inode);
-}
-
-int nfs_revalidate_mapping_rcu(struct inode *inode)
-{
-       struct nfs_inode *nfsi = NFS_I(inode);
-       unsigned long *bitlock = &nfsi->flags;
-       int ret = 0;
-
-       if (IS_SWAPFILE(inode))
-               goto out;
-       if (nfs_mapping_need_revalidate_inode(inode)) {
-               ret = -ECHILD;
-               goto out;
-       }
-       spin_lock(&inode->i_lock);
-       if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
-           (nfsi->cache_validity & NFS_INO_INVALID_DATA))
-               ret = -ECHILD;
-       spin_unlock(&inode->i_lock);
-out:
-       return ret;
-}
-
 /**
- * nfs_revalidate_mapping - Revalidate the pagecache
- * @inode: pointer to host inode
+ * nfs_clear_invalid_mapping - Conditionally clear a mapping
  * @mapping: pointer to mapping
+ *
+ * If the NFS_INO_INVALID_DATA inode flag is set, clear the mapping.
  */
-int nfs_revalidate_mapping(struct inode *inode,
-               struct address_space *mapping)
+int nfs_clear_invalid_mapping(struct address_space *mapping)
 {
+       struct inode *inode = mapping->host;
        struct nfs_inode *nfsi = NFS_I(inode);
        unsigned long *bitlock = &nfsi->flags;
        int ret = 0;
 
-       /* swapfiles are not supposed to be shared. */
-       if (IS_SWAPFILE(inode))
-               goto out;
-
-       if (nfs_mapping_need_revalidate_inode(inode)) {
-               ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
-               if (ret < 0)
-                       goto out;
-       }
-
        /*
         * We must clear NFS_INO_INVALID_DATA first to ensure that
         * invalidations that come in while we're shooting down the mappings
@@ -1336,8 +1316,8 @@ int nfs_revalidate_mapping(struct inode *inode,
 
        set_bit(NFS_INO_INVALIDATING, bitlock);
        smp_wmb();
-       nfsi->cache_validity &= ~(NFS_INO_INVALID_DATA|
-                       NFS_INO_DATA_INVAL_DEFER);
+       nfsi->cache_validity &=
+               ~(NFS_INO_INVALID_DATA | NFS_INO_DATA_INVAL_DEFER);
        spin_unlock(&inode->i_lock);
        trace_nfs_invalidate_mapping_enter(inode);
        ret = nfs_invalidate_mapping(inode, mapping);
@@ -1350,6 +1330,53 @@ out:
        return ret;
 }
 
+bool nfs_mapping_need_revalidate_inode(struct inode *inode)
+{
+       return nfs_check_cache_invalid(inode, NFS_INO_REVAL_PAGECACHE) ||
+               NFS_STALE(inode);
+}
+
+int nfs_revalidate_mapping_rcu(struct inode *inode)
+{
+       struct nfs_inode *nfsi = NFS_I(inode);
+       unsigned long *bitlock = &nfsi->flags;
+       int ret = 0;
+
+       if (IS_SWAPFILE(inode))
+               goto out;
+       if (nfs_mapping_need_revalidate_inode(inode)) {
+               ret = -ECHILD;
+               goto out;
+       }
+       spin_lock(&inode->i_lock);
+       if (test_bit(NFS_INO_INVALIDATING, bitlock) ||
+           (nfsi->cache_validity & NFS_INO_INVALID_DATA))
+               ret = -ECHILD;
+       spin_unlock(&inode->i_lock);
+out:
+       return ret;
+}
+
+/**
+ * nfs_revalidate_mapping - Revalidate the pagecache
+ * @inode: pointer to host inode
+ * @mapping: pointer to mapping
+ */
+int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping)
+{
+       /* swapfiles are not supposed to be shared. */
+       if (IS_SWAPFILE(inode))
+               return 0;
+
+       if (nfs_mapping_need_revalidate_inode(inode)) {
+               int ret = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
+               if (ret < 0)
+                       return ret;
+       }
+
+       return nfs_clear_invalid_mapping(mapping);
+}
+
 static bool nfs_file_has_writers(struct nfs_inode *nfsi)
 {
        struct inode *inode = &nfsi->vfs_inode;
index 62d3189..7b644d6 100644 (file)
@@ -378,14 +378,18 @@ extern unsigned long nfs_access_cache_count(struct shrinker *shrink,
 extern unsigned long nfs_access_cache_scan(struct shrinker *shrink,
                                           struct shrink_control *sc);
 struct dentry *nfs_lookup(struct inode *, struct dentry *, unsigned int);
-int nfs_create(struct inode *, struct dentry *, umode_t, bool);
-int nfs_mkdir(struct inode *, struct dentry *, umode_t);
+int nfs_create(struct user_namespace *, struct inode *, struct dentry *,
+              umode_t, bool);
+int nfs_mkdir(struct user_namespace *, struct inode *, struct dentry *,
+             umode_t);
 int nfs_rmdir(struct inode *, struct dentry *);
 int nfs_unlink(struct inode *, struct dentry *);
-int nfs_symlink(struct inode *, struct dentry *, const char *);
+int nfs_symlink(struct user_namespace *, struct inode *, struct dentry *,
+               const char *);
 int nfs_link(struct dentry *, struct inode *, struct dentry *);
-int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-int nfs_rename(struct inode *, struct dentry *,
+int nfs_mknod(struct user_namespace *, struct inode *, struct dentry *, umode_t,
+             dev_t);
+int nfs_rename(struct user_namespace *, struct inode *, struct dentry *,
               struct inode *, struct dentry *, unsigned int);
 
 /* file.c */
@@ -407,7 +411,8 @@ extern int nfs_write_inode(struct inode *, struct writeback_control *);
 extern int nfs_drop_inode(struct inode *);
 extern void nfs_clear_inode(struct inode *);
 extern void nfs_evict_inode(struct inode *);
-void nfs_zap_acl_cache(struct inode *inode);
+extern void nfs_zap_acl_cache(struct inode *inode);
+extern void nfs_set_cache_invalid(struct inode *inode, unsigned long flags);
 extern bool nfs_check_cache_invalid(struct inode *, unsigned long);
 extern int nfs_wait_bit_killable(struct wait_bit_key *key, int mode);
 extern int nfs_wait_atomic_killable(atomic_t *p, unsigned int mode);
index 2bcbe38..93e60e9 100644 (file)
@@ -208,20 +208,23 @@ out_fc:
 }
 
 static int
-nfs_namespace_getattr(const struct path *path, struct kstat *stat,
-                       u32 request_mask, unsigned int query_flags)
+nfs_namespace_getattr(struct user_namespace *mnt_userns,
+                     const struct path *path, struct kstat *stat,
+                     u32 request_mask, unsigned int query_flags)
 {
        if (NFS_FH(d_inode(path->dentry))->size != 0)
-               return nfs_getattr(path, stat, request_mask, query_flags);
-       generic_fillattr(d_inode(path->dentry), stat);
+               return nfs_getattr(mnt_userns, path, stat, request_mask,
+                                  query_flags);
+       generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
        return 0;
 }
 
 static int
-nfs_namespace_setattr(struct dentry *dentry, struct iattr *attr)
+nfs_namespace_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                     struct iattr *attr)
 {
        if (NFS_FH(d_inode(dentry))->size != 0)
-               return nfs_setattr(dentry, attr);
+               return nfs_setattr(mnt_userns, dentry, attr);
        return -EACCES;
 }
 
index 1b950b6..c8a1928 100644 (file)
@@ -12,7 +12,8 @@
  */
 #ifdef CONFIG_NFS_V3_ACL
 extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type);
-extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                       struct posix_acl *acl, int type);
 extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
                struct posix_acl *dfacl);
 extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t);
index c6c8633..bb386a6 100644 (file)
@@ -111,6 +111,7 @@ struct posix_acl *nfs3_get_acl(struct inode *inode, int type)
                        fallthrough;
                case -ENOTSUPP:
                        status = -EOPNOTSUPP;
+                       goto getout;
                default:
                        goto getout;
        }
@@ -251,7 +252,8 @@ int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl,
 
 }
 
-int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int nfs3_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type)
 {
        struct posix_acl *orig = acl, *dfacl = NULL, *alloc;
        int status;
index ca10072..ed1c837 100644 (file)
@@ -36,6 +36,7 @@
 #define NFS3_pagepad_sz                (1) /* Page padding */
 #define NFS3_fhandle_sz                (1+16)
 #define NFS3_fh_sz             (NFS3_fhandle_sz)       /* shorthand */
+#define NFS3_post_op_fh_sz     (1+NFS3_fh_sz)
 #define NFS3_sattr_sz          (15)
 #define NFS3_filename_sz       (1+(NFS3_MAXNAMLEN>>2))
 #define NFS3_path_sz           (1+(NFS3_MAXPATHLEN>>2))
@@ -73,7 +74,7 @@
 #define NFS3_readlinkres_sz    (1+NFS3_post_op_attr_sz+1+NFS3_pagepad_sz)
 #define NFS3_readres_sz                (1+NFS3_post_op_attr_sz+3+NFS3_pagepad_sz)
 #define NFS3_writeres_sz       (1+NFS3_wcc_data_sz+4)
-#define NFS3_createres_sz      (1+NFS3_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
+#define NFS3_createres_sz      (1+NFS3_post_op_fh_sz+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
 #define NFS3_renameres_sz      (1+(2 * NFS3_wcc_data_sz))
 #define NFS3_linkres_sz                (1+NFS3_post_op_attr_sz+NFS3_wcc_data_sz)
 #define NFS3_readdirres_sz     (1+NFS3_post_op_attr_sz+2+NFS3_pagepad_sz)
index f3fd935..094024b 100644 (file)
@@ -357,13 +357,15 @@ static ssize_t _nfs42_proc_copy(struct file *src,
        truncate_pagecache_range(dst_inode, pos_dst,
                                 pos_dst + res->write_res.count);
        spin_lock(&dst_inode->i_lock);
-       NFS_I(dst_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
-                       NFS_INO_REVAL_FORCED | NFS_INO_INVALID_SIZE |
-                       NFS_INO_INVALID_ATTR | NFS_INO_INVALID_DATA);
+       nfs_set_cache_invalid(
+               dst_inode, NFS_INO_REVAL_PAGECACHE | NFS_INO_REVAL_FORCED |
+                                  NFS_INO_INVALID_SIZE | NFS_INO_INVALID_ATTR |
+                                  NFS_INO_INVALID_DATA);
        spin_unlock(&dst_inode->i_lock);
        spin_lock(&src_inode->i_lock);
-       NFS_I(src_inode)->cache_validity |= (NFS_INO_REVAL_PAGECACHE |
-                       NFS_INO_REVAL_FORCED | NFS_INO_INVALID_ATIME);
+       nfs_set_cache_invalid(src_inode, NFS_INO_REVAL_PAGECACHE |
+                                                NFS_INO_REVAL_FORCED |
+                                                NFS_INO_INVALID_ATIME);
        spin_unlock(&src_inode->i_lock);
        status = res->write_res.count;
 out:
index 86acffe..889a9f4 100644 (file)
@@ -609,6 +609,7 @@ found:
                         * changed. Schedule recovery!
                         */
                        nfs4_schedule_path_down_recovery(pos);
+                       goto out;
                default:
                        goto out;
                }
index 2f4679a..c65c4b4 100644 (file)
 
 #include "nfs4trace.h"
 
-#ifdef CONFIG_NFS_V4_2
-#include "nfs42.h"
-#endif /* CONFIG_NFS_V4_2 */
-
 #define NFSDBG_FACILITY                NFSDBG_PROC
 
 #define NFS4_BITMASK_SZ                3
@@ -1173,14 +1169,14 @@ int nfs4_call_sync(struct rpc_clnt *clnt,
 static void
 nfs4_inc_nlink_locked(struct inode *inode)
 {
-       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
+       nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
        inc_nlink(inode);
 }
 
 static void
 nfs4_dec_nlink_locked(struct inode *inode)
 {
-       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
+       nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
        drop_nlink(inode);
 }
 
@@ -1191,35 +1187,31 @@ nfs4_update_changeattr_locked(struct inode *inode,
 {
        struct nfs_inode *nfsi = NFS_I(inode);
 
-       nfsi->cache_validity |= NFS_INO_INVALID_CTIME
-               | NFS_INO_INVALID_MTIME
-               | cache_validity;
+       cache_validity |= NFS_INO_INVALID_CTIME | NFS_INO_INVALID_MTIME;
 
        if (cinfo->atomic && cinfo->before == inode_peek_iversion_raw(inode)) {
                nfsi->cache_validity &= ~NFS_INO_REVAL_PAGECACHE;
                nfsi->attrtimeo_timestamp = jiffies;
        } else {
                if (S_ISDIR(inode->i_mode)) {
-                       nfsi->cache_validity |= NFS_INO_INVALID_DATA;
+                       cache_validity |= NFS_INO_INVALID_DATA;
                        nfs_force_lookup_revalidate(inode);
                } else {
                        if (!NFS_PROTO(inode)->have_delegation(inode,
                                                               FMODE_READ))
-                               nfsi->cache_validity |= NFS_INO_REVAL_PAGECACHE;
+                               cache_validity |= NFS_INO_REVAL_PAGECACHE;
                }
 
                if (cinfo->before != inode_peek_iversion_raw(inode))
-                       nfsi->cache_validity |= NFS_INO_INVALID_ACCESS |
-                                               NFS_INO_INVALID_ACL |
-                                               NFS_INO_INVALID_XATTR;
+                       cache_validity |= NFS_INO_INVALID_ACCESS |
+                                         NFS_INO_INVALID_ACL |
+                                         NFS_INO_INVALID_XATTR;
        }
        inode_set_iversion_raw(inode, cinfo->after);
        nfsi->read_cache_jiffies = timestamp;
        nfsi->attr_gencount = nfs_inc_attr_generation_counter();
+       nfs_set_cache_invalid(inode, cache_validity);
        nfsi->cache_validity &= ~NFS_INO_INVALID_CHANGE;
-
-       if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
-               nfs_fscache_invalidate(inode);
 }
 
 void
@@ -2231,6 +2223,7 @@ static int nfs4_handle_delegation_recall_error(struct nfs_server *server, struct
                default:
                        printk(KERN_ERR "NFS: %s: unhandled error "
                                        "%d.\n", __func__, err);
+                       fallthrough;
                case 0:
                case -ENOENT:
                case -EAGAIN:
@@ -5438,15 +5431,16 @@ static void nfs4_bitmask_adjust(__u32 *bitmask, struct inode *inode,
 
        if (cache_validity & NFS_INO_INVALID_ATIME)
                bitmask[1] |= FATTR4_WORD1_TIME_ACCESS;
-       if (cache_validity & NFS_INO_INVALID_ACCESS)
-               bitmask[0] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
-                               FATTR4_WORD1_OWNER_GROUP;
-       if (cache_validity & NFS_INO_INVALID_ACL)
-               bitmask[0] |= FATTR4_WORD0_ACL;
-       if (cache_validity & NFS_INO_INVALID_LABEL)
+       if (cache_validity & NFS_INO_INVALID_OTHER)
+               bitmask[1] |= FATTR4_WORD1_MODE | FATTR4_WORD1_OWNER |
+                               FATTR4_WORD1_OWNER_GROUP |
+                               FATTR4_WORD1_NUMLINKS;
+       if (label && label->len && cache_validity & NFS_INO_INVALID_LABEL)
                bitmask[2] |= FATTR4_WORD2_SECURITY_LABEL;
-       if (cache_validity & NFS_INO_INVALID_CTIME)
+       if (cache_validity & NFS_INO_INVALID_CHANGE)
                bitmask[0] |= FATTR4_WORD0_CHANGE;
+       if (cache_validity & NFS_INO_INVALID_CTIME)
+               bitmask[1] |= FATTR4_WORD1_TIME_METADATA;
        if (cache_validity & NFS_INO_INVALID_MTIME)
                bitmask[1] |= FATTR4_WORD1_TIME_MODIFY;
        if (cache_validity & NFS_INO_INVALID_SIZE)
@@ -5895,6 +5889,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
        unsigned int npages = DIV_ROUND_UP(buflen, PAGE_SIZE);
        int ret, i;
 
+       /* You can't remove system.nfs4_acl: */
+       if (buflen == 0)
+               return -EINVAL;
        if (!nfs4_server_supports_acls(server))
                return -EOPNOTSUPP;
        if (npages > ARRAY_SIZE(pages))
@@ -5917,9 +5914,9 @@ static int __nfs4_proc_set_acl(struct inode *inode, const void *buf, size_t bufl
         * so mark the attribute cache invalid.
         */
        spin_lock(&inode->i_lock);
-       NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
-               | NFS_INO_INVALID_CTIME
-               | NFS_INO_REVAL_FORCED;
+       nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                                            NFS_INO_INVALID_CTIME |
+                                            NFS_INO_REVAL_FORCED);
        spin_unlock(&inode->i_lock);
        nfs_access_zap_cache(inode);
        nfs_zap_acl_cache(inode);
@@ -5971,7 +5968,7 @@ static int _nfs4_get_security_label(struct inode *inode, void *buf,
                return ret;
        if (!(fattr.valid & NFS_ATTR_FATTR_V4_SECURITY_LABEL))
                return -ENOENT;
-       return 0;
+       return label.len;
 }
 
 static int nfs4_get_security_label(struct inode *inode, void *buf,
@@ -7491,6 +7488,7 @@ nfs4_release_lockowner(struct nfs_server *server, struct nfs4_lock_state *lsp)
 #define XATTR_NAME_NFSV4_ACL "system.nfs4_acl"
 
 static int nfs4_xattr_set_nfs4_acl(const struct xattr_handler *handler,
+                                  struct user_namespace *mnt_userns,
                                   struct dentry *unused, struct inode *inode,
                                   const char *key, const void *buf,
                                   size_t buflen, int flags)
@@ -7513,6 +7511,7 @@ static bool nfs4_xattr_list_nfs4_acl(struct dentry *dentry)
 #ifdef CONFIG_NFS_V4_SECURITY_LABEL
 
 static int nfs4_xattr_set_nfs4_label(const struct xattr_handler *handler,
+                                    struct user_namespace *mnt_userns,
                                     struct dentry *unused, struct inode *inode,
                                     const char *key, const void *buf,
                                     size_t buflen, int flags)
@@ -7563,6 +7562,7 @@ nfs4_listxattr_nfs4_label(struct inode *inode, char *list, size_t list_len)
 
 #ifdef CONFIG_NFS_V4_2
 static int nfs4_xattr_set_nfs4_user(const struct xattr_handler *handler,
+                                   struct user_namespace *mnt_userns,
                                    struct dentry *unused, struct inode *inode,
                                    const char *key, const void *buf,
                                    size_t buflen, int flags)
@@ -9705,6 +9705,7 @@ nfs4_layoutcommit_done(struct rpc_task *task, void *calldata)
        case -NFS4ERR_BADLAYOUT:     /* no layout */
        case -NFS4ERR_GRACE:        /* loca_reclaim always false */
                task->tk_status = 0;
+               break;
        case 0:
                break;
        default:
index 4bf1079..3a51351 100644 (file)
@@ -1125,6 +1125,7 @@ static void nfs_increment_seqid(int status, struct nfs_seqid *seqid)
                                        " sequence-id error on an"
                                        " unconfirmed sequence %p!\n",
                                        seqid->sequence);
+                       return;
                case -NFS4ERR_STALE_CLIENTID:
                case -NFS4ERR_STALE_STATEID:
                case -NFS4ERR_BAD_STATEID:
index af64b4e..102b66e 100644 (file)
@@ -2875,6 +2875,7 @@ pnfs_do_write(struct nfs_pageio_descriptor *desc,
        switch (trypnfs) {
        case PNFS_NOT_ATTEMPTED:
                pnfs_write_through_mds(desc, hdr);
+               break;
        case PNFS_ATTEMPTED:
                break;
        case PNFS_TRY_AGAIN:
@@ -3019,6 +3020,7 @@ pnfs_do_read(struct nfs_pageio_descriptor *desc, struct nfs_pgio_header *hdr)
        switch (trypnfs) {
        case PNFS_NOT_ATTEMPTED:
                pnfs_read_through_mds(desc, hdr);
+               break;
        case PNFS_ATTEMPTED:
                break;
        case PNFS_TRY_AGAIN:
index eb854f1..d2b6dce 100644 (file)
@@ -74,6 +74,24 @@ void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
 }
 EXPORT_SYMBOL_GPL(nfs_pageio_init_read);
 
+static void nfs_pageio_complete_read(struct nfs_pageio_descriptor *pgio,
+                                    struct inode *inode)
+{
+       struct nfs_pgio_mirror *pgm;
+       unsigned long npages;
+
+       nfs_pageio_complete(pgio);
+
+       /* It doesn't make sense to do mirrored reads! */
+       WARN_ON_ONCE(pgio->pg_mirror_count != 1);
+
+       pgm = &pgio->pg_mirrors[0];
+       NFS_I(inode)->read_io += pgm->pg_bytes_written;
+       npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >> PAGE_SHIFT;
+       nfs_add_stats(inode, NFSIOS_READPAGES, npages);
+}
+
+
 void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
 {
        struct nfs_pgio_mirror *mirror;
@@ -114,41 +132,10 @@ static void nfs_readpage_release(struct nfs_page *req, int error)
        nfs_release_request(req);
 }
 
-int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
-                      struct page *page)
-{
-       struct nfs_page *new;
-       unsigned int len;
+struct nfs_readdesc {
        struct nfs_pageio_descriptor pgio;
-       struct nfs_pgio_mirror *pgm;
-
-       len = nfs_page_length(page);
-       if (len == 0)
-               return nfs_return_empty_page(page);
-       new = nfs_create_request(ctx, page, 0, len);
-       if (IS_ERR(new)) {
-               unlock_page(page);
-               return PTR_ERR(new);
-       }
-       if (len < PAGE_SIZE)
-               zero_user_segment(page, len, PAGE_SIZE);
-
-       nfs_pageio_init_read(&pgio, inode, false,
-                            &nfs_async_read_completion_ops);
-       if (!nfs_pageio_add_request(&pgio, new)) {
-               nfs_list_remove_request(new);
-               nfs_readpage_release(new, pgio.pg_error);
-       }
-       nfs_pageio_complete(&pgio);
-
-       /* It doesn't make sense to do mirrored reads! */
-       WARN_ON_ONCE(pgio.pg_mirror_count != 1);
-
-       pgm = &pgio.pg_mirrors[0];
-       NFS_I(inode)->read_io += pgm->pg_bytes_written;
-
-       return pgio.pg_error < 0 ? pgio.pg_error : 0;
-}
+       struct nfs_open_context *ctx;
+};
 
 static void nfs_page_group_set_uptodate(struct nfs_page *req)
 {
@@ -171,8 +158,7 @@ static void nfs_read_completion(struct nfs_pgio_header *hdr)
 
                if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
                        /* note: regions of the page not covered by a
-                        * request are zeroed in nfs_readpage_async /
-                        * readpage_async_filler */
+                        * request are zeroed in readpage_async_filler */
                        if (bytes > hdr->good_bytes) {
                                /* nothing in this request was good, so zero
                                 * the full extent of the request */
@@ -304,6 +290,38 @@ static void nfs_readpage_result(struct rpc_task *task,
                nfs_readpage_retry(task, hdr);
 }
 
+static int
+readpage_async_filler(void *data, struct page *page)
+{
+       struct nfs_readdesc *desc = data;
+       struct nfs_page *new;
+       unsigned int len;
+       int error;
+
+       len = nfs_page_length(page);
+       if (len == 0)
+               return nfs_return_empty_page(page);
+
+       new = nfs_create_request(desc->ctx, page, 0, len);
+       if (IS_ERR(new))
+               goto out_error;
+
+       if (len < PAGE_SIZE)
+               zero_user_segment(page, len, PAGE_SIZE);
+       if (!nfs_pageio_add_request(&desc->pgio, new)) {
+               nfs_list_remove_request(new);
+               error = desc->pgio.pg_error;
+               nfs_readpage_release(new, error);
+               goto out;
+       }
+       return 0;
+out_error:
+       error = PTR_ERR(new);
+       unlock_page(page);
+out:
+       return error;
+}
+
 /*
  * Read a page over NFS.
  * We read the page synchronously in the following case:
@@ -312,14 +330,13 @@ static void nfs_readpage_result(struct rpc_task *task,
  */
 int nfs_readpage(struct file *file, struct page *page)
 {
-       struct nfs_open_context *ctx;
+       struct nfs_readdesc desc;
        struct inode *inode = page_file_mapping(page)->host;
-       int             error;
+       int ret;
 
        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_SIZE, page_index(page));
        nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
-       nfs_add_stats(inode, NFSIOS_READPAGES, 1);
 
        /*
         * Try to flush any pending writes to the file.
@@ -328,93 +345,59 @@ int nfs_readpage(struct file *file, struct page *page)
         * be any new pending writes generated at this point
         * for this page (other pages can be written to).
         */
-       error = nfs_wb_page(inode, page);
-       if (error)
+       ret = nfs_wb_page(inode, page);
+       if (ret)
                goto out_unlock;
        if (PageUptodate(page))
                goto out_unlock;
 
-       error = -ESTALE;
+       ret = -ESTALE;
        if (NFS_STALE(inode))
                goto out_unlock;
 
        if (file == NULL) {
-               error = -EBADF;
-               ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
-               if (ctx == NULL)
+               ret = -EBADF;
+               desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
+               if (desc.ctx == NULL)
                        goto out_unlock;
        } else
-               ctx = get_nfs_open_context(nfs_file_open_context(file));
+               desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
 
        if (!IS_SYNC(inode)) {
-               error = nfs_readpage_from_fscache(ctx, inode, page);
-               if (error == 0)
+               ret = nfs_readpage_from_fscache(desc.ctx, inode, page);
+               if (ret == 0)
                        goto out;
        }
 
-       xchg(&ctx->error, 0);
-       error = nfs_readpage_async(ctx, inode, page);
-       if (!error) {
-               error = wait_on_page_locked_killable(page);
-               if (!PageUptodate(page) && !error)
-                       error = xchg(&ctx->error, 0);
-       }
-out:
-       put_nfs_open_context(ctx);
-       return error;
-out_unlock:
-       unlock_page(page);
-       return error;
-}
-
-struct nfs_readdesc {
-       struct nfs_pageio_descriptor *pgio;
-       struct nfs_open_context *ctx;
-};
-
-static int
-readpage_async_filler(void *data, struct page *page)
-{
-       struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
-       struct nfs_page *new;
-       unsigned int len;
-       int error;
+       xchg(&desc.ctx->error, 0);
+       nfs_pageio_init_read(&desc.pgio, inode, false,
+                            &nfs_async_read_completion_ops);
 
-       len = nfs_page_length(page);
-       if (len == 0)
-               return nfs_return_empty_page(page);
+       ret = readpage_async_filler(&desc, page);
 
-       new = nfs_create_request(desc->ctx, page, 0, len);
-       if (IS_ERR(new))
-               goto out_error;
+       if (!ret)
+               nfs_pageio_complete_read(&desc.pgio, inode);
 
-       if (len < PAGE_SIZE)
-               zero_user_segment(page, len, PAGE_SIZE);
-       if (!nfs_pageio_add_request(desc->pgio, new)) {
-               nfs_list_remove_request(new);
-               error = desc->pgio->pg_error;
-               nfs_readpage_release(new, error);
-               goto out;
+       ret = desc.pgio.pg_error < 0 ? desc.pgio.pg_error : 0;
+       if (!ret) {
+               ret = wait_on_page_locked_killable(page);
+               if (!PageUptodate(page) && !ret)
+                       ret = xchg(&desc.ctx->error, 0);
        }
-       return 0;
-out_error:
-       error = PTR_ERR(new);
-       unlock_page(page);
 out:
-       return error;
+       put_nfs_open_context(desc.ctx);
+       return ret;
+out_unlock:
+       unlock_page(page);
+       return ret;
 }
 
-int nfs_readpages(struct file *filp, struct address_space *mapping,
+int nfs_readpages(struct file *file, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
 {
-       struct nfs_pageio_descriptor pgio;
-       struct nfs_pgio_mirror *pgm;
-       struct nfs_readdesc desc = {
-               .pgio = &pgio,
-       };
+       struct nfs_readdesc desc;
        struct inode *inode = mapping->host;
-       unsigned long npages;
-       int ret = -ESTALE;
+       int ret;
 
        dprintk("NFS: nfs_readpages (%s/%Lu %d)\n",
                        inode->i_sb->s_id,
@@ -422,15 +405,17 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
                        nr_pages);
        nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);
 
+       ret = -ESTALE;
        if (NFS_STALE(inode))
                goto out;
 
-       if (filp == NULL) {
+       if (file == NULL) {
+               ret = -EBADF;
                desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
                if (desc.ctx == NULL)
-                       return -EBADF;
+                       goto out;
        } else
-               desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));
+               desc.ctx = get_nfs_open_context(nfs_file_open_context(file));
 
        /* attempt to read as many of the pages as possible from the cache
         * - this returns -ENOBUFS immediately if the cookie is negative
@@ -440,20 +425,13 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
        if (ret == 0)
                goto read_complete; /* all pages were read */
 
-       nfs_pageio_init_read(&pgio, inode, false,
+       nfs_pageio_init_read(&desc.pgio, inode, false,
                             &nfs_async_read_completion_ops);
 
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
-       nfs_pageio_complete(&pgio);
 
-       /* It doesn't make sense to do mirrored reads! */
-       WARN_ON_ONCE(pgio.pg_mirror_count != 1);
+       nfs_pageio_complete_read(&desc.pgio, inode);
 
-       pgm = &pgio.pg_mirrors[0];
-       NFS_I(inode)->read_io += pgm->pg_bytes_written;
-       npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >>
-                PAGE_SHIFT;
-       nfs_add_stats(inode, NFSIOS_READPAGES, npages);
 read_complete:
        put_nfs_open_context(desc.ctx);
 out:
index c7a9245..94885c6 100644 (file)
@@ -523,6 +523,13 @@ static void nfs_show_mount_options(struct seq_file *m, struct nfs_server *nfss,
                seq_puts(m, ",local_lock=flock");
        else
                seq_puts(m, ",local_lock=posix");
+
+       if (nfss->flags & NFS_MOUNT_WRITE_EAGER) {
+               if (nfss->flags & NFS_MOUNT_WRITE_WAIT)
+                       seq_puts(m, ",write=wait");
+               else
+                       seq_puts(m, ",write=eager");
+       }
 }
 
 /*
index b27ebdc..5fa11e1 100644 (file)
@@ -500,9 +500,9 @@ nfs_sillyrename(struct inode *dir, struct dentry *dentry)
                nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
                spin_lock(&inode->i_lock);
                NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
-               NFS_I(inode)->cache_validity |= NFS_INO_INVALID_CHANGE
-                       | NFS_INO_INVALID_CTIME
-                       | NFS_INO_REVAL_FORCED;
+               nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE |
+                                                    NFS_INO_INVALID_CTIME |
+                                                    NFS_INO_REVAL_FORCED);
                spin_unlock(&inode->i_lock);
                d_move(dentry, sdentry);
                break;
index 639c34f..f05a903 100644 (file)
@@ -303,9 +303,9 @@ static void nfs_set_pageerror(struct address_space *mapping)
        nfs_zap_mapping(mapping->host, mapping);
        /* Force file size revalidation */
        spin_lock(&inode->i_lock);
-       NFS_I(inode)->cache_validity |= NFS_INO_REVAL_FORCED |
-                                       NFS_INO_REVAL_PAGECACHE |
-                                       NFS_INO_INVALID_SIZE;
+       nfs_set_cache_invalid(inode, NFS_INO_REVAL_FORCED |
+                                            NFS_INO_REVAL_PAGECACHE |
+                                            NFS_INO_INVALID_SIZE);
        spin_unlock(&inode->i_lock);
 }
 
@@ -712,16 +712,23 @@ int nfs_writepages(struct address_space *mapping, struct writeback_control *wbc)
 {
        struct inode *inode = mapping->host;
        struct nfs_pageio_descriptor pgio;
-       struct nfs_io_completion *ioc;
+       struct nfs_io_completion *ioc = NULL;
+       unsigned int mntflags = NFS_SERVER(inode)->flags;
+       int priority = 0;
        int err;
 
        nfs_inc_stats(inode, NFSIOS_VFSWRITEPAGES);
 
-       ioc = nfs_io_completion_alloc(GFP_KERNEL);
-       if (ioc)
-               nfs_io_completion_init(ioc, nfs_io_completion_commit, inode);
+       if (!(mntflags & NFS_MOUNT_WRITE_EAGER) || wbc->for_kupdate ||
+           wbc->for_background || wbc->for_sync || wbc->for_reclaim) {
+               ioc = nfs_io_completion_alloc(GFP_KERNEL);
+               if (ioc)
+                       nfs_io_completion_init(ioc, nfs_io_completion_commit,
+                                              inode);
+               priority = wb_priority(wbc);
+       }
 
-       nfs_pageio_init_write(&pgio, inode, wb_priority(wbc), false,
+       nfs_pageio_init_write(&pgio, inode, priority, false,
                                &nfs_async_write_completion_ops);
        pgio.pg_io_completion = ioc;
        err = write_cache_pages(mapping, wbc, nfs_writepages_callback, &pgio);
@@ -1278,19 +1285,21 @@ bool nfs_ctx_key_to_expire(struct nfs_open_context *ctx, struct inode *inode)
  * the PageUptodate() flag. In this case, we will need to turn off
  * write optimisations that depend on the page contents being correct.
  */
-static bool nfs_write_pageuptodate(struct page *page, struct inode *inode)
+static bool nfs_write_pageuptodate(struct page *page, struct inode *inode,
+                                  unsigned int pagelen)
 {
        struct nfs_inode *nfsi = NFS_I(inode);
 
        if (nfs_have_delegated_attributes(inode))
                goto out;
-       if (nfsi->cache_validity & NFS_INO_REVAL_PAGECACHE)
+       if (nfsi->cache_validity &
+           (NFS_INO_REVAL_PAGECACHE | NFS_INO_INVALID_SIZE))
                return false;
        smp_rmb();
-       if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags))
+       if (test_bit(NFS_INO_INVALIDATING, &nfsi->flags) && pagelen != 0)
                return false;
 out:
-       if (nfsi->cache_validity & NFS_INO_INVALID_DATA)
+       if (nfsi->cache_validity & NFS_INO_INVALID_DATA && pagelen != 0)
                return false;
        return PageUptodate(page) != 0;
 }
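
The new pagelen argument is the number of bytes of the page that lie below i_size; zero means the page sits entirely beyond EOF, so there is no cached data that could be stale and the two data-invalidation checks can be skipped. For reference, nfs_page_length() in fs/nfs/internal.h is defined roughly as:

static inline unsigned int nfs_page_length(struct page *page)
{
	loff_t i_size = i_size_read(page_file_mapping(page)->host);

	if (i_size > 0) {
		pgoff_t index = page_index(page);
		pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;

		/* Pages before the EOF page are fully valid ... */
		if (index < end_index)
			return PAGE_SIZE;
		/* ... the EOF page is valid up to i_size ... */
		if (index == end_index)
			return ((i_size - 1) & ~PAGE_MASK) + 1;
	}
	/* ... and pages past EOF contain no valid bytes at all. */
	return 0;
}
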
@@ -1310,7 +1319,8 @@ is_whole_file_wrlock(struct file_lock *fl)
  * If the file is opened for synchronous writes then we can just skip the rest
  * of the checks.
  */
-static int nfs_can_extend_write(struct file *file, struct page *page, struct inode *inode)
+static int nfs_can_extend_write(struct file *file, struct page *page,
+                               struct inode *inode, unsigned int pagelen)
 {
        int ret;
        struct file_lock_context *flctx = inode->i_flctx;
@@ -1318,7 +1328,7 @@ static int nfs_can_extend_write(struct file *file, struct page *page, struct ino
 
        if (file->f_flags & O_DSYNC)
                return 0;
-       if (!nfs_write_pageuptodate(page, inode))
+       if (!nfs_write_pageuptodate(page, inode, pagelen))
                return 0;
        if (NFS_PROTO(inode)->have_delegation(inode, FMODE_WRITE))
                return 1;
@@ -1356,6 +1366,7 @@ int nfs_updatepage(struct file *file, struct page *page,
        struct nfs_open_context *ctx = nfs_file_open_context(file);
        struct address_space *mapping = page_file_mapping(page);
        struct inode    *inode = mapping->host;
+       unsigned int    pagelen = nfs_page_length(page);
        int             status = 0;
 
        nfs_inc_stats(inode, NFSIOS_VFSUPDATEPAGE);
@@ -1366,8 +1377,8 @@ int nfs_updatepage(struct file *file, struct page *page,
        if (!count)
                goto out;
 
-       if (nfs_can_extend_write(file, page, inode)) {
-               count = max(count + offset, nfs_page_length(page));
+       if (nfs_can_extend_write(file, page, inode, pagelen)) {
+               count = max(count + offset, pagelen);
                offset = 0;
        }
 
@@ -1593,7 +1604,7 @@ static int nfs_writeback_done(struct rpc_task *task,
        /* Deal with the suid/sgid bit corner case */
        if (nfs_should_remove_suid(inode)) {
                spin_lock(&inode->i_lock);
-               NFS_I(inode)->cache_validity |= NFS_INO_INVALID_OTHER;
+               nfs_set_cache_invalid(inode, NFS_INO_INVALID_OTHER);
                spin_unlock(&inode->i_lock);
        }
        return 0;
index 821e591..d6cff5f 100644 (file)
@@ -73,6 +73,7 @@ config NFSD_V4
        select NFSD_V3
        select FS_POSIX_ACL
        select SUNRPC_GSS
+       select CRYPTO
        select CRYPTO_MD5
        select CRYPTO_SHA256
        select GRACE_PERIOD
index 7c863f2..9421dae 100644 (file)
@@ -386,8 +386,9 @@ static struct svc_export *svc_export_update(struct svc_export *new,
                                            struct svc_export *old);
 static struct svc_export *svc_export_lookup(struct svc_export *);
 
-static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
+static int check_export(struct path *path, int *flags, unsigned char *uuid)
 {
+       struct inode *inode = d_inode(path->dentry);
 
        /*
         * We currently export only dirs, regular files, and (for v4
@@ -411,6 +412,7 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
         *       or an FSID number (so NFSEXP_FSID or ->uuid is needed).
         * 2:  We must be able to find an inode from a filehandle.
         *       This means that s_export_op must be set.
+        * 3: We must not currently be on an idmapped mount.
         */
        if (!(inode->i_sb->s_type->fs_flags & FS_REQUIRES_DEV) &&
            !(*flags & NFSEXP_FSID) &&
@@ -425,6 +427,11 @@ static int check_export(struct inode *inode, int *flags, unsigned char *uuid)
                return -EINVAL;
        }
 
+       if (mnt_user_ns(path->mnt) != &init_user_ns) {
+               dprintk("exp_export: export of idmapped mounts not yet supported.\n");
+               return -EINVAL;
+       }
+
        if (inode->i_sb->s_export_op->flags & EXPORT_OP_NOSUBTREECHK &&
            !(*flags & NFSEXP_NOSUBTREECHECK)) {
                dprintk("%s: %s does not support subtree checking!\n",
@@ -653,8 +660,7 @@ static int svc_export_parse(struct cache_detail *cd, char *mesg, int mlen)
                                goto out4;
                }
 
-               err = check_export(d_inode(exp.ex_path.dentry), &exp.ex_flags,
-                                  exp.ex_uuid);
+               err = check_export(&exp.ex_path, &exp.ex_flags, exp.ex_uuid);
                if (err)
                        goto out4;
                /*
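
mnt_user_ns() returns the user namespace a mount is idmapped to, and for an ordinary mount that is &init_user_ns, so the comparison above is the standard "is this mount idmapped?" test; nfsd refuses such exports until it grows the plumbing to translate ids. Wrapped up as a hypothetical helper:

#include <linux/mount.h>
#include <linux/user_namespace.h>

static inline bool mount_is_idmapped(const struct vfsmount *mnt)
{
	/* A mount is idmapped iff its userns is not init_user_ns. */
	return mnt_user_ns(mnt) != &init_user_ns;
}
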
index 53fcbf7..7629248 100644 (file)
@@ -898,6 +898,8 @@ nfsd_file_find_locked(struct inode *inode, unsigned int may_flags,
                        continue;
                if (!nfsd_match_cred(nf->nf_cred, current_cred()))
                        continue;
+               if (!test_bit(NFSD_FILE_HASHED, &nf->nf_flags))
+                       continue;
                if (nfsd_file_get(nf) != NULL)
                        return nf;
        }
index 7eeac5b..855e177 100644 (file)
@@ -113,10 +113,12 @@ static __be32 nfsacld_proc_setacl(struct svc_rqst *rqstp)
 
        fh_lock(fh);
 
-       error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+       error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS,
+                             argp->acl_access);
        if (error)
                goto out_drop_lock;
-       error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+       error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT,
+                             argp->acl_default);
        if (error)
                goto out_drop_lock;
 
index a568b84..9a6f18d 100644 (file)
@@ -103,10 +103,12 @@ static __be32 nfsd3_proc_setacl(struct svc_rqst *rqstp)
 
        fh_lock(fh);
 
-       error = set_posix_acl(inode, ACL_TYPE_ACCESS, argp->acl_access);
+       error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS,
+                             argp->acl_access);
        if (error)
                goto out_drop_lock;
-       error = set_posix_acl(inode, ACL_TYPE_DEFAULT, argp->acl_default);
+       error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_DEFAULT,
+                             argp->acl_default);
 
 out_drop_lock:
        fh_unlock(fh);
index 71292a0..eaa3a0c 100644 (file)
@@ -781,12 +781,13 @@ nfsd4_set_nfs4_acl(struct svc_rqst *rqstp, struct svc_fh *fhp,
 
        fh_lock(fhp);
 
-       host_error = set_posix_acl(inode, ACL_TYPE_ACCESS, pacl);
+       host_error = set_posix_acl(&init_user_ns, inode, ACL_TYPE_ACCESS, pacl);
        if (host_error < 0)
                goto out_drop_lock;
 
        if (S_ISDIR(inode->i_mode)) {
-               host_error = set_posix_acl(inode, ACL_TYPE_DEFAULT, dpacl);
+               host_error = set_posix_acl(&init_user_ns, inode,
+                                          ACL_TYPE_DEFAULT, dpacl);
        }
 
 out_drop_lock:
index 052be5b..7325592 100644 (file)
@@ -1189,6 +1189,7 @@ static void nfsd4_cb_done(struct rpc_task *task, void *calldata)
                switch (task->tk_status) {
                case -EIO:
                case -ETIMEDOUT:
+               case -EACCES:
                        nfsd4_mark_cb_down(clp, task->tk_status);
                }
                break;
index acdb3cd..dd9f38d 100644 (file)
@@ -1302,7 +1302,7 @@ nfsd4_cleanup_inter_ssc(struct vfsmount *ss_mnt, struct nfsd_file *src,
                        struct nfsd_file *dst)
 {
        nfs42_ssc_close(src->nf_file);
-       /* 'src' is freed by nfsd4_do_async_copy */
+       fput(src->nf_file);
        nfsd_file_put(dst);
        mntput(ss_mnt);
 }
index 186fa2c..891395c 100644 (file)
@@ -233,7 +233,7 @@ nfsd4_create_clid_dir(struct nfs4_client *clp)
                 * as well be forgiving and just succeed silently.
                 */
                goto out_put;
-       status = vfs_mkdir(d_inode(dir), dentry, S_IRWXU);
+       status = vfs_mkdir(&init_user_ns, d_inode(dir), dentry, S_IRWXU);
 out_put:
        dput(dentry);
 out_unlock:
@@ -353,7 +353,7 @@ nfsd4_unlink_clid_dir(char *name, int namlen, struct nfsd_net *nn)
        status = -ENOENT;
        if (d_really_is_negative(dentry))
                goto out;
-       status = vfs_rmdir(d_inode(dir), dentry);
+       status = vfs_rmdir(&init_user_ns, d_inode(dir), dentry);
 out:
        dput(dentry);
 out_unlock:
@@ -443,7 +443,7 @@ purge_old(struct dentry *parent, struct dentry *child, struct nfsd_net *nn)
        if (nfs4_has_reclaimed_state(name, nn))
                goto out_free;
 
-       status = vfs_rmdir(d_inode(parent), child);
+       status = vfs_rmdir(&init_user_ns, d_inode(parent), child);
        if (status)
                printk("failed to remove client recovery directory %pd\n",
                                child);
index 423fd66..97447a6 100644 (file)
@@ -4940,31 +4940,6 @@ static struct file_lock *nfs4_alloc_init_lease(struct nfs4_delegation *dp,
        return fl;
 }
 
-static int nfsd4_check_conflicting_opens(struct nfs4_client *clp,
-                                               struct nfs4_file *fp)
-{
-       struct nfs4_clnt_odstate *co;
-       struct file *f = fp->fi_deleg_file->nf_file;
-       struct inode *ino = locks_inode(f);
-       int writes = atomic_read(&ino->i_writecount);
-
-       if (fp->fi_fds[O_WRONLY])
-               writes--;
-       if (fp->fi_fds[O_RDWR])
-               writes--;
-       if (writes > 0)
-               return -EAGAIN;
-       spin_lock(&fp->fi_lock);
-       list_for_each_entry(co, &fp->fi_clnt_odstate, co_perfile) {
-               if (co->co_client != clp) {
-                       spin_unlock(&fp->fi_lock);
-                       return -EAGAIN;
-               }
-       }
-       spin_unlock(&fp->fi_lock);
-       return 0;
-}
-
 static struct nfs4_delegation *
 nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
                    struct nfs4_file *fp, struct nfs4_clnt_odstate *odstate)
@@ -4984,12 +4959,9 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
 
        nf = find_readable_file(fp);
        if (!nf) {
-               /*
-                * We probably could attempt another open and get a read
-                * delegation, but for now, don't bother until the
-                * client actually sends us one.
-                */
-               return ERR_PTR(-EAGAIN);
+               /* We should always have a readable file here */
+               WARN_ON_ONCE(1);
+               return ERR_PTR(-EBADF);
        }
        spin_lock(&state_lock);
        spin_lock(&fp->fi_lock);
@@ -5019,19 +4991,11 @@ nfs4_set_delegation(struct nfs4_client *clp, struct svc_fh *fh,
        if (!fl)
                goto out_clnt_odstate;
 
-       status = nfsd4_check_conflicting_opens(clp, fp);
-       if (status) {
-               locks_free_lock(fl);
-               goto out_clnt_odstate;
-       }
        status = vfs_setlease(fp->fi_deleg_file->nf_file, fl->fl_type, &fl, NULL);
        if (fl)
                locks_free_lock(fl);
        if (status)
                goto out_clnt_odstate;
-       status = nfsd4_check_conflicting_opens(clp, fp);
-       if (status)
-               goto out_clnt_odstate;
 
        spin_lock(&state_lock);
        spin_lock(&fp->fi_lock);
@@ -5113,6 +5077,17 @@ nfs4_open_delegation(struct svc_fh *fh, struct nfsd4_open *open,
                                goto out_no_deleg;
                        if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
                                goto out_no_deleg;
+                       /*
+                        * Also, if the file was opened for write or
+                        * create, there's a good chance the client's
+                        * about to write to it, resulting in an
+                        * immediate recall (since we don't support
+                        * write delegations):
+                        */
+                       if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
+                               goto out_no_deleg;
+                       if (open->op_create == NFS4_OPEN_CREATE)
+                               goto out_no_deleg;
                        break;
                default:
                        goto out_no_deleg;
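
These two hunks restore the earlier policy: instead of trying to prove via the removed nfsd4_check_conflicting_opens() that any conflicting local opens belong to the requesting client, the server again declines a read delegation whenever the open requests write access or creates the file, since (as the restored comment says) such a delegation would be recalled almost immediately. Restated as a hypothetical predicate for the NFS4_OPEN_CLAIM_NULL case, not code from the patch:

static bool nfs4_read_deleg_allowed(const struct nfsd4_open *open,
				    const struct nfs4_openowner *oo, bool cb_up)
{
	/* Need a working callback channel and a confirmed open owner. */
	if (!cb_up || !(oo->oo_flags & NFS4_OO_CONFIRMED))
		return false;
	/* A write or create open would trigger an immediate recall. */
	if (open->op_share_access & NFS4_SHARE_ACCESS_WRITE)
		return false;
	if (open->op_create == NFS4_OPEN_CREATE)
		return false;
	return true;
}
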
@@ -5389,7 +5364,7 @@ nfs4_laundromat(struct nfsd_net *nn)
        idr_for_each_entry(&nn->s2s_cp_stateids, cps_t, i) {
                cps = container_of(cps_t, struct nfs4_cpntf_state, cp_stateid);
                if (cps->cp_stateid.sc_type == NFS4_COPYNOTIFY_STID &&
-                               cps->cpntf_time > cutoff)
+                               cps->cpntf_time < cutoff)
                        _free_cpntf_state_locked(nn, cps);
        }
        spin_unlock(&nn->s2s_cp_lock);
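
The one-character fix above corrects an inverted comparison: cpntf_time records when a copy-notify stateid was last used, and with the cutoff computed as "now minus the lease period" the laundromat must free the stateids whose timestamp has fallen below the cutoff, not the recently used ones it was freeing before. As a tiny predicate, assuming that meaning of cutoff:

static inline bool cpntf_state_expired(time64_t cpntf_time, time64_t cutoff)
{
	/* Stale once the last use precedes now - lease_time. */
	return cpntf_time < cutoff;
}
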
index 4744a27..10b4442 100644 (file)
@@ -40,7 +40,8 @@ static int nfsd_acceptable(void *expv, struct dentry *dentry)
                /* make sure parents give x permission to user */
                int err;
                parent = dget_parent(tdentry);
-               err = inode_permission(d_inode(parent), MAY_EXEC);
+               err = inode_permission(&init_user_ns,
+                                      d_inode(parent), MAY_EXEC);
                if (err < 0) {
                        dput(parent);
                        break;
index b2f8035..a8d5449 100644 (file)
@@ -90,7 +90,7 @@ nfsd_proc_setattr(struct svc_rqst *rqstp)
                if (delta < 0)
                        delta = -delta;
                if (delta < MAX_TOUCH_TIME_ERROR &&
-                   setattr_prepare(fhp->fh_dentry, iap) != 0) {
+                   setattr_prepare(&init_user_ns, fhp->fh_dentry, iap) != 0) {
                        /*
                         * Turn off ATTR_[AM]TIME_SET but leave ATTR_[AM]TIME.
                         * This will cause notify_change to set these times
index d316e11..fd6be35 100644 (file)
@@ -448,7 +448,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
                        .ia_size        = iap->ia_size,
                };
 
-               host_err = notify_change(dentry, &size_attr, NULL);
+               host_err = notify_change(&init_user_ns, dentry, &size_attr, NULL);
                if (host_err)
                        goto out_unlock;
                iap->ia_valid &= ~ATTR_SIZE;
@@ -463,7 +463,7 @@ nfsd_setattr(struct svc_rqst *rqstp, struct svc_fh *fhp, struct iattr *iap,
        }
 
        iap->ia_valid |= ATTR_CTIME;
-       host_err = notify_change(dentry, iap, NULL);
+       host_err = notify_change(&init_user_ns, dentry, iap, NULL);
 
 out_unlock:
        fh_unlock(fhp);
@@ -499,7 +499,8 @@ int nfsd4_is_junction(struct dentry *dentry)
                return 0;
        if (!(inode->i_mode & S_ISVTX))
                return 0;
-       if (vfs_getxattr(dentry, NFSD_JUNCTION_XATTR_NAME, NULL, 0) <= 0)
+       if (vfs_getxattr(&init_user_ns, dentry, NFSD_JUNCTION_XATTR_NAME,
+                        NULL, 0) <= 0)
                return 0;
        return 1;
 }
@@ -1254,12 +1255,12 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
        host_err = 0;
        switch (type) {
        case S_IFREG:
-               host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
+               host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
                if (!host_err)
                        nfsd_check_ignore_resizing(iap);
                break;
        case S_IFDIR:
-               host_err = vfs_mkdir(dirp, dchild, iap->ia_mode);
+               host_err = vfs_mkdir(&init_user_ns, dirp, dchild, iap->ia_mode);
                if (!host_err && unlikely(d_unhashed(dchild))) {
                        struct dentry *d;
                        d = lookup_one_len(dchild->d_name.name,
@@ -1287,7 +1288,8 @@ nfsd_create_locked(struct svc_rqst *rqstp, struct svc_fh *fhp,
        case S_IFBLK:
        case S_IFIFO:
        case S_IFSOCK:
-               host_err = vfs_mknod(dirp, dchild, iap->ia_mode, rdev);
+               host_err = vfs_mknod(&init_user_ns, dirp, dchild,
+                                    iap->ia_mode, rdev);
                break;
        default:
                printk(KERN_WARNING "nfsd: bad file type %o in nfsd_create\n",
@@ -1485,7 +1487,7 @@ do_nfsd_create(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (!IS_POSIXACL(dirp))
                iap->ia_mode &= ~current_umask();
 
-       host_err = vfs_create(dirp, dchild, iap->ia_mode, true);
+       host_err = vfs_create(&init_user_ns, dirp, dchild, iap->ia_mode, true);
        if (host_err < 0) {
                fh_drop_write(fhp);
                goto out_nfserr;
@@ -1609,7 +1611,7 @@ nfsd_symlink(struct svc_rqst *rqstp, struct svc_fh *fhp,
        if (IS_ERR(dnew))
                goto out_nfserr;
 
-       host_err = vfs_symlink(d_inode(dentry), dnew, path);
+       host_err = vfs_symlink(&init_user_ns, d_inode(dentry), dnew, path);
        err = nfserrno(host_err);
        if (!err)
                err = nfserrno(commit_metadata(fhp));
@@ -1677,7 +1679,7 @@ nfsd_link(struct svc_rqst *rqstp, struct svc_fh *ffhp,
        err = nfserr_noent;
        if (d_really_is_negative(dold))
                goto out_dput;
-       host_err = vfs_link(dold, dirp, dnew, NULL);
+       host_err = vfs_link(dold, &init_user_ns, dirp, dnew, NULL);
        if (!host_err) {
                err = nfserrno(commit_metadata(ffhp));
                if (!err)
@@ -1797,7 +1799,15 @@ retry:
                close_cached = true;
                goto out_dput_old;
        } else {
-               host_err = vfs_rename(fdir, odentry, tdir, ndentry, NULL, 0);
+               struct renamedata rd = {
+                       .old_mnt_userns = &init_user_ns,
+                       .old_dir        = fdir,
+                       .old_dentry     = odentry,
+                       .new_mnt_userns = &init_user_ns,
+                       .new_dir        = tdir,
+                       .new_dentry     = ndentry,
+               };
+               host_err = vfs_rename(&rd);
                if (!host_err) {
                        host_err = commit_metadata(tfhp);
                        if (!host_err)
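
vfs_rename() now takes a single struct renamedata instead of a long positional argument list, which lets callers such as nfsd name each field and supply a mount user namespace for both the source and the target of the rename. As introduced for 5.12 in include/linux/fs.h, the struct looks approximately like:

struct renamedata {
	struct user_namespace *old_mnt_userns;
	struct inode *old_dir;
	struct dentry *old_dentry;
	struct user_namespace *new_mnt_userns;
	struct inode *new_dir;
	struct dentry *new_dentry;
	struct inode **delegated_inode;
	unsigned int flags;
} __randomize_layout;

int vfs_rename(struct renamedata *rd);
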
@@ -1884,9 +1894,9 @@ nfsd_unlink(struct svc_rqst *rqstp, struct svc_fh *fhp, int type,
        if (type != S_IFDIR) {
                if (rdentry->d_sb->s_export_op->flags & EXPORT_OP_CLOSE_BEFORE_UNLINK)
                        nfsd_close_cached_files(rdentry);
-               host_err = vfs_unlink(dirp, rdentry, NULL);
+               host_err = vfs_unlink(&init_user_ns, dirp, rdentry, NULL);
        } else {
-               host_err = vfs_rmdir(dirp, rdentry);
+               host_err = vfs_rmdir(&init_user_ns, dirp, rdentry);
        }
 
        if (!host_err)
@@ -2149,7 +2159,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
 
        inode_lock_shared(inode);
 
-       len = vfs_getxattr(dentry, name, NULL, 0);
+       len = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0);
 
        /*
         * Zero-length attribute, just return.
@@ -2176,7 +2186,7 @@ nfsd_getxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
                goto out;
        }
 
-       len = vfs_getxattr(dentry, name, buf, len);
+       len = vfs_getxattr(&init_user_ns, dentry, name, buf, len);
        if (len <= 0) {
                kvfree(buf);
                buf = NULL;
@@ -2283,7 +2293,8 @@ nfsd_removexattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name)
 
        fh_lock(fhp);
 
-       ret = __vfs_removexattr_locked(fhp->fh_dentry, name, NULL);
+       ret = __vfs_removexattr_locked(&init_user_ns, fhp->fh_dentry,
+                                      name, NULL);
 
        fh_unlock(fhp);
        fh_drop_write(fhp);
@@ -2307,8 +2318,8 @@ nfsd_setxattr(struct svc_rqst *rqstp, struct svc_fh *fhp, char *name,
                return nfserrno(ret);
        fh_lock(fhp);
 
-       ret = __vfs_setxattr_locked(fhp->fh_dentry, name, buf, len, flags,
-                                   NULL);
+       ret = __vfs_setxattr_locked(&init_user_ns, fhp->fh_dentry, name, buf,
+                                   len, flags, NULL);
 
        fh_unlock(fhp);
        fh_drop_write(fhp);
@@ -2391,13 +2402,14 @@ nfsd_permission(struct svc_rqst *rqstp, struct svc_export *exp,
                return 0;
 
        /* This assumes  NFSD_MAY_{READ,WRITE,EXEC} == MAY_{READ,WRITE,EXEC} */
-       err = inode_permission(inode, acc & (MAY_READ|MAY_WRITE|MAY_EXEC));
+       err = inode_permission(&init_user_ns, inode,
+                              acc & (MAY_READ | MAY_WRITE | MAY_EXEC));
 
        /* Allow read access to binaries even when mode 111 */
        if (err == -EACCES && S_ISREG(inode->i_mode) &&
             (acc == (NFSD_MAY_READ | NFSD_MAY_OWNER_OVERRIDE) ||
              acc == (NFSD_MAY_READ | NFSD_MAY_READ_IF_EXEC)))
-               err = inode_permission(inode, MAY_EXEC);
+               err = inode_permission(&init_user_ns, inode, MAY_EXEC);
 
        return err? nfserrno(err) : 0;
 }
index 745d371..2e8eb26 100644 (file)
@@ -348,7 +348,7 @@ struct inode *nilfs_new_inode(struct inode *dir, umode_t mode)
        /* reference count of i_bh inherits from nilfs_mdt_read_block() */
 
        atomic64_inc(&root->inodes_count);
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode->i_ino = ino;
        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
 
@@ -805,14 +805,15 @@ void nilfs_evict_inode(struct inode *inode)
         */
 }
 
-int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
+int nilfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *iattr)
 {
        struct nilfs_transaction_info ti;
        struct inode *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        int err;
 
-       err = setattr_prepare(dentry, iattr);
+       err = setattr_prepare(&init_user_ns, dentry, iattr);
        if (err)
                return err;
 
@@ -827,7 +828,7 @@ int nilfs_setattr(struct dentry *dentry, struct iattr *iattr)
                nilfs_truncate(inode);
        }
 
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
        mark_inode_dirty(inode);
 
        if (iattr->ia_valid & ATTR_MODE) {
@@ -843,7 +844,8 @@ out_err:
        return err;
 }
 
-int nilfs_permission(struct inode *inode, int mask)
+int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                    int mask)
 {
        struct nilfs_root *root = NILFS_I(inode)->i_root;
 
@@ -851,7 +853,7 @@ int nilfs_permission(struct inode *inode, int mask)
            root->cno != NILFS_CPTREE_CURRENT_CNO)
                return -EROFS; /* snapshot is not writable */
 
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh)
index 07d26f6..b053b40 100644 (file)
@@ -132,7 +132,7 @@ static int nilfs_ioctl_setflags(struct inode *inode, struct file *filp,
        unsigned int flags, oldflags;
        int ret;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        if (get_user(flags, (int __user *)argp))
index a6ec796..ecace5f 100644 (file)
@@ -72,8 +72,8 @@ nilfs_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
  * If the create succeeds, we fill in the inode information
  * with d_instantiate().
  */
-static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                       bool excl)
+static int nilfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
        struct inode *inode;
        struct nilfs_transaction_info ti;
@@ -100,7 +100,8 @@ static int nilfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
 }
 
 static int
-nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+nilfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+           struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode *inode;
        struct nilfs_transaction_info ti;
@@ -124,8 +125,8 @@ nilfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
        return err;
 }
 
-static int nilfs_symlink(struct inode *dir, struct dentry *dentry,
-                        const char *symname)
+static int nilfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        struct nilfs_transaction_info ti;
        struct super_block *sb = dir->i_sb;
@@ -201,7 +202,8 @@ static int nilfs_link(struct dentry *old_dentry, struct inode *dir,
        return err;
 }
 
-static int nilfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int nilfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        struct nilfs_transaction_info ti;
@@ -338,8 +340,9 @@ static int nilfs_rmdir(struct inode *dir, struct dentry *dentry)
        return err;
 }
 
-static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                       struct inode *new_dir,  struct dentry *new_dentry,
+static int nilfs_rename(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
+                       struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
        struct inode *old_inode = d_inode(old_dentry);
index f8450ee..c4a45a0 100644 (file)
@@ -267,9 +267,11 @@ extern struct inode *nilfs_iget_for_gc(struct super_block *sb,
 extern void nilfs_update_inode(struct inode *, struct buffer_head *, int);
 extern void nilfs_truncate(struct inode *);
 extern void nilfs_evict_inode(struct inode *);
-extern int nilfs_setattr(struct dentry *, struct iattr *);
+extern int nilfs_setattr(struct user_namespace *, struct dentry *,
+                        struct iattr *);
 extern void nilfs_write_failed(struct address_space *mapping, loff_t to);
-int nilfs_permission(struct inode *inode, int mask);
+int nilfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                    int mask);
 int nilfs_load_inode_block(struct inode *inode, struct buffer_head **pbh);
 extern int nilfs_inode_dirty(struct inode *);
 int nilfs_set_file_dirty(struct inode *inode, unsigned int nr_dirty);
index 1e75417..56872e9 100644 (file)
@@ -399,7 +399,7 @@ static void nilfs_segbuf_prepare_write(struct nilfs_segment_buffer *segbuf,
 {
        wi->bio = NULL;
        wi->rest_blocks = segbuf->sb_sum.nblocks;
-       wi->max_pages = BIO_MAX_PAGES;
+       wi->max_pages = BIO_MAX_VECS;
        wi->nr_vecs = min(wi->max_pages, wi->rest_blocks);
        wi->start = wi->end = 0;
        wi->blocknr = segbuf->sb_pseg_start;
index b78dd1f..9e0c1af 100644 (file)
@@ -702,7 +702,7 @@ static int fanotify_find_path(int dfd, const char __user *filename,
        }
 
        /* you can only watch an inode if you have read permissions on it */
-       ret = inode_permission(path->dentry->d_inode, MAY_READ);
+       ret = path_permission(path, MAY_READ);
        if (ret) {
                path_put(path);
                goto out;
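
path_permission() and its sibling file_permission() are new convenience wrappers that pick up the mount's user namespace before delegating to inode_permission(), so callers holding a struct path or a struct file no longer dig the inode out and pass &init_user_ns by hand. They boil down to roughly:

static inline int path_permission(const struct path *path, int mask)
{
	return inode_permission(mnt_user_ns(path->mnt),
				d_inode(path->dentry), mask);
}

static inline int file_permission(struct file *file, int mask)
{
	return inode_permission(file_mnt_user_ns(file),
				file_inode(file), mask);
}
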
index 266d17e..c71be4f 100644 (file)
@@ -352,7 +352,7 @@ static int inotify_find_inode(const char __user *dirname, struct path *path,
        if (error)
                return error;
        /* you can only watch an inode if you have read permissions on it */
-       error = inode_permission(path->dentry->d_inode, MAY_READ);
+       error = path_permission(path, MAY_READ);
        if (error) {
                path_put(path);
                return error;
index f7e4cbc..f5c058b 100644 (file)
@@ -629,6 +629,12 @@ static int ntfs_read_locked_inode(struct inode *vi)
        }
        a = ctx->attr;
        /* Get the standard information attribute value. */
+       if ((u8 *)a + le16_to_cpu(a->data.resident.value_offset)
+                       + le32_to_cpu(a->data.resident.value_length) >
+                       (u8 *)ctx->mrec + vol->mft_record_size) {
+               ntfs_error(vi->i_sb, "Corrupt standard information attribute in inode.");
+               goto unm_err_out;
+       }
        si = (STANDARD_INFORMATION*)((u8*)a +
                        le16_to_cpu(a->data.resident.value_offset));
 
@@ -2848,6 +2854,7 @@ void ntfs_truncate_vfs(struct inode *vi) {
 
 /**
  * ntfs_setattr - called from notify_change() when an attribute is being changed
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dentry:    dentry whose attributes to change
  * @attr:      structure describing the attributes and the changes
  *
@@ -2860,13 +2867,14 @@ void ntfs_truncate_vfs(struct inode *vi) {
  *
  * Called with ->i_mutex held.
  */
-int ntfs_setattr(struct dentry *dentry, struct iattr *attr)
+int ntfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *attr)
 {
        struct inode *vi = d_inode(dentry);
        int err;
        unsigned int ia_valid = attr->ia_valid;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                goto out;
        /* We do not support NTFS ACLs yet. */
index 363e4e8..6f78ee0 100644 (file)
@@ -289,7 +289,8 @@ extern int ntfs_show_options(struct seq_file *sf, struct dentry *root);
 extern int ntfs_truncate(struct inode *vi);
 extern void ntfs_truncate_vfs(struct inode *vi);
 
-extern int ntfs_setattr(struct dentry *dentry, struct iattr *attr);
+extern int ntfs_setattr(struct user_namespace *mnt_userns,
+                       struct dentry *dentry, struct iattr *attr);
 
 extern int __ntfs_write_inode(struct inode *vi, int sync);
 
index 8542276..5d4bf7a 100644 (file)
@@ -703,7 +703,7 @@ typedef struct {
 /* 14*/        le16 instance;          /* The instance of this attribute record. This
                                   number is unique within this mft record (see
                                   MFT_RECORD/next_attribute_instance notes in
-                                  in mft.h for more details). */
+                                  mft.h for more details). */
 /* 16*/        union {
                /* Resident attributes. */
                struct {
@@ -1838,7 +1838,7 @@ typedef struct {
  * Also, each security descriptor is stored twice in the $SDS stream with a
  * fixed offset of 0x40000 bytes (256kib, the Windows cache manager's max size)
  * between them; i.e. if a SDS_ENTRY specifies an offset of 0x51d0, then the
- * the first copy of the security descriptor will be at offset 0x51d0 in the
+ * first copy of the security descriptor will be at offset 0x51d0 in the
  * $SDS data stream and the second copy will be at offset 0x451d0.
  */
 typedef struct {
index 7b07f5d..5259bad 100644 (file)
@@ -262,7 +262,8 @@ static int ocfs2_set_acl(handle_t *handle,
        return ret;
 }
 
-int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                     struct posix_acl *acl, int type)
 {
        struct buffer_head *bh = NULL;
        int status, had_lock;
@@ -274,7 +275,8 @@ int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        if (type == ACL_TYPE_ACCESS && acl) {
                umode_t mode;
 
-               status = posix_acl_update_mode(inode, &mode, &acl);
+               status = posix_acl_update_mode(&init_user_ns, inode, &mode,
+                                              &acl);
                if (status)
                        goto unlock;
 
index 127b134..4e86450 100644 (file)
@@ -19,7 +19,8 @@ struct ocfs2_acl_entry {
 };
 
 struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type);
-int ocfs2_iop_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int ocfs2_iop_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                     struct posix_acl *acl, int type);
 extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
 extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
                          struct buffer_head *, struct buffer_head *,
index 0179a73..12a7590 100644 (file)
@@ -2042,7 +2042,7 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
                        o2hb_nego_timeout_handler,
                        reg, NULL, &reg->hr_handler_list);
        if (ret)
-               goto free;
+               goto remove_item;
 
        ret = o2net_register_handler(O2HB_NEGO_APPROVE_MSG, reg->hr_key,
                        sizeof(struct o2hb_nego_msg),
@@ -2057,6 +2057,12 @@ static struct config_item *o2hb_heartbeat_group_make_item(struct config_group *g
 
 unregister_handler:
        o2net_unregister_handler_list(&reg->hr_handler_list);
+remove_item:
+       spin_lock(&o2hb_live_lock);
+       list_del(&reg->hr_all_item);
+       if (o2hb_global_heartbeat_active())
+               clear_bit(reg->hr_region_num, o2hb_region_bitmap);
+       spin_unlock(&o2hb_live_lock);
 free:
        kfree(reg);
        return ERR_PTR(ret);
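
The fix adds a dedicated unwind label: a failure to register the first handler previously jumped straight to free and kfree'd a region that was still linked on the o2hb live list (with its bit still set in the region bitmap), leaving a use-after-free behind. The change restores the usual kernel pattern of one label per committed step, unwound in reverse order by falling through; a minimal sketch with hypothetical names:

static int setup_region(struct region *reg)
{
	int ret;

	ret = add_to_live_list(reg);
	if (ret)
		goto free;
	ret = register_first_handler(reg);
	if (ret)
		goto remove_item;	/* undo the list insertion too */
	ret = register_second_handler(reg);
	if (ret)
		goto unregister_handler;
	return 0;

unregister_handler:
	unregister_handlers(reg);
remove_item:
	remove_from_live_list(reg);
free:
	kfree(reg);
	return ret;
}
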
index 6abaded..70a1076 100644 (file)
@@ -165,16 +165,6 @@ void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
        spin_unlock(&lock->spinlock);
 }
 
-void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock)
-{
-       BUG_ON(!dlm);
-       BUG_ON(!lock);
-
-       spin_lock(&dlm->ast_lock);
-       __dlm_queue_bast(dlm, lock);
-       spin_unlock(&dlm->ast_lock);
-}
-
 static void dlm_update_lvb(struct dlm_ctxt *dlm, struct dlm_lock_resource *res,
                           struct dlm_lock *lock)
 {
index c8a4446..58d57e2 100644 (file)
 
 #define DLM_LOCKID_NAME_MAX    32
 
-#define DLM_DOMAIN_NAME_MAX_LEN    255
 #define DLM_LOCK_RES_OWNER_UNKNOWN     O2NM_MAX_NODES
-#define DLM_THREAD_SHUFFLE_INTERVAL    5     // flush everything every 5 passes
-#define DLM_THREAD_MS                  200   // flush at least every 200 ms
 
 #define DLM_HASH_SIZE_DEFAULT  (1 << 17)
 #if DLM_HASH_SIZE_DEFAULT < PAGE_SIZE
@@ -902,7 +899,6 @@ void __dlm_lockres_grab_inflight_worker(struct dlm_ctxt *dlm,
                struct dlm_lock_resource *res);
 
 void dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
-void dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 void __dlm_queue_ast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 void __dlm_queue_bast(struct dlm_ctxt *dlm, struct dlm_lock *lock);
 void dlm_do_local_ast(struct dlm_ctxt *dlm,
index 583820e..b2870f1 100644 (file)
@@ -190,17 +190,18 @@ static int dlmfs_file_release(struct inode *inode,
  * We do ->setattr() just to override size changes.  Our size is the size
  * of the LVB and nothing else.
  */
-static int dlmfs_file_setattr(struct dentry *dentry, struct iattr *attr)
+static int dlmfs_file_setattr(struct user_namespace *mnt_userns,
+                             struct dentry *dentry, struct iattr *attr)
 {
        int error;
        struct inode *inode = d_inode(dentry);
 
        attr->ia_valid &= ~ATTR_SIZE;
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
@@ -329,7 +330,7 @@ static struct inode *dlmfs_get_root_inode(struct super_block *sb)
 
        if (inode) {
                inode->i_ino = get_next_ino();
-               inode_init_owner(inode, NULL, mode);
+               inode_init_owner(&init_user_ns, inode, NULL, mode);
                inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
                inc_nlink(inode);
 
@@ -352,7 +353,7 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
                return NULL;
 
        inode->i_ino = get_next_ino();
-       inode_init_owner(inode, parent, mode);
+       inode_init_owner(&init_user_ns, inode, parent, mode);
        inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
 
        ip = DLMFS_I(inode);
@@ -395,7 +396,8 @@ static struct inode *dlmfs_get_inode(struct inode *parent,
  * File creation. Allocate an inode, and we're done..
  */
 /* SMP-safe */
-static int dlmfs_mkdir(struct inode * dir,
+static int dlmfs_mkdir(struct user_namespace * mnt_userns,
+                      struct inode * dir,
                       struct dentry * dentry,
                       umode_t mode)
 {
@@ -443,7 +445,8 @@ bail:
        return status;
 }
 
-static int dlmfs_create(struct inode *dir,
+static int dlmfs_create(struct user_namespace *mnt_userns,
+                       struct inode *dir,
                        struct dentry *dentry,
                        umode_t mode,
                        bool excl)
index df6d709..6611c64 100644 (file)
@@ -1112,7 +1112,8 @@ out:
        return ret;
 }
 
-int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
+int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *attr)
 {
        int status = 0, size_change;
        int inode_locked = 0;
@@ -1142,7 +1143,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
        if (!(attr->ia_valid & OCFS2_VALID_ATTRS))
                return 0;
 
-       status = setattr_prepare(dentry, attr);
+       status = setattr_prepare(&init_user_ns, dentry, attr);
        if (status)
                return status;
 
@@ -1263,7 +1264,7 @@ int ocfs2_setattr(struct dentry *dentry, struct iattr *attr)
                }
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
 
        status = ocfs2_mark_inode_dirty(handle, inode, bh);
@@ -1298,8 +1299,8 @@ bail:
        return status;
 }
 
-int ocfs2_getattr(const struct path *path, struct kstat *stat,
-                 u32 request_mask, unsigned int flags)
+int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct super_block *sb = path->dentry->d_sb;
@@ -1313,7 +1314,7 @@ int ocfs2_getattr(const struct path *path, struct kstat *stat,
                goto bail;
        }
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        /*
         * If there is inline data in the inode, the inode will normally not
         * have data blocks allocated (it may have an external xattr block).
@@ -1330,7 +1331,8 @@ bail:
        return err;
 }
 
-int ocfs2_permission(struct inode *inode, int mask)
+int ocfs2_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                    int mask)
 {
        int ret, had_lock;
        struct ocfs2_lock_holder oh;
@@ -1355,7 +1357,7 @@ int ocfs2_permission(struct inode *inode, int mask)
                dump_stack();
        }
 
-       ret = generic_permission(inode, mask);
+       ret = generic_permission(&init_user_ns, inode, mask);
 
        ocfs2_inode_unlock_tracker(inode, 0, &oh, had_lock);
 out:
index 4832cbc..8536cec 100644 (file)
@@ -51,10 +51,13 @@ int ocfs2_extend_no_holes(struct inode *inode, struct buffer_head *di_bh,
                          u64 new_i_size, u64 zero_to);
 int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh,
                      loff_t zero_to);
-int ocfs2_setattr(struct dentry *dentry, struct iattr *attr);
-int ocfs2_getattr(const struct path *path, struct kstat *stat,
-                 u32 request_mask, unsigned int flags);
-int ocfs2_permission(struct inode *inode, int mask);
+int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *attr);
+int ocfs2_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, u32 request_mask, unsigned int flags);
+int ocfs2_permission(struct user_namespace *mnt_userns,
+                    struct inode *inode,
+                    int mask);
 
 int ocfs2_should_update_atime(struct inode *inode,
                              struct vfsmount *vfsmnt);
index 8998417..50c9b30 100644 (file)
@@ -96,7 +96,7 @@ static int ocfs2_set_inode_attr(struct inode *inode, unsigned flags,
        }
 
        status = -EACCES;
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                goto bail_unlock;
 
        if (!S_ISDIR(inode->i_mode))
index 2a237ab..3abdd36 100644 (file)
@@ -198,7 +198,7 @@ static struct inode *ocfs2_get_init_inode(struct inode *dir, umode_t mode)
         * callers. */
        if (S_ISDIR(mode))
                set_nlink(inode, 2);
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        status = dquot_initialize(inode);
        if (status)
                return ERR_PTR(status);
@@ -221,7 +221,8 @@ static void ocfs2_cleanup_add_entry_failure(struct ocfs2_super *osb,
        iput(inode);
 }
 
-static int ocfs2_mknod(struct inode *dir,
+static int ocfs2_mknod(struct user_namespace *mnt_userns,
+                      struct inode *dir,
                       struct dentry *dentry,
                       umode_t mode,
                       dev_t dev)
@@ -645,7 +646,8 @@ static int ocfs2_mknod_locked(struct ocfs2_super *osb,
        return status;
 }
 
-static int ocfs2_mkdir(struct inode *dir,
+static int ocfs2_mkdir(struct user_namespace *mnt_userns,
+                      struct inode *dir,
                       struct dentry *dentry,
                       umode_t mode)
 {
@@ -653,14 +655,15 @@ static int ocfs2_mkdir(struct inode *dir,
 
        trace_ocfs2_mkdir(dir, dentry, dentry->d_name.len, dentry->d_name.name,
                          OCFS2_I(dir)->ip_blkno, mode);
-       ret = ocfs2_mknod(dir, dentry, mode | S_IFDIR, 0);
+       ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
        if (ret)
                mlog_errno(ret);
 
        return ret;
 }
 
-static int ocfs2_create(struct inode *dir,
+static int ocfs2_create(struct user_namespace *mnt_userns,
+                       struct inode *dir,
                        struct dentry *dentry,
                        umode_t mode,
                        bool excl)
@@ -669,7 +672,7 @@ static int ocfs2_create(struct inode *dir,
 
        trace_ocfs2_create(dir, dentry, dentry->d_name.len, dentry->d_name.name,
                           (unsigned long long)OCFS2_I(dir)->ip_blkno, mode);
-       ret = ocfs2_mknod(dir, dentry, mode | S_IFREG, 0);
+       ret = ocfs2_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
        if (ret)
                mlog_errno(ret);
 
@@ -1195,7 +1198,8 @@ static void ocfs2_double_unlock(struct inode *inode1, struct inode *inode2)
                ocfs2_inode_unlock(inode2, 1);
 }
 
-static int ocfs2_rename(struct inode *old_dir,
+static int ocfs2_rename(struct user_namespace *mnt_userns,
+                       struct inode *old_dir,
                        struct dentry *old_dentry,
                        struct inode *new_dir,
                        struct dentry *new_dentry,
@@ -1784,7 +1788,8 @@ bail:
        return status;
 }
 
-static int ocfs2_symlink(struct inode *dir,
+static int ocfs2_symlink(struct user_namespace *mnt_userns,
+                        struct inode *dir,
                         struct dentry *dentry,
                         const char *symname)
 {
index 3b397fa..c19a463 100644 (file)
@@ -978,7 +978,7 @@ static int ocfs2_get_refcount_cpos_end(struct ocfs2_caching_info *ci,
                return 0;
        }
 
-       if (!eb || (eb && !eb->h_next_leaf_blk)) {
+       if (!eb || !eb->h_next_leaf_blk) {
                /*
                 * We are the last extent rec, so any high cpos should
                 * be stored in this leaf refcount block.
@@ -4346,7 +4346,7 @@ static inline int ocfs2_may_create(struct inode *dir, struct dentry *child)
                return -EEXIST;
        if (IS_DEADDIR(dir))
                return -ENOENT;
-       return inode_permission(dir, MAY_WRITE | MAY_EXEC);
+       return inode_permission(&init_user_ns, dir, MAY_WRITE | MAY_EXEC);
 }
 
 /**
@@ -4400,7 +4400,7 @@ static int ocfs2_vfs_reflink(struct dentry *old_dentry, struct inode *dir,
         * file.
         */
        if (!preserve) {
-               error = inode_permission(inode, MAY_READ);
+               error = inode_permission(&init_user_ns, inode, MAY_READ);
                if (error)
                        return error;
        }
index 2febc76..079f882 100644 (file)
@@ -973,8 +973,6 @@ static void ocfs2_disable_quotas(struct ocfs2_super *osb)
                 * quota files */
                dquot_disable(sb, type, DQUOT_USAGE_ENABLED |
                                        DQUOT_LIMITS_ENABLED);
-               if (!inode)
-                       continue;
                iput(inode);
        }
 }
index 9ccd19d..36ae47a 100644 (file)
@@ -7249,6 +7249,7 @@ static int ocfs2_xattr_security_get(const struct xattr_handler *handler,
 }
 
 static int ocfs2_xattr_security_set(const struct xattr_handler *handler,
+                                   struct user_namespace *mnt_userns,
                                    struct dentry *unused, struct inode *inode,
                                    const char *name, const void *value,
                                    size_t size, int flags)
@@ -7321,6 +7322,7 @@ static int ocfs2_xattr_trusted_get(const struct xattr_handler *handler,
 }
 
 static int ocfs2_xattr_trusted_set(const struct xattr_handler *handler,
+                                  struct user_namespace *mnt_userns,
                                   struct dentry *unused, struct inode *inode,
                                   const char *name, const void *value,
                                   size_t size, int flags)
@@ -7351,6 +7353,7 @@ static int ocfs2_xattr_user_get(const struct xattr_handler *handler,
 }
 
 static int ocfs2_xattr_user_set(const struct xattr_handler *handler,
+                               struct user_namespace *mnt_userns,
                                struct dentry *unused, struct inode *inode,
                                const char *name, const void *value,
                                size_t size, int flags)
index a0f4565..c219f91 100644 (file)
@@ -279,13 +279,14 @@ out_free_inode:
        return err;
 }
 
-static int omfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int omfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        return omfs_add_node(dir, dentry, mode | S_IFDIR);
 }
 
-static int omfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-               bool excl)
+static int omfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
        return omfs_add_node(dir, dentry, mode | S_IFREG);
 }
@@ -369,9 +370,9 @@ static bool omfs_fill_chain(struct inode *dir, struct dir_context *ctx,
        return true;
 }
 
-static int omfs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                      struct inode *new_dir, struct dentry *new_dentry,
-                      unsigned int flags)
+static int omfs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                      struct dentry *old_dentry, struct inode *new_dir,
+                      struct dentry *new_dentry, unsigned int flags)
 {
        struct inode *new_inode = d_inode(new_dentry);
        struct inode *old_inode = d_inode(old_dentry);
index 2c7b70e..11e733a 100644 (file)
@@ -343,12 +343,13 @@ const struct file_operations omfs_file_operations = {
        .splice_read = generic_file_splice_read,
 };
 
-static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
+static int omfs_setattr(struct user_namespace *mnt_userns,
+                       struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -361,7 +362,7 @@ static int omfs_setattr(struct dentry *dentry, struct iattr *attr)
                omfs_truncate(inode);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
index ce93ccc..2a0e832 100644 (file)
@@ -48,7 +48,7 @@ struct inode *omfs_new_inode(struct inode *dir, umode_t mode)
                goto fail;
 
        inode->i_ino = new_block;
-       inode_init_owner(inode, NULL, mode);
+       inode_init_owner(&init_user_ns, inode, NULL, mode);
        inode->i_mapping->a_ops = &omfs_aops;
 
        inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
index ca54447..e53af13 100644 (file)
--- a/fs/open.c
+++ b/fs/open.c
@@ -35,8 +35,8 @@
 
 #include "internal.h"
 
-int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
-       struct file *filp)
+int do_truncate(struct user_namespace *mnt_userns, struct dentry *dentry,
+               loff_t length, unsigned int time_attrs, struct file *filp)
 {
        int ret;
        struct iattr newattrs;
@@ -61,13 +61,14 @@ int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
 
        inode_lock(dentry->d_inode);
        /* Note any delegations or leases have already been broken: */
-       ret = notify_change(dentry, &newattrs, NULL);
+       ret = notify_change(mnt_userns, dentry, &newattrs, NULL);
        inode_unlock(dentry->d_inode);
        return ret;
 }
 
 long vfs_truncate(const struct path *path, loff_t length)
 {
+       struct user_namespace *mnt_userns;
        struct inode *inode;
        long error;
 
@@ -83,7 +84,8 @@ long vfs_truncate(const struct path *path, loff_t length)
        if (error)
                goto out;
 
-       error = inode_permission(inode, MAY_WRITE);
+       mnt_userns = mnt_user_ns(path->mnt);
+       error = inode_permission(mnt_userns, inode, MAY_WRITE);
        if (error)
                goto mnt_drop_write_and_out;
 
@@ -107,7 +109,7 @@ long vfs_truncate(const struct path *path, loff_t length)
        if (!error)
                error = security_path_truncate(path);
        if (!error)
-               error = do_truncate(path->dentry, length, 0, NULL);
+               error = do_truncate(mnt_userns, path->dentry, length, 0, NULL);
 
 put_write_and_out:
        put_write_access(inode);
@@ -186,13 +188,13 @@ long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
        /* Check IS_APPEND on real upper inode */
        if (IS_APPEND(file_inode(f.file)))
                goto out_putf;
-
        sb_start_write(inode->i_sb);
        error = locks_verify_truncate(inode, f.file, length);
        if (!error)
                error = security_path_truncate(&f.file->f_path);
        if (!error)
-               error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
+               error = do_truncate(file_mnt_user_ns(f.file), dentry, length,
+                                   ATTR_MTIME | ATTR_CTIME, f.file);
        sb_end_write(inode->i_sb);
 out_putf:
        fdput(f);
@@ -436,7 +438,7 @@ retry:
                        goto out_path_release;
        }
 
-       res = inode_permission(inode, mode | MAY_ACCESS);
+       res = inode_permission(mnt_user_ns(path.mnt), inode, mode | MAY_ACCESS);
        /* SuS v2 requires we report a read only fs too */
        if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
                goto out_path_release;
@@ -492,7 +494,7 @@ retry:
        if (error)
                goto out;
 
-       error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+       error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
        if (error)
                goto dput_and_out;
 
@@ -521,7 +523,7 @@ SYSCALL_DEFINE1(fchdir, unsigned int, fd)
        if (!d_can_lookup(f.file->f_path.dentry))
                goto out_putf;
 
-       error = inode_permission(file_inode(f.file), MAY_EXEC | MAY_CHDIR);
+       error = file_permission(f.file, MAY_EXEC | MAY_CHDIR);
        if (!error)
                set_fs_pwd(current->fs, &f.file->f_path);
 out_putf:
@@ -540,7 +542,7 @@ retry:
        if (error)
                goto out;
 
-       error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
+       error = path_permission(&path, MAY_EXEC | MAY_CHDIR);
        if (error)
                goto dput_and_out;
 
@@ -580,7 +582,8 @@ retry_deleg:
                goto out_unlock;
        newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
        newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
-       error = notify_change(path->dentry, &newattrs, &delegated_inode);
+       error = notify_change(mnt_user_ns(path->mnt), path->dentry,
+                             &newattrs, &delegated_inode);
 out_unlock:
        inode_unlock(inode);
        if (delegated_inode) {
@@ -641,6 +644,7 @@ SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
 
 int chown_common(const struct path *path, uid_t user, gid_t group)
 {
+       struct user_namespace *mnt_userns;
        struct inode *inode = path->dentry->d_inode;
        struct inode *delegated_inode = NULL;
        int error;
@@ -651,6 +655,10 @@ int chown_common(const struct path *path, uid_t user, gid_t group)
        uid = make_kuid(current_user_ns(), user);
        gid = make_kgid(current_user_ns(), group);
 
+       mnt_userns = mnt_user_ns(path->mnt);
+       uid = kuid_from_mnt(mnt_userns, uid);
+       gid = kgid_from_mnt(mnt_userns, gid);
+
 retry_deleg:
        newattrs.ia_valid =  ATTR_CTIME;
        if (user != (uid_t) -1) {
@@ -671,7 +679,8 @@ retry_deleg:
        inode_lock(inode);
        error = security_path_chown(path, uid, gid);
        if (!error)
-               error = notify_change(path->dentry, &newattrs, &delegated_inode);
+               error = notify_change(mnt_userns, path->dentry, &newattrs,
+                                     &delegated_inode);
        inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
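
On an idmapped mount the uid/gid supplied by userspace are first resolved in the caller's namespace, as before, and then mapped through the mount's user namespace so that ia_uid/ia_gid carry the identity the idmapping represents; on a regular mount mnt_userns is &init_user_ns and the extra step is an identity mapping. The from-mount helpers in 5.12's include/linux/fs.h are approximately:

static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
				   kuid_t kuid)
{
	/* How does this kernel uid look from the mount's userns? */
	return KUIDT_INIT(from_kuid(mnt_userns, kuid));
}

static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
				   kgid_t kgid)
{
	return KGIDT_INIT(from_kgid(mnt_userns, kgid));
}
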
index a25e6c8..18852b9 100644 (file)
@@ -116,7 +116,8 @@ out:
        return error;
 }
 
-int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int orangefs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                    struct posix_acl *acl, int type)
 {
        int error;
        struct iattr iattr;
@@ -132,7 +133,8 @@ int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
                 * and "mode" to the new desired value. It is up to
                 * us to propagate the new mode back to the server...
                 */
-               error = posix_acl_update_mode(inode, &iattr.ia_mode, &acl);
+               error = posix_acl_update_mode(&init_user_ns, inode,
+                                             &iattr.ia_mode, &acl);
                if (error) {
                        gossip_err("%s: posix_acl_update_mode err: %d\n",
                                   __func__,
index 48f0547..5079cfa 100644 (file)
@@ -855,13 +855,13 @@ again:
                ORANGEFS_I(inode)->attr_uid = current_fsuid();
                ORANGEFS_I(inode)->attr_gid = current_fsgid();
        }
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
        spin_unlock(&inode->i_lock);
        mark_inode_dirty(inode);
 
        if (iattr->ia_valid & ATTR_MODE)
                /* change mod on a file that has ACLs */
-               ret = posix_acl_chmod(inode, inode->i_mode);
+               ret = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
 
        ret = 0;
 out:
@@ -871,12 +871,13 @@ out:
 /*
  * Change attributes of an object referenced by dentry.
  */
-int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
+int orangefs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                    struct iattr *iattr)
 {
        int ret;
        gossip_debug(GOSSIP_INODE_DEBUG, "__orangefs_setattr: called on %pd\n",
            dentry);
-       ret = setattr_prepare(dentry, iattr);
+       ret = setattr_prepare(&init_user_ns, dentry, iattr);
        if (ret)
                goto out;
        ret = __orangefs_setattr(d_inode(dentry), iattr);
@@ -890,8 +891,8 @@ out:
 /*
  * Obtain attributes of an object given a dentry
  */
-int orangefs_getattr(const struct path *path, struct kstat *stat,
-                    u32 request_mask, unsigned int flags)
+int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                    struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        int ret;
        struct inode *inode = path->dentry->d_inode;
@@ -903,7 +904,7 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
        ret = orangefs_inode_getattr(inode,
            request_mask & STATX_SIZE ? ORANGEFS_GETATTR_SIZE : 0);
        if (ret == 0) {
-               generic_fillattr(inode, stat);
+               generic_fillattr(&init_user_ns, inode, stat);
 
                /* override block size reported to stat */
                if (!(request_mask & STATX_SIZE))
@@ -919,7 +920,8 @@ int orangefs_getattr(const struct path *path, struct kstat *stat,
        return ret;
 }
 
-int orangefs_permission(struct inode *inode, int mask)
+int orangefs_permission(struct user_namespace *mnt_userns,
+                       struct inode *inode, int mask)
 {
        int ret;
 
@@ -933,7 +935,7 @@ int orangefs_permission(struct inode *inode, int mask)
        if (ret < 0)
                return ret;
 
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
 int orangefs_update_time(struct inode *inode, struct timespec64 *time, int flags)
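The orangefs hunks above (and many similar conversions below) follow a transitional pattern: each inode operation gains the mnt_userns parameter so that it matches the new prototypes, but the body keeps passing &init_user_ns, the identity mapping, because the filesystem has not yet been audited for idmapped mounts. A minimal sketch of the convention, with a hypothetical operation name:

    /* example_setattr is hypothetical; the shape mirrors the diff above */
    static int example_setattr(struct user_namespace *mnt_userns,
                               struct dentry *dentry, struct iattr *iattr)
    {
            /* mnt_userns is deliberately ignored until this filesystem
             * supports idmapped mounts; &init_user_ns keeps old semantics */
            return setattr_prepare(&init_user_ns, dentry, iattr);
    }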
index 3e7cf3d..600e8ee 100644 (file)
@@ -15,7 +15,8 @@
 /*
  * Get a newly allocated inode to go with a negative dentry.
  */
-static int orangefs_create(struct inode *dir,
+static int orangefs_create(struct user_namespace *mnt_userns,
+                       struct inode *dir,
                        struct dentry *dentry,
                        umode_t mode,
                        bool exclusive)
@@ -215,7 +216,8 @@ static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
        return ret;
 }
 
-static int orangefs_symlink(struct inode *dir,
+static int orangefs_symlink(struct user_namespace *mnt_userns,
+                        struct inode *dir,
                         struct dentry *dentry,
                         const char *symname)
 {
@@ -303,7 +305,8 @@ out:
        return ret;
 }
 
-static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int orangefs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                         struct dentry *dentry, umode_t mode)
 {
        struct orangefs_inode_s *parent = ORANGEFS_I(dir);
        struct orangefs_kernel_op_s *new_op;
@@ -372,7 +375,8 @@ out:
        return ret;
 }
 
-static int orangefs_rename(struct inode *old_dir,
+static int orangefs_rename(struct user_namespace *mnt_userns,
+                       struct inode *old_dir,
                        struct dentry *old_dentry,
                        struct inode *new_dir,
                        struct dentry *new_dentry,
index e12aeb9..0e6b976 100644 (file)
@@ -107,7 +107,9 @@ extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
 extern const struct xattr_handler *orangefs_xattr_handlers[];
 
 extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
-extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int orangefs_set_acl(struct user_namespace *mnt_userns,
+                           struct inode *inode, struct posix_acl *acl,
+                           int type);
 
 /*
  * orangefs data structures
@@ -359,12 +361,13 @@ struct inode *orangefs_new_inode(struct super_block *sb,
                              struct orangefs_object_kref *ref);
 
 int __orangefs_setattr(struct inode *, struct iattr *);
-int orangefs_setattr(struct dentry *, struct iattr *);
+int orangefs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 
-int orangefs_getattr(const struct path *path, struct kstat *stat,
-                    u32 request_mask, unsigned int flags);
+int orangefs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                    struct kstat *stat, u32 request_mask, unsigned int flags);
 
-int orangefs_permission(struct inode *inode, int mask);
+int orangefs_permission(struct user_namespace *mnt_userns,
+                       struct inode *inode, int mask);
 
 int orangefs_update_time(struct inode *, struct timespec64 *, int);
 
index bdc285a..9a5b757 100644 (file)
@@ -526,6 +526,7 @@ out_unlock:
 }
 
 static int orangefs_xattr_set_default(const struct xattr_handler *handler,
+                                     struct user_namespace *mnt_userns,
                                      struct dentry *unused,
                                      struct inode *inode,
                                      const char *name,
index 0fed532..0b2891c 100644 (file)
@@ -93,9 +93,9 @@ int ovl_copy_xattr(struct super_block *sb, struct dentry *old,
                        continue; /* Discard */
                }
 retry:
-               size = vfs_getxattr(old, name, value, value_size);
+               size = vfs_getxattr(&init_user_ns, old, name, value, value_size);
                if (size == -ERANGE)
-                       size = vfs_getxattr(old, name, NULL, 0);
+                       size = vfs_getxattr(&init_user_ns, old, name, NULL, 0);
 
                if (size < 0) {
                        error = size;
@@ -115,7 +115,7 @@ retry:
                        goto retry;
                }
 
-               error = vfs_setxattr(new, name, value, size, 0);
+               error = vfs_setxattr(&init_user_ns, new, name, value, size, 0);
                if (error) {
                        if (error != -EOPNOTSUPP || ovl_must_copy_xattr(name))
                                break;
@@ -236,7 +236,7 @@ static int ovl_set_size(struct dentry *upperdentry, struct kstat *stat)
                .ia_size = stat->size,
        };
 
-       return notify_change(upperdentry, &attr, NULL);
+       return notify_change(&init_user_ns, upperdentry, &attr, NULL);
 }
 
 static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
@@ -248,7 +248,7 @@ static int ovl_set_timestamps(struct dentry *upperdentry, struct kstat *stat)
                .ia_mtime = stat->mtime,
        };
 
-       return notify_change(upperdentry, &attr, NULL);
+       return notify_change(&init_user_ns, upperdentry, &attr, NULL);
 }
 
 int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
@@ -260,7 +260,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
                        .ia_valid = ATTR_MODE,
                        .ia_mode = stat->mode,
                };
-               err = notify_change(upperdentry, &attr, NULL);
+               err = notify_change(&init_user_ns, upperdentry, &attr, NULL);
        }
        if (!err) {
                struct iattr attr = {
@@ -268,7 +268,7 @@ int ovl_set_attr(struct dentry *upperdentry, struct kstat *stat)
                        .ia_uid = stat->uid,
                        .ia_gid = stat->gid,
                };
-               err = notify_change(upperdentry, &attr, NULL);
+               err = notify_change(&init_user_ns, upperdentry, &attr, NULL);
        }
        if (!err)
                ovl_set_timestamps(upperdentry, stat);
@@ -796,7 +796,7 @@ static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
        ssize_t res;
        char *buf;
 
-       res = vfs_getxattr(dentry, name, NULL, 0);
+       res = vfs_getxattr(&init_user_ns, dentry, name, NULL, 0);
        if (res == -ENODATA || res == -EOPNOTSUPP)
                res = 0;
 
@@ -805,7 +805,7 @@ static ssize_t ovl_getxattr(struct dentry *dentry, char *name, char **value)
                if (!buf)
                        return -ENOMEM;
 
-               res = vfs_getxattr(dentry, name, buf, res);
+               res = vfs_getxattr(&init_user_ns, dentry, name, buf, res);
                if (res < 0)
                        kfree(buf);
                else
@@ -847,8 +847,8 @@ static int ovl_copy_up_meta_inode_data(struct ovl_copy_up_ctx *c)
         * don't want that to happen for normal copy-up operation.
         */
        if (capability) {
-               err = vfs_setxattr(upperpath.dentry, XATTR_NAME_CAPS,
-                                  capability, cap_size, 0);
+               err = vfs_setxattr(&init_user_ns, upperpath.dentry,
+                                  XATTR_NAME_CAPS, capability, cap_size, 0);
                if (err)
                        goto out_free;
        }
index d1efa3a..836f14b 100644 (file)
@@ -449,7 +449,7 @@ static int ovl_set_upper_acl(struct dentry *upperdentry, const char *name,
        if (err < 0)
                goto out_free;
 
-       err = vfs_setxattr(upperdentry, name, buffer, size, XATTR_CREATE);
+       err = vfs_setxattr(&init_user_ns, upperdentry, name, buffer, size, XATTR_CREATE);
 out_free:
        kfree(buffer);
        return err;
@@ -508,7 +508,7 @@ static int ovl_create_over_whiteout(struct dentry *dentry, struct inode *inode,
                        .ia_mode = cattr->mode,
                };
                inode_lock(newdentry->d_inode);
-               err = notify_change(newdentry, &attr, NULL);
+               err = notify_change(&init_user_ns, newdentry, &attr, NULL);
                inode_unlock(newdentry->d_inode);
                if (err)
                        goto out_cleanup;
@@ -636,7 +636,7 @@ static int ovl_create_object(struct dentry *dentry, int mode, dev_t rdev,
        inode->i_state |= I_CREATING;
        spin_unlock(&inode->i_lock);
 
-       inode_init_owner(inode, dentry->d_parent->d_inode, mode);
+       inode_init_owner(&init_user_ns, inode, dentry->d_parent->d_inode, mode);
        attr.mode = inode->i_mode;
 
        err = ovl_create_or_link(dentry, inode, &attr, false);
@@ -650,19 +650,20 @@ out:
        return err;
 }
 
-static int ovl_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                     bool excl)
+static int ovl_create(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, bool excl)
 {
        return ovl_create_object(dentry, (mode & 07777) | S_IFREG, 0, NULL);
 }
 
-static int ovl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ovl_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode)
 {
        return ovl_create_object(dentry, (mode & 07777) | S_IFDIR, 0, NULL);
 }
 
-static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
-                    dev_t rdev)
+static int ovl_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        /* Don't allow creation of "whiteout" on overlay */
        if (S_ISCHR(mode) && rdev == WHITEOUT_DEV)
@@ -671,8 +672,8 @@ static int ovl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
        return ovl_create_object(dentry, mode, rdev, NULL);
 }
 
-static int ovl_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *link)
+static int ovl_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, const char *link)
 {
        return ovl_create_object(dentry, S_IFLNK, 0, link);
 }
@@ -821,9 +822,9 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir,
                goto out_dput_upper;
 
        if (is_dir)
-               err = vfs_rmdir(dir, upper);
+               err = vfs_rmdir(&init_user_ns, dir, upper);
        else
-               err = vfs_unlink(dir, upper, NULL);
+               err = vfs_unlink(&init_user_ns, dir, upper, NULL);
        ovl_dir_modified(dentry->d_parent, ovl_type_origin(dentry));
 
        /*
@@ -1069,9 +1070,9 @@ static int ovl_set_redirect(struct dentry *dentry, bool samedir)
        return err;
 }
 
-static int ovl_rename(struct inode *olddir, struct dentry *old,
-                     struct inode *newdir, struct dentry *new,
-                     unsigned int flags)
+static int ovl_rename(struct user_namespace *mnt_userns, struct inode *olddir,
+                     struct dentry *old, struct inode *newdir,
+                     struct dentry *new, unsigned int flags)
 {
        int err;
        struct dentry *old_upperdir;
index 077d3ad..dbfb35f 100644 (file)
@@ -50,11 +50,11 @@ static struct file *ovl_open_realfile(const struct file *file,
                acc_mode |= MAY_APPEND;
 
        old_cred = ovl_override_creds(inode->i_sb);
-       err = inode_permission(realinode, MAY_OPEN | acc_mode);
+       err = inode_permission(&init_user_ns, realinode, MAY_OPEN | acc_mode);
        if (err) {
                realfile = ERR_PTR(err);
        } else {
-               if (!inode_owner_or_capable(realinode))
+               if (!inode_owner_or_capable(&init_user_ns, realinode))
                        flags &= ~O_NOATIME;
 
                realfile = open_with_fake_path(&file->f_path, flags, realinode,
@@ -521,7 +521,7 @@ static long ovl_ioctl_set_flags(struct file *file, unsigned int cmd,
        long ret;
        struct inode *inode = file_inode(file);
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                return -EACCES;
 
        ret = mnt_want_write_file(file);
index cf41bcb..003cf83 100644 (file)
 #include "overlayfs.h"
 
 
-int ovl_setattr(struct dentry *dentry, struct iattr *attr)
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct iattr *attr)
 {
        int err;
        bool full_copy_up = false;
        struct dentry *upperdentry;
        const struct cred *old_cred;
 
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                return err;
 
@@ -79,7 +80,7 @@ int ovl_setattr(struct dentry *dentry, struct iattr *attr)
 
                inode_lock(upperdentry->d_inode);
                old_cred = ovl_override_creds(dentry->d_sb);
-               err = notify_change(upperdentry, attr, NULL);
+               err = notify_change(&init_user_ns, upperdentry, attr, NULL);
                revert_creds(old_cred);
                if (!err)
                        ovl_copyattr(upperdentry->d_inode, dentry->d_inode);
@@ -154,8 +155,8 @@ static int ovl_map_dev_ino(struct dentry *dentry, struct kstat *stat, int fsid)
        return 0;
 }
 
-int ovl_getattr(const struct path *path, struct kstat *stat,
-               u32 request_mask, unsigned int flags)
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+               struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct dentry *dentry = path->dentry;
        enum ovl_path_type type;
@@ -277,7 +278,8 @@ out:
        return err;
 }
 
-int ovl_permission(struct inode *inode, int mask)
+int ovl_permission(struct user_namespace *mnt_userns,
+                  struct inode *inode, int mask)
 {
        struct inode *upperinode = ovl_inode_upper(inode);
        struct inode *realinode = upperinode ?: ovl_inode_lower(inode);
@@ -294,7 +296,7 @@ int ovl_permission(struct inode *inode, int mask)
         * Check overlay inode with the creds of task and underlying inode
         * with creds of mounter
         */
-       err = generic_permission(inode, mask);
+       err = generic_permission(&init_user_ns, inode, mask);
        if (err)
                return err;
 
@@ -305,7 +307,7 @@ int ovl_permission(struct inode *inode, int mask)
                /* Make sure mounter can read file for copy up later */
                mask |= MAY_READ;
        }
-       err = inode_permission(realinode, mask);
+       err = inode_permission(&init_user_ns, realinode, mask);
        revert_creds(old_cred);
 
        return err;
@@ -353,7 +355,7 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
 
        if (!value && !upperdentry) {
                old_cred = ovl_override_creds(dentry->d_sb);
-               err = vfs_getxattr(realdentry, name, NULL, 0);
+               err = vfs_getxattr(&init_user_ns, realdentry, name, NULL, 0);
                revert_creds(old_cred);
                if (err < 0)
                        goto out_drop_write;
@@ -369,10 +371,11 @@ int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
 
        old_cred = ovl_override_creds(dentry->d_sb);
        if (value)
-               err = vfs_setxattr(realdentry, name, value, size, flags);
+               err = vfs_setxattr(&init_user_ns, realdentry, name, value, size,
+                                  flags);
        else {
                WARN_ON(flags != XATTR_REPLACE);
-               err = vfs_removexattr(realdentry, name);
+               err = vfs_removexattr(&init_user_ns, realdentry, name);
        }
        revert_creds(old_cred);
 
@@ -394,7 +397,7 @@ int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
                ovl_i_dentry_upper(inode) ?: ovl_dentry_lower(dentry);
 
        old_cred = ovl_override_creds(dentry->d_sb);
-       res = vfs_getxattr(realdentry, name, value, size);
+       res = vfs_getxattr(&init_user_ns, realdentry, name, value, size);
        revert_creds(old_cred);
        return res;
 }
index cb4e2d6..95cff83 100644 (file)
@@ -123,7 +123,7 @@ static inline const char *ovl_xattr(struct ovl_fs *ofs, enum ovl_xattr ox)
 
 static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
 {
-       int err = vfs_rmdir(dir, dentry);
+       int err = vfs_rmdir(&init_user_ns, dir, dentry);
 
        pr_debug("rmdir(%pd2) = %i\n", dentry, err);
        return err;
@@ -131,7 +131,7 @@ static inline int ovl_do_rmdir(struct inode *dir, struct dentry *dentry)
 
 static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
 {
-       int err = vfs_unlink(dir, dentry, NULL);
+       int err = vfs_unlink(&init_user_ns, dir, dentry, NULL);
 
        pr_debug("unlink(%pd2) = %i\n", dentry, err);
        return err;
@@ -140,7 +140,7 @@ static inline int ovl_do_unlink(struct inode *dir, struct dentry *dentry)
 static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
                              struct dentry *new_dentry)
 {
-       int err = vfs_link(old_dentry, dir, new_dentry, NULL);
+       int err = vfs_link(old_dentry, &init_user_ns, dir, new_dentry, NULL);
 
        pr_debug("link(%pd2, %pd2) = %i\n", old_dentry, new_dentry, err);
        return err;
@@ -149,7 +149,7 @@ static inline int ovl_do_link(struct dentry *old_dentry, struct inode *dir,
 static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
                                umode_t mode)
 {
-       int err = vfs_create(dir, dentry, mode, true);
+       int err = vfs_create(&init_user_ns, dir, dentry, mode, true);
 
        pr_debug("create(%pd2, 0%o) = %i\n", dentry, mode, err);
        return err;
@@ -158,7 +158,7 @@ static inline int ovl_do_create(struct inode *dir, struct dentry *dentry,
 static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
                               umode_t mode)
 {
-       int err = vfs_mkdir(dir, dentry, mode);
+       int err = vfs_mkdir(&init_user_ns, dir, dentry, mode);
        pr_debug("mkdir(%pd2, 0%o) = %i\n", dentry, mode, err);
        return err;
 }
@@ -166,7 +166,7 @@ static inline int ovl_do_mkdir(struct inode *dir, struct dentry *dentry,
 static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
                               umode_t mode, dev_t dev)
 {
-       int err = vfs_mknod(dir, dentry, mode, dev);
+       int err = vfs_mknod(&init_user_ns, dir, dentry, mode, dev);
 
        pr_debug("mknod(%pd2, 0%o, 0%o) = %i\n", dentry, mode, dev, err);
        return err;
@@ -175,7 +175,7 @@ static inline int ovl_do_mknod(struct inode *dir, struct dentry *dentry,
 static inline int ovl_do_symlink(struct inode *dir, struct dentry *dentry,
                                 const char *oldname)
 {
-       int err = vfs_symlink(dir, dentry, oldname);
+       int err = vfs_symlink(&init_user_ns, dir, dentry, oldname);
 
        pr_debug("symlink(\"%s\", %pd2) = %i\n", oldname, dentry, err);
        return err;
@@ -186,7 +186,7 @@ static inline ssize_t ovl_do_getxattr(struct ovl_fs *ofs, struct dentry *dentry,
                                      size_t size)
 {
        const char *name = ovl_xattr(ofs, ox);
-       return vfs_getxattr(dentry, name, value, size);
+       return vfs_getxattr(&init_user_ns, dentry, name, value, size);
 }
 
 static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
@@ -194,7 +194,7 @@ static inline int ovl_do_setxattr(struct ovl_fs *ofs, struct dentry *dentry,
                                  size_t size)
 {
        const char *name = ovl_xattr(ofs, ox);
-       int err = vfs_setxattr(dentry, name, value, size, 0);
+       int err = vfs_setxattr(&init_user_ns, dentry, name, value, size, 0);
        pr_debug("setxattr(%pd2, \"%s\", \"%*pE\", %zu, 0) = %i\n",
                 dentry, name, min((int)size, 48), value, size, err);
        return err;
@@ -204,7 +204,7 @@ static inline int ovl_do_removexattr(struct ovl_fs *ofs, struct dentry *dentry,
                                     enum ovl_xattr ox)
 {
        const char *name = ovl_xattr(ofs, ox);
-       int err = vfs_removexattr(dentry, name);
+       int err = vfs_removexattr(&init_user_ns, dentry, name);
        pr_debug("removexattr(%pd2, \"%s\") = %i\n", dentry, name, err);
        return err;
 }
@@ -214,9 +214,18 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
                                unsigned int flags)
 {
        int err;
+       struct renamedata rd = {
+               .old_mnt_userns = &init_user_ns,
+               .old_dir        = olddir,
+               .old_dentry     = olddentry,
+               .new_mnt_userns = &init_user_ns,
+               .new_dir        = newdir,
+               .new_dentry     = newdentry,
+               .flags          = flags,
+       };
 
        pr_debug("rename(%pd2, %pd2, 0x%x)\n", olddentry, newdentry, flags);
-       err = vfs_rename(olddir, olddentry, newdir, newdentry, NULL, flags);
+       err = vfs_rename(&rd);
        if (err) {
                pr_debug("...rename(%pd2, %pd2, ...) = %i\n",
                         olddentry, newdentry, err);
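vfs_rename() now takes a single struct renamedata instead of six positional arguments, which makes room for separate source and destination mount user namespaces (a rename may involve two mounts with different idmappings). Judging from the initializer above, the struct plausibly looks like the sketch below; the delegated_inode field is an assumption carried over from the old vfs_rename() signature:

    struct renamedata {
            struct user_namespace *old_mnt_userns;
            struct inode *old_dir;
            struct dentry *old_dentry;
            struct user_namespace *new_mnt_userns;
            struct inode *new_dir;
            struct dentry *new_dentry;
            struct inode **delegated_inode;
            unsigned int flags;
    };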
@@ -226,14 +235,14 @@ static inline int ovl_do_rename(struct inode *olddir, struct dentry *olddentry,
 
 static inline int ovl_do_whiteout(struct inode *dir, struct dentry *dentry)
 {
-       int err = vfs_whiteout(dir, dentry);
+       int err = vfs_whiteout(&init_user_ns, dir, dentry);
        pr_debug("whiteout(%pd2) = %i\n", dentry, err);
        return err;
 }
 
 static inline struct dentry *ovl_do_tmpfile(struct dentry *dentry, umode_t mode)
 {
-       struct dentry *ret = vfs_tmpfile(dentry, mode, 0);
+       struct dentry *ret = vfs_tmpfile(&init_user_ns, dentry, mode, 0);
        int err = PTR_ERR_OR_ZERO(ret);
 
        pr_debug("tmpfile(%pd2, 0%o) = %i\n", dentry, mode, err);
@@ -436,10 +445,12 @@ int ovl_set_nlink_lower(struct dentry *dentry);
 unsigned int ovl_get_nlink(struct ovl_fs *ofs, struct dentry *lowerdentry,
                           struct dentry *upperdentry,
                           unsigned int fallback);
-int ovl_setattr(struct dentry *dentry, struct iattr *attr);
-int ovl_getattr(const struct path *path, struct kstat *stat,
-               u32 request_mask, unsigned int flags);
-int ovl_permission(struct inode *inode, int mask);
+int ovl_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct iattr *attr);
+int ovl_getattr(struct user_namespace *mnt_userns, const struct path *path,
+               struct kstat *stat, u32 request_mask, unsigned int flags);
+int ovl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                  int mask);
 int ovl_xattr_set(struct dentry *dentry, struct inode *inode, const char *name,
                  const void *value, size_t size, int flags);
 int ovl_xattr_get(struct dentry *dentry, struct inode *inode, const char *name,
index d58b8f2..fdd72f1 100644 (file)
@@ -803,17 +803,19 @@ retry:
                 * allowed as upper are limited to "normal" ones, where checking
                 * for the above two errors is sufficient.
                 */
-               err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_DEFAULT);
+               err = vfs_removexattr(&init_user_ns, work,
+                                     XATTR_NAME_POSIX_ACL_DEFAULT);
                if (err && err != -ENODATA && err != -EOPNOTSUPP)
                        goto out_dput;
 
-               err = vfs_removexattr(work, XATTR_NAME_POSIX_ACL_ACCESS);
+               err = vfs_removexattr(&init_user_ns, work,
+                                     XATTR_NAME_POSIX_ACL_ACCESS);
                if (err && err != -ENODATA && err != -EOPNOTSUPP)
                        goto out_dput;
 
                /* Clear any inherited mode bits */
                inode_lock(work->d_inode);
-               err = notify_change(work, &attr, NULL);
+               err = notify_change(&init_user_ns, work, &attr, NULL);
                inode_unlock(work->d_inode);
                if (err)
                        goto out_dput;
@@ -865,6 +867,10 @@ static int ovl_mount_dir_noesc(const char *name, struct path *path)
                pr_err("filesystem on '%s' not supported\n", name);
                goto out_put;
        }
+       if (mnt_user_ns(path->mnt) != &init_user_ns) {
+               pr_err("idmapped layers are currently not supported\n");
+               goto out_put;
+       }
        if (!d_is_dir(path->dentry)) {
                pr_err("'%s' not a directory\n", name);
                goto out_put;
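Refusing idmapped layers at mount time is what makes the blanket &init_user_ns arguments in the overlayfs hunks correct: every accepted layer is guaranteed to carry the identity mapping, so the converted helpers behave exactly as the old API did. The accessor being tested hangs the idmapping off the vfsmount rather than the superblock, so two mounts of the same filesystem can map ids differently; roughly:

    /* rough shape of the accessor assumed by this check */
    static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt)
    {
            return mnt->mnt_userns;
    }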
@@ -989,6 +995,7 @@ ovl_posix_acl_xattr_get(const struct xattr_handler *handler,
 
 static int __maybe_unused
 ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
+                       struct user_namespace *mnt_userns,
                        struct dentry *dentry, struct inode *inode,
                        const char *name, const void *value,
                        size_t size, int flags)
@@ -1014,7 +1021,7 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
                goto out_acl_release;
        }
        err = -EPERM;
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(&init_user_ns, inode))
                goto out_acl_release;
 
        posix_acl_release(acl);
@@ -1026,10 +1033,10 @@ ovl_posix_acl_xattr_set(const struct xattr_handler *handler,
        if (unlikely(inode->i_mode & S_ISGID) &&
            handler->flags == ACL_TYPE_ACCESS &&
            !in_group_p(inode->i_gid) &&
-           !capable_wrt_inode_uidgid(inode, CAP_FSETID)) {
+           !capable_wrt_inode_uidgid(&init_user_ns, inode, CAP_FSETID)) {
                struct iattr iattr = { .ia_valid = ATTR_KILL_SGID };
 
-               err = ovl_setattr(dentry, &iattr);
+               err = ovl_setattr(&init_user_ns, dentry, &iattr);
                if (err)
                        return err;
        }
@@ -1053,6 +1060,7 @@ static int ovl_own_xattr_get(const struct xattr_handler *handler,
 }
 
 static int ovl_own_xattr_set(const struct xattr_handler *handler,
+                            struct user_namespace *mnt_userns,
                             struct dentry *dentry, struct inode *inode,
                             const char *name, const void *value,
                             size_t size, int flags)
@@ -1068,6 +1076,7 @@ static int ovl_other_xattr_get(const struct xattr_handler *handler,
 }
 
 static int ovl_other_xattr_set(const struct xattr_handler *handler,
+                              struct user_namespace *mnt_userns,
                               struct dentry *dentry, struct inode *inode,
                               const char *name, const void *value,
                               size_t size, int flags)
index 9826b00..7f5a01a 100644 (file)
@@ -479,12 +479,12 @@ struct file *ovl_path_open(struct path *path, int flags)
                BUG();
        }
 
-       err = inode_permission(inode, acc_mode | MAY_OPEN);
+       err = inode_permission(&init_user_ns, inode, acc_mode | MAY_OPEN);
        if (err)
                return ERR_PTR(err);
 
        /* O_NOATIME is an optimization, don't fail if not permitted */
-       if (inode_owner_or_capable(inode))
+       if (inode_owner_or_capable(&init_user_ns, inode))
                flags |= O_NOATIME;
 
        return dentry_open(path, flags, current_cred());
index 39c9684..bfd946a 100644 (file)
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL(generic_pipe_buf_try_steal);
  *
  * Description:
  *     This function grabs an extra reference to @buf. It's used in
- *     in the tee() system call, when we duplicate the buffers in one
+ *     the tee() system call, when we duplicate the buffers in one
  *     pipe into another.
  */
 bool generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
index 26f74e0..988f1aa 100644 (file)
@@ -12,7 +12,7 @@
 
 #define IS_MNT_SHARED(m) ((m)->mnt.mnt_flags & MNT_SHARED)
 #define IS_MNT_SLAVE(m) ((m)->mnt_master)
-#define IS_MNT_NEW(m)  (!(m)->mnt_ns)
+#define IS_MNT_NEW(m)  (!(m)->mnt_ns || is_anon_ns((m)->mnt_ns))
 #define CLEAR_MNT_SHARED(m) ((m)->mnt.mnt_flags &= ~MNT_SHARED)
 #define IS_MNT_UNBINDABLE(m) ((m)->mnt.mnt_flags & MNT_UNBINDABLE)
 #define IS_MNT_MARKED(m) ((m)->mnt.mnt_flags & MNT_MARKED)
index 95882b3..f3309a7 100644 (file)
@@ -345,10 +345,13 @@ EXPORT_SYMBOL(posix_acl_from_mode);
  * by the acl. Returns -E... otherwise.
  */
 int
-posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
+posix_acl_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                    const struct posix_acl *acl, int want)
 {
        const struct posix_acl_entry *pa, *pe, *mask_obj;
        int found = 0;
+       kuid_t uid;
+       kgid_t gid;
 
        want &= MAY_READ | MAY_WRITE | MAY_EXEC;
 
@@ -356,22 +359,26 @@ posix_acl_permission(struct inode *inode, const struct posix_acl *acl, int want)
                 switch(pa->e_tag) {
                         case ACL_USER_OBJ:
                                /* (May have been checked already) */
-                               if (uid_eq(inode->i_uid, current_fsuid()))
+                               uid = i_uid_into_mnt(mnt_userns, inode);
+                               if (uid_eq(uid, current_fsuid()))
                                         goto check_perm;
                                 break;
                         case ACL_USER:
-                               if (uid_eq(pa->e_uid, current_fsuid()))
+                               uid = kuid_into_mnt(mnt_userns, pa->e_uid);
+                               if (uid_eq(uid, current_fsuid()))
                                         goto mask;
                                break;
                         case ACL_GROUP_OBJ:
-                                if (in_group_p(inode->i_gid)) {
+                               gid = i_gid_into_mnt(mnt_userns, inode);
+                               if (in_group_p(gid)) {
                                        found = 1;
                                        if ((pa->e_perm & want) == want)
                                                goto mask;
                                 }
                                break;
                         case ACL_GROUP:
-                               if (in_group_p(pa->e_gid)) {
+                               gid = kgid_into_mnt(mnt_userns, pa->e_gid);
+                               if (in_group_p(gid)) {
                                        found = 1;
                                        if ((pa->e_perm & want) == want)
                                                goto mask;
@@ -551,8 +558,22 @@ __posix_acl_chmod(struct posix_acl **acl, gfp_t gfp, umode_t mode)
 }
 EXPORT_SYMBOL(__posix_acl_chmod);
 
+/**
+ * posix_acl_chmod - chmod a posix acl
+ *
+ * @mnt_userns:        user namespace of the mount @inode was found from
+ * @inode:     inode to check permissions on
+ * @mode:      the new mode of @inode
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode, simply pass init_user_ns.
+ */
 int
-posix_acl_chmod(struct inode *inode, umode_t mode)
+posix_acl_chmod(struct user_namespace *mnt_userns, struct inode *inode,
+               umode_t mode)
 {
        struct posix_acl *acl;
        int ret = 0;
@@ -572,7 +593,7 @@ posix_acl_chmod(struct inode *inode, umode_t mode)
        ret = __posix_acl_chmod(&acl, GFP_KERNEL, mode);
        if (ret)
                return ret;
-       ret = inode->i_op->set_acl(inode, acl, ACL_TYPE_ACCESS);
+       ret = inode->i_op->set_acl(mnt_userns, inode, acl, ACL_TYPE_ACCESS);
        posix_acl_release(acl);
        return ret;
 }
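A converted filesystem threads its mnt_userns argument straight through. Sketching the tail of a typical ->setattr once a filesystem opts in (the orangefs hunk earlier in this diff has the same shape, with &init_user_ns in place of the real argument):

    setattr_copy(mnt_userns, inode, attr);
    mark_inode_dirty(inode);
    if (attr->ia_valid & ATTR_MODE)
            error = posix_acl_chmod(mnt_userns, inode, inode->i_mode);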
@@ -631,9 +652,10 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
 
 /**
  * posix_acl_update_mode  -  update mode in set_acl
- * @inode: target inode
- * @mode_p: mode (pointer) for update
- * @acl: acl pointer
+ * @mnt_userns:        user namespace of the mount @inode was found from
+ * @inode:     target inode
+ * @mode_p:    mode (pointer) for update
+ * @acl:       acl pointer
  *
  * Update the file mode when setting an ACL: compute the new file permission
  * bits based on the ACL.  In addition, if the ACL is equivalent to the new
@@ -642,9 +664,16 @@ EXPORT_SYMBOL_GPL(posix_acl_create);
  * As with chmod, clear the setgid bit if the caller is not in the owning group
  * or capable of CAP_FSETID (see inode_change_ok).
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode, simply pass init_user_ns.
+ *
  * Called from set_acl inode operations.
  */
-int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
+int posix_acl_update_mode(struct user_namespace *mnt_userns,
+                         struct inode *inode, umode_t *mode_p,
                          struct posix_acl **acl)
 {
        umode_t mode = inode->i_mode;
@@ -655,8 +684,8 @@ int posix_acl_update_mode(struct inode *inode, umode_t *mode_p,
                return error;
        if (error == 0)
                *acl = NULL;
-       if (!in_group_p(inode->i_gid) &&
-           !capable_wrt_inode_uidgid(inode, CAP_FSETID))
+       if (!in_group_p(i_gid_into_mnt(mnt_userns, inode)) &&
+           !capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FSETID))
                mode &= ~S_ISGID;
        *mode_p = mode;
        return 0;
@@ -668,7 +697,8 @@ EXPORT_SYMBOL(posix_acl_update_mode);
  */
 static void posix_acl_fix_xattr_userns(
        struct user_namespace *to, struct user_namespace *from,
-       void *value, size_t size)
+       struct user_namespace *mnt_userns,
+       void *value, size_t size, bool from_user)
 {
        struct posix_acl_xattr_header *header = value;
        struct posix_acl_xattr_entry *entry = (void *)(header + 1), *end;
@@ -693,10 +723,18 @@ static void posix_acl_fix_xattr_userns(
                switch(le16_to_cpu(entry->e_tag)) {
                case ACL_USER:
                        uid = make_kuid(from, le32_to_cpu(entry->e_id));
+                       if (from_user)
+                               uid = kuid_from_mnt(mnt_userns, uid);
+                       else
+                               uid = kuid_into_mnt(mnt_userns, uid);
                        entry->e_id = cpu_to_le32(from_kuid(to, uid));
                        break;
                case ACL_GROUP:
                        gid = make_kgid(from, le32_to_cpu(entry->e_id));
+                       if (from_user)
+                               gid = kgid_from_mnt(mnt_userns, gid);
+                       else
+                               gid = kgid_into_mnt(mnt_userns, gid);
                        entry->e_id = cpu_to_le32(from_kgid(to, gid));
                        break;
                default:
@@ -705,20 +743,24 @@ static void posix_acl_fix_xattr_userns(
        }
 }
 
-void posix_acl_fix_xattr_from_user(void *value, size_t size)
+void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+                                  void *value, size_t size)
 {
        struct user_namespace *user_ns = current_user_ns();
-       if (user_ns == &init_user_ns)
+       if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
                return;
-       posix_acl_fix_xattr_userns(&init_user_ns, user_ns, value, size);
+       posix_acl_fix_xattr_userns(&init_user_ns, user_ns, mnt_userns, value,
+                                  size, true);
 }
 
-void posix_acl_fix_xattr_to_user(void *value, size_t size)
+void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+                                void *value, size_t size)
 {
        struct user_namespace *user_ns = current_user_ns();
-       if (user_ns == &init_user_ns)
+       if ((user_ns == &init_user_ns) && (mnt_userns == &init_user_ns))
                return;
-       posix_acl_fix_xattr_userns(user_ns, &init_user_ns, value, size);
+       posix_acl_fix_xattr_userns(user_ns, &init_user_ns, mnt_userns, value,
+                                  size, false);
 }
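The xattr fixups now compose two translations, the caller's user namespace (as before) and the mount's idmapping, with the direction selected by from_user. A worked example under a hypothetical idmapping that exposes filesystem uid 0 as uid 1000:

    /*
     * Hypothetical mount idmapping: filesystem uid 0 <-> caller uid 1000.
     *
     * setxattr of an ACL_USER entry for uid 1000 (from_user == true):
     *     make_kuid(caller_ns, 1000)          -> kuid 1000
     *     kuid_from_mnt(mnt_userns, kuid)     -> kuid 0      (stored)
     *
     * getxattr runs the inverse (from_user == false):
     *     kuid_into_mnt(mnt_userns, kuid 0)   -> kuid 1000   (shown)
     */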
 
 /*
@@ -858,7 +900,8 @@ posix_acl_xattr_get(const struct xattr_handler *handler,
 }
 
 int
-set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
+set_posix_acl(struct user_namespace *mnt_userns, struct inode *inode,
+             int type, struct posix_acl *acl)
 {
        if (!IS_POSIXACL(inode))
                return -EOPNOTSUPP;
@@ -867,7 +910,7 @@ set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
 
        if (type == ACL_TYPE_DEFAULT && !S_ISDIR(inode->i_mode))
                return acl ? -EACCES : 0;
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(mnt_userns, inode))
                return -EPERM;
 
        if (acl) {
@@ -875,15 +918,16 @@ set_posix_acl(struct inode *inode, int type, struct posix_acl *acl)
                if (ret)
                        return ret;
        }
-       return inode->i_op->set_acl(inode, acl, type);
+       return inode->i_op->set_acl(mnt_userns, inode, acl, type);
 }
 EXPORT_SYMBOL(set_posix_acl);
 
 static int
 posix_acl_xattr_set(const struct xattr_handler *handler,
-                   struct dentry *unused, struct inode *inode,
-                   const char *name, const void *value,
-                   size_t size, int flags)
+                   struct user_namespace *mnt_userns,
+                   struct dentry *unused, struct inode *inode,
+                   const char *name, const void *value, size_t size,
+                   int flags)
 {
        struct posix_acl *acl = NULL;
        int ret;
@@ -893,7 +937,7 @@ posix_acl_xattr_set(const struct xattr_handler *handler,
                if (IS_ERR(acl))
                        return PTR_ERR(acl);
        }
-       ret = set_posix_acl(inode, handler->flags, acl);
+       ret = set_posix_acl(mnt_userns, inode, handler->flags, acl);
        posix_acl_release(acl);
        return ret;
 }
@@ -922,12 +966,13 @@ const struct xattr_handler posix_acl_default_xattr_handler = {
 };
 EXPORT_SYMBOL_GPL(posix_acl_default_xattr_handler);
 
-int simple_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+int simple_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                  struct posix_acl *acl, int type)
 {
        int error;
 
        if (type == ACL_TYPE_ACCESS) {
-               error = posix_acl_update_mode(inode,
+               error = posix_acl_update_mode(mnt_userns, inode,
                                &inode->i_mode, &acl);
                if (error)
                        return error;
index b3422cd..3851bfc 100644 (file)
@@ -67,7 +67,6 @@
 #include <linux/mm.h>
 #include <linux/swap.h>
 #include <linux/rcupdate.h>
-#include <linux/kallsyms.h>
 #include <linux/stacktrace.h>
 #include <linux/resource.h>
 #include <linux/module.h>
@@ -386,19 +385,17 @@ static int proc_pid_wchan(struct seq_file *m, struct pid_namespace *ns,
                          struct pid *pid, struct task_struct *task)
 {
        unsigned long wchan;
-       char symname[KSYM_NAME_LEN];
 
-       if (!ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
-               goto print0;
+       if (ptrace_may_access(task, PTRACE_MODE_READ_FSCREDS))
+               wchan = get_wchan(task);
+       else
+               wchan = 0;
 
-       wchan = get_wchan(task);
-       if (wchan && !lookup_symbol_name(wchan, symname)) {
-               seq_puts(m, symname);
-               return 0;
-       }
+       if (wchan)
+               seq_printf(m, "%ps", (void *) wchan);
+       else
+               seq_putc(m, '0');
 
-print0:
-       seq_putc(m, '0');
        return 0;
 }
 #endif /* CONFIG_KALLSYMS */
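The wchan conversion drops the KSYM_NAME_LEN buffer and the lookup_symbol_name() round-trip in favour of the vsprintf symbol formats: %ps prints the bare symbol name for a text address, while %pS would append the offset. For instance:

    /* illustrative output: %ps -> "schedule", %pS -> "schedule+0x4c/0xd0" */
    pr_info("task sleeping in %ps\n", (void *)get_wchan(task));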
@@ -685,7 +682,8 @@ static int proc_fd_access_allowed(struct inode *inode)
        return allowed;
 }
 
-int proc_setattr(struct dentry *dentry, struct iattr *attr)
+int proc_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                struct iattr *attr)
 {
        int error;
        struct inode *inode = d_inode(dentry);
@@ -693,11 +691,11 @@ int proc_setattr(struct dentry *dentry, struct iattr *attr)
        if (attr->ia_valid & ATTR_MODE)
                return -EPERM;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
@@ -726,7 +724,8 @@ static bool has_pid_permissions(struct proc_fs_info *fs_info,
 }
 
 
-static int proc_pid_permission(struct inode *inode, int mask)
+static int proc_pid_permission(struct user_namespace *mnt_userns,
+                              struct inode *inode, int mask)
 {
        struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
        struct task_struct *task;
@@ -751,7 +750,7 @@ static int proc_pid_permission(struct inode *inode, int mask)
 
                return -EPERM;
        }
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
 
@@ -1927,14 +1926,14 @@ out_unlock:
        return NULL;
 }
 
-int pid_getattr(const struct path *path, struct kstat *stat,
-               u32 request_mask, unsigned int query_flags)
+int pid_getattr(struct user_namespace *mnt_userns, const struct path *path,
+               struct kstat *stat, u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct proc_fs_info *fs_info = proc_sb_info(inode->i_sb);
        struct task_struct *task;
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
 
        stat->uid = GLOBAL_ROOT_UID;
        stat->gid = GLOBAL_ROOT_GID;
@@ -3473,7 +3472,8 @@ int proc_pid_readdir(struct file *file, struct dir_context *ctx)
  * This function makes sure that the node is always accessible for members of
  * same thread group.
  */
-static int proc_tid_comm_permission(struct inode *inode, int mask)
+static int proc_tid_comm_permission(struct user_namespace *mnt_userns,
+                                   struct inode *inode, int mask)
 {
        bool is_same_tgroup;
        struct task_struct *task;
@@ -3492,7 +3492,7 @@ static int proc_tid_comm_permission(struct inode *inode, int mask)
                return 0;
        }
 
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
 static const struct inode_operations proc_tid_comm_inode_operations = {
@@ -3798,12 +3798,13 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx)
        return 0;
 }
 
-static int proc_task_getattr(const struct path *path, struct kstat *stat,
+static int proc_task_getattr(struct user_namespace *mnt_userns,
+                            const struct path *path, struct kstat *stat,
                             u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
        struct task_struct *p = get_proc_task(inode);
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
 
        if (p) {
                stat->nlink += get_nr_threads(p);
index cb51763..07fc4fa 100644 (file)
@@ -276,12 +276,13 @@ static struct dentry *proc_lookupfd(struct inode *dir, struct dentry *dentry,
  * /proc/pid/fd needs a special permission handler so that a process can still
  * access /proc/self/fd after it has executed a setuid().
  */
-int proc_fd_permission(struct inode *inode, int mask)
+int proc_fd_permission(struct user_namespace *mnt_userns,
+                      struct inode *inode, int mask)
 {
        struct task_struct *p;
        int rv;
 
-       rv = generic_permission(inode, mask);
+       rv = generic_permission(&init_user_ns, inode, mask);
        if (rv == 0)
                return rv;
 
index f371a60..c5a921a 100644 (file)
@@ -10,7 +10,8 @@ extern const struct inode_operations proc_fd_inode_operations;
 extern const struct file_operations proc_fdinfo_operations;
 extern const struct inode_operations proc_fdinfo_inode_operations;
 
-extern int proc_fd_permission(struct inode *inode, int mask);
+extern int proc_fd_permission(struct user_namespace *mnt_userns,
+                             struct inode *inode, int mask);
 
 static inline unsigned int proc_fd(struct inode *inode)
 {
index 6c0a05f..bc86aa8 100644 (file)
@@ -115,17 +115,18 @@ static bool pde_subdir_insert(struct proc_dir_entry *dir,
        return true;
 }
 
-static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
+static int proc_notify_change(struct user_namespace *mnt_userns,
+                             struct dentry *dentry, struct iattr *iattr)
 {
        struct inode *inode = d_inode(dentry);
        struct proc_dir_entry *de = PDE(inode);
        int error;
 
-       error = setattr_prepare(dentry, iattr);
+       error = setattr_prepare(&init_user_ns, dentry, iattr);
        if (error)
                return error;
 
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
        mark_inode_dirty(inode);
 
        proc_set_user(de, inode->i_uid, inode->i_gid);
@@ -133,7 +134,8 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr)
        return 0;
 }
 
-static int proc_getattr(const struct path *path, struct kstat *stat,
+static int proc_getattr(struct user_namespace *mnt_userns,
+                       const struct path *path, struct kstat *stat,
                        u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
@@ -145,7 +147,7 @@ static int proc_getattr(const struct path *path, struct kstat *stat,
                }
        }
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        return 0;
 }
 
index f60b379..03415f3 100644 (file)
@@ -162,8 +162,10 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *,
  * base.c
  */
 extern const struct dentry_operations pid_dentry_operations;
-extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int);
-extern int proc_setattr(struct dentry *, struct iattr *);
+extern int pid_getattr(struct user_namespace *, const struct path *,
+                      struct kstat *, u32, unsigned int);
+extern int proc_setattr(struct user_namespace *, struct dentry *,
+                       struct iattr *);
 extern void proc_pid_evict_inode(struct proc_inode *);
 extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t);
 extern void pid_update_inode(struct task_struct *, struct inode *);
index d6fc746..6fa761c 100644 (file)
@@ -129,15 +129,15 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
        show_val_kb(m, "AnonHugePages:  ",
-                   global_node_page_state(NR_ANON_THPS) * HPAGE_PMD_NR);
+                   global_node_page_state(NR_ANON_THPS));
        show_val_kb(m, "ShmemHugePages: ",
-                   global_node_page_state(NR_SHMEM_THPS) * HPAGE_PMD_NR);
+                   global_node_page_state(NR_SHMEM_THPS));
        show_val_kb(m, "ShmemPmdMapped: ",
-                   global_node_page_state(NR_SHMEM_PMDMAPPED) * HPAGE_PMD_NR);
+                   global_node_page_state(NR_SHMEM_PMDMAPPED));
        show_val_kb(m, "FileHugePages:  ",
-                   global_node_page_state(NR_FILE_THPS) * HPAGE_PMD_NR);
+                   global_node_page_state(NR_FILE_THPS));
        show_val_kb(m, "FilePmdMapped:  ",
-                   global_node_page_state(NR_FILE_PMDMAPPED) * HPAGE_PMD_NR);
+                   global_node_page_state(NR_FILE_PMDMAPPED));
 #endif
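These THP counters were converted elsewhere in this merge to be accounted in base pages rather than in huge pages, so the HPAGE_PMD_NR scaling at print time has to go; the reported values are unchanged. On x86-64, for example:

    /* one 2 MiB THP now adds 512 (base pages) to NR_ANON_THPS directly;
     * show_val_kb() shifts pages to KiB, 512 << (PAGE_SHIFT - 10) = 2048 kB,
     * the same figure the old "* HPAGE_PMD_NR" scaling produced */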
 
 #ifdef CONFIG_CMA
index 1860104..15c2e55 100644 (file)
@@ -289,7 +289,8 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir,
        return de;
 }
 
-static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat,
+static int proc_tgid_net_getattr(struct user_namespace *mnt_userns,
+                                const struct path *path, struct kstat *stat,
                                 u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
@@ -297,7 +298,7 @@ static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat,
 
        net = get_proc_task_net(inode);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
 
        if (net != NULL) {
                stat->nlink = net->proc_net->nlink;
index d2018f7..984e42f 100644 (file)
@@ -571,7 +571,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
        error = -ENOMEM;
        if (count >= KMALLOC_MAX_SIZE)
                goto out;
-       kbuf = kzalloc(count + 1, GFP_KERNEL);
+       kbuf = kvzalloc(count + 1, GFP_KERNEL);
        if (!kbuf)
                goto out;
 
@@ -600,7 +600,7 @@ static ssize_t proc_sys_call_handler(struct kiocb *iocb, struct iov_iter *iter,
 
        error = count;
 out_free_buf:
-       kfree(kbuf);
+       kvfree(kbuf);
 out:
        sysctl_head_finish(head);
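A sysctl write may be nearly KMALLOC_MAX_SIZE bytes, and a high-order kmalloc of that size can fail on a fragmented machine; kvzalloc() attempts kmalloc first and transparently falls back to vmalloc. The only contract change for the caller is the matching free:

    void *kbuf = kvzalloc(count + 1, GFP_KERNEL); /* kmalloc, then vmalloc */
    if (!kbuf)
            return -ENOMEM;
    /* ... fill and consume kbuf ... */
    kvfree(kbuf); /* correct for either backing allocator */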
 
@@ -785,7 +785,8 @@ out:
        return 0;
 }
 
-static int proc_sys_permission(struct inode *inode, int mask)
+static int proc_sys_permission(struct user_namespace *mnt_userns,
+                              struct inode *inode, int mask)
 {
        /*
         * sysctl entries that are not writeable,
@@ -813,7 +814,8 @@ static int proc_sys_permission(struct inode *inode, int mask)
        return error;
 }
 
-static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
+static int proc_sys_setattr(struct user_namespace *mnt_userns,
+                           struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
@@ -821,16 +823,17 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr)
        if (attr->ia_valid & (ATTR_MODE | ATTR_UID | ATTR_GID))
                return -EPERM;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
 
-static int proc_sys_getattr(const struct path *path, struct kstat *stat,
+static int proc_sys_getattr(struct user_namespace *mnt_userns,
+                           const struct path *path, struct kstat *stat,
                            u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = d_inode(path->dentry);
@@ -840,7 +843,7 @@ static int proc_sys_getattr(const struct path *path, struct kstat *stat,
        if (IS_ERR(head))
                return PTR_ERR(head);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        if (table)
                stat->mode = (stat->mode & S_IFMT) | table->mode;
 
index 5e444d4..c7e3b13 100644 (file)
@@ -308,10 +308,11 @@ void __init proc_root_init(void)
        register_filesystem(&proc_fs_type);
 }
 
-static int proc_root_getattr(const struct path *path, struct kstat *stat,
+static int proc_root_getattr(struct user_namespace *mnt_userns,
+                            const struct path *path, struct kstat *stat,
                             u32 request_mask, unsigned int query_flags)
 {
-       generic_fillattr(d_inode(path->dentry), stat);
+       generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
        stat->nlink = proc_root.nlink + nr_processes();
        return 0;
 }
index a401215..72cd69b 100644 (file)
@@ -16,13 +16,6 @@ static const char *proc_self_get_link(struct dentry *dentry,
        pid_t tgid = task_tgid_nr_ns(current, ns);
        char *name;
 
-       /*
-        * Not currently supported. Once we can inherit all of struct pid,
-        * we can allow this.
-        */
-       if (current->flags & PF_IO_WORKER)
-               return ERR_PTR(-EOPNOTSUPP);
-
        if (!tgid)
                return ERR_PTR(-ENOENT);
        /* max length of unsigned int in decimal + NULL term */
index 3cec6fb..e862cab 100644 (file)
@@ -1036,8 +1036,6 @@ struct clear_refs_private {
 
 #ifdef CONFIG_MEM_SOFT_DIRTY
 
-#define is_cow_mapping(flags) (((flags) & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE)
-
 static inline bool pte_is_pinned(struct vm_area_struct *vma, unsigned long addr, pte_t pte)
 {
        struct page *page;
index d56681d..a553273 100644 (file)
@@ -17,13 +17,6 @@ static const char *proc_thread_self_get_link(struct dentry *dentry,
        pid_t pid = task_pid_nr_ns(current, ns);
        char *name;
 
-       /*
-        * Not currently supported. Once we can inherit all of struct pid,
-        * we can allow this.
-        */
-       if (current->flags & PF_IO_WORKER)
-               return ERR_PTR(-EOPNOTSUPP);
-
        if (!pid)
                return ERR_PTR(-ENOENT);
        name = kmalloc(10 + 6 + 10 + 1, dentry ? GFP_KERNEL : GFP_ATOMIC);
index c3a345c..9a15334 100644 (file)
@@ -1503,11 +1503,8 @@ int vmcore_add_device_dump(struct vmcoredd_data *data)
        return 0;
 
 out_err:
-       if (buf)
-               vfree(buf);
-
-       if (dump)
-               vfree(dump);
+       vfree(buf);
+       vfree(dump);
 
        return ret;
 }
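Like kfree(), vfree() is defined to be a no-op when passed NULL, so the conditionals were redundant:

    vfree(buf); /* safe even when buf == NULL */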
index eafb757..392ef51 100644 (file)
@@ -79,6 +79,9 @@ static void show_mnt_opts(struct seq_file *m, struct vfsmount *mnt)
                if (mnt->mnt_flags & fs_infop->flag)
                        seq_puts(m, fs_infop->str);
        }
+
+       if (mnt_user_ns(mnt) != &init_user_ns)
+               seq_puts(m, ",idmapped");
 }
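With the idmapping surfaced as an ordinary mount option string, userspace can detect idmapped mounts without any new interface. A small, hypothetical userspace check:

    #include <stdio.h>
    #include <string.h>

    /* print every mount whose option string carries ",idmapped" */
    int main(void)
    {
            char line[1024];
            FILE *f = fopen("/proc/self/mounts", "r");

            if (!f)
                    return 1;
            while (fgets(line, sizeof(line), f))
                    if (strstr(line, ",idmapped"))
                            fputs(line, stdout);
            fclose(f);
            return 0;
    }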
 
 static inline void mangle(struct seq_file *m, const char *s)
index 93a217e..14658b0 100644 (file)
@@ -467,7 +467,7 @@ static struct dentry *pstore_mount(struct file_system_type *fs_type,
 static void pstore_kill_sb(struct super_block *sb)
 {
        mutex_lock(&pstore_sb_lock);
-       WARN_ON(pstore_sb != sb);
+       WARN_ON(pstore_sb && pstore_sb != sb);
 
        kill_litter_super(sb);
        pstore_sb = NULL;
index aa8e0b6..fff363b 100644 (file)
@@ -246,7 +246,7 @@ static int persistent_ram_init_ecc(struct persistent_ram_zone *prz,
                pr_info("error in header, %d\n", numerr);
                prz->corrected_bytes += numerr;
        } else if (numerr < 0) {
-               pr_info("uncorrectable error in header\n");
+               pr_info_ratelimited("uncorrectable error in header\n");
                prz->bad_blocks++;
        }
 
index 355523f..ba3525c 100644 (file)
@@ -22,7 +22,7 @@
 #include <linux/uaccess.h>
 #include "internal.h"
 
-static int ramfs_nommu_setattr(struct dentry *, struct iattr *);
+static int ramfs_nommu_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 static unsigned long ramfs_nommu_get_unmapped_area(struct file *file,
                                                   unsigned long addr,
                                                   unsigned long len,
@@ -158,14 +158,15 @@ static int ramfs_nommu_resize(struct inode *inode, loff_t newsize, loff_t size)
  * handle a change of attributes
  * - we're specifically interested in a change of size
  */
-static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
+static int ramfs_nommu_setattr(struct user_namespace *mnt_userns,
+                              struct dentry *dentry, struct iattr *ia)
 {
        struct inode *inode = d_inode(dentry);
        unsigned int old_ia_valid = ia->ia_valid;
        int ret = 0;
 
        /* POSIX UID/GID verification for setting inode attributes */
-       ret = setattr_prepare(dentry, ia);
+       ret = setattr_prepare(&init_user_ns, dentry, ia);
        if (ret)
                return ret;
 
@@ -185,7 +186,7 @@ static int ramfs_nommu_setattr(struct dentry *dentry, struct iattr *ia)
                }
        }
 
-       setattr_copy(inode, ia);
+       setattr_copy(&init_user_ns, inode, ia);
  out:
        ia->ia_valid = old_ia_valid;
        return ret;
index ee179a8..9ebd17d 100644 (file)
@@ -67,7 +67,7 @@ struct inode *ramfs_get_inode(struct super_block *sb,
 
        if (inode) {
                inode->i_ino = get_next_ino();
-               inode_init_owner(inode, dir, mode);
+               inode_init_owner(&init_user_ns, inode, dir, mode);
                inode->i_mapping->a_ops = &ramfs_aops;
                mapping_set_gfp_mask(inode->i_mapping, GFP_HIGHUSER);
                mapping_set_unevictable(inode->i_mapping);
@@ -101,7 +101,8 @@ struct inode *ramfs_get_inode(struct super_block *sb,
  */
 /* SMP-safe */
 static int
-ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+ramfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+           struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode * inode = ramfs_get_inode(dir->i_sb, dir, mode, dev);
        int error = -ENOSPC;
@@ -115,20 +116,23 @@ ramfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
        return error;
 }
 
-static int ramfs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
+static int ramfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
-       int retval = ramfs_mknod(dir, dentry, mode | S_IFDIR, 0);
+       int retval = ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFDIR, 0);
        if (!retval)
                inc_nlink(dir);
        return retval;
 }
 
-static int ramfs_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool excl)
+static int ramfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
-       return ramfs_mknod(dir, dentry, mode | S_IFREG, 0);
+       return ramfs_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
 }
 
-static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char * symname)
+static int ramfs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        struct inode *inode;
        int error = -ENOSPC;
@@ -147,6 +151,18 @@ static int ramfs_symlink(struct inode * dir, struct dentry *dentry, const char *
        return error;
 }
 
+static int ramfs_tmpfile(struct user_namespace *mnt_userns,
+                        struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+       struct inode *inode;
+
+       inode = ramfs_get_inode(dir->i_sb, dir, mode, 0);
+       if (!inode)
+               return -ENOSPC;
+       d_tmpfile(dentry, inode);
+       return 0;
+}
+
 static const struct inode_operations ramfs_dir_inode_operations = {
        .create         = ramfs_create,
        .lookup         = simple_lookup,
@@ -157,6 +173,7 @@ static const struct inode_operations ramfs_dir_inode_operations = {
        .rmdir          = simple_rmdir,
        .mknod          = ramfs_mknod,
        .rename         = simple_rename,
+       .tmpfile        = ramfs_tmpfile,
 };
 
 /*
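
The new ->tmpfile hook creates an unnamed inode and attaches it with d_tmpfile(), which is what lets O_TMPFILE succeed on ramfs. A small userspace sketch, assuming /mnt/ramfs is a ramfs mount point:

	#define _GNU_SOURCE
	#include <fcntl.h>
	#include <unistd.h>

	int main(void)
	{
		/* the directory must be on a filesystem implementing ->tmpfile */
		int fd = open("/mnt/ramfs", O_TMPFILE | O_RDWR, 0600);

		if (fd < 0)
			return 1;
		if (write(fd, "scratch", 7) != 7) {	/* unnamed, gone on close */
			close(fd);
			return 1;
		}
		return close(fd);
	}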
index 0c1c847..fd58618 100644 (file)
@@ -49,7 +49,8 @@ static inline int reiserfs_acl_count(size_t size)
 
 #ifdef CONFIG_REISERFS_FS_POSIX_ACL
 struct posix_acl *reiserfs_get_acl(struct inode *inode, int type);
-int reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+int reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                    struct posix_acl *acl, int type);
 int reiserfs_acl_chmod(struct inode *inode);
 int reiserfs_inherit_default_acl(struct reiserfs_transaction_handle *th,
                                 struct inode *dir, struct dentry *dentry,
index c76d563..780bb90 100644 (file)
@@ -3282,13 +3282,14 @@ static ssize_t reiserfs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
        return ret;
 }
 
-int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
+int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                    struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        unsigned int ia_valid;
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -3413,7 +3414,7 @@ int reiserfs_setattr(struct dentry *dentry, struct iattr *attr)
        }
 
        if (!error) {
-               setattr_copy(inode, attr);
+               setattr_copy(&init_user_ns, inode, attr);
                mark_inode_dirty(inode);
        }
 
index adb21be..4f1cbd9 100644 (file)
@@ -59,7 +59,7 @@ long reiserfs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
                        if (err)
                                break;
 
-                       if (!inode_owner_or_capable(inode)) {
+                       if (!inode_owner_or_capable(&init_user_ns, inode)) {
                                err = -EPERM;
                                goto setflags_out;
                        }
@@ -101,7 +101,7 @@ setflags_out:
                err = put_user(inode->i_generation, (int __user *)arg);
                break;
        case REISERFS_IOC_SETVERSION:
-               if (!inode_owner_or_capable(inode)) {
+               if (!inode_owner_or_capable(&init_user_ns, inode)) {
                        err = -EPERM;
                        break;
                }
index 1594687..e6eb05e 100644 (file)
@@ -615,12 +615,12 @@ static int new_inode_init(struct inode *inode, struct inode *dir, umode_t mode)
         * the quota init calls have to know who to charge the quota to, so
         * we have to set uid and gid here
         */
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        return dquot_initialize(inode);
 }
 
-static int reiserfs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                          bool excl)
+static int reiserfs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                          struct dentry *dentry, umode_t mode, bool excl)
 {
        int retval;
        struct inode *inode;
@@ -698,8 +698,8 @@ out_failed:
        return retval;
 }
 
-static int reiserfs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
-                         dev_t rdev)
+static int reiserfs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                         struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        int retval;
        struct inode *inode;
@@ -781,7 +781,8 @@ out_failed:
        return retval;
 }
 
-static int reiserfs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int reiserfs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                         struct dentry *dentry, umode_t mode)
 {
        int retval;
        struct inode *inode;
@@ -1094,8 +1095,9 @@ out_unlink:
        return retval;
 }
 
-static int reiserfs_symlink(struct inode *parent_dir,
-                           struct dentry *dentry, const char *symname)
+static int reiserfs_symlink(struct user_namespace *mnt_userns,
+                           struct inode *parent_dir, struct dentry *dentry,
+                           const char *symname)
 {
        int retval;
        struct inode *inode;
@@ -1304,7 +1306,8 @@ static void set_ino_in_dir_entry(struct reiserfs_dir_entry *de,
  * one path. If it holds 2 or more, it can get into endless waiting in
  * get_empty_nodes or its clones
  */
-static int reiserfs_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int reiserfs_rename(struct user_namespace *mnt_userns,
+                          struct inode *old_dir, struct dentry *old_dentry,
                           struct inode *new_dir, struct dentry *new_dentry,
                           unsigned int flags)
 {
index f698715..0ca2ac6 100644 (file)
@@ -3102,7 +3102,8 @@ static inline void reiserfs_update_sd(struct reiserfs_transaction_handle *th,
 }
 
 void sd_attrs_to_i_attrs(__u16 sd_attrs, struct inode *inode);
-int reiserfs_setattr(struct dentry *dentry, struct iattr *attr);
+int reiserfs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                    struct iattr *attr);
 
 int __reiserfs_write_begin(struct page *page, unsigned from, unsigned len);
 
index fe63a7c..bd07383 100644 (file)
 static int xattr_create(struct inode *dir, struct dentry *dentry, int mode)
 {
        BUG_ON(!inode_is_locked(dir));
-       return dir->i_op->create(dir, dentry, mode, true);
+       return dir->i_op->create(&init_user_ns, dir, dentry, mode, true);
 }
 #endif
 
 static int xattr_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
 {
        BUG_ON(!inode_is_locked(dir));
-       return dir->i_op->mkdir(dir, dentry, mode);
+       return dir->i_op->mkdir(&init_user_ns, dir, dentry, mode);
 }
 
 /*
@@ -352,7 +352,7 @@ static int chown_one_xattr(struct dentry *dentry, void *data)
         * ATTR_MODE is set.
         */
        attrs->ia_valid &= (ATTR_UID|ATTR_GID);
-       err = reiserfs_setattr(dentry, attrs);
+       err = reiserfs_setattr(&init_user_ns, dentry, attrs);
        attrs->ia_valid = ia_valid;
 
        return err;
@@ -604,7 +604,7 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
                inode_lock_nested(d_inode(dentry), I_MUTEX_XATTR);
                inode_dio_wait(d_inode(dentry));
 
-               err = reiserfs_setattr(dentry, &newattrs);
+               err = reiserfs_setattr(&init_user_ns, dentry, &newattrs);
                inode_unlock(d_inode(dentry));
        } else
                update_ctime(inode);
@@ -948,7 +948,8 @@ static int xattr_mount_check(struct super_block *s)
        return 0;
 }
 
-int reiserfs_permission(struct inode *inode, int mask)
+int reiserfs_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                       int mask)
 {
        /*
         * We don't do permission checks on the internal objects.
@@ -957,7 +958,7 @@ int reiserfs_permission(struct inode *inode, int mask)
        if (IS_PRIVATE(inode))
                return 0;
 
-       return generic_permission(inode, mask);
+       return generic_permission(&init_user_ns, inode, mask);
 }
 
 static int xattr_hide_revalidate(struct dentry *dentry, unsigned int flags)
index c764352..e47fde1 100644 (file)
@@ -16,7 +16,8 @@ int reiserfs_xattr_init(struct super_block *sb, int mount_flags);
 int reiserfs_lookup_privroot(struct super_block *sb);
 int reiserfs_delete_xattrs(struct inode *inode);
 int reiserfs_chown_xattrs(struct inode *inode, struct iattr *attrs);
-int reiserfs_permission(struct inode *inode, int mask);
+int reiserfs_permission(struct user_namespace *mnt_userns,
+                       struct inode *inode, int mask);
 
 #ifdef CONFIG_REISERFS_FS_XATTR
 #define has_xattr_dir(inode) (REISERFS_I(inode)->i_flags & i_has_xattr_dir)
@@ -43,7 +44,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec);
 
 static inline int reiserfs_xattrs_initialized(struct super_block *sb)
 {
-       return REISERFS_SB(sb)->priv_root != NULL;
+       return REISERFS_SB(sb)->priv_root && REISERFS_SB(sb)->xattr_root;
 }
 
 #define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header))
index ccd40df..a954714 100644 (file)
@@ -18,7 +18,8 @@ static int __reiserfs_set_acl(struct reiserfs_transaction_handle *th,
 
 
 int
-reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+reiserfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                struct posix_acl *acl, int type)
 {
        int error, error2;
        struct reiserfs_transaction_handle th;
@@ -40,7 +41,8 @@ reiserfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
        reiserfs_write_unlock(inode->i_sb);
        if (error == 0) {
                if (type == ACL_TYPE_ACCESS && acl) {
-                       error = posix_acl_update_mode(inode, &mode, &acl);
+                       error = posix_acl_update_mode(&init_user_ns, inode,
+                                                     &mode, &acl);
                        if (error)
                                goto unlock;
                        update_mode = 1;
@@ -399,5 +401,5 @@ int reiserfs_acl_chmod(struct inode *inode)
            !reiserfs_posixacl(inode->i_sb))
                return 0;
 
-       return posix_acl_chmod(inode, inode->i_mode);
+       return posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
 }
index 20be9a0..8965c8e 100644 (file)
@@ -21,7 +21,8 @@ security_get(const struct xattr_handler *handler, struct dentry *unused,
 }
 
 static int
-security_set(const struct xattr_handler *handler, struct dentry *unused,
+security_set(const struct xattr_handler *handler,
+            struct user_namespace *mnt_userns, struct dentry *unused,
             struct inode *inode, const char *name, const void *buffer,
             size_t size, int flags)
 {
index 5ed48da..d853cea 100644 (file)
@@ -20,7 +20,8 @@ trusted_get(const struct xattr_handler *handler, struct dentry *unused,
 }
 
 static int
-trusted_set(const struct xattr_handler *handler, struct dentry *unused,
+trusted_set(const struct xattr_handler *handler,
+           struct user_namespace *mnt_userns, struct dentry *unused,
            struct inode *inode, const char *name, const void *buffer,
            size_t size, int flags)
 {
index a573ca4..65d9cd1 100644 (file)
@@ -18,7 +18,8 @@ user_get(const struct xattr_handler *handler, struct dentry *unused,
 }
 
 static int
-user_set(const struct xattr_handler *handler, struct dentry *unused,
+user_set(const struct xattr_handler *handler, struct user_namespace *mnt_userns,
+        struct dentry *unused,
         struct inode *inode, const char *name, const void *buffer,
         size_t size, int flags)
 {
index 77dba3a..e4a5fdd 100644 (file)
@@ -432,13 +432,16 @@ EXPORT_SYMBOL(vfs_clone_file_range);
 /* Check whether we are allowed to dedupe the destination file */
 static bool allow_file_dedupe(struct file *file)
 {
+       struct user_namespace *mnt_userns = file_mnt_user_ns(file);
+       struct inode *inode = file_inode(file);
+
        if (capable(CAP_SYS_ADMIN))
                return true;
        if (file->f_mode & FMODE_WRITE)
                return true;
-       if (uid_eq(current_fsuid(), file_inode(file)->i_uid))
+       if (uid_eq(current_fsuid(), i_uid_into_mnt(mnt_userns, inode)))
                return true;
-       if (!inode_permission(file_inode(file), MAY_WRITE))
+       if (!inode_permission(mnt_userns, inode, MAY_WRITE))
                return true;
        return false;
 }
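
Ownership and write-permission checks for dedupe now go through the user namespace of the mount the file was opened from: i_uid_into_mnt() translates the raw inode uid into the mount's idmapping before comparing it with current_fsuid(), and inode_permission() takes the same mnt_userns. On ordinary mounts file_mnt_user_ns() returns &init_user_ns, under which the translation is the identity; sketched as an invariant:

	/* On non-idmapped mounts the uid translation is the identity. */
	if (mnt_userns == &init_user_ns)
		WARN_ON(!uid_eq(i_uid_into_mnt(mnt_userns, inode),
				inode->i_uid));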
index 37aaa83..945896d 100644 (file)
@@ -1055,10 +1055,9 @@ static long do_restart_poll(struct restart_block *restart_block)
 
        ret = do_sys_poll(ufds, nfds, to);
 
-       if (ret == -ERESTARTNOHAND) {
-               restart_block->fn = do_restart_poll;
-               ret = -ERESTART_RESTARTBLOCK;
-       }
+       if (ret == -ERESTARTNOHAND)
+               ret = set_restart_fn(restart_block, do_restart_poll);
+
        return ret;
 }
 
@@ -1080,7 +1079,6 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
                struct restart_block *restart_block;
 
                restart_block = &current->restart_block;
-               restart_block->fn = do_restart_poll;
                restart_block->poll.ufds = ufds;
                restart_block->poll.nfds = nfds;
 
@@ -1091,7 +1089,7 @@ SYSCALL_DEFINE3(poll, struct pollfd __user *, ufds, unsigned int, nfds,
                } else
                        restart_block->poll.has_timeout = 0;
 
-               ret = -ERESTART_RESTARTBLOCK;
+               ret = set_restart_fn(restart_block, do_restart_poll);
        }
        return ret;
 }
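
Both call sites now go through set_restart_fn(), which records the restart callback and produces the -ERESTART_RESTARTBLOCK return in one step, with a hook for architectures that stash extra restart state. From memory, the helper in include/linux/thread_info.h is roughly:

	static inline long set_restart_fn(struct restart_block *restart,
					  long (*fn)(struct restart_block *))
	{
		restart->fn = fn;
		/* no-op unless the architecture overrides it */
		arch_set_restart_data(restart);
		return -ERESTART_RESTARTBLOCK;
	}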
index 45f4442..b9e87eb 100644 (file)
@@ -87,7 +87,7 @@ static int squashfs_bio_read(struct super_block *sb, u64 index, int length,
        int error, i;
        struct bio *bio;
 
-       if (page_count <= BIO_MAX_PAGES)
+       if (page_count <= BIO_MAX_VECS)
                bio = bio_alloc(GFP_NOIO, page_count);
        else
                bio = bio_kmalloc(GFP_NOIO, page_count);
index eb02072..7237637 100644 (file)
@@ -152,14 +152,18 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb,
                start = le64_to_cpu(table[n]);
                end = le64_to_cpu(table[n + 1]);
 
-               if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+               if (start >= end
+                   || (end - start) >
+                   (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
                        kfree(table);
                        return ERR_PTR(-EINVAL);
                }
        }
 
        start = le64_to_cpu(table[indexes - 1]);
-       if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) {
+       if (start >= lookup_table_start ||
+           (lookup_table_start - start) >
+           (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
                kfree(table);
                return ERR_PTR(-EINVAL);
        }
index 11581bf..ea53876 100644 (file)
@@ -97,14 +97,16 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb,
                start = le64_to_cpu(table[n]);
                end = le64_to_cpu(table[n + 1]);
 
-               if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+               if (start >= end || (end - start) >
+                               (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
                        kfree(table);
                        return ERR_PTR(-EINVAL);
                }
        }
 
        start = le64_to_cpu(table[indexes - 1]);
-       if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) {
+       if (start >= id_table_start || (id_table_start - start) >
+                               (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
                kfree(table);
                return ERR_PTR(-EINVAL);
        }
index 8d64edb..b3fdc82 100644 (file)
@@ -17,6 +17,7 @@
 
 /* size of metadata (inode and directory) blocks */
 #define SQUASHFS_METADATA_SIZE         8192
+#define SQUASHFS_BLOCK_OFFSET          2
 
 /* default size of block device I/O */
 #ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE
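
An on-disk squashfs metadata block is a 2-byte length/compression header followed by up to SQUASHFS_METADATA_SIZE bytes of payload, so two consecutive table pointers may legitimately differ by SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET; the old bound rejected maximal blocks. A hypothetical helper expressing the corrected check used in the hunks above and below:

	/* Illustrative only -- not a function in the tree. */
	static bool squashfs_meta_span_ok(u64 start, u64 end)
	{
		return start < end &&
		       end - start <= SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET;
	}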
index ead6667..087cab8 100644 (file)
@@ -109,14 +109,16 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start,
                start = le64_to_cpu(table[n]);
                end = le64_to_cpu(table[n + 1]);
 
-               if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) {
+               if (start >= end || (end - start) >
+                               (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
                        kfree(table);
                        return ERR_PTR(-EINVAL);
                }
        }
 
        start = le64_to_cpu(table[indexes - 1]);
-       if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) {
+       if (start >= table_start || (table_start - start) >
+                               (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) {
                kfree(table);
                return ERR_PTR(-EINVAL);
        }
index dacecdd..fbc171d 100644 (file)
--- a/fs/stat.c
+++ b/fs/stat.c
 
 /**
  * generic_fillattr - Fill in the basic attributes from the inode struct
- * @inode: Inode to use as the source
- * @stat: Where to fill in the attributes
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @inode:     Inode to use as the source
+ * @stat:      Where to fill in the attributes
  *
  * Fill in the basic attributes in the kstat structure from data that's to be
  * found on the VFS inode structure.  This is the default if no getattr inode
  * operation is supplied.
+ *
+ * If the inode has been found through an idmapped mount, the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before filling in the
+ * uid and gid fields. On non-idmapped mounts, or if permission checking is to
+ * be performed on the raw inode, simply pass init_user_ns.
  */
-void generic_fillattr(struct inode *inode, struct kstat *stat)
+void generic_fillattr(struct user_namespace *mnt_userns, struct inode *inode,
+                     struct kstat *stat)
 {
        stat->dev = inode->i_sb->s_dev;
        stat->ino = inode->i_ino;
        stat->mode = inode->i_mode;
        stat->nlink = inode->i_nlink;
-       stat->uid = inode->i_uid;
-       stat->gid = inode->i_gid;
+       stat->uid = i_uid_into_mnt(mnt_userns, inode);
+       stat->gid = i_gid_into_mnt(mnt_userns, inode);
        stat->rdev = inode->i_rdev;
        stat->size = i_size_read(inode);
        stat->atime = inode->i_atime;
@@ -67,6 +75,7 @@ EXPORT_SYMBOL(generic_fillattr);
 int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
                      u32 request_mask, unsigned int query_flags)
 {
+       struct user_namespace *mnt_userns;
        struct inode *inode = d_backing_inode(path->dentry);
 
        memset(stat, 0, sizeof(*stat));
@@ -83,11 +92,12 @@ int vfs_getattr_nosec(const struct path *path, struct kstat *stat,
        if (IS_DAX(inode))
                stat->attributes |= STATX_ATTR_DAX;
 
+       mnt_userns = mnt_user_ns(path->mnt);
        if (inode->i_op->getattr)
-               return inode->i_op->getattr(path, stat, request_mask,
-                                           query_flags);
+               return inode->i_op->getattr(mnt_userns, path, stat,
+                                           request_mask, query_flags);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(mnt_userns, inode, stat);
        return 0;
 }
 EXPORT_SYMBOL(vfs_getattr_nosec);
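
This is the heart of the idmapped-mounts conversion running through this merge: ->getattr now receives the user namespace of the mount, and generic_fillattr() maps i_uid/i_gid through it before filling the kstat. A minimal sketch of a filesystem getattr under the new signature (foofs_getattr is a made-up name; filesystems not yet converted pass &init_user_ns, exactly as the hunks above do):

	static int foofs_getattr(struct user_namespace *mnt_userns,
				 const struct path *path, struct kstat *stat,
				 u32 request_mask, unsigned int query_flags)
	{
		struct inode *inode = d_inode(path->dentry);

		/* fills stat->uid/gid via i_uid_into_mnt()/i_gid_into_mnt() */
		generic_fillattr(mnt_userns, inode, stat);
		return 0;
	}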
index 45fc79a..90e0012 100644 (file)
@@ -29,12 +29,13 @@ const struct file_operations sysv_file_operations = {
        .splice_read    = generic_file_splice_read,
 };
 
-static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
+static int sysv_setattr(struct user_namespace *mnt_userns,
+                       struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -47,7 +48,7 @@ static int sysv_setattr(struct dentry *dentry, struct iattr *attr)
                sysv_truncate(inode);
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
index 6c98019..50df794 100644 (file)
@@ -163,7 +163,7 @@ struct inode * sysv_new_inode(const struct inode * dir, umode_t mode)
        *sbi->s_sb_fic_count = cpu_to_fs16(sbi, count);
        fs16_add(sbi, sbi->s_sb_total_free_inodes, -1);
        dirty_sb(sb);
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode->i_ino = fs16_to_cpu(sbi, ino);
        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
        inode->i_blocks = 0;
index bcb67b0..8b2e99b 100644 (file)
@@ -441,11 +441,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size)
        return blocks;
 }
 
-int sysv_getattr(const struct path *path, struct kstat *stat,
-                u32 request_mask, unsigned int flags)
+int sysv_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        struct super_block *s = path->dentry->d_sb;
-       generic_fillattr(d_inode(path->dentry), stat);
+       generic_fillattr(&init_user_ns, d_inode(path->dentry), stat);
        stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size);
        stat->blksize = s->s_blocksize;
        return 0;
index ea2414b..b2e6abc 100644 (file)
@@ -41,7 +41,8 @@ static struct dentry *sysv_lookup(struct inode * dir, struct dentry * dentry, un
        return d_splice_alias(inode, dentry);
 }
 
-static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode, dev_t rdev)
+static int sysv_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode * inode;
        int err;
@@ -60,13 +61,14 @@ static int sysv_mknod(struct inode * dir, struct dentry * dentry, umode_t mode,
        return err;
 }
 
-static int sysv_create(struct inode * dir, struct dentry * dentry, umode_t mode, bool excl)
+static int sysv_create(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, bool excl)
 {
-       return sysv_mknod(dir, dentry, mode, 0);
+       return sysv_mknod(&init_user_ns, dir, dentry, mode, 0);
 }
 
-static int sysv_symlink(struct inode * dir, struct dentry * dentry, 
-       const char * symname)
+static int sysv_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, const char *symname)
 {
        int err = -ENAMETOOLONG;
        int l = strlen(symname)+1;
@@ -108,7 +110,8 @@ static int sysv_link(struct dentry * old_dentry, struct inode * dir,
        return add_nondir(dentry, inode);
 }
 
-static int sysv_mkdir(struct inode * dir, struct dentry *dentry, umode_t mode)
+static int sysv_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode)
 {
        struct inode * inode;
        int err;
@@ -186,9 +189,9 @@ static int sysv_rmdir(struct inode * dir, struct dentry * dentry)
  * Anybody can rename anything with this: the permission checks are left to the
  * higher-level routines.
  */
-static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
-                      struct inode * new_dir, struct dentry * new_dentry,
-                      unsigned int flags)
+static int sysv_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                      struct dentry *old_dentry, struct inode *new_dir,
+                      struct dentry *new_dentry, unsigned int flags)
 {
        struct inode * old_inode = d_inode(old_dentry);
        struct inode * new_inode = d_inode(new_dentry);
index 1cff585..99ddf03 100644 (file)
@@ -141,7 +141,8 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int);
 extern int sysv_write_inode(struct inode *, struct writeback_control *wbc);
 extern int sysv_sync_inode(struct inode *);
 extern void sysv_set_inode(struct inode *, dev_t);
-extern int sysv_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int sysv_getattr(struct user_namespace *, const struct path *,
+                       struct kstat *, u32, unsigned int);
 extern int sysv_init_icache(void);
 extern void sysv_destroy_icache(void);
 
index 0ee8c6d..4b83cbd 100644 (file)
@@ -67,7 +67,9 @@ static char *get_dname(struct dentry *dentry)
        return name;
 }
 
-static int tracefs_syscall_mkdir(struct inode *inode, struct dentry *dentry, umode_t mode)
+static int tracefs_syscall_mkdir(struct user_namespace *mnt_userns,
+                                struct inode *inode, struct dentry *dentry,
+                                umode_t mode)
 {
        char *name;
        int ret;
index 9a6b866..d9d8d77 100644 (file)
@@ -94,7 +94,7 @@ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
         */
        inode->i_flags |= S_NOCMTIME;
 
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode->i_mtime = inode->i_atime = inode->i_ctime =
                         current_time(inode);
        inode->i_mapping->nrpages = 0;
@@ -280,8 +280,8 @@ static int ubifs_prepare_create(struct inode *dir, struct dentry *dentry,
        return fscrypt_setup_filename(dir, &dentry->d_name, 0, nm);
 }
 
-static int ubifs_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                       bool excl)
+static int ubifs_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
        struct inode *inode;
        struct ubifs_info *c = dir->i_sb->s_fs_info;
@@ -441,8 +441,8 @@ out_budg:
        return err;
 }
 
-static int ubifs_tmpfile(struct inode *dir, struct dentry *dentry,
-                        umode_t mode)
+static int ubifs_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode)
 {
        return do_tmpfile(dir, dentry, mode, NULL);
 }
@@ -942,7 +942,8 @@ out_fname:
        return err;
 }
 
-static int ubifs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ubifs_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        struct ubifs_inode *dir_ui = ubifs_inode(dir);
@@ -1013,8 +1014,8 @@ out_budg:
        return err;
 }
 
-static int ubifs_mknod(struct inode *dir, struct dentry *dentry,
-                      umode_t mode, dev_t rdev)
+static int ubifs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode *inode;
        struct ubifs_inode *ui;
@@ -1102,8 +1103,8 @@ out_budg:
        return err;
 }
 
-static int ubifs_symlink(struct inode *dir, struct dentry *dentry,
-                        const char *symname)
+static int ubifs_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        struct inode *inode;
        struct ubifs_inode *ui;
@@ -1542,7 +1543,8 @@ static int ubifs_xrename(struct inode *old_dir, struct dentry *old_dentry,
        return err;
 }
 
-static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
+static int ubifs_rename(struct user_namespace *mnt_userns,
+                       struct inode *old_dir, struct dentry *old_dentry,
                        struct inode *new_dir, struct dentry *new_dentry,
                        unsigned int flags)
 {
@@ -1566,8 +1568,8 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry,
        return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
 }
 
-int ubifs_getattr(const struct path *path, struct kstat *stat,
-                 u32 request_mask, unsigned int flags)
+int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                 struct kstat *stat, u32 request_mask, unsigned int flags)
 {
        loff_t size;
        struct inode *inode = d_inode(path->dentry);
@@ -1589,7 +1591,7 @@ int ubifs_getattr(const struct path *path, struct kstat *stat,
                                STATX_ATTR_ENCRYPTED |
                                STATX_ATTR_IMMUTABLE);
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        stat->blksize = UBIFS_BLOCK_SIZE;
        stat->size = ui->ui_size;
 
index 2bc7780..0e4b4be 100644 (file)
@@ -1257,7 +1257,8 @@ static int do_setattr(struct ubifs_info *c, struct inode *inode,
        return err;
 }
 
-int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
+int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *attr)
 {
        int err;
        struct inode *inode = d_inode(dentry);
@@ -1265,7 +1266,7 @@ int ubifs_setattr(struct dentry *dentry, struct iattr *attr)
 
        dbg_gen("ino %lu, mode %#x, ia_valid %#x",
                inode->i_ino, inode->i_mode, attr->ia_valid);
-       err = setattr_prepare(dentry, attr);
+       err = setattr_prepare(&init_user_ns, dentry, attr);
        if (err)
                return err;
 
index 4363d85..2326d51 100644 (file)
@@ -155,7 +155,7 @@ long ubifs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
                if (IS_RDONLY(inode))
                        return -EROFS;
 
-               if (!inode_owner_or_capable(inode))
+               if (!inode_owner_or_capable(&init_user_ns, inode))
                        return -EACCES;
 
                if (get_user(flags, (int __user *) arg))
index fc2cdde..7fdfdbd 100644 (file)
@@ -1989,13 +1989,14 @@ int ubifs_calc_dark(const struct ubifs_info *c, int spc);
 
 /* file.c */
 int ubifs_fsync(struct file *file, loff_t start, loff_t end, int datasync);
-int ubifs_setattr(struct dentry *dentry, struct iattr *attr);
+int ubifs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 struct iattr *attr);
 int ubifs_update_time(struct inode *inode, struct timespec64 *time, int flags);
 
 /* dir.c */
 struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir,
                              umode_t mode);
-int ubifs_getattr(const struct path *path, struct kstat *stat,
+int ubifs_getattr(struct user_namespace *mnt_userns, const struct path *path, struct kstat *stat,
                  u32 request_mask, unsigned int flags);
 int ubifs_check_dir_empty(struct inode *dir);
 
index 842d5f1..6b1e983 100644 (file)
@@ -681,6 +681,7 @@ static int xattr_get(const struct xattr_handler *handler,
 }
 
 static int xattr_set(const struct xattr_handler *handler,
+                          struct user_namespace *mnt_userns,
                           struct dentry *dentry, struct inode *inode,
                           const char *name, const void *value,
                           size_t size, int flags)
index ad8eefa..2846dcd 100644 (file)
@@ -183,7 +183,7 @@ long udf_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
        long old_block, new_block;
        int result;
 
-       if (inode_permission(inode, MAY_READ) != 0) {
+       if (file_permission(filp, MAY_READ) != 0) {
                udf_debug("no permission to access inode %lu\n", inode->i_ino);
                return -EPERM;
        }
@@ -253,13 +253,14 @@ const struct file_operations udf_file_operations = {
        .llseek                 = generic_file_llseek,
 };
 
-static int udf_setattr(struct dentry *dentry, struct iattr *attr)
+static int udf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                      struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct super_block *sb = inode->i_sb;
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -282,7 +283,7 @@ static int udf_setattr(struct dentry *dentry, struct iattr *attr)
        if (attr->ia_valid & ATTR_MODE)
                udf_update_extra_perms(inode, attr->ia_mode);
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
index 84ed23e..2ecf0e8 100644 (file)
@@ -103,7 +103,7 @@ struct inode *udf_new_inode(struct inode *dir, umode_t mode)
                mutex_unlock(&sbi->s_alloc_mutex);
        }
 
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_UID_SET))
                inode->i_uid = sbi->s_uid;
        if (UDF_QUERY_FLAG(sb, UDF_FLAG_GID_SET))
index e169d8f..f146b30 100644 (file)
@@ -604,8 +604,8 @@ static int udf_add_nondir(struct dentry *dentry, struct inode *inode)
        return 0;
 }
 
-static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-                     bool excl)
+static int udf_create(struct user_namespace *mnt_userns, struct inode *dir,
+                     struct dentry *dentry, umode_t mode, bool excl)
 {
        struct inode *inode = udf_new_inode(dir, mode);
 
@@ -623,7 +623,8 @@ static int udf_create(struct inode *dir, struct dentry *dentry, umode_t mode,
        return udf_add_nondir(dentry, inode);
 }
 
-static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int udf_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct inode *inode = udf_new_inode(dir, mode);
 
@@ -642,8 +643,8 @@ static int udf_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
        return 0;
 }
 
-static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
-                    dev_t rdev)
+static int udf_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode *inode;
 
@@ -658,7 +659,8 @@ static int udf_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
        return udf_add_nondir(dentry, inode);
 }
 
-static int udf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int udf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        struct udf_fileident_bh fibh;
@@ -877,8 +879,8 @@ out:
        return retval;
 }
 
-static int udf_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *symname)
+static int udf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, const char *symname)
 {
        struct inode *inode = udf_new_inode(dir, S_IFLNK | 0777);
        struct pathComponent *pc;
@@ -1065,9 +1067,9 @@ static int udf_link(struct dentry *old_dentry, struct inode *dir,
 /* Anybody can rename anything with this: the permission checks are left to the
  * higher-level routines.
  */
-static int udf_rename(struct inode *old_dir, struct dentry *old_dentry,
-                     struct inode *new_dir, struct dentry *new_dentry,
-                     unsigned int flags)
+static int udf_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                     struct dentry *old_dentry, struct inode *new_dir,
+                     struct dentry *new_dentry, unsigned int flags)
 {
        struct inode *old_inode = d_inode(old_dentry);
        struct inode *new_inode = d_inode(new_dentry);
index c973db2..9b22342 100644 (file)
@@ -152,14 +152,15 @@ out_unmap:
        return err;
 }
 
-static int udf_symlink_getattr(const struct path *path, struct kstat *stat,
-                               u32 request_mask, unsigned int flags)
+static int udf_symlink_getattr(struct user_namespace *mnt_userns,
+                              const struct path *path, struct kstat *stat,
+                              u32 request_mask, unsigned int flags)
 {
        struct dentry *dentry = path->dentry;
        struct inode *inode = d_backing_inode(dentry);
        struct page *page;
 
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
        page = read_mapping_page(inode->i_mapping, 0, NULL);
        if (IS_ERR(page))
                return PTR_ERR(page);
index 969fd60..7e3e08c 100644 (file)
@@ -289,7 +289,7 @@ cg_found:
        ufs_mark_sb_dirty(sb);
 
        inode->i_ino = cg * uspi->s_ipg + bit;
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
        inode->i_blocks = 0;
        inode->i_generation = 0;
        inode->i_mtime = inode->i_atime = inode->i_ctime = current_time(inode);
index c843ec8..debc282 100644 (file)
@@ -1211,13 +1211,14 @@ out:
        return err;
 }
 
-int ufs_setattr(struct dentry *dentry, struct iattr *attr)
+int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        unsigned int ia_valid = attr->ia_valid;
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -1227,7 +1228,7 @@ int ufs_setattr(struct dentry *dentry, struct iattr *attr)
                        return error;
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        mark_inode_dirty(inode);
        return 0;
 }
index 9ef40f1..29d5a0e 100644 (file)
@@ -69,7 +69,8 @@ static struct dentry *ufs_lookup(struct inode * dir, struct dentry *dentry, unsi
  * If the create succeeds, we fill in the inode information
  * with d_instantiate(). 
  */
-static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
+static int ufs_create (struct user_namespace * mnt_userns,
+               struct inode * dir, struct dentry * dentry, umode_t mode,
                bool excl)
 {
        struct inode *inode;
@@ -85,7 +86,8 @@ static int ufs_create (struct inode * dir, struct dentry * dentry, umode_t mode,
        return ufs_add_nondir(dentry, inode);
 }
 
-static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t rdev)
+static int ufs_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode, dev_t rdev)
 {
        struct inode *inode;
        int err;
@@ -104,8 +106,8 @@ static int ufs_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev
        return err;
 }
 
-static int ufs_symlink (struct inode * dir, struct dentry * dentry,
-       const char * symname)
+static int ufs_symlink (struct user_namespace * mnt_userns, struct inode * dir,
+       struct dentry * dentry, const char * symname)
 {
        struct super_block * sb = dir->i_sb;
        int err;
@@ -164,7 +166,8 @@ static int ufs_link (struct dentry * old_dentry, struct inode * dir,
        return error;
 }
 
-static int ufs_mkdir(struct inode * dir, struct dentry * dentry, umode_t mode)
+static int ufs_mkdir(struct user_namespace * mnt_userns, struct inode * dir,
+       struct dentry * dentry, umode_t mode)
 {
        struct inode * inode;
        int err;
@@ -240,9 +243,9 @@ static int ufs_rmdir (struct inode * dir, struct dentry *dentry)
        return err;
 }
 
-static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
-                     struct inode *new_dir, struct dentry *new_dentry,
-                     unsigned int flags)
+static int ufs_rename(struct user_namespace *mnt_userns, struct inode *old_dir,
+                     struct dentry *old_dentry, struct inode *new_dir,
+                     struct dentry *new_dentry, unsigned int flags)
 {
        struct inode *old_inode = d_inode(old_dentry);
        struct inode *new_inode = d_inode(new_dentry);
index b49e0ef..550f7c5 100644 (file)
@@ -123,7 +123,8 @@ extern struct inode *ufs_iget(struct super_block *, unsigned long);
 extern int ufs_write_inode (struct inode *, struct writeback_control *);
 extern int ufs_sync_inode (struct inode *);
 extern void ufs_evict_inode (struct inode *);
-extern int ufs_setattr(struct dentry *dentry, struct iattr *attr);
+extern int ufs_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                      struct iattr *attr);
 
 /* namei.c */
 extern const struct file_operations ufs_dir_operations;
index fd3cc42..39f3560 100644 (file)
@@ -62,7 +62,8 @@ int vfs_utimes(const struct path *path, struct timespec64 *times)
        }
 retry_deleg:
        inode_lock(inode);
-       error = notify_change(path->dentry, &newattrs, &delegated_inode);
+       error = notify_change(mnt_user_ns(path->mnt), path->dentry, &newattrs,
+                             &delegated_inode);
        inode_unlock(inode);
        if (delegated_inode) {
                error = break_deleg_wait(&delegated_inode);
index 4d569f1..7aee0ec 100644 (file)
@@ -288,13 +288,15 @@ static int vboxsf_dir_create(struct inode *parent, struct dentry *dentry,
        return 0;
 }
 
-static int vboxsf_dir_mkfile(struct inode *parent, struct dentry *dentry,
+static int vboxsf_dir_mkfile(struct user_namespace *mnt_userns,
+                            struct inode *parent, struct dentry *dentry,
                             umode_t mode, bool excl)
 {
        return vboxsf_dir_create(parent, dentry, mode, 0);
 }
 
-static int vboxsf_dir_mkdir(struct inode *parent, struct dentry *dentry,
+static int vboxsf_dir_mkdir(struct user_namespace *mnt_userns,
+                           struct inode *parent, struct dentry *dentry,
                            umode_t mode)
 {
        return vboxsf_dir_create(parent, dentry, mode, 1);
@@ -332,7 +334,8 @@ static int vboxsf_dir_unlink(struct inode *parent, struct dentry *dentry)
        return 0;
 }
 
-static int vboxsf_dir_rename(struct inode *old_parent,
+static int vboxsf_dir_rename(struct user_namespace *mnt_userns,
+                            struct inode *old_parent,
                             struct dentry *old_dentry,
                             struct inode *new_parent,
                             struct dentry *new_dentry,
@@ -374,7 +377,8 @@ err_put_old_path:
        return err;
 }
 
-static int vboxsf_dir_symlink(struct inode *parent, struct dentry *dentry,
+static int vboxsf_dir_symlink(struct user_namespace *mnt_userns,
+                             struct inode *parent, struct dentry *dentry,
                              const char *symname)
 {
        struct vboxsf_inode *sf_parent_i = VBOXSF_I(parent);
index 0180575..3b847e3 100644 (file)
@@ -212,8 +212,8 @@ int vboxsf_inode_revalidate(struct dentry *dentry)
        return 0;
 }
 
-int vboxsf_getattr(const struct path *path, struct kstat *kstat,
-                  u32 request_mask, unsigned int flags)
+int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                  struct kstat *kstat, u32 request_mask, unsigned int flags)
 {
        int err;
        struct dentry *dentry = path->dentry;
@@ -233,11 +233,12 @@ int vboxsf_getattr(const struct path *path, struct kstat *kstat,
        if (err)
                return err;
 
-       generic_fillattr(d_inode(dentry), kstat);
+       generic_fillattr(&init_user_ns, d_inode(dentry), kstat);
        return 0;
 }
 
-int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr)
+int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                  struct iattr *iattr)
 {
        struct vboxsf_inode *sf_i = VBOXSF_I(d_inode(dentry));
        struct vboxsf_sbi *sbi = VBOXSF_SBI(dentry->d_sb);
index 18f95b0..760524e 100644 (file)
@@ -90,9 +90,11 @@ int vboxsf_stat(struct vboxsf_sbi *sbi, struct shfl_string *path,
                struct shfl_fsobjinfo *info);
 int vboxsf_stat_dentry(struct dentry *dentry, struct shfl_fsobjinfo *info);
 int vboxsf_inode_revalidate(struct dentry *dentry);
-int vboxsf_getattr(const struct path *path, struct kstat *kstat,
-                  u32 request_mask, unsigned int query_flags);
-int vboxsf_setattr(struct dentry *dentry, struct iattr *iattr);
+int vboxsf_getattr(struct user_namespace *mnt_userns, const struct path *path,
+                  struct kstat *kstat, u32 request_mask,
+                  unsigned int query_flags);
+int vboxsf_setattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                  struct iattr *iattr);
 struct shfl_string *vboxsf_path_from_dentry(struct vboxsf_sbi *sbi,
                                            struct dentry *dentry);
 int vboxsf_nlscpy(struct vboxsf_sbi *sbi, char *name, size_t name_bound_len,
index f7e997a..77e159a 100644 (file)
@@ -369,7 +369,7 @@ int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
         * has verity enabled, and to stabilize the data being hashed.
         */
 
-       err = inode_permission(inode, MAY_WRITE);
+       err = file_permission(filp, MAY_WRITE);
        if (err)
                return err;
 
index fd57153..b3444e0 100644 (file)
@@ -83,7 +83,8 @@ xattr_resolve_name(struct inode *inode, const char **name)
  * because different namespaces have very different rules.
  */
 static int
-xattr_permission(struct inode *inode, const char *name, int mask)
+xattr_permission(struct user_namespace *mnt_userns, struct inode *inode,
+                const char *name, int mask)
 {
        /*
         * We can never set or remove an extended attribute on a read-only
@@ -97,7 +98,7 @@ xattr_permission(struct inode *inode, const char *name, int mask)
                 * to be writen back improperly if their true value is
                 * unknown to the vfs.
                 */
-               if (HAS_UNMAPPED_ID(inode))
+               if (HAS_UNMAPPED_ID(mnt_userns, inode))
                        return -EPERM;
        }
 
@@ -127,11 +128,12 @@ xattr_permission(struct inode *inode, const char *name, int mask)
                if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
                        return (mask & MAY_WRITE) ? -EPERM : -ENODATA;
                if (S_ISDIR(inode->i_mode) && (inode->i_mode & S_ISVTX) &&
-                   (mask & MAY_WRITE) && !inode_owner_or_capable(inode))
+                   (mask & MAY_WRITE) &&
+                   !inode_owner_or_capable(mnt_userns, inode))
                        return -EPERM;
        }
 
-       return inode_permission(inode, mask);
+       return inode_permission(mnt_userns, inode, mask);
 }
 
 /*
@@ -162,8 +164,9 @@ xattr_supported_namespace(struct inode *inode, const char *prefix)
 EXPORT_SYMBOL(xattr_supported_namespace);
 
 int
-__vfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name,
-              const void *value, size_t size, int flags)
+__vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+              struct inode *inode, const char *name, const void *value,
+              size_t size, int flags)
 {
        const struct xattr_handler *handler;
 
@@ -174,7 +177,8 @@ __vfs_setxattr(struct dentry *dentry, struct inode *inode, const char *name,
                return -EOPNOTSUPP;
        if (size == 0)
                value = "";  /* empty EA, do not remove */
-       return handler->set(handler, dentry, inode, name, value, size, flags);
+       return handler->set(handler, mnt_userns, dentry, inode, name, value,
+                           size, flags);
 }
 EXPORT_SYMBOL(__vfs_setxattr);
 
@@ -182,6 +186,7 @@ EXPORT_SYMBOL(__vfs_setxattr);
  *  __vfs_setxattr_noperm - perform setxattr operation without performing
  *  permission checks.
  *
+ *  @mnt_userns - user namespace of the mount the inode was found from
  *  @dentry - object to perform setxattr on
  *  @name - xattr name to set
  *  @value - value to set @name to
@@ -194,8 +199,9 @@ EXPORT_SYMBOL(__vfs_setxattr);
  *  is executed. It also assumes that the caller will make the appropriate
  *  permission checks.
  */
-int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
-               const void *value, size_t size, int flags)
+int __vfs_setxattr_noperm(struct user_namespace *mnt_userns,
+                         struct dentry *dentry, const char *name,
+                         const void *value, size_t size, int flags)
 {
        struct inode *inode = dentry->d_inode;
        int error = -EAGAIN;
@@ -205,7 +211,8 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
        if (issec)
                inode->i_flags &= ~S_NOSEC;
        if (inode->i_opflags & IOP_XATTR) {
-               error = __vfs_setxattr(dentry, inode, name, value, size, flags);
+               error = __vfs_setxattr(mnt_userns, dentry, inode, name, value,
+                                      size, flags);
                if (!error) {
                        fsnotify_xattr(dentry);
                        security_inode_post_setxattr(dentry, name, value,
@@ -244,18 +251,19 @@ int __vfs_setxattr_noperm(struct dentry *dentry, const char *name,
  *  a delegation was broken on, NULL if none.
  */
 int
-__vfs_setxattr_locked(struct dentry *dentry, const char *name,
-               const void *value, size_t size, int flags,
-               struct inode **delegated_inode)
+__vfs_setxattr_locked(struct user_namespace *mnt_userns, struct dentry *dentry,
+                     const char *name, const void *value, size_t size,
+                     int flags, struct inode **delegated_inode)
 {
        struct inode *inode = dentry->d_inode;
        int error;
 
-       error = xattr_permission(inode, name, MAY_WRITE);
+       error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
        if (error)
                return error;
 
-       error = security_inode_setxattr(dentry, name, value, size, flags);
+       error = security_inode_setxattr(mnt_userns, dentry, name, value, size,
+                                       flags);
        if (error)
                goto out;
 
@@ -263,7 +271,8 @@ __vfs_setxattr_locked(struct dentry *dentry, const char *name,
        if (error)
                goto out;
 
-       error = __vfs_setxattr_noperm(dentry, name, value, size, flags);
+       error = __vfs_setxattr_noperm(mnt_userns, dentry, name, value,
+                                     size, flags);
 
 out:
        return error;
@@ -271,8 +280,8 @@ out:
 EXPORT_SYMBOL_GPL(__vfs_setxattr_locked);
 
 int
-vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
-               size_t size, int flags)
+vfs_setxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+            const char *name, const void *value, size_t size, int flags)
 {
        struct inode *inode = dentry->d_inode;
        struct inode *delegated_inode = NULL;
@@ -280,7 +289,7 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
        int error;
 
        if (size && strcmp(name, XATTR_NAME_CAPS) == 0) {
-               error = cap_convert_nscap(dentry, &value, size);
+               error = cap_convert_nscap(mnt_userns, dentry, &value, size);
                if (error < 0)
                        return error;
                size = error;
@@ -288,8 +297,8 @@ vfs_setxattr(struct dentry *dentry, const char *name, const void *value,
 
 retry_deleg:
        inode_lock(inode);
-       error = __vfs_setxattr_locked(dentry, name, value, size, flags,
-           &delegated_inode);
+       error = __vfs_setxattr_locked(mnt_userns, dentry, name, value, size,
+                                     flags, &delegated_inode);
        inode_unlock(inode);
 
        if (delegated_inode) {
@@ -305,18 +314,20 @@ retry_deleg:
 EXPORT_SYMBOL_GPL(vfs_setxattr);
 
 static ssize_t
-xattr_getsecurity(struct inode *inode, const char *name, void *value,
-                       size_t size)
+xattr_getsecurity(struct user_namespace *mnt_userns, struct inode *inode,
+                 const char *name, void *value, size_t size)
 {
        void *buffer = NULL;
        ssize_t len;
 
        if (!value || !size) {
-               len = security_inode_getsecurity(inode, name, &buffer, false);
+               len = security_inode_getsecurity(mnt_userns, inode, name,
+                                                &buffer, false);
                goto out_noalloc;
        }
 
-       len = security_inode_getsecurity(inode, name, &buffer, true);
+       len = security_inode_getsecurity(mnt_userns, inode, name, &buffer,
+                                        true);
        if (len < 0)
                return len;
        if (size < len) {
@@ -339,15 +350,16 @@ out_noalloc:
  * Returns the result of alloc, if failed, or the getxattr operation.
  */
 ssize_t
-vfs_getxattr_alloc(struct dentry *dentry, const char *name, char **xattr_value,
-                  size_t xattr_size, gfp_t flags)
+vfs_getxattr_alloc(struct user_namespace *mnt_userns, struct dentry *dentry,
+                  const char *name, char **xattr_value, size_t xattr_size,
+                  gfp_t flags)
 {
        const struct xattr_handler *handler;
        struct inode *inode = dentry->d_inode;
        char *value = *xattr_value;
        int error;
 
-       error = xattr_permission(inode, name, MAY_READ);
+       error = xattr_permission(mnt_userns, inode, name, MAY_READ);
        if (error)
                return error;
 
@@ -388,12 +400,13 @@ __vfs_getxattr(struct dentry *dentry, struct inode *inode, const char *name,
 EXPORT_SYMBOL(__vfs_getxattr);
 
 ssize_t
-vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
+vfs_getxattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+            const char *name, void *value, size_t size)
 {
        struct inode *inode = dentry->d_inode;
        int error;
 
-       error = xattr_permission(inode, name, MAY_READ);
+       error = xattr_permission(mnt_userns, inode, name, MAY_READ);
        if (error)
                return error;
 
@@ -404,7 +417,8 @@ vfs_getxattr(struct dentry *dentry, const char *name, void *value, size_t size)
        if (!strncmp(name, XATTR_SECURITY_PREFIX,
                                XATTR_SECURITY_PREFIX_LEN)) {
                const char *suffix = name + XATTR_SECURITY_PREFIX_LEN;
-               int ret = xattr_getsecurity(inode, suffix, value, size);
+               int ret = xattr_getsecurity(mnt_userns, inode, suffix, value,
+                                           size);
                /*
                 * Only overwrite the return value if a security module
                 * is actually active.
@@ -439,7 +453,8 @@ vfs_listxattr(struct dentry *dentry, char *list, size_t size)
 EXPORT_SYMBOL_GPL(vfs_listxattr);
 
 int
-__vfs_removexattr(struct dentry *dentry, const char *name)
+__vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+                 const char *name)
 {
        struct inode *inode = d_inode(dentry);
        const struct xattr_handler *handler;
@@ -449,7 +464,8 @@ __vfs_removexattr(struct dentry *dentry, const char *name)
                return PTR_ERR(handler);
        if (!handler->set)
                return -EOPNOTSUPP;
-       return handler->set(handler, dentry, inode, name, NULL, 0, XATTR_REPLACE);
+       return handler->set(handler, mnt_userns, dentry, inode, name, NULL, 0,
+                           XATTR_REPLACE);
 }
 EXPORT_SYMBOL(__vfs_removexattr);
 
@@ -463,17 +479,18 @@ EXPORT_SYMBOL(__vfs_removexattr);
  *  a delegation was broken on, NULL if none.
  */
 int
-__vfs_removexattr_locked(struct dentry *dentry, const char *name,
-               struct inode **delegated_inode)
+__vfs_removexattr_locked(struct user_namespace *mnt_userns,
+                        struct dentry *dentry, const char *name,
+                        struct inode **delegated_inode)
 {
        struct inode *inode = dentry->d_inode;
        int error;
 
-       error = xattr_permission(inode, name, MAY_WRITE);
+       error = xattr_permission(mnt_userns, inode, name, MAY_WRITE);
        if (error)
                return error;
 
-       error = security_inode_removexattr(dentry, name);
+       error = security_inode_removexattr(mnt_userns, dentry, name);
        if (error)
                goto out;
 
@@ -481,7 +498,7 @@ __vfs_removexattr_locked(struct dentry *dentry, const char *name,
        if (error)
                goto out;
 
-       error = __vfs_removexattr(dentry, name);
+       error = __vfs_removexattr(mnt_userns, dentry, name);
 
        if (!error) {
                fsnotify_xattr(dentry);
@@ -494,7 +511,8 @@ out:
 EXPORT_SYMBOL_GPL(__vfs_removexattr_locked);
 
 int
-vfs_removexattr(struct dentry *dentry, const char *name)
+vfs_removexattr(struct user_namespace *mnt_userns, struct dentry *dentry,
+               const char *name)
 {
        struct inode *inode = dentry->d_inode;
        struct inode *delegated_inode = NULL;
@@ -502,7 +520,8 @@ vfs_removexattr(struct dentry *dentry, const char *name)
 
 retry_deleg:
        inode_lock(inode);
-       error = __vfs_removexattr_locked(dentry, name, &delegated_inode);
+       error = __vfs_removexattr_locked(mnt_userns, dentry,
+                                        name, &delegated_inode);
        inode_unlock(inode);
 
        if (delegated_inode) {
@@ -519,8 +538,9 @@ EXPORT_SYMBOL_GPL(vfs_removexattr);
  * Extended attribute SET operations
  */
 static long
-setxattr(struct dentry *d, const char __user *name, const void __user *value,
-        size_t size, int flags)
+setxattr(struct user_namespace *mnt_userns, struct dentry *d,
+        const char __user *name, const void __user *value, size_t size,
+        int flags)
 {
        int error;
        void *kvalue = NULL;
@@ -547,10 +567,10 @@ setxattr(struct dentry *d, const char __user *name, const void __user *value,
                }
                if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
                    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
-                       posix_acl_fix_xattr_from_user(kvalue, size);
+                       posix_acl_fix_xattr_from_user(mnt_userns, kvalue, size);
        }
 
-       error = vfs_setxattr(d, kname, kvalue, size, flags);
+       error = vfs_setxattr(mnt_userns, d, kname, kvalue, size, flags);
 out:
        kvfree(kvalue);
 
@@ -563,13 +583,15 @@ static int path_setxattr(const char __user *pathname,
 {
        struct path path;
        int error;
+
 retry:
        error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
        if (error)
                return error;
        error = mnt_want_write(path.mnt);
        if (!error) {
-               error = setxattr(path.dentry, name, value, size, flags);
+               error = setxattr(mnt_user_ns(path.mnt), path.dentry, name,
+                                value, size, flags);
                mnt_drop_write(path.mnt);
        }
        path_put(&path);
@@ -605,7 +627,9 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
        audit_file(f.file);
        error = mnt_want_write_file(f.file);
        if (!error) {
-               error = setxattr(f.file->f_path.dentry, name, value, size, flags);
+               error = setxattr(file_mnt_user_ns(f.file),
+                                f.file->f_path.dentry, name,
+                                value, size, flags);
                mnt_drop_write_file(f.file);
        }
        fdput(f);
@@ -616,8 +640,8 @@ SYSCALL_DEFINE5(fsetxattr, int, fd, const char __user *, name,
  * Extended attribute GET operations
  */
 static ssize_t
-getxattr(struct dentry *d, const char __user *name, void __user *value,
-        size_t size)
+getxattr(struct user_namespace *mnt_userns, struct dentry *d,
+        const char __user *name, void __user *value, size_t size)
 {
        ssize_t error;
        void *kvalue = NULL;
@@ -637,11 +661,11 @@ getxattr(struct dentry *d, const char __user *name, void __user *value,
                        return -ENOMEM;
        }
 
-       error = vfs_getxattr(d, kname, kvalue, size);
+       error = vfs_getxattr(mnt_userns, d, kname, kvalue, size);
        if (error > 0) {
                if ((strcmp(kname, XATTR_NAME_POSIX_ACL_ACCESS) == 0) ||
                    (strcmp(kname, XATTR_NAME_POSIX_ACL_DEFAULT) == 0))
-                       posix_acl_fix_xattr_to_user(kvalue, error);
+                       posix_acl_fix_xattr_to_user(mnt_userns, kvalue, error);
                if (size && copy_to_user(value, kvalue, error))
                        error = -EFAULT;
        } else if (error == -ERANGE && size >= XATTR_SIZE_MAX) {
@@ -665,7 +689,7 @@ retry:
        error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
        if (error)
                return error;
-       error = getxattr(path.dentry, name, value, size);
+       error = getxattr(mnt_user_ns(path.mnt), path.dentry, name, value, size);
        path_put(&path);
        if (retry_estale(error, lookup_flags)) {
                lookup_flags |= LOOKUP_REVAL;
@@ -695,7 +719,8 @@ SYSCALL_DEFINE4(fgetxattr, int, fd, const char __user *, name,
        if (!f.file)
                return error;
        audit_file(f.file);
-       error = getxattr(f.file->f_path.dentry, name, value, size);
+       error = getxattr(file_mnt_user_ns(f.file), f.file->f_path.dentry,
+                        name, value, size);
        fdput(f);
        return error;
 }
@@ -779,7 +804,8 @@ SYSCALL_DEFINE3(flistxattr, int, fd, char __user *, list, size_t, size)
  * Extended attribute REMOVE operations
  */
 static long
-removexattr(struct dentry *d, const char __user *name)
+removexattr(struct user_namespace *mnt_userns, struct dentry *d,
+           const char __user *name)
 {
        int error;
        char kname[XATTR_NAME_MAX + 1];
@@ -790,7 +816,7 @@ removexattr(struct dentry *d, const char __user *name)
        if (error < 0)
                return error;
 
-       return vfs_removexattr(d, kname);
+       return vfs_removexattr(mnt_userns, d, kname);
 }
 
 static int path_removexattr(const char __user *pathname,
@@ -804,7 +830,7 @@ retry:
                return error;
        error = mnt_want_write(path.mnt);
        if (!error) {
-               error = removexattr(path.dentry, name);
+               error = removexattr(mnt_user_ns(path.mnt), path.dentry, name);
                mnt_drop_write(path.mnt);
        }
        path_put(&path);
@@ -837,7 +863,8 @@ SYSCALL_DEFINE2(fremovexattr, int, fd, const char __user *, name)
        audit_file(f.file);
        error = mnt_want_write_file(f.file);
        if (!error) {
-               error = removexattr(f.file->f_path.dentry, name);
+               error = removexattr(file_mnt_user_ns(f.file),
+                                   f.file->f_path.dentry, name);
                mnt_drop_write_file(f.file);
        }
        fdput(f);
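
For context, a minimal sketch of the calling convention the fs/xattr.c hunks
above establish: callers resolve the mount's idmapping once, via mnt_user_ns()
for a struct path or file_mnt_user_ns() for an open file, and thread it through
every VFS xattr helper. The function below is hypothetical and only illustrates
the pattern; on a mount that is not idmapped the value passed is &init_user_ns.

	static long example_setxattr(const struct path *path, const char *name,
				     const void *value, size_t size)
	{
		/* The mount's idmapping, not the caller's own user namespace. */
		struct user_namespace *mnt_userns = mnt_user_ns(path->mnt);

		/* vfs_setxattr() now takes the mount's userns as its first argument. */
		return vfs_setxattr(mnt_userns, path->dentry, name, value, size, 0);
	}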
index b56ff45..5b6fcb9 100644 (file)
@@ -2805,7 +2805,7 @@ xfs_btree_split_worker(
        struct xfs_btree_split_args     *args = container_of(work,
                                                struct xfs_btree_split_args, work);
        unsigned long           pflags;
-       unsigned long           new_pflags = PF_MEMALLOC_NOFS;
+       unsigned long           new_pflags = 0;
 
        /*
         * we are in a transaction context here, but may also be doing work
@@ -2817,12 +2817,20 @@ xfs_btree_split_worker(
                new_pflags |= PF_MEMALLOC | PF_SWAPWRITE | PF_KSWAPD;
 
        current_set_flags_nested(&pflags, new_pflags);
+       xfs_trans_set_context(args->cur->bc_tp);
 
        args->result = __xfs_btree_split(args->cur, args->level, args->ptrp,
                                         args->key, args->curp, args->stat);
-       complete(args->done);
 
+       xfs_trans_clear_context(args->cur->bc_tp);
        current_restore_flags_nested(&pflags, new_pflags);
+
+       /*
+        * Do not access args after complete() has run: we do not own args,
+        * and the waiter may run and free it before this function returns.
+        */
+       complete(args->done);
+
 }
 
 /*
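
The reordering above matters because the split args live on the waiting
thread's stack: once complete() fires, the waiter may return and the memory
disappears, so the worker must not touch args (or the transaction context)
afterwards. A condensed sketch of that waiter side, with names as in
xfs_btree.c:

	struct xfs_btree_split_args	args;
	DECLARE_COMPLETION_ONSTACK(done);

	args.done = &done;
	INIT_WORK_ONSTACK(&args.work, xfs_btree_split_worker);
	queue_work(xfs_alloc_wq, &args.work);
	wait_for_completion(&done);	/* args may be reused right after this */
	destroy_work_on_stack(&args.work);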
index 779cb73..d02bef2 100644 (file)
@@ -238,7 +238,8 @@ xfs_acl_set_mode(
 }
 
 int
-xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+           struct posix_acl *acl, int type)
 {
        umode_t mode;
        bool set_mode = false;
@@ -252,7 +253,7 @@ xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
                return error;
 
        if (type == ACL_TYPE_ACCESS) {
-               error = posix_acl_update_mode(inode, &mode, &acl);
+               error = posix_acl_update_mode(mnt_userns, inode, &mode, &acl);
                if (error)
                        return error;
                set_mode = true;
index c042c08..7bdb3a4 100644 (file)
@@ -11,7 +11,8 @@ struct posix_acl;
 
 #ifdef CONFIG_XFS_POSIX_ACL
 extern struct posix_acl *xfs_get_acl(struct inode *inode, int type);
-extern int xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+extern int xfs_set_acl(struct user_namespace *mnt_userns, struct inode *inode,
+                      struct posix_acl *acl, int type);
 extern int __xfs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
 void xfs_forget_acl(struct inode *inode, const char *name);
 #else
index 4304c64..b4186d6 100644 (file)
@@ -62,7 +62,7 @@ xfs_setfilesize_trans_alloc(
         * We hand off the transaction to the completion thread now, so
         * clear the flag here.
         */
-       current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+       xfs_trans_clear_context(tp);
        return 0;
 }
 
@@ -125,7 +125,7 @@ xfs_setfilesize_ioend(
         * thus we need to mark ourselves as being in a transaction manually.
         * Similarly for freeze protection.
         */
-       current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+       xfs_trans_set_context(tp);
        __sb_writers_acquired(VFS_I(ip)->i_sb, SB_FREEZE_FS);
 
        /* we abort the update if there was an IO error */
@@ -568,6 +568,12 @@ xfs_vm_writepage(
 {
        struct xfs_writepage_ctx wpc = { };
 
+       if (WARN_ON_ONCE(current->journal_info)) {
+               redirty_page_for_writepage(wbc, page);
+               unlock_page(page);
+               return 0;
+       }
+
        return iomap_writepage(page, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
 
@@ -578,6 +584,13 @@ xfs_vm_writepages(
 {
        struct xfs_writepage_ctx wpc = { };
 
+       /*
+        * Writing back data in a transaction context can result in recursive
+        * transactions. This is bad, so issue a warning and get out of here.
+        */
+       if (WARN_ON_ONCE(current->journal_info))
+               return 0;
+
        xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
        return iomap_writepages(mapping, wbc, &wpc.ctx, &xfs_writeback_ops);
 }
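
Both guards above lean on the same invariant: after this series, XFS parks the
running transaction in current->journal_info for exactly the lifetime of the
transaction context (see the xfs_trans.h hunk near the end of this diff). A
hypothetical predicate, written out only to make the check explicit:

	/* Non-NULL exactly while this task is inside an XFS transaction. */
	static inline bool example_in_xfs_trans_context(void)
	{
		return current->journal_info != NULL;
	}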
index e2148f2..17f36db 100644 (file)
@@ -6,7 +6,7 @@
 
 static inline unsigned int bio_max_vecs(unsigned int count)
 {
-       return min_t(unsigned, howmany(count, PAGE_SIZE), BIO_MAX_PAGES);
+       return bio_max_segs(howmany(count, PAGE_SIZE));
 }
 
 int
index f6e5235..37a1d12 100644 (file)
@@ -1480,7 +1480,7 @@ xfs_buf_ioapply_map(
        int             op)
 {
        int             page_index;
-       int             total_nr_pages = bp->b_page_count;
+       unsigned int    total_nr_pages = bp->b_page_count;
        int             nr_pages;
        struct bio      *bio;
        sector_t        sector =  bp->b_maps[map].bm_bn;
@@ -1505,7 +1505,7 @@ xfs_buf_ioapply_map(
 
 next_chunk:
        atomic_inc(&bp->b_io_remaining);
-       nr_pages = min(total_nr_pages, BIO_MAX_PAGES);
+       nr_pages = bio_max_segs(total_nr_pages);
 
        bio = bio_alloc(GFP_NOIO, nr_pages);
        bio_set_dev(bio, bp->b_target->bt_bdev);
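
bio_max_segs() replaces the open-coded clamp that both call sites above used to
spell out. A sketch of the equivalent, mirroring the removed expression (the
real helper lives in include/linux/bio.h):

	static inline unsigned int example_bio_max_segs(unsigned int nr_segs)
	{
		return min_t(unsigned int, nr_segs, BIO_MAX_PAGES);
	}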
index 3991e59..ef17c1f 100644 (file)
@@ -344,7 +344,6 @@ xfs_extent_busy_trim(
        ASSERT(*len > 0);
 
        spin_lock(&args->pag->pagb_lock);
-restart:
        fbno = *bno;
        flen = *len;
        rbp = args->pag->pagb_tree.rb_node;
@@ -363,19 +362,6 @@ restart:
                        continue;
                }
 
-               /*
-                * If this is a metadata allocation, try to reuse the busy
-                * extent instead of trimming the allocation.
-                */
-               if (!(args->datatype & XFS_ALLOC_USERDATA) &&
-                   !(busyp->flags & XFS_EXTENT_BUSY_DISCARDED)) {
-                       if (!xfs_extent_busy_update_extent(args->mp, args->pag,
-                                                         busyp, fbno, flen,
-                                                         false))
-                               goto restart;
-                       continue;
-               }
-
                if (bbno <= fbno) {
                        /* start overlap */
 
index 68ca1b4..a007ca0 100644 (file)
@@ -29,6 +29,7 @@
 #include <linux/backing-dev.h>
 #include <linux/mman.h>
 #include <linux/fadvise.h>
+#include <linux/mount.h>
 
 static const struct vm_operations_struct xfs_file_vm_ops;
 
@@ -1051,7 +1052,8 @@ xfs_file_fallocate(
 
                iattr.ia_valid = ATTR_SIZE;
                iattr.ia_size = new_size;
-               error = xfs_vn_setattr_size(file_dentry(file), &iattr);
+               error = xfs_vn_setattr_size(file_mnt_user_ns(file),
+                                           file_dentry(file), &iattr);
                if (error)
                        goto out_unlock;
        }
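
Note how the fallocate path derives the idmapping from the struct file it
already holds. file_mnt_user_ns() is, to a first approximation, mnt_user_ns()
applied to the mount the file was opened through; a sketch of the presumed
expansion:

	static inline struct user_namespace *example_file_mnt_user_ns(struct file *file)
	{
		/* The vfsmount used at open time carries the idmapping. */
		return mnt_user_ns(file->f_path.mnt);
	}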
index 636ac13..f93370b 100644 (file)
@@ -766,6 +766,7 @@ xfs_inode_inherit_flags2(
  */
 static int
 xfs_init_new_inode(
+       struct user_namespace   *mnt_userns,
        struct xfs_trans        *tp,
        struct xfs_inode        *pip,
        xfs_ino_t               ino,
@@ -811,11 +812,11 @@ xfs_init_new_inode(
 
        if (dir && !(dir->i_mode & S_ISGID) &&
            (mp->m_flags & XFS_MOUNT_GRPID)) {
-               inode->i_uid = current_fsuid();
+               inode->i_uid = fsuid_into_mnt(mnt_userns);
                inode->i_gid = dir->i_gid;
                inode->i_mode = mode;
        } else {
-               inode_init_owner(inode, dir, mode);
+               inode_init_owner(mnt_userns, inode, dir, mode);
        }
 
        /*
@@ -824,7 +825,8 @@ xfs_init_new_inode(
         * (and only if the irix_sgid_inherit compatibility variable is set).
         */
        if (irix_sgid_inherit &&
-           (inode->i_mode & S_ISGID) && !in_group_p(inode->i_gid))
+           (inode->i_mode & S_ISGID) &&
+           !in_group_p(i_gid_into_mnt(mnt_userns, inode)))
                inode->i_mode &= ~S_ISGID;
 
        ip->i_d.di_size = 0;
@@ -901,6 +903,7 @@ xfs_init_new_inode(
  */
 int
 xfs_dir_ialloc(
+       struct user_namespace   *mnt_userns,
        struct xfs_trans        **tpp,
        struct xfs_inode        *dp,
        umode_t                 mode,
@@ -933,7 +936,8 @@ xfs_dir_ialloc(
                return error;
        ASSERT(ino != NULLFSINO);
 
-       return xfs_init_new_inode(*tpp, dp, ino, mode, nlink, rdev, prid, ipp);
+       return xfs_init_new_inode(mnt_userns, *tpp, dp, ino, mode, nlink, rdev,
+                                 prid, ipp);
 }
 
 /*
@@ -973,6 +977,7 @@ xfs_bumplink(
 
 int
 xfs_create(
+       struct user_namespace   *mnt_userns,
        xfs_inode_t             *dp,
        struct xfs_name         *name,
        umode_t                 mode,
@@ -1002,9 +1007,10 @@ xfs_create(
        /*
         * Make sure that we have allocated dquot(s) on disk.
         */
-       error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
-                                       XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
-                                       &udqp, &gdqp, &pdqp);
+       error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns),
+                       fsgid_into_mnt(mnt_userns), prid,
+                       XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+                       &udqp, &gdqp, &pdqp);
        if (error)
                return error;
 
@@ -1046,7 +1052,8 @@ xfs_create(
         * entry pointing to them, but a directory also the "." entry
         * pointing to itself.
         */
-       error = xfs_dir_ialloc(&tp, dp, mode, is_dir ? 2 : 1, rdev, prid, &ip);
+       error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, is_dir ? 2 : 1, rdev,
+                              prid, &ip);
        if (error)
                goto out_trans_cancel;
 
@@ -1127,6 +1134,7 @@ xfs_create(
 
 int
 xfs_create_tmpfile(
+       struct user_namespace   *mnt_userns,
        struct xfs_inode        *dp,
        umode_t                 mode,
        struct xfs_inode        **ipp)
@@ -1150,9 +1158,10 @@ xfs_create_tmpfile(
        /*
         * Make sure that we have allocated dquot(s) on disk.
         */
-       error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
-                               XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
-                               &udqp, &gdqp, &pdqp);
+       error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns),
+                       fsgid_into_mnt(mnt_userns), prid,
+                       XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
+                       &udqp, &gdqp, &pdqp);
        if (error)
                return error;
 
@@ -1164,7 +1173,7 @@ xfs_create_tmpfile(
        if (error)
                goto out_release_dquots;
 
-       error = xfs_dir_ialloc(&tp, dp, mode, 0, 0, prid, &ip);
+       error = xfs_dir_ialloc(mnt_userns, &tp, dp, mode, 0, 0, prid, &ip);
        if (error)
                goto out_trans_cancel;
 
@@ -2977,13 +2986,15 @@ out_trans_abort:
  */
 static int
 xfs_rename_alloc_whiteout(
+       struct user_namespace   *mnt_userns,
        struct xfs_inode        *dp,
        struct xfs_inode        **wip)
 {
        struct xfs_inode        *tmpfile;
        int                     error;
 
-       error = xfs_create_tmpfile(dp, S_IFCHR | WHITEOUT_MODE, &tmpfile);
+       error = xfs_create_tmpfile(mnt_userns, dp, S_IFCHR | WHITEOUT_MODE,
+                                  &tmpfile);
        if (error)
                return error;
 
@@ -3005,6 +3016,7 @@ xfs_rename_alloc_whiteout(
  */
 int
 xfs_rename(
+       struct user_namespace   *mnt_userns,
        struct xfs_inode        *src_dp,
        struct xfs_name         *src_name,
        struct xfs_inode        *src_ip,
@@ -3036,7 +3048,7 @@ xfs_rename(
         */
        if (flags & RENAME_WHITEOUT) {
                ASSERT(!(flags & (RENAME_NOREPLACE | RENAME_EXCHANGE)));
-               error = xfs_rename_alloc_whiteout(target_dp, &wip);
+               error = xfs_rename_alloc_whiteout(mnt_userns, target_dp, &wip);
                if (error)
                        return error;
 
index eca333f..88ee4c3 100644 (file)
@@ -369,15 +369,18 @@ int               xfs_release(struct xfs_inode *ip);
 void           xfs_inactive(struct xfs_inode *ip);
 int            xfs_lookup(struct xfs_inode *dp, struct xfs_name *name,
                           struct xfs_inode **ipp, struct xfs_name *ci_name);
-int            xfs_create(struct xfs_inode *dp, struct xfs_name *name,
+int            xfs_create(struct user_namespace *mnt_userns,
+                          struct xfs_inode *dp, struct xfs_name *name,
                           umode_t mode, dev_t rdev, struct xfs_inode **ipp);
-int            xfs_create_tmpfile(struct xfs_inode *dp, umode_t mode,
+int            xfs_create_tmpfile(struct user_namespace *mnt_userns,
+                          struct xfs_inode *dp, umode_t mode,
                           struct xfs_inode **ipp);
 int            xfs_remove(struct xfs_inode *dp, struct xfs_name *name,
                           struct xfs_inode *ip);
 int            xfs_link(struct xfs_inode *tdp, struct xfs_inode *sip,
                         struct xfs_name *target_name);
-int            xfs_rename(struct xfs_inode *src_dp, struct xfs_name *src_name,
+int            xfs_rename(struct user_namespace *mnt_userns,
+                          struct xfs_inode *src_dp, struct xfs_name *src_name,
                           struct xfs_inode *src_ip, struct xfs_inode *target_dp,
                           struct xfs_name *target_name,
                           struct xfs_inode *target_ip, unsigned int flags);
@@ -407,9 +410,10 @@ void               xfs_lock_two_inodes(struct xfs_inode *ip0, uint ip0_mode,
 xfs_extlen_t   xfs_get_extsz_hint(struct xfs_inode *ip);
 xfs_extlen_t   xfs_get_cowextsz_hint(struct xfs_inode *ip);
 
-int xfs_dir_ialloc(struct xfs_trans **tpp, struct xfs_inode *dp, umode_t mode,
-                  xfs_nlink_t nlink, dev_t dev, prid_t prid,
-                  struct xfs_inode **ipp);
+int            xfs_dir_ialloc(struct user_namespace *mnt_userns,
+                              struct xfs_trans **tpp, struct xfs_inode *dp,
+                              umode_t mode, xfs_nlink_t nlink, dev_t dev,
+                              prid_t prid, struct xfs_inode **ipp);
 
 static inline int
 xfs_itruncate_extents(
index 248083e..99dfe89 100644 (file)
@@ -693,7 +693,8 @@ xfs_ioc_space(
 
        iattr.ia_valid = ATTR_SIZE;
        iattr.ia_size = bf->l_start;
-       error = xfs_vn_setattr_size(file_dentry(filp), &iattr);
+       error = xfs_vn_setattr_size(file_mnt_user_ns(filp), file_dentry(filp),
+                                   &iattr);
        if (error)
                goto out_unlock;
 
@@ -734,13 +735,15 @@ xfs_fsinumbers_fmt(
 
 STATIC int
 xfs_ioc_fsbulkstat(
-       xfs_mount_t             *mp,
+       struct file             *file,
        unsigned int            cmd,
        void                    __user *arg)
 {
+       struct xfs_mount        *mp = XFS_I(file_inode(file))->i_mount;
        struct xfs_fsop_bulkreq bulkreq;
        struct xfs_ibulk        breq = {
                .mp             = mp,
+               .mnt_userns     = file_mnt_user_ns(file),
                .ocount         = 0,
        };
        xfs_ino_t               lastino;
@@ -908,13 +911,15 @@ xfs_bulk_ireq_teardown(
 /* Handle the v5 bulkstat ioctl. */
 STATIC int
 xfs_ioc_bulkstat(
-       struct xfs_mount                *mp,
+       struct file                     *file,
        unsigned int                    cmd,
        struct xfs_bulkstat_req __user  *arg)
 {
+       struct xfs_mount                *mp = XFS_I(file_inode(file))->i_mount;
        struct xfs_bulk_ireq            hdr;
        struct xfs_ibulk                breq = {
                .mp                     = mp,
+               .mnt_userns             = file_mnt_user_ns(file),
        };
        int                             error;
 
@@ -1275,9 +1280,10 @@ xfs_ioctl_setattr_prepare_dax(
  */
 static struct xfs_trans *
 xfs_ioctl_setattr_get_trans(
-       struct xfs_inode        *ip,
+       struct file             *file,
        struct xfs_dquot        *pdqp)
 {
+       struct xfs_inode        *ip = XFS_I(file_inode(file));
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
        int                     error = -EROFS;
@@ -1299,7 +1305,7 @@ xfs_ioctl_setattr_get_trans(
         * The user ID of the calling process must be equal to the file owner
         * ID, except in cases where the CAP_FSETID capability is applicable.
         */
-       if (!inode_owner_or_capable(VFS_I(ip))) {
+       if (!inode_owner_or_capable(file_mnt_user_ns(file), VFS_I(ip))) {
                error = -EPERM;
                goto out_cancel;
        }
@@ -1427,9 +1433,11 @@ xfs_ioctl_setattr_check_projid(
 
 STATIC int
 xfs_ioctl_setattr(
-       xfs_inode_t             *ip,
+       struct file             *file,
        struct fsxattr          *fa)
 {
+       struct user_namespace   *mnt_userns = file_mnt_user_ns(file);
+       struct xfs_inode        *ip = XFS_I(file_inode(file));
        struct fsxattr          old_fa;
        struct xfs_mount        *mp = ip->i_mount;
        struct xfs_trans        *tp;
@@ -1461,7 +1469,7 @@ xfs_ioctl_setattr(
 
        xfs_ioctl_setattr_prepare_dax(ip, fa);
 
-       tp = xfs_ioctl_setattr_get_trans(ip, pdqp);
+       tp = xfs_ioctl_setattr_get_trans(file, pdqp);
        if (IS_ERR(tp)) {
                error = PTR_ERR(tp);
                goto error_free_dquots;
@@ -1493,7 +1501,7 @@ xfs_ioctl_setattr(
         */
 
        if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) &&
-           !capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
+           !capable_wrt_inode_uidgid(mnt_userns, VFS_I(ip), CAP_FSETID))
                VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID);
 
        /* Change the ownerships and register project quota modifications */
@@ -1540,7 +1548,6 @@ error_free_dquots:
 
 STATIC int
 xfs_ioc_fssetxattr(
-       xfs_inode_t             *ip,
        struct file             *filp,
        void                    __user *arg)
 {
@@ -1553,7 +1560,7 @@ xfs_ioc_fssetxattr(
        error = mnt_want_write_file(filp);
        if (error)
                return error;
-       error = xfs_ioctl_setattr(ip, &fa);
+       error = xfs_ioctl_setattr(filp, &fa);
        mnt_drop_write_file(filp);
        return error;
 }
@@ -1599,7 +1606,7 @@ xfs_ioc_setxflags(
 
        xfs_ioctl_setattr_prepare_dax(ip, &fa);
 
-       tp = xfs_ioctl_setattr_get_trans(ip, NULL);
+       tp = xfs_ioctl_setattr_get_trans(filp, NULL);
        if (IS_ERR(tp)) {
                error = PTR_ERR(tp);
                goto out_drop_write;
@@ -2110,10 +2117,10 @@ xfs_file_ioctl(
        case XFS_IOC_FSBULKSTAT_SINGLE:
        case XFS_IOC_FSBULKSTAT:
        case XFS_IOC_FSINUMBERS:
-               return xfs_ioc_fsbulkstat(mp, cmd, arg);
+               return xfs_ioc_fsbulkstat(filp, cmd, arg);
 
        case XFS_IOC_BULKSTAT:
-               return xfs_ioc_bulkstat(mp, cmd, arg);
+               return xfs_ioc_bulkstat(filp, cmd, arg);
        case XFS_IOC_INUMBERS:
                return xfs_ioc_inumbers(mp, cmd, arg);
 
@@ -2135,7 +2142,7 @@ xfs_file_ioctl(
        case XFS_IOC_FSGETXATTRA:
                return xfs_ioc_fsgetxattr(ip, 1, arg);
        case XFS_IOC_FSSETXATTR:
-               return xfs_ioc_fssetxattr(ip, filp, arg);
+               return xfs_ioc_fssetxattr(filp, arg);
        case XFS_IOC_GETXFLAGS:
                return xfs_ioc_getxflags(ip, arg);
        case XFS_IOC_SETXFLAGS:
index c1771e7..33c09ec 100644 (file)
@@ -209,14 +209,16 @@ xfs_fsbulkstat_one_fmt_compat(
 /* copied from xfs_ioctl.c */
 STATIC int
 xfs_compat_ioc_fsbulkstat(
-       xfs_mount_t               *mp,
+       struct file             *file,
        unsigned int              cmd,
        struct compat_xfs_fsop_bulkreq __user *p32)
 {
+       struct xfs_mount        *mp = XFS_I(file_inode(file))->i_mount;
        u32                     addr;
        struct xfs_fsop_bulkreq bulkreq;
        struct xfs_ibulk        breq = {
                .mp             = mp,
+               .mnt_userns     = file_mnt_user_ns(file),
                .ocount         = 0,
        };
        xfs_ino_t               lastino;
@@ -436,7 +438,6 @@ xfs_file_compat_ioctl(
 {
        struct inode            *inode = file_inode(filp);
        struct xfs_inode        *ip = XFS_I(inode);
-       struct xfs_mount        *mp = ip->i_mount;
        void                    __user *arg = compat_ptr(p);
        int                     error;
 
@@ -456,7 +457,7 @@ xfs_file_compat_ioctl(
                return xfs_ioc_space(filp, &bf);
        }
        case XFS_IOC_FSGEOMETRY_V1_32:
-               return xfs_compat_ioc_fsgeometry_v1(mp, arg);
+               return xfs_compat_ioc_fsgeometry_v1(ip->i_mount, arg);
        case XFS_IOC_FSGROWFSDATA_32: {
                struct xfs_growfs_data  in;
 
@@ -465,7 +466,7 @@ xfs_file_compat_ioctl(
                error = mnt_want_write_file(filp);
                if (error)
                        return error;
-               error = xfs_growfs_data(mp, &in);
+               error = xfs_growfs_data(ip->i_mount, &in);
                mnt_drop_write_file(filp);
                return error;
        }
@@ -477,7 +478,7 @@ xfs_file_compat_ioctl(
                error = mnt_want_write_file(filp);
                if (error)
                        return error;
-               error = xfs_growfs_rt(mp, &in);
+               error = xfs_growfs_rt(ip->i_mount, &in);
                mnt_drop_write_file(filp);
                return error;
        }
@@ -507,7 +508,7 @@ xfs_file_compat_ioctl(
        case XFS_IOC_FSBULKSTAT_32:
        case XFS_IOC_FSBULKSTAT_SINGLE_32:
        case XFS_IOC_FSINUMBERS_32:
-               return xfs_compat_ioc_fsbulkstat(mp, cmd, arg);
+               return xfs_compat_ioc_fsbulkstat(filp, cmd, arg);
        case XFS_IOC_FD_TO_HANDLE_32:
        case XFS_IOC_PATH_TO_HANDLE_32:
        case XFS_IOC_PATH_TO_FSHANDLE_32: {
index 0036950..66ebccb 100644 (file)
@@ -128,6 +128,7 @@ xfs_cleanup_inode(
 
 STATIC int
 xfs_generic_create(
+       struct user_namespace   *mnt_userns,
        struct inode    *dir,
        struct dentry   *dentry,
        umode_t         mode,
@@ -161,9 +162,10 @@ xfs_generic_create(
                goto out_free_acl;
 
        if (!tmpfile) {
-               error = xfs_create(XFS_I(dir), &name, mode, rdev, &ip);
+               error = xfs_create(mnt_userns, XFS_I(dir), &name, mode, rdev,
+                                  &ip);
        } else {
-               error = xfs_create_tmpfile(XFS_I(dir), mode, &ip);
+               error = xfs_create_tmpfile(mnt_userns, XFS_I(dir), mode, &ip);
        }
        if (unlikely(error))
                goto out_free_acl;
@@ -220,31 +222,35 @@ xfs_generic_create(
 
 STATIC int
 xfs_vn_mknod(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       umode_t         mode,
-       dev_t           rdev)
+       struct user_namespace   *mnt_userns,
+       struct inode            *dir,
+       struct dentry           *dentry,
+       umode_t                 mode,
+       dev_t                   rdev)
 {
-       return xfs_generic_create(dir, dentry, mode, rdev, false);
+       return xfs_generic_create(mnt_userns, dir, dentry, mode, rdev, false);
 }
 
 STATIC int
 xfs_vn_create(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       umode_t         mode,
-       bool            flags)
+       struct user_namespace   *mnt_userns,
+       struct inode            *dir,
+       struct dentry           *dentry,
+       umode_t                 mode,
+       bool                    flags)
 {
-       return xfs_generic_create(dir, dentry, mode, 0, false);
+       return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, false);
 }
 
 STATIC int
 xfs_vn_mkdir(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       umode_t         mode)
+       struct user_namespace   *mnt_userns,
+       struct inode            *dir,
+       struct dentry           *dentry,
+       umode_t                 mode)
 {
-       return xfs_generic_create(dir, dentry, mode | S_IFDIR, 0, false);
+       return xfs_generic_create(mnt_userns, dir, dentry, mode | S_IFDIR, 0,
+                                 false);
 }
 
 STATIC struct dentry *
@@ -361,9 +367,10 @@ xfs_vn_unlink(
 
 STATIC int
 xfs_vn_symlink(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       const char      *symname)
+       struct user_namespace   *mnt_userns,
+       struct inode            *dir,
+       struct dentry           *dentry,
+       const char              *symname)
 {
        struct inode    *inode;
        struct xfs_inode *cip = NULL;
@@ -377,7 +384,7 @@ xfs_vn_symlink(
        if (unlikely(error))
                goto out;
 
-       error = xfs_symlink(XFS_I(dir), &name, symname, mode, &cip);
+       error = xfs_symlink(mnt_userns, XFS_I(dir), &name, symname, mode, &cip);
        if (unlikely(error))
                goto out;
 
@@ -403,11 +410,12 @@ xfs_vn_symlink(
 
 STATIC int
 xfs_vn_rename(
-       struct inode    *odir,
-       struct dentry   *odentry,
-       struct inode    *ndir,
-       struct dentry   *ndentry,
-       unsigned int    flags)
+       struct user_namespace   *mnt_userns,
+       struct inode            *odir,
+       struct dentry           *odentry,
+       struct inode            *ndir,
+       struct dentry           *ndentry,
+       unsigned int            flags)
 {
        struct inode    *new_inode = d_inode(ndentry);
        int             omode = 0;
@@ -431,8 +439,8 @@ xfs_vn_rename(
        if (unlikely(error))
                return error;
 
-       return xfs_rename(XFS_I(odir), &oname, XFS_I(d_inode(odentry)),
-                         XFS_I(ndir), &nname,
+       return xfs_rename(mnt_userns, XFS_I(odir), &oname,
+                         XFS_I(d_inode(odentry)), XFS_I(ndir), &nname,
                          new_inode ? XFS_I(new_inode) : NULL, flags);
 }
 
@@ -529,6 +537,7 @@ xfs_stat_blksize(
 
 STATIC int
 xfs_vn_getattr(
+       struct user_namespace   *mnt_userns,
        const struct path       *path,
        struct kstat            *stat,
        u32                     request_mask,
@@ -547,8 +556,8 @@ xfs_vn_getattr(
        stat->dev = inode->i_sb->s_dev;
        stat->mode = inode->i_mode;
        stat->nlink = inode->i_nlink;
-       stat->uid = inode->i_uid;
-       stat->gid = inode->i_gid;
+       stat->uid = i_uid_into_mnt(mnt_userns, inode);
+       stat->gid = i_gid_into_mnt(mnt_userns, inode);
        stat->ino = ip->i_ino;
        stat->atime = inode->i_atime;
        stat->mtime = inode->i_mtime;
@@ -626,8 +635,9 @@ xfs_setattr_time(
 
 static int
 xfs_vn_change_ok(
-       struct dentry   *dentry,
-       struct iattr    *iattr)
+       struct user_namespace   *mnt_userns,
+       struct dentry           *dentry,
+       struct iattr            *iattr)
 {
        struct xfs_mount        *mp = XFS_I(d_inode(dentry))->i_mount;
 
@@ -637,7 +647,7 @@ xfs_vn_change_ok(
        if (XFS_FORCED_SHUTDOWN(mp))
                return -EIO;
 
-       return setattr_prepare(dentry, iattr);
+       return setattr_prepare(mnt_userns, dentry, iattr);
 }
 
 /*
@@ -648,6 +658,7 @@ xfs_vn_change_ok(
  */
 static int
 xfs_setattr_nonsize(
+       struct user_namespace   *mnt_userns,
        struct xfs_inode        *ip,
        struct iattr            *iattr)
 {
@@ -788,7 +799,7 @@ xfs_setattr_nonsize(
         *           Posix ACL code seems to care about this issue either.
         */
        if (mask & ATTR_MODE) {
-               error = posix_acl_chmod(inode, inode->i_mode);
+               error = posix_acl_chmod(mnt_userns, inode, inode->i_mode);
                if (error)
                        return error;
        }
@@ -809,6 +820,7 @@ out_dqrele:
  */
 STATIC int
 xfs_setattr_size(
+       struct user_namespace   *mnt_userns,
        struct xfs_inode        *ip,
        struct iattr            *iattr)
 {
@@ -840,7 +852,7 @@ xfs_setattr_size(
                 * Use the regular setattr path to update the timestamps.
                 */
                iattr->ia_valid &= ~ATTR_SIZE;
-               return xfs_setattr_nonsize(ip, iattr);
+               return xfs_setattr_nonsize(mnt_userns, ip, iattr);
        }
 
        /*
@@ -1009,6 +1021,7 @@ out_trans_cancel:
 
 int
 xfs_vn_setattr_size(
+       struct user_namespace   *mnt_userns,
        struct dentry           *dentry,
        struct iattr            *iattr)
 {
@@ -1017,14 +1030,15 @@ xfs_vn_setattr_size(
 
        trace_xfs_setattr(ip);
 
-       error = xfs_vn_change_ok(dentry, iattr);
+       error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
        if (error)
                return error;
-       return xfs_setattr_size(ip, iattr);
+       return xfs_setattr_size(mnt_userns, ip, iattr);
 }
 
 STATIC int
 xfs_vn_setattr(
+       struct user_namespace   *mnt_userns,
        struct dentry           *dentry,
        struct iattr            *iattr)
 {
@@ -1044,14 +1058,14 @@ xfs_vn_setattr(
                        return error;
                }
 
-               error = xfs_vn_setattr_size(dentry, iattr);
+               error = xfs_vn_setattr_size(mnt_userns, dentry, iattr);
                xfs_iunlock(ip, XFS_MMAPLOCK_EXCL);
        } else {
                trace_xfs_setattr(ip);
 
-               error = xfs_vn_change_ok(dentry, iattr);
+               error = xfs_vn_change_ok(mnt_userns, dentry, iattr);
                if (!error)
-                       error = xfs_setattr_nonsize(ip, iattr);
+                       error = xfs_setattr_nonsize(mnt_userns, ip, iattr);
        }
 
        return error;
@@ -1122,11 +1136,12 @@ xfs_vn_fiemap(
 
 STATIC int
 xfs_vn_tmpfile(
-       struct inode    *dir,
-       struct dentry   *dentry,
-       umode_t         mode)
+       struct user_namespace   *mnt_userns,
+       struct inode            *dir,
+       struct dentry           *dentry,
+       umode_t                 mode)
 {
-       return xfs_generic_create(dir, dentry, mode, 0, true);
+       return xfs_generic_create(mnt_userns, dir, dentry, mode, 0, true);
 }
 
 static const struct inode_operations xfs_inode_operations = {
index 99ca745..2789490 100644 (file)
@@ -14,6 +14,7 @@ extern const struct file_operations xfs_dir_file_operations;
 extern ssize_t xfs_vn_listxattr(struct dentry *, char *data, size_t size);
 
 extern void xfs_setattr_time(struct xfs_inode *ip, struct iattr *iattr);
-extern int xfs_vn_setattr_size(struct dentry *dentry, struct iattr *vap);
+int xfs_vn_setattr_size(struct user_namespace *mnt_userns,
+               struct dentry *dentry, struct iattr *vap);
 
 #endif /* __XFS_IOPS_H__ */
index 16ca97a..3498b97 100644 (file)
@@ -54,10 +54,12 @@ struct xfs_bstat_chunk {
 STATIC int
 xfs_bulkstat_one_int(
        struct xfs_mount        *mp,
+       struct user_namespace   *mnt_userns,
        struct xfs_trans        *tp,
        xfs_ino_t               ino,
        struct xfs_bstat_chunk  *bc)
 {
+       struct user_namespace   *sb_userns = mp->m_super->s_user_ns;
        struct xfs_icdinode     *dic;           /* dinode core info pointer */
        struct xfs_inode        *ip;            /* incore inode pointer */
        struct inode            *inode;
@@ -86,8 +88,8 @@ xfs_bulkstat_one_int(
         */
        buf->bs_projectid = ip->i_d.di_projid;
        buf->bs_ino = ino;
-       buf->bs_uid = i_uid_read(inode);
-       buf->bs_gid = i_gid_read(inode);
+       buf->bs_uid = from_kuid(sb_userns, i_uid_into_mnt(mnt_userns, inode));
+       buf->bs_gid = from_kgid(sb_userns, i_gid_into_mnt(mnt_userns, inode));
        buf->bs_size = dic->di_size;
 
        buf->bs_nlink = inode->i_nlink;
@@ -166,6 +168,12 @@ xfs_bulkstat_one(
        };
        int                     error;
 
+       if (breq->mnt_userns != &init_user_ns) {
+               xfs_warn_ratelimited(breq->mp,
+                       "bulkstat not supported inside of idmapped mounts.");
+               return -EINVAL;
+       }
+
        ASSERT(breq->icount == 1);
 
        bc.buf = kmem_zalloc(sizeof(struct xfs_bulkstat),
@@ -173,7 +181,8 @@ xfs_bulkstat_one(
        if (!bc.buf)
                return -ENOMEM;
 
-       error = xfs_bulkstat_one_int(breq->mp, NULL, breq->startino, &bc);
+       error = xfs_bulkstat_one_int(breq->mp, breq->mnt_userns, NULL,
+                                    breq->startino, &bc);
 
        kmem_free(bc.buf);
 
@@ -194,9 +203,10 @@ xfs_bulkstat_iwalk(
        xfs_ino_t               ino,
        void                    *data)
 {
+       struct xfs_bstat_chunk  *bc = data;
        int                     error;
 
-       error = xfs_bulkstat_one_int(mp, tp, ino, data);
+       error = xfs_bulkstat_one_int(mp, bc->breq->mnt_userns, tp, ino, data);
        /* bulkstat just skips over missing inodes */
        if (error == -ENOENT || error == -EINVAL)
                return 0;
@@ -239,6 +249,11 @@ xfs_bulkstat(
        };
        int                     error;
 
+       if (breq->mnt_userns != &init_user_ns) {
+               xfs_warn_ratelimited(breq->mp,
+                       "bulkstat not supported inside of idmapped mounts.");
+               return -EINVAL;
+       }
        if (xfs_bulkstat_already_done(breq->mp, breq->startino))
                return 0;
 
index 96a1e2a..7078d10 100644 (file)
@@ -8,6 +8,7 @@
 /* In-memory representation of a userspace request for batch inode data. */
 struct xfs_ibulk {
        struct xfs_mount        *mp;
+       struct user_namespace   *mnt_userns;
        void __user             *ubuffer; /* user output buffer */
        xfs_ino_t               startino; /* start with this inode */
        unsigned int            icount;   /* number of elements in ubuffer */
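
With the mount's userns recorded in struct xfs_ibulk, bulkstat reports
ownership in two steps, as the xfs_itable.c hunk above does: map the raw inode
id through the mount's idmapping, then express the result in the filesystem's
own user namespace for the on-disk ABI. A condensed restatement of that hunk,
reusing its names:

	/* 1) apply the mount's idmapping, 2) translate for the sb's userns */
	kuid_t mapped_uid = i_uid_into_mnt(mnt_userns, inode);
	buf->bs_uid = from_kuid(mp->m_super->s_user_ns, mapped_uid);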
index 52370d0..1c97b15 100644 (file)
@@ -635,6 +635,47 @@ xfs_check_summary_counts(
 }
 
 /*
+ * Flush and reclaim dirty inodes in preparation for unmount. Inodes and
+ * internal inode structures can be sitting in the CIL and AIL at this point,
+ * so we need to unpin them, write them back and/or reclaim them before unmount
+ * can proceed.
+ *
+ * An inode cluster that has been freed can have its buffer still pinned in
+ * memory because the transaction is still sitting in an iclog. The stale inodes
+ * on that buffer will be pinned to the buffer until the transaction hits the
+ * disk and the callbacks run. Pushing the AIL will skip the stale inodes and
+ * may never see the pinned buffer, so nothing will push out the iclog and
+ * unpin the buffer.
+ *
+ * Hence we need to force the log to unpin everything first. However, log
+ * forces don't wait for the discards they issue to complete, so we have to
+ * explicitly wait for them to complete here as well.
+ *
+ * Then we can tell the world we are unmounting so that error handling knows
+ * that the filesystem is going away and we should error out anything that we
+ * have been retrying in the background.  This will prevent never-ending
+ * retries in AIL pushing from hanging the unmount.
+ *
+ * Finally, we can push the AIL to clean all the remaining dirty objects, then
+ * reclaim the remaining inodes that are still in memory at this point in time.
+ */
+static void
+xfs_unmount_flush_inodes(
+       struct xfs_mount        *mp)
+{
+       xfs_log_force(mp, XFS_LOG_SYNC);
+       xfs_extent_busy_wait_all(mp);
+       flush_workqueue(xfs_discard_wq);
+
+       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
+
+       xfs_ail_push_all_sync(mp->m_ail);
+       cancel_delayed_work_sync(&mp->m_reclaim_work);
+       xfs_reclaim_inodes(mp);
+       xfs_health_unmount(mp);
+}
+
+/*
  * This function does the following on an initial mount of a file system:
  *     - reads the superblock from disk and init the mount struct
  *     - if we're a 32-bit kernel, do a size check on the superblock
@@ -1008,7 +1049,7 @@ xfs_mountfs(
        /* Clean out dquots that might be in memory after quotacheck. */
        xfs_qm_unmount(mp);
        /*
-        * Cancel all delayed reclaim work and reclaim the inodes directly.
+        * Flush all inode reclamation work and flush the log.
         * We have to do this /after/ rtunmount and qm_unmount because those
         * two will have scheduled delayed reclaim for the rt/quota inodes.
         *
@@ -1018,11 +1059,8 @@ xfs_mountfs(
         * qm_unmount_quotas and therefore rely on qm_unmount to release the
         * quota inodes.
         */
-       cancel_delayed_work_sync(&mp->m_reclaim_work);
-       xfs_reclaim_inodes(mp);
-       xfs_health_unmount(mp);
+       xfs_unmount_flush_inodes(mp);
  out_log_dealloc:
-       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
        xfs_log_mount_cancel(mp);
  out_fail_wait:
        if (mp->m_logdev_targp && mp->m_logdev_targp != mp->m_ddev_targp)
@@ -1063,47 +1101,7 @@ xfs_unmountfs(
        xfs_rtunmount_inodes(mp);
        xfs_irele(mp->m_rootip);
 
-       /*
-        * We can potentially deadlock here if we have an inode cluster
-        * that has been freed has its buffer still pinned in memory because
-        * the transaction is still sitting in a iclog. The stale inodes
-        * on that buffer will be pinned to the buffer until the
-        * transaction hits the disk and the callbacks run. Pushing the AIL will
-        * skip the stale inodes and may never see the pinned buffer, so
-        * nothing will push out the iclog and unpin the buffer. Hence we
-        * need to force the log here to ensure all items are flushed into the
-        * AIL before we go any further.
-        */
-       xfs_log_force(mp, XFS_LOG_SYNC);
-
-       /*
-        * Wait for all busy extents to be freed, including completion of
-        * any discard operation.
-        */
-       xfs_extent_busy_wait_all(mp);
-       flush_workqueue(xfs_discard_wq);
-
-       /*
-        * We now need to tell the world we are unmounting. This will allow
-        * us to detect that the filesystem is going away and we should error
-        * out anything that we have been retrying in the background. This will
-        * prevent neverending retries in AIL pushing from hanging the unmount.
-        */
-       mp->m_flags |= XFS_MOUNT_UNMOUNTING;
-
-       /*
-        * Flush all pending changes from the AIL.
-        */
-       xfs_ail_push_all_sync(mp->m_ail);
-
-       /*
-        * Reclaim all inodes. At this point there should be no dirty inodes and
-        * none should be pinned or locked. Stop background inode reclaim here
-        * if it is still running.
-        */
-       cancel_delayed_work_sync(&mp->m_reclaim_work);
-       xfs_reclaim_inodes(mp);
-       xfs_health_unmount(mp);
+       xfs_unmount_flush_inodes(mp);
 
        xfs_qm_unmount(mp);
 
index 742d141..bfa4164 100644 (file)
@@ -787,7 +787,8 @@ xfs_qm_qino_alloc(
                return error;
 
        if (need_alloc) {
-               error = xfs_dir_ialloc(&tp, NULL, S_IFREG, 1, 0, 0, ipp);
+               error = xfs_dir_ialloc(&init_user_ns, &tp, NULL, S_IFREG, 1, 0,
+                                      0, ipp);
                if (error) {
                        xfs_trans_cancel(tp);
                        return error;
index 586d423..e5e0713 100644 (file)
@@ -1881,7 +1881,7 @@ static struct file_system_type xfs_fs_type = {
        .init_fs_context        = xfs_init_fs_context,
        .parameters             = xfs_fs_parameters,
        .kill_sb                = kill_block_super,
-       .fs_flags               = FS_REQUIRES_DEV,
+       .fs_flags               = FS_REQUIRES_DEV | FS_ALLOW_IDMAP,
 };
 MODULE_ALIAS_FS("xfs");
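
FS_ALLOW_IDMAP is the opt-in switch: only filesystems that have been audited
for the mnt_userns plumbing above advertise it, and the VFS refuses to create
an idmapped mount otherwise. A hedged sketch of the gate on the VFS side
(assumed shape, in the mount_setattr() path):

	/* Assumption: roughly what the VFS checks before idmapping a mount. */
	if (!(mnt->mnt_sb->s_type->fs_flags & FS_ALLOW_IDMAP))
		return -EINVAL;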
 
index 8565663..7f368b1 100644 (file)
@@ -134,6 +134,7 @@ xfs_readlink(
 
 int
 xfs_symlink(
+       struct user_namespace   *mnt_userns,
        struct xfs_inode        *dp,
        struct xfs_name         *link_name,
        const char              *target_path,
@@ -181,7 +182,8 @@ xfs_symlink(
        /*
         * Make sure that we have allocated dquot(s) on disk.
         */
-       error = xfs_qm_vop_dqalloc(dp, current_fsuid(), current_fsgid(), prid,
+       error = xfs_qm_vop_dqalloc(dp, fsuid_into_mnt(mnt_userns),
+                       fsgid_into_mnt(mnt_userns), prid,
                        XFS_QMOPT_QUOTALL | XFS_QMOPT_INHERIT,
                        &udqp, &gdqp, &pdqp);
        if (error)
@@ -221,8 +223,8 @@ xfs_symlink(
        /*
         * Allocate an inode for the symlink.
         */
-       error = xfs_dir_ialloc(&tp, dp, S_IFLNK | (mode & ~S_IFMT), 1, 0,
-                              prid, &ip);
+       error = xfs_dir_ialloc(mnt_userns, &tp, dp, S_IFLNK | (mode & ~S_IFMT),
+                              1, 0, prid, &ip);
        if (error)
                goto out_trans_cancel;
 
index b1fa091..2586b7e 100644 (file)
@@ -7,8 +7,9 @@
 
 /* Kernel only symlink definitions */
 
-int xfs_symlink(struct xfs_inode *dp, struct xfs_name *link_name,
-               const char *target_path, umode_t mode, struct xfs_inode **ipp);
+int xfs_symlink(struct user_namespace *mnt_userns, struct xfs_inode *dp,
+               struct xfs_name *link_name, const char *target_path,
+               umode_t mode, struct xfs_inode **ipp);
 int xfs_readlink_bmap_ilocked(struct xfs_inode *ip, char *link);
 int xfs_readlink(struct xfs_inode *ip, char *link);
 int xfs_inactive_symlink(struct xfs_inode *ip);
index 145e06c..546a6cd 100644 (file)
@@ -51,7 +51,7 @@ xfs_panic_mask_proc_handler(
 #endif /* CONFIG_PROC_FS */
 
 STATIC int
-xfs_deprecate_irix_sgid_inherit_proc_handler(
+xfs_deprecated_dointvec_minmax(
        struct ctl_table        *ctl,
        int                     write,
        void                    *buffer,
@@ -59,24 +59,8 @@ xfs_deprecate_irix_sgid_inherit_proc_handler(
        loff_t                  *ppos)
 {
        if (write) {
-               printk_once(KERN_WARNING
-                               "XFS: " "%s sysctl option is deprecated.\n",
-                               ctl->procname);
-       }
-       return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
-}
-
-STATIC int
-xfs_deprecate_irix_symlink_mode_proc_handler(
-       struct ctl_table        *ctl,
-       int                     write,
-       void                    *buffer,
-       size_t                  *lenp,
-       loff_t                  *ppos)
-{
-       if (write) {
-               printk_once(KERN_WARNING
-                               "XFS: " "%s sysctl option is deprecated.\n",
+               printk_ratelimited(KERN_WARNING
+                               "XFS: %s sysctl option is deprecated.\n",
                                ctl->procname);
        }
        return proc_dointvec_minmax(ctl, write, buffer, lenp, ppos);
@@ -88,7 +72,7 @@ static struct ctl_table xfs_table[] = {
                .data           = &xfs_params.sgid_inherit.val,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = xfs_deprecate_irix_sgid_inherit_proc_handler,
+               .proc_handler   = xfs_deprecated_dointvec_minmax,
                .extra1         = &xfs_params.sgid_inherit.min,
                .extra2         = &xfs_params.sgid_inherit.max
        },
@@ -97,7 +81,7 @@ static struct ctl_table xfs_table[] = {
                .data           = &xfs_params.symlink_mode.val,
                .maxlen         = sizeof(int),
                .mode           = 0644,
-               .proc_handler   = xfs_deprecate_irix_symlink_mode_proc_handler,
+               .proc_handler   = xfs_deprecated_dointvec_minmax,
                .extra1         = &xfs_params.symlink_mode.min,
                .extra2         = &xfs_params.symlink_mode.max
        },
@@ -201,6 +185,15 @@ static struct ctl_table xfs_table[] = {
                .extra1         = &xfs_params.blockgc_timer.min,
                .extra2         = &xfs_params.blockgc_timer.max,
        },
+       {
+               .procname       = "speculative_cow_prealloc_lifetime",
+               .data           = &xfs_params.blockgc_timer.val,
+               .maxlen         = sizeof(int),
+               .mode           = 0644,
+               .proc_handler   = xfs_deprecated_dointvec_minmax,
+               .extra1         = &xfs_params.blockgc_timer.min,
+               .extra2         = &xfs_params.blockgc_timer.max,
+       },
        /* please keep this the last entry */
 #ifdef CONFIG_PROC_FS
        {
index 44f72c0..b22a09e 100644 (file)
@@ -72,6 +72,7 @@ xfs_trans_free(
        xfs_extent_busy_clear(tp->t_mountp, &tp->t_busy, false);
 
        trace_xfs_trans_free(tp, _RET_IP_);
+       xfs_trans_clear_context(tp);
        if (!(tp->t_flags & XFS_TRANS_NO_WRITECOUNT))
                sb_end_intwrite(tp->t_mountp->m_super);
        xfs_trans_free_dqinfo(tp);
@@ -123,7 +124,8 @@ xfs_trans_dup(
 
        ntp->t_rtx_res = tp->t_rtx_res - tp->t_rtx_res_used;
        tp->t_rtx_res = tp->t_rtx_res_used;
-       ntp->t_pflags = tp->t_pflags;
+
+       xfs_trans_switch_context(tp, ntp);
 
        /* move deferred ops over to the new tp */
        xfs_defer_move(ntp, tp);
@@ -157,9 +159,6 @@ xfs_trans_reserve(
        int                     error = 0;
        bool                    rsvd = (tp->t_flags & XFS_TRANS_RESERVE) != 0;
 
-       /* Mark this thread as being in a transaction */
-       current_set_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
        /*
         * Attempt to reserve the needed disk blocks by decrementing
         * the number needed from the number available.  This will
@@ -167,10 +166,8 @@ xfs_trans_reserve(
         */
        if (blocks > 0) {
                error = xfs_mod_fdblocks(mp, -((int64_t)blocks), rsvd);
-               if (error != 0) {
-                       current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
+               if (error != 0)
                        return -ENOSPC;
-               }
                tp->t_blk_res += blocks;
        }
 
@@ -244,9 +241,6 @@ undo_blocks:
                xfs_mod_fdblocks(mp, (int64_t)blocks, rsvd);
                tp->t_blk_res = 0;
        }
-
-       current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
        return error;
 }
 
@@ -260,6 +254,7 @@ xfs_trans_alloc(
        struct xfs_trans        **tpp)
 {
        struct xfs_trans        *tp;
+       bool                    want_retry = true;
        int                     error;
 
        /*
@@ -267,9 +262,11 @@ xfs_trans_alloc(
         * GFP_NOFS allocation context so that we avoid lockdep false positives
         * by doing GFP_KERNEL allocations inside sb_start_intwrite().
         */
+retry:
        tp = kmem_cache_zalloc(xfs_trans_zone, GFP_KERNEL | __GFP_NOFAIL);
        if (!(flags & XFS_TRANS_NO_WRITECOUNT))
                sb_start_intwrite(mp->m_super);
+       xfs_trans_set_context(tp);
 
        /*
         * Zero-reservation ("empty") transactions can't modify anything, so
@@ -289,7 +286,9 @@ xfs_trans_alloc(
        tp->t_firstblock = NULLFSBLOCK;
 
        error = xfs_trans_reserve(tp, resp, blocks, rtextents);
-       if (error == -ENOSPC) {
+       if (error == -ENOSPC && want_retry) {
+               xfs_trans_cancel(tp);
+
                /*
                 * We weren't able to reserve enough space for the transaction.
                 * Flush the other speculative space allocations to free space.
@@ -297,8 +296,11 @@ xfs_trans_alloc(
                 * other locks.
                 */
                error = xfs_blockgc_free_space(mp, NULL);
-               if (!error)
-                       error = xfs_trans_reserve(tp, resp, blocks, rtextents);
+               if (error)
+                       return error;
+
+               want_retry = false;
+               goto retry;
        }
        if (error) {
                xfs_trans_cancel(tp);
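
Note: the hunks above convert the inline re-reservation into a retry-once loop. On -ENOSPC the transaction is cancelled, speculative preallocations are flushed outside of any transaction context, and the allocation restarts from scratch. A minimal stand-alone sketch of the same control flow, with reserve() and flush_speculative_space() as stubs standing in for xfs_trans_reserve() and xfs_blockgc_free_space():

    #include <errno.h>
    #include <stdbool.h>

    static int reserve(void) { return 0; }                  /* stub for xfs_trans_reserve() */
    static int flush_speculative_space(void) { return 0; }  /* stub for xfs_blockgc_free_space() */

    static int alloc_with_retry(void)
    {
            bool want_retry = true;
            int error;

    retry:
            error = reserve();
            if (error == -ENOSPC && want_retry) {
                    /* Tear down, flush outside any locks, then retry exactly once. */
                    error = flush_speculative_space();
                    if (error)
                            return error;
                    want_retry = false;
                    goto retry;
            }
            return error;
    }

    int main(void) { return alloc_with_retry(); }
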
@@ -893,7 +895,6 @@ __xfs_trans_commit(
 
        xfs_log_commit_cil(mp, tp, &commit_lsn, regrant);
 
-       current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
        xfs_trans_free(tp);
 
        /*
@@ -925,7 +926,6 @@ out_unreserve:
                        xfs_log_ticket_ungrant(mp->m_log, tp->t_ticket);
                tp->t_ticket = NULL;
        }
-       current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
        xfs_trans_free_items(tp, !!error);
        xfs_trans_free(tp);
 
@@ -985,9 +985,6 @@ xfs_trans_cancel(
                tp->t_ticket = NULL;
        }
 
-       /* mark this thread as no longer being in a transaction */
-       current_restore_flags_nested(&tp->t_pflags, PF_MEMALLOC_NOFS);
-
        xfs_trans_free_items(tp, dirty);
        xfs_trans_free(tp);
 }
index 8b03fbf..9dd745c 100644 (file)
@@ -281,4 +281,34 @@ int xfs_trans_alloc_ichange(struct xfs_inode *ip, struct xfs_dquot *udqp,
                struct xfs_dquot *gdqp, struct xfs_dquot *pdqp, bool force,
                struct xfs_trans **tpp);
 
+static inline void
+xfs_trans_set_context(
+       struct xfs_trans        *tp)
+{
+       ASSERT(current->journal_info == NULL);
+       tp->t_pflags = memalloc_nofs_save();
+       current->journal_info = tp;
+}
+
+static inline void
+xfs_trans_clear_context(
+       struct xfs_trans        *tp)
+{
+       if (current->journal_info == tp) {
+               memalloc_nofs_restore(tp->t_pflags);
+               current->journal_info = NULL;
+       }
+}
+
+static inline void
+xfs_trans_switch_context(
+       struct xfs_trans        *old_tp,
+       struct xfs_trans        *new_tp)
+{
+       ASSERT(current->journal_info == old_tp);
+       new_tp->t_pflags = old_tp->t_pflags;
+       old_tp->t_pflags = 0;
+       current->journal_info = new_tp;
+}
+
 #endif /* __XFS_TRANS_H__ */
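
Note: these helpers fold the previously open-coded current_set_flags_nested()/current_restore_flags_nested() pairs into one place and add an ownership check: current->journal_info records which transaction put the task into NOFS allocation context, so xfs_trans_clear_context() restores the flags only for the owner, and xfs_trans_dup() can hand the context to the new transaction without a double restore. A stand-alone sketch of that ownership discipline (user-space C; nofs_save()/nofs_restore() are stubs for memalloc_nofs_save()/memalloc_nofs_restore()):

    #include <assert.h>
    #include <stddef.h>

    struct trans { unsigned int pflags; };

    static struct trans *journal_info;      /* models current->journal_info */

    static unsigned int nofs_save(void) { return 1; }            /* stub */
    static void nofs_restore(unsigned int flags) { (void)flags; } /* stub */

    static void set_context(struct trans *tp)
    {
            assert(journal_info == NULL);
            tp->pflags = nofs_save();
            journal_info = tp;
    }

    static void clear_context(struct trans *tp)
    {
            if (journal_info == tp) {       /* only the owner restores */
                    nofs_restore(tp->pflags);
                    journal_info = NULL;
            }
    }

    static void switch_context(struct trans *old, struct trans *new)
    {
            assert(journal_info == old);
            new->pflags = old->pflags;
            old->pflags = 0;
            journal_info = new;             /* ownership moves to new */
    }

    int main(void)
    {
            struct trans a = { 0 }, b = { 0 };

            set_context(&a);
            switch_context(&a, &b);         /* as xfs_trans_dup() does */
            clear_context(&a);              /* no-op: a no longer owns the context */
            clear_context(&b);              /* restores the saved flags exactly once */
            return 0;
    }
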
index bca48b3..12be32f 100644 (file)
@@ -38,9 +38,10 @@ xfs_xattr_get(const struct xattr_handler *handler, struct dentry *unused,
 }
 
 static int
-xfs_xattr_set(const struct xattr_handler *handler, struct dentry *unused,
-               struct inode *inode, const char *name, const void *value,
-               size_t size, int flags)
+xfs_xattr_set(const struct xattr_handler *handler,
+             struct user_namespace *mnt_userns, struct dentry *unused,
+             struct inode *inode, const char *name, const void *value,
+             size_t size, int flags)
 {
        struct xfs_da_args      args = {
                .dp             = XFS_I(inode),
index f311543..049e36c 100644 (file)
@@ -165,6 +165,21 @@ static int zonefs_writepages(struct address_space *mapping,
        return iomap_writepages(mapping, wbc, &wpc, &zonefs_writeback_ops);
 }
 
+static int zonefs_swap_activate(struct swap_info_struct *sis,
+                               struct file *swap_file, sector_t *span)
+{
+       struct inode *inode = file_inode(swap_file);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+
+       if (zi->i_ztype != ZONEFS_ZTYPE_CNV) {
+               zonefs_err(inode->i_sb,
+                          "swap file: not a conventional zone file\n");
+               return -EINVAL;
+       }
+
+       return iomap_swapfile_activate(sis, swap_file, span, &zonefs_iomap_ops);
+}
+
 static const struct address_space_operations zonefs_file_aops = {
        .readpage               = zonefs_readpage,
        .readahead              = zonefs_readahead,
@@ -177,6 +192,7 @@ static const struct address_space_operations zonefs_file_aops = {
        .is_partially_uptodate  = iomap_is_partially_uptodate,
        .error_remove_page      = generic_error_remove_page,
        .direct_IO              = noop_direct_IO,
+       .swap_activate          = zonefs_swap_activate,
 };
 
 static void zonefs_update_stats(struct inode *inode, loff_t new_isize)
@@ -489,7 +505,8 @@ unlock:
        return ret;
 }
 
-static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
+static int zonefs_inode_setattr(struct user_namespace *mnt_userns,
+                               struct dentry *dentry, struct iattr *iattr)
 {
        struct inode *inode = d_inode(dentry);
        int ret;
@@ -497,7 +514,7 @@ static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
        if (unlikely(IS_IMMUTABLE(inode)))
                return -EPERM;
 
-       ret = setattr_prepare(dentry, iattr);
+       ret = setattr_prepare(&init_user_ns, dentry, iattr);
        if (ret)
                return ret;
 
@@ -525,7 +542,7 @@ static int zonefs_inode_setattr(struct dentry *dentry, struct iattr *iattr)
                        return ret;
        }
 
-       setattr_copy(inode, iattr);
+       setattr_copy(&init_user_ns, inode, iattr);
 
        return 0;
 }
@@ -683,7 +700,7 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
        max = ALIGN_DOWN(max << SECTOR_SHIFT, inode->i_sb->s_blocksize);
        iov_iter_truncate(from, max);
 
-       nr_pages = iov_iter_npages(from, BIO_MAX_PAGES);
+       nr_pages = iov_iter_npages(from, BIO_MAX_VECS);
        if (!nr_pages)
                return 0;
 
@@ -727,6 +744,68 @@ out_release:
 }
 
 /*
+ * Do not exceed the LFS limits nor the file zone size. If pos is below both
+ * limits, shorten the access to fit; if pos is at or beyond either, return
+ * -EFBIG.
+ */
+static loff_t zonefs_write_check_limits(struct file *file, loff_t pos,
+                                       loff_t count)
+{
+       struct inode *inode = file_inode(file);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       loff_t limit = rlimit(RLIMIT_FSIZE);
+       loff_t max_size = zi->i_max_size;
+
+       if (limit != RLIM_INFINITY) {
+               if (pos >= limit) {
+                       send_sig(SIGXFSZ, current, 0);
+                       return -EFBIG;
+               }
+               count = min(count, limit - pos);
+       }
+
+       if (!(file->f_flags & O_LARGEFILE))
+               max_size = min_t(loff_t, MAX_NON_LFS, max_size);
+
+       if (unlikely(pos >= max_size))
+               return -EFBIG;
+
+       return min(count, max_size - pos);
+}
+
+static ssize_t zonefs_write_checks(struct kiocb *iocb, struct iov_iter *from)
+{
+       struct file *file = iocb->ki_filp;
+       struct inode *inode = file_inode(file);
+       struct zonefs_inode_info *zi = ZONEFS_I(inode);
+       loff_t count;
+
+       if (IS_SWAPFILE(inode))
+               return -ETXTBSY;
+
+       if (!iov_iter_count(from))
+               return 0;
+
+       if ((iocb->ki_flags & IOCB_NOWAIT) && !(iocb->ki_flags & IOCB_DIRECT))
+               return -EINVAL;
+
+       if (iocb->ki_flags & IOCB_APPEND) {
+               if (zi->i_ztype != ZONEFS_ZTYPE_SEQ)
+                       return -EINVAL;
+               mutex_lock(&zi->i_truncate_mutex);
+               iocb->ki_pos = zi->i_wpoffset;
+               mutex_unlock(&zi->i_truncate_mutex);
+       }
+
+       count = zonefs_write_check_limits(file, iocb->ki_pos,
+                                         iov_iter_count(from));
+       if (count < 0)
+               return count;
+
+       iov_iter_truncate(from, count);
+       return iov_iter_count(from);
+}
+
+/*
  * Handle direct writes. For sequential zone files, this is the only possible
  * write path. For these files, check that the user is issuing writes
  * sequentially from the end of the file. This code assumes that the block layer
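
Note: zonefs_write_checks() replaces generic_write_checks() so that RLIMIT_FSIZE, O_LARGEFILE and the per-zone maximum size (zi->i_max_size) are enforced in one place, and IOCB_APPEND is redirected to the zone write pointer. The limit handling reduces to simple clamping; a stand-alone sketch of the arithmetic (plain C; SIGXFSZ delivery and O_LARGEFILE handling omitted, and an infinite rlimit is not modeled):

    #include <stdint.h>
    #include <errno.h>

    /* Sketch of the clamping in zonefs_write_check_limits(). */
    static int64_t check_limits(int64_t pos, int64_t count,
                                int64_t rlimit_fsize, int64_t zone_max_size)
    {
            if (pos >= rlimit_fsize || pos >= zone_max_size)
                    return -EFBIG;                  /* nothing can be written */
            if (count > rlimit_fsize - pos)
                    count = rlimit_fsize - pos;     /* short write */
            if (count > zone_max_size - pos)
                    count = zone_max_size - pos;    /* short write */
            return count;
    }

With pos = 900, count = 200, an rlimit of 1000 and a 4096-byte zone, this returns 100: a short write rather than an error.
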
@@ -743,8 +822,7 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
        struct super_block *sb = inode->i_sb;
        bool sync = is_sync_kiocb(iocb);
        bool append = false;
-       size_t count;
-       ssize_t ret;
+       ssize_t ret, count;
 
        /*
         * For async direct IOs to sequential zone files, refuse IOCB_NOWAIT
@@ -762,12 +840,11 @@ static ssize_t zonefs_file_dio_write(struct kiocb *iocb, struct iov_iter *from)
                inode_lock(inode);
        }
 
-       ret = generic_write_checks(iocb, from);
-       if (ret <= 0)
+       count = zonefs_write_checks(iocb, from);
+       if (count <= 0) {
+               ret = count;
                goto inode_unlock;
-
-       iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
-       count = iov_iter_count(from);
+       }
 
        if ((iocb->ki_pos | count) & (sb->s_blocksize - 1)) {
                ret = -EINVAL;
@@ -827,12 +904,10 @@ static ssize_t zonefs_file_buffered_write(struct kiocb *iocb,
                inode_lock(inode);
        }
 
-       ret = generic_write_checks(iocb, from);
+       ret = zonefs_write_checks(iocb, from);
        if (ret <= 0)
                goto inode_unlock;
 
-       iov_iter_truncate(from, zi->i_max_size - iocb->ki_pos);
-
        ret = iomap_file_buffered_write(iocb, from, &zonefs_iomap_ops);
        if (ret > 0)
                iocb->ki_pos += ret;
@@ -965,9 +1040,7 @@ static int zonefs_open_zone(struct inode *inode)
 
        mutex_lock(&zi->i_truncate_mutex);
 
-       zi->i_wr_refcnt++;
-       if (zi->i_wr_refcnt == 1) {
-
+       if (!zi->i_wr_refcnt) {
                if (atomic_inc_return(&sbi->s_open_zones) > sbi->s_max_open_zones) {
                        atomic_dec(&sbi->s_open_zones);
                        ret = -EBUSY;
@@ -977,7 +1050,6 @@ static int zonefs_open_zone(struct inode *inode)
                if (i_size_read(inode) < zi->i_max_size) {
                        ret = zonefs_zone_mgmt(inode, REQ_OP_ZONE_OPEN);
                        if (ret) {
-                               zi->i_wr_refcnt--;
                                atomic_dec(&sbi->s_open_zones);
                                goto unlock;
                        }
@@ -985,6 +1057,8 @@ static int zonefs_open_zone(struct inode *inode)
                }
        }
 
+       zi->i_wr_refcnt++;
+
 unlock:
        mutex_unlock(&zi->i_truncate_mutex);
 
@@ -1233,7 +1307,7 @@ static void zonefs_init_dir_inode(struct inode *parent, struct inode *inode,
        struct super_block *sb = parent->i_sb;
 
        inode->i_ino = blkdev_nr_zones(sb->s_bdev->bd_disk) + type + 1;
-       inode_init_owner(inode, parent, S_IFDIR | 0555);
+       inode_init_owner(&init_user_ns, inode, parent, S_IFDIR | 0555);
        inode->i_op = &zonefs_dir_inode_operations;
        inode->i_fop = &simple_dir_operations;
        set_nlink(inode, 2);
index 02a716a..f28b097 100644 (file)
@@ -233,6 +233,7 @@ struct acpi_pnp_type {
 
 struct acpi_device_pnp {
        acpi_bus_id bus_id;             /* Object name */
+       int instance_no;                /* Instance number of this object */
        struct acpi_pnp_type type;      /* ID type */
        acpi_bus_address bus_address;   /* _ADR */
        char *unique_id;                /* _UID */
index 7282c0f..302506b 100644 (file)
@@ -50,6 +50,7 @@ mandatory-y += sections.h
 mandatory-y += serial.h
 mandatory-y += shmparam.h
 mandatory-y += simd.h
+mandatory-y += softirq_stack.h
 mandatory-y += switch_to.h
 mandatory-y += timex.h
 mandatory-y += tlbflush.h
diff --git a/include/asm-generic/numa.h b/include/asm-generic/numa.h
new file mode 100644 (file)
index 0000000..1a3ad6d
--- /dev/null
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __ASM_GENERIC_NUMA_H
+#define __ASM_GENERIC_NUMA_H
+
+#ifdef CONFIG_NUMA
+
+#define NR_NODE_MEMBLKS                (MAX_NUMNODES * 2)
+
+int __node_distance(int from, int to);
+#define node_distance(a, b) __node_distance(a, b)
+
+extern nodemask_t numa_nodes_parsed __initdata;
+
+extern bool numa_off;
+
+/* Mappings between node number and cpus on that node. */
+extern cpumask_var_t node_to_cpumask_map[MAX_NUMNODES];
+void numa_clear_node(unsigned int cpu);
+
+#ifdef CONFIG_DEBUG_PER_CPU_MAPS
+const struct cpumask *cpumask_of_node(int node);
+#else
+/* Returns a pointer to the cpumask of CPUs on Node 'node'. */
+static inline const struct cpumask *cpumask_of_node(int node)
+{
+       if (node == NUMA_NO_NODE)
+               return cpu_all_mask;
+
+       return node_to_cpumask_map[node];
+}
+#endif
+
+void __init arch_numa_init(void);
+int __init numa_add_memblk(int nodeid, u64 start, u64 end);
+void __init numa_set_distance(int from, int to, int distance);
+void __init numa_free_distance(void);
+void __init early_map_cpu_to_node(unsigned int cpu, int nid);
+void numa_store_cpu_info(unsigned int cpu);
+void numa_add_cpu(unsigned int cpu);
+void numa_remove_cpu(unsigned int cpu);
+
+#else  /* CONFIG_NUMA */
+
+static inline void numa_store_cpu_info(unsigned int cpu) { }
+static inline void numa_add_cpu(unsigned int cpu) { }
+static inline void numa_remove_cpu(unsigned int cpu) { }
+static inline void arch_numa_init(void) { }
+static inline void early_map_cpu_to_node(unsigned int cpu, int nid) { }
+
+#endif /* CONFIG_NUMA */
+
+#endif /* __ASM_GENERIC_NUMA_H */
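
Note: the new asm-generic NUMA header exposes the standard topology queries (node_distance(), cpumask_of_node(), numa_add_memblk(), ...) behind CONFIG_NUMA, with empty stubs otherwise. A fragment showing a typical consumer of the distance API (sketch; assumes CONFIG_NUMA=y and a kernel context):

    #include <linux/nodemask.h>
    #include <linux/printk.h>
    #include <asm/numa.h>

    static void dump_numa_distances(void)
    {
            int a, b;

            for_each_online_node(a)
                    for_each_online_node(b)
                            pr_info("node %d -> node %d: distance %d\n",
                                    a, b, node_distance(a, b));
    }
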
diff --git a/include/asm-generic/softirq_stack.h b/include/asm-generic/softirq_stack.h
new file mode 100644 (file)
index 0000000..eceeecf
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef __ASM_GENERIC_SOFTIRQ_STACK_H
+#define __ASM_GENERIC_SOFTIRQ_STACK_H
+
+#ifdef CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK
+void do_softirq_own_stack(void);
+#else
+static inline void do_softirq_own_stack(void)
+{
+       __do_softirq();
+}
+#endif
+
+#endif
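
Note: this fallback lets generic code call do_softirq_own_stack() unconditionally. Architectures that select CONFIG_HAVE_SOFTIRQ_ON_OWN_STACK supply an implementation that switches to a dedicated stack; everyone else runs __do_softirq() inline on the current stack. A simplified sketch of the caller shape (the real do_softirq() also bails out when called from interrupt context):

    #include <linux/interrupt.h>
    #include <linux/irqflags.h>
    #include <asm-generic/softirq_stack.h>

    static void run_pending_softirqs(void)
    {
            unsigned long flags;

            local_irq_save(flags);
            if (local_softirq_pending())
                    do_softirq_own_stack();   /* arch switches stacks, or falls
                                                 back to __do_softirq() inline */
            local_irq_restore(flags);
    }
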
index 01a3fd6..0331d5d 100644 (file)
  * .data. We don't want to pull in .data..other sections, which Linux
  * has defined. Same for text and bss.
  *
+ * With LTO_CLANG, the linker also splits sections by default, so we need
+ * these macros to combine the sections during the final link.
+ *
  * RODATA_MAIN is not used because existing code already defines .rodata.x
  * sections to be brought in with rodata.
  */
-#ifdef CONFIG_LD_DEAD_CODE_DATA_ELIMINATION
+#if defined(CONFIG_LD_DEAD_CODE_DATA_ELIMINATION) || defined(CONFIG_LTO_CLANG)
 #define TEXT_MAIN .text .text.[0-9a-zA-Z_]*
-#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$Lubsan_*
+#define DATA_MAIN .data .data.[0-9a-zA-Z_]* .data..L* .data..compoundliteral* .data.$__unnamed_* .data.$L*
 #define SDATA_MAIN .sdata .sdata.[0-9a-zA-Z_]*
 #define RODATA_MAIN .rodata .rodata.[0-9a-zA-Z_]* .rodata..L*
 #define BSS_MAIN .bss .bss.[0-9a-zA-Z_]* .bss..compoundliteral*
                __stop___ksymtab_gpl = .;                               \
        }                                                               \
                                                                        \
-       /* Kernel symbol table: Normal unused symbols */                \
-       __ksymtab_unused  : AT(ADDR(__ksymtab_unused) - LOAD_OFFSET) {  \
-               __start___ksymtab_unused = .;                           \
-               KEEP(*(SORT(___ksymtab_unused+*)))                      \
-               __stop___ksymtab_unused = .;                            \
-       }                                                               \
-                                                                       \
-       /* Kernel symbol table: GPL-only unused symbols */              \
-       __ksymtab_unused_gpl : AT(ADDR(__ksymtab_unused_gpl) - LOAD_OFFSET) { \
-               __start___ksymtab_unused_gpl = .;                       \
-               KEEP(*(SORT(___ksymtab_unused_gpl+*)))                  \
-               __stop___ksymtab_unused_gpl = .;                        \
-       }                                                               \
-                                                                       \
-       /* Kernel symbol table: GPL-future-only symbols */              \
-       __ksymtab_gpl_future : AT(ADDR(__ksymtab_gpl_future) - LOAD_OFFSET) { \
-               __start___ksymtab_gpl_future = .;                       \
-               KEEP(*(SORT(___ksymtab_gpl_future+*)))                  \
-               __stop___ksymtab_gpl_future = .;                        \
-       }                                                               \
-                                                                       \
        /* Kernel symbol table: Normal symbols */                       \
        __kcrctab         : AT(ADDR(__kcrctab) - LOAD_OFFSET) {         \
                __start___kcrctab = .;                                  \
                __stop___kcrctab_gpl = .;                               \
        }                                                               \
                                                                        \
-       /* Kernel symbol table: Normal unused symbols */                \
-       __kcrctab_unused  : AT(ADDR(__kcrctab_unused) - LOAD_OFFSET) {  \
-               __start___kcrctab_unused = .;                           \
-               KEEP(*(SORT(___kcrctab_unused+*)))                      \
-               __stop___kcrctab_unused = .;                            \
-       }                                                               \
-                                                                       \
-       /* Kernel symbol table: GPL-only unused symbols */              \
-       __kcrctab_unused_gpl : AT(ADDR(__kcrctab_unused_gpl) - LOAD_OFFSET) { \
-               __start___kcrctab_unused_gpl = .;                       \
-               KEEP(*(SORT(___kcrctab_unused_gpl+*)))                  \
-               __stop___kcrctab_unused_gpl = .;                        \
-       }                                                               \
-                                                                       \
-       /* Kernel symbol table: GPL-future-only symbols */              \
-       __kcrctab_gpl_future : AT(ADDR(__kcrctab_gpl_future) - LOAD_OFFSET) { \
-               __start___kcrctab_gpl_future = .;                       \
-               KEEP(*(SORT(___kcrctab_gpl_future+*)))                  \
-               __stop___kcrctab_gpl_future = .;                        \
-       }                                                               \
-                                                                       \
        /* Kernel symbol table: strings */                              \
         __ksymtab_strings : AT(ADDR(__ksymtab_strings) - LOAD_OFFSET) {        \
                *(__ksymtab_strings)                                    \
                /* DWARF 4 */                                           \
                .debug_types    0 : { *(.debug_types) }                 \
                /* DWARF 5 */                                           \
+               .debug_addr     0 : { *(.debug_addr) }                  \
+               .debug_line_str 0 : { *(.debug_line_str) }              \
+               .debug_loclists 0 : { *(.debug_loclists) }              \
                .debug_macro    0 : { *(.debug_macro) }                 \
-               .debug_addr     0 : { *(.debug_addr) }
+               .debug_names    0 : { *(.debug_names) }                 \
+               .debug_rnglists 0 : { *(.debug_rnglists) }              \
+               .debug_str_offsets      0 : { *(.debug_str_offsets) }
 
 /* Stabs debugging sections. */
 #define STABS_DEBUG                                                    \
 #endif
 
 /*
- * Clang's -fsanitize=kernel-address and -fsanitize=thread produce
- * unwanted sections (.eh_frame and .init_array.*), but
- * CONFIG_CONSTRUCTORS wants to keep any .init_array.* sections.
+ * Clang's -fprofile-arcs, -fsanitize=kernel-address, and
+ * -fsanitize=thread produce unwanted sections (.eh_frame
+ * and .init_array.*), but CONFIG_CONSTRUCTORS wants to
+ * keep any .init_array.* sections.
  * https://bugs.llvm.org/show_bug.cgi?id=46478
  */
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN)
+#if defined(CONFIG_GCOV_KERNEL) || defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KCSAN)
 # ifdef CONFIG_CONSTRUCTORS
 #  define SANITIZER_DISCARDS                                           \
        *(.eh_frame)
index 948c520..47accec 100644 (file)
@@ -12,7 +12,6 @@
 
 #include <linux/keyctl.h>
 #include <linux/oid_registry.h>
-#include <crypto/akcipher.h>
 
 /*
  * Cryptographic data for the public-key subtype of the asymmetric key type.
index 716990b..b81b3bf 100644 (file)
@@ -399,6 +399,9 @@ void drm_event_cancel_free(struct drm_device *dev,
                           struct drm_pending_event *p);
 void drm_send_event_locked(struct drm_device *dev, struct drm_pending_event *e);
 void drm_send_event(struct drm_device *dev, struct drm_pending_event *e);
+void drm_send_event_timestamp_locked(struct drm_device *dev,
+                                    struct drm_pending_event *e,
+                                    ktime_t timestamp);
 
 struct file *mock_drm_getfile(struct drm_minor *minor, unsigned int flags);
 
index e17be32..b8ca136 100644 (file)
@@ -612,9 +612,11 @@ static inline void ttm_bo_pin(struct ttm_buffer_object *bo)
 static inline void ttm_bo_unpin(struct ttm_buffer_object *bo)
 {
        dma_resv_assert_held(bo->base.resv);
-       WARN_ON_ONCE(!bo->pin_count);
        WARN_ON_ONCE(!kref_read(&bo->kref));
-       --bo->pin_count;
+       if (bo->pin_count)
+               --bo->pin_count;
+       else
+               WARN_ON_ONCE(true);
 }
 
 int ttm_mem_evict_first(struct ttm_bo_device *bdev,
index a48176a..b2de702 100644 (file)
@@ -9,7 +9,6 @@
 /*
  * Kendryte K210 SoC clock identifiers (arbitrary values).
  */
-#define K210_CLK_ACLK  0
 #define K210_CLK_CPU   0
 #define K210_CLK_SRAM0 1
 #define K210_CLK_SRAM1 2
diff --git a/include/dt-bindings/input/cros-ec-keyboard.h b/include/dt-bindings/input/cros-ec-keyboard.h
new file mode 100644 (file)
index 0000000..f0ae036
--- /dev/null
@@ -0,0 +1,103 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * This header provides the constants of the standard Chrome OS key matrix
+ * for cros-ec keyboard-controller bindings.
+ *
+ * Copyright (c) 2021 Google, Inc
+ */
+
+#ifndef _CROS_EC_KEYBOARD_H
+#define _CROS_EC_KEYBOARD_H
+
+#define CROS_STD_TOP_ROW_KEYMAP        \
+       MATRIX_KEY(0x00, 0x02, KEY_F1)  \
+       MATRIX_KEY(0x03, 0x02, KEY_F2)  \
+       MATRIX_KEY(0x02, 0x02, KEY_F3)  \
+       MATRIX_KEY(0x01, 0x02, KEY_F4)  \
+       MATRIX_KEY(0x03, 0x04, KEY_F5)  \
+       MATRIX_KEY(0x02, 0x04, KEY_F6)  \
+       MATRIX_KEY(0x01, 0x04, KEY_F7)  \
+       MATRIX_KEY(0x02, 0x09, KEY_F8)  \
+       MATRIX_KEY(0x01, 0x09, KEY_F9)  \
+       MATRIX_KEY(0x00, 0x04, KEY_F10)
+
+#define CROS_STD_MAIN_KEYMAP   \
+       MATRIX_KEY(0x00, 0x01, KEY_LEFTMETA)    \
+       MATRIX_KEY(0x00, 0x03, KEY_B)           \
+       MATRIX_KEY(0x00, 0x05, KEY_RO)          \
+       MATRIX_KEY(0x00, 0x06, KEY_N)           \
+       MATRIX_KEY(0x00, 0x08, KEY_EQUAL)       \
+       MATRIX_KEY(0x00, 0x0a, KEY_RIGHTALT)    \
+       MATRIX_KEY(0x01, 0x01, KEY_ESC)         \
+       MATRIX_KEY(0x01, 0x03, KEY_G)           \
+       MATRIX_KEY(0x01, 0x06, KEY_H)           \
+       MATRIX_KEY(0x01, 0x08, KEY_APOSTROPHE)  \
+       MATRIX_KEY(0x01, 0x0b, KEY_BACKSPACE)   \
+       MATRIX_KEY(0x01, 0x0c, KEY_HENKAN)      \
+                                               \
+       MATRIX_KEY(0x02, 0x00, KEY_LEFTCTRL)    \
+       MATRIX_KEY(0x02, 0x01, KEY_TAB)         \
+       MATRIX_KEY(0x02, 0x03, KEY_T)           \
+       MATRIX_KEY(0x02, 0x05, KEY_RIGHTBRACE)  \
+       MATRIX_KEY(0x02, 0x06, KEY_Y)           \
+       MATRIX_KEY(0x02, 0x07, KEY_102ND)       \
+       MATRIX_KEY(0x02, 0x08, KEY_LEFTBRACE)   \
+       MATRIX_KEY(0x02, 0x0a, KEY_YEN)         \
+                                               \
+       MATRIX_KEY(0x03, 0x00, KEY_LEFTMETA)    \
+       MATRIX_KEY(0x03, 0x01, KEY_GRAVE)       \
+       MATRIX_KEY(0x03, 0x03, KEY_5)           \
+       MATRIX_KEY(0x03, 0x06, KEY_6)           \
+       MATRIX_KEY(0x03, 0x08, KEY_MINUS)       \
+       MATRIX_KEY(0x03, 0x09, KEY_SLEEP)       \
+       MATRIX_KEY(0x03, 0x0b, KEY_BACKSLASH)   \
+       MATRIX_KEY(0x03, 0x0c, KEY_MUHENKAN)    \
+                                               \
+       MATRIX_KEY(0x04, 0x00, KEY_RIGHTCTRL)   \
+       MATRIX_KEY(0x04, 0x01, KEY_A)           \
+       MATRIX_KEY(0x04, 0x02, KEY_D)           \
+       MATRIX_KEY(0x04, 0x03, KEY_F)           \
+       MATRIX_KEY(0x04, 0x04, KEY_S)           \
+       MATRIX_KEY(0x04, 0x05, KEY_K)           \
+       MATRIX_KEY(0x04, 0x06, KEY_J)           \
+       MATRIX_KEY(0x04, 0x08, KEY_SEMICOLON)   \
+       MATRIX_KEY(0x04, 0x09, KEY_L)           \
+       MATRIX_KEY(0x04, 0x0a, KEY_BACKSLASH)   \
+       MATRIX_KEY(0x04, 0x0b, KEY_ENTER)       \
+                                               \
+       MATRIX_KEY(0x05, 0x01, KEY_Z)           \
+       MATRIX_KEY(0x05, 0x02, KEY_C)           \
+       MATRIX_KEY(0x05, 0x03, KEY_V)           \
+       MATRIX_KEY(0x05, 0x04, KEY_X)           \
+       MATRIX_KEY(0x05, 0x05, KEY_COMMA)       \
+       MATRIX_KEY(0x05, 0x06, KEY_M)           \
+       MATRIX_KEY(0x05, 0x07, KEY_LEFTSHIFT)   \
+       MATRIX_KEY(0x05, 0x08, KEY_SLASH)       \
+       MATRIX_KEY(0x05, 0x09, KEY_DOT)         \
+       MATRIX_KEY(0x05, 0x0b, KEY_SPACE)       \
+                                               \
+       MATRIX_KEY(0x06, 0x01, KEY_1)           \
+       MATRIX_KEY(0x06, 0x02, KEY_3)           \
+       MATRIX_KEY(0x06, 0x03, KEY_4)           \
+       MATRIX_KEY(0x06, 0x04, KEY_2)           \
+       MATRIX_KEY(0x06, 0x05, KEY_8)           \
+       MATRIX_KEY(0x06, 0x06, KEY_7)           \
+       MATRIX_KEY(0x06, 0x08, KEY_0)           \
+       MATRIX_KEY(0x06, 0x09, KEY_9)           \
+       MATRIX_KEY(0x06, 0x0a, KEY_LEFTALT)     \
+       MATRIX_KEY(0x06, 0x0b, KEY_DOWN)        \
+       MATRIX_KEY(0x06, 0x0c, KEY_RIGHT)       \
+                                               \
+       MATRIX_KEY(0x07, 0x01, KEY_Q)           \
+       MATRIX_KEY(0x07, 0x02, KEY_E)           \
+       MATRIX_KEY(0x07, 0x03, KEY_R)           \
+       MATRIX_KEY(0x07, 0x04, KEY_W)           \
+       MATRIX_KEY(0x07, 0x05, KEY_I)           \
+       MATRIX_KEY(0x07, 0x06, KEY_U)           \
+       MATRIX_KEY(0x07, 0x07, KEY_RIGHTSHIFT)  \
+       MATRIX_KEY(0x07, 0x08, KEY_P)           \
+       MATRIX_KEY(0x07, 0x09, KEY_O)           \
+       MATRIX_KEY(0x07, 0x0b, KEY_UP)          \
+       MATRIX_KEY(0x07, 0x0c, KEY_LEFT)
+
+#endif /* _CROS_EC_KEYBOARD_H */
diff --git a/include/dt-bindings/interconnect/qcom,msm8939.h b/include/dt-bindings/interconnect/qcom,msm8939.h
new file mode 100644 (file)
index 0000000..c22369a
--- /dev/null
@@ -0,0 +1,105 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Qualcomm interconnect IDs
+ *
+ * Copyright (c) 2020, Linaro Ltd.
+ * Author: Jun Nie <jun.nie@linaro.org>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_MSM8939_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_MSM8939_H
+
+#define BIMC_SNOC_SLV                  0
+#define MASTER_QDSS_BAM                        1
+#define MASTER_QDSS_ETR                        2
+#define MASTER_SNOC_CFG                        3
+#define PCNOC_SNOC_SLV                 4
+#define SLAVE_APSS                     5
+#define SLAVE_CATS_128                 6
+#define SLAVE_OCMEM_64                 7
+#define SLAVE_IMEM                     8
+#define SLAVE_QDSS_STM                 9
+#define SLAVE_SRVC_SNOC                        10
+#define SNOC_BIMC_0_MAS                        11
+#define SNOC_BIMC_1_MAS                        12
+#define SNOC_BIMC_2_MAS                        13
+#define SNOC_INT_0                     14
+#define SNOC_INT_1                     15
+#define SNOC_INT_BIMC                  16
+#define SNOC_PCNOC_MAS                 17
+#define SNOC_QDSS_INT                  18
+
+#define MASTER_VIDEO_P0                        0
+#define MASTER_JPEG                    1
+#define MASTER_VFE                     2
+#define MASTER_MDP_PORT0               3
+#define MASTER_MDP_PORT1               4
+#define MASTER_CPP                     5
+#define SNOC_MM_INT_0                  6
+#define SNOC_MM_INT_1                  7
+#define SNOC_MM_INT_2                  8
+
+#define BIMC_SNOC_MAS                  0
+#define MASTER_AMPSS_M0                        1
+#define MASTER_GRAPHICS_3D             2
+#define MASTER_TCU0                    3
+#define SLAVE_AMPSS_L2                 4
+#define SLAVE_EBI_CH0                  5
+#define SNOC_BIMC_0_SLV                        6
+#define SNOC_BIMC_1_SLV                        7
+#define SNOC_BIMC_2_SLV                        8
+
+#define MASTER_BLSP_1                  0
+#define MASTER_DEHR                    1
+#define MASTER_LPASS                   2
+#define MASTER_CRYPTO_CORE0            3
+#define MASTER_SDCC_1                  4
+#define MASTER_SDCC_2                  5
+#define MASTER_SPDM                    6
+#define MASTER_USB_HS1                 7
+#define MASTER_USB_HS2                 8
+#define PCNOC_INT_0                    9
+#define PCNOC_INT_1                    10
+#define PCNOC_MAS_0                    11
+#define PCNOC_MAS_1                    12
+#define PCNOC_SLV_0                    13
+#define PCNOC_SLV_1                    14
+#define PCNOC_SLV_2                    15
+#define PCNOC_SLV_3                    16
+#define PCNOC_SLV_4                    17
+#define PCNOC_SLV_8                    18
+#define PCNOC_SLV_9                    19
+#define PCNOC_SNOC_MAS                 20
+#define SLAVE_BIMC_CFG                 21
+#define SLAVE_BLSP_1                   22
+#define SLAVE_BOOT_ROM                 23
+#define SLAVE_CAMERA_CFG               24
+#define SLAVE_CLK_CTL                  25
+#define SLAVE_CRYPTO_0_CFG                     26
+#define SLAVE_DEHR_CFG                 27
+#define SLAVE_DISPLAY_CFG                      28
+#define SLAVE_GRAPHICS_3D_CFG                  29
+#define SLAVE_IMEM_CFG                 30
+#define SLAVE_LPASS                    31
+#define SLAVE_MPM                      32
+#define SLAVE_MSG_RAM                  33
+#define SLAVE_MSS                      34
+#define SLAVE_PDM                      35
+#define SLAVE_PMIC_ARB                 36
+#define SLAVE_PCNOC_CFG                        37
+#define SLAVE_PRNG                     38
+#define SLAVE_QDSS_CFG                 39
+#define SLAVE_RBCPR_CFG                        40
+#define SLAVE_SDCC_1                   41
+#define SLAVE_SDCC_2                   42
+#define SLAVE_SECURITY                 43
+#define SLAVE_SNOC_CFG                 44
+#define SLAVE_SPDM                     45
+#define SLAVE_TCSR                     46
+#define SLAVE_TLMM                     47
+#define SLAVE_USB_HS1                  48
+#define SLAVE_USB_HS2                  49
+#define SLAVE_VENUS_CFG                        50
+#define SNOC_PCNOC_SLV                 51
+
+#endif
diff --git a/include/dt-bindings/interconnect/qcom,sdx55.h b/include/dt-bindings/interconnect/qcom,sdx55.h
new file mode 100644 (file)
index 0000000..bfb6524
--- /dev/null
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Qualcomm SDX55 interconnect IDs
+ *
+ * Copyright (c) 2021, Linaro Ltd.
+ * Author: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
+ */
+
+#ifndef __DT_BINDINGS_INTERCONNECT_QCOM_SDX55_H
+#define __DT_BINDINGS_INTERCONNECT_QCOM_SDX55_H
+
+#define MASTER_LLCC                    0
+#define SLAVE_EBI_CH0                  1
+
+#define MASTER_TCU_0                   0
+#define MASTER_SNOC_GC_MEM_NOC         1
+#define MASTER_AMPSS_M0                        2
+#define SLAVE_LLCC                     3
+#define SLAVE_MEM_NOC_SNOC             4
+#define SLAVE_MEM_NOC_PCIE_SNOC                5
+
+#define MASTER_AUDIO                   0
+#define MASTER_BLSP_1                  1
+#define MASTER_QDSS_BAM                        2
+#define MASTER_QPIC                    3
+#define MASTER_SNOC_CFG                        4
+#define MASTER_SPMI_FETCHER            5
+#define MASTER_ANOC_SNOC               6
+#define MASTER_IPA                     7
+#define MASTER_MEM_NOC_SNOC            8
+#define MASTER_MEM_NOC_PCIE_SNOC       9
+#define MASTER_CRYPTO_CORE_0           10
+#define MASTER_EMAC                    11
+#define MASTER_IPA_PCIE                        12
+#define MASTER_PCIE                    13
+#define MASTER_QDSS_ETR                        14
+#define MASTER_SDCC_1                  15
+#define MASTER_USB3                    16
+#define SLAVE_AOP                      17
+#define SLAVE_AOSS                     18
+#define SLAVE_APPSS                    19
+#define SLAVE_AUDIO                    20
+#define SLAVE_BLSP_1                   21
+#define SLAVE_CLK_CTL                  22
+#define SLAVE_CRYPTO_0_CFG             23
+#define SLAVE_CNOC_DDRSS               24
+#define SLAVE_ECC_CFG                  25
+#define SLAVE_EMAC_CFG                 26
+#define SLAVE_IMEM_CFG                 27
+#define SLAVE_IPA_CFG                  28
+#define SLAVE_CNOC_MSS                 29
+#define SLAVE_PCIE_PARF                        30
+#define SLAVE_PDM                      31
+#define SLAVE_PRNG                     32
+#define SLAVE_QDSS_CFG                 33
+#define SLAVE_QPIC                     34
+#define SLAVE_SDCC_1                   35
+#define SLAVE_SNOC_CFG                 36
+#define SLAVE_SPMI_FETCHER             37
+#define SLAVE_SPMI_VGI_COEX            38
+#define SLAVE_TCSR                     39
+#define SLAVE_TLMM                     40
+#define SLAVE_USB3                     41
+#define SLAVE_USB3_PHY_CFG             42
+#define SLAVE_ANOC_SNOC                        43
+#define SLAVE_SNOC_MEM_NOC_GC          44
+#define SLAVE_OCIMEM                   45
+#define SLAVE_SERVICE_SNOC             46
+#define SLAVE_PCIE_0                   47
+#define SLAVE_QDSS_STM                 48
+#define SLAVE_TCU                      49
+
+#define MASTER_IPA_CORE                        0
+#define SLAVE_IPA_CORE                 1
+
+#endif
diff --git a/include/dt-bindings/pinctrl/k210-fpioa.h b/include/dt-bindings/pinctrl/k210-fpioa.h
new file mode 100644 (file)
index 0000000..314285e
--- /dev/null
@@ -0,0 +1,276 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2020 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef PINCTRL_K210_FPIOA_H
+#define PINCTRL_K210_FPIOA_H
+
+/*
+ * Full list of FPIOA functions from
+ * kendryte-standalone-sdk/lib/drivers/include/fpioa.h
+ */
+#define K210_PCF_MASK          GENMASK(7, 0)
+#define K210_PCF_JTAG_TCLK     0   /* JTAG Test Clock */
+#define K210_PCF_JTAG_TDI      1   /* JTAG Test Data In */
+#define K210_PCF_JTAG_TMS      2   /* JTAG Test Mode Select */
+#define K210_PCF_JTAG_TDO      3   /* JTAG Test Data Out */
+#define K210_PCF_SPI0_D0       4   /* SPI0 Data 0 */
+#define K210_PCF_SPI0_D1       5   /* SPI0 Data 1 */
+#define K210_PCF_SPI0_D2       6   /* SPI0 Data 2 */
+#define K210_PCF_SPI0_D3       7   /* SPI0 Data 3 */
+#define K210_PCF_SPI0_D4       8   /* SPI0 Data 4 */
+#define K210_PCF_SPI0_D5       9   /* SPI0 Data 5 */
+#define K210_PCF_SPI0_D6       10  /* SPI0 Data 6 */
+#define K210_PCF_SPI0_D7       11  /* SPI0 Data 7 */
+#define K210_PCF_SPI0_SS0      12  /* SPI0 Chip Select 0 */
+#define K210_PCF_SPI0_SS1      13  /* SPI0 Chip Select 1 */
+#define K210_PCF_SPI0_SS2      14  /* SPI0 Chip Select 2 */
+#define K210_PCF_SPI0_SS3      15  /* SPI0 Chip Select 3 */
+#define K210_PCF_SPI0_ARB      16  /* SPI0 Arbitration */
+#define K210_PCF_SPI0_SCLK     17  /* SPI0 Serial Clock */
+#define K210_PCF_UARTHS_RX     18  /* UART High speed Receiver */
+#define K210_PCF_UARTHS_TX     19  /* UART High speed Transmitter */
+#define K210_PCF_RESV6         20  /* Reserved function */
+#define K210_PCF_RESV7         21  /* Reserved function */
+#define K210_PCF_CLK_SPI1      22  /* Clock SPI1 */
+#define K210_PCF_CLK_I2C1      23  /* Clock I2C1 */
+#define K210_PCF_GPIOHS0       24  /* GPIO High speed 0 */
+#define K210_PCF_GPIOHS1       25  /* GPIO High speed 1 */
+#define K210_PCF_GPIOHS2       26  /* GPIO High speed 2 */
+#define K210_PCF_GPIOHS3       27  /* GPIO High speed 3 */
+#define K210_PCF_GPIOHS4       28  /* GPIO High speed 4 */
+#define K210_PCF_GPIOHS5       29  /* GPIO High speed 5 */
+#define K210_PCF_GPIOHS6       30  /* GPIO High speed 6 */
+#define K210_PCF_GPIOHS7       31  /* GPIO High speed 7 */
+#define K210_PCF_GPIOHS8       32  /* GPIO High speed 8 */
+#define K210_PCF_GPIOHS9       33  /* GPIO High speed 9 */
+#define K210_PCF_GPIOHS10      34  /* GPIO High speed 10 */
+#define K210_PCF_GPIOHS11      35  /* GPIO High speed 11 */
+#define K210_PCF_GPIOHS12      36  /* GPIO High speed 12 */
+#define K210_PCF_GPIOHS13      37  /* GPIO High speed 13 */
+#define K210_PCF_GPIOHS14      38  /* GPIO High speed 14 */
+#define K210_PCF_GPIOHS15      39  /* GPIO High speed 15 */
+#define K210_PCF_GPIOHS16      40  /* GPIO High speed 16 */
+#define K210_PCF_GPIOHS17      41  /* GPIO High speed 17 */
+#define K210_PCF_GPIOHS18      42  /* GPIO High speed 18 */
+#define K210_PCF_GPIOHS19      43  /* GPIO High speed 19 */
+#define K210_PCF_GPIOHS20      44  /* GPIO High speed 20 */
+#define K210_PCF_GPIOHS21      45  /* GPIO High speed 21 */
+#define K210_PCF_GPIOHS22      46  /* GPIO High speed 22 */
+#define K210_PCF_GPIOHS23      47  /* GPIO High speed 23 */
+#define K210_PCF_GPIOHS24      48  /* GPIO High speed 24 */
+#define K210_PCF_GPIOHS25      49  /* GPIO High speed 25 */
+#define K210_PCF_GPIOHS26      50  /* GPIO High speed 26 */
+#define K210_PCF_GPIOHS27      51  /* GPIO High speed 27 */
+#define K210_PCF_GPIOHS28      52  /* GPIO High speed 28 */
+#define K210_PCF_GPIOHS29      53  /* GPIO High speed 29 */
+#define K210_PCF_GPIOHS30      54  /* GPIO High speed 30 */
+#define K210_PCF_GPIOHS31      55  /* GPIO High speed 31 */
+#define K210_PCF_GPIO0         56  /* GPIO pin 0 */
+#define K210_PCF_GPIO1         57  /* GPIO pin 1 */
+#define K210_PCF_GPIO2         58  /* GPIO pin 2 */
+#define K210_PCF_GPIO3         59  /* GPIO pin 3 */
+#define K210_PCF_GPIO4         60  /* GPIO pin 4 */
+#define K210_PCF_GPIO5         61  /* GPIO pin 5 */
+#define K210_PCF_GPIO6         62  /* GPIO pin 6 */
+#define K210_PCF_GPIO7         63  /* GPIO pin 7 */
+#define K210_PCF_UART1_RX      64  /* UART1 Receiver */
+#define K210_PCF_UART1_TX      65  /* UART1 Transmitter */
+#define K210_PCF_UART2_RX      66  /* UART2 Receiver */
+#define K210_PCF_UART2_TX      67  /* UART2 Transmitter */
+#define K210_PCF_UART3_RX      68  /* UART3 Receiver */
+#define K210_PCF_UART3_TX      69  /* UART3 Transmitter */
+#define K210_PCF_SPI1_D0       70  /* SPI1 Data 0 */
+#define K210_PCF_SPI1_D1       71  /* SPI1 Data 1 */
+#define K210_PCF_SPI1_D2       72  /* SPI1 Data 2 */
+#define K210_PCF_SPI1_D3       73  /* SPI1 Data 3 */
+#define K210_PCF_SPI1_D4       74  /* SPI1 Data 4 */
+#define K210_PCF_SPI1_D5       75  /* SPI1 Data 5 */
+#define K210_PCF_SPI1_D6       76  /* SPI1 Data 6 */
+#define K210_PCF_SPI1_D7       77  /* SPI1 Data 7 */
+#define K210_PCF_SPI1_SS0      78  /* SPI1 Chip Select 0 */
+#define K210_PCF_SPI1_SS1      79  /* SPI1 Chip Select 1 */
+#define K210_PCF_SPI1_SS2      80  /* SPI1 Chip Select 2 */
+#define K210_PCF_SPI1_SS3      81  /* SPI1 Chip Select 3 */
+#define K210_PCF_SPI1_ARB      82  /* SPI1 Arbitration */
+#define K210_PCF_SPI1_SCLK     83  /* SPI1 Serial Clock */
+#define K210_PCF_SPI2_D0       84  /* SPI2 Data 0 */
+#define K210_PCF_SPI2_SS       85  /* SPI2 Select */
+#define K210_PCF_SPI2_SCLK     86  /* SPI2 Serial Clock */
+#define K210_PCF_I2S0_MCLK     87  /* I2S0 Master Clock */
+#define K210_PCF_I2S0_SCLK     88  /* I2S0 Serial Clock (BCLK) */
+#define K210_PCF_I2S0_WS       89  /* I2S0 Word Select (LRCLK) */
+#define K210_PCF_I2S0_IN_D0    90  /* I2S0 Serial Data Input 0 */
+#define K210_PCF_I2S0_IN_D1    91  /* I2S0 Serial Data Input 1 */
+#define K210_PCF_I2S0_IN_D2    92  /* I2S0 Serial Data Input 2 */
+#define K210_PCF_I2S0_IN_D3    93  /* I2S0 Serial Data Input 3 */
+#define K210_PCF_I2S0_OUT_D0   94  /* I2S0 Serial Data Output 0 */
+#define K210_PCF_I2S0_OUT_D1   95  /* I2S0 Serial Data Output 1 */
+#define K210_PCF_I2S0_OUT_D2   96  /* I2S0 Serial Data Output 2 */
+#define K210_PCF_I2S0_OUT_D3   97  /* I2S0 Serial Data Output 3 */
+#define K210_PCF_I2S1_MCLK     98  /* I2S1 Master Clock */
+#define K210_PCF_I2S1_SCLK     99  /* I2S1 Serial Clock (BCLK) */
+#define K210_PCF_I2S1_WS       100 /* I2S1 Word Select (LRCLK) */
+#define K210_PCF_I2S1_IN_D0    101 /* I2S1 Serial Data Input 0 */
+#define K210_PCF_I2S1_IN_D1    102 /* I2S1 Serial Data Input 1 */
+#define K210_PCF_I2S1_IN_D2    103 /* I2S1 Serial Data Input 2 */
+#define K210_PCF_I2S1_IN_D3    104 /* I2S1 Serial Data Input 3 */
+#define K210_PCF_I2S1_OUT_D0   105 /* I2S1 Serial Data Output 0 */
+#define K210_PCF_I2S1_OUT_D1   106 /* I2S1 Serial Data Output 1 */
+#define K210_PCF_I2S1_OUT_D2   107 /* I2S1 Serial Data Output 2 */
+#define K210_PCF_I2S1_OUT_D3   108 /* I2S1 Serial Data Output 3 */
+#define K210_PCF_I2S2_MCLK     109 /* I2S2 Master Clock */
+#define K210_PCF_I2S2_SCLK     110 /* I2S2 Serial Clock (BCLK) */
+#define K210_PCF_I2S2_WS       111 /* I2S2 Word Select (LRCLK) */
+#define K210_PCF_I2S2_IN_D0    112 /* I2S2 Serial Data Input 0 */
+#define K210_PCF_I2S2_IN_D1    113 /* I2S2 Serial Data Input 1 */
+#define K210_PCF_I2S2_IN_D2    114 /* I2S2 Serial Data Input 2 */
+#define K210_PCF_I2S2_IN_D3    115 /* I2S2 Serial Data Input 3 */
+#define K210_PCF_I2S2_OUT_D0   116 /* I2S2 Serial Data Output 0 */
+#define K210_PCF_I2S2_OUT_D1   117 /* I2S2 Serial Data Output 1 */
+#define K210_PCF_I2S2_OUT_D2   118 /* I2S2 Serial Data Output 2 */
+#define K210_PCF_I2S2_OUT_D3   119 /* I2S2 Serial Data Output 3 */
+#define K210_PCF_RESV0         120 /* Reserved function */
+#define K210_PCF_RESV1         121 /* Reserved function */
+#define K210_PCF_RESV2         122 /* Reserved function */
+#define K210_PCF_RESV3         123 /* Reserved function */
+#define K210_PCF_RESV4         124 /* Reserved function */
+#define K210_PCF_RESV5         125 /* Reserved function */
+#define K210_PCF_I2C0_SCLK     126 /* I2C0 Serial Clock */
+#define K210_PCF_I2C0_SDA      127 /* I2C0 Serial Data */
+#define K210_PCF_I2C1_SCLK     128 /* I2C1 Serial Clock */
+#define K210_PCF_I2C1_SDA      129 /* I2C1 Serial Data */
+#define K210_PCF_I2C2_SCLK     130 /* I2C2 Serial Clock */
+#define K210_PCF_I2C2_SDA      131 /* I2C2 Serial Data */
+#define K210_PCF_DVP_XCLK      132 /* DVP System Clock */
+#define K210_PCF_DVP_RST       133 /* DVP System Reset */
+#define K210_PCF_DVP_PWDN      134 /* DVP Power Down Mode */
+#define K210_PCF_DVP_VSYNC     135 /* DVP Vertical Sync */
+#define K210_PCF_DVP_HSYNC     136 /* DVP Horizontal Sync */
+#define K210_PCF_DVP_PCLK      137 /* Pixel Clock */
+#define K210_PCF_DVP_D0                138 /* Data Bit 0 */
+#define K210_PCF_DVP_D1                139 /* Data Bit 1 */
+#define K210_PCF_DVP_D2                140 /* Data Bit 2 */
+#define K210_PCF_DVP_D3                141 /* Data Bit 3 */
+#define K210_PCF_DVP_D4                142 /* Data Bit 4 */
+#define K210_PCF_DVP_D5                143 /* Data Bit 5 */
+#define K210_PCF_DVP_D6                144 /* Data Bit 6 */
+#define K210_PCF_DVP_D7                145 /* Data Bit 7 */
+#define K210_PCF_SCCB_SCLK     146 /* Serial Camera Control Bus Clock */
+#define K210_PCF_SCCB_SDA      147 /* Serial Camera Control Bus Data */
+#define K210_PCF_UART1_CTS     148 /* UART1 Clear To Send */
+#define K210_PCF_UART1_DSR     149 /* UART1 Data Set Ready */
+#define K210_PCF_UART1_DCD     150 /* UART1 Data Carrier Detect */
+#define K210_PCF_UART1_RI      151 /* UART1 Ring Indicator */
+#define K210_PCF_UART1_SIR_IN  152 /* UART1 Serial Infrared Input */
+#define K210_PCF_UART1_DTR     153 /* UART1 Data Terminal Ready */
+#define K210_PCF_UART1_RTS     154 /* UART1 Request To Send */
+#define K210_PCF_UART1_OUT2    155 /* UART1 User-designated Output 2 */
+#define K210_PCF_UART1_OUT1    156 /* UART1 User-designated Output 1 */
+#define K210_PCF_UART1_SIR_OUT 157 /* UART1 Serial Infrared Output */
+#define K210_PCF_UART1_BAUD    158 /* UART1 Transmit Clock Output */
+#define K210_PCF_UART1_RE      159 /* UART1 Receiver Output Enable */
+#define K210_PCF_UART1_DE      160 /* UART1 Driver Output Enable */
+#define K210_PCF_UART1_RS485_EN        161 /* UART1 RS485 Enable */
+#define K210_PCF_UART2_CTS     162 /* UART2 Clear To Send */
+#define K210_PCF_UART2_DSR     163 /* UART2 Data Set Ready */
+#define K210_PCF_UART2_DCD     164 /* UART2 Data Carrier Detect */
+#define K210_PCF_UART2_RI      165 /* UART2 Ring Indicator */
+#define K210_PCF_UART2_SIR_IN  166 /* UART2 Serial Infrared Input */
+#define K210_PCF_UART2_DTR     167 /* UART2 Data Terminal Ready */
+#define K210_PCF_UART2_RTS     168 /* UART2 Request To Send */
+#define K210_PCF_UART2_OUT2    169 /* UART2 User-designated Output 2 */
+#define K210_PCF_UART2_OUT1    170 /* UART2 User-designated Output 1 */
+#define K210_PCF_UART2_SIR_OUT 171 /* UART2 Serial Infrared Output */
+#define K210_PCF_UART2_BAUD    172 /* UART2 Transmit Clock Output */
+#define K210_PCF_UART2_RE      173 /* UART2 Receiver Output Enable */
+#define K210_PCF_UART2_DE      174 /* UART2 Driver Output Enable */
+#define K210_PCF_UART2_RS485_EN        175 /* UART2 RS485 Enable */
+#define K210_PCF_UART3_CTS     176 /* UART3 Clear To Send */
+#define K210_PCF_UART3_DSR     177 /* UART3 Data Set Ready */
+#define K210_PCF_UART3_DCD     178 /* UART3 Data Carrier Detect */
+#define K210_PCF_UART3_RI      179 /* UART3 Ring Indicator */
+#define K210_PCF_UART3_SIR_IN  180 /* UART3 Serial Infrared Input */
+#define K210_PCF_UART3_DTR     181 /* UART3 Data Terminal Ready */
+#define K210_PCF_UART3_RTS     182 /* UART3 Request To Send */
+#define K210_PCF_UART3_OUT2    183 /* UART3 User-designated Output 2 */
+#define K210_PCF_UART3_OUT1    184 /* UART3 User-designated Output 1 */
+#define K210_PCF_UART3_SIR_OUT 185 /* UART3 Serial Infrared Output */
+#define K210_PCF_UART3_BAUD    186 /* UART3 Transmit Clock Output */
+#define K210_PCF_UART3_RE      187 /* UART3 Receiver Output Enable */
+#define K210_PCF_UART3_DE      188 /* UART3 Driver Output Enable */
+#define K210_PCF_UART3_RS485_EN        189 /* UART3 RS485 Enable */
+#define K210_PCF_TIMER0_TOGGLE1        190 /* TIMER0 Toggle Output 1 */
+#define K210_PCF_TIMER0_TOGGLE2        191 /* TIMER0 Toggle Output 2 */
+#define K210_PCF_TIMER0_TOGGLE3        192 /* TIMER0 Toggle Output 3 */
+#define K210_PCF_TIMER0_TOGGLE4        193 /* TIMER0 Toggle Output 4 */
+#define K210_PCF_TIMER1_TOGGLE1        194 /* TIMER1 Toggle Output 1 */
+#define K210_PCF_TIMER1_TOGGLE2        195 /* TIMER1 Toggle Output 2 */
+#define K210_PCF_TIMER1_TOGGLE3        196 /* TIMER1 Toggle Output 3 */
+#define K210_PCF_TIMER1_TOGGLE4        197 /* TIMER1 Toggle Output 4 */
+#define K210_PCF_TIMER2_TOGGLE1        198 /* TIMER2 Toggle Output 1 */
+#define K210_PCF_TIMER2_TOGGLE2        199 /* TIMER2 Toggle Output 2 */
+#define K210_PCF_TIMER2_TOGGLE3        200 /* TIMER2 Toggle Output 3 */
+#define K210_PCF_TIMER2_TOGGLE4        201 /* TIMER2 Toggle Output 4 */
+#define K210_PCF_CLK_SPI2      202 /* Clock SPI2 */
+#define K210_PCF_CLK_I2C2      203 /* Clock I2C2 */
+#define K210_PCF_INTERNAL0     204 /* Internal function signal 0 */
+#define K210_PCF_INTERNAL1     205 /* Internal function signal 1 */
+#define K210_PCF_INTERNAL2     206 /* Internal function signal 2 */
+#define K210_PCF_INTERNAL3     207 /* Internal function signal 3 */
+#define K210_PCF_INTERNAL4     208 /* Internal function signal 4 */
+#define K210_PCF_INTERNAL5     209 /* Internal function signal 5 */
+#define K210_PCF_INTERNAL6     210 /* Internal function signal 6 */
+#define K210_PCF_INTERNAL7     211 /* Internal function signal 7 */
+#define K210_PCF_INTERNAL8     212 /* Internal function signal 8 */
+#define K210_PCF_INTERNAL9     213 /* Internal function signal 9 */
+#define K210_PCF_INTERNAL10    214 /* Internal function signal 10 */
+#define K210_PCF_INTERNAL11    215 /* Internal function signal 11 */
+#define K210_PCF_INTERNAL12    216 /* Internal function signal 12 */
+#define K210_PCF_INTERNAL13    217 /* Internal function signal 13 */
+#define K210_PCF_INTERNAL14    218 /* Internal function signal 14 */
+#define K210_PCF_INTERNAL15    219 /* Internal function signal 15 */
+#define K210_PCF_INTERNAL16    220 /* Internal function signal 16 */
+#define K210_PCF_INTERNAL17    221 /* Internal function signal 17 */
+#define K210_PCF_CONSTANT      222 /* Constant function */
+#define K210_PCF_INTERNAL18    223 /* Internal function signal 18 */
+#define K210_PCF_DEBUG0                224 /* Debug function 0 */
+#define K210_PCF_DEBUG1                225 /* Debug function 1 */
+#define K210_PCF_DEBUG2                226 /* Debug function 2 */
+#define K210_PCF_DEBUG3                227 /* Debug function 3 */
+#define K210_PCF_DEBUG4                228 /* Debug function 4 */
+#define K210_PCF_DEBUG5                229 /* Debug function 5 */
+#define K210_PCF_DEBUG6                230 /* Debug function 6 */
+#define K210_PCF_DEBUG7                231 /* Debug function 7 */
+#define K210_PCF_DEBUG8                232 /* Debug function 8 */
+#define K210_PCF_DEBUG9                233 /* Debug function 9 */
+#define K210_PCF_DEBUG10       234 /* Debug function 10 */
+#define K210_PCF_DEBUG11       235 /* Debug function 11 */
+#define K210_PCF_DEBUG12       236 /* Debug function 12 */
+#define K210_PCF_DEBUG13       237 /* Debug function 13 */
+#define K210_PCF_DEBUG14       238 /* Debug function 14 */
+#define K210_PCF_DEBUG15       239 /* Debug function 15 */
+#define K210_PCF_DEBUG16       240 /* Debug function 16 */
+#define K210_PCF_DEBUG17       241 /* Debug function 17 */
+#define K210_PCF_DEBUG18       242 /* Debug function 18 */
+#define K210_PCF_DEBUG19       243 /* Debug function 19 */
+#define K210_PCF_DEBUG20       244 /* Debug function 20 */
+#define K210_PCF_DEBUG21       245 /* Debug function 21 */
+#define K210_PCF_DEBUG22       246 /* Debug function 22 */
+#define K210_PCF_DEBUG23       247 /* Debug function 23 */
+#define K210_PCF_DEBUG24       248 /* Debug function 24 */
+#define K210_PCF_DEBUG25       249 /* Debug function 25 */
+#define K210_PCF_DEBUG26       250 /* Debug function 26 */
+#define K210_PCF_DEBUG27       251 /* Debug function 27 */
+#define K210_PCF_DEBUG28       252 /* Debug function 28 */
+#define K210_PCF_DEBUG29       253 /* Debug function 29 */
+#define K210_PCF_DEBUG30       254 /* Debug function 30 */
+#define K210_PCF_DEBUG31       255 /* Debug function 31 */
+
+#define K210_FPIOA(pin, func)          (((pin) << 16) | (func))
+
+#define K210_PC_POWER_3V3      0
+#define K210_PC_POWER_1V8      1
+
+#endif /* PINCTRL_K210_FPIOA_H */
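
Note: K210_FPIOA() packs a pin number into the upper half-word and a K210_PCF_* function code into the low bits (the function field is 8 bits wide, per K210_PCF_MASK), so a pin-to-function assignment fits in a single device-tree cell for the pinctrl driver to unpack. A quick user-space check of the encoding (the one function code used is repeated locally so the snippet stands alone):

    #include <assert.h>

    #define K210_PCF_UARTHS_TX      19
    #define K210_FPIOA(pin, func)   (((pin) << 16) | (func))

    int main(void)
    {
            /* Pin 5 routed to the UARTHS transmitter. */
            unsigned int cell = K210_FPIOA(5, K210_PCF_UARTHS_TX);

            assert((cell >> 16) == 5);      /* pin in the upper half-word */
            assert((cell & 0xff) == 19);    /* function in the low byte */
            return 0;
    }
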
diff --git a/include/dt-bindings/reset/k210-rst.h b/include/dt-bindings/reset/k210-rst.h
new file mode 100644 (file)
index 0000000..883c1ae
--- /dev/null
@@ -0,0 +1,42 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/*
+ * Copyright (C) 2019 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef RESET_K210_SYSCTL_H
+#define RESET_K210_SYSCTL_H
+
+/*
+ * Kendryte K210 SoC system controller K210_SYSCTL_SOFT_RESET register bits.
+ * Taken from Kendryte SDK (kendryte-standalone-sdk).
+ */
+#define K210_RST_ROM   0
+#define K210_RST_DMA   1
+#define K210_RST_AI    2
+#define K210_RST_DVP   3
+#define K210_RST_FFT   4
+#define K210_RST_GPIO  5
+#define K210_RST_SPI0  6
+#define K210_RST_SPI1  7
+#define K210_RST_SPI2  8
+#define K210_RST_SPI3  9
+#define K210_RST_I2S0  10
+#define K210_RST_I2S1  11
+#define K210_RST_I2S2  12
+#define K210_RST_I2C0  13
+#define K210_RST_I2C1  14
+#define K210_RST_I2C2  15
+#define K210_RST_UART1 16
+#define K210_RST_UART2 17
+#define K210_RST_UART3 18
+#define K210_RST_AES   19
+#define K210_RST_FPIOA 20
+#define K210_RST_TIMER0        21
+#define K210_RST_TIMER1        22
+#define K210_RST_TIMER2        23
+#define K210_RST_WDT0  24
+#define K210_RST_WDT1  25
+#define K210_RST_SHA   26
+#define K210_RST_RTC   29
+
+#endif /* RESET_K210_SYSCTL_H */
index 38afb34..abfcbe0 100644 (file)
@@ -2,7 +2,7 @@
 /*
  * Copyright (C) 2010 IBM Corporation
  * Copyright (C) 2010 Politecnico di Torino, Italy
- *                    TORSEC group -- http://security.polito.it
+ *                    TORSEC group -- https://security.polito.it
  *
  * Authors:
  * Mimi Zohar <zohar@us.ibm.com>
index 8dcb3e1..6fd3cda 100644 (file)
 #define ARMV8_PMU_CYCLE_IDX            (ARMV8_PMU_MAX_COUNTERS - 1)
 #define ARMV8_PMU_MAX_COUNTER_PAIRS    ((ARMV8_PMU_MAX_COUNTERS + 1) >> 1)
 
+DECLARE_STATIC_KEY_FALSE(kvm_arm_pmu_available);
+
+static __always_inline bool kvm_arm_support_pmu_v3(void)
+{
+       return static_branch_likely(&kvm_arm_pmu_available);
+}
+
 #ifdef CONFIG_HW_PERF_EVENTS
 
 struct kvm_pmc {
@@ -47,7 +54,6 @@ void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val);
 void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu, u64 data,
                                    u64 select_idx);
-bool kvm_arm_support_pmu_v3(void);
 int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
                            struct kvm_device_attr *attr);
 int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu,
@@ -87,7 +93,6 @@ static inline void kvm_pmu_software_increment(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_handle_pmcr(struct kvm_vcpu *vcpu, u64 val) {}
 static inline void kvm_pmu_set_counter_event_type(struct kvm_vcpu *vcpu,
                                                  u64 data, u64 select_idx) {}
-static inline bool kvm_arm_support_pmu_v3(void) { return false; }
 static inline int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu,
                                          struct kvm_device_attr *attr)
 {
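
Note: kvm_arm_support_pmu_v3() turns from an out-of-line predicate into a static-branch test on the new kvm_arm_pmu_available key, so the check is patched into a straight-line no-op (or unconditional jump) at runtime instead of costing a call. The general pattern, sketched with illustrative names:

    #include <linux/jump_label.h>

    DEFINE_STATIC_KEY_FALSE(feature_available); /* DECLARE_STATIC_KEY_FALSE() in a header */

    static __always_inline bool feature_supported(void)
    {
            return static_branch_likely(&feature_available);
    }

    static int __init feature_probe(void)
    {
            /* Flip the key once at init time, e.g. when hardware is detected. */
            static_branch_enable(&feature_available);
            return 0;
    }
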
index d33c3cf..3bdcfc4 100644 (file)
@@ -222,10 +222,14 @@ void __iomem *__acpi_map_table(unsigned long phys, unsigned long size);
 void __acpi_unmap_table(void __iomem *map, unsigned long size);
 int early_acpi_boot_init(void);
 int acpi_boot_init (void);
+void acpi_boot_table_prepare (void);
 void acpi_boot_table_init (void);
 int acpi_mps_check (void);
 int acpi_numa_init (void);
 
+int acpi_locate_initial_tables (void);
+void acpi_reserve_initial_tables (void);
+void acpi_table_init_complete (void);
 int acpi_table_init (void);
 int acpi_table_parse(char *id, acpi_tbl_table_handler handler);
 int __init acpi_table_parse_entries(char *id, unsigned long table_size,
@@ -591,9 +595,6 @@ extern u32 osc_sb_native_usb4_control;
 #define ACPI_GSB_ACCESS_ATTRIB_RAW_BYTES       0x0000000E
 #define ACPI_GSB_ACCESS_ATTRIB_RAW_PROCESS     0x0000000F
 
-extern acpi_status acpi_pci_osc_control_set(acpi_handle handle,
-                                            u32 *mask, u32 req);
-
 /* Enable _OST when all relevant hotplug operations are enabled */
 #if defined(CONFIG_ACPI_HOTPLUG_CPU) &&                        \
        defined(CONFIG_ACPI_HOTPLUG_MEMORY) &&          \
@@ -749,12 +750,12 @@ acpi_dev_get_first_match_dev(const char *hid, const char *uid, s64 hrv)
 
 static inline void acpi_dev_put(struct acpi_device *adev) {}
 
-static inline bool is_acpi_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_node(const struct fwnode_handle *fwnode)
 {
        return false;
 }
 
-static inline bool is_acpi_device_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_device_node(const struct fwnode_handle *fwnode)
 {
        return false;
 }
@@ -764,7 +765,7 @@ static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwno
        return NULL;
 }
 
-static inline bool is_acpi_data_node(struct fwnode_handle *fwnode)
+static inline bool is_acpi_data_node(const struct fwnode_handle *fwnode)
 {
        return false;
 }
@@ -817,9 +818,12 @@ static inline int acpi_boot_init(void)
        return 0;
 }
 
+static inline void acpi_boot_table_prepare(void)
+{
+}
+
 static inline void acpi_boot_table_init(void)
 {
-       return;
 }
 
 static inline int acpi_mps_check(void)
@@ -1082,19 +1086,25 @@ void __acpi_handle_debug(struct _ddebug *descriptor, acpi_handle handle, const c
 #if defined(CONFIG_ACPI) && defined(CONFIG_GPIOLIB)
 bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
                                struct acpi_resource_gpio **agpio);
-int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index);
+int acpi_dev_gpio_irq_get_by(struct acpi_device *adev, const char *name, int index);
 #else
 static inline bool acpi_gpio_get_irq_resource(struct acpi_resource *ares,
                                              struct acpi_resource_gpio **agpio)
 {
        return false;
 }
-static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
+static inline int acpi_dev_gpio_irq_get_by(struct acpi_device *adev,
+                                          const char *name, int index)
 {
        return -ENXIO;
 }
 #endif
 
+static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index)
+{
+       return acpi_dev_gpio_irq_get_by(adev, NULL, index);
+}
+
 /* Device properties */
 
 #ifdef CONFIG_ACPI
index 6cc93ab..c68d87b 100644 (file)
@@ -105,8 +105,19 @@ extern struct bus_type amba_bustype;
 #define amba_get_drvdata(d)    dev_get_drvdata(&d->dev)
 #define amba_set_drvdata(d,p)  dev_set_drvdata(&d->dev, p)
 
+#ifdef CONFIG_ARM_AMBA
 int amba_driver_register(struct amba_driver *);
 void amba_driver_unregister(struct amba_driver *);
+#else
+static inline int amba_driver_register(struct amba_driver *drv)
+{
+       return -EINVAL;
+}
+static inline void amba_driver_unregister(struct amba_driver *drv)
+{
+}
+#endif
+
 struct amba_device *amba_device_alloc(const char *, resource_size_t, size_t);
 void amba_device_put(struct amba_device *);
 int amba_device_add(struct amba_device *, struct resource *);
index 60cd25c..9b02961 100644 (file)
@@ -151,7 +151,7 @@ struct atm_dev {
        const char      *type;          /* device type name */
        int             number;         /* device index */
        void            *dev_data;      /* per-device data */
-       void            *phy_data;      /* private PHY date */
+       void            *phy_data;      /* private PHY data */
        unsigned long   flags;          /* device flags (ATM_DF_*) */
        struct list_head local;         /* local ATM addresses */
        struct list_head lecs;          /* LECS ATM addresses learned via ILMI */
index 5b468f2..d0246c9 100644 (file)
 #define BIO_BUG_ON
 #endif
 
-#define BIO_MAX_PAGES          256
+#define BIO_MAX_VECS           256U
+
+static inline unsigned int bio_max_segs(unsigned int nr_segs)
+{
+       return min(nr_segs, BIO_MAX_VECS);
+}
 
 #define bio_prio(bio)                  (bio)->bi_ioprio
 #define bio_set_prio(bio, prio)                ((bio)->bi_ioprio = prio)
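
bio_max_segs() above simply clamps a caller-supplied segment count to the new BIO_MAX_VECS limit. A hedged usage sketch, assuming the two-argument bio_alloc() of this kernel series and a hypothetical nr_pages:

/* Sketch: size a bio by a page count, never exceeding BIO_MAX_VECS. */
static struct bio *example_alloc_bio(unsigned int nr_pages)
{
	return bio_alloc(GFP_KERNEL, bio_max_segs(nr_pages));
}
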
index a61f192..a5a4830 100644 (file)
@@ -214,7 +214,7 @@ static inline int get_count_order_long(unsigned long l)
  * __ffs64 - find first set bit in a 64 bit word
  * @word: The 64 bit word
  *
- * On 64 bit arches this is a synomyn for __ffs
+ * On 64 bit arches this is a synonym for __ffs
  * The result is not defined if no bits are set, so check that @word
  * is non-zero before calling this.
  */
index 69035e9..158aefa 100644 (file)
@@ -65,8 +65,6 @@ typedef void (rq_end_io_fn)(struct request *, blk_status_t);
  * request flags */
 typedef __u32 __bitwise req_flags_t;
 
-/* elevator knows about this request */
-#define RQF_SORTED             ((__force req_flags_t)(1 << 0))
 /* drive already may have started this one */
 #define RQF_STARTED            ((__force req_flags_t)(1 << 1))
 /* may not be passed by ioscheduler */
@@ -87,8 +85,6 @@ typedef __u32 __bitwise req_flags_t;
 #define RQF_ELVPRIV            ((__force req_flags_t)(1 << 12))
 /* account into disk and partition IO statistics */
 #define RQF_IO_STAT            ((__force req_flags_t)(1 << 13))
-/* request came from our alloc pool */
-#define RQF_ALLOCED            ((__force req_flags_t)(1 << 14))
 /* runtime pm request */
 #define RQF_PM                 ((__force req_flags_t)(1 << 15))
 /* on IO scheduler merge hash */
@@ -462,7 +458,6 @@ struct request_queue {
 #ifdef CONFIG_PM
        struct device           *dev;
        enum rpm_status         rpm_status;
-       unsigned int            nr_pending;
 #endif
 
        /*
index 0555657..a083e15 100644 (file)
@@ -23,8 +23,6 @@ struct blk_trace {
        u32 pid;
        u32 dev;
        struct dentry *dir;
-       struct dentry *dropped_file;
-       struct dentry *msg_file;
        struct list_head running_list;
        atomic_t dropped;
 };
@@ -119,7 +117,7 @@ struct compat_blk_user_trace_setup {
 
 #endif
 
-extern void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes);
+void blk_fill_rwbs(char *rwbs, unsigned int op);
 
 static inline sector_t blk_rq_trace_sector(struct request *rq)
 {
index cccaef1..3625f01 100644 (file)
@@ -21,6 +21,7 @@
 #include <linux/capability.h>
 #include <linux/sched/mm.h>
 #include <linux/slab.h>
+#include <linux/percpu-refcount.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -556,7 +557,8 @@ struct bpf_tramp_progs {
  *      fentry = a set of program to run before calling original function
  *      fexit = a set of program to run after original function
  */
-int arch_prepare_bpf_trampoline(void *image, void *image_end,
+struct bpf_tramp_image;
+int arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
                                const struct btf_func_model *m, u32 flags,
                                struct bpf_tramp_progs *tprogs,
                                void *orig_call);
@@ -565,6 +567,8 @@ u64 notrace __bpf_prog_enter(struct bpf_prog *prog);
 void notrace __bpf_prog_exit(struct bpf_prog *prog, u64 start);
 u64 notrace __bpf_prog_enter_sleepable(struct bpf_prog *prog);
 void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start);
+void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr);
+void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr);
 
 struct bpf_ksym {
        unsigned long            start;
@@ -583,6 +587,18 @@ enum bpf_tramp_prog_type {
        BPF_TRAMP_REPLACE, /* more than MAX */
 };
 
+struct bpf_tramp_image {
+       void *image;
+       struct bpf_ksym ksym;
+       struct percpu_ref pcref;
+       void *ip_after_call;
+       void *ip_epilogue;
+       union {
+               struct rcu_head rcu;
+               struct work_struct work;
+       };
+};
+
 struct bpf_trampoline {
        /* hlist for trampoline_table */
        struct hlist_node hlist;
@@ -605,9 +621,8 @@ struct bpf_trampoline {
        /* Number of attached programs. A counter per kind. */
        int progs_cnt[BPF_TRAMP_MAX];
        /* Executable image of trampoline */
-       void *image;
+       struct bpf_tramp_image *cur_image;
        u64 selector;
-       struct bpf_ksym ksym;
 };
 
 struct bpf_attach_target_info {
@@ -691,6 +706,8 @@ void bpf_image_ksym_add(void *data, struct bpf_ksym *ksym);
 void bpf_image_ksym_del(struct bpf_ksym *ksym);
 void bpf_ksym_add(struct bpf_ksym *ksym);
 void bpf_ksym_del(struct bpf_ksym *ksym);
+int bpf_jit_charge_modmem(u32 pages);
+void bpf_jit_uncharge_modmem(u32 pages);
 #else
 static inline int bpf_trampoline_link_prog(struct bpf_prog *prog,
                                           struct bpf_trampoline *tr)
@@ -787,7 +804,6 @@ struct bpf_prog_aux {
        bool func_proto_unreliable;
        bool sleepable;
        bool tail_call_reachable;
-       enum bpf_tramp_prog_type trampoline_prog_type;
        struct hlist_node tramp_hlist;
        /* BTF_KIND_FUNC_PROTO for valid attach_btf_id */
        const struct btf_type *attach_func_proto;
@@ -1093,7 +1109,7 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                _ret;                                                   \
         })
 
-#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \
+#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null, set_cg_storage) \
        ({                                              \
                struct bpf_prog_array_item *_item;      \
                struct bpf_prog *_prog;                 \
@@ -1106,7 +1122,8 @@ int bpf_prog_array_copy(struct bpf_prog_array *old_array,
                        goto _out;                      \
                _item = &_array->items[0];              \
                while ((_prog = READ_ONCE(_item->prog))) {              \
-                       bpf_cgroup_storage_set(_item->cgroup_storage);  \
+                       if (set_cg_storage)             \
+                               bpf_cgroup_storage_set(_item->cgroup_storage);  \
                        _ret &= func(_prog, ctx);       \
                        _item++;                        \
                }                                       \
@@ -1153,10 +1170,10 @@ _out:                                                   \
        })
 
 #define BPF_PROG_RUN_ARRAY(array, ctx, func)           \
-       __BPF_PROG_RUN_ARRAY(array, ctx, func, false)
+       __BPF_PROG_RUN_ARRAY(array, ctx, func, false, true)
 
 #define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func)     \
-       __BPF_PROG_RUN_ARRAY(array, ctx, func, true)
+       __BPF_PROG_RUN_ARRAY(array, ctx, func, true, false)
 
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
index 2f5d731..8afa92d 100644 (file)
@@ -44,6 +44,7 @@
 
 #include <linux/can.h>
 #include <linux/list.h>
+#include <linux/netdevice.h>
 
 #define CAN_SFF_RCV_ARRAY_SZ (1 << CAN_SFF_ID_BITS)
 #define CAN_EFF_RCV_HASH_BITS 10
@@ -65,4 +66,15 @@ struct can_ml_priv {
 #endif
 };
 
+static inline struct can_ml_priv *can_get_ml_priv(struct net_device *dev)
+{
+       return netdev_get_ml_priv(dev, ML_PRIV_CAN);
+}
+
+static inline void can_set_ml_priv(struct net_device *dev,
+                                  struct can_ml_priv *ml_priv)
+{
+       netdev_set_ml_priv(dev, ml_priv, ML_PRIV_CAN);
+}
+
 #endif /* CAN_ML_H */
index 685f34c..d438eb0 100644 (file)
@@ -65,8 +65,12 @@ static inline void can_skb_reserve(struct sk_buff *skb)
 
 static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk)
 {
-       if (sk) {
-               sock_hold(sk);
+       /* If the socket has already been closed by user space, the
+        * refcount may already be 0 (and the socket will be freed
+        * after the last TX skb has been freed). So only increase
+        * socket refcount if the refcount is > 0.
+        */
+       if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) {
                skb->destructor = sock_efree;
                skb->sk = sk;
        }
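
The fix relies on refcount_inc_not_zero(), which takes a reference only while the count is still non-zero. The same try-get pattern, sketched on a hypothetical object:

#include <linux/refcount.h>

struct example_obj {
	refcount_t ref;
};

/* Returns true, with a reference held, only if the object is still live. */
static bool example_obj_tryget(struct example_obj *obj)
{
	return refcount_inc_not_zero(&obj->ref);
}
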
index b2f6989..65efb74 100644 (file)
@@ -247,8 +247,11 @@ static inline bool ns_capable_setid(struct user_namespace *ns, int cap)
        return true;
 }
 #endif /* CONFIG_MULTIUSER */
-extern bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode);
-extern bool capable_wrt_inode_uidgid(const struct inode *inode, int cap);
+bool privileged_wrt_inode_uidgid(struct user_namespace *ns,
+                                struct user_namespace *mnt_userns,
+                                const struct inode *inode);
+bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns,
+                             const struct inode *inode, int cap);
 extern bool file_ns_capable(const struct file *file, struct user_namespace *ns, int cap);
 extern bool ptracer_capable(struct task_struct *tsk, struct user_namespace *ns);
 static inline bool perfmon_capable(void)
@@ -268,8 +271,11 @@ static inline bool checkpoint_restore_ns_capable(struct user_namespace *ns)
 }
 
 /* audit system wants to get cap info from files as well */
-extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
+int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
+                          const struct dentry *dentry,
+                          struct cpu_vfs_cap_data *cpu_caps);
 
-extern int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size);
+int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
+                     const void **ivalue, size_t size);
 
 #endif /* !_LINUX_CAPABILITY_H */
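
The extra user_namespace argument threads the idmapped-mount mapping through each capability check. A hedged sketch of a caller (mnt_userns would come from the VFS; CAP_FOWNER is an arbitrary example):

/* Sketch: ownership check that honours an idmapped mount. */
static bool example_may_modify(struct user_namespace *mnt_userns,
			       const struct inode *inode)
{
	return capable_wrt_inode_uidgid(mnt_userns, inode, CAP_FOWNER);
}
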
index 4060004..6617d9c 100644 (file)
@@ -4,7 +4,7 @@
  *     Version: 0.1.0
  * Description: cfag12864b LCD driver header
  *
- *      Author: Copyright (C) Miguel Ojeda Sandonis
+ *      Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
  *        Date: 2006-10-12
  */
 
index 98cff1b..d217c38 100644 (file)
@@ -3,16 +3,6 @@
 #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead."
 #endif
 
-#define CLANG_VERSION (__clang_major__ * 10000 \
-                    + __clang_minor__ * 100    \
-                    + __clang_patchlevel__)
-
-#if CLANG_VERSION < 100001
-#ifndef __BPF_TRACING__
-# error Sorry, your version of Clang is too old - please use 10.0.1 or newer.
-#endif
-#endif
-
 /* Compiler specific definitions for Clang compiler */
 
 /* same as gcc, this was present in clang-2.6 so we can assume it works
 #define __no_sanitize_thread
 #endif
 
+#if defined(CONFIG_ARCH_USE_BUILTIN_BSWAP)
+#define __HAVE_BUILTIN_BSWAP32__
+#define __HAVE_BUILTIN_BSWAP64__
+#define __HAVE_BUILTIN_BSWAP16__
+#endif /* CONFIG_ARCH_USE_BUILTIN_BSWAP */
+
 #if __has_feature(undefined_behavior_sanitizer)
 /* GCC does not have __SANITIZE_UNDEFINED__ */
 #define __no_sanitize_undefined \
index 555ab0f..4875024 100644 (file)
                     + __GNUC_MINOR__ * 100     \
                     + __GNUC_PATCHLEVEL__)
 
-/* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */
-#if GCC_VERSION < 40900
-# error Sorry, your version of GCC is too old - please use 4.9 or newer.
-#elif defined(CONFIG_ARM64) && GCC_VERSION < 50100
-/*
- * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293
- * https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk
- */
-# error Sorry, your version of GCC is too old - please use 5.1 or newer.
-#endif
-
 /*
  * This macro obfuscates arithmetic on a variable address so that gcc
  * shouldn't recognize the original var, and make assumptions about it.
diff --git a/include/linux/compiler-version.h b/include/linux/compiler-version.h
new file mode 100644 (file)
index 0000000..2b2972c
--- /dev/null
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+
+#ifdef  __LINUX_COMPILER_VERSION_H
+#error "Please do not include <linux/compiler-version.h>. This is done by the build system."
+#endif
+#define __LINUX_COMPILER_VERSION_H
+
+/*
+ * This header exists to force a full rebuild when the compiler is upgraded.
+ *
+ * When fixdep scans this, it will find the string "CONFIG_CC_VERSION_TEXT"
+ * and add a dependency on include/config/cc/version/text.h, which is touched
+ * by Kconfig when the version string from the compiler changes.
+ */
index ea5e04e..c043b8d 100644 (file)
 #endif
 
 /*
+ * gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#Common-Function-Attributes
+ * clang: https://clang.llvm.org/docs/AttributeReference.html#flatten
+ */
+# define __flatten                     __attribute__((flatten))
+
+/*
  * Note the missing underscores.
  *
  *   gcc: https://gcc.gnu.org/onlinedocs/gcc/Common-Function-Attributes.html#index-noinline-function-attribute
index b0e35ee..4ac5c08 100644 (file)
 #define CORESIGHT_ETM_PMU_NAME "cs_etm"
 #define CORESIGHT_ETM_PMU_SEED  0x10
 
-/* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS      28
-#define ETM_OPT_RETSTK 29
+/*
+ * Below are the definitions of the bit offsets for the perf options; they
+ * work as arbitrary values across all ETM versions.
+ *
+ * Most of them originally come from ETMv3.5/PTM's ETMCR config; therefore,
+ * ETMv3.5/PTM does not define ETMCR config bits with an "ETM3_" prefix and
+ * directly uses the macros below as config bits.
+ */
+#define ETM_OPT_CYCACC         12
+#define ETM_OPT_CTXTID         14
+#define ETM_OPT_CTXTID2                15
+#define ETM_OPT_TS             28
+#define ETM_OPT_RETSTK         29
 
 /* ETMv4 CONFIGR programming bits for the ETM OPTs */
 #define ETM4_CFG_BIT_CYCACC    4
 #define ETM4_CFG_BIT_CTXTID    6
+#define ETM4_CFG_BIT_VMID      7
 #define ETM4_CFG_BIT_TS                11
 #define ETM4_CFG_BIT_RETSTK    12
+#define ETM4_CFG_BIT_VMID_OPT  15
 
 static inline int coresight_get_trace_id(int cpu)
 {
index 7d3c87e..976ec26 100644 (file)
@@ -7,6 +7,7 @@
 #define _LINUX_CORESIGHT_H
 
 #include <linux/device.h>
+#include <linux/io.h>
 #include <linux/perf_event.h>
 #include <linux/sched.h>
 
@@ -115,6 +116,32 @@ struct coresight_platform_data {
 };
 
 /**
+ * struct csdev_access - Abstraction of a CoreSight device access.
+ *
+ * @io_mem     : True if the device has memory-mapped I/O
+ * @base       : When io_mem == true, base address of the component
+ * @read       : Read from the given "offset" of the given instance.
+ * @write      : Write "val" to the given "offset".
+ */
+struct csdev_access {
+       bool io_mem;
+       union {
+               void __iomem *base;
+               struct {
+                       u64 (*read)(u32 offset, bool relaxed, bool _64bit);
+                       void (*write)(u64 val, u32 offset, bool relaxed,
+                                     bool _64bit);
+               };
+       };
+};
+
+#define CSDEV_ACCESS_IOMEM(_addr)              \
+       ((struct csdev_access)  {               \
+               .io_mem         = true,         \
+               .base           = (_addr),      \
+       })
+
+/**
  * struct coresight_desc - description of a component required from drivers
  * @type:      as defined by @coresight_dev_type.
  * @subtype:   as defined by @coresight_dev_subtype.
@@ -125,6 +152,7 @@ struct coresight_platform_data {
  * @groups:    operations specific to this component. These will end up
  *             in the component's sysfs sub-directory.
  * @name:      name for the coresight device, also shown under sysfs.
+ * @access:    Describe access to the device
  */
 struct coresight_desc {
        enum coresight_dev_type type;
@@ -134,6 +162,7 @@ struct coresight_desc {
        struct device *dev;
        const struct attribute_group **groups;
        const char *name;
+       struct csdev_access access;
 };
 
 /**
@@ -173,7 +202,8 @@ struct coresight_sysfs_link {
  * @type:      as defined by @coresight_dev_type.
  * @subtype:   as defined by @coresight_dev_subtype.
  * @ops:       generic operations for this component, as defined
-               by @coresight_ops.
+ *             by @coresight_ops.
+ * @access:    Device I/O access abstraction for this device.
  * @dev:       The device entity associated to this component.
  * @refcnt:    keep track of what is in use.
  * @orphan:    true if the component has connections that haven't been linked.
@@ -195,6 +225,7 @@ struct coresight_device {
        enum coresight_dev_type type;
        union coresight_dev_subtype subtype;
        const struct coresight_ops *ops;
+       struct csdev_access access;
        struct device dev;
        atomic_t *refcnt;
        bool orphan;
@@ -326,23 +357,133 @@ struct coresight_ops {
 };
 
 #if IS_ENABLED(CONFIG_CORESIGHT)
+
+static inline u32 csdev_access_relaxed_read32(struct csdev_access *csa,
+                                             u32 offset)
+{
+       if (likely(csa->io_mem))
+               return readl_relaxed(csa->base + offset);
+
+       return csa->read(offset, true, false);
+}
+
+static inline u32 csdev_access_read32(struct csdev_access *csa, u32 offset)
+{
+       if (likely(csa->io_mem))
+               return readl(csa->base + offset);
+
+       return csa->read(offset, false, false);
+}
+
+static inline void csdev_access_relaxed_write32(struct csdev_access *csa,
+                                               u32 val, u32 offset)
+{
+       if (likely(csa->io_mem))
+               writel_relaxed(val, csa->base + offset);
+       else
+               csa->write(val, offset, true, false);
+}
+
+static inline void csdev_access_write32(struct csdev_access *csa, u32 val, u32 offset)
+{
+       if (likely(csa->io_mem))
+               writel(val, csa->base + offset);
+       else
+               csa->write(val, offset, false, false);
+}
+
+#ifdef CONFIG_64BIT
+
+static inline u64 csdev_access_relaxed_read64(struct csdev_access *csa,
+                                             u32 offset)
+{
+       if (likely(csa->io_mem))
+               return readq_relaxed(csa->base + offset);
+
+       return csa->read(offset, true, true);
+}
+
+static inline u64 csdev_access_read64(struct csdev_access *csa, u32 offset)
+{
+       if (likely(csa->io_mem))
+               return readq(csa->base + offset);
+
+       return csa->read(offset, false, true);
+}
+
+static inline void csdev_access_relaxed_write64(struct csdev_access *csa,
+                                               u64 val, u32 offset)
+{
+       if (likely(csa->io_mem))
+               writeq_relaxed(val, csa->base + offset);
+       else
+               csa->write(val, offset, true, true);
+}
+
+static inline void csdev_access_write64(struct csdev_access *csa, u64 val, u32 offset)
+{
+       if (likely(csa->io_mem))
+               writeq(val, csa->base + offset);
+       else
+               csa->write(val, offset, false, true);
+}
+
+#else  /* !CONFIG_64BIT */
+
+static inline u64 csdev_access_relaxed_read64(struct csdev_access *csa,
+                                             u32 offset)
+{
+       WARN_ON(1);
+       return 0;
+}
+
+static inline u64 csdev_access_read64(struct csdev_access *csa, u32 offset)
+{
+       WARN_ON(1);
+       return 0;
+}
+
+static inline void csdev_access_relaxed_write64(struct csdev_access *csa,
+                                               u64 val, u32 offset)
+{
+       WARN_ON(1);
+}
+
+static inline void csdev_access_write64(struct csdev_access *csa, u64 val, u32 offset)
+{
+       WARN_ON(1);
+}
+#endif /* CONFIG_64BIT */
+
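
Putting the abstraction together: a driver describes its device once in a csdev_access, and every helper above routes each access either to MMIO or to the custom callbacks. A minimal MMIO-side sketch (base and the 0x0 offset are hypothetical):

static u32 example_read_reg0(void __iomem *base)
{
	/* MMIO-backed accessor; non-MMIO devices fill in .read/.write instead. */
	struct csdev_access csa = CSDEV_ACCESS_IOMEM(base);

	return csdev_access_relaxed_read32(&csa, 0x0);
}
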
 extern struct coresight_device *
 coresight_register(struct coresight_desc *desc);
 extern void coresight_unregister(struct coresight_device *csdev);
 extern int coresight_enable(struct coresight_device *csdev);
 extern void coresight_disable(struct coresight_device *csdev);
-extern int coresight_timeout(void __iomem *addr, u32 offset,
+extern int coresight_timeout(struct csdev_access *csa, u32 offset,
                             int position, int value);
 
-extern int coresight_claim_device(void __iomem *base);
-extern int coresight_claim_device_unlocked(void __iomem *base);
+extern int coresight_claim_device(struct coresight_device *csdev);
+extern int coresight_claim_device_unlocked(struct coresight_device *csdev);
 
-extern void coresight_disclaim_device(void __iomem *base);
-extern void coresight_disclaim_device_unlocked(void __iomem *base);
+extern void coresight_disclaim_device(struct coresight_device *csdev);
+extern void coresight_disclaim_device_unlocked(struct coresight_device *csdev);
 extern char *coresight_alloc_device_name(struct coresight_dev_list *devs,
                                         struct device *dev);
 
 extern bool coresight_loses_context_with_cpu(struct device *dev);
+
+u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset);
+u32 coresight_read32(struct coresight_device *csdev, u32 offset);
+void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset);
+void coresight_relaxed_write32(struct coresight_device *csdev,
+                              u32 val, u32 offset);
+u64 coresight_relaxed_read64(struct coresight_device *csdev, u32 offset);
+u64 coresight_read64(struct coresight_device *csdev, u32 offset);
+void coresight_relaxed_write64(struct coresight_device *csdev,
+                              u64 val, u32 offset);
+void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset);
+
 #else
 static inline struct coresight_device *
 coresight_register(struct coresight_desc *desc) { return NULL; }
@@ -350,29 +491,78 @@ static inline void coresight_unregister(struct coresight_device *csdev) {}
 static inline int
 coresight_enable(struct coresight_device *csdev) { return -ENOSYS; }
 static inline void coresight_disable(struct coresight_device *csdev) {}
-static inline int coresight_timeout(void __iomem *addr, u32 offset,
-                                    int position, int value) { return 1; }
-static inline int coresight_claim_device_unlocked(void __iomem *base)
+
+static inline int coresight_timeout(struct csdev_access *csa, u32 offset,
+                                   int position, int value)
+{
+       return 1;
+}
+
+static inline int coresight_claim_device_unlocked(struct coresight_device *csdev)
 {
        return -EINVAL;
 }
 
-static inline int coresight_claim_device(void __iomem *base)
+static inline int coresight_claim_device(struct coresight_device *csdev)
 {
        return -EINVAL;
 }
 
-static inline void coresight_disclaim_device(void __iomem *base) {}
-static inline void coresight_disclaim_device_unlocked(void __iomem *base) {}
+static inline void coresight_disclaim_device(struct coresight_device *csdev) {}
+static inline void coresight_disclaim_device_unlocked(struct coresight_device *csdev) {}
 
 static inline bool coresight_loses_context_with_cpu(struct device *dev)
 {
        return false;
 }
-#endif
+
+static inline u32 coresight_relaxed_read32(struct coresight_device *csdev, u32 offset)
+{
+       WARN_ON_ONCE(1);
+       return 0;
+}
+
+static inline u32 coresight_read32(struct coresight_device *csdev, u32 offset)
+{
+       WARN_ON_ONCE(1);
+       return 0;
+}
+
+static inline void coresight_write32(struct coresight_device *csdev, u32 val, u32 offset)
+{
+}
+
+static inline void coresight_relaxed_write32(struct coresight_device *csdev,
+                                            u32 val, u32 offset)
+{
+}
+
+static inline u64 coresight_relaxed_read64(struct coresight_device *csdev,
+                                          u32 offset)
+{
+       WARN_ON_ONCE(1);
+       return 0;
+}
+
+static inline u64 coresight_read64(struct coresight_device *csdev, u32 offset)
+{
+       WARN_ON_ONCE(1);
+       return 0;
+}
+
+static inline void coresight_relaxed_write64(struct coresight_device *csdev,
+                                            u64 val, u32 offset)
+{
+}
+
+static inline void coresight_write64(struct coresight_device *csdev, u64 val, u32 offset)
+{
+}
+
+#endif         /* IS_ENABLED(CONFIG_CORESIGHT) */
 
 extern int coresight_get_cpu(struct device *dev);
 
 struct coresight_platform_data *coresight_get_platform_data(struct device *dev);
 
-#endif
+#endif         /* _LINUX_CORESIGHT_H */
index 3aaa068..94a578a 100644 (file)
@@ -108,6 +108,8 @@ static inline void cpu_maps_update_done(void)
 {
 }
 
+static inline int add_cpu(unsigned int cpu) { return 0; }
+
 #endif /* CONFIG_SMP */
 extern struct bus_type cpu_subsys;
 
@@ -137,6 +139,7 @@ static inline int  cpus_read_trylock(void) { return true; }
 static inline void lockdep_assert_cpus_held(void) { }
 static inline void cpu_hotplug_disable(void) { }
 static inline void cpu_hotplug_enable(void) { }
+static inline int remove_cpu(unsigned int cpu) { return -EPERM; }
 static inline void smp_shutdown_nonboot_cpus(unsigned int primary_cpu) { }
 #endif /* !CONFIG_HOTPLUG_CPU */
 
index ee09a39..f14adb8 100644 (file)
@@ -168,6 +168,7 @@ enum cpuhp_state {
        CPUHP_AP_PERF_X86_CQM_ONLINE,
        CPUHP_AP_PERF_X86_CSTATE_ONLINE,
        CPUHP_AP_PERF_S390_CF_ONLINE,
+       CPUHP_AP_PERF_S390_CFD_ONLINE,
        CPUHP_AP_PERF_S390_SF_ONLINE,
        CPUHP_AP_PERF_ARM_CCI_ONLINE,
        CPUHP_AP_PERF_ARM_CCN_ONLINE,
@@ -185,6 +186,7 @@ enum cpuhp_state {
        CPUHP_AP_PERF_POWERPC_TRACE_IMC_ONLINE,
        CPUHP_AP_PERF_POWERPC_HV_24x7_ONLINE,
        CPUHP_AP_PERF_POWERPC_HV_GPCI_ONLINE,
+       CPUHP_AP_PERF_CSKY_ONLINE,
        CPUHP_AP_WATCHDOG_ONLINE,
        CPUHP_AP_WORKQUEUE_ONLINE,
        CPUHP_AP_RCUTREE_ONLINE,
index 18639c0..4c63505 100644 (file)
@@ -25,7 +25,7 @@ struct inode;
 struct group_info {
        atomic_t        usage;
        int             ngroups;
-       kgid_t          gid[0];
+       kgid_t          gid[];
 } __randomize_layout;
 
 /**
index 7f4ac87..5c641f9 100644 (file)
@@ -253,7 +253,11 @@ struct target_type {
 #define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY)
 
 /*
- * Indicates that a target supports host-managed zoned block devices.
+ * Indicates support for zoned block devices:
+ * - DM_TARGET_ZONED_HM: the target also supports host-managed zoned
+ *   block devices but does not support combining different zoned models.
+ * - DM_TARGET_MIXED_ZONED_MODEL: the target supports combining multiple
+ *   devices with different zoned models.
  */
 #ifdef CONFIG_BLK_DEV_ZONED
 #define DM_TARGET_ZONED_HM             0x00000040
@@ -275,6 +279,15 @@ struct target_type {
 #define DM_TARGET_PASSES_CRYPTO                0x00000100
 #define dm_target_passes_crypto(type) ((type)->features & DM_TARGET_PASSES_CRYPTO)
 
+#ifdef CONFIG_BLK_DEV_ZONED
+#define DM_TARGET_MIXED_ZONED_MODEL    0x00000200
+#define dm_target_supports_mixed_zoned_model(type) \
+       ((type)->features & DM_TARGET_MIXED_ZONED_MODEL)
+#else
+#define DM_TARGET_MIXED_ZONED_MODEL    0x00000000
+#define dm_target_supports_mixed_zoned_model(type) (false)
+#endif
+
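
The new macro follows the existing DM_TARGET_* feature-check pattern. A hedged sketch of how table validation might consult it (ti and models_differ are hypothetical):

/* Sketch: refuse to mix zoned models unless the target opts in. */
static int example_check_zoned(struct dm_target *ti, bool models_differ)
{
	if (models_differ && !dm_target_supports_mixed_zoned_model(ti->type))
		return -EINVAL;
	return 0;
}
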
 struct dm_target {
        struct dm_table *table;
        struct target_type *type;
index 1779f90..ba66073 100644 (file)
@@ -291,6 +291,7 @@ struct device_dma_parameters {
         * sg limitations.
         */
        unsigned int max_segment_size;
+       unsigned int min_align_mask;
        unsigned long segment_boundary_mask;
 };
 
@@ -323,6 +324,7 @@ enum device_link_state {
  * AUTOPROBE_CONSUMER: Probe consumer driver automatically after supplier binds.
  * MANAGED: The core tracks presence of supplier/consumer drivers (internal).
  * SYNC_STATE_ONLY: Link only affects sync_state() behavior.
+ * INFERRED: Inferred from data (e.g., firmware) and not from driver actions.
  */
 #define DL_FLAG_STATELESS              BIT(0)
 #define DL_FLAG_AUTOREMOVE_CONSUMER    BIT(1)
@@ -332,6 +334,7 @@ enum device_link_state {
 #define DL_FLAG_AUTOPROBE_CONSUMER     BIT(5)
 #define DL_FLAG_MANAGED                        BIT(6)
 #define DL_FLAG_SYNC_STATE_ONLY                BIT(7)
+#define DL_FLAG_INFERRED               BIT(8)
 
 /**
  * enum dl_dev_state - Device driver presence tracking information.
index ee7ba5b..a498ebc 100644 (file)
@@ -75,7 +75,7 @@ enum probe_type {
  * @resume:    Called to bring a device out of sleep mode.
  * @groups:    Default attributes that get created by the driver core
  *             automatically.
- * @dev_groups:        Additional attributes attached to device instance once the
+ * @dev_groups:        Additional attributes attached to device instance once
  *             it is bound to the driver.
  * @pm:                Power management operations of the device which matched
  *             this driver.
diff --git a/include/linux/dfl.h b/include/linux/dfl.h
new file mode 100644 (file)
index 0000000..6cc1098
--- /dev/null
@@ -0,0 +1,86 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Header file for DFL driver and device API
+ *
+ * Copyright (C) 2020 Intel Corporation, Inc.
+ */
+
+#ifndef __LINUX_DFL_H
+#define __LINUX_DFL_H
+
+#include <linux/device.h>
+#include <linux/mod_devicetable.h>
+
+/**
+ * enum dfl_id_type - define the DFL FIU types
+ */
+enum dfl_id_type {
+       FME_ID = 0,
+       PORT_ID = 1,
+       DFL_ID_MAX,
+};
+
+/**
+ * struct dfl_device - represent a dfl device on the dfl bus
+ *
+ * @dev: generic device interface.
+ * @id: id of the dfl device.
+ * @type: type of DFL FIU of the device. See enum dfl_id_type.
+ * @feature_id: feature identifier local to its DFL FIU type.
+ * @mmio_res: mmio resource of this dfl device.
+ * @irqs: list of Linux IRQ numbers of this dfl device.
+ * @num_irqs: number of IRQs supported by this dfl device.
+ * @cdev: pointer to DFL FPGA container device this dfl device belongs to.
+ * @id_entry: matched id entry in dfl driver's id table.
+ */
+struct dfl_device {
+       struct device dev;
+       int id;
+       u16 type;
+       u16 feature_id;
+       struct resource mmio_res;
+       int *irqs;
+       unsigned int num_irqs;
+       struct dfl_fpga_cdev *cdev;
+       const struct dfl_device_id *id_entry;
+};
+
+/**
+ * struct dfl_driver - represent a dfl device driver
+ *
+ * @drv: driver model structure.
+ * @id_table: pointer to table of device IDs the driver is interested in.
+ *           { } member terminated.
+ * @probe: mandatory callback for device binding.
+ * @remove: callback for device unbinding.
+ */
+struct dfl_driver {
+       struct device_driver drv;
+       const struct dfl_device_id *id_table;
+
+       int (*probe)(struct dfl_device *dfl_dev);
+       void (*remove)(struct dfl_device *dfl_dev);
+};
+
+#define to_dfl_dev(d) container_of(d, struct dfl_device, dev)
+#define to_dfl_drv(d) container_of(d, struct dfl_driver, drv)
+
+/*
+ * use a macro to avoid include chaining to get THIS_MODULE.
+ */
+#define dfl_driver_register(drv) \
+       __dfl_driver_register(drv, THIS_MODULE)
+int __dfl_driver_register(struct dfl_driver *dfl_drv, struct module *owner);
+void dfl_driver_unregister(struct dfl_driver *dfl_drv);
+
+/*
+ * module_dfl_driver() - Helper macro for drivers that don't do
+ * anything special in module init/exit.  This eliminates a lot of
+ * boilerplate.  Each module may only use this macro once, and
+ * calling it replaces module_init() and module_exit().
+ */
+#define module_dfl_driver(__dfl_driver) \
+       module_driver(__dfl_driver, dfl_driver_register, \
+                     dfl_driver_unregister)
+
+#endif /* __LINUX_DFL_H */
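
A minimal consumer of this new bus API might look as follows; all names and the feature ID are hypothetical, chosen only to illustrate the registration flow:

#include <linux/dfl.h>
#include <linux/module.h>

static int example_dfl_probe(struct dfl_device *dfl_dev)
{
	dev_info(&dfl_dev->dev, "bound, feature id 0x%x\n", dfl_dev->feature_id);
	return 0;
}

static const struct dfl_device_id example_dfl_ids[] = {
	{ FME_ID, 0x10 },	/* arbitrary feature ID for illustration */
	{ }
};
MODULE_DEVICE_TABLE(dfl, example_dfl_ids);

static struct dfl_driver example_dfl_driver = {
	.drv = {
		.name = "example-dfl",
	},
	.id_table = example_dfl_ids,
	.probe = example_dfl_probe,
};
module_dfl_driver(example_dfl_driver);

MODULE_LICENSE("GPL");
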
index 09e23ad..9f12efa 100644 (file)
@@ -372,6 +372,9 @@ static inline void __dma_fence_might_wait(void) {}
 
 int dma_fence_signal(struct dma_fence *fence);
 int dma_fence_signal_locked(struct dma_fence *fence);
+int dma_fence_signal_timestamp(struct dma_fence *fence, ktime_t timestamp);
+int dma_fence_signal_timestamp_locked(struct dma_fence *fence,
+                                     ktime_t timestamp);
 signed long dma_fence_default_wait(struct dma_fence *fence,
                                   bool intr, signed long timeout);
 int dma_fence_add_callback(struct dma_fence *fence,
index 454e354..5bc5c94 100644 (file)
@@ -16,15 +16,15 @@ struct dma_heap;
 
 /**
  * struct dma_heap_ops - ops to operate on a given heap
- * @allocate:          allocate dmabuf and return fd
+ * @allocate:          allocate dmabuf and return struct dma_buf ptr
  *
- * allocate returns dmabuf fd  on success, -errno on error.
+ * allocate returns dmabuf on success, ERR_PTR(-errno) on error.
  */
 struct dma_heap_ops {
-       int (*allocate)(struct dma_heap *heap,
-                       unsigned long len,
-                       unsigned long fd_flags,
-                       unsigned long heap_flags);
+       struct dma_buf *(*allocate)(struct dma_heap *heap,
+                                   unsigned long len,
+                                   unsigned long fd_flags,
+                                   unsigned long heap_flags);
 };
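
Under the new contract a heap implementation returns the dma_buf itself and the core turns it into an fd. A skeletal sketch of the ops hook (allocation and export details elided; names hypothetical):

static struct dma_buf *example_heap_allocate(struct dma_heap *heap,
					     unsigned long len,
					     unsigned long fd_flags,
					     unsigned long heap_flags)
{
	/* Allocate backing pages and export them via dma_buf_export(); */
	/* on failure return ERR_PTR(-errno) rather than an fd. */
	return ERR_PTR(-ENOMEM);
}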
 
 /**
index 1e98b8c..51872e7 100644 (file)
@@ -22,11 +22,6 @@ struct dma_map_ops {
                        gfp_t gfp);
        void (*free_pages)(struct device *dev, size_t size, struct page *vaddr,
                        dma_addr_t dma_handle, enum dma_data_direction dir);
-       void *(*alloc_noncoherent)(struct device *dev, size_t size,
-                       dma_addr_t *dma_handle, enum dma_data_direction dir,
-                       gfp_t gfp);
-       void (*free_noncoherent)(struct device *dev, size_t size, void *vaddr,
-                       dma_addr_t dma_handle, enum dma_data_direction dir);
        int (*mmap)(struct device *, struct vm_area_struct *,
                        void *, dma_addr_t, size_t, unsigned long attrs);
 
index 2e49996..2a984cb 100644 (file)
@@ -263,10 +263,19 @@ struct page *dma_alloc_pages(struct device *dev, size_t size,
                dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp);
 void dma_free_pages(struct device *dev, size_t size, struct page *page,
                dma_addr_t dma_handle, enum dma_data_direction dir);
-void *dma_alloc_noncoherent(struct device *dev, size_t size,
-               dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp);
-void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_handle, enum dma_data_direction dir);
+
+static inline void *dma_alloc_noncoherent(struct device *dev, size_t size,
+               dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
+{
+       struct page *page = dma_alloc_pages(dev, size, dma_handle, dir, gfp);
+       return page ? page_address(page) : NULL;
+}
+
+static inline void dma_free_noncoherent(struct device *dev, size_t size,
+               void *vaddr, dma_addr_t dma_handle, enum dma_data_direction dir)
+{
+       dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir);
+}
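
With the wrappers above, dma_alloc_noncoherent() is just dma_alloc_pages() plus page_address(). A hedged caller-side sketch (dev and size are hypothetical; the buffer still needs the dma_sync_single_* calls around device access):

static void *example_alloc_ring(struct device *dev, size_t size,
				dma_addr_t *dma)
{
	return dma_alloc_noncoherent(dev, size, dma, DMA_BIDIRECTIONAL,
				     GFP_KERNEL);
}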
 
 static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr,
                size_t size, enum dma_data_direction dir, unsigned long attrs)
@@ -500,6 +509,22 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask)
        return -EIO;
 }
 
+static inline unsigned int dma_get_min_align_mask(struct device *dev)
+{
+       if (dev->dma_parms)
+               return dev->dma_parms->min_align_mask;
+       return 0;
+}
+
+static inline int dma_set_min_align_mask(struct device *dev,
+               unsigned int min_align_mask)
+{
+       if (WARN_ON_ONCE(!dev->dma_parms))
+               return -EIO;
+       dev->dma_parms->min_align_mask = min_align_mask;
+       return 0;
+}
+
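
A driver whose device needs DMA addresses to preserve low-order offset bits (for instance across swiotlb bouncing) would set the mask once at probe time. A hedged sketch, assuming a 4 KiB granule:

/* Sketch: keep the low 12 address bits stable across bounce buffering. */
static int example_setup_dma(struct device *dev)
{
	return dma_set_min_align_mask(dev, 4096 - 1);
}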
 static inline int dma_get_cache_alignment(void)
 {
 #ifdef ARCH_DMA_MINALIGN
index 36e22c5..5f106d8 100644 (file)
@@ -42,14 +42,14 @@ enum psil_endpoint_type {
 /**
  * struct psil_endpoint_config - PSI-L Endpoint configuration
  * @ep_type:           PSI-L endpoint type
+ * @channel_tpl:       Desired throughput level for the channel
  * @pkt_mode:          If set, the channel must be in Packet mode, otherwise in
  *                     TR mode
  * @notdpkt:           TDCM must be suppressed on the TX channel
  * @needs_epib:                Endpoint needs EPIB
- * @psd_size:          If set, PSdata is used by the endpoint
- * @channel_tpl:       Desired throughput level for the channel
  * @pdma_acc32:                ACC32 must be enabled on the PDMA side
  * @pdma_burst:                BURST must be enabled on the PDMA side
+ * @psd_size:          If set, PSdata is used by the endpoint
  * @mapped_channel_id: PKTDMA thread to channel mapping for mapped channels.
  *                     The thread must be serviced by the specified channel if
  *                     mapped_channel_id is >= 0 in case of PKTDMA
@@ -62,23 +62,22 @@ enum psil_endpoint_type {
  */
 struct psil_endpoint_config {
        enum psil_endpoint_type ep_type;
+       enum udma_tp_level channel_tpl;
 
        unsigned pkt_mode:1;
        unsigned notdpkt:1;
        unsigned needs_epib:1;
-       u32 psd_size;
-       enum udma_tp_level channel_tpl;
-
        /* PDMA properties, valid for PSIL_EP_PDMA_* */
        unsigned pdma_acc32:1;
        unsigned pdma_burst:1;
 
+       u32 psd_size;
        /* PKTDMA mapped channel */
-       int mapped_channel_id;
+       s16 mapped_channel_id;
        /* PKTDMA tflow and rflow ranges for mapped channel */
        u16 flow_start;
        u16 flow_num;
-       u16 default_flow_id;
+       s16 default_flow_id;
 };
 
 int psil_set_new_ep_config(struct device *dev, const char *name,
diff --git a/include/linux/dma/mmp-pdma.h b/include/linux/dma/mmp-pdma.h
deleted file mode 100644 (file)
index 25cab62..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MMP_PDMA_H_
-#define _MMP_PDMA_H_
-
-struct dma_chan;
-
-#ifdef CONFIG_MMP_PDMA
-bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param);
-#else
-static inline bool mmp_pdma_filter_fn(struct dma_chan *chan, void *param)
-{
-       return false;
-}
-#endif
-
-#endif /* _MMP_PDMA_H_ */
index 68130f5..004736b 100644 (file)
@@ -745,6 +745,8 @@ enum dmaengine_alignment {
        DMAENGINE_ALIGN_16_BYTES = 4,
        DMAENGINE_ALIGN_32_BYTES = 5,
        DMAENGINE_ALIGN_64_BYTES = 6,
+       DMAENGINE_ALIGN_128_BYTES = 7,
+       DMAENGINE_ALIGN_256_BYTES = 8,
 };
 
 /**
index eec7928..99580c2 100644 (file)
@@ -16,6 +16,8 @@ struct eeprom_93xx46_platform_data {
 #define EEPROM_93XX46_QUIRK_SINGLE_WORD_READ           (1 << 0)
 /* Instructions such as EWEN are (addrlen + 2) in length. */
 #define EEPROM_93XX46_QUIRK_INSTRUCTION_LENGTH         (1 << 1)
+/* Add extra cycle after address during a read */
+#define EEPROM_93XX46_QUIRK_EXTRA_READ_CYCLE           BIT(2)
 
        /*
         * optional hooks to control additional logic
index 8710f57..6b5d36b 100644 (file)
@@ -72,8 +72,10 @@ typedef void *efi_handle_t;
  */
 typedef guid_t efi_guid_t __aligned(__alignof__(u32));
 
-#define EFI_GUID(a,b,c,d0,d1,d2,d3,d4,d5,d6,d7) \
-       GUID_INIT(a, b, c, d0, d1, d2, d3, d4, d5, d6, d7)
+#define EFI_GUID(a, b, c, d...) (efi_guid_t){ {                                        \
+       (a) & 0xff, ((a) >> 8) & 0xff, ((a) >> 16) & 0xff, ((a) >> 24) & 0xff,  \
+       (b) & 0xff, ((b) >> 8) & 0xff,                                          \
+       (c) & 0xff, ((c) >> 8) & 0xff, d } }
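
The open-coded initializer makes the byte layout explicit: the first three fields are stored little-endian and the trailing eight bytes verbatim. A worked example with an arbitrary GUID:

static void example_guid_layout(void)
{
	/* 0x12345678 -> 78 56 34 12, 0xabcd -> cd ab, 0xef01 -> 01 ef. */
	efi_guid_t g = EFI_GUID(0x12345678, 0xabcd, 0xef01,
				0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77);
	/*
	 * g.b[] == { 0x78, 0x56, 0x34, 0x12, 0xcd, 0xab, 0x01, 0xef,
	 *            0x00, 0x11, 0x22, 0x33, 0x44, 0x55, 0x66, 0x77 }
	 */
	(void)g;
}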
 
 /*
  * Generic EFI table header
index fceb5e8..6271a5d 100644 (file)
@@ -157,18 +157,9 @@ struct kernel_symbol {
 
 #define EXPORT_SYMBOL(sym)             _EXPORT_SYMBOL(sym, "")
 #define EXPORT_SYMBOL_GPL(sym)         _EXPORT_SYMBOL(sym, "_gpl")
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)  _EXPORT_SYMBOL(sym, "_gpl_future")
 #define EXPORT_SYMBOL_NS(sym, ns)      __EXPORT_SYMBOL(sym, "", #ns)
 #define EXPORT_SYMBOL_NS_GPL(sym, ns)  __EXPORT_SYMBOL(sym, "_gpl", #ns)
 
-#ifdef CONFIG_UNUSED_SYMBOLS
-#define EXPORT_UNUSED_SYMBOL(sym)      _EXPORT_SYMBOL(sym, "_unused")
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)  _EXPORT_SYMBOL(sym, "_unused_gpl")
-#else
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
-#endif
-
 #endif /* !__ASSEMBLY__ */
 
 #endif /* _LINUX_EXPORT_H */
index fd183fb..0c19010 100644 (file)
@@ -271,6 +271,29 @@ static inline  void devm_extcon_unregister_notifier(struct device *dev,
                                struct extcon_dev *edev, unsigned int id,
                                struct notifier_block *nb) { }
 
+static inline int extcon_register_notifier_all(struct extcon_dev *edev,
+                                              struct notifier_block *nb)
+{
+       return 0;
+}
+
+static inline int extcon_unregister_notifier_all(struct extcon_dev *edev,
+                                                struct notifier_block *nb)
+{
+       return 0;
+}
+
+static inline int devm_extcon_register_notifier_all(struct device *dev,
+                                                   struct extcon_dev *edev,
+                                                   struct notifier_block *nb)
+{
+       return 0;
+}
+
+static inline void devm_extcon_unregister_notifier_all(struct device *dev,
+                                                      struct extcon_dev *edev,
+                                                      struct notifier_block *nb) { }
+
 static inline struct extcon_dev *extcon_get_extcon_dev(const char *extcon_name)
 {
        return ERR_PTR(-ENODEV);
index ebc2956..19781b0 100644 (file)
@@ -56,7 +56,7 @@
  * COMMAND_RECONFIG_FLAG_PARTIAL:
  * Set to FPGA configuration type (full or partial).
  */
-#define COMMAND_RECONFIG_FLAG_PARTIAL  1
+#define COMMAND_RECONFIG_FLAG_PARTIAL  0
 
 /*
  * Timeout settings for service clients:
index 2a0da84..71177b1 100644 (file)
@@ -64,28 +64,27 @@ enum pm_api_id {
        PM_GET_API_VERSION = 1,
        PM_SYSTEM_SHUTDOWN = 12,
        PM_REQUEST_NODE = 13,
-       PM_RELEASE_NODE,
-       PM_SET_REQUIREMENT,
+       PM_RELEASE_NODE = 14,
+       PM_SET_REQUIREMENT = 15,
        PM_RESET_ASSERT = 17,
-       PM_RESET_GET_STATUS,
+       PM_RESET_GET_STATUS = 18,
        PM_PM_INIT_FINALIZE = 21,
-       PM_FPGA_LOAD,
-       PM_FPGA_GET_STATUS,
+       PM_FPGA_LOAD = 22,
+       PM_FPGA_GET_STATUS = 23,
        PM_GET_CHIPID = 24,
        PM_IOCTL = 34,
-       PM_QUERY_DATA,
-       PM_CLOCK_ENABLE,
-       PM_CLOCK_DISABLE,
-       PM_CLOCK_GETSTATE,
-       PM_CLOCK_SETDIVIDER,
-       PM_CLOCK_GETDIVIDER,
-       PM_CLOCK_SETRATE,
-       PM_CLOCK_GETRATE,
-       PM_CLOCK_SETPARENT,
-       PM_CLOCK_GETPARENT,
+       PM_QUERY_DATA = 35,
+       PM_CLOCK_ENABLE = 36,
+       PM_CLOCK_DISABLE = 37,
+       PM_CLOCK_GETSTATE = 38,
+       PM_CLOCK_SETDIVIDER = 39,
+       PM_CLOCK_GETDIVIDER = 40,
+       PM_CLOCK_SETRATE = 41,
+       PM_CLOCK_GETRATE = 42,
+       PM_CLOCK_SETPARENT = 43,
+       PM_CLOCK_GETPARENT = 44,
        PM_SECURE_AES = 47,
        PM_FEATURE_CHECK = 63,
-       PM_API_MAX,
 };
 
 /* PMU-FW return status codes */
@@ -93,21 +92,21 @@ enum pm_ret_status {
        XST_PM_SUCCESS = 0,
        XST_PM_NO_FEATURE = 19,
        XST_PM_INTERNAL = 2000,
-       XST_PM_CONFLICT,
-       XST_PM_NO_ACCESS,
-       XST_PM_INVALID_NODE,
-       XST_PM_DOUBLE_REQ,
-       XST_PM_ABORT_SUSPEND,
+       XST_PM_CONFLICT = 2001,
+       XST_PM_NO_ACCESS = 2002,
+       XST_PM_INVALID_NODE = 2003,
+       XST_PM_DOUBLE_REQ = 2004,
+       XST_PM_ABORT_SUSPEND = 2005,
        XST_PM_MULT_USER = 2008,
 };
 
 enum pm_ioctl_id {
        IOCTL_SD_DLL_RESET = 6,
-       IOCTL_SET_SD_TAPDELAY,
-       IOCTL_SET_PLL_FRAC_MODE,
-       IOCTL_GET_PLL_FRAC_MODE,
-       IOCTL_SET_PLL_FRAC_DATA,
-       IOCTL_GET_PLL_FRAC_DATA,
+       IOCTL_SET_SD_TAPDELAY = 7,
+       IOCTL_SET_PLL_FRAC_MODE = 8,
+       IOCTL_GET_PLL_FRAC_MODE = 9,
+       IOCTL_SET_PLL_FRAC_DATA = 10,
+       IOCTL_GET_PLL_FRAC_DATA = 11,
        IOCTL_WRITE_GGS = 12,
        IOCTL_READ_GGS = 13,
        IOCTL_WRITE_PGGS = 14,
@@ -117,185 +116,185 @@ enum pm_ioctl_id {
 };
 
 enum pm_query_id {
-       PM_QID_INVALID,
-       PM_QID_CLOCK_GET_NAME,
-       PM_QID_CLOCK_GET_TOPOLOGY,
-       PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS,
-       PM_QID_CLOCK_GET_PARENTS,
-       PM_QID_CLOCK_GET_ATTRIBUTES,
+       PM_QID_INVALID = 0,
+       PM_QID_CLOCK_GET_NAME = 1,
+       PM_QID_CLOCK_GET_TOPOLOGY = 2,
+       PM_QID_CLOCK_GET_FIXEDFACTOR_PARAMS = 3,
+       PM_QID_CLOCK_GET_PARENTS = 4,
+       PM_QID_CLOCK_GET_ATTRIBUTES = 5,
        PM_QID_CLOCK_GET_NUM_CLOCKS = 12,
-       PM_QID_CLOCK_GET_MAX_DIVISOR,
+       PM_QID_CLOCK_GET_MAX_DIVISOR = 13,
 };
 
 enum zynqmp_pm_reset_action {
-       PM_RESET_ACTION_RELEASE,
-       PM_RESET_ACTION_ASSERT,
-       PM_RESET_ACTION_PULSE,
+       PM_RESET_ACTION_RELEASE = 0,
+       PM_RESET_ACTION_ASSERT = 1,
+       PM_RESET_ACTION_PULSE = 2,
 };
 
 enum zynqmp_pm_reset {
        ZYNQMP_PM_RESET_START = 1000,
        ZYNQMP_PM_RESET_PCIE_CFG = ZYNQMP_PM_RESET_START,
-       ZYNQMP_PM_RESET_PCIE_BRIDGE,
-       ZYNQMP_PM_RESET_PCIE_CTRL,
-       ZYNQMP_PM_RESET_DP,
-       ZYNQMP_PM_RESET_SWDT_CRF,
-       ZYNQMP_PM_RESET_AFI_FM5,
-       ZYNQMP_PM_RESET_AFI_FM4,
-       ZYNQMP_PM_RESET_AFI_FM3,
-       ZYNQMP_PM_RESET_AFI_FM2,
-       ZYNQMP_PM_RESET_AFI_FM1,
-       ZYNQMP_PM_RESET_AFI_FM0,
-       ZYNQMP_PM_RESET_GDMA,
-       ZYNQMP_PM_RESET_GPU_PP1,
-       ZYNQMP_PM_RESET_GPU_PP0,
-       ZYNQMP_PM_RESET_GPU,
-       ZYNQMP_PM_RESET_GT,
-       ZYNQMP_PM_RESET_SATA,
-       ZYNQMP_PM_RESET_ACPU3_PWRON,
-       ZYNQMP_PM_RESET_ACPU2_PWRON,
-       ZYNQMP_PM_RESET_ACPU1_PWRON,
-       ZYNQMP_PM_RESET_ACPU0_PWRON,
-       ZYNQMP_PM_RESET_APU_L2,
-       ZYNQMP_PM_RESET_ACPU3,
-       ZYNQMP_PM_RESET_ACPU2,
-       ZYNQMP_PM_RESET_ACPU1,
-       ZYNQMP_PM_RESET_ACPU0,
-       ZYNQMP_PM_RESET_DDR,
-       ZYNQMP_PM_RESET_APM_FPD,
-       ZYNQMP_PM_RESET_SOFT,
-       ZYNQMP_PM_RESET_GEM0,
-       ZYNQMP_PM_RESET_GEM1,
-       ZYNQMP_PM_RESET_GEM2,
-       ZYNQMP_PM_RESET_GEM3,
-       ZYNQMP_PM_RESET_QSPI,
-       ZYNQMP_PM_RESET_UART0,
-       ZYNQMP_PM_RESET_UART1,
-       ZYNQMP_PM_RESET_SPI0,
-       ZYNQMP_PM_RESET_SPI1,
-       ZYNQMP_PM_RESET_SDIO0,
-       ZYNQMP_PM_RESET_SDIO1,
-       ZYNQMP_PM_RESET_CAN0,
-       ZYNQMP_PM_RESET_CAN1,
-       ZYNQMP_PM_RESET_I2C0,
-       ZYNQMP_PM_RESET_I2C1,
-       ZYNQMP_PM_RESET_TTC0,
-       ZYNQMP_PM_RESET_TTC1,
-       ZYNQMP_PM_RESET_TTC2,
-       ZYNQMP_PM_RESET_TTC3,
-       ZYNQMP_PM_RESET_SWDT_CRL,
-       ZYNQMP_PM_RESET_NAND,
-       ZYNQMP_PM_RESET_ADMA,
-       ZYNQMP_PM_RESET_GPIO,
-       ZYNQMP_PM_RESET_IOU_CC,
-       ZYNQMP_PM_RESET_TIMESTAMP,
-       ZYNQMP_PM_RESET_RPU_R50,
-       ZYNQMP_PM_RESET_RPU_R51,
-       ZYNQMP_PM_RESET_RPU_AMBA,
-       ZYNQMP_PM_RESET_OCM,
-       ZYNQMP_PM_RESET_RPU_PGE,
-       ZYNQMP_PM_RESET_USB0_CORERESET,
-       ZYNQMP_PM_RESET_USB1_CORERESET,
-       ZYNQMP_PM_RESET_USB0_HIBERRESET,
-       ZYNQMP_PM_RESET_USB1_HIBERRESET,
-       ZYNQMP_PM_RESET_USB0_APB,
-       ZYNQMP_PM_RESET_USB1_APB,
-       ZYNQMP_PM_RESET_IPI,
-       ZYNQMP_PM_RESET_APM_LPD,
-       ZYNQMP_PM_RESET_RTC,
-       ZYNQMP_PM_RESET_SYSMON,
-       ZYNQMP_PM_RESET_AFI_FM6,
-       ZYNQMP_PM_RESET_LPD_SWDT,
-       ZYNQMP_PM_RESET_FPD,
-       ZYNQMP_PM_RESET_RPU_DBG1,
-       ZYNQMP_PM_RESET_RPU_DBG0,
-       ZYNQMP_PM_RESET_DBG_LPD,
-       ZYNQMP_PM_RESET_DBG_FPD,
-       ZYNQMP_PM_RESET_APLL,
-       ZYNQMP_PM_RESET_DPLL,
-       ZYNQMP_PM_RESET_VPLL,
-       ZYNQMP_PM_RESET_IOPLL,
-       ZYNQMP_PM_RESET_RPLL,
-       ZYNQMP_PM_RESET_GPO3_PL_0,
-       ZYNQMP_PM_RESET_GPO3_PL_1,
-       ZYNQMP_PM_RESET_GPO3_PL_2,
-       ZYNQMP_PM_RESET_GPO3_PL_3,
-       ZYNQMP_PM_RESET_GPO3_PL_4,
-       ZYNQMP_PM_RESET_GPO3_PL_5,
-       ZYNQMP_PM_RESET_GPO3_PL_6,
-       ZYNQMP_PM_RESET_GPO3_PL_7,
-       ZYNQMP_PM_RESET_GPO3_PL_8,
-       ZYNQMP_PM_RESET_GPO3_PL_9,
-       ZYNQMP_PM_RESET_GPO3_PL_10,
-       ZYNQMP_PM_RESET_GPO3_PL_11,
-       ZYNQMP_PM_RESET_GPO3_PL_12,
-       ZYNQMP_PM_RESET_GPO3_PL_13,
-       ZYNQMP_PM_RESET_GPO3_PL_14,
-       ZYNQMP_PM_RESET_GPO3_PL_15,
-       ZYNQMP_PM_RESET_GPO3_PL_16,
-       ZYNQMP_PM_RESET_GPO3_PL_17,
-       ZYNQMP_PM_RESET_GPO3_PL_18,
-       ZYNQMP_PM_RESET_GPO3_PL_19,
-       ZYNQMP_PM_RESET_GPO3_PL_20,
-       ZYNQMP_PM_RESET_GPO3_PL_21,
-       ZYNQMP_PM_RESET_GPO3_PL_22,
-       ZYNQMP_PM_RESET_GPO3_PL_23,
-       ZYNQMP_PM_RESET_GPO3_PL_24,
-       ZYNQMP_PM_RESET_GPO3_PL_25,
-       ZYNQMP_PM_RESET_GPO3_PL_26,
-       ZYNQMP_PM_RESET_GPO3_PL_27,
-       ZYNQMP_PM_RESET_GPO3_PL_28,
-       ZYNQMP_PM_RESET_GPO3_PL_29,
-       ZYNQMP_PM_RESET_GPO3_PL_30,
-       ZYNQMP_PM_RESET_GPO3_PL_31,
-       ZYNQMP_PM_RESET_RPU_LS,
-       ZYNQMP_PM_RESET_PS_ONLY,
-       ZYNQMP_PM_RESET_PL,
-       ZYNQMP_PM_RESET_PS_PL0,
-       ZYNQMP_PM_RESET_PS_PL1,
-       ZYNQMP_PM_RESET_PS_PL2,
-       ZYNQMP_PM_RESET_PS_PL3,
+       ZYNQMP_PM_RESET_PCIE_BRIDGE = 1001,
+       ZYNQMP_PM_RESET_PCIE_CTRL = 1002,
+       ZYNQMP_PM_RESET_DP = 1003,
+       ZYNQMP_PM_RESET_SWDT_CRF = 1004,
+       ZYNQMP_PM_RESET_AFI_FM5 = 1005,
+       ZYNQMP_PM_RESET_AFI_FM4 = 1006,
+       ZYNQMP_PM_RESET_AFI_FM3 = 1007,
+       ZYNQMP_PM_RESET_AFI_FM2 = 1008,
+       ZYNQMP_PM_RESET_AFI_FM1 = 1009,
+       ZYNQMP_PM_RESET_AFI_FM0 = 1010,
+       ZYNQMP_PM_RESET_GDMA = 1011,
+       ZYNQMP_PM_RESET_GPU_PP1 = 1012,
+       ZYNQMP_PM_RESET_GPU_PP0 = 1013,
+       ZYNQMP_PM_RESET_GPU = 1014,
+       ZYNQMP_PM_RESET_GT = 1015,
+       ZYNQMP_PM_RESET_SATA = 1016,
+       ZYNQMP_PM_RESET_ACPU3_PWRON = 1017,
+       ZYNQMP_PM_RESET_ACPU2_PWRON = 1018,
+       ZYNQMP_PM_RESET_ACPU1_PWRON = 1019,
+       ZYNQMP_PM_RESET_ACPU0_PWRON = 1020,
+       ZYNQMP_PM_RESET_APU_L2 = 1021,
+       ZYNQMP_PM_RESET_ACPU3 = 1022,
+       ZYNQMP_PM_RESET_ACPU2 = 1023,
+       ZYNQMP_PM_RESET_ACPU1 = 1024,
+       ZYNQMP_PM_RESET_ACPU0 = 1025,
+       ZYNQMP_PM_RESET_DDR = 1026,
+       ZYNQMP_PM_RESET_APM_FPD = 1027,
+       ZYNQMP_PM_RESET_SOFT = 1028,
+       ZYNQMP_PM_RESET_GEM0 = 1029,
+       ZYNQMP_PM_RESET_GEM1 = 1030,
+       ZYNQMP_PM_RESET_GEM2 = 1031,
+       ZYNQMP_PM_RESET_GEM3 = 1032,
+       ZYNQMP_PM_RESET_QSPI = 1033,
+       ZYNQMP_PM_RESET_UART0 = 1034,
+       ZYNQMP_PM_RESET_UART1 = 1035,
+       ZYNQMP_PM_RESET_SPI0 = 1036,
+       ZYNQMP_PM_RESET_SPI1 = 1037,
+       ZYNQMP_PM_RESET_SDIO0 = 1038,
+       ZYNQMP_PM_RESET_SDIO1 = 1039,
+       ZYNQMP_PM_RESET_CAN0 = 1040,
+       ZYNQMP_PM_RESET_CAN1 = 1041,
+       ZYNQMP_PM_RESET_I2C0 = 1042,
+       ZYNQMP_PM_RESET_I2C1 = 1043,
+       ZYNQMP_PM_RESET_TTC0 = 1044,
+       ZYNQMP_PM_RESET_TTC1 = 1045,
+       ZYNQMP_PM_RESET_TTC2 = 1046,
+       ZYNQMP_PM_RESET_TTC3 = 1047,
+       ZYNQMP_PM_RESET_SWDT_CRL = 1048,
+       ZYNQMP_PM_RESET_NAND = 1049,
+       ZYNQMP_PM_RESET_ADMA = 1050,
+       ZYNQMP_PM_RESET_GPIO = 1051,
+       ZYNQMP_PM_RESET_IOU_CC = 1052,
+       ZYNQMP_PM_RESET_TIMESTAMP = 1053,
+       ZYNQMP_PM_RESET_RPU_R50 = 1054,
+       ZYNQMP_PM_RESET_RPU_R51 = 1055,
+       ZYNQMP_PM_RESET_RPU_AMBA = 1056,
+       ZYNQMP_PM_RESET_OCM = 1057,
+       ZYNQMP_PM_RESET_RPU_PGE = 1058,
+       ZYNQMP_PM_RESET_USB0_CORERESET = 1059,
+       ZYNQMP_PM_RESET_USB1_CORERESET = 1060,
+       ZYNQMP_PM_RESET_USB0_HIBERRESET = 1061,
+       ZYNQMP_PM_RESET_USB1_HIBERRESET = 1062,
+       ZYNQMP_PM_RESET_USB0_APB = 1063,
+       ZYNQMP_PM_RESET_USB1_APB = 1064,
+       ZYNQMP_PM_RESET_IPI = 1065,
+       ZYNQMP_PM_RESET_APM_LPD = 1066,
+       ZYNQMP_PM_RESET_RTC = 1067,
+       ZYNQMP_PM_RESET_SYSMON = 1068,
+       ZYNQMP_PM_RESET_AFI_FM6 = 1069,
+       ZYNQMP_PM_RESET_LPD_SWDT = 1070,
+       ZYNQMP_PM_RESET_FPD = 1071,
+       ZYNQMP_PM_RESET_RPU_DBG1 = 1072,
+       ZYNQMP_PM_RESET_RPU_DBG0 = 1073,
+       ZYNQMP_PM_RESET_DBG_LPD = 1074,
+       ZYNQMP_PM_RESET_DBG_FPD = 1075,
+       ZYNQMP_PM_RESET_APLL = 1076,
+       ZYNQMP_PM_RESET_DPLL = 1077,
+       ZYNQMP_PM_RESET_VPLL = 1078,
+       ZYNQMP_PM_RESET_IOPLL = 1079,
+       ZYNQMP_PM_RESET_RPLL = 1080,
+       ZYNQMP_PM_RESET_GPO3_PL_0 = 1081,
+       ZYNQMP_PM_RESET_GPO3_PL_1 = 1082,
+       ZYNQMP_PM_RESET_GPO3_PL_2 = 1083,
+       ZYNQMP_PM_RESET_GPO3_PL_3 = 1084,
+       ZYNQMP_PM_RESET_GPO3_PL_4 = 1085,
+       ZYNQMP_PM_RESET_GPO3_PL_5 = 1086,
+       ZYNQMP_PM_RESET_GPO3_PL_6 = 1087,
+       ZYNQMP_PM_RESET_GPO3_PL_7 = 1088,
+       ZYNQMP_PM_RESET_GPO3_PL_8 = 1089,
+       ZYNQMP_PM_RESET_GPO3_PL_9 = 1090,
+       ZYNQMP_PM_RESET_GPO3_PL_10 = 1091,
+       ZYNQMP_PM_RESET_GPO3_PL_11 = 1092,
+       ZYNQMP_PM_RESET_GPO3_PL_12 = 1093,
+       ZYNQMP_PM_RESET_GPO3_PL_13 = 1094,
+       ZYNQMP_PM_RESET_GPO3_PL_14 = 1095,
+       ZYNQMP_PM_RESET_GPO3_PL_15 = 1096,
+       ZYNQMP_PM_RESET_GPO3_PL_16 = 1097,
+       ZYNQMP_PM_RESET_GPO3_PL_17 = 1098,
+       ZYNQMP_PM_RESET_GPO3_PL_18 = 1099,
+       ZYNQMP_PM_RESET_GPO3_PL_19 = 1100,
+       ZYNQMP_PM_RESET_GPO3_PL_20 = 1101,
+       ZYNQMP_PM_RESET_GPO3_PL_21 = 1102,
+       ZYNQMP_PM_RESET_GPO3_PL_22 = 1103,
+       ZYNQMP_PM_RESET_GPO3_PL_23 = 1104,
+       ZYNQMP_PM_RESET_GPO3_PL_24 = 1105,
+       ZYNQMP_PM_RESET_GPO3_PL_25 = 1106,
+       ZYNQMP_PM_RESET_GPO3_PL_26 = 1107,
+       ZYNQMP_PM_RESET_GPO3_PL_27 = 1108,
+       ZYNQMP_PM_RESET_GPO3_PL_28 = 1109,
+       ZYNQMP_PM_RESET_GPO3_PL_29 = 1110,
+       ZYNQMP_PM_RESET_GPO3_PL_30 = 1111,
+       ZYNQMP_PM_RESET_GPO3_PL_31 = 1112,
+       ZYNQMP_PM_RESET_RPU_LS = 1113,
+       ZYNQMP_PM_RESET_PS_ONLY = 1114,
+       ZYNQMP_PM_RESET_PL = 1115,
+       ZYNQMP_PM_RESET_PS_PL0 = 1116,
+       ZYNQMP_PM_RESET_PS_PL1 = 1117,
+       ZYNQMP_PM_RESET_PS_PL2 = 1118,
+       ZYNQMP_PM_RESET_PS_PL3 = 1119,
        ZYNQMP_PM_RESET_END = ZYNQMP_PM_RESET_PS_PL3
 };
 
 enum zynqmp_pm_suspend_reason {
        SUSPEND_POWER_REQUEST = 201,
-       SUSPEND_ALERT,
-       SUSPEND_SYSTEM_SHUTDOWN,
+       SUSPEND_ALERT = 202,
+       SUSPEND_SYSTEM_SHUTDOWN = 203,
 };
 
 enum zynqmp_pm_request_ack {
        ZYNQMP_PM_REQUEST_ACK_NO = 1,
-       ZYNQMP_PM_REQUEST_ACK_BLOCKING,
-       ZYNQMP_PM_REQUEST_ACK_NON_BLOCKING,
+       ZYNQMP_PM_REQUEST_ACK_BLOCKING = 2,
+       ZYNQMP_PM_REQUEST_ACK_NON_BLOCKING = 3,
 };
 
 enum pm_node_id {
        NODE_SD_0 = 39,
-       NODE_SD_1,
+       NODE_SD_1 = 40,
 };
 
 enum tap_delay_type {
        PM_TAPDELAY_INPUT = 0,
-       PM_TAPDELAY_OUTPUT,
+       PM_TAPDELAY_OUTPUT = 1,
 };
 
 enum dll_reset_type {
-       PM_DLL_RESET_ASSERT,
-       PM_DLL_RESET_RELEASE,
-       PM_DLL_RESET_PULSE,
+       PM_DLL_RESET_ASSERT = 0,
+       PM_DLL_RESET_RELEASE = 1,
+       PM_DLL_RESET_PULSE = 2,
 };
 
 enum zynqmp_pm_shutdown_type {
-       ZYNQMP_PM_SHUTDOWN_TYPE_SHUTDOWN,
-       ZYNQMP_PM_SHUTDOWN_TYPE_RESET,
-       ZYNQMP_PM_SHUTDOWN_TYPE_SETSCOPE_ONLY,
+       ZYNQMP_PM_SHUTDOWN_TYPE_SHUTDOWN = 0,
+       ZYNQMP_PM_SHUTDOWN_TYPE_RESET = 1,
+       ZYNQMP_PM_SHUTDOWN_TYPE_SETSCOPE_ONLY = 2,
 };
 
 enum zynqmp_pm_shutdown_subtype {
-       ZYNQMP_PM_SHUTDOWN_SUBTYPE_SUBSYSTEM,
-       ZYNQMP_PM_SHUTDOWN_SUBTYPE_PS_ONLY,
-       ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM,
+       ZYNQMP_PM_SHUTDOWN_SUBTYPE_SUBSYSTEM = 0,
+       ZYNQMP_PM_SHUTDOWN_SUBTYPE_PS_ONLY = 1,
+       ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM = 2,
 };
 
 /**
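The explicit initializers added throughout these enums matter because the values cross the kernel/firmware ABI boundary: the PMU firmware interprets the raw numbers, so relying on implicit C enumeration would leave the ABI fragile to reordering. As a rough usage sketch, assuming the existing zynqmp_pm_system_shutdown() firmware wrapper (not part of this hunk):

	/* request a full reset, scoped to the whole system */
	ret = zynqmp_pm_system_shutdown(ZYNQMP_PM_SHUTDOWN_TYPE_RESET,
					ZYNQMP_PM_SHUTDOWN_SUBTYPE_SYSTEM);
	if (ret)
		pr_err("shutdown request failed: %d\n", ret);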
diff --git a/include/linux/fortify-string.h b/include/linux/fortify-string.h
new file mode 100644 (file)
index 0000000..c1be374
--- /dev/null
@@ -0,0 +1,302 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_FORTIFY_STRING_H_
+#define _LINUX_FORTIFY_STRING_H_
+
+
+#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
+extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
+extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
+extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy);
+extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove);
+extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset);
+extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat);
+extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy);
+extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
+extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
+extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
+#else
+#define __underlying_memchr    __builtin_memchr
+#define __underlying_memcmp    __builtin_memcmp
+#define __underlying_memcpy    __builtin_memcpy
+#define __underlying_memmove   __builtin_memmove
+#define __underlying_memset    __builtin_memset
+#define __underlying_strcat    __builtin_strcat
+#define __underlying_strcpy    __builtin_strcpy
+#define __underlying_strlen    __builtin_strlen
+#define __underlying_strncat   __builtin_strncat
+#define __underlying_strncpy   __builtin_strncpy
+#endif
+
+__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 1);
+
+       if (__builtin_constant_p(size) && p_size < size)
+               __write_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __underlying_strncpy(p, q, size);
+}
+
+__FORTIFY_INLINE char *strcat(char *p, const char *q)
+{
+       size_t p_size = __builtin_object_size(p, 1);
+
+       if (p_size == (size_t)-1)
+               return __underlying_strcat(p, q);
+       if (strlcat(p, q, p_size) >= p_size)
+               fortify_panic(__func__);
+       return p;
+}
+
+__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
+{
+       __kernel_size_t ret;
+       size_t p_size = __builtin_object_size(p, 1);
+
+       /* Work around gcc excess stack consumption issue */
+       if (p_size == (size_t)-1 ||
+               (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
+               return __underlying_strlen(p);
+       ret = strnlen(p, p_size);
+       if (p_size <= ret)
+               fortify_panic(__func__);
+       return ret;
+}
+
+extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
+__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
+{
+       size_t p_size = __builtin_object_size(p, 1);
+       __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
+
+       if (p_size <= ret && maxlen != ret)
+               fortify_panic(__func__);
+       return ret;
+}
+
+/* defined after fortified strlen to reuse it */
+extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
+__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
+{
+       size_t ret;
+       size_t p_size = __builtin_object_size(p, 1);
+       size_t q_size = __builtin_object_size(q, 1);
+
+       if (p_size == (size_t)-1 && q_size == (size_t)-1)
+               return __real_strlcpy(p, q, size);
+       ret = strlen(q);
+       if (size) {
+               size_t len = (ret >= size) ? size - 1 : ret;
+
+               if (__builtin_constant_p(len) && len >= p_size)
+                       __write_overflow();
+               if (len >= p_size)
+                       fortify_panic(__func__);
+               __underlying_memcpy(p, q, len);
+               p[len] = '\0';
+       }
+       return ret;
+}
+
+/* defined after fortified strnlen to reuse it */
+extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
+__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size)
+{
+       size_t len;
+       /* Use string size rather than possible enclosing struct size. */
+       size_t p_size = __builtin_object_size(p, 1);
+       size_t q_size = __builtin_object_size(q, 1);
+
+       /* If we cannot get the sizes of p and q, default to calling strscpy. */
+       if (p_size == (size_t) -1 && q_size == (size_t) -1)
+               return __real_strscpy(p, q, size);
+
+       /*
+        * If size can be known at compile time and is greater than
+        * p_size, generate a compile time write overflow error.
+        */
+       if (__builtin_constant_p(size) && size > p_size)
+               __write_overflow();
+
+       /*
+        * This call protects against read overflow, because len will default
+        * to q's length if that is smaller than size.
+        */
+       len = strnlen(q, size);
+       /*
+        * If len equals size, we will copy only size bytes which leads to
+        * -E2BIG being returned.
+        * Otherwise we will copy len + 1 because of the final '\0'.
+        */
+       len = len == size ? size : len + 1;
+
+       /*
+        * Generate a runtime write overflow error if len is greater than
+        * p_size.
+        */
+       if (len > p_size)
+               fortify_panic(__func__);
+
+       /*
+        * We can now safely call vanilla strscpy because we are protected from:
+        * 1. Read overflow thanks to call to strnlen().
+        * 2. Write overflow thanks to above ifs.
+        */
+       return __real_strscpy(p, q, len);
+}
+
+/* defined after fortified strlen and strnlen to reuse them */
+__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
+{
+       size_t p_len, copy_len;
+       size_t p_size = __builtin_object_size(p, 1);
+       size_t q_size = __builtin_object_size(q, 1);
+
+       if (p_size == (size_t)-1 && q_size == (size_t)-1)
+               return __underlying_strncat(p, q, count);
+       p_len = strlen(p);
+       copy_len = strnlen(q, count);
+       if (p_size < p_len + copy_len + 1)
+               fortify_panic(__func__);
+       __underlying_memcpy(p + p_len, q, copy_len);
+       p[p_len + copy_len] = '\0';
+       return p;
+}
+
+__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+
+       if (__builtin_constant_p(size) && p_size < size)
+               __write_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __underlying_memset(p, c, size);
+}
+
+__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+
+       if (__builtin_constant_p(size)) {
+               if (p_size < size)
+                       __write_overflow();
+               if (q_size < size)
+                       __read_overflow2();
+       }
+       if (p_size < size || q_size < size)
+               fortify_panic(__func__);
+       return __underlying_memcpy(p, q, size);
+}
+
+__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+
+       if (__builtin_constant_p(size)) {
+               if (p_size < size)
+                       __write_overflow();
+               if (q_size < size)
+                       __read_overflow2();
+       }
+       if (p_size < size || q_size < size)
+               fortify_panic(__func__);
+       return __underlying_memmove(p, q, size);
+}
+
+extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
+__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __real_memscan(p, c, size);
+}
+
+__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+       size_t q_size = __builtin_object_size(q, 0);
+
+       if (__builtin_constant_p(size)) {
+               if (p_size < size)
+                       __read_overflow();
+               if (q_size < size)
+                       __read_overflow2();
+       }
+       if (p_size < size || q_size < size)
+               fortify_panic(__func__);
+       return __underlying_memcmp(p, q, size);
+}
+
+__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __underlying_memchr(p, c, size);
+}
+
+void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
+__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __real_memchr_inv(p, c, size);
+}
+
+extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
+__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
+{
+       size_t p_size = __builtin_object_size(p, 0);
+
+       if (__builtin_constant_p(size) && p_size < size)
+               __read_overflow();
+       if (p_size < size)
+               fortify_panic(__func__);
+       return __real_kmemdup(p, size, gfp);
+}
+
+/* defined after fortified strlen and memcpy to reuse them */
+__FORTIFY_INLINE char *strcpy(char *p, const char *q)
+{
+       size_t p_size = __builtin_object_size(p, 1);
+       size_t q_size = __builtin_object_size(q, 1);
+       size_t size;
+
+       if (p_size == (size_t)-1 && q_size == (size_t)-1)
+               return __underlying_strcpy(p, q);
+       size = strlen(q) + 1;
+       /* test here to use the more stringent object size */
+       if (p_size < size)
+               fortify_panic(__func__);
+       memcpy(p, q, size);
+       return p;
+}
+
+/* Don't use these outside the FORTIFY_SOURCE implementation */
+#undef __underlying_memchr
+#undef __underlying_memcmp
+#undef __underlying_memcpy
+#undef __underlying_memmove
+#undef __underlying_memset
+#undef __underlying_strcat
+#undef __underlying_strcpy
+#undef __underlying_strlen
+#undef __underlying_strncat
+#undef __underlying_strncpy
+
+#endif /* _LINUX_FORTIFY_STRING_H_ */
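The checks above all hinge on __builtin_object_size(), which the compiler resolves to the known size of the destination (or (size_t)-1 when it cannot tell) at compile time; constant-size overflows become build errors via __write_overflow()/__read_overflow(), and everything else falls back to the runtime fortify_panic() checks. A minimal userspace sketch of the underlying builtin, separate from this patch:

	#include <stdio.h>
	#include <string.h>

	int main(void)
	{
		char buf[8];

		/* Resolves to 8 at compile time for the array itself;
		 * a fortified memcpy() compares this against constant
		 * copy sizes and can reject overflows before runtime. */
		printf("bos(buf): %zu\n", __builtin_object_size(buf, 1));

		memcpy(buf, "1234567", 8);	/* 7 chars + NUL: fits */
		printf("%s\n", buf);
		return 0;
	}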
index 43ba79d..ec8f3dd 100644 (file)
@@ -39,6 +39,8 @@
 #include <linux/fs_types.h>
 #include <linux/build_bug.h>
 #include <linux/stddef.h>
+#include <linux/mount.h>
+#include <linux/cred.h>
 
 #include <asm/byteorder.h>
 #include <uapi/linux/fs.h>
@@ -1572,6 +1574,52 @@ static inline void i_gid_write(struct inode *inode, gid_t gid)
        inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid);
 }
 
+static inline kuid_t kuid_into_mnt(struct user_namespace *mnt_userns,
+                                  kuid_t kuid)
+{
+       return make_kuid(mnt_userns, __kuid_val(kuid));
+}
+
+static inline kgid_t kgid_into_mnt(struct user_namespace *mnt_userns,
+                                  kgid_t kgid)
+{
+       return make_kgid(mnt_userns, __kgid_val(kgid));
+}
+
+static inline kuid_t i_uid_into_mnt(struct user_namespace *mnt_userns,
+                                   const struct inode *inode)
+{
+       return kuid_into_mnt(mnt_userns, inode->i_uid);
+}
+
+static inline kgid_t i_gid_into_mnt(struct user_namespace *mnt_userns,
+                                   const struct inode *inode)
+{
+       return kgid_into_mnt(mnt_userns, inode->i_gid);
+}
+
+static inline kuid_t kuid_from_mnt(struct user_namespace *mnt_userns,
+                                  kuid_t kuid)
+{
+       return KUIDT_INIT(from_kuid(mnt_userns, kuid));
+}
+
+static inline kgid_t kgid_from_mnt(struct user_namespace *mnt_userns,
+                                  kgid_t kgid)
+{
+       return KGIDT_INIT(from_kgid(mnt_userns, kgid));
+}
+
+static inline kuid_t fsuid_into_mnt(struct user_namespace *mnt_userns)
+{
+       return kuid_from_mnt(mnt_userns, current_fsuid());
+}
+
+static inline kgid_t fsgid_into_mnt(struct user_namespace *mnt_userns)
+{
+       return kgid_from_mnt(mnt_userns, current_fsgid());
+}
+
 extern struct timespec64 current_time(struct inode *inode);
 
 /*
@@ -1714,28 +1762,48 @@ static inline bool sb_start_intwrite_trylock(struct super_block *sb)
        return __sb_start_write_trylock(sb, SB_FREEZE_FS);
 }
 
-
-extern bool inode_owner_or_capable(const struct inode *inode);
+bool inode_owner_or_capable(struct user_namespace *mnt_userns,
+                           const struct inode *inode);
 
 /*
 * VFS helper functions.
  */
-extern int vfs_create(struct inode *, struct dentry *, umode_t, bool);
-extern int vfs_mkdir(struct inode *, struct dentry *, umode_t);
-extern int vfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
-extern int vfs_symlink(struct inode *, struct dentry *, const char *);
-extern int vfs_link(struct dentry *, struct inode *, struct dentry *, struct inode **);
-extern int vfs_rmdir(struct inode *, struct dentry *);
-extern int vfs_unlink(struct inode *, struct dentry *, struct inode **);
-extern int vfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *, struct inode **, unsigned int);
+int vfs_create(struct user_namespace *, struct inode *,
+              struct dentry *, umode_t, bool);
+int vfs_mkdir(struct user_namespace *, struct inode *,
+             struct dentry *, umode_t);
+int vfs_mknod(struct user_namespace *, struct inode *, struct dentry *,
+              umode_t, dev_t);
+int vfs_symlink(struct user_namespace *, struct inode *,
+               struct dentry *, const char *);
+int vfs_link(struct dentry *, struct user_namespace *, struct inode *,
+            struct dentry *, struct inode **);
+int vfs_rmdir(struct user_namespace *, struct inode *, struct dentry *);
+int vfs_unlink(struct user_namespace *, struct inode *, struct dentry *,
+              struct inode **);
+
+struct renamedata {
+       struct user_namespace *old_mnt_userns;
+       struct inode *old_dir;
+       struct dentry *old_dentry;
+       struct user_namespace *new_mnt_userns;
+       struct inode *new_dir;
+       struct dentry *new_dentry;
+       struct inode **delegated_inode;
+       unsigned int flags;
+} __randomize_layout;
 
-static inline int vfs_whiteout(struct inode *dir, struct dentry *dentry)
+int vfs_rename(struct renamedata *);
+
+static inline int vfs_whiteout(struct user_namespace *mnt_userns,
+                              struct inode *dir, struct dentry *dentry)
 {
-       return vfs_mknod(dir, dentry, S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
+       return vfs_mknod(mnt_userns, dir, dentry, S_IFCHR | WHITEOUT_MODE,
+                        WHITEOUT_DEV);
 }
 
-extern struct dentry *vfs_tmpfile(struct dentry *dentry, umode_t mode,
-                                 int open_flag);
+struct dentry *vfs_tmpfile(struct user_namespace *mnt_userns,
+                          struct dentry *dentry, umode_t mode, int open_flag);
 
 int vfs_mkobj(struct dentry *, umode_t,
                int (*f)(struct dentry *, umode_t, void *),
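With vfs_rename() collapsed to a single struct renamedata argument, callers bundle the source and target state instead of passing eight positional parameters, which also keeps future extensions (such as the per-mount user namespaces above) out of every call site's signature. A caller sketch, with illustrative variable names:

	struct renamedata rd = {
		.old_mnt_userns	= mnt_userns,
		.old_dir	= d_inode(old_parent),
		.old_dentry	= old_dentry,
		.new_mnt_userns	= mnt_userns,
		.new_dir	= d_inode(new_parent),
		.new_dentry	= new_dentry,
		.flags		= 0,
	};

	error = vfs_rename(&rd);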
@@ -1757,8 +1825,8 @@ extern long compat_ptr_ioctl(struct file *file, unsigned int cmd,
 /*
  * VFS file helper functions.
  */
-extern void inode_init_owner(struct inode *inode, const struct inode *dir,
-                       umode_t mode);
+void inode_init_owner(struct user_namespace *mnt_userns, struct inode *inode,
+                     const struct inode *dir, umode_t mode);
 extern bool may_open_dev(const struct path *path);
 
 /*
@@ -1862,22 +1930,28 @@ struct file_operations {
 struct inode_operations {
        struct dentry * (*lookup) (struct inode *,struct dentry *, unsigned int);
        const char * (*get_link) (struct dentry *, struct inode *, struct delayed_call *);
-       int (*permission) (struct inode *, int);
+       int (*permission) (struct user_namespace *, struct inode *, int);
        struct posix_acl * (*get_acl)(struct inode *, int);
 
        int (*readlink) (struct dentry *, char __user *,int);
 
-       int (*create) (struct inode *,struct dentry *, umode_t, bool);
+       int (*create) (struct user_namespace *, struct inode *,struct dentry *,
+                      umode_t, bool);
        int (*link) (struct dentry *,struct inode *,struct dentry *);
        int (*unlink) (struct inode *,struct dentry *);
-       int (*symlink) (struct inode *,struct dentry *,const char *);
-       int (*mkdir) (struct inode *,struct dentry *,umode_t);
+       int (*symlink) (struct user_namespace *, struct inode *,struct dentry *,
+                       const char *);
+       int (*mkdir) (struct user_namespace *, struct inode *,struct dentry *,
+                     umode_t);
        int (*rmdir) (struct inode *,struct dentry *);
-       int (*mknod) (struct inode *,struct dentry *,umode_t,dev_t);
-       int (*rename) (struct inode *, struct dentry *,
+       int (*mknod) (struct user_namespace *, struct inode *,struct dentry *,
+                     umode_t,dev_t);
+       int (*rename) (struct user_namespace *, struct inode *, struct dentry *,
                        struct inode *, struct dentry *, unsigned int);
-       int (*setattr) (struct dentry *, struct iattr *);
-       int (*getattr) (const struct path *, struct kstat *, u32, unsigned int);
+       int (*setattr) (struct user_namespace *, struct dentry *,
+                       struct iattr *);
+       int (*getattr) (struct user_namespace *, const struct path *,
+                       struct kstat *, u32, unsigned int);
        ssize_t (*listxattr) (struct dentry *, char *, size_t);
        int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
                      u64 len);
@@ -1885,8 +1959,10 @@ struct inode_operations {
        int (*atomic_open)(struct inode *, struct dentry *,
                           struct file *, unsigned open_flag,
                           umode_t create_mode);
-       int (*tmpfile) (struct inode *, struct dentry *, umode_t);
-       int (*set_acl)(struct inode *, struct posix_acl *, int);
+       int (*tmpfile) (struct user_namespace *, struct inode *,
+                       struct dentry *, umode_t);
+       int (*set_acl)(struct user_namespace *, struct inode *,
+                      struct posix_acl *, int);
 } ____cacheline_aligned;
 
 static inline ssize_t call_read_iter(struct file *file, struct kiocb *kio,
@@ -2035,9 +2111,11 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags
 #define IS_WHITEOUT(inode)     (S_ISCHR(inode->i_mode) && \
                                 (inode)->i_rdev == WHITEOUT_DEV)
 
-static inline bool HAS_UNMAPPED_ID(struct inode *inode)
+static inline bool HAS_UNMAPPED_ID(struct user_namespace *mnt_userns,
+                                  struct inode *inode)
 {
-       return !uid_valid(inode->i_uid) || !gid_valid(inode->i_gid);
+       return !uid_valid(i_uid_into_mnt(mnt_userns, inode)) ||
+              !gid_valid(i_gid_into_mnt(mnt_userns, inode));
 }
 
 static inline enum rw_hint file_write_hint(struct file *file)
@@ -2254,6 +2332,7 @@ struct file_system_type {
 #define FS_HAS_SUBTYPE         4
 #define FS_USERNS_MOUNT                8       /* Can be mounted by userns root */
 #define FS_DISALLOW_NOTIFY_PERM        16      /* Disable fanotify permission events */
+#define FS_ALLOW_IDMAP         32      /* FS has been updated to handle vfs idmappings. */
 #define FS_THP_SUPPORT         8192    /* Remove once all fs converted */
 #define FS_RENAME_DOES_D_MOVE  32768   /* FS will handle d_move() during rename() internally. */
        int (*init_fs_context)(struct fs_context *);
@@ -2540,9 +2619,13 @@ struct filename {
 };
 static_assert(offsetof(struct filename, iname) % sizeof(long) == 0);
 
+static inline struct user_namespace *file_mnt_user_ns(struct file *file)
+{
+       return mnt_user_ns(file->f_path.mnt);
+}
 extern long vfs_truncate(const struct path *, loff_t);
-extern int do_truncate(struct dentry *, loff_t start, unsigned int time_attrs,
-                      struct file *filp);
+int do_truncate(struct user_namespace *, struct dentry *, loff_t start,
+               unsigned int time_attrs, struct file *filp);
 extern int vfs_fallocate(struct file *file, int mode, loff_t offset,
                        loff_t len);
 extern long do_sys_open(int dfd, const char __user *filename, int flags,
@@ -2779,10 +2862,22 @@ static inline int bmap(struct inode *inode,  sector_t *block)
 }
 #endif
 
-extern int notify_change(struct dentry *, struct iattr *, struct inode **);
-extern int inode_permission(struct inode *, int);
-extern int generic_permission(struct inode *, int);
-extern int __check_sticky(struct inode *dir, struct inode *inode);
+int notify_change(struct user_namespace *, struct dentry *,
+                 struct iattr *, struct inode **);
+int inode_permission(struct user_namespace *, struct inode *, int);
+int generic_permission(struct user_namespace *, struct inode *, int);
+static inline int file_permission(struct file *file, int mask)
+{
+       return inode_permission(file_mnt_user_ns(file),
+                               file_inode(file), mask);
+}
+static inline int path_permission(const struct path *path, int mask)
+{
+       return inode_permission(mnt_user_ns(path->mnt),
+                               d_inode(path->dentry), mask);
+}
+int __check_sticky(struct user_namespace *mnt_userns, struct inode *dir,
+                  struct inode *inode);
 
 static inline bool execute_ok(struct inode *inode)
 {
@@ -2985,8 +3080,8 @@ extern ssize_t generic_write_checks(struct kiocb *, struct iov_iter *);
 extern int generic_write_check_limits(struct file *file, loff_t pos,
                loff_t *count);
 extern int generic_file_rw_checks(struct file *file_in, struct file *file_out);
-extern ssize_t generic_file_buffered_read(struct kiocb *iocb,
-               struct iov_iter *to, ssize_t already_read);
+ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *to,
+               ssize_t already_read);
 extern ssize_t generic_file_read_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t __generic_file_write_iter(struct kiocb *, struct iov_iter *);
 extern ssize_t generic_file_write_iter(struct kiocb *, struct iov_iter *);
@@ -3113,7 +3208,7 @@ extern int __page_symlink(struct inode *inode, const char *symname, int len,
 extern int page_symlink(struct inode *inode, const char *symname, int len);
 extern const struct inode_operations page_symlink_inode_operations;
 extern void kfree_link(void *);
-extern void generic_fillattr(struct inode *, struct kstat *);
+void generic_fillattr(struct user_namespace *, struct inode *, struct kstat *);
 extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int);
 extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
 void __inode_add_bytes(struct inode *inode, loff_t bytes);
@@ -3163,15 +3258,18 @@ extern int dcache_dir_open(struct inode *, struct file *);
 extern int dcache_dir_close(struct inode *, struct file *);
 extern loff_t dcache_dir_lseek(struct file *, loff_t, int);
 extern int dcache_readdir(struct file *, struct dir_context *);
-extern int simple_setattr(struct dentry *, struct iattr *);
-extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int simple_setattr(struct user_namespace *, struct dentry *,
+                         struct iattr *);
+extern int simple_getattr(struct user_namespace *, const struct path *,
+                         struct kstat *, u32, unsigned int);
 extern int simple_statfs(struct dentry *, struct kstatfs *);
 extern int simple_open(struct inode *inode, struct file *file);
 extern int simple_link(struct dentry *, struct inode *, struct dentry *);
 extern int simple_unlink(struct inode *, struct dentry *);
 extern int simple_rmdir(struct inode *, struct dentry *);
-extern int simple_rename(struct inode *, struct dentry *,
-                        struct inode *, struct dentry *, unsigned int);
+extern int simple_rename(struct user_namespace *, struct inode *,
+                        struct dentry *, struct inode *, struct dentry *,
+                        unsigned int);
 extern void simple_recursive_removal(struct dentry *,
                               void (*callback)(struct dentry *));
 extern int noop_fsync(struct file *, loff_t, loff_t, int);
@@ -3229,9 +3327,10 @@ extern int buffer_migrate_page_norefs(struct address_space *,
 #define buffer_migrate_page_norefs NULL
 #endif
 
-extern int setattr_prepare(struct dentry *, struct iattr *);
+int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *);
 extern int inode_newsize_ok(const struct inode *, loff_t offset);
-extern void setattr_copy(struct inode *inode, const struct iattr *attr);
+void setattr_copy(struct user_namespace *, struct inode *inode,
+                 const struct iattr *attr);
 
 extern int file_update_time(struct file *file);
 
@@ -3395,12 +3494,13 @@ static inline bool is_sxid(umode_t mode)
        return (mode & S_ISUID) || ((mode & S_ISGID) && (mode & S_IXGRP));
 }
 
-static inline int check_sticky(struct inode *dir, struct inode *inode)
+static inline int check_sticky(struct user_namespace *mnt_userns,
+                              struct inode *dir, struct inode *inode)
 {
        if (!(dir->i_mode & S_ISVTX))
                return 0;
 
-       return __check_sticky(dir, inode);
+       return __check_sticky(mnt_userns, dir, inode);
 }
 
 static inline void inode_has_no_xattr(struct inode *inode)
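Taken together, these fs.h changes thread a struct user_namespace through every ownership- and permission-sensitive VFS path so that idmapped mounts can remap i_uid/i_gid per mount via i_uid_into_mnt()/i_gid_into_mnt(). On the filesystem side, a converted ->getattr ends up shaped like this sketch (names illustrative):

	static int foo_getattr(struct user_namespace *mnt_userns,
			       const struct path *path, struct kstat *stat,
			       u32 request_mask, unsigned int query_flags)
	{
		struct inode *inode = d_inode(path->dentry);

		/* report i_uid/i_gid as seen through the mount's idmapping */
		generic_fillattr(mnt_userns, inode, stat);
		return 0;
	}

Filesystems that have not opted in keep receiving &init_user_ns, for which the into_mnt()/from_mnt() helpers degrade to the identity mapping.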
index db24487..63b56ab 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/interrupt.h>
+#include <uapi/linux/fsl_mc.h>
 
 #define FSL_MC_VENDOR_FREESCALE        0x1957
 
@@ -209,8 +210,6 @@ struct fsl_mc_device {
 #define to_fsl_mc_device(_dev) \
        container_of(_dev, struct fsl_mc_device, dev)
 
-#define MC_CMD_NUM_OF_PARAMS   7
-
 struct mc_cmd_header {
        u8 src_id;
        u8 flags_hw;
@@ -220,11 +219,6 @@ struct mc_cmd_header {
        __le16 cmd_id;
 };
 
-struct fsl_mc_command {
-       __le64 header;
-       __le64 params[MC_CMD_NUM_OF_PARAMS];
-};
-
 enum mc_cmd_status {
        MC_CMD_STATUS_OK = 0x0, /* Completed successfully */
        MC_CMD_STATUS_READY = 0x1, /* Ready to be processed */
index 77414e4..ed4e67a 100644 (file)
@@ -11,6 +11,7 @@
 
 #include <linux/types.h>
 #include <linux/list.h>
+#include <linux/err.h>
 
 struct fwnode_operations;
 struct device;
@@ -18,9 +19,13 @@ struct device;
 /*
  * fwnode link flags
  *
- * LINKS_ADDED: The fwnode has already be parsed to add fwnode links.
+ * LINKS_ADDED:        The fwnode has already been parsed to add fwnode links.
+ * NOT_DEVICE: The fwnode will never be populated as a struct device.
+ * INITIALIZED: The hardware corresponding to fwnode has been initialized.
  */
 #define FWNODE_FLAG_LINKS_ADDED                BIT(0)
+#define FWNODE_FLAG_NOT_DEVICE         BIT(1)
+#define FWNODE_FLAG_INITIALIZED                BIT(2)
 
 struct fwnode_handle {
        struct fwnode_handle *secondary;
@@ -166,7 +171,20 @@ static inline void fwnode_init(struct fwnode_handle *fwnode,
        INIT_LIST_HEAD(&fwnode->suppliers);
 }
 
+static inline void fwnode_dev_initialized(struct fwnode_handle *fwnode,
+                                         bool initialized)
+{
+       if (IS_ERR_OR_NULL(fwnode))
+               return;
+
+       if (initialized)
+               fwnode->flags |= FWNODE_FLAG_INITIALIZED;
+       else
+               fwnode->flags &= ~FWNODE_FLAG_INITIALIZED;
+}
+
 extern u32 fw_devlink_get_flags(void);
+extern bool fw_devlink_is_strict(void);
 int fwnode_link_add(struct fwnode_handle *con, struct fwnode_handle *sup);
 void fwnode_links_purge(struct fwnode_handle *fwnode);
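FWNODE_FLAG_NOT_DEVICE and FWNODE_FLAG_INITIALIZED exist for fw_devlink's benefit: a consumer should not defer probe forever waiting on a supplier fwnode that will never be populated as a struct device. The intended call pattern, sketched:

	/* bus code, once the hardware behind a non-device fwnode
	 * has been set up and its consumers may safely probe: */
	fwnode_dev_initialized(fwnode, true);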
 
index 80544d5..8572a14 100644 (file)
@@ -8,6 +8,20 @@
 #include <linux/linkage.h>
 #include <linux/topology.h>
 
+/* The typedef is in types.h but we want the documentation here */
+#if 0
+/**
+ * typedef gfp_t - Memory allocation flags.
+ *
+ * GFP flags are commonly used throughout Linux to indicate how memory
+ * should be allocated.  The GFP acronym stands for get_free_pages(),
+ * the underlying memory allocation function.  Not every GFP flag is
+ * supported by every function which may allocate memory.  Most users
+ * will want to use a plain ``GFP_KERNEL``.
+ */
+typedef unsigned int __bitwise gfp_t;
+#endif
+
 struct vm_area_struct;
 
 /*
@@ -620,6 +634,8 @@ bool gfp_pfmemalloc_allowed(gfp_t gfp_mask);
 extern void pm_restrict_gfp_mask(void);
 extern void pm_restore_gfp_mask(void);
 
+extern gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma);
+
 #ifdef CONFIG_PM_SLEEP
 extern bool pm_suspended_storage(void);
 #else
index ef49307..c73b25b 100644 (file)
@@ -674,6 +674,8 @@ struct acpi_gpio_mapping {
  * get GpioIo type explicitly, this quirk may be used.
  */
 #define ACPI_GPIO_QUIRK_ONLY_GPIOIO            BIT(1)
+/* Use given pin as an absolute GPIO number in the system */
+#define ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER                BIT(2)
 
        unsigned int quirks;
 };
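ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER tells the ACPI GPIO core to treat the mapped pin as a global GPIO number rather than an index relative to a single controller, for firmware that encodes pins that way. A mapping-table sketch with illustrative values:

	static const struct acpi_gpio_params reset_gpio = { 0, 0, false };

	static const struct acpi_gpio_mapping foo_acpi_gpios[] = {
		{ "reset-gpios", &reset_gpio, 1,
		  ACPI_GPIO_QUIRK_ABSOLUTE_NUMBER },
		{ }
	};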
index 46bcef3..763802b 100644 (file)
@@ -150,7 +150,7 @@ int sensor_hub_remove_callback(struct hid_sensor_hub_device *hsdev,
 * @info:       return information about attribute after parsing report
 *
 * Parses report and returns the attribute information such as report id,
-* field index, units and exponet etc.
+* field index, units and exponent etc.
 */
 int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
                        u8 type,
@@ -167,7 +167,7 @@ int sensor_hub_input_get_attribute_info(struct hid_sensor_hub_device *hsdev,
 * @is_signed:   If true then fields < 32 bits will be sign-extended
 *
 * Issues a synchronous or asynchronous read request for an input attribute.
-* Returns data upto 32 bits.
+* Return: data up to 32 bits.
 */
 
 enum sensor_hub_read_flags {
@@ -205,8 +205,9 @@ int sensor_hub_set_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
 * @buffer:     buffer to copy output
 *
 * Used to get a field in feature report. For example this can get polling
-* interval, sensitivity, activate/deactivate state. On success it returns
-* number of bytes copied to buffer. On failure, it returns value < 0.
+* interval, sensitivity, activate/deactivate state.
+* Return: On success, it returns the number of bytes copied to buffer.
+* On failure, it returns value < 0.
 */
 int sensor_hub_get_feature(struct hid_sensor_hub_device *hsdev, u32 report_id,
                           u32 field_index, int buffer_size, void *buffer);
index c39d71e..ef702b3 100644 (file)
@@ -918,7 +918,7 @@ __u32 hid_field_extract(const struct hid_device *hid, __u8 *report,
 /**
  * hid_device_io_start - enable HID input during probe, remove
  *
- * @hid - the device
+ * @hid: the device
  *
  * This should only be called during probe or remove and only be
  * called by the thread calling probe or remove. It will allow
@@ -936,7 +936,7 @@ static inline void hid_device_io_start(struct hid_device *hid) {
 /**
  * hid_device_io_stop - disable HID input during probe, remove
  *
- * @hid - the device
+ * @hid: the device
  *
  * Should only be called after hid_device_io_start. It will prevent
  * incoming packets from going to the driver for the duration of
@@ -1010,6 +1010,13 @@ static inline void hid_map_usage(struct hid_input *hidinput,
 /**
  * hid_map_usage_clear - map usage input bits and clear the input bit
  *
+ * @hidinput: hidinput which we are interested in
+ * @usage: usage to fill in
+ * @bit: pointer to input->{}bit (out parameter)
+ * @max: maximal valid usage->code to consider later (out parameter)
+ * @type: input event type (EV_KEY, EV_REL, ...)
+ * @c: code which corresponds to this usage and type
+ *
  * The same as hid_map_usage, except the @c bit is also cleared in supported
  * bits (@bit).
  */
@@ -1084,7 +1091,7 @@ static inline void hid_hw_request(struct hid_device *hdev,
  * @rtype: HID report type
  * @reqtype: HID_REQ_GET_REPORT or HID_REQ_SET_REPORT
  *
- * @return: count of data transfered, negative if error
+ * Return: count of data transferred, negative if error
  *
  * Same behavior as hid_hw_request, but with raw buffers instead.
  */
@@ -1106,7 +1113,7 @@ static inline int hid_hw_raw_request(struct hid_device *hdev,
  * @buf: raw data to transfer
  * @len: length of buf
  *
- * @return: count of data transfered, negative if error
+ * Return: count of data transferred, negative if error
  */
 static inline int hid_hw_output_report(struct hid_device *hdev, __u8 *buf,
                                        size_t len)
index 1bbe96d..7902c7d 100644 (file)
@@ -127,11 +127,6 @@ static inline unsigned long totalhigh_pages(void)
        return (unsigned long)atomic_long_read(&_totalhigh_pages);
 }
 
-static inline void totalhigh_pages_inc(void)
-{
-       atomic_long_inc(&_totalhigh_pages);
-}
-
 static inline void totalhigh_pages_add(long count)
 {
        atomic_long_add(count, &_totalhigh_pages);
index d2c70d3..44170f3 100644 (file)
@@ -276,4 +276,60 @@ static inline void copy_highpage(struct page *to, struct page *from)
 
 #endif
 
+static inline void memcpy_page(struct page *dst_page, size_t dst_off,
+                              struct page *src_page, size_t src_off,
+                              size_t len)
+{
+       char *dst = kmap_local_page(dst_page);
+       char *src = kmap_local_page(src_page);
+
+       VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
+       memcpy(dst + dst_off, src + src_off, len);
+       kunmap_local(src);
+       kunmap_local(dst);
+}
+
+static inline void memmove_page(struct page *dst_page, size_t dst_off,
+                              struct page *src_page, size_t src_off,
+                              size_t len)
+{
+       char *dst = kmap_local_page(dst_page);
+       char *src = kmap_local_page(src_page);
+
+       VM_BUG_ON(dst_off + len > PAGE_SIZE || src_off + len > PAGE_SIZE);
+       memmove(dst + dst_off, src + src_off, len);
+       kunmap_local(src);
+       kunmap_local(dst);
+}
+
+static inline void memset_page(struct page *page, size_t offset, int val,
+                              size_t len)
+{
+       char *addr = kmap_local_page(page);
+
+       VM_BUG_ON(offset + len > PAGE_SIZE);
+       memset(addr + offset, val, len);
+       kunmap_local(addr);
+}
+
+static inline void memcpy_from_page(char *to, struct page *page,
+                                   size_t offset, size_t len)
+{
+       char *from = kmap_local_page(page);
+
+       VM_BUG_ON(offset + len > PAGE_SIZE);
+       memcpy(to, from + offset, len);
+       kunmap_local(from);
+}
+
+static inline void memcpy_to_page(struct page *page, size_t offset,
+                                 const char *from, size_t len)
+{
+       char *to = kmap_local_page(page);
+
+       VM_BUG_ON(offset + len > PAGE_SIZE);
+       memcpy(to + offset, from, len);
+       kunmap_local(to);
+}
+
 #endif /* _LINUX_HIGHMEM_H */
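These helpers fold the kmap_local_page()/kunmap_local() pairing into a single call so callers stop open-coding temporary mappings. A before/after sketch for writing into a page:

	/* before */
	char *addr = kmap_local_page(page);
	memcpy(addr + offset, src, len);
	kunmap_local(addr);

	/* after */
	memcpy_to_page(page, offset, src, len);

Note the helpers unmap in reverse order of mapping, matching the stack-like discipline kmap_local mappings require.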
index ce59a6a..9eb77c8 100644 (file)
@@ -320,7 +320,14 @@ static inline struct host1x_device *to_host1x_device(struct device *dev)
 int host1x_device_init(struct host1x_device *device);
 int host1x_device_exit(struct host1x_device *device);
 
-int host1x_client_register(struct host1x_client *client);
+int __host1x_client_register(struct host1x_client *client,
+                            struct lock_class_key *key);
+#define host1x_client_register(class) \
+       ({ \
+               static struct lock_class_key __key; \
+               __host1x_client_register(class, &__key); \
+       })
+
 int host1x_client_unregister(struct host1x_client *client);
 
 int host1x_client_suspend(struct host1x_client *client);
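Turning host1x_client_register() into a macro gives every registration site its own static struct lock_class_key, a standard lockdep pattern: locks keyed through different sites land in distinct classes, so lockdep can tell legitimate nesting apart from real inversions. A call expands roughly to:

	/* host1x_client_register(client) becomes: */
	({
		static struct lock_class_key __key;	/* unique per call site */
		__host1x_client_register(client, &__key);
	})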
index 6a19f35..ba973ef 100644 (file)
@@ -78,6 +78,7 @@ static inline vm_fault_t vmf_insert_pfn_pud(struct vm_fault *vmf, pfn_t pfn,
 }
 
 enum transparent_hugepage_flag {
+       TRANSPARENT_HUGEPAGE_NEVER_DAX,
        TRANSPARENT_HUGEPAGE_FLAG,
        TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG,
        TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG,
@@ -123,6 +124,13 @@ extern unsigned long transparent_hugepage_flags;
  */
 static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 {
+
+       /*
+        * Bail out if the hardware/firmware has marked hugepage support disabled.
+        */
+       if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_NEVER_DAX))
+               return false;
+
        if (vma->vm_flags & VM_NOHUGEPAGE)
                return false;
 
@@ -134,12 +142,7 @@ static inline bool __transparent_hugepage_enabled(struct vm_area_struct *vma)
 
        if (transparent_hugepage_flags & (1 << TRANSPARENT_HUGEPAGE_FLAG))
                return true;
-       /*
-        * For dax vmas, try to always use hugepage mappings. If the kernel does
-        * not support hugepages, fsdax mappings will fallback to PAGE_SIZE
-        * mappings, and device-dax namespaces, that try to guarantee a given
-        * mapping size, will fail to enable
-        */
+
        if (vma_is_dax(vma))
                return true;
 
index b5807f2..cccd1aa 100644 (file)
@@ -37,7 +37,7 @@ struct hugepage_subpool {
        struct hstate *hstate;
        long min_hpages;        /* Minimum huge pages or -1 if no minimum. */
        long rsv_hpages;        /* Pages reserved against global pool to */
-                               /* sasitfy minimum size. */
+                               /* satisfy minimum size. */
 };
 
 struct resv_map {
@@ -139,7 +139,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm, pte_t *dst_pte,
                                unsigned long dst_addr,
                                unsigned long src_addr,
                                struct page **pagep);
-int hugetlb_reserve_pages(struct inode *inode, long from, long to,
+bool hugetlb_reserve_pages(struct inode *inode, long from, long to,
                                                struct vm_area_struct *vma,
                                                vm_flags_t vm_flags);
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
@@ -472,6 +472,84 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
                                        unsigned long flags);
 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
+/*
+ * hugetlb page specific state flags.  These flags are located in page.private
+ * of the hugetlb head page.  Functions created via the below macros should be
+ * used to manipulate these flags.
+ *
+ * HPG_restore_reserve - Set when a hugetlb page consumes a reservation at
+ *     allocation time.  Cleared when page is fully instantiated.  Free
+ *     routine checks flag to restore a reservation on error paths.
+ *     Synchronization:  Examined or modified by code that knows it has
+ *     the only reference to page.  i.e. After allocation but before use
+ *     or when the page is being freed.
+ * HPG_migratable  - Set after a newly allocated page is added to the page
+ *     cache and/or page tables.  Indicates the page is a candidate for
+ *     migration.
+ *     Synchronization:  Initially set after new page allocation with no
+ *     locking.  When examined and modified during migration processing
+ *     (isolate, migrate, putback) the hugetlb_lock is held.
+ * HPG_temporary - Set on a page that is temporarily allocated from the buddy
+ *     allocator.  Typically used for migration target pages when no pages
+ *     are available in the pool.  The hugetlb free page path will
+ *     immediately free pages with this flag set to the buddy allocator.
+ *     Synchronization: Can be set after huge page allocation from buddy when
+ *     code knows it has the only reference.  All other examinations and
+ *     modifications require hugetlb_lock.
+ * HPG_freed - Set when page is on the free lists.
+ *     Synchronization: hugetlb_lock held for examination and modification.
+ */
+enum hugetlb_page_flags {
+       HPG_restore_reserve = 0,
+       HPG_migratable,
+       HPG_temporary,
+       HPG_freed,
+       __NR_HPAGEFLAGS,
+};
+
+/*
+ * Macros to create test, set and clear function definitions for
+ * hugetlb specific page flags.
+ */
+#ifdef CONFIG_HUGETLB_PAGE
+#define TESTHPAGEFLAG(uname, flname)                           \
+static inline int HPage##uname(struct page *page)              \
+       { return test_bit(HPG_##flname, &(page->private)); }
+
+#define SETHPAGEFLAG(uname, flname)                            \
+static inline void SetHPage##uname(struct page *page)          \
+       { set_bit(HPG_##flname, &(page->private)); }
+
+#define CLEARHPAGEFLAG(uname, flname)                          \
+static inline void ClearHPage##uname(struct page *page)                \
+       { clear_bit(HPG_##flname, &(page->private)); }
+#else
+#define TESTHPAGEFLAG(uname, flname)                           \
+static inline int HPage##uname(struct page *page)              \
+       { return 0; }
+
+#define SETHPAGEFLAG(uname, flname)                            \
+static inline void SetHPage##uname(struct page *page)          \
+       { }
+
+#define CLEARHPAGEFLAG(uname, flname)                          \
+static inline void ClearHPage##uname(struct page *page)                \
+       { }
+#endif
+
+#define HPAGEFLAG(uname, flname)                               \
+       TESTHPAGEFLAG(uname, flname)                            \
+       SETHPAGEFLAG(uname, flname)                             \
+       CLEARHPAGEFLAG(uname, flname)                           \
+
+/*
+ * Create functions associated with hugetlb page flags
+ */
+HPAGEFLAG(RestoreReserve, restore_reserve)
+HPAGEFLAG(Migratable, migratable)
+HPAGEFLAG(Temporary, temporary)
+HPAGEFLAG(Freed, freed)
+
 #ifdef CONFIG_HUGETLB_PAGE
 
 #define HSTATE_NAME_LEN 32
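Each HPAGEFLAG() invocation stamps out a test/set/clear trio in the style of the PageFlags macros. For instance, HPAGEFLAG(Freed, freed) above expands, in the CONFIG_HUGETLB_PAGE case, to:

	static inline int HPageFreed(struct page *page)
		{ return test_bit(HPG_freed, &(page->private)); }
	static inline void SetHPageFreed(struct page *page)
		{ set_bit(HPG_freed, &(page->private)); }
	static inline void ClearHPageFreed(struct page *page)
		{ clear_bit(HPG_freed, &(page->private)); }

so callers write SetHPageFreed(page) instead of open-coding bit operations on the head page's page->private.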
@@ -531,6 +609,20 @@ extern unsigned int default_hstate_idx;
 
 #define default_hstate (hstates[default_hstate_idx])
 
+/*
+ * hugetlb page subpool pointer located in hpage[1].private
+ */
+static inline struct hugepage_subpool *hugetlb_page_subpool(struct page *hpage)
+{
+       return (struct hugepage_subpool *)(hpage+1)->private;
+}
+
+static inline void hugetlb_set_page_subpool(struct page *hpage,
+                                       struct hugepage_subpool *subpool)
+{
+       set_page_private(hpage+1, (unsigned long)subpool);
+}
+
 static inline struct hstate *hstate_file(struct file *f)
 {
        return hstate_inode(file_inode(f));
@@ -770,8 +862,6 @@ static inline void huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 }
 #endif
 
-void set_page_huge_active(struct page *page);
-
 #else  /* CONFIG_HUGETLB_PAGE */
 struct hstate {};
 
index 2ad6e92..0bff345 100644 (file)
@@ -113,6 +113,11 @@ static inline bool hugetlb_cgroup_disabled(void)
        return !cgroup_subsys_enabled(hugetlb_cgrp_subsys);
 }
 
+static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
+{
+       css_put(&h_cg->css);
+}
+
 extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                        struct hugetlb_cgroup **ptr);
 extern int hugetlb_cgroup_charge_cgroup_rsvd(int idx, unsigned long nr_pages,
@@ -138,7 +143,8 @@ extern void hugetlb_cgroup_uncharge_counter(struct resv_map *resv,
 
 extern void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
                                                struct file_region *rg,
-                                               unsigned long nr_pages);
+                                               unsigned long nr_pages,
+                                               bool region_del);
 
 extern void hugetlb_cgroup_file_init(void) __init;
 extern void hugetlb_cgroup_migrate(struct page *oldhpage,
@@ -147,7 +153,8 @@ extern void hugetlb_cgroup_migrate(struct page *oldhpage,
 #else
 static inline void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
                                                       struct file_region *rg,
-                                                      unsigned long nr_pages)
+                                                      unsigned long nr_pages,
+                                                      bool region_del)
 {
 }
 
@@ -185,6 +192,10 @@ static inline bool hugetlb_cgroup_disabled(void)
        return true;
 }
 
+static inline void hugetlb_cgroup_put_rsvd_cgroup(struct hugetlb_cgroup *h_cg)
+{
+}
+
 static inline int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages,
                                               struct hugetlb_cgroup **ptr)
 {
index 452d897..9055cb3 100644 (file)
@@ -3,6 +3,7 @@
 #define _LINUX_ICMPV6_H
 
 #include <linux/skbuff.h>
+#include <linux/ipv6.h>
 #include <uapi/linux/icmpv6.h>
 
 static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb)
@@ -15,13 +16,16 @@ static inline struct icmp6hdr *icmp6_hdr(const struct sk_buff *skb)
 #if IS_ENABLED(CONFIG_IPV6)
 
 typedef void ip6_icmp_send_t(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-                            const struct in6_addr *force_saddr);
+                            const struct in6_addr *force_saddr,
+                            const struct inet6_skb_parm *parm);
 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-               const struct in6_addr *force_saddr);
+               const struct in6_addr *force_saddr,
+               const struct inet6_skb_parm *parm);
 #if IS_BUILTIN(CONFIG_IPV6)
-static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+static inline void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+                                const struct inet6_skb_parm *parm)
 {
-       icmp6_send(skb, type, code, info, NULL);
+       icmp6_send(skb, type, code, info, NULL, parm);
 }
 static inline int inet6_register_icmp_sender(ip6_icmp_send_t *fn)
 {
@@ -34,18 +38,28 @@ static inline int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
        return 0;
 }
 #else
-extern void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info);
+extern void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+                         const struct inet6_skb_parm *parm);
 extern int inet6_register_icmp_sender(ip6_icmp_send_t *fn);
 extern int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn);
 #endif
 
+static inline void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+{
+       __icmpv6_send(skb, type, code, info, IP6CB(skb));
+}
+
 int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
                               unsigned int data_len);
 
 #if IS_ENABLED(CONFIG_NF_NAT)
 void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info);
 #else
-#define icmpv6_ndo_send icmpv6_send
+static inline void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
+{
+       struct inet6_skb_parm parm = { 0 };
+       __icmpv6_send(skb_in, type, code, info, &parm);
+}
 #endif
 
 #else
index 96556c6..10c94a3 100644 (file)
@@ -43,13 +43,14 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan,
        if (likely(success)) {
                struct vlan_pcpu_stats *pcpu_stats;
 
-               pcpu_stats = this_cpu_ptr(vlan->pcpu_stats);
+               pcpu_stats = get_cpu_ptr(vlan->pcpu_stats);
                u64_stats_update_begin(&pcpu_stats->syncp);
                pcpu_stats->rx_packets++;
                pcpu_stats->rx_bytes += len;
                if (multicast)
                        pcpu_stats->rx_multicast++;
                u64_stats_update_end(&pcpu_stats->syncp);
+               put_cpu_ptr(vlan->pcpu_stats);
        } else {
                this_cpu_inc(vlan->pcpu_stats->rx_errors);
        }
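The switch from this_cpu_ptr() to the get_cpu_ptr()/put_cpu_ptr() pair pins the task to one CPU for the duration of the statistics update: get_cpu_ptr() disables preemption before resolving the per-CPU pointer and put_cpu_ptr() re-enables it. Without that, preemption between u64_stats_update_begin() and u64_stats_update_end() could migrate the task and leave another CPU's sequence counter corrupted. In shape:

	stats = get_cpu_ptr(vlan->pcpu_stats);	/* preempt_disable() + deref */
	u64_stats_update_begin(&stats->syncp);
	/* ... bump counters ... */
	u64_stats_update_end(&stats->syncp);
	put_cpu_ptr(vlan->pcpu_stats);		/* preempt_enable() */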
index 2ac834b..61d5723 100644 (file)
@@ -16,7 +16,8 @@ struct linux_binprm;
 #ifdef CONFIG_IMA
 extern int ima_bprm_check(struct linux_binprm *bprm);
 extern int ima_file_check(struct file *file, int mask);
-extern void ima_post_create_tmpfile(struct inode *inode);
+extern void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
+                                   struct inode *inode);
 extern void ima_file_free(struct file *file);
 extern int ima_file_mmap(struct file *file, unsigned long prot);
 extern int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot);
@@ -27,7 +28,8 @@ extern int ima_read_file(struct file *file, enum kernel_read_file_id id,
                         bool contents);
 extern int ima_post_read_file(struct file *file, void *buf, loff_t size,
                              enum kernel_read_file_id id);
-extern void ima_post_path_mknod(struct dentry *dentry);
+extern void ima_post_path_mknod(struct user_namespace *mnt_userns,
+                               struct dentry *dentry);
 extern int ima_file_hash(struct file *file, char *buf, size_t buf_size);
 extern int ima_inode_hash(struct inode *inode, char *buf, size_t buf_size);
 extern void ima_kexec_cmdline(int kernel_fd, const void *buf, int size);
@@ -72,7 +74,8 @@ static inline int ima_file_check(struct file *file, int mask)
        return 0;
 }
 
-static inline void ima_post_create_tmpfile(struct inode *inode)
+static inline void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
+                                          struct inode *inode)
 {
 }
 
@@ -116,7 +119,8 @@ static inline int ima_post_read_file(struct file *file, void *buf, loff_t size,
        return 0;
 }
 
-static inline void ima_post_path_mknod(struct dentry *dentry)
+static inline void ima_post_path_mknod(struct user_namespace *mnt_userns,
+                                      struct dentry *dentry)
 {
        return;
 }
@@ -163,7 +167,8 @@ static inline void ima_post_key_create_or_update(struct key *keyring,
 
 #ifdef CONFIG_IMA_APPRAISE
 extern bool is_ima_appraise_enabled(void);
-extern void ima_inode_post_setattr(struct dentry *dentry);
+extern void ima_inode_post_setattr(struct user_namespace *mnt_userns,
+                                  struct dentry *dentry);
 extern int ima_inode_setxattr(struct dentry *dentry, const char *xattr_name,
                       const void *xattr_value, size_t xattr_value_len);
 extern int ima_inode_removexattr(struct dentry *dentry, const char *xattr_name);
@@ -173,7 +178,8 @@ static inline bool is_ima_appraise_enabled(void)
        return 0;
 }
 
-static inline void ima_inode_post_setattr(struct dentry *dentry)
+static inline void ima_inode_post_setattr(struct user_namespace *mnt_userns,
+                                         struct dentry *dentry)
 {
        return;
 }
index e668832..31f54de 100644 (file)
@@ -184,19 +184,80 @@ extern bool initcall_debug;
  * as KEEP() in the linker script.
  */
 
+/* Format: <modname>__<counter>_<line>_<fn> */
+#define __initcall_id(fn)                                      \
+       __PASTE(__KBUILD_MODNAME,                               \
+       __PASTE(__,                                             \
+       __PASTE(__COUNTER__,                                    \
+       __PASTE(_,                                              \
+       __PASTE(__LINE__,                                       \
+       __PASTE(_, fn))))))
+
+/* Format: __<prefix>__<iid><id> */
+#define __initcall_name(prefix, __iid, id)                     \
+       __PASTE(__,                                             \
+       __PASTE(prefix,                                         \
+       __PASTE(__,                                             \
+       __PASTE(__iid, id))))
+
+#ifdef CONFIG_LTO_CLANG
+/*
+ * With LTO, the compiler doesn't necessarily obey link order for
+ * initcalls. In order to preserve the correct order, we add each
+ * variable into its own section and generate a linker script (in
+ * scripts/link-vmlinux.sh) to specify the order of the sections.
+ */
+#define __initcall_section(__sec, __iid)                       \
+       #__sec ".init.." #__iid
+
+/*
+ * With LTO, the compiler can rename static functions to avoid
+ * global naming collisions. We use a global stub function for
+ * initcalls to create a stable symbol name whose address can be
+ * taken in inline assembly when PREL32 relocations are used.
+ */
+#define __initcall_stub(fn, __iid, id)                         \
+       __initcall_name(initstub, __iid, id)
+
+#define __define_initcall_stub(__stub, fn)                     \
+       int __init __stub(void);                                \
+       int __init __stub(void)                                 \
+       {                                                       \
+               return fn();                                    \
+       }                                                       \
+       __ADDRESSABLE(__stub)
+#else
+#define __initcall_section(__sec, __iid)                       \
+       #__sec ".init"
+
+#define __initcall_stub(fn, __iid, id) fn
+
+#define __define_initcall_stub(__stub, fn)                     \
+       __ADDRESSABLE(fn)
+#endif
+
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-#define ___define_initcall(fn, id, __sec)                      \
-       __ADDRESSABLE(fn)                                       \
-       asm(".section   \"" #__sec ".init\", \"a\"      \n"     \
-       "__initcall_" #fn #id ":                        \n"     \
-           ".long      " #fn " - .                     \n"     \
+#define ____define_initcall(fn, __stub, __name, __sec)         \
+       __define_initcall_stub(__stub, fn)                      \
+       asm(".section   \"" __sec "\", \"a\"            \n"     \
+           __stringify(__name) ":                      \n"     \
+           ".long      " __stringify(__stub) " - .     \n"     \
            ".previous                                  \n");
 #else
-#define ___define_initcall(fn, id, __sec) \
-       static initcall_t __initcall_##fn##id __used \
-               __attribute__((__section__(#__sec ".init"))) = fn;
+#define ____define_initcall(fn, __unused, __name, __sec)       \
+       static initcall_t __name __used                         \
+               __attribute__((__section__(__sec))) = fn;
 #endif
 
+#define __unique_initcall(fn, id, __sec, __iid)                        \
+       ____define_initcall(fn,                                 \
+               __initcall_stub(fn, __iid, id),                 \
+               __initcall_name(initcall, __iid, id),           \
+               __initcall_section(__sec, __iid))
+
+#define ___define_initcall(fn, id, __sec)                      \
+       __unique_initcall(fn, id, __sec, __initcall_id(fn))
+
 #define __define_initcall(fn, id) ___define_initcall(fn, id, .initcall##id)
 
 /*
@@ -236,7 +297,7 @@ extern bool initcall_debug;
 #define __exitcall(fn)                                         \
        static exitcall_t __exitcall_##fn __exit_call = fn
 
-#define console_initcall(fn)   ___define_initcall(fn,, .con_initcall)
+#define console_initcall(fn)   ___define_initcall(fn, con, .con_initcall)
 
 struct obs_kernel_param {
        const char *str;
@@ -277,14 +338,14 @@ struct obs_kernel_param {
                var = 1;                                                \
                return 0;                                               \
        }                                                               \
-       __setup_param(str_on, parse_##var##_on, parse_##var##_on, 1);   \
+       early_param(str_on, parse_##var##_on);                          \
                                                                        \
        static int __init parse_##var##_off(char *arg)                  \
        {                                                               \
                var = 0;                                                \
                return 0;                                               \
        }                                                               \
-       __setup_param(str_off, parse_##var##_off, parse_##var##_off, 1)
+       early_param(str_off, parse_##var##_off)
 
 /* Relies on boot_command_line being set */
 void __init parse_early_param(void);
index 8db6f8c..85c1571 100644 (file)
@@ -1,5 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 
+#ifndef __LINUX_INITRD_H
+#define __LINUX_INITRD_H
+
 #define INITRD_MINOR 250 /* shouldn't collide with /dev/ram* too soon ... */
 
 /* starting block # of image */
@@ -15,6 +18,12 @@ extern int initrd_below_start_ok;
 extern unsigned long initrd_start, initrd_end;
 extern void free_initrd_mem(unsigned long, unsigned long);
 
+#ifdef CONFIG_BLK_DEV_INITRD
+extern void __init reserve_initrd_mem(void);
+#else
+static inline void __init reserve_initrd_mem(void) {}
+#endif
+
 extern phys_addr_t phys_initrd_start;
 extern unsigned long phys_initrd_size;
 
@@ -24,3 +33,5 @@ extern char __initramfs_start[];
 extern unsigned long __initramfs_size;
 
 void console_on_rootfs(void);
+
+#endif /* __LINUX_INITRD_H */
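A minimal sketch of how an architecture's early boot code might adopt the new reserve_initrd_mem() hook; the setup_arch() body shown here is assumed, not taken from any particular arch:

	void __init setup_arch(char **cmdline_p)
	{
		/* ... memblock has been initialized by this point ... */

		/*
		 * Reserve the physical range occupied by the initrd so the
		 * page allocator never hands it out; this compiles to a
		 * no-op stub when CONFIG_BLK_DEV_INITRD is disabled.
		 */
		reserve_initrd_mem();

		/* ... */
	}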
diff --git a/include/linux/intel-pti.h b/include/linux/intel-pti.h
deleted file mode 100644 (file)
index fcd841a..0000000
+++ /dev/null
@@ -1,35 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- *  Copyright (C) Intel 2011
- *
- * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
- *
- * The PTI (Parallel Trace Interface) driver directs trace data routed from
- * various parts in the system out through the Intel Penwell PTI port and
- * out of the mobile device for analysis with a debugging tool
- * (Lauterbach, Fido). This is part of a solution for the MIPI P1149.7,
- * compact JTAG, standard.
- *
- * This header file will allow other parts of the OS to use the
- * interface to write out it's contents for debugging a mobile system.
- */
-
-#ifndef LINUX_INTEL_PTI_H_
-#define LINUX_INTEL_PTI_H_
-
-/* offset for last dword of any PTI message. Part of MIPI P1149.7 */
-#define PTI_LASTDWORD_DTS      0x30
-
-/* basic structure used as a write address to the PTI HW */
-struct pti_masterchannel {
-       u8 master;
-       u8 channel;
-};
-
-/* the following functions are defined in misc/pti.c */
-void pti_writedata(struct pti_masterchannel *mc, u8 *buf, int count);
-struct pti_masterchannel *pti_request_masterchannel(u8 type,
-                                                   const char *thread_name);
-void pti_release_masterchannel(struct pti_masterchannel *mc);
-
-#endif /* LINUX_INTEL_PTI_H_ */
index bb8ff90..967e257 100644 (file)
@@ -569,15 +569,6 @@ struct softirq_action
 asmlinkage void do_softirq(void);
 asmlinkage void __do_softirq(void);
 
-#ifdef __ARCH_HAS_DO_SOFTIRQ
-void do_softirq_own_stack(void);
-#else
-static inline void do_softirq_own_stack(void)
-{
-       __do_softirq();
-}
-#endif
-
 extern void open_softirq(int nr, void (*action)(struct softirq_action *));
 extern void softirq_init(void);
 extern void __raise_softirq_irqoff(unsigned int nr);
index 2eb6d19..79cde99 100644 (file)
@@ -5,49 +5,6 @@
 #include <linux/sched.h>
 #include <linux/xarray.h>
 
-struct io_identity {
-       struct files_struct             *files;
-       struct mm_struct                *mm;
-#ifdef CONFIG_BLK_CGROUP
-       struct cgroup_subsys_state      *blkcg_css;
-#endif
-       const struct cred               *creds;
-       struct nsproxy                  *nsproxy;
-       struct fs_struct                *fs;
-       unsigned long                   fsize;
-#ifdef CONFIG_AUDIT
-       kuid_t                          loginuid;
-       unsigned int                    sessionid;
-#endif
-       refcount_t                      count;
-};
-
-struct io_wq_work_node {
-       struct io_wq_work_node *next;
-};
-
-struct io_wq_work_list {
-       struct io_wq_work_node *first;
-       struct io_wq_work_node *last;
-};
-
-struct io_uring_task {
-       /* submission side */
-       struct xarray           xa;
-       struct wait_queue_head  wait;
-       struct file             *last;
-       struct percpu_counter   inflight;
-       struct io_identity      __identity;
-       struct io_identity      *identity;
-       atomic_t                in_idle;
-       bool                    sqpoll;
-
-       spinlock_t              task_lock;
-       struct io_wq_work_list  task_list;
-       unsigned long           task_state;
-       struct callback_head    task_work;
-};
-
 #if defined(CONFIG_IO_URING)
 struct sock *io_uring_get_socket(struct file *file);
 void __io_uring_task_cancel(void);
@@ -56,12 +13,12 @@ void __io_uring_free(struct task_struct *tsk);
 
 static inline void io_uring_task_cancel(void)
 {
-       if (current->io_uring && !xa_empty(&current->io_uring->xa))
+       if (current->io_uring)
                __io_uring_task_cancel();
 }
 static inline void io_uring_files_cancel(struct files_struct *files)
 {
-       if (current->io_uring && !xa_empty(&current->io_uring->xa))
+       if (current->io_uring)
                __io_uring_files_cancel(files);
 }
 static inline void io_uring_free(struct task_struct *tsk)
index 9d1f29f..70b2ad3 100644 (file)
@@ -85,7 +85,6 @@ struct ipv6_params {
        __s32 autoconf;
 };
 extern struct ipv6_params ipv6_defaults;
-#include <linux/icmpv6.h>
 #include <linux/tcp.h>
 #include <linux/udp.h>
 
index 42d1968..33cacc8 100644 (file)
@@ -150,7 +150,6 @@ struct irq_domain_chip_generic;
  *      setting up one or more generic chips for interrupt controllers
  *      drivers using the generic chip library which uses this pointer.
  * @parent: Pointer to parent irq_domain to support hierarchy irq_domains
- * @debugfs_file: dentry for the domain debugfs file
  *
  * Revmap data, used internally by irq_domain
  * @revmap_direct_max_irq: The largest hwirq that can be set for controllers that
@@ -174,9 +173,6 @@ struct irq_domain {
 #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY
        struct irq_domain *parent;
 #endif
-#ifdef CONFIG_GENERIC_IRQ_DEBUGFS
-       struct dentry           *debugfs_file;
-#endif
 
        /* reverse map data. The linear map gets appended to the irq_domain */
        irq_hw_number_t hwirq_max;
index 481273f..465060a 100644 (file)
@@ -71,15 +71,14 @@ static inline void *dereference_symbol_descriptor(void *ptr)
        return ptr;
 }
 
-#ifdef CONFIG_KALLSYMS
-/* Lookup the address for a symbol. Returns 0 if not found. */
-unsigned long kallsyms_lookup_name(const char *name);
-
-/* Call a function on each kallsyms symbol in the core kernel */
 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
                                      unsigned long),
                            void *data);
 
+#ifdef CONFIG_KALLSYMS
+/* Lookup the address for a symbol. Returns 0 if not found. */
+unsigned long kallsyms_lookup_name(const char *name);
+
 extern int kallsyms_lookup_size_offset(unsigned long addr,
                                  unsigned long *symbolsize,
                                  unsigned long *offset);
@@ -108,14 +107,6 @@ static inline unsigned long kallsyms_lookup_name(const char *name)
        return 0;
 }
 
-static inline int kallsyms_on_each_symbol(int (*fn)(void *, const char *,
-                                                   struct module *,
-                                                   unsigned long),
-                                         void *data)
-{
-       return 0;
-}
-
 static inline int kallsyms_lookup_size_offset(unsigned long addr,
                                              unsigned long *symbolsize,
                                              unsigned long *offset)
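Since kallsyms_on_each_symbol() is now declared unconditionally (its header stub is removed), callers no longer need their own CONFIG_KALLSYMS fallback. A hedged sketch of a consumer; count_prefixed() and count_kfence_symbols() are made-up names:

	static int count_prefixed(void *data, const char *name,
				  struct module *mod, unsigned long addr)
	{
		unsigned int *count = data;

		if (!strncmp(name, "kfence_", 7))
			(*count)++;
		return 0;	/* returning non-zero stops the walk */
	}

	static void count_kfence_symbols(void)
	{
		unsigned int count = 0;

		kallsyms_on_each_symbol(count_prefixed, &count);
		pr_info("found %u kfence_* symbols\n", count);
	}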
index ca5e89f..3d6d22a 100644 (file)
@@ -5,6 +5,12 @@
 #include <linux/types.h>
 
 /*
+ * The annotations present in this file are only relevant for the software
+ * KASAN modes that rely on compiler instrumentation, and will be optimized
+ * away for the hardware tag-based KASAN mode. Use kasan_check_byte() instead.
+ */
+
+/*
  * __kasan_check_*: Always available when KASAN is enabled. This may be used
  * even in compilation units that selectively disable KASAN, but must use KASAN
  * to validate access to an address.   Never use these in header files!
index 0aea9e2..b91732b 100644 (file)
@@ -83,6 +83,7 @@ static inline void kasan_disable_current(void) {}
 struct kasan_cache {
        int alloc_meta_offset;
        int free_meta_offset;
+       bool is_kmalloc;
 };
 
 #ifdef CONFIG_KASAN_HW_TAGS
@@ -143,6 +144,13 @@ static __always_inline void kasan_cache_create(struct kmem_cache *cache,
                __kasan_cache_create(cache, size, flags);
 }
 
+void __kasan_cache_create_kmalloc(struct kmem_cache *cache);
+static __always_inline void kasan_cache_create_kmalloc(struct kmem_cache *cache)
+{
+       if (kasan_enabled())
+               __kasan_cache_create_kmalloc(cache);
+}
+
 size_t __kasan_metadata_size(struct kmem_cache *cache);
 static __always_inline size_t kasan_metadata_size(struct kmem_cache *cache)
 {
@@ -185,19 +193,25 @@ static __always_inline void * __must_check kasan_init_slab_obj(
 }
 
 bool __kasan_slab_free(struct kmem_cache *s, void *object, unsigned long ip);
-static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object,
-                                               unsigned long ip)
+static __always_inline bool kasan_slab_free(struct kmem_cache *s, void *object)
 {
        if (kasan_enabled())
-               return __kasan_slab_free(s, object, ip);
+               return __kasan_slab_free(s, object, _RET_IP_);
        return false;
 }
 
+void __kasan_kfree_large(void *ptr, unsigned long ip);
+static __always_inline void kasan_kfree_large(void *ptr)
+{
+       if (kasan_enabled())
+               __kasan_kfree_large(ptr, _RET_IP_);
+}
+
 void __kasan_slab_free_mempool(void *ptr, unsigned long ip);
-static __always_inline void kasan_slab_free_mempool(void *ptr, unsigned long ip)
+static __always_inline void kasan_slab_free_mempool(void *ptr)
 {
        if (kasan_enabled())
-               __kasan_slab_free_mempool(ptr, ip);
+               __kasan_slab_free_mempool(ptr, _RET_IP_);
 }
 
 void * __must_check __kasan_slab_alloc(struct kmem_cache *s,
@@ -240,13 +254,19 @@ static __always_inline void * __must_check kasan_krealloc(const void *object,
        return (void *)object;
 }
 
-void __kasan_kfree_large(void *ptr, unsigned long ip);
-static __always_inline void kasan_kfree_large(void *ptr, unsigned long ip)
+/*
+ * Unlike kasan_check_read/write(), kasan_check_byte() is performed even for
+ * the hardware tag-based mode that doesn't rely on compiler instrumentation.
+ */
+bool __kasan_check_byte(const void *addr, unsigned long ip);
+static __always_inline bool kasan_check_byte(const void *addr)
 {
        if (kasan_enabled())
-               __kasan_kfree_large(ptr, ip);
+               return __kasan_check_byte(addr, _RET_IP_);
+       return true;
 }
 
 bool kasan_save_enable_multi_shot(void);
 void kasan_restore_multi_shot(bool enabled);
 
@@ -266,6 +286,7 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {}
 static inline void kasan_cache_create(struct kmem_cache *cache,
                                      unsigned int *size,
                                      slab_flags_t *flags) {}
+static inline void kasan_cache_create_kmalloc(struct kmem_cache *cache) {}
 static inline size_t kasan_metadata_size(struct kmem_cache *cache) { return 0; }
 static inline void kasan_poison_slab(struct page *page) {}
 static inline void kasan_unpoison_object_data(struct kmem_cache *cache,
@@ -277,12 +298,12 @@ static inline void *kasan_init_slab_obj(struct kmem_cache *cache,
 {
        return (void *)object;
 }
-static inline bool kasan_slab_free(struct kmem_cache *s, void *object,
-                                  unsigned long ip)
+static inline bool kasan_slab_free(struct kmem_cache *s, void *object)
 {
        return false;
 }
-static inline void kasan_slab_free_mempool(void *ptr, unsigned long ip) {}
+static inline void kasan_kfree_large(void *ptr) {}
+static inline void kasan_slab_free_mempool(void *ptr) {}
 static inline void *kasan_slab_alloc(struct kmem_cache *s, void *object,
                                   gfp_t flags)
 {
@@ -302,7 +323,10 @@ static inline void *kasan_krealloc(const void *object, size_t new_size,
 {
        return (void *)object;
 }
-static inline void kasan_kfree_large(void *ptr, unsigned long ip) {}
+static inline bool kasan_check_byte(const void *address)
+{
+       return true;
+}
 
 #endif /* CONFIG_KASAN */
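The net effect of the kasan.h changes above is that _RET_IP_ is captured inside the wrappers, so allocator call sites drop their ip argument. A hedged before/after sketch:

	/* before: the caller threaded its return address through */
	kasan_slab_free(s, object, _RET_IP_);
	kasan_kfree_large(ptr, _RET_IP_);

	/* after: the wrappers record _RET_IP_ themselves */
	kasan_slab_free(s, object);
	kasan_kfree_large(ptr);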
 
index e78e17a..24a59cb 100644 (file)
@@ -2,8 +2,6 @@
 #ifndef __LINUX_KCONFIG_H
 #define __LINUX_KCONFIG_H
 
-/* CONFIG_CC_VERSION_TEXT (Do not delete this comment. See help in Kconfig) */
-
 #include <generated/autoconf.h>
 
 #ifdef CONFIG_CPU_BIG_ENDIAN
index 5f61389..8a7aa1d 100644 (file)
@@ -314,6 +314,8 @@ extern void machine_kexec_cleanup(struct kimage *image);
 extern int kernel_kexec(void);
 extern struct page *kimage_alloc_control_pages(struct kimage *image,
                                                unsigned int order);
+int machine_kexec_post_load(struct kimage *image);
+
 extern void __crash_kexec(struct pt_regs *);
 extern void crash_kexec(struct pt_regs *);
 int kexec_should_crash(struct task_struct *);
index 0f2e24f..7febc48 100644 (file)
@@ -289,6 +289,7 @@ extern struct key *key_alloc(struct key_type *type,
 #define KEY_ALLOC_BUILT_IN             0x0004  /* Key is built into kernel */
 #define KEY_ALLOC_BYPASS_RESTRICTION   0x0008  /* Override the check on restricted keyrings */
 #define KEY_ALLOC_UID_KEYRING          0x0010  /* allocating a user or user session keyring */
+#define KEY_ALLOC_SET_KEEP             0x0020  /* Set the KEEP flag on the key/keyring */
 
 extern void key_revoke(struct key *key);
 extern void key_invalidate(struct key *key);
@@ -360,7 +361,7 @@ static inline struct key *request_key(struct key_type *type,
  * completion of keys undergoing construction with a non-interruptible wait.
  */
 #define request_key_net(type, description, net, callout_info) \
-       request_key_tag(type, description, net->key_domain, callout_info);
+       request_key_tag(type, description, net->key_domain, callout_info)
 
 /**
  * request_key_net_rcu - Request a key for a net namespace under RCU conditions
@@ -372,7 +373,7 @@ static inline struct key *request_key(struct key_type *type,
  * network namespace are used.
  */
 #define request_key_net_rcu(type, description, net) \
-       request_key_rcu(type, description, net->key_domain);
+       request_key_rcu(type, description, net->key_domain)
 #endif /* CONFIG_NET */
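/*
 * Why the trailing semicolons above had to go (illustrative): with the
 * old definitions the expansion ended in ";;", i.e. an extra empty
 * statement, which broke common if/else call patterns such as:
 *
 *   if (cond)
 *           key = request_key_net(type, desc, net, NULL);
 *   else
 *           key = ERR_PTR(-ENOKEY);
 */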
 
 extern int wait_for_key_construction(struct key *key, bool intr);
diff --git a/include/linux/kfence.h b/include/linux/kfence.h
new file mode 100644 (file)
index 0000000..a70d1ea
--- /dev/null
@@ -0,0 +1,222 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel Electric-Fence (KFENCE). Public interface for allocator and fault
+ * handler integration. For more info see Documentation/dev-tools/kfence.rst.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef _LINUX_KFENCE_H
+#define _LINUX_KFENCE_H
+
+#include <linux/mm.h>
+#include <linux/types.h>
+
+#ifdef CONFIG_KFENCE
+
+/*
+ * We allocate an even number of pages, as it simplifies calculations to map
+ * address to metadata indices; effectively, the very first page serves as an
+ * extended guard page, but otherwise has no special purpose.
+ */
+#define KFENCE_POOL_SIZE ((CONFIG_KFENCE_NUM_OBJECTS + 1) * 2 * PAGE_SIZE)
+extern char *__kfence_pool;
+
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+#include <linux/static_key.h>
+DECLARE_STATIC_KEY_FALSE(kfence_allocation_key);
+#else
+#include <linux/atomic.h>
+extern atomic_t kfence_allocation_gate;
+#endif
+
+/**
+ * is_kfence_address() - check if an address belongs to KFENCE pool
+ * @addr: address to check
+ *
+ * Return: true or false depending on whether the address is within the KFENCE
+ * object range.
+ *
+ * KFENCE objects live in a separate page range and are not to be intermixed
+ * with regular heap objects (e.g. KFENCE objects must never be added to the
+ * allocator freelists). Failing to do so will sooner or later result in
+ * heap corruption, so is_kfence_address() must be used to check whether
+ * an object requires specific handling.
+ *
+ * Note: This function may be used in fast-paths, and is performance critical.
+ * Future changes should take this into account; for instance, we want to avoid
+ * introducing another load and therefore need to keep KFENCE_POOL_SIZE a
+ * constant (until immediate patching support is added to the kernel).
+ */
+static __always_inline bool is_kfence_address(const void *addr)
+{
+       /*
+        * The non-NULL check is required in case the __kfence_pool pointer was
+        * never initialized; keep it in the slow-path after the range-check.
+        */
+       return unlikely((unsigned long)((char *)addr - __kfence_pool) < KFENCE_POOL_SIZE && addr);
+}
+
+/**
+ * kfence_alloc_pool() - allocate the KFENCE pool via memblock
+ */
+void __init kfence_alloc_pool(void);
+
+/**
+ * kfence_init() - perform KFENCE initialization at boot time
+ *
+ * Requires that kfence_alloc_pool() was called before. This sets up the
+ * allocation gate timer, and requires that workqueues are available.
+ */
+void __init kfence_init(void);
+
+/**
+ * kfence_shutdown_cache() - handle shutdown_cache() for KFENCE objects
+ * @s: cache being shut down
+ *
+ * Before shutting down a cache, one must ensure there are no remaining objects
+ * allocated from it. Because KFENCE objects are not referenced from the cache
+ * directly, we need to check them here.
+ *
+ * Note that shutdown_cache() is internal to SL*B, and kmem_cache_destroy() does
+ * not return if allocated objects still exist: it prints an error message and
+ * simply aborts destruction of a cache, leaking memory.
+ *
+ * If the only such objects are KFENCE objects, we will not leak the entire
+ * cache, but instead try to provide more useful debug info by making allocated
+ * objects "zombie allocations". Objects may then still be used or freed (which
+ * is handled gracefully), but usage will result in showing KFENCE error reports
+ * which include stack traces to the user of the object, the original allocation
+ * site, and caller to shutdown_cache().
+ */
+void kfence_shutdown_cache(struct kmem_cache *s);
+
+/*
+ * Allocate a KFENCE object. Allocators must not call this function directly,
+ * use kfence_alloc() instead.
+ */
+void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags);
+
+/**
+ * kfence_alloc() - allocate a KFENCE object with a low probability
+ * @s:     struct kmem_cache with object requirements
+ * @size:  exact size of the object to allocate (can be less than @s->size
+ *         e.g. for kmalloc caches)
+ * @flags: GFP flags
+ *
+ * Return:
+ * * NULL     - must proceed with allocating as usual,
+ * * non-NULL - pointer to a KFENCE object.
+ *
+ * kfence_alloc() should be inserted into the heap allocation fast path,
+ * allowing it to transparently return KFENCE-allocated objects with a low
+ * probability using a static branch (the probability is controlled by the
+ * kfence.sample_interval boot parameter).
+ */
+static __always_inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+{
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+       if (static_branch_unlikely(&kfence_allocation_key))
+#else
+       if (unlikely(!atomic_read(&kfence_allocation_gate)))
+#endif
+               return __kfence_alloc(s, size, flags);
+       return NULL;
+}
+
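+/*
+ * Hedged sketch of the fast-path integration described above; names
+ * other than kfence_alloc() are illustrative, not real allocator hooks:
+ *
+ *   void *obj = kfence_alloc(s, size, flags);
+ *   if (unlikely(obj))                  // rare, sampled KFENCE hit
+ *           return obj;
+ *   return normal_slab_alloc(s, size, flags);
+ */
+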
+/**
+ * kfence_ksize() - get actual amount of memory allocated for a KFENCE object
+ * @addr: pointer to a heap object
+ *
+ * Return:
+ * * 0     - not a KFENCE object, must call __ksize() instead,
+ * * non-0 - this many bytes can be accessed without causing a memory error.
+ *
+ * kfence_ksize() returns the number of bytes requested for a KFENCE object at
+ * allocation time. This number may be less than the object size of the
+ * corresponding struct kmem_cache.
+ */
+size_t kfence_ksize(const void *addr);
+
+/**
+ * kfence_object_start() - find the beginning of a KFENCE object
+ * @addr: address within a KFENCE-allocated object
+ *
+ * Return: address of the beginning of the object.
+ *
+ * SL[AU]B-allocated objects are laid out within a page one by one, so it is
+ * easy to calculate the beginning of an object given a pointer inside it and
+ * the object size. The same is not true for KFENCE, which places a single
+ * object at either end of the page. This helper function is used to find the
+ * beginning of a KFENCE-allocated object.
+ */
+void *kfence_object_start(const void *addr);
+
+/**
+ * __kfence_free() - release a KFENCE heap object to KFENCE pool
+ * @addr: object to be freed
+ *
+ * Requires: is_kfence_address(addr)
+ *
+ * Release a KFENCE object and mark it as freed.
+ */
+void __kfence_free(void *addr);
+
+/**
+ * kfence_free() - try to release an arbitrary heap object to KFENCE pool
+ * @addr: object to be freed
+ *
+ * Return:
+ * * false - object doesn't belong to KFENCE pool and was ignored,
+ * * true  - object was released to KFENCE pool.
+ *
+ * Release a KFENCE object and mark it as freed. May be called on any object,
+ * even non-KFENCE objects, to simplify integration of the hooks into the
+ * allocator's free codepath. The allocator must check the return value to
+ * determine if it was a KFENCE object or not.
+ */
+static __always_inline __must_check bool kfence_free(void *addr)
+{
+       if (!is_kfence_address(addr))
+               return false;
+       __kfence_free(addr);
+       return true;
+}
+
+/**
+ * kfence_handle_page_fault() - perform page fault handling for KFENCE pages
+ * @addr: faulting address
+ * @is_write: is access a write
+ * @regs: current struct pt_regs (can be NULL, but shows full stack trace)
+ *
+ * Return:
+ * * false - address outside KFENCE pool,
+ * * true  - page fault handled by KFENCE, no additional handling required.
+ *
+ * A page fault inside KFENCE pool indicates a memory error, such as an
+ * out-of-bounds access, a use-after-free or an invalid memory access. In these
+ * cases KFENCE prints an error message and marks the offending page as
+ * present, so that the kernel can proceed.
+ */
+bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs);
+
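+/*
+ * Hedged sketch of an arch fault-handler call site; the surrounding
+ * code is illustrative:
+ *
+ *   if (kfence_handle_page_fault(addr, is_write, regs))
+ *           return;  // KFENCE report printed, page marked present
+ */
+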
+#else /* CONFIG_KFENCE */
+
+static inline bool is_kfence_address(const void *addr) { return false; }
+static inline void kfence_alloc_pool(void) { }
+static inline void kfence_init(void) { }
+static inline void kfence_shutdown_cache(struct kmem_cache *s) { }
+static inline void *kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags) { return NULL; }
+static inline size_t kfence_ksize(const void *addr) { return 0; }
+static inline void *kfence_object_start(const void *addr) { return NULL; }
+static inline void __kfence_free(void *addr) { }
+static inline bool __must_check kfence_free(void *addr) { return false; }
+static inline bool __must_check kfence_handle_page_fault(unsigned long addr, bool is_write,
+                                                        struct pt_regs *regs)
+{
+       return false;
+}
+
+#endif
+
+#endif /* _LINUX_KFENCE_H */
index 0444b44..392a367 100644 (file)
@@ -359,9 +359,11 @@ extern atomic_t                    kgdb_active;
 extern bool dbg_is_early;
 extern void __init dbg_late_init(void);
 extern void kgdb_panic(const char *msg);
+extern void kgdb_free_init_mem(void);
 #else /* ! CONFIG_KGDB */
 #define in_dbg_master() (0)
 #define dbg_late_init()
 static inline void kgdb_panic(const char *msg) {}
+static inline void kgdb_free_init_mem(void) { }
 #endif /* ! CONFIG_KGDB */
 #endif /* _KGDB_H_ */
index c941b73..2fcc018 100644 (file)
@@ -3,6 +3,7 @@
 #define _LINUX_KHUGEPAGED_H
 
 #include <linux/sched/coredump.h> /* MMF_VM_HUGEPAGE */
+#include <linux/shmem_fs.h>
 
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
@@ -57,6 +58,7 @@ static inline int khugepaged_enter(struct vm_area_struct *vma,
 {
        if (!test_bit(MMF_VM_HUGEPAGE, &vma->vm_mm->flags))
                if ((khugepaged_always() ||
+                    (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) ||
                     (khugepaged_req_madv() && (vm_flags & VM_HUGEPAGE))) &&
                    !(vm_flags & VM_NOHUGEPAGE) &&
                    !test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
index 0738389..1a37a66 100644 (file)
@@ -4,7 +4,7 @@
  *     Version: 0.1.0
  * Description: ks0108 LCD Controller driver header
  *
- *      Author: Copyright (C) Miguel Ojeda Sandonis
+ *      Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
  *        Date: 2006-10-31
  */
 
index e126ebd..1b65e72 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/signal.h>
 #include <linux/sched.h>
 #include <linux/bug.h>
+#include <linux/minmax.h>
 #include <linux/mm.h>
 #include <linux/mmu_notifier.h>
 #include <linux/preempt.h>
@@ -506,6 +507,8 @@ struct kvm {
        struct mmu_notifier mmu_notifier;
        unsigned long mmu_notifier_seq;
        long mmu_notifier_count;
+       unsigned long mmu_notifier_range_start;
+       unsigned long mmu_notifier_range_end;
 #endif
        long tlbs_dirty;
        struct list_head devices;
@@ -733,7 +736,7 @@ kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn);
 kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn);
 kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
                               bool atomic, bool *async, bool write_fault,
-                              bool *writable);
+                              bool *writable, hva_t *hva);
 
 void kvm_release_pfn_clean(kvm_pfn_t pfn);
 void kvm_release_pfn_dirty(kvm_pfn_t pfn);
@@ -1207,6 +1210,26 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq)
                return 1;
        return 0;
 }
+
+static inline int mmu_notifier_retry_hva(struct kvm *kvm,
+                                        unsigned long mmu_seq,
+                                        unsigned long hva)
+{
+       lockdep_assert_held(&kvm->mmu_lock);
+       /*
+        * If mmu_notifier_count is non-zero, then the range maintained by
+        * kvm_mmu_notifier_invalidate_range_start contains all addresses that
+        * might be being invalidated. Note that it may include some false
+	 * positives, due to shortcuts when handling concurrent invalidations.
+        */
+       if (unlikely(kvm->mmu_notifier_count) &&
+           hva >= kvm->mmu_notifier_range_start &&
+           hva < kvm->mmu_notifier_range_end)
+               return 1;
+       if (kvm->mmu_notifier_seq != mmu_seq)
+               return 1;
+       return 0;
+}
 #endif
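/*
 * Hedged sketch of the intended use of mmu_notifier_retry_hva() in a
 * page-fault path; everything around the call is illustrative:
 *
 *   mmu_seq = kvm->mmu_notifier_seq;
 *   smp_rmb();
 *   pfn = translate_gfn_outside_lock(...);   // hypothetical helper
 *
 *   spin_lock(&kvm->mmu_lock);
 *   if (mmu_notifier_retry_hva(kvm, mmu_seq, hva))
 *           goto retry;    // hva raced with a concurrent invalidation
 *   ...install the mapping...
 */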
 
 #ifdef CONFIG_HAVE_KVM_IRQ_ROUTING
index 21a3358..612b4ca 100644 (file)
@@ -85,6 +85,7 @@ static inline struct led_classdev_flash *lcdev_to_flcdev(
        return container_of(lcdev, struct led_classdev_flash, led_cdev);
 }
 
+#if IS_ENABLED(CONFIG_LEDS_CLASS_FLASH)
 /**
  * led_classdev_flash_register_ext - register a new object of LED class with
  *                                  init data and with support for flash LEDs
@@ -98,12 +99,6 @@ int led_classdev_flash_register_ext(struct device *parent,
                                    struct led_classdev_flash *fled_cdev,
                                    struct led_init_data *init_data);
 
-static inline int led_classdev_flash_register(struct device *parent,
-                                          struct led_classdev_flash *fled_cdev)
-{
-       return led_classdev_flash_register_ext(parent, fled_cdev, NULL);
-}
-
 /**
  * led_classdev_flash_unregister - unregisters an object of led_classdev class
  *                                with support for flash LEDs
@@ -118,15 +113,44 @@ int devm_led_classdev_flash_register_ext(struct device *parent,
                                     struct led_init_data *init_data);
 
 
+void devm_led_classdev_flash_unregister(struct device *parent,
+                                       struct led_classdev_flash *fled_cdev);
+
+#else
+
+static inline int led_classdev_flash_register_ext(struct device *parent,
+                                   struct led_classdev_flash *fled_cdev,
+                                   struct led_init_data *init_data)
+{
+       return 0;
+}
+
+static inline void led_classdev_flash_unregister(struct led_classdev_flash *fled_cdev) {};
+static inline int devm_led_classdev_flash_register_ext(struct device *parent,
+                                    struct led_classdev_flash *fled_cdev,
+                                    struct led_init_data *init_data)
+{
+       return 0;
+}
+
+static inline void devm_led_classdev_flash_unregister(struct device *parent,
+                                       struct led_classdev_flash *fled_cdev)
+{};
+
+#endif  /* IS_ENABLED(CONFIG_LEDS_CLASS_FLASH) */
+
+static inline int led_classdev_flash_register(struct device *parent,
+                                          struct led_classdev_flash *fled_cdev)
+{
+       return led_classdev_flash_register_ext(parent, fled_cdev, NULL);
+}
+
 static inline int devm_led_classdev_flash_register(struct device *parent,
                                     struct led_classdev_flash *fled_cdev)
 {
        return devm_led_classdev_flash_register_ext(parent, fled_cdev, NULL);
 }
 
-void devm_led_classdev_flash_unregister(struct device *parent,
-                                       struct led_classdev_flash *fled_cdev);
-
 /**
  * led_set_flash_strobe - setup flash strobe
  * @fled_cdev: the flash LED to set strobe on
index 5116f9a..210d57b 100644 (file)
@@ -44,12 +44,6 @@ int led_classdev_multicolor_register_ext(struct device *parent,
                                            struct led_classdev_mc *mcled_cdev,
                                            struct led_init_data *init_data);
 
-static inline int led_classdev_multicolor_register(struct device *parent,
-                                           struct led_classdev_mc *mcled_cdev)
-{
-       return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
-}
-
 /**
  * led_classdev_multicolor_unregister - unregisters an object of led_classdev
  *                                     class with support for multicolor LEDs
@@ -68,13 +62,6 @@ int devm_led_classdev_multicolor_register_ext(struct device *parent,
                                          struct led_classdev_mc *mcled_cdev,
                                          struct led_init_data *init_data);
 
-static inline int devm_led_classdev_multicolor_register(struct device *parent,
-                                    struct led_classdev_mc *mcled_cdev)
-{
-       return devm_led_classdev_multicolor_register_ext(parent, mcled_cdev,
-                                                        NULL);
-}
-
 void devm_led_classdev_multicolor_unregister(struct device *parent,
                                            struct led_classdev_mc *mcled_cdev);
 #else
@@ -83,27 +70,33 @@ static inline int led_classdev_multicolor_register_ext(struct device *parent,
                                            struct led_classdev_mc *mcled_cdev,
                                            struct led_init_data *init_data)
 {
-       return -EINVAL;
-}
-
-static inline int led_classdev_multicolor_register(struct device *parent,
-                                           struct led_classdev_mc *mcled_cdev)
-{
-       return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
+       return 0;
 }
 
 static inline void led_classdev_multicolor_unregister(struct led_classdev_mc *mcled_cdev) {};
 static inline int led_mc_calc_color_components(struct led_classdev_mc *mcled_cdev,
                                               enum led_brightness brightness)
 {
-       return -EINVAL;
+       return 0;
 }
 
 static inline int devm_led_classdev_multicolor_register_ext(struct device *parent,
                                          struct led_classdev_mc *mcled_cdev,
                                          struct led_init_data *init_data)
 {
-       return -EINVAL;
+       return 0;
+}
+
+static inline void devm_led_classdev_multicolor_unregister(struct device *parent,
+                                           struct led_classdev_mc *mcled_cdev)
+{};
+
+#endif  /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */
+
+static inline int led_classdev_multicolor_register(struct device *parent,
+                                           struct led_classdev_mc *mcled_cdev)
+{
+       return led_classdev_multicolor_register_ext(parent, mcled_cdev, NULL);
 }
 
 static inline int devm_led_classdev_multicolor_register(struct device *parent,
@@ -113,9 +106,4 @@ static inline int devm_led_classdev_multicolor_register(struct device *parent,
                                                         NULL);
 }
 
-static inline void devm_led_classdev_multicolor_unregister(struct device *parent,
-                                           struct led_classdev_mc *mcled_cdev)
-{};
-
-#endif  /* IS_ENABLED(CONFIG_LEDS_CLASS_MULTICOLOR) */
 #endif /* _LINUX_MULTICOLOR_LEDS_H_INCLUDED */
index 6a8d640..329fd91 100644 (file)
@@ -63,8 +63,8 @@ struct led_hw_trigger_type {
 
 struct led_classdev {
        const char              *name;
-       enum led_brightness      brightness;
-       enum led_brightness      max_brightness;
+       unsigned int brightness;
+       unsigned int max_brightness;
        int                      flags;
 
        /* Lower 16 bits reflect status */
@@ -253,8 +253,7 @@ void led_blink_set_oneshot(struct led_classdev *led_cdev,
  * software blink timer that implements blinking when the
  * hardware doesn't. This function is guaranteed not to sleep.
  */
-void led_set_brightness(struct led_classdev *led_cdev,
-                       enum led_brightness brightness);
+void led_set_brightness(struct led_classdev *led_cdev, unsigned int brightness);
 
 /**
  * led_set_brightness_sync - set LED brightness synchronously
@@ -267,8 +266,7 @@ void led_set_brightness(struct led_classdev *led_cdev,
  *
  * Returns: 0 on success or negative error value on failure
  */
-int led_set_brightness_sync(struct led_classdev *led_cdev,
-                           enum led_brightness value);
+int led_set_brightness_sync(struct led_classdev *led_cdev, unsigned int value);
 
 /**
  * led_update_brightness - update LED brightness
@@ -565,7 +563,7 @@ static inline void ledtrig_cpu(enum cpu_led_event evt)
 
 #ifdef CONFIG_LEDS_BRIGHTNESS_HW_CHANGED
 void led_classdev_notify_brightness_hw_changed(
-       struct led_classdev *led_cdev, enum led_brightness brightness);
+       struct led_classdev *led_cdev, unsigned int brightness);
 #else
 static inline void led_classdev_notify_brightness_hw_changed(
        struct led_classdev *led_cdev, enum led_brightness brightness) { }
index 40f5be5..5ea9ccf 100644 (file)
@@ -3,9 +3,6 @@
  * Common LiteX header providing
  * helper functions for accessing CSRs.
  *
- * Implementation of the functions is provided by
- * the LiteX SoC Controller driver.
- *
  * Copyright (C) 2019-2020 Antmicro <www.antmicro.com>
  */
 
 #define _LINUX_LITEX_H
 
 #include <linux/io.h>
-#include <linux/types.h>
-#include <linux/compiler_types.h>
+
+/* LiteX SoCs support 8- or 32-bit CSR Bus data width (i.e., subreg. size) */
+#if defined(CONFIG_LITEX_SUBREG_SIZE) && \
+       (CONFIG_LITEX_SUBREG_SIZE == 1 || CONFIG_LITEX_SUBREG_SIZE == 4)
+#define LITEX_SUBREG_SIZE      CONFIG_LITEX_SUBREG_SIZE
+#else
+#error LiteX subregister size (LITEX_SUBREG_SIZE) must be 4 or 1!
+#endif
+#define LITEX_SUBREG_SIZE_BIT   (LITEX_SUBREG_SIZE * 8)
+
+/* LiteX subregisters of any width are always aligned on a 4-byte boundary */
+#define LITEX_SUBREG_ALIGN       0x4
+
+static inline void _write_litex_subregister(u32 val, void __iomem *addr)
+{
+       writel((u32 __force)cpu_to_le32(val), addr);
+}
+
+static inline u32 _read_litex_subregister(void __iomem *addr)
+{
+       return le32_to_cpu((__le32 __force)readl(addr));
+}
 
 /*
- * The parameters below are true for LiteX SoCs configured for 8-bit CSR Bus,
- * 32-bit aligned.
+ * LiteX SoC Generator, depending on the configuration, can split a single
+ * logical CSR (Control&Status Register) into a series of consecutive physical
+ * registers.
+ *
+ * For example, in the configuration with 8-bit CSR Bus, a 32-bit aligned,
+ * 32-bit wide logical CSR will be laid out as four 32-bit physical
+ * subregisters, each one containing one byte of meaningful data.
  *
- * Supporting other configurations will require extending the logic in this
- * header and in the LiteX SoC controller driver.
+ * For details see: https://github.com/enjoy-digital/litex/wiki/CSR-Bus
  */
-#define LITEX_REG_SIZE   0x4
-#define LITEX_SUBREG_SIZE      0x1
-#define LITEX_SUBREG_SIZE_BIT   (LITEX_SUBREG_SIZE * 8)
 
-#define WRITE_LITEX_SUBREGISTER(val, base_offset, subreg_id) \
-       writel((u32 __force)cpu_to_le32(val), base_offset + (LITEX_REG_SIZE * subreg_id))
+/* number of LiteX subregisters needed to store a register of given reg_size */
+#define _litex_num_subregs(reg_size) \
+       (((reg_size) - 1) / LITEX_SUBREG_SIZE + 1)
 
-#define READ_LITEX_SUBREGISTER(base_offset, subreg_id) \
-       le32_to_cpu((__le32 __force)readl(base_offset + (LITEX_REG_SIZE * subreg_id)))
+/*
+ * since the number of 4-byte aligned subregisters required to store a single
+ * LiteX CSR (MMIO) register varies with LITEX_SUBREG_SIZE, the offset of the
+ * next adjacent LiteX CSR register w.r.t. the offset of the current one also
+ * depends on how many subregisters the latter is spread across
+ */
+#define _next_reg_off(off, size) \
+       ((off) + _litex_num_subregs(size) * LITEX_SUBREG_ALIGN)
 
-void litex_set_reg(void __iomem *reg, unsigned long reg_sz, unsigned long val);
+/*
+ * The purpose of `_litex_[set|get]_reg()` is to implement the logic of
+ * writing to/reading from the LiteX CSR in a single place that can be then
+ * reused by all LiteX drivers via the `litex_[write|read][8|16|32|64]()`
+ * accessors for the appropriate data width.
+ * NOTE: direct use of `_litex_[set|get]_reg()` by LiteX drivers is strongly
+ * discouraged, as they perform no error checking on the requested data width!
+ */
 
-unsigned long litex_get_reg(void __iomem *reg, unsigned long reg_sz);
+/**
+ * _litex_set_reg() - Writes a value to the LiteX CSR (Control&Status Register)
+ * @reg: Address of the CSR
+ * @reg_size: The width of the CSR expressed in the number of bytes
+ * @val: Value to be written to the CSR
+ *
+ * This function splits a single (possibly multi-byte) LiteX CSR write into
+ * a series of subregister writes with a proper offset.
+ * NOTE: caller is responsible for ensuring (0 < reg_size <= sizeof(u64)).
+ */
+static inline void _litex_set_reg(void __iomem *reg, size_t reg_size, u64 val)
+{
+       u8 shift = _litex_num_subregs(reg_size) * LITEX_SUBREG_SIZE_BIT;
+
+       while (shift > 0) {
+               shift -= LITEX_SUBREG_SIZE_BIT;
+               _write_litex_subregister(val >> shift, reg);
+               reg += LITEX_SUBREG_ALIGN;
+       }
+}
+
+/**
+ * _litex_get_reg() - Reads a value of the LiteX CSR (Control&Status Register)
+ * @reg: Address of the CSR
+ * @reg_size: The width of the CSR expressed in the number of bytes
+ *
+ * Return: Value read from the CSR
+ *
+ * This function generates a series of subregister reads with a proper offset
+ * and joins their results into a single (possibly multi-byte) LiteX CSR value.
+ * NOTE: caller is responsible for ensuring (0 < reg_size <= sizeof(u64)).
+ */
+static inline u64 _litex_get_reg(void __iomem *reg, size_t reg_size)
+{
+       u64 r;
+       u8 i;
+
+       r = _read_litex_subregister(reg);
+       for (i = 1; i < _litex_num_subregs(reg_size); i++) {
+               r <<= LITEX_SUBREG_SIZE_BIT;
+               reg += LITEX_SUBREG_ALIGN;
+               r |= _read_litex_subregister(reg);
+       }
+       return r;
+}
 
 static inline void litex_write8(void __iomem *reg, u8 val)
 {
-       WRITE_LITEX_SUBREGISTER(val, reg, 0);
+       _litex_set_reg(reg, sizeof(u8), val);
 }
 
 static inline void litex_write16(void __iomem *reg, u16 val)
 {
-       WRITE_LITEX_SUBREGISTER(val >> 8, reg, 0);
-       WRITE_LITEX_SUBREGISTER(val, reg, 1);
+       _litex_set_reg(reg, sizeof(u16), val);
 }
 
 static inline void litex_write32(void __iomem *reg, u32 val)
 {
-       WRITE_LITEX_SUBREGISTER(val >> 24, reg, 0);
-       WRITE_LITEX_SUBREGISTER(val >> 16, reg, 1);
-       WRITE_LITEX_SUBREGISTER(val >> 8, reg, 2);
-       WRITE_LITEX_SUBREGISTER(val, reg, 3);
+       _litex_set_reg(reg, sizeof(u32), val);
 }
 
 static inline void litex_write64(void __iomem *reg, u64 val)
 {
-       WRITE_LITEX_SUBREGISTER(val >> 56, reg, 0);
-       WRITE_LITEX_SUBREGISTER(val >> 48, reg, 1);
-       WRITE_LITEX_SUBREGISTER(val >> 40, reg, 2);
-       WRITE_LITEX_SUBREGISTER(val >> 32, reg, 3);
-       WRITE_LITEX_SUBREGISTER(val >> 24, reg, 4);
-       WRITE_LITEX_SUBREGISTER(val >> 16, reg, 5);
-       WRITE_LITEX_SUBREGISTER(val >> 8, reg, 6);
-       WRITE_LITEX_SUBREGISTER(val, reg, 7);
+       _litex_set_reg(reg, sizeof(u64), val);
 }
 
 static inline u8 litex_read8(void __iomem *reg)
 {
-       return READ_LITEX_SUBREGISTER(reg, 0);
+       return _litex_get_reg(reg, sizeof(u8));
 }
 
 static inline u16 litex_read16(void __iomem *reg)
 {
-       return (READ_LITEX_SUBREGISTER(reg, 0) << 8)
-               | (READ_LITEX_SUBREGISTER(reg, 1));
+       return _litex_get_reg(reg, sizeof(u16));
 }
 
 static inline u32 litex_read32(void __iomem *reg)
 {
-       return (READ_LITEX_SUBREGISTER(reg, 0) << 24)
-               | (READ_LITEX_SUBREGISTER(reg, 1) << 16)
-               | (READ_LITEX_SUBREGISTER(reg, 2) << 8)
-               | (READ_LITEX_SUBREGISTER(reg, 3));
+       return _litex_get_reg(reg, sizeof(u32));
 }
 
 static inline u64 litex_read64(void __iomem *reg)
 {
-       return ((u64)READ_LITEX_SUBREGISTER(reg, 0) << 56)
-               | ((u64)READ_LITEX_SUBREGISTER(reg, 1) << 48)
-               | ((u64)READ_LITEX_SUBREGISTER(reg, 2) << 40)
-               | ((u64)READ_LITEX_SUBREGISTER(reg, 3) << 32)
-               | ((u64)READ_LITEX_SUBREGISTER(reg, 4) << 24)
-               | ((u64)READ_LITEX_SUBREGISTER(reg, 5) << 16)
-               | ((u64)READ_LITEX_SUBREGISTER(reg, 6) << 8)
-               | ((u64)READ_LITEX_SUBREGISTER(reg, 7));
+       return _litex_get_reg(reg, sizeof(u64));
 }
 
 #endif /* _LINUX_LITEX_H */
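A hedged worked example of the subregister arithmetic above, assuming LITEX_SUBREG_SIZE == 1 (8-bit CSR bus):

	/*
	 * litex_read16(reg) -> _litex_get_reg(reg, 2):
	 * _litex_num_subregs(2) == 2, so two reads are issued,
	 * LITEX_SUBREG_ALIGN (4) bytes apart:
	 */
	r  = _read_litex_subregister(reg);	/* high byte, offset 0 */
	r <<= LITEX_SUBREG_SIZE_BIT;
	r |= _read_litex_subregister(reg + 4);	/* low byte, offset 4 */

With LITEX_SUBREG_SIZE == 4 the same CSR fits in a single subregister, and litex_read16() collapses to one readl().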
index dfd261d..477a597 100644 (file)
@@ -135,17 +135,20 @@ LSM_HOOK(int, 0, inode_follow_link, struct dentry *dentry, struct inode *inode,
 LSM_HOOK(int, 0, inode_permission, struct inode *inode, int mask)
 LSM_HOOK(int, 0, inode_setattr, struct dentry *dentry, struct iattr *attr)
 LSM_HOOK(int, 0, inode_getattr, const struct path *path)
-LSM_HOOK(int, 0, inode_setxattr, struct dentry *dentry, const char *name,
-        const void *value, size_t size, int flags)
+LSM_HOOK(int, 0, inode_setxattr, struct user_namespace *mnt_userns,
+        struct dentry *dentry, const char *name, const void *value,
+        size_t size, int flags)
 LSM_HOOK(void, LSM_RET_VOID, inode_post_setxattr, struct dentry *dentry,
         const char *name, const void *value, size_t size, int flags)
 LSM_HOOK(int, 0, inode_getxattr, struct dentry *dentry, const char *name)
 LSM_HOOK(int, 0, inode_listxattr, struct dentry *dentry)
-LSM_HOOK(int, 0, inode_removexattr, struct dentry *dentry, const char *name)
+LSM_HOOK(int, 0, inode_removexattr, struct user_namespace *mnt_userns,
+        struct dentry *dentry, const char *name)
 LSM_HOOK(int, 0, inode_need_killpriv, struct dentry *dentry)
-LSM_HOOK(int, 0, inode_killpriv, struct dentry *dentry)
-LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct inode *inode,
-        const char *name, void **buffer, bool alloc)
+LSM_HOOK(int, 0, inode_killpriv, struct user_namespace *mnt_userns,
+        struct dentry *dentry)
+LSM_HOOK(int, -EOPNOTSUPP, inode_getsecurity, struct user_namespace *mnt_userns,
+        struct inode *inode, const char *name, void **buffer, bool alloc)
 LSM_HOOK(int, -EOPNOTSUPP, inode_setsecurity, struct inode *inode,
         const char *name, const void *value, size_t size, int flags)
 LSM_HOOK(int, 0, inode_listsecurity, struct inode *inode, char *buffer,
index bdfc8a7..fb7f319 100644 (file)
  * @inode_killpriv:
  *     The setuid bit is being removed.  Remove similar security labels.
  *     Called with the dentry->d_inode->i_mutex held.
+ *     @mnt_userns: user namespace of the mount
  *     @dentry is the dentry being changed.
  *     Return 0 on success.  If error is returned, then the operation
  *     causing setuid bit removal is failed.
index 9004375..27eb383 100644 (file)
@@ -42,7 +42,7 @@ struct device *mdev_get_iommu_device(struct device *dev);
  *                     @mdev: mdev_device structure on of mediated device
  *                           that is being created
  *                     Returns integer: success (0) or error (< 0)
- * @remove:            Called to free resources in parent device's driver for a
+ * @remove:            Called to free resources in parent device's driver for
  *                     a mediated device. It is mandatory to provide 'remove'
  *                     ops.
  *                     @mdev: mdev_device device structure which is being
index 959ad7d..07f5ef8 100644 (file)
@@ -68,7 +68,7 @@ struct mei_cl_driver {
 
        int (*probe)(struct mei_cl_device *cldev,
                     const struct mei_cl_device_id *id);
-       int (*remove)(struct mei_cl_device *cldev);
+       void (*remove)(struct mei_cl_device *cldev);
 };
 
 int __mei_cldev_driver_register(struct mei_cl_driver *cldrv,
index c88bc24..5984fff 100644 (file)
@@ -460,7 +460,7 @@ static inline void memblock_free_late(phys_addr_t base, phys_addr_t size)
 /*
  * Set the allocation direction to bottom-up or top-down.
  */
-static inline void memblock_set_bottom_up(bool enable)
+static inline __init_memblock void memblock_set_bottom_up(bool enable)
 {
        memblock.bottom_up = enable;
 }
@@ -470,7 +470,7 @@ static inline void memblock_set_bottom_up(bool enable)
  * if this is true, that said, memblock will allocate memory
  * in bottom-up direction.
  */
-static inline bool memblock_bottom_up(void)
+static inline __init_memblock bool memblock_bottom_up(void)
 {
        return memblock.bottom_up;
 }
index eeb0b52..0c04d39 100644 (file)
@@ -92,6 +92,10 @@ struct lruvec_stat {
        long count[NR_VM_NODE_STAT_ITEMS];
 };
 
+struct batched_lruvec_stat {
+       s32 count[NR_VM_NODE_STAT_ITEMS];
+};
+
 /*
  * Bitmap of shrinker::id corresponding to memcg-aware shrinkers,
  * which have elements charged to this memcg.
@@ -107,11 +111,17 @@ struct memcg_shrinker_map {
 struct mem_cgroup_per_node {
        struct lruvec           lruvec;
 
-       /* Legacy local VM stats */
+       /*
+	 * Legacy local VM stats. This must remain struct lruvec_stat and
+	 * cannot be optimized to struct batched_lruvec_stat: the batched
+	 * per-cpu counters in lruvec_stat_cpu are bounded by the flushing
+	 * threshold (MEMCG_CHARGE_BATCH * PAGE_SIZE) and so fit in an s32,
+	 * but this field has no upper limit.
+        */
        struct lruvec_stat __percpu *lruvec_stat_local;
 
        /* Subtree VM stats (batched updates) */
-       struct lruvec_stat __percpu *lruvec_stat_cpu;
+       struct batched_lruvec_stat __percpu *lruvec_stat_cpu;
        atomic_long_t           lruvec_stat[NR_VM_NODE_STAT_ITEMS];
 
        unsigned long           lru_zone_size[MAX_NR_ZONES][NR_LRU_LISTS];
@@ -475,19 +485,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
        return (struct obj_cgroup **)(memcg_data & ~MEMCG_DATA_FLAGS_MASK);
 }
 
-/*
- * set_page_objcgs - associate a page with a object cgroups vector
- * @page: a pointer to the page struct
- * @objcgs: a pointer to the object cgroups vector
- *
- * Atomically associates a page with a vector of object cgroups.
- */
-static inline bool set_page_objcgs(struct page *page,
-                                       struct obj_cgroup **objcgs)
-{
-       return !cmpxchg(&page->memcg_data, 0, (unsigned long)objcgs |
-                       MEMCG_DATA_OBJCGS);
-}
 #else
 static inline struct obj_cgroup **page_objcgs(struct page *page)
 {
@@ -498,12 +495,6 @@ static inline struct obj_cgroup **page_objcgs_check(struct page *page)
 {
        return NULL;
 }
-
-static inline bool set_page_objcgs(struct page *page,
-                                       struct obj_cgroup **objcgs)
-{
-       return true;
-}
 #endif
 
 static __always_inline bool memcg_stat_item_in_bytes(int idx)
@@ -689,8 +680,6 @@ struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p);
 
 struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm);
 
-struct mem_cgroup *get_mem_cgroup_from_page(struct page *page);
-
 struct lruvec *lock_page_lruvec(struct page *page);
 struct lruvec *lock_page_lruvec_irq(struct page *page);
 struct lruvec *lock_page_lruvec_irqsave(struct page *page,
@@ -1072,9 +1061,7 @@ static inline void memcg_memory_event_mm(struct mm_struct *mm,
        rcu_read_unlock();
 }
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-void mem_cgroup_split_huge_fixup(struct page *head);
-#endif
+void split_page_memcg(struct page *head, unsigned int nr);
 
 #else /* CONFIG_MEMCG */
 
@@ -1200,11 +1187,6 @@ static inline struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
        return NULL;
 }
 
-static inline struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
-{
-       return NULL;
-}
-
 static inline void mem_cgroup_put(struct mem_cgroup *memcg)
 {
 }
@@ -1416,7 +1398,7 @@ unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
        return 0;
 }
 
-static inline void mem_cgroup_split_huge_fixup(struct page *head)
+static inline void split_page_memcg(struct page *head, unsigned int nr)
 {
 }
 
@@ -1601,9 +1583,6 @@ static inline void memcg_set_shrinker_bit(struct mem_cgroup *memcg,
 #endif
 
 #ifdef CONFIG_MEMCG_KMEM
-int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
-                       unsigned int nr_pages);
-void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages);
 int __memcg_kmem_charge_page(struct page *page, gfp_t gfp, int order);
 void __memcg_kmem_uncharge_page(struct page *page, int order);
 
index 439a89e..4da95e6 100644 (file)
@@ -27,9 +27,8 @@ struct memory_block {
        unsigned long start_section_nr;
        unsigned long state;            /* serialized by the dev->lock */
        int online_type;                /* for passing data to online routine */
-       int phys_device;                /* to which fru does this belong? */
-       struct device dev;
        int nid;                        /* NID for this memory block */
+       struct device dev;
 };
 
 int arch_get_memory_phys_device(unsigned long start_pfn);
index 15acce5..7288aa5 100644 (file)
@@ -16,22 +16,7 @@ struct resource;
 struct vmem_altmap;
 
 #ifdef CONFIG_MEMORY_HOTPLUG
-/*
- * Return page for the valid pfn only if the page is online. All pfn
- * walkers which rely on the fully initialized page->flags and others
- * should use this rather than pfn_valid && pfn_to_page
- */
-#define pfn_to_online_page(pfn)                                           \
-({                                                                \
-       struct page *___page = NULL;                               \
-       unsigned long ___pfn = pfn;                                \
-       unsigned long ___nr = pfn_to_section_nr(___pfn);           \
-                                                                  \
-       if (___nr < NR_MEM_SECTIONS && online_section_nr(___nr) && \
-           pfn_valid_within(___pfn))                              \
-               ___page = pfn_to_page(___pfn);                     \
-       ___page;                                                   \
-})
+struct page *pfn_to_online_page(unsigned long pfn);
 
 /*
  * Types for free bootmem stored in page->lru.next. These have to be in
@@ -68,7 +53,7 @@ typedef int __bitwise mhp_t;
  * with this flag set, the resource pointer must no longer be used as it
  * might be stale, or the resource might have changed.
  */
-#define MEMHP_MERGE_RESOURCE   ((__force mhp_t)BIT(0))
+#define MHP_MERGE_RESOURCE     ((__force mhp_t)BIT(0))
 
 /*
  * Extended parameters for memory hotplug:
@@ -81,6 +66,9 @@ struct mhp_params {
        pgprot_t pgprot;
 };
 
+bool mhp_range_allowed(u64 start, u64 size, bool need_mapping);
+struct range mhp_get_pluggable_range(bool need_mapping);
+
 /*
  * Zone resizing functions
  *
@@ -131,10 +119,10 @@ extern int arch_add_memory(int nid, u64 start, u64 size,
                           struct mhp_params *params);
 extern u64 max_mem_size;
 
-extern int memhp_online_type_from_str(const char *str);
+extern int mhp_online_type_from_str(const char *str);
 
 /* Default online_type (MMOP_*) when new memory blocks are added. */
-extern int memhp_default_online_type;
+extern int mhp_default_online_type;
 /* If movable_node boot option specified */
 extern bool movable_node_enabled;
 static inline bool movable_node_is_enabled(void)
@@ -281,6 +269,13 @@ static inline bool movable_node_is_enabled(void)
 }
 #endif /* ! CONFIG_MEMORY_HOTPLUG */
 
+/*
+ * Keep this declaration outside CONFIG_MEMORY_HOTPLUG as some
+ * platforms might override and use arch_get_mappable_range()
+ * for internal, non-memory-hotplug purposes.
+ */
+struct range arch_get_mappable_range(void);
+
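+/*
+ * Hedged sketch of an architecture override of the declaration above;
+ * the bound used is illustrative only:
+ *
+ *   struct range arch_get_mappable_range(void)
+ *   {
+ *           struct range mhp_range = {
+ *                   .start = 0,
+ *                   .end   = arch_max_mappable_addr() - 1, // hypothetical
+ *           };
+ *           return mhp_range;
+ *   }
+ */
+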
 #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT)
 /*
  * pgdat resizing functions
index 79c49e7..f5b464d 100644 (file)
@@ -137,6 +137,7 @@ void *devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap);
 void devm_memunmap_pages(struct device *dev, struct dev_pagemap *pgmap);
 struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
                struct dev_pagemap *pgmap);
+bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn);
 
 unsigned long vmem_altmap_offset(struct vmem_altmap *altmap);
 void vmem_altmap_free(struct vmem_altmap *altmap, unsigned long nr_pfns);
@@ -165,6 +166,11 @@ static inline struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
        return NULL;
 }
 
+static inline bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
+{
+       return false;
+}
+
 static inline unsigned long vmem_altmap_offset(struct vmem_altmap *altmap)
 {
        return 0;
diff --git a/include/linux/mfd/intel_msic.h b/include/linux/mfd/intel_msic.h
deleted file mode 100644 (file)
index 317e860..0000000
+++ /dev/null
@@ -1,453 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Core interface for Intel MSIC
- *
- * Copyright (C) 2011, Intel Corporation
- * Author: Mika Westerberg <mika.westerberg@linux.intel.com>
- */
-
-#ifndef __LINUX_MFD_INTEL_MSIC_H__
-#define __LINUX_MFD_INTEL_MSIC_H__
-
-/* ID */
-#define INTEL_MSIC_ID0                 0x000   /* RO */
-#define INTEL_MSIC_ID1                 0x001   /* RO */
-
-/* IRQ */
-#define INTEL_MSIC_IRQLVL1             0x002
-#define INTEL_MSIC_ADC1INT             0x003
-#define INTEL_MSIC_CCINT               0x004
-#define INTEL_MSIC_PWRSRCINT           0x005
-#define INTEL_MSIC_PWRSRCINT1          0x006
-#define INTEL_MSIC_CHRINT              0x007
-#define INTEL_MSIC_CHRINT1             0x008
-#define INTEL_MSIC_RTCIRQ              0x009
-#define INTEL_MSIC_GPIO0LVIRQ          0x00a
-#define INTEL_MSIC_GPIO1LVIRQ          0x00b
-#define INTEL_MSIC_GPIOHVIRQ           0x00c
-#define INTEL_MSIC_VRINT               0x00d
-#define INTEL_MSIC_OCAUDIO             0x00e
-#define INTEL_MSIC_ACCDET              0x00f
-#define INTEL_MSIC_RESETIRQ1           0x010
-#define INTEL_MSIC_RESETIRQ2           0x011
-#define INTEL_MSIC_MADC1INT            0x012
-#define INTEL_MSIC_MCCINT              0x013
-#define INTEL_MSIC_MPWRSRCINT          0x014
-#define INTEL_MSIC_MPWRSRCINT1         0x015
-#define INTEL_MSIC_MCHRINT             0x016
-#define INTEL_MSIC_MCHRINT1            0x017
-#define INTEL_MSIC_RTCIRQMASK          0x018
-#define INTEL_MSIC_GPIO0LVIRQMASK      0x019
-#define INTEL_MSIC_GPIO1LVIRQMASK      0x01a
-#define INTEL_MSIC_GPIOHVIRQMASK       0x01b
-#define INTEL_MSIC_VRINTMASK           0x01c
-#define INTEL_MSIC_OCAUDIOMASK         0x01d
-#define INTEL_MSIC_ACCDETMASK          0x01e
-#define INTEL_MSIC_RESETIRQ1MASK       0x01f
-#define INTEL_MSIC_RESETIRQ2MASK       0x020
-#define INTEL_MSIC_IRQLVL1MSK          0x021
-#define INTEL_MSIC_PBCONFIG            0x03e
-#define INTEL_MSIC_PBSTATUS            0x03f   /* RO */
-
-/* GPIO */
-#define INTEL_MSIC_GPIO0LV7CTLO                0x040
-#define INTEL_MSIC_GPIO0LV6CTLO                0x041
-#define INTEL_MSIC_GPIO0LV5CTLO                0x042
-#define INTEL_MSIC_GPIO0LV4CTLO                0x043
-#define INTEL_MSIC_GPIO0LV3CTLO                0x044
-#define INTEL_MSIC_GPIO0LV2CTLO                0x045
-#define INTEL_MSIC_GPIO0LV1CTLO                0x046
-#define INTEL_MSIC_GPIO0LV0CTLO                0x047
-#define INTEL_MSIC_GPIO1LV7CTLOS       0x048
-#define INTEL_MSIC_GPIO1LV6CTLO                0x049
-#define INTEL_MSIC_GPIO1LV5CTLO                0x04a
-#define INTEL_MSIC_GPIO1LV4CTLO                0x04b
-#define INTEL_MSIC_GPIO1LV3CTLO                0x04c
-#define INTEL_MSIC_GPIO1LV2CTLO                0x04d
-#define INTEL_MSIC_GPIO1LV1CTLO                0x04e
-#define INTEL_MSIC_GPIO1LV0CTLO                0x04f
-#define INTEL_MSIC_GPIO0LV7CTLI                0x050
-#define INTEL_MSIC_GPIO0LV6CTLI                0x051
-#define INTEL_MSIC_GPIO0LV5CTLI                0x052
-#define INTEL_MSIC_GPIO0LV4CTLI                0x053
-#define INTEL_MSIC_GPIO0LV3CTLI                0x054
-#define INTEL_MSIC_GPIO0LV2CTLI                0x055
-#define INTEL_MSIC_GPIO0LV1CTLI                0x056
-#define INTEL_MSIC_GPIO0LV0CTLI                0x057
-#define INTEL_MSIC_GPIO1LV7CTLIS       0x058
-#define INTEL_MSIC_GPIO1LV6CTLI                0x059
-#define INTEL_MSIC_GPIO1LV5CTLI                0x05a
-#define INTEL_MSIC_GPIO1LV4CTLI                0x05b
-#define INTEL_MSIC_GPIO1LV3CTLI                0x05c
-#define INTEL_MSIC_GPIO1LV2CTLI                0x05d
-#define INTEL_MSIC_GPIO1LV1CTLI                0x05e
-#define INTEL_MSIC_GPIO1LV0CTLI                0x05f
-#define INTEL_MSIC_PWM0CLKDIV1         0x061
-#define INTEL_MSIC_PWM0CLKDIV0         0x062
-#define INTEL_MSIC_PWM1CLKDIV1         0x063
-#define INTEL_MSIC_PWM1CLKDIV0         0x064
-#define INTEL_MSIC_PWM2CLKDIV1         0x065
-#define INTEL_MSIC_PWM2CLKDIV0         0x066
-#define INTEL_MSIC_PWM0DUTYCYCLE       0x067
-#define INTEL_MSIC_PWM1DUTYCYCLE       0x068
-#define INTEL_MSIC_PWM2DUTYCYCLE       0x069
-#define INTEL_MSIC_GPIO0HV3CTLO                0x06d
-#define INTEL_MSIC_GPIO0HV2CTLO                0x06e
-#define INTEL_MSIC_GPIO0HV1CTLO                0x06f
-#define INTEL_MSIC_GPIO0HV0CTLO                0x070
-#define INTEL_MSIC_GPIO1HV3CTLO                0x071
-#define INTEL_MSIC_GPIO1HV2CTLO                0x072
-#define INTEL_MSIC_GPIO1HV1CTLO                0x073
-#define INTEL_MSIC_GPIO1HV0CTLO                0x074
-#define INTEL_MSIC_GPIO0HV3CTLI                0x075
-#define INTEL_MSIC_GPIO0HV2CTLI                0x076
-#define INTEL_MSIC_GPIO0HV1CTLI                0x077
-#define INTEL_MSIC_GPIO0HV0CTLI                0x078
-#define INTEL_MSIC_GPIO1HV3CTLI                0x079
-#define INTEL_MSIC_GPIO1HV2CTLI                0x07a
-#define INTEL_MSIC_GPIO1HV1CTLI                0x07b
-#define INTEL_MSIC_GPIO1HV0CTLI                0x07c
-
-/* SVID */
-#define INTEL_MSIC_SVIDCTRL0           0x080
-#define INTEL_MSIC_SVIDCTRL1           0x081
-#define INTEL_MSIC_SVIDCTRL2           0x082
-#define INTEL_MSIC_SVIDTXLASTPKT3      0x083   /* RO */
-#define INTEL_MSIC_SVIDTXLASTPKT2      0x084   /* RO */
-#define INTEL_MSIC_SVIDTXLASTPKT1      0x085   /* RO */
-#define INTEL_MSIC_SVIDTXLASTPKT0      0x086   /* RO */
-#define INTEL_MSIC_SVIDPKTOUTBYTE3     0x087
-#define INTEL_MSIC_SVIDPKTOUTBYTE2     0x088
-#define INTEL_MSIC_SVIDPKTOUTBYTE1     0x089
-#define INTEL_MSIC_SVIDPKTOUTBYTE0     0x08a
-#define INTEL_MSIC_SVIDRXVPDEBUG1      0x08b
-#define INTEL_MSIC_SVIDRXVPDEBUG0      0x08c
-#define INTEL_MSIC_SVIDRXLASTPKT3      0x08d   /* RO */
-#define INTEL_MSIC_SVIDRXLASTPKT2      0x08e   /* RO */
-#define INTEL_MSIC_SVIDRXLASTPKT1      0x08f   /* RO */
-#define INTEL_MSIC_SVIDRXLASTPKT0      0x090   /* RO */
-#define INTEL_MSIC_SVIDRXCHKSTATUS3    0x091   /* RO */
-#define INTEL_MSIC_SVIDRXCHKSTATUS2    0x092   /* RO */
-#define INTEL_MSIC_SVIDRXCHKSTATUS1    0x093   /* RO */
-#define INTEL_MSIC_SVIDRXCHKSTATUS0    0x094   /* RO */
-
-/* VREG */
-#define INTEL_MSIC_VCCLATCH            0x0c0
-#define INTEL_MSIC_VNNLATCH            0x0c1
-#define INTEL_MSIC_VCCCNT              0x0c2
-#define INTEL_MSIC_SMPSRAMP            0x0c3
-#define INTEL_MSIC_VNNCNT              0x0c4
-#define INTEL_MSIC_VNNAONCNT           0x0c5
-#define INTEL_MSIC_VCC122AONCNT                0x0c6
-#define INTEL_MSIC_V180AONCNT          0x0c7
-#define INTEL_MSIC_V500CNT             0x0c8
-#define INTEL_MSIC_VIHFCNT             0x0c9
-#define INTEL_MSIC_LDORAMP1            0x0ca
-#define INTEL_MSIC_LDORAMP2            0x0cb
-#define INTEL_MSIC_VCC108AONCNT                0x0cc
-#define INTEL_MSIC_VCC108ASCNT         0x0cd
-#define INTEL_MSIC_VCC108CNT           0x0ce
-#define INTEL_MSIC_VCCA100ASCNT                0x0cf
-#define INTEL_MSIC_VCCA100CNT          0x0d0
-#define INTEL_MSIC_VCC180AONCNT                0x0d1
-#define INTEL_MSIC_VCC180CNT           0x0d2
-#define INTEL_MSIC_VCC330CNT           0x0d3
-#define INTEL_MSIC_VUSB330CNT          0x0d4
-#define INTEL_MSIC_VCCSDIOCNT          0x0d5
-#define INTEL_MSIC_VPROG1CNT           0x0d6
-#define INTEL_MSIC_VPROG2CNT           0x0d7
-#define INTEL_MSIC_VEMMCSCNT           0x0d8
-#define INTEL_MSIC_VEMMC1CNT           0x0d9
-#define INTEL_MSIC_VEMMC2CNT           0x0da
-#define INTEL_MSIC_VAUDACNT            0x0db
-#define INTEL_MSIC_VHSPCNT             0x0dc
-#define INTEL_MSIC_VHSNCNT             0x0dd
-#define INTEL_MSIC_VHDMICNT            0x0de
-#define INTEL_MSIC_VOTGCNT             0x0df
-#define INTEL_MSIC_V1P35CNT            0x0e0
-#define INTEL_MSIC_V330AONCNT          0x0e1
-
-/* RESET */
-#define INTEL_MSIC_CHIPCNTRL           0x100   /* WO */
-#define INTEL_MSIC_ERCONFIG            0x101
-
-/* BURST */
-#define INTEL_MSIC_BATCURRENTLIMIT12   0x102
-#define INTEL_MSIC_BATTIMELIMIT12      0x103
-#define INTEL_MSIC_BATTIMELIMIT3       0x104
-#define INTEL_MSIC_BATTIMEDB           0x105
-#define INTEL_MSIC_BRSTCONFIGOUTPUTS   0x106
-#define INTEL_MSIC_BRSTCONFIGACTIONS   0x107
-#define INTEL_MSIC_BURSTCONTROLSTATUS  0x108
-
-/* RTC */
-#define INTEL_MSIC_RTCB1               0x140   /* RO */
-#define INTEL_MSIC_RTCB2               0x141   /* RO */
-#define INTEL_MSIC_RTCB3               0x142   /* RO */
-#define INTEL_MSIC_RTCB4               0x143   /* RO */
-#define INTEL_MSIC_RTCOB1              0x144
-#define INTEL_MSIC_RTCOB2              0x145
-#define INTEL_MSIC_RTCOB3              0x146
-#define INTEL_MSIC_RTCOB4              0x147
-#define INTEL_MSIC_RTCAB1              0x148
-#define INTEL_MSIC_RTCAB2              0x149
-#define INTEL_MSIC_RTCAB3              0x14a
-#define INTEL_MSIC_RTCAB4              0x14b
-#define INTEL_MSIC_RTCWAB1             0x14c
-#define INTEL_MSIC_RTCWAB2             0x14d
-#define INTEL_MSIC_RTCWAB3             0x14e
-#define INTEL_MSIC_RTCWAB4             0x14f
-#define INTEL_MSIC_RTCSC1              0x150
-#define INTEL_MSIC_RTCSC2              0x151
-#define INTEL_MSIC_RTCSC3              0x152
-#define INTEL_MSIC_RTCSC4              0x153
-#define INTEL_MSIC_RTCSTATUS           0x154   /* RO */
-#define INTEL_MSIC_RTCCONFIG1          0x155
-#define INTEL_MSIC_RTCCONFIG2          0x156
-
-/* CHARGER */
-#define INTEL_MSIC_BDTIMER             0x180
-#define INTEL_MSIC_BATTRMV             0x181
-#define INTEL_MSIC_VBUSDET             0x182
-#define INTEL_MSIC_VBUSDET1            0x183
-#define INTEL_MSIC_ADPHVDET            0x184
-#define INTEL_MSIC_ADPLVDET            0x185
-#define INTEL_MSIC_ADPDETDBDM          0x186
-#define INTEL_MSIC_LOWBATTDET          0x187
-#define INTEL_MSIC_CHRCTRL             0x188
-#define INTEL_MSIC_CHRCVOLTAGE         0x189
-#define INTEL_MSIC_CHRCCURRENT         0x18a
-#define INTEL_MSIC_SPCHARGER           0x18b
-#define INTEL_MSIC_CHRTTIME            0x18c
-#define INTEL_MSIC_CHRCTRL1            0x18d
-#define INTEL_MSIC_PWRSRCLMT           0x18e
-#define INTEL_MSIC_CHRSTWDT            0x18f
-#define INTEL_MSIC_WDTWRITE            0x190   /* WO */
-#define INTEL_MSIC_CHRSAFELMT          0x191
-#define INTEL_MSIC_SPWRSRCINT          0x192   /* RO */
-#define INTEL_MSIC_SPWRSRCINT1         0x193   /* RO */
-#define INTEL_MSIC_CHRLEDPWM           0x194
-#define INTEL_MSIC_CHRLEDCTRL          0x195
-
-/* ADC */
-#define INTEL_MSIC_ADC1CNTL1           0x1c0
-#define INTEL_MSIC_ADC1CNTL2           0x1c1
-#define INTEL_MSIC_ADC1CNTL3           0x1c2
-#define INTEL_MSIC_ADC1OFFSETH         0x1c3   /* RO */
-#define INTEL_MSIC_ADC1OFFSETL         0x1c4   /* RO */
-#define INTEL_MSIC_ADC1ADDR0           0x1c5
-#define INTEL_MSIC_ADC1ADDR1           0x1c6
-#define INTEL_MSIC_ADC1ADDR2           0x1c7
-#define INTEL_MSIC_ADC1ADDR3           0x1c8
-#define INTEL_MSIC_ADC1ADDR4           0x1c9
-#define INTEL_MSIC_ADC1ADDR5           0x1ca
-#define INTEL_MSIC_ADC1ADDR6           0x1cb
-#define INTEL_MSIC_ADC1ADDR7           0x1cc
-#define INTEL_MSIC_ADC1ADDR8           0x1cd
-#define INTEL_MSIC_ADC1ADDR9           0x1ce
-#define INTEL_MSIC_ADC1ADDR10          0x1cf
-#define INTEL_MSIC_ADC1ADDR11          0x1d0
-#define INTEL_MSIC_ADC1ADDR12          0x1d1
-#define INTEL_MSIC_ADC1ADDR13          0x1d2
-#define INTEL_MSIC_ADC1ADDR14          0x1d3
-#define INTEL_MSIC_ADC1SNS0H           0x1d4   /* RO */
-#define INTEL_MSIC_ADC1SNS0L           0x1d5   /* RO */
-#define INTEL_MSIC_ADC1SNS1H           0x1d6   /* RO */
-#define INTEL_MSIC_ADC1SNS1L           0x1d7   /* RO */
-#define INTEL_MSIC_ADC1SNS2H           0x1d8   /* RO */
-#define INTEL_MSIC_ADC1SNS2L           0x1d9   /* RO */
-#define INTEL_MSIC_ADC1SNS3H           0x1da   /* RO */
-#define INTEL_MSIC_ADC1SNS3L           0x1db   /* RO */
-#define INTEL_MSIC_ADC1SNS4H           0x1dc   /* RO */
-#define INTEL_MSIC_ADC1SNS4L           0x1dd   /* RO */
-#define INTEL_MSIC_ADC1SNS5H           0x1de   /* RO */
-#define INTEL_MSIC_ADC1SNS5L           0x1df   /* RO */
-#define INTEL_MSIC_ADC1SNS6H           0x1e0   /* RO */
-#define INTEL_MSIC_ADC1SNS6L           0x1e1   /* RO */
-#define INTEL_MSIC_ADC1SNS7H           0x1e2   /* RO */
-#define INTEL_MSIC_ADC1SNS7L           0x1e3   /* RO */
-#define INTEL_MSIC_ADC1SNS8H           0x1e4   /* RO */
-#define INTEL_MSIC_ADC1SNS8L           0x1e5   /* RO */
-#define INTEL_MSIC_ADC1SNS9H           0x1e6   /* RO */
-#define INTEL_MSIC_ADC1SNS9L           0x1e7   /* RO */
-#define INTEL_MSIC_ADC1SNS10H          0x1e8   /* RO */
-#define INTEL_MSIC_ADC1SNS10L          0x1e9   /* RO */
-#define INTEL_MSIC_ADC1SNS11H          0x1ea   /* RO */
-#define INTEL_MSIC_ADC1SNS11L          0x1eb   /* RO */
-#define INTEL_MSIC_ADC1SNS12H          0x1ec   /* RO */
-#define INTEL_MSIC_ADC1SNS12L          0x1ed   /* RO */
-#define INTEL_MSIC_ADC1SNS13H          0x1ee   /* RO */
-#define INTEL_MSIC_ADC1SNS13L          0x1ef   /* RO */
-#define INTEL_MSIC_ADC1SNS14H          0x1f0   /* RO */
-#define INTEL_MSIC_ADC1SNS14L          0x1f1   /* RO */
-#define INTEL_MSIC_ADC1BV0H            0x1f2   /* RO */
-#define INTEL_MSIC_ADC1BV0L            0x1f3   /* RO */
-#define INTEL_MSIC_ADC1BV1H            0x1f4   /* RO */
-#define INTEL_MSIC_ADC1BV1L            0x1f5   /* RO */
-#define INTEL_MSIC_ADC1BV2H            0x1f6   /* RO */
-#define INTEL_MSIC_ADC1BV2L            0x1f7   /* RO */
-#define INTEL_MSIC_ADC1BV3H            0x1f8   /* RO */
-#define INTEL_MSIC_ADC1BV3L            0x1f9   /* RO */
-#define INTEL_MSIC_ADC1BI0H            0x1fa   /* RO */
-#define INTEL_MSIC_ADC1BI0L            0x1fb   /* RO */
-#define INTEL_MSIC_ADC1BI1H            0x1fc   /* RO */
-#define INTEL_MSIC_ADC1BI1L            0x1fd   /* RO */
-#define INTEL_MSIC_ADC1BI2H            0x1fe   /* RO */
-#define INTEL_MSIC_ADC1BI2L            0x1ff   /* RO */
-#define INTEL_MSIC_ADC1BI3H            0x200   /* RO */
-#define INTEL_MSIC_ADC1BI3L            0x201   /* RO */
-#define INTEL_MSIC_CCCNTL              0x202
-#define INTEL_MSIC_CCOFFSETH           0x203   /* RO */
-#define INTEL_MSIC_CCOFFSETL           0x204   /* RO */
-#define INTEL_MSIC_CCADCHA             0x205   /* RO */
-#define INTEL_MSIC_CCADCLA             0x206   /* RO */
-
-/* AUDIO */
-#define INTEL_MSIC_AUDPLLCTRL          0x240
-#define INTEL_MSIC_DMICBUF0123         0x241
-#define INTEL_MSIC_DMICBUF45           0x242
-#define INTEL_MSIC_DMICGPO             0x244
-#define INTEL_MSIC_DMICMUX             0x245
-#define INTEL_MSIC_DMICCLK             0x246
-#define INTEL_MSIC_MICBIAS             0x247
-#define INTEL_MSIC_ADCCONFIG           0x248
-#define INTEL_MSIC_MICAMP1             0x249
-#define INTEL_MSIC_MICAMP2             0x24a
-#define INTEL_MSIC_NOISEMUX            0x24b
-#define INTEL_MSIC_AUDIOMUX12          0x24c
-#define INTEL_MSIC_AUDIOMUX34          0x24d
-#define INTEL_MSIC_AUDIOSINC           0x24e
-#define INTEL_MSIC_AUDIOTXEN           0x24f
-#define INTEL_MSIC_HSEPRXCTRL          0x250
-#define INTEL_MSIC_IHFRXCTRL           0x251
-#define INTEL_MSIC_VOICETXVOL          0x252
-#define INTEL_MSIC_SIDETONEVOL         0x253
-#define INTEL_MSIC_MUSICSHARVOL                0x254
-#define INTEL_MSIC_VOICETXCTRL         0x255
-#define INTEL_MSIC_HSMIXER             0x256
-#define INTEL_MSIC_DACCONFIG           0x257
-#define INTEL_MSIC_SOFTMUTE            0x258
-#define INTEL_MSIC_HSLVOLCTRL          0x259
-#define INTEL_MSIC_HSRVOLCTRL          0x25a
-#define INTEL_MSIC_IHFLVOLCTRL         0x25b
-#define INTEL_MSIC_IHFRVOLCTRL         0x25c
-#define INTEL_MSIC_DRIVEREN            0x25d
-#define INTEL_MSIC_LINEOUTCTRL         0x25e
-#define INTEL_MSIC_VIB1CTRL1           0x25f
-#define INTEL_MSIC_VIB1CTRL2           0x260
-#define INTEL_MSIC_VIB1CTRL3           0x261
-#define INTEL_MSIC_VIB1SPIPCM_1                0x262
-#define INTEL_MSIC_VIB1SPIPCM_2                0x263
-#define INTEL_MSIC_VIB1CTRL5           0x264
-#define INTEL_MSIC_VIB2CTRL1           0x265
-#define INTEL_MSIC_VIB2CTRL2           0x266
-#define INTEL_MSIC_VIB2CTRL3           0x267
-#define INTEL_MSIC_VIB2SPIPCM_1                0x268
-#define INTEL_MSIC_VIB2SPIPCM_2                0x269
-#define INTEL_MSIC_VIB2CTRL5           0x26a
-#define INTEL_MSIC_BTNCTRL1            0x26b
-#define INTEL_MSIC_BTNCTRL2            0x26c
-#define INTEL_MSIC_PCM1TXSLOT01                0x26d
-#define INTEL_MSIC_PCM1TXSLOT23                0x26e
-#define INTEL_MSIC_PCM1TXSLOT45                0x26f
-#define INTEL_MSIC_PCM1RXSLOT0123      0x270
-#define INTEL_MSIC_PCM1RXSLOT045       0x271
-#define INTEL_MSIC_PCM2TXSLOT01                0x272
-#define INTEL_MSIC_PCM2TXSLOT23                0x273
-#define INTEL_MSIC_PCM2TXSLOT45                0x274
-#define INTEL_MSIC_PCM2RXSLOT01                0x275
-#define INTEL_MSIC_PCM2RXSLOT23                0x276
-#define INTEL_MSIC_PCM2RXSLOT45                0x277
-#define INTEL_MSIC_PCM1CTRL1           0x278
-#define INTEL_MSIC_PCM1CTRL2           0x279
-#define INTEL_MSIC_PCM1CTRL3           0x27a
-#define INTEL_MSIC_PCM2CTRL1           0x27b
-#define INTEL_MSIC_PCM2CTRL2           0x27c
-
-/* HDMI */
-#define INTEL_MSIC_HDMIPUEN            0x280
-#define INTEL_MSIC_HDMISTATUS          0x281   /* RO */
-
-/* Physical address of the start of the MSIC interrupt tree in SRAM */
-#define INTEL_MSIC_IRQ_PHYS_BASE       0xffff7fc0
-
-/**
- * struct intel_msic_gpio_pdata - platform data for the MSIC GPIO driver
- * @gpio_base: base number for the GPIOs
- */
-struct intel_msic_gpio_pdata {
-       unsigned        gpio_base;
-};
-
-/**
- * struct intel_msic_ocd_pdata - platform data for the MSIC OCD driver
- * @gpio: GPIO number used for OCD interrupts
- *
- * The MSIC MFD driver converts @gpio into an IRQ number and passes it to
- * the OCD driver as %IORESOURCE_IRQ.
- */
-struct intel_msic_ocd_pdata {
-       unsigned        gpio;
-};
-
-/* MSIC embedded blocks (subdevices) */
-enum intel_msic_block {
-       INTEL_MSIC_BLOCK_TOUCH,
-       INTEL_MSIC_BLOCK_ADC,
-       INTEL_MSIC_BLOCK_BATTERY,
-       INTEL_MSIC_BLOCK_GPIO,
-       INTEL_MSIC_BLOCK_AUDIO,
-       INTEL_MSIC_BLOCK_HDMI,
-       INTEL_MSIC_BLOCK_THERMAL,
-       INTEL_MSIC_BLOCK_POWER_BTN,
-       INTEL_MSIC_BLOCK_OCD,
-
-       INTEL_MSIC_BLOCK_LAST,
-};
-
-/**
- * struct intel_msic_platform_data - platform data for the MSIC driver
- * @irq: array of interrupt numbers, one per device. If @irq is set to %0
- *      for a given block, the corresponding platform device is not
- *      created. For devices which don't have an interrupt, use %0xff
- *      (this is same as in SFI spec).
- * @gpio: platform data for the MSIC GPIO driver
- * @ocd: platform data for the MSIC OCD driver
- *
- * Once the MSIC driver is initialized, the register interface is ready to
- * use. All the platform devices for subdevices are created after the
- * register interface is ready so that we can guarantee its availability to
- * the subdevice drivers.
- *
- * Interrupt numbers are passed to the subdevices via %IORESOURCE_IRQ
- * resources of the created platform device.
- */
-struct intel_msic_platform_data {
-       int                             irq[INTEL_MSIC_BLOCK_LAST];
-       struct intel_msic_gpio_pdata    *gpio;
-       struct intel_msic_ocd_pdata     *ocd;
-};
-
-struct intel_msic;
-
-extern int intel_msic_reg_read(unsigned short reg, u8 *val);
-extern int intel_msic_reg_write(unsigned short reg, u8 val);
-extern int intel_msic_reg_update(unsigned short reg, u8 val, u8 mask);
-extern int intel_msic_bulk_read(unsigned short *reg, u8 *buf, size_t count);
-extern int intel_msic_bulk_write(unsigned short *reg, u8 *buf, size_t count);
-
-/*
- * pdev_to_intel_msic - gets an MSIC instance from the platform device
- * @pdev: platform device pointer
- *
- * The client drivers need to have pointer to the MSIC instance if they
- * want to call intel_msic_irq_read(). This macro can be used for
- * convenience to get the MSIC pointer from @pdev where needed. This is
- * _only_ valid for devices which are managed by the MSIC.
- */
-#define pdev_to_intel_msic(pdev)       (dev_get_drvdata(pdev->dev.parent))
-
-extern int intel_msic_irq_read(struct intel_msic *msic, unsigned short reg,
-                              u8 *val);
-
-#endif /* __LINUX_MFD_INTEL_MSIC_H__ */
index ece53a2..d26acc8 100644 (file)
@@ -279,7 +279,7 @@ struct mhi_controller_config {
        u32 num_channels;
        const struct mhi_channel_config *ch_cfg;
        u32 num_events;
-       const struct mhi_event_config *event_cfg;
+       struct mhi_event_config *event_cfg;
        bool use_bounce_buf;
        bool m2_no_db;
 };
@@ -347,12 +347,14 @@ struct mhi_controller_config {
  * @unmap_single: CB function to destroy TRE buffer
  * @read_reg: Read a MHI register via the physical link (required)
  * @write_reg: Write a MHI register via the physical link (required)
+ * @reset: Controller specific reset function (optional)
  * @buffer_len: Bounce buffer length
  * @index: Index of the MHI controller instance
  * @bounce_buf: Use of bounce buffer
  * @fbc_download: MHI host needs to do complete image transfer (optional)
  * @pre_init: MHI host needs to do pre-initialization before power up
  * @wake_set: Device wakeup set flag
+ * @irq_flags: irq flags passed to request_irq (optional)
  *
  * Fields marked as (required) need to be populated by the controller driver
  * before calling mhi_register_controller(). For the fields marked as (optional)
@@ -437,6 +439,7 @@ struct mhi_controller {
                        u32 *out);
        void (*write_reg)(struct mhi_controller *mhi_cntrl, void __iomem *addr,
                          u32 val);
+       void (*reset)(struct mhi_controller *mhi_cntrl);
 
        size_t buffer_len;
        int index;
@@ -444,6 +447,7 @@ struct mhi_controller {
        bool fbc_download;
        bool pre_init;
        bool wake_set;
+       unsigned long irq_flags;
 };
 
 /**
@@ -682,6 +686,13 @@ enum mhi_ee_type mhi_get_exec_env(struct mhi_controller *mhi_cntrl);
 enum mhi_state mhi_get_mhi_state(struct mhi_controller *mhi_cntrl);
 
 /**
+ * mhi_soc_reset - Trigger a device reset. This can be used as a last resort
+ *                to reset and recover a device.
+ * @mhi_cntrl: MHI controller
+ */
+void mhi_soc_reset(struct mhi_controller *mhi_cntrl);
+
+/**
  * mhi_device_get - Disable device low power mode
  * @mhi_dev: Device associated with the channel
  */
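
Editor's note: a controller driver can now supply the optional reset callback and irq_flags when registering, and the core exposes mhi_soc_reset() as a last-resort recovery path. A hedged sketch; my_mhi_reset() and the registration fragment are illustrative, not from this diff:

static void my_mhi_reset(struct mhi_controller *mhi_cntrl)
{
	/* controller-specific SoC reset, e.g. write a reset register */
}

	mhi_cntrl->reset = my_mhi_reset;	/* optional */
	mhi_cntrl->irq_flags = IRQF_SHARED;	/* optional */

	/* later, as a last resort when the device stops responding */
	mhi_soc_reset(mhi_cntrl);
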
index 4594838..3a38963 100644 (file)
@@ -89,7 +89,7 @@ extern int PageMovable(struct page *page);
 extern void __SetPageMovable(struct page *page, struct address_space *mapping);
 extern void __ClearPageMovable(struct page *page);
 #else
-static inline int PageMovable(struct page *page) { return 0; };
+static inline int PageMovable(struct page *page) { return 0; }
 static inline void __SetPageMovable(struct page *page,
                                struct address_space *mapping)
 {
index d75ef8a..b7deb79 100644 (file)
@@ -547,4 +547,11 @@ static inline const char *mlx5_qp_state_str(int state)
        }
 }
 
+static inline int mlx5_get_qp_default_ts(struct mlx5_core_dev *dev)
+{
+       return !MLX5_CAP_ROCE(dev, qp_ts_format) ?
+                      MLX5_QPC_TIMESTAMP_FORMAT_FREE_RUNNING :
+                      MLX5_QPC_TIMESTAMP_FORMAT_DEFAULT;
+}
+
 #endif /* MLX5_QP_H */
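
Editor's note: callers would typically feed the helper's result straight into the QP context when building it; a sketch, assuming the usual MLX5_SET() accessors and a qpc ts_format field:

	MLX5_SET(qpc, qpc, ts_format, mlx5_get_qp_default_ts(mdev));
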
index 7deb716..8ba4342 100644 (file)
@@ -1187,6 +1187,9 @@ static inline void get_page(struct page *page)
 }
 
 bool __must_check try_grab_page(struct page *page, unsigned int flags);
+__maybe_unused struct page *try_grab_compound_head(struct page *page, int refs,
+                                                  unsigned int flags);
+
 
 static inline __must_check bool try_get_page(struct page *page)
 {
@@ -1297,6 +1300,27 @@ static inline bool page_maybe_dma_pinned(struct page *page)
                GUP_PIN_COUNTING_BIAS;
 }
 
+static inline bool is_cow_mapping(vm_flags_t flags)
+{
+       return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
+}
+
+/*
+ * This should most likely only be called during fork() to see whether we
+ * should break the cow immediately for a page on the src mm.
+ */
+static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
+                                         struct page *page)
+{
+       if (!is_cow_mapping(vma->vm_flags))
+               return false;
+
+       if (!atomic_read(&vma->vm_mm->has_pinned))
+               return false;
+
+       return page_maybe_dma_pinned(page);
+}
+
 #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP)
 #define SECTION_IN_PAGE_FLAGS
 #endif
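
Editor's note: as the new comment says, page_needs_cow_for_dma() is meant for the fork() copy path. A sketch of the intended call site; copy_page_for_child() is an illustrative placeholder for the real copy logic:

	/*
	 * If the page may be pinned for DMA and lives in a COW mapping,
	 * copy it for the child now instead of write-protecting it.
	 */
	if (page_needs_cow_for_dma(src_vma, page))
		return copy_page_for_child(src_vma, page);	/* illustrative */
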
@@ -1437,16 +1461,28 @@ static inline bool cpupid_match_pid(struct task_struct *task, int cpupid)
 
 #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
 
+/*
+ * KASAN per-page tags are stored xor'ed with 0xff. This avoids having to
+ * initialize the tags of all pages to the native kernel tag value 0xff, as
+ * the zeroed default value 0x00 maps to 0xff.
+ */
+
 static inline u8 page_kasan_tag(const struct page *page)
 {
-       if (kasan_enabled())
-               return (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
-       return 0xff;
+       u8 tag = 0xff;
+
+       if (kasan_enabled()) {
+               tag = (page->flags >> KASAN_TAG_PGSHIFT) & KASAN_TAG_MASK;
+               tag ^= 0xff;
+       }
+
+       return tag;
 }
 
 static inline void page_kasan_tag_set(struct page *page, u8 tag)
 {
        if (kasan_enabled()) {
+               tag ^= 0xff;
                page->flags &= ~(KASAN_TAG_MASK << KASAN_TAG_PGSHIFT);
                page->flags |= (tag & KASAN_TAG_MASK) << KASAN_TAG_PGSHIFT;
        }
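
Editor's note: the xor-with-0xff encoding means a freshly zeroed page->flags tag field reads back as the native kernel tag. A worked round trip (tag value illustrative):

	page_kasan_tag_set(page, 0xfe);	/* stored as 0xfe ^ 0xff = 0x01 */
	tag = page_kasan_tag(page);	/* read back as 0x01 ^ 0xff = 0xfe */

	/* a page with a zeroed tag field reads as 0x00 ^ 0xff = 0xff */
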
@@ -2310,32 +2346,20 @@ extern void free_initmem(void);
 extern unsigned long free_reserved_area(void *start, void *end,
                                        int poison, const char *s);
 
-#ifdef CONFIG_HIGHMEM
-/*
- * Free a highmem page into the buddy system, adjusting totalhigh_pages
- * and totalram_pages.
- */
-extern void free_highmem_page(struct page *page);
-#endif
-
 extern void adjust_managed_page_count(struct page *page, long count);
 extern void mem_init_print_info(const char *str);
 
 extern void reserve_bootmem_region(phys_addr_t start, phys_addr_t end);
 
 /* Free the reserved page into the buddy system, so it gets managed. */
-static inline void __free_reserved_page(struct page *page)
+static inline void free_reserved_page(struct page *page)
 {
        ClearPageReserved(page);
        init_page_count(page);
        __free_page(page);
-}
-
-static inline void free_reserved_page(struct page *page)
-{
-       __free_reserved_page(page);
        adjust_managed_page_count(page, 1);
 }
+#define free_highmem_page(page) free_reserved_page(page)
 
 static inline void mark_page_reserved(struct page *page)
 {
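
Editor's note: with the highmem special case folded into free_reserved_page(), releasing a reserved page is a single call; a sketch, where addr is an illustrative kernel virtual address:

	/* hand a no-longer-needed reserved page back to the buddy allocator */
	free_reserved_page(virt_to_page(addr));
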
@@ -2405,9 +2429,10 @@ extern int __meminit early_pfn_to_nid(unsigned long pfn);
 #endif
 
 extern void set_dma_reserve(unsigned long new_dma_reserve);
-extern void memmap_init_zone(unsigned long, int, unsigned long,
+extern void memmap_init_range(unsigned long, int, unsigned long,
                unsigned long, unsigned long, enum meminit_context,
                struct vmem_altmap *, int migratetype);
+extern void memmap_init_zone(struct zone *zone);
 extern void setup_per_zone_wmarks(void);
 extern int __meminit init_per_zone_wmark_min(void);
 extern void mem_init(void);
index 8fc71e9..355ea1e 100644 (file)
@@ -24,7 +24,7 @@ static inline int page_is_file_lru(struct page *page)
        return !PageSwapBacked(page);
 }
 
-static __always_inline void __update_lru_size(struct lruvec *lruvec,
+static __always_inline void update_lru_size(struct lruvec *lruvec,
                                enum lru_list lru, enum zone_type zid,
                                int nr_pages)
 {
@@ -33,76 +33,27 @@ static __always_inline void __update_lru_size(struct lruvec *lruvec,
        __mod_lruvec_state(lruvec, NR_LRU_BASE + lru, nr_pages);
        __mod_zone_page_state(&pgdat->node_zones[zid],
                                NR_ZONE_LRU_BASE + lru, nr_pages);
-}
-
-static __always_inline void update_lru_size(struct lruvec *lruvec,
-                               enum lru_list lru, enum zone_type zid,
-                               int nr_pages)
-{
-       __update_lru_size(lruvec, lru, zid, nr_pages);
 #ifdef CONFIG_MEMCG
        mem_cgroup_update_lru_size(lruvec, lru, zid, nr_pages);
 #endif
 }
 
-static __always_inline void add_page_to_lru_list(struct page *page,
-                               struct lruvec *lruvec, enum lru_list lru)
-{
-       update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
-       list_add(&page->lru, &lruvec->lists[lru]);
-}
-
-static __always_inline void add_page_to_lru_list_tail(struct page *page,
-                               struct lruvec *lruvec, enum lru_list lru)
-{
-       update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
-       list_add_tail(&page->lru, &lruvec->lists[lru]);
-}
-
-static __always_inline void del_page_from_lru_list(struct page *page,
-                               struct lruvec *lruvec, enum lru_list lru)
-{
-       list_del(&page->lru);
-       update_lru_size(lruvec, lru, page_zonenum(page), -thp_nr_pages(page));
-}
-
 /**
- * page_lru_base_type - which LRU list type should a page be on?
- * @page: the page to test
- *
- * Used for LRU list index arithmetic.
- *
- * Returns the base LRU type - file or anon - @page should be on.
+ * __clear_page_lru_flags - clear page lru flags before releasing a page
+ * @page: the page that was on lru and now has a zero reference
  */
-static inline enum lru_list page_lru_base_type(struct page *page)
+static __always_inline void __clear_page_lru_flags(struct page *page)
 {
-       if (page_is_file_lru(page))
-               return LRU_INACTIVE_FILE;
-       return LRU_INACTIVE_ANON;
-}
+       VM_BUG_ON_PAGE(!PageLRU(page), page);
 
-/**
- * page_off_lru - which LRU list was page on? clearing its lru flags.
- * @page: the page to test
- *
- * Returns the LRU list a page was on, as an index into the array of LRU
- * lists; and clears its Unevictable or Active flags, ready for freeing.
- */
-static __always_inline enum lru_list page_off_lru(struct page *page)
-{
-       enum lru_list lru;
+       __ClearPageLRU(page);
 
-       if (PageUnevictable(page)) {
-               __ClearPageUnevictable(page);
-               lru = LRU_UNEVICTABLE;
-       } else {
-               lru = page_lru_base_type(page);
-               if (PageActive(page)) {
-                       __ClearPageActive(page);
-                       lru += LRU_ACTIVE;
-               }
-       }
-       return lru;
+       /* this shouldn't happen, so leave the flags to bad_page() */
+       if (PageActive(page) && PageUnevictable(page))
+               return;
+
+       __ClearPageActive(page);
+       __ClearPageUnevictable(page);
 }
 
 /**
@@ -116,13 +67,41 @@ static __always_inline enum lru_list page_lru(struct page *page)
 {
        enum lru_list lru;
 
+       VM_BUG_ON_PAGE(PageActive(page) && PageUnevictable(page), page);
+
        if (PageUnevictable(page))
-               lru = LRU_UNEVICTABLE;
-       else {
-               lru = page_lru_base_type(page);
-               if (PageActive(page))
-                       lru += LRU_ACTIVE;
-       }
+               return LRU_UNEVICTABLE;
+
+       lru = page_is_file_lru(page) ? LRU_INACTIVE_FILE : LRU_INACTIVE_ANON;
+       if (PageActive(page))
+               lru += LRU_ACTIVE;
+
        return lru;
 }
+
+static __always_inline void add_page_to_lru_list(struct page *page,
+                               struct lruvec *lruvec)
+{
+       enum lru_list lru = page_lru(page);
+
+       update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
+       list_add(&page->lru, &lruvec->lists[lru]);
+}
+
+static __always_inline void add_page_to_lru_list_tail(struct page *page,
+                               struct lruvec *lruvec)
+{
+       enum lru_list lru = page_lru(page);
+
+       update_lru_size(lruvec, lru, page_zonenum(page), thp_nr_pages(page));
+       list_add_tail(&page->lru, &lruvec->lists[lru]);
+}
+
+static __always_inline void del_page_from_lru_list(struct page *page,
+                               struct lruvec *lruvec)
+{
+       list_del(&page->lru);
+       update_lru_size(lruvec, page_lru(page), page_zonenum(page),
+                       -thp_nr_pages(page));
+}
 #endif
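
Editor's note: with the LRU list now derived from page flags inside the helpers, callers no longer compute or pass it. A sketch of the updated calling convention; the locking shown is illustrative:

	spin_lock_irq(&lruvec->lru_lock);
	add_page_to_lru_list(page, lruvec);	/* list chosen via page_lru(page) */
	spin_unlock_irq(&lruvec->lru_lock);
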
index 0974ad5..6613b26 100644 (file)
@@ -23,6 +23,7 @@
 #endif
 #define AT_VECTOR_SIZE (2*(AT_VECTOR_SIZE_ARCH + AT_VECTOR_SIZE_BASE + 1))
 
+#define INIT_PASID     0
 
 struct address_space;
 struct mem_cgroup;
index b820078..1a6a9eb 100644 (file)
@@ -169,11 +169,11 @@ struct mmu_notifier_ops {
         * the last refcount is dropped.
         *
         * If blockable argument is set to false then the callback cannot
-        * sleep and has to return with -EAGAIN. 0 should be returned
-        * otherwise. Please note that if invalidate_range_start approves
-        * a non-blocking behavior then the same applies to
-        * invalidate_range_end.
-        *
+        * sleep and has to return with -EAGAIN if sleeping would be required.
+        * 0 should be returned otherwise. Please note that notifiers that can
+        * fail invalidate_range_start are not allowed to implement
+        * invalidate_range_end, as there is no mechanism for informing the
+        * notifier that its start failed.
         */
        int (*invalidate_range_start)(struct mmu_notifier *subscription,
                                      const struct mmu_notifier_range *range);
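
Editor's note: a sketch of an invalidate_range_start implementation honoring the clarified contract; per the comment, a notifier that can fail here must not also implement invalidate_range_end. The struct, lock, and names are illustrative:

static int my_invalidate_range_start(struct mmu_notifier *sub,
				     const struct mmu_notifier_range *range)
{
	struct my_ctx *ctx = container_of(sub, struct my_ctx, notifier);

	if (mmu_notifier_range_blockable(range))
		mutex_lock(&ctx->lock);
	else if (!mutex_trylock(&ctx->lock))
		return -EAGAIN;	/* cannot sleep: refuse instead of blocking */

	/* ... invalidate cached state for [range->start, range->end) ... */
	mutex_unlock(&ctx->lock);
	return 0;
}
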
index b593316..47946ce 100644 (file)
@@ -206,10 +206,30 @@ enum node_stat_item {
        NR_KERNEL_SCS_KB,       /* measured in KiB */
 #endif
        NR_PAGETABLE,           /* used for pagetables */
+#ifdef CONFIG_SWAP
+       NR_SWAPCACHE,
+#endif
        NR_VM_NODE_STAT_ITEMS
 };
 
 /*
+ * Returns true if the item should be printed in THPs: /proc/vmstat
+ * currently prints the number of anon, file and shmem THPs, but the
+ * item itself is charged in pages.
+ */
+static __always_inline bool vmstat_item_print_in_thp(enum node_stat_item item)
+{
+       if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
+               return false;
+
+       return item == NR_ANON_THPS ||
+              item == NR_FILE_THPS ||
+              item == NR_SHMEM_THPS ||
+              item == NR_SHMEM_PMDMAPPED ||
+              item == NR_FILE_PMDMAPPED;
+}
+
+/*
  * Returns true if the value is measured in bytes (most vmstat values are
  * measured in pages). This defines the API part, the internal representation
  * might be different.
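
Editor's note: the /proc/vmstat printing side would use the new predicate to convert page counts into THP counts; a sketch, assuming a THP spans HPAGE_PMD_NR base pages:

	enum node_stat_item item = NR_ANON_THPS;
	unsigned long pages = global_node_page_state(item);

	if (vmstat_item_print_in_thp(item))
		pages /= HPAGE_PMD_NR;	/* report THPs, not base pages */
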
@@ -483,6 +503,9 @@ struct zone {
         * bootmem allocator):
         *      managed_pages = present_pages - reserved_pages;
         *
+        * cma_pages is the number of present pages that are assigned
+        * for CMA use (MIGRATE_CMA).
+        *
         * So present_pages may be used by memory hotplug or memory power
         * management logic to figure out unmanaged pages by checking
         * (present_pages - managed_pages). And managed_pages should be used
@@ -507,6 +530,9 @@ struct zone {
        atomic_long_t           managed_pages;
        unsigned long           spanned_pages;
        unsigned long           present_pages;
+#ifdef CONFIG_CMA
+       unsigned long           cma_pages;
+#endif
 
        const char              *name;
 
@@ -604,6 +630,15 @@ static inline unsigned long zone_managed_pages(struct zone *zone)
        return (unsigned long)atomic_long_read(&zone->managed_pages);
 }
 
+static inline unsigned long zone_cma_pages(struct zone *zone)
+{
+#ifdef CONFIG_CMA
+       return zone->cma_pages;
+#else
+       return 0;
+#endif
+}
+
 static inline unsigned long zone_end_pfn(const struct zone *zone)
 {
        return zone->zone_start_pfn + zone->spanned_pages;
@@ -872,8 +907,6 @@ static inline struct pglist_data *lruvec_pgdat(struct lruvec *lruvec)
 #endif
 }
 
-extern unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx);
-
 #ifdef CONFIG_HAVE_MEMORYLESS_NODES
 int local_memory_node(int node_id);
 #else
@@ -885,6 +918,18 @@ static inline int local_memory_node(int node_id) { return node_id; };
  */
 #define zone_idx(zone)         ((zone) - (zone)->zone_pgdat->node_zones)
 
+#ifdef CONFIG_ZONE_DEVICE
+static inline bool zone_is_zone_device(struct zone *zone)
+{
+       return zone_idx(zone) == ZONE_DEVICE;
+}
+#else
+static inline bool zone_is_zone_device(struct zone *zone)
+{
+       return false;
+}
+#endif
+
 /*
  * Returns true if a zone has pages managed by the buddy allocator.
  * All the reclaim decisions have to use this function rather than
@@ -1273,13 +1318,14 @@ extern size_t mem_section_usage_size(void);
  *      which results in PFN_SECTION_SHIFT equal 6.
  * To sum it up, at least 6 bits are available.
  */
-#define        SECTION_MARKED_PRESENT  (1UL<<0)
-#define SECTION_HAS_MEM_MAP    (1UL<<1)
-#define SECTION_IS_ONLINE      (1UL<<2)
-#define SECTION_IS_EARLY       (1UL<<3)
-#define SECTION_MAP_LAST_BIT   (1UL<<4)
-#define SECTION_MAP_MASK       (~(SECTION_MAP_LAST_BIT-1))
-#define SECTION_NID_SHIFT      3
+#define SECTION_MARKED_PRESENT         (1UL<<0)
+#define SECTION_HAS_MEM_MAP            (1UL<<1)
+#define SECTION_IS_ONLINE              (1UL<<2)
+#define SECTION_IS_EARLY               (1UL<<3)
+#define SECTION_TAINT_ZONE_DEVICE      (1UL<<4)
+#define SECTION_MAP_LAST_BIT           (1UL<<5)
+#define SECTION_MAP_MASK               (~(SECTION_MAP_LAST_BIT-1))
+#define SECTION_NID_SHIFT              3
 
 static inline struct page *__section_mem_map_addr(struct mem_section *section)
 {
@@ -1318,6 +1364,13 @@ static inline int online_section(struct mem_section *section)
        return (section && (section->section_mem_map & SECTION_IS_ONLINE));
 }
 
+static inline int online_device_section(struct mem_section *section)
+{
+       unsigned long flags = SECTION_IS_ONLINE | SECTION_TAINT_ZONE_DEVICE;
+
+       return section && ((section->section_mem_map & flags) == flags);
+}
+
 static inline int online_section_nr(unsigned long nr)
 {
        return online_section(__nr_to_section(nr));
index 9350609..7d45b5f 100644 (file)
@@ -864,4 +864,28 @@ struct ssam_device_id {
        kernel_ulong_t driver_data;
 };
 
+/*
+ * DFL (Device Feature List)
+ *
+ * DFL defines a linked list of feature headers within the device MMIO space to
+ * provide an extensible way of adding features. Software can walk through these
+ * predefined data structures to enumerate features. It is currently used by
+ * FPGA devices. See Documentation/fpga/dfl.rst for more information.
+ *
+ * The dfl bus type is introduced so that individual feature devices (dfl
+ * devices) can be matched against specific dfl drivers.
+ */
+
+/**
+ * struct dfl_device_id -  dfl device identifier
+ * @type: DFL FIU type of the device. See enum dfl_id_type.
+ * @feature_id: feature identifier local to its DFL FIU type.
+ * @driver_data: driver specific data.
+ */
+struct dfl_device_id {
+       __u16 type;
+       __u16 feature_id;
+       kernel_ulong_t driver_data;
+};
+
 #endif /* LINUX_MOD_DEVICETABLE_H */
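
Editor's note: a dfl driver would match on the new id structure; a hedged sketch with made-up type and feature_id values:

static const struct dfl_device_id my_dfl_ids[] = {
	{ .type = 0, .feature_id = 0x10 },	/* values illustrative */
	{ }
};
MODULE_DEVICE_TABLE(dfl, my_dfl_ids);
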
index 7a0bcb5..da4b6fb 100644 (file)
@@ -30,9 +30,6 @@
 #include <linux/percpu.h>
 #include <asm/module.h>
 
-/* Not Yet Implemented */
-#define MODULE_SUPPORTED_DEVICE(name)
-
 #define MODULE_NAME_LEN MAX_PARAM_PREFIX_LEN
 
 struct modversion_info {
@@ -392,18 +389,6 @@ struct module {
        const s32 *gpl_crcs;
        bool using_gplonly_symbols;
 
-#ifdef CONFIG_UNUSED_SYMBOLS
-       /* unused exported symbols. */
-       const struct kernel_symbol *unused_syms;
-       const s32 *unused_crcs;
-       unsigned int num_unused_syms;
-
-       /* GPL-only, unused exported symbols. */
-       unsigned int num_unused_gpl_syms;
-       const struct kernel_symbol *unused_gpl_syms;
-       const s32 *unused_gpl_crcs;
-#endif
-
 #ifdef CONFIG_MODULE_SIG
        /* Signature was verified. */
        bool sig_ok;
@@ -411,11 +396,6 @@ struct module {
 
        bool async_probe_requested;
 
-       /* symbols that will be GPL-only in the near future. */
-       const struct kernel_symbol *gpl_future_syms;
-       const s32 *gpl_future_crcs;
-       unsigned int num_gpl_future_syms;
-
        /* Exception table */
        unsigned int num_exentries;
        struct exception_table_entry *extable;
@@ -550,8 +530,6 @@ static inline unsigned long kallsyms_symbol_value(const Elf_Sym *sym)
 }
 #endif
 
-extern struct mutex module_mutex;
-
 /* FIXME: It'd be nice to isolate modules during init, too, so they
    aren't used before they (may) fail.  But presently too much code
    (IDE & SCSI) require entry into the module during init.*/
@@ -586,20 +564,9 @@ static inline bool within_module(unsigned long addr, const struct module *mod)
        return within_module_init(addr, mod) || within_module_core(addr, mod);
 }
 
-/* Search for module by name: must hold module_mutex. */
+/* Search for module by name: must be in an RCU-sched critical section. */
 struct module *find_module(const char *name);
 
-struct symsearch {
-       const struct kernel_symbol *start, *stop;
-       const s32 *crcs;
-       enum mod_license {
-               NOT_GPL_ONLY,
-               GPL_ONLY,
-               WILL_BE_GPL_ONLY,
-       } license;
-       bool unused;
-};
-
 /* Returns 0 and fills in value, defined and namebuf, or -ERANGE if
    symnum out of range. */
 int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
@@ -608,10 +575,6 @@ int module_get_kallsym(unsigned int symnum, unsigned long *value, char *type,
 /* Look for this name: can be of form module:name. */
 unsigned long module_kallsyms_lookup_name(const char *name);
 
-int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
-                                            struct module *, unsigned long),
-                                  void *data);
-
 extern void __noreturn __module_put_and_exit(struct module *mod,
                        long code);
 #define module_put_and_exit(code) __module_put_and_exit(THIS_MODULE, code)
@@ -795,14 +758,6 @@ static inline unsigned long module_kallsyms_lookup_name(const char *name)
        return 0;
 }
 
-static inline int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
-                                                          struct module *,
-                                                          unsigned long),
-                                                void *data)
-{
-       return 0;
-}
-
 static inline int register_module_notifier(struct notifier_block *nb)
 {
        /* no events will happen anyway, so this can always succeed */
@@ -891,4 +846,8 @@ static inline bool module_sig_ok(struct module *module)
 }
 #endif /* CONFIG_MODULE_SIG */
 
+int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
+                                            struct module *, unsigned long),
+                                  void *data);
+
 #endif /* _LINUX_MODULE_H */
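
Editor's note: with the module_mutex export gone, find_module() callers are expected to run inside an RCU-sched critical section and take a reference before leaving it; a sketch (module name illustrative):

	struct module *mod;

	rcu_read_lock_sched();
	mod = find_module("example");
	if (mod && !try_module_get(mod))
		mod = NULL;
	rcu_read_unlock_sched();
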
index aaf343b..5d92a7e 100644 (file)
@@ -72,14 +72,20 @@ struct vfsmount {
        struct dentry *mnt_root;        /* root of the mounted tree */
        struct super_block *mnt_sb;     /* pointer to superblock */
        int mnt_flags;
+       struct user_namespace *mnt_userns;
 } __randomize_layout;
 
+static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt)
+{
+       /* Pairs with smp_store_release() in do_idmap_mount(). */
+       return smp_load_acquire(&mnt->mnt_userns);
+}
+
 struct file; /* forward dec */
 struct path;
 
 extern int mnt_want_write(struct vfsmount *mnt);
 extern int mnt_want_write_file(struct file *file);
-extern int mnt_clone_write(struct vfsmount *mnt);
 extern void mnt_drop_write(struct vfsmount *mnt);
 extern void mnt_drop_write_file(struct file *file);
 extern void mntput(struct vfsmount *mnt);
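
Editor's note: filesystem code reads the mount's user namespace through the accessor rather than touching the field directly; a sketch of the idmapped-mount pattern, assuming the 5.12 helpers that take a user namespace:

	struct user_namespace *mnt_userns = mnt_user_ns(path->mnt);

	/* pass mnt_userns down so checks honor the mount's idmapping */
	err = inode_permission(mnt_userns, inode, MAY_WRITE);
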
index 0cd631a..515cff7 100644 (file)
@@ -185,7 +185,7 @@ extern void mutex_lock_io(struct mutex *lock);
 # define mutex_lock_interruptible_nested(lock, subclass) mutex_lock_interruptible(lock)
 # define mutex_lock_killable_nested(lock, subclass) mutex_lock_killable(lock)
 # define mutex_lock_nest_lock(lock, nest_lock) mutex_lock(lock)
-# define mutex_lock_io_nested(lock, subclass) mutex_lock(lock)
+# define mutex_lock_io_nested(lock, subclass) mutex_lock_io(lock)
 #endif
 
 /*
index 55c7359..cec526c 100644 (file)
@@ -26,7 +26,7 @@ struct nd_device_driver {
        struct device_driver drv;
        unsigned long type;
        int (*probe)(struct device *dev);
-       int (*remove)(struct device *dev);
+       void (*remove)(struct device *dev);
        void (*shutdown)(struct device *dev);
        void (*notify)(struct device *dev, enum nvdimm_event event);
 };
index 9e2324e..ba736b4 100644 (file)
@@ -42,8 +42,6 @@ struct net;
 #define SOCK_PASSCRED          3
 #define SOCK_PASSSEC           4
 
-#define PROTO_CMSG_DATA_ONLY   0x0001
-
 #ifndef ARCH_HAS_SOCKET_TYPES
 /**
  * enum sock_type - Socket types
@@ -138,7 +136,6 @@ typedef int (*sk_read_actor_t)(read_descriptor_t *, struct sk_buff *,
 
 struct proto_ops {
        int             family;
-       unsigned int    flags;
        struct module   *owner;
        int             (*release)   (struct socket *sock);
        int             (*bind)      (struct socket *sock,
index ddf4cfc..87a5d18 100644 (file)
@@ -360,6 +360,7 @@ enum {
        NAPI_STATE_IN_BUSY_POLL,        /* sk_busy_loop() owns this NAPI */
        NAPI_STATE_PREFER_BUSY_POLL,    /* prefer busy-polling over softirq processing*/
        NAPI_STATE_THREADED,            /* The poll is performed inside its own thread*/
+       NAPI_STATE_SCHED_THREADED,      /* Napi is currently scheduled in threaded mode */
 };
 
 enum {
@@ -372,6 +373,7 @@ enum {
        NAPIF_STATE_IN_BUSY_POLL        = BIT(NAPI_STATE_IN_BUSY_POLL),
        NAPIF_STATE_PREFER_BUSY_POLL    = BIT(NAPI_STATE_PREFER_BUSY_POLL),
        NAPIF_STATE_THREADED            = BIT(NAPI_STATE_THREADED),
+       NAPIF_STATE_SCHED_THREADED      = BIT(NAPI_STATE_SCHED_THREADED),
 };
 
 enum gro_result {
@@ -1584,6 +1586,12 @@ enum netdev_priv_flags {
 #define IFF_L3MDEV_RX_HANDLER          IFF_L3MDEV_RX_HANDLER
 #define IFF_LIVE_RENAME_OK             IFF_LIVE_RENAME_OK
 
+/* Specifies the type of the struct net_device::ml_priv pointer */
+enum netdev_ml_priv_type {
+       ML_PRIV_NONE,
+       ML_PRIV_CAN,
+};
+
 /**
  *     struct net_device - The DEVICE structure.
  *
@@ -1779,6 +1787,7 @@ enum netdev_priv_flags {
  *     @nd_net:                Network namespace this network device is inside
  *
  *     @ml_priv:       Mid-layer private
+ *     @ml_priv_type:  Mid-layer private type
  *     @lstats:        Loopback statistics
  *     @tstats:        Tunnel statistics
  *     @dstats:        Dummy statistics
@@ -2094,8 +2103,10 @@ struct net_device {
        possible_net_t                  nd_net;
 
        /* mid-layer private */
+       void                            *ml_priv;
+       enum netdev_ml_priv_type        ml_priv_type;
+
        union {
-               void                                    *ml_priv;
                struct pcpu_lstats __percpu             *lstats;
                struct pcpu_sw_netstats __percpu        *tstats;
                struct pcpu_dstats __percpu             *dstats;
@@ -2286,6 +2297,29 @@ static inline void netdev_reset_rx_headroom(struct net_device *dev)
        netdev_set_rx_headroom(dev, -1);
 }
 
+static inline void *netdev_get_ml_priv(struct net_device *dev,
+                                      enum netdev_ml_priv_type type)
+{
+       if (dev->ml_priv_type != type)
+               return NULL;
+
+       return dev->ml_priv;
+}
+
+static inline void netdev_set_ml_priv(struct net_device *dev,
+                                     void *ml_priv,
+                                     enum netdev_ml_priv_type type)
+{
+       WARN(dev->ml_priv_type && dev->ml_priv_type != type,
+            "Overwriting already set ml_priv_type (%u) with different ml_priv_type (%u)!\n",
+            dev->ml_priv_type, type);
+       WARN(!dev->ml_priv_type && dev->ml_priv,
+            "Overwriting already set ml_priv and ml_priv_type is ML_PRIV_NONE!\n");
+
+       dev->ml_priv = ml_priv;
+       dev->ml_priv_type = type;
+}
+
 /*
  * Net namespace inlines
  */
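
Editor's note: a mid-layer such as CAN now stores and fetches its private pointer through the typed helpers, so a mismatched consumer gets NULL instead of another layer's data. A sketch:

	struct can_priv *cpriv;

	/* producer (e.g. the CAN mid-layer) */
	netdev_set_ml_priv(dev, priv, ML_PRIV_CAN);

	/* consumer: NULL if dev's ml_priv belongs to a different layer */
	cpriv = netdev_get_ml_priv(dev, ML_PRIV_CAN);
	if (!cpriv)
		return -ENODEV;
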
@@ -3927,8 +3961,6 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 int bpf_xdp_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode);
 
-int xdp_umem_query(struct net_device *dev, u16 queue_id);
-
 int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb(struct net_device *dev, struct sk_buff *skb);
 int dev_forward_skb_nomtu(struct net_device *dev, struct sk_buff *skb);
index 8ebb641..8ec4846 100644 (file)
@@ -227,7 +227,7 @@ struct xt_table {
        unsigned int valid_hooks;
 
        /* Man behind the curtain... */
-       struct xt_table_info __rcu *private;
+       struct xt_table_info *private;
 
        /* Set this to THIS_MODULE if you are a module, otherwise NULL */
        struct module *me;
@@ -376,7 +376,7 @@ static inline unsigned int xt_write_recseq_begin(void)
         * since addend is most likely 1
         */
        __this_cpu_add(xt_recseq.sequence, addend);
-       smp_wmb();
+       smp_mb();
 
        return addend;
 }
@@ -448,9 +448,6 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu)
 
 struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *);
 
-struct xt_table_info
-*xt_table_get_private_protected(const struct xt_table *table);
-
 #ifdef CONFIG_COMPAT
 #include <net/compat.h>
 
index 681ed98..eadaabd 100644 (file)
@@ -379,18 +379,20 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *);
 extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr);
 extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr);
-extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int);
+extern int nfs_getattr(struct user_namespace *, const struct path *,
+                      struct kstat *, u32, unsigned int);
 extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *);
 extern void nfs_access_set_mask(struct nfs_access_entry *, u32);
-extern int nfs_permission(struct inode *, int);
+extern int nfs_permission(struct user_namespace *, struct inode *, int);
 extern int nfs_open(struct inode *, struct file *);
 extern int nfs_attribute_cache_expired(struct inode *inode);
 extern int nfs_revalidate_inode(struct nfs_server *server, struct inode *inode);
 extern int __nfs_revalidate_inode(struct nfs_server *, struct inode *);
+extern int nfs_clear_invalid_mapping(struct address_space *mapping);
 extern bool nfs_mapping_need_revalidate_inode(struct inode *inode);
 extern int nfs_revalidate_mapping(struct inode *inode, struct address_space *mapping);
 extern int nfs_revalidate_mapping_rcu(struct inode *inode);
-extern int nfs_setattr(struct dentry *, struct iattr *);
+extern int nfs_setattr(struct user_namespace *, struct dentry *, struct iattr *);
 extern void nfs_setattr_update_inode(struct inode *inode, struct iattr *attr, struct nfs_fattr *);
 extern void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr,
                                struct nfs4_label *label);
@@ -570,8 +572,6 @@ nfs_have_writebacks(struct inode *inode)
 extern int  nfs_readpage(struct file *, struct page *);
 extern int  nfs_readpages(struct file *, struct address_space *,
                struct list_head *, unsigned);
-extern int  nfs_readpage_async(struct nfs_open_context *, struct inode *,
-                              struct page *);
 
 /*
  * inline functions
index 38e60ec..6f76b32 100644 (file)
@@ -142,7 +142,7 @@ struct nfs_server {
        struct nlm_host         *nlm_host;      /* NLM client handle */
        struct nfs_iostats __percpu *io_stats;  /* I/O statistics */
        atomic_long_t           writeback;      /* number of writeback pages */
-       int                     flags;          /* various flags */
+       unsigned int            flags;          /* various flags */
 
 /* The following are for internal use only. Also see uapi/linux/nfs_mount.h */
 #define NFS_MOUNT_LOOKUP_CACHE_NONEG   0x10000
@@ -153,6 +153,8 @@ struct nfs_server {
 #define NFS_MOUNT_LOCAL_FCNTL          0x200000
 #define NFS_MOUNT_SOFTERR              0x400000
 #define NFS_MOUNT_SOFTREVAL            0x800000
+#define NFS_MOUNT_WRITE_EAGER          0x01000000
+#define NFS_MOUNT_WRITE_WAIT           0x02000000
 
        unsigned int            caps;           /* server capabilities */
        unsigned int            rsize;          /* read size */
index 577f514..7e72d97 100644 (file)
@@ -29,11 +29,14 @@ struct unwind_hint {
  *
  * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
  * sp_reg+sp_offset points to the iret return frame.
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
  */
 #define UNWIND_HINT_TYPE_CALL          0
 #define UNWIND_HINT_TYPE_REGS          1
 #define UNWIND_HINT_TYPE_REGS_PARTIAL  2
-#define UNWIND_HINT_TYPE_RET_OFFSET    3
+#define UNWIND_HINT_TYPE_FUNC          3
 
 #ifdef CONFIG_STACK_VALIDATION
 
@@ -109,6 +112,12 @@ struct unwind_hint {
        .popsection
 .endm
 
+.macro STACK_FRAME_NON_STANDARD func:req
+       .pushsection .discard.func_stack_frame_non_standard, "aw"
+               .long \func - .
+       .popsection
+.endm
+
 #endif /* __ASSEMBLY__ */
 
 #else /* !CONFIG_STACK_VALIDATION */
@@ -122,6 +131,8 @@ struct unwind_hint {
 #define ANNOTATE_INTRA_FUNCTION_CALL
 .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
 .endm
+.macro STACK_FRAME_NON_STANDARD func:req
+.endm
 #endif
 
 #endif /* CONFIG_STACK_VALIDATION */
index e8b7813..aaf219b 100644 (file)
@@ -33,8 +33,6 @@ static inline int of_irq_parse_oldworld(struct device_node *device, int index,
 #endif /* CONFIG_PPC32 && CONFIG_PPC_PMAC */
 
 extern int of_irq_parse_raw(const __be32 *addr, struct of_phandle_args *out_irq);
-extern int of_irq_parse_one(struct device_node *device, int index,
-                         struct of_phandle_args *out_irq);
 extern unsigned int irq_create_of_mapping(struct of_phandle_args *irq_data);
 extern int of_irq_to_resource(struct device_node *dev, int index,
                              struct resource *r);
@@ -42,6 +40,8 @@ extern int of_irq_to_resource(struct device_node *dev, int index,
 extern void of_irq_init(const struct of_device_id *matches);
 
 #ifdef CONFIG_OF_IRQ
+extern int of_irq_parse_one(struct device_node *device, int index,
+                         struct of_phandle_args *out_irq);
 extern int of_irq_count(struct device_node *dev);
 extern int of_irq_get(struct device_node *dev, int index);
 extern int of_irq_get_byname(struct device_node *dev, const char *name);
@@ -57,6 +57,11 @@ extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev,
 extern void of_msi_configure(struct device *dev, struct device_node *np);
 u32 of_msi_map_id(struct device *dev, struct device_node *msi_np, u32 id_in);
 #else
+static inline int of_irq_parse_one(struct device_node *device, int index,
+                                  struct of_phandle_args *out_irq)
+{
+       return -EINVAL;
+}
 static inline int of_irq_count(struct device_node *dev)
 {
        return 0;
index ec5d029..04a34c0 100644 (file)
@@ -592,15 +592,9 @@ static inline void ClearPageCompound(struct page *page)
 #ifdef CONFIG_HUGETLB_PAGE
 int PageHuge(struct page *page);
 int PageHeadHuge(struct page *page);
-bool page_huge_active(struct page *page);
 #else
 TESTPAGEFLAG_FALSE(Huge)
 TESTPAGEFLAG_FALSE(HeadHuge)
-
-static inline bool page_huge_active(struct page *page)
-{
-       return 0;
-}
 #endif
 
 
@@ -816,7 +810,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
 
 /*
  * Flags checked when a page is freed.  Pages being freed should not have
- * these flags set.  It they are, there is a problem.
+ * these flags set.  If they are, there is a problem.
  */
 #define PAGE_FLAGS_CHECK_AT_FREE                               \
        (1UL << PG_lru          | 1UL << PG_locked      |       \
@@ -827,7 +821,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
 
 /*
  * Flags checked when a page is prepped for return by the page allocator.
- * Pages being prepped should not have these flags set.  It they are set,
+ * Pages being prepped should not have these flags set.  If they are set,
  * there has been a kernel bug or struct page corruption.
  *
  * __PG_HWPOISON is exceptional because it needs to be kept beyond page's
index 85bd413..6795913 100644 (file)
@@ -12,7 +12,6 @@ struct page_counter {
        unsigned long low;
        unsigned long high;
        unsigned long max;
-       struct page_counter *parent;
 
        /* effective memory.min and memory.min usage tracking */
        unsigned long emin;
@@ -27,6 +26,14 @@ struct page_counter {
        /* legacy */
        unsigned long watermark;
        unsigned long failcnt;
+
+       /*
+        * 'parent' is placed here to keep it far from 'usage' and thus
+        * reduce cache false sharing: 'usage' is mostly written, while
+        * 'parent' is frequently read because of the hierarchical
+        * nature of cgroup accounting.
+        */
+       struct page_counter *parent;
 };
 
 #if BITS_PER_LONG == 32
index d5570de..8c9947f 100644 (file)
@@ -315,6 +315,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 #define FGP_NOWAIT             0x00000020
 #define FGP_FOR_MMAP           0x00000040
 #define FGP_HEAD               0x00000080
+#define FGP_ENTRY              0x00000100
 
 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset,
                int fgp_flags, gfp_t cache_gfp_mask);
@@ -450,8 +451,7 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index)
 }
 
 unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
-                         unsigned int nr_entries, struct page **entries,
-                         pgoff_t *indices);
+               pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
 unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
                        pgoff_t end, unsigned int nr_pages,
                        struct page **pages);
@@ -559,7 +559,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma,
        return pgoff;
 }
 
-/* This has the same layout as wait_bit_key - see fs/cachefiles/rdwr.c */
 struct wait_page_key {
        struct page *page;
        int bit_nr;
@@ -681,9 +680,9 @@ static inline int wait_on_page_locked_killable(struct page *page)
        return wait_on_page_bit_killable(compound_head(page), PG_locked);
 }
 
-extern void put_and_wait_on_page_locked(struct page *page);
-
+int put_and_wait_on_page_locked(struct page *page, int state);
 void wait_on_page_writeback(struct page *page);
+int wait_on_page_writeback_killable(struct page *page);
 extern void end_page_writeback(struct page *page);
 void wait_for_stable_page(struct page *page);
 
@@ -757,9 +756,11 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping,
                                pgoff_t index, gfp_t gfp_mask);
 extern void delete_from_page_cache(struct page *page);
 extern void __delete_from_page_cache(struct page *page, void *shadow);
-int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask);
+void replace_page_cache_page(struct page *old, struct page *new);
 void delete_from_page_cache_batch(struct address_space *mapping,
                                  struct pagevec *pvec);
+loff_t mapping_seek_hole_data(struct address_space *, loff_t start, loff_t end,
+               int whence);
 
 /*
  * Like add_to_page_cache_locked, but used to add newly allocated pages:
index ad4ddc1..7f3f190 100644 (file)
@@ -25,10 +25,6 @@ struct pagevec {
 
 void __pagevec_release(struct pagevec *pvec);
 void __pagevec_lru_add(struct pagevec *pvec);
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
-                               struct address_space *mapping,
-                               pgoff_t start, unsigned nr_entries,
-                               pgoff_t *indices);
 void pagevec_remove_exceptionals(struct pagevec *pvec);
 unsigned pagevec_lookup_range(struct pagevec *pvec,
                              struct address_space *mapping,
index cc66bec..b82c9b1 100644 (file)
 
 struct pci_epc;
 
+enum pci_epc_interface_type {
+       UNKNOWN_INTERFACE = -1,
+       PRIMARY_INTERFACE,
+       SECONDARY_INTERFACE,
+};
+
 enum pci_epc_irq_type {
        PCI_EPC_IRQ_UNKNOWN,
        PCI_EPC_IRQ_LEGACY,
@@ -20,6 +26,19 @@ enum pci_epc_irq_type {
        PCI_EPC_IRQ_MSIX,
 };
 
+static inline const char *
+pci_epc_interface_string(enum pci_epc_interface_type type)
+{
+       switch (type) {
+       case PRIMARY_INTERFACE:
+               return "primary";
+       case SECONDARY_INTERFACE:
+               return "secondary";
+       default:
+               return "UNKNOWN interface";
+       }
+}
+
 /**
  * struct pci_epc_ops - set of function pointers for performing EPC operations
  * @write_header: ops to populate configuration space header
@@ -36,6 +55,7 @@ enum pci_epc_irq_type {
  * @get_msix: ops to get the number of MSI-X interrupts allocated by the RC
  *          from the MSI-X capability register
  * @raise_irq: ops to raise a legacy, MSI or MSI-X interrupt
+ * @map_msi_irq: ops to map physical address to MSI address and return MSI data
  * @start: ops to start the PCI link
  * @stop: ops to stop the PCI link
  * @owner: the module owner containing the ops
@@ -58,6 +78,10 @@ struct pci_epc_ops {
        int     (*get_msix)(struct pci_epc *epc, u8 func_no);
        int     (*raise_irq)(struct pci_epc *epc, u8 func_no,
                             enum pci_epc_irq_type type, u16 interrupt_num);
+       int     (*map_msi_irq)(struct pci_epc *epc, u8 func_no,
+                              phys_addr_t phys_addr, u8 interrupt_num,
+                              u32 entry_size, u32 *msi_data,
+                              u32 *msi_addr_offset);
        int     (*start)(struct pci_epc *epc);
        void    (*stop)(struct pci_epc *epc);
        const struct pci_epc_features* (*get_features)(struct pci_epc *epc,
@@ -175,10 +199,12 @@ __pci_epc_create(struct device *dev, const struct pci_epc_ops *ops,
                 struct module *owner);
 void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc);
 void pci_epc_destroy(struct pci_epc *epc);
-int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf);
+int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf,
+                   enum pci_epc_interface_type type);
 void pci_epc_linkup(struct pci_epc *epc);
 void pci_epc_init_notify(struct pci_epc *epc);
-void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf);
+void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf,
+                       enum pci_epc_interface_type type);
 int pci_epc_write_header(struct pci_epc *epc, u8 func_no,
                         struct pci_epf_header *hdr);
 int pci_epc_set_bar(struct pci_epc *epc, u8 func_no,
@@ -195,14 +221,19 @@ int pci_epc_get_msi(struct pci_epc *epc, u8 func_no);
 int pci_epc_set_msix(struct pci_epc *epc, u8 func_no, u16 interrupts,
                     enum pci_barno, u32 offset);
 int pci_epc_get_msix(struct pci_epc *epc, u8 func_no);
+int pci_epc_map_msi_irq(struct pci_epc *epc, u8 func_no,
+                       phys_addr_t phys_addr, u8 interrupt_num,
+                       u32 entry_size, u32 *msi_data, u32 *msi_addr_offset);
 int pci_epc_raise_irq(struct pci_epc *epc, u8 func_no,
                      enum pci_epc_irq_type type, u16 interrupt_num);
 int pci_epc_start(struct pci_epc *epc);
 void pci_epc_stop(struct pci_epc *epc);
 const struct pci_epc_features *pci_epc_get_features(struct pci_epc *epc,
                                                    u8 func_no);
-unsigned int pci_epc_get_first_free_bar(const struct pci_epc_features
-                                       *epc_features);
+enum pci_barno
+pci_epc_get_first_free_bar(const struct pci_epc_features *epc_features);
+enum pci_barno pci_epc_get_next_free_bar(const struct pci_epc_features
+                                        *epc_features, enum pci_barno bar);
 struct pci_epc *pci_epc_get(const char *epc_name);
 void pci_epc_put(struct pci_epc *epc);
 
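
Returning enum pci_barno here (together with the NO_BAR = -1 enumerator added to pci-epf.h below) lets "no free BAR" be reported in-band instead of through an unsigned index. A toy re-implementation of such a scan, assuming a bitmask of reserved BARs; the struct, field, and helper names are invented for illustration, not the kernel's:

    #include <stdio.h>

    enum barno { NO_BAR = -1, BAR_0, BAR_1, BAR_2, BAR_3, BAR_4, BAR_5 };

    struct features { unsigned int reserved_bar; }; /* bit n set: BAR_n unusable */

    /* Return the first free BAR at or after 'from', or NO_BAR if none is left. */
    static enum barno next_free_bar(const struct features *f, enum barno from)
    {
            for (int bar = from; bar <= BAR_5; bar++)
                    if (!(f->reserved_bar & (1u << bar)))
                            return bar;
            return NO_BAR;
    }

    int main(void)
    {
            struct features f = { .reserved_bar = 0x07 };   /* BARs 0-2 taken */
            for (enum barno bar = next_free_bar(&f, BAR_0); bar != NO_BAR;
                 bar = next_free_bar(&f, bar + 1))
                    printf("free BAR %d\n", bar);
            return 0;
    }
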
index 6644ff3..6833e21 100644 (file)
@@ -9,11 +9,13 @@
 #ifndef __LINUX_PCI_EPF_H
 #define __LINUX_PCI_EPF_H
 
+#include <linux/configfs.h>
 #include <linux/device.h>
 #include <linux/mod_devicetable.h>
 #include <linux/pci.h>
 
 struct pci_epf;
+enum pci_epc_interface_type;
 
 enum pci_notify_event {
        CORE_INIT,
@@ -21,6 +23,7 @@ enum pci_notify_event {
 };
 
 enum pci_barno {
+       NO_BAR = -1,
        BAR_0,
        BAR_1,
        BAR_2,
@@ -60,10 +63,13 @@ struct pci_epf_header {
  * @bind: ops to perform when a EPC device has been bound to EPF device
  * @unbind: ops to perform when a binding has been lost between a EPC device
  *         and EPF device
+ * @add_cfs: ops to initialize function specific configfs attributes
  */
 struct pci_epf_ops {
        int     (*bind)(struct pci_epf *epf);
        void    (*unbind)(struct pci_epf *epf);
+       struct config_group *(*add_cfs)(struct pci_epf *epf,
+                                       struct config_group *group);
 };
 
 /**
@@ -118,6 +124,12 @@ struct pci_epf_bar {
  * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc
  * @nb: notifier block to notify EPF of any EPC events (like linkup)
  * @lock: mutex to protect pci_epf_ops
+ * @sec_epc: the secondary EPC device to which this EPF device is bound
+ * @sec_epc_list: used to link this pci_epf into the list of endpoint
+ *   functions bound to the secondary EPC device
+ * @sec_epc_bar: represents the BAR of EPF device associated with secondary EPC
+ * @sec_epc_func_no: unique (physical) function number within the secondary EPC
+ * @group: configfs group associated with the EPF device
  */
 struct pci_epf {
        struct device           dev;
@@ -134,6 +146,13 @@ struct pci_epf {
        struct notifier_block   nb;
        /* mutex to protect against concurrent access of pci_epf_ops */
        struct mutex            lock;
+
+       /* Below members are to attach secondary EPC to an endpoint function */
+       struct pci_epc          *sec_epc;
+       struct list_head        sec_epc_list;
+       struct pci_epf_bar      sec_epc_bar[6];
+       u8                      sec_epc_func_no;
+       struct config_group     *group;
 };
 
 /**
@@ -164,16 +183,17 @@ static inline void *epf_get_drvdata(struct pci_epf *epf)
        return dev_get_drvdata(&epf->dev);
 }
 
-const struct pci_epf_device_id *
-pci_epf_match_device(const struct pci_epf_device_id *id, struct pci_epf *epf);
 struct pci_epf *pci_epf_create(const char *name);
 void pci_epf_destroy(struct pci_epf *epf);
 int __pci_epf_register_driver(struct pci_epf_driver *driver,
                              struct module *owner);
 void pci_epf_unregister_driver(struct pci_epf_driver *driver);
 void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar,
-                         size_t align);
-void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar);
+                         size_t align, enum pci_epc_interface_type type);
+void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar,
+                       enum pci_epc_interface_type type);
 int pci_epf_bind(struct pci_epf *epf);
 void pci_epf_unbind(struct pci_epf *epf);
+struct config_group *pci_epf_type_add_cfs(struct pci_epf *epf,
+                                         struct config_group *group);
 #endif /* __LINUX_PCI_EPF_H */
index 53f4904..86c799c 100644 (file)
@@ -1926,7 +1926,7 @@ enum pci_fixup_pass {
 };
 
 #ifdef CONFIG_HAVE_ARCH_PREL32_RELOCATIONS
-#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+#define ___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class, \
                                    class_shift, hook)                  \
        __ADDRESSABLE(hook)                                             \
        asm(".section " #sec ", \"a\"                           \n"     \
@@ -1935,10 +1935,33 @@ enum pci_fixup_pass {
            ".long "    #class ", " #class_shift "              \n"     \
            ".long "    #hook " - .                             \n"     \
            ".previous                                          \n");
+
+/*
+ * Clang's LTO may rename static functions in C, but has no way to
+ * handle such renamings when referenced from inline asm. To work
+ * around this, create global C stubs for these cases.
+ */
+#ifdef CONFIG_LTO_CLANG
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+                                 class_shift, hook, stub)              \
+       void stub(struct pci_dev *dev);                                 \
+       void stub(struct pci_dev *dev)                                  \
+       {                                                               \
+               hook(dev);                                              \
+       }                                                               \
+       ___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+                                 class_shift, stub)
+#else
+#define __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+                                 class_shift, hook, stub)              \
+       ___DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,  \
+                                 class_shift, hook)
+#endif
+
 #define DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,    \
                                  class_shift, hook)                    \
        __DECLARE_PCI_FIXUP_SECTION(sec, name, vendor, device, class,   \
-                                 class_shift, hook)
+                                 class_shift, hook, __UNIQUE_ID(hook))
 #else
 /* Anonymous variables would be nice... */
 #define DECLARE_PCI_FIXUP_SECTION(section, name, vendor, device, class,        \
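
The ___DECLARE_PCI_FIXUP_SECTION indirection exists because the fixup table records the hook's address from inline asm, which references the function by symbol name; as the new comment says, Clang LTO may rename a static function, so a non-static stub with a stable name is interposed. A compilable userspace sketch of the same trick (GNU toolchain and a 64-bit target assumed; the section and symbol names are illustrative):

    #include <stdio.h>

    static void hook(int v) { printf("hook(%d)\n", v); }

    /* The global stub keeps a stable symbol name even if LTO renames
     * 'hook', so the inline asm below can reference it by name. */
    void hook_stub(int v);
    void hook_stub(int v) { hook(v); }

    /* Record the stub's address in a dedicated section, as the macro does. */
    asm(".pushsection .my_fixups, \"a\"\n"
        ".quad hook_stub\n"
        ".popsection\n");

    int main(void) { hook_stub(42); return 0; }
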
index d8156a5..a76ccb6 100644 (file)
@@ -51,6 +51,7 @@
 #define PCI_BASE_CLASS_MEMORY          0x05
 #define PCI_CLASS_MEMORY_RAM           0x0500
 #define PCI_CLASS_MEMORY_FLASH         0x0501
+#define PCI_CLASS_MEMORY_CXL           0x0502
 #define PCI_CLASS_MEMORY_OTHER         0x0580
 
 #define PCI_BASE_CLASS_BRIDGE          0x06
 #define PCI_DEVICE_ID_TI_X620          0xac8d
 #define PCI_DEVICE_ID_TI_X420          0xac8e
 #define PCI_DEVICE_ID_TI_XX20_FM       0xac8f
+#define PCI_DEVICE_ID_TI_J721E         0xb00d
 #define PCI_DEVICE_ID_TI_DRA74x                0xb500
 #define PCI_DEVICE_ID_TI_DRA72x                0xb501
 
 
 #define PCI_VENDOR_ID_REDHAT           0x1b36
 
+#define PCI_VENDOR_ID_SILICOM_DENMARK  0x1c2c
+
 #define PCI_VENDOR_ID_AMAZON_ANNAPURNA_LABS    0x1c36
 
 #define PCI_VENDOR_ID_CIRCUITCO                0x1cc8
index fab42cf..3f7f89e 100644 (file)
@@ -606,6 +606,7 @@ struct swevent_hlist {
 #define PERF_ATTACH_TASK       0x04
 #define PERF_ATTACH_TASK_DATA  0x08
 #define PERF_ATTACH_ITRACE     0x10
+#define PERF_ATTACH_SCHED_CB   0x20
 
 struct perf_cgroup;
 struct perf_buffer;
@@ -872,6 +873,7 @@ struct perf_cpu_context {
        struct list_head                cgrp_cpuctx_entry;
 #endif
 
+       struct list_head                sched_cb_entry;
        int                             sched_cb_usage;
 
        int                             online;
index 36eb748..5e77239 100644 (file)
@@ -432,14 +432,6 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, unsigned long addres
  * To differentiate it from the pte_mkyoung macro, this macro is used on
  * platforms where software maintains the page access bit.
  */
-#ifndef pte_sw_mkyoung
-static inline pte_t pte_sw_mkyoung(pte_t pte)
-{
-       return pte;
-}
-#define pte_sw_mkyoung pte_sw_mkyoung
-#endif
-
 #ifndef pte_savedwrite
 #define pte_savedwrite pte_write
 #endif
@@ -912,6 +904,10 @@ static inline void ptep_modify_prot_commit(struct vm_area_struct *vma,
 #define pgprot_device pgprot_noncached
 #endif
 
+#ifndef pgprot_mhp
+#define pgprot_mhp(prot)       (prot)
+#endif
+
 #ifdef CONFIG_MMU
 #ifndef pgprot_modify
 #define pgprot_modify pgprot_modify
diff --git a/include/linux/platform_data/dma-atmel.h b/include/linux/platform_data/dma-atmel.h
deleted file mode 100644 (file)
index 069637e..0000000
+++ /dev/null
@@ -1,61 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Header file for the Atmel AHB DMA Controller driver
- *
- * Copyright (C) 2008 Atmel Corporation
- */
-#ifndef AT_HDMAC_H
-#define AT_HDMAC_H
-
-#include <linux/dmaengine.h>
-
-/**
- * struct at_dma_platform_data - Controller configuration parameters
- * @nr_channels: Number of channels supported by hardware (max 8)
- * @cap_mask: dma_capability flags supported by the platform
- */
-struct at_dma_platform_data {
-       unsigned int    nr_channels;
-       dma_cap_mask_t  cap_mask;
-};
-
-/**
- * struct at_dma_slave - Controller-specific information about a slave
- * @dma_dev: required DMA master device
- * @cfg: Platform-specific initializer for the CFG register
- */
-struct at_dma_slave {
-       struct device           *dma_dev;
-       u32                     cfg;
-};
-
-
-/* Platform-configurable bits in CFG */
-#define ATC_PER_MSB(h) ((0x30U & (h)) >> 4)    /* Extract most significant bits of a handshaking identifier */
-
-#define        ATC_SRC_PER(h)          (0xFU & (h))    /* Channel src rq associated with periph handshaking ifc h */
-#define        ATC_DST_PER(h)          ((0xFU & (h)) <<  4)    /* Channel dst rq associated with periph handshaking ifc h */
-#define        ATC_SRC_REP             (0x1 <<  8)     /* Source Replay Mod */
-#define        ATC_SRC_H2SEL           (0x1 <<  9)     /* Source Handshaking Mod */
-#define                ATC_SRC_H2SEL_SW        (0x0 <<  9)
-#define                ATC_SRC_H2SEL_HW        (0x1 <<  9)
-#define        ATC_SRC_PER_MSB(h)      (ATC_PER_MSB(h) << 10)  /* Channel src rq (most significant bits) */
-#define        ATC_DST_REP             (0x1 << 12)     /* Destination Replay Mod */
-#define        ATC_DST_H2SEL           (0x1 << 13)     /* Destination Handshaking Mod */
-#define                ATC_DST_H2SEL_SW        (0x0 << 13)
-#define                ATC_DST_H2SEL_HW        (0x1 << 13)
-#define        ATC_DST_PER_MSB(h)      (ATC_PER_MSB(h) << 14)  /* Channel dst rq (most significant bits) */
-#define        ATC_SOD                 (0x1 << 16)     /* Stop On Done */
-#define        ATC_LOCK_IF             (0x1 << 20)     /* Interface Lock */
-#define        ATC_LOCK_B              (0x1 << 21)     /* AHB Bus Lock */
-#define        ATC_LOCK_IF_L           (0x1 << 22)     /* Master Interface Arbiter Lock */
-#define                ATC_LOCK_IF_L_CHUNK     (0x0 << 22)
-#define                ATC_LOCK_IF_L_BUFFER    (0x1 << 22)
-#define        ATC_AHB_PROT_MASK       (0x7 << 24)     /* AHB Protection */
-#define        ATC_FIFOCFG_MASK        (0x3 << 28)     /* FIFO Request Configuration */
-#define                ATC_FIFOCFG_LARGESTBURST        (0x0 << 28)
-#define                ATC_FIFOCFG_HALFFIFO            (0x1 << 28)
-#define                ATC_FIFOCFG_ENOUGHSPACE         (0x2 << 28)
-
-
-#endif /* AT_HDMAC_H */
diff --git a/include/linux/platform_data/dma-coh901318.h b/include/linux/platform_data/dma-coh901318.h
deleted file mode 100644 (file)
index 4cca529..0000000
+++ /dev/null
@@ -1,72 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * Platform data for the COH901318 DMA controller
- * Copyright (C) 2007-2013 ST-Ericsson
- */
-
-#ifndef PLAT_COH901318_H
-#define PLAT_COH901318_H
-
-#ifdef CONFIG_COH901318
-
-/* We only support the U300 DMA channels */
-#define U300_DMA_MSL_TX_0              0
-#define U300_DMA_MSL_TX_1              1
-#define U300_DMA_MSL_TX_2              2
-#define U300_DMA_MSL_TX_3              3
-#define U300_DMA_MSL_TX_4              4
-#define U300_DMA_MSL_TX_5              5
-#define U300_DMA_MSL_TX_6              6
-#define U300_DMA_MSL_RX_0              7
-#define U300_DMA_MSL_RX_1              8
-#define U300_DMA_MSL_RX_2              9
-#define U300_DMA_MSL_RX_3              10
-#define U300_DMA_MSL_RX_4              11
-#define U300_DMA_MSL_RX_5              12
-#define U300_DMA_MSL_RX_6              13
-#define U300_DMA_MMCSD_RX_TX           14
-#define U300_DMA_MSPRO_TX              15
-#define U300_DMA_MSPRO_RX              16
-#define U300_DMA_UART0_TX              17
-#define U300_DMA_UART0_RX              18
-#define U300_DMA_APEX_TX               19
-#define U300_DMA_APEX_RX               20
-#define U300_DMA_PCM_I2S0_TX           21
-#define U300_DMA_PCM_I2S0_RX           22
-#define U300_DMA_PCM_I2S1_TX           23
-#define U300_DMA_PCM_I2S1_RX           24
-#define U300_DMA_XGAM_CDI              25
-#define U300_DMA_XGAM_PDI              26
-#define U300_DMA_SPI_TX                        27
-#define U300_DMA_SPI_RX                        28
-#define U300_DMA_GENERAL_PURPOSE_0     29
-#define U300_DMA_GENERAL_PURPOSE_1     30
-#define U300_DMA_GENERAL_PURPOSE_2     31
-#define U300_DMA_GENERAL_PURPOSE_3     32
-#define U300_DMA_GENERAL_PURPOSE_4     33
-#define U300_DMA_GENERAL_PURPOSE_5     34
-#define U300_DMA_GENERAL_PURPOSE_6     35
-#define U300_DMA_GENERAL_PURPOSE_7     36
-#define U300_DMA_GENERAL_PURPOSE_8     37
-#define U300_DMA_UART1_TX              38
-#define U300_DMA_UART1_RX              39
-
-#define U300_DMA_DEVICE_CHANNELS       32
-#define U300_DMA_CHANNELS              40
-
-/**
- * coh901318_filter_id() - DMA channel filter function
- * @chan: dma channel handle
- * @chan_id: id of dma channel to be filter out
- *
- * In dma_request_channel() it specifies what channel id to be requested
- */
-bool coh901318_filter_id(struct dma_chan *chan, void *chan_id);
-#else
-static inline bool coh901318_filter_id(struct dma_chan *chan, void *chan_id)
-{
-       return false;
-}
-#endif
-
-#endif /* PLAT_COH901318_H */
index 30e676b..725602d 100644 (file)
@@ -57,15 +57,4 @@ struct sdma_script_start_addrs {
        /* End of v4 array */
 };
 
-/**
- * struct sdma_platform_data - platform specific data for SDMA engine
- *
- * @fw_name            The firmware name
- * @script_addrs       SDMA scripts addresses in SDMA ROM
- */
-struct sdma_platform_data {
-       char *fw_name;
-       struct sdma_script_start_addrs *script_addrs;
-};
-
 #endif /* __MACH_MXC_SDMA_H__ */
diff --git a/include/linux/platform_data/i2c-hid.h b/include/linux/platform_data/i2c-hid.h
deleted file mode 100644 (file)
index c628bb5..0000000
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * HID over I2C protocol implementation
- *
- * Copyright (c) 2012 Benjamin Tissoires <benjamin.tissoires@gmail.com>
- * Copyright (c) 2012 Ecole Nationale de l'Aviation Civile, France
- *
- * This file is subject to the terms and conditions of the GNU General Public
- * License.  See the file COPYING in the main directory of this archive for
- * more details.
- */
-
-#ifndef __LINUX_I2C_HID_H
-#define __LINUX_I2C_HID_H
-
-#include <linux/regulator/consumer.h>
-#include <linux/types.h>
-
-/**
- * struct i2chid_platform_data - used by hid over i2c implementation.
- * @hid_descriptor_address: i2c register where the HID descriptor is stored.
- * @supplies: regulators for powering on the device.
- * @post_power_delay_ms: delay after powering on before device is usable.
- *
- * Note that it is the responsibility of the platform driver (or the acpi 5.0
- * driver, or the flattened device tree) to setup the irq related to the gpio in
- * the struct i2c_board_info.
- * The platform driver should also setup the gpio according to the device:
- *
- * A typical example is the following:
- *     irq = gpio_to_irq(intr_gpio);
- *     hkdk4412_i2c_devs5[0].irq = irq; // store the irq in i2c_board_info
- *     gpio_request(intr_gpio, "elan-irq");
- *     s3c_gpio_setpull(intr_gpio, S3C_GPIO_PULL_UP);
- */
-struct i2c_hid_platform_data {
-       u16 hid_descriptor_address;
-       struct regulator_bulk_data supplies[2];
-       int post_power_delay_ms;
-};
-
-#endif /* __LINUX_I2C_HID_H */
index a26542d..a632900 100644 (file)
@@ -12,9 +12,8 @@
 #include <linux/bitops.h>
 
 /*
- * If more options are added please update profile_names
- * array in platform-profile.c and sysfs-platform-profile.rst
- * documentation.
+ * If more options are added, please update the profile_names array in
+ * platform_profile.c and the sysfs-platform_profile documentation.
  */
 
 enum platform_profile_option {
@@ -22,6 +21,7 @@ enum platform_profile_option {
        PLATFORM_PROFILE_COOL,
        PLATFORM_PROFILE_QUIET,
        PLATFORM_PROFILE_BALANCED,
+       PLATFORM_PROFILE_BALANCED_PERFORMANCE,
        PLATFORM_PROFILE_PERFORMANCE,
        PLATFORM_PROFILE_LAST, /*must always be last */
 };
index 90797f1..307094e 100644 (file)
@@ -15,6 +15,8 @@
 #include <linux/refcount.h>
 #include <uapi/linux/posix_acl.h>
 
+struct user_namespace;
+
 struct posix_acl_entry {
        short                   e_tag;
        unsigned short          e_perm;
@@ -61,23 +63,24 @@ posix_acl_release(struct posix_acl *acl)
 
 extern void posix_acl_init(struct posix_acl *, int);
 extern struct posix_acl *posix_acl_alloc(int, gfp_t);
-extern int posix_acl_valid(struct user_namespace *, const struct posix_acl *);
-extern int posix_acl_permission(struct inode *, const struct posix_acl *, int);
 extern struct posix_acl *posix_acl_from_mode(umode_t, gfp_t);
 extern int posix_acl_equiv_mode(const struct posix_acl *, umode_t *);
 extern int __posix_acl_create(struct posix_acl **, gfp_t, umode_t *);
 extern int __posix_acl_chmod(struct posix_acl **, gfp_t, umode_t);
 
 extern struct posix_acl *get_posix_acl(struct inode *, int);
-extern int set_posix_acl(struct inode *, int, struct posix_acl *);
+extern int set_posix_acl(struct user_namespace *, struct inode *, int,
+                        struct posix_acl *);
 
 #ifdef CONFIG_FS_POSIX_ACL
-extern int posix_acl_chmod(struct inode *, umode_t);
+int posix_acl_chmod(struct user_namespace *, struct inode *, umode_t);
 extern int posix_acl_create(struct inode *, umode_t *, struct posix_acl **,
                struct posix_acl **);
-extern int posix_acl_update_mode(struct inode *, umode_t *, struct posix_acl **);
+int posix_acl_update_mode(struct user_namespace *, struct inode *, umode_t *,
+                         struct posix_acl **);
 
-extern int simple_set_acl(struct inode *, struct posix_acl *, int);
+extern int simple_set_acl(struct user_namespace *, struct inode *,
+                         struct posix_acl *, int);
 extern int simple_acl_create(struct inode *, struct inode *);
 
 struct posix_acl *get_cached_acl(struct inode *inode, int type);
@@ -85,6 +88,9 @@ struct posix_acl *get_cached_acl_rcu(struct inode *inode, int type);
 void set_cached_acl(struct inode *inode, int type, struct posix_acl *acl);
 void forget_cached_acl(struct inode *inode, int type);
 void forget_all_cached_acls(struct inode *inode);
+int posix_acl_valid(struct user_namespace *, const struct posix_acl *);
+int posix_acl_permission(struct user_namespace *, struct inode *,
+                        const struct posix_acl *, int);
 
 static inline void cache_no_acl(struct inode *inode)
 {
@@ -92,7 +98,8 @@ static inline void cache_no_acl(struct inode *inode)
        inode->i_default_acl = NULL;
 }
 #else
-static inline int posix_acl_chmod(struct inode *inode, umode_t mode)
+static inline int posix_acl_chmod(struct user_namespace *mnt_userns,
+                                 struct inode *inode, umode_t mode)
 {
        return 0;
 }
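
The struct user_namespace forward declaration added above is what lets all of these prototypes gain the mnt_userns argument without including another header: an incomplete type is enough to declare and pass pointers, and only code that dereferences the struct needs the full definition. A self-contained illustration (the names are invented):

    #include <stdio.h>

    struct userns;                          /* incomplete type, no definition */

    /* Pointers to an incomplete type can be declared, passed and compared. */
    static int same_ns(struct userns *a, struct userns *b) { return a == b; }

    /* Only here, where the struct is used by value, is the layout needed. */
    struct userns { int level; };

    int main(void)
    {
            struct userns init_ns = { .level = 0 };
            printf("%d\n", same_ns(&init_ns, &init_ns));
            return 0;
    }
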
index 2387709..060e8d2 100644 (file)
@@ -33,13 +33,17 @@ posix_acl_xattr_count(size_t size)
 }
 
 #ifdef CONFIG_FS_POSIX_ACL
-void posix_acl_fix_xattr_from_user(void *value, size_t size);
-void posix_acl_fix_xattr_to_user(void *value, size_t size);
+void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+                                  void *value, size_t size);
+void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+                                void *value, size_t size);
 #else
-static inline void posix_acl_fix_xattr_from_user(void *value, size_t size)
+static inline void posix_acl_fix_xattr_from_user(struct user_namespace *mnt_userns,
+                                                void *value, size_t size)
 {
 }
-static inline void posix_acl_fix_xattr_to_user(void *value, size_t size)
+static inline void posix_acl_fix_xattr_to_user(struct user_namespace *mnt_userns,
+                                              void *value, size_t size)
 {
 }
 #endif
index dafccfc..dd4687b 100644 (file)
@@ -488,7 +488,7 @@ fwnode_create_software_node(const struct property_entry *properties,
                            const struct fwnode_handle *parent);
 void fwnode_remove_software_node(struct fwnode_handle *fwnode);
 
-int device_add_software_node(struct device *dev, const struct software_node *swnode);
+int device_add_software_node(struct device *dev, const struct software_node *node);
 void device_remove_software_node(struct device *dev);
 
 int device_create_managed_software_node(struct device *dev,
index 2a9df80..b5ebf6c 100644 (file)
@@ -171,7 +171,7 @@ static inline void ptrace_event(int event, unsigned long message)
  *
  * Check whether @event is enabled and, if so, report @event and @pid
  * to the ptrace parent.  @pid is reported as the pid_t seen from the
- * the ptrace parent's pid namespace.
+ * ptrace parent's pid namespace.
  *
  * Called without locks.
  */
index ec2ad4b..c4fdb44 100644 (file)
@@ -460,7 +460,5 @@ void geni_icc_set_tag(struct geni_se *se, u32 tag);
 int geni_icc_enable(struct geni_se *se);
 
 int geni_icc_disable(struct geni_se *se);
-
-void geni_remove_earlycon_icc_vote(void);
 #endif
 #endif
index ccdb532..71902f4 100644 (file)
@@ -147,6 +147,9 @@ enum {
 #define BUCK6_FPWM                     0x04
 #define BUCK6_ENMODE_MASK              0x03
 
+/* PCA9450_REG_BUCK123_PRESET_EN bit */
+#define BUCK123_PRESET_EN              0x80
+
 /* PCA9450_BUCK1OUT_DVS0 bits */
 #define BUCK1OUT_DVS0_MASK             0x7F
 #define BUCK1OUT_DVS0_DEFAULT          0x14
index bba2920..980a655 100644 (file)
@@ -23,6 +23,7 @@ enum timespec_type {
  * System call restart block.
  */
 struct restart_block {
+       unsigned long arch_data;
        long (*fn)(struct restart_block *);
        union {
                /* For futex_wait and futex_wait_requeue_pi */
index 70085ca..def5c62 100644 (file)
@@ -213,7 +213,8 @@ struct page_vma_mapped_walk {
 
 static inline void page_vma_mapped_walk_done(struct page_vma_mapped_walk *pvmw)
 {
-       if (pvmw->pte)
+       /* HugeTLB ptes reference the page table entry directly and were never
+        * mapped with pte_offset_map(), so do not pte_unmap() them. */
+       if (pvmw->pte && !PageHuge(pvmw->page))
                pte_unmap(pvmw->pte);
        if (pvmw->ptl)
                spin_unlock(pvmw->ptl);
index daded9f..22fc3a6 100644 (file)
@@ -7,12 +7,17 @@
 
 struct qcom_glink;
 
+#if IS_ENABLED(CONFIG_RPMSG_QCOM_GLINK)
+void qcom_glink_ssr_notify(const char *ssr_name);
+#else
+static inline void qcom_glink_ssr_notify(const char *ssr_name) {}
+#endif
+
 #if IS_ENABLED(CONFIG_RPMSG_QCOM_GLINK_SMEM)
 
 struct qcom_glink *qcom_glink_smem_register(struct device *parent,
                                            struct device_node *node);
 void qcom_glink_smem_unregister(struct qcom_glink *glink);
-void qcom_glink_ssr_notify(const char *ssr_name);
 
 #else
 
@@ -24,7 +29,6 @@ qcom_glink_smem_register(struct device *parent,
 }
 
 static inline void qcom_glink_smem_unregister(struct qcom_glink *glink) {}
-static inline void qcom_glink_ssr_notify(const char *ssr_name) {}
 #endif
 
 #endif
index 26f4998..ef00bb2 100644 (file)
@@ -895,6 +895,9 @@ struct task_struct {
        /* CLONE_CHILD_CLEARTID: */
        int __user                      *clear_child_tid;
 
+       /* PF_IO_WORKER */
+       void                            *pf_io_worker;
+
        u64                             utime;
        u64                             stime;
 #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME
index 1ae08b8..90b2a0b 100644 (file)
@@ -140,7 +140,8 @@ static inline bool in_vfork(struct task_struct *tsk)
         * another oom-unkillable task does this it should blame itself.
         */
        rcu_read_lock();
-       ret = tsk->vfork_done && tsk->real_parent->mm == tsk->mm;
+       ret = tsk->vfork_done &&
+                       rcu_dereference(tsk->real_parent)->mm == tsk->mm;
        rcu_read_unlock();
 
        return ret;
index c0f71f2..ef02be8 100644 (file)
@@ -31,6 +31,7 @@ struct kernel_clone_args {
        /* Number of elements in *set_tid */
        size_t set_tid_size;
        int cgroup;
+       int io_thread;
        struct cgroup *cgrp;
        struct css_set *cset;
 };
@@ -82,6 +83,7 @@ extern void exit_files(struct task_struct *);
 extern void exit_itimers(struct signal_struct *);
 
 extern pid_t kernel_clone(struct kernel_clone_args *kargs);
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node);
 struct task_struct *fork_idle(int);
 struct mm_struct *copy_init_mm(void);
 extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags);
index b0d14f0..8aeebd6 100644 (file)
@@ -145,13 +145,16 @@ extern int cap_capset(struct cred *new, const struct cred *old,
                      const kernel_cap_t *inheritable,
                      const kernel_cap_t *permitted);
 extern int cap_bprm_creds_from_file(struct linux_binprm *bprm, struct file *file);
-extern int cap_inode_setxattr(struct dentry *dentry, const char *name,
-                             const void *value, size_t size, int flags);
-extern int cap_inode_removexattr(struct dentry *dentry, const char *name);
-extern int cap_inode_need_killpriv(struct dentry *dentry);
-extern int cap_inode_killpriv(struct dentry *dentry);
-extern int cap_inode_getsecurity(struct inode *inode, const char *name,
-                                void **buffer, bool alloc);
+int cap_inode_setxattr(struct dentry *dentry, const char *name,
+                      const void *value, size_t size, int flags);
+int cap_inode_removexattr(struct user_namespace *mnt_userns,
+                         struct dentry *dentry, const char *name);
+int cap_inode_need_killpriv(struct dentry *dentry);
+int cap_inode_killpriv(struct user_namespace *mnt_userns,
+                      struct dentry *dentry);
+int cap_inode_getsecurity(struct user_namespace *mnt_userns,
+                         struct inode *inode, const char *name, void **buffer,
+                         bool alloc);
 extern int cap_mmap_addr(unsigned long addr);
 extern int cap_mmap_file(struct file *file, unsigned long reqprot,
                         unsigned long prot, unsigned long flags);
@@ -348,16 +351,21 @@ int security_inode_follow_link(struct dentry *dentry, struct inode *inode,
 int security_inode_permission(struct inode *inode, int mask);
 int security_inode_setattr(struct dentry *dentry, struct iattr *attr);
 int security_inode_getattr(const struct path *path);
-int security_inode_setxattr(struct dentry *dentry, const char *name,
+int security_inode_setxattr(struct user_namespace *mnt_userns,
+                           struct dentry *dentry, const char *name,
                            const void *value, size_t size, int flags);
 void security_inode_post_setxattr(struct dentry *dentry, const char *name,
                                  const void *value, size_t size, int flags);
 int security_inode_getxattr(struct dentry *dentry, const char *name);
 int security_inode_listxattr(struct dentry *dentry);
-int security_inode_removexattr(struct dentry *dentry, const char *name);
+int security_inode_removexattr(struct user_namespace *mnt_userns,
+                              struct dentry *dentry, const char *name);
 int security_inode_need_killpriv(struct dentry *dentry);
-int security_inode_killpriv(struct dentry *dentry);
-int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc);
+int security_inode_killpriv(struct user_namespace *mnt_userns,
+                           struct dentry *dentry);
+int security_inode_getsecurity(struct user_namespace *mnt_userns,
+                              struct inode *inode, const char *name,
+                              void **buffer, bool alloc);
 int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags);
 int security_inode_listsecurity(struct inode *inode, char *buffer, size_t buffer_size);
 void security_inode_getsecid(struct inode *inode, u32 *secid);
@@ -841,8 +849,9 @@ static inline int security_inode_getattr(const struct path *path)
        return 0;
 }
 
-static inline int security_inode_setxattr(struct dentry *dentry,
-               const char *name, const void *value, size_t size, int flags)
+static inline int security_inode_setxattr(struct user_namespace *mnt_userns,
+               struct dentry *dentry, const char *name, const void *value,
+               size_t size, int flags)
 {
        return cap_inode_setxattr(dentry, name, value, size, flags);
 }
@@ -862,10 +871,11 @@ static inline int security_inode_listxattr(struct dentry *dentry)
        return 0;
 }
 
-static inline int security_inode_removexattr(struct dentry *dentry,
-                       const char *name)
+static inline int security_inode_removexattr(struct user_namespace *mnt_userns,
+                                            struct dentry *dentry,
+                                            const char *name)
 {
-       return cap_inode_removexattr(dentry, name);
+       return cap_inode_removexattr(mnt_userns, dentry, name);
 }
 
 static inline int security_inode_need_killpriv(struct dentry *dentry)
@@ -873,14 +883,18 @@ static inline int security_inode_need_killpriv(struct dentry *dentry)
        return cap_inode_need_killpriv(dentry);
 }
 
-static inline int security_inode_killpriv(struct dentry *dentry)
+static inline int security_inode_killpriv(struct user_namespace *mnt_userns,
+                                         struct dentry *dentry)
 {
-       return cap_inode_killpriv(dentry);
+       return cap_inode_killpriv(mnt_userns, dentry);
 }
 
-static inline int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc)
+static inline int security_inode_getsecurity(struct user_namespace *mnt_userns,
+                                            struct inode *inode,
+                                            const char *name, void **buffer,
+                                            bool alloc)
 {
-       return cap_inode_getsecurity(inode, name, buffer, alloc);
+       return cap_inode_getsecurity(mnt_userns, inode, name, buffer, alloc);
 }
 
 static inline int security_inode_setsecurity(struct inode *inode, const char *name, const void *value, size_t size, int flags)
index 2f7bb92..f61e34f 100644 (file)
@@ -664,10 +664,7 @@ typedef struct {
  * seqcount_latch_init() - runtime initializer for seqcount_latch_t
  * @s: Pointer to the seqcount_latch_t instance
  */
-static inline void seqcount_latch_init(seqcount_latch_t *s)
-{
-       seqcount_init(&s->seqcount);
-}
+#define seqcount_latch_init(s) seqcount_init(&(s)->seqcount)
 
 /**
  * raw_read_seqcount_latch() - pick even/odd latch data copy
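
The switch from an inline function to a macro matters under lockdep: seqcount_init() defines a static lock class key at the point where it expands, and a static inside a function is a single object shared by every caller, while a macro gets a fresh one per call site (presumably the motivation here). A small demonstration of that C property, using a GNU statement expression as the kernel does (toy names):

    #include <stdio.h>

    static inline int *fn_counter(void)
    {
            static int c;           /* one object, shared by every caller */
            return &c;
    }

    /* A macro expands at each use, so every site gets its own static. */
    #define SITE_COUNTER() ({ static int c; &c; })

    int main(void)
    {
            printf("inline fn: %d\n", fn_counter() == fn_counter());     /* 1 */
            printf("macro:     %d\n", SITE_COUNTER() == SITE_COUNTER()); /* 0 */
            return 0;
    }
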
diff --git a/include/linux/sfi.h b/include/linux/sfi.h
deleted file mode 100644 (file)
index e0e1597..0000000
+++ /dev/null
@@ -1,210 +0,0 @@
-/* sfi.h Simple Firmware Interface */
-
-/*
-
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  BSD LICENSE
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#ifndef _LINUX_SFI_H
-#define _LINUX_SFI_H
-
-#include <linux/init.h>
-#include <linux/types.h>
-
-/* Table signatures reserved by the SFI specification */
-#define SFI_SIG_SYST           "SYST"
-#define SFI_SIG_FREQ           "FREQ"
-#define SFI_SIG_IDLE           "IDLE"
-#define SFI_SIG_CPUS           "CPUS"
-#define SFI_SIG_MTMR           "MTMR"
-#define SFI_SIG_MRTC           "MRTC"
-#define SFI_SIG_MMAP           "MMAP"
-#define SFI_SIG_APIC           "APIC"
-#define SFI_SIG_XSDT           "XSDT"
-#define SFI_SIG_WAKE           "WAKE"
-#define SFI_SIG_DEVS           "DEVS"
-#define SFI_SIG_GPIO           "GPIO"
-
-#define SFI_SIGNATURE_SIZE     4
-#define SFI_OEM_ID_SIZE                6
-#define SFI_OEM_TABLE_ID_SIZE  8
-
-#define SFI_NAME_LEN           16
-
-#define SFI_SYST_SEARCH_BEGIN          0x000E0000
-#define SFI_SYST_SEARCH_END            0x000FFFFF
-
-#define SFI_GET_NUM_ENTRIES(ptable, entry_type) \
-       ((ptable->header.len - sizeof(struct sfi_table_header)) / \
-       (sizeof(entry_type)))
-/*
- * Table structures must be byte-packed to match the SFI specification,
- * as they are provided by the BIOS.
- */
-struct sfi_table_header {
-       char    sig[SFI_SIGNATURE_SIZE];
-       u32     len;
-       u8      rev;
-       u8      csum;
-       char    oem_id[SFI_OEM_ID_SIZE];
-       char    oem_table_id[SFI_OEM_TABLE_ID_SIZE];
-} __packed;
-
-struct sfi_table_simple {
-       struct sfi_table_header         header;
-       u64                             pentry[1];
-} __packed;
-
-/* Comply with UEFI spec 2.1 */
-struct sfi_mem_entry {
-       u32     type;
-       u64     phys_start;
-       u64     virt_start;
-       u64     pages;
-       u64     attrib;
-} __packed;
-
-struct sfi_cpu_table_entry {
-       u32     apic_id;
-} __packed;
-
-struct sfi_cstate_table_entry {
-       u32     hint;           /* MWAIT hint */
-       u32     latency;        /* latency in ms */
-} __packed;
-
-struct sfi_apic_table_entry {
-       u64     phys_addr;      /* phy base addr for APIC reg */
-} __packed;
-
-struct sfi_freq_table_entry {
-       u32     freq_mhz;       /* in MHZ */
-       u32     latency;        /* transition latency in ms */
-       u32     ctrl_val;       /* value to write to PERF_CTL */
-} __packed;
-
-struct sfi_wake_table_entry {
-       u64     phys_addr;      /* pointer to where the wake vector locates */
-} __packed;
-
-struct sfi_timer_table_entry {
-       u64     phys_addr;      /* phy base addr for the timer */
-       u32     freq_hz;        /* in HZ */
-       u32     irq;
-} __packed;
-
-struct sfi_rtc_table_entry {
-       u64     phys_addr;      /* phy base addr for the RTC */
-       u32     irq;
-} __packed;
-
-struct sfi_device_table_entry {
-       u8      type;           /* bus type, I2C, SPI or ...*/
-#define SFI_DEV_TYPE_SPI       0
-#define SFI_DEV_TYPE_I2C       1
-#define SFI_DEV_TYPE_UART      2
-#define SFI_DEV_TYPE_HSI       3
-#define SFI_DEV_TYPE_IPC       4
-#define SFI_DEV_TYPE_SD                5
-
-       u8      host_num;       /* attached to host 0, 1...*/
-       u16     addr;
-       u8      irq;
-       u32     max_freq;
-       char    name[SFI_NAME_LEN];
-} __packed;
-
-struct sfi_gpio_table_entry {
-       char    controller_name[SFI_NAME_LEN];
-       u16     pin_no;
-       char    pin_name[SFI_NAME_LEN];
-} __packed;
-
-typedef int (*sfi_table_handler) (struct sfi_table_header *table);
-
-#ifdef CONFIG_SFI
-extern void __init sfi_init(void);
-extern int __init sfi_platform_init(void);
-extern void __init sfi_init_late(void);
-extern int sfi_table_parse(char *signature, char *oem_id, char *oem_table_id,
-                               sfi_table_handler handler);
-
-extern int sfi_disabled;
-static inline void disable_sfi(void)
-{
-       sfi_disabled = 1;
-}
-
-#else /* !CONFIG_SFI */
-
-static inline void sfi_init(void)
-{
-}
-
-static inline void sfi_init_late(void)
-{
-}
-
-#define sfi_disabled   0
-
-static inline int sfi_table_parse(char *signature, char *oem_id,
-                                       char *oem_table_id,
-                                       sfi_table_handler handler)
-{
-       return -1;
-}
-
-#endif /* !CONFIG_SFI */
-
-#endif /*_LINUX_SFI_H*/
diff --git a/include/linux/sfi_acpi.h b/include/linux/sfi_acpi.h
deleted file mode 100644 (file)
index a6e555c..0000000
+++ /dev/null
@@ -1,93 +0,0 @@
-/* sfi.h Simple Firmware Interface */
-
-/*
-
-  This file is provided under a dual BSD/GPLv2 license.  When using or
-  redistributing this file, you may do so under either license.
-
-  GPL LICENSE SUMMARY
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify
-  it under the terms of version 2 of the GNU General Public License as
-  published by the Free Software Foundation.
-
-  This program is distributed in the hope that it will be useful, but
-  WITHOUT ANY WARRANTY; without even the implied warranty of
-  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
-  General Public License for more details.
-
-  You should have received a copy of the GNU General Public License
-  along with this program; if not, write to the Free Software
-  Foundation, Inc., 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
-  The full GNU General Public License is included in this distribution
-  in the file called LICENSE.GPL.
-
-  BSD LICENSE
-
-  Copyright(c) 2009 Intel Corporation. All rights reserved.
-
-  Redistribution and use in source and binary forms, with or without
-  modification, are permitted provided that the following conditions
-  are met:
-
-    * Redistributions of source code must retain the above copyright
-      notice, this list of conditions and the following disclaimer.
-    * Redistributions in binary form must reproduce the above copyright
-      notice, this list of conditions and the following disclaimer in
-      the documentation and/or other materials provided with the
-      distribution.
-    * Neither the name of Intel Corporation nor the names of its
-      contributors may be used to endorse or promote products derived
-      from this software without specific prior written permission.
-
-  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
-  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
-  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
-  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
-  OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-  SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
-  LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
-  DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
-  THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
-  (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
-  OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
-
-*/
-
-#ifndef _LINUX_SFI_ACPI_H
-#define _LINUX_SFI_ACPI_H
-
-#include <linux/acpi.h>
-#include <linux/sfi.h>
-
-#ifdef CONFIG_SFI
-extern int sfi_acpi_table_parse(char *signature, char *oem_id,
-                               char *oem_table_id,
-                               int (*handler)(struct acpi_table_header *));
-
-static inline int __init acpi_sfi_table_parse(char *signature,
-                               int (*handler)(struct acpi_table_header *))
-{
-       if (!acpi_table_parse(signature, handler))
-               return 0;
-
-       return sfi_acpi_table_parse(signature, NULL, NULL, handler);
-}
-#else /* !CONFIG_SFI */
-static inline int sfi_acpi_table_parse(char *signature, char *oem_id,
-                               char *oem_table_id,
-                               int (*handler)(struct acpi_table_header *))
-{
-       return -1;
-}
-
-static inline int __init acpi_sfi_table_parse(char *signature,
-                               int (*handler)(struct acpi_table_header *))
-{
-       return acpi_table_parse(signature, handler);
-}
-#endif /* !CONFIG_SFI */
-
-#endif /*_LINUX_SFI_ACPI_H*/
diff --git a/include/linux/sirfsoc_dma.h b/include/linux/sirfsoc_dma.h
deleted file mode 100644 (file)
index 50161b6..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _SIRFSOC_DMA_H_
-#define _SIRFSOC_DMA_H_
-
-bool sirfsoc_dma_filter_id(struct dma_chan *chan, void *chan_id);
-
-#endif
index 6d0a33d..f2c9ee7 100644 (file)
@@ -285,6 +285,7 @@ struct nf_bridge_info {
 struct tc_skb_ext {
        __u32 chain;
        __u16 mru;
+       bool post_ct;
 };
 #endif
 
index 9eb430c..3aa5e1e 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _LINUX_SLAB_DEF_H
 #define        _LINUX_SLAB_DEF_H
 
+#include <linux/kfence.h>
 #include <linux/reciprocal_div.h>
 
 /*
@@ -114,6 +115,8 @@ static inline unsigned int obj_to_index(const struct kmem_cache *cache,
 static inline int objs_per_slab_page(const struct kmem_cache *cache,
                                     const struct page *page)
 {
+       if (is_kfence_address(page_address(page)))
+               return 1;
        return cache->num;
 }
 
index 1be0ed5..dcde82a 100644 (file)
@@ -7,6 +7,7 @@
  *
  * (C) 2007 SGI, Christoph Lameter
  */
+#include <linux/kfence.h>
 #include <linux/kobject.h>
 #include <linux/reciprocal_div.h>
 
@@ -185,6 +186,8 @@ static inline unsigned int __obj_to_index(const struct kmem_cache *cache,
 static inline unsigned int obj_to_index(const struct kmem_cache *cache,
                                        const struct page *page, void *obj)
 {
+       if (is_kfence_address(obj))
+               return 0;
        return __obj_to_index(cache, page_address(page), obj);
 }
 
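
Both allocators special-case KFENCE objects in the index math because KFENCE serves them from its own dedicated pool with one object per page, so the usual size-based calculation does not apply (hence one object per page in the slab_def.h hunk and index 0 here). is_kfence_address() itself boils down to a range check against that pool; a toy standalone analog:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static char pool[8 * 4096];     /* stand-in for the dedicated pool */

    /* Membership in one contiguous region is a single range compare. */
    static bool in_pool(const void *addr)
    {
            uintptr_t a = (uintptr_t)addr;
            return a >= (uintptr_t)pool && a < (uintptr_t)pool + sizeof(pool);
    }

    int main(void)
    {
            int on_stack;
            printf("pool: %d, stack: %d\n",
                   in_pool(pool + 100), in_pool(&on_stack));
            return 0;
    }
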
index f0b01b7..d08039d 100644 (file)
@@ -1005,6 +1005,8 @@ int sdw_bus_exit_clk_stop(struct sdw_bus *bus);
 
 int sdw_read(struct sdw_slave *slave, u32 addr);
 int sdw_write(struct sdw_slave *slave, u32 addr, u8 value);
+int sdw_write_no_pm(struct sdw_slave *slave, u32 addr, u8 value);
+int sdw_read_no_pm(struct sdw_slave *slave, u32 addr);
 int sdw_nread(struct sdw_slave *slave, u32 addr, size_t count, u8 *val);
 int sdw_nwrite(struct sdw_slave *slave, u32 addr, size_t count, u8 *val);
 
index 120ffdd..3a5446a 100644 (file)
@@ -187,4 +187,6 @@ void sdw_intel_enable_irq(void __iomem *mmio_base, bool enable);
 
 irqreturn_t sdw_intel_thread(int irq, void *dev_id);
 
+#define SDW_INTEL_QUIRK_MASK_BUS_DISABLE      BIT(1)
+
 #endif
index 24d49c7..6bb4bc1 100644 (file)
@@ -21,4 +21,13 @@ unsigned int stack_depot_fetch(depot_stack_handle_t handle,
 
 unsigned int filter_irq_stacks(unsigned long *entries, unsigned int nr_entries);
 
+#ifdef CONFIG_STACKDEPOT
+int stack_depot_init(void);
+#else
+static inline int stack_depot_init(void)
+{
+       return 0;
+}
+#endif /* CONFIG_STACKDEPOT */
+
 #endif
index 30577c3..46fb3eb 100644 (file)
@@ -128,7 +128,7 @@ int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
                                   const struct cpumask *cpus);
 #else  /* CONFIG_SMP || CONFIG_HOTPLUG_CPU */
 
-static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
+static __always_inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
                                          const struct cpumask *cpus)
 {
        unsigned long flags;
@@ -139,14 +139,15 @@ static inline int stop_machine_cpuslocked(cpu_stop_fn_t fn, void *data,
        return ret;
 }
 
-static inline int stop_machine(cpu_stop_fn_t fn, void *data,
-                              const struct cpumask *cpus)
+static __always_inline int
+stop_machine(cpu_stop_fn_t fn, void *data, const struct cpumask *cpus)
 {
        return stop_machine_cpuslocked(fn, data, cpus);
 }
 
-static inline int stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
-                                                const struct cpumask *cpus)
+static __always_inline int
+stop_machine_from_inactive_cpu(cpu_stop_fn_t fn, void *data,
+                              const struct cpumask *cpus)
 {
        return stop_machine(fn, data, cpus);
 }
index 4fcfb56..9521d8c 100644 (file)
@@ -266,287 +266,7 @@ void __read_overflow3(void) __compiletime_error("detected read beyond size of ob
 void __write_overflow(void) __compiletime_error("detected write beyond size of object passed as 1st parameter");
 
 #if !defined(__NO_FORTIFY) && defined(__OPTIMIZE__) && defined(CONFIG_FORTIFY_SOURCE)
-
-#if defined(CONFIG_KASAN_GENERIC) || defined(CONFIG_KASAN_SW_TAGS)
-extern void *__underlying_memchr(const void *p, int c, __kernel_size_t size) __RENAME(memchr);
-extern int __underlying_memcmp(const void *p, const void *q, __kernel_size_t size) __RENAME(memcmp);
-extern void *__underlying_memcpy(void *p, const void *q, __kernel_size_t size) __RENAME(memcpy);
-extern void *__underlying_memmove(void *p, const void *q, __kernel_size_t size) __RENAME(memmove);
-extern void *__underlying_memset(void *p, int c, __kernel_size_t size) __RENAME(memset);
-extern char *__underlying_strcat(char *p, const char *q) __RENAME(strcat);
-extern char *__underlying_strcpy(char *p, const char *q) __RENAME(strcpy);
-extern __kernel_size_t __underlying_strlen(const char *p) __RENAME(strlen);
-extern char *__underlying_strncat(char *p, const char *q, __kernel_size_t count) __RENAME(strncat);
-extern char *__underlying_strncpy(char *p, const char *q, __kernel_size_t size) __RENAME(strncpy);
-#else
-#define __underlying_memchr    __builtin_memchr
-#define __underlying_memcmp    __builtin_memcmp
-#define __underlying_memcpy    __builtin_memcpy
-#define __underlying_memmove   __builtin_memmove
-#define __underlying_memset    __builtin_memset
-#define __underlying_strcat    __builtin_strcat
-#define __underlying_strcpy    __builtin_strcpy
-#define __underlying_strlen    __builtin_strlen
-#define __underlying_strncat   __builtin_strncat
-#define __underlying_strncpy   __builtin_strncpy
-#endif
-
-__FORTIFY_INLINE char *strncpy(char *p, const char *q, __kernel_size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 1);
-       if (__builtin_constant_p(size) && p_size < size)
-               __write_overflow();
-       if (p_size < size)
-               fortify_panic(__func__);
-       return __underlying_strncpy(p, q, size);
-}
-
-__FORTIFY_INLINE char *strcat(char *p, const char *q)
-{
-       size_t p_size = __builtin_object_size(p, 1);
-       if (p_size == (size_t)-1)
-               return __underlying_strcat(p, q);
-       if (strlcat(p, q, p_size) >= p_size)
-               fortify_panic(__func__);
-       return p;
-}
-
-__FORTIFY_INLINE __kernel_size_t strlen(const char *p)
-{
-       __kernel_size_t ret;
-       size_t p_size = __builtin_object_size(p, 1);
-
-       /* Work around gcc excess stack consumption issue */
-       if (p_size == (size_t)-1 ||
-           (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0'))
-               return __underlying_strlen(p);
-       ret = strnlen(p, p_size);
-       if (p_size <= ret)
-               fortify_panic(__func__);
-       return ret;
-}
-
-extern __kernel_size_t __real_strnlen(const char *, __kernel_size_t) __RENAME(strnlen);
-__FORTIFY_INLINE __kernel_size_t strnlen(const char *p, __kernel_size_t maxlen)
-{
-       size_t p_size = __builtin_object_size(p, 1);
-       __kernel_size_t ret = __real_strnlen(p, maxlen < p_size ? maxlen : p_size);
-       if (p_size <= ret && maxlen != ret)
-               fortify_panic(__func__);
-       return ret;
-}
-
-/* defined after fortified strlen to reuse it */
-extern size_t __real_strlcpy(char *, const char *, size_t) __RENAME(strlcpy);
-__FORTIFY_INLINE size_t strlcpy(char *p, const char *q, size_t size)
-{
-       size_t ret;
-       size_t p_size = __builtin_object_size(p, 1);
-       size_t q_size = __builtin_object_size(q, 1);
-       if (p_size == (size_t)-1 && q_size == (size_t)-1)
-               return __real_strlcpy(p, q, size);
-       ret = strlen(q);
-       if (size) {
-               size_t len = (ret >= size) ? size - 1 : ret;
-               if (__builtin_constant_p(len) && len >= p_size)
-                       __write_overflow();
-               if (len >= p_size)
-                       fortify_panic(__func__);
-               __underlying_memcpy(p, q, len);
-               p[len] = '\0';
-       }
-       return ret;
-}
-
-/* defined after fortified strnlen to reuse it */
-extern ssize_t __real_strscpy(char *, const char *, size_t) __RENAME(strscpy);
-__FORTIFY_INLINE ssize_t strscpy(char *p, const char *q, size_t size)
-{
-       size_t len;
-       /* Use string size rather than possible enclosing struct size. */
-       size_t p_size = __builtin_object_size(p, 1);
-       size_t q_size = __builtin_object_size(q, 1);
-
-       /* If we cannot get size of p and q default to call strscpy. */
-       if (p_size == (size_t) -1 && q_size == (size_t) -1)
-               return __real_strscpy(p, q, size);
-
-       /*
-        * If size can be known at compile time and is greater than
-        * p_size, generate a compile time write overflow error.
-        */
-       if (__builtin_constant_p(size) && size > p_size)
-               __write_overflow();
-
-       /*
-        * This call protects from read overflow, because len will default to q
-        * length if it is smaller than size.
-        */
-       len = strnlen(q, size);
-       /*
-        * If len equals size, we will copy only size bytes which leads to
-        * -E2BIG being returned.
-        * Otherwise we will copy len + 1 because of the final '\0'.
-        */
-       len = len == size ? size : len + 1;
-
-       /*
-        * Generate a runtime write overflow error if len is greater than
-        * p_size.
-        */
-       if (len > p_size)
-               fortify_panic(__func__);
-
-       /*
-        * We can now safely call vanilla strscpy because we are protected from:
-        * 1. Read overflow, thanks to the call to strnlen().
-        * 2. Write overflow, thanks to the checks above.
-        */
-       return __real_strscpy(p, q, len);
-}
-
-/* defined after fortified strlen and strnlen to reuse them */
-__FORTIFY_INLINE char *strncat(char *p, const char *q, __kernel_size_t count)
-{
-       size_t p_len, copy_len;
-       size_t p_size = __builtin_object_size(p, 1);
-       size_t q_size = __builtin_object_size(q, 1);
-       if (p_size == (size_t)-1 && q_size == (size_t)-1)
-               return __underlying_strncat(p, q, count);
-       p_len = strlen(p);
-       copy_len = strnlen(q, count);
-       if (p_size < p_len + copy_len + 1)
-               fortify_panic(__func__);
-       __underlying_memcpy(p + p_len, q, copy_len);
-       p[p_len + copy_len] = '\0';
-       return p;
-}
-
-__FORTIFY_INLINE void *memset(void *p, int c, __kernel_size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       if (__builtin_constant_p(size) && p_size < size)
-               __write_overflow();
-       if (p_size < size)
-               fortify_panic(__func__);
-       return __underlying_memset(p, c, size);
-}
-
-__FORTIFY_INLINE void *memcpy(void *p, const void *q, __kernel_size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       size_t q_size = __builtin_object_size(q, 0);
-       if (__builtin_constant_p(size)) {
-               if (p_size < size)
-                       __write_overflow();
-               if (q_size < size)
-                       __read_overflow2();
-       }
-       if (p_size < size || q_size < size)
-               fortify_panic(__func__);
-       return __underlying_memcpy(p, q, size);
-}
-
-__FORTIFY_INLINE void *memmove(void *p, const void *q, __kernel_size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       size_t q_size = __builtin_object_size(q, 0);
-       if (__builtin_constant_p(size)) {
-               if (p_size < size)
-                       __write_overflow();
-               if (q_size < size)
-                       __read_overflow2();
-       }
-       if (p_size < size || q_size < size)
-               fortify_panic(__func__);
-       return __underlying_memmove(p, q, size);
-}
-
-extern void *__real_memscan(void *, int, __kernel_size_t) __RENAME(memscan);
-__FORTIFY_INLINE void *memscan(void *p, int c, __kernel_size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       if (__builtin_constant_p(size) && p_size < size)
-               __read_overflow();
-       if (p_size < size)
-               fortify_panic(__func__);
-       return __real_memscan(p, c, size);
-}
-
-__FORTIFY_INLINE int memcmp(const void *p, const void *q, __kernel_size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       size_t q_size = __builtin_object_size(q, 0);
-       if (__builtin_constant_p(size)) {
-               if (p_size < size)
-                       __read_overflow();
-               if (q_size < size)
-                       __read_overflow2();
-       }
-       if (p_size < size || q_size < size)
-               fortify_panic(__func__);
-       return __underlying_memcmp(p, q, size);
-}
-
-__FORTIFY_INLINE void *memchr(const void *p, int c, __kernel_size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       if (__builtin_constant_p(size) && p_size < size)
-               __read_overflow();
-       if (p_size < size)
-               fortify_panic(__func__);
-       return __underlying_memchr(p, c, size);
-}
-
-void *__real_memchr_inv(const void *s, int c, size_t n) __RENAME(memchr_inv);
-__FORTIFY_INLINE void *memchr_inv(const void *p, int c, size_t size)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       if (__builtin_constant_p(size) && p_size < size)
-               __read_overflow();
-       if (p_size < size)
-               fortify_panic(__func__);
-       return __real_memchr_inv(p, c, size);
-}
-
-extern void *__real_kmemdup(const void *src, size_t len, gfp_t gfp) __RENAME(kmemdup);
-__FORTIFY_INLINE void *kmemdup(const void *p, size_t size, gfp_t gfp)
-{
-       size_t p_size = __builtin_object_size(p, 0);
-       if (__builtin_constant_p(size) && p_size < size)
-               __read_overflow();
-       if (p_size < size)
-               fortify_panic(__func__);
-       return __real_kmemdup(p, size, gfp);
-}
-
-/* defined after fortified strlen and memcpy to reuse them */
-__FORTIFY_INLINE char *strcpy(char *p, const char *q)
-{
-       size_t p_size = __builtin_object_size(p, 1);
-       size_t q_size = __builtin_object_size(q, 1);
-       size_t size;
-       if (p_size == (size_t)-1 && q_size == (size_t)-1)
-               return __underlying_strcpy(p, q);
-       size = strlen(q) + 1;
-       /* test here to use the more stringent object size */
-       if (p_size < size)
-               fortify_panic(__func__);
-       memcpy(p, q, size);
-       return p;
-}
-
-/* Don't use these outside the FORTIFY_SOURCE implementation */
-#undef __underlying_memchr
-#undef __underlying_memcmp
-#undef __underlying_memcpy
-#undef __underlying_memmove
-#undef __underlying_memset
-#undef __underlying_strcat
-#undef __underlying_strcpy
-#undef __underlying_strlen
-#undef __underlying_strncat
-#undef __underlying_strncpy
+#include <linux/fortify-string.h>
 #endif
 
 /**
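Note: the block removed above moves verbatim into the new <linux/fortify-string.h>, so behaviour is unchanged for string.h users. As a minimal sketch of what the fortified wrappers enforce, assuming CONFIG_FORTIFY_SOURCE=y and a hypothetical 8-byte buffer:

char buf[8];				/* hypothetical destination */

void copy_fixed(const char *src)
{
	/* size is a compile-time constant larger than buf:
	 * __write_overflow() turns this into a build error */
	memcpy(buf, src, 16);
}

void copy_var(const char *src, size_t n)
{
	/* size only known at run time: n > sizeof(buf)
	 * hits fortify_panic() instead */
	memcpy(buf, src, n);
}
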
index 7c693b3..1e76ed6 100644 (file)
@@ -104,7 +104,6 @@ struct svcxprt_rdma {
 
        wait_queue_head_t    sc_send_wait;      /* SQ exhaustion waitlist */
        unsigned long        sc_flags;
-       u32                  sc_pending_recvs;
        struct list_head     sc_read_complete_q;
        struct work_struct   sc_work;
 
index 3f1f7ae..4cc6ec3 100644 (file)
@@ -356,7 +356,7 @@ extern void lru_cache_add_inactive_or_unevictable(struct page *page,
 extern unsigned long zone_reclaimable_pages(struct zone *zone);
 extern unsigned long try_to_free_pages(struct zonelist *zonelist, int order,
                                        gfp_t gfp_mask, nodemask_t *mask);
-extern int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
+extern bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode);
 extern unsigned long try_to_free_mem_cgroup_pages(struct mem_cgroup *memcg,
                                                  unsigned long nr_pages,
                                                  gfp_t gfp_mask,
@@ -408,7 +408,11 @@ extern struct address_space *swapper_spaces[];
 #define swap_address_space(entry)                          \
        (&swapper_spaces[swp_type(entry)][swp_offset(entry) \
                >> SWAP_ADDRESS_SPACE_SHIFT])
-extern unsigned long total_swapcache_pages(void);
+static inline unsigned long total_swapcache_pages(void)
+{
+       return global_node_page_state(NR_SWAPCACHE);
+}
+
 extern void show_swap_cache_info(void);
 extern int add_to_swap(struct page *page);
 extern void *get_shadow_from_swap_cache(swp_entry_t entry);
@@ -481,6 +485,7 @@ struct backing_dev_info;
 extern int init_swap_address_space(unsigned int type, unsigned long nr_pages);
 extern void exit_swap_address_space(unsigned int type);
 extern struct swap_info_struct *get_swap_device(swp_entry_t entry);
+sector_t swap_page_sector(struct page *page);
 
 static inline void put_swap_device(struct swap_info_struct *si)
 {
index d9c9fc9..5857a93 100644 (file)
@@ -29,6 +29,7 @@ enum swiotlb_force {
  * controllable.
  */
 #define IO_TLB_SHIFT 11
+#define IO_TLB_SIZE (1 << IO_TLB_SHIFT)
 
 /* default to 64MB */
 #define IO_TLB_DEFAULT_SIZE (64UL<<20)
index f93f927..2839dc9 100644 (file)
@@ -68,6 +68,7 @@ union bpf_attr;
 struct io_uring_params;
 struct clone_args;
 struct open_how;
+struct mount_attr;
 
 #include <linux/types.h>
 #include <linux/aio_abi.h>
@@ -1028,6 +1029,9 @@ asmlinkage long sys_open_tree(int dfd, const char __user *path, unsigned flags);
 asmlinkage long sys_move_mount(int from_dfd, const char __user *from_path,
                               int to_dfd, const char __user *to_path,
                               unsigned int ms_flags);
+asmlinkage long sys_mount_setattr(int dfd, const char __user *path,
+                                 unsigned int flags,
+                                 struct mount_attr __user *uattr, size_t usize);
 asmlinkage long sys_fsopen(const char __user *fs_name, unsigned int flags);
 asmlinkage long sys_fsconfig(int fs_fd, unsigned int cmd, const char __user *key,
                             const void __user *value, int aux);
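The new syscall consumes the struct mount_attr forward-declared above (the type itself lives in the uapi <linux/mount.h>). A hedged userspace sketch, with the descriptor name hypothetical:

#include <linux/mount.h>	/* struct mount_attr, MOUNT_ATTR_RDONLY */
#include <linux/fcntl.h>	/* AT_EMPTY_PATH, AT_RECURSIVE */
#include <sys/syscall.h>
#include <unistd.h>

/* mark the mount referred to by mnt_fd, and everything below it, read-only */
static int make_rdonly(int mnt_fd)
{
	struct mount_attr attr = { .attr_set = MOUNT_ATTR_RDONLY };

	return syscall(__NR_mount_setattr, mnt_fd, "",
		       AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
}
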
index 13770cf..6673e4d 100644 (file)
@@ -23,7 +23,7 @@ struct ts_config;
 struct ts_state
 {
        unsigned int            offset;
-       char                    cb[40];
+       char                    cb[48];
 };
 
 /**
index 9b2158c..157762d 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/types.h>
 #include <linux/bug.h>
 #include <linux/restart_block.h>
+#include <linux/errno.h>
 
 #ifdef CONFIG_THREAD_INFO_IN_TASK
 /*
@@ -59,6 +60,18 @@ enum syscall_work_bit {
 
 #ifdef __KERNEL__
 
+#ifndef arch_set_restart_data
+#define arch_set_restart_data(restart) do { } while (0)
+#endif
+
+static inline long set_restart_fn(struct restart_block *restart,
+                                       long (*fn)(struct restart_block *))
+{
+       restart->fn = fn;
+       arch_set_restart_data(restart);
+       return -ERESTART_RESTARTBLOCK;
+}
+
 #ifndef THREAD_ALIGN
 #define THREAD_ALIGN   THREAD_SIZE
 #endif
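set_restart_fn() centralizes what callers used to open-code (assign restart->fn, return -ERESTART_RESTARTBLOCK) and gives architectures a hook to stash per-arch state via arch_set_restart_data(). A hedged sketch of a converted call site, with the my_* names hypothetical:

static long my_restart(struct restart_block *restart)
{
	/* resume the interrupted wait from the saved deadline */
	return my_wait_until(restart->futex.time);	/* hypothetical */
}

static long my_slowpath(u64 deadline)
{
	struct restart_block *restart = &current->restart_block;

	restart->futex.time = deadline;	/* reuse an existing union slot */
	return set_restart_fn(restart, my_restart);
}
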
index 7077fec..28e7af1 100644 (file)
@@ -349,15 +349,8 @@ struct trace_event_call {
        struct event_filter     *filter;
        void                    *mod;
        void                    *data;
-       /*
-        *   bit 0:             filter_active
-        *   bit 1:             allow trace by non root (cap any)
-        *   bit 2:             failed to apply filter
-        *   bit 3:             trace internal event (do not enable)
-        *   bit 4:             Event was enabled by module
-        *   bit 5:             use call filter rather than file filter
-        *   bit 6:             Event is a tracepoint
-        */
+
+       /* See the TRACE_EVENT_FL_* flags above */
        int                     flags; /* static flags of different events */
 
 #ifdef CONFIG_PERF_EVENTS
index c6abb79..e81856c 100644 (file)
@@ -115,12 +115,13 @@ static inline void u64_stats_inc(u64_stats_t *p)
 }
 #endif
 
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+#define u64_stats_init(syncp)  seqcount_init(&(syncp)->seq)
+#else
 static inline void u64_stats_init(struct u64_stats_sync *syncp)
 {
-#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
-       seqcount_init(&syncp->seq);
-#endif
 }
+#endif
 
 static inline void u64_stats_update_begin(struct u64_stats_sync *syncp)
 {
index 7d72c4e..d6a4184 100644 (file)
@@ -746,6 +746,8 @@ extern int usb_lock_device_for_reset(struct usb_device *udev,
 extern int usb_reset_device(struct usb_device *dev);
 extern void usb_queue_reset_device(struct usb_interface *dev);
 
+extern struct device *usb_intf_get_dma_device(struct usb_interface *intf);
+
 #ifdef CONFIG_ACPI
 extern int usb_acpi_set_power_state(struct usb_device *hdev, int index,
        bool enable);
index 5646dad..c71150f 100644 (file)
@@ -575,8 +575,8 @@ static inline u16 get_default_bcdDevice(void)
 {
        u16 bcdDevice;
 
-       bcdDevice = bin2bcd((LINUX_VERSION_CODE >> 16 & 0xff)) << 8;
-       bcdDevice |= bin2bcd((LINUX_VERSION_CODE >> 8 & 0xff));
+       bcdDevice = bin2bcd(LINUX_VERSION_MAJOR) << 8;
+       bcdDevice |= bin2bcd(LINUX_VERSION_PATCHLEVEL);
        return bcdDevice;
 }
 
index 6b03fdd..712363c 100644 (file)
@@ -86,6 +86,8 @@
                /* lies about caching, so always sync */        \
        US_FLAG(NO_SAME, 0x40000000)                            \
                /* Cannot handle WRITE_SAME */                  \
+       US_FLAG(SENSE_AFTER_SYNC, 0x80000000)                   \
+               /* Do REQUEST_SENSE after SYNCHRONIZE_CACHE */  \
 
 #define US_FLAG(name, value)   US_FL_##name = value ,
 enum { US_DO_ALL_FLAGS };
index 073a9e0..ad97041 100644 (file)
@@ -14,5 +14,6 @@ struct umd_info {
 int umd_load_blob(struct umd_info *info, const void *data, size_t len);
 int umd_unload_blob(struct umd_info *info);
 int fork_usermode_driver(struct umd_info *info);
+void umd_cleanup_helper(struct umd_info *info);
 
 #endif /* __LINUX_USERMODE_DRIVER_H__ */
index 0fefeb9..15fa085 100644 (file)
@@ -35,6 +35,8 @@ struct vdpa_vq_state {
        u16     avail_index;
 };
 
+struct vdpa_mgmt_dev;
+
 /**
  * vDPA device - representation of a vDPA device
  * @dev: underlying device
@@ -43,6 +45,8 @@ struct vdpa_vq_state {
  * @index: device index
  * @features_valid: were features initialized? for legacy guests
  * @nvqs: maximum number of supported virtqueues
+ * @mdev: management device pointer; the caller must set this up while
+ *       registering the device, as part of the dev_add() mgmtdev ops
+ *       callback, before invoking _vdpa_register_device().
  */
 struct vdpa_device {
        struct device dev;
@@ -51,6 +55,7 @@ struct vdpa_device {
        unsigned int index;
        bool features_valid;
        int nvqs;
+       struct vdpa_mgmt_dev *mdev;
 };
 
 /**
@@ -245,20 +250,22 @@ struct vdpa_config_ops {
 
 struct vdpa_device *__vdpa_alloc_device(struct device *parent,
                                        const struct vdpa_config_ops *config,
-                                       int nvqs,
-                                       size_t size);
+                                       size_t size, const char *name);
 
-#define vdpa_alloc_device(dev_struct, member, parent, config, nvqs)   \
+#define vdpa_alloc_device(dev_struct, member, parent, config, name)   \
                          container_of(__vdpa_alloc_device( \
-                                      parent, config, nvqs, \
+                                      parent, config, \
                                       sizeof(dev_struct) + \
                                       BUILD_BUG_ON_ZERO(offsetof( \
-                                      dev_struct, member))), \
+                                      dev_struct, member)), name), \
                                       dev_struct, member)
 
-int vdpa_register_device(struct vdpa_device *vdev);
+int vdpa_register_device(struct vdpa_device *vdev, int nvqs);
 void vdpa_unregister_device(struct vdpa_device *vdev);
 
+int _vdpa_register_device(struct vdpa_device *vdev, int nvqs);
+void _vdpa_unregister_device(struct vdpa_device *vdev);
+
 /**
  * vdpa_driver - operations for a vDPA driver
  * @driver: underlying device driver
@@ -336,4 +343,33 @@ static inline void vdpa_get_config(struct vdpa_device *vdev, unsigned offset,
        ops->get_config(vdev, offset, buf, len);
 }
 
+/**
+ * struct vdpa_mgmtdev_ops - vdpa management device ops
+ * @dev_add:   Add a vdpa device using alloc and register
+ *             @mdev: parent device to use for device addition
+ *             @name: name of the new vdpa device
+ *             The driver needs to add a new device using _vdpa_register_device()
+ *             after fully initializing the vdpa device. The driver must return 0
+ *             on success or an appropriate error code.
+ * @dev_del:   Remove a vdpa device using unregister
+ *             @mdev: parent device to use for device removal
+ *             @dev: vdpa device to remove
+ *             The driver needs to remove the specified device by calling
+ *             _vdpa_unregister_device().
+ */
+struct vdpa_mgmtdev_ops {
+       int (*dev_add)(struct vdpa_mgmt_dev *mdev, const char *name);
+       void (*dev_del)(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev);
+};
+
+struct vdpa_mgmt_dev {
+       struct device *device;
+       const struct vdpa_mgmtdev_ops *ops;
+       const struct virtio_device_id *id_table; /* supported ids */
+       struct list_head list;
+};
+
+int vdpa_mgmtdev_register(struct vdpa_mgmt_dev *mdev);
+void vdpa_mgmtdev_unregister(struct vdpa_mgmt_dev *mdev);
+
 #endif /* _LINUX_VDPA_H */
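Taken together, the vdpa changes split allocation from registration and add a management-device layer on top. A hedged sketch of a parent driver adopting the new flow, with the my_* names and MY_NVQS hypothetical and error handling trimmed:

static int my_dev_add(struct vdpa_mgmt_dev *mdev, const char *name)
{
	struct my_vdpa *vd;	/* hypothetical; embeds struct vdpa_device vdpa */

	/* note: nvqs moved from allocation time to registration time */
	vd = vdpa_alloc_device(struct my_vdpa, vdpa, my_parent_dev,
			       &my_config_ops, name);
	if (IS_ERR(vd))
		return PTR_ERR(vd);

	vd->vdpa.mdev = mdev;	/* must be set before registering */
	return _vdpa_register_device(&vd->vdpa, MY_NVQS);
}

static void my_dev_del(struct vdpa_mgmt_dev *mdev, struct vdpa_device *dev)
{
	_vdpa_unregister_device(dev);
}

static const struct vdpa_mgmtdev_ops my_mgmt_ops = {
	.dev_add = my_dev_add,
	.dev_del = my_dev_del,
};
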
index 911ab7c..a655923 100644 (file)
@@ -8,6 +8,8 @@
 #ifndef _LINUX_VERIFICATION_H
 #define _LINUX_VERIFICATION_H
 
+#include <linux/types.h>
+
 /*
  * Indicate that both builtin trusted keys and secondary trusted keys
  * should be used.
index f45940b..b7e18bd 100644 (file)
@@ -57,6 +57,11 @@ extern struct vfio_device *vfio_device_get_from_dev(struct device *dev);
 extern void vfio_device_put(struct vfio_device *device);
 extern void *vfio_device_data(struct vfio_device *device);
 
+/* events for the backend driver notify callback */
+enum vfio_iommu_notify_type {
+       VFIO_IOMMU_CONTAINER_CLOSE = 0,
+};
+
 /**
  * struct vfio_iommu_driver_ops - VFIO IOMMU driver callbacks
  */
@@ -92,6 +97,8 @@ struct vfio_iommu_driver_ops {
                                  void *data, size_t count, bool write);
        struct iommu_domain *(*group_iommu_domain)(void *iommu_data,
                                                   struct iommu_group *group);
+       void            (*notify)(void *iommu_data,
+                                 enum vfio_iommu_notify_type event);
 };
 
 extern int vfio_register_iommu_driver(const struct vfio_iommu_driver_ops *ops);
index 55ea329..b1894e0 100644 (file)
@@ -132,8 +132,6 @@ bool is_virtio_device(struct device *dev);
 void virtio_break_device(struct virtio_device *dev);
 
 void virtio_config_changed(struct virtio_device *dev);
-void virtio_config_disable(struct virtio_device *dev);
-void virtio_config_enable(struct virtio_device *dev);
 int virtio_finalize_features(struct virtio_device *dev);
 #ifdef CONFIG_PM_SLEEP
 int virtio_device_freeze(struct virtio_device *dev);
index e8a924e..6b5fcfa 100644 (file)
@@ -79,8 +79,13 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb,
                if (gso_type && skb->network_header) {
                        struct flow_keys_basic keys;
 
-                       if (!skb->protocol)
+                       if (!skb->protocol) {
+                               __be16 protocol = dev_parse_header_protocol(skb);
+
                                virtio_net_hdr_set_proto(skb, hdr);
+                               if (protocol && protocol != skb->protocol)
+                                       return -EINVAL;
+                       }
 retry:
                        if (!skb_flow_dissect_flow_keys_basic(NULL, skb, &keys,
                                                              NULL, 0, 0, 0,
diff --git a/include/linux/virtio_pci_modern.h b/include/linux/virtio_pci_modern.h
new file mode 100644 (file)
index 0000000..f26acbe
--- /dev/null
@@ -0,0 +1,111 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_VIRTIO_PCI_MODERN_H
+#define _LINUX_VIRTIO_PCI_MODERN_H
+
+#include <linux/pci.h>
+#include <linux/virtio_pci.h>
+
+struct virtio_pci_modern_device {
+       struct pci_dev *pci_dev;
+
+       struct virtio_pci_common_cfg __iomem *common;
+       /* Device-specific data (non-legacy mode)  */
+       void __iomem *device;
+       /* Base of vq notifications (non-legacy mode). */
+       void __iomem *notify_base;
+       /* Where to read and clear interrupt */
+       u8 __iomem *isr;
+
+       /* So we can sanity-check accesses. */
+       size_t notify_len;
+       size_t device_len;
+
+       /* Capability for when we need to map notifications per-vq. */
+       int notify_map_cap;
+
+       /* Multiply queue_notify_off by this value. (non-legacy mode). */
+       u32 notify_offset_multiplier;
+
+       int modern_bars;
+
+       struct virtio_device_id id;
+};
+
+/*
+ * Type-safe wrappers for io accesses.
+ * Use these to enforce at compile time the following spec requirement:
+ *
+ * The driver MUST access each field using the “natural” access
+ * method, i.e. 32-bit accesses for 32-bit fields, 16-bit accesses
+ * for 16-bit fields and 8-bit accesses for 8-bit fields.
+ */
+static inline u8 vp_ioread8(const u8 __iomem *addr)
+{
+       return ioread8(addr);
+}
+
+static inline u16 vp_ioread16(const __le16 __iomem *addr)
+{
+       return ioread16(addr);
+}
+
+static inline u32 vp_ioread32(const __le32 __iomem *addr)
+{
+       return ioread32(addr);
+}
+
+static inline void vp_iowrite8(u8 value, u8 __iomem *addr)
+{
+       iowrite8(value, addr);
+}
+
+static inline void vp_iowrite16(u16 value, __le16 __iomem *addr)
+{
+       iowrite16(value, addr);
+}
+
+static inline void vp_iowrite32(u32 value, __le32 __iomem *addr)
+{
+       iowrite32(value, addr);
+}
+
+static inline void vp_iowrite64_twopart(u64 val,
+                                       __le32 __iomem *lo,
+                                       __le32 __iomem *hi)
+{
+       vp_iowrite32((u32)val, lo);
+       vp_iowrite32(val >> 32, hi);
+}
+
+u64 vp_modern_get_features(struct virtio_pci_modern_device *mdev);
+void vp_modern_set_features(struct virtio_pci_modern_device *mdev,
+                    u64 features);
+u32 vp_modern_generation(struct virtio_pci_modern_device *mdev);
+u8 vp_modern_get_status(struct virtio_pci_modern_device *mdev);
+void vp_modern_set_status(struct virtio_pci_modern_device *mdev,
+                  u8 status);
+u16 vp_modern_queue_vector(struct virtio_pci_modern_device *mdev,
+                          u16 idx, u16 vector);
+u16 vp_modern_config_vector(struct virtio_pci_modern_device *mdev,
+                    u16 vector);
+void vp_modern_queue_address(struct virtio_pci_modern_device *mdev,
+                            u16 index, u64 desc_addr, u64 driver_addr,
+                            u64 device_addr);
+void vp_modern_set_queue_enable(struct virtio_pci_modern_device *mdev,
+                               u16 idx, bool enable);
+bool vp_modern_get_queue_enable(struct virtio_pci_modern_device *mdev,
+                               u16 idx);
+void vp_modern_set_queue_size(struct virtio_pci_modern_device *mdev,
+                             u16 idx, u16 size);
+u16 vp_modern_get_queue_size(struct virtio_pci_modern_device *mdev,
+                            u16 idx);
+u16 vp_modern_get_num_queues(struct virtio_pci_modern_device *mdev);
+u16 vp_modern_get_queue_notify_off(struct virtio_pci_modern_device *mdev,
+                                  u16 idx);
+void __iomem *vp_modern_map_capability(struct virtio_pci_modern_device *mdev, int off,
+                                      size_t minlen,
+                                      u32 align,
+                                      u32 start, u32 size,
+                                      size_t *len);
+int vp_modern_probe(struct virtio_pci_modern_device *mdev);
+void vp_modern_remove(struct virtio_pci_modern_device *mdev);
+#endif
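This header factors the modern virtio-pci transport out of drivers/virtio so other users (e.g. vDPA drivers) can share it. A hedged probe-side sketch, with the function name hypothetical:

static int my_probe(struct pci_dev *pdev)
{
	struct virtio_pci_modern_device mdev = { .pci_dev = pdev };
	u64 features;
	int err;

	err = vp_modern_probe(&mdev);	/* maps common/isr/device/notify */
	if (err)
		return err;

	vp_modern_set_status(&mdev, 0);	/* writing 0 resets the device */
	features = vp_modern_get_features(&mdev);
	/* set up queues via vp_modern_queue_address() and friends */

	vp_modern_remove(&mdev);
	return 0;
}
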
index 7e82bf5..b204a9b 100644 (file)
@@ -122,7 +122,7 @@ struct vme_driver {
        const char *name;
        int (*match)(struct vme_dev *);
        int (*probe)(struct vme_dev *);
-       int (*remove)(struct vme_dev *);
+       void (*remove)(struct vme_dev *);
        struct device_driver driver;
        struct list_head devices;
 };
index 773135f..506d625 100644 (file)
@@ -313,6 +313,12 @@ static inline void __mod_node_page_state(struct pglist_data *pgdat,
                        enum node_stat_item item, int delta)
 {
        if (vmstat_item_in_bytes(item)) {
+               /*
+                * Only cgroups use subpage accounting right now; at
+                * the global level, these items still change in
+                * multiples of whole pages. Store them as pages
+                * internally to keep the per-cpu counters compact.
+                */
                VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
                delta >>= PAGE_SHIFT;
        }
index be0afe6..e36cb11 100644 (file)
@@ -66,7 +66,7 @@ enum {
  * consists of at least two pages, the memory limit also dictates the
  * number of queue pairs a guest can create.
  */
-#define VMCI_MAX_GUEST_QP_MEMORY (128 * 1024 * 1024)
+#define VMCI_MAX_GUEST_QP_MEMORY ((size_t)(128 * 1024 * 1024))
 #define VMCI_MAX_GUEST_QP_COUNT  (VMCI_MAX_GUEST_QP_MEMORY / PAGE_SIZE / 2)
 
 /*
@@ -80,7 +80,7 @@ enum {
  * too much kernel memory (especially on vmkernel).  We limit a queuepair to
  * 32 KB, or 16 KB per queue for symmetrical pairs.
  */
-#define VMCI_MAX_PINNED_QP_MEMORY (32 * 1024)
+#define VMCI_MAX_PINNED_QP_MEMORY ((size_t)(32 * 1024))
 
 /*
  * We have a fixed set of resource IDs available in the VMX.
index 850424e..6ecf2a0 100644 (file)
@@ -173,9 +173,10 @@ static inline void ww_acquire_done(struct ww_acquire_ctx *ctx)
  */
 static inline void ww_acquire_fini(struct ww_acquire_ctx *ctx)
 {
-#ifdef CONFIG_DEBUG_MUTEXES
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
        mutex_release(&ctx->dep_map, _THIS_IP_);
-
+#endif
+#ifdef CONFIG_DEBUG_MUTEXES
        DEBUG_LOCKS_WARN_ON(ctx->acquired);
        if (!IS_ENABLED(CONFIG_PROVE_LOCKING))
                /*
index 92c0160..a91e3d9 100644 (file)
@@ -229,9 +229,10 @@ static inline int xa_err(void *entry)
  *
  * This structure is used either directly or via the XA_LIMIT() macro
  * to communicate the range of IDs that are valid for allocation.
- * Two common ranges are predefined for you:
+ * Three common ranges are predefined for you:
  * * xa_limit_32b      - [0 - UINT_MAX]
  * * xa_limit_31b      - [0 - INT_MAX]
+ * * xa_limit_16b      - [0 - USHRT_MAX]
  */
 struct xa_limit {
        u32 max;
@@ -242,6 +243,7 @@ struct xa_limit {
 
 #define xa_limit_32b   XA_LIMIT(0, UINT_MAX)
 #define xa_limit_31b   XA_LIMIT(0, INT_MAX)
+#define xa_limit_16b   XA_LIMIT(0, USHRT_MAX)
 
 typedef unsigned __bitwise xa_mark_t;
 #define XA_MARK_0              ((__force xa_mark_t)0U)
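A hedged usage sketch of the new limit, with the array and function names hypothetical:

static DEFINE_XARRAY_ALLOC(my_ids);	/* hypothetical ID allocator */

static int my_track(void *entry, u32 *id)
{
	/* constrain allocated indices to [0, USHRT_MAX] so the
	 * returned id always fits in a u16 */
	return xa_alloc(&my_ids, id, entry, xa_limit_16b, GFP_KERNEL);
}
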
index 10b4dc2..4c379d2 100644 (file)
@@ -16,6 +16,7 @@
 #include <linux/types.h>
 #include <linux/spinlock.h>
 #include <linux/mm.h>
+#include <linux/user_namespace.h>
 #include <uapi/linux/xattr.h>
 
 struct inode;
@@ -34,7 +35,8 @@ struct xattr_handler {
        int (*get)(const struct xattr_handler *, struct dentry *dentry,
                   struct inode *inode, const char *name, void *buffer,
                   size_t size);
-       int (*set)(const struct xattr_handler *, struct dentry *dentry,
+       int (*set)(const struct xattr_handler *,
+                  struct user_namespace *mnt_userns, struct dentry *dentry,
                   struct inode *inode, const char *name, const void *buffer,
                   size_t size, int flags);
 };
@@ -48,18 +50,26 @@ struct xattr {
 };
 
 ssize_t __vfs_getxattr(struct dentry *, struct inode *, const char *, void *, size_t);
-ssize_t vfs_getxattr(struct dentry *, const char *, void *, size_t);
+ssize_t vfs_getxattr(struct user_namespace *, struct dentry *, const char *,
+                    void *, size_t);
 ssize_t vfs_listxattr(struct dentry *d, char *list, size_t size);
-int __vfs_setxattr(struct dentry *, struct inode *, const char *, const void *, size_t, int);
-int __vfs_setxattr_noperm(struct dentry *, const char *, const void *, size_t, int);
-int __vfs_setxattr_locked(struct dentry *, const char *, const void *, size_t, int, struct inode **);
-int vfs_setxattr(struct dentry *, const char *, const void *, size_t, int);
-int __vfs_removexattr(struct dentry *, const char *);
-int __vfs_removexattr_locked(struct dentry *, const char *, struct inode **);
-int vfs_removexattr(struct dentry *, const char *);
+int __vfs_setxattr(struct user_namespace *, struct dentry *, struct inode *,
+                  const char *, const void *, size_t, int);
+int __vfs_setxattr_noperm(struct user_namespace *, struct dentry *,
+                         const char *, const void *, size_t, int);
+int __vfs_setxattr_locked(struct user_namespace *, struct dentry *,
+                         const char *, const void *, size_t, int,
+                         struct inode **);
+int vfs_setxattr(struct user_namespace *, struct dentry *, const char *,
+                const void *, size_t, int);
+int __vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
+int __vfs_removexattr_locked(struct user_namespace *, struct dentry *,
+                            const char *, struct inode **);
+int vfs_removexattr(struct user_namespace *, struct dentry *, const char *);
 
 ssize_t generic_listxattr(struct dentry *dentry, char *buffer, size_t buffer_size);
-ssize_t vfs_getxattr_alloc(struct dentry *dentry, const char *name,
+ssize_t vfs_getxattr_alloc(struct user_namespace *mnt_userns,
+                          struct dentry *dentry, const char *name,
                           char **xattr_value, size_t size, gfp_t flags);
 
 int xattr_supported_namespace(struct inode *inode, const char *prefix);
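All xattr entry points now take the mount's user namespace so permission checks work on idmapped mounts; callers not acting through a mount pass &init_user_ns. A hedged caller sketch, with the attribute name hypothetical:

static ssize_t get_user_xattr(const struct path *path, void *buf, size_t len)
{
	/* resolve the caller's view of ownership via the mount's userns */
	return vfs_getxattr(mnt_user_ns(path->mnt), path->dentry,
			    "user.example", buf, len);
}
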
index 51bf430..e899701 100644 (file)
@@ -73,6 +73,7 @@ u64 zpool_get_total_size(struct zpool *pool);
  * @malloc:    allocate mem from a pool.
  * @free:      free mem from a pool.
  * @shrink:    shrink the pool.
+ * @sleep_mapped: whether zpool driver can sleep during map.
  * @map:       map a handle.
  * @unmap:     unmap a handle.
  * @total_size:        get total size of a pool.
@@ -100,6 +101,7 @@ struct zpool_driver {
        int (*shrink)(void *pool, unsigned int pages,
                                unsigned int *reclaimed);
 
+       bool sleep_mapped;
        void *(*map)(void *pool, unsigned long handle,
                                enum zpool_mapmode mm);
        void (*unmap)(void *pool, unsigned long handle);
@@ -112,5 +114,6 @@ void zpool_register_driver(struct zpool_driver *driver);
 int zpool_unregister_driver(struct zpool_driver *driver);
 
 bool zpool_evictable(struct zpool *pool);
+bool zpool_can_sleep_mapped(struct zpool *pool);
 
 #endif
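The new flag lets users such as zswap decide whether a mapped handle may be held across sleeping operations. A hedged sketch, with the function name hypothetical:

static void my_load(struct zpool *pool, unsigned long handle)
{
	void *src;

	if (zpool_can_sleep_mapped(pool)) {
		/* safe to sleep (e.g. in the decompressor) while mapped */
		src = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
		/* ... decompress directly from src ... */
		zpool_unmap_handle(pool, handle);
	} else {
		/* copy to a private buffer under the map, unmap, then work */
	}
}
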
index 4807ca4..2a430e7 100644 (file)
@@ -35,7 +35,7 @@ enum zs_mapmode {
 
 struct zs_pool_stats {
        /* How many pages were migrated (freed) */
-       unsigned long pages_compacted;
+       atomic_long_t pages_compacted;
 };
 
 struct zs_pool;
index 999b750..30f138e 100644 (file)
@@ -175,6 +175,13 @@ struct rc_map_list {
        struct rc_map map;
 };
 
+#ifdef CONFIG_MEDIA_CEC_RC
+/*
+ * rc_map_list from rc-cec.c
+ */
+extern struct rc_map_list cec_map;
+#endif
+
 /* Routines from rc-map.c */
 
 /**
index 26f134a..75b1e73 100644 (file)
@@ -550,4 +550,15 @@ static inline void skb_dst_update_pmtu_no_confirm(struct sk_buff *skb, u32 mtu)
                dst->ops->update_pmtu(dst, NULL, skb, mtu, false);
 }
 
+struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie);
+void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                              struct sk_buff *skb, u32 mtu, bool confirm_neigh);
+void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+                           struct sk_buff *skb);
+u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old);
+struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
+                                            struct sk_buff *skb,
+                                            const void *daddr);
+unsigned int dst_blackhole_mtu(const struct dst_entry *dst);
+
 #endif /* _NET_DST_H */
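These helpers were previously duplicated across the ipv4/ipv6/xfrm blackhole dst_ops; exporting them lets each protocol share one set of no-op callbacks. A hedged sketch of a consumer, with the table name hypothetical:

static struct dst_ops my_blackhole_dst_ops = {
	.family		= AF_INET,
	.check		= dst_blackhole_check,
	.mtu		= dst_blackhole_mtu,
	.cow_metrics	= dst_blackhole_cow_metrics,
	.update_pmtu	= dst_blackhole_update_pmtu,
	.redirect	= dst_blackhole_redirect,
	.neigh_lookup	= dst_blackhole_neigh_lookup,
};
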
index 9ac2d26..fd84adc 100644 (file)
@@ -46,7 +46,11 @@ static inline void icmp_send(struct sk_buff *skb_in, int type, int code, __be32
 #if IS_ENABLED(CONFIG_NF_NAT)
 void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info);
 #else
-#define icmp_ndo_send icmp_send
+static inline void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
+{
+       struct ip_options opts = { 0 };
+       __icmp_send(skb_in, type, code, info, &opts);
+}
 #endif
 
 int icmp_rcv(struct sk_buff *skb);
index 10a6257..3c8c594 100644 (file)
@@ -282,7 +282,7 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
        return inet_csk_reqsk_queue_len(sk) >= sk->sk_max_ack_backlog;
 }
 
-void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
 void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
 
 static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
index fdec57d..5aaced6 100644 (file)
@@ -1536,6 +1536,7 @@ struct nft_trans_flowtable {
        struct nft_flowtable            *flowtable;
        bool                            update;
        struct list_head                hook_list;
+       u32                             flags;
 };
 
 #define nft_trans_flowtable(trans)     \
@@ -1544,6 +1545,8 @@ struct nft_trans_flowtable {
        (((struct nft_trans_flowtable *)trans->data)->update)
 #define nft_trans_flowtable_hooks(trans)       \
        (((struct nft_trans_flowtable *)trans->data)->hook_list)
+#define nft_trans_flowtable_flags(trans)       \
+       (((struct nft_trans_flowtable *)trans->data)->flags)
 
 int __init nft_chain_filter_init(void);
 void nft_chain_filter_fini(void);
index 7bc057a..a10a319 100644 (file)
@@ -410,6 +410,7 @@ static inline struct fib_nh *fib_info_nh(struct fib_info *fi, int nhsel)
 int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
                       struct netlink_ext_ack *extack);
 
+/* Caller should either hold rcu_read_lock(), or RTNL. */
 static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
 {
        struct nh_info *nhi;
@@ -430,6 +431,29 @@ static inline struct fib6_nh *nexthop_fib6_nh(struct nexthop *nh)
        return NULL;
 }
 
+/* Variant of nexthop_fib6_nh().
+ * Caller should either hold rcu_read_lock_bh(), or RTNL.
+ */
+static inline struct fib6_nh *nexthop_fib6_nh_bh(struct nexthop *nh)
+{
+       struct nh_info *nhi;
+
+       if (nh->is_group) {
+               struct nh_group *nh_grp;
+
+               nh_grp = rcu_dereference_bh_rtnl(nh->nh_grp);
+               nh = nexthop_mpath_select(nh_grp, 0);
+               if (!nh)
+                       return NULL;
+       }
+
+       nhi = rcu_dereference_bh_rtnl(nh->nh_info);
+       if (nhi->family == AF_INET6)
+               return &nhi->fib6_nh;
+
+       return NULL;
+}
+
 static inline struct net_device *fib6_info_nh_dev(struct fib6_info *f6i)
 {
        struct fib6_nh *fib6_nh;
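A hedged sketch of a reader that runs with only BH-RCU protection, where the plain rcu_dereference_rtnl() inside nexthop_fib6_nh() would trip lockdep; function name hypothetical:

static struct net_device *my_nh_dev_bh(struct nexthop *nh)
{
	struct net_device *dev = NULL;
	struct fib6_nh *fib6_nh;

	rcu_read_lock_bh();
	fib6_nh = nexthop_fib6_nh_bh(nh);
	if (fib6_nh)
		dev = fib6_nh->fib_nh_dev;	/* via embedded fib_nh_common */
	rcu_read_unlock_bh();

	return dev;	/* only valid while the RCU grace period holds */
}
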
index 932f0d7..0b39eff 100644 (file)
@@ -168,7 +168,8 @@ static inline void red_set_vars(struct red_vars *v)
        v->qcount       = -1;
 }
 
-static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_log)
+static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog,
+                                   u8 Scell_log, u8 *stab)
 {
        if (fls(qth_min) + Wlog > 32)
                return false;
@@ -178,6 +179,13 @@ static inline bool red_check_params(u32 qth_min, u32 qth_max, u8 Wlog, u8 Scell_
                return false;
        if (qth_max < qth_min)
                return false;
+       if (stab) {
+               int i;
+
+               for (i = 0; i < RED_STAB_SIZE; i++)
+                       if (stab[i] >= 32)
+                               return false;
+       }
        return true;
 }
 
@@ -287,7 +295,7 @@ static inline unsigned long red_calc_qavg_from_idle_time(const struct red_parms
        int  shift;
 
        /*
-        * The problem: ideally, average length queue recalcultion should
+        * The problem: ideally, average length queue recalculation should
         * be done over constant clock intervals. This is too expensive, so
         * that the calculation is driven by outgoing packets.
         * When the queue is idle we have to model this clock by hand.
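Callers (the RED-family qdiscs) now pass the 256-entry Stab table in so every cell can be bounds-checked before use. A hedged caller sketch, with the function name hypothetical:

static int my_red_validate(struct tc_red_qopt *opt, u8 *stab)
{
	/* each stab entry is later used as a shift count, so any
	 * value >= 32 would be undefined behaviour; reject up front */
	if (!red_check_params(opt->qth_min, opt->qth_max, opt->Wlog,
			      opt->Scell_log, stab))
		return -EINVAL;
	return 0;
}
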
index e2091bb..4da61c9 100644 (file)
@@ -33,6 +33,7 @@ static inline int rtnl_msg_family(const struct nlmsghdr *nlh)
  *
  *     @list: Used internally
  *     @kind: Identifier
+ *     @netns_refund: Physical device; moved back to init_net on netns exit
  *     @maxtype: Highest device specific netlink attribute number
  *     @policy: Netlink policy for device specific attribute validation
  *     @validate: Optional validation function for netlink/changelink parameters
@@ -64,6 +65,7 @@ struct rtnl_link_ops {
        size_t                  priv_size;
        void                    (*setup)(struct net_device *dev);
 
+       bool                    netns_refund;
        unsigned int            maxtype;
        const struct nla_policy *policy;
        int                     (*validate)(struct nlattr *tb[],
index 636810d..0b6266f 100644 (file)
@@ -936,7 +936,7 @@ static inline void sk_acceptq_added(struct sock *sk)
 
 static inline bool sk_acceptq_is_full(const struct sock *sk)
 {
-       return READ_ONCE(sk->sk_ack_backlog) > READ_ONCE(sk->sk_max_ack_backlog);
+       return READ_ONCE(sk->sk_ack_backlog) >= READ_ONCE(sk->sk_max_ack_backlog);
 }
 
 /*
index b3bbd10..02f966e 100644 (file)
@@ -187,7 +187,7 @@ struct iscsi_conn {
        struct iscsi_task       *task;          /* xmit task in progress */
 
        /* xmit */
-       spinlock_t              taskqueuelock;  /* protects the next three lists */
+       /* items must be added/deleted under frwd lock */
        struct list_head        mgmtqueue;      /* mgmt (control) xmit queue */
        struct list_head        cmdqueue;       /* data-path cmd queue */
        struct list_head        requeue;        /* tasks needing another run */
@@ -332,7 +332,7 @@ struct iscsi_session {
                                                 * cmdsn, queued_cmdsn     *
                                                 * session resources:      *
                                                 * - cmdpool kfifo_out ,   *
-                                                * - mgmtpool,             */
+                                                * - mgmtpool, queues      */
        spinlock_t              back_lock;      /* protects cmdsn_exp      *
                                                 * cmdsn_max,              *
                                                 * cmdpool kfifo_in        */
@@ -395,6 +395,8 @@ extern struct Scsi_Host *iscsi_host_alloc(struct scsi_host_template *sht,
 extern void iscsi_host_remove(struct Scsi_Host *shost);
 extern void iscsi_host_free(struct Scsi_Host *shost);
 extern int iscsi_target_alloc(struct scsi_target *starget);
+extern int iscsi_host_get_max_scsi_cmds(struct Scsi_Host *shost,
+                                       uint16_t requested_cmds_max);
 
 /*
  * session management
index 8a26a2f..fc5a398 100644 (file)
@@ -193,6 +193,7 @@ enum iscsi_connection_state {
        ISCSI_CONN_UP = 0,
        ISCSI_CONN_DOWN,
        ISCSI_CONN_FAILED,
+       ISCSI_CONN_BOUND,
 };
 
 struct iscsi_cls_conn {
diff --git a/include/soc/canaan/k210-sysctl.h b/include/soc/canaan/k210-sysctl.h
new file mode 100644 (file)
index 0000000..0c2b2c2
--- /dev/null
@@ -0,0 +1,43 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/*
+ * Copyright (C) 2019-20 Sean Anderson <seanga2@gmail.com>
+ * Copyright (c) 2020 Western Digital Corporation or its affiliates.
+ */
+#ifndef K210_SYSCTL_H
+#define K210_SYSCTL_H
+
+/*
+ * Kendryte K210 SoC system controller registers offsets.
+ * Taken from Kendryte SDK (kendryte-standalone-sdk).
+ */
+#define K210_SYSCTL_GIT_ID     0x00 /* Git short commit id */
+#define K210_SYSCTL_UART_BAUD  0x04 /* Default UARTHS baud rate */
+#define K210_SYSCTL_PLL0       0x08 /* PLL0 controller */
+#define K210_SYSCTL_PLL1       0x0C /* PLL1 controller */
+#define K210_SYSCTL_PLL2       0x10 /* PLL2 controller */
+#define K210_SYSCTL_PLL_LOCK   0x18 /* PLL lock tester */
+#define K210_SYSCTL_ROM_ERROR  0x1C /* AXI ROM detector */
+#define K210_SYSCTL_SEL0       0x20 /* Clock select controller 0 */
+#define K210_SYSCTL_SEL1       0x24 /* Clock select controller 1 */
+#define K210_SYSCTL_EN_CENT    0x28 /* Central clock enable */
+#define K210_SYSCTL_EN_PERI    0x2C /* Peripheral clock enable */
+#define K210_SYSCTL_SOFT_RESET 0x30 /* Soft reset ctrl */
+#define K210_SYSCTL_PERI_RESET 0x34 /* Peripheral reset controller */
+#define K210_SYSCTL_THR0       0x38 /* Clock threshold controller 0 */
+#define K210_SYSCTL_THR1       0x3C /* Clock threshold controller 1 */
+#define K210_SYSCTL_THR2       0x40 /* Clock threshold controller 2 */
+#define K210_SYSCTL_THR3       0x44 /* Clock threshold controller 3 */
+#define K210_SYSCTL_THR4       0x48 /* Clock threshold controller 4 */
+#define K210_SYSCTL_THR5       0x4C /* Clock threshold controller 5 */
+#define K210_SYSCTL_THR6       0x50 /* Clock threshold controller 6 */
+#define K210_SYSCTL_MISC       0x54 /* Miscellaneous controller */
+#define K210_SYSCTL_PERI       0x58 /* Peripheral controller */
+#define K210_SYSCTL_SPI_SLEEP  0x5C /* SPI sleep controller */
+#define K210_SYSCTL_RESET_STAT 0x60 /* Reset source status */
+#define K210_SYSCTL_DMA_SEL0   0x64 /* DMA handshake selector 0 */
+#define K210_SYSCTL_DMA_SEL1   0x68 /* DMA handshake selector 1 */
+#define K210_SYSCTL_POWER_SEL  0x6C /* IO Power Mode Select controller */
+
+void k210_clk_early_init(void __iomem *regs);
+
+#endif
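A hedged sketch of the early-boot hook this header exports; the mapping base and function name below are hypothetical (the real base comes from the SoC/devicetree code):

#define MY_K210_SYSCTL_PA	0x50440000UL	/* hypothetical base address */

void __init my_k210_early_setup(void)
{
	void __iomem *regs = ioremap(MY_K210_SYSCTL_PA, 0x1000);

	if (regs)
		k210_clk_early_init(regs);
}
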
index 743c2f4..d057480 100644 (file)
@@ -113,6 +113,11 @@ struct nhlt_vendor_dmic_array_config {
 } __packed;
 
 enum {
+       NHLT_CONFIG_TYPE_GENERIC = 0,
+       NHLT_CONFIG_TYPE_MIC_ARRAY = 1
+};
+
+enum {
        NHLT_MIC_ARRAY_2CH_SMALL = 0xa,
        NHLT_MIC_ARRAY_2CH_BIG = 0xb,
        NHLT_MIC_ARRAY_4CH_1ST_GEOM = 0xc,
index 9a43c44..c450750 100644 (file)
@@ -174,7 +174,7 @@ struct snd_soc_acpi_codecs {
 static inline bool snd_soc_acpi_sof_parent(struct device *dev)
 {
        return dev->parent && dev->parent->driver && dev->parent->driver->name &&
-               !strcmp(dev->parent->driver->name, "sof-audio-acpi");
+               !strncmp(dev->parent->driver->name, "sof-audio-acpi", strlen("sof-audio-acpi"));
 }
 
 #endif
index 6336780..ce2fba4 100644 (file)
@@ -72,6 +72,7 @@ int   transport_backend_register(const struct target_backend_ops *);
 void   target_backend_unregister(const struct target_backend_ops *);
 
 void   target_complete_cmd(struct se_cmd *, u8);
+void   target_set_cmd_data_length(struct se_cmd *, int);
 void   target_complete_cmd_with_length(struct se_cmd *, u8, int);
 
 void   transport_copy_sense_to_cmd(struct se_cmd *, unsigned char *);
index e41c611..899fdac 100644 (file)
@@ -28,7 +28,7 @@ DECLARE_EVENT_CLASS(bcache_request,
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->orig_sector    = bio->bi_iter.bi_sector - 16;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
        ),
 
        TP_printk("%d,%d %s %llu + %u (from %d,%d @ %llu)",
@@ -102,7 +102,7 @@ DECLARE_EVENT_CLASS(bcache_bio,
                __entry->dev            = bio_dev(bio);
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
        ),
 
        TP_printk("%d,%d  %s %llu + %u",
@@ -137,7 +137,7 @@ TRACE_EVENT(bcache_read,
                __entry->dev            = bio_dev(bio);
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
                __entry->cache_hit = hit;
                __entry->bypass = bypass;
        ),
@@ -168,7 +168,7 @@ TRACE_EVENT(bcache_write,
                __entry->inode          = inode;
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
                __entry->writeback = writeback;
                __entry->bypass = bypass;
        ),
@@ -238,7 +238,7 @@ TRACE_EVENT(bcache_journal_write,
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio->bi_iter.bi_size >> 9;
                __entry->nr_keys        = keys;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
        ),
 
        TP_printk("%d,%d  %s %llu + %u keys %u",
index 0d78266..cc5ab96 100644 (file)
@@ -89,7 +89,7 @@ TRACE_EVENT(block_rq_requeue,
                __entry->sector    = blk_rq_trace_sector(rq);
                __entry->nr_sector = blk_rq_trace_nr_sectors(rq);
 
-               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
                __get_str(cmd)[0] = '\0';
        ),
 
@@ -133,7 +133,7 @@ TRACE_EVENT(block_rq_complete,
                __entry->nr_sector = nr_bytes >> 9;
                __entry->error     = error;
 
-               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, nr_bytes);
+               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
                __get_str(cmd)[0] = '\0';
        ),
 
@@ -166,7 +166,7 @@ DECLARE_EVENT_CLASS(block_rq,
                __entry->nr_sector = blk_rq_trace_nr_sectors(rq);
                __entry->bytes     = blk_rq_bytes(rq);
 
-               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
                __get_str(cmd)[0] = '\0';
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
@@ -196,7 +196,7 @@ DEFINE_EVENT(block_rq, block_rq_insert,
 
 /**
  * block_rq_issue - issue pending block IO request operation to device driver
- * @rq: block IO operation operation request
+ * @rq: block IO operation request
  *
  * Called when block operation request @rq from queue @q is sent to a
  * device driver for processing.
@@ -210,7 +210,7 @@ DEFINE_EVENT(block_rq, block_rq_issue,
 
 /**
  * block_rq_merge - merge request with another one in the elevator
- * @rq: block IO operation operation request
+ * @rq: block IO operation request
  *
  * Called when block operation request @rq from queue @q is merged to another
  * request queued in the elevator.
@@ -249,7 +249,7 @@ TRACE_EVENT(block_bio_complete,
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio_sectors(bio);
                __entry->error          = blk_status_to_errno(bio->bi_status);
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
        ),
 
        TP_printk("%d,%d %s %llu + %u [%d]",
@@ -276,7 +276,7 @@ DECLARE_EVENT_CLASS(block_bio,
                __entry->dev            = bio_dev(bio);
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->nr_sector      = bio_sectors(bio);
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
@@ -433,7 +433,7 @@ TRACE_EVENT(block_split,
                __entry->dev            = bio_dev(bio);
                __entry->sector         = bio->bi_iter.bi_sector;
                __entry->new_sector     = new_sector;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
                memcpy(__entry->comm, current->comm, TASK_COMM_LEN);
        ),
 
@@ -474,7 +474,7 @@ TRACE_EVENT(block_bio_remap,
                __entry->nr_sector      = bio_sectors(bio);
                __entry->old_dev        = dev;
                __entry->old_sector     = from;
-               blk_fill_rwbs(__entry->rwbs, bio->bi_opf, bio->bi_iter.bi_size);
+               blk_fill_rwbs(__entry->rwbs, bio->bi_opf);
        ),
 
        TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu",
@@ -518,7 +518,7 @@ TRACE_EVENT(block_rq_remap,
                __entry->old_dev        = dev;
                __entry->old_sector     = from;
                __entry->nr_bios        = blk_rq_count_bios(rq);
-               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags, blk_rq_bytes(rq));
+               blk_fill_rwbs(__entry->rwbs, rq->cmd_flags);
        ),
 
        TP_printk("%d,%d %s %llu + %u <- (%d,%d) %llu %u",
diff --git a/include/trace/events/error_report.h b/include/trace/events/error_report.h
new file mode 100644 (file)
index 0000000..96f64bf
--- /dev/null
@@ -0,0 +1,74 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Declarations for error reporting tracepoints.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM error_report
+
+#if !defined(_TRACE_ERROR_REPORT_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_ERROR_REPORT_H
+
+#include <linux/tracepoint.h>
+
+#ifndef __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY
+#define __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY
+
+enum error_detector {
+       ERROR_DETECTOR_KFENCE,
+       ERROR_DETECTOR_KASAN
+};
+
+#endif /* __ERROR_REPORT_DECLARE_TRACE_ENUMS_ONCE_ONLY */
+
+#define error_detector_list    \
+       EM(ERROR_DETECTOR_KFENCE, "kfence")     \
+       EMe(ERROR_DETECTOR_KASAN, "kasan")
+/* Always end the list with an EMe. */
+
+#undef EM
+#undef EMe
+
+#define EM(a, b)       TRACE_DEFINE_ENUM(a);
+#define EMe(a, b)      TRACE_DEFINE_ENUM(a);
+
+error_detector_list
+
+#undef EM
+#undef EMe
+
+#define EM(a, b) { a, b },
+#define EMe(a, b) { a, b }
+
+#define show_error_detector_list(val) \
+       __print_symbolic(val, error_detector_list)
+
+DECLARE_EVENT_CLASS(error_report_template,
+                   TP_PROTO(enum error_detector error_detector, unsigned long id),
+                   TP_ARGS(error_detector, id),
+                   TP_STRUCT__entry(__field(enum error_detector, error_detector)
+                                            __field(unsigned long, id)),
+                   TP_fast_assign(__entry->error_detector = error_detector;
+                                  __entry->id = id;),
+                   TP_printk("[%s] %lx",
+                             show_error_detector_list(__entry->error_detector),
+                             __entry->id));
+
+/**
+ * error_report_end - called after printing the error report
+ * @error_detector:    short string describing the error detection tool
+ * @id:                        pseudo-unique descriptor identifying the report
+ *                     (e.g. the memory access address)
+ *
+ * This event occurs right after a debugging tool finishes printing the error
+ * report.
+ */
+DEFINE_EVENT(error_report_template, error_report_end,
+            TP_PROTO(enum error_detector error_detector, unsigned long id),
+            TP_ARGS(error_detector, id));
+
+#endif /* _TRACE_ERROR_REPORT_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
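The event is emitted like any other tracepoint once a tool finishes printing its report; a sketch modeled on the KASAN call site (the addr variable is hypothetical here):

#include <trace/events/error_report.h>

/* at the end of the report, keyed by the faulting access address */
trace_error_report_end(ERROR_DETECTOR_KASAN, (unsigned long)addr);
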
index f65b1f6..3a60b6b 100644 (file)
@@ -115,7 +115,7 @@ DEFINE_EVENT(kmem_alloc_node, kmem_cache_alloc_node,
        TP_ARGS(call_site, ptr, bytes_req, bytes_alloc, gfp_flags, node)
 );
 
-DECLARE_EVENT_CLASS(kmem_free,
+TRACE_EVENT(kfree,
 
        TP_PROTO(unsigned long call_site, const void *ptr),
 
@@ -135,18 +135,26 @@ DECLARE_EVENT_CLASS(kmem_free,
                  (void *)__entry->call_site, __entry->ptr)
 );
 
-DEFINE_EVENT(kmem_free, kfree,
+TRACE_EVENT(kmem_cache_free,
 
-       TP_PROTO(unsigned long call_site, const void *ptr),
+       TP_PROTO(unsigned long call_site, const void *ptr, const char *name),
 
-       TP_ARGS(call_site, ptr)
-);
+       TP_ARGS(call_site, ptr, name),
 
-DEFINE_EVENT(kmem_free, kmem_cache_free,
+       TP_STRUCT__entry(
+               __field(        unsigned long,  call_site       )
+               __field(        const void *,   ptr             )
+               __string(       name,   name    )
+       ),
 
-       TP_PROTO(unsigned long call_site, const void *ptr),
+       TP_fast_assign(
+               __entry->call_site      = call_site;
+               __entry->ptr            = ptr;
+               __assign_str(name, name);
+       ),
 
-       TP_ARGS(call_site, ptr)
+       TP_printk("call_site=%pS ptr=%p name=%s",
+                 (void *)__entry->call_site, __entry->ptr, __get_str(name))
 );
 
 TRACE_EVENT(mm_page_free,
index 8fd1bab..e1735fe 100644 (file)
 
 TRACE_EVENT(mm_lru_insertion,
 
-       TP_PROTO(
-               struct page *page,
-               int lru
-       ),
+       TP_PROTO(struct page *page),
 
-       TP_ARGS(page, lru),
+       TP_ARGS(page),
 
        TP_STRUCT__entry(
                __field(struct page *,  page    )
                __field(unsigned long,  pfn     )
-               __field(int,            lru     )
+               __field(enum lru_list,  lru     )
                __field(unsigned long,  flags   )
        ),
 
        TP_fast_assign(
                __entry->page   = page;
                __entry->pfn    = page_to_pfn(page);
-               __entry->lru    = lru;
+               __entry->lru    = page_lru(page);
                __entry->flags  = trace_pagemap_flags(page);
        ),
 
index 76e85e1..c838e7a 100644 (file)
@@ -60,6 +60,51 @@ DECLARE_EVENT_CLASS(rpcrdma_completion_class,
                                ),                                      \
                                TP_ARGS(wc, cid))
 
+DECLARE_EVENT_CLASS(rpcrdma_receive_completion_class,
+       TP_PROTO(
+               const struct ib_wc *wc,
+               const struct rpc_rdma_cid *cid
+       ),
+
+       TP_ARGS(wc, cid),
+
+       TP_STRUCT__entry(
+               __field(u32, cq_id)
+               __field(int, completion_id)
+               __field(u32, received)
+               __field(unsigned long, status)
+               __field(unsigned int, vendor_err)
+       ),
+
+       TP_fast_assign(
+               __entry->cq_id = cid->ci_queue_id;
+               __entry->completion_id = cid->ci_completion_id;
+               __entry->status = wc->status;
+               if (wc->status) {
+                       __entry->received = 0;
+                       __entry->vendor_err = wc->vendor_err;
+               } else {
+                       __entry->received = wc->byte_len;
+                       __entry->vendor_err = 0;
+               }
+       ),
+
+       TP_printk("cq.id=%u cid=%d status=%s (%lu/0x%x) received=%u",
+               __entry->cq_id, __entry->completion_id,
+               rdma_show_wc_status(__entry->status),
+               __entry->status, __entry->vendor_err,
+               __entry->received
+       )
+);
+
+#define DEFINE_RECEIVE_COMPLETION_EVENT(name)                          \
+               DEFINE_EVENT(rpcrdma_receive_completion_class, name,    \
+                               TP_PROTO(                               \
+                                       const struct ib_wc *wc,         \
+                                       const struct rpc_rdma_cid *cid  \
+                               ),                                      \
+                               TP_ARGS(wc, cid))
+
 DECLARE_EVENT_CLASS(xprtrdma_reply_class,
        TP_PROTO(
                const struct rpcrdma_rep *rep
@@ -838,7 +883,8 @@ TRACE_EVENT(xprtrdma_post_linv_err,
  ** Completion events
  **/
 
-DEFINE_COMPLETION_EVENT(xprtrdma_wc_receive);
+DEFINE_RECEIVE_COMPLETION_EVENT(xprtrdma_wc_receive);
+
 DEFINE_COMPLETION_EVENT(xprtrdma_wc_send);
 DEFINE_COMPLETION_EVENT(xprtrdma_wc_fastreg);
 DEFINE_COMPLETION_EVENT(xprtrdma_wc_li);
@@ -1790,7 +1836,7 @@ TRACE_EVENT(svcrdma_post_recv,
        )
 );
 
-DEFINE_COMPLETION_EVENT(svcrdma_wc_receive);
+DEFINE_RECEIVE_COMPLETION_EVENT(svcrdma_wc_receive);
 
 TRACE_EVENT(svcrdma_rq_post_err,
        TP_PROTO(
index 970cc2e..6154a2e 100644 (file)
@@ -30,7 +30,7 @@ TRACE_EVENT(workqueue_queue_work,
        TP_STRUCT__entry(
                __field( void *,        work    )
                __field( void *,        function)
-               __field( const char *,  workqueue)
+               __string( workqueue,    pwq->wq->name)
                __field( unsigned int,  req_cpu )
                __field( unsigned int,  cpu     )
        ),
@@ -38,13 +38,13 @@ TRACE_EVENT(workqueue_queue_work,
        TP_fast_assign(
                __entry->work           = work;
                __entry->function       = work->func;
-               __entry->workqueue      = pwq->wq->name;
+               __assign_str(workqueue, pwq->wq->name);
                __entry->req_cpu        = req_cpu;
                __entry->cpu            = pwq->pool->cpu;
        ),
 
        TP_printk("work struct=%p function=%ps workqueue=%s req_cpu=%u cpu=%u",
-                 __entry->work, __entry->function, __entry->workqueue,
+                 __entry->work, __entry->function, __get_str(workqueue),
                  __entry->req_cpu, __entry->cpu)
 );
 
index 7287529..ce58cff 100644 (file)
@@ -861,9 +861,11 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
 #define __NR_epoll_pwait2 441
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
+#define __NR_mount_setattr 442
+__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
 
 #undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443
 
 /*
  * 32 bit systems traditionally used different
diff --git a/include/uapi/linux/acrn.h b/include/uapi/linux/acrn.h
new file mode 100644 (file)
index 0000000..353b2a2
--- /dev/null
@@ -0,0 +1,580 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Userspace interface for /dev/acrn_hsm - ACRN Hypervisor Service Module
+ *
+ * This file can be used by applications that need to communicate with the HSM
+ * via the ioctl interface.
+ *
+ * Copyright (C) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef _UAPI_ACRN_H
+#define _UAPI_ACRN_H
+
+#include <linux/types.h>
+#include <linux/uuid.h>
+
+#define ACRN_IO_REQUEST_MAX            16
+
+#define ACRN_IOREQ_STATE_PENDING       0
+#define ACRN_IOREQ_STATE_COMPLETE      1
+#define ACRN_IOREQ_STATE_PROCESSING    2
+#define ACRN_IOREQ_STATE_FREE          3
+
+#define ACRN_IOREQ_TYPE_PORTIO         0
+#define ACRN_IOREQ_TYPE_MMIO           1
+#define ACRN_IOREQ_TYPE_PCICFG         2
+
+#define ACRN_IOREQ_DIR_READ            0
+#define ACRN_IOREQ_DIR_WRITE           1
+
+/**
+ * struct acrn_mmio_request - Info of a MMIO I/O request
+ * @direction: Access direction of this request (ACRN_IOREQ_DIR_*)
+ * @reserved:  Reserved for alignment and should be 0
+ * @address:   Access address of this MMIO I/O request
+ * @size:      Access size of this MMIO I/O request
+ * @value:     Read/write value of this MMIO I/O request
+ */
+struct acrn_mmio_request {
+       __u32   direction;
+       __u32   reserved;
+       __u64   address;
+       __u64   size;
+       __u64   value;
+};
+
+/**
+ * struct acrn_pio_request - Info of a PIO I/O request
+ * @direction: Access direction of this request (ACRN_IOREQ_DIR_*)
+ * @reserved:  Reserved for alignment and should be 0
+ * @address:   Access address of this PIO I/O request
+ * @size:      Access size of this PIO I/O request
+ * @value:     Read/write value of this PIO I/O request
+ */
+struct acrn_pio_request {
+       __u32   direction;
+       __u32   reserved;
+       __u64   address;
+       __u64   size;
+       __u32   value;
+};
+
+/**
+ * struct acrn_pci_request - Info of a PCI I/O request
+ * @direction: Access direction of this request (ACRN_IOREQ_DIR_*)
+ * @reserved:  Reserved for alignment and should be 0
+ * @size:      Access size of this PCI I/O request
+ * @value:     Read/write value of this PCI I/O request
+ * @bus:       PCI bus value of this PCI I/O request
+ * @dev:       PCI device value of this PCI I/O request
+ * @func:      PCI function value of this PCI I/O request
+ * @reg:       PCI config space offset of this PCI I/O request
+ *
+ * Must keep the same header layout as &struct acrn_pio_request.
+ */
+struct acrn_pci_request {
+       __u32   direction;
+       __u32   reserved[3];
+       __u64   size;
+       __u32   value;
+       __u32   bus;
+       __u32   dev;
+       __u32   func;
+       __u32   reg;
+};
+
+/**
+ * struct acrn_io_request - 256-byte ACRN I/O request
+ * @type:              Type of this request (ACRN_IOREQ_TYPE_*).
+ * @completion_polling:        Polling flag. Hypervisor will poll completion of the
+ *                     I/O request if this flag is set.
+ * @reserved0:         Reserved fields.
+ * @reqs:              Union of different types of request. Byte offset: 64.
+ * @reqs.pio_request:  PIO request data of the I/O request.
+ * @reqs.pci_request:  PCI configuration space request data of the I/O request.
+ * @reqs.mmio_request: MMIO request data of the I/O request.
+ * @reqs.data:         Raw data of the I/O request.
+ * @reserved1:         Reserved fields.
+ * @kernel_handled:    Flag indicating this request needs to be handled in the kernel.
+ * @processed:         The status of this request (ACRN_IOREQ_STATE_*).
+ *
+ * The state transitions of an ACRN I/O request:
+ *
+ *    FREE -> PENDING -> PROCESSING -> COMPLETE -> FREE -> ...
+ *
+ * An I/O request in COMPLETE or FREE state is owned by the hypervisor. HSM and
+ * ACRN userspace are in charge of processing the others.
+ *
+ * On the basis of the states illustrated above, a typical lifecycle of an
+ * ACRN I/O request would look like:
+ *
+ * Flow                 (assume the initial state is FREE)
+ * |
+ * |   Service VM vCPU 0     Service VM vCPU x      User vCPU y
+ * |
+ * |                                             hypervisor:
+ * |                                               fills in type, addr, etc.
+ * |                                               pauses the User VM vCPU y
+ * |                                               sets the state to PENDING (a)
+ * |                                               fires an upcall to Service VM
+ * |
+ * | HSM:
+ * |  scans for PENDING requests
+ * |  sets the states to PROCESSING (b)
+ * |  assigns the requests to clients (c)
+ * V
+ * |                     client:
+ * |                       scans for the assigned requests
+ * |                       handles the requests (d)
+ * |                     HSM:
+ * |                       sets states to COMPLETE
+ * |                       notifies the hypervisor
+ * |
+ * |                     hypervisor:
+ * |                       resumes User VM vCPU y (e)
+ * |
+ * |                                             hypervisor:
+ * |                                               post handling (f)
+ * V                                               sets states to FREE
+ *
+ * Note that the procedures (a) to (f) in the illustration above must be
+ * processed strictly in order.  One vCPU cannot trigger another I/O
+ * emulation request before completing the previous one.
+ *
+ * Atomic operations and barriers are required when the HSM and the hypervisor
+ * access the state of &struct acrn_io_request.
+ *
+ */
+struct acrn_io_request {
+       __u32   type;
+       __u32   completion_polling;
+       __u32   reserved0[14];
+       union {
+               struct acrn_pio_request         pio_request;
+               struct acrn_pci_request         pci_request;
+               struct acrn_mmio_request        mmio_request;
+               __u64                           data[8];
+       } reqs;
+       __u32   reserved1;
+       __u32   kernel_handled;
+       __u32   processed;
+} __attribute__((aligned(256)));
+
+struct acrn_io_request_buffer {
+       union {
+               struct acrn_io_request  req_slot[ACRN_IO_REQUEST_MAX];
+               __u8                    reserved[4096];
+       };
+};
+
+/**
+ * struct acrn_ioreq_notify - The structure of ioreq completion notification
+ * @vmid:      User VM ID
+ * @reserved:  Reserved and should be 0
+ * @vcpu:      vCPU ID
+ */
+struct acrn_ioreq_notify {
+       __u16   vmid;
+       __u16   reserved;
+       __u32   vcpu;
+};
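+
+/*
+ * Example (editorial sketch, not part of the ABI): a userspace I/O client
+ * could drain its assigned requests roughly as follows, assuming buf points
+ * at the shared I/O request buffer (one slot per vCPU appears to be the
+ * convention), hsm_fd is an open fd on /dev/acrn_hsm, vmid/nr_vcpus are
+ * known, and emulate() is a hypothetical handler:
+ *
+ *     for (__u32 vcpu = 0; vcpu < nr_vcpus; vcpu++) {
+ *             struct acrn_io_request *req = &buf->req_slot[vcpu];
+ *             struct acrn_ioreq_notify notify = { .vmid = vmid, .vcpu = vcpu };
+ *
+ *             if (__atomic_load_n(&req->processed, __ATOMIC_ACQUIRE) !=
+ *                 ACRN_IOREQ_STATE_PROCESSING)
+ *                     continue;
+ *             emulate(req);   // perform the PIO/MMIO/PCICFG access
+ *             // HSM moves the request to COMPLETE on this notification
+ *             ioctl(hsm_fd, ACRN_IOCTL_NOTIFY_REQUEST_FINISH, &notify);
+ *     }
+ */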
+
+/**
+ * struct acrn_vm_creation - Info to create a User VM
+ * @vmid:              User VM ID returned from the hypervisor
+ * @reserved0:         Reserved and must be 0
+ * @vcpu_num:          Number of vCPUs in the VM. Returned from the hypervisor.
+ * @reserved1:         Reserved and must be 0
+ * @uuid:              UUID of the VM. Pass to hypervisor directly.
+ * @vm_flag:           Flags for VM creation. Pass to hypervisor directly.
+ * @ioreq_buf:         Service VM GPA of I/O request buffer. Pass to
+ *                     hypervisor directly.
+ * @cpu_affinity:      CPU affinity of the VM. Pass to hypervisor directly.
+ *                     It's a bitmap which indicates CPUs used by the VM.
+ */
+struct acrn_vm_creation {
+       __u16   vmid;
+       __u16   reserved0;
+       __u16   vcpu_num;
+       __u16   reserved1;
+       guid_t  uuid;
+       __u64   vm_flag;
+       __u64   ioreq_buf;
+       __u64   cpu_affinity;
+};
+
+/**
+ * struct acrn_gp_regs - General registers of a User VM
+ * @rax:       Value of register RAX
+ * @rcx:       Value of register RCX
+ * @rdx:       Value of register RDX
+ * @rbx:       Value of register RBX
+ * @rsp:       Value of register RSP
+ * @rbp:       Value of register RBP
+ * @rsi:       Value of register RSI
+ * @rdi:       Value of register RDI
+ * @r8:                Value of register R8
+ * @r9:                Value of register R9
+ * @r10:       Value of register R10
+ * @r11:       Value of register R11
+ * @r12:       Value of register R12
+ * @r13:       Value of register R13
+ * @r14:       Value of register R14
+ * @r15:       Value of register R15
+ */
+struct acrn_gp_regs {
+       __le64  rax;
+       __le64  rcx;
+       __le64  rdx;
+       __le64  rbx;
+       __le64  rsp;
+       __le64  rbp;
+       __le64  rsi;
+       __le64  rdi;
+       __le64  r8;
+       __le64  r9;
+       __le64  r10;
+       __le64  r11;
+       __le64  r12;
+       __le64  r13;
+       __le64  r14;
+       __le64  r15;
+};
+
+/**
+ * struct acrn_descriptor_ptr - Segment descriptor table of a User VM.
+ * @limit:     Limit field.
+ * @base:      Base field.
+ * @reserved:  Reserved and must be 0.
+ */
+struct acrn_descriptor_ptr {
+       __le16  limit;
+       __le64  base;
+       __le16  reserved[3];
+} __attribute__ ((__packed__));
+
+/**
+ * struct acrn_regs - Registers structure of a User VM
+ * @gprs:              General registers
+ * @gdt:               Global Descriptor Table
+ * @idt:               Interrupt Descriptor Table
+ * @rip:               Value of register RIP
+ * @cs_base:           Base of code segment selector
+ * @cr0:               Value of register CR0
+ * @cr4:               Value of register CR4
+ * @cr3:               Value of register CR3
+ * @ia32_efer:         Value of IA32_EFER MSR
+ * @rflags:            Value of register RFLAGS
+ * @reserved_64:       Reserved and must be 0
+ * @cs_ar:             Attribute field of code segment selector
+ * @cs_limit:          Limit field of code segment selector
+ * @reserved_32:       Reserved and must be 0
+ * @cs_sel:            Value of code segment selector
+ * @ss_sel:            Value of stack segment selector
+ * @ds_sel:            Value of data segment selector
+ * @es_sel:            Value of extra segment selector
+ * @fs_sel:            Value of FS selector
+ * @gs_sel:            Value of GS selector
+ * @ldt_sel:           Value of LDT descriptor selector
+ * @tr_sel:            Value of TSS descriptor selector
+ */
+struct acrn_regs {
+       struct acrn_gp_regs             gprs;
+       struct acrn_descriptor_ptr      gdt;
+       struct acrn_descriptor_ptr      idt;
+
+       __le64                          rip;
+       __le64                          cs_base;
+       __le64                          cr0;
+       __le64                          cr4;
+       __le64                          cr3;
+       __le64                          ia32_efer;
+       __le64                          rflags;
+       __le64                          reserved_64[4];
+
+       __le32                          cs_ar;
+       __le32                          cs_limit;
+       __le32                          reserved_32[3];
+
+       __le16                          cs_sel;
+       __le16                          ss_sel;
+       __le16                          ds_sel;
+       __le16                          es_sel;
+       __le16                          fs_sel;
+       __le16                          gs_sel;
+       __le16                          ldt_sel;
+       __le16                          tr_sel;
+};
+
+/**
+ * struct acrn_vcpu_regs - Info of vCPU registers state
+ * @vcpu_id:   vCPU ID
+ * @reserved:  Reserved and must be 0
+ * @vcpu_regs: vCPU registers state
+ *
+ * This structure will be passed to hypervisor directly.
+ */
+struct acrn_vcpu_regs {
+       __u16                   vcpu_id;
+       __u16                   reserved[3];
+       struct acrn_regs        vcpu_regs;
+};
+
+#define        ACRN_MEM_ACCESS_RIGHT_MASK      0x00000007U
+#define        ACRN_MEM_ACCESS_READ            0x00000001U
+#define        ACRN_MEM_ACCESS_WRITE           0x00000002U
+#define        ACRN_MEM_ACCESS_EXEC            0x00000004U
+#define        ACRN_MEM_ACCESS_RWX             (ACRN_MEM_ACCESS_READ  | \
+                                        ACRN_MEM_ACCESS_WRITE | \
+                                        ACRN_MEM_ACCESS_EXEC)
+
+#define        ACRN_MEM_TYPE_MASK              0x000007C0U
+#define        ACRN_MEM_TYPE_WB                0x00000040U
+#define        ACRN_MEM_TYPE_WT                0x00000080U
+#define        ACRN_MEM_TYPE_UC                0x00000100U
+#define        ACRN_MEM_TYPE_WC                0x00000200U
+#define        ACRN_MEM_TYPE_WP                0x00000400U
+
+/* Memory mapping types */
+#define        ACRN_MEMMAP_RAM                 0
+#define        ACRN_MEMMAP_MMIO                1
+
+/**
+ * struct acrn_vm_memmap - EPT memory mapping info for a User VM.
+ * @type:              Type of the memory mapping (ACRN_MEMMAP_*).
+ *                     Pass to hypervisor directly.
+ * @attr:              Attribute of the memory mapping.
+ *                     Pass to hypervisor directly.
+ * @user_vm_pa:                Physical address of User VM.
+ *                     Pass to hypervisor directly.
+ * @service_vm_pa:     Physical address of Service VM.
+ *                     Pass to hypervisor directly.
+ * @vma_base:          VMA address of Service VM. Pass to hypervisor directly.
+ * @len:               Length of the memory mapping.
+ *                     Pass to hypervisor directly.
+ */
+struct acrn_vm_memmap {
+       __u32   type;
+       __u32   attr;
+       __u64   user_vm_pa;
+       union {
+               __u64   service_vm_pa;
+               __u64   vma_base;
+       };
+       __u64   len;
+};
+
+/* Type of interrupt of a passthrough device */
+#define ACRN_PTDEV_IRQ_INTX    0
+#define ACRN_PTDEV_IRQ_MSI     1
+#define ACRN_PTDEV_IRQ_MSIX    2
+/**
+ * struct acrn_ptdev_irq - Interrupt data of a passthrough device.
+ * @type:              Type (ACRN_PTDEV_IRQ_*)
+ * @virt_bdf:          Virtual Bus/Device/Function
+ * @phys_bdf:          Physical Bus/Device/Function
+ * @intx:              Info of interrupt
+ * @intx.virt_pin:     Virtual IOAPIC pin
+ * @intx.phys_pin:     Physical IOAPIC pin
+ * @intx.is_pic_pin:   Is PIC pin or not
+ *
+ * This structure will be passed to hypervisor directly.
+ */
+struct acrn_ptdev_irq {
+       __u32   type;
+       __u16   virt_bdf;
+       __u16   phys_bdf;
+
+       struct {
+               __u32   virt_pin;
+               __u32   phys_pin;
+               __u32   is_pic_pin;
+       } intx;
+};
+
+/* Type of PCI device assignment */
+#define ACRN_PTDEV_QUIRK_ASSIGN        (1U << 0)
+
+#define ACRN_PCI_NUM_BARS      6
+/**
+ * struct acrn_pcidev - Info for assigning or de-assigning a PCI device
+ * @type:      Type of the assignment
+ * @virt_bdf:  Virtual Bus/Device/Function
+ * @phys_bdf:  Physical Bus/Device/Function
+ * @intr_line: PCI interrupt line
+ * @intr_pin:  PCI interrupt pin
+ * @bar:       PCI BARs.
+ *
+ * This structure will be passed to hypervisor directly.
+ */
+struct acrn_pcidev {
+       __u32   type;
+       __u16   virt_bdf;
+       __u16   phys_bdf;
+       __u8    intr_line;
+       __u8    intr_pin;
+       __u32   bar[ACRN_PCI_NUM_BARS];
+};
+
+/**
+ * struct acrn_msi_entry - Info for injecting a MSI interrupt to a VM
+ * @msi_addr:  MSI addr[19:12] with dest vCPU ID
+ * @msi_data:  MSI data[7:0] with vector
+ */
+struct acrn_msi_entry {
+       __u64   msi_addr;
+       __u64   msi_data;
+};
+
+struct acrn_acpi_generic_address {
+       __u8    space_id;
+       __u8    bit_width;
+       __u8    bit_offset;
+       __u8    access_size;
+       __u64   address;
+} __attribute__ ((__packed__));
+
+/**
+ * struct acrn_cstate_data - A C state package defined in ACPI
+ * @cx_reg:    Register of the C state object
+ * @type:      Type of the C state object
+ * @latency:   The worst-case latency to enter and exit this C state
+ * @power:     The average power consumption when in this C state
+ */
+struct acrn_cstate_data {
+       struct acrn_acpi_generic_address        cx_reg;
+       __u8                                    type;
+       __u32                                   latency;
+       __u64                                   power;
+};
+
+/**
+ * struct acrn_pstate_data - A P state package defined in ACPI
+ * @core_frequency:    CPU frequency (in MHz).
+ * @power:             Power dissipation (in milliwatts).
+ * @transition_latency:        The worst-case latency in microseconds that the CPU is
+ *                     unavailable during a transition from any P state to
+ *                     this P state.
+ * @bus_master_latency:        The worst-case latency in microseconds that Bus Masters
+ *                     are prevented from accessing memory during a transition
+ *                     from any P state to this P state.
+ * @control:           The value to be written to Performance Control Register
+ * @status:            Transition status.
+ */
+struct acrn_pstate_data {
+       __u64   core_frequency;
+       __u64   power;
+       __u64   transition_latency;
+       __u64   bus_master_latency;
+       __u64   control;
+       __u64   status;
+};
+
+#define PMCMD_TYPE_MASK                0x000000ff
+enum acrn_pm_cmd_type {
+       ACRN_PMCMD_GET_PX_CNT,
+       ACRN_PMCMD_GET_PX_DATA,
+       ACRN_PMCMD_GET_CX_CNT,
+       ACRN_PMCMD_GET_CX_DATA,
+};
+
+#define ACRN_IOEVENTFD_FLAG_PIO                0x01
+#define ACRN_IOEVENTFD_FLAG_DATAMATCH  0x02
+#define ACRN_IOEVENTFD_FLAG_DEASSIGN   0x04
+/**
+ * struct acrn_ioeventfd - Data to operate a &struct hsm_ioeventfd
+ * @fd:                The fd of eventfd associated with a hsm_ioeventfd
+ * @flags:     Logical-OR of ACRN_IOEVENTFD_FLAG_*
+ * @addr:      The start address of IO range of ioeventfd
+ * @len:       The length of IO range of ioeventfd
+ * @reserved:  Reserved and should be 0
+ * @data:      Data for data matching
+ *
+ * Without flag ACRN_IOEVENTFD_FLAG_DEASSIGN, ioctl ACRN_IOCTL_IOEVENTFD
+ * creates a &struct hsm_ioeventfd with properties originating from &struct
+ * acrn_ioeventfd. With flag ACRN_IOEVENTFD_FLAG_DEASSIGN, ioctl
+ * ACRN_IOCTL_IOEVENTFD destroys the &struct hsm_ioeventfd matching the fd.
+ */
+struct acrn_ioeventfd {
+       __u32   fd;
+       __u32   flags;
+       __u64   addr;
+       __u32   len;
+       __u32   reserved;
+       __u64   data;
+};
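+
+/*
+ * Example (editorial sketch): registering an eventfd that fires when the
+ * guest writes the value 1 to a 4-byte PIO register at port 0x510, assuming
+ * hsm_fd and efd are valid descriptors:
+ *
+ *     struct acrn_ioeventfd io = {
+ *             .fd    = efd,
+ *             .flags = ACRN_IOEVENTFD_FLAG_PIO |
+ *                      ACRN_IOEVENTFD_FLAG_DATAMATCH,
+ *             .addr  = 0x510,
+ *             .len   = 4,
+ *             .data  = 1,
+ *     };
+ *
+ *     ioctl(hsm_fd, ACRN_IOCTL_IOEVENTFD, &io);
+ */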
+
+#define ACRN_IRQFD_FLAG_DEASSIGN       0x01
+/**
+ * struct acrn_irqfd - Data to operate a &struct hsm_irqfd
+ * @fd:                The fd of eventfd associated with a hsm_irqfd
+ * @flags:     Logical-OR of ACRN_IRQFD_FLAG_*
+ * @msi:       Info of MSI associated with the irqfd
+ */
+struct acrn_irqfd {
+       __s32                   fd;
+       __u32                   flags;
+       struct acrn_msi_entry   msi;
+};
+
+/* The ioctl type, documented in ioctl-number.rst */
+#define ACRN_IOCTL_TYPE                        0xA2
+
+/*
+ * Common IOCTL IDs definition for ACRN userspace
+ */
+#define ACRN_IOCTL_CREATE_VM           \
+       _IOWR(ACRN_IOCTL_TYPE, 0x10, struct acrn_vm_creation)
+#define ACRN_IOCTL_DESTROY_VM          \
+       _IO(ACRN_IOCTL_TYPE, 0x11)
+#define ACRN_IOCTL_START_VM            \
+       _IO(ACRN_IOCTL_TYPE, 0x12)
+#define ACRN_IOCTL_PAUSE_VM            \
+       _IO(ACRN_IOCTL_TYPE, 0x13)
+#define ACRN_IOCTL_RESET_VM            \
+       _IO(ACRN_IOCTL_TYPE, 0x15)
+#define ACRN_IOCTL_SET_VCPU_REGS       \
+       _IOW(ACRN_IOCTL_TYPE, 0x16, struct acrn_vcpu_regs)
+
+#define ACRN_IOCTL_INJECT_MSI          \
+       _IOW(ACRN_IOCTL_TYPE, 0x23, struct acrn_msi_entry)
+#define ACRN_IOCTL_VM_INTR_MONITOR     \
+       _IOW(ACRN_IOCTL_TYPE, 0x24, unsigned long)
+#define ACRN_IOCTL_SET_IRQLINE         \
+       _IOW(ACRN_IOCTL_TYPE, 0x25, __u64)
+
+#define ACRN_IOCTL_NOTIFY_REQUEST_FINISH \
+       _IOW(ACRN_IOCTL_TYPE, 0x31, struct acrn_ioreq_notify)
+#define ACRN_IOCTL_CREATE_IOREQ_CLIENT \
+       _IO(ACRN_IOCTL_TYPE, 0x32)
+#define ACRN_IOCTL_ATTACH_IOREQ_CLIENT \
+       _IO(ACRN_IOCTL_TYPE, 0x33)
+#define ACRN_IOCTL_DESTROY_IOREQ_CLIENT        \
+       _IO(ACRN_IOCTL_TYPE, 0x34)
+#define ACRN_IOCTL_CLEAR_VM_IOREQ      \
+       _IO(ACRN_IOCTL_TYPE, 0x35)
+
+#define ACRN_IOCTL_SET_MEMSEG          \
+       _IOW(ACRN_IOCTL_TYPE, 0x41, struct acrn_vm_memmap)
+#define ACRN_IOCTL_UNSET_MEMSEG                \
+       _IOW(ACRN_IOCTL_TYPE, 0x42, struct acrn_vm_memmap)
+
+#define ACRN_IOCTL_SET_PTDEV_INTR      \
+       _IOW(ACRN_IOCTL_TYPE, 0x53, struct acrn_ptdev_irq)
+#define ACRN_IOCTL_RESET_PTDEV_INTR    \
+       _IOW(ACRN_IOCTL_TYPE, 0x54, struct acrn_ptdev_irq)
+#define ACRN_IOCTL_ASSIGN_PCIDEV       \
+       _IOW(ACRN_IOCTL_TYPE, 0x55, struct acrn_pcidev)
+#define ACRN_IOCTL_DEASSIGN_PCIDEV     \
+       _IOW(ACRN_IOCTL_TYPE, 0x56, struct acrn_pcidev)
+
+#define ACRN_IOCTL_PM_GET_CPU_STATE    \
+       _IOWR(ACRN_IOCTL_TYPE, 0x60, __u64)
+
+#define ACRN_IOCTL_IOEVENTFD           \
+       _IOW(ACRN_IOCTL_TYPE, 0x70, struct acrn_ioeventfd)
+#define ACRN_IOCTL_IRQFD               \
+       _IOW(ACRN_IOCTL_TYPE, 0x71, struct acrn_irqfd)
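+
+/*
+ * Example (editorial sketch): a typical ioctl sequence for a VM manager,
+ * assuming hsm_fd is an open fd on /dev/acrn_hsm and create/regs have been
+ * filled in. The created VM appears to be tracked per open fd, which is why
+ * the start/pause/destroy calls take no argument:
+ *
+ *     ioctl(hsm_fd, ACRN_IOCTL_CREATE_VM, &create);
+ *     ioctl(hsm_fd, ACRN_IOCTL_SET_VCPU_REGS, &regs);
+ *     ioctl(hsm_fd, ACRN_IOCTL_START_VM);
+ *     // ... the VM runs ...
+ *     ioctl(hsm_fd, ACRN_IOCTL_PAUSE_VM);
+ *     ioctl(hsm_fd, ACRN_IOCTL_DESTROY_VM);
+ */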
+
+#endif /* _UAPI_ACRN_H */
index ac6474e..d0a64ee 100644 (file)
@@ -2,29 +2,6 @@
 #ifndef _UAPI__LINUX_BLKPG_H
 #define _UAPI__LINUX_BLKPG_H
 
-/*
- * Partition table and disk geometry handling
- *
- * A single ioctl with lots of subfunctions:
- *
- * Device number stuff:
- *    get_whole_disk()         (given the device number of a partition,
- *                               find the device number of the encompassing disk)
- *    get_all_partitions()     (given the device number of a disk, return the
- *                              device numbers of all its known partitions)
- *
- * Partition stuff:
- *    add_partition()
- *    delete_partition()
- *    test_partition_in_use()  (also for test_disk_in_use)
- *
- * Geometry stuff:
- *    get_geometry()
- *    set_geometry()
- *    get_bios_drivedata()
- *
- * For today, only the partition stuff - aeb, 990515
- */
 #include <linux/compiler.h>
 #include <linux/ioctl.h>
 
@@ -52,9 +29,8 @@ struct blkpg_partition {
        long long start;                /* starting offset in bytes */
        long long length;               /* length in bytes */
        int pno;                        /* partition number */
-       char devname[BLKPG_DEVNAMELTH]; /* partition name, like sda5 or c0d1p2,
-                                          to be used in kernel messages */
-       char volname[BLKPG_VOLNAMELTH]; /* volume label */
+       char devname[BLKPG_DEVNAMELTH]; /* unused / ignored */
+       char volname[BLKPG_VOLNAMELTH]; /* unused / ignored */
 };
 
 #endif /* _UAPI__LINUX_BLKPG_H */
index 4c24daa..4ba4ef0 100644 (file)
@@ -3850,8 +3850,7 @@ union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *     Description
-
- *             Check ctx packet size against exceeding MTU of net device (based
+ *             Check packet size against exceeding MTU of net device (based
  *             on *ifindex*).  This helper will likely be used in combination
  *             with helpers that adjust/change the packet size.
  *
@@ -3868,6 +3867,14 @@ union bpf_attr {
  *             against the current net device.  This is practical if this isn't
  *             used prior to redirect.
  *
+ *             On input *mtu_len* must be a valid pointer, else the verifier
+ *             will reject the BPF program.  If the value *mtu_len* is
+ *             initialized to zero then the ctx packet size is used.  When the
+ *             value *mtu_len* is provided as input, it specifies the L3 length
+ *             that the MTU check is done against.  Remember XDP and TC length
+ *             operate at L2, but this value is L3, as it correlates to the MTU
+ *             and IP-header tot_len values, which are L3 (similar behavior to
+ *             bpf_fib_lookup).
+ *
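+ *             As an illustration (editorial sketch, not part of the original
+ *             helper documentation), a TC egress program could pre-check a
+ *             packet grow of *len_diff* bytes like this:
+ *
+ *                     __u32 mtu_len = 0;
+ *
+ *                     if (bpf_check_mtu(skb, 0, &mtu_len, len_diff, 0))
+ *                             return TC_ACT_SHOT;
+ *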
  *             The Linux kernel route table can configure MTUs on a more
  *             specific per route level, which is not provided by this helper.
  *             For route level MTU checks use the **bpf_fib_lookup**\ ()
@@ -3892,11 +3899,9 @@ union bpf_attr {
  *
  *             On return *mtu_len* pointer contains the MTU value of the net
  *             device.  Remember the net device configured MTU is the L3 size,
- *             which is returned here and XDP and TX length operate at L2.
+ *             which is returned here and XDP and TC length operate at L2.
  *             Helper take this into account for you, but remember when using
- *             MTU value in your BPF-code.  On input *mtu_len* must be a valid
- *             pointer and be initialized (to zero), else verifier will reject
- *             BPF program.
+ *             MTU value in your BPF-code.
  *
  *     Return
  *             * 0 on success, and populate MTU value in *mtu_len* pointer.
diff --git a/include/uapi/linux/cxl_mem.h b/include/uapi/linux/cxl_mem.h
new file mode 100644 (file)
index 0000000..3155382
--- /dev/null
@@ -0,0 +1,172 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * CXL IOCTLs for Memory Devices
+ */
+
+#ifndef _UAPI_CXL_MEM_H_
+#define _UAPI_CXL_MEM_H_
+
+#include <linux/types.h>
+
+/**
+ * DOC: UAPI
+ *
+ * Not all of the commands that the driver supports are always available for use
+ * by userspace. Userspace must check the results from the QUERY command in
+ * order to determine the live set of commands.
+ */
+
+#define CXL_MEM_QUERY_COMMANDS _IOR(0xCE, 1, struct cxl_mem_query_commands)
+#define CXL_MEM_SEND_COMMAND _IOWR(0xCE, 2, struct cxl_send_command)
+
+#define CXL_CMDS                                                          \
+       ___C(INVALID, "Invalid Command"),                                 \
+       ___C(IDENTIFY, "Identify Command"),                               \
+       ___C(RAW, "Raw device command"),                                  \
+       ___C(GET_SUPPORTED_LOGS, "Get Supported Logs"),                   \
+       ___C(GET_FW_INFO, "Get FW Info"),                                 \
+       ___C(GET_PARTITION_INFO, "Get Partition Information"),            \
+       ___C(GET_LSA, "Get Label Storage Area"),                          \
+       ___C(GET_HEALTH_INFO, "Get Health Info"),                         \
+       ___C(GET_LOG, "Get Log"),                                         \
+       ___C(MAX, "invalid / last command")
+
+#define ___C(a, b) CXL_MEM_COMMAND_ID_##a
+enum { CXL_CMDS };
+
+#undef ___C
+#define ___C(a, b) { b }
+static const struct {
+       const char *name;
+} cxl_command_names[] = { CXL_CMDS };
+
+/*
+ * Here's how this actually breaks out:
+ * cxl_command_names[] = {
+ *     [CXL_MEM_COMMAND_ID_INVALID] = { "Invalid Command" },
+ *     [CXL_MEM_COMMAND_ID_IDENTIFY] = { "Identify Command" },
+ *     ...
+ *     [CXL_MEM_COMMAND_ID_MAX] = { "invalid / last command" },
+ * };
+ */
+
+#undef ___C
+
+/**
+ * struct cxl_command_info - Command information returned from a query.
+ * @id: ID number for the command.
+ * @flags: Flags that specify command behavior.
+ * @size_in: Expected input size, or -1 if variable length.
+ * @size_out: Expected output size, or -1 if variable length.
+ *
+ * Represents a single command that is supported by both the driver and the
+ * hardware. This is returned as part of an array from the query ioctl. The
+ * following would be a command that takes a variable length input and returns 0
+ * bytes of output.
+ *
+ *  - @id = 10
+ *  - @flags = 0
+ *  - @size_in = -1
+ *  - @size_out = 0
+ *
+ * See struct cxl_mem_query_commands.
+ */
+struct cxl_command_info {
+       __u32 id;
+
+       __u32 flags;
+#define CXL_MEM_COMMAND_FLAG_MASK GENMASK(0, 0)
+
+       __s32 size_in;
+       __s32 size_out;
+};
+
+/**
+ * struct cxl_mem_query_commands - Query supported commands.
+ * @n_commands: In/out parameter. When @n_commands is > 0, the driver will
+ *             return min(num_support_commands, n_commands). When @n_commands
+ *             is 0, the driver will return the total number of supported commands.
+ * @rsvd: Reserved for future use.
+ * @commands: Output array of supported commands. This array must be allocated
+ *            by userspace to hold at least min(num_support_commands,
+ *            @n_commands) entries.
+ *
+ * Allow userspace to query the available commands supported by both the
+ * driver and the hardware. Commands that aren't supported by either the
+ * driver or the hardware are not returned in the query.
+ *
+ * Examples:
+ *
+ *  - { .n_commands = 0 } // Get number of supported commands
+ *  - { .n_commands = 15, .commands = buf } // Return first 15 (or less)
+ *    supported commands
+ *
+ *  See struct cxl_command_info.
+ */
+struct cxl_mem_query_commands {
+       /*
+        * Input: Number of commands to return (space allocated by user)
+        * Output: Number of commands supported by the driver/hardware
+        *
+        * If n_commands is 0, the kernel will only return the number of
+        * commands and not try to populate commands[], thus allowing
+        * userspace to know how much space to allocate.
+        */
+       __u32 n_commands;
+       __u32 rsvd;
+
+       struct cxl_command_info __user commands[]; /* out: supported commands */
+};
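+
+/*
+ * Example (editorial sketch): the two-call pattern described above, assuming
+ * fd is an open CXL memdev character device:
+ *
+ *     struct cxl_mem_query_commands *q;
+ *     __u32 n;
+ *
+ *     q = calloc(1, sizeof(*q));
+ *     ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);   // q->n_commands = total
+ *     n = q->n_commands;
+ *     free(q);
+ *
+ *     q = calloc(1, sizeof(*q) + n * sizeof(q->commands[0]));
+ *     q->n_commands = n;
+ *     ioctl(fd, CXL_MEM_QUERY_COMMANDS, q);   // fills q->commands[]
+ */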
+
+/**
+ * struct cxl_send_command - Send a command to a memory device.
+ * @id: The command to send to the memory device. This must be one of the
+ *     commands returned by the query command.
+ * @flags: Flags for the command (input).
+ * @raw: Special fields for raw commands
+ * @raw.opcode: Opcode passed to hardware when using the RAW command.
+ * @raw.rsvd: Must be zero.
+ * @rsvd: Must be zero.
+ * @retval: Return value from the memory device (output).
+ * @in: Parameters associated with input payload.
+ * @in.size: Size of the payload to provide to the device (input).
+ * @in.rsvd: Must be zero.
+ * @in.payload: Pointer to memory for payload input, payload is little endian.
+ * @out: Parameters associated with output payload.
+ * @out.size: Size of the payload received from the device (input/output). This
+ *           field is filled in by userspace to let the driver know how much
+ *           space was allocated for output. It is populated by the driver to
+ *           let userspace know how large the output payload actually was.
+ * @out.rsvd: Must be zero.
+ * @out.payload: Pointer to memory for payload output, payload is little endian.
+ *
+ * Mechanism for userspace to send a command to the hardware for processing. The
+ * driver will do basic validation on the command sizes. In some cases even the
+ * payload may be introspected. Userspace is required to allocate large enough
+ * buffers for size_out, which can be variable length in certain situations.
+ */
+struct cxl_send_command {
+       __u32 id;
+       __u32 flags;
+       union {
+               struct {
+                       __u16 opcode;
+                       __u16 rsvd;
+               } raw;
+               __u32 rsvd;
+       };
+       __u32 retval;
+
+       struct {
+               __s32 size;
+               __u32 rsvd;
+               __u64 payload;
+       } in;
+
+       struct {
+               __s32 size;
+               __u32 rsvd;
+               __u64 payload;
+       } out;
+};
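+
+/*
+ * Example (editorial sketch): issuing the IDENTIFY command with a generously
+ * sized output buffer; the exact output size should come from the earlier
+ * query results, and handle_identify() is a hypothetical consumer:
+ *
+ *     char payload[4096];
+ *     struct cxl_send_command cmd = {
+ *             .id  = CXL_MEM_COMMAND_ID_IDENTIFY,
+ *             .out = { .size    = sizeof(payload),
+ *                      .payload = (__u64)(uintptr_t)payload },
+ *     };
+ *
+ *     if (ioctl(fd, CXL_MEM_SEND_COMMAND, &cmd) == 0 && cmd.retval == 0)
+ *             handle_identify(payload, cmd.out.size);
+ */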
+
+#endif
index 7e5b5c1..5effa98 100644 (file)
@@ -844,7 +844,7 @@ struct fw_cdev_queue_iso {
  * struct fw_cdev_start_iso - Start an isochronous transmission or reception
  * @cycle:     Cycle in which to start I/O.  If @cycle is greater than or
  *             equal to 0, the I/O will start on that cycle.
- * @sync:      Determines the value to wait for for receive packets that have
+ * @sync:      Determines the value to wait for receive packets that have
  *             the %FW_CDEV_ISO_SYNC bit set
  * @tags:      Tag filter bit mask.  Only valid for isochronous reception.
  *             Determines the tag values for which packets will be accepted.
diff --git a/include/uapi/linux/fsl_mc.h b/include/uapi/linux/fsl_mc.h
new file mode 100644 (file)
index 0000000..e574515
--- /dev/null
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Management Complex (MC) userspace public interface
+ *
+ * Copyright 2021 NXP
+ *
+ */
+#ifndef _UAPI_FSL_MC_H_
+#define _UAPI_FSL_MC_H_
+
+#include <linux/types.h>
+
+#define MC_CMD_NUM_OF_PARAMS   7
+
+/**
+ * struct fsl_mc_command - Management Complex (MC) command structure
+ * @header: MC command header
+ * @params: MC command parameters
+ *
+ * Used by FSL_MC_SEND_MC_COMMAND
+ */
+struct fsl_mc_command {
+       __le64 header;
+       __le64 params[MC_CMD_NUM_OF_PARAMS];
+};
+
+#define FSL_MC_SEND_CMD_IOCTL_TYPE     'R'
+#define FSL_MC_SEND_CMD_IOCTL_SEQ      0xE0
+
+#define FSL_MC_SEND_MC_COMMAND \
+       _IOWR(FSL_MC_SEND_CMD_IOCTL_TYPE, FSL_MC_SEND_CMD_IOCTL_SEQ, \
+       struct fsl_mc_command)
+
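+/*
+ * Example (editorial sketch): sending a command, assuming fd is an open fd on
+ * the MC char device and DPMNG_CMDID_GET_VERSION is a hypothetical command id
+ * encoded per the MC firmware specification:
+ *
+ *     struct fsl_mc_command cmd = {
+ *             .header = htole64(DPMNG_CMDID_GET_VERSION),
+ *     };
+ *
+ *     ioctl(fd, FSL_MC_SEND_MC_COMMAND, &cmd);
+ *     // on return, cmd.header holds status and cmd.params[] the response
+ */
+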
+#endif /* _UAPI_FSL_MC_H_ */
index 98ca64d..5444261 100644 (file)
@@ -903,7 +903,8 @@ struct fuse_notify_retrieve_in {
 };
 
 /* Device ioctls: */
-#define FUSE_DEV_IOC_CLONE     _IOR(229, 0, uint32_t)
+#define FUSE_DEV_IOC_MAGIC             229
+#define FUSE_DEV_IOC_CLONE             _IOR(FUSE_DEV_IOC_MAGIC, 0, uint32_t)
 
 struct fuse_lseek_in {
        uint64_t        fh;
index 07e508e..6ec4291 100644 (file)
@@ -47,7 +47,7 @@
 #define GFS2_FORMAT_DE         1200
 #define GFS2_FORMAT_QU         1500
 /* These are part of the superblock */
-#define GFS2_FORMAT_FS         1801
+#define GFS2_FORMAT_FS         1802
 #define GFS2_FORMAT_MULTI      1900
 
 /*
@@ -389,8 +389,9 @@ struct gfs2_leaf {
 #define GFS2_EATYPE_USR                1
 #define GFS2_EATYPE_SYS                2
 #define GFS2_EATYPE_SECURITY   3
+#define GFS2_EATYPE_TRUSTED    4
 
-#define GFS2_EATYPE_LAST       3
+#define GFS2_EATYPE_LAST       4
 #define GFS2_EATYPE_VALID(x)   ((x) <= GFS2_EATYPE_LAST)
 
 #define GFS2_EAFLAG_LAST       0x01    /* last ea in block */
index 9a61c28..ee31274 100644 (file)
@@ -84,7 +84,7 @@ struct input_id {
  * in units per radian.
  * When INPUT_PROP_ACCELEROMETER is set the resolution changes.
  * The main axes (ABS_X, ABS_Y, ABS_Z) are then reported in
- * in units per g (units/g) and in units per degree per second
+ * units per g (units/g) and in units per degree per second
  * (units/deg/s) for rotational axes (ABS_RX, ABS_RY, ABS_RZ).
  */
 struct input_absinfo {
index ac4e173..2514eb6 100644 (file)
@@ -262,6 +262,7 @@ struct io_uring_params {
 #define IORING_FEAT_POLL_32BITS        (1U << 6)
 #define IORING_FEAT_SQPOLL_NONFIXED    (1U << 7)
 #define IORING_FEAT_EXT_ARG            (1U << 8)
+#define IORING_FEAT_NATIVE_WORKERS     (1U << 9)
 
 /*
  * io_uring_register(2) opcodes and arguments
index 8b281f7..f6afee2 100644 (file)
@@ -1154,6 +1154,7 @@ struct kvm_x86_mce {
 #define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
 #define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
 #define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
 
 struct kvm_xen_hvm_config {
        __u32 flags;
@@ -1621,12 +1622,24 @@ struct kvm_xen_vcpu_attr {
        union {
                __u64 gpa;
                __u64 pad[8];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
        } u;
 };
 
 /* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
 #define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
 
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
index 30c80d5..bab8c97 100644 (file)
@@ -145,6 +145,7 @@ enum {
        L2TP_ATTR_RX_ERRORS,            /* u64 */
        L2TP_ATTR_STATS_PAD,
        L2TP_ATTR_RX_COOKIE_DISCARDS,   /* u64 */
+       L2TP_ATTR_RX_INVALID,           /* u64 */
        __L2TP_ATTR_STATS_MAX,
 };
 
index 13a06e5..8b02088 100644 (file)
  * In device drivers it is recommended, if required, to make the char map
  * accessible via the sysfs interface using the following scheme:
  *
- * static ssize_t show_map(struct device *dev, char *buf) {
+ * static ssize_t map_seg7_show(struct device *dev,
+ *                             struct device_attribute *attr, char *buf)
+ * {
  *     memcpy(buf, &map_seg7, sizeof(map_seg7));
  *     return sizeof(map_seg7);
  * }
- * static ssize_t store_map(struct device *dev, const char *buf, size_t cnt) {
+ * static ssize_t map_seg7_store(struct device *dev,
+ *                              struct device_attribute *attr, const char *buf,
+ *                              size_t cnt)
+ * {
  *     if(cnt != sizeof(map_seg7))
  *             return -EINVAL;
  *     memcpy(&map_seg7, buf, cnt);
  *     return cnt;
  * }
- * static DEVICE_ATTR(map_seg7, PERMS_RW, show_map, store_map);
+ * static DEVICE_ATTR_RW(map_seg7);
  *
  * History:
  * 2005-05-31  RFC linux-kernel@vger.kernel.org
index 3354774..8948467 100644 (file)
@@ -28,12 +28,14 @@ enum {
 /* Flags for set_mempolicy */
 #define MPOL_F_STATIC_NODES    (1 << 15)
 #define MPOL_F_RELATIVE_NODES  (1 << 14)
+#define MPOL_F_NUMA_BALANCING  (1 << 13) /* Optimize with NUMA balancing if possible */
 
 /*
  * MPOL_MODE_FLAGS is the union of all possible optional mode flags passed to
  * either set_mempolicy() or mbind().
  */
-#define MPOL_MODE_FLAGS        (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES)
+#define MPOL_MODE_FLAGS                                                        \
+       (MPOL_F_STATIC_NODES | MPOL_F_RELATIVE_NODES | MPOL_F_NUMA_BALANCING)
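+
+/*
+ * Example (editorial sketch): requesting NUMA-balancing-aware binding,
+ * assuming the <numaif.h> wrappers and a populated nodemask/maxnode:
+ *
+ *     set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING, nodemask, maxnode);
+ */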
 
 /* Flags for get_mempolicy */
 #define MPOL_F_NODE    (1<<0)  /* return next IL mode instead of node mask */
diff --git a/include/uapi/linux/misc/bcm_vk.h b/include/uapi/linux/misc/bcm_vk.h
new file mode 100644 (file)
index 0000000..ec28e0b
--- /dev/null
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+/*
+ * Copyright 2018-2020 Broadcom.
+ */
+
+#ifndef __UAPI_LINUX_MISC_BCM_VK_H
+#define __UAPI_LINUX_MISC_BCM_VK_H
+
+#include <linux/ioctl.h>
+#include <linux/types.h>
+
+#define BCM_VK_MAX_FILENAME 64
+
+struct vk_image {
+       __u32 type; /* Type of image */
+#define VK_IMAGE_TYPE_BOOT1 1 /* 1st stage (load to SRAM) */
+#define VK_IMAGE_TYPE_BOOT2 2 /* 2nd stage (load to DDR) */
+       __u8 filename[BCM_VK_MAX_FILENAME]; /* Filename of image */
+};
+
+struct vk_reset {
+       __u32 arg1;
+       __u32 arg2;
+};
+
+#define VK_MAGIC               0x5e
+
+/* Load image to Valkyrie */
+#define VK_IOCTL_LOAD_IMAGE    _IOW(VK_MAGIC, 0x2, struct vk_image)
+
+/* Send Reset to Valkyrie */
+#define VK_IOCTL_RESET         _IOW(VK_MAGIC, 0x4, struct vk_reset)
+
+/*
+ * Firmware Status accessed directly via BAR space
+ */
+#define VK_BAR_FWSTS                   0x41c
+#define VK_BAR_COP_FWSTS               0x428
+/* VK_FWSTS definitions */
+#define VK_FWSTS_RELOCATION_ENTRY      (1UL << 0)
+#define VK_FWSTS_RELOCATION_EXIT       (1UL << 1)
+#define VK_FWSTS_INIT_START            (1UL << 2)
+#define VK_FWSTS_ARCH_INIT_DONE                (1UL << 3)
+#define VK_FWSTS_PRE_KNL1_INIT_DONE    (1UL << 4)
+#define VK_FWSTS_PRE_KNL2_INIT_DONE    (1UL << 5)
+#define VK_FWSTS_POST_KNL_INIT_DONE    (1UL << 6)
+#define VK_FWSTS_INIT_DONE             (1UL << 7)
+#define VK_FWSTS_APP_INIT_START                (1UL << 8)
+#define VK_FWSTS_APP_INIT_DONE         (1UL << 9)
+#define VK_FWSTS_MASK                  0xffffffff
+#define VK_FWSTS_READY                 (VK_FWSTS_INIT_START | \
+                                        VK_FWSTS_ARCH_INIT_DONE | \
+                                        VK_FWSTS_PRE_KNL1_INIT_DONE | \
+                                        VK_FWSTS_PRE_KNL2_INIT_DONE | \
+                                        VK_FWSTS_POST_KNL_INIT_DONE | \
+                                        VK_FWSTS_INIT_DONE | \
+                                        VK_FWSTS_APP_INIT_START | \
+                                        VK_FWSTS_APP_INIT_DONE)
+/* Deinit */
+#define VK_FWSTS_APP_DEINIT_START      (1UL << 23)
+#define VK_FWSTS_APP_DEINIT_DONE       (1UL << 24)
+#define VK_FWSTS_DRV_DEINIT_START      (1UL << 25)
+#define VK_FWSTS_DRV_DEINIT_DONE       (1UL << 26)
+#define VK_FWSTS_RESET_DONE            (1UL << 27)
+#define VK_FWSTS_DEINIT_TRIGGERED      (VK_FWSTS_APP_DEINIT_START | \
+                                        VK_FWSTS_APP_DEINIT_DONE  | \
+                                        VK_FWSTS_DRV_DEINIT_START | \
+                                        VK_FWSTS_DRV_DEINIT_DONE)
+/* Last nibble for reboot reason */
+#define VK_FWSTS_RESET_REASON_SHIFT    28
+#define VK_FWSTS_RESET_REASON_MASK     (0xf << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_SYS_PWRUP       (0x0 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_MBOX_DB         (0x1 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_M7_WDOG         (0x2 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_TEMP            (0x3 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_PCI_FLR         (0x4 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_PCI_HOT         (0x5 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_PCI_WARM                (0x6 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_PCI_COLD                (0x7 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_L1              (0x8 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_L0              (0x9 << VK_FWSTS_RESET_REASON_SHIFT)
+#define VK_FWSTS_RESET_UNKNOWN         (0xf << VK_FWSTS_RESET_REASON_SHIFT)
+
+#endif /* __UAPI_LINUX_MISC_BCM_VK_H */
index dd8306e..e6524ea 100644 (file)
@@ -1,6 +1,8 @@
 #ifndef _UAPI_LINUX_MOUNT_H
 #define _UAPI_LINUX_MOUNT_H
 
+#include <linux/types.h>
+
 /*
  * These are the fs-independent mount-flags: up to 32 flags are supported
  *
@@ -117,5 +119,19 @@ enum fsconfig_command {
 #define MOUNT_ATTR_NOATIME     0x00000010 /* - Do not update access times. */
 #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
 #define MOUNT_ATTR_NODIRATIME  0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP       0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+       __u64 attr_set;
+       __u64 attr_clr;
+       __u64 propagation;
+       __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0   32 /* sizeof first published struct */
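+
+/*
+ * Example (editorial sketch): recursively marking a subtree read-only via the
+ * new mount_setattr() syscall, called raw since no libc wrapper exists yet:
+ *
+ *     struct mount_attr attr = {
+ *             .attr_set = MOUNT_ATTR_RDONLY,
+ *     };
+ *
+ *     syscall(__NR_mount_setattr, -1, "/mnt/tree", AT_RECURSIVE,
+ *             &attr, sizeof(attr));
+ */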
 
 #endif /* _UAPI_LINUX_MOUNT_H */
index a13137a..70af020 100644 (file)
@@ -5,7 +5,7 @@
 #define NFCT_HELPER_STATUS_DISABLED    0
 #define NFCT_HELPER_STATUS_ENABLED     1
 
-enum nfnl_acct_msg_types {
+enum nfnl_cthelper_msg_types {
        NFNL_MSG_CTHELPER_NEW,
        NFNL_MSG_CTHELPER_GET,
        NFNL_MSG_CTHELPER_DEL,
index aea26ab..bff5032 100644 (file)
@@ -3,7 +3,6 @@
 #define __UAPI_PSAMPLE_H
 
 enum {
-       /* sampled packet metadata */
        PSAMPLE_ATTR_IIFINDEX,
        PSAMPLE_ATTR_OIFINDEX,
        PSAMPLE_ATTR_ORIGSIZE,
@@ -11,10 +10,8 @@ enum {
        PSAMPLE_ATTR_GROUP_SEQ,
        PSAMPLE_ATTR_SAMPLE_RATE,
        PSAMPLE_ATTR_DATA,
-       PSAMPLE_ATTR_TUNNEL,
-
-       /* commands attributes */
        PSAMPLE_ATTR_GROUP_REFCOUNT,
+       PSAMPLE_ATTR_TUNNEL,
 
        __PSAMPLE_ATTR_MAX
 };
diff --git a/include/uapi/linux/vdpa.h b/include/uapi/linux/vdpa.h
new file mode 100644 (file)
index 0000000..66a41e4
--- /dev/null
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/*
+ * vdpa device management interface
+ * Copyright (c) 2020 Mellanox Technologies Ltd. All rights reserved.
+ */
+
+#ifndef _UAPI_LINUX_VDPA_H_
+#define _UAPI_LINUX_VDPA_H_
+
+#define VDPA_GENL_NAME "vdpa"
+#define VDPA_GENL_VERSION 0x1
+
+enum vdpa_command {
+       VDPA_CMD_UNSPEC,
+       VDPA_CMD_MGMTDEV_NEW,
+       VDPA_CMD_MGMTDEV_GET,           /* can dump */
+       VDPA_CMD_DEV_NEW,
+       VDPA_CMD_DEV_DEL,
+       VDPA_CMD_DEV_GET,               /* can dump */
+};
+
+enum vdpa_attr {
+       VDPA_ATTR_UNSPEC,
+
+       /* bus name (optional) + dev name together make the parent device handle */
+       VDPA_ATTR_MGMTDEV_BUS_NAME,             /* string */
+       VDPA_ATTR_MGMTDEV_DEV_NAME,             /* string */
+       VDPA_ATTR_MGMTDEV_SUPPORTED_CLASSES,    /* u64 */
+
+       VDPA_ATTR_DEV_NAME,                     /* string */
+       VDPA_ATTR_DEV_ID,                       /* u32 */
+       VDPA_ATTR_DEV_VENDOR_ID,                /* u32 */
+       VDPA_ATTR_DEV_MAX_VQS,                  /* u32 */
+       VDPA_ATTR_DEV_MAX_VQ_SIZE,              /* u16 */
+
+       /* new attributes must be added above here */
+       VDPA_ATTR_MAX,
+};
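+
+/*
+ * Example (editorial sketch): the iproute2 `vdpa` tool speaks this generic
+ * netlink interface; a session might look roughly like:
+ *
+ *     vdpa mgmtdev show
+ *     vdpa dev add name vdpa0 mgmtdev vdpasim_net
+ *     vdpa dev show vdpa0
+ */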
+
+#endif
index d181277..8ce36c1 100644 (file)
  */
 #define VFIO_NOIOMMU_IOMMU             8
 
+/* Supports VFIO_DMA_UNMAP_FLAG_ALL */
+#define VFIO_UNMAP_ALL                 9
+
+/* Supports the vaddr flag for DMA map and unmap */
+#define VFIO_UPDATE_VADDR              10
+
 /*
  * The IOCTL interface is designed for extensibility by embedding the
  * structure length (argsz) and flags into structures passed between
@@ -1074,12 +1080,22 @@ struct vfio_iommu_type1_info_dma_avail {
  *
  * Map process virtual addresses to IO virtual addresses using the
  * provided struct vfio_dma_map. Caller sets argsz. READ &/ WRITE required.
+ *
+ * If flags & VFIO_DMA_MAP_FLAG_VADDR, update the base vaddr for iova, and
+ * unblock translation of host virtual addresses in the iova range.  The vaddr
+ * must have previously been invalidated with VFIO_DMA_UNMAP_FLAG_VADDR.  To
+ * maintain memory consistency within the user application, the updated vaddr
+ * must address the same memory object as originally mapped.  Failure to do so
+ * will result in user memory corruption and/or device misbehavior.  iova and
+ * size must match those in the original MAP_DMA call.  Protection is not
+ * changed, and the READ & WRITE flags must be 0.
  */
 struct vfio_iommu_type1_dma_map {
        __u32   argsz;
        __u32   flags;
 #define VFIO_DMA_MAP_FLAG_READ (1 << 0)                /* readable from device */
 #define VFIO_DMA_MAP_FLAG_WRITE (1 << 1)       /* writable from device */
+#define VFIO_DMA_MAP_FLAG_VADDR (1 << 2)
        __u64   vaddr;                          /* Process virtual address */
        __u64   iova;                           /* IO virtual address */
        __u64   size;                           /* Size of mapping (bytes) */
@@ -1102,6 +1118,7 @@ struct vfio_bitmap {
  * field.  No guarantee is made to the user that arbitrary unmaps of iova
  * or size different from those used in the original mapping call will
  * succeed.
+ *
  * VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP should be set to get the dirty bitmap
  * before unmapping IO virtual addresses. When this flag is set, the user must
  * provide a struct vfio_bitmap in data[]. User must provide zero-allocated
@@ -1111,11 +1128,21 @@ struct vfio_bitmap {
  * indicates that the page at that offset from iova is dirty. A Bitmap of the
  * pages in the range of unmapped size is returned in the user-provided
  * vfio_bitmap.data.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_ALL, unmap all addresses.  iova and size
+ * must be 0.  This cannot be combined with the get-dirty-bitmap flag.
+ *
+ * If flags & VFIO_DMA_UNMAP_FLAG_VADDR, do not unmap, but invalidate host
+ * virtual addresses in the iova range.  Tasks that attempt to translate an
+ * iova's vaddr will block.  DMA to already-mapped pages continues.  This
+ * cannot be combined with the get-dirty-bitmap flag.
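+ *
+ * Example (editorial sketch): live-updating the vaddr of an existing mapping,
+ * e.g. after a userspace re-exec; container is an open VFIO container fd and
+ * the argsz setup of the two structs is omitted for brevity:
+ *
+ *     unmap.flags = VFIO_DMA_UNMAP_FLAG_VADDR;   // invalidate old vaddr
+ *     unmap.iova = iova; unmap.size = size;
+ *     ioctl(container, VFIO_IOMMU_UNMAP_DMA, &unmap);
+ *
+ *     map.flags = VFIO_DMA_MAP_FLAG_VADDR;       // supply the new vaddr
+ *     map.vaddr = new_vaddr; map.iova = iova; map.size = size;
+ *     ioctl(container, VFIO_IOMMU_MAP_DMA, &map);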
  */
 struct vfio_iommu_type1_dma_unmap {
        __u32   argsz;
        __u32   flags;
 #define VFIO_DMA_UNMAP_FLAG_GET_DIRTY_BITMAP (1 << 0)
+#define VFIO_DMA_UNMAP_FLAG_ALL                     (1 << 1)
+#define VFIO_DMA_UNMAP_FLAG_VADDR           (1 << 2)
        __u64   iova;                           /* IO virtual address */
        __u64   size;                           /* Size of mapping (bytes) */
        __u8    data[];
index dba3827..5a86b52 100644 (file)
@@ -309,7 +309,9 @@ struct hl_info_hw_ip_info {
        __u32 num_of_events;
        __u32 device_id; /* PCI Device ID */
        __u32 module_id; /* For mezzanine cards in servers (From OCP spec.) */
-       __u32 reserved[2];
+       __u32 reserved;
+       __u16 first_available_interrupt_id;
+       __u16 reserved2;
        __u32 cpld_version;
        __u32 psoc_pci_pll_nr;
        __u32 psoc_pci_pll_nf;
@@ -320,6 +322,8 @@ struct hl_info_hw_ip_info {
        __u8 pad[2];
        __u8 cpucp_version[HL_INFO_VERSION_MAX_LEN];
        __u8 card_name[HL_INFO_CARD_NAME_MAX_LEN];
+       __u64 reserved3;
+       __u64 dram_page_size;
 };
 
 struct hl_info_dram_usage {
@@ -327,6 +331,8 @@ struct hl_info_dram_usage {
        __u64 ctx_dram_mem;
 };
 
+#define HL_BUSY_ENGINES_MASK_EXT_SIZE  2
+
 struct hl_info_hw_idle {
        __u32 is_idle;
        /*
@@ -339,7 +345,7 @@ struct hl_info_hw_idle {
         * Extended Bitmask of busy engines.
         * Bits definition is according to `enum <chip>_enging_id'.
         */
-       __u64 busy_engines_mask_ext;
+       __u64 busy_engines_mask_ext[HL_BUSY_ENGINES_MASK_EXT_SIZE];
 };
 
 struct hl_info_device_status {
@@ -408,10 +414,13 @@ struct hl_pll_frequency_info {
  * struct hl_info_sync_manager - sync manager information
  * @first_available_sync_object: first available sob
  * @first_available_monitor: first available monitor
+ * @first_available_cq: first available cq
  */
 struct hl_info_sync_manager {
        __u32 first_available_sync_object;
        __u32 first_available_monitor;
+       __u32 first_available_cq;
+       __u32 reserved;
 };
 
 /**
@@ -604,11 +613,14 @@ struct hl_cs_chunk {
 };
 
 /* SIGNAL and WAIT/COLLECTIVE_WAIT flags are mutually exclusive */
-#define HL_CS_FLAGS_FORCE_RESTORE      0x1
-#define HL_CS_FLAGS_SIGNAL             0x2
-#define HL_CS_FLAGS_WAIT               0x4
-#define HL_CS_FLAGS_COLLECTIVE_WAIT    0x8
-#define HL_CS_FLAGS_TIMESTAMP          0x20
+#define HL_CS_FLAGS_FORCE_RESTORE              0x1
+#define HL_CS_FLAGS_SIGNAL                     0x2
+#define HL_CS_FLAGS_WAIT                       0x4
+#define HL_CS_FLAGS_COLLECTIVE_WAIT            0x8
+#define HL_CS_FLAGS_TIMESTAMP                  0x20
+#define HL_CS_FLAGS_STAGED_SUBMISSION          0x40
+#define HL_CS_FLAGS_STAGED_SUBMISSION_FIRST    0x80
+#define HL_CS_FLAGS_STAGED_SUBMISSION_LAST     0x100
 
 #define HL_CS_STATUS_SUCCESS           0
 
@@ -622,10 +634,17 @@ struct hl_cs_in {
        /* holds address of array of hl_cs_chunk for execution phase */
        __u64 chunks_execute;
 
-       /* this holds address of array of hl_cs_chunk for store phase -
-        * Currently not in use
-        */
-       __u64 chunks_store;
+       union {
+               /* this holds address of array of hl_cs_chunk for store phase -
+                * Currently not in use
+                */
+               __u64 chunks_store;
+
+               /* Sequence number of a staged submission CS,
+                * valid only if HL_CS_FLAGS_STAGED_SUBMISSION is set
+                */
+               __u64 seq;
+       };
 
        /* Number of chunks in restore phase array. Maximum number is
         * HL_MAX_JOBS_PER_CS
@@ -704,6 +723,8 @@ union hl_wait_cs_args {
 #define HL_MEM_OP_MAP                  2
 /* Opcode to unmap previously mapped host and device memory */
 #define HL_MEM_OP_UNMAP                        3
+/* Opcode to map a hw block */
+#define HL_MEM_OP_MAP_BLOCK            4
 
 /* Memory flags */
 #define HL_MEM_CONTIGUOUS      0x1
@@ -758,6 +779,17 @@ struct hl_mem_in {
                        __u64 mem_size;
                } map_host;
 
+               /* HL_MEM_OP_MAP_BLOCK - map a hw block */
+               struct {
+                       /*
+                        * HW block address to map; a handle and size will be
+                        * returned to the user and will be used to mmap the
+                        * relevant block. Only addresses from configuration
+                        * space are allowed.
+                        */
+                       __u64 block_addr;
+               } map_block;
+
                /* HL_MEM_OP_UNMAP - unmap host memory */
                struct {
                        /* Virtual address returned from HL_MEM_OP_MAP */
@@ -784,10 +816,26 @@ struct hl_mem_out {
                __u64 device_virt_addr;
 
                /*
-                * Used for HL_MEM_OP_ALLOC. This is the assigned
-                * handle for the allocated memory
+                * Used in HL_MEM_OP_ALLOC
+                * This is the assigned handle for the allocated memory
                 */
                __u64 handle;
+
+               struct {
+                       /*
+                        * Used in HL_MEM_OP_MAP_BLOCK.
+                        * This is the assigned handle for the mapped block
+                        */
+                       __u64 block_handle;
+
+                       /*
+                        * Used in HL_MEM_OP_MAP_BLOCK
+                        * This is the size of the mapped block
+                        */
+                       __u32 block_size;
+
+                       __u32 pad;
+               };
        };
 };
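+
+/*
+ * Example (editorial sketch): mapping a hw block, assuming fd is the device
+ * fd, cfg_addr lies in configuration space, and the request goes through the
+ * memory ioctl (HL_IOCTL_MEMORY, union hl_mem_args); the exact mmap offset
+ * encoding of block_handle is driver-specific:
+ *
+ *     union hl_mem_args args = {
+ *             .in.op = HL_MEM_OP_MAP_BLOCK,
+ *             .in.map_block.block_addr = cfg_addr,
+ *     };
+ *
+ *     ioctl(fd, HL_IOCTL_MEMORY, &args);
+ *     ptr = mmap(NULL, args.out.block_size, PROT_READ, MAP_SHARED,
+ *                fd, args.out.block_handle);
+ */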
 
index 0b1182a..cb854df 100644 (file)
 #include <linux/page-flags.h>
 #include <linux/kernel.h>
 
+/*
+ * Technically there's no reliably invalid grant reference or grant handle,
+ * so pick the value that is the most unlikely one to be observed valid.
+ */
+#define INVALID_GRANT_REF          ((grant_ref_t)-1)
+#define INVALID_GRANT_HANDLE       ((grant_handle_t)-1)
+
 #define GNTTAB_RESERVED_XENSTORE 1
 
 /* NR_GRANT_FRAMES must be less than or equal to that configured in Xen */
index bf3cfc7..b94074c 100644 (file)
@@ -51,7 +51,6 @@
 
 #define XENBUS_MAX_RING_GRANT_ORDER 4
 #define XENBUS_MAX_RING_GRANTS      (1U << XENBUS_MAX_RING_GRANT_ORDER)
-#define INVALID_GRANT_HANDLE       (~0U)
 
 /* Register callback to watch this node. */
 struct xenbus_watch
@@ -88,6 +87,13 @@ struct xenbus_device {
        struct completion down;
        struct work_struct work;
        struct semaphore reclaim_sem;
+
+       /* Event channel based statistics and settings. */
+       atomic_t event_channels;
+       atomic_t events;
+       atomic_t spurious_events;
+       atomic_t jiffies_eoi_delayed;
+       unsigned int spurious_threshold;
 };
 
 static inline struct xenbus_device *to_xenbus_device(struct device *dev)
index 47a3d65..5f5c776 100644 (file)
@@ -19,37 +19,43 @@ config CC_VERSION_TEXT
            CC_VERSION_TEXT so it is recorded in include/config/auto.conf.cmd.
            When the compiler is updated, Kconfig will be invoked.
 
-         - Ensure full rebuild when the compier is updated
-           include/linux/kconfig.h contains this option in the comment line so
-           fixdep adds include/config/cc/version/text.h into the auto-generated
-           dependency. When the compiler is updated, syncconfig will touch it
-           and then every file will be rebuilt.
+         - Ensure full rebuild when the compiler is updated
+           include/linux/compiler-version.h contains this option in the comment
+           line so fixdep adds include/config/cc/version/text.h into the
+           auto-generated dependency. When the compiler is updated, syncconfig
+           will touch it and then every file will be rebuilt.
 
 config CC_IS_GCC
-       def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q gcc)
+       def_bool $(success,test "$(cc-name)" = GCC)
 
 config GCC_VERSION
        int
-       default $(shell,$(srctree)/scripts/gcc-version.sh $(CC)) if CC_IS_GCC
+       default $(cc-version) if CC_IS_GCC
        default 0
 
-config LD_VERSION
-       int
-       default $(shell,$(LD) --version | $(srctree)/scripts/ld-version.sh)
-
 config CC_IS_CLANG
-       def_bool $(success,echo "$(CC_VERSION_TEXT)" | grep -q clang)
-
-config LD_IS_LLD
-       def_bool $(success,$(LD) -v | head -n 1 | grep -q LLD)
+       def_bool $(success,test "$(cc-name)" = Clang)
 
 config CLANG_VERSION
        int
-       default $(shell,$(srctree)/scripts/clang-version.sh $(CC))
+       default $(cc-version) if CC_IS_CLANG
+       default 0
+
+config LD_IS_BFD
+       def_bool $(success,test "$(ld-name)" = BFD)
+
+config LD_VERSION
+       int
+       default $(ld-version) if LD_IS_BFD
+       default 0
+
+config LD_IS_LLD
+       def_bool $(success,test "$(ld-name)" = LLD)
 
 config LLD_VERSION
        int
-       default $(shell,$(srctree)/scripts/lld-version.sh $(LD))
+       default $(ld-version) if LD_IS_LLD
+       default 0
 
 config CC_CAN_LINK
        bool
@@ -113,8 +119,7 @@ config INIT_ENV_ARG_LIMIT
 
 config COMPILE_TEST
        bool "Compile also drivers which will not load"
-       depends on !UML && !S390
-       default n
+       depends on HAS_IOMEM
        help
          Some drivers can be compiled on a different platform than they are
          intended to be run on. Although they cannot be loaded there (or even
@@ -1861,20 +1866,6 @@ config SLUB_DEBUG
          SLUB sysfs support. /sys/slab will not exist and there will be
          no support for cache validation etc.
 
-config SLUB_MEMCG_SYSFS_ON
-       default n
-       bool "Enable memcg SLUB sysfs support by default" if EXPERT
-       depends on SLUB && SYSFS && MEMCG
-       help
-         SLUB creates a directory under /sys/kernel/slab for each
-         allocation cache to host info and debug files. If memory
-         cgroup is enabled, each cache can have per memory cgroup
-         caches. SLUB can create the same sysfs directories for these
-         caches under /sys/kernel/slab/CACHE/cgroup but it can lead
-         to a very high number of debug files being created. This is
-         controlled by slub_memcg_sysfs boot parameter and this
-         config option determines the parameter's default value.
-
 config COMPAT_BRK
        bool "Disable heap randomization"
        default y
@@ -2272,25 +2263,9 @@ config MODULE_ALLOW_MISSING_NAMESPACE_IMPORTS
 
          If unsure, say N.
 
-config UNUSED_SYMBOLS
-       bool "Enable unused/obsolete exported symbols"
-       default y if X86
-       help
-         Unused but exported symbols make the kernel needlessly bigger.  For
-         that reason most of these unused exports will soon be removed.  This
-         option is provided temporarily to provide a transition period in case
-         some external kernel module needs one of these symbols anyway. If you
-         encounter such a case in your module, consider if you are actually
-         using the right API.  (rationale: since nobody in the kernel is using
-         this in a module, there is a pretty good chance it's actually the
-         wrong interface to use).  If you really need the symbol, please send a
-         mail to the linux kernel mailing list mentioning the symbol and why
-         you really need it, and what the merge plan to the mainline kernel for
-         your module is.
-
 config TRIM_UNUSED_KSYMS
-       bool "Trim unused exported kernel symbols"
-       depends on !UNUSED_SYMBOLS
+       bool "Trim unused exported kernel symbols" if EXPERT
+       depends on !COMPILE_TEST
        help
          The kernel and some modules make many symbols available for
          other modules to use via EXPORT_SYMBOL() and variants. Depending
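
The net effect for C code is one uniform set of versioned symbols per tool; a sketch of how a source file might key off them (the version thresholds are made up for the example):

#include <linux/kconfig.h>

#if defined(CONFIG_CC_IS_CLANG) && CONFIG_CLANG_VERSION < 110000
#error "example only: this file would need clang >= 11"
#elif defined(CONFIG_CC_IS_GCC) && CONFIG_GCC_VERSION < 50100
#error "example only: this file would need gcc >= 5.1"
#endif

#if defined(CONFIG_LD_IS_LLD) && CONFIG_LLD_VERSION < 120000
/* hypothetical workaround for an older LLD */
#endif
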
index 55b74d7..d677e8e 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/utime.h>
 #include <linux/file.h>
 #include <linux/memblock.h>
+#include <linux/mm.h>
 #include <linux/namei.h>
 #include <linux/init_syscalls.h>
 
@@ -45,6 +46,16 @@ static void __init error(char *x)
                message = x;
 }
 
+static void panic_show_mem(const char *fmt, ...)
+{
+       struct va_format vaf = { .fmt = fmt };
+       va_list args;
+
+       show_mem(0, NULL);
+       va_start(args, fmt);
+       vaf.va = &args;
+       /* forward the va_list via "%pV": a va_list must not be passed to
+        * panic() as if it were an ordinary variadic argument
+        */
+       panic("%pV", &vaf);
+       va_end(args);
+}
+
 /* link hash */
 
 #define N_ALIGN(len) ((((len) + 1) & ~3) + 2)
@@ -80,7 +91,7 @@ static char __init *find_link(int major, int minor, int ino,
        }
        q = kmalloc(sizeof(struct hash), GFP_KERNEL);
        if (!q)
-               panic("can't allocate link hash entry");
+               panic_show_mem("can't allocate link hash entry");
        q->major = major;
        q->minor = minor;
        q->ino = ino;
@@ -125,7 +136,7 @@ static void __init dir_add(const char *name, time64_t mtime)
 {
        struct dir_entry *de = kmalloc(sizeof(struct dir_entry), GFP_KERNEL);
        if (!de)
-               panic("can't allocate dir_entry buffer");
+               panic_show_mem("can't allocate dir_entry buffer");
        INIT_LIST_HEAD(&de->list);
        de->name = kstrdup(name, GFP_KERNEL);
        de->mtime = mtime;
@@ -460,7 +471,7 @@ static char * __init unpack_to_rootfs(char *buf, unsigned long len)
        name_buf = kmalloc(N_ALIGN(PATH_MAX), GFP_KERNEL);
 
        if (!header_buf || !symlink_buf || !name_buf)
-               panic("can't allocate buffers");
+               panic_show_mem("can't allocate buffers");
 
        state = Start;
        this_header = 0;
@@ -535,6 +546,51 @@ extern unsigned long __initramfs_size;
 #include <linux/initrd.h>
 #include <linux/kexec.h>
 
+void __init reserve_initrd_mem(void)
+{
+       phys_addr_t start;
+       unsigned long size;
+
+       /* Ignore the virtual address computed during device tree parsing */
+       initrd_start = initrd_end = 0;
+
+       if (!phys_initrd_size)
+               return;
+       /*
+        * Round the memory region to page boundaries as per free_initrd_mem()
+        * This allows us to detect whether the pages overlapping the initrd
+        * are in use, but more importantly, reserves the entire set of pages
+        * as we don't want these pages allocated for other purposes.
+        */
+       start = round_down(phys_initrd_start, PAGE_SIZE);
+       size = phys_initrd_size + (phys_initrd_start - start);
+       size = round_up(size, PAGE_SIZE);
+
+       if (!memblock_is_region_memory(start, size)) {
+               pr_err("INITRD: 0x%08llx+0x%08lx is not a memory region",
+                      (u64)start, size);
+               goto disable;
+       }
+
+       if (memblock_is_region_reserved(start, size)) {
+               pr_err("INITRD: 0x%08llx+0x%08lx overlaps in-use memory region\n",
+                      (u64)start, size);
+               goto disable;
+       }
+
+       memblock_reserve(start, size);
+       /* Now convert initrd to virtual addresses */
+       initrd_start = (unsigned long)__va(phys_initrd_start);
+       initrd_end = initrd_start + phys_initrd_size;
+       initrd_below_start_ok = 1;
+
+       return;
+disable:
+       pr_cont(" - disabling initrd\n");
+       initrd_start = 0;
+       initrd_end = 0;
+}
+
 void __weak __init free_initrd_mem(unsigned long start, unsigned long end)
 {
 #ifdef CONFIG_ARCH_KEEP_MEMBLOCK
@@ -607,7 +663,7 @@ static int __init populate_rootfs(void)
        /* Load the built in initramfs */
        char *err = unpack_to_rootfs(__initramfs_start, __initramfs_size);
        if (err)
-               panic("%s", err); /* Failed to decompress INTERNAL initramfs */
+               panic_show_mem("%s", err); /* Failed to decompress INTERNAL initramfs */
 
        if (!initrd_start || IS_ENABLED(CONFIG_INITRAMFS_FORCE))
                goto done;
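
The rounding in reserve_initrd_mem() is easiest to see with concrete numbers (PAGE_SIZE of 4096 assumed):

/* phys_initrd_start = 0x10001200, phys_initrd_size = 0x2100:
 *   start = round_down(0x10001200, 4096)       = 0x10001000
 *   size  = 0x2100 + (0x10001200 - 0x10001000) = 0x2300
 *   size  = round_up(0x2300, 4096)             = 0x3000
 * so all three pages touched by the initrd are reserved. */
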
index a626e78..53b2788 100644 (file)
@@ -40,6 +40,7 @@
 #include <linux/security.h>
 #include <linux/smp.h>
 #include <linux/profile.h>
+#include <linux/kfence.h>
 #include <linux/rcupdate.h>
 #include <linux/moduleparam.h>
 #include <linux/kallsyms.h>
@@ -74,7 +75,6 @@
 #include <linux/kgdb.h>
 #include <linux/ftrace.h>
 #include <linux/async.h>
-#include <linux/sfi.h>
 #include <linux/shmem_fs.h>
 #include <linux/slab.h>
 #include <linux/perf_event.h>
@@ -97,6 +97,7 @@
 #include <linux/mem_encrypt.h>
 #include <linux/kcsan.h>
 #include <linux/init_syscalls.h>
+#include <linux/stackdepot.h>
 
 #include <asm/io.h>
 #include <asm/bugs.h>
@@ -825,7 +826,9 @@ static void __init mm_init(void)
         */
        page_ext_init_flatmem();
        init_mem_debugging_and_hardening();
+       kfence_alloc_pool();
        report_meminit();
+       stack_depot_init();
        mem_init();
        /* page_owner must be initialized after buddy is ready */
        page_ext_init_flatmem_late();
@@ -956,6 +959,7 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
        hrtimers_init();
        softirq_init();
        timekeeping_init();
+       kfence_init();
 
        /*
         * For best initial stack canary entropy, prepare it after:
@@ -1054,7 +1058,6 @@ asmlinkage __visible void __init __no_sanitize_address start_kernel(void)
 
        acpi_subsystem_init();
        arch_post_acpi_subsys_init();
-       sfi_init_late();
        kcsan_init();
 
        /* Do the rest non-__init'ed, we're now alive */
@@ -1423,6 +1426,7 @@ static int __ref kernel_init(void *unused)
        async_synchronize_full();
        kprobe_free_init_mem();
        ftrace_free_init_mem();
+       kgdb_free_init_mem();
        free_initmem();
        mark_readonly();
 
index 80d2b75..92afc78 100644 (file)
 #include <linux/version.h>
 #include <linux/proc_ns.h>
 
-#ifndef CONFIG_KALLSYMS
-#define version(a) Version_ ## a
-#define version_string(a) version(a)
-
-extern int version_string(LINUX_VERSION_CODE);
-int version_string(LINUX_VERSION_CODE);
-#endif
-
 struct uts_namespace init_uts_ns = {
        .ns.count = REFCOUNT_INIT(2),
        .name = {
index beff0cf..8031464 100644 (file)
@@ -594,8 +594,8 @@ out_unlock:
        return error;
 }
 
-static int mqueue_create(struct inode *dir, struct dentry *dentry,
-                               umode_t mode, bool excl)
+static int mqueue_create(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, umode_t mode, bool excl)
 {
        return mqueue_create_attr(dentry, mode, NULL);
 }
@@ -873,7 +873,7 @@ static int prepare_open(struct dentry *dentry, int oflag, int ro,
        if ((oflag & O_ACCMODE) == (O_RDWR | O_WRONLY))
                return -EINVAL;
        acc = oflag2acc[oflag & O_ACCMODE];
-       return inode_permission(d_inode(dentry), acc);
+       return inode_permission(&init_user_ns, d_inode(dentry), acc);
 }
 
 static int do_mq_open(const char __user *u_name, int oflag, umode_t mode,
@@ -965,7 +965,8 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name)
                err = -ENOENT;
        } else {
                ihold(inode);
-               err = vfs_unlink(d_inode(dentry->d_parent), dentry, NULL);
+               err = vfs_unlink(&init_user_ns, d_inode(dentry->d_parent),
+                                dentry, NULL);
        }
        dput(dentry);
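
The mqueue hunks above show the pattern this series applies tree-wide: helpers that consult inode ownership gain a struct user_namespace * first argument, and callers not operating on an idmapped mount pass &init_user_ns, the identity mapping. Condensed (placeholder variables, no error handling):

err = inode_permission(&init_user_ns, inode, MAY_READ);
if (!err)
        err = vfs_unlink(&init_user_ns, d_inode(dentry->d_parent),
                         dentry, NULL);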
 
index 5b3f01d..60739d5 100644 (file)
@@ -84,7 +84,7 @@ struct audit_fsnotify_mark *audit_alloc_mark(struct audit_krule *krule, char *pa
 
        dentry = kern_path_locked(pathname, &path);
        if (IS_ERR(dentry))
-               return (void *)dentry; /* returning an error */
+               return ERR_CAST(dentry); /* returning an error */
        inode = path.dentry->d_inode;
        inode_unlock(inode);
 
index 434337a..47fb48f 100644 (file)
@@ -1930,7 +1930,7 @@ static inline int audit_copy_fcaps(struct audit_names *name,
        if (!dentry)
                return 0;
 
-       rc = get_vfs_caps_from_disk(dentry, &caps);
+       rc = get_vfs_caps_from_disk(&init_user_ns, dentry, &caps);
        if (rc)
                return rc;
 
@@ -2481,7 +2481,8 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm,
        ax->d.next = context->aux;
        context->aux = (void *)ax;
 
-       get_vfs_caps_from_disk(bprm->file->f_path.dentry, &vcaps);
+       get_vfs_caps_from_disk(&init_user_ns,
+                              bprm->file->f_path.dentry, &vcaps);
 
        ax->fcap.permitted = vcaps.permitted;
        ax->fcap.inheritable = vcaps.inheritable;
index 6639640..b58b2ef 100644 (file)
@@ -109,7 +109,7 @@ static void *bpf_fd_inode_storage_lookup_elem(struct bpf_map *map, void *key)
        fd = *(int *)key;
        f = fget_raw(fd);
        if (!f)
-               return NULL;
+               return ERR_PTR(-EBADF);
 
        sdata = inode_storage_lookup(f->f_inode, map, true);
        fput(f);
index 1a666a9..70f6fd4 100644 (file)
@@ -430,7 +430,7 @@ static int bpf_struct_ops_map_update_elem(struct bpf_map *map, void *key,
 
                tprogs[BPF_TRAMP_FENTRY].progs[0] = prog;
                tprogs[BPF_TRAMP_FENTRY].nr_progs = 1;
-               err = arch_prepare_bpf_trampoline(image,
+               err = arch_prepare_bpf_trampoline(NULL, image,
                                                  st_map->image + PAGE_SIZE,
                                                  &st_ops->func_models[i], 0,
                                                  tprogs, NULL);
index 2efeb5f..b1a76fe 100644 (file)
@@ -4321,8 +4321,6 @@ btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
                 * is not supported yet.
                 * BPF_PROG_TYPE_RAW_TRACEPOINT is fine.
                 */
-               if (log->level & BPF_LOG_LEVEL)
-                       bpf_log(log, "arg#%d type is not a struct\n", arg);
                return NULL;
        }
        tname = btf_name_by_offset(btf, t->name_off);
index 0ae015a..75244ec 100644 (file)
@@ -827,7 +827,7 @@ static int __init bpf_jit_charge_init(void)
 }
 pure_initcall(bpf_jit_charge_init);
 
-static int bpf_jit_charge_modmem(u32 pages)
+int bpf_jit_charge_modmem(u32 pages)
 {
        if (atomic_long_add_return(pages, &bpf_jit_current) >
            (bpf_jit_limit >> PAGE_SHIFT)) {
@@ -840,7 +840,7 @@ static int bpf_jit_charge_modmem(u32 pages)
        return 0;
 }
 
-static void bpf_jit_uncharge_modmem(u32 pages)
+void bpf_jit_uncharge_modmem(u32 pages)
 {
        atomic_long_sub(pages, &bpf_jit_current);
 }
@@ -1118,6 +1118,8 @@ static void bpf_prog_clone_free(struct bpf_prog *fp)
         * clone is guaranteed to not be locked.
         */
        fp->aux = NULL;
+       fp->stats = NULL;
+       fp->active = NULL;
        __bpf_prog_free(fp);
 }
 
@@ -2342,6 +2344,10 @@ bool __weak bpf_helper_changes_pkt_data(void *func)
 /* Return TRUE if the JIT backend wants verifier to enable sub-register usage
  * analysis code and wants explicit zero extension inserted by verifier.
  * Otherwise, return FALSE.
+ *
+ * The verifier inserts an explicit zero extension after BPF_CMPXCHGs even if
+ * you don't override this. JITs that don't want these extra insns can detect
+ * them using insn_is_zext.
  */
 bool __weak bpf_jit_needs_zext(void)
 {
index dd4b7fd..1576ff3 100644 (file)
@@ -122,7 +122,7 @@ static struct inode *bpf_get_inode(struct super_block *sb,
        inode->i_mtime = inode->i_atime;
        inode->i_ctime = inode->i_atime;
 
-       inode_init_owner(inode, dir, mode);
+       inode_init_owner(&init_user_ns, inode, dir, mode);
 
        return inode;
 }
@@ -152,7 +152,8 @@ static void bpf_dentry_finalize(struct dentry *dentry, struct inode *inode,
        dir->i_ctime = dir->i_mtime;
 }
 
-static int bpf_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int bpf_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                    struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
 
@@ -381,8 +382,8 @@ bpf_lookup(struct inode *dir, struct dentry *dentry, unsigned flags)
        return simple_lookup(dir, dentry, flags);
 }
 
-static int bpf_symlink(struct inode *dir, struct dentry *dentry,
-                      const char *target)
+static int bpf_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, const char *target)
 {
        char *link = kstrdup(target, GFP_USER | __GFP_NOWARN);
        struct inode *inode;
@@ -507,7 +508,7 @@ static void *bpf_obj_do_get(const char __user *pathname,
                return ERR_PTR(ret);
 
        inode = d_backing_inode(path.dentry);
-       ret = inode_permission(inode, ACC_MODE(flags));
+       ret = path_permission(&path, ACC_MODE(flags));
        if (ret)
                goto out;
 
@@ -558,7 +559,7 @@ int bpf_obj_get_user(const char __user *pathname, int flags)
 static struct bpf_prog *__get_prog_inode(struct inode *inode, enum bpf_prog_type type)
 {
        struct bpf_prog *prog;
-       int ret = inode_permission(inode, MAY_READ);
+       int ret = inode_permission(&init_user_ns, inode, MAY_READ);
        if (ret)
                return ERR_PTR(ret);
 
index 79c5772..53736e5 100644 (file)
@@ -60,9 +60,12 @@ static int finish(void)
                         &magic, sizeof(magic), &pos);
        if (n != sizeof(magic))
                return -EPIPE;
+
        tgid = umd_ops.info.tgid;
-       wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
-       umd_ops.info.tgid = NULL;
+       if (tgid) {
+               wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
+               umd_cleanup_helper(&umd_ops.info);
+       }
        return 0;
 }
 
@@ -80,10 +83,18 @@ static int __init load_umd(void)
 
 static void __exit fini_umd(void)
 {
+       struct pid *tgid;
+
        bpf_preload_ops = NULL;
+
        /* kill UMD in case it's still there due to earlier error */
-       kill_pid(umd_ops.info.tgid, SIGKILL, 1);
-       umd_ops.info.tgid = NULL;
+       tgid = umd_ops.info.tgid;
+       if (tgid) {
+               kill_pid(tgid, SIGKILL, 1);
+
+               wait_event(tgid->wait_pidfd, thread_group_exited(tgid));
+               umd_cleanup_helper(&umd_ops.info);
+       }
        umd_unload_blob(&umd_ops.info);
 }
 late_initcall(load_umd);
index c859bc4..2505034 100644 (file)
@@ -854,6 +854,11 @@ static int map_create(union bpf_attr *attr)
                        err = PTR_ERR(btf);
                        goto free_map;
                }
+               if (btf_is_kernel(btf)) {
+                       btf_put(btf);
+                       err = -EACCES;
+                       goto free_map;
+               }
                map->btf = btf;
 
                if (attr->btf_value_type_id) {
index 7bc3b32..1f3a4be 100644 (file)
@@ -57,19 +57,10 @@ void bpf_image_ksym_del(struct bpf_ksym *ksym)
                           PAGE_SIZE, true, ksym->name);
 }
 
-static void bpf_trampoline_ksym_add(struct bpf_trampoline *tr)
-{
-       struct bpf_ksym *ksym = &tr->ksym;
-
-       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu", tr->key);
-       bpf_image_ksym_add(tr->image, ksym);
-}
-
 static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
        struct bpf_trampoline *tr;
        struct hlist_head *head;
-       void *image;
        int i;
 
        mutex_lock(&trampoline_mutex);
@@ -84,14 +75,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
        if (!tr)
                goto out;
 
-       /* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-       image = bpf_jit_alloc_exec_page();
-       if (!image) {
-               kfree(tr);
-               tr = NULL;
-               goto out;
-       }
-
        tr->key = key;
        INIT_HLIST_NODE(&tr->hlist);
        hlist_add_head(&tr->hlist, head);
@@ -99,9 +82,6 @@ static struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
        mutex_init(&tr->mutex);
        for (i = 0; i < BPF_TRAMP_MAX; i++)
                INIT_HLIST_HEAD(&tr->progs_hlist[i]);
-       tr->image = image;
-       INIT_LIST_HEAD_RCU(&tr->ksym.lnode);
-       bpf_trampoline_ksym_add(tr);
 out:
        mutex_unlock(&trampoline_mutex);
        return tr;
@@ -185,10 +165,142 @@ bpf_trampoline_get_progs(const struct bpf_trampoline *tr, int *total)
        return tprogs;
 }
 
+static void __bpf_tramp_image_put_deferred(struct work_struct *work)
+{
+       struct bpf_tramp_image *im;
+
+       im = container_of(work, struct bpf_tramp_image, work);
+       bpf_image_ksym_del(&im->ksym);
+       bpf_jit_free_exec(im->image);
+       bpf_jit_uncharge_modmem(1);
+       percpu_ref_exit(&im->pcref);
+       kfree_rcu(im, rcu);
+}
+
+/* callback, fexit step 3 or fentry step 2 */
+static void __bpf_tramp_image_put_rcu(struct rcu_head *rcu)
+{
+       struct bpf_tramp_image *im;
+
+       im = container_of(rcu, struct bpf_tramp_image, rcu);
+       INIT_WORK(&im->work, __bpf_tramp_image_put_deferred);
+       schedule_work(&im->work);
+}
+
+/* callback, fexit step 2. Called after percpu_ref_kill confirms. */
+static void __bpf_tramp_image_release(struct percpu_ref *pcref)
+{
+       struct bpf_tramp_image *im;
+
+       im = container_of(pcref, struct bpf_tramp_image, pcref);
+       call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
+}
+
+/* callback, fexit or fentry step 1 */
+static void __bpf_tramp_image_put_rcu_tasks(struct rcu_head *rcu)
+{
+       struct bpf_tramp_image *im;
+
+       im = container_of(rcu, struct bpf_tramp_image, rcu);
+       if (im->ip_after_call)
+               /* the case of fmod_ret/fexit trampoline and CONFIG_PREEMPTION=y */
+               percpu_ref_kill(&im->pcref);
+       else
+               /* the case of fentry trampoline */
+               call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu);
+}
+
+static void bpf_tramp_image_put(struct bpf_tramp_image *im)
+{
+       /* The trampoline image that calls the original function relies on:
+        * rcu_read_lock_trace to protect sleepable bpf progs
+        * rcu_read_lock to protect normal bpf progs
+        * percpu_ref to protect the trampoline itself
+        * rcu tasks to protect the trampoline asm not covered by percpu_ref
+        * (that is, the few asm insns before __bpf_tramp_enter and
+        *  after __bpf_tramp_exit)
+        *
+        * The trampoline is unreachable before bpf_tramp_image_put().
+        *
+        * First, patch the trampoline to avoid calling into fexit progs.
+        * The progs will be freed even if the original function is still
+        * executing or sleeping.
+        * In case of CONFIG_PREEMPT=y use call_rcu_tasks() to wait on
+        * first few asm instructions to execute and call into
+        * __bpf_tramp_enter->percpu_ref_get.
+        * Then use percpu_ref_kill to wait for the trampoline and the original
+        * function to finish.
+        * Then use call_rcu_tasks() to make sure the few asm insns in
+        * the trampoline epilogue are done as well.
+        *
+        * In the !PREEMPT case the task that got interrupted in the first asm
+        * insns won't go through an RCU quiescent state which the
+        * percpu_ref_kill will be waiting for. Hence the first
+        * call_rcu_tasks() is not necessary.
+        */
+       if (im->ip_after_call) {
+               int err = bpf_arch_text_poke(im->ip_after_call, BPF_MOD_JUMP,
+                                            NULL, im->ip_epilogue);
+               WARN_ON(err);
+               if (IS_ENABLED(CONFIG_PREEMPTION))
+                       call_rcu_tasks(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
+               else
+                       percpu_ref_kill(&im->pcref);
+               return;
+       }
+
+       /* The trampoline without fexit and fmod_ret progs doesn't call the original
+        * function and doesn't use percpu_ref.
+        * Use call_rcu_tasks_trace() to wait for sleepable progs to finish.
+        * Then use call_rcu_tasks() to wait for the rest of trampoline asm
+        * and normal progs.
+        */
+       call_rcu_tasks_trace(&im->rcu, __bpf_tramp_image_put_rcu_tasks);
+}
+
+static struct bpf_tramp_image *bpf_tramp_image_alloc(u64 key, u32 idx)
+{
+       struct bpf_tramp_image *im;
+       struct bpf_ksym *ksym;
+       void *image;
+       int err = -ENOMEM;
+
+       im = kzalloc(sizeof(*im), GFP_KERNEL);
+       if (!im)
+               goto out;
+
+       err = bpf_jit_charge_modmem(1);
+       if (err)
+               goto out_free_im;
+
+       err = -ENOMEM;
+       im->image = image = bpf_jit_alloc_exec_page();
+       if (!image)
+               goto out_uncharge;
+
+       err = percpu_ref_init(&im->pcref, __bpf_tramp_image_release, 0, GFP_KERNEL);
+       if (err)
+               goto out_free_image;
+
+       ksym = &im->ksym;
+       INIT_LIST_HEAD_RCU(&ksym->lnode);
+       snprintf(ksym->name, KSYM_NAME_LEN, "bpf_trampoline_%llu_%u", key, idx);
+       bpf_image_ksym_add(image, ksym);
+       return im;
+
+out_free_image:
+       bpf_jit_free_exec(im->image);
+out_uncharge:
+       bpf_jit_uncharge_modmem(1);
+out_free_im:
+       kfree(im);
+out:
+       return ERR_PTR(err);
+}
+
 static int bpf_trampoline_update(struct bpf_trampoline *tr)
 {
-       void *old_image = tr->image + ((tr->selector + 1) & 1) * PAGE_SIZE/2;
-       void *new_image = tr->image + (tr->selector & 1) * PAGE_SIZE/2;
+       struct bpf_tramp_image *im;
        struct bpf_tramp_progs *tprogs;
        u32 flags = BPF_TRAMP_F_RESTORE_REGS;
        int err, total;
@@ -198,41 +310,42 @@ static int bpf_trampoline_update(struct bpf_trampoline *tr)
                return PTR_ERR(tprogs);
 
        if (total == 0) {
-               err = unregister_fentry(tr, old_image);
+               err = unregister_fentry(tr, tr->cur_image->image);
+               bpf_tramp_image_put(tr->cur_image);
+               tr->cur_image = NULL;
                tr->selector = 0;
                goto out;
        }
 
+       im = bpf_tramp_image_alloc(tr->key, tr->selector);
+       if (IS_ERR(im)) {
+               err = PTR_ERR(im);
+               goto out;
+       }
+
        if (tprogs[BPF_TRAMP_FEXIT].nr_progs ||
            tprogs[BPF_TRAMP_MODIFY_RETURN].nr_progs)
                flags = BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_SKIP_FRAME;
 
-       /* Though the second half of trampoline page is unused a task could be
-        * preempted in the middle of the first half of trampoline and two
-        * updates to trampoline would change the code from underneath the
-        * preempted task. Hence wait for tasks to voluntarily schedule or go
-        * to userspace.
-        * The same trampoline can hold both sleepable and non-sleepable progs.
-        * synchronize_rcu_tasks_trace() is needed to make sure all sleepable
-        * programs finish executing.
-        * Wait for these two grace periods together.
-        */
-       synchronize_rcu_mult(call_rcu_tasks, call_rcu_tasks_trace);
-
-       err = arch_prepare_bpf_trampoline(new_image, new_image + PAGE_SIZE / 2,
+       err = arch_prepare_bpf_trampoline(im, im->image, im->image + PAGE_SIZE,
                                          &tr->func.model, flags, tprogs,
                                          tr->func.addr);
        if (err < 0)
                goto out;
 
-       if (tr->selector)
+       WARN_ON(tr->cur_image && tr->selector == 0);
+       WARN_ON(!tr->cur_image && tr->selector);
+       if (tr->cur_image)
                /* progs already running at this address */
-               err = modify_fentry(tr, old_image, new_image);
+               err = modify_fentry(tr, tr->cur_image->image, im->image);
        else
                /* first time registering */
-               err = register_fentry(tr, new_image);
+               err = register_fentry(tr, im->image);
        if (err)
                goto out;
+       if (tr->cur_image)
+               bpf_tramp_image_put(tr->cur_image);
+       tr->cur_image = im;
        tr->selector++;
 out:
        kfree(tprogs);
@@ -364,17 +477,12 @@ void bpf_trampoline_put(struct bpf_trampoline *tr)
                goto out;
        if (WARN_ON_ONCE(!hlist_empty(&tr->progs_hlist[BPF_TRAMP_FEXIT])))
                goto out;
-       bpf_image_ksym_del(&tr->ksym);
-       /* This code will be executed when all bpf progs (both sleepable and
-        * non-sleepable) went through
-        * bpf_prog_put()->call_rcu[_tasks_trace]()->bpf_prog_free_deferred().
-        * Hence no need for another synchronize_rcu_tasks_trace() here,
-        * but synchronize_rcu_tasks() is still needed, since trampoline
-        * may not have had any sleepable programs and we need to wait
-        * for tasks to get out of trampoline code before freeing it.
+       /* This code will be executed even when the last bpf_tramp_image
+        * is alive. All progs are detached from the trampoline and the
+        * trampoline image is patched with jmp into epilogue to skip
+        * fexit progs. The fentry-only trampoline will be freed via
+        * multiple rcu callbacks.
         */
-       synchronize_rcu_tasks();
-       bpf_jit_free_exec(tr->image);
        hlist_del(&tr->hlist);
        kfree(tr);
 out:
@@ -478,8 +586,18 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start)
        rcu_read_unlock_trace();
 }
 
+void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr)
+{
+       percpu_ref_get(&tr->pcref);
+}
+
+void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr)
+{
+       percpu_ref_put(&tr->pcref);
+}
+
 int __weak
-arch_prepare_bpf_trampoline(void *image, void *image_end,
+arch_prepare_bpf_trampoline(struct bpf_tramp_image *tr, void *image, void *image_end,
                            const struct btf_func_model *m, u32 flags,
                            struct bpf_tramp_progs *tprogs,
                            void *orig_call)
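
Putting the pieces together, the generated trampoline brackets the patched function with the two new helpers. In pseudo-C (the real body is emitted by arch_prepare_bpf_trampoline(), not written like this):

void generated_trampoline(struct bpf_tramp_image *im /*, saved regs */)
{
        __bpf_tramp_enter(im);  /* percpu_ref_get(): pin the image */
        /* run fentry progs; optionally call the original function,
         * with im->ip_after_call marking the return site; run fexit progs */
        __bpf_tramp_exit(im);   /* percpu_ref_put(): may unpin */
        /* im->ip_epilogue: target of the jump that bpf_tramp_image_put()
         * patches in once the fexit progs must be skipped */
}
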
index 1dda9d8..44e4ec1 100644 (file)
@@ -504,6 +504,13 @@ static bool is_ptr_cast_function(enum bpf_func_id func_id)
                func_id == BPF_FUNC_skc_to_tcp_request_sock;
 }
 
+static bool is_cmpxchg_insn(const struct bpf_insn *insn)
+{
+       return BPF_CLASS(insn->code) == BPF_STX &&
+              BPF_MODE(insn->code) == BPF_ATOMIC &&
+              insn->imm == BPF_CMPXCHG;
+}
+
 /* string representation of 'enum bpf_reg_type' */
 static const char * const reg_type_str[] = {
        [NOT_INIT]              = "?",
@@ -1120,7 +1127,7 @@ static void mark_ptr_not_null_reg(struct bpf_reg_state *reg)
                reg->type = PTR_TO_RDWR_BUF;
                break;
        default:
-               WARN_ON("unknown nullable register type");
+               WARN_ONCE(1, "unknown nullable register type");
        }
 }
 
@@ -1703,7 +1710,11 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
        }
 
        if (class == BPF_STX) {
-               if (reg->type != SCALAR_VALUE)
+               /* BPF_STX (including atomic variants) has multiple source
+                * operands, one of which is a ptr. Check whether the caller is
+                * asking about it.
+                */
+               if (t == SRC_OP && reg->type != SCALAR_VALUE)
                        return true;
                return BPF_SIZE(code) == BPF_DW;
        }
@@ -1735,22 +1746,38 @@ static bool is_reg64(struct bpf_verifier_env *env, struct bpf_insn *insn,
        return true;
 }
 
-/* Return TRUE if INSN doesn't have explicit value define. */
-static bool insn_no_def(struct bpf_insn *insn)
+/* Return the regno defined by the insn, or -1. */
+static int insn_def_regno(const struct bpf_insn *insn)
 {
-       u8 class = BPF_CLASS(insn->code);
-
-       return (class == BPF_JMP || class == BPF_JMP32 ||
-               class == BPF_STX || class == BPF_ST);
+       switch (BPF_CLASS(insn->code)) {
+       case BPF_JMP:
+       case BPF_JMP32:
+       case BPF_ST:
+               return -1;
+       case BPF_STX:
+               if (BPF_MODE(insn->code) == BPF_ATOMIC &&
+                   (insn->imm & BPF_FETCH)) {
+                       if (insn->imm == BPF_CMPXCHG)
+                               return BPF_REG_0;
+                       else
+                               return insn->src_reg;
+               } else {
+                       return -1;
+               }
+       default:
+               return insn->dst_reg;
+       }
 }
 
 /* Return TRUE if INSN has defined any 32-bit value explicitly. */
 static bool insn_has_def32(struct bpf_verifier_env *env, struct bpf_insn *insn)
 {
-       if (insn_no_def(insn))
+       int dst_reg = insn_def_regno(insn);
+
+       if (dst_reg == -1)
                return false;
 
-       return !is_reg64(env, insn, insn->dst_reg, NULL, DST_OP);
+       return !is_reg64(env, insn, dst_reg, NULL, DST_OP);
 }
 
 static void mark_insn_zext(struct bpf_verifier_env *env,
@@ -5834,10 +5861,14 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 {
        bool mask_to_left = (opcode == BPF_ADD &&  off_is_neg) ||
                            (opcode == BPF_SUB && !off_is_neg);
-       u32 off;
+       u32 off, max;
 
        switch (ptr_reg->type) {
        case PTR_TO_STACK:
+               /* Offset 0 is out-of-bounds, but acceptable start for the
+                * left direction, see BPF_REG_FP.
+                */
+               max = MAX_BPF_STACK + mask_to_left;
                /* Indirect variable offset stack access is prohibited in
                 * unprivileged mode so it's not handled here.
                 */
@@ -5845,16 +5876,17 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
                if (mask_to_left)
                        *ptr_limit = MAX_BPF_STACK + off;
                else
-                       *ptr_limit = -off;
-               return 0;
+                       *ptr_limit = -off - 1;
+               return *ptr_limit >= max ? -ERANGE : 0;
        case PTR_TO_MAP_VALUE:
+               max = ptr_reg->map_ptr->value_size;
                if (mask_to_left) {
                        *ptr_limit = ptr_reg->umax_value + ptr_reg->off;
                } else {
                        off = ptr_reg->smin_value + ptr_reg->off;
-                       *ptr_limit = ptr_reg->map_ptr->value_size - off;
+                       *ptr_limit = ptr_reg->map_ptr->value_size - off - 1;
                }
-               return 0;
+               return *ptr_limit >= max ? -ERANGE : 0;
        default:
                return -EINVAL;
        }
@@ -5907,6 +5939,7 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
        u32 alu_state, alu_limit;
        struct bpf_reg_state tmp;
        bool ret;
+       int err;
 
        if (can_skip_alu_sanitation(env, insn))
                return 0;
@@ -5922,10 +5955,13 @@ static int sanitize_ptr_alu(struct bpf_verifier_env *env,
        alu_state |= ptr_is_dst_reg ?
                     BPF_ALU_SANITIZE_SRC : BPF_ALU_SANITIZE_DST;
 
-       if (retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg))
-               return 0;
-       if (update_alu_sanitation_state(aux, alu_state, alu_limit))
-               return -EACCES;
+       err = retrieve_ptr_limit(ptr_reg, &alu_limit, opcode, off_is_neg);
+       if (err < 0)
+               return err;
+
+       err = update_alu_sanitation_state(aux, alu_state, alu_limit);
+       if (err < 0)
+               return err;
 do_sim:
        /* Simulate and find potential out-of-bounds access under
         * speculative execution from truncation as a result of
@@ -6076,7 +6112,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_ADD:
                ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
                if (ret < 0) {
-                       verbose(env, "R%d tried to add from different maps or paths\n", dst);
+                       verbose(env, "R%d tried to add from different maps, paths, or prohibited types\n", dst);
                        return ret;
                }
                /* We can take a fixed offset as long as it doesn't overflow
@@ -6131,7 +6167,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
        case BPF_SUB:
                ret = sanitize_ptr_alu(env, insn, ptr_reg, dst_reg, smin_val < 0);
                if (ret < 0) {
-                       verbose(env, "R%d tried to sub from different maps or paths\n", dst);
+                       verbose(env, "R%d tried to sub from different maps, paths, or prohibited types\n", dst);
                        return ret;
                }
                if (dst_reg == off_reg) {
@@ -9029,6 +9065,10 @@ static int check_btf_info(struct bpf_verifier_env *env,
        btf = btf_get_by_fd(attr->prog_btf_fd);
        if (IS_ERR(btf))
                return PTR_ERR(btf);
+       if (btf_is_kernel(btf)) {
+               btf_put(btf);
+               return -EACCES;
+       }
        env->prog->aux->btf = btf;
 
        err = check_btf_func(env, attr, uattr);
@@ -11006,9 +11046,10 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
        for (i = 0; i < len; i++) {
                int adj_idx = i + delta;
                struct bpf_insn insn;
-               u8 load_reg;
+               int load_reg;
 
                insn = insns[adj_idx];
+               load_reg = insn_def_regno(&insn);
                if (!aux[adj_idx].zext_dst) {
                        u8 code, class;
                        u32 imm_rnd;
@@ -11018,14 +11059,14 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
 
                        code = insn.code;
                        class = BPF_CLASS(code);
-                       if (insn_no_def(&insn))
+                       if (load_reg == -1)
                                continue;
 
                        /* NOTE: arg "reg" (the fourth one) is only used for
-                        *       BPF_STX which has been ruled out in above
-                        *       check, it is safe to pass NULL here.
+                        *       BPF_STX + SRC_OP, so it is safe to pass NULL
+                        *       here.
                         */
-                       if (is_reg64(env, &insn, insn.dst_reg, NULL, DST_OP)) {
+                       if (is_reg64(env, &insn, load_reg, NULL, DST_OP)) {
                                if (class == BPF_LD &&
                                    BPF_MODE(code) == BPF_IMM)
                                        i++;
@@ -11040,31 +11081,28 @@ static int opt_subreg_zext_lo32_rnd_hi32(struct bpf_verifier_env *env,
                        imm_rnd = get_random_int();
                        rnd_hi32_patch[0] = insn;
                        rnd_hi32_patch[1].imm = imm_rnd;
-                       rnd_hi32_patch[3].dst_reg = insn.dst_reg;
+                       rnd_hi32_patch[3].dst_reg = load_reg;
                        patch = rnd_hi32_patch;
                        patch_len = 4;
                        goto apply_patch_buffer;
                }
 
-               if (!bpf_jit_needs_zext())
+               /* Add in a zero-extend instruction if a) the JIT has requested
+                * it or b) it's a CMPXCHG.
+                *
+                * The latter is because: BPF_CMPXCHG always loads a value into
+                * R0, therefore always zero-extends. However some archs'
+                * equivalent instruction only does this load when the
+                * comparison is successful. This detail of CMPXCHG is
+                * orthogonal to the general zero-extension behaviour of the
+                * CPU, so it's treated independently of bpf_jit_needs_zext.
+                */
+               if (!bpf_jit_needs_zext() && !is_cmpxchg_insn(&insn))
                        continue;
 
-               /* zext_dst means that we want to zero-extend whatever register
-                * the insn defines, which is dst_reg most of the time, with
-                * the notable exception of BPF_STX + BPF_ATOMIC + BPF_FETCH.
-                */
-               if (BPF_CLASS(insn.code) == BPF_STX &&
-                   BPF_MODE(insn.code) == BPF_ATOMIC) {
-                       /* BPF_STX + BPF_ATOMIC insns without BPF_FETCH do not
-                        * define any registers, therefore zext_dst cannot be
-                        * set.
-                        */
-                       if (WARN_ON(!(insn.imm & BPF_FETCH)))
-                               return -EINVAL;
-                       load_reg = insn.imm == BPF_CMPXCHG ? BPF_REG_0
-                                                          : insn.src_reg;
-               } else {
-                       load_reg = insn.dst_reg;
+               if (WARN_ON(load_reg == -1)) {
+                       verbose(env, "verifier bug. zext_dst is set, but no reg is defined\n");
+                       return -EFAULT;
                }
 
                zext_patch[0] = insn;
@@ -11635,7 +11673,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
                        off_reg = issrc ? insn->src_reg : insn->dst_reg;
                        if (isneg)
                                *patch++ = BPF_ALU64_IMM(BPF_MUL, off_reg, -1);
-                       *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit - 1);
+                       *patch++ = BPF_MOV32_IMM(BPF_REG_AX, aux->alu_limit);
                        *patch++ = BPF_ALU64_REG(BPF_SUB, BPF_REG_AX, off_reg);
                        *patch++ = BPF_ALU64_REG(BPF_OR, BPF_REG_AX, off_reg);
                        *patch++ = BPF_ALU64_IMM(BPF_NEG, BPF_REG_AX, 0);
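
A worked example of the new limit handling for PTR_TO_STACK (values illustrative):

/* reg->off = -16, moving right (off_is_neg == false):
 *   *ptr_limit = -(-16) - 1 = 15, max = MAX_BPF_STACK = 512 -> OK,
 *   and the patched sequence above loads alu_limit (15) directly,
 *   the old "alu_limit - 1" adjustment having moved into
 *   retrieve_ptr_limit().
 * A bogus reg->off = 16 gives (u32)(-16 - 1) >= max -> -ERANGE, which
 * sanitize_ptr_alu() now propagates instead of silently skipping the
 * masking as the old "return 0" paths did. */
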
index de7eac9..46a361d 100644 (file)
@@ -484,10 +484,12 @@ EXPORT_SYMBOL(file_ns_capable);
  *
  * Return true if the inode uid and gid are within the namespace.
  */
-bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *inode)
+bool privileged_wrt_inode_uidgid(struct user_namespace *ns,
+                                struct user_namespace *mnt_userns,
+                                const struct inode *inode)
 {
-       return kuid_has_mapping(ns, inode->i_uid) &&
-               kgid_has_mapping(ns, inode->i_gid);
+       return kuid_has_mapping(ns, i_uid_into_mnt(mnt_userns, inode)) &&
+              kgid_has_mapping(ns, i_gid_into_mnt(mnt_userns, inode));
 }
 
 /**
@@ -499,11 +501,13 @@ bool privileged_wrt_inode_uidgid(struct user_namespace *ns, const struct inode *
  * its own user namespace and that the given inode's uid and gid are
  * mapped into the current user namespace.
  */
-bool capable_wrt_inode_uidgid(const struct inode *inode, int cap)
+bool capable_wrt_inode_uidgid(struct user_namespace *mnt_userns,
+                             const struct inode *inode, int cap)
 {
        struct user_namespace *ns = current_user_ns();
 
-       return ns_capable(ns, cap) && privileged_wrt_inode_uidgid(ns, inode);
+       return ns_capable(ns, cap) &&
+              privileged_wrt_inode_uidgid(ns, mnt_userns, inode);
 }
 EXPORT_SYMBOL(capable_wrt_inode_uidgid);
 
index c80fe99..9153b20 100644 (file)
@@ -4672,7 +4672,7 @@ static int cgroup_may_write(const struct cgroup *cgrp, struct super_block *sb)
        if (!inode)
                return -ENOMEM;
 
-       ret = inode_permission(inode, MAY_WRITE);
+       ret = inode_permission(&init_user_ns, inode, MAY_WRITE);
        iput(inode);
        return ret;
 }
index b636d51..4708aec 100644 (file)
@@ -455,6 +455,17 @@ setundefined:
        return 0;
 }
 
+void kgdb_free_init_mem(void)
+{
+       int i;
+
+       /* Clear init memory breakpoints. */
+       for (i = 0; i < KGDB_MAX_BREAKPOINTS; i++) {
+               if (init_section_contains((void *)kgdb_break[i].bpt_addr, 0))
+                       kgdb_break[i].state = BP_UNDEFINED;
+       }
+}
+
 #ifdef CONFIG_KGDB_KDB
 void kdb_dump_stack_on_cpu(int cpu)
 {
index da95df3..e0e64f8 100644 (file)
@@ -21,6 +21,7 @@
 #define DMA_MAP_BENCHMARK      _IOWR('d', 1, struct map_benchmark)
 #define DMA_MAP_MAX_THREADS    1024
 #define DMA_MAP_MAX_SECONDS    300
+#define DMA_MAP_MAX_TRANS_DELAY        (10 * NSEC_PER_MSEC)
 
 #define DMA_MAP_BIDIRECTIONAL  0
 #define DMA_MAP_TO_DEVICE      1
@@ -36,7 +37,8 @@ struct map_benchmark {
        __s32 node; /* which numa node this benchmark will run on */
        __u32 dma_bits; /* DMA addressing capability */
        __u32 dma_dir; /* DMA data direction */
-       __u8 expansion[84];     /* For future use */
+       __u32 dma_trans_ns; /* time for DMA transmission in ns */
+       __u8 expansion[80];     /* For future use */
 };
 
 struct map_benchmark_data {
@@ -87,6 +89,9 @@ static int map_benchmark_thread(void *data)
                map_etime = ktime_get();
                map_delta = ktime_sub(map_etime, map_stime);
 
+               /* Pretend DMA is transmitting */
+               ndelay(map->bparam.dma_trans_ns);
+
                unmap_stime = ktime_get();
                dma_unmap_single(map->dev, dma_addr, PAGE_SIZE, map->dir);
                unmap_etime = ktime_get();
@@ -218,6 +223,11 @@ static long map_benchmark_ioctl(struct file *file, unsigned int cmd,
                        return -EINVAL;
                }
 
+               if (map->bparam.dma_trans_ns > DMA_MAP_MAX_TRANS_DELAY) {
+                       pr_err("invalid transmission delay\n");
+                       return -EINVAL;
+               }
+
                if (map->bparam.node != NUMA_NO_NODE &&
                    !node_possible(map->bparam.node)) {
                        pr_err("invalid numa node\n");
index 84de6b1..b6a6336 100644 (file)
@@ -517,46 +517,6 @@ void dma_free_pages(struct device *dev, size_t size, struct page *page,
 }
 EXPORT_SYMBOL_GPL(dma_free_pages);
 
-void *dma_alloc_noncoherent(struct device *dev, size_t size,
-               dma_addr_t *dma_handle, enum dma_data_direction dir, gfp_t gfp)
-{
-       const struct dma_map_ops *ops = get_dma_ops(dev);
-       void *vaddr;
-
-       if (!ops || !ops->alloc_noncoherent) {
-               struct page *page;
-
-               page = dma_alloc_pages(dev, size, dma_handle, dir, gfp);
-               if (!page)
-                       return NULL;
-               return page_address(page);
-       }
-
-       size = PAGE_ALIGN(size);
-       vaddr = ops->alloc_noncoherent(dev, size, dma_handle, dir, gfp);
-       if (vaddr)
-               debug_dma_map_page(dev, virt_to_page(vaddr), 0, size, dir,
-                                  *dma_handle);
-       return vaddr;
-}
-EXPORT_SYMBOL_GPL(dma_alloc_noncoherent);
-
-void dma_free_noncoherent(struct device *dev, size_t size, void *vaddr,
-               dma_addr_t dma_handle, enum dma_data_direction dir)
-{
-       const struct dma_map_ops *ops = get_dma_ops(dev);
-
-       if (!ops || !ops->free_noncoherent) {
-               dma_free_pages(dev, size, virt_to_page(vaddr), dma_handle, dir);
-               return;
-       }
-
-       size = PAGE_ALIGN(size);
-       debug_dma_unmap_page(dev, dma_handle, size, dir);
-       ops->free_noncoherent(dev, size, vaddr, dma_handle, dir);
-}
-EXPORT_SYMBOL_GPL(dma_free_noncoherent);
-
 int dma_supported(struct device *dev, u64 mask)
 {
        const struct dma_map_ops *ops = get_dma_ops(dev);
index 7c42df6..c10e855 100644 (file)
@@ -50,9 +50,6 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/swiotlb.h>
 
-#define OFFSET(val,align) ((unsigned long)     \
-                          ( (val) & ( (align) - 1)))
-
 #define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT))
 
 /*
@@ -103,6 +100,11 @@ static unsigned int max_segment;
 static phys_addr_t *io_tlb_orig_addr;
 
 /*
+ * The mapped buffer's size should be validated during a sync operation.
+ */
+static size_t *io_tlb_orig_size;
+
+/*
  * Protect the above data structures in the map and unmap calls
  */
 static DEFINE_SPINLOCK(io_tlb_lock);
@@ -171,7 +173,7 @@ void __init swiotlb_adjust_size(unsigned long new_size)
         * adjust/expand SWIOTLB size for their use.
         */
        if (!io_tlb_nslabs) {
-               size = ALIGN(new_size, 1 << IO_TLB_SHIFT);
+               size = ALIGN(new_size, IO_TLB_SIZE);
                io_tlb_nslabs = size >> IO_TLB_SHIFT;
                io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE);
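
With IO_TLB_SHIFT = 11 (2 KiB slabs) and IO_TLB_SEGSIZE = 128, the helper arithmetic added in the next hunk works out as follows (sketch):

/* nr_slots(12288)    = DIV_ROUND_UP(12 KiB, 2 KiB) = 6 slabs
 * io_tlb_offset(129) = 129 & (128 - 1) = 1, i.e. the second slot of
 *                      its 128-slot segment */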
 
@@ -192,6 +194,16 @@ void swiotlb_print_info(void)
               bytes >> 20);
 }
 
+static inline unsigned long io_tlb_offset(unsigned long val)
+{
+       return val & (IO_TLB_SEGSIZE - 1);
+}
+
+static inline unsigned long nr_slots(u64 val)
+{
+       return DIV_ROUND_UP(val, IO_TLB_SIZE);
+}
+
 /*
  * Early SWIOTLB allocation may be too early to allow an architecture to
  * perform the desired operations.  This function allows the architecture to
@@ -240,9 +252,16 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose)
                panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
                      __func__, alloc_size, PAGE_SIZE);
 
+       alloc_size = PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t));
+       io_tlb_orig_size = memblock_alloc(alloc_size, PAGE_SIZE);
+       if (!io_tlb_orig_size)
+               panic("%s: Failed to allocate %zu bytes align=0x%lx\n",
+                     __func__, alloc_size, PAGE_SIZE);
+
        for (i = 0; i < io_tlb_nslabs; i++) {
-               io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+               io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
                io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+               io_tlb_orig_size[i] = 0;
        }
        io_tlb_index = 0;
        no_iotlb_memory = false;
@@ -363,7 +382,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
         * between io_tlb_start and io_tlb_end.
         */
        io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL,
-                                     get_order(io_tlb_nslabs * sizeof(int)));
+                                     get_order(io_tlb_nslabs * sizeof(int)));
        if (!io_tlb_list)
                goto cleanup3;
 
@@ -374,9 +393,18 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
        if (!io_tlb_orig_addr)
                goto cleanup4;
 
+       io_tlb_orig_size = (size_t *)
+               __get_free_pages(GFP_KERNEL,
+                                get_order(io_tlb_nslabs *
+                                          sizeof(size_t)));
+       if (!io_tlb_orig_size)
+               goto cleanup5;
+
        for (i = 0; i < io_tlb_nslabs; i++) {
-               io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE);
+               io_tlb_list[i] = IO_TLB_SEGSIZE - io_tlb_offset(i);
                io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+               io_tlb_orig_size[i] = 0;
        }
        io_tlb_index = 0;
        no_iotlb_memory = false;
@@ -389,6 +417,10 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 
        return 0;
 
+cleanup5:
+       free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs *
+                                                             sizeof(phys_addr_t)));
+
 cleanup4:
        free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
                                                         sizeof(int)));
@@ -404,6 +436,8 @@ void __init swiotlb_exit(void)
                return;
 
        if (late_alloc) {
+               free_pages((unsigned long)io_tlb_orig_size,
+                          get_order(io_tlb_nslabs * sizeof(size_t)));
                free_pages((unsigned long)io_tlb_orig_addr,
                           get_order(io_tlb_nslabs * sizeof(phys_addr_t)));
                free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs *
@@ -413,6 +447,8 @@ void __init swiotlb_exit(void)
        } else {
                memblock_free_late(__pa(io_tlb_orig_addr),
                                   PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)));
+               memblock_free_late(__pa(io_tlb_orig_size),
+                                  PAGE_ALIGN(io_tlb_nslabs * sizeof(size_t)));
                memblock_free_late(__pa(io_tlb_list),
                                   PAGE_ALIGN(io_tlb_nslabs * sizeof(int)));
                memblock_free_late(io_tlb_start,
@@ -461,79 +497,71 @@ static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr,
        }
 }
 
-phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
-               size_t mapping_size, size_t alloc_size,
-               enum dma_data_direction dir, unsigned long attrs)
-{
-       dma_addr_t tbl_dma_addr = phys_to_dma_unencrypted(hwdev, io_tlb_start);
-       unsigned long flags;
-       phys_addr_t tlb_addr;
-       unsigned int nslots, stride, index, wrap;
-       int i;
-       unsigned long mask;
-       unsigned long offset_slots;
-       unsigned long max_slots;
-       unsigned long tmp_io_tlb_used;
-
-       if (no_iotlb_memory)
-               panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
-
-       if (mem_encrypt_active())
-               pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+#define slot_addr(start, idx)  ((start) + ((idx) << IO_TLB_SHIFT))
 
-       if (mapping_size > alloc_size) {
-               dev_warn_once(hwdev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
-                             mapping_size, alloc_size);
-               return (phys_addr_t)DMA_MAPPING_ERROR;
-       }
+/*
+ * Return the offset into an iotlb slot required to keep the device happy.
+ */
+static unsigned int swiotlb_align_offset(struct device *dev, u64 addr)
+{
+       return addr & dma_get_min_align_mask(dev) & (IO_TLB_SIZE - 1);
+}
 
-       mask = dma_get_seg_boundary(hwdev);
+/*
+ * Carefully handle integer overflow which can occur when boundary_mask == ~0UL.
+ */
+static inline unsigned long get_max_slots(unsigned long boundary_mask)
+{
+       if (boundary_mask == ~0UL)
+               return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+       return nr_slots(boundary_mask + 1);
+}
 
-       tbl_dma_addr &= mask;
+static unsigned int wrap_index(unsigned int index)
+{
+       if (index >= io_tlb_nslabs)
+               return 0;
+       return index;
+}
 
-       offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+/*
+ * Find a suitable number of IO TLB entries that will fit this request and
+ * allocate a buffer from that IO TLB pool.
+ */
+static int find_slots(struct device *dev, phys_addr_t orig_addr,
+               size_t alloc_size)
+{
+       unsigned long boundary_mask = dma_get_seg_boundary(dev);
+       dma_addr_t tbl_dma_addr =
+               phys_to_dma_unencrypted(dev, io_tlb_start) & boundary_mask;
+       unsigned long max_slots = get_max_slots(boundary_mask);
+       unsigned int iotlb_align_mask =
+               dma_get_min_align_mask(dev) & ~(IO_TLB_SIZE - 1);
+       unsigned int nslots = nr_slots(alloc_size), stride;
+       unsigned int index, wrap, count = 0, i;
+       unsigned long flags;
 
-       /*
-        * Carefully handle integer overflow which can occur when mask == ~0UL.
-        */
-       max_slots = mask + 1
-                   ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT
-                   : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
+       BUG_ON(!nslots);
 
        /*
-        * For mappings greater than or equal to a page, we limit the stride
-        * (and hence alignment) to a page size.
+        * For mappings with an alignment requirement don't bother looping to
+        * unaligned slots once we found an aligned one.  For allocations of
+        * PAGE_SIZE or larger only look for page aligned allocations.
         */
-       nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
+       stride = (iotlb_align_mask >> IO_TLB_SHIFT) + 1;
        if (alloc_size >= PAGE_SIZE)
-               stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT));
-       else
-               stride = 1;
-
-       BUG_ON(!nslots);
+               stride = max(stride, stride << (PAGE_SHIFT - IO_TLB_SHIFT));
 
-       /*
-        * Find suitable number of IO TLB entries size that will fit this
-        * request and allocate a buffer from that IO TLB pool.
-        */
        spin_lock_irqsave(&io_tlb_lock, flags);
-
        if (unlikely(nslots > io_tlb_nslabs - io_tlb_used))
                goto not_found;
 
-       index = ALIGN(io_tlb_index, stride);
-       if (index >= io_tlb_nslabs)
-               index = 0;
-       wrap = index;
-
+       index = wrap = wrap_index(ALIGN(io_tlb_index, stride));
        do {
-               while (iommu_is_span_boundary(index, nslots, offset_slots,
-                                             max_slots)) {
-                       index += stride;
-                       if (index >= io_tlb_nslabs)
-                               index = 0;
-                       if (index == wrap)
-                               goto not_found;
+               if ((slot_addr(tbl_dma_addr, index) & iotlb_align_mask) !=
+                   (orig_addr & iotlb_align_mask)) {
+                       index = wrap_index(index + 1);
+                       continue;
                }
 
                /*
@@ -541,55 +569,96 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, phys_addr_t orig_addr,
                 * contiguous buffers, we allocate the buffers from that slot
                 * and mark the entries as '0' indicating unavailable.
                 */
-               if (io_tlb_list[index] >= nslots) {
-                       int count = 0;
-
-                       for (i = index; i < (int) (index + nslots); i++)
-                               io_tlb_list[i] = 0;
-                       for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--)
-                               io_tlb_list[i] = ++count;
-                       tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT);
-
-                       /*
-                        * Update the indices to avoid searching in the next
-                        * round.
-                        */
-                       io_tlb_index = ((index + nslots) < io_tlb_nslabs
-                                       ? (index + nslots) : 0);
-
-                       goto found;
+               if (!iommu_is_span_boundary(index, nslots,
+                                           nr_slots(tbl_dma_addr),
+                                           max_slots)) {
+                       if (io_tlb_list[index] >= nslots)
+                               goto found;
                }
-               index += stride;
-               if (index >= io_tlb_nslabs)
-                       index = 0;
+               index = wrap_index(index + stride);
        } while (index != wrap);
 
 not_found:
-       tmp_io_tlb_used = io_tlb_used;
-
        spin_unlock_irqrestore(&io_tlb_lock, flags);
-       if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit())
-               dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
-                        alloc_size, io_tlb_nslabs, tmp_io_tlb_used);
-       return (phys_addr_t)DMA_MAPPING_ERROR;
+       return -1;
+
 found:
+       for (i = index; i < index + nslots; i++)
+               io_tlb_list[i] = 0;
+       for (i = index - 1;
+            io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 &&
+            io_tlb_list[i]; i--)
+               io_tlb_list[i] = ++count;
+
+       /*
+        * Update the indices to avoid searching in the next round.
+        */
+       if (index + nslots < io_tlb_nslabs)
+               io_tlb_index = index + nslots;
+       else
+               io_tlb_index = 0;
        io_tlb_used += nslots;
+
        spin_unlock_irqrestore(&io_tlb_lock, flags);
+       return index;
+}
+
+phys_addr_t swiotlb_tbl_map_single(struct device *dev, phys_addr_t orig_addr,
+               size_t mapping_size, size_t alloc_size,
+               enum dma_data_direction dir, unsigned long attrs)
+{
+       unsigned int offset = swiotlb_align_offset(dev, orig_addr);
+       unsigned int index, i;
+       phys_addr_t tlb_addr;
+
+       if (no_iotlb_memory)
+               panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer");
+
+       if (mem_encrypt_active())
+               pr_warn_once("Memory encryption is active and system is using DMA bounce buffers\n");
+
+       if (mapping_size > alloc_size) {
+               dev_warn_once(dev, "Invalid sizes (mapping: %zd bytes, alloc: %zd bytes)",
+                             mapping_size, alloc_size);
+               return (phys_addr_t)DMA_MAPPING_ERROR;
+       }
+
+       index = find_slots(dev, orig_addr, alloc_size + offset);
+       if (index == -1) {
+               if (!(attrs & DMA_ATTR_NO_WARN))
+                       dev_warn_ratelimited(dev,
+       "swiotlb buffer is full (sz: %zd bytes), total %lu (slots), used %lu (slots)\n",
+                                alloc_size, io_tlb_nslabs, io_tlb_used);
+               return (phys_addr_t)DMA_MAPPING_ERROR;
+       }
 
        /*
         * Save away the mapping from the original address to the DMA address.
         * This is needed when we sync the memory.  Then we sync the buffer if
         * needed.
         */
-       for (i = 0; i < nslots; i++)
-               io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT);
+       for (i = 0; i < nr_slots(alloc_size + offset); i++) {
+               io_tlb_orig_addr[index + i] = slot_addr(orig_addr, i);
+               io_tlb_orig_size[index + i] = alloc_size - (i << IO_TLB_SHIFT);
+       }
+       tlb_addr = slot_addr(io_tlb_start, index) + offset;
        if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) &&
            (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL))
                swiotlb_bounce(orig_addr, tlb_addr, mapping_size, DMA_TO_DEVICE);
-
        return tlb_addr;
 }
 
+static void validate_sync_size_and_truncate(struct device *hwdev, size_t orig_size, size_t *size)
+{
+       if (*size > orig_size) {
+               /* Warn and truncate mapping_size */
+               dev_WARN_ONCE(hwdev, 1,
+                       "Attempt for buffer overflow. Original size: %zu. Mapping size: %zu.\n",
+                       orig_size, *size);
+               *size = orig_size;
+       }
+}
+
 /*
  * tlb_addr is the physical address of the bounce buffer to unmap.
  */
@@ -598,10 +667,13 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
                              enum dma_data_direction dir, unsigned long attrs)
 {
        unsigned long flags;
-       int i, count, nslots = ALIGN(alloc_size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
-       int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+       unsigned int offset = swiotlb_align_offset(hwdev, tlb_addr);
+       int i, count, nslots = nr_slots(alloc_size + offset);
+       int index = (tlb_addr - offset - io_tlb_start) >> IO_TLB_SHIFT;
        phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
+       validate_sync_size_and_truncate(hwdev, io_tlb_orig_size[index], &mapping_size);
+
        /*
         * First, sync the memory before unmapping the entry
         */
@@ -617,26 +689,30 @@ void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr,
         * with slots below and above the pool being returned.
         */
        spin_lock_irqsave(&io_tlb_lock, flags);
-       {
-               count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ?
-                        io_tlb_list[index + nslots] : 0);
-               /*
-                * Step 1: return the slots to the free list, merging the
-                * slots with superceeding slots
-                */
-               for (i = index + nslots - 1; i >= index; i--) {
-                       io_tlb_list[i] = ++count;
-                       io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
-               }
-               /*
-                * Step 2: merge the returned slots with the preceding slots,
-                * if available (non zero)
-                */
-               for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--)
-                       io_tlb_list[i] = ++count;
+       if (index + nslots < ALIGN(index + 1, IO_TLB_SEGSIZE))
+               count = io_tlb_list[index + nslots];
+       else
+               count = 0;
 
-               io_tlb_used -= nslots;
+       /*
+        * Step 1: return the slots to the free list, merging the slots with
+        * succeeding slots
+        */
+       for (i = index + nslots - 1; i >= index; i--) {
+               io_tlb_list[i] = ++count;
+               io_tlb_orig_addr[i] = INVALID_PHYS_ADDR;
+               io_tlb_orig_size[i] = 0;
        }
+
+       /*
+        * Step 2: merge the returned slots with the preceding slots, if
+        * available (non zero)
+        */
+       for (i = index - 1;
+            io_tlb_offset(i) != IO_TLB_SEGSIZE - 1 && io_tlb_list[i];
+            i--)
+               io_tlb_list[i] = ++count;
+       io_tlb_used -= nslots;
        spin_unlock_irqrestore(&io_tlb_lock, flags);
 }
 
@@ -645,11 +721,13 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
                             enum dma_sync_target target)
 {
        int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT;
+       size_t orig_size = io_tlb_orig_size[index];
        phys_addr_t orig_addr = io_tlb_orig_addr[index];
 
        if (orig_addr == INVALID_PHYS_ADDR)
                return;
-       orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1);
+
+       validate_sync_size_and_truncate(hwdev, orig_size, &size);
 
        switch (target) {
        case SYNC_FOR_CPU:
@@ -707,7 +785,7 @@ dma_addr_t swiotlb_map(struct device *dev, phys_addr_t paddr, size_t size,
 
 size_t swiotlb_max_mapping_size(struct device *dev)
 {
-       return ((size_t)1 << IO_TLB_SHIFT) * IO_TLB_SEGSIZE;
+       return ((size_t)IO_TLB_SIZE) * IO_TLB_SEGSIZE;
 }
 
 bool is_swiotlb_active(void)
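
For illustration, the slot arithmetic above can be exercised standalone. Everything below is re-declared for the example (assuming IO_TLB_SHIFT == 11, i.e. 2 KiB slots); it mirrors, but is not, the kernel code:

#include <stdio.h>

#define BITS_PER_LONG   (8 * sizeof(unsigned long))
#define IO_TLB_SHIFT    11                      /* 2 KiB slots */
#define IO_TLB_SIZE     (1UL << IO_TLB_SHIFT)

/* Round a byte count up to whole slots. */
static unsigned long nr_slots(unsigned long val)
{
        return (val + IO_TLB_SIZE - 1) >> IO_TLB_SHIFT;
}

/* boundary_mask + 1 overflows to 0 when the mask is ~0UL, hence the test. */
static unsigned long get_max_slots(unsigned long boundary_mask)
{
        if (boundary_mask == ~0UL)
                return 1UL << (BITS_PER_LONG - IO_TLB_SHIFT);
        return nr_slots(boundary_mask + 1);
}

/* Low bits the bounce buffer must preserve for a device with this mask. */
static unsigned int align_offset(unsigned long min_align_mask, unsigned long addr)
{
        return addr & min_align_mask & (IO_TLB_SIZE - 1);
}

int main(void)
{
        printf("nr_slots(4096)        = %lu\n", nr_slots(4096));        /* 2 */
        printf("max_slots(~0UL)       = %lu\n", get_max_slots(~0UL));
        printf("max_slots(0xffffff)   = %lu\n", get_max_slots(0xffffff));
        printf("align_offset          = %#x\n", align_offset(0xfff, 0x12345));
        return 0;
}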
index 129dee5..03db40f 100644 (file)
@@ -269,7 +269,7 @@ static void event_function_call(struct perf_event *event, event_f func, void *da
        if (!event->parent) {
                /*
                 * If this is a !child event, we must hold ctx::mutex to
-                * stabilize the the event->ctx relation. See
+                * stabilize the event->ctx relation. See
                 * perf_event_ctx_lock().
                 */
                lockdep_assert_held(&ctx->mutex);
@@ -386,6 +386,7 @@ static DEFINE_MUTEX(perf_sched_mutex);
 static atomic_t perf_sched_count;
 
 static DEFINE_PER_CPU(atomic_t, perf_cgroup_events);
+static DEFINE_PER_CPU(int, perf_sched_cb_usages);
 static DEFINE_PER_CPU(struct pmu_event_list, pmu_sb_events);
 
 static atomic_t nr_mmap_events __read_mostly;
@@ -1303,7 +1304,7 @@ static void put_ctx(struct perf_event_context *ctx)
  * life-time rules separate them. That is an exiting task cannot fork, and a
  * spawning task cannot (yet) exit.
  *
- * But remember that that these are parent<->child context relations, and
+ * But remember that these are parent<->child context relations, and
  * migration does not affect children, therefore these two orderings should not
  * interact.
  *
@@ -1442,7 +1443,7 @@ static u64 primary_event_id(struct perf_event *event)
 /*
  * Get the perf_event_context for a task and lock it.
  *
- * This has to cope with with the fact that until it is locked,
+ * This has to cope with the fact that until it is locked,
  * the context could get moved to another task.
  */
 static struct perf_event_context *
@@ -2486,7 +2487,7 @@ static void perf_set_shadow_time(struct perf_event *event,
         * But this is a bit hairy.
         *
         * So instead, we have an explicit cgroup call to remain
-        * within the time time source all along. We believe it
+        * within the time source all along. We believe it
         * is cleaner and simpler to understand.
         */
        if (is_cgroup_event(event))
@@ -3461,11 +3462,16 @@ unlock:
        }
 }
 
+static DEFINE_PER_CPU(struct list_head, sched_cb_list);
+
 void perf_sched_cb_dec(struct pmu *pmu)
 {
        struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-       --cpuctx->sched_cb_usage;
+       this_cpu_dec(perf_sched_cb_usages);
+
+       if (!--cpuctx->sched_cb_usage)
+               list_del(&cpuctx->sched_cb_entry);
 }
 
 
@@ -3473,7 +3479,10 @@ void perf_sched_cb_inc(struct pmu *pmu)
 {
        struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context);
 
-       cpuctx->sched_cb_usage++;
+       if (!cpuctx->sched_cb_usage++)
+               list_add(&cpuctx->sched_cb_entry, this_cpu_ptr(&sched_cb_list));
+
+       this_cpu_inc(perf_sched_cb_usages);
 }
 
 /*
@@ -3502,6 +3511,24 @@ static void __perf_pmu_sched_task(struct perf_cpu_context *cpuctx, bool sched_in
        perf_ctx_unlock(cpuctx, cpuctx->task_ctx);
 }
 
+static void perf_pmu_sched_task(struct task_struct *prev,
+                               struct task_struct *next,
+                               bool sched_in)
+{
+       struct perf_cpu_context *cpuctx;
+
+       if (prev == next)
+               return;
+
+       list_for_each_entry(cpuctx, this_cpu_ptr(&sched_cb_list), sched_cb_entry) {
+               /* will be handled in perf_event_context_sched_in/out */
+               if (cpuctx->task_ctx)
+                       continue;
+
+               __perf_pmu_sched_task(cpuctx, sched_in);
+       }
+}
+
 static void perf_event_switch(struct task_struct *task,
                              struct task_struct *next_prev, bool sched_in);
 
@@ -3524,6 +3551,9 @@ void __perf_event_task_sched_out(struct task_struct *task,
 {
        int ctxn;
 
+       if (__this_cpu_read(perf_sched_cb_usages))
+               perf_pmu_sched_task(task, next, false);
+
        if (atomic_read(&nr_switch_events))
                perf_event_switch(task, next, false);
 
@@ -3832,6 +3862,9 @@ void __perf_event_task_sched_in(struct task_struct *prev,
 
        if (atomic_read(&nr_switch_events))
                perf_event_switch(task, prev, true);
+
+       if (__this_cpu_read(perf_sched_cb_usages))
+               perf_pmu_sched_task(prev, task, true);
 }
 
 static u64 perf_calculate_period(struct perf_event *event, u64 nsec, u64 count)
@@ -4656,7 +4689,7 @@ static void unaccount_event(struct perf_event *event)
        if (event->parent)
                return;
 
-       if (event->attach_state & PERF_ATTACH_TASK)
+       if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
                dec = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_dec(&nr_mmap_events);
@@ -11175,7 +11208,7 @@ static void account_event(struct perf_event *event)
        if (event->parent)
                return;
 
-       if (event->attach_state & PERF_ATTACH_TASK)
+       if (event->attach_state & (PERF_ATTACH_TASK | PERF_ATTACH_SCHED_CB))
                inc = true;
        if (event->attr.mmap || event->attr.mmap_data)
                atomic_inc(&nr_mmap_events);
@@ -12972,6 +13005,7 @@ static void __init perf_event_init_all_cpus(void)
 #ifdef CONFIG_CGROUP_PERF
                INIT_LIST_HEAD(&per_cpu(cgrp_cpuctx_list, cpu));
 #endif
+               INIT_LIST_HEAD(&per_cpu(sched_cb_list, cpu));
        }
 }
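
The new per-CPU perf_sched_cb_usages counter exists so the context-switch hot path can skip the callback list walk entirely when nothing is registered. A single-threaded userspace sketch of that guard pattern (all names here are illustrative, not the kernel API):

#include <stdio.h>

struct cb {
        void (*fn)(int sched_in);
        struct cb *next;
};

static int cb_usages;           /* stands in for perf_sched_cb_usages */
static struct cb *cb_list;      /* stands in for sched_cb_list */

static void cb_register(struct cb *c)
{
        c->next = cb_list;
        cb_list = c;
        cb_usages++;            /* cheap flag checked on every switch */
}

static void sched_switch(int sched_in)
{
        struct cb *c;

        if (!cb_usages)         /* fast path: one read, no list walk */
                return;
        for (c = cb_list; c; c = c->next)
                c->fn(sched_in);
}

static void show(int sched_in)
{
        printf("callback: sched_%s\n", sched_in ? "in" : "out");
}

int main(void)
{
        struct cb c = { .fn = show };

        sched_switch(1);        /* prints nothing: counter is zero */
        cb_register(&c);
        sched_switch(0);        /* walks the list and fires the callback */
        return 0;
}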
 
index 3ea7f8f..6addc97 100644 (file)
@@ -1733,7 +1733,7 @@ void uprobe_free_utask(struct task_struct *t)
 }
 
 /*
- * Allocate a uprobe_task object for the task if if necessary.
+ * Allocate a uprobe_task object for the task if necessary.
  * Called when the thread hits a breakpoint.
  *
  * Returns:
index d66cd10..426cd0c 100644 (file)
@@ -994,6 +994,13 @@ static void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 #endif
 }
 
+static void mm_init_pasid(struct mm_struct *mm)
+{
+#ifdef CONFIG_IOMMU_SUPPORT
+       mm->pasid = INIT_PASID;
+#endif
+}
+
 static void mm_init_uprobes_state(struct mm_struct *mm)
 {
 #ifdef CONFIG_UPROBES
@@ -1024,6 +1031,7 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
        mm_init_cpumask(mm);
        mm_init_aio(mm);
        mm_init_owner(mm, p);
+       mm_init_pasid(mm);
        RCU_INIT_POINTER(mm->exe_file, NULL);
        mmu_notifier_subscriptions_init(mm);
        init_tlb_flush_pending(mm);
@@ -1940,6 +1948,14 @@ static __latent_entropy struct task_struct *copy_process(
        p = dup_task_struct(current, node);
        if (!p)
                goto fork_out;
+       if (args->io_thread) {
+               /*
+                * Mark us an IO worker, and block any signal that isn't
+                * fatal or STOP
+                */
+               p->flags |= PF_IO_WORKER;
+               siginitsetinv(&p->blocked, sigmask(SIGKILL)|sigmask(SIGSTOP));
+       }
 
        /*
         * This _must_ happen before we call free_task(), i.e. before we jump
@@ -2411,6 +2427,28 @@ struct mm_struct *copy_init_mm(void)
 }
 
 /*
+ * This is like kernel_clone(), but shaved down and tailored to just
+ * creating io_uring workers. It returns a created task, or an error pointer.
+ * The returned task is inactive, and the caller must fire it up through
+ * wake_up_new_task(p). All signals except SIGKILL and SIGSTOP are blocked
+ * in the created task.
+ */
+struct task_struct *create_io_thread(int (*fn)(void *), void *arg, int node)
+{
+       unsigned long flags = CLONE_FS|CLONE_FILES|CLONE_SIGHAND|CLONE_THREAD|
+                               CLONE_IO;
+       struct kernel_clone_args args = {
+               .flags          = ((lower_32_bits(flags) | CLONE_VM |
+                                   CLONE_UNTRACED) & ~CSIGNAL),
+               .exit_signal    = (lower_32_bits(flags) & CSIGNAL),
+               .stack          = (unsigned long)fn,
+               .stack_size     = (unsigned long)arg,
+               .io_thread      = 1,
+       };
+
+       return copy_process(NULL, 0, node, &args);
+}
+
+/*
  *  Ok, this is the main fork-routine.
  *
  * It copies the process, and if successful kick-starts
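
For context, a kernel-side caller pairs create_io_thread() with wake_up_new_task(), per the comment above. A hypothetical sketch (start_worker() and my_worker_fn() are invented names; error handling minimal):

/* Hypothetical usage sketch, not part of this patch. */
static int my_worker_fn(void *data)
{
        /* runs with all signals except SIGKILL/SIGSTOP blocked */
        return 0;
}

static int start_worker(void *data)
{
        struct task_struct *tsk;

        tsk = create_io_thread(my_worker_fn, data, NUMA_NO_NODE);
        if (IS_ERR(tsk))
                return PTR_ERR(tsk);
        wake_up_new_task(tsk);  /* the new task is inactive until woken */
        return 0;
}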
index e68db77..00febd6 100644 (file)
@@ -2728,14 +2728,13 @@ retry:
                goto out;
 
        restart = &current->restart_block;
-       restart->fn = futex_wait_restart;
        restart->futex.uaddr = uaddr;
        restart->futex.val = val;
        restart->futex.time = *abs_time;
        restart->futex.bitset = bitset;
        restart->futex.flags = flags | FLAGS_HAS_TIMEOUT;
 
-       ret = -ERESTART_RESTARTBLOCK;
+       ret = set_restart_fn(restart, futex_wait_restart);
 
 out:
        if (to) {
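
set_restart_fn() folds the restart->fn assignment the old code did by hand together with the -ERESTART_RESTARTBLOCK return value, and gives the architecture a hook to tag the restart data. A sketch of the helper's shape, not a verbatim copy:

/* Sketch of the helper's shape; see <linux/thread_info.h>. */
static inline long set_restart_fn(struct restart_block *restart,
                                  long (*fn)(struct restart_block *))
{
        restart->fn = fn;
        arch_set_restart_data(restart);
        return -ERESTART_RESTARTBLOCK;
}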
index c94b820..8743150 100644 (file)
@@ -75,7 +75,9 @@ struct gcov_fn_info {
 
        u32 num_counters;
        u64 *counters;
+#if CONFIG_CLANG_VERSION < 110000
        const char *function_name;
+#endif
 };
 
 static struct gcov_info *current_info;
@@ -105,6 +107,7 @@ void llvm_gcov_init(llvm_gcov_callback writeout, llvm_gcov_callback flush)
 }
 EXPORT_SYMBOL(llvm_gcov_init);
 
+#if CONFIG_CLANG_VERSION < 110000
 void llvm_gcda_start_file(const char *orig_filename, const char version[4],
                u32 checksum)
 {
@@ -113,7 +116,17 @@ void llvm_gcda_start_file(const char *orig_filename, const char version[4],
        current_info->checksum = checksum;
 }
 EXPORT_SYMBOL(llvm_gcda_start_file);
+#else
+void llvm_gcda_start_file(const char *orig_filename, u32 version, u32 checksum)
+{
+       current_info->filename = orig_filename;
+       current_info->version = version;
+       current_info->checksum = checksum;
+}
+EXPORT_SYMBOL(llvm_gcda_start_file);
+#endif
 
+#if CONFIG_CLANG_VERSION < 110000
 void llvm_gcda_emit_function(u32 ident, const char *function_name,
                u32 func_checksum, u8 use_extra_checksum, u32 cfg_checksum)
 {
@@ -133,6 +146,24 @@ void llvm_gcda_emit_function(u32 ident, const char *function_name,
        list_add_tail(&info->head, &current_info->functions);
 }
 EXPORT_SYMBOL(llvm_gcda_emit_function);
+#else
+void llvm_gcda_emit_function(u32 ident, u32 func_checksum,
+               u8 use_extra_checksum, u32 cfg_checksum)
+{
+       struct gcov_fn_info *info = kzalloc(sizeof(*info), GFP_KERNEL);
+
+       if (!info)
+               return;
+
+       INIT_LIST_HEAD(&info->head);
+       info->ident = ident;
+       info->checksum = func_checksum;
+       info->use_extra_checksum = use_extra_checksum;
+       info->cfg_checksum = cfg_checksum;
+       list_add_tail(&info->head, &current_info->functions);
+}
+EXPORT_SYMBOL(llvm_gcda_emit_function);
+#endif
 
 void llvm_gcda_emit_arcs(u32 num_counters, u64 *counters)
 {
@@ -295,6 +326,7 @@ void gcov_info_add(struct gcov_info *dst, struct gcov_info *src)
        }
 }
 
+#if CONFIG_CLANG_VERSION < 110000
 static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn)
 {
        size_t cv_size; /* counter values size */
@@ -322,6 +354,28 @@ err_name:
        kfree(fn_dup);
        return NULL;
 }
+#else
+static struct gcov_fn_info *gcov_fn_info_dup(struct gcov_fn_info *fn)
+{
+       size_t cv_size; /* counter values size */
+       struct gcov_fn_info *fn_dup = kmemdup(fn, sizeof(*fn),
+                       GFP_KERNEL);
+       if (!fn_dup)
+               return NULL;
+       INIT_LIST_HEAD(&fn_dup->head);
+
+       cv_size = fn->num_counters * sizeof(fn->counters[0]);
+       fn_dup->counters = vmalloc(cv_size);
+       if (!fn_dup->counters) {
+               kfree(fn_dup);
+               return NULL;
+       }
+
+       memcpy(fn_dup->counters, fn->counters, cv_size);
+
+       return fn_dup;
+}
+#endif
 
 /**
  * gcov_info_dup - duplicate profiling data set
@@ -362,6 +416,7 @@ err:
  * gcov_info_free - release memory for profiling data set duplicate
  * @info: profiling data set duplicate to free
  */
+#if CONFIG_CLANG_VERSION < 110000
 void gcov_info_free(struct gcov_info *info)
 {
        struct gcov_fn_info *fn, *tmp;
@@ -375,6 +430,20 @@ void gcov_info_free(struct gcov_info *info)
        kfree(info->filename);
        kfree(info);
 }
+#else
+void gcov_info_free(struct gcov_info *info)
+{
+       struct gcov_fn_info *fn, *tmp;
+
+       list_for_each_entry_safe(fn, tmp, &info->functions, head) {
+               vfree(fn->counters);
+               list_del(&fn->head);
+               kfree(fn);
+       }
+       kfree(info->filename);
+       kfree(info);
+}
+#endif
 
 #define ITER_STRIDE    PAGE_SIZE
 
index fe7e638..787b381 100644 (file)
 struct group_info *groups_alloc(int gidsetsize)
 {
        struct group_info *gi;
-       unsigned int len;
-
-       len = sizeof(struct group_info) + sizeof(kgid_t) * gidsetsize;
-       gi = kmalloc(len, GFP_KERNEL_ACCOUNT|__GFP_NOWARN|__GFP_NORETRY);
-       if (!gi)
-               gi = __vmalloc(len, GFP_KERNEL_ACCOUNT);
+       gi = kvmalloc(struct_size(gi, gid, gidsetsize), GFP_KERNEL_ACCOUNT);
        if (!gi)
                return NULL;
 
index 4800660..40880c3 100644 (file)
@@ -159,7 +159,7 @@ static const struct irq_domain_ops irq_sim_domain_ops = {
  * irq_domain_create_sim - Create a new interrupt simulator irq_domain and
  *                         allocate a range of dummy interrupts.
  *
- * @fnode:      struct fwnode_handle to be associated with this domain.
+ * @fwnode:     struct fwnode_handle to be associated with this domain.
  * @num_irqs:   Number of interrupts to allocate.
  *
  * On success: return a new irq_domain object.
@@ -228,7 +228,7 @@ static void devm_irq_domain_release_sim(struct device *dev, void *res)
  *                              a managed device.
  *
  * @dev:        Device to initialize the simulator object for.
- * @fnode:      struct fwnode_handle to be associated with this domain.
+ * @fwnode:     struct fwnode_handle to be associated with this domain.
  * @num_irqs:   Number of interrupts to allocate
  *
  * On success: return a new irq_domain object.
index 6aacd34..d10ab1d 100644 (file)
@@ -205,6 +205,7 @@ struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size,
        }
 
        fwnode_handle_get(fwnode);
+       fwnode_dev_initialized(fwnode, true);
 
        /* Fill structure */
        INIT_RADIX_TREE(&domain->revmap_tree, GFP_KERNEL);
@@ -253,6 +254,7 @@ void irq_domain_remove(struct irq_domain *domain)
 
        pr_debug("Removed domain %s\n", domain->name);
 
+       fwnode_dev_initialized(domain->fwnode, false);
        fwnode_handle_put(domain->fwnode);
        if (domain->flags & IRQ_DOMAIN_NAME_ALLOCATED)
                kfree(domain->name);
@@ -1896,16 +1898,15 @@ DEFINE_SHOW_ATTRIBUTE(irq_domain_debug);
 
 static void debugfs_add_domain_dir(struct irq_domain *d)
 {
-       if (!d->name || !domain_dir || d->debugfs_file)
+       if (!d->name || !domain_dir)
                return;
-       d->debugfs_file = debugfs_create_file(d->name, 0444, domain_dir, d,
-                                             &irq_domain_debug_fops);
+       debugfs_create_file(d->name, 0444, domain_dir, d,
+                           &irq_domain_debug_fops);
 }
 
 static void debugfs_remove_domain_dir(struct irq_domain *d)
 {
-       debugfs_remove(d->debugfs_file);
-       d->debugfs_file = NULL;
+       debugfs_remove(debugfs_lookup(d->name, domain_dir));
 }
 
 void __init irq_domain_debugfs_init(struct dentry *root)
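
With the cached dentry gone, an irq_domain carries no debugfs state at all: creation registers by name, and removal re-resolves the same name. The stateless pattern, as a sketch with generic names:

/* Stateless debugfs pattern (sketch): nothing cached in between. */
static void add_entry(struct dentry *parent, const char *name, void *data,
                      const struct file_operations *fops)
{
        debugfs_create_file(name, 0444, parent, data, fops);
}

static void del_entry(struct dentry *parent, const char *name)
{
        debugfs_remove(debugfs_lookup(name, parent));
}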
index dec3f73..21ea370 100644 (file)
@@ -1142,11 +1142,15 @@ irq_forced_thread_fn(struct irq_desc *desc, struct irqaction *action)
        irqreturn_t ret;
 
        local_bh_disable();
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_disable();
        ret = action->thread_fn(action->irq, action->dev_id);
        if (ret == IRQ_HANDLED)
                atomic_inc(&desc->threads_handled);
 
        irq_finalize_oneshot(desc, action);
+       if (!IS_ENABLED(CONFIG_PREEMPT_RT))
+               local_irq_enable();
        local_bh_enable();
        return ret;
 }
index c6a39d6..ba39fbb 100644 (file)
@@ -407,6 +407,14 @@ static bool jump_label_can_update(struct jump_entry *entry, bool init)
                return false;
 
        if (!kernel_text_address(jump_entry_code(entry))) {
+               /*
+                * This skips patching built-in __exit, which
+                * is part of init_section_contains() but is
+                * not part of kernel_text_address().
+                *
+                * Skipping built-in __exit is fine since it
+                * will never be executed.
+                */
                WARN_ONCE(!jump_entry_is_init(entry),
                          "can't patch jump_label at %pS",
                          (void *)jump_entry_code(entry));
index fe9de06..8043a90 100644 (file)
@@ -177,6 +177,11 @@ unsigned long kallsyms_lookup_name(const char *name)
        return module_kallsyms_lookup_name(name);
 }
 
+#ifdef CONFIG_LIVEPATCH
+/*
+ * Iterate over all symbols in vmlinux.  For symbols from modules use
+ * module_kallsyms_on_each_symbol instead.
+ */
 int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
                                      unsigned long),
                            void *data)
@@ -192,8 +197,9 @@ int kallsyms_on_each_symbol(int (*fn)(void *, const char *, struct module *,
                if (ret != 0)
                        return ret;
        }
-       return module_kallsyms_on_each_symbol(fn, data);
+       return 0;
 }
+#endif /* CONFIG_LIVEPATCH */
 
 static unsigned long get_symbol_pos(unsigned long addr,
                                    unsigned long *symbolsize,
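
A hypothetical walker built on the signature above (and, after this change, available only on CONFIG_LIVEPATCH builds); per the loop shown, a non-zero return from the callback stops the iteration:

/* Hypothetical caller sketch: count every vmlinux symbol. */
static int count_cb(void *data, const char *name, struct module *mod,
                    unsigned long addr)
{
        (*(unsigned long *)data)++;
        return 0;               /* non-zero would stop the walk */
}

static unsigned long count_vmlinux_symbols(void)
{
        unsigned long n = 0;

        kallsyms_on_each_symbol(count_cb, &n);
        return n;
}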
index 39d30cc..48aaf2a 100644 (file)
@@ -13,8 +13,6 @@ void kimage_terminate(struct kimage *image);
 int kimage_is_destination_range(struct kimage *image,
                                unsigned long start, unsigned long end);
 
-int machine_kexec_post_load(struct kimage *image);
-
 extern struct mutex kexec_mutex;
 
 #ifdef CONFIG_KEXEC_FILE
index f76fdb9..335d988 100644 (file)
@@ -19,6 +19,7 @@
 #include <linux/moduleloader.h>
 #include <linux/completion.h>
 #include <linux/memory.h>
+#include <linux/rcupdate.h>
 #include <asm/cacheflush.h>
 #include "core.h"
 #include "patch.h"
@@ -57,7 +58,7 @@ static void klp_find_object_module(struct klp_object *obj)
        if (!klp_is_module(obj))
                return;
 
-       mutex_lock(&module_mutex);
+       rcu_read_lock_sched();
        /*
         * We do not want to block removal of patched modules and therefore
         * we do not take a reference here. The patches are removed by
@@ -74,7 +75,7 @@ static void klp_find_object_module(struct klp_object *obj)
        if (mod && mod->klp_alive)
                obj->mod = mod;
 
-       mutex_unlock(&module_mutex);
+       rcu_read_unlock_sched();
 }
 
 static bool klp_initialized(void)
@@ -163,12 +164,10 @@ static int klp_find_object_symbol(const char *objname, const char *name,
                .pos = sympos,
        };
 
-       mutex_lock(&module_mutex);
        if (objname)
                module_kallsyms_on_each_symbol(klp_find_callback, &args);
        else
                kallsyms_on_each_symbol(klp_find_callback, &args);
-       mutex_unlock(&module_mutex);
 
        /*
         * Ensure an address was found. If sympos is 0, ensure symbol is unique;
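
The switch from module_mutex to rcu_read_lock_sched() works because the walk only peeks at the list and deliberately takes no reference. A read-side sketch of that shape (purely illustrative: the modules list is private to the module core):

static bool module_is_live_rcu(struct module *needle)
{
        struct module *mod;
        bool found = false;

        rcu_read_lock_sched();          /* pins the list, not the modules */
        list_for_each_entry_rcu(mod, &modules, list) {
                if (mod == needle && mod->state != MODULE_STATE_UNFORMED) {
                        found = true;
                        break;
                }
        }
        rcu_read_unlock_sched();
        return found;                   /* no reference taken, by design */
}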
index adb9350..622ebdf 100644 (file)
@@ -626,7 +626,7 @@ static inline int mutex_can_spin_on_owner(struct mutex *lock)
  */
 static __always_inline bool
 mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
-                     const bool use_ww_ctx, struct mutex_waiter *waiter)
+                     struct mutex_waiter *waiter)
 {
        if (!waiter) {
                /*
@@ -702,7 +702,7 @@ fail:
 #else
 static __always_inline bool
 mutex_optimistic_spin(struct mutex *lock, struct ww_acquire_ctx *ww_ctx,
-                     const bool use_ww_ctx, struct mutex_waiter *waiter)
+                     struct mutex_waiter *waiter)
 {
        return false;
 }
@@ -922,6 +922,9 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        struct ww_mutex *ww;
        int ret;
 
+       if (!use_ww_ctx)
+               ww_ctx = NULL;
+
        might_sleep();
 
 #ifdef CONFIG_DEBUG_MUTEXES
@@ -929,7 +932,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 #endif
 
        ww = container_of(lock, struct ww_mutex, base);
-       if (use_ww_ctx && ww_ctx) {
+       if (ww_ctx) {
                if (unlikely(ww_ctx == READ_ONCE(ww->ctx)))
                        return -EALREADY;
 
@@ -946,10 +949,10 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
        mutex_acquire_nest(&lock->dep_map, subclass, 0, nest_lock, ip);
 
        if (__mutex_trylock(lock) ||
-           mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, NULL)) {
+           mutex_optimistic_spin(lock, ww_ctx, NULL)) {
                /* got the lock, yay! */
                lock_acquired(&lock->dep_map, ip);
-               if (use_ww_ctx && ww_ctx)
+               if (ww_ctx)
                        ww_mutex_set_context_fastpath(ww, ww_ctx);
                preempt_enable();
                return 0;
@@ -960,7 +963,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
         * After waiting to acquire the wait_lock, try again.
         */
        if (__mutex_trylock(lock)) {
-               if (use_ww_ctx && ww_ctx)
+               if (ww_ctx)
                        __ww_mutex_check_waiters(lock, ww_ctx);
 
                goto skip_wait;
@@ -1013,7 +1016,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                        goto err;
                }
 
-               if (use_ww_ctx && ww_ctx) {
+               if (ww_ctx) {
                        ret = __ww_mutex_check_kill(lock, &waiter, ww_ctx);
                        if (ret)
                                goto err;
@@ -1026,7 +1029,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * ww_mutex needs to always recheck its position since its waiter
                 * list is not FIFO ordered.
                 */
-               if ((use_ww_ctx && ww_ctx) || !first) {
+               if (ww_ctx || !first) {
                        first = __mutex_waiter_is_first(lock, &waiter);
                        if (first)
                                __mutex_set_flag(lock, MUTEX_FLAG_HANDOFF);
@@ -1039,7 +1042,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
                 * or we must see its unlock and acquire.
                 */
                if (__mutex_trylock(lock) ||
-                   (first && mutex_optimistic_spin(lock, ww_ctx, use_ww_ctx, &waiter)))
+                   (first && mutex_optimistic_spin(lock, ww_ctx, &waiter)))
                        break;
 
                spin_lock(&lock->wait_lock);
@@ -1048,7 +1051,7 @@ __mutex_lock_common(struct mutex *lock, long state, unsigned int subclass,
 acquired:
        __set_current_state(TASK_RUNNING);
 
-       if (use_ww_ctx && ww_ctx) {
+       if (ww_ctx) {
                /*
                 * Wound-Wait; we stole the lock (!first_waiter), check the
                 * waiters as anyone might want to wound us.
@@ -1068,7 +1071,7 @@ skip_wait:
        /* got the lock - cleanup and rejoice! */
        lock_acquired(&lock->dep_map, ip);
 
-       if (use_ww_ctx && ww_ctx)
+       if (ww_ctx)
                ww_mutex_lock_acquired(ww, ww_ctx);
 
        spin_unlock(&lock->wait_lock);
index 03b2113..48fff64 100644 (file)
@@ -1420,7 +1420,7 @@ rt_mutex_fasttrylock(struct rt_mutex *lock,
 }
 
 /*
- * Performs the wakeup of the the top-waiter and re-enables preemption.
+ * Performs the wakeup of the top-waiter and re-enables preemption.
  */
 void rt_mutex_postunlock(struct wake_q_head *wake_q)
 {
@@ -1819,7 +1819,7 @@ struct task_struct *rt_mutex_next_owner(struct rt_mutex *lock)
  *                     been started.
  * @waiter:            the pre-initialized rt_mutex_waiter
  *
- * Wait for the the lock acquisition started on our behalf by
+ * Wait for the lock acquisition started on our behalf by
  * rt_mutex_start_proxy_lock(). Upon failure, the caller must call
  * rt_mutex_cleanup_proxy_lock().
  *
index ba67600..abba5df 100644 (file)
@@ -1048,7 +1048,7 @@ rwsem_down_write_slowpath(struct rw_semaphore *sem, int state)
 
                /*
                 * If there were already threads queued before us and:
-                *  1) there are no no active locks, wake the front
+                *  1) there are no active locks, wake the front
                 *     queued process(es) as the handoff bit might be set.
                 *  2) there are no active writers and some readers, the lock
                 *     must be read owned; so we try to wake any read lock
index d9dd94d..9aa855a 100644 (file)
@@ -119,7 +119,7 @@ EXPORT_SYMBOL(down_killable);
  * @sem: the semaphore to be acquired
  *
  * Try to acquire the semaphore atomically.  Returns 0 if the semaphore has
- * been acquired successfully or 1 if it it cannot be acquired.
+ * been acquired successfully or 1 if it cannot be acquired.
  *
  * NOTE: This return value is inverted from both spin_trylock and
  * mutex_trylock!  Be careful about this when converting code.
index 4bf30e4..3047935 100644 (file)
@@ -87,8 +87,7 @@
  * 3) module_addr_min/module_addr_max.
  * (delete and add uses RCU list operations).
  */
-DEFINE_MUTEX(module_mutex);
-EXPORT_SYMBOL_GPL(module_mutex);
+static DEFINE_MUTEX(module_mutex);
 static LIST_HEAD(modules);
 
 /* Work queue for freeing init sections in success case */
@@ -256,11 +255,6 @@ static void mod_update_bounds(struct module *mod)
 struct list_head *kdb_modules = &modules; /* kdb needs the list of modules */
 #endif /* CONFIG_KGDB_KDB */
 
-static void module_assert_mutex(void)
-{
-       lockdep_assert_held(&module_mutex);
-}
-
 static void module_assert_mutex_or_preempt(void)
 {
 #ifdef CONFIG_LOCKDEP
@@ -414,19 +408,8 @@ extern const struct kernel_symbol __start___ksymtab[];
 extern const struct kernel_symbol __stop___ksymtab[];
 extern const struct kernel_symbol __start___ksymtab_gpl[];
 extern const struct kernel_symbol __stop___ksymtab_gpl[];
-extern const struct kernel_symbol __start___ksymtab_gpl_future[];
-extern const struct kernel_symbol __stop___ksymtab_gpl_future[];
 extern const s32 __start___kcrctab[];
 extern const s32 __start___kcrctab_gpl[];
-extern const s32 __start___kcrctab_gpl_future[];
-#ifdef CONFIG_UNUSED_SYMBOLS
-extern const struct kernel_symbol __start___ksymtab_unused[];
-extern const struct kernel_symbol __stop___ksymtab_unused[];
-extern const struct kernel_symbol __start___ksymtab_unused_gpl[];
-extern const struct kernel_symbol __stop___ksymtab_unused_gpl[];
-extern const s32 __start___kcrctab_unused[];
-extern const s32 __start___kcrctab_unused_gpl[];
-#endif
 
 #ifndef CONFIG_MODVERSIONS
 #define symversion(base, idx) NULL
@@ -434,87 +417,14 @@ extern const s32 __start___kcrctab_unused_gpl[];
 #define symversion(base, idx) ((base != NULL) ? ((base) + (idx)) : NULL)
 #endif
 
-static bool each_symbol_in_section(const struct symsearch *arr,
-                                  unsigned int arrsize,
-                                  struct module *owner,
-                                  bool (*fn)(const struct symsearch *syms,
-                                             struct module *owner,
-                                             void *data),
-                                  void *data)
-{
-       unsigned int j;
-
-       for (j = 0; j < arrsize; j++) {
-               if (fn(&arr[j], owner, data))
-                       return true;
-       }
-
-       return false;
-}
-
-/* Returns true as soon as fn returns true, otherwise false. */
-static bool each_symbol_section(bool (*fn)(const struct symsearch *arr,
-                                   struct module *owner,
-                                   void *data),
-                        void *data)
-{
-       struct module *mod;
-       static const struct symsearch arr[] = {
-               { __start___ksymtab, __stop___ksymtab, __start___kcrctab,
-                 NOT_GPL_ONLY, false },
-               { __start___ksymtab_gpl, __stop___ksymtab_gpl,
-                 __start___kcrctab_gpl,
-                 GPL_ONLY, false },
-               { __start___ksymtab_gpl_future, __stop___ksymtab_gpl_future,
-                 __start___kcrctab_gpl_future,
-                 WILL_BE_GPL_ONLY, false },
-#ifdef CONFIG_UNUSED_SYMBOLS
-               { __start___ksymtab_unused, __stop___ksymtab_unused,
-                 __start___kcrctab_unused,
-                 NOT_GPL_ONLY, true },
-               { __start___ksymtab_unused_gpl, __stop___ksymtab_unused_gpl,
-                 __start___kcrctab_unused_gpl,
-                 GPL_ONLY, true },
-#endif
-       };
-
-       module_assert_mutex_or_preempt();
-
-       if (each_symbol_in_section(arr, ARRAY_SIZE(arr), NULL, fn, data))
-               return true;
-
-       list_for_each_entry_rcu(mod, &modules, list,
-                               lockdep_is_held(&module_mutex)) {
-               struct symsearch arr[] = {
-                       { mod->syms, mod->syms + mod->num_syms, mod->crcs,
-                         NOT_GPL_ONLY, false },
-                       { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
-                         mod->gpl_crcs,
-                         GPL_ONLY, false },
-                       { mod->gpl_future_syms,
-                         mod->gpl_future_syms + mod->num_gpl_future_syms,
-                         mod->gpl_future_crcs,
-                         WILL_BE_GPL_ONLY, false },
-#ifdef CONFIG_UNUSED_SYMBOLS
-                       { mod->unused_syms,
-                         mod->unused_syms + mod->num_unused_syms,
-                         mod->unused_crcs,
-                         NOT_GPL_ONLY, true },
-                       { mod->unused_gpl_syms,
-                         mod->unused_gpl_syms + mod->num_unused_gpl_syms,
-                         mod->unused_gpl_crcs,
-                         GPL_ONLY, true },
-#endif
-               };
-
-               if (mod->state == MODULE_STATE_UNFORMED)
-                       continue;
-
-               if (each_symbol_in_section(arr, ARRAY_SIZE(arr), mod, fn, data))
-                       return true;
-       }
-       return false;
-}
+struct symsearch {
+       const struct kernel_symbol *start, *stop;
+       const s32 *crcs;
+       enum mod_license {
+               NOT_GPL_ONLY,
+               GPL_ONLY,
+       } license;
+};
 
 struct find_symbol_arg {
        /* Input */
@@ -535,28 +445,8 @@ static bool check_exported_symbol(const struct symsearch *syms,
 {
        struct find_symbol_arg *fsa = data;
 
-       if (!fsa->gplok) {
-               if (syms->license == GPL_ONLY)
-                       return false;
-               if (syms->license == WILL_BE_GPL_ONLY && fsa->warn) {
-                       pr_warn("Symbol %s is being used by a non-GPL module, "
-                               "which will not be allowed in the future\n",
-                               fsa->name);
-               }
-       }
-
-#ifdef CONFIG_UNUSED_SYMBOLS
-       if (syms->unused && fsa->warn) {
-               pr_warn("Symbol %s is marked as UNUSED, however this module is "
-                       "using it.\n", fsa->name);
-               pr_warn("This symbol will go away in the future.\n");
-               pr_warn("Please evaluate if this is the right api to use and "
-                       "if it really is, submit a report to the linux kernel "
-                       "mailing list together with submitting your code for "
-                       "inclusion.\n");
-       }
-#endif
-
+       if (!fsa->gplok && syms->license == GPL_ONLY)
+               return false;
        fsa->owner = owner;
        fsa->crc = symversion(syms->crcs, symnum);
        fsa->sym = &syms->start[symnum];
@@ -619,31 +509,44 @@ static bool find_exported_symbol_in_section(const struct symsearch *syms,
  * Find an exported symbol and return it, along with, (optional) crc and
  * (optional) module which owns it.  Needs preempt disabled or module_mutex.
  */
-static const struct kernel_symbol *find_symbol(const char *name,
-                                       struct module **owner,
-                                       const s32 **crc,
-                                       enum mod_license *license,
-                                       bool gplok,
-                                       bool warn)
-{
-       struct find_symbol_arg fsa;
-
-       fsa.name = name;
-       fsa.gplok = gplok;
-       fsa.warn = warn;
-
-       if (each_symbol_section(find_exported_symbol_in_section, &fsa)) {
-               if (owner)
-                       *owner = fsa.owner;
-               if (crc)
-                       *crc = fsa.crc;
-               if (license)
-                       *license = fsa.license;
-               return fsa.sym;
+static bool find_symbol(struct find_symbol_arg *fsa)
+{
+       static const struct symsearch arr[] = {
+               { __start___ksymtab, __stop___ksymtab, __start___kcrctab,
+                 NOT_GPL_ONLY },
+               { __start___ksymtab_gpl, __stop___ksymtab_gpl,
+                 __start___kcrctab_gpl,
+                 GPL_ONLY },
+       };
+       struct module *mod;
+       unsigned int i;
+
+       module_assert_mutex_or_preempt();
+
+       for (i = 0; i < ARRAY_SIZE(arr); i++)
+               if (find_exported_symbol_in_section(&arr[i], NULL, fsa))
+                       return true;
+
+       list_for_each_entry_rcu(mod, &modules, list,
+                               lockdep_is_held(&module_mutex)) {
+               struct symsearch arr[] = {
+                       { mod->syms, mod->syms + mod->num_syms, mod->crcs,
+                         NOT_GPL_ONLY },
+                       { mod->gpl_syms, mod->gpl_syms + mod->num_gpl_syms,
+                         mod->gpl_crcs,
+                         GPL_ONLY },
+               };
+
+               if (mod->state == MODULE_STATE_UNFORMED)
+                       continue;
+
+               for (i = 0; i < ARRAY_SIZE(arr); i++)
+                       if (find_exported_symbol_in_section(&arr[i], mod, fsa))
+                               return true;
        }
 
-       pr_debug("Failed to find symbol %s\n", name);
-       return NULL;
+       pr_debug("Failed to find symbol %s\n", fsa->name);
+       return false;
 }
 
 /*
@@ -669,10 +572,8 @@ static struct module *find_module_all(const char *name, size_t len,
 
 struct module *find_module(const char *name)
 {
-       module_assert_mutex();
        return find_module_all(name, strlen(name), false);
 }
-EXPORT_SYMBOL_GPL(find_module);
 
 #ifdef CONFIG_SMP
 
@@ -1107,12 +1008,15 @@ static inline void print_unload_info(struct seq_file *m, struct module *mod)
 
 void __symbol_put(const char *symbol)
 {
-       struct module *owner;
+       struct find_symbol_arg fsa = {
+               .name   = symbol,
+               .gplok  = true,
+       };
 
        preempt_disable();
-       if (!find_symbol(symbol, &owner, NULL, NULL, true, false))
+       if (!find_symbol(&fsa))
                BUG();
-       module_put(owner);
+       module_put(fsa.owner);
        preempt_enable();
 }
 EXPORT_SYMBOL(__symbol_put);
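
The refactor turns find_symbol()'s five optional out-parameters into a single in/out struct: callers fill .name/.gplok/.warn and read results back from .owner/.crc/.sym/.license. A hypothetical minimal caller (symbol_exists() is an invented name):

/* Hypothetical caller sketch for the new in/out calling convention. */
static bool symbol_exists(const char *name)
{
        struct find_symbol_arg fsa = {
                .name   = name,
                .gplok  = true,
        };
        bool found;

        preempt_disable();      /* find_symbol() needs preempt off or module_mutex */
        found = find_symbol(&fsa);
        preempt_enable();

        return found;           /* fsa.owner/.crc/.sym hold the results */
}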
@@ -1381,19 +1285,22 @@ bad_version:
 static inline int check_modstruct_version(const struct load_info *info,
                                          struct module *mod)
 {
-       const s32 *crc;
+       struct find_symbol_arg fsa = {
+               .name   = "module_layout",
+               .gplok  = true,
+       };
 
        /*
         * Since this should be found in kernel (which can't be removed), no
         * locking is necessary -- use preempt_disable() to placate lockdep.
         */
        preempt_disable();
-       if (!find_symbol("module_layout", NULL, &crc, NULL, true, false)) {
+       if (!find_symbol(&fsa)) {
                preempt_enable();
                BUG();
        }
        preempt_enable();
-       return check_version(info, "module_layout", mod, crc);
+       return check_version(info, "module_layout", mod, fsa.crc);
 }
 
 /* First part is kernel version, which we ignore if module has crcs. */
@@ -1487,10 +1394,11 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
                                                  const char *name,
                                                  char ownername[])
 {
-       struct module *owner;
-       const struct kernel_symbol *sym;
-       const s32 *crc;
-       enum mod_license license;
+       struct find_symbol_arg fsa = {
+               .name   = name,
+               .gplok  = !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)),
+               .warn   = true,
+       };
        int err;
 
        /*
@@ -1500,42 +1408,40 @@ static const struct kernel_symbol *resolve_symbol(struct module *mod,
         */
        sched_annotate_sleep();
        mutex_lock(&module_mutex);
-       sym = find_symbol(name, &owner, &crc, &license,
-                         !(mod->taints & (1 << TAINT_PROPRIETARY_MODULE)), true);
-       if (!sym)
+       if (!find_symbol(&fsa))
                goto unlock;
 
-       if (license == GPL_ONLY)
+       if (fsa.license == GPL_ONLY)
                mod->using_gplonly_symbols = true;
 
-       if (!inherit_taint(mod, owner)) {
-               sym = NULL;
+       if (!inherit_taint(mod, fsa.owner)) {
+               fsa.sym = NULL;
                goto getname;
        }
 
-       if (!check_version(info, name, mod, crc)) {
-               sym = ERR_PTR(-EINVAL);
+       if (!check_version(info, name, mod, fsa.crc)) {
+               fsa.sym = ERR_PTR(-EINVAL);
                goto getname;
        }
 
-       err = verify_namespace_is_imported(info, sym, mod);
+       err = verify_namespace_is_imported(info, fsa.sym, mod);
        if (err) {
-               sym = ERR_PTR(err);
+               fsa.sym = ERR_PTR(err);
                goto getname;
        }
 
-       err = ref_module(mod, owner);
+       err = ref_module(mod, fsa.owner);
        if (err) {
-               sym = ERR_PTR(err);
+               fsa.sym = ERR_PTR(err);
                goto getname;
        }
 
 getname:
        /* We must make copy under the lock if we failed to get ref. */
-       strncpy(ownername, module_name(owner), MODULE_NAME_LEN);
+       strncpy(ownername, module_name(fsa.owner), MODULE_NAME_LEN);
 unlock:
        mutex_unlock(&module_mutex);
-       return sym;
+       return fsa.sym;
 }
 
 static const struct kernel_symbol *
@@ -2296,16 +2202,19 @@ static void free_module(struct module *mod)
 
 void *__symbol_get(const char *symbol)
 {
-       struct module *owner;
-       const struct kernel_symbol *sym;
+       struct find_symbol_arg fsa = {
+               .name   = symbol,
+               .gplok  = true,
+               .warn   = true,
+       };
 
        preempt_disable();
-       sym = find_symbol(symbol, &owner, NULL, NULL, true, true);
-       if (sym && strong_try_module_get(owner))
-               sym = NULL;
+       if (!find_symbol(&fsa) || strong_try_module_get(fsa.owner)) {
+               preempt_enable();
+               return NULL;
+       }
        preempt_enable();
-
-       return sym ? (void *)kernel_symbol_value(sym) : NULL;
+       return (void *)kernel_symbol_value(fsa.sym);
 }
 EXPORT_SYMBOL_GPL(__symbol_get);
 
@@ -2318,7 +2227,6 @@ EXPORT_SYMBOL_GPL(__symbol_get);
 static int verify_exported_symbols(struct module *mod)
 {
        unsigned int i;
-       struct module *owner;
        const struct kernel_symbol *s;
        struct {
                const struct kernel_symbol *sym;
@@ -2326,21 +2234,19 @@ static int verify_exported_symbols(struct module *mod)
        } arr[] = {
                { mod->syms, mod->num_syms },
                { mod->gpl_syms, mod->num_gpl_syms },
-               { mod->gpl_future_syms, mod->num_gpl_future_syms },
-#ifdef CONFIG_UNUSED_SYMBOLS
-               { mod->unused_syms, mod->num_unused_syms },
-               { mod->unused_gpl_syms, mod->num_unused_gpl_syms },
-#endif
        };
 
        for (i = 0; i < ARRAY_SIZE(arr); i++) {
                for (s = arr[i].sym; s < arr[i].sym + arr[i].num; s++) {
-                       if (find_symbol(kernel_symbol_name(s), &owner, NULL,
-                                       NULL, true, false)) {
+                       struct find_symbol_arg fsa = {
+                               .name   = kernel_symbol_name(s),
+                               .gplok  = true,
+                       };
+                       if (find_symbol(&fsa)) {
                                pr_err("%s: exports duplicate symbol %s"
                                       " (owned by %s)\n",
                                       mod->name, kernel_symbol_name(s),
-                                      module_name(owner));
+                                      module_name(fsa.owner));
                                return -ENOEXEC;
                        }
                }
@@ -2348,6 +2254,21 @@ static int verify_exported_symbols(struct module *mod)
        return 0;
 }
 
+static bool ignore_undef_symbol(Elf_Half emachine, const char *name)
+{
+       /*
+        * On x86, PIC code and Clang non-PIC code may have call foo@PLT. GNU as
+        * before 2.37 produces an unreferenced _GLOBAL_OFFSET_TABLE_ on x86-64.
+        * i386 has a similar problem but may not deserve a fix.
+        *
+        * If we ever have to ignore many symbols, consider refactoring the code to
+        * only warn if referenced by a relocation.
+        */
+       if (emachine == EM_386 || emachine == EM_X86_64)
+               return !strcmp(name, "_GLOBAL_OFFSET_TABLE_");
+       return false;
+}
+
 /* Change all symbols so that st_value encodes the pointer directly. */
 static int simplify_symbols(struct module *mod, const struct load_info *info)
 {
@@ -2395,8 +2316,10 @@ static int simplify_symbols(struct module *mod, const struct load_info *info)
                                break;
                        }
 
-                       /* Ok if weak.  */
-                       if (!ksym && ELF_ST_BIND(sym[i].st_info) == STB_WEAK)
+                       /* Ok if weak or ignored.  */
+                       if (!ksym &&
+                           (ELF_ST_BIND(sym[i].st_info) == STB_WEAK ||
+                            ignore_undef_symbol(info->hdr->e_machine, name)))
                                break;
 
                        ret = PTR_ERR(ksym) ?: -ENOENT;
@@ -2964,7 +2887,7 @@ static int module_sig_check(struct load_info *info, int flags)
        }
 
        if (is_module_sig_enforced()) {
-               pr_notice("%s: loading of %s is rejected\n", info->name, reason);
+               pr_notice("Loading of %s is rejected\n", reason);
                return -EKEYREJECTED;
        }
 
@@ -2977,9 +2900,33 @@ static int module_sig_check(struct load_info *info, int flags)
 }
 #endif /* !CONFIG_MODULE_SIG */
 
-/* Sanity checks against invalid binaries, wrong arch, weird elf version. */
-static int elf_header_check(struct load_info *info)
+static int validate_section_offset(struct load_info *info, Elf_Shdr *shdr)
+{
+       unsigned long secend;
+
+       /*
+        * Check for both overflow and offset/size being
+        * too large.
+        */
+       secend = shdr->sh_offset + shdr->sh_size;
+       if (secend < shdr->sh_offset || secend > info->len)
+               return -ENOEXEC;
+
+       return 0;
+}
+
+/*
+ * Sanity checks against invalid binaries, wrong arch, weird elf version.
+ *
+ * Also do basic validity checks against section offsets and sizes, the
+ * section name string table, and the indices used for it (sh_name).
+ */
+static int elf_validity_check(struct load_info *info)
 {
+       unsigned int i;
+       Elf_Shdr *shdr, *strhdr;
+       int err;
+
        if (info->len < sizeof(*(info->hdr)))
                return -ENOEXEC;
 
@@ -2989,11 +2936,78 @@ static int elf_header_check(struct load_info *info)
            || info->hdr->e_shentsize != sizeof(Elf_Shdr))
                return -ENOEXEC;
 
+       /*
+        * e_shnum is 16 bits, and sizeof(Elf_Shdr) is
+        * known and small. So e_shnum * sizeof(Elf_Shdr)
+        * will not overflow unsigned long on any platform.
+        */
        if (info->hdr->e_shoff >= info->len
            || (info->hdr->e_shnum * sizeof(Elf_Shdr) >
                info->len - info->hdr->e_shoff))
                return -ENOEXEC;
 
+       info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
+
+       /*
+        * Verify if the section name table index is valid.
+        */
+       if (info->hdr->e_shstrndx == SHN_UNDEF
+           || info->hdr->e_shstrndx >= info->hdr->e_shnum)
+               return -ENOEXEC;
+
+       strhdr = &info->sechdrs[info->hdr->e_shstrndx];
+       err = validate_section_offset(info, strhdr);
+       if (err < 0)
+               return err;
+
+       /*
+        * The section name table must be NUL-terminated, as required
+        * by the spec. This makes strcmp and pr_* calls that access
+        * strings in the section safe.
+        */
+       info->secstrings = (void *)info->hdr + strhdr->sh_offset;
+       if (info->secstrings[strhdr->sh_size - 1] != '\0')
+               return -ENOEXEC;
+
+       /*
+        * The code assumes that section 0 has a length of zero and
+        * an addr of zero, so check for it.
+        */
+       if (info->sechdrs[0].sh_type != SHT_NULL
+           || info->sechdrs[0].sh_size != 0
+           || info->sechdrs[0].sh_addr != 0)
+               return -ENOEXEC;
+
+       for (i = 1; i < info->hdr->e_shnum; i++) {
+               shdr = &info->sechdrs[i];
+               switch (shdr->sh_type) {
+               case SHT_NULL:
+               case SHT_NOBITS:
+                       continue;
+               case SHT_SYMTAB:
+                       if (shdr->sh_link == SHN_UNDEF
+                           || shdr->sh_link >= info->hdr->e_shnum)
+                               return -ENOEXEC;
+                       fallthrough;
+               default:
+                       err = validate_section_offset(info, shdr);
+                       if (err < 0) {
+                               pr_err("Invalid ELF section in module (section %u type %u)\n",
+                                       i, shdr->sh_type);
+                               return err;
+                       }
+
+                       if (shdr->sh_flags & SHF_ALLOC) {
+                               if (shdr->sh_name >= strhdr->sh_size) {
+                                       pr_err("Invalid ELF section name in module (section %u type %u)\n",
+                                              i, shdr->sh_type);
+                                       return -ENOEXEC;
+                               }
+                       }
+                       break;
+               }
+       }
+
        return 0;
 }
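
The overflow test in validate_section_offset() deserves a standalone look: sh_offset + sh_size can wrap, so the sum is checked against the offset itself before being compared to the file length. A userspace sketch:

#include <stdio.h>

/* Mirrors the check in validate_section_offset(); names are ours. */
static int range_ok(unsigned long off, unsigned long size, unsigned long len)
{
        unsigned long end = off + size;

        return end >= off && end <= len;        /* no wrap, fits in file */
}

int main(void)
{
        printf("%d\n", range_ok(16, 32, 4096));         /* 1: fits     */
        printf("%d\n", range_ok(4096, 32, 4096));       /* 0: past end */
        printf("%d\n", range_ok(~0UL - 8, 32, 4096));   /* 0: wraps    */
        return 0;
}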
 
@@ -3095,11 +3109,6 @@ static int rewrite_section_headers(struct load_info *info, int flags)
 
        for (i = 1; i < info->hdr->e_shnum; i++) {
                Elf_Shdr *shdr = &info->sechdrs[i];
-               if (shdr->sh_type != SHT_NOBITS
-                   && info->len < shdr->sh_offset + shdr->sh_size) {
-                       pr_err("Module len %lu truncated\n", info->len);
-                       return -ENOEXEC;
-               }
 
                /*
                 * Mark all sections sh_addr with their address in the
@@ -3133,11 +3142,6 @@ static int setup_load_info(struct load_info *info, int flags)
 {
        unsigned int i;
 
-       /* Set up the convenience variables */
-       info->sechdrs = (void *)info->hdr + info->hdr->e_shoff;
-       info->secstrings = (void *)info->hdr
-               + info->sechdrs[info->hdr->e_shstrndx].sh_offset;
-
        /* Try to find a name early so we can log errors with a module name */
        info->index.info = find_sec(info, ".modinfo");
        if (info->index.info)
@@ -3241,22 +3245,7 @@ static int find_module_sections(struct module *mod, struct load_info *info)
                                     sizeof(*mod->gpl_syms),
                                     &mod->num_gpl_syms);
        mod->gpl_crcs = section_addr(info, "__kcrctab_gpl");
-       mod->gpl_future_syms = section_objs(info,
-                                           "__ksymtab_gpl_future",
-                                           sizeof(*mod->gpl_future_syms),
-                                           &mod->num_gpl_future_syms);
-       mod->gpl_future_crcs = section_addr(info, "__kcrctab_gpl_future");
-
-#ifdef CONFIG_UNUSED_SYMBOLS
-       mod->unused_syms = section_objs(info, "__ksymtab_unused",
-                                       sizeof(*mod->unused_syms),
-                                       &mod->num_unused_syms);
-       mod->unused_crcs = section_addr(info, "__kcrctab_unused");
-       mod->unused_gpl_syms = section_objs(info, "__ksymtab_unused_gpl",
-                                           sizeof(*mod->unused_gpl_syms),
-                                           &mod->num_unused_gpl_syms);
-       mod->unused_gpl_crcs = section_addr(info, "__kcrctab_unused_gpl");
-#endif
+
 #ifdef CONFIG_CONSTRUCTORS
        mod->ctors = section_objs(info, ".ctors",
                                  sizeof(*mod->ctors), &mod->num_ctors);
@@ -3437,14 +3426,8 @@ static int check_module_license_and_versions(struct module *mod)
                pr_warn("%s: module license taints kernel.\n", mod->name);
 
 #ifdef CONFIG_MODVERSIONS
-       if ((mod->num_syms && !mod->crcs)
-           || (mod->num_gpl_syms && !mod->gpl_crcs)
-           || (mod->num_gpl_future_syms && !mod->gpl_future_crcs)
-#ifdef CONFIG_UNUSED_SYMBOLS
-           || (mod->num_unused_syms && !mod->unused_crcs)
-           || (mod->num_unused_gpl_syms && !mod->unused_gpl_crcs)
-#endif
-               ) {
+       if ((mod->num_syms && !mod->crcs) ||
+           (mod->num_gpl_syms && !mod->gpl_crcs)) {
                return try_to_force_load(mod,
                                         "no versions for exported symbols");
        }
@@ -3894,26 +3877,50 @@ static int load_module(struct load_info *info, const char __user *uargs,
        long err = 0;
        char *after_dashes;
 
-       err = elf_header_check(info);
+       /*
+        * Do the signature check (if any) first. All that
+        * the signature check needs is info->len; it does
+        * not need any of the section info. That can be
+        * set up later. This will minimize the chances
+        * of a corrupt module causing problems before
+        * we even get to the signature check.
+        *
+        * The check will also adjust info->len by stripping
+        * off the sig length at the end of the module, making
+        * checks against info->len more correct.
+        */
+       err = module_sig_check(info, flags);
+       if (err)
+               goto free_copy;
+
+       /*
+        * Do basic sanity checks against the ELF header and
+        * sections.
+        */
+       err = elf_validity_check(info);
        if (err) {
-               pr_err("Module has invalid ELF header\n");
+               pr_err("Module has invalid ELF structures\n");
                goto free_copy;
        }
 
+       /*
+        * Everything checks out, so set up the section info
+        * in the info structure.
+        */
        err = setup_load_info(info, flags);
        if (err)
                goto free_copy;
 
+       /*
+        * Now that we know we have the correct module name, check
+        * if it's blacklisted.
+        */
        if (blacklisted(info->name)) {
                err = -EPERM;
                pr_err("Module %s is blacklisted\n", info->name);
                goto free_copy;
        }
 
-       err = module_sig_check(info, flags);
-       if (err)
-               goto free_copy;
-
        err = rewrite_section_headers(info, flags);
        if (err)
                goto free_copy;
@@ -4374,16 +4381,16 @@ unsigned long module_kallsyms_lookup_name(const char *name)
        return ret;
 }
 
+#ifdef CONFIG_LIVEPATCH
 int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
                                             struct module *, unsigned long),
                                   void *data)
 {
        struct module *mod;
        unsigned int i;
-       int ret;
-
-       module_assert_mutex();
+       int ret = 0;
 
+       mutex_lock(&module_mutex);
        list_for_each_entry(mod, &modules, list) {
                /* We hold module_mutex: no need for rcu_dereference_sched */
                struct mod_kallsyms *kallsyms = mod->kallsyms;
@@ -4399,11 +4406,13 @@ int module_kallsyms_on_each_symbol(int (*fn)(void *, const char *,
                        ret = fn(data, kallsyms_symbol_name(kallsyms, i),
                                 mod, kallsyms_symbol_value(sym));
                        if (ret != 0)
-                               return ret;
+                               break;
                }
        }
-       return 0;
+       mutex_unlock(&module_mutex);
+       return ret;
 }
+#endif /* CONFIG_LIVEPATCH */
 #endif /* CONFIG_KALLSYMS */
 
 /* Maximum number of characters written by module_flags() */
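
With the mutex now taken inside module_kallsyms_on_each_symbol(), a caller (livepatch being the only user behind CONFIG_LIVEPATCH) just supplies a callback; a non-zero return stops the walk, and the rewritten loop still drops module_mutex on that path. A hypothetical callback, sketched:

    /* Illustrative only: find one symbol's address, then stop. */
    struct sym_query { const char *name; unsigned long addr; };

    static int find_sym_cb(void *data, const char *name,
                           struct module *mod, unsigned long addr)
    {
            struct sym_query *q = data;

            if (strcmp(name, q->name))
                    return 0;       /* keep walking */
            q->addr = addr;
            return 1;               /* non-zero ends the iteration */
    }

    /* usage: module_kallsyms_on_each_symbol(find_sym_cb, &q); */
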
index 4224a10..00132d1 100644 (file)
@@ -25,7 +25,7 @@ int mod_check_sig(const struct module_signature *ms, size_t file_len,
                return -EBADMSG;
 
        if (ms->id_type != PKEY_ID_PKCS7) {
-               pr_err("%s: Module is not signed with expected PKCS#7 message\n",
+               pr_err("%s: not signed with expected PKCS#7 message\n",
                       name);
                return -ENOPKG;
        }
index 9d9fc67..8723ae7 100644 (file)
@@ -30,7 +30,7 @@ int mod_verify_sig(const void *mod, struct load_info *info)
 
        memcpy(&ms, mod + (modlen - sizeof(ms)), sizeof(ms));
 
-       ret = mod_check_sig(&ms, modlen, info->name);
+       ret = mod_check_sig(&ms, modlen, "module");
        if (ret)
                return ret;
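
mod_verify_sig() walks the module image backwards: a struct module_signature sits at the very end (the MODULE_SIG_STRING magic has already been stripped), preceded by the PKCS#7 blob it describes. A sketch of that parse, with the bounds check hedged to match what mod_check_sig() enforces:

    /* Layout (magic already removed):
     *   [ module payload | PKCS#7 blob | struct module_signature ]
     */
    struct module_signature ms;
    size_t sig_len;

    memcpy(&ms, mod + modlen - sizeof(ms), sizeof(ms));
    sig_len = be32_to_cpu(ms.sig_len);
    if (sig_len >= modlen - sizeof(ms))
            return -EBADMSG;                /* blob can't fit */
    modlen -= sig_len + sizeof(ms);         /* payload length remains */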
 
index 1358fa4..0f4530b 100644 (file)
@@ -98,7 +98,7 @@ static int __init em_debug_init(void)
 
        return 0;
 }
-core_initcall(em_debug_init);
+fs_initcall(em_debug_init);
 #else /* CONFIG_DEBUG_FS */
 static void em_debug_create_pd(struct device *dev) {}
 static void em_debug_remove_pd(struct device *dev) {}
index eb1b158..a6ad5eb 100644 (file)
@@ -244,8 +244,6 @@ void migrate_to_reboot_cpu(void)
 void kernel_restart(char *cmd)
 {
        kernel_restart_prepare(cmd);
-       if (pm_power_off_prepare)
-               pm_power_off_prepare();
        migrate_to_reboot_cpu();
        syscore_shutdown();
        if (!cmd)
index ca2bb62..9819121 100644 (file)
@@ -1862,8 +1862,13 @@ struct migration_arg {
        struct set_affinity_pending     *pending;
 };
 
+/*
+ * @refs: number of wait_for_completion()
+ * @stop_pending: is @stop_work in use
+ */
 struct set_affinity_pending {
        refcount_t              refs;
+       unsigned int            stop_pending;
        struct completion       done;
        struct cpu_stop_work    stop_work;
        struct migration_arg    arg;
@@ -1898,8 +1903,8 @@ static struct rq *__migrate_task(struct rq *rq, struct rq_flags *rf,
  */
 static int migration_cpu_stop(void *data)
 {
-       struct set_affinity_pending *pending;
        struct migration_arg *arg = data;
+       struct set_affinity_pending *pending = arg->pending;
        struct task_struct *p = arg->task;
        int dest_cpu = arg->dest_cpu;
        struct rq *rq = this_rq();
@@ -1921,7 +1926,6 @@ static int migration_cpu_stop(void *data)
        raw_spin_lock(&p->pi_lock);
        rq_lock(rq, &rf);
 
-       pending = p->migration_pending;
        /*
         * If task_rq(p) != rq, it cannot be migrated here, because we're
         * holding rq->lock, if p->on_rq == 0 it cannot get enqueued because
@@ -1932,21 +1936,14 @@ static int migration_cpu_stop(void *data)
                        goto out;
 
                if (pending) {
-                       p->migration_pending = NULL;
+                       if (p->migration_pending == pending)
+                               p->migration_pending = NULL;
                        complete = true;
                }
 
-               /* migrate_enable() --  we must not race against SCA */
                if (dest_cpu < 0) {
-                       /*
-                        * When this was migrate_enable() but we no longer
-                        * have a @pending, a concurrent SCA 'fixed' things
-                        * and we should be valid again. Nothing to do.
-                        */
-                       if (!pending) {
-                               WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
+                       if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask))
                                goto out;
-                       }
 
                        dest_cpu = cpumask_any_distribute(&p->cpus_mask);
                }
@@ -1956,7 +1953,14 @@ static int migration_cpu_stop(void *data)
                else
                        p->wake_cpu = dest_cpu;
 
-       } else if (dest_cpu < 0 || pending) {
+               /*
+                * XXX __migrate_task() can fail, at which point we might end
+                * up running on a dodgy CPU. AFAICT this can only happen
+                * during CPU hotplug, at which point we'll get pushed out
+                * anyway, so it's probably not a big deal.
+                */
+
+       } else if (pending) {
                /*
                 * This happens when we get migrated between migrate_enable()'s
                 * preempt_enable() and scheduling the stopper task. At that
@@ -1971,43 +1975,32 @@ static int migration_cpu_stop(void *data)
                 * ->pi_lock, so the allowed mask is stable - if it got
                 * somewhere allowed, we're done.
                 */
-               if (pending && cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
-                       p->migration_pending = NULL;
+               if (cpumask_test_cpu(task_cpu(p), p->cpus_ptr)) {
+                       if (p->migration_pending == pending)
+                               p->migration_pending = NULL;
                        complete = true;
                        goto out;
                }
 
                /*
-                * When this was migrate_enable() but we no longer have an
-                * @pending, a concurrent SCA 'fixed' things and we should be
-                * valid again. Nothing to do.
-                */
-               if (!pending) {
-                       WARN_ON_ONCE(!cpumask_test_cpu(task_cpu(p), &p->cpus_mask));
-                       goto out;
-               }
-
-               /*
                 * When migrate_enable() hits a rq mis-match we can't reliably
                 * determine is_migration_disabled() and so have to chase after
                 * it.
                 */
+               WARN_ON_ONCE(!pending->stop_pending);
                task_rq_unlock(rq, p, &rf);
                stop_one_cpu_nowait(task_cpu(p), migration_cpu_stop,
                                    &pending->arg, &pending->stop_work);
                return 0;
        }
 out:
+       if (pending)
+               pending->stop_pending = false;
        task_rq_unlock(rq, p, &rf);
 
        if (complete)
                complete_all(&pending->done);
 
-       /* For pending->{arg,stop_work} */
-       pending = arg->pending;
-       if (pending && refcount_dec_and_test(&pending->refs))
-               wake_up_var(&pending->refs);
-
        return 0;
 }
 
@@ -2194,11 +2187,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
                            int dest_cpu, unsigned int flags)
 {
        struct set_affinity_pending my_pending = { }, *pending = NULL;
-       struct migration_arg arg = {
-               .task = p,
-               .dest_cpu = dest_cpu,
-       };
-       bool complete = false;
+       bool stop_pending, complete = false;
 
        /* Can the task run on the task's current CPU? If so, we're done */
        if (cpumask_test_cpu(task_cpu(p), &p->cpus_mask)) {
@@ -2210,12 +2199,16 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
                        push_task = get_task_struct(p);
                }
 
+               /*
+                * If there are pending waiters, but no pending stop_work,
+                * then complete now.
+                */
                pending = p->migration_pending;
-               if (pending) {
-                       refcount_inc(&pending->refs);
+               if (pending && !pending->stop_pending) {
                        p->migration_pending = NULL;
                        complete = true;
                }
+
                task_rq_unlock(rq, p, rf);
 
                if (push_task) {
@@ -2224,7 +2217,7 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
                }
 
                if (complete)
-                       goto do_complete;
+                       complete_all(&pending->done);
 
                return 0;
        }
@@ -2235,6 +2228,12 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
                        /* Install the request */
                        refcount_set(&my_pending.refs, 1);
                        init_completion(&my_pending.done);
+                       my_pending.arg = (struct migration_arg) {
+                               .task = p,
+                               .dest_cpu = -1,         /* any */
+                               .pending = &my_pending,
+                       };
+
                        p->migration_pending = &my_pending;
                } else {
                        pending = p->migration_pending;
@@ -2259,45 +2258,41 @@ static int affine_move_task(struct rq *rq, struct task_struct *p, struct rq_flag
                return -EINVAL;
        }
 
-       if (flags & SCA_MIGRATE_ENABLE) {
-
-               refcount_inc(&pending->refs); /* pending->{arg,stop_work} */
-               p->migration_flags &= ~MDF_PUSH;
-               task_rq_unlock(rq, p, rf);
-
-               pending->arg = (struct migration_arg) {
-                       .task = p,
-                       .dest_cpu = -1,
-                       .pending = pending,
-               };
-
-               stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
-                                   &pending->arg, &pending->stop_work);
-
-               return 0;
-       }
-
        if (task_running(rq, p) || p->state == TASK_WAKING) {
                /*
-                * Lessen races (and headaches) by delegating
-                * is_migration_disabled(p) checks to the stopper, which will
-                * run on the same CPU as said p.
+                * MIGRATE_ENABLE gets here because 'p == current', but for
+                * anything else we cannot reliably call is_migration_disabled(),
+                * so punt and have the stopper function handle it all race-free.
                 */
+               stop_pending = pending->stop_pending;
+               if (!stop_pending)
+                       pending->stop_pending = true;
+
+               if (flags & SCA_MIGRATE_ENABLE)
+                       p->migration_flags &= ~MDF_PUSH;
+
                task_rq_unlock(rq, p, rf);
-               stop_one_cpu(cpu_of(rq), migration_cpu_stop, &arg);
 
+               if (!stop_pending) {
+                       stop_one_cpu_nowait(cpu_of(rq), migration_cpu_stop,
+                                           &pending->arg, &pending->stop_work);
+               }
+
+               if (flags & SCA_MIGRATE_ENABLE)
+                       return 0;
        } else {
 
                if (!is_migration_disabled(p)) {
                        if (task_on_rq_queued(p))
                                rq = move_queued_task(rq, rf, p, dest_cpu);
 
-                       p->migration_pending = NULL;
-                       complete = true;
+                       if (!pending->stop_pending) {
+                               p->migration_pending = NULL;
+                               complete = true;
+                       }
                }
                task_rq_unlock(rq, p, rf);
 
-do_complete:
                if (complete)
                        complete_all(&pending->done);
        }
@@ -2305,7 +2300,7 @@ do_complete:
        wait_for_completion(&pending->done);
 
        if (refcount_dec_and_test(&pending->refs))
-               wake_up_var(&pending->refs);
+               wake_up_var(&pending->refs); /* No UaF, just an address */
 
        /*
         * Block the original owner of &pending until all subsequent callers
@@ -2313,6 +2308,9 @@ do_complete:
         */
        wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));
 
+       /* ARGH */
+       WARN_ON_ONCE(my_pending.stop_pending);
+
        return 0;
 }
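
The refcount/completion choreography that replaces the old do_complete label is easier to see in isolation. A sketch of the ownership rules, mirroring the code above rather than introducing any new API:

    refcount_set(&my_pending.refs, 1);      /* stack owner's reference */
    init_completion(&my_pending.done);
    /* later callers: refcount_inc(&pending->refs) under the rq lock */

    wait_for_completion(&pending->done);    /* all callers block here */
    if (refcount_dec_and_test(&pending->refs))
            wake_up_var(&pending->refs);    /* no UaF: address only */

    /* owner alone: my_pending must not leave the stack while shared */
    wait_var_event(&my_pending.refs, !refcount_read(&my_pending.refs));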
 
index 41e498b..50cbad8 100644 (file)
@@ -26,7 +26,7 @@ struct sugov_policy {
        struct sugov_tunables   *tunables;
        struct list_head        tunables_hook;
 
-       raw_spinlock_t          update_lock;    /* For shared policies */
+       raw_spinlock_t          update_lock;
        u64                     last_freq_update_time;
        s64                     freq_update_delay_ns;
        unsigned int            next_freq;
@@ -320,23 +320,21 @@ static inline bool sugov_cpu_is_busy(struct sugov_cpu *sg_cpu) { return false; }
  * Make sugov_should_update_freq() ignore the rate limit when DL
  * has increased the utilization.
  */
-static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu, struct sugov_policy *sg_policy)
+static inline void ignore_dl_rate_limit(struct sugov_cpu *sg_cpu)
 {
        if (cpu_bw_dl(cpu_rq(sg_cpu->cpu)) > sg_cpu->bw_dl)
-               sg_policy->limits_changed = true;
+               sg_cpu->sg_policy->limits_changed = true;
 }
 
 static inline bool sugov_update_single_common(struct sugov_cpu *sg_cpu,
                                              u64 time, unsigned int flags)
 {
-       struct sugov_policy *sg_policy = sg_cpu->sg_policy;
-
        sugov_iowait_boost(sg_cpu, time, flags);
        sg_cpu->last_update = time;
 
-       ignore_dl_rate_limit(sg_cpu, sg_policy);
+       ignore_dl_rate_limit(sg_cpu);
 
-       if (!sugov_should_update_freq(sg_policy, time))
+       if (!sugov_should_update_freq(sg_cpu->sg_policy, time))
                return false;
 
        sugov_get_util(sg_cpu);
@@ -451,7 +449,7 @@ sugov_update_shared(struct update_util_data *hook, u64 time, unsigned int flags)
        sugov_iowait_boost(sg_cpu, time, flags);
        sg_cpu->last_update = time;
 
-       ignore_dl_rate_limit(sg_cpu, sg_policy);
+       ignore_dl_rate_limit(sg_cpu);
 
        if (sugov_should_update_freq(sg_policy, time)) {
                next_f = sugov_next_freq_shared(sg_cpu, time);
index 8a8bd7b..794c2cb 100644 (file)
@@ -5126,7 +5126,7 @@ static void do_sched_cfs_slack_timer(struct cfs_bandwidth *cfs_b)
 /*
  * When a group wakes up we want to make sure that its quota is not already
  * expired/exceeded, otherwise it may be allowed to steal additional ticks of
- * runtime as update_curr() throttling can not not trigger until it's on-rq.
+ * runtime as update_curr() throttling can not trigger until it's on-rq.
  */
 static void check_enqueue_throttle(struct cfs_rq *cfs_rq)
 {
index 08ae45a..b5add64 100644 (file)
@@ -454,7 +454,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
 
        /*
         * For each cpu runqueue, if the task's mm match @mm, ensure that all
-        * @mm's membarrier state set bits are also set in in the runqueue's
+        * @mm's membarrier state set bits are also set in the runqueue's
         * membarrier state. This ensures that a runqueue scheduling
         * between threads which are users of @mm has its membarrier state
         * updated.
@@ -471,9 +471,7 @@ static int sync_runqueues_membarrier_state(struct mm_struct *mm)
        }
        rcu_read_unlock();
 
-       preempt_disable();
-       smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
-       preempt_enable();
+       on_each_cpu_mask(tmpmask, ipi_sync_rq_state, mm, true);
 
        free_cpumask_var(tmpmask);
        cpus_read_unlock();
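
on_each_cpu_mask() is not just shorthand for the removed three lines: unlike smp_call_function_many(), it also runs the function on the calling CPU when that CPU is in the mask, which appears to be the point of this change. The open-coded equivalent, as a sketch:

    preempt_disable();
    smp_call_function_many(tmpmask, ipi_sync_rq_state, mm, 1);
    if (cpumask_test_cpu(smp_processor_id(), tmpmask))
            ipi_sync_rq_state(mm);          /* the previously missed case */
    preempt_enable();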
index 5ad8566..f271835 100644 (file)
@@ -2768,13 +2768,21 @@ relock:
                }
 
                /*
+                * PF_IO_WORKER threads will catch and exit on fatal signals
+                * themselves. They have cleanup that must be performed, so
+                * we cannot call do_exit() on their behalf.
+                */
+               if (current->flags & PF_IO_WORKER)
+                       goto out;
+
+               /*
                 * Death signals, no core dump.
                 */
                do_group_exit(ksig->info.si_signo);
                /* NOTREACHED */
        }
        spin_unlock_irq(&sighand->siglock);
-
+out:
        ksig->sig = signr;
 
        if (!(ksig->ka.sa.sa_flags & SA_EXPOSE_TAGBITS))
index 9d71046..9908ec4 100644 (file)
@@ -26,6 +26,8 @@
 #include <linux/tick.h>
 #include <linux/irq.h>
 
+#include <asm/softirq_stack.h>
+
 #define CREATE_TRACE_POINTS
 #include <trace/events/irq.h>
 
index 6906c6e..2c5950b 100644 (file)
@@ -35,27 +35,30 @@ static inline void *static_call_addr(struct static_call_site *site)
        return (void *)((long)site->addr + (long)&site->addr);
 }
 
+static inline unsigned long __static_call_key(const struct static_call_site *site)
+{
+       return (long)site->key + (long)&site->key;
+}
 
 static inline struct static_call_key *static_call_key(const struct static_call_site *site)
 {
-       return (struct static_call_key *)
-               (((long)site->key + (long)&site->key) & ~STATIC_CALL_SITE_FLAGS);
+       return (void *)(__static_call_key(site) & ~STATIC_CALL_SITE_FLAGS);
 }
 
 /* These assume the key is word-aligned. */
 static inline bool static_call_is_init(struct static_call_site *site)
 {
-       return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_INIT;
+       return __static_call_key(site) & STATIC_CALL_SITE_INIT;
 }
 
 static inline bool static_call_is_tail(struct static_call_site *site)
 {
-       return ((long)site->key + (long)&site->key) & STATIC_CALL_SITE_TAIL;
+       return __static_call_key(site) & STATIC_CALL_SITE_TAIL;
 }
 
 static inline void static_call_set_init(struct static_call_site *site)
 {
-       site->key = ((long)static_call_key(site) | STATIC_CALL_SITE_INIT) -
+       site->key = (__static_call_key(site) | STATIC_CALL_SITE_INIT) -
                    (long)&site->key;
 }
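
__static_call_key() recovers the raw relative value so the key pointer and its low flag bits can be peeled apart in one place; this works because static_call keys are word-aligned, leaving the two low bits free to carry INIT/TAIL. The trick in miniature (names and the mask value here are illustrative, not the kernel's):

    #define FLAG_BITS 3UL                   /* low bits of an aligned pointer */

    static unsigned long pack(unsigned long key, unsigned long flags)
    {
            return key | flags;             /* key's low bits known to be 0 */
    }

    static unsigned long raw_key(unsigned long v)  { return v & ~FLAG_BITS; }
    static unsigned long flags_of(unsigned long v) { return v &  FLAG_BITS; }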
 
@@ -146,6 +149,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
        };
 
        for (site_mod = &first; site_mod; site_mod = site_mod->next) {
+               bool init = system_state < SYSTEM_RUNNING;
                struct module *mod = site_mod->mod;
 
                if (!site_mod->sites) {
@@ -165,6 +169,7 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
                if (mod) {
                        stop = mod->static_call_sites +
                               mod->num_static_call_sites;
+                       init = mod->state == MODULE_STATE_COMING;
                }
 #endif
 
@@ -172,25 +177,26 @@ void __static_call_update(struct static_call_key *key, void *tramp, void *func)
                     site < stop && static_call_key(site) == key; site++) {
                        void *site_addr = static_call_addr(site);
 
-                       if (static_call_is_init(site)) {
-                               /*
-                                * Don't write to call sites which were in
-                                * initmem and have since been freed.
-                                */
-                               if (!mod && system_state >= SYSTEM_RUNNING)
-                                       continue;
-                               if (mod && !within_module_init((unsigned long)site_addr, mod))
-                                       continue;
-                       }
+                       if (!init && static_call_is_init(site))
+                               continue;
 
                        if (!kernel_text_address((unsigned long)site_addr)) {
-                               WARN_ONCE(1, "can't patch static call site at %pS",
+                               /*
+                                * This skips patching built-in __exit, which
+                                * is part of init_section_contains() but is
+                                * not part of kernel_text_address().
+                                *
+                                * Skipping built-in __exit is fine since it
+                                * will never be executed.
+                                */
+                               WARN_ONCE(!static_call_is_init(site),
+                                         "can't patch static call site at %pS",
                                          site_addr);
                                continue;
                        }
 
                        arch_static_call_transform(site_addr, NULL, func,
-                               static_call_is_tail(site));
+                                                  static_call_is_tail(site));
                }
        }
 
@@ -349,7 +355,8 @@ static int static_call_add_module(struct module *mod)
        struct static_call_site *site;
 
        for (site = start; site != stop; site++) {
-               unsigned long addr = (unsigned long)static_call_key(site);
+               unsigned long s_key = __static_call_key(site);
+               unsigned long addr = s_key & ~STATIC_CALL_SITE_FLAGS;
                unsigned long key;
 
                /*
@@ -373,8 +380,8 @@ static int static_call_add_module(struct module *mod)
                        return -EINVAL;
                }
 
-               site->key = (key - (long)&site->key) |
-                           (site->key & STATIC_CALL_SITE_FLAGS);
+               key |= s_key & STATIC_CALL_SITE_FLAGS;
+               site->key = key - (long)&site->key;
        }
 
        return __static_call_init(mod, start, stop);
index 6928d23..2e2e3f3 100644 (file)
@@ -1242,7 +1242,7 @@ static int override_release(char __user *release, size_t len)
                                break;
                        rest++;
                }
-               v = ((LINUX_VERSION_CODE >> 8) & 0xff) + 60;
+               v = LINUX_VERSION_PATCHLEVEL + 60;
                copy = clamp_t(size_t, len, 1, sizeof(buf));
                copy = scnprintf(buf, copy, "2.6.%u%s", v, rest);
                ret = copy_to_user(release, buf, copy + 1);
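
LINUX_VERSION_PATCHLEVEL spells out what the old byte-shift expression computed. A worked example, assuming a v5.12 kernel:

    v = LINUX_VERSION_PATCHLEVEL + 60;              /* 12 + 60 = 72 */
    scnprintf(buf, copy, "2.6.%u%s", v, rest);      /* UNAME26 sees "2.6.72" */
    /* old form: ((LINUX_VERSION_CODE >> 8) & 0xff) extracted the same 12 */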
@@ -1847,7 +1847,7 @@ static int prctl_set_mm_exe_file(struct mm_struct *mm, unsigned int fd)
        if (!S_ISREG(inode->i_mode) || path_noexec(&exe.file->f_path))
                goto exit;
 
-       err = inode_permission(inode, MAY_EXEC);
+       err = file_permission(exe.file, MAY_EXEC);
        if (err)
                goto exit;
 
@@ -2079,7 +2079,7 @@ static int prctl_set_auxv(struct mm_struct *mm, unsigned long addr,
         * up to the caller to provide sane values here, otherwise userspace
         * tools which use this vector might be unhappy.
         */
-       unsigned long user_auxv[AT_VECTOR_SIZE];
+       unsigned long user_auxv[AT_VECTOR_SIZE] = {};
 
        if (len > sizeof(user_auxv))
                return -EINVAL;
index c9fbdd8..62fbd09 100644 (file)
@@ -2962,7 +2962,7 @@ static struct ctl_table vm_table[] = {
                .data           = &block_dump,
                .maxlen         = sizeof(block_dump),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
        },
        {
@@ -2970,7 +2970,7 @@ static struct ctl_table vm_table[] = {
                .data           = &sysctl_vfs_cache_pressure,
                .maxlen         = sizeof(sysctl_vfs_cache_pressure),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
        },
 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
@@ -2980,7 +2980,7 @@ static struct ctl_table vm_table[] = {
                .data           = &sysctl_legacy_va_layout,
                .maxlen         = sizeof(sysctl_legacy_va_layout),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
        },
 #endif
@@ -2990,7 +2990,7 @@ static struct ctl_table vm_table[] = {
                .data           = &node_reclaim_mode,
                .maxlen         = sizeof(node_reclaim_mode),
                .mode           = 0644,
-               .proc_handler   = proc_dointvec,
+               .proc_handler   = proc_dointvec_minmax,
                .extra1         = SYSCTL_ZERO,
        },
        {
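
proc_dointvec_minmax() enforces the .extra1/.extra2 bounds, so with .extra1 = SYSCTL_ZERO a negative write now fails with -EINVAL instead of being stored. A hypothetical entry following the same pattern (the knob name is illustrative):

    {
            .procname       = "example_knob",       /* illustrative */
            .data           = &example_knob,
            .maxlen         = sizeof(example_knob),
            .mode           = 0644,
            .proc_handler   = proc_dointvec_minmax,
            .extra1         = SYSCTL_ZERO,          /* reject values < 0 */
    },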
index 98d7a15..4d94e2b 100644 (file)
@@ -854,9 +854,9 @@ static int alarm_timer_nsleep(const clockid_t which_clock, int flags,
        if (flags == TIMER_ABSTIME)
                return -ERESTARTNOHAND;
 
-       restart->fn = alarm_timer_nsleep_restart;
        restart->nanosleep.clockid = type;
        restart->nanosleep.expires = exp;
+       set_restart_fn(restart, alarm_timer_nsleep_restart);
        return ret;
 }
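
The same substitution recurs in the hrtimer and posix-cpu-timers hunks below; set_restart_fn() couples the ->fn assignment with an arch hook so architectures can track restart state. A sketch of its presumed shape, not quoted from the tree:

    static inline long set_restart_fn(struct restart_block *restart,
                                      long (*fn)(struct restart_block *))
    {
            restart->fn = fn;
            arch_set_restart_data(restart); /* no-op on most architectures */
            return -ERESTART_RESTARTBLOCK;
    }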
 
index 743c852..5c9d968 100644 (file)
@@ -546,8 +546,11 @@ static ktime_t __hrtimer_next_event_base(struct hrtimer_cpu_base *cpu_base,
 }
 
 /*
- * Recomputes cpu_base::*next_timer and returns the earliest expires_next but
- * does not set cpu_base::*expires_next, that is done by hrtimer_reprogram.
+ * Recomputes cpu_base::*next_timer and returns the earliest expires_next
+ * but does not set cpu_base::*expires_next; that is done only by
+ * hrtimer[_force]_reprogram and hrtimer_interrupt. If cpu_base::*expires_next
+ * were updated right away, the reprogramming logic would no longer work.
  *
  * When a softirq is pending, we can ignore the HRTIMER_ACTIVE_SOFT bases,
  * those timers will get run whenever the softirq gets handled, at the end of
@@ -588,6 +591,37 @@ __hrtimer_get_next_event(struct hrtimer_cpu_base *cpu_base, unsigned int active_
        return expires_next;
 }
 
+static ktime_t hrtimer_update_next_event(struct hrtimer_cpu_base *cpu_base)
+{
+       ktime_t expires_next, soft = KTIME_MAX;
+
+       /*
+        * If the soft interrupt has already been activated, ignore the
+        * soft bases. They will be handled in the already raised soft
+        * interrupt.
+        */
+       if (!cpu_base->softirq_activated) {
+               soft = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_SOFT);
+               /*
+                * Update the soft expiry time. clock_settime() might have
+                * affected it.
+                */
+               cpu_base->softirq_expires_next = soft;
+       }
+
+       expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_HARD);
+       /*
+        * If a softirq timer is expiring first, update cpu_base->next_timer
+        * and program the hardware with the soft expiry time.
+        */
+       if (expires_next > soft) {
+               cpu_base->next_timer = cpu_base->softirq_next_timer;
+               expires_next = soft;
+       }
+
+       return expires_next;
+}
+
 static inline ktime_t hrtimer_update_base(struct hrtimer_cpu_base *base)
 {
        ktime_t *offs_real = &base->clock_base[HRTIMER_BASE_REALTIME].offset;
@@ -628,23 +662,7 @@ hrtimer_force_reprogram(struct hrtimer_cpu_base *cpu_base, int skip_equal)
 {
        ktime_t expires_next;
 
-       /*
-        * Find the current next expiration time.
-        */
-       expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
-
-       if (cpu_base->next_timer && cpu_base->next_timer->is_soft) {
-               /*
-                * When the softirq is activated, hrtimer has to be
-                * programmed with the first hard hrtimer because soft
-                * timer interrupt could occur too late.
-                */
-               if (cpu_base->softirq_activated)
-                       expires_next = __hrtimer_get_next_event(cpu_base,
-                                                               HRTIMER_ACTIVE_HARD);
-               else
-                       cpu_base->softirq_expires_next = expires_next;
-       }
+       expires_next = hrtimer_update_next_event(cpu_base);
 
        if (skip_equal && expires_next == cpu_base->expires_next)
                return;
@@ -1644,8 +1662,8 @@ retry:
 
        __hrtimer_run_queues(cpu_base, now, flags, HRTIMER_ACTIVE_HARD);
 
-       /* Reevaluate the clock bases for the next expiry */
-       expires_next = __hrtimer_get_next_event(cpu_base, HRTIMER_ACTIVE_ALL);
+       /* Reevaluate the clock bases for the [soft] next expiry */
+       expires_next = hrtimer_update_next_event(cpu_base);
        /*
         * Store the new expiry value so the migration code can verify
         * against it.
@@ -1939,9 +1957,9 @@ long hrtimer_nanosleep(ktime_t rqtp, const enum hrtimer_mode mode,
        }
 
        restart = &current->restart_block;
-       restart->fn = hrtimer_nanosleep_restart;
        restart->nanosleep.clockid = t.timer.base->clockid;
        restart->nanosleep.expires = hrtimer_get_expires_tv64(&t.timer);
+       set_restart_fn(restart, hrtimer_nanosleep_restart);
 out:
        destroy_hrtimer_on_stack(&t.timer);
        return ret;
index a71758e..9abe152 100644 (file)
@@ -1480,8 +1480,8 @@ static int posix_cpu_nsleep(const clockid_t which_clock, int flags,
                if (flags & TIMER_ABSTIME)
                        return -ERESTARTNOHAND;
 
-               restart_block->fn = posix_cpu_nsleep_restart;
                restart_block->nanosleep.clockid = which_clock;
+               set_restart_fn(restart_block, posix_cpu_nsleep_restart);
        }
        return error;
 }
index 799dbcf..7fa8277 100644 (file)
@@ -60,6 +60,11 @@ config HAVE_NOP_MCOUNT
        help
          Arch supports the gcc options -pg with -mrecord-mcount and -nop-mcount
 
+config HAVE_OBJTOOL_MCOUNT
+       bool
+       help
+         Arch supports objtool --mcount
+
 config HAVE_C_RECORDMCOUNT
        bool
        help
@@ -602,6 +607,30 @@ config FTRACE_MCOUNT_RECORD
        depends on DYNAMIC_FTRACE
        depends on HAVE_FTRACE_MCOUNT_RECORD
 
+config FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+       bool
+       depends on FTRACE_MCOUNT_RECORD
+
+config FTRACE_MCOUNT_USE_CC
+       def_bool y
+       depends on $(cc-option,-mrecord-mcount)
+       depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+       depends on FTRACE_MCOUNT_RECORD
+
+config FTRACE_MCOUNT_USE_OBJTOOL
+       def_bool y
+       depends on HAVE_OBJTOOL_MCOUNT
+       depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+       depends on !FTRACE_MCOUNT_USE_CC
+       depends on FTRACE_MCOUNT_RECORD
+
+config FTRACE_MCOUNT_USE_RECORDMCOUNT
+       def_bool y
+       depends on !FTRACE_MCOUNT_USE_PATCHABLE_FUNCTION_ENTRY
+       depends on !FTRACE_MCOUNT_USE_CC
+       depends on !FTRACE_MCOUNT_USE_OBJTOOL
+       depends on FTRACE_MCOUNT_RECORD
+
 config TRACING_MAP
        bool
        depends on ARCH_HAVE_NMI_SAFE_CMPXCHG
@@ -665,7 +694,7 @@ config TRACEPOINT_BENCHMARK
        help
         This option creates the tracepoint "benchmark:benchmark_event".
         When the tracepoint is enabled, it kicks off a kernel thread that
-        goes into an infinite loop (calling cond_sched() to let other tasks
+        goes into an infinite loop (calling cond_resched() to let other tasks
         run), and calls the tracepoint. Each iteration will record the time
         it took to write to the tracepoint and the next iteration that
         data will be passed to the tracepoint itself. That is, the tracepoint
index 7e44cea..b28d3e5 100644 (file)
@@ -81,6 +81,7 @@ obj-$(CONFIG_SYNTH_EVENTS) += trace_events_synth.o
 obj-$(CONFIG_HIST_TRIGGERS) += trace_events_hist.o
 obj-$(CONFIG_BPF_EVENTS) += bpf_trace.o
 obj-$(CONFIG_KPROBE_EVENTS) += trace_kprobe.o
+obj-$(CONFIG_TRACEPOINTS) += error_report-traces.o
 obj-$(CONFIG_TRACEPOINTS) += power-traces.o
 ifeq ($(CONFIG_PM),y)
 obj-$(CONFIG_TRACEPOINTS) += rpm-traces.o
index c286c13..c221e4c 100644 (file)
@@ -312,8 +312,6 @@ record_it:
 
 static void blk_trace_free(struct blk_trace *bt)
 {
-       debugfs_remove(bt->msg_file);
-       debugfs_remove(bt->dropped_file);
        relay_close(bt->rchan);
        debugfs_remove(bt->dir);
        free_percpu(bt->sequence);
@@ -545,10 +543,8 @@ static int do_blk_trace_setup(struct request_queue *q, char *name, dev_t dev,
        INIT_LIST_HEAD(&bt->running_list);
 
        ret = -EIO;
-       bt->dropped_file = debugfs_create_file("dropped", 0444, dir, bt,
-                                              &blk_dropped_fops);
-
-       bt->msg_file = debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
+       debugfs_create_file("dropped", 0444, dir, bt, &blk_dropped_fops);
+       debugfs_create_file("msg", 0222, dir, bt, &blk_msg_fops);
 
        bt->rchan = relay_open("trace", dir, buts->buf_size,
                                buts->buf_nr, &blk_relay_callbacks, bt);
@@ -1868,7 +1864,17 @@ void blk_trace_remove_sysfs(struct device *dev)
 
 #ifdef CONFIG_EVENT_TRACING
 
-void blk_fill_rwbs(char *rwbs, unsigned int op, int bytes)
+/**
+ * blk_fill_rwbs - Fill the buffer rwbs by mapping op to character string.
+ * @rwbs:      buffer to be filled
+ * @op:                REQ_OP_XXX for the tracepoint
+ *
+ * Description:
+ *     Maps the REQ_OP_XXX to its character representation and fills the
+ *     buffer provided by the caller with the resulting string.
+ *
+ **/
+void blk_fill_rwbs(char *rwbs, unsigned int op)
 {
        int i = 0;
 
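With the unused bytes argument gone, callers now pass only the op/flags word. A hypothetical tracepoint-side usage, assuming a struct request *rq in scope:

    char rwbs[RWBS_LEN];

    blk_fill_rwbs(rwbs, rq->cmd_flags);     /* e.g. "WS" for a sync write */
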
diff --git a/kernel/trace/error_report-traces.c b/kernel/trace/error_report-traces.c
new file mode 100644 (file)
index 0000000..f89792c
--- /dev/null
@@ -0,0 +1,11 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Error reporting trace points.
+ *
+ * Copyright (C) 2021, Google LLC.
+ */
+
+#define CREATE_TRACE_POINTS
+#include <trace/events/error_report.h>
+
+EXPORT_TRACEPOINT_SYMBOL_GPL(error_report_end);
index 4d8e355..3ba52d4 100644 (file)
@@ -3231,7 +3231,8 @@ ftrace_allocate_pages(unsigned long num_to_init)
        pg = start_pg;
        while (pg) {
                order = get_count_order(pg->size / ENTRIES_PER_PAGE);
-               free_pages((unsigned long)pg->records, order);
+               if (order >= 0)
+                       free_pages((unsigned long)pg->records, order);
                start_pg = pg->next;
                kfree(pg);
                pg = start_pg;
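
The guard matters because get_count_order(0) returns -1 (the empty pg->records case this fix targets), and free_pages() must never see a negative order. In miniature:

    order = get_count_order(0);             /* -1 for an empty group */
    if (order >= 0)                         /* skip the bogus free */
            free_pages((unsigned long)pg->records, order);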
@@ -5045,6 +5046,20 @@ struct ftrace_direct_func *ftrace_find_direct_func(unsigned long addr)
        return NULL;
 }
 
+static struct ftrace_direct_func *ftrace_alloc_direct_func(unsigned long addr)
+{
+       struct ftrace_direct_func *direct;
+
+       direct = kmalloc(sizeof(*direct), GFP_KERNEL);
+       if (!direct)
+               return NULL;
+       direct->addr = addr;
+       direct->count = 0;
+       list_add_rcu(&direct->next, &ftrace_direct_funcs);
+       ftrace_direct_func_count++;
+       return direct;
+}
+
 /**
  * register_ftrace_direct - Call a custom trampoline directly
  * @ip: The address of the nop at the beginning of a function
@@ -5120,15 +5135,11 @@ int register_ftrace_direct(unsigned long ip, unsigned long addr)
 
        direct = ftrace_find_direct_func(addr);
        if (!direct) {
-               direct = kmalloc(sizeof(*direct), GFP_KERNEL);
+               direct = ftrace_alloc_direct_func(addr);
                if (!direct) {
                        kfree(entry);
                        goto out_unlock;
                }
-               direct->addr = addr;
-               direct->count = 0;
-               list_add_rcu(&direct->next, &ftrace_direct_funcs);
-               ftrace_direct_func_count++;
        }
 
        entry->ip = ip;
@@ -5329,6 +5340,7 @@ int __weak ftrace_modify_direct_caller(struct ftrace_func_entry *entry,
 int modify_ftrace_direct(unsigned long ip,
                         unsigned long old_addr, unsigned long new_addr)
 {
+       struct ftrace_direct_func *direct, *new_direct = NULL;
        struct ftrace_func_entry *entry;
        struct dyn_ftrace *rec;
        int ret = -ENODEV;
@@ -5344,6 +5356,20 @@ int modify_ftrace_direct(unsigned long ip,
        if (entry->direct != old_addr)
                goto out_unlock;
 
+       direct = ftrace_find_direct_func(old_addr);
+       if (WARN_ON(!direct))
+               goto out_unlock;
+       if (direct->count > 1) {
+               ret = -ENOMEM;
+               new_direct = ftrace_alloc_direct_func(new_addr);
+               if (!new_direct)
+                       goto out_unlock;
+               direct->count--;
+               new_direct->count++;
+       } else {
+               direct->addr = new_addr;
+       }
+
        /*
         * If there's no other ftrace callback on the rec->ip location,
         * then it can be changed directly by the architecture.
@@ -5357,6 +5383,14 @@ int modify_ftrace_direct(unsigned long ip,
                ret = 0;
        }
 
+       if (unlikely(ret && new_direct)) {
+               direct->count++;
+               list_del_rcu(&new_direct->next);
+               synchronize_rcu_tasks();
+               kfree(new_direct);
+               ftrace_direct_func_count--;
+       }
+
  out_unlock:
        mutex_unlock(&ftrace_lock);
        mutex_unlock(&direct_mutex);
@@ -6418,7 +6452,8 @@ void ftrace_release_mod(struct module *mod)
                clear_mod_from_hashes(pg);
 
                order = get_count_order(pg->size / ENTRIES_PER_PAGE);
-               free_pages((unsigned long)pg->records, order);
+               if (order >= 0)
+                       free_pages((unsigned long)pg->records, order);
                tmp_page = pg->next;
                kfree(pg);
                ftrace_number_of_pages -= 1 << order;
@@ -6778,7 +6813,8 @@ void ftrace_free_mem(struct module *mod, void *start_ptr, void *end_ptr)
                if (!pg->index) {
                        *last_pg = pg->next;
                        order = get_count_order(pg->size / ENTRIES_PER_PAGE);
-                       free_pages((unsigned long)pg->records, order);
+                       if (order >= 0)
+                               free_pages((unsigned long)pg->records, order);
                        ftrace_number_of_pages -= 1 << order;
                        ftrace_number_of_groups--;
                        kfree(pg);
index b9dad35..68744c5 100644 (file)
@@ -2815,6 +2815,17 @@ rb_try_to_discard(struct ring_buffer_per_cpu *cpu_buffer,
                        return 0;
 
                /*
+                * It's possible that the event time delta is zero
+                * (has the same time stamp as the previous event)
+                * in which case write_stamp and before_stamp could
+                * be the same. In such a case, force before_stamp
+                * to be different than write_stamp. It doesn't
+                * matter what it is, as long as it's different.
+                */
+               if (!delta)
+                       rb_time_set(&cpu_buffer->before_stamp, 0);
+
+               /*
                 * If an event were to come in now, it would see that the
                 * write_stamp and the before_stamp are different, and assume
                 * that this event just added itself before updating
@@ -3307,9 +3318,13 @@ static void check_buffer(struct ring_buffer_per_cpu *cpu_buffer,
                        goto out;
                }
                atomic_inc(&cpu_buffer->record_disabled);
-               pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld after:%lld\n",
-                      cpu_buffer->cpu,
-                      ts + info->delta, info->ts, info->delta, info->after);
+               /* There are some cases during boot up when this can happen */
+               WARN_ON_ONCE(system_state != SYSTEM_BOOTING);
+               pr_warn("[CPU: %d]TIME DOES NOT MATCH expected:%lld actual:%lld delta:%lld before:%lld after:%lld%s\n",
+                       cpu_buffer->cpu,
+                       ts + info->delta, info->ts, info->delta,
+                       info->before, info->after,
+                       full ? " (full)" : "");
                dump_buffer_page(bpage, info, tail);
                atomic_dec(&ts_dump);
                /* Do not re-enable checking */
index e295c41..5c77762 100644 (file)
@@ -1929,6 +1929,12 @@ static int run_tracer_selftest(struct tracer *type)
        if (!selftests_can_run)
                return save_selftest(type);
 
+       if (!tracing_is_on()) {
+               pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
+                       type->name);
+               return 0;
+       }
+
        /*
         * Run a selftest on this tracer.
         * Here we reset the trace buffer, and set the current
@@ -2978,7 +2984,8 @@ static void __ftrace_trace_stack(struct trace_buffer *buffer,
 
        size = nr_entries * sizeof(unsigned long);
        event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
-                                           sizeof(*entry) + size, trace_ctx);
+                                   (sizeof(*entry) - sizeof(entry->caller)) + size,
+                                   trace_ctx);
        if (!event)
                goto out;
        entry = ring_buffer_event_data(event);
index dec13ff..a6446c0 100644 (file)
@@ -605,7 +605,6 @@ void trace_graph_function(struct trace_array *tr,
 void trace_latency_header(struct seq_file *m);
 void trace_default_header(struct seq_file *m);
 void print_trace_header(struct seq_file *m, struct trace_iterator *iter);
-int trace_empty(struct trace_iterator *iter);
 
 void trace_graph_return(struct ftrace_graph_ret *trace);
 int trace_graph_entry(struct ftrace_graph_ent *trace);
index 2979a96..8d71e6c 100644 (file)
@@ -1225,8 +1225,10 @@ static int __create_synth_event(const char *name, const char *raw_fields)
                        goto err;
                }
 
-               if (!argc)
+               if (!argc) {
+                       argv_free(argv);
                        continue;
+               }
 
                n_fields_this_loop = 0;
                consumed = 0;
index 8a1cb08..6fe770d 100644 (file)
@@ -124,9 +124,9 @@ static nokprobe_inline bool trace_kprobe_module_exist(struct trace_kprobe *tk)
        if (!p)
                return true;
        *p = '\0';
-       mutex_lock(&module_mutex);
+       rcu_read_lock_sched();
        ret = !!find_module(tk->symbol);
-       mutex_unlock(&module_mutex);
+       rcu_read_unlock_sched();
        *p = ':';
 
        return ret;
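
find_module() now needs only RCU-sched protection for a pure existence test, so the module_mutex round-trip goes away. The check in isolation, as a sketch:

    static bool module_loaded(const char *name)
    {
            bool ret;

            rcu_read_lock_sched();
            ret = !!find_module(name);      /* list walk is RCU-safe */
            rcu_read_unlock_sched();
            return ret;
    }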
index 0b35212..bb7bb3b 100644 (file)
@@ -139,13 +139,22 @@ static void umd_cleanup(struct subprocess_info *info)
        struct umd_info *umd_info = info->data;
 
        /* cleanup if umh_setup() was successful but exec failed */
-       if (info->retval) {
-               fput(umd_info->pipe_to_umh);
-               fput(umd_info->pipe_from_umh);
-               put_pid(umd_info->tgid);
-               umd_info->tgid = NULL;
-       }
+       if (info->retval)
+               umd_cleanup_helper(umd_info);
+}
+
+/**
+ * umd_cleanup_helper - release the resources which were allocated in umd_setup
+ * @info: information about usermode driver
+ */
+void umd_cleanup_helper(struct umd_info *info)
+{
+       fput(info->pipe_to_umh);
+       fput(info->pipe_from_umh);
+       put_pid(info->tgid);
+       info->tgid = NULL;
 }
+EXPORT_SYMBOL_GPL(umd_cleanup_helper);
 
 /**
  * fork_usermode_driver - fork a usermode driver
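
Exporting umd_cleanup_helper() lets drivers that forked a usermode helper release the pipes and pid themselves. A hypothetical caller-side teardown, assuming the caller is responsible for stopping the process first:

    if (info->tgid) {
            kill_pid(info->tgid, SIGKILL, 1);       /* assumption: force stop */
            umd_cleanup_helper(info);               /* fput pipes, put_pid */
    }
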
index 0ef8f65..9c9eb20 100644 (file)
@@ -413,7 +413,7 @@ static void put_watch(struct watch *watch)
 }
 
 /**
- * init_watch_queue - Initialise a watch
+ * init_watch - Initialise a watch
  * @watch: The watch to initialise.
  * @wqueue: The queue to assign.
  *
index 7110906..107bc38 100644 (file)
@@ -278,9 +278,10 @@ void touch_all_softlockup_watchdogs(void)
         * update as well, the only side effect might be a cycle delay for
         * the softlockup check.
         */
-       for_each_cpu(cpu, &watchdog_allowed_mask)
+       for_each_cpu(cpu, &watchdog_allowed_mask) {
                per_cpu(watchdog_touch_ts, cpu) = SOFTLOCKUP_RESET;
-       wq_watchdog_touch(-1);
+               wq_watchdog_touch(cpu);
+       }
 }
 
 void touch_softlockup_watchdog_sync(void)
index 0d150da..79f2319 100644 (file)
@@ -1412,7 +1412,6 @@ static void __queue_work(int cpu, struct workqueue_struct *wq,
         */
        lockdep_assert_irqs_disabled();
 
-       debug_work_activate(work);
 
        /* if draining, only works from the same workqueue are allowed */
        if (unlikely(wq->flags & __WQ_DRAINING) &&
@@ -1494,6 +1493,7 @@ retry:
                worklist = &pwq->delayed_works;
        }
 
+       debug_work_activate(work);
        insert_work(pwq, work, worklist, work_flags);
 
 out:
@@ -5787,22 +5787,17 @@ static void wq_watchdog_timer_fn(struct timer_list *unused)
                        continue;
 
                /* get the latest of pool and touched timestamps */
+               if (pool->cpu >= 0)
+                       touched = READ_ONCE(per_cpu(wq_watchdog_touched_cpu, pool->cpu));
+               else
+                       touched = READ_ONCE(wq_watchdog_touched);
                pool_ts = READ_ONCE(pool->watchdog_ts);
-               touched = READ_ONCE(wq_watchdog_touched);
 
                if (time_after(pool_ts, touched))
                        ts = pool_ts;
                else
                        ts = touched;
 
-               if (pool->cpu >= 0) {
-                       unsigned long cpu_touched =
-                               READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
-                                                 pool->cpu));
-                       if (time_after(cpu_touched, ts))
-                               ts = cpu_touched;
-               }
-
                /* did we stall? */
                if (time_after(jiffies, ts + thresh)) {
                        lockup_detected = true;
@@ -5826,8 +5821,8 @@ notrace void wq_watchdog_touch(int cpu)
 {
        if (cpu >= 0)
                per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
-       else
-               wq_watchdog_touched = jiffies;
+
+       wq_watchdog_touched = jiffies;
 }
 
 static void wq_watchdog_set_thresh(unsigned long thresh)
index 4680633..a38cc61 100644 (file)
@@ -651,6 +651,15 @@ config STACKDEPOT
        bool
        select STACKTRACE
 
+config STACK_HASH_ORDER
+       int "stack depot hash size (12 => 4KB, 20 => 1024KB)"
+       range 12 20
+       default 20
+       depends on STACKDEPOT
+       help
+        Select the hash size as a power of 2 for the stackdepot hash table.
+        Choose a lower value to reduce the memory impact.
+
 config SBITMAP
        bool
 
index 5ea0c17..2779c29 100644 (file)
@@ -256,14 +256,51 @@ config DEBUG_INFO_SPLIT
          to know about the .dwo files and include them.
          Incompatible with older versions of ccache.
 
+choice
+       prompt "DWARF version"
+       help
+         Which version of DWARF debug info to emit.
+
+config DEBUG_INFO_DWARF_TOOLCHAIN_DEFAULT
+       bool "Rely on the toolchain's implicit default DWARF version"
+       help
+         The implicit default version of DWARF debug info produced by a
+         toolchain changes over time.
+
+         This can break consumers of the debug info that haven't upgraded to
+         support newer revisions, and prevent testing newer versions, but
+         those should be less common scenarios.
+
+         If unsure, say Y.
+
 config DEBUG_INFO_DWARF4
-       bool "Generate dwarf4 debuginfo"
-       depends on $(cc-option,-gdwarf-4)
+       bool "Generate DWARF Version 4 debuginfo"
+       help
+         Generate DWARF v4 debug info. This requires gcc 4.5+ and gdb 7.0+.
+
+         If you have consumers of DWARF debug info that are not ready for
+         newer revisions of DWARF, you may wish to choose this or have your
+         config select this.
+
+config DEBUG_INFO_DWARF5
+       bool "Generate DWARF Version 5 debuginfo"
+       depends on GCC_VERSION >= 50000 || CC_IS_CLANG
+       depends on CC_IS_GCC || $(success,$(srctree)/scripts/test_dwarf5_support.sh $(CC) $(CLANG_FLAGS))
+       depends on !DEBUG_INFO_BTF
        help
-         Generate dwarf4 debug info. This requires recent versions
-         of gcc and gdb. It makes the debug information larger.
-         But it significantly improves the success of resolving
-         variables in gdb on optimized code.
+         Generate DWARF v5 debug info. Requires binutils 2.35.2, gcc 5.0+ (gcc
+         5.0+ accepts the -gdwarf-5 flag but only had partial support for some
+         draft features until 7.0), and gdb 8.0+.
+
+         Changes to the structure of debug info in Version 5 allow for around
+         15-18% savings in resulting image and debug info section sizes as
+         compared to DWARF Version 4. DWARF Version 5 standardizes previous
+         extensions such as accelerators for symbol indexing and the format
+         for fission (.dwo/.dwp) files. Users may not want to select this
+         config if they rely on tooling that has not yet been updated to
+         support DWARF Version 5.
+
+endchoice # "DWARF version"
 
 config DEBUG_INFO_BTF
        bool "Generate BTF typeinfo"
@@ -901,6 +938,7 @@ config DEBUG_STACKOVERFLOW
          If in doubt, say "N".
 
 source "lib/Kconfig.kasan"
+source "lib/Kconfig.kfence"
 
 endmenu # "Memory Debugging"
 
index f5fa4ba..fba9909 100644 (file)
@@ -156,6 +156,7 @@ config KASAN_STACK_ENABLE
 
 config KASAN_STACK
        int
+       depends on KASAN_GENERIC || KASAN_SW_TAGS
        default 1 if KASAN_STACK_ENABLE || CC_IS_GCC
        default 0
 
@@ -190,11 +191,11 @@ config KASAN_KUNIT_TEST
          kernel debugging features like KASAN.
 
          For more information on KUnit and unit tests in general, please refer
-         to the KUnit documentation in Documentation/dev-tools/kunit
+         to the KUnit documentation in Documentation/dev-tools/kunit.
 
-config TEST_KASAN_MODULE
+config KASAN_MODULE_TEST
        tristate "KUnit-incompatible tests of KASAN bug detection capabilities"
-       depends on m && KASAN
+       depends on m && KASAN && !KASAN_HW_TAGS
        help
          This is a part of the KASAN test suite that is incompatible with
          KUnit. Currently includes tests that do bad copy_from/to_user
diff --git a/lib/Kconfig.kfence b/lib/Kconfig.kfence
new file mode 100644 (file)
index 0000000..78f50cc
--- /dev/null
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-only
+
+config HAVE_ARCH_KFENCE
+       bool
+
+menuconfig KFENCE
+       bool "KFENCE: low-overhead sampling-based memory safety error detector"
+       depends on HAVE_ARCH_KFENCE && (SLAB || SLUB)
+       select STACKTRACE
+       help
+         KFENCE is a low-overhead sampling-based detector of heap out-of-bounds
+         access, use-after-free, and invalid-free errors. KFENCE is designed
+         to have negligible cost to permit enabling it in production
+         environments.
+
+         See <file:Documentation/dev-tools/kfence.rst> for more details.
+
+         Note that KFENCE is not a substitute for explicit testing with tools
+         such as KASAN. KFENCE can detect a subset of bugs that KASAN can
+         detect, albeit at very different performance profiles. If you can
+         afford to use KASAN, continue using KASAN, for example in test
+         environments. If your kernel targets production use, and cannot
+         enable KASAN due to its cost, consider using KFENCE.
+
+if KFENCE
+
+config KFENCE_STATIC_KEYS
+       bool "Use static keys to set up allocations"
+       default y
+       depends on JUMP_LABEL # To ensure performance, require jump labels
+       help
+         Use static keys (static branches) to set up KFENCE allocations. Using
+         static keys is normally recommended, because it avoids a dynamic
+         branch in the allocator's fast path. However, with very low sample
+         intervals, or on systems that do not support jump labels, a dynamic
+         branch may still be an acceptable performance trade-off.
+
+config KFENCE_SAMPLE_INTERVAL
+       int "Default sample interval in milliseconds"
+       default 100
+       help
+         The KFENCE sample interval determines the frequency with which heap
+         allocations will be guarded by KFENCE. May be overridden via boot
+         parameter "kfence.sample_interval".
+
+         Set this to 0 to disable KFENCE by default, in which case only
+         setting "kfence.sample_interval" to a non-zero value enables KFENCE.
+
+config KFENCE_NUM_OBJECTS
+       int "Number of guarded objects available"
+       range 1 65535
+       default 255
+       help
+         The number of guarded objects available. For each KFENCE object, 2
+         pages are required on average: one containing the object, plus guard
+         pages on either side that are shared with neighboring objects.
+
+config KFENCE_STRESS_TEST_FAULTS
+       int "Stress testing of fault handling and error reporting" if EXPERT
+       default 0
+       help
+         The inverse probability with which to randomly protect KFENCE object
+         pages, resulting in spurious use-after-frees. The main purpose of
+         this option is to stress test KFENCE with concurrent error reports
+         and allocations/frees. A value of 0 disables stress testing logic.
+
+         Only for KFENCE testing; set to 0 if you are not a KFENCE developer.
+
+config KFENCE_KUNIT_TEST
+       tristate "KFENCE integration test suite" if !KUNIT_ALL_TESTS
+       default KUNIT_ALL_TESTS
+       depends on TRACEPOINTS && KUNIT
+       help
+         Test suite for KFENCE, testing various error detection scenarios with
+         various allocation types, and checking that reports are correctly
+         output to console.
+
+         Say Y here if you want the test to be built into the kernel and run
+         during boot; say M if you want the test to build as a module; say N
+         if you are unsure.
+
+endif # KFENCE
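
The KFENCE_STATIC_KEYS help above hinges on static branches keeping the allocator fast path free of a dynamic check. A minimal illustrative sketch of that gating pattern follows; the kfence_allocation_gate key and kfence_alloc_sketch() helper are hypothetical names, not part of this series:

	#include <linux/jump_label.h>
	#include <linux/gfp.h>

	static DEFINE_STATIC_KEY_FALSE(kfence_allocation_gate);

	static inline void *kfence_alloc_sketch(size_t size, gfp_t flags)
	{
		/*
		 * Compiled as a static branch, this check is patched to a NOP
		 * until the sample interval elapses and the key is enabled;
		 * without jump labels it degrades to an ordinary conditional.
		 */
		if (!static_branch_unlikely(&kfence_allocation_gate))
			return NULL;	/* take the regular slab fast path */

		return NULL;	/* a real hook would return a guarded object */
	}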
index 3a0b1c9..e5372a1 100644 (file)
@@ -112,23 +112,6 @@ config UBSAN_UNREACHABLE
          This option enables -fsanitize=unreachable which checks for control
          flow reaching an expected-to-be-unreachable position.
 
-config UBSAN_SIGNED_OVERFLOW
-       bool "Perform checking for signed arithmetic overflow"
-       default UBSAN
-       depends on $(cc-option,-fsanitize=signed-integer-overflow)
-       help
-         This option enables -fsanitize=signed-integer-overflow which checks
-         for overflow of any arithmetic operations with signed integers.
-
-config UBSAN_UNSIGNED_OVERFLOW
-       bool "Perform checking for unsigned arithmetic overflow"
-       depends on $(cc-option,-fsanitize=unsigned-integer-overflow)
-       depends on !X86_32 # avoid excessive stack usage on x86-32/clang
-       help
-         This option enables -fsanitize=unsigned-integer-overflow which checks
-         for overflow of any arithmetic operations with unsigned integers. This
-         currently causes x86 to fail to boot.
-
 config UBSAN_OBJECT_SIZE
        bool "Perform checking for accesses beyond the end of objects"
        default UBSAN
index fb7d946..b5307d3 100644 (file)
@@ -66,7 +66,7 @@ obj-$(CONFIG_TEST_IDA) += test_ida.o
 obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o
 CFLAGS_test_kasan.o += -fno-builtin
 CFLAGS_test_kasan.o += $(call cc-disable-warning, vla)
-obj-$(CONFIG_TEST_KASAN_MODULE) += test_kasan_module.o
+obj-$(CONFIG_KASAN_MODULE_TEST) += test_kasan_module.o
 CFLAGS_test_kasan_module.o += -fno-builtin
 obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
 CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla)
index 7103440..8f9d537 100644 (file)
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -91,8 +91,6 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
        char *secstrings;
        unsigned int i;
 
-       lockdep_assert_held(&module_mutex);
-
        mod->bug_table = NULL;
        mod->num_bugs = 0;
 
@@ -118,7 +116,6 @@ void module_bug_finalize(const Elf_Ehdr *hdr, const Elf_Shdr *sechdrs,
 
 void module_bug_cleanup(struct module *mod)
 {
-       lockdep_assert_held(&module_mutex);
        list_del_rcu(&mod->bug_list);
 }
 
index dfd4c44..5d474c6 100644 (file)
@@ -228,7 +228,6 @@ char *next_arg(char *args, char **param, char **val)
 {
        unsigned int i, equals = 0;
        int in_quote = 0, quoted = 0;
-       char *next;
 
        if (*args == '"') {
                args++;
@@ -266,10 +265,10 @@ char *next_arg(char *args, char **param, char **val)
 
        if (args[i]) {
                args[i] = '\0';
-               next = args + i + 1;
+               args += i + 1;
        } else
-               next = args + i;
+               args += i;
 
        /* Chew up trailing spaces. */
-       return skip_spaces(next);
+       return skip_spaces(args);
 }
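
For reference, a hedged usage sketch of next_arg() — the buffer name buf is illustrative — showing that the calling convention is unchanged despite the internal reuse of args:

	char *args = buf, *param, *val;

	while (*args) {
		args = next_arg(args, &param, &val);
		/* For "foo=1 bar" this yields param="foo", val="1",
		 * then param="bar", val=NULL. */
	}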
index c3e59ca..9c9f40b 100644 (file)
@@ -21,7 +21,6 @@ static inline unsigned long ex_to_insn(const struct exception_table_entry *x)
 }
 #endif
 
-#ifndef ARCH_HAS_SORT_EXTABLE
 #ifndef ARCH_HAS_RELATIVE_EXTABLE
 #define swap_ex                NULL
 #else
@@ -88,9 +87,6 @@ void trim_init_extable(struct module *m)
                m->num_exentries--;
 }
 #endif /* CONFIG_MODULES */
-#endif /* !ARCH_HAS_SORT_EXTABLE */
-
-#ifndef ARCH_HAS_SEARCH_EXTABLE
 
 static int cmp_ex_search(const void *key, const void *elt)
 {
@@ -120,4 +116,3 @@ search_extable(const struct exception_table_entry *base,
        return bsearch(&value, base, num,
                       sizeof(struct exception_table_entry), cmp_ex_search);
 }
-#endif
index dab97bb..5dcf9cd 100644 (file)
@@ -81,7 +81,8 @@ static int clear_bits_ll(unsigned long *addr, unsigned long mask_to_clear)
  * users set the same bit, one user will return the remaining bits, otherwise
  * return 0.
  */
-static int bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
+static unsigned long
+bitmap_set_ll(unsigned long *map, unsigned long start, unsigned long nr)
 {
        unsigned long *p = map + BIT_WORD(start);
        const unsigned long size = start + nr;
index d8ca336..f66c62a 100644 (file)
@@ -464,20 +464,6 @@ void iov_iter_init(struct iov_iter *i, unsigned int direction,
 }
 EXPORT_SYMBOL(iov_iter_init);
 
-static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len)
-{
-       char *from = kmap_atomic(page);
-       memcpy(to, from + offset, len);
-       kunmap_atomic(from);
-}
-
-static void memcpy_to_page(struct page *page, size_t offset, const char *from, size_t len)
-{
-       char *to = kmap_atomic(page);
-       memcpy(to + offset, from, len);
-       kunmap_atomic(to);
-}
-
 static void memzero_page(struct page *page, size_t offset, size_t len)
 {
        char *addr = kmap_atomic(page);
index f32fe48..07b4b9a 100644 (file)
@@ -28,6 +28,8 @@ static DEFINE_MUTEX(io_range_mutex);
  * @new_range: pointer to the IO range to be registered.
  *
  * Returns 0 on success, the error code in case of failure.
+ * If the range already exists, -EEXIST will be returned, which should be
+ * considered a success.
  *
  * Register a new IO range node in the IO range list.
  */
@@ -51,6 +53,7 @@ int logic_pio_register_range(struct logic_pio_hwaddr *new_range)
        list_for_each_entry(range, &io_range_list, list) {
                if (range->fwnode == new_range->fwnode) {
                        /* range already there */
+                       ret = -EEXIST;
                        goto end_register;
                }
                if (range->flags == LOGIC_PIO_CPU_MMIO &&
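
Per the updated kernel-doc, callers are expected to treat -EEXIST as success; a sketch of the intended (hypothetical) error handling:

	ret = logic_pio_register_range(range);
	if (ret && ret != -EEXIST)
		return ret;	/* genuine registration failure */
	/* 0 or -EEXIST: the range is registered, continue. */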
index 064d68a..4686639 100644 (file)
@@ -232,4 +232,5 @@ u64 mul_u64_u64_div_u64(u64 a, u64 b, u64 c)
 
        return res + div64_u64(a * b, c);
 }
+EXPORT_SYMBOL(mul_u64_u64_div_u64);
 #endif
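
With mul_u64_u64_div_u64() now exported, modules can compute a*b/c with a 128-bit intermediate. A hypothetical usage sketch (variable names are illustrative):

	/* rate = bytes * NSEC_PER_SEC / elapsed_ns, without the u64
	 * intermediate overflow a plain multiply-then-divide would risk. */
	u64 rate = mul_u64_u64_div_u64(bytes, NSEC_PER_SEC, elapsed_ns);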
index 890dcc2..49f67a0 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/stackdepot.h>
 #include <linux/string.h>
 #include <linux/types.h>
+#include <linux/memblock.h>
 
 #define DEPOT_STACK_BITS (sizeof(depot_stack_handle_t) * 8)
 
@@ -141,14 +142,38 @@ static struct stack_record *depot_alloc_stack(unsigned long *entries, int size,
        return stack;
 }
 
-#define STACK_HASH_ORDER 20
-#define STACK_HASH_SIZE (1L << STACK_HASH_ORDER)
+#define STACK_HASH_SIZE (1L << CONFIG_STACK_HASH_ORDER)
 #define STACK_HASH_MASK (STACK_HASH_SIZE - 1)
 #define STACK_HASH_SEED 0x9747b28c
 
-static struct stack_record *stack_table[STACK_HASH_SIZE] = {
-       [0 ...  STACK_HASH_SIZE - 1] = NULL
-};
+static bool stack_depot_disable;
+static struct stack_record **stack_table;
+
+static int __init is_stack_depot_disabled(char *str)
+{
+       int ret;
+
+       ret = kstrtobool(str, &stack_depot_disable);
+       if (!ret && stack_depot_disable) {
+               pr_info("Stack Depot is disabled\n");
+               stack_table = NULL;
+       }
+       return 0;
+}
+early_param("stack_depot_disable", is_stack_depot_disabled);
+
+int __init stack_depot_init(void)
+{
+       if (!stack_depot_disable) {
+               size_t size = (STACK_HASH_SIZE * sizeof(struct stack_record *));
+               int i;
+
+               stack_table = memblock_alloc(size, size);
+               for (i = 0; i < STACK_HASH_SIZE;  i++)
+                       stack_table[i] = NULL;
+       }
+       return 0;
+}
 
 /* Calculate hash for a stack */
 static inline u32 hash_stack(unsigned long *entries, unsigned int size)
@@ -242,7 +267,7 @@ depot_stack_handle_t stack_depot_save(unsigned long *entries,
        unsigned long flags;
        u32 hash;
 
-       if (unlikely(nr_entries == 0))
+       if (unlikely(nr_entries == 0) || stack_depot_disable)
                goto fast_exit;
 
        hash = hash_stack(entries, nr_entries);
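
For context, a minimal sketch of the save path that the new stack_depot_disable early parameter short-circuits (buffer size and skip count are illustrative):

	unsigned long entries[16];
	unsigned int nr_entries;
	depot_stack_handle_t handle;

	nr_entries = stack_trace_save(entries, ARRAY_SIZE(entries), 0);
	/* Returns 0 (no handle) when booted with stack_depot_disable=on. */
	handle = stack_depot_save(entries, nr_entries, GFP_KERNEL);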
index 2947274..e5647d1 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/mman.h>
 #include <linux/module.h>
 #include <linux/printk.h>
+#include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/string.h>
 #include <linux/uaccess.h>
 #define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_GRANULE_SIZE)
 
 /*
- * We assign some test results to these globals to make sure the tests
- * are not eliminated as dead code.
+ * Some tests use these global variables to store return values from function
+ * calls that could otherwise be eliminated by the compiler as dead code.
  */
-
 void *kasan_ptr_result;
 int kasan_int_result;
 
@@ -39,40 +39,81 @@ static struct kunit_resource resource;
 static struct kunit_kasan_expectation fail_data;
 static bool multishot;
 
+/*
+ * Temporarily enable multi-shot mode. Otherwise, KASAN would only report the
+ * first detected bug and panic the kernel if panic_on_warn is enabled. For
+ * hardware tag-based KASAN, also allow tag checking to be reenabled for each
+ * test; see the comment for KUNIT_EXPECT_KASAN_FAIL().
+ */
 static int kasan_test_init(struct kunit *test)
 {
-       /*
-        * Temporarily enable multi-shot mode and set panic_on_warn=0.
-        * Otherwise, we'd only get a report for the first case.
-        */
-       multishot = kasan_save_enable_multi_shot();
+       if (!kasan_enabled()) {
+               kunit_err(test, "can't run KASAN tests with KASAN disabled");
+               return -1;
+       }
 
+       multishot = kasan_save_enable_multi_shot();
+       kasan_set_tagging_report_once(false);
        return 0;
 }
 
 static void kasan_test_exit(struct kunit *test)
 {
+       kasan_set_tagging_report_once(true);
        kasan_restore_multi_shot(multishot);
 }
 
 /**
- * KUNIT_EXPECT_KASAN_FAIL() - Causes a test failure when the expression does
- * not cause a KASAN error. This uses a KUnit resource named "kasan_data." Do
- * Do not use this name for a KUnit resource outside here.
+ * KUNIT_EXPECT_KASAN_FAIL() - check that the executed expression produces a
+ * KASAN report; causes a test failure otherwise. This relies on a KUnit
+ * resource named "kasan_data". Do not use this name for KUnit resources
+ * outside of KASAN tests.
+ *
+ * For hardware tag-based KASAN, when a tag fault happens, tag checking is
+ * normally auto-disabled. When this happens, this test handler reenables
+ * tag checking. As tag checking can only be disabled or enabled per CPU, this
+ * handler disables migration (preemption).
  *
+ * Since the compiler doesn't see that the expression can change the fail_data
+ * fields, it can reorder or optimize away the accesses to those fields.
+ * Use READ/WRITE_ONCE() for the accesses and compiler barriers around the
+ * expression to prevent that.
  */
-#define KUNIT_EXPECT_KASAN_FAIL(test, condition) do { \
-       fail_data.report_expected = true; \
-       fail_data.report_found = false; \
-       kunit_add_named_resource(test, \
-                               NULL, \
-                               NULL, \
-                               &resource, \
-                               "kasan_data", &fail_data); \
-       condition; \
-       KUNIT_EXPECT_EQ(test, \
-                       fail_data.report_expected, \
-                       fail_data.report_found); \
+#define KUNIT_EXPECT_KASAN_FAIL(test, expression) do {         \
+       if (IS_ENABLED(CONFIG_KASAN_HW_TAGS))                   \
+               migrate_disable();                              \
+       WRITE_ONCE(fail_data.report_expected, true);            \
+       WRITE_ONCE(fail_data.report_found, false);              \
+       kunit_add_named_resource(test,                          \
+                               NULL,                           \
+                               NULL,                           \
+                               &resource,                      \
+                               "kasan_data", &fail_data);      \
+       barrier();                                              \
+       expression;                                             \
+       barrier();                                              \
+       KUNIT_EXPECT_EQ(test,                                   \
+                       READ_ONCE(fail_data.report_expected),   \
+                       READ_ONCE(fail_data.report_found));     \
+       if (IS_ENABLED(CONFIG_KASAN_HW_TAGS)) {                 \
+               if (READ_ONCE(fail_data.report_found))          \
+                       kasan_enable_tagging();                 \
+               migrate_enable();                               \
+       }                                                       \
+} while (0)
+
+#define KASAN_TEST_NEEDS_CONFIG_ON(test, config) do {                  \
+       if (!IS_ENABLED(config)) {                                      \
+               kunit_info((test), "skipping, " #config " required");   \
+               return;                                                 \
+       }                                                               \
+} while (0)
+
+#define KASAN_TEST_NEEDS_CONFIG_OFF(test, config) do {                 \
+       if (IS_ENABLED(config)) {                                       \
+               kunit_info((test), "skipping, " #config " enabled");    \
+               return;                                                 \
+       }                                                               \
 } while (0)
 
 static void kmalloc_oob_right(struct kunit *test)
@@ -111,23 +152,24 @@ static void kmalloc_node_oob_right(struct kunit *test)
        kfree(ptr);
 }
 
+/*
+ * These kmalloc_pagealloc_* tests try allocating a memory chunk that doesn't
+ * fit into a slab cache and therefore is allocated via the page allocator
+ * fallback. Since this kind of fallback is only implemented for SLUB, these
+ * tests are limited to that allocator.
+ */
 static void kmalloc_pagealloc_oob_right(struct kunit *test)
 {
        char *ptr;
        size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
 
-       if (!IS_ENABLED(CONFIG_SLUB)) {
-               kunit_info(test, "CONFIG_SLUB is not enabled.");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
 
-       /* Allocate a chunk that does not fit into a SLUB cache to trigger
-        * the page allocator fallback.
-        */
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
        KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0);
+
        kfree(ptr);
 }
 
@@ -136,15 +178,12 @@ static void kmalloc_pagealloc_uaf(struct kunit *test)
        char *ptr;
        size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
 
-       if (!IS_ENABLED(CONFIG_SLUB)) {
-               kunit_info(test, "CONFIG_SLUB is not enabled.");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
 
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
-
        kfree(ptr);
+
        KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0);
 }
 
@@ -153,10 +192,7 @@ static void kmalloc_pagealloc_invalid_free(struct kunit *test)
        char *ptr;
        size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
 
-       if (!IS_ENABLED(CONFIG_SLUB)) {
-               kunit_info(test, "CONFIG_SLUB is not enabled.");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
 
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
@@ -164,11 +200,49 @@ static void kmalloc_pagealloc_invalid_free(struct kunit *test)
        KUNIT_EXPECT_KASAN_FAIL(test, kfree(ptr + 1));
 }
 
+static void pagealloc_oob_right(struct kunit *test)
+{
+       char *ptr;
+       struct page *pages;
+       size_t order = 4;
+       size_t size = (1UL << (PAGE_SHIFT + order));
+
+       /*
+        * With generic KASAN, page allocations have no redzones, so
+        * out-of-bounds detection is not guaranteed.
+        * See https://bugzilla.kernel.org/show_bug.cgi?id=210503.
+        */
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
+
+       pages = alloc_pages(GFP_KERNEL, order);
+       ptr = page_address(pages);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
+       free_pages((unsigned long)ptr, order);
+}
+
+static void pagealloc_uaf(struct kunit *test)
+{
+       char *ptr;
+       struct page *pages;
+       size_t order = 4;
+
+       pages = alloc_pages(GFP_KERNEL, order);
+       ptr = page_address(pages);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+       free_pages((unsigned long)ptr, order);
+
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0);
+}
+
 static void kmalloc_large_oob_right(struct kunit *test)
 {
        char *ptr;
        size_t size = KMALLOC_MAX_CACHE_SIZE - 256;
-       /* Allocate a chunk that is large enough, but still fits into a slab
+
+       /*
+        * Allocate a chunk that is large enough, but still fits into a slab
         * and does not trigger the page allocator fallback in SLUB.
         */
        ptr = kmalloc(size, GFP_KERNEL);
@@ -178,11 +252,14 @@ static void kmalloc_large_oob_right(struct kunit *test)
        kfree(ptr);
 }
 
-static void kmalloc_oob_krealloc_more(struct kunit *test)
+static void krealloc_more_oob_helper(struct kunit *test,
+                                       size_t size1, size_t size2)
 {
        char *ptr1, *ptr2;
-       size_t size1 = 17;
-       size_t size2 = 19;
+       size_t middle;
+
+       KUNIT_ASSERT_LT(test, size1, size2);
+       middle = size1 + (size2 - size1) / 2;
 
        ptr1 = kmalloc(size1, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
@@ -190,15 +267,31 @@ static void kmalloc_oob_krealloc_more(struct kunit *test)
        ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
 
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
+       /* All offsets up to size2 must be accessible. */
+       ptr2[size1 - 1] = 'x';
+       ptr2[size1] = 'x';
+       ptr2[middle] = 'x';
+       ptr2[size2 - 1] = 'x';
+
+       /* Generic mode is precise, so unaligned size2 must be inaccessible. */
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+               KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
+
+       /* For all modes, the first aligned offset after size2 must be inaccessible. */
+       KUNIT_EXPECT_KASAN_FAIL(test,
+               ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
+
        kfree(ptr2);
 }
 
-static void kmalloc_oob_krealloc_less(struct kunit *test)
+static void krealloc_less_oob_helper(struct kunit *test,
+                                       size_t size1, size_t size2)
 {
        char *ptr1, *ptr2;
-       size_t size1 = 17;
-       size_t size2 = 15;
+       size_t middle;
+
+       KUNIT_ASSERT_LT(test, size2, size1);
+       middle = size2 + (size1 - size2) / 2;
 
        ptr1 = kmalloc(size1, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
@@ -206,10 +299,79 @@ static void kmalloc_oob_krealloc_less(struct kunit *test)
        ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
 
-       KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
+       /* Must be accessible for all modes. */
+       ptr2[size2 - 1] = 'x';
+
+       /* Generic mode is precise, so unaligned size2 must be inaccessible. */
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+               KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2] = 'x');
+
+       /* For all modes, the first aligned offset after size2 must be inaccessible. */
+       KUNIT_EXPECT_KASAN_FAIL(test,
+               ptr2[round_up(size2, KASAN_GRANULE_SIZE)] = 'x');
+
+       /*
+        * For all modes, size2, middle, and size1 should land in separate
+        * granules, and thus the latter two offsets should be inaccessible.
+        */
+       KUNIT_EXPECT_LE(test, round_up(size2, KASAN_GRANULE_SIZE),
+                               round_down(middle, KASAN_GRANULE_SIZE));
+       KUNIT_EXPECT_LE(test, round_up(middle, KASAN_GRANULE_SIZE),
+                               round_down(size1, KASAN_GRANULE_SIZE));
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr2[middle] = 'x');
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1 - 1] = 'x');
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size1] = 'x');
+
        kfree(ptr2);
 }
 
+static void krealloc_more_oob(struct kunit *test)
+{
+       krealloc_more_oob_helper(test, 201, 235);
+}
+
+static void krealloc_less_oob(struct kunit *test)
+{
+       krealloc_less_oob_helper(test, 235, 201);
+}
+
+static void krealloc_pagealloc_more_oob(struct kunit *test)
+{
+       /* page_alloc fallback is only implemented for SLUB. */
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
+
+       krealloc_more_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 201,
+                                       KMALLOC_MAX_CACHE_SIZE + 235);
+}
+
+static void krealloc_pagealloc_less_oob(struct kunit *test)
+{
+       /* page_alloc fallback is only implemented for SLUB. */
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_SLUB);
+
+       krealloc_less_oob_helper(test, KMALLOC_MAX_CACHE_SIZE + 235,
+                                       KMALLOC_MAX_CACHE_SIZE + 201);
+}
+
+/*
+ * Check that krealloc() detects a use-after-free, returns NULL,
+ * and doesn't unpoison the freed object.
+ */
+static void krealloc_uaf(struct kunit *test)
+{
+       char *ptr1, *ptr2;
+       int size1 = 201;
+       int size2 = 235;
+
+       ptr1 = kmalloc(size1, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
+       kfree(ptr1);
+
+       KUNIT_EXPECT_KASAN_FAIL(test, ptr2 = krealloc(ptr1, size2, GFP_KERNEL));
+       KUNIT_ASSERT_PTR_EQ(test, (void *)ptr2, NULL);
+       KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)ptr1);
+}
+
 static void kmalloc_oob_16(struct kunit *test)
 {
        struct {
@@ -217,10 +379,7 @@ static void kmalloc_oob_16(struct kunit *test)
        } *ptr1, *ptr2;
 
        /* This test is specifically crafted for the generic mode. */
-       if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
-               kunit_info(test, "CONFIG_KASAN_GENERIC required\n");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
 
        ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
@@ -355,7 +514,9 @@ static void kmalloc_uaf2(struct kunit *test)
 {
        char *ptr1, *ptr2;
        size_t size = 43;
+       int counter = 0;
 
+again:
        ptr1 = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
 
@@ -364,6 +525,15 @@ static void kmalloc_uaf2(struct kunit *test)
        ptr2 = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
 
+       /*
+        * For tag-based KASAN, ptr1 and ptr2 tags might happen to be the same.
+        * Allow up to 16 attempts at generating different tags.
+        */
+       if (!IS_ENABLED(CONFIG_KASAN_GENERIC) && ptr1 == ptr2 && counter++ < 16) {
+               kfree(ptr2);
+               goto again;
+       }
+
        KUNIT_EXPECT_KASAN_FAIL(test, ptr1[40] = 'x');
        KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2);
 
@@ -402,10 +572,11 @@ static void kmem_cache_oob(struct kunit *test)
 {
        char *p;
        size_t size = 200;
-       struct kmem_cache *cache = kmem_cache_create("test_cache",
-                                               size, 0,
-                                               0, NULL);
+       struct kmem_cache *cache;
+
+       cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
+
        p = kmem_cache_alloc(cache, GFP_KERNEL);
        if (!p) {
                kunit_err(test, "Allocation failed: %s\n", __func__);
@@ -414,11 +585,12 @@ static void kmem_cache_oob(struct kunit *test)
        }
 
        KUNIT_EXPECT_KASAN_FAIL(test, *p = p[size + OOB_TAG_OFF]);
+
        kmem_cache_free(cache, p);
        kmem_cache_destroy(cache);
 }
 
-static void memcg_accounted_kmem_cache(struct kunit *test)
+static void kmem_cache_accounted(struct kunit *test)
 {
        int i;
        char *p;
@@ -445,6 +617,31 @@ free_cache:
        kmem_cache_destroy(cache);
 }
 
+static void kmem_cache_bulk(struct kunit *test)
+{
+       struct kmem_cache *cache;
+       size_t size = 200;
+       char *p[10];
+       bool ret;
+       int i;
+
+       cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
+
+       ret = kmem_cache_alloc_bulk(cache, GFP_KERNEL, ARRAY_SIZE(p), (void **)&p);
+       if (!ret) {
+               kunit_err(test, "Allocation failed: %s\n", __func__);
+               kmem_cache_destroy(cache);
+               return;
+       }
+
+       for (i = 0; i < ARRAY_SIZE(p); i++)
+               p[i][0] = p[i][size - 1] = 42;
+
+       kmem_cache_free_bulk(cache, ARRAY_SIZE(p), (void **)&p);
+       kmem_cache_destroy(cache);
+}
+
 static char global_array[10];
 
 static void kasan_global_oob(struct kunit *test)
@@ -453,14 +650,12 @@ static void kasan_global_oob(struct kunit *test)
        char *p = &global_array[ARRAY_SIZE(global_array) + i];
 
        /* Only generic mode instruments globals. */
-       if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
-               kunit_info(test, "CONFIG_KASAN_GENERIC required");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
 
        KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
 
+/* Check that ksize() makes the whole object accessible. */
 static void ksize_unpoisons_memory(struct kunit *test)
 {
        char *ptr;
@@ -469,11 +664,32 @@ static void ksize_unpoisons_memory(struct kunit *test)
        ptr = kmalloc(size, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
        real_size = ksize(ptr);
-       /* This access doesn't trigger an error. */
+
+       /* This access shouldn't trigger a KASAN report. */
        ptr[size] = 'x';
-       /* This one does. */
+
+       /* This one must. */
        KUNIT_EXPECT_KASAN_FAIL(test, ptr[real_size] = 'y');
+
+       kfree(ptr);
+}
+
+/*
+ * Check that a use-after-free is detected by ksize() and via normal accesses
+ * after it.
+ */
+static void ksize_uaf(struct kunit *test)
+{
+       char *ptr;
+       int size = 128 - KASAN_GRANULE_SIZE;
+
+       ptr = kmalloc(size, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
        kfree(ptr);
+
+       KUNIT_EXPECT_KASAN_FAIL(test, ksize(ptr));
+       KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *ptr);
+       KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = *(ptr + size));
 }
 
 static void kasan_stack_oob(struct kunit *test)
@@ -482,10 +698,7 @@ static void kasan_stack_oob(struct kunit *test)
        volatile int i = OOB_TAG_OFF;
        char *p = &stack_array[ARRAY_SIZE(stack_array) + i];
 
-       if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
-               kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
 
        KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
@@ -497,15 +710,8 @@ static void kasan_alloca_oob_left(struct kunit *test)
        char *p = alloca_array - 1;
 
        /* Only generic mode instruments dynamic allocas. */
-       if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
-               kunit_info(test, "CONFIG_KASAN_GENERIC required");
-               return;
-       }
-
-       if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
-               kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
 
        KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
@@ -517,15 +723,8 @@ static void kasan_alloca_oob_right(struct kunit *test)
        char *p = alloca_array + i;
 
        /* Only generic mode instruments dynamic allocas. */
-       if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
-               kunit_info(test, "CONFIG_KASAN_GENERIC required");
-               return;
-       }
-
-       if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
-               kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_STACK);
 
        KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
@@ -568,7 +767,7 @@ static void kmem_cache_invalid_free(struct kunit *test)
                return;
        }
 
-       /* Trigger invalid free, the object doesn't get freed */
+       /* Trigger invalid free, the object doesn't get freed. */
        KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p + 1));
 
        /*
@@ -585,12 +784,11 @@ static void kasan_memchr(struct kunit *test)
        char *ptr;
        size_t size = 24;
 
-       /* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */
-       if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
-               kunit_info(test,
-                       "str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT");
-               return;
-       }
+       /*
+        * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT.
+        * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details.
+        */
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT);
 
        if (OOB_TAG_OFF)
                size = round_up(size, OOB_TAG_OFF);
@@ -610,12 +808,11 @@ static void kasan_memcmp(struct kunit *test)
        size_t size = 24;
        int arr[9];
 
-       /* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */
-       if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
-               kunit_info(test,
-                       "str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT");
-               return;
-       }
+       /*
+        * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT.
+        * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details.
+        */
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT);
 
        if (OOB_TAG_OFF)
                size = round_up(size, OOB_TAG_OFF);
@@ -634,12 +831,11 @@ static void kasan_strings(struct kunit *test)
        char *ptr;
        size_t size = 24;
 
-       /* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */
-       if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
-               kunit_info(test,
-                       "str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT");
-               return;
-       }
+       /*
+        * str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT.
+        * See https://bugzilla.kernel.org/show_bug.cgi?id=206337 for details.
+        */
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_AMD_MEM_ENCRYPT);
 
        ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
@@ -700,13 +896,10 @@ static void kasan_bitops_generic(struct kunit *test)
        long *bits;
 
        /* This test is specifically crafted for the generic mode. */
-       if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) {
-               kunit_info(test, "CONFIG_KASAN_GENERIC required\n");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_GENERIC);
 
        /*
-        * Allocate 1 more byte, which causes kzalloc to round up to 16-bytes;
+        * Allocate 1 more byte, which causes kzalloc to round up to 16 bytes;
         * this way we do not actually corrupt other memory.
         */
        bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL);
@@ -731,19 +924,16 @@ static void kasan_bitops_tags(struct kunit *test)
 {
        long *bits;
 
-       /* This test is specifically crafted for the tag-based mode. */
-       if (IS_ENABLED(CONFIG_KASAN_GENERIC)) {
-               kunit_info(test, "CONFIG_KASAN_SW_TAGS required\n");
-               return;
-       }
+       /* This test is specifically crafted for tag-based modes. */
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
 
-       /* Allocation size will be rounded to up granule size, which is 16. */
-       bits = kzalloc(sizeof(*bits), GFP_KERNEL);
+       /* kmalloc-64 cache will be used and the last 16 bytes will be the redzone. */
+       bits = kzalloc(48, GFP_KERNEL);
        KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits);
 
-       /* Do the accesses past the 16 allocated bytes. */
-       kasan_bitops_modify(test, BITS_PER_LONG, &bits[1]);
-       kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, &bits[1]);
+       /* Do the accesses past the 48 allocated bytes, but within the redzone. */
+       kasan_bitops_modify(test, BITS_PER_LONG, (void *)bits + 48);
+       kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, (void *)bits + 48);
 
        kfree(bits);
 }
@@ -764,10 +954,7 @@ static void vmalloc_oob(struct kunit *test)
 {
        void *area;
 
-       if (!IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
-               kunit_info(test, "CONFIG_KASAN_VMALLOC is not enabled.");
-               return;
-       }
+       KASAN_TEST_NEEDS_CONFIG_ON(test, CONFIG_KASAN_VMALLOC);
 
        /*
         * We have to be careful not to hit the guard page.
@@ -780,6 +967,94 @@ static void vmalloc_oob(struct kunit *test)
        vfree(area);
 }
 
+/*
+ * Check that the assigned pointer tag falls within the [KASAN_TAG_MIN,
+ * KASAN_TAG_KERNEL) range (note: excluding the match-all tag) for tag-based
+ * modes.
+ */
+static void match_all_not_assigned(struct kunit *test)
+{
+       char *ptr;
+       struct page *pages;
+       int i, size, order;
+
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
+
+       for (i = 0; i < 256; i++) {
+               size = (get_random_int() % 1024) + 1;
+               ptr = kmalloc(size, GFP_KERNEL);
+               KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+               KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
+               KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
+               kfree(ptr);
+       }
+
+       for (i = 0; i < 256; i++) {
+               order = (get_random_int() % 4) + 1;
+               pages = alloc_pages(GFP_KERNEL, order);
+               ptr = page_address(pages);
+               KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+               KUNIT_EXPECT_GE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_MIN);
+               KUNIT_EXPECT_LT(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
+               free_pages((unsigned long)ptr, order);
+       }
+}
+
+/* Check that 0xff works as a match-all pointer tag for tag-based modes. */
+static void match_all_ptr_tag(struct kunit *test)
+{
+       char *ptr;
+       u8 tag;
+
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
+
+       ptr = kmalloc(128, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
+       /* Back up the assigned tag. */
+       tag = get_tag(ptr);
+       KUNIT_EXPECT_NE(test, tag, (u8)KASAN_TAG_KERNEL);
+
+       /* Reset the tag to 0xff. */
+       ptr = set_tag(ptr, KASAN_TAG_KERNEL);
+
+       /* This access shouldn't trigger a KASAN report. */
+       *ptr = 0;
+
+       /* Recover the pointer tag and free. */
+       ptr = set_tag(ptr, tag);
+       kfree(ptr);
+}
+
+/* Check that there are no match-all memory tags for tag-based modes. */
+static void match_all_mem_tag(struct kunit *test)
+{
+       char *ptr;
+       int tag;
+
+       KASAN_TEST_NEEDS_CONFIG_OFF(test, CONFIG_KASAN_GENERIC);
+
+       ptr = kmalloc(128, GFP_KERNEL);
+       KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+       KUNIT_EXPECT_NE(test, (u8)get_tag(ptr), (u8)KASAN_TAG_KERNEL);
+
+       /* For each possible tag value not matching the pointer tag. */
+       for (tag = KASAN_TAG_MIN; tag <= KASAN_TAG_KERNEL; tag++) {
+               if (tag == get_tag(ptr))
+                       continue;
+
+               /* Mark the first memory granule with the chosen memory tag. */
+               kasan_poison(ptr, KASAN_GRANULE_SIZE, (u8)tag);
+
+               /* This access must cause a KASAN report. */
+               KUNIT_EXPECT_KASAN_FAIL(test, *ptr = 0);
+       }
+
+       /* Recover the memory tag and free. */
+       kasan_poison(ptr, KASAN_GRANULE_SIZE, get_tag(ptr));
+       kfree(ptr);
+}
+
 static struct kunit_case kasan_kunit_test_cases[] = {
        KUNIT_CASE(kmalloc_oob_right),
        KUNIT_CASE(kmalloc_oob_left),
@@ -787,9 +1062,14 @@ static struct kunit_case kasan_kunit_test_cases[] = {
        KUNIT_CASE(kmalloc_pagealloc_oob_right),
        KUNIT_CASE(kmalloc_pagealloc_uaf),
        KUNIT_CASE(kmalloc_pagealloc_invalid_free),
+       KUNIT_CASE(pagealloc_oob_right),
+       KUNIT_CASE(pagealloc_uaf),
        KUNIT_CASE(kmalloc_large_oob_right),
-       KUNIT_CASE(kmalloc_oob_krealloc_more),
-       KUNIT_CASE(kmalloc_oob_krealloc_less),
+       KUNIT_CASE(krealloc_more_oob),
+       KUNIT_CASE(krealloc_less_oob),
+       KUNIT_CASE(krealloc_pagealloc_more_oob),
+       KUNIT_CASE(krealloc_pagealloc_less_oob),
+       KUNIT_CASE(krealloc_uaf),
        KUNIT_CASE(kmalloc_oob_16),
        KUNIT_CASE(kmalloc_uaf_16),
        KUNIT_CASE(kmalloc_oob_in_memset),
@@ -804,12 +1084,14 @@ static struct kunit_case kasan_kunit_test_cases[] = {
        KUNIT_CASE(kfree_via_page),
        KUNIT_CASE(kfree_via_phys),
        KUNIT_CASE(kmem_cache_oob),
-       KUNIT_CASE(memcg_accounted_kmem_cache),
+       KUNIT_CASE(kmem_cache_accounted),
+       KUNIT_CASE(kmem_cache_bulk),
        KUNIT_CASE(kasan_global_oob),
        KUNIT_CASE(kasan_stack_oob),
        KUNIT_CASE(kasan_alloca_oob_left),
        KUNIT_CASE(kasan_alloca_oob_right),
        KUNIT_CASE(ksize_unpoisons_memory),
+       KUNIT_CASE(ksize_uaf),
        KUNIT_CASE(kmem_cache_double_free),
        KUNIT_CASE(kmem_cache_invalid_free),
        KUNIT_CASE(kasan_memchr),
@@ -819,6 +1101,9 @@ static struct kunit_case kasan_kunit_test_cases[] = {
        KUNIT_CASE(kasan_bitops_tags),
        KUNIT_CASE(kmalloc_double_kzfree),
        KUNIT_CASE(vmalloc_oob),
+       KUNIT_CASE(match_all_not_assigned),
+       KUNIT_CASE(match_all_ptr_tag),
+       KUNIT_CASE(match_all_mem_tag),
        {}
 };
 
index 3b4cc77..eee017f 100644 (file)
@@ -123,8 +123,9 @@ static noinline void __init kasan_workqueue_uaf(void)
 static int __init test_kasan_module_init(void)
 {
        /*
-        * Temporarily enable multi-shot mode. Otherwise, we'd only get a
-        * report for the first case.
+        * Temporarily enable multi-shot mode. Otherwise, KASAN would only
+        * report the first detected bug and panic the kernel if panic_on_warn
+        * is enabled.
         */
        bool multishot = kasan_save_enable_multi_shot();
 
index 5e5d935..7e7bbd0 100644 (file)
@@ -11,51 +11,6 @@ typedef void(*test_ubsan_fp)(void);
                        #config, IS_ENABLED(config) ? "y" : "n");       \
        } while (0)
 
-static void test_ubsan_add_overflow(void)
-{
-       volatile int val = INT_MAX;
-       volatile unsigned int uval = UINT_MAX;
-
-       UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
-       val += 2;
-
-       UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
-       uval += 2;
-}
-
-static void test_ubsan_sub_overflow(void)
-{
-       volatile int val = INT_MIN;
-       volatile unsigned int uval = 0;
-       volatile int val2 = 2;
-
-       UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
-       val -= val2;
-
-       UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
-       uval -= val2;
-}
-
-static void test_ubsan_mul_overflow(void)
-{
-       volatile int val = INT_MAX / 2;
-       volatile unsigned int uval = UINT_MAX / 2;
-
-       UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
-       val *= 3;
-
-       UBSAN_TEST(CONFIG_UBSAN_UNSIGNED_OVERFLOW);
-       uval *= 3;
-}
-
-static void test_ubsan_negate_overflow(void)
-{
-       volatile int val = INT_MIN;
-
-       UBSAN_TEST(CONFIG_UBSAN_SIGNED_OVERFLOW);
-       val = -val;
-}
-
 static void test_ubsan_divrem_overflow(void)
 {
        volatile int val = 16;
@@ -155,10 +110,6 @@ static void test_ubsan_object_size_mismatch(void)
 }
 
 static const test_ubsan_fp test_ubsan_array[] = {
-       test_ubsan_add_overflow,
-       test_ubsan_sub_overflow,
-       test_ubsan_mul_overflow,
-       test_ubsan_negate_overflow,
        test_ubsan_shift_out_of_bounds,
        test_ubsan_out_of_bounds,
        test_ubsan_load_invalid_value,
index 8294f43..8b1c318 100644 (file)
@@ -1530,24 +1530,24 @@ static noinline void check_store_range(struct xarray *xa)
 
 #ifdef CONFIG_XARRAY_MULTI
 static void check_split_1(struct xarray *xa, unsigned long index,
-                                                       unsigned int order)
+                               unsigned int order, unsigned int new_order)
 {
-       XA_STATE(xas, xa, index);
-       void *entry;
-       unsigned int i = 0;
+       XA_STATE_ORDER(xas, xa, index, new_order);
+       unsigned int i;
 
        xa_store_order(xa, index, order, xa, GFP_KERNEL);
 
        xas_split_alloc(&xas, xa, order, GFP_KERNEL);
        xas_lock(&xas);
        xas_split(&xas, xa, order);
+       for (i = 0; i < (1 << order); i += (1 << new_order))
+               __xa_store(xa, index + i, xa_mk_index(index + i), 0);
        xas_unlock(&xas);
 
-       xa_for_each(xa, index, entry) {
-               XA_BUG_ON(xa, entry != xa);
-               i++;
+       for (i = 0; i < (1 << order); i++) {
+               unsigned int val = index + (i & ~((1 << new_order) - 1));
+               XA_BUG_ON(xa, xa_load(xa, index + i) != xa_mk_index(val));
        }
-       XA_BUG_ON(xa, i != 1 << order);
 
        xa_set_mark(xa, index, XA_MARK_0);
        XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
@@ -1557,14 +1557,16 @@ static void check_split_1(struct xarray *xa, unsigned long index,
 
 static noinline void check_split(struct xarray *xa)
 {
-       unsigned int order;
+       unsigned int order, new_order;
 
        XA_BUG_ON(xa, !xa_empty(xa));
 
        for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
-               check_split_1(xa, 0, order);
-               check_split_1(xa, 1UL << order, order);
-               check_split_1(xa, 3UL << order, order);
+               for (new_order = 0; new_order < order; new_order++) {
+                       check_split_1(xa, 0, order, new_order);
+                       check_split_1(xa, 1UL << order, order, new_order);
+                       check_split_1(xa, 3UL << order, order, new_order);
+               }
        }
 }
 #else
index bec38c6..2622997 100644 (file)
@@ -163,74 +163,6 @@ static void ubsan_epilogue(void)
        }
 }
 
-static void handle_overflow(struct overflow_data *data, void *lhs,
-                       void *rhs, char op)
-{
-
-       struct type_descriptor *type = data->type;
-       char lhs_val_str[VALUE_LENGTH];
-       char rhs_val_str[VALUE_LENGTH];
-
-       if (suppress_report(&data->location))
-               return;
-
-       ubsan_prologue(&data->location, type_is_signed(type) ?
-                       "signed-integer-overflow" :
-                       "unsigned-integer-overflow");
-
-       val_to_string(lhs_val_str, sizeof(lhs_val_str), type, lhs);
-       val_to_string(rhs_val_str, sizeof(rhs_val_str), type, rhs);
-       pr_err("%s %c %s cannot be represented in type %s\n",
-               lhs_val_str,
-               op,
-               rhs_val_str,
-               type->type_name);
-
-       ubsan_epilogue();
-}
-
-void __ubsan_handle_add_overflow(void *data,
-                               void *lhs, void *rhs)
-{
-
-       handle_overflow(data, lhs, rhs, '+');
-}
-EXPORT_SYMBOL(__ubsan_handle_add_overflow);
-
-void __ubsan_handle_sub_overflow(void *data,
-                               void *lhs, void *rhs)
-{
-       handle_overflow(data, lhs, rhs, '-');
-}
-EXPORT_SYMBOL(__ubsan_handle_sub_overflow);
-
-void __ubsan_handle_mul_overflow(void *data,
-                               void *lhs, void *rhs)
-{
-       handle_overflow(data, lhs, rhs, '*');
-}
-EXPORT_SYMBOL(__ubsan_handle_mul_overflow);
-
-void __ubsan_handle_negate_overflow(void *_data, void *old_val)
-{
-       struct overflow_data *data = _data;
-       char old_val_str[VALUE_LENGTH];
-
-       if (suppress_report(&data->location))
-               return;
-
-       ubsan_prologue(&data->location, "negation-overflow");
-
-       val_to_string(old_val_str, sizeof(old_val_str), data->type, old_val);
-
-       pr_err("negation of %s cannot be represented in type %s:\n",
-               old_val_str, data->type->type_name);
-
-       ubsan_epilogue();
-}
-EXPORT_SYMBOL(__ubsan_handle_negate_overflow);
-
-
 void __ubsan_handle_divrem_overflow(void *_data, void *lhs, void *rhs)
 {
        struct overflow_data *data = _data;
index 5fa5161..f5d8f54 100644 (file)
@@ -987,7 +987,7 @@ static void node_set_marks(struct xa_node *node, unsigned int offset,
  * xas_split_alloc() - Allocate memory for splitting an entry.
  * @xas: XArray operation state.
  * @entry: New entry which will be stored in the array.
- * @order: New entry order.
+ * @order: Current entry order.
  * @gfp: Memory allocation flags.
  *
  * This function should be called before calling xas_split().
@@ -1011,7 +1011,7 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
 
        do {
                unsigned int i;
-               void *sibling;
+               void *sibling = NULL;
                struct xa_node *node;
 
                node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
@@ -1021,7 +1021,7 @@ void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
                for (i = 0; i < XA_CHUNK_SIZE; i++) {
                        if ((i & mask) == 0) {
                                RCU_INIT_POINTER(node->slots[i], entry);
-                               sibling = xa_mk_sibling(0);
+                               sibling = xa_mk_sibling(i);
                        } else {
                                RCU_INIT_POINTER(node->slots[i], sibling);
                        }
@@ -1041,9 +1041,10 @@ EXPORT_SYMBOL_GPL(xas_split_alloc);
  * xas_split() - Split a multi-index entry into smaller entries.
  * @xas: XArray operation state.
  * @entry: New entry to store in the array.
- * @order: New entry order.
+ * @order: Current entry order.
  *
- * The value in the entry is copied to all the replacement entries.
+ * The size of the new entries is set in @xas.  The value in @entry is
+ * copied to all the replacement entries.
  *
  * Context: Any context.  The caller should hold the xa_lock.
  */
index 135bbb6..72227b2 100644 (file)
@@ -81,6 +81,7 @@ obj-$(CONFIG_PAGE_POISONING) += page_poison.o
 obj-$(CONFIG_SLAB) += slab.o
 obj-$(CONFIG_SLUB) += slub.o
 obj-$(CONFIG_KASAN)    += kasan/
+obj-$(CONFIG_KFENCE) += kfence/
 obj-$(CONFIG_FAILSLAB) += failslab.o
 obj-$(CONFIG_MEMORY_HOTPLUG) += memory_hotplug.o
 obj-$(CONFIG_MEMTEST)          += memtest.o
index e337975..576220a 100644 (file)
@@ -8,6 +8,7 @@
 #include <linux/fs.h>
 #include <linux/pagemap.h>
 #include <linux/mm.h>
+#include <linux/sched/mm.h>
 #include <linux/sched.h>
 #include <linux/module.h>
 #include <linux/writeback.h>
@@ -32,6 +33,8 @@ LIST_HEAD(bdi_list);
 /* bdi_wq serves all asynchronous writeback tasks */
 struct workqueue_struct *bdi_wq;
 
+#define K(x) ((x) << (PAGE_SHIFT - 10))
+
 #ifdef CONFIG_DEBUG_FS
 #include <linux/debugfs.h>
 #include <linux/seq_file.h>
@@ -69,7 +72,6 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
        global_dirty_limits(&background_thresh, &dirty_thresh);
        wb_thresh = wb_calc_thresh(wb, dirty_thresh);
 
-#define K(x) ((x) << (PAGE_SHIFT - 10))
        seq_printf(m,
                   "BdiWriteback:       %10lu kB\n"
                   "BdiReclaimable:     %10lu kB\n"
@@ -98,7 +100,6 @@ static int bdi_debug_stats_show(struct seq_file *m, void *v)
                   nr_more_io,
                   nr_dirty_time,
                   !list_empty(&bdi->bdi_list), bdi->wb.state);
-#undef K
 
        return 0;
 }
@@ -146,8 +147,6 @@ static ssize_t read_ahead_kb_store(struct device *dev,
        return count;
 }
 
-#define K(pages) ((pages) << (PAGE_SHIFT - 10))
-
 #define BDI_SHOW(name, expr)                                           \
 static ssize_t name##_show(struct device *dev,                         \
                           struct device_attribute *attr, char *buf)    \
@@ -580,7 +579,7 @@ struct bdi_writeback *wb_get_create(struct backing_dev_info *bdi,
 {
        struct bdi_writeback *wb;
 
-       might_sleep_if(gfpflags_allow_blocking(gfp));
+       might_alloc(gfp);
 
        if (!memcg_css->parent)
                return &bdi->wb;
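
The might_alloc() call substituted above folds the old open-coded check into one annotation; a hypothetical sketch of its use in any allocating helper:

	static void *my_cache_get(gfp_t gfp)
	{
		/* Equivalent to might_sleep_if(gfpflags_allow_blocking(gfp)),
		 * plus fs-reclaim lockdep annotations. */
		might_alloc(gfp);

		return kmalloc(64, gfp);
	}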
index 20c4f6f..54eee21 100644 (file)
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -94,34 +94,29 @@ static void cma_clear_bitmap(struct cma *cma, unsigned long pfn,
 
 static void __init cma_activate_area(struct cma *cma)
 {
-       unsigned long base_pfn = cma->base_pfn, pfn = base_pfn;
-       unsigned i = cma->count >> pageblock_order;
+       unsigned long base_pfn = cma->base_pfn, pfn;
        struct zone *zone;
 
        cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL);
        if (!cma->bitmap)
                goto out_error;
 
-       WARN_ON_ONCE(!pfn_valid(pfn));
-       zone = page_zone(pfn_to_page(pfn));
-
-       do {
-               unsigned j;
-
-               base_pfn = pfn;
-               for (j = pageblock_nr_pages; j; --j, pfn++) {
-                       WARN_ON_ONCE(!pfn_valid(pfn));
-                       /*
-                        * alloc_contig_range requires the pfn range
-                        * specified to be in the same zone. Make this
-                        * simple by forcing the entire CMA resv range
-                        * to be in the same zone.
-                        */
-                       if (page_zone(pfn_to_page(pfn)) != zone)
-                               goto not_in_zone;
-               }
-               init_cma_reserved_pageblock(pfn_to_page(base_pfn));
-       } while (--i);
+       /*
+        * alloc_contig_range() requires the pfn range specified to be in the
+        * same zone. Simplify by forcing the entire CMA resv range to be in the
+        * same zone.
+        */
+       WARN_ON_ONCE(!pfn_valid(base_pfn));
+       zone = page_zone(pfn_to_page(base_pfn));
+       for (pfn = base_pfn + 1; pfn < base_pfn + cma->count; pfn++) {
+               WARN_ON_ONCE(!pfn_valid(pfn));
+               if (page_zone(pfn_to_page(pfn)) != zone)
+                       goto not_in_zone;
+       }
+
+       for (pfn = base_pfn; pfn < base_pfn + cma->count;
+            pfn += pageblock_nr_pages)
+               init_cma_reserved_pageblock(pfn_to_page(pfn));
 
        mutex_init(&cma->lock);
 
@@ -135,6 +130,10 @@ static void __init cma_activate_area(struct cma *cma)
 not_in_zone:
        bitmap_free(cma->bitmap);
 out_error:
+       /* Expose all pages to the buddy allocator; they are useless for CMA. */
+       for (pfn = base_pfn; pfn < base_pfn + cma->count; pfn++)
+               free_reserved_page(pfn_to_page(pfn));
+       totalcma_pages -= cma->count;
        cma->count = 0;
        pr_err("CMA area %s could not be activated\n", cma->name);
        return;
@@ -336,6 +335,23 @@ int __init cma_declare_contiguous_nid(phys_addr_t base,
                        limit = highmem_start;
                }
 
+               /*
+                * If there is enough memory, try a bottom-up allocation first.
+                * It will place the new cma area close to the start of the node
+                * and guarantee that compaction moves pages out of the cma area
+                * and not into it.
+                * Avoid using the first 4GB so as not to interfere with
+                * constrained zones like DMA/DMA32.
+                */
+#ifdef CONFIG_PHYS_ADDR_T_64BIT
+               if (!memblock_bottom_up() && memblock_end >= SZ_4G + size) {
+                       memblock_set_bottom_up(true);
+                       addr = memblock_alloc_range_nid(size, alignment, SZ_4G,
+                                                       limit, nid, true);
+                       memblock_set_bottom_up(false);
+               }
+#endif
+
                if (!addr) {
                        addr = memblock_alloc_range_nid(size, alignment, base,
                                        limit, nid, true);
@@ -484,8 +500,8 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
        }
 
        if (ret && !no_warn) {
-               pr_err("%s: alloc failed, req-size: %zu pages, ret: %d\n",
-                       __func__, count, ret);
+               pr_err("%s: %s: alloc failed, req-size: %zu pages, ret: %d\n",
+                      __func__, cma->name, count, ret);
                cma_debug_show_areas(cma);
        }
 
index 190ccda..e04f447 100644 (file)
@@ -137,7 +137,6 @@ EXPORT_SYMBOL(__SetPageMovable);
 
 void __ClearPageMovable(struct page *page)
 {
-       VM_BUG_ON_PAGE(!PageLocked(page), page);
        VM_BUG_ON_PAGE(!PageMovable(page), page);
        /*
         * Clear registered address_space val with keeping PAGE_MAPPING_MOVABLE
@@ -988,14 +987,13 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                if (unlikely(!get_page_unless_zero(page)))
                        goto isolate_fail;
 
-               if (__isolate_lru_page_prepare(page, isolate_mode) != 0)
+               if (!__isolate_lru_page_prepare(page, isolate_mode))
                        goto isolate_fail_put;
 
                /* Try isolate the page */
                if (!TestClearPageLRU(page))
                        goto isolate_fail_put;
 
-               rcu_read_lock();
                lruvec = mem_cgroup_page_lruvec(page, pgdat);
 
                /* If we already hold the lock, we can skip some rechecking */
@@ -1005,7 +1003,6 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
 
                        compact_lock_irqsave(&lruvec->lru_lock, &flags, cc);
                        locked = lruvec;
-                       rcu_read_unlock();
 
                        lruvec_memcg_debug(lruvec, page);
 
@@ -1026,15 +1023,14 @@ isolate_migratepages_block(struct compact_control *cc, unsigned long low_pfn,
                                SetPageLRU(page);
                                goto isolate_fail_put;
                        }
-               } else
-                       rcu_read_unlock();
+               }
 
                /* The whole page is taken off the LRU; skip the tail pages. */
                if (PageCompound(page))
                        low_pfn += compound_nr(page) - 1;
 
                /* Successfully isolated */
-               del_page_from_lru_list(page, lruvec, page_lru(page));
+               del_page_from_lru_list(page, lruvec);
                mod_node_page_state(page_pgdat(page),
                                NR_ISOLATED_ANON + page_is_file_lru(page),
                                thp_nr_pages(page));
@@ -1288,7 +1284,7 @@ static void
 fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long nr_isolated)
 {
        unsigned long start_pfn, end_pfn;
-       struct page *page = pfn_to_page(pfn);
+       struct page *page;
 
        /* Do not search around if there are enough pages already */
        if (cc->nr_freepages >= cc->nr_migratepages)
@@ -1299,8 +1295,12 @@ fast_isolate_around(struct compact_control *cc, unsigned long pfn, unsigned long
                return;
 
        /* Pageblock boundaries */
-       start_pfn = pageblock_start_pfn(pfn);
-       end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone)) - 1;
+       start_pfn = max(pageblock_start_pfn(pfn), cc->zone->zone_start_pfn);
+       end_pfn = min(pageblock_end_pfn(pfn), zone_end_pfn(cc->zone));
+
+       page = pageblock_pfn_to_page(start_pfn, end_pfn, cc->zone);
+       if (!page)
+               return;
 
        /* Scan before */
        if (start_pfn != pfn) {
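
The clamping and the pageblock_pfn_to_page() check above matter for zones whose first or last pageblock is partial: pfn_to_page() on a raw pageblock_start_pfn() result can point outside the zone or into a memmap hole. As an illustrative example with 512-pfn pageblocks, in a zone starting at pfn 256 the old code would turn pfn 300 into start_pfn 0; the max() clamps that to 256, and pageblock_pfn_to_page() then verifies the whole clamped range is backed by valid struct pages before anything is dereferenced.
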
@@ -1402,7 +1402,8 @@ fast_isolate_freepages(struct compact_control *cc)
                        pfn = page_to_pfn(freepage);
 
                        if (pfn >= highest)
-                               highest = pageblock_start_pfn(pfn);
+                               highest = max(pageblock_start_pfn(pfn),
+                                             cc->zone->zone_start_pfn);
 
                        if (pfn >= low_pfn) {
                                cc->fast_search_fail = 0;
@@ -1472,7 +1473,8 @@ fast_isolate_freepages(struct compact_control *cc)
                        } else {
                                if (cc->direct_compaction && pfn_valid(min_pfn)) {
                                        page = pageblock_pfn_to_page(min_pfn,
-                                               pageblock_end_pfn(min_pfn),
+                                               min(pageblock_end_pfn(min_pfn),
+                                                   zone_end_pfn(cc->zone)),
                                                cc->zone);
                                        cc->free_pfn = min_pfn;
                                }
@@ -1702,6 +1704,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
        unsigned long pfn = cc->migrate_pfn;
        unsigned long high_pfn;
        int order;
+       bool found_block = false;
 
        /* Skip hints are relied on to avoid repeats on the fast search */
        if (cc->ignore_skip_hint)
@@ -1744,7 +1747,7 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
        high_pfn = pageblock_start_pfn(cc->migrate_pfn + distance);
 
        for (order = cc->order - 1;
-            order >= PAGE_ALLOC_COSTLY_ORDER && pfn == cc->migrate_pfn && nr_scanned < limit;
+            order >= PAGE_ALLOC_COSTLY_ORDER && !found_block && nr_scanned < limit;
             order--) {
                struct free_area *area = &cc->zone->free_area[order];
                struct list_head *freelist;
@@ -1759,7 +1762,11 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
                list_for_each_entry(freepage, freelist, lru) {
                        unsigned long free_pfn;
 
-                       nr_scanned++;
+                       if (nr_scanned++ >= limit) {
+                               move_freelist_tail(freelist, freepage);
+                               break;
+                       }
+
                        free_pfn = page_to_pfn(freepage);
                        if (free_pfn < high_pfn) {
                                /*
@@ -1768,12 +1775,8 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
                                 * the list assumes an entry is deleted, not
                                 * reordered.
                                 */
-                               if (get_pageblock_skip(freepage)) {
-                                       if (list_is_last(freelist, &freepage->lru))
-                                               break;
-
+                               if (get_pageblock_skip(freepage))
                                        continue;
-                               }
 
                                /* Reorder so that a future search skips recent pages */
                                move_freelist_tail(freelist, freepage);
@@ -1781,15 +1784,10 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
                                update_fast_start_pfn(cc, free_pfn);
                                pfn = pageblock_start_pfn(free_pfn);
                                cc->fast_search_fail = 0;
+                               found_block = true;
                                set_pageblock_skip(freepage);
                                break;
                        }
-
-                       if (nr_scanned >= limit) {
-                               cc->fast_search_fail++;
-                               move_freelist_tail(freelist, freepage);
-                               break;
-                       }
                }
                spin_unlock_irqrestore(&cc->zone->lock, flags);
        }
@@ -1800,9 +1798,10 @@ static unsigned long fast_find_migrateblock(struct compact_control *cc)
         * If fast scanning failed then use a cached entry for a page block
         * that had free pages as the basis for starting a linear scan.
         */
-       if (pfn == cc->migrate_pfn)
+       if (!found_block) {
+               cc->fast_search_fail++;
                pfn = reinit_migrate_pfn(cc);
-
+       }
        return pfn;
 }
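
The found_block flag untangles two conditions the old code inferred from "pfn == cc->migrate_pfn": that comparison misreads success as failure when the chosen pageblock happens to start at the current migrate_pfn, and the old placement of the limit check meant fast_search_fail was only bumped when the limit tripped inside one freelist walk, never when every order was scanned fruitlessly. With an explicit flag, both the failure accounting and the reinit_migrate_pfn() fallback key off a single unambiguous signal.
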
 
@@ -1926,20 +1925,28 @@ static bool kswapd_is_running(pg_data_t *pgdat)
 
 /*
 * A zone's fragmentation score is the external fragmentation with respect to the
- * COMPACTION_HPAGE_ORDER scaled by the zone's size. It returns a value
- * in the range [0, 100].
+ * COMPACTION_HPAGE_ORDER. It returns a value in the range [0, 100].
+ */
+static unsigned int fragmentation_score_zone(struct zone *zone)
+{
+       return extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
+}
+
+/*
+ * A weighted zone's fragmentation score is the external fragmentation
+ * with respect to the COMPACTION_HPAGE_ORDER scaled by the zone's size. It
+ * returns a value in the range [0, 100].
  *
  * The scaling factor ensures that proactive compaction focuses on larger
  * zones like ZONE_NORMAL, rather than smaller, specialized zones like
  * ZONE_DMA32. For smaller zones, the score value remains close to zero,
  * and thus never exceeds the high threshold for proactive compaction.
  */
-static unsigned int fragmentation_score_zone(struct zone *zone)
+static unsigned int fragmentation_score_zone_weighted(struct zone *zone)
 {
        unsigned long score;
 
-       score = zone->present_pages *
-                       extfrag_for_order(zone, COMPACTION_HPAGE_ORDER);
+       score = zone->present_pages * fragmentation_score_zone(zone);
        return div64_ul(score, zone->zone_pgdat->node_present_pages + 1);
 }
 
@@ -1959,7 +1966,7 @@ static unsigned int fragmentation_score_node(pg_data_t *pgdat)
                struct zone *zone;
 
                zone = &pgdat->node_zones[zoneid];
-               score += fragmentation_score_zone(zone);
+               score += fragmentation_score_zone_weighted(zone);
        }
 
        return score;
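
A worked example of the split, with illustrative numbers: fragmentation_score_zone() now reports the raw extfrag_for_order() value, say 80, regardless of zone size, while the weighted variant used for the node score scales it by the zone's share of the node. For a 1 GiB zone on a 16 GiB node with 4 KiB pages:

        score = (262144 * 80) / (4194304 + 1) ~= 5

so a badly fragmented but small zone contributes only about 5 points to the node score, as the comments above intend, while per-zone consumers can still read the unscaled value.
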
index 8a40b3f..0bdda84 100644 (file)
@@ -110,6 +110,11 @@ void __dump_page(struct page *page, const char *reason)
                                        head_compound_mapcount(head));
                }
        }
+
+#ifdef CONFIG_MEMCG
+       if (head->memcg_data)
+               pr_warn("memcg:%lx\n", head->memcg_data);
+#endif
        if (PageKsm(page))
                type = "ksm ";
        else if (PageAnon(page))
@@ -180,11 +185,6 @@ hex_only:
 
        if (reason)
                pr_warn("page dumped because: %s\n", reason);
-
-#ifdef CONFIG_MEMCG
-       if (!page_poisoned && page->memcg_data)
-               pr_warn("pages's memcg:%lx\n", page->memcg_data);
-#endif
 }
 
 void dump_page(struct page *page, const char *reason)
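
Relocating the print also retires the old misspelled "pages's memcg:" label and reads memcg_data from the head page rather than from a possibly-tail page. A dump now carries a line such as (pointer value purely illustrative):

        memcg:ffff8881c0161000

right after the head-page fields instead of at the very end of the output.
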
index c05d9dc..a9bd6ce 100644 (file)
 #define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
 #define RANDOM_NZVALUE GENMASK(7, 0)
 
-static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
+static void __init pte_basic_tests(unsigned long pfn, int idx)
 {
+       pgprot_t prot = protection_map[idx];
        pte_t pte = pfn_pte(pfn, prot);
+       unsigned long val = idx, *ptr = &val;
+
+       pr_debug("Validating PTE basic (%pGv)\n", ptr);
+
+       /*
+        * This test needs to be executed after the given page table entry
+        * is created with pfn_pte() to make sure that protection_map[idx]
+        * does not have the dirty bit enabled from the beginning. This is
+        * important for platforms like arm64 where (!PTE_RDONLY) indicates
+        * the dirty bit being set.
+        */
+       WARN_ON(pte_dirty(pte_wrprotect(pte)));
 
-       pr_debug("Validating PTE basic\n");
        WARN_ON(!pte_same(pte, pte));
        WARN_ON(!pte_young(pte_mkyoung(pte_mkold(pte))));
        WARN_ON(!pte_dirty(pte_mkdirty(pte_mkclean(pte))));
@@ -70,6 +82,8 @@ static void __init pte_basic_tests(unsigned long pfn, pgprot_t prot)
        WARN_ON(pte_young(pte_mkold(pte_mkyoung(pte))));
        WARN_ON(pte_dirty(pte_mkclean(pte_mkdirty(pte))));
        WARN_ON(pte_write(pte_wrprotect(pte_mkwrite(pte))));
+       WARN_ON(pte_dirty(pte_wrprotect(pte_mkclean(pte))));
+       WARN_ON(!pte_dirty(pte_wrprotect(pte_mkdirty(pte))));
 }
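
The ordering constraint described in the new comment reflects how arm64 derives hardware dirty state. Paraphrasing arch/arm64/include/asm/pgtable.h of this era (a sketch, not a verbatim copy):

        /* arm64: a PTE is hardware-dirty when it is writable and the
         * PTE_RDONLY bit is clear. */
        static inline bool pte_hw_dirty(pte_t pte)
        {
                return pte_write(pte) && !(pte_val(pte) & PTE_RDONLY);
        }

A protection_map[] entry that leaves PTE_RDONLY clear would make a freshly created, never-written PTE read back as dirty, and because pte_wrprotect() carries hardware-dirty state over into the software dirty bit, WARN_ON(pte_dirty(pte_wrprotect(pte))) placed immediately after pfn_pte() catches exactly that case.
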
 
 static void __init pte_advanced_tests(struct mm_struct *mm,
@@ -129,14 +143,27 @@ static void __init pte_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 }
 
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
+static void __init pmd_basic_tests(unsigned long pfn, int idx)
 {
+       pgprot_t prot = protection_map[idx];
        pmd_t pmd = pfn_pmd(pfn, prot);
+       unsigned long val = idx, *ptr = &val;
 
        if (!has_transparent_hugepage())
                return;
 
-       pr_debug("Validating PMD basic\n");
+       pr_debug("Validating PMD basic (%pGv)\n", ptr);
+
+       /*
+        * This test needs to be executed after the given page table entry
+        * is created with pfn_pmd() to make sure that protection_map[idx]
+        * does not have the dirty bit enabled from the beginning. This is
+        * important for platforms like arm64 where (!PTE_RDONLY) indicates
+        * the dirty bit being set.
+        */
+       WARN_ON(pmd_dirty(pmd_wrprotect(pmd)));
+
        WARN_ON(!pmd_same(pmd, pmd));
        WARN_ON(!pmd_young(pmd_mkyoung(pmd_mkold(pmd))));
        WARN_ON(!pmd_dirty(pmd_mkdirty(pmd_mkclean(pmd))));
@@ -144,6 +171,8 @@ static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot)
        WARN_ON(pmd_young(pmd_mkold(pmd_mkyoung(pmd))));
        WARN_ON(pmd_dirty(pmd_mkclean(pmd_mkdirty(pmd))));
        WARN_ON(pmd_write(pmd_wrprotect(pmd_mkwrite(pmd))));
+       WARN_ON(pmd_dirty(pmd_wrprotect(pmd_mkclean(pmd))));
+       WARN_ON(!pmd_dirty(pmd_wrprotect(pmd_mkdirty(pmd))));
        /*
         * A huge page does not point to next level page table
         * entry. Hence this must qualify as pmd_bad().
@@ -249,19 +278,35 @@ static void __init pmd_savedwrite_tests(unsigned long pfn, pgprot_t prot)
 }
 
 #ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot)
+static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx)
 {
+       pgprot_t prot = protection_map[idx];
        pud_t pud = pfn_pud(pfn, prot);
+       unsigned long val = idx, *ptr = &val;
 
        if (!has_transparent_hugepage())
                return;
 
-       pr_debug("Validating PUD basic\n");
+       pr_debug("Validating PUD basic (%pGv)\n", ptr);
+
+       /*
+        * This test needs to be executed after the given page table entry
+        * is created with pfn_pud() to make sure that protection_map[idx]
+        * does not have the dirty bit enabled from the beginning. This is
+        * important for platforms like arm64 where (!PTE_RDONLY) indicates
+        * the dirty bit being set.
+        */
+       WARN_ON(pud_dirty(pud_wrprotect(pud)));
+
        WARN_ON(!pud_same(pud, pud));
        WARN_ON(!pud_young(pud_mkyoung(pud_mkold(pud))));
+       WARN_ON(!pud_dirty(pud_mkdirty(pud_mkclean(pud))));
+       WARN_ON(pud_dirty(pud_mkclean(pud_mkdirty(pud))));
        WARN_ON(!pud_write(pud_mkwrite(pud_wrprotect(pud))));
        WARN_ON(pud_write(pud_wrprotect(pud_mkwrite(pud))));
        WARN_ON(pud_young(pud_mkold(pud_mkyoung(pud))));
+       WARN_ON(pud_dirty(pud_wrprotect(pud_mkclean(pud))));
+       WARN_ON(!pud_dirty(pud_wrprotect(pud_mkdirty(pud))));
 
        if (mm_pmd_folded(mm))
                return;
@@ -359,7 +404,7 @@ static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
 #endif /* !CONFIG_HAVE_ARCH_HUGE_VMAP */
 
 #else  /* !CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
-static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { }
 static void __init pud_advanced_tests(struct mm_struct *mm,
                                      struct vm_area_struct *vma, pud_t *pudp,
                                      unsigned long pfn, unsigned long vaddr,
@@ -372,8 +417,8 @@ static void __init pud_huge_tests(pud_t *pudp, unsigned long pfn, pgprot_t prot)
 }
 #endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */
 #else  /* !CONFIG_TRANSPARENT_HUGEPAGE */
-static void __init pmd_basic_tests(unsigned long pfn, pgprot_t prot) { }
-static void __init pud_basic_tests(unsigned long pfn, pgprot_t prot) { }
+static void __init pmd_basic_tests(unsigned long pfn, int idx) { }
+static void __init pud_basic_tests(struct mm_struct *mm, unsigned long pfn, int idx) { }
 static void __init pmd_advanced_tests(struct mm_struct *mm,
                                      struct vm_area_struct *vma, pmd_t *pmdp,
                                      unsigned long pfn, unsigned long vaddr,
@@ -899,6 +944,7 @@ static int __init debug_vm_pgtable(void)
        unsigned long vaddr, pte_aligned, pmd_aligned;
        unsigned long pud_aligned, p4d_aligned, pgd_aligned;
        spinlock_t *ptl = NULL;
+       int idx;
 
        pr_info("Validating architecture page table helpers\n");
        prot = vm_get_page_prot(VMFLAGS);
@@ -963,9 +1009,25 @@ static int __init debug_vm_pgtable(void)
        saved_pmdp = pmd_offset(pudp, 0UL);
        saved_ptep = pmd_pgtable(pmd);
 
-       pte_basic_tests(pte_aligned, prot);
-       pmd_basic_tests(pmd_aligned, prot);
-       pud_basic_tests(pud_aligned, prot);
+       /*
+        * Iterate over the protection_map[] to make sure that all
+        * the basic page table transformation validations just hold
+        * true irrespective of the starting protection value for a
+        * given page table entry.
+        */
+       for (idx = 0; idx < ARRAY_SIZE(protection_map); idx++) {
+               pte_basic_tests(pte_aligned, idx);
+               pmd_basic_tests(pmd_aligned, idx);
+               pud_basic_tests(mm, pud_aligned, idx);
+       }
+
+       /*
+        * Both P4D and PGD level tests are very basic which do not
+        * involve creating page table entries from the protection
+        * value and the given pfn. Hence just keep them out from
+        * the above iteration for now to save some test execution
+        * time.
+        */
        p4d_basic_tests(p4d_aligned, prot);
        pgd_basic_tests(pgd_aligned, prot);
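
For context: protection_map[] (defined in mm/mmap.c) has sixteen entries indexed by the combinations of VM_READ, VM_WRITE, VM_EXEC and VM_SHARED, so the loop above exercises every base protection value instead of only vm_get_page_prot(VMFLAGS). That is also why the basic tests hand the index to the %pGv printk format: the index doubles as a vm_flags value, so a call such as (illustrative)

        pte_basic_tests(pte_aligned, VM_READ | VM_WRITE);       /* idx == 3 */

logs "Validating PTE basic (read|write)"-style output for each combination.
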
 
index a97c972..f379153 100644 (file)
@@ -28,6 +28,7 @@
 #include <linux/mutex.h>
 #include <linux/poison.h>
 #include <linux/sched.h>
+#include <linux/sched/mm.h>
 #include <linux/slab.h>
 #include <linux/stat.h>
 #include <linux/spinlock.h>
@@ -319,7 +320,7 @@ void *dma_pool_alloc(struct dma_pool *pool, gfp_t mem_flags,
        size_t offset;
        void *retval;
 
-       might_sleep_if(gfpflags_allow_blocking(mem_flags));
+       might_alloc(mem_flags);
 
        spin_lock_irqsave(&pool->lock, flags);
        list_for_each_entry(page, &pool->page_list, page_list) {
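
might_alloc() lives in <linux/sched/mm.h>, hence the new include above, and is a strict superset of the check it replaces; at the time of this merge it is roughly:

        static inline void might_alloc(gfp_t gfp_mask)
        {
                fs_reclaim_acquire(gfp_mask);
                fs_reclaim_release(gfp_mask);
                might_sleep_if(gfpflags_allow_blocking(gfp_mask));
        }

so dma_pool_alloc() keeps the might-sleep debugging and additionally gains the fs_reclaim lockdep annotation, which catches allocations made from reclaim-unsafe contexts.
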
index a0018ad..164607c 100644 (file)
@@ -181,17 +181,17 @@ void __init early_iounmap(void __iomem *addr, unsigned long size)
                }
        }
 
-       if (WARN(slot < 0, "early_iounmap(%p, %08lx) not found slot\n",
-                addr, size))
+       if (WARN(slot < 0, "%s(%p, %08lx) not found slot\n",
+                 __func__, addr, size))
                return;
 
        if (WARN(prev_size[slot] != size,
-                "early_iounmap(%p, %08lx) [%d] size not consistent %08lx\n",
-                addr, size, slot, prev_size[slot]))
+                "%s(%p, %08lx) [%d] size not consistent %08lx\n",
+                 __func__, addr, size, slot, prev_size[slot]))
                return;
 
-       WARN(early_ioremap_debug, "early_iounmap(%p, %08lx) [%d]\n",
-            addr, size, slot);
+       WARN(early_ioremap_debug, "%s(%p, %08lx) [%d]\n",
+             __func__, addr, size, slot);
 
        virt_addr = (unsigned long)addr;
        if (WARN_ON(virt_addr < fix_to_virt(FIX_BTMAP_BEGIN)))
index 6ff2a3f..4370048 100644 (file)
@@ -206,9 +206,9 @@ static void unaccount_page_cache_page(struct address_space *mapping,
        if (PageSwapBacked(page)) {
                __mod_lruvec_page_state(page, NR_SHMEM, -nr);
                if (PageTransHuge(page))
-                       __dec_lruvec_page_state(page, NR_SHMEM_THPS);
+                       __mod_lruvec_page_state(page, NR_SHMEM_THPS, -nr);
        } else if (PageTransHuge(page)) {
-               __dec_lruvec_page_state(page, NR_FILE_THPS);
+               __mod_lruvec_page_state(page, NR_FILE_THPS, -nr);
                filemap_nr_thps_dec(mapping);
        }
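
This converts NR_SHMEM_THPS and NR_FILE_THPS from "number of THPs" to base-page units: unaccounting a 2 MiB shmem THP on x86-64 now subtracts nr = 512 rather than 1. Readers of these counters are adjusted elsewhere in the same series to stop multiplying by HPAGE_PMD_NR, so the values reported to userspace are unchanged.
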
 
@@ -777,7 +777,6 @@ EXPORT_SYMBOL(file_write_and_wait_range);
  * replace_page_cache_page - replace a pagecache page with a new one
  * @old:       page to be replaced
  * @new:       page to replace with
- * @gfp_mask:  allocation mode
  *
  * This function replaces a page in the pagecache with a new one.  On
  * success it acquires the pagecache reference for the new page and
@@ -786,10 +785,8 @@ EXPORT_SYMBOL(file_write_and_wait_range);
  * caller must do that.
  *
  * The remove + add is atomic.  This function cannot fail.
- *
- * Return: %0
  */
-int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
+void replace_page_cache_page(struct page *old, struct page *new)
 {
        struct address_space *mapping = old->mapping;
        void (*freepage)(struct page *) = mapping->a_ops->freepage;
@@ -824,8 +821,6 @@ int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask)
        if (freepage)
                freepage(old);
        put_page(old);
-
-       return 0;
 }
 EXPORT_SYMBOL_GPL(replace_page_cache_page);
 
@@ -1348,61 +1343,26 @@ int wait_on_page_bit_killable(struct page *page, int bit_nr)
 }
 EXPORT_SYMBOL(wait_on_page_bit_killable);
 
-static int __wait_on_page_locked_async(struct page *page,
-                                      struct wait_page_queue *wait, bool set)
-{
-       struct wait_queue_head *q = page_waitqueue(page);
-       int ret = 0;
-
-       wait->page = page;
-       wait->bit_nr = PG_locked;
-
-       spin_lock_irq(&q->lock);
-       __add_wait_queue_entry_tail(q, &wait->wait);
-       SetPageWaiters(page);
-       if (set)
-               ret = !trylock_page(page);
-       else
-               ret = PageLocked(page);
-       /*
-        * If we were successful now, we know we're still on the
-        * waitqueue as we're still under the lock. This means it's
-        * safe to remove and return success, we know the callback
-        * isn't going to trigger.
-        */
-       if (!ret)
-               __remove_wait_queue(q, &wait->wait);
-       else
-               ret = -EIOCBQUEUED;
-       spin_unlock_irq(&q->lock);
-       return ret;
-}
-
-static int wait_on_page_locked_async(struct page *page,
-                                    struct wait_page_queue *wait)
-{
-       if (!PageLocked(page))
-               return 0;
-       return __wait_on_page_locked_async(compound_head(page), wait, false);
-}
-
 /**
  * put_and_wait_on_page_locked - Drop a reference and wait for it to be unlocked
  * @page: The page to wait for.
+ * @state: The sleep state (TASK_KILLABLE, TASK_UNINTERRUPTIBLE, etc).
  *
  * The caller should hold a reference on @page.  They expect the page to
  * become unlocked relatively soon, but do not wish to hold up migration
  * (for example) by holding the reference while waiting for the page to
  * come unlocked.  After this function returns, the caller should not
  * dereference @page.
+ *
+ * Return: 0 if the page was unlocked or -EINTR if interrupted by a signal.
  */
-void put_and_wait_on_page_locked(struct page *page)
+int put_and_wait_on_page_locked(struct page *page, int state)
 {
        wait_queue_head_t *q;
 
        page = compound_head(page);
        q = page_waitqueue(page);
-       wait_on_page_bit_common(q, page, PG_locked, TASK_UNINTERRUPTIBLE, DROP);
+       return wait_on_page_bit_common(q, page, PG_locked, state, DROP);
 }
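
The new state argument is exercised later in this same patch: filemap_update_page() calls put_and_wait_on_page_locked(page, TASK_KILLABLE) so that a reader blocked on a locked page can be killed, while callers wanting the old behaviour pass TASK_UNINTERRUPTIBLE and may ignore the return value (0, or -EINTR if interrupted).
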
 
 /**
@@ -1558,7 +1518,28 @@ EXPORT_SYMBOL_GPL(__lock_page_killable);
 
 int __lock_page_async(struct page *page, struct wait_page_queue *wait)
 {
-       return __wait_on_page_locked_async(page, wait, true);
+       struct wait_queue_head *q = page_waitqueue(page);
+       int ret = 0;
+
+       wait->page = page;
+       wait->bit_nr = PG_locked;
+
+       spin_lock_irq(&q->lock);
+       __add_wait_queue_entry_tail(q, &wait->wait);
+       SetPageWaiters(page);
+       ret = !trylock_page(page);
+       /*
+        * If we were successful now, we know we're still on the
+        * waitqueue as we're still under the lock. This means it's
+        * safe to remove and return success, we know the callback
+        * isn't going to trigger.
+        */
+       if (!ret)
+               __remove_wait_queue(q, &wait->wait);
+       else
+               ret = -EIOCBQUEUED;
+       spin_unlock_irq(&q->lock);
+       return ret;
 }
 
 /*
@@ -1677,8 +1658,8 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping,
 }
 EXPORT_SYMBOL(page_cache_prev_miss);
 
-/**
- * find_get_entry - find and get a page cache entry
+/*
+ * mapping_get_entry - Get a page cache entry.
  * @mapping: the address_space to search
  * @index: The page cache index.
  *
@@ -1690,7 +1671,8 @@ EXPORT_SYMBOL(page_cache_prev_miss);
  *
  * Return: The head page or shadow entry, %NULL if nothing is found.
  */
-struct page *find_get_entry(struct address_space *mapping, pgoff_t index)
+static struct page *mapping_get_entry(struct address_space *mapping,
+               pgoff_t index)
 {
        XA_STATE(xas, &mapping->i_pages, index);
        struct page *page;
@@ -1727,39 +1709,6 @@ out:
 }
 
 /**
- * find_lock_entry - Locate and lock a page cache entry.
- * @mapping: The address_space to search.
- * @index: The page cache index.
- *
- * Looks up the page at @mapping & @index.  If there is a page in the
- * cache, the head page is returned locked and with an increased refcount.
- *
- * If the slot holds a shadow entry of a previously evicted page, or a
- * swap entry from shmem/tmpfs, it is returned.
- *
- * Context: May sleep.
- * Return: The head page or shadow entry, %NULL if nothing is found.
- */
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t index)
-{
-       struct page *page;
-
-repeat:
-       page = find_get_entry(mapping, index);
-       if (page && !xa_is_value(page)) {
-               lock_page(page);
-               /* Has the page been truncated? */
-               if (unlikely(page->mapping != mapping)) {
-                       unlock_page(page);
-                       put_page(page);
-                       goto repeat;
-               }
-               VM_BUG_ON_PAGE(!thp_contains(page, index), page);
-       }
-       return page;
-}
-
-/**
  * pagecache_get_page - Find and get a reference to a page.
  * @mapping: The address_space to search.
  * @index: The page index.
@@ -1774,6 +1723,8 @@ repeat:
  * * %FGP_LOCK - The page is returned locked.
  * * %FGP_HEAD - If the page is present and a THP, return the head page
  *   rather than the exact page specified by the index.
+ * * %FGP_ENTRY - If there is a shadow / swap / DAX entry, return it
+ *   instead of allocating a new page to replace it.
  * * %FGP_CREAT - If no page is present then a new page is allocated using
  *   @gfp_mask and added to the page cache and the VM's LRU list.
  *   The page is returned locked and with an increased refcount.
@@ -1797,9 +1748,12 @@ struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index,
        struct page *page;
 
 repeat:
-       page = find_get_entry(mapping, index);
-       if (xa_is_value(page))
+       page = mapping_get_entry(mapping, index);
+       if (xa_is_value(page)) {
+               if (fgp_flags & FGP_ENTRY)
+                       return page;
                page = NULL;
+       }
        if (!page)
                goto no_page;
 
@@ -1871,18 +1825,53 @@ no_page:
 }
 EXPORT_SYMBOL(pagecache_get_page);
 
+static inline struct page *find_get_entry(struct xa_state *xas, pgoff_t max,
+               xa_mark_t mark)
+{
+       struct page *page;
+
+retry:
+       if (mark == XA_PRESENT)
+               page = xas_find(xas, max);
+       else
+               page = xas_find_marked(xas, max, mark);
+
+       if (xas_retry(xas, page))
+               goto retry;
+       /*
+        * A shadow entry of a recently evicted page, a swap
+        * entry from shmem/tmpfs or a DAX entry.  Return it
+        * without attempting to raise page count.
+        */
+       if (!page || xa_is_value(page))
+               return page;
+
+       if (!page_cache_get_speculative(page))
+               goto reset;
+
+       /* Has the page moved or been split? */
+       if (unlikely(page != xas_reload(xas))) {
+               put_page(page);
+               goto reset;
+       }
+
+       return page;
+reset:
+       xas_reset(xas);
+       goto retry;
+}
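+
This helper centralises the classic lockless page cache lookup pattern: take a speculative reference, then xas_reload() to confirm the page was not freed, reallocated or split between the lookup and the refcount bump; on any mismatch the reference is dropped and the XArray state is reset so the walk retries cleanly. The open-coded copies of that dance removed further down, in the old find_get_entries() body, find_get_pages_range() and find_get_pages_range_tag(), all collapse into this one function.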
+
 /**
  * find_get_entries - gang pagecache lookup
  * @mapping:   The address_space to search
  * @start:     The starting page cache index
- * @nr_entries:        The maximum number of entries
- * @entries:   Where the resulting entries are placed
+ * @end:       The final page index (inclusive).
+ * @pvec:      Where the resulting entries are placed.
  * @indices:   The cache indices corresponding to the entries in @pvec
  *
- * find_get_entries() will search for and return a group of up to
- * @nr_entries entries in the mapping.  The entries are placed at
- * @entries.  find_get_entries() takes a reference against any actual
- * pages it returns.
+ * find_get_entries() will search for and return a batch of entries in
+ * the mapping.  The entries are placed in @pvec.  find_get_entries()
+ * takes a reference on any actual pages it returns.
  *
  * The search returns a group of mapping-contiguous page cache entries
  * with ascending indexes.  There may be holes in the indices due to
@@ -1898,60 +1887,97 @@ EXPORT_SYMBOL(pagecache_get_page);
  *
  * Return: the number of pages and shadow entries which were found.
  */
-unsigned find_get_entries(struct address_space *mapping,
-                         pgoff_t start, unsigned int nr_entries,
-                         struct page **entries, pgoff_t *indices)
+unsigned find_get_entries(struct address_space *mapping, pgoff_t start,
+               pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
 {
        XA_STATE(xas, &mapping->i_pages, start);
        struct page *page;
        unsigned int ret = 0;
-
-       if (!nr_entries)
-               return 0;
+       unsigned nr_entries = PAGEVEC_SIZE;
 
        rcu_read_lock();
-       xas_for_each(&xas, page, ULONG_MAX) {
-               if (xas_retry(&xas, page))
-                       continue;
-               /*
-                * A shadow entry of a recently evicted page, a swap
-                * entry from shmem/tmpfs or a DAX entry.  Return it
-                * without attempting to raise page count.
-                */
-               if (xa_is_value(page))
-                       goto export;
-
-               if (!page_cache_get_speculative(page))
-                       goto retry;
-
-               /* Has the page moved or been split? */
-               if (unlikely(page != xas_reload(&xas)))
-                       goto put_page;
-
+       while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
                /*
                 * Terminate early on finding a THP, to allow the caller to
                 * handle it all at once; but continue if this is hugetlbfs.
                 */
-               if (PageTransHuge(page) && !PageHuge(page)) {
+               if (!xa_is_value(page) && PageTransHuge(page) &&
+                               !PageHuge(page)) {
                        page = find_subpage(page, xas.xa_index);
                        nr_entries = ret + 1;
                }
-export:
+
                indices[ret] = xas.xa_index;
-               entries[ret] = page;
+               pvec->pages[ret] = page;
                if (++ret == nr_entries)
                        break;
-               continue;
-put_page:
-               put_page(page);
-retry:
-               xas_reset(&xas);
        }
        rcu_read_unlock();
+
+       pvec->nr = ret;
        return ret;
 }
 
 /**
+ * find_lock_entries - Find a batch of pagecache entries.
+ * @mapping:   The address_space to search.
+ * @start:     The starting page cache index.
+ * @end:       The final page index (inclusive).
+ * @pvec:      Where the resulting entries are placed.
+ * @indices:   The cache indices of the entries in @pvec.
+ *
+ * find_lock_entries() will return a batch of entries from @mapping.
+ * Swap, shadow and DAX entries are included.  Pages are returned
+ * locked and with an incremented refcount.  Pages which are locked by
+ * somebody else or under writeback are skipped.  Only the head page of
+ * a THP is returned.  Pages which are partially outside the range are
+ * not returned.
+ *
+ * The entries have ascending indexes.  The indices may not be consecutive
+ * due to not-present entries, THP pages, pages which could not be locked
+ * or pages under writeback.
+ *
+ * Return: The number of entries which were found.
+ */
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+               pgoff_t end, struct pagevec *pvec, pgoff_t *indices)
+{
+       XA_STATE(xas, &mapping->i_pages, start);
+       struct page *page;
+
+       rcu_read_lock();
+       while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
+               if (!xa_is_value(page)) {
+                       if (page->index < start)
+                               goto put;
+                       VM_BUG_ON_PAGE(page->index != xas.xa_index, page);
+                       if (page->index + thp_nr_pages(page) - 1 > end)
+                               goto put;
+                       if (!trylock_page(page))
+                               goto put;
+                       if (page->mapping != mapping || PageWriteback(page))
+                               goto unlock;
+                       VM_BUG_ON_PAGE(!thp_contains(page, xas.xa_index),
+                                       page);
+               }
+               indices[pvec->nr] = xas.xa_index;
+               if (!pagevec_add(pvec, page))
+                       break;
+               goto next;
+unlock:
+               unlock_page(page);
+put:
+               put_page(page);
+next:
+               if (!xa_is_value(page) && PageTransHuge(page))
+                       xas_set(&xas, page->index + thp_nr_pages(page));
+       }
+       rcu_read_unlock();
+
+       return pagevec_count(pvec);
+}
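+
A hedged sketch of how a truncate-style caller consumes this interface (the loop shape is modelled on mm/truncate.c of this era; mapping, start and end are assumed to be in scope, and the "operate on the page" step is a placeholder, not part of this hunk):

        struct pagevec pvec;
        pgoff_t indices[PAGEVEC_SIZE];
        pgoff_t index = start;
        int i;

        pagevec_init(&pvec);
        while (index <= end &&
               find_lock_entries(mapping, index, end, &pvec, indices)) {
                /* remember the resume point before consuming the batch */
                index = indices[pagevec_count(&pvec) - 1] + 1;

                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];

                        if (xa_is_value(page))
                                continue;  /* shadow/swap/DAX: no ref, not locked */
                        /* ... operate on the locked page ... */
                        unlock_page(page);
                        put_page(page);
                }
                pagevec_reinit(&pvec);  /* page refs already dropped above */
                cond_resched();
        }

Real callers strip the exceptional (value) entries out of the pagevec before releasing it, which is why this sketch drops the references by hand instead of calling pagevec_release().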
+
+/**
  * find_get_pages_range - gang pagecache lookup
  * @mapping:   The address_space to search
  * @start:     The starting page index
@@ -1984,30 +2010,16 @@ unsigned find_get_pages_range(struct address_space *mapping, pgoff_t *start,
                return 0;
 
        rcu_read_lock();
-       xas_for_each(&xas, page, end) {
-               if (xas_retry(&xas, page))
-                       continue;
+       while ((page = find_get_entry(&xas, end, XA_PRESENT))) {
                /* Skip over shadow, swap and DAX entries */
                if (xa_is_value(page))
                        continue;
 
-               if (!page_cache_get_speculative(page))
-                       goto retry;
-
-               /* Has the page moved or been split? */
-               if (unlikely(page != xas_reload(&xas)))
-                       goto put_page;
-
                pages[ret] = find_subpage(page, xas.xa_index);
                if (++ret == nr_pages) {
                        *start = xas.xa_index + 1;
                        goto out;
                }
-               continue;
-put_page:
-               put_page(page);
-retry:
-               xas_reset(&xas);
        }
 
        /*
@@ -2081,7 +2093,7 @@ retry:
 EXPORT_SYMBOL(find_get_pages_contig);
 
 /**
- * find_get_pages_range_tag - find and return pages in given range matching @tag
+ * find_get_pages_range_tag - Find and return head pages matching @tag.
  * @mapping:   the address_space to search
  * @index:     the starting page index
  * @end:       The final page index (inclusive)
@@ -2089,8 +2101,9 @@ EXPORT_SYMBOL(find_get_pages_contig);
  * @nr_pages:  the maximum number of pages
  * @pages:     where the resulting pages are placed
  *
- * Like find_get_pages, except we only return pages which are tagged with
- * @tag.   We update @index to index the next page for the traversal.
+ * Like find_get_pages(), except we only return head pages which are tagged
+ * with @tag.  @index is updated to the index immediately after the last
+ * page we return, ready for the next iteration.
  *
  * Return: the number of pages which were found.
  */
@@ -2106,9 +2119,7 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
                return 0;
 
        rcu_read_lock();
-       xas_for_each_marked(&xas, page, end, tag) {
-               if (xas_retry(&xas, page))
-                       continue;
+       while ((page = find_get_entry(&xas, end, tag))) {
                /*
                 * Shadow entries should never be tagged, but this iteration
                 * is lockless so there is a window for page reclaim to evict
@@ -2117,23 +2128,11 @@ unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index,
                if (xa_is_value(page))
                        continue;
 
-               if (!page_cache_get_speculative(page))
-                       goto retry;
-
-               /* Has the page moved or been split? */
-               if (unlikely(page != xas_reload(&xas)))
-                       goto put_page;
-
-               pages[ret] = find_subpage(page, xas.xa_index);
+               pages[ret] = page;
                if (++ret == nr_pages) {
-                       *index = xas.xa_index + 1;
+                       *index = page->index + thp_nr_pages(page);
                        goto out;
                }
-               continue;
-put_page:
-               put_page(page);
-retry:
-               xas_reset(&xas);
        }
 
        /*
@@ -2173,287 +2172,267 @@ static void shrink_readahead_size_eio(struct file_ra_state *ra)
        ra->ra_pages /= 4;
 }
 
-static int lock_page_for_iocb(struct kiocb *iocb, struct page *page)
+/*
+ * filemap_get_read_batch - Get a batch of pages for read
+ *
+ * Get a batch of pages which represent a contiguous range of bytes
+ * in the file.  No tail pages will be returned.  If @index is in the
+ * middle of a THP, the entire THP will be returned.  The last page in
+ * the batch may have Readahead set or not be Uptodate so that the
+ * caller can take the appropriate action.
+ */
+static void filemap_get_read_batch(struct address_space *mapping,
+               pgoff_t index, pgoff_t max, struct pagevec *pvec)
 {
-       if (iocb->ki_flags & IOCB_WAITQ)
-               return lock_page_async(page, iocb->ki_waitq);
-       else if (iocb->ki_flags & IOCB_NOWAIT)
-               return trylock_page(page) ? 0 : -EAGAIN;
-       else
-               return lock_page_killable(page);
+       XA_STATE(xas, &mapping->i_pages, index);
+       struct page *head;
+
+       rcu_read_lock();
+       for (head = xas_load(&xas); head; head = xas_next(&xas)) {
+               if (xas_retry(&xas, head))
+                       continue;
+               if (xas.xa_index > max || xa_is_value(head))
+                       break;
+               if (!page_cache_get_speculative(head))
+                       goto retry;
+
+               /* Has the page moved or been split? */
+               if (unlikely(head != xas_reload(&xas)))
+                       goto put_page;
+
+               if (!pagevec_add(pvec, head))
+                       break;
+               if (!PageUptodate(head))
+                       break;
+               if (PageReadahead(head))
+                       break;
+               xas.xa_index = head->index + thp_nr_pages(head) - 1;
+               xas.xa_offset = (xas.xa_index >> xas.xa_shift) & XA_CHUNK_MASK;
+               continue;
+put_page:
+               put_page(head);
+retry:
+               xas_reset(&xas);
+       }
+       rcu_read_unlock();
 }
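
Two details of the batch walk are worth noting. First, the loop deliberately ends a batch at the first !Uptodate or Readahead page, so at most one page per batch, always the last, needs I/O or readahead handling; filemap_get_pages() below relies on that. Second, the direct writes to xas.xa_index and xas.xa_offset when stepping over a THP's tail pages keep the current XArray node, whereas xas_set() would reset the cursor and force the next xas_next() to re-walk from the root.
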
 
-static struct page *
-generic_file_buffered_read_readpage(struct kiocb *iocb,
-                                   struct file *filp,
-                                   struct address_space *mapping,
-                                   struct page *page)
+static int filemap_read_page(struct file *file, struct address_space *mapping,
+               struct page *page)
 {
-       struct file_ra_state *ra = &filp->f_ra;
        int error;
 
-       if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT)) {
-               unlock_page(page);
-               put_page(page);
-               return ERR_PTR(-EAGAIN);
-       }
-
        /*
-        * A previous I/O error may have been due to temporary
-        * failures, eg. multipath errors.
-        * PG_error will be set again if readpage fails.
+        * A previous I/O error may have been due to temporary failures,
+        * eg. multipath errors.  PG_error will be set again if readpage
+        * fails.
         */
        ClearPageError(page);
        /* Start the actual read. The read will unlock the page. */
-       error = mapping->a_ops->readpage(filp, page);
+       error = mapping->a_ops->readpage(file, page);
+       if (error)
+               return error;
 
-       if (unlikely(error)) {
-               put_page(page);
-               return error != AOP_TRUNCATED_PAGE ? ERR_PTR(error) : NULL;
-       }
+       error = wait_on_page_locked_killable(page);
+       if (error)
+               return error;
+       if (PageUptodate(page))
+               return 0;
+       if (!page->mapping)     /* page truncated */
+               return AOP_TRUNCATED_PAGE;
+       shrink_readahead_size_eio(&file->f_ra);
+       return -EIO;
+}
 
-       if (!PageUptodate(page)) {
-               error = lock_page_for_iocb(iocb, page);
-               if (unlikely(error)) {
-                       put_page(page);
-                       return ERR_PTR(error);
-               }
-               if (!PageUptodate(page)) {
-                       if (page->mapping == NULL) {
-                               /*
-                                * invalidate_mapping_pages got it
-                                */
-                               unlock_page(page);
-                               put_page(page);
-                               return NULL;
-                       }
-                       unlock_page(page);
-                       shrink_readahead_size_eio(ra);
-                       put_page(page);
-                       return ERR_PTR(-EIO);
-               }
-               unlock_page(page);
+static bool filemap_range_uptodate(struct address_space *mapping,
+               loff_t pos, struct iov_iter *iter, struct page *page)
+{
+       int count;
+
+       if (PageUptodate(page))
+               return true;
+       /* pipes can't handle partially uptodate pages */
+       if (iov_iter_is_pipe(iter))
+               return false;
+       if (!mapping->a_ops->is_partially_uptodate)
+               return false;
+       if (mapping->host->i_blkbits >= (PAGE_SHIFT + thp_order(page)))
+               return false;
+
+       count = iter->count;
+       if (page_offset(page) > pos) {
+               count -= page_offset(page) - pos;
+               pos = 0;
+       } else {
+               pos -= page_offset(page);
        }
 
-       return page;
+       return mapping->a_ops->is_partially_uptodate(page, pos, count);
 }
 
-static struct page *
-generic_file_buffered_read_pagenotuptodate(struct kiocb *iocb,
-                                          struct file *filp,
-                                          struct iov_iter *iter,
-                                          struct page *page,
-                                          loff_t pos, loff_t count)
+static int filemap_update_page(struct kiocb *iocb,
+               struct address_space *mapping, struct iov_iter *iter,
+               struct page *page)
 {
-       struct address_space *mapping = filp->f_mapping;
-       struct inode *inode = mapping->host;
        int error;
 
-       /*
-        * See comment in do_read_cache_page on why
-        * wait_on_page_locked is used to avoid unnecessarily
-        * serialisations and why it's safe.
-        */
-       if (iocb->ki_flags & IOCB_WAITQ) {
-               error = wait_on_page_locked_async(page,
-                                               iocb->ki_waitq);
-       } else {
-               error = wait_on_page_locked_killable(page);
-       }
-       if (unlikely(error)) {
-               put_page(page);
-               return ERR_PTR(error);
+       if (!trylock_page(page)) {
+               if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_NOIO))
+                       return -EAGAIN;
+               if (!(iocb->ki_flags & IOCB_WAITQ)) {
+                       put_and_wait_on_page_locked(page, TASK_KILLABLE);
+                       return AOP_TRUNCATED_PAGE;
+               }
+               error = __lock_page_async(page, iocb->ki_waitq);
+               if (error)
+                       return error;
        }
-       if (PageUptodate(page))
-               return page;
 
-       if (inode->i_blkbits == PAGE_SHIFT ||
-                       !mapping->a_ops->is_partially_uptodate)
-               goto page_not_up_to_date;
-       /* pipes can't handle partially uptodate pages */
-       if (unlikely(iov_iter_is_pipe(iter)))
-               goto page_not_up_to_date;
-       if (!trylock_page(page))
-               goto page_not_up_to_date;
-       /* Did it get truncated before we got the lock? */
        if (!page->mapping)
-               goto page_not_up_to_date_locked;
-       if (!mapping->a_ops->is_partially_uptodate(page,
-                               pos & ~PAGE_MASK, count))
-               goto page_not_up_to_date_locked;
-       unlock_page(page);
-       return page;
-
-page_not_up_to_date:
-       /* Get exclusive access to the page ... */
-       error = lock_page_for_iocb(iocb, page);
-       if (unlikely(error)) {
-               put_page(page);
-               return ERR_PTR(error);
-       }
+               goto truncated;
 
-page_not_up_to_date_locked:
-       /* Did it get truncated before we got the lock? */
-       if (!page->mapping) {
-               unlock_page(page);
-               put_page(page);
-               return NULL;
-       }
+       error = 0;
+       if (filemap_range_uptodate(mapping, iocb->ki_pos, iter, page))
+               goto unlock;
 
-       /* Did somebody else fill it already? */
-       if (PageUptodate(page)) {
-               unlock_page(page);
-               return page;
-       }
+       error = -EAGAIN;
+       if (iocb->ki_flags & (IOCB_NOIO | IOCB_NOWAIT | IOCB_WAITQ))
+               goto unlock;
 
-       return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+       error = filemap_read_page(iocb->ki_filp, mapping, page);
+       if (error == AOP_TRUNCATED_PAGE)
+               put_page(page);
+       return error;
+truncated:
+       unlock_page(page);
+       put_page(page);
+       return AOP_TRUNCATED_PAGE;
+unlock:
+       unlock_page(page);
+       return error;
 }
 
-static struct page *
-generic_file_buffered_read_no_cached_page(struct kiocb *iocb,
-                                         struct iov_iter *iter)
+static int filemap_create_page(struct file *file,
+               struct address_space *mapping, pgoff_t index,
+               struct pagevec *pvec)
 {
-       struct file *filp = iocb->ki_filp;
-       struct address_space *mapping = filp->f_mapping;
-       pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
        struct page *page;
        int error;
 
-       if (iocb->ki_flags & IOCB_NOIO)
-               return ERR_PTR(-EAGAIN);
-
-       /*
-        * Ok, it wasn't cached, so we need to create a new
-        * page..
-        */
        page = page_cache_alloc(mapping);
        if (!page)
-               return ERR_PTR(-ENOMEM);
+               return -ENOMEM;
 
        error = add_to_page_cache_lru(page, mapping, index,
-                                     mapping_gfp_constraint(mapping, GFP_KERNEL));
-       if (error) {
-               put_page(page);
-               return error != -EEXIST ? ERR_PTR(error) : NULL;
-       }
+                       mapping_gfp_constraint(mapping, GFP_KERNEL));
+       if (error == -EEXIST)
+               error = AOP_TRUNCATED_PAGE;
+       if (error)
+               goto error;
+
+       error = filemap_read_page(file, mapping, page);
+       if (error)
+               goto error;
 
-       return generic_file_buffered_read_readpage(iocb, filp, mapping, page);
+       pagevec_add(pvec, page);
+       return 0;
+error:
+       put_page(page);
+       return error;
+}
+
+static int filemap_readahead(struct kiocb *iocb, struct file *file,
+               struct address_space *mapping, struct page *page,
+               pgoff_t last_index)
+{
+       if (iocb->ki_flags & IOCB_NOIO)
+               return -EAGAIN;
+       page_cache_async_readahead(mapping, &file->f_ra, file, page,
+                       page->index, last_index - page->index);
+       return 0;
 }
 
-static int generic_file_buffered_read_get_pages(struct kiocb *iocb,
-                                               struct iov_iter *iter,
-                                               struct page **pages,
-                                               unsigned int nr)
+static int filemap_get_pages(struct kiocb *iocb, struct iov_iter *iter,
+               struct pagevec *pvec)
 {
        struct file *filp = iocb->ki_filp;
        struct address_space *mapping = filp->f_mapping;
        struct file_ra_state *ra = &filp->f_ra;
        pgoff_t index = iocb->ki_pos >> PAGE_SHIFT;
-       pgoff_t last_index = (iocb->ki_pos + iter->count + PAGE_SIZE-1) >> PAGE_SHIFT;
-       int i, j, nr_got, err = 0;
+       pgoff_t last_index;
+       struct page *page;
+       int err = 0;
 
-       nr = min_t(unsigned long, last_index - index, nr);
-find_page:
+       last_index = DIV_ROUND_UP(iocb->ki_pos + iter->count, PAGE_SIZE);
+retry:
        if (fatal_signal_pending(current))
                return -EINTR;
 
-       nr_got = find_get_pages_contig(mapping, index, nr, pages);
-       if (nr_got)
-               goto got_pages;
-
-       if (iocb->ki_flags & IOCB_NOIO)
-               return -EAGAIN;
-
-       page_cache_sync_readahead(mapping, ra, filp, index, last_index - index);
-
-       nr_got = find_get_pages_contig(mapping, index, nr, pages);
-       if (nr_got)
-               goto got_pages;
-
-       pages[0] = generic_file_buffered_read_no_cached_page(iocb, iter);
-       err = PTR_ERR_OR_ZERO(pages[0]);
-       if (!IS_ERR_OR_NULL(pages[0]))
-               nr_got = 1;
-got_pages:
-       for (i = 0; i < nr_got; i++) {
-               struct page *page = pages[i];
-               pgoff_t pg_index = index + i;
-               loff_t pg_pos = max(iocb->ki_pos,
-                                   (loff_t) pg_index << PAGE_SHIFT);
-               loff_t pg_count = iocb->ki_pos + iter->count - pg_pos;
-
-               if (PageReadahead(page)) {
-                       if (iocb->ki_flags & IOCB_NOIO) {
-                               for (j = i; j < nr_got; j++)
-                                       put_page(pages[j]);
-                               nr_got = i;
-                               err = -EAGAIN;
-                               break;
-                       }
-                       page_cache_async_readahead(mapping, ra, filp, page,
-                                       pg_index, last_index - pg_index);
-               }
-
-               if (!PageUptodate(page)) {
-                       if ((iocb->ki_flags & IOCB_NOWAIT) ||
-                           ((iocb->ki_flags & IOCB_WAITQ) && i)) {
-                               for (j = i; j < nr_got; j++)
-                                       put_page(pages[j]);
-                               nr_got = i;
-                               err = -EAGAIN;
-                               break;
-                       }
+       filemap_get_read_batch(mapping, index, last_index, pvec);
+       if (!pagevec_count(pvec)) {
+               if (iocb->ki_flags & IOCB_NOIO)
+                       return -EAGAIN;
+               page_cache_sync_readahead(mapping, ra, filp, index,
+                               last_index - index);
+               filemap_get_read_batch(mapping, index, last_index, pvec);
+       }
+       if (!pagevec_count(pvec)) {
+               if (iocb->ki_flags & (IOCB_NOWAIT | IOCB_WAITQ))
+                       return -EAGAIN;
+               err = filemap_create_page(filp, mapping,
+                               iocb->ki_pos >> PAGE_SHIFT, pvec);
+               if (err == AOP_TRUNCATED_PAGE)
+                       goto retry;
+               return err;
+       }
 
-                       page = generic_file_buffered_read_pagenotuptodate(iocb,
-                                       filp, iter, page, pg_pos, pg_count);
-                       if (IS_ERR_OR_NULL(page)) {
-                               for (j = i + 1; j < nr_got; j++)
-                                       put_page(pages[j]);
-                               nr_got = i;
-                               err = PTR_ERR_OR_ZERO(page);
-                               break;
-                       }
-               }
+       page = pvec->pages[pagevec_count(pvec) - 1];
+       if (PageReadahead(page)) {
+               err = filemap_readahead(iocb, filp, mapping, page, last_index);
+               if (err)
+                       goto err;
+       }
+       if (!PageUptodate(page)) {
+               if ((iocb->ki_flags & IOCB_WAITQ) && pagevec_count(pvec) > 1)
+                       iocb->ki_flags |= IOCB_NOWAIT;
+               err = filemap_update_page(iocb, mapping, iter, page);
+               if (err)
+                       goto err;
        }
 
-       if (likely(nr_got))
-               return nr_got;
-       if (err)
-               return err;
-       /*
-        * No pages and no error means we raced and should retry:
-        */
-       goto find_page;
+       return 0;
+err:
+       if (err < 0)
+               put_page(page);
+       if (likely(--pvec->nr))
+               return 0;
+       if (err == AOP_TRUNCATED_PAGE)
+               goto retry;
+       return err;
 }
 
 /**
- * generic_file_buffered_read - generic file read routine
- * @iocb:      the iocb to read
- * @iter:      data destination
- * @written:   already copied
- *
- * This is a generic file read routine, and uses the
- * mapping->a_ops->readpage() function for the actual low-level stuff.
+ * filemap_read - Read data from the page cache.
+ * @iocb: The iocb to read.
+ * @iter: Destination for the data.
+ * @already_read: Number of bytes already read by the caller.
  *
- * This is really ugly. But the goto's actually try to clarify some
- * of the logic when it comes to error handling etc.
+ * Copies data from the page cache.  If the data is not currently present,
+ * uses the readahead and readpage address_space operations to fetch it.
  *
- * Return:
- * * total number of bytes copied, including those the were already @written
- * * negative error code if nothing was copied
+ * Return: Total number of bytes copied, including those already read by
+ * the caller.  If an error happens before any bytes are copied, returns
+ * a negative error number.
  */
-ssize_t generic_file_buffered_read(struct kiocb *iocb,
-               struct iov_iter *iter, ssize_t written)
+ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter,
+               ssize_t already_read)
 {
        struct file *filp = iocb->ki_filp;
        struct file_ra_state *ra = &filp->f_ra;
        struct address_space *mapping = filp->f_mapping;
        struct inode *inode = mapping->host;
-       struct page *pages_onstack[PAGEVEC_SIZE], **pages = NULL;
-       unsigned int nr_pages = min_t(unsigned int, 512,
-                       ((iocb->ki_pos + iter->count + PAGE_SIZE - 1) >> PAGE_SHIFT) -
-                       (iocb->ki_pos >> PAGE_SHIFT));
-       int i, pg_nr, error = 0;
+       struct pagevec pvec;
+       int i, error = 0;
        bool writably_mapped;
        loff_t isize, end_offset;
 
@@ -2463,14 +2442,7 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                return 0;
 
        iov_iter_truncate(iter, inode->i_sb->s_maxbytes);
-
-       if (nr_pages > ARRAY_SIZE(pages_onstack))
-               pages = kmalloc_array(nr_pages, sizeof(void *), GFP_KERNEL);
-
-       if (!pages) {
-               pages = pages_onstack;
-               nr_pages = min_t(unsigned int, nr_pages, ARRAY_SIZE(pages_onstack));
-       }
+       pagevec_init(&pvec);
 
        do {
                cond_resched();
@@ -2480,16 +2452,12 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                 * can no longer safely return -EIOCBQUEUED. Hence mark
                 * an async read NOWAIT at that point.
                 */
-               if ((iocb->ki_flags & IOCB_WAITQ) && written)
+               if ((iocb->ki_flags & IOCB_WAITQ) && already_read)
                        iocb->ki_flags |= IOCB_NOWAIT;
 
-               i = 0;
-               pg_nr = generic_file_buffered_read_get_pages(iocb, iter,
-                                                            pages, nr_pages);
-               if (pg_nr < 0) {
-                       error = pg_nr;
+               error = filemap_get_pages(iocb, iter, &pvec);
+               if (error < 0)
                        break;
-               }
 
                /*
                 * i_size must be checked after we know the pages are Uptodate.
@@ -2502,13 +2470,8 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                isize = i_size_read(inode);
                if (unlikely(iocb->ki_pos >= isize))
                        goto put_pages;
-
                end_offset = min_t(loff_t, isize, iocb->ki_pos + iter->count);
 
-               while ((iocb->ki_pos >> PAGE_SHIFT) + pg_nr >
-                      (end_offset + PAGE_SIZE - 1) >> PAGE_SHIFT)
-                       put_page(pages[--pg_nr]);
-
                /*
                 * Once we start copying data, we don't want to be touching any
                 * cachelines that might be contended:
@@ -2521,27 +2484,35 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                 */
                if (iocb->ki_pos >> PAGE_SHIFT !=
                    ra->prev_pos >> PAGE_SHIFT)
-                       mark_page_accessed(pages[0]);
-               for (i = 1; i < pg_nr; i++)
-                       mark_page_accessed(pages[i]);
+                       mark_page_accessed(pvec.pages[0]);
 
-               for (i = 0; i < pg_nr; i++) {
-                       unsigned int offset = iocb->ki_pos & ~PAGE_MASK;
-                       unsigned int bytes = min_t(loff_t, end_offset - iocb->ki_pos,
-                                                  PAGE_SIZE - offset);
-                       unsigned int copied;
+               for (i = 0; i < pagevec_count(&pvec); i++) {
+                       struct page *page = pvec.pages[i];
+                       size_t page_size = thp_size(page);
+                       size_t offset = iocb->ki_pos & (page_size - 1);
+                       size_t bytes = min_t(loff_t, end_offset - iocb->ki_pos,
+                                            page_size - offset);
+                       size_t copied;
 
+                       if (end_offset < page_offset(page))
+                               break;
+                       if (i > 0)
+                               mark_page_accessed(page);
                        /*
                         * If users can be writing to this page using arbitrary
                         * virtual addresses, take care about potential aliasing
                         * before reading the page on the kernel side.
                         */
-                       if (writably_mapped)
-                               flush_dcache_page(pages[i]);
+                       if (writably_mapped) {
+                               int j;
+
+                               for (j = 0; j < thp_nr_pages(page); j++)
+                                       flush_dcache_page(page + j);
+                       }
 
-                       copied = copy_page_to_iter(pages[i], offset, bytes, iter);
+                       copied = copy_page_to_iter(page, offset, bytes, iter);
 
-                       written += copied;
+                       already_read += copied;
                        iocb->ki_pos += copied;
                        ra->prev_pos = iocb->ki_pos;
 
@@ -2551,18 +2522,16 @@ ssize_t generic_file_buffered_read(struct kiocb *iocb,
                        }
                }
 put_pages:
-               for (i = 0; i < pg_nr; i++)
-                       put_page(pages[i]);
+               for (i = 0; i < pagevec_count(&pvec); i++)
+                       put_page(pvec.pages[i]);
+               pagevec_reinit(&pvec);
        } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error);
 
        file_accessed(filp);
 
-       if (pages != pages_onstack)
-               kfree(pages);
-
-       return written ? written : error;
+       return already_read ? already_read : error;
 }
-EXPORT_SYMBOL_GPL(generic_file_buffered_read);
+EXPORT_SYMBOL_GPL(filemap_read);
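
The rewritten loop batches page-cache lookups through an on-stack pagevec (a fixed array of PAGEVEC_SIZE entries, 15 at the time of this change) instead of a kmalloc'ed page array sized to the request, so the allocation/free dance removed above is no longer needed. A minimal sketch of the fill/consume/release pattern the new code follows (illustrative only; in the real path filemap_get_pages() does the filling):

	struct pagevec pvec;
	unsigned int i;

	pagevec_init(&pvec);
	/* ... lookup code fills pvec.pages[0..pagevec_count(&pvec) - 1] ... */
	for (i = 0; i < pagevec_count(&pvec); i++) {
		/* consume pvec.pages[i], e.g. via copy_page_to_iter() */
		put_page(pvec.pages[i]);	/* drop the lookup reference */
	}
	pagevec_reinit(&pvec);			/* reset for the next batch */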
 
 /**
  * generic_file_read_iter - generic filesystem read routine
@@ -2592,7 +2561,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
        ssize_t retval = 0;
 
        if (!count)
-               goto out; /* skip atime */
+               return 0; /* skip atime */
 
        if (iocb->ki_flags & IOCB_DIRECT) {
                struct file *file = iocb->ki_filp;
@@ -2610,7 +2579,7 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                                                iocb->ki_pos,
                                                iocb->ki_pos + count - 1);
                        if (retval < 0)
-                               goto out;
+                               return retval;
                }
 
                file_accessed(file);
@@ -2620,7 +2589,8 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                        iocb->ki_pos += retval;
                        count -= retval;
                }
-               iov_iter_revert(iter, count - iov_iter_count(iter));
+               if (retval != -EIOCBQUEUED)
+                       iov_iter_revert(iter, count - iov_iter_count(iter));
 
                /*
                 * Btrfs can have a short DIO read if we encounter
@@ -2633,15 +2603,116 @@ generic_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
                 */
                if (retval < 0 || !count || iocb->ki_pos >= size ||
                    IS_DAX(inode))
-                       goto out;
+                       return retval;
        }
 
-       retval = generic_file_buffered_read(iocb, iter, retval);
-out:
-       return retval;
+       return filemap_read(iocb, iter, retval);
 }
 EXPORT_SYMBOL(generic_file_read_iter);
 
+static inline loff_t page_seek_hole_data(struct xa_state *xas,
+               struct address_space *mapping, struct page *page,
+               loff_t start, loff_t end, bool seek_data)
+{
+       const struct address_space_operations *ops = mapping->a_ops;
+       size_t offset, bsz = i_blocksize(mapping->host);
+
+       if (xa_is_value(page) || PageUptodate(page))
+               return seek_data ? start : end;
+       if (!ops->is_partially_uptodate)
+               return seek_data ? end : start;
+
+       xas_pause(xas);
+       rcu_read_unlock();
+       lock_page(page);
+       if (unlikely(page->mapping != mapping))
+               goto unlock;
+
+       offset = offset_in_thp(page, start) & ~(bsz - 1);
+
+       do {
+               if (ops->is_partially_uptodate(page, offset, bsz) == seek_data)
+                       break;
+               start = (start + bsz) & ~(bsz - 1);
+               offset += bsz;
+       } while (offset < thp_size(page));
+unlock:
+       unlock_page(page);
+       rcu_read_lock();
+       return start;
+}
+
+static inline
+unsigned int seek_page_size(struct xa_state *xas, struct page *page)
+{
+       if (xa_is_value(page))
+               return PAGE_SIZE << xa_get_order(xas->xa, xas->xa_index);
+       return thp_size(page);
+}
+
+/**
+ * mapping_seek_hole_data - Seek for SEEK_DATA / SEEK_HOLE in the page cache.
+ * @mapping: Address space to search.
+ * @start: First byte to consider.
+ * @end: Limit of search (exclusive).
+ * @whence: Either SEEK_HOLE or SEEK_DATA.
+ *
+ * If the page cache knows which blocks contain holes and which blocks
+ * contain data, your filesystem can use this function to implement
+ * SEEK_HOLE and SEEK_DATA.  This is useful for filesystems which are
+ * entirely memory-based such as tmpfs, and filesystems which support
+ * unwritten extents.
+ *
+ * Return: The requested offset on success, or -ENXIO if @whence specifies
+ * SEEK_DATA and there is no data after @start.  There is an implicit hole
+ * after @end - 1, so SEEK_HOLE returns @end if all the bytes between @start
+ * and @end contain data.
+ */
+loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start,
+               loff_t end, int whence)
+{
+       XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT);
+       pgoff_t max = (end - 1) / PAGE_SIZE;
+       bool seek_data = (whence == SEEK_DATA);
+       struct page *page;
+
+       if (end <= start)
+               return -ENXIO;
+
+       rcu_read_lock();
+       while ((page = find_get_entry(&xas, max, XA_PRESENT))) {
+               loff_t pos = xas.xa_index * PAGE_SIZE;
+
+               if (start < pos) {
+                       if (!seek_data)
+                               goto unlock;
+                       start = pos;
+               }
+
+               pos += seek_page_size(&xas, page);
+               start = page_seek_hole_data(&xas, mapping, page, start, pos,
+                               seek_data);
+               if (start < pos)
+                       goto unlock;
+               if (!xa_is_value(page))
+                       put_page(page);
+       }
+       rcu_read_unlock();
+
+       if (seek_data)
+               return -ENXIO;
+       goto out;
+
+unlock:
+       rcu_read_unlock();
+       if (!xa_is_value(page))
+               put_page(page);
+out:
+       if (start > end)
+               return end;
+       return start;
+}
+
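
As the kerneldoc above says, a filesystem whose page cache reliably tracks holes and data can implement the SEEK_HOLE/SEEK_DATA cases of its ->llseek directly on this helper. A hedged sketch (example_llseek and its locking are hypothetical choices, not part of this patch):

	static loff_t example_llseek(struct file *file, loff_t offset, int whence)
	{
		struct inode *inode = file_inode(file);
		loff_t ret;

		/* caller has already routed SEEK_SET/CUR/END elsewhere */
		inode_lock_shared(inode);
		ret = mapping_seek_hole_data(file->f_mapping, offset,
					     i_size_read(inode), whence);
		inode_unlock_shared(inode);

		if (ret < 0)
			return ret;	/* e.g. -ENXIO: no data past offset */
		return vfs_setpos(file, ret, inode->i_sb->s_maxbytes);
	}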
 #ifdef CONFIG_MMU
 #define MMAP_LOTSAMISS  (100)
 /*
@@ -3431,7 +3502,8 @@ generic_file_direct_write(struct kiocb *iocb, struct iov_iter *from)
                }
                iocb->ki_pos = pos;
        }
-       iov_iter_revert(from, write_len - iov_iter_count(from));
+       if (written != -EIOCBQUEUED)
+               iov_iter_revert(from, write_len - iov_iter_count(from));
 out:
        return written;
 }
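
This hunk and the generic_file_read_iter() change above enforce the same rule: once ->direct_IO has returned -EIOCBQUEUED, the async completion path owns the request and the submitter must not touch the iov_iter again. The general shape of the guarded pattern (do_direct_io() is a stand-in for the actual submission call):

	ret = do_direct_io(iocb, iter);
	/*
	 * Rewind only what a synchronous completion consumed; a queued
	 * async request may still be reading from the iterator.
	 */
	if (ret != -EIOCBQUEUED)
		iov_iter_revert(iter, count - iov_iter_count(iter));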
index e4c224c..e405796 100644 (file)
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -78,9 +78,8 @@ static inline struct page *try_get_compound_head(struct page *page, int refs)
  * considered failure, and furthermore, a likely bug in the caller, so a warning
  * is also emitted.
  */
-static __maybe_unused struct page *try_grab_compound_head(struct page *page,
-                                                         int refs,
-                                                         unsigned int flags)
+__maybe_unused struct page *try_grab_compound_head(struct page *page,
+                                                  int refs, unsigned int flags)
 {
        if (flags & FOLL_GET)
                return try_get_compound_head(page, refs);
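
Dropping static here lets the hugetlb GUP path below grab refs references on a compound head with a single call instead of one try_grab_page() per tail page. The matching declaration presumably sits with the other GUP internals (mm/internal.h), roughly:

	struct page *try_grab_compound_head(struct page *page, int refs,
					    unsigned int flags);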
index 874b732..6ef8f5e 100644 (file)
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -368,20 +368,24 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
 
        BUG_ON(end1 > page_size(page) || end2 > page_size(page));
 
+       if (start1 >= end1)
+               start1 = end1 = 0;
+       if (start2 >= end2)
+               start2 = end2 = 0;
+
        for (i = 0; i < compound_nr(page); i++) {
                void *kaddr = NULL;
 
-               if (start1 < PAGE_SIZE || start2 < PAGE_SIZE)
-                       kaddr = kmap_atomic(page + i);
-
                if (start1 >= PAGE_SIZE) {
                        start1 -= PAGE_SIZE;
                        end1 -= PAGE_SIZE;
                } else {
                        unsigned this_end = min_t(unsigned, end1, PAGE_SIZE);
 
-                       if (end1 > start1)
+                       if (end1 > start1) {
+                               kaddr = kmap_atomic(page + i);
                                memset(kaddr + start1, 0, this_end - start1);
+                       }
                        end1 -= this_end;
                        start1 = 0;
                }
@@ -392,8 +396,11 @@ void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
                } else {
                        unsigned this_end = min_t(unsigned, end2, PAGE_SIZE);
 
-                       if (end2 > start2)
+                       if (end2 > start2) {
+                               if (!kaddr)
+                                       kaddr = kmap_atomic(page + i);
                                memset(kaddr + start2, 0, this_end - start2);
+                       }
                        end2 -= this_end;
                        start2 = 0;
                }
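
zero_user_segments() zeroes two byte ranges within a (possibly compound) page. The rework normalizes empty ranges (start >= end) up front and maps each subpage only when one of the ranges actually intersects it, instead of unconditionally taking kmap_atomic() for every subpage. Typical use, e.g. zeroing around a partial write of [from, to) within one page (illustrative values):

	/* zero [0, from) and [to, PAGE_SIZE) around the written range */
	zero_user_segments(page, 0, from, to, PAGE_SIZE);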
@@ -611,7 +618,7 @@ void __kmap_local_sched_out(void)
                int idx;
 
                /* With debug all even slots are unmapped and act as guard */
-               if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+               if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
                        WARN_ON_ONCE(!pte_none(pteval));
                        continue;
                }
@@ -647,7 +654,7 @@ void __kmap_local_sched_in(void)
                int idx;
 
                /* With debug all even slots are unmapped and act as guard */
-               if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) {
+               if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) {
                        WARN_ON_ONCE(!pte_none(pteval));
                        continue;
                }
index 91ca9b1..ae907a9 100644 (file)
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -386,7 +386,11 @@ static int __init hugepage_init(void)
        struct kobject *hugepage_kobj;
 
        if (!has_transparent_hugepage()) {
-               transparent_hugepage_flags = 0;
+               /*
+                * Hardware doesn't support hugepages, hence disable
+                * DAX PMD support.
+                */
+               transparent_hugepage_flags = 1 << TRANSPARENT_HUGEPAGE_NEVER_DAX;
                return -EINVAL;
        }
 
@@ -636,6 +640,7 @@ static vm_fault_t __do_huge_pmd_anonymous_page(struct vm_fault *vmf,
                lru_cache_add_inactive_or_unevictable(page, vma);
                pgtable_trans_huge_deposit(vma->vm_mm, vmf->pmd, pgtable);
                set_pmd_at(vma->vm_mm, haddr, vmf->pmd, entry);
+               update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
                add_mm_counter(vma->vm_mm, MM_ANONPAGES, HPAGE_PMD_NR);
                mm_inc_nr_ptes(vma->vm_mm);
                spin_unlock(vmf->ptl);
@@ -663,9 +668,9 @@ release:
  *         available
  * never: never stall for any thp allocation
  */
-static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
+gfp_t vma_thp_gfp_mask(struct vm_area_struct *vma)
 {
-       const bool vma_madvised = !!(vma->vm_flags & VM_HUGEPAGE);
+       const bool vma_madvised = vma && (vma->vm_flags & VM_HUGEPAGE);
 
        /* Always do synchronous compaction */
        if (test_bit(TRANSPARENT_HUGEPAGE_DEFRAG_DIRECT_FLAG, &transparent_hugepage_flags))
@@ -690,20 +695,19 @@ static inline gfp_t alloc_hugepage_direct_gfpmask(struct vm_area_struct *vma)
 }
 
 /* Caller must hold page table lock. */
-static bool set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
+static void set_huge_zero_page(pgtable_t pgtable, struct mm_struct *mm,
                struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd,
                struct page *zero_page)
 {
        pmd_t entry;
        if (!pmd_none(*pmd))
-               return false;
+               return;
        entry = mk_pmd(zero_page, vma->vm_page_prot);
        entry = pmd_mkhuge(entry);
        if (pgtable)
                pgtable_trans_huge_deposit(mm, pmd, pgtable);
        set_pmd_at(mm, haddr, pmd, entry);
        mm_inc_nr_ptes(mm);
-       return true;
 }
 
 vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
@@ -749,6 +753,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                        } else {
                                set_huge_zero_page(pgtable, vma->vm_mm, vma,
                                                   haddr, vmf->pmd, zero_page);
+                               update_mmu_cache_pmd(vma, vmf->address, vmf->pmd);
                                spin_unlock(vmf->ptl);
                        }
                } else {
@@ -757,7 +762,7 @@ vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf)
                }
                return ret;
        }
-       gfp = alloc_hugepage_direct_gfpmask(vma);
+       gfp = vma_thp_gfp_mask(vma);
        page = alloc_hugepage_vma(gfp, vma, haddr, HPAGE_PMD_ORDER);
        if (unlikely(!page)) {
                count_vm_event(THP_FAULT_FALLBACK);
@@ -1095,9 +1100,7 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
         * best effort that the pinned pages won't be replaced by another
         * random page during the coming copy-on-write.
         */
-       if (unlikely(is_cow_mapping(vma->vm_flags) &&
-                    atomic_read(&src_mm->has_pinned) &&
-                    page_maybe_dma_pinned(src_page))) {
+       if (unlikely(page_needs_cow_for_dma(vma, src_page))) {
                pte_free(dst_mm, pgtable);
                spin_unlock(src_ptl);
                spin_unlock(dst_ptl);
@@ -1209,9 +1212,7 @@ int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
        }
 
        /* Please refer to comments in copy_huge_pmd() */
-       if (unlikely(is_cow_mapping(vma->vm_flags) &&
-                    atomic_read(&src_mm->has_pinned) &&
-                    page_maybe_dma_pinned(pud_page(pud)))) {
+       if (unlikely(page_needs_cow_for_dma(vma, pud_page(pud)))) {
                spin_unlock(src_ptl);
                spin_unlock(dst_ptl);
                __split_huge_pud(vma, src_pud, addr);
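
Both hunks fold the same three-part test into the new page_needs_cow_for_dma() helper. Judging by the open-coded condition being removed, the helper is presumably equivalent to:

	static inline bool page_needs_cow_for_dma(struct vm_area_struct *vma,
						  struct page *page)
	{
		if (!is_cow_mapping(vma->vm_flags))
			return false;
		if (!atomic_read(&vma->vm_mm->has_pinned))
			return false;
		return page_maybe_dma_pinned(page);
	}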
@@ -1439,7 +1440,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
                if (!get_page_unless_zero(page))
                        goto out_unlock;
                spin_unlock(vmf->ptl);
-               put_and_wait_on_page_locked(page);
+               put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
                goto out;
        }
 
@@ -1475,7 +1476,7 @@ vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t pmd)
                if (!get_page_unless_zero(page))
                        goto out_unlock;
                spin_unlock(vmf->ptl);
-               put_and_wait_on_page_locked(page);
+               put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
                goto out;
        }
 
@@ -2176,7 +2177,8 @@ static void __split_huge_pmd_locked(struct vm_area_struct *vma, pmd_t *pmd,
                lock_page_memcg(page);
                if (atomic_add_negative(-1, compound_mapcount_ptr(page))) {
                        /* Last compound_mapcount is gone. */
-                       __dec_lruvec_page_state(page, NR_ANON_THPS);
+                       __mod_lruvec_page_state(page, NR_ANON_THPS,
+                                               -HPAGE_PMD_NR);
                        if (TestClearPageDoubleMap(page)) {
                                /* No need in mapcount reference anymore */
                                for (i = 0; i < HPAGE_PMD_NR; i++)
@@ -2465,7 +2467,7 @@ static void __split_huge_page(struct page *page, struct list_head *list,
        int i;
 
        /* complete memcg works before add pages to LRU */
-       mem_cgroup_split_huge_fixup(head);
+       split_page_memcg(head, nr);
 
        if (PageAnon(head) && PageSwapCache(head)) {
                swp_entry_t entry = { .val = page_private(head) };
@@ -2751,10 +2753,14 @@ int split_huge_page_to_list(struct page *page, struct list_head *list)
                }
                spin_unlock(&ds_queue->split_queue_lock);
                if (mapping) {
+                       int nr = thp_nr_pages(head);
+
                        if (PageSwapBacked(head))
-                               __dec_lruvec_page_state(head, NR_SHMEM_THPS);
+                               __mod_lruvec_page_state(head, NR_SHMEM_THPS,
+                                                       -nr);
                        else
-                               __dec_lruvec_page_state(head, NR_FILE_THPS);
+                               __mod_lruvec_page_state(head, NR_FILE_THPS,
+                                                       -nr);
                }
 
                __split_huge_page(page, list, end);
index 905a7d5..a86a58e 100644 (file)
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -79,34 +79,29 @@ DEFINE_SPINLOCK(hugetlb_lock);
 static int num_fault_mutexes;
 struct mutex *hugetlb_fault_mutex_table ____cacheline_aligned_in_smp;
 
-static inline bool PageHugeFreed(struct page *head)
-{
-       return page_private(head + 4) == -1UL;
-}
+/* Forward declaration */
+static int hugetlb_acct_memory(struct hstate *h, long delta);
 
-static inline void SetPageHugeFreed(struct page *head)
+static inline bool subpool_is_free(struct hugepage_subpool *spool)
 {
-       set_page_private(head + 4, -1UL);
-}
+       if (spool->count)
+               return false;
+       if (spool->max_hpages != -1)
+               return spool->used_hpages == 0;
+       if (spool->min_hpages != -1)
+               return spool->rsv_hpages == spool->min_hpages;
 
-static inline void ClearPageHugeFreed(struct page *head)
-{
-       set_page_private(head + 4, 0);
+       return true;
 }
 
-/* Forward declaration */
-static int hugetlb_acct_memory(struct hstate *h, long delta);
-
 static inline void unlock_or_release_subpool(struct hugepage_subpool *spool)
 {
-       bool free = (spool->count == 0) && (spool->used_hpages == 0);
-
        spin_unlock(&spool->lock);
 
        /* If no pages are used, and no other handles to the subpool
         * remain, give up any reservations based on minimum size and
         * free the subpool */
-       if (free) {
+       if (subpool_is_free(spool)) {
                if (spool->min_hpages != -1)
                        hugetlb_acct_memory(spool->hstate,
                                                -spool->min_hpages);
@@ -285,6 +280,17 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
                nrg->reservation_counter =
                        &h_cg->rsvd_hugepage[hstate_index(h)];
                nrg->css = &h_cg->css;
+               /*
+                * The caller will hold exactly one h_cg->css reference for the
+                * whole contiguous reservation region. But this area might be
+                * scattered when there are already some file_regions reside in
+                * scattered when there are already some file_regions residing in
+                * reference. In order to ensure that one file_region must hold
+                * exactly one h_cg->css reference, we should do css_get for
+                * each file_region and leave the reference held by caller
+                * untouched.
+                */
+               css_get(&h_cg->css);
                if (!resv->pages_per_hpage)
                        resv->pages_per_hpage = pages_per_huge_page(h);
                /* pages_per_hpage should be the same for all entries in
@@ -298,6 +304,14 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg,
 #endif
 }
 
+static void put_uncharge_info(struct file_region *rg)
+{
+#ifdef CONFIG_CGROUP_HUGETLB
+       if (rg->css)
+               css_put(rg->css);
+#endif
+}
+
 static bool has_same_uncharge_info(struct file_region *rg,
                                   struct file_region *org)
 {
@@ -321,6 +335,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
                prg->to = rg->to;
 
                list_del(&rg->link);
+               put_uncharge_info(rg);
                kfree(rg);
 
                rg = prg;
@@ -332,10 +347,29 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg)
                nrg->from = rg->from;
 
                list_del(&rg->link);
+               put_uncharge_info(rg);
                kfree(rg);
        }
 }
 
+static inline long
+hugetlb_resv_map_add(struct resv_map *map, struct file_region *rg, long from,
+                    long to, struct hstate *h, struct hugetlb_cgroup *cg,
+                    long *regions_needed)
+{
+       struct file_region *nrg;
+
+       if (!regions_needed) {
+               nrg = get_file_region_entry_from_cache(map, from, to);
+               record_hugetlb_cgroup_uncharge_info(cg, h, map, nrg);
+               list_add(&nrg->link, rg->link.prev);
+               coalesce_file_region(map, nrg);
+       } else
+               *regions_needed += 1;
+
+       return to - from;
+}
+
 /*
  * Must be called with resv->lock held.
  *
@@ -351,7 +385,7 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
        long add = 0;
        struct list_head *head = &resv->regions;
        long last_accounted_offset = f;
-       struct file_region *rg = NULL, *trg = NULL, *nrg = NULL;
+       struct file_region *rg = NULL, *trg = NULL;
 
        if (regions_needed)
                *regions_needed = 0;
@@ -374,24 +408,17 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
                /* When we find a region that starts beyond our range, we've
                 * finished.
                 */
-               if (rg->from > t)
+               if (rg->from >= t)
                        break;
 
                /* Add an entry for last_accounted_offset -> rg->from, and
                 * update last_accounted_offset.
                 */
-               if (rg->from > last_accounted_offset) {
-                       add += rg->from - last_accounted_offset;
-                       if (!regions_needed) {
-                               nrg = get_file_region_entry_from_cache(
-                                       resv, last_accounted_offset, rg->from);
-                               record_hugetlb_cgroup_uncharge_info(h_cg, h,
-                                                                   resv, nrg);
-                               list_add(&nrg->link, rg->link.prev);
-                               coalesce_file_region(resv, nrg);
-                       } else
-                               *regions_needed += 1;
-               }
+               if (rg->from > last_accounted_offset)
+                       add += hugetlb_resv_map_add(resv, rg,
+                                                   last_accounted_offset,
+                                                   rg->from, h, h_cg,
+                                                   regions_needed);
 
                last_accounted_offset = rg->to;
        }
@@ -399,17 +426,9 @@ static long add_reservation_in_range(struct resv_map *resv, long f, long t,
        /* Handle the case where our range extends beyond
         * last_accounted_offset.
         */
-       if (last_accounted_offset < t) {
-               add += t - last_accounted_offset;
-               if (!regions_needed) {
-                       nrg = get_file_region_entry_from_cache(
-                               resv, last_accounted_offset, t);
-                       record_hugetlb_cgroup_uncharge_info(h_cg, h, resv, nrg);
-                       list_add(&nrg->link, rg->link.prev);
-                       coalesce_file_region(resv, nrg);
-               } else
-                       *regions_needed += 1;
-       }
+       if (last_accounted_offset < t)
+               add += hugetlb_resv_map_add(resv, rg, last_accounted_offset,
+                                           t, h, h_cg, regions_needed);
 
        VM_BUG_ON(add < 0);
        return add;
@@ -664,7 +683,7 @@ retry:
 
                        del += t - f;
                        hugetlb_cgroup_uncharge_file_region(
-                               resv, rg, t - f);
+                               resv, rg, t - f, false);
 
                        /* New entry for end of split region */
                        nrg->from = t;
@@ -685,7 +704,7 @@ retry:
                if (f <= rg->from && t >= rg->to) { /* Remove entire region */
                        del += rg->to - rg->from;
                        hugetlb_cgroup_uncharge_file_region(resv, rg,
-                                                           rg->to - rg->from);
+                                                           rg->to - rg->from, true);
                        list_del(&rg->link);
                        kfree(rg);
                        continue;
@@ -693,13 +712,13 @@ retry:
 
                if (f <= rg->from) {    /* Trim beginning of region */
                        hugetlb_cgroup_uncharge_file_region(resv, rg,
-                                                           t - rg->from);
+                                                           t - rg->from, false);
 
                        del += t - rg->from;
                        rg->from = t;
                } else {                /* Trim end of region */
                        hugetlb_cgroup_uncharge_file_region(resv, rg,
-                                                           rg->to - f);
+                                                           rg->to - f, false);
 
                        del += rg->to - f;
                        rg->to = f;
@@ -1043,7 +1062,7 @@ static void enqueue_huge_page(struct hstate *h, struct page *page)
        list_move(&page->lru, &h->hugepage_freelists[nid]);
        h->free_huge_pages++;
        h->free_huge_pages_node[nid]++;
-       SetPageHugeFreed(page);
+       SetHPageFreed(page);
 }
 
 static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
@@ -1060,7 +1079,7 @@ static struct page *dequeue_huge_page_node_exact(struct hstate *h, int nid)
 
                list_move(&page->lru, &h->hugepage_activelist);
                set_page_refcounted(page);
-               ClearPageHugeFreed(page);
+               ClearHPageFreed(page);
                h->free_huge_pages--;
                h->free_huge_pages_node[nid]--;
                return page;
@@ -1133,7 +1152,7 @@ static struct page *dequeue_huge_page_vma(struct hstate *h,
        nid = huge_node(vma, address, gfp_mask, &mpol, &nodemask);
        page = dequeue_huge_page_nodemask(h, gfp_mask, nid, nodemask);
        if (page && !avoid_reserve && vma_has_reserves(vma, chg)) {
-               SetPagePrivate(page);
+               SetHPageRestoreReserve(page);
                h->resv_huge_pages--;
        }
 
@@ -1224,8 +1243,7 @@ static void destroy_compound_gigantic_page(struct page *page,
        struct page *p = page + 1;
 
        atomic_set(compound_mapcount_ptr(page), 0);
-       if (hpage_pincount_available(page))
-               atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(compound_pincount_ptr(page), 0);
 
        for (i = 1; i < nr_pages; i++, p = mem_map_next(p, page, i)) {
                clear_compound_head(p);
@@ -1312,14 +1330,16 @@ static inline void destroy_compound_gigantic_page(struct page *page,
 static void update_and_free_page(struct hstate *h, struct page *page)
 {
        int i;
+       struct page *subpage = page;
 
        if (hstate_is_gigantic(h) && !gigantic_page_runtime_supported())
                return;
 
        h->nr_huge_pages--;
        h->nr_huge_pages_node[page_to_nid(page)]--;
-       for (i = 0; i < pages_per_huge_page(h); i++) {
-               page[i].flags &= ~(1 << PG_locked | 1 << PG_error |
+       for (i = 0; i < pages_per_huge_page(h);
+            i++, subpage = mem_map_next(subpage, page, i)) {
+               subpage->flags &= ~(1 << PG_locked | 1 << PG_error |
                                1 << PG_referenced | 1 << PG_dirty |
                                1 << PG_active | 1 << PG_private |
                                1 << PG_writeback);
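
The loop now advances with mem_map_next() rather than page[i], which matters for gigantic pages whose struct page array need not be virtually contiguous (e.g. SPARSEMEM without VMEMMAP). For reference, mem_map_next() in mm/internal.h is essentially:

	/* mem_map_next(iter, base, i): struct page for subpage i of base */
	static inline struct page *mem_map_next(struct page *iter,
						struct page *base, int offset)
	{
		if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
			unsigned long pfn = page_to_pfn(base) + offset;

			if (!pfn_valid(pfn))
				return NULL;
			return pfn_to_page(pfn);
		}
		return iter + 1;
	}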
@@ -1353,52 +1373,6 @@ struct hstate *size_to_hstate(unsigned long size)
        return NULL;
 }
 
-/*
- * Test to determine whether the hugepage is "active/in-use" (i.e. being linked
- * to hstate->hugepage_activelist.)
- *
- * This function can be called for tail pages, but never returns true for them.
- */
-bool page_huge_active(struct page *page)
-{
-       return PageHeadHuge(page) && PagePrivate(&page[1]);
-}
-
-/* never called for tail page */
-void set_page_huge_active(struct page *page)
-{
-       VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
-       SetPagePrivate(&page[1]);
-}
-
-static void clear_page_huge_active(struct page *page)
-{
-       VM_BUG_ON_PAGE(!PageHeadHuge(page), page);
-       ClearPagePrivate(&page[1]);
-}
-
-/*
- * Internal hugetlb specific page flag. Do not use outside of the hugetlb
- * code
- */
-static inline bool PageHugeTemporary(struct page *page)
-{
-       if (!PageHuge(page))
-               return false;
-
-       return (unsigned long)page[2].mapping == -1U;
-}
-
-static inline void SetPageHugeTemporary(struct page *page)
-{
-       page[2].mapping = (void *)-1U;
-}
-
-static inline void ClearPageHugeTemporary(struct page *page)
-{
-       page[2].mapping = NULL;
-}
-
 static void __free_huge_page(struct page *page)
 {
        /*
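
The open-coded helpers deleted above (PagePrivate on page[1], a magic mapping value on page[2], page_private on page[4]) are replaced by generic hugetlb page flags kept in the head page's page.private field. A sketch of the machinery, assuming the include/linux/hugetlb.h side of this series:

	enum hugetlb_page_flags {
		HPG_restore_reserve = 0,
		HPG_migratable,
		HPG_temporary,
		HPG_freed,
		__NR_HPAGEFLAGS,
	};

	/*
	 * Each flag expands to HPageFoo()/SetHPageFoo()/ClearHPageFoo(),
	 * all operating on bits of the head page's page->private.
	 */
	#define TESTHPAGEFLAG(uname, flname)				\
	static inline int HPage##uname(struct page *page)		\
	{								\
		return test_bit(HPG_##flname, &page->private);		\
	}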
@@ -1407,24 +1381,23 @@ static void __free_huge_page(struct page *page)
         */
        struct hstate *h = page_hstate(page);
        int nid = page_to_nid(page);
-       struct hugepage_subpool *spool =
-               (struct hugepage_subpool *)page_private(page);
+       struct hugepage_subpool *spool = hugetlb_page_subpool(page);
        bool restore_reserve;
 
        VM_BUG_ON_PAGE(page_count(page), page);
        VM_BUG_ON_PAGE(page_mapcount(page), page);
 
-       set_page_private(page, 0);
+       hugetlb_set_page_subpool(page, NULL);
        page->mapping = NULL;
-       restore_reserve = PagePrivate(page);
-       ClearPagePrivate(page);
+       restore_reserve = HPageRestoreReserve(page);
+       ClearHPageRestoreReserve(page);
 
        /*
-        * If PagePrivate() was set on page, page allocation consumed a
+        * If HPageRestoreReserve was set on page, page allocation consumed a
         * reservation.  If the page was associated with a subpool, there
         * would have been a page reserved in the subpool before allocation
         * via hugepage_subpool_get_pages().  Since we are 'restoring' the
-        * reservtion, do not call hugepage_subpool_put_pages() as this will
+        * reservation, do not call hugepage_subpool_put_pages() as this will
         * remove the reserved page from the subpool.
         */
        if (!restore_reserve) {
@@ -1439,7 +1412,7 @@ static void __free_huge_page(struct page *page)
        }
 
        spin_lock(&hugetlb_lock);
-       clear_page_huge_active(page);
+       ClearHPageMigratable(page);
        hugetlb_cgroup_uncharge_page(hstate_index(h),
                                     pages_per_huge_page(h), page);
        hugetlb_cgroup_uncharge_page_rsvd(hstate_index(h),
@@ -1447,9 +1420,9 @@ static void __free_huge_page(struct page *page)
        if (restore_reserve)
                h->resv_huge_pages++;
 
-       if (PageHugeTemporary(page)) {
+       if (HPageTemporary(page)) {
                list_del(&page->lru);
-               ClearPageHugeTemporary(page);
+               ClearHPageTemporary(page);
                update_and_free_page(h, page);
        } else if (h->surplus_huge_pages_node[nid]) {
                /* remove the page from active list */
@@ -1516,12 +1489,13 @@ static void prep_new_huge_page(struct hstate *h, struct page *page, int nid)
 {
        INIT_LIST_HEAD(&page->lru);
        set_compound_page_dtor(page, HUGETLB_PAGE_DTOR);
+       hugetlb_set_page_subpool(page, NULL);
        set_hugetlb_cgroup(page, NULL);
        set_hugetlb_cgroup_rsvd(page, NULL);
        spin_lock(&hugetlb_lock);
        h->nr_huge_pages++;
        h->nr_huge_pages_node[nid]++;
-       ClearPageHugeFreed(page);
+       ClearHPageFreed(page);
        spin_unlock(&hugetlb_lock);
 }
 
@@ -1553,9 +1527,7 @@ static void prep_compound_gigantic_page(struct page *page, unsigned int order)
                set_compound_head(p, page);
        }
        atomic_set(compound_mapcount_ptr(page), -1);
-
-       if (hpage_pincount_available(page))
-               atomic_set(compound_pincount_ptr(page), 0);
+       atomic_set(compound_pincount_ptr(page), 0);
 }
 
 /*
@@ -1794,7 +1766,7 @@ retry:
                 * We should make sure that the page is already on the free list
                 * when it is dissolved.
                 */
-               if (unlikely(!PageHugeFreed(head))) {
+               if (unlikely(!HPageFreed(head))) {
                        spin_unlock(&hugetlb_lock);
                        cond_resched();
 
@@ -1885,7 +1857,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
         * codeflow
         */
        if (h->surplus_huge_pages >= h->nr_overcommit_huge_pages) {
-               SetPageHugeTemporary(page);
+               SetHPageTemporary(page);
                spin_unlock(&hugetlb_lock);
                put_page(page);
                return NULL;
@@ -1916,7 +1888,7 @@ static struct page *alloc_migrate_huge_page(struct hstate *h, gfp_t gfp_mask,
         * We do not account these pages as surplus because they are only
         * temporary and will be released properly on the last reference
         */
-       SetPageHugeTemporary(page);
+       SetHPageTemporary(page);
 
        return page;
 }
@@ -2254,24 +2226,24 @@ static long vma_add_reservation(struct hstate *h,
  * This routine is called to restore a reservation on error paths.  In the
  * specific error paths, a huge page was allocated (via alloc_huge_page)
  * and is about to be freed.  If a reservation for the page existed,
- * alloc_huge_page would have consumed the reservation and set PagePrivate
- * in the newly allocated page.  When the page is freed via free_huge_page,
- * the global reservation count will be incremented if PagePrivate is set.
- * However, free_huge_page can not adjust the reserve map.  Adjust the
- * reserve map here to be consistent with global reserve count adjustments
- * to be made by free_huge_page.
+ * alloc_huge_page would have consumed the reservation and set
+ * HPageRestoreReserve in the newly allocated page.  When the page is freed
+ * via free_huge_page, the global reservation count will be incremented if
+ * HPageRestoreReserve is set.  However, free_huge_page can not adjust the
+ * reserve map.  Adjust the reserve map here to be consistent with global
+ * reserve count adjustments to be made by free_huge_page.
  */
 static void restore_reserve_on_error(struct hstate *h,
                        struct vm_area_struct *vma, unsigned long address,
                        struct page *page)
 {
-       if (unlikely(PagePrivate(page))) {
+       if (unlikely(HPageRestoreReserve(page))) {
                long rc = vma_needs_reservation(h, vma, address);
 
                if (unlikely(rc < 0)) {
                        /*
                         * Rare out of memory condition in reserve map
-                        * manipulation.  Clear PagePrivate so that
+                        * manipulation.  Clear HPageRestoreReserve so that
                         * global reserve count will not be incremented
                         * by free_huge_page.  This will make it appear
                         * as though the reservation for this page was
@@ -2280,7 +2252,7 @@ static void restore_reserve_on_error(struct hstate *h,
                         * is better than inconsistent global huge page
                         * accounting of reserve counts.
                         */
-                       ClearPagePrivate(page);
+                       ClearHPageRestoreReserve(page);
                } else if (rc) {
                        rc = vma_add_reservation(h, vma, address);
                        if (unlikely(rc < 0))
@@ -2288,7 +2260,7 @@ static void restore_reserve_on_error(struct hstate *h,
                                 * See above comment about rare out of
                                 * memory condition.
                                 */
-                               ClearPagePrivate(page);
+                               ClearHPageRestoreReserve(page);
                } else
                        vma_end_reservation(h, vma, address);
        }
@@ -2369,7 +2341,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
                if (!page)
                        goto out_uncharge_cgroup;
                if (!avoid_reserve && vma_has_reserves(vma, gbl_chg)) {
-                       SetPagePrivate(page);
+                       SetHPageRestoreReserve(page);
                        h->resv_huge_pages--;
                }
                spin_lock(&hugetlb_lock);
@@ -2387,7 +2359,7 @@ struct page *alloc_huge_page(struct vm_area_struct *vma,
 
        spin_unlock(&hugetlb_lock);
 
-       set_page_private(page, (unsigned long)spool);
+       hugetlb_set_page_subpool(page, spool);
 
        map_commit = vma_commit_reservation(h, vma, addr);
        if (unlikely(map_chg > map_commit)) {
@@ -2476,7 +2448,7 @@ static void __init gather_bootmem_prealloc(void)
                struct hstate *h = m->hstate;
 
                WARN_ON(page_count(page) != 1);
-               prep_compound_huge_page(page, h->order);
+               prep_compound_huge_page(page, huge_page_order(h));
                WARN_ON(PageReserved(page));
                prep_new_huge_page(h, page, page_to_nid(page));
                put_page(page); /* free it into the hugepage allocator */
@@ -2488,7 +2460,7 @@ static void __init gather_bootmem_prealloc(void)
                 * side-effects, like CommitLimit going negative.
                 */
                if (hstate_is_gigantic(h))
-                       adjust_managed_page_count(page, 1 << h->order);
+                       adjust_managed_page_count(page, pages_per_huge_page(h));
                cond_resched();
        }
 }
@@ -2520,7 +2492,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
                if (hstate_is_gigantic(h)) {
                        if (hugetlb_cma_size) {
                                pr_warn_once("HugeTLB: hugetlb_cma is enabled, skip boot time allocation\n");
-                               break;
+                               goto free;
                        }
                        if (!alloc_bootmem_huge_page(h))
                                break;
@@ -2538,7 +2510,7 @@ static void __init hugetlb_hstate_alloc_pages(struct hstate *h)
                        h->max_huge_pages, buf, i);
                h->max_huge_pages = i;
        }
-
+free:
        kfree(node_alloc_noretry);
 }
 
@@ -2988,8 +2960,10 @@ static int hugetlb_sysfs_add_hstate(struct hstate *h, struct kobject *parent,
                return -ENOMEM;
 
        retval = sysfs_create_group(hstate_kobjs[hi], hstate_attr_group);
-       if (retval)
+       if (retval) {
                kobject_put(hstate_kobjs[hi]);
+               hstate_kobjs[hi] = NULL;
+       }
 
        return retval;
 }
@@ -3159,6 +3133,9 @@ static int __init hugetlb_init(void)
 {
        int i;
 
+       BUILD_BUG_ON(sizeof_field(struct page, private) * BITS_PER_BYTE <
+                       __NR_HPAGEFLAGS);
+
        if (!hugepages_supported()) {
                if (hugetlb_max_hstate || default_hstate_max_huge_pages)
                        pr_warn("HugeTLB: huge pages not supported, ignoring associated command-line parameters\n");
@@ -3239,7 +3216,7 @@ void __init hugetlb_add_hstate(unsigned int order)
        BUG_ON(order == 0);
        h = &hstates[hugetlb_max_hstate++];
        h->order = order;
-       h->mask = ~((1ULL << (order + PAGE_SHIFT)) - 1);
+       h->mask = ~(huge_page_size(h) - 1);
        for (i = 0; i < MAX_NUMNODES; ++i)
                INIT_LIST_HEAD(&h->hugepage_freelists[i]);
        INIT_LIST_HEAD(&h->hugepage_activelist);
@@ -3408,8 +3385,7 @@ static unsigned int allowed_mems_nr(struct hstate *h)
        mpol_allowed = policy_nodemask_current(gfp_mask);
 
        for_each_node_mask(node, cpuset_current_mems_allowed) {
-               if (!mpol_allowed ||
-                   (mpol_allowed && node_isset(node, *mpol_allowed)))
+               if (!mpol_allowed || node_isset(node, *mpol_allowed))
                        nr += array[node];
        }
 
@@ -3515,7 +3491,7 @@ void hugetlb_report_meminfo(struct seq_file *m)
        for_each_hstate(h) {
                unsigned long count = h->nr_huge_pages;
 
-               total += (PAGE_SIZE << huge_page_order(h)) * count;
+               total += huge_page_size(h) * count;
 
                if (h == &default_hstate)
                        seq_printf(m,
@@ -3528,10 +3504,10 @@ void hugetlb_report_meminfo(struct seq_file *m)
                                   h->free_huge_pages,
                                   h->resv_huge_pages,
                                   h->surplus_huge_pages,
-                                  (PAGE_SIZE << huge_page_order(h)) / 1024);
+                                  huge_page_size(h) / SZ_1K);
        }
 
-       seq_printf(m, "Hugetlb:        %8lu kB\n", total / 1024);
+       seq_printf(m, "Hugetlb:        %8lu kB\n", total / SZ_1K);
 }
 
 int hugetlb_report_node_meminfo(char *buf, int len, int nid)
@@ -3565,7 +3541,7 @@ void hugetlb_show_meminfo(void)
                                h->nr_huge_pages_node[nid],
                                h->free_huge_pages_node[nid],
                                h->surplus_huge_pages_node[nid],
-                               1UL << (huge_page_order(h) + PAGE_SHIFT - 10));
+                               huge_page_size(h) / SZ_1K);
 }
 
 void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm)
@@ -3589,6 +3565,9 @@ static int hugetlb_acct_memory(struct hstate *h, long delta)
 {
        int ret = -ENOMEM;
 
+       if (!delta)
+               return 0;
+
        spin_lock(&hugetlb_lock);
        /*
         * When cpuset is configured, it breaks the strict hugetlb page
@@ -3685,15 +3664,13 @@ static int hugetlb_vm_op_split(struct vm_area_struct *vma, unsigned long addr)
 
 static unsigned long hugetlb_vm_op_pagesize(struct vm_area_struct *vma)
 {
-       struct hstate *hstate = hstate_vma(vma);
-
-       return 1UL << huge_page_shift(hstate);
+       return huge_page_size(hstate_vma(vma));
 }
 
 /*
  * We cannot handle pagefaults against hugetlb pages at all.  They cause
  * handle_mm_fault() to try to instantiate regular-sized pages in the
- * hugegpage VMA.  do_page_fault() is supposed to trap this, so BUG is we get
+ * hugepage VMA.  do_page_fault() is supposed to trap this, so BUG if we get
  * this far.
  */
 static vm_fault_t hugetlb_vm_op_fault(struct vm_fault *vmf)
@@ -3772,21 +3749,32 @@ static bool is_hugetlb_entry_hwpoisoned(pte_t pte)
                return false;
 }
 
+static void
+hugetlb_install_page(struct vm_area_struct *vma, pte_t *ptep, unsigned long addr,
+                    struct page *new_page)
+{
+       __SetPageUptodate(new_page);
+       set_huge_pte_at(vma->vm_mm, addr, ptep, make_huge_pte(vma, new_page, 1));
+       hugepage_add_new_anon_rmap(new_page, vma, addr);
+       hugetlb_count_add(pages_per_huge_page(hstate_vma(vma)), vma->vm_mm);
+       ClearHPageRestoreReserve(new_page);
+       SetHPageMigratable(new_page);
+}
+
 int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                            struct vm_area_struct *vma)
 {
        pte_t *src_pte, *dst_pte, entry, dst_entry;
        struct page *ptepage;
        unsigned long addr;
-       int cow;
+       bool cow = is_cow_mapping(vma->vm_flags);
        struct hstate *h = hstate_vma(vma);
        unsigned long sz = huge_page_size(h);
+       unsigned long npages = pages_per_huge_page(h);
        struct address_space *mapping = vma->vm_file->f_mapping;
        struct mmu_notifier_range range;
        int ret = 0;
 
-       cow = (vma->vm_flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-
        if (cow) {
                mmu_notifier_range_init(&range, MMU_NOTIFY_CLEAR, 0, vma, src,
                                        vma->vm_start,
@@ -3831,6 +3819,7 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
                entry = huge_ptep_get(src_pte);
                dst_entry = huge_ptep_get(dst_pte);
+again:
                if (huge_pte_none(entry) || !huge_pte_none(dst_entry)) {
                        /*
                         * Skip if src entry none.  Also, skip in the
@@ -3854,6 +3843,52 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                        }
                        set_huge_swap_pte_at(dst, addr, dst_pte, entry, sz);
                } else {
+                       entry = huge_ptep_get(src_pte);
+                       ptepage = pte_page(entry);
+                       get_page(ptepage);
+
+                       /*
+                        * This is a rare case where we see pinned hugetlb
+                        * pages while they're prone to COW.  We need to do the
+                        * COW earlier during fork.
+                        *
+                        * When pre-allocating the page or copying data, we
+                        * need to be without the pgtable locks since we could
+                        * sleep during the process.
+                        */
+                       if (unlikely(page_needs_cow_for_dma(vma, ptepage))) {
+                               pte_t src_pte_old = entry;
+                               struct page *new;
+
+                               spin_unlock(src_ptl);
+                               spin_unlock(dst_ptl);
+                               /* Do not use reserve as it's private owned */
+                               new = alloc_huge_page(vma, addr, 1);
+                               if (IS_ERR(new)) {
+                                       put_page(ptepage);
+                                       ret = PTR_ERR(new);
+                                       break;
+                               }
+                               copy_user_huge_page(new, ptepage, addr, vma,
+                                                   npages);
+                               put_page(ptepage);
+
+                               /* Install the new huge page if src pte stable */
+                               dst_ptl = huge_pte_lock(h, dst, dst_pte);
+                               src_ptl = huge_pte_lockptr(h, src, src_pte);
+                               spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING);
+                               entry = huge_ptep_get(src_pte);
+                               if (!pte_same(src_pte_old, entry)) {
+                                       put_page(new);
+                                       /* dst_entry won't change as in child */
+                                       goto again;
+                               }
+                               hugetlb_install_page(vma, dst_pte, addr, new);
+                               spin_unlock(src_ptl);
+                               spin_unlock(dst_ptl);
+                               continue;
+                       }
+
                        if (cow) {
                                /*
                                 * No need to notify as we are downgrading page
@@ -3864,12 +3899,10 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src,
                                 */
                                huge_ptep_set_wrprotect(src, addr, src_pte);
                        }
-                       entry = huge_ptep_get(src_pte);
-                       ptepage = pte_page(entry);
-                       get_page(ptepage);
+
                        page_dup_rmap(ptepage, true);
                        set_huge_pte_at(dst, addr, dst_pte, entry);
-                       hugetlb_count_add(pages_per_huge_page(h), dst);
+                       hugetlb_count_add(npages, dst);
                }
                spin_unlock(src_ptl);
                spin_unlock(dst_ptl);
@@ -4017,7 +4050,7 @@ void unmap_hugepage_range(struct vm_area_struct *vma, unsigned long start,
 
 /*
  * This is called when the original mapper is failing to COW a MAP_PRIVATE
- * mappping it owns the reserve page for. The intention is to unmap the page
+ * mapping it owns the reserve page for. The intention is to unmap the page
  * from other VMAs and let the children be SIGKILLed if they are faulting the
  * same region.
  */
@@ -4196,7 +4229,7 @@ retry_avoidcopy:
        spin_lock(ptl);
        ptep = huge_pte_offset(mm, haddr, huge_page_size(h));
        if (likely(ptep && pte_same(huge_ptep_get(ptep), pte))) {
-               ClearPagePrivate(new_page);
+               ClearHPageRestoreReserve(new_page);
 
                /* Break COW */
                huge_ptep_clear_flush(vma, haddr, ptep);
@@ -4205,7 +4238,7 @@ retry_avoidcopy:
                                make_huge_pte(vma, new_page, 1));
                page_remove_rmap(old_page, true);
                hugepage_add_new_anon_rmap(new_page, vma, haddr);
-               set_page_huge_active(new_page);
+               SetHPageMigratable(new_page);
                /* Make the old page be freed below */
                new_page = old_page;
        }
@@ -4263,7 +4296,7 @@ int huge_add_to_page_cache(struct page *page, struct address_space *mapping,
 
        if (err)
                return err;
-       ClearPagePrivate(page);
+       ClearHPageRestoreReserve(page);
 
        /*
         * set page dirty so that it will not be removed from cache/file
@@ -4425,7 +4458,7 @@ retry:
                goto backout;
 
        if (anon_rmap) {
-               ClearPagePrivate(page);
+               ClearHPageRestoreReserve(page);
                hugepage_add_new_anon_rmap(page, vma, haddr);
        } else
                page_dup_rmap(page, true);
@@ -4442,12 +4475,12 @@ retry:
        spin_unlock(ptl);
 
        /*
-        * Only make newly allocated pages active.  Existing pages found
-        * in the pagecache could be !page_huge_active() if they have been
-        * isolated for migration.
+        * Only set HPageMigratable in newly allocated pages.  Existing pages
+        * found in the pagecache may not have HPageMigratable set if they have
+        * been isolated for migration.
         */
        if (new_page)
-               set_page_huge_active(page);
+               SetHPageMigratable(page);
 
        unlock_page(page);
 out:
@@ -4477,7 +4510,7 @@ u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
 }
 #else
 /*
- * For uniprocesor systems we always use a single mutex, so just
+ * For uniprocessor systems we always use a single mutex, so just
  * return 0 and avoid the hashing overhead.
  */
 u32 hugetlb_fault_mutex_hash(struct address_space *mapping, pgoff_t idx)
@@ -4739,7 +4772,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
        if (vm_shared) {
                page_dup_rmap(page, true);
        } else {
-               ClearPagePrivate(page);
+               ClearHPageRestoreReserve(page);
                hugepage_add_new_anon_rmap(page, dst_vma, dst_addr);
        }
 
@@ -4758,7 +4791,7 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
        update_mmu_cache(dst_vma, dst_addr, dst_pte);
 
        spin_unlock(ptl);
-       set_page_huge_active(page);
+       SetHPageMigratable(page);
        if (vm_shared)
                unlock_page(page);
        ret = 0;
@@ -4773,6 +4806,20 @@ out_release_nounlock:
        goto out;
 }
 
+static void record_subpages_vmas(struct page *page, struct vm_area_struct *vma,
+                                int refs, struct page **pages,
+                                struct vm_area_struct **vmas)
+{
+       int nr;
+
+       for (nr = 0; nr < refs; nr++) {
+               if (likely(pages))
+                       pages[nr] = mem_map_offset(page, nr);
+               if (vmas)
+                       vmas[nr] = vma;
+       }
+}
+
 long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                         struct page **pages, struct vm_area_struct **vmas,
                         unsigned long *position, unsigned long *nr_pages,
@@ -4782,7 +4829,7 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
        unsigned long vaddr = *position;
        unsigned long remainder = *nr_pages;
        struct hstate *h = hstate_vma(vma);
-       int err = -EFAULT;
+       int err = -EFAULT, refs;
 
        while (vaddr < vma->vm_end && remainder) {
                pte_t *pte;
@@ -4902,20 +4949,29 @@ long follow_hugetlb_page(struct mm_struct *mm, struct vm_area_struct *vma,
                        continue;
                }
 
-same_page:
+               refs = min3(pages_per_huge_page(h) - pfn_offset,
+                           (vma->vm_end - vaddr) >> PAGE_SHIFT, remainder);
+
+               if (pages || vmas)
+                       record_subpages_vmas(mem_map_offset(page, pfn_offset),
+                                            vma, refs,
+                                            likely(pages) ? pages + i : NULL,
+                                            vmas ? vmas + i : NULL);
+
                if (pages) {
-                       pages[i] = mem_map_offset(page, pfn_offset);
                        /*
-                        * try_grab_page() should always succeed here, because:
-                        * a) we hold the ptl lock, and b) we've just checked
-                        * that the huge page is present in the page tables. If
-                        * the huge page is present, then the tail pages must
-                        * also be present. The ptl prevents the head page and
-                        * tail pages from being rearranged in any way. So this
-                        * page must be available at this point, unless the page
-                        * refcount overflowed:
+                        * try_grab_compound_head() should always succeed here,
+                        * because: a) we hold the ptl lock, and b) we've just
+                        * checked that the huge page is present in the page
+                        * tables. If the huge page is present, then the tail
+                        * pages must also be present. The ptl prevents the
+                        * head page and tail pages from being rearranged in
+                        * any way. So this page must be available at this
+                        * point, unless the page refcount overflowed:
                         */
-                       if (WARN_ON_ONCE(!try_grab_page(pages[i], flags))) {
+                       if (WARN_ON_ONCE(!try_grab_compound_head(pages[i],
+                                                                refs,
+                                                                flags))) {
                                spin_unlock(ptl);
                                remainder = 0;
                                err = -ENOMEM;
@@ -4923,21 +4979,10 @@ same_page:
                        }
                }
 
-               if (vmas)
-                       vmas[i] = vma;
-
-               vaddr += PAGE_SIZE;
-               ++pfn_offset;
-               --remainder;
-               ++i;
-               if (vaddr < vma->vm_end && remainder &&
-                               pfn_offset < pages_per_huge_page(h)) {
-                       /*
-                        * We use pfn_offset to avoid touching the pageframes
-                        * of this compound page.
-                        */
-                       goto same_page;
-               }
+               vaddr += (refs << PAGE_SHIFT);
+               remainder -= refs;
+               i += refs;
+
                spin_unlock(ptl);
        }
        *nr_pages = remainder;
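
The rewrite above replaces the old same_page loop, which took one page reference per tail page, with a single batched grab: refs is capped by the subpages left in this huge page, the bytes left in the vma, and the pages still requested, and one try_grab_compound_head() call then takes all refs references on the head page at once. The record_subpages_vmas() helper comes from the same series and looks roughly like this (a sketch, abridged):

        static void record_subpages_vmas(struct page *page,
                                         struct vm_area_struct *vma,
                                         int refs, struct page **pages,
                                         struct vm_area_struct **vmas)
        {
                int nr;

                /* Fill the output arrays; either may be NULL. */
                for (nr = 0; nr < refs; nr++) {
                        if (likely(pages))
                                pages[nr] = mem_map_offset(page, nr);
                        if (vmas)
                                vmas[nr] = vma;
                }
        }
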
@@ -5051,12 +5096,13 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma,
        return pages << h->order;
 }
 
-int hugetlb_reserve_pages(struct inode *inode,
+/* Return true if reservation was successful, false otherwise.  */
+bool hugetlb_reserve_pages(struct inode *inode,
                                        long from, long to,
                                        struct vm_area_struct *vma,
                                        vm_flags_t vm_flags)
 {
-       long ret, chg, add = -1;
+       long chg, add = -1;
        struct hstate *h = hstate_inode(inode);
        struct hugepage_subpool *spool = subpool_inode(inode);
        struct resv_map *resv_map;
@@ -5066,7 +5112,7 @@ int hugetlb_reserve_pages(struct inode *inode,
        /* This should never happen */
        if (from > to) {
                VM_WARN(1, "%s called with a negative range\n", __func__);
-               return -EINVAL;
+               return false;
        }
 
        /*
@@ -5075,7 +5121,7 @@ int hugetlb_reserve_pages(struct inode *inode,
         * without using reserves
         */
        if (vm_flags & VM_NORESERVE)
-               return 0;
+               return true;
 
        /*
         * Shared mappings base their reservation on the number of pages that
@@ -5097,7 +5143,7 @@ int hugetlb_reserve_pages(struct inode *inode,
                /* Private mapping. */
                resv_map = resv_map_alloc();
                if (!resv_map)
-                       return -ENOMEM;
+                       return false;
 
                chg = to - from;
 
@@ -5105,18 +5151,12 @@ int hugetlb_reserve_pages(struct inode *inode,
                set_vma_resv_flags(vma, HPAGE_RESV_OWNER);
        }
 
-       if (chg < 0) {
-               ret = chg;
+       if (chg < 0)
                goto out_err;
-       }
 
-       ret = hugetlb_cgroup_charge_cgroup_rsvd(
-               hstate_index(h), chg * pages_per_huge_page(h), &h_cg);
-
-       if (ret < 0) {
-               ret = -ENOMEM;
+       if (hugetlb_cgroup_charge_cgroup_rsvd(hstate_index(h),
+                               chg * pages_per_huge_page(h), &h_cg) < 0)
                goto out_err;
-       }
 
        if (vma && !(vma->vm_flags & VM_MAYSHARE) && h_cg) {
                /* For private mappings, the hugetlb_cgroup uncharge info hangs
@@ -5131,19 +5171,15 @@ int hugetlb_reserve_pages(struct inode *inode,
         * reservations already in place (gbl_reserve).
         */
        gbl_reserve = hugepage_subpool_get_pages(spool, chg);
-       if (gbl_reserve < 0) {
-               ret = -ENOSPC;
+       if (gbl_reserve < 0)
                goto out_uncharge_cgroup;
-       }
 
        /*
         * Check enough hugepages are available for the reservation.
         * Hand the pages back to the subpool if there are not
         */
-       ret = hugetlb_acct_memory(h, gbl_reserve);
-       if (ret < 0) {
+       if (hugetlb_acct_memory(h, gbl_reserve) < 0)
                goto out_put_pages;
-       }
 
        /*
         * Account for the reservations made. Shared mappings record regions
@@ -5161,7 +5197,6 @@ int hugetlb_reserve_pages(struct inode *inode,
 
                if (unlikely(add < 0)) {
                        hugetlb_acct_memory(h, -gbl_reserve);
-                       ret = add;
                        goto out_put_pages;
                } else if (unlikely(chg > add)) {
                        /*
@@ -5173,6 +5208,10 @@ int hugetlb_reserve_pages(struct inode *inode,
                         */
                        long rsv_adjust;
 
+                       /*
+                        * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the
+                        * reference to h_cg->css. See comment below for detail.
+                        */
                        hugetlb_cgroup_uncharge_cgroup_rsvd(
                                hstate_index(h),
                                (chg - add) * pages_per_huge_page(h), h_cg);
@@ -5180,9 +5219,18 @@ int hugetlb_reserve_pages(struct inode *inode,
                        rsv_adjust = hugepage_subpool_put_pages(spool,
                                                                chg - add);
                        hugetlb_acct_memory(h, -rsv_adjust);
+               } else if (h_cg) {
+                       /*
+                        * The file_regions will hold their own reference to
+                        * h_cg->css. So we should release the reference held
+                        * via hugetlb_cgroup_charge_cgroup_rsvd() when we are
+                        * done.
+                        */
+                       hugetlb_cgroup_put_rsvd_cgroup(h_cg);
                }
        }
-       return 0;
+       return true;
+
 out_put_pages:
        /* put back original number of pages, chg */
        (void)hugepage_subpool_put_pages(spool, chg);
@@ -5198,7 +5246,7 @@ out_err:
                        region_abort(resv_map, from, to, regions_needed);
        if (vma && is_vma_resv_set(vma, HPAGE_RESV_OWNER))
                kref_put(&resv_map->refs, resv_map_release);
-       return ret;
+       return false;
 }
 
 long hugetlb_unreserve_pages(struct inode *inode, long start, long end,
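
With the return type flipped to bool, callers lose the distinction between -EINVAL, -ENOMEM and -ENOSPC and simply test for failure. A sketch of a converted call site (assuming the hugetlbfs mmap path keeps a single -ENOMEM fallback for all failures):

        /* hugetlbfs_file_mmap(), sketch of the converted caller: */
        if (!hugetlb_reserve_pages(inode,
                        vma->vm_pgoff >> huge_page_order(h),
                        len >> huge_page_shift(h), vma, vma->vm_flags))
                return -ENOMEM;
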
@@ -5259,7 +5307,7 @@ static unsigned long page_table_shareable(struct vm_area_struct *svma,
         */
        if (pmd_index(addr) != pmd_index(saddr) ||
            vm_flags != svm_flags ||
-           sbase < svma->vm_start || svma->vm_end < s_end)
+           !range_in_vma(svma, sbase, s_end))
                return 0;
 
        return saddr;
@@ -5286,21 +5334,23 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr)
 void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma,
                                unsigned long *start, unsigned long *end)
 {
-       unsigned long a_start, a_end;
+       unsigned long v_start = ALIGN(vma->vm_start, PUD_SIZE),
+               v_end = ALIGN_DOWN(vma->vm_end, PUD_SIZE);
 
-       if (!(vma->vm_flags & VM_MAYSHARE))
+       /*
+        * vma needs to span at least one aligned PUD size, and the start,end
+        * range must be at least partially within it.
+        */
+       if (!(vma->vm_flags & VM_MAYSHARE) || !(v_end > v_start) ||
+               (*end <= v_start) || (*start >= v_end))
                return;
 
        /* Extend the range to be PUD aligned for a worst case scenario */
-       a_start = ALIGN_DOWN(*start, PUD_SIZE);
-       a_end = ALIGN(*end, PUD_SIZE);
+       if (*start > v_start)
+               *start = ALIGN_DOWN(*start, PUD_SIZE);
 
-       /*
-        * Intersect the range with the vma range, since pmd sharing won't be
-        * across vma after all
-        */
-       *start = max(vma->vm_start, a_start);
-       *end = min(vma->vm_end, a_end);
+       if (*end < v_end)
+               *end = ALIGN(*end, PUD_SIZE);
 }
 
 /*
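
The new logic clamps expansion to PUD-aligned space that actually lies inside the vma, instead of aligning first and intersecting afterwards. A worked example (not from the patch), assuming PUD_SIZE == 1G and a vma spanning [0.5G, 2.5G), so v_start == 1G and v_end == 2G:

        [1.2G, 1.5G)  ->  [1.0G, 2.0G)   /* expanded to PUD boundaries */
        [0.6G, 0.9G)  ->  unchanged      /* entirely below v_start     */
        [2.1G, 2.4G)  ->  unchanged      /* entirely above v_end       */
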
@@ -5583,12 +5633,13 @@ bool isolate_huge_page(struct page *page, struct list_head *list)
        bool ret = true;
 
        spin_lock(&hugetlb_lock);
-       if (!PageHeadHuge(page) || !page_huge_active(page) ||
+       if (!PageHeadHuge(page) ||
+           !HPageMigratable(page) ||
            !get_page_unless_zero(page)) {
                ret = false;
                goto unlock;
        }
-       clear_page_huge_active(page);
+       ClearHPageMigratable(page);
        list_move_tail(&page->lru, list);
 unlock:
        spin_unlock(&hugetlb_lock);
@@ -5597,9 +5648,8 @@ unlock:
 
 void putback_active_hugepage(struct page *page)
 {
-       VM_BUG_ON_PAGE(!PageHead(page), page);
        spin_lock(&hugetlb_lock);
-       set_page_huge_active(page);
+       SetHPageMigratable(page);
        list_move_tail(&page->lru, &(page_hstate(page))->hugepage_activelist);
        spin_unlock(&hugetlb_lock);
        put_page(page);
@@ -5622,12 +5672,12 @@ void move_hugetlb_state(struct page *oldpage, struct page *newpage, int reason)
         * here as well otherwise the global surplus count will not match
         * the per-node's.
         */
-       if (PageHugeTemporary(newpage)) {
+       if (HPageTemporary(newpage)) {
                int old_nid = page_to_nid(oldpage);
                int new_nid = page_to_nid(newpage);
 
-               SetPageHugeTemporary(oldpage);
-               ClearPageHugeTemporary(newpage);
+               SetHPageTemporary(oldpage);
+               ClearHPageTemporary(newpage);
 
                spin_lock(&hugetlb_lock);
                if (h->surplus_huge_pages_node[old_nid]) {
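
The HPage*/SetHPage*/ClearHPage* testers replacing the open-coded PageHugeTemporary()/page_huge_active() style come from earlier in this series, which moves hugetlb-specific state into bits of page[0].private behind generated helpers. A sketch of the generating macro from include/linux/hugetlb.h (abridged; the real enum has more flags):

        enum hugetlb_page_flags {
                HPG_restore_reserve = 0,
                HPG_migratable,
                HPG_temporary,
                __NR_HPAGEFLAGS,
        };

        #define TESTHPAGEFLAG(uname, flname)                            \
        static inline int HPage##uname(struct page *page)               \
                { return test_bit(HPG_##flname, &(page->private)); }

        #define SETHPAGEFLAG(uname, flname)                             \
        static inline void SetHPage##uname(struct page *page)           \
                { set_bit(HPG_##flname, &(page->private)); }

        #define CLEARHPAGEFLAG(uname, flname)                           \
        static inline void ClearHPage##uname(struct page *page)         \
                { clear_bit(HPG_##flname, &(page->private)); }

        #define HPAGEFLAG(uname, flname)                                \
                TESTHPAGEFLAG(uname, flname)                            \
                SETHPAGEFLAG(uname, flname)                             \
                CLEARHPAGEFLAG(uname, flname)

        HPAGEFLAG(Migratable, migratable)
        HPAGEFLAG(Temporary, temporary)
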
index 9182848..603a131 100644 (file)
@@ -113,7 +113,7 @@ static void hugetlb_cgroup_init(struct hugetlb_cgroup *h_cgroup,
                        rsvd_parent);
 
                limit = round_down(PAGE_COUNTER_MAX,
-                                  1 << huge_page_order(&hstates[idx]));
+                                  pages_per_huge_page(&hstates[idx]));
 
                ret = page_counter_set_max(
                        hugetlb_cgroup_counter_from_cgroup(h_cgroup, idx),
@@ -391,7 +391,8 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start,
 
 void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
                                         struct file_region *rg,
-                                        unsigned long nr_pages)
+                                        unsigned long nr_pages,
+                                        bool region_del)
 {
        if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages)
                return;
@@ -400,7 +401,12 @@ void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv,
            !resv->reservation_counter) {
                page_counter_uncharge(rg->reservation_counter,
                                      nr_pages * resv->pages_per_hpage);
-               css_put(rg->css);
+               /*
+                * Only do css_put(rg->css) when we delete the entire region
+                * because one file_region must hold exactly one css reference.
+                */
+               if (region_del)
+                       css_put(rg->css);
        }
 }
 
@@ -460,7 +466,7 @@ static int hugetlb_cgroup_read_u64_max(struct seq_file *seq, void *v)
        counter = &h_cg->hugepage[idx];
 
        limit = round_down(PAGE_COUNTER_MAX,
-                          1 << huge_page_order(&hstates[idx]));
+                          pages_per_huge_page(&hstates[idx]));
 
        switch (MEMFILE_ATTR(cft->private)) {
        case RES_RSVD_USAGE:
@@ -507,7 +513,7 @@ static ssize_t hugetlb_cgroup_write(struct kernfs_open_file *of,
                return ret;
 
        idx = MEMFILE_IDX(of_cft(of)->private);
-       nr_pages = round_down(nr_pages, 1 << huge_page_order(&hstates[idx]));
+       nr_pages = round_down(nr_pages, pages_per_huge_page(&hstates[idx]));
 
        switch (MEMFILE_ATTR(of_cft(of)->private)) {
        case RES_RSVD_LIMIT:
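
The round_down() conversions in this file are behavior-neutral: pages_per_huge_page() is simply the named form of the same order shift, per the existing helper in include/linux/hugetlb.h:

        static inline unsigned int pages_per_huge_page(struct hstate *h)
        {
                return 1 << huge_page_order(h);
        }
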
index 25d2b24..1432fee 100644 (file)
@@ -60,8 +60,8 @@ static inline void force_page_cache_readahead(struct address_space *mapping,
        force_page_cache_ra(&ractl, &file->f_ra, nr_to_read);
 }
 
-struct page *find_get_entry(struct address_space *mapping, pgoff_t index);
-struct page *find_lock_entry(struct address_space *mapping, pgoff_t index);
+unsigned find_lock_entries(struct address_space *mapping, pgoff_t start,
+               pgoff_t end, struct pagevec *pvec, pgoff_t *indices);
 
 /**
  * page_evictable - test whether a page is evictable
@@ -296,11 +296,6 @@ static inline unsigned int buddy_order(struct page *page)
  */
 #define buddy_order_unsafe(page)       READ_ONCE(page_private(page))
 
-static inline bool is_cow_mapping(vm_flags_t flags)
-{
-       return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
-}
-
 /*
  * These three helpers classifies VMAs for virtual memory accounting.
  */
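
is_cow_mapping() is not lost here: elsewhere in this merge it moves to include/linux/mm.h so that code outside mm/ (notably GPU drivers) can use it, with the body unchanged:

        static inline bool is_cow_mapping(vm_flags_t flags)
        {
                return (flags & (VM_SHARED | VM_MAYWRITE)) == VM_MAYWRITE;
        }
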
index b251676..b5e08d4 100644 (file)
@@ -60,7 +60,7 @@ void kasan_disable_current(void)
 
 void __kasan_unpoison_range(const void *address, size_t size)
 {
-       unpoison_range(address, size);
+       kasan_unpoison(address, size);
 }
 
 #if CONFIG_KASAN_STACK
@@ -69,7 +69,7 @@ void kasan_unpoison_task_stack(struct task_struct *task)
 {
        void *base = task_stack_page(task);
 
-       unpoison_range(base, THREAD_SIZE);
+       kasan_unpoison(base, THREAD_SIZE);
 }
 
 /* Unpoison the stack for the current task beyond a watermark sp value. */
@@ -82,7 +82,7 @@ asmlinkage void kasan_unpoison_task_stack_below(const void *watermark)
         */
        void *base = (void *)((unsigned long)watermark & ~(THREAD_SIZE - 1));
 
-       unpoison_range(base, watermark - base);
+       kasan_unpoison(base, watermark - base);
 }
 #endif /* CONFIG_KASAN_STACK */
 
@@ -105,18 +105,17 @@ void __kasan_alloc_pages(struct page *page, unsigned int order)
        if (unlikely(PageHighMem(page)))
                return;
 
-       tag = random_tag();
+       tag = kasan_random_tag();
        for (i = 0; i < (1 << order); i++)
                page_kasan_tag_set(page + i, tag);
-       unpoison_range(page_address(page), PAGE_SIZE << order);
+       kasan_unpoison(page_address(page), PAGE_SIZE << order);
 }
 
 void __kasan_free_pages(struct page *page, unsigned int order)
 {
        if (likely(!PageHighMem(page)))
-               poison_range(page_address(page),
-                               PAGE_SIZE << order,
-                               KASAN_FREE_PAGE);
+               kasan_poison(page_address(page), PAGE_SIZE << order,
+                            KASAN_FREE_PAGE);
 }
 
 /*
@@ -211,6 +210,11 @@ void __kasan_cache_create(struct kmem_cache *cache, unsigned int *size,
                *size = optimal_size;
 }
 
+void __kasan_cache_create_kmalloc(struct kmem_cache *cache)
+{
+       cache->kasan_info.is_kmalloc = true;
+}
+
 size_t __kasan_metadata_size(struct kmem_cache *cache)
 {
        if (!kasan_stack_collection_enabled())
@@ -246,18 +250,19 @@ void __kasan_poison_slab(struct page *page)
 
        for (i = 0; i < compound_nr(page); i++)
                page_kasan_tag_reset(page + i);
-       poison_range(page_address(page), page_size(page),
+       kasan_poison(page_address(page), page_size(page),
                     KASAN_KMALLOC_REDZONE);
 }
 
 void __kasan_unpoison_object_data(struct kmem_cache *cache, void *object)
 {
-       unpoison_range(object, cache->object_size);
+       kasan_unpoison(object, cache->object_size);
 }
 
 void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
 {
-       poison_range(object, cache->object_size, KASAN_KMALLOC_REDZONE);
+       kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE),
+                       KASAN_KMALLOC_REDZONE);
 }
 
 /*
@@ -274,27 +279,18 @@ void __kasan_poison_object_data(struct kmem_cache *cache, void *object)
  *    based on objects indexes, so that objects that are next to each other
  *    get different tags.
  */
-static u8 assign_tag(struct kmem_cache *cache, const void *object,
-                       bool init, bool keep_tag)
+static inline u8 assign_tag(struct kmem_cache *cache,
+                                       const void *object, bool init)
 {
        if (IS_ENABLED(CONFIG_KASAN_GENERIC))
                return 0xff;
 
        /*
-        * 1. When an object is kmalloc()'ed, two hooks are called:
-        *    kasan_slab_alloc() and kasan_kmalloc(). We assign the
-        *    tag only in the first one.
-        * 2. We reuse the same tag for krealloc'ed objects.
-        */
-       if (keep_tag)
-               return get_tag(object);
-
-       /*
         * If the cache neither has a constructor nor has SLAB_TYPESAFE_BY_RCU
         * set, assign a tag when the object is being allocated (init == false).
         */
        if (!cache->ctor && !(cache->flags & SLAB_TYPESAFE_BY_RCU))
-               return init ? KASAN_TAG_KERNEL : random_tag();
+               return init ? KASAN_TAG_KERNEL : kasan_random_tag();
 
        /* For caches that either have a constructor or SLAB_TYPESAFE_BY_RCU: */
 #ifdef CONFIG_SLAB
@@ -305,7 +301,7 @@ static u8 assign_tag(struct kmem_cache *cache, const void *object,
         * For SLUB assign a random tag during slab creation, otherwise reuse
         * the already assigned tag.
         */
-       return init ? random_tag() : get_tag(object);
+       return init ? kasan_random_tag() : get_tag(object);
 #endif
 }
 
@@ -321,13 +317,13 @@ void * __must_check __kasan_init_slab_obj(struct kmem_cache *cache,
        }
 
        /* Tag is ignored in set_tag() without CONFIG_KASAN_SW/HW_TAGS */
-       object = set_tag(object, assign_tag(cache, object, true, false));
+       object = set_tag(object, assign_tag(cache, object, true));
 
        return (void *)object;
 }
 
-static bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
-                             unsigned long ip, bool quarantine)
+static inline bool ____kasan_slab_free(struct kmem_cache *cache,
+                               void *object, unsigned long ip, bool quarantine)
 {
        u8 tag;
        void *tagged_object;
@@ -336,6 +332,9 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
        tagged_object = object;
        object = kasan_reset_tag(object);
 
+       if (is_kfence_address(object))
+               return false;
+
        if (unlikely(nearest_obj(cache, virt_to_head_page(object), object) !=
            object)) {
                kasan_report_invalid_free(tagged_object, ip);
@@ -346,22 +345,21 @@ static bool ____kasan_slab_free(struct kmem_cache *cache, void *object,
        if (unlikely(cache->flags & SLAB_TYPESAFE_BY_RCU))
                return false;
 
-       if (check_invalid_free(tagged_object)) {
+       if (!kasan_byte_accessible(tagged_object)) {
                kasan_report_invalid_free(tagged_object, ip);
                return true;
        }
 
-       poison_range(object, cache->object_size, KASAN_KMALLOC_FREE);
-
-       if (!kasan_stack_collection_enabled())
-               return false;
+       kasan_poison(object, round_up(cache->object_size, KASAN_GRANULE_SIZE),
+                       KASAN_KMALLOC_FREE);
 
        if ((IS_ENABLED(CONFIG_KASAN_GENERIC) && !quarantine))
                return false;
 
-       kasan_set_free_info(cache, object, tag);
+       if (kasan_stack_collection_enabled())
+               kasan_set_free_info(cache, object, tag);
 
-       return quarantine_put(cache, object);
+       return kasan_quarantine_put(cache, object);
 }
 
 bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
@@ -369,6 +367,31 @@ bool __kasan_slab_free(struct kmem_cache *cache, void *object, unsigned long ip)
        return ____kasan_slab_free(cache, object, ip, true);
 }
 
+static inline bool ____kasan_kfree_large(void *ptr, unsigned long ip)
+{
+       if (ptr != page_address(virt_to_head_page(ptr))) {
+               kasan_report_invalid_free(ptr, ip);
+               return true;
+       }
+
+       if (!kasan_byte_accessible(ptr)) {
+               kasan_report_invalid_free(ptr, ip);
+               return true;
+       }
+
+       /*
+        * The object will be poisoned by kasan_free_pages() or
+        * kasan_slab_free_mempool().
+        */
+
+       return false;
+}
+
+void __kasan_kfree_large(void *ptr, unsigned long ip)
+{
+       ____kasan_kfree_large(ptr, ip);
+}
+
 void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
 {
        struct page *page;
@@ -382,88 +405,147 @@ void __kasan_slab_free_mempool(void *ptr, unsigned long ip)
         * KMALLOC_MAX_SIZE, and kmalloc falls back onto page_alloc.
         */
        if (unlikely(!PageSlab(page))) {
-               if (ptr != page_address(page)) {
-                       kasan_report_invalid_free(ptr, ip);
+               if (____kasan_kfree_large(ptr, ip))
                        return;
-               }
-               poison_range(ptr, page_size(page), KASAN_FREE_PAGE);
+               kasan_poison(ptr, page_size(page), KASAN_FREE_PAGE);
        } else {
                ____kasan_slab_free(page->slab_cache, ptr, ip, false);
        }
 }
 
-static void set_alloc_info(struct kmem_cache *cache, void *object, gfp_t flags)
+static void set_alloc_info(struct kmem_cache *cache, void *object,
+                               gfp_t flags, bool is_kmalloc)
 {
        struct kasan_alloc_meta *alloc_meta;
 
+       /* Don't save alloc info for kmalloc caches in kasan_slab_alloc(). */
+       if (cache->kasan_info.is_kmalloc && !is_kmalloc)
+               return;
+
        alloc_meta = kasan_get_alloc_meta(cache, object);
        if (alloc_meta)
                kasan_set_track(&alloc_meta->alloc_track, flags);
 }
 
-static void *____kasan_kmalloc(struct kmem_cache *cache, const void *object,
-                               size_t size, gfp_t flags, bool keep_tag)
+void * __must_check __kasan_slab_alloc(struct kmem_cache *cache,
+                                       void *object, gfp_t flags)
 {
-       unsigned long redzone_start;
-       unsigned long redzone_end;
        u8 tag;
+       void *tagged_object;
 
        if (gfpflags_allow_blocking(flags))
-               quarantine_reduce();
+               kasan_quarantine_reduce();
 
        if (unlikely(object == NULL))
                return NULL;
 
-       redzone_start = round_up((unsigned long)(object + size),
-                               KASAN_GRANULE_SIZE);
-       redzone_end = round_up((unsigned long)object + cache->object_size,
-                               KASAN_GRANULE_SIZE);
-       tag = assign_tag(cache, object, false, keep_tag);
+       if (is_kfence_address(object))
+               return (void *)object;
 
-       /* Tag is ignored in set_tag without CONFIG_KASAN_SW/HW_TAGS */
-       unpoison_range(set_tag(object, tag), size);
-       poison_range((void *)redzone_start, redzone_end - redzone_start,
-                    KASAN_KMALLOC_REDZONE);
+       /*
+        * Generate and assign random tag for tag-based modes.
+        * Tag is ignored in set_tag() for the generic mode.
+        */
+       tag = assign_tag(cache, object, false);
+       tagged_object = set_tag(object, tag);
+
+       /*
+        * Unpoison the whole object.
+        * For kmalloc() allocations, kasan_kmalloc() will do precise poisoning.
+        */
+       kasan_unpoison(tagged_object, cache->object_size);
 
+       /* Save alloc info (if possible) for non-kmalloc() allocations. */
        if (kasan_stack_collection_enabled())
-               set_alloc_info(cache, (void *)object, flags);
+               set_alloc_info(cache, (void *)object, flags, false);
 
-       return set_tag(object, tag);
+       return tagged_object;
 }
 
-void * __must_check __kasan_slab_alloc(struct kmem_cache *cache,
-                                       void *object, gfp_t flags)
+static inline void *____kasan_kmalloc(struct kmem_cache *cache,
+                               const void *object, size_t size, gfp_t flags)
 {
-       return ____kasan_kmalloc(cache, object, cache->object_size, flags, false);
+       unsigned long redzone_start;
+       unsigned long redzone_end;
+
+       if (gfpflags_allow_blocking(flags))
+               kasan_quarantine_reduce();
+
+       if (unlikely(object == NULL))
+               return NULL;
+
+       if (is_kfence_address(kasan_reset_tag(object)))
+               return (void *)object;
+
+       /*
+        * The object has already been unpoisoned by kasan_slab_alloc() for
+        * kmalloc() or by kasan_krealloc() for krealloc().
+        */
+
+       /*
+        * The redzone has byte-level precision for the generic mode.
+        * Partially poison the last object granule to cover the unaligned
+        * part of the redzone.
+        */
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+               kasan_poison_last_granule((void *)object, size);
+
+       /* Poison the aligned part of the redzone. */
+       redzone_start = round_up((unsigned long)(object + size),
+                               KASAN_GRANULE_SIZE);
+       redzone_end = round_up((unsigned long)(object + cache->object_size),
+                               KASAN_GRANULE_SIZE);
+       kasan_poison((void *)redzone_start, redzone_end - redzone_start,
+                          KASAN_KMALLOC_REDZONE);
+
+       /*
+        * Save alloc info (if possible) for kmalloc() allocations.
+        * This also rewrites the alloc info when called from kasan_krealloc().
+        */
+       if (kasan_stack_collection_enabled())
+               set_alloc_info(cache, (void *)object, flags, true);
+
+       /* Keep the tag that was set by kasan_slab_alloc(). */
+       return (void *)object;
 }
 
 void * __must_check __kasan_kmalloc(struct kmem_cache *cache, const void *object,
                                        size_t size, gfp_t flags)
 {
-       return ____kasan_kmalloc(cache, object, size, flags, true);
+       return ____kasan_kmalloc(cache, object, size, flags);
 }
 EXPORT_SYMBOL(__kasan_kmalloc);
 
 void * __must_check __kasan_kmalloc_large(const void *ptr, size_t size,
                                                gfp_t flags)
 {
-       struct page *page;
        unsigned long redzone_start;
        unsigned long redzone_end;
 
        if (gfpflags_allow_blocking(flags))
-               quarantine_reduce();
+               kasan_quarantine_reduce();
 
        if (unlikely(ptr == NULL))
                return NULL;
 
-       page = virt_to_page(ptr);
+       /*
+        * The object has already been unpoisoned by kasan_alloc_pages() for
+        * alloc_pages() or by kasan_krealloc() for krealloc().
+        */
+
+       /*
+        * The redzone has byte-level precision for the generic mode.
+        * Partially poison the last object granule to cover the unaligned
+        * part of the redzone.
+        */
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+               kasan_poison_last_granule(ptr, size);
+
+       /* Poison the aligned part of the redzone. */
        redzone_start = round_up((unsigned long)(ptr + size),
                                KASAN_GRANULE_SIZE);
-       redzone_end = (unsigned long)ptr + page_size(page);
-
-       unpoison_range(ptr, size);
-       poison_range((void *)redzone_start, redzone_end - redzone_start,
+       redzone_end = (unsigned long)ptr + page_size(virt_to_page(ptr));
+       kasan_poison((void *)redzone_start, redzone_end - redzone_start,
                     KASAN_PAGE_REDZONE);
 
        return (void *)ptr;
@@ -476,18 +558,27 @@ void * __must_check __kasan_krealloc(const void *object, size_t size, gfp_t flag
        if (unlikely(object == ZERO_SIZE_PTR))
                return (void *)object;
 
+       /*
+        * Unpoison the object's data.
+        * Part of it might already have been unpoisoned, but it's unknown
+        * how big that part is.
+        */
+       kasan_unpoison(object, size);
+
        page = virt_to_head_page(object);
 
+       /* Piggy-back on kmalloc() instrumentation to poison the redzone. */
        if (unlikely(!PageSlab(page)))
                return __kasan_kmalloc_large(object, size, flags);
        else
-               return ____kasan_kmalloc(page->slab_cache, object, size,
-                                               flags, true);
+               return ____kasan_kmalloc(page->slab_cache, object, size, flags);
 }
 
-void __kasan_kfree_large(void *ptr, unsigned long ip)
+bool __kasan_check_byte(const void *address, unsigned long ip)
 {
-       if (ptr != page_address(virt_to_head_page(ptr)))
-               kasan_report_invalid_free(ptr, ip);
-       /* The object will be poisoned by kasan_free_pages(). */
+       if (!kasan_byte_accessible(address)) {
+               kasan_report((unsigned long)address, 1, false, ip);
+               return false;
+       }
+       return true;
 }
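
Taken together, the hooks above split slab instrumentation into two layers: kasan_slab_alloc() assigns the tag and unpoisons the whole object, while kasan_kmalloc() only trims the redzone to the requested size and, for kmalloc caches, records the alloc stack. A rough call sequence (pseudo-code, not the literal slab internals):

        /* kmem_cache_alloc(cache, gfp): */
        object = slab_alloc(cache, gfp);
        object = kasan_slab_alloc(cache, object, gfp);    /* tag + unpoison whole object */

        /* kmalloc(size, gfp), backed by a kmalloc cache: */
        object = slab_alloc(cache, gfp);
        object = kasan_slab_alloc(cache, object, gfp);    /* alloc info skipped here ... */
        object = kasan_kmalloc(cache, object, size, gfp); /* ... and recorded here, with
                                                             a precise redzone */
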
index 5106b84..2e55e0f 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/init.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
+#include <linux/kfence.h>
 #include <linux/kmemleak.h>
 #include <linux/linkage.h>
 #include <linux/memblock.h>
@@ -158,7 +159,7 @@ static __always_inline bool memory_is_poisoned(unsigned long addr, size_t size)
        return memory_is_poisoned_n(addr, size);
 }
 
-static __always_inline bool check_memory_region_inline(unsigned long addr,
+static __always_inline bool check_region_inline(unsigned long addr,
                                                size_t size, bool write,
                                                unsigned long ret_ip)
 {
@@ -179,37 +180,37 @@ static __always_inline bool check_memory_region_inline(unsigned long addr,
        return !kasan_report(addr, size, write, ret_ip);
 }
 
-bool check_memory_region(unsigned long addr, size_t size, bool write,
-                               unsigned long ret_ip)
+bool kasan_check_range(unsigned long addr, size_t size, bool write,
+                                       unsigned long ret_ip)
 {
-       return check_memory_region_inline(addr, size, write, ret_ip);
+       return check_region_inline(addr, size, write, ret_ip);
 }
 
-bool check_invalid_free(void *addr)
+bool kasan_byte_accessible(const void *addr)
 {
        s8 shadow_byte = READ_ONCE(*(s8 *)kasan_mem_to_shadow(addr));
 
-       return shadow_byte < 0 || shadow_byte >= KASAN_GRANULE_SIZE;
+       return shadow_byte >= 0 && shadow_byte < KASAN_GRANULE_SIZE;
 }
 
 void kasan_cache_shrink(struct kmem_cache *cache)
 {
-       quarantine_remove_cache(cache);
+       kasan_quarantine_remove_cache(cache);
 }
 
 void kasan_cache_shutdown(struct kmem_cache *cache)
 {
        if (!__kmem_cache_empty(cache))
-               quarantine_remove_cache(cache);
+               kasan_quarantine_remove_cache(cache);
 }
 
 static void register_global(struct kasan_global *global)
 {
        size_t aligned_size = round_up(global->size, KASAN_GRANULE_SIZE);
 
-       unpoison_range(global->beg, global->size);
+       kasan_unpoison(global->beg, global->size);
 
-       poison_range(global->beg + aligned_size,
+       kasan_poison(global->beg + aligned_size,
                     global->size_with_redzone - aligned_size,
                     KASAN_GLOBAL_REDZONE);
 }
@@ -231,7 +232,7 @@ EXPORT_SYMBOL(__asan_unregister_globals);
 #define DEFINE_ASAN_LOAD_STORE(size)                                   \
        void __asan_load##size(unsigned long addr)                      \
        {                                                               \
-               check_memory_region_inline(addr, size, false, _RET_IP_);\
+               check_region_inline(addr, size, false, _RET_IP_);       \
        }                                                               \
        EXPORT_SYMBOL(__asan_load##size);                               \
        __alias(__asan_load##size)                                      \
@@ -239,7 +240,7 @@ EXPORT_SYMBOL(__asan_unregister_globals);
        EXPORT_SYMBOL(__asan_load##size##_noabort);                     \
        void __asan_store##size(unsigned long addr)                     \
        {                                                               \
-               check_memory_region_inline(addr, size, true, _RET_IP_); \
+               check_region_inline(addr, size, true, _RET_IP_);        \
        }                                                               \
        EXPORT_SYMBOL(__asan_store##size);                              \
        __alias(__asan_store##size)                                     \
@@ -254,7 +255,7 @@ DEFINE_ASAN_LOAD_STORE(16);
 
 void __asan_loadN(unsigned long addr, size_t size)
 {
-       check_memory_region(addr, size, false, _RET_IP_);
+       kasan_check_range(addr, size, false, _RET_IP_);
 }
 EXPORT_SYMBOL(__asan_loadN);
 
@@ -264,7 +265,7 @@ EXPORT_SYMBOL(__asan_loadN_noabort);
 
 void __asan_storeN(unsigned long addr, size_t size)
 {
-       check_memory_region(addr, size, true, _RET_IP_);
+       kasan_check_range(addr, size, true, _RET_IP_);
 }
 EXPORT_SYMBOL(__asan_storeN);
 
@@ -290,11 +291,11 @@ void __asan_alloca_poison(unsigned long addr, size_t size)
 
        WARN_ON(!IS_ALIGNED(addr, KASAN_ALLOCA_REDZONE_SIZE));
 
-       unpoison_range((const void *)(addr + rounded_down_size),
-                      size - rounded_down_size);
-       poison_range(left_redzone, KASAN_ALLOCA_REDZONE_SIZE,
+       kasan_unpoison((const void *)(addr + rounded_down_size),
+                       size - rounded_down_size);
+       kasan_poison(left_redzone, KASAN_ALLOCA_REDZONE_SIZE,
                     KASAN_ALLOCA_LEFT);
-       poison_range(right_redzone, padding_size + KASAN_ALLOCA_REDZONE_SIZE,
+       kasan_poison(right_redzone, padding_size + KASAN_ALLOCA_REDZONE_SIZE,
                     KASAN_ALLOCA_RIGHT);
 }
 EXPORT_SYMBOL(__asan_alloca_poison);
@@ -305,7 +306,7 @@ void __asan_allocas_unpoison(const void *stack_top, const void *stack_bottom)
        if (unlikely(!stack_top || stack_top > stack_bottom))
                return;
 
-       unpoison_range(stack_top, stack_bottom - stack_top);
+       kasan_unpoison(stack_top, stack_bottom - stack_top);
 }
 EXPORT_SYMBOL(__asan_allocas_unpoison);
 
@@ -331,7 +332,7 @@ void kasan_record_aux_stack(void *addr)
        struct kasan_alloc_meta *alloc_meta;
        void *object;
 
-       if (!(page && PageSlab(page)))
+       if (is_kfence_address(addr) || !(page && PageSlab(page)))
                return;
 
        cache = page->slab_cache;
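
kasan_byte_accessible() is the positive-sense replacement for check_invalid_free(); in generic mode it reads the per-granule shadow byte, whose encoding makes the first byte of a granule accessible exactly when the value lies in [0, KASAN_GRANULE_SIZE). A sketch of the encoding:

        s8 shadow = *(s8 *)kasan_mem_to_shadow(addr);
        /* shadow == 0                      -> all 8 bytes of the granule valid */
        /* 0 < shadow < KASAN_GRANULE_SIZE  -> only the first 'shadow' bytes    */
        /* shadow < 0                       -> poisoned (redzone, freed, ...)   */
        accessible = shadow >= 0 && shadow < KASAN_GRANULE_SIZE;
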
index d558799..2aad21f 100644 (file)
@@ -48,7 +48,7 @@ EXPORT_SYMBOL(kasan_flag_enabled);
 /* Whether to collect alloc/free stack traces. */
 DEFINE_STATIC_KEY_FALSE(kasan_flag_stacktrace);
 
-/* Whether panic or disable tag checking on fault. */
+/* Whether to panic or print a report and disable tag checking on fault. */
 bool kasan_flag_panic __ro_after_init;
 
 /* kasan=off/on */
@@ -185,3 +185,19 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
 
        return &alloc_meta->free_track[0];
 }
+
+#if IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
+
+void kasan_set_tagging_report_once(bool state)
+{
+       hw_set_tagging_report_once(state);
+}
+EXPORT_SYMBOL_GPL(kasan_set_tagging_report_once);
+
+void kasan_enable_tagging(void)
+{
+       hw_enable_tagging();
+}
+EXPORT_SYMBOL_GPL(kasan_enable_tagging);
+
+#endif
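
These two exports exist so the KASAN KUnit test can keep MTE faults from disabling tag checking after the first report; the test-side pattern is roughly the following (a sketch, not the exact KUNIT_EXPECT_KASAN_FAIL macro):

        kasan_set_tagging_report_once(false);  /* report every fault in this test */
        /* ... trigger the bad access under test ... */
        kasan_enable_tagging();                /* re-arm checks if a fault fired  */
        kasan_set_tagging_report_once(true);
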
index 8c706e7..8c55634 100644 (file)
@@ -3,6 +3,7 @@
 #define __MM_KASAN_KASAN_H
 
 #include <linux/kasan.h>
+#include <linux/kfence.h>
 #include <linux/stackdepot.h>
 
 #ifdef CONFIG_KASAN_HW_TAGS
@@ -36,6 +37,12 @@ extern bool kasan_flag_panic __ro_after_init;
 #define KASAN_TAG_INVALID      0xFE /* inaccessible memory tag */
 #define KASAN_TAG_MAX          0xFD /* maximum value for random tags */
 
+#ifdef CONFIG_KASAN_HW_TAGS
+#define KASAN_TAG_MIN          0xF0 /* minimum value for random tags */
+#else
+#define KASAN_TAG_MIN          0x00 /* minimum value for random tags */
+#endif
+
 #ifdef CONFIG_KASAN_GENERIC
 #define KASAN_FREE_PAGE         0xFF  /* page was freed */
 #define KASAN_PAGE_REDZONE      0xFE  /* redzone for kmalloc_large allocations */
@@ -195,14 +202,14 @@ static inline bool addr_has_metadata(const void *addr)
 }
 
 /**
- * check_memory_region - Check memory region, and report if invalid access.
+ * kasan_check_range - Check memory region, and report if invalid access.
  * @addr: the accessed address
  * @size: the accessed size
  * @write: true if access is a write access
  * @ret_ip: return address
  * @return: true if access was valid, false if invalid
  */
-bool check_memory_region(unsigned long addr, size_t size, bool write,
+bool kasan_check_range(unsigned long addr, size_t size, bool write,
                                unsigned long ret_ip);
 
 #else /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
@@ -215,19 +222,19 @@ static inline bool addr_has_metadata(const void *addr)
 #endif /* CONFIG_KASAN_GENERIC || CONFIG_KASAN_SW_TAGS */
 
 #if defined(CONFIG_KASAN_SW_TAGS) || defined(CONFIG_KASAN_HW_TAGS)
-void print_tags(u8 addr_tag, const void *addr);
+void kasan_print_tags(u8 addr_tag, const void *addr);
 #else
-static inline void print_tags(u8 addr_tag, const void *addr) { }
+static inline void kasan_print_tags(u8 addr_tag, const void *addr) { }
 #endif
 
-void *find_first_bad_addr(void *addr, size_t size);
-const char *get_bug_type(struct kasan_access_info *info);
-void metadata_fetch_row(char *buffer, void *row);
+void *kasan_find_first_bad_addr(void *addr, size_t size);
+const char *kasan_get_bug_type(struct kasan_access_info *info);
+void kasan_metadata_fetch_row(char *buffer, void *row);
 
 #if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK
-void print_address_stack_frame(const void *addr);
+void kasan_print_address_stack_frame(const void *addr);
 #else
-static inline void print_address_stack_frame(const void *addr) { }
+static inline void kasan_print_address_stack_frame(const void *addr) { }
 #endif
 
 bool kasan_report(unsigned long addr, size_t size,
@@ -244,13 +251,13 @@ struct kasan_track *kasan_get_free_track(struct kmem_cache *cache,
 
 #if defined(CONFIG_KASAN_GENERIC) && \
        (defined(CONFIG_SLAB) || defined(CONFIG_SLUB))
-bool quarantine_put(struct kmem_cache *cache, void *object);
-void quarantine_reduce(void);
-void quarantine_remove_cache(struct kmem_cache *cache);
+bool kasan_quarantine_put(struct kmem_cache *cache, void *object);
+void kasan_quarantine_reduce(void);
+void kasan_quarantine_remove_cache(struct kmem_cache *cache);
 #else
-static inline bool quarantine_put(struct kmem_cache *cache, void *object) { return false; }
-static inline void quarantine_reduce(void) { }
-static inline void quarantine_remove_cache(struct kmem_cache *cache) { }
+static inline bool kasan_quarantine_put(struct kmem_cache *cache, void *object) { return false; }
+static inline void kasan_quarantine_reduce(void) { }
+static inline void kasan_quarantine_remove_cache(struct kmem_cache *cache) { }
 #endif
 
 #ifndef arch_kasan_set_tag
@@ -274,6 +281,9 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 #ifndef arch_init_tags
 #define arch_init_tags(max_tag)
 #endif
+#ifndef arch_set_tagging_report_once
+#define arch_set_tagging_report_once(state)
+#endif
 #ifndef arch_get_random_tag
 #define arch_get_random_tag()  (0xFF)
 #endif
@@ -286,51 +296,129 @@ static inline const void *arch_kasan_set_tag(const void *addr, u8 tag)
 
 #define hw_enable_tagging()                    arch_enable_tagging()
 #define hw_init_tags(max_tag)                  arch_init_tags(max_tag)
+#define hw_set_tagging_report_once(state)      arch_set_tagging_report_once(state)
 #define hw_get_random_tag()                    arch_get_random_tag()
 #define hw_get_mem_tag(addr)                   arch_get_mem_tag(addr)
 #define hw_set_mem_tag_range(addr, size, tag)  arch_set_mem_tag_range((addr), (size), (tag))
 
+#else /* CONFIG_KASAN_HW_TAGS */
+
+#define hw_enable_tagging()
+#define hw_set_tagging_report_once(state)
+
 #endif /* CONFIG_KASAN_HW_TAGS */
 
+#if defined(CONFIG_KASAN_HW_TAGS) && IS_ENABLED(CONFIG_KASAN_KUNIT_TEST)
+
+void kasan_set_tagging_report_once(bool state);
+void kasan_enable_tagging(void);
+
+#else /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */
+
+static inline void kasan_set_tagging_report_once(bool state) { }
+static inline void kasan_enable_tagging(void) { }
+
+#endif /* CONFIG_KASAN_HW_TAGS && CONFIG_KASAN_KUNIT_TEST */
+
 #ifdef CONFIG_KASAN_SW_TAGS
-u8 random_tag(void);
+u8 kasan_random_tag(void);
 #elif defined(CONFIG_KASAN_HW_TAGS)
-static inline u8 random_tag(void) { return hw_get_random_tag(); }
+static inline u8 kasan_random_tag(void) { return hw_get_random_tag(); }
 #else
-static inline u8 random_tag(void) { return 0; }
+static inline u8 kasan_random_tag(void) { return 0; }
 #endif
 
 #ifdef CONFIG_KASAN_HW_TAGS
 
-static inline void poison_range(const void *address, size_t size, u8 value)
+static inline void kasan_poison(const void *addr, size_t size, u8 value)
 {
-       hw_set_mem_tag_range(kasan_reset_tag(address),
-                       round_up(size, KASAN_GRANULE_SIZE), value);
+       addr = kasan_reset_tag(addr);
+
+       /* Skip KFENCE memory if called explicitly outside of sl*b. */
+       if (is_kfence_address(addr))
+               return;
+
+       if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+               return;
+       if (WARN_ON(size & KASAN_GRANULE_MASK))
+               return;
+
+       hw_set_mem_tag_range((void *)addr, size, value);
 }
 
-static inline void unpoison_range(const void *address, size_t size)
+static inline void kasan_unpoison(const void *addr, size_t size)
 {
-       hw_set_mem_tag_range(kasan_reset_tag(address),
-                       round_up(size, KASAN_GRANULE_SIZE), get_tag(address));
+       u8 tag = get_tag(addr);
+
+       addr = kasan_reset_tag(addr);
+
+       /* Skip KFENCE memory if called explicitly outside of sl*b. */
+       if (is_kfence_address(addr))
+               return;
+
+       if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+               return;
+       size = round_up(size, KASAN_GRANULE_SIZE);
+
+       hw_set_mem_tag_range((void *)addr, size, tag);
 }
 
-static inline bool check_invalid_free(void *addr)
+static inline bool kasan_byte_accessible(const void *addr)
 {
        u8 ptr_tag = get_tag(addr);
-       u8 mem_tag = hw_get_mem_tag(addr);
+       u8 mem_tag = hw_get_mem_tag((void *)addr);
 
-       return (mem_tag == KASAN_TAG_INVALID) ||
-               (ptr_tag != KASAN_TAG_KERNEL && ptr_tag != mem_tag);
+       return (mem_tag != KASAN_TAG_INVALID) &&
+               (ptr_tag == KASAN_TAG_KERNEL || ptr_tag == mem_tag);
 }
 
 #else /* CONFIG_KASAN_HW_TAGS */
 
-void poison_range(const void *address, size_t size, u8 value);
-void unpoison_range(const void *address, size_t size);
-bool check_invalid_free(void *addr);
+/**
+ * kasan_poison - mark the memory range as inaccessible
+ * @addr: range start address, must be aligned to KASAN_GRANULE_SIZE
+ * @size: range size, must be aligned to KASAN_GRANULE_SIZE
+ * @value: value that's written to metadata for the range
+ *
+ * The size gets aligned to KASAN_GRANULE_SIZE before marking the range.
+ */
+void kasan_poison(const void *addr, size_t size, u8 value);
+
+/**
+ * kasan_unpoison - mark the memory range as accessible
+ * @addr: range start address, must be aligned to KASAN_GRANULE_SIZE
+ * @size: range size, can be unaligned
+ *
+ * For the tag-based modes, the @size gets aligned to KASAN_GRANULE_SIZE before
+ * marking the range.
+ * For the generic mode, the last granule of the memory range gets partially
+ * unpoisoned based on the @size.
+ */
+void kasan_unpoison(const void *addr, size_t size);
+
+bool kasan_byte_accessible(const void *addr);
 
 #endif /* CONFIG_KASAN_HW_TAGS */
 
+#ifdef CONFIG_KASAN_GENERIC
+
+/**
+ * kasan_poison_last_granule - mark the last granule of the memory range as
+ * inaccessible
+ * @addr: range start address, must be aligned to KASAN_GRANULE_SIZE
+ * @size: range size
+ *
+ * This function is only available for the generic mode, as it's the only mode
+ * that has partially poisoned memory granules.
+ */
+void kasan_poison_last_granule(const void *addr, size_t size);
+
+#else /* CONFIG_KASAN_GENERIC */
+
+static inline void kasan_poison_last_granule(const void *addr, size_t size) { }
+
+#endif /* CONFIG_KASAN_GENERIC */
+
 /*
  * Exported functions for interfaces called from assembly or from generated
  * code. Declarations here to avoid warning about missing declarations.
index 5578312..728fb24 100644 (file)
@@ -168,7 +168,7 @@ static void qlist_free_all(struct qlist_head *q, struct kmem_cache *cache)
        qlist_init(q);
 }
 
-bool quarantine_put(struct kmem_cache *cache, void *object)
+bool kasan_quarantine_put(struct kmem_cache *cache, void *object)
 {
        unsigned long flags;
        struct qlist_head *q;
@@ -184,11 +184,11 @@ bool quarantine_put(struct kmem_cache *cache, void *object)
 
        /*
         * Note: irq must be disabled until after we move the batch to the
-        * global quarantine. Otherwise quarantine_remove_cache() can miss
-        * some objects belonging to the cache if they are in our local temp
-        * list. quarantine_remove_cache() executes on_each_cpu() at the
-        * beginning which ensures that it either sees the objects in per-cpu
-        * lists or in the global quarantine.
+        * global quarantine. Otherwise kasan_quarantine_remove_cache() can
+        * miss some objects belonging to the cache if they are in our local
+        * temp list. kasan_quarantine_remove_cache() executes on_each_cpu()
+        * at the beginning which ensures that it either sees the objects in
+        * per-cpu lists or in the global quarantine.
         */
        local_irq_save(flags);
 
@@ -222,7 +222,7 @@ bool quarantine_put(struct kmem_cache *cache, void *object)
        return true;
 }
 
-void quarantine_reduce(void)
+void kasan_quarantine_reduce(void)
 {
        size_t total_size, new_quarantine_size, percpu_quarantines;
        unsigned long flags;
@@ -234,7 +234,7 @@ void quarantine_reduce(void)
                return;
 
        /*
-        * srcu critical section ensures that quarantine_remove_cache()
+        * srcu critical section ensures that kasan_quarantine_remove_cache()
         * will not miss objects belonging to the cache while they are in our
         * local to_free list. srcu is chosen because (1) it gives us private
         * grace period domain that does not interfere with anything else,
@@ -309,15 +309,15 @@ static void per_cpu_remove_cache(void *arg)
 }
 
 /* Free all quarantined objects belonging to cache. */
-void quarantine_remove_cache(struct kmem_cache *cache)
+void kasan_quarantine_remove_cache(struct kmem_cache *cache)
 {
        unsigned long flags, i;
        struct qlist_head to_free = QLIST_INIT;
 
        /*
         * Must be careful to not miss any objects that are being moved from
-        * per-cpu list to the global quarantine in quarantine_put(),
-        * nor objects being freed in quarantine_reduce(). on_each_cpu()
+        * per-cpu list to the global quarantine in kasan_quarantine_put(),
+        * nor objects being freed in kasan_quarantine_reduce(). on_each_cpu()
         * achieves the first goal, while synchronize_srcu() achieves the
         * second.
         */
index c0fb217..87b2712 100644 (file)
@@ -25,6 +25,7 @@
 #include <linux/module.h>
 #include <linux/sched/task_stack.h>
 #include <linux/uaccess.h>
+#include <trace/events/error_report.h>
 
 #include <asm/sections.h>
 
@@ -61,7 +62,7 @@ __setup("kasan_multi_shot", kasan_set_multi_shot);
 static void print_error_description(struct kasan_access_info *info)
 {
        pr_err("BUG: KASAN: %s in %pS\n",
-               get_bug_type(info), (void *)info->ip);
+               kasan_get_bug_type(info), (void *)info->ip);
        if (info->access_size)
                pr_err("%s of size %zu at addr %px by task %s/%d\n",
                        info->is_write ? "Write" : "Read", info->access_size,
@@ -84,8 +85,9 @@ static void start_report(unsigned long *flags)
        pr_err("==================================================================\n");
 }
 
-static void end_report(unsigned long *flags)
+static void end_report(unsigned long *flags, unsigned long addr)
 {
+       trace_error_report_end(ERROR_DETECTOR_KASAN, addr);
        pr_err("==================================================================\n");
        add_taint(TAINT_BAD_PAGE, LOCKDEP_NOW_UNRELIABLE);
        spin_unlock_irqrestore(&report_lock, *flags);
@@ -247,7 +249,7 @@ static void print_address_description(void *addr, u8 tag)
                dump_page(page, "kasan: bad access detected");
        }
 
-       print_address_stack_frame(addr);
+       kasan_print_address_stack_frame(addr);
 }
 
 static bool meta_row_is_guilty(const void *row, const void *addr)
@@ -293,7 +295,7 @@ static void print_memory_metadata(const void *addr)
                 * function, because generic functions may try to
                 * access kasan mapping for the passed address.
                 */
-               metadata_fetch_row(&metadata[0], row);
+               kasan_metadata_fetch_row(&metadata[0], row);
 
                print_hex_dump(KERN_ERR, buffer,
                        DUMP_PREFIX_NONE, META_BYTES_PER_ROW, 1,
@@ -331,7 +333,7 @@ static void kasan_update_kunit_status(struct kunit *cur_test)
        }
 
        kasan_data = (struct kunit_kasan_expectation *)resource->data;
-       kasan_data->report_found = true;
+       WRITE_ONCE(kasan_data->report_found, true);
        kunit_put_resource(resource);
 }
 #endif /* IS_ENABLED(CONFIG_KUNIT) */
@@ -350,12 +352,12 @@ void kasan_report_invalid_free(void *object, unsigned long ip)
 
        start_report(&flags);
        pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip);
-       print_tags(tag, object);
+       kasan_print_tags(tag, object);
        pr_err("\n");
        print_address_description(object, tag);
        pr_err("\n");
        print_memory_metadata(object);
-       end_report(&flags);
+       end_report(&flags, (unsigned long)object);
 }
 
 static void __kasan_report(unsigned long addr, size_t size, bool is_write,
@@ -378,7 +380,8 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
 
        info.access_addr = tagged_addr;
        if (addr_has_metadata(untagged_addr))
-               info.first_bad_addr = find_first_bad_addr(tagged_addr, size);
+               info.first_bad_addr =
+                       kasan_find_first_bad_addr(tagged_addr, size);
        else
                info.first_bad_addr = untagged_addr;
        info.access_size = size;
@@ -389,7 +392,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
 
        print_error_description(&info);
        if (addr_has_metadata(untagged_addr))
-               print_tags(get_tag(tagged_addr), info.first_bad_addr);
+               kasan_print_tags(get_tag(tagged_addr), info.first_bad_addr);
        pr_err("\n");
 
        if (addr_has_metadata(untagged_addr)) {
@@ -400,7 +403,7 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
                dump_stack();
        }
 
-       end_report(&flags);
+       end_report(&flags, addr);
 }
 
 bool kasan_report(unsigned long addr, size_t size, bool is_write,
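
end_report() now fires the new error_report_end tracepoint so that test systems and fuzzers can be notified once a KASAN report has been fully printed; ERROR_DETECTOR_KASAN and the event come from the <trace/events/error_report.h> header included above. A sketch of an in-kernel consumer (the probe name is illustrative):

        static void kasan_report_probe(void *data, enum error_detector detector,
                                       unsigned long addr)
        {
                if (detector == ERROR_DETECTOR_KASAN)
                        pr_info("KASAN report done at %lx\n", addr);
        }

        /* somewhere in init: */
        register_trace_error_report_end(kasan_report_probe, NULL);
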
index 8a9c889..41f3745 100644 (file)
@@ -30,7 +30,7 @@
 #include "kasan.h"
 #include "../slab.h"
 
-void *find_first_bad_addr(void *addr, size_t size)
+void *kasan_find_first_bad_addr(void *addr, size_t size)
 {
        void *p = addr;
 
@@ -105,7 +105,7 @@ static const char *get_wild_bug_type(struct kasan_access_info *info)
        return bug_type;
 }
 
-const char *get_bug_type(struct kasan_access_info *info)
+const char *kasan_get_bug_type(struct kasan_access_info *info)
 {
        /*
         * If access_size is a negative number, then it has reason to be
@@ -123,7 +123,7 @@ const char *get_bug_type(struct kasan_access_info *info)
        return get_wild_bug_type(info);
 }
 
-void metadata_fetch_row(char *buffer, void *row)
+void kasan_metadata_fetch_row(char *buffer, void *row)
 {
        memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW);
 }
@@ -263,7 +263,7 @@ static bool __must_check get_address_stack_frame_info(const void *addr,
        return true;
 }
 
-void print_address_stack_frame(const void *addr)
+void kasan_print_address_stack_frame(const void *addr)
 {
        unsigned long offset;
        const char *frame_descr;
index 57114f0..42b2168 100644 (file)
 
 #include "kasan.h"
 
-const char *get_bug_type(struct kasan_access_info *info)
+const char *kasan_get_bug_type(struct kasan_access_info *info)
 {
        return "invalid-access";
 }
 
-void *find_first_bad_addr(void *addr, size_t size)
+void *kasan_find_first_bad_addr(void *addr, size_t size)
 {
        return kasan_reset_tag(addr);
 }
 
-void metadata_fetch_row(char *buffer, void *row)
+void kasan_metadata_fetch_row(char *buffer, void *row)
 {
        int i;
 
@@ -33,7 +33,7 @@ void metadata_fetch_row(char *buffer, void *row)
                buffer[i] = hw_get_mem_tag(row + i * KASAN_GRANULE_SIZE);
 }
 
-void print_tags(u8 addr_tag, const void *addr)
+void kasan_print_tags(u8 addr_tag, const void *addr)
 {
        u8 memory_tag = hw_get_mem_tag((void *)addr);
 
index 1b02679..3d20d34 100644 (file)
@@ -29,7 +29,7 @@
 #include "kasan.h"
 #include "../slab.h"
 
-const char *get_bug_type(struct kasan_access_info *info)
+const char *kasan_get_bug_type(struct kasan_access_info *info)
 {
 #ifdef CONFIG_KASAN_SW_TAGS_IDENTIFY
        struct kasan_alloc_meta *alloc_meta;
@@ -72,7 +72,7 @@ const char *get_bug_type(struct kasan_access_info *info)
        return "invalid-access";
 }
 
-void *find_first_bad_addr(void *addr, size_t size)
+void *kasan_find_first_bad_addr(void *addr, size_t size)
 {
        u8 tag = get_tag(addr);
        void *p = kasan_reset_tag(addr);
@@ -83,12 +83,12 @@ void *find_first_bad_addr(void *addr, size_t size)
        return p;
 }
 
-void metadata_fetch_row(char *buffer, void *row)
+void kasan_metadata_fetch_row(char *buffer, void *row)
 {
        memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW);
 }
 
-void print_tags(u8 addr_tag, const void *addr)
+void kasan_print_tags(u8 addr_tag, const void *addr)
 {
        u8 *shadow = (u8 *)kasan_mem_to_shadow(addr);
 
index 7c2c08c..63f4344 100644 (file)
@@ -13,6 +13,7 @@
 #include <linux/init.h>
 #include <linux/kasan.h>
 #include <linux/kernel.h>
+#include <linux/kfence.h>
 #include <linux/kmemleak.h>
 #include <linux/memory.h>
 #include <linux/mm.h>
 
 bool __kasan_check_read(const volatile void *p, unsigned int size)
 {
-       return check_memory_region((unsigned long)p, size, false, _RET_IP_);
+       return kasan_check_range((unsigned long)p, size, false, _RET_IP_);
 }
 EXPORT_SYMBOL(__kasan_check_read);
 
 bool __kasan_check_write(const volatile void *p, unsigned int size)
 {
-       return check_memory_region((unsigned long)p, size, true, _RET_IP_);
+       return kasan_check_range((unsigned long)p, size, true, _RET_IP_);
 }
 EXPORT_SYMBOL(__kasan_check_write);
 
 #undef memset
 void *memset(void *addr, int c, size_t len)
 {
-       if (!check_memory_region((unsigned long)addr, len, true, _RET_IP_))
+       if (!kasan_check_range((unsigned long)addr, len, true, _RET_IP_))
                return NULL;
 
        return __memset(addr, c, len);
@@ -50,8 +51,8 @@ void *memset(void *addr, int c, size_t len)
 #undef memmove
 void *memmove(void *dest, const void *src, size_t len)
 {
-       if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
-           !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
+       if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
+           !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
                return NULL;
 
        return __memmove(dest, src, len);
@@ -61,18 +62,14 @@ void *memmove(void *dest, const void *src, size_t len)
 #undef memcpy
 void *memcpy(void *dest, const void *src, size_t len)
 {
-       if (!check_memory_region((unsigned long)src, len, false, _RET_IP_) ||
-           !check_memory_region((unsigned long)dest, len, true, _RET_IP_))
+       if (!kasan_check_range((unsigned long)src, len, false, _RET_IP_) ||
+           !kasan_check_range((unsigned long)dest, len, true, _RET_IP_))
                return NULL;
 
        return __memcpy(dest, src, len);
 }
 
-/*
- * Poisons the shadow memory for 'size' bytes starting from 'addr'.
- * Memory addresses should be aligned to KASAN_GRANULE_SIZE.
- */
-void poison_range(const void *address, size_t size, u8 value)
+void kasan_poison(const void *addr, size_t size, u8 value)
 {
        void *shadow_start, *shadow_end;
 
@@ -81,36 +78,62 @@ void poison_range(const void *address, size_t size, u8 value)
         * some of the callers (e.g. kasan_poison_object_data) pass tagged
         * addresses to this function.
         */
-       address = kasan_reset_tag(address);
-       size = round_up(size, KASAN_GRANULE_SIZE);
+       addr = kasan_reset_tag(addr);
 
-       shadow_start = kasan_mem_to_shadow(address);
-       shadow_end = kasan_mem_to_shadow(address + size);
+       /* Skip KFENCE memory if called explicitly outside of sl*b. */
+       if (is_kfence_address(addr))
+               return;
+
+       if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+               return;
+       if (WARN_ON(size & KASAN_GRANULE_MASK))
+               return;
+
+       shadow_start = kasan_mem_to_shadow(addr);
+       shadow_end = kasan_mem_to_shadow(addr + size);
 
        __memset(shadow_start, value, shadow_end - shadow_start);
 }
+EXPORT_SYMBOL(kasan_poison);
 
-void unpoison_range(const void *address, size_t size)
+#ifdef CONFIG_KASAN_GENERIC
+void kasan_poison_last_granule(const void *addr, size_t size)
 {
-       u8 tag = get_tag(address);
+       if (size & KASAN_GRANULE_MASK) {
+               u8 *shadow = (u8 *)kasan_mem_to_shadow(addr + size);
+               *shadow = size & KASAN_GRANULE_MASK;
+       }
+}
+#endif
+
+void kasan_unpoison(const void *addr, size_t size)
+{
+       u8 tag = get_tag(addr);
 
        /*
         * Perform shadow offset calculation based on untagged address, as
         * some of the callers (e.g. kasan_unpoison_object_data) pass tagged
         * addresses to this function.
         */
-       address = kasan_reset_tag(address);
+       addr = kasan_reset_tag(addr);
+
+       /*
+        * Skip KFENCE memory if called explicitly outside of sl*b. Also note
+        * that calls to ksize(), where size is not a multiple of machine-word
+        * size, would otherwise poison the invalid portion of the word.
+        */
+       if (is_kfence_address(addr))
+               return;
 
-       poison_range(address, size, tag);
+       if (WARN_ON((unsigned long)addr & KASAN_GRANULE_MASK))
+               return;
 
-       if (size & KASAN_GRANULE_MASK) {
-               u8 *shadow = (u8 *)kasan_mem_to_shadow(address + size);
+       /* Unpoison all granules that cover the object. */
+       kasan_poison(addr, round_up(size, KASAN_GRANULE_SIZE), tag);
 
-               if (IS_ENABLED(CONFIG_KASAN_SW_TAGS))
-                       *shadow = tag;
-               else /* CONFIG_KASAN_GENERIC */
-                       *shadow = size & KASAN_GRANULE_MASK;
-       }
+       /* Partially poison the last granule for the generic mode. */
+       if (IS_ENABLED(CONFIG_KASAN_GENERIC))
+               kasan_poison_last_granule(addr, size);
 }
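
For illustration, a minimal sketch of the shadow state the above produces in generic mode, assuming KASAN_GRANULE_SIZE == 8 and a hypothetical 20-byte object:

    /*
     * kasan_unpoison(addr, 20), generic mode (tag == 0x00):
     *
     *   kasan_poison(addr, round_up(20, 8) == 24, 0x00)
     *       -> shadow bytes 0..2 = 0x00 (fully accessible)
     *   kasan_poison_last_granule(addr, 20)
     *       -> shadow byte 2 = 20 & KASAN_GRANULE_MASK == 4
     *          (only the first 4 bytes of the last granule stay valid)
     */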
 
 #ifdef CONFIG_MEMORY_HOTPLUG
@@ -286,7 +309,7 @@ int kasan_populate_vmalloc(unsigned long addr, unsigned long size)
         * // vmalloc() allocates memory
         * // let a = area->addr
         * // we reach kasan_populate_vmalloc
-        * // and call unpoison_range:
+        * // and call kasan_unpoison:
         * STORE shadow(a), unpoison_val
         * ...
         * STORE shadow(a+99), unpoison_val     x = LOAD p
@@ -321,7 +344,7 @@ void kasan_poison_vmalloc(const void *start, unsigned long size)
                return;
 
        size = round_up(size, KASAN_GRANULE_SIZE);
-       poison_range(start, size, KASAN_VMALLOC_INVALID);
+       kasan_poison(start, size, KASAN_VMALLOC_INVALID);
 }
 
 void kasan_unpoison_vmalloc(const void *start, unsigned long size)
@@ -329,7 +352,7 @@ void kasan_unpoison_vmalloc(const void *start, unsigned long size)
        if (!is_vmalloc_or_module_addr(start))
                return;
 
-       unpoison_range(start, size);
+       kasan_unpoison(start, size);
 }
 
 static int kasan_depopulate_vmalloc_pte(pte_t *ptep, unsigned long addr,
index 5dcd830..94c2d33 100644 (file)
@@ -57,7 +57,7 @@ void __init kasan_init_sw_tags(void)
  * sequence has in fact positive effect, since interrupts that randomly skew
  * PRNG at unpredictable points do only good.
  */
-u8 random_tag(void)
+u8 kasan_random_tag(void)
 {
        u32 state = this_cpu_read(prng_state);
 
@@ -67,7 +67,7 @@ u8 random_tag(void)
        return (u8)(state % (KASAN_TAG_MAX + 1));
 }
 
-bool check_memory_region(unsigned long addr, size_t size, bool write,
+bool kasan_check_range(unsigned long addr, size_t size, bool write,
                                unsigned long ret_ip)
 {
        u8 tag;
@@ -118,24 +118,24 @@ bool check_memory_region(unsigned long addr, size_t size, bool write,
        return true;
 }
 
-bool check_invalid_free(void *addr)
+bool kasan_byte_accessible(const void *addr)
 {
        u8 tag = get_tag(addr);
        u8 shadow_byte = READ_ONCE(*(u8 *)kasan_mem_to_shadow(kasan_reset_tag(addr)));
 
-       return (shadow_byte == KASAN_TAG_INVALID) ||
-               (tag != KASAN_TAG_KERNEL && tag != shadow_byte);
+       return (shadow_byte != KASAN_TAG_INVALID) &&
+               (tag == KASAN_TAG_KERNEL || tag == shadow_byte);
 }
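
Note that the rename here also inverts the predicate: check_invalid_free() answered "is this free invalid?", while kasan_byte_accessible() answers "is this byte accessible?". The two bodies are De Morgan duals of each other, as the diff above shows:

    /*
     * check_invalid_free(p):
     *     (shadow == KASAN_TAG_INVALID) ||
     *     (tag != KASAN_TAG_KERNEL && tag != shadow)
     *
     * kasan_byte_accessible(p) == !check_invalid_free(p):
     *     (shadow != KASAN_TAG_INVALID) &&
     *     (tag == KASAN_TAG_KERNEL || tag == shadow)
     */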
 
 #define DEFINE_HWASAN_LOAD_STORE(size)                                 \
        void __hwasan_load##size##_noabort(unsigned long addr)          \
        {                                                               \
-               check_memory_region(addr, size, false, _RET_IP_);       \
+               kasan_check_range(addr, size, false, _RET_IP_); \
        }                                                               \
        EXPORT_SYMBOL(__hwasan_load##size##_noabort);                   \
        void __hwasan_store##size##_noabort(unsigned long addr)         \
        {                                                               \
-               check_memory_region(addr, size, true, _RET_IP_);        \
+               kasan_check_range(addr, size, true, _RET_IP_);          \
        }                                                               \
        EXPORT_SYMBOL(__hwasan_store##size##_noabort)
 
@@ -147,19 +147,19 @@ DEFINE_HWASAN_LOAD_STORE(16);
 
 void __hwasan_loadN_noabort(unsigned long addr, unsigned long size)
 {
-       check_memory_region(addr, size, false, _RET_IP_);
+       kasan_check_range(addr, size, false, _RET_IP_);
 }
 EXPORT_SYMBOL(__hwasan_loadN_noabort);
 
 void __hwasan_storeN_noabort(unsigned long addr, unsigned long size)
 {
-       check_memory_region(addr, size, true, _RET_IP_);
+       kasan_check_range(addr, size, true, _RET_IP_);
 }
 EXPORT_SYMBOL(__hwasan_storeN_noabort);
 
 void __hwasan_tag_memory(unsigned long addr, u8 tag, unsigned long size)
 {
-       poison_range((void *)addr, size, tag);
+       kasan_poison((void *)addr, size, tag);
 }
 EXPORT_SYMBOL(__hwasan_tag_memory);
 
diff --git a/mm/kfence/Makefile b/mm/kfence/Makefile
new file mode 100644 (file)
index 0000000..6872cd5
--- /dev/null
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0
+
+obj-$(CONFIG_KFENCE) := core.o report.o
+
+CFLAGS_kfence_test.o := -g -fno-omit-frame-pointer -fno-optimize-sibling-calls
+obj-$(CONFIG_KFENCE_KUNIT_TEST) += kfence_test.o
diff --git a/mm/kfence/core.c b/mm/kfence/core.c
new file mode 100644 (file)
index 0000000..d53c91f
--- /dev/null
@@ -0,0 +1,850 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KFENCE guarded object allocator and fault handling.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#define pr_fmt(fmt) "kfence: " fmt
+
+#include <linux/atomic.h>
+#include <linux/bug.h>
+#include <linux/debugfs.h>
+#include <linux/kcsan-checks.h>
+#include <linux/kfence.h>
+#include <linux/kmemleak.h>
+#include <linux/list.h>
+#include <linux/lockdep.h>
+#include <linux/memblock.h>
+#include <linux/moduleparam.h>
+#include <linux/random.h>
+#include <linux/rcupdate.h>
+#include <linux/seq_file.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+
+#include <asm/kfence.h>
+
+#include "kfence.h"
+
+/* Disables KFENCE on the first warning, assuming an irrecoverable error. */
+#define KFENCE_WARN_ON(cond)                                                   \
+       ({                                                                     \
+               const bool __cond = WARN_ON(cond);                             \
+               if (unlikely(__cond))                                          \
+                       WRITE_ONCE(kfence_enabled, false);                     \
+               __cond;                                                        \
+       })
+
+/* === Data ================================================================= */
+
+static bool kfence_enabled __read_mostly;
+
+static unsigned long kfence_sample_interval __read_mostly = CONFIG_KFENCE_SAMPLE_INTERVAL;
+
+#ifdef MODULE_PARAM_PREFIX
+#undef MODULE_PARAM_PREFIX
+#endif
+#define MODULE_PARAM_PREFIX "kfence."
+
+static int param_set_sample_interval(const char *val, const struct kernel_param *kp)
+{
+       unsigned long num;
+       int ret = kstrtoul(val, 0, &num);
+
+       if (ret < 0)
+               return ret;
+
+       if (!num) /* Using 0 to indicate KFENCE is disabled. */
+               WRITE_ONCE(kfence_enabled, false);
+       else if (!READ_ONCE(kfence_enabled) && system_state != SYSTEM_BOOTING)
+               return -EINVAL; /* Cannot (re-)enable KFENCE on-the-fly. */
+
+       *((unsigned long *)kp->arg) = num;
+       return 0;
+}
+
+static int param_get_sample_interval(char *buffer, const struct kernel_param *kp)
+{
+       if (!READ_ONCE(kfence_enabled))
+               return sprintf(buffer, "0\n");
+
+       return param_get_ulong(buffer, kp);
+}
+
+static const struct kernel_param_ops sample_interval_param_ops = {
+       .set = param_set_sample_interval,
+       .get = param_get_sample_interval,
+};
+module_param_cb(sample_interval, &sample_interval_param_ops, &kfence_sample_interval, 0600);
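
Given the MODULE_PARAM_PREFIX and the 0600 mode above, the sample interval is set on the kernel command line or (root-only) via sysfs; an illustrative usage sketch:

    # On the kernel command line; 0 disables KFENCE entirely:
    kfence.sample_interval=500

    # At runtime; note that param_set_sample_interval() above refuses
    # to re-enable KFENCE once it has been disabled after boot:
    echo 0 > /sys/module/kfence/parameters/sample_interval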
+
+/* The pool of pages used for guard pages and objects. */
+char *__kfence_pool __ro_after_init;
+EXPORT_SYMBOL(__kfence_pool); /* Export for test modules. */
+
+/*
+ * Per-object metadata, with one-to-one mapping of object metadata to
+ * backing pages (in __kfence_pool).
+ */
+static_assert(CONFIG_KFENCE_NUM_OBJECTS > 0);
+struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+
+/* Freelist with available objects. */
+static struct list_head kfence_freelist = LIST_HEAD_INIT(kfence_freelist);
+static DEFINE_RAW_SPINLOCK(kfence_freelist_lock); /* Lock protecting freelist. */
+
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+/* The static key to set up a KFENCE allocation. */
+DEFINE_STATIC_KEY_FALSE(kfence_allocation_key);
+#endif
+
+/* Gates the allocation, ensuring only one succeeds in a given period. */
+atomic_t kfence_allocation_gate = ATOMIC_INIT(1);
+
+/* Statistics counters for debugfs. */
+enum kfence_counter_id {
+       KFENCE_COUNTER_ALLOCATED,
+       KFENCE_COUNTER_ALLOCS,
+       KFENCE_COUNTER_FREES,
+       KFENCE_COUNTER_ZOMBIES,
+       KFENCE_COUNTER_BUGS,
+       KFENCE_COUNTER_COUNT,
+};
+static atomic_long_t counters[KFENCE_COUNTER_COUNT];
+static const char *const counter_names[] = {
+       [KFENCE_COUNTER_ALLOCATED]      = "currently allocated",
+       [KFENCE_COUNTER_ALLOCS]         = "total allocations",
+       [KFENCE_COUNTER_FREES]          = "total frees",
+       [KFENCE_COUNTER_ZOMBIES]        = "zombie allocations",
+       [KFENCE_COUNTER_BUGS]           = "total bugs",
+};
+static_assert(ARRAY_SIZE(counter_names) == KFENCE_COUNTER_COUNT);
+
+/* === Internals ============================================================ */
+
+static bool kfence_protect(unsigned long addr)
+{
+       return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), true));
+}
+
+static bool kfence_unprotect(unsigned long addr)
+{
+       return !KFENCE_WARN_ON(!kfence_protect_page(ALIGN_DOWN(addr, PAGE_SIZE), false));
+}
+
+static inline struct kfence_metadata *addr_to_metadata(unsigned long addr)
+{
+       long index;
+
+       /* The checks do not affect performance; only called from slow-paths. */
+
+       if (!is_kfence_address((void *)addr))
+               return NULL;
+
+       /*
+        * May be an invalid index if called with an address at the edge of
+        * __kfence_pool, in which case we would report an "invalid access"
+        * error.
+        */
+       index = (addr - (unsigned long)__kfence_pool) / (PAGE_SIZE * 2) - 1;
+       if (index < 0 || index >= CONFIG_KFENCE_NUM_OBJECTS)
+               return NULL;
+
+       return &kfence_metadata[index];
+}
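
A worked example of the index arithmetic above, assuming PAGE_SIZE == 4096 (offsets hypothetical):

    /*
     * __kfence_pool pages: [guard][guard][obj 0][guard][obj 1][guard]...
     *
     * addr == __kfence_pool + 2 * 4096 + 100   (inside object 0's page)
     *     index = (2 * 4096 + 100) / (4096 * 2) - 1 = 1 - 1 = 0
     *
     * addr == __kfence_pool + 4096             (the extra leading guard)
     *     index = 4096 / 8192 - 1 = -1  ->  NULL ("invalid access" case)
     */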
+
+static inline unsigned long metadata_to_pageaddr(const struct kfence_metadata *meta)
+{
+       unsigned long offset = (meta - kfence_metadata + 1) * PAGE_SIZE * 2;
+       unsigned long pageaddr = (unsigned long)&__kfence_pool[offset];
+
+       /* The checks do not affect performance; only called from slow-paths. */
+
+       /* Only call with a pointer into kfence_metadata. */
+       if (KFENCE_WARN_ON(meta < kfence_metadata ||
+                          meta >= kfence_metadata + CONFIG_KFENCE_NUM_OBJECTS))
+               return 0;
+
+       /*
+        * This metadata object only ever maps to 1 page; verify that the stored
+        * address is in the expected range.
+        */
+       if (KFENCE_WARN_ON(ALIGN_DOWN(meta->addr, PAGE_SIZE) != pageaddr))
+               return 0;
+
+       return pageaddr;
+}
+
+/*
+ * Update the object's metadata state, including updating the alloc/free stacks
+ * depending on the state transition.
+ */
+static noinline void metadata_update_state(struct kfence_metadata *meta,
+                                          enum kfence_object_state next)
+{
+       struct kfence_track *track =
+               next == KFENCE_OBJECT_FREED ? &meta->free_track : &meta->alloc_track;
+
+       lockdep_assert_held(&meta->lock);
+
+       /*
+        * Skip over 1 (this) function; noinline ensures we do not accidentally
+        * skip over the caller by never inlining.
+        */
+       track->num_stack_entries = stack_trace_save(track->stack_entries, KFENCE_STACK_DEPTH, 1);
+       track->pid = task_pid_nr(current);
+
+       /*
+        * Pairs with READ_ONCE() in
+        *      kfence_shutdown_cache(),
+        *      kfence_handle_page_fault().
+        */
+       WRITE_ONCE(meta->state, next);
+}
+
+/* Write canary byte to @addr. */
+static inline bool set_canary_byte(u8 *addr)
+{
+       *addr = KFENCE_CANARY_PATTERN(addr);
+       return true;
+}
+
+/* Check canary byte at @addr. */
+static inline bool check_canary_byte(u8 *addr)
+{
+       if (likely(*addr == KFENCE_CANARY_PATTERN(addr)))
+               return true;
+
+       atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+       kfence_report_error((unsigned long)addr, false, NULL, addr_to_metadata((unsigned long)addr),
+                           KFENCE_ERROR_CORRUPTION);
+       return false;
+}
+
+/* __always_inline this to ensure we won't do an indirect call to fn. */
+static __always_inline void for_each_canary(const struct kfence_metadata *meta, bool (*fn)(u8 *))
+{
+       const unsigned long pageaddr = ALIGN_DOWN(meta->addr, PAGE_SIZE);
+       unsigned long addr;
+
+       lockdep_assert_held(&meta->lock);
+
+       /*
+        * We'll iterate over each canary byte per-side until fn() returns
+        * false. However, we'll still iterate over the canary bytes to the
+        * right of the object even if there was an error in the canary bytes to
+        * the left of the object. Specifically, if check_canary_byte()
+        * generates an error, showing both sides might give more clues as to
+        * what the error is about when displaying which bytes were corrupted.
+        */
+
+       /* Apply to left of object. */
+       for (addr = pageaddr; addr < meta->addr; addr++) {
+               if (!fn((u8 *)addr))
+                       break;
+       }
+
+       /* Apply to right of object. */
+       for (addr = meta->addr + meta->size; addr < pageaddr + PAGE_SIZE; addr++) {
+               if (!fn((u8 *)addr))
+                       break;
+       }
+}
+
+static void *kfence_guarded_alloc(struct kmem_cache *cache, size_t size, gfp_t gfp)
+{
+       struct kfence_metadata *meta = NULL;
+       unsigned long flags;
+       struct page *page;
+       void *addr;
+
+       /* Try to obtain a free object. */
+       raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+       if (!list_empty(&kfence_freelist)) {
+               meta = list_entry(kfence_freelist.next, struct kfence_metadata, list);
+               list_del_init(&meta->list);
+       }
+       raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+       if (!meta)
+               return NULL;
+
+       if (unlikely(!raw_spin_trylock_irqsave(&meta->lock, flags))) {
+               /*
+                * This is extremely unlikely -- we are reporting on a
+                * use-after-free, which locked meta->lock, and the reporting
+                * code via printk calls kmalloc() which ends up in
+                * kfence_alloc() and tries to grab the same object that we're
+                * reporting on. While it has never been observed, lockdep does
+                * report that there is a possibility of deadlock. Fix it by
+                * using trylock and bailing out gracefully.
+                */
+               raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+               /* Put the object back on the freelist. */
+               list_add_tail(&meta->list, &kfence_freelist);
+               raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+
+               return NULL;
+       }
+
+       meta->addr = metadata_to_pageaddr(meta);
+       /* Unprotect if we're reusing this page. */
+       if (meta->state == KFENCE_OBJECT_FREED)
+               kfence_unprotect(meta->addr);
+
+       /*
+        * Note: for allocations made before RNG initialization, prandom_u32_max()
+        * will always return zero. We still benefit from enabling KFENCE as early as
+        * possible, even when the RNG is not yet available, as this will allow
+        * KFENCE to detect bugs due to earlier allocations. The only downside
+        * is that the out-of-bounds accesses detected are deterministic for
+        * such allocations.
+        */
+       if (prandom_u32_max(2)) {
+               /* Allocate on the "right" side, re-calculate address. */
+               meta->addr += PAGE_SIZE - size;
+               meta->addr = ALIGN_DOWN(meta->addr, cache->align);
+       }
+
+       addr = (void *)meta->addr;
+
+       /* Update remaining metadata. */
+       metadata_update_state(meta, KFENCE_OBJECT_ALLOCATED);
+       /* Pairs with READ_ONCE() in kfence_shutdown_cache(). */
+       WRITE_ONCE(meta->cache, cache);
+       meta->size = size;
+       for_each_canary(meta, set_canary_byte);
+
+       /* Set required struct page fields. */
+       page = virt_to_page(meta->addr);
+       page->slab_cache = cache;
+       if (IS_ENABLED(CONFIG_SLUB))
+               page->objects = 1;
+       if (IS_ENABLED(CONFIG_SLAB))
+               page->s_mem = addr;
+
+       raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+       /* Memory initialization. */
+
+       /*
+        * We check slab_want_init_on_alloc() ourselves, rather than letting
+        * SL*B do the initialization, as otherwise we might overwrite KFENCE's
+        * redzone.
+        */
+       if (unlikely(slab_want_init_on_alloc(gfp, cache)))
+               memzero_explicit(addr, size);
+       if (cache->ctor)
+               cache->ctor(addr);
+
+       if (CONFIG_KFENCE_STRESS_TEST_FAULTS && !prandom_u32_max(CONFIG_KFENCE_STRESS_TEST_FAULTS))
+               kfence_protect(meta->addr); /* Random "faults" by protecting the object. */
+
+       atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCATED]);
+       atomic_long_inc(&counters[KFENCE_COUNTER_ALLOCS]);
+
+       return addr;
+}
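
A worked example of the "right" placement above, with hypothetical values (PAGE_SIZE == 4096, cache->align == 8, size == 100):

    /*
     * meta->addr == 0x2000 (page-aligned, from metadata_to_pageaddr())
     * meta->addr += 4096 - 100      ->  0x2f9c
     * ALIGN_DOWN(0x2f9c, 8)         ->  0x2f98
     *
     * The object ends at 0x2ffc, leaving an alignment gap of 4 bytes
     * before the right guard page; test_kmalloc_aligned_oob_*() below
     * exercises exactly these gaps.
     */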
+
+static void kfence_guarded_free(void *addr, struct kfence_metadata *meta, bool zombie)
+{
+       struct kcsan_scoped_access assert_page_exclusive;
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&meta->lock, flags);
+
+       if (meta->state != KFENCE_OBJECT_ALLOCATED || meta->addr != (unsigned long)addr) {
+               /* Invalid or double-free, bail out. */
+               atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+               kfence_report_error((unsigned long)addr, false, NULL, meta,
+                                   KFENCE_ERROR_INVALID_FREE);
+               raw_spin_unlock_irqrestore(&meta->lock, flags);
+               return;
+       }
+
+       /* Detect racy use-after-free, or incorrect reallocation of this page by KFENCE. */
+       kcsan_begin_scoped_access((void *)ALIGN_DOWN((unsigned long)addr, PAGE_SIZE), PAGE_SIZE,
+                                 KCSAN_ACCESS_SCOPED | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT,
+                                 &assert_page_exclusive);
+
+       if (CONFIG_KFENCE_STRESS_TEST_FAULTS)
+               kfence_unprotect((unsigned long)addr); /* To check canary bytes. */
+
+       /* Restore page protection if there was an OOB access. */
+       if (meta->unprotected_page) {
+               kfence_protect(meta->unprotected_page);
+               meta->unprotected_page = 0;
+       }
+
+       /* Check canary bytes for memory corruption. */
+       for_each_canary(meta, check_canary_byte);
+
+       /*
+        * Clear memory if init-on-free is set. While we protect the page, the
+        * data is still there, and after a use-after-free is detected, we
+        * unprotect the page, so the data is still accessible.
+        */
+       if (!zombie && unlikely(slab_want_init_on_free(meta->cache)))
+               memzero_explicit(addr, meta->size);
+
+       /* Mark the object as freed. */
+       metadata_update_state(meta, KFENCE_OBJECT_FREED);
+
+       raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+       /* Protect to detect use-after-frees. */
+       kfence_protect((unsigned long)addr);
+
+       kcsan_end_scoped_access(&assert_page_exclusive);
+       if (!zombie) {
+               /* Add it to the tail of the freelist for reuse. */
+               raw_spin_lock_irqsave(&kfence_freelist_lock, flags);
+               KFENCE_WARN_ON(!list_empty(&meta->list));
+               list_add_tail(&meta->list, &kfence_freelist);
+               raw_spin_unlock_irqrestore(&kfence_freelist_lock, flags);
+
+               atomic_long_dec(&counters[KFENCE_COUNTER_ALLOCATED]);
+               atomic_long_inc(&counters[KFENCE_COUNTER_FREES]);
+       } else {
+               /* See kfence_shutdown_cache(). */
+               atomic_long_inc(&counters[KFENCE_COUNTER_ZOMBIES]);
+       }
+}
+
+static void rcu_guarded_free(struct rcu_head *h)
+{
+       struct kfence_metadata *meta = container_of(h, struct kfence_metadata, rcu_head);
+
+       kfence_guarded_free((void *)meta->addr, meta, false);
+}
+
+static bool __init kfence_init_pool(void)
+{
+       unsigned long addr = (unsigned long)__kfence_pool;
+       struct page *pages;
+       int i;
+
+       if (!__kfence_pool)
+               return false;
+
+       if (!arch_kfence_init_pool())
+               goto err;
+
+       pages = virt_to_page(addr);
+
+       /*
+        * Set up object pages: they must have PG_slab set, to avoid freeing
+        * these as real pages.
+        *
+        * We also want to avoid inserting kfence_free() in the kfree()
+        * fast-path in SLUB, and therefore need to ensure kfree() correctly
+        * enters __slab_free() slow-path.
+        */
+       for (i = 0; i < KFENCE_POOL_SIZE / PAGE_SIZE; i++) {
+               if (!i || (i % 2))
+                       continue;
+
+               /* Verify we do not have a compound head page. */
+               if (WARN_ON(compound_head(&pages[i]) != &pages[i]))
+                       goto err;
+
+               __SetPageSlab(&pages[i]);
+       }
+
+       /*
+        * Protect the first 2 pages. The first page is mostly unnecessary, and
+        * merely serves as an extended guard page. However, adding one
+        * additional page in the beginning gives us an even number of pages,
+        * which simplifies the mapping of address to metadata index.
+        */
+       for (i = 0; i < 2; i++) {
+               if (unlikely(!kfence_protect(addr)))
+                       goto err;
+
+               addr += PAGE_SIZE;
+       }
+
+       for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+               struct kfence_metadata *meta = &kfence_metadata[i];
+
+               /* Initialize metadata. */
+               INIT_LIST_HEAD(&meta->list);
+               raw_spin_lock_init(&meta->lock);
+               meta->state = KFENCE_OBJECT_UNUSED;
+               meta->addr = addr; /* Initialize for validation in metadata_to_pageaddr(). */
+               list_add_tail(&meta->list, &kfence_freelist);
+
+               /* Protect the right redzone. */
+               if (unlikely(!kfence_protect(addr + PAGE_SIZE)))
+                       goto err;
+
+               addr += 2 * PAGE_SIZE;
+       }
+
+       /*
+        * The pool is live and will never be deallocated from this point on.
+        * Remove the pool object from the kmemleak object tree, as it would
+        * otherwise overlap with allocations returned by kfence_alloc(), which
+        * are registered with kmemleak through the slab post-alloc hook.
+        */
+       kmemleak_free(__kfence_pool);
+
+       return true;
+
+err:
+       /*
+        * Only release unprotected pages, and do not try to go back and change
+        * page attributes due to risk of failing to do so as well. If changing
+        * page attributes for some pages fails, it is very likely that it also
+        * fails for the first page, and therefore expect addr==__kfence_pool in
+        * most failure cases.
+        */
+       memblock_free_late(__pa(addr), KFENCE_POOL_SIZE - (addr - (unsigned long)__kfence_pool));
+       __kfence_pool = NULL;
+       return false;
+}
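
The resulting pool layout, sketched here for CONFIG_KFENCE_NUM_OBJECTS == 2 and assuming KFENCE_POOL_SIZE works out to (NUM_OBJECTS + 1) * 2 * PAGE_SIZE:

    /*
     *   +-------+-------+-------+-------+-------+-------+
     *   | guard | guard | obj 0 | guard | obj 1 | guard |
     *   +-------+-------+-------+-------+-------+-------+
     *
     * Every object page sits between two protected guard pages, so the
     * first out-of-bounds access in either direction faults.
     */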
+
+/* === DebugFS Interface ==================================================== */
+
+static int stats_show(struct seq_file *seq, void *v)
+{
+       int i;
+
+       seq_printf(seq, "enabled: %i\n", READ_ONCE(kfence_enabled));
+       for (i = 0; i < KFENCE_COUNTER_COUNT; i++)
+               seq_printf(seq, "%s: %ld\n", counter_names[i], atomic_long_read(&counters[i]));
+
+       return 0;
+}
+DEFINE_SHOW_ATTRIBUTE(stats);
+
+/*
+ * debugfs seq_file operations for /sys/kernel/debug/kfence/objects.
+ * start_object() and next_object() return the object index + 1, because NULL is used
+ * to stop iteration.
+ */
+static void *start_object(struct seq_file *seq, loff_t *pos)
+{
+       if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
+               return (void *)((long)*pos + 1);
+       return NULL;
+}
+
+static void stop_object(struct seq_file *seq, void *v)
+{
+}
+
+static void *next_object(struct seq_file *seq, void *v, loff_t *pos)
+{
+       ++*pos;
+       if (*pos < CONFIG_KFENCE_NUM_OBJECTS)
+               return (void *)((long)*pos + 1);
+       return NULL;
+}
+
+static int show_object(struct seq_file *seq, void *v)
+{
+       struct kfence_metadata *meta = &kfence_metadata[(long)v - 1];
+       unsigned long flags;
+
+       raw_spin_lock_irqsave(&meta->lock, flags);
+       kfence_print_object(seq, meta);
+       raw_spin_unlock_irqrestore(&meta->lock, flags);
+       seq_puts(seq, "---------------------------------\n");
+
+       return 0;
+}
+
+static const struct seq_operations object_seqops = {
+       .start = start_object,
+       .next = next_object,
+       .stop = stop_object,
+       .show = show_object,
+};
+
+static int open_objects(struct inode *inode, struct file *file)
+{
+       return seq_open(file, &object_seqops);
+}
+
+static const struct file_operations objects_fops = {
+       .open = open_objects,
+       .read = seq_read,
+       .llseek = seq_lseek,
+};
+
+static int __init kfence_debugfs_init(void)
+{
+       struct dentry *kfence_dir = debugfs_create_dir("kfence", NULL);
+
+       debugfs_create_file("stats", 0444, kfence_dir, NULL, &stats_fops);
+       debugfs_create_file("objects", 0400, kfence_dir, NULL, &objects_fops);
+       return 0;
+}
+
+late_initcall(kfence_debugfs_init);
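
The output format follows directly from stats_show(); illustrative output (counter values made up):

    $ cat /sys/kernel/debug/kfence/stats
    enabled: 1
    currently allocated: 45
    total allocations: 3298
    total frees: 3253
    zombie allocations: 0
    total bugs: 0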
+
+/* === Allocation Gate Timer ================================================ */
+
+/*
+ * Set up delayed work, which will enable and disable the static key. We need to
+ * use a work queue (rather than a simple timer), since enabling and disabling a
+ * static key cannot be done from an interrupt.
+ *
+ * Note: Toggling a static branch currently causes IPIs, and here we'll end up
+ * with a total of 2 IPIs to all CPUs. If this ends up a problem in future (with
+ * more aggressive sampling intervals), we could get away with a variant that
+ * avoids IPIs, at the cost of not immediately capturing allocations if the
+ * instructions remain cached.
+ */
+static struct delayed_work kfence_timer;
+static void toggle_allocation_gate(struct work_struct *work)
+{
+       if (!READ_ONCE(kfence_enabled))
+               return;
+
+       /* Enable static key, and await allocation to happen. */
+       atomic_set(&kfence_allocation_gate, 0);
+#ifdef CONFIG_KFENCE_STATIC_KEYS
+       static_branch_enable(&kfence_allocation_key);
+       /*
+        * Await an allocation. Timeout after 1 second, in case the kernel stops
+        * doing allocations, to avoid stalling this worker task for too long.
+        */
+       {
+               unsigned long end_wait = jiffies + HZ;
+
+               do {
+                       set_current_state(TASK_UNINTERRUPTIBLE);
+                       if (atomic_read(&kfence_allocation_gate) != 0)
+                               break;
+                       schedule_timeout(1);
+               } while (time_before(jiffies, end_wait));
+               __set_current_state(TASK_RUNNING);
+       }
+       /* Disable static key and reset timer. */
+       static_branch_disable(&kfence_allocation_key);
+#endif
+       schedule_delayed_work(&kfence_timer, msecs_to_jiffies(kfence_sample_interval));
+}
+static DECLARE_DELAYED_WORK(kfence_timer, toggle_allocation_gate);
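
Putting the pieces together, one sampling cycle with a hypothetical sample_interval of 100ms looks like:

    /*
     * t = 0          toggle_allocation_gate(): gate = 0, static key on
     * t = 0..1s      the first kfence_alloc() to pass the gate wins; the
     *                worker polls (yielding via schedule_timeout()) until
     *                it observes gate != 0 or the 1s timeout expires
     * t = X          static key off, work rescheduled
     * t = X + 100ms  the next sample window opens
     */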
+
+/* === Public interface ===================================================== */
+
+void __init kfence_alloc_pool(void)
+{
+       if (!kfence_sample_interval)
+               return;
+
+       __kfence_pool = memblock_alloc(KFENCE_POOL_SIZE, PAGE_SIZE);
+
+       if (!__kfence_pool)
+               pr_err("failed to allocate pool\n");
+}
+
+void __init kfence_init(void)
+{
+       /* Setting kfence_sample_interval to 0 on boot disables KFENCE. */
+       if (!kfence_sample_interval)
+               return;
+
+       if (!kfence_init_pool()) {
+               pr_err("%s failed\n", __func__);
+               return;
+       }
+
+       WRITE_ONCE(kfence_enabled, true);
+       schedule_delayed_work(&kfence_timer, 0);
+       pr_info("initialized - using %lu bytes for %d objects at 0x%p-0x%p\n", KFENCE_POOL_SIZE,
+               CONFIG_KFENCE_NUM_OBJECTS, (void *)__kfence_pool,
+               (void *)(__kfence_pool + KFENCE_POOL_SIZE));
+}
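
With PAGE_SIZE == 4096, CONFIG_KFENCE_NUM_OBJECTS == 255, and the pool-size assumption above, the pr_info() yields a boot line of roughly this shape (pointer values elided, since %p hashes them):

    kfence: initialized - using 2097152 bytes for 255 objects at 0x...-0x...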
+
+void kfence_shutdown_cache(struct kmem_cache *s)
+{
+       unsigned long flags;
+       struct kfence_metadata *meta;
+       int i;
+
+       for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+               bool in_use;
+
+               meta = &kfence_metadata[i];
+
+               /*
+                * If we observe some inconsistent cache and state pair where we
+                * should have returned false here, cache destruction is racing
+                * with either kmem_cache_alloc() or kmem_cache_free(). Taking
+                * the lock will not help, as different critical section
+                * serialization will have the same outcome.
+                */
+               if (READ_ONCE(meta->cache) != s ||
+                   READ_ONCE(meta->state) != KFENCE_OBJECT_ALLOCATED)
+                       continue;
+
+               raw_spin_lock_irqsave(&meta->lock, flags);
+               in_use = meta->cache == s && meta->state == KFENCE_OBJECT_ALLOCATED;
+               raw_spin_unlock_irqrestore(&meta->lock, flags);
+
+               if (in_use) {
+                       /*
+                        * This cache still has allocations. Do not release
+                        * them back into the freelist, so that they can still
+                        * safely be used; this retains the kernel's default
+                        * behaviour of keeping the allocations alive (i.e. the
+                        * cache is leaked). However, they effectively become
+                        * "zombie allocations", as the KFENCE objects are the
+                        * only ones still in use and the owning cache is being
+                        * destroyed.
+                        *
+                        * We mark them freed, so that any subsequent use shows
+                        * more useful error messages that will include stack
+                        * traces of the user of the object, the original
+                        * allocation, and the caller of kfence_shutdown_cache().
+                        */
+                       kfence_guarded_free((void *)meta->addr, meta, /*zombie=*/true);
+               }
+       }
+
+       for (i = 0; i < CONFIG_KFENCE_NUM_OBJECTS; i++) {
+               meta = &kfence_metadata[i];
+
+               /* See above. */
+               if (READ_ONCE(meta->cache) != s || READ_ONCE(meta->state) != KFENCE_OBJECT_FREED)
+                       continue;
+
+               raw_spin_lock_irqsave(&meta->lock, flags);
+               if (meta->cache == s && meta->state == KFENCE_OBJECT_FREED)
+                       meta->cache = NULL;
+               raw_spin_unlock_irqrestore(&meta->lock, flags);
+       }
+}
+
+void *__kfence_alloc(struct kmem_cache *s, size_t size, gfp_t flags)
+{
+       /*
+        * allocation_gate only needs to become non-zero, so it doesn't make
+        * sense to continue writing to it and pay the associated contention
+        * cost, in case we have a large number of concurrent allocations.
+        */
+       if (atomic_read(&kfence_allocation_gate) || atomic_inc_return(&kfence_allocation_gate) > 1)
+               return NULL;
+
+       if (!READ_ONCE(kfence_enabled))
+               return NULL;
+
+       if (size > PAGE_SIZE)
+               return NULL;
+
+       return kfence_guarded_alloc(s, size, flags);
+}
+
+size_t kfence_ksize(const void *addr)
+{
+       const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+       /*
+        * Read locklessly -- if there is a race with __kfence_alloc(), this is
+        * either a use-after-free or invalid access.
+        */
+       return meta ? meta->size : 0;
+}
+
+void *kfence_object_start(const void *addr)
+{
+       const struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+       /*
+        * Read locklessly -- if there is a race with __kfence_alloc(), this is
+        * either a use-after-free or invalid access.
+        */
+       return meta ? (void *)meta->addr : NULL;
+}
+
+void __kfence_free(void *addr)
+{
+       struct kfence_metadata *meta = addr_to_metadata((unsigned long)addr);
+
+       /*
+        * If the objects of the cache are SLAB_TYPESAFE_BY_RCU, defer freeing
+        * the object, as the object page may be recycled for other-typed
+        * objects once it has been freed. meta->cache may be NULL if the cache
+        * was destroyed.
+        */
+       if (unlikely(meta->cache && (meta->cache->flags & SLAB_TYPESAFE_BY_RCU)))
+               call_rcu(&meta->rcu_head, rcu_guarded_free);
+       else
+               kfence_guarded_free(addr, meta, false);
+}
+
+bool kfence_handle_page_fault(unsigned long addr, bool is_write, struct pt_regs *regs)
+{
+       const int page_index = (addr - (unsigned long)__kfence_pool) / PAGE_SIZE;
+       struct kfence_metadata *to_report = NULL;
+       enum kfence_error_type error_type;
+       unsigned long flags;
+
+       if (!is_kfence_address((void *)addr))
+               return false;
+
+       if (!READ_ONCE(kfence_enabled)) /* If disabled at runtime ... */
+               return kfence_unprotect(addr); /* ... unprotect and proceed. */
+
+       atomic_long_inc(&counters[KFENCE_COUNTER_BUGS]);
+
+       if (page_index % 2) {
+               /* This is a redzone, report a buffer overflow. */
+               struct kfence_metadata *meta;
+               int distance = 0;
+
+               meta = addr_to_metadata(addr - PAGE_SIZE);
+               if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+                       to_report = meta;
+                       /* Data race ok; distance calculation approximate. */
+                       distance = addr - data_race(meta->addr + meta->size);
+               }
+
+               meta = addr_to_metadata(addr + PAGE_SIZE);
+               if (meta && READ_ONCE(meta->state) == KFENCE_OBJECT_ALLOCATED) {
+                       /* Data race ok; distance calculation approximate. */
+                       if (!to_report || distance > data_race(meta->addr) - addr)
+                               to_report = meta;
+               }
+
+               if (!to_report)
+                       goto out;
+
+               raw_spin_lock_irqsave(&to_report->lock, flags);
+               to_report->unprotected_page = addr;
+               error_type = KFENCE_ERROR_OOB;
+
+               /*
+                * If the object was freed before we took the lock, we can still
+                * report this as an OOB -- the report will simply show the
+                * stacktrace of the free as well.
+                */
+       } else {
+               to_report = addr_to_metadata(addr);
+               if (!to_report)
+                       goto out;
+
+               raw_spin_lock_irqsave(&to_report->lock, flags);
+               error_type = KFENCE_ERROR_UAF;
+               /*
+                * We may race with __kfence_alloc(), and it is possible that a
+                * freed object may be reallocated. We simply report this as a
+                * use-after-free, with the stack trace showing the place where
+                * the object was re-allocated.
+                */
+       }
+
+out:
+       if (to_report) {
+               kfence_report_error(addr, is_write, regs, to_report, error_type);
+               raw_spin_unlock_irqrestore(&to_report->lock, flags);
+       } else {
+               /* This may be a UAF or OOB access, but we can't be sure. */
+               kfence_report_error(addr, is_write, regs, NULL, KFENCE_ERROR_INVALID);
+       }
+
+       return kfence_unprotect(addr); /* Unprotect and let access proceed. */
+}
diff --git a/mm/kfence/kfence.h b/mm/kfence/kfence.h
new file mode 100644 (file)
index 0000000..2406532
--- /dev/null
@@ -0,0 +1,106 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Kernel Electric-Fence (KFENCE). For more info please see
+ * Documentation/dev-tools/kfence.rst.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#ifndef MM_KFENCE_KFENCE_H
+#define MM_KFENCE_KFENCE_H
+
+#include <linux/mm.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/types.h>
+
+#include "../slab.h" /* for struct kmem_cache */
+
+/*
+ * Get the canary byte pattern for @addr. Use a pattern that varies based on the
+ * lower 3 bits of the address, to detect memory corruptions with higher
+ * probability, where similar constants are used.
+ */
+#define KFENCE_CANARY_PATTERN(addr) ((u8)0xaa ^ (u8)((unsigned long)(addr) & 0x7))
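
A quick worked example of the pattern:

    /*
     * addr & 0x7 == 0x3:  canary = 0xaa ^ 0x3 == 0xa9
     * addr & 0x7 == 0x4:  canary = 0xaa ^ 0x4 == 0xae
     *
     * Adjacent canary bytes differ, so overwriting the redzone with a
     * single repeated constant cannot reproduce the expected sequence.
     */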
+
+/* Maximum stack depth for reports. */
+#define KFENCE_STACK_DEPTH 64
+
+/* KFENCE object states. */
+enum kfence_object_state {
+       KFENCE_OBJECT_UNUSED,           /* Object is unused. */
+       KFENCE_OBJECT_ALLOCATED,        /* Object is currently allocated. */
+       KFENCE_OBJECT_FREED,            /* Object was allocated, and then freed. */
+};
+
+/* Alloc/free tracking information. */
+struct kfence_track {
+       pid_t pid;
+       int num_stack_entries;
+       unsigned long stack_entries[KFENCE_STACK_DEPTH];
+};
+
+/* KFENCE metadata per guarded allocation. */
+struct kfence_metadata {
+       struct list_head list;          /* Freelist node; access under kfence_freelist_lock. */
+       struct rcu_head rcu_head;       /* For delayed freeing. */
+
+       /*
+        * Lock protecting below data; to ensure consistency of the below data,
+        * since the following may execute concurrently: __kfence_alloc(),
+        * __kfence_free(), kfence_handle_page_fault(). However, note that we
+        * cannot grab the same metadata off the freelist twice, and multiple
+        * __kfence_alloc() cannot run concurrently on the same metadata.
+        */
+       raw_spinlock_t lock;
+
+       /* The current state of the object; see above. */
+       enum kfence_object_state state;
+
+       /*
+        * Allocated object address; cannot be calculated from size, because of
+        * alignment requirements.
+        *
+        * Invariant: ALIGN_DOWN(addr, PAGE_SIZE) is constant.
+        */
+       unsigned long addr;
+
+       /*
+        * The size of the original allocation.
+        */
+       size_t size;
+
+       /*
+        * The kmem_cache cache of the last allocation; NULL if never allocated
+        * or the cache has already been destroyed.
+        */
+       struct kmem_cache *cache;
+
+       /*
+        * In case of an invalid access, the page that was unprotected; we
+        * optimistically only store one address.
+        */
+       unsigned long unprotected_page;
+
+       /* Allocation and free stack information. */
+       struct kfence_track alloc_track;
+       struct kfence_track free_track;
+};
+
+extern struct kfence_metadata kfence_metadata[CONFIG_KFENCE_NUM_OBJECTS];
+
+/* KFENCE error types for report generation. */
+enum kfence_error_type {
+       KFENCE_ERROR_OOB,               /* Detected an out-of-bounds access. */
+       KFENCE_ERROR_UAF,               /* Detected a use-after-free access. */
+       KFENCE_ERROR_CORRUPTION,        /* Detected a memory corruption on free. */
+       KFENCE_ERROR_INVALID,           /* Invalid access of unknown type. */
+       KFENCE_ERROR_INVALID_FREE,      /* Invalid free. */
+};
+
+void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
+                        const struct kfence_metadata *meta, enum kfence_error_type type);
+
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta);
+
+#endif /* MM_KFENCE_KFENCE_H */
diff --git a/mm/kfence/kfence_test.c b/mm/kfence/kfence_test.c
new file mode 100644 (file)
index 0000000..4acf425
--- /dev/null
@@ -0,0 +1,858 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test cases for the KFENCE memory safety error detector. Since the interface
+ * with which KFENCE's reports are obtained is the console, that output is what
+ * we verify. Each test case checks for the presence (or absence) of generated
+ * reports. Relies on the 'console' tracepoint to capture reports as they
+ * appear in the kernel log.
+ *
+ * Copyright (C) 2020, Google LLC.
+ * Author: Alexander Potapenko <glider@google.com>
+ *         Marco Elver <elver@google.com>
+ */
+
+#include <kunit/test.h>
+#include <linux/jiffies.h>
+#include <linux/kernel.h>
+#include <linux/kfence.h>
+#include <linux/mm.h>
+#include <linux/random.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/string.h>
+#include <linux/tracepoint.h>
+#include <trace/events/printk.h>
+
+#include "kfence.h"
+
+/* Report as observed from console. */
+static struct {
+       spinlock_t lock;
+       int nlines;
+       char lines[2][256];
+} observed = {
+       .lock = __SPIN_LOCK_UNLOCKED(observed.lock),
+};
+
+/* Probe for console output: obtains observed lines of interest. */
+static void probe_console(void *ignore, const char *buf, size_t len)
+{
+       unsigned long flags;
+       int nlines;
+
+       spin_lock_irqsave(&observed.lock, flags);
+       nlines = observed.nlines;
+
+       if (strnstr(buf, "BUG: KFENCE: ", len) && strnstr(buf, "test_", len)) {
+               /*
+                * KFENCE report and related to the test.
+                *
+                * The provided @buf is not NUL-terminated; copy no more than
+                * @len bytes and let strscpy() add the missing NUL-terminator.
+                */
+               strscpy(observed.lines[0], buf, min(len + 1, sizeof(observed.lines[0])));
+               nlines = 1;
+       } else if (nlines == 1 && (strnstr(buf, "at 0x", len) || strnstr(buf, "of 0x", len))) {
+               strscpy(observed.lines[nlines++], buf, min(len + 1, sizeof(observed.lines[0])));
+       }
+
+       WRITE_ONCE(observed.nlines, nlines); /* Publish new nlines. */
+       spin_unlock_irqrestore(&observed.lock, flags);
+}
+
+/* Check if a report related to the test exists. */
+static bool report_available(void)
+{
+       return READ_ONCE(observed.nlines) == ARRAY_SIZE(observed.lines);
+}
+
+/* Information we expect in a report. */
+struct expect_report {
+       enum kfence_error_type type; /* The type of error. */
+       void *fn; /* Function pointer to expected function where access occurred. */
+       char *addr; /* Address at which the bad access occurred. */
+       bool is_write; /* Whether the access was a write. */
+};
+
+static const char *get_access_type(const struct expect_report *r)
+{
+       return r->is_write ? "write" : "read";
+}
+
+/* Check observed report matches information in @r. */
+static bool report_matches(const struct expect_report *r)
+{
+       bool ret = false;
+       unsigned long flags;
+       typeof(observed.lines) expect;
+       const char *end;
+       char *cur;
+
+       /* Double-checked locking. */
+       if (!report_available())
+               return false;
+
+       /* Generate expected report contents. */
+
+       /* Title */
+       cur = expect[0];
+       end = &expect[0][sizeof(expect[0]) - 1];
+       switch (r->type) {
+       case KFENCE_ERROR_OOB:
+               cur += scnprintf(cur, end - cur, "BUG: KFENCE: out-of-bounds %s",
+                                get_access_type(r));
+               break;
+       case KFENCE_ERROR_UAF:
+               cur += scnprintf(cur, end - cur, "BUG: KFENCE: use-after-free %s",
+                                get_access_type(r));
+               break;
+       case KFENCE_ERROR_CORRUPTION:
+               cur += scnprintf(cur, end - cur, "BUG: KFENCE: memory corruption");
+               break;
+       case KFENCE_ERROR_INVALID:
+               cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid %s",
+                                get_access_type(r));
+               break;
+       case KFENCE_ERROR_INVALID_FREE:
+               cur += scnprintf(cur, end - cur, "BUG: KFENCE: invalid free");
+               break;
+       }
+
+       scnprintf(cur, end - cur, " in %pS", r->fn);
+       /* The exact offset won't match, remove it; also strip module name. */
+       cur = strchr(expect[0], '+');
+       if (cur)
+               *cur = '\0';
+
+       /* Access information */
+       cur = expect[1];
+       end = &expect[1][sizeof(expect[1]) - 1];
+
+       switch (r->type) {
+       case KFENCE_ERROR_OOB:
+               cur += scnprintf(cur, end - cur, "Out-of-bounds %s at", get_access_type(r));
+               break;
+       case KFENCE_ERROR_UAF:
+               cur += scnprintf(cur, end - cur, "Use-after-free %s at", get_access_type(r));
+               break;
+       case KFENCE_ERROR_CORRUPTION:
+               cur += scnprintf(cur, end - cur, "Corrupted memory at");
+               break;
+       case KFENCE_ERROR_INVALID:
+               cur += scnprintf(cur, end - cur, "Invalid %s at", get_access_type(r));
+               break;
+       case KFENCE_ERROR_INVALID_FREE:
+               cur += scnprintf(cur, end - cur, "Invalid free of");
+               break;
+       }
+
+       cur += scnprintf(cur, end - cur, " 0x%p", (void *)r->addr);
+
+       spin_lock_irqsave(&observed.lock, flags);
+       if (!report_available())
+               goto out; /* A new report is being captured. */
+
+       /* Finally match expected output to what we actually observed. */
+       ret = strstr(observed.lines[0], expect[0]) && strstr(observed.lines[1], expect[1]);
+out:
+       spin_unlock_irqrestore(&observed.lock, flags);
+       return ret;
+}
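
For example, for test_out_of_bounds_read the two expected lines assembled above come out as follows (address elided; the '+offset/module' suffix is stripped at the '+'):

    BUG: KFENCE: out-of-bounds read in test_out_of_bounds_read
    Out-of-bounds read at 0x...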
+
+/* ===== Test cases ===== */
+
+#define TEST_PRIV_WANT_MEMCACHE ((void *)1)
+
+/* Cache used by tests; if NULL, allocate from kmalloc instead. */
+static struct kmem_cache *test_cache;
+
+static size_t setup_test_cache(struct kunit *test, size_t size, slab_flags_t flags,
+                              void (*ctor)(void *))
+{
+       if (test->priv != TEST_PRIV_WANT_MEMCACHE)
+               return size;
+
+       kunit_info(test, "%s: size=%zu, ctor=%ps\n", __func__, size, ctor);
+
+       /*
+        * Use SLAB_NOLEAKTRACE to prevent merging with existing caches. Any
+        * other flag in SLAB_NEVER_MERGE also works. Use SLAB_ACCOUNT to
+        * allocate via memcg, if enabled.
+        */
+       flags |= SLAB_NOLEAKTRACE | SLAB_ACCOUNT;
+       test_cache = kmem_cache_create("test", size, 1, flags, ctor);
+       KUNIT_ASSERT_TRUE_MSG(test, test_cache, "could not create cache");
+
+       return size;
+}
+
+static void test_cache_destroy(void)
+{
+       if (!test_cache)
+               return;
+
+       kmem_cache_destroy(test_cache);
+       test_cache = NULL;
+}
+
+static inline size_t kmalloc_cache_alignment(size_t size)
+{
+       return kmalloc_caches[kmalloc_type(GFP_KERNEL)][kmalloc_index(size)]->align;
+}
+
+/* Must always inline to match stack trace against caller. */
+static __always_inline void test_free(void *ptr)
+{
+       if (test_cache)
+               kmem_cache_free(test_cache, ptr);
+       else
+               kfree(ptr);
+}
+
+/*
+ * Whether this should be a KFENCE allocation, and if so, on which side of the
+ * page the allocation (and thus the closest guard page) should be.
+ */
+enum allocation_policy {
+       ALLOCATE_ANY, /* KFENCE, any side. */
+       ALLOCATE_LEFT, /* KFENCE, left side of page. */
+       ALLOCATE_RIGHT, /* KFENCE, right side of page. */
+       ALLOCATE_NONE, /* No KFENCE allocation. */
+};
+
+/*
+ * Try to get a guarded allocation from KFENCE. Uses either kmalloc() or the
+ * current test_cache if set up.
+ */
+static void *test_alloc(struct kunit *test, size_t size, gfp_t gfp, enum allocation_policy policy)
+{
+       void *alloc;
+       unsigned long timeout, resched_after;
+       const char *policy_name;
+
+       switch (policy) {
+       case ALLOCATE_ANY:
+               policy_name = "any";
+               break;
+       case ALLOCATE_LEFT:
+               policy_name = "left";
+               break;
+       case ALLOCATE_RIGHT:
+               policy_name = "right";
+               break;
+       case ALLOCATE_NONE:
+               policy_name = "none";
+               break;
+       }
+
+       kunit_info(test, "%s: size=%zu, gfp=%x, policy=%s, cache=%i\n", __func__, size, gfp,
+                  policy_name, !!test_cache);
+
+       /*
+        * 100x the sample interval should be more than enough to ensure we get
+        * a KFENCE allocation eventually.
+        */
+       timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+       /*
+        * Especially for non-preemption kernels, ensure the allocation-gate
+        * timer can catch up: after @resched_after, every failed allocation
+        * attempt yields, to ensure the allocation-gate timer is scheduled.
+        */
+       resched_after = jiffies + msecs_to_jiffies(CONFIG_KFENCE_SAMPLE_INTERVAL);
+       do {
+               if (test_cache)
+                       alloc = kmem_cache_alloc(test_cache, gfp);
+               else
+                       alloc = kmalloc(size, gfp);
+
+               if (is_kfence_address(alloc)) {
+                       struct page *page = virt_to_head_page(alloc);
+                       struct kmem_cache *s = test_cache ?: kmalloc_caches[kmalloc_type(GFP_KERNEL)][kmalloc_index(size)];
+
+                       /*
+                        * Verify that various helpers return the right values
+                        * even for KFENCE objects; these are required so that
+                        * memcg accounting works correctly.
+                        */
+                       KUNIT_EXPECT_EQ(test, obj_to_index(s, page, alloc), 0U);
+                       KUNIT_EXPECT_EQ(test, objs_per_slab_page(s, page), 1);
+
+                       if (policy == ALLOCATE_ANY)
+                               return alloc;
+                       if (policy == ALLOCATE_LEFT && IS_ALIGNED((unsigned long)alloc, PAGE_SIZE))
+                               return alloc;
+                       if (policy == ALLOCATE_RIGHT &&
+                           !IS_ALIGNED((unsigned long)alloc, PAGE_SIZE))
+                               return alloc;
+               } else if (policy == ALLOCATE_NONE)
+                       return alloc;
+
+               test_free(alloc);
+
+               if (time_after(jiffies, resched_after))
+                       cond_resched();
+       } while (time_before(jiffies, timeout));
+
+       KUNIT_ASSERT_TRUE_MSG(test, false, "failed to allocate from KFENCE");
+       return NULL; /* Unreachable. */
+}
+
+static void test_out_of_bounds_read(struct kunit *test)
+{
+       size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_OOB,
+               .fn = test_out_of_bounds_read,
+               .is_write = false,
+       };
+       char *buf;
+
+       setup_test_cache(test, size, 0, NULL);
+
+       /*
+        * If we don't have our own cache, adjust based on alignment, so that we
+        * actually access guard pages on either side.
+        */
+       if (!test_cache)
+               size = kmalloc_cache_alignment(size);
+
+       /* Test both sides. */
+
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+       expect.addr = buf - 1;
+       READ_ONCE(*expect.addr);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+       test_free(buf);
+
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+       expect.addr = buf + size;
+       READ_ONCE(*expect.addr);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+       test_free(buf);
+}
+
+static void test_out_of_bounds_write(struct kunit *test)
+{
+       size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_OOB,
+               .fn = test_out_of_bounds_write,
+               .is_write = true,
+       };
+       char *buf;
+
+       setup_test_cache(test, size, 0, NULL);
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+       expect.addr = buf - 1;
+       WRITE_ONCE(*expect.addr, 42);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+       test_free(buf);
+}
+
+static void test_use_after_free_read(struct kunit *test)
+{
+       const size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_UAF,
+               .fn = test_use_after_free_read,
+               .is_write = false,
+       };
+
+       setup_test_cache(test, size, 0, NULL);
+       expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+       test_free(expect.addr);
+       READ_ONCE(*expect.addr);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_double_free(struct kunit *test)
+{
+       const size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_INVALID_FREE,
+               .fn = test_double_free,
+       };
+
+       setup_test_cache(test, size, 0, NULL);
+       expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+       test_free(expect.addr);
+       test_free(expect.addr); /* Double-free. */
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_invalid_addr_free(struct kunit *test)
+{
+       const size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_INVALID_FREE,
+               .fn = test_invalid_addr_free,
+       };
+       char *buf;
+
+       setup_test_cache(test, size, 0, NULL);
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+       expect.addr = buf + 1; /* Free on invalid address. */
+       test_free(expect.addr); /* Invalid address free. */
+       test_free(buf); /* No error. */
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+static void test_corruption(struct kunit *test)
+{
+       size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_CORRUPTION,
+               .fn = test_corruption,
+       };
+       char *buf;
+
+       setup_test_cache(test, size, 0, NULL);
+
+       /* Test both sides. */
+
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT);
+       expect.addr = buf + size;
+       WRITE_ONCE(*expect.addr, 42);
+       test_free(buf);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+       expect.addr = buf - 1;
+       WRITE_ONCE(*expect.addr, 42);
+       test_free(buf);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/*
+ * KFENCE is unable to detect an OOB if the allocation's alignment requirements
+ * leave a gap between the object and the guard page. Specifically, an
+ * allocation of e.g. 73 bytes is aligned to 8 bytes under SLUB and to 128
+ * bytes under SLAB. Therefore it is impossible for the allocated object to
+ * contiguously line up with the right guard page.
+ *
+ * However, we test that an access to memory beyond the gap results in KFENCE
+ * detecting an OOB access.
+ */
+static void test_kmalloc_aligned_oob_read(struct kunit *test)
+{
+       const size_t size = 73;
+       const size_t align = kmalloc_cache_alignment(size);
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_OOB,
+               .fn = test_kmalloc_aligned_oob_read,
+               .is_write = false,
+       };
+       char *buf;
+
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+
+       /*
+        * The object is offset to the right, so there won't be an OOB to the
+        * left of it.
+        */
+       READ_ONCE(*(buf - 1));
+       KUNIT_EXPECT_FALSE(test, report_available());
+
+       /*
+        * @buf must be aligned to @align, therefore buf + size belongs to the
+        * same page -> no OOB.
+        */
+       READ_ONCE(*(buf + size));
+       KUNIT_EXPECT_FALSE(test, report_available());
+
+       /* Overflowing by @align bytes will result in an OOB. */
+       expect.addr = buf + size + align;
+       READ_ONCE(*expect.addr);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+
+       test_free(buf);
+}
+
+static void test_kmalloc_aligned_oob_write(struct kunit *test)
+{
+       const size_t size = 73;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_CORRUPTION,
+               .fn = test_kmalloc_aligned_oob_write,
+       };
+       char *buf;
+
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT);
+       /*
+        * The object is offset to the right, so we won't get a page
+        * fault immediately after it.
+        */
+       expect.addr = buf + size;
+       WRITE_ONCE(*expect.addr, READ_ONCE(*expect.addr) + 1);
+       KUNIT_EXPECT_FALSE(test, report_available());
+       test_free(buf);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test cache shrinking and destroying with KFENCE. */
+static void test_shrink_memcache(struct kunit *test)
+{
+       const size_t size = 32;
+       void *buf;
+
+       setup_test_cache(test, size, 0, NULL);
+       KUNIT_EXPECT_TRUE(test, test_cache);
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+       kmem_cache_shrink(test_cache);
+       test_free(buf);
+
+       KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+static void ctor_set_x(void *obj)
+{
+       /* Every object has at least 8 bytes. */
+       memset(obj, 'x', 8);
+}
+
+/* Ensure that SL*B does not modify KFENCE objects on bulk free. */
+static void test_free_bulk(struct kunit *test)
+{
+       int iter;
+
+       for (iter = 0; iter < 5; iter++) {
+               const size_t size = setup_test_cache(test, 8 + prandom_u32_max(300), 0,
+                                                    (iter & 1) ? ctor_set_x : NULL);
+               void *objects[] = {
+                       test_alloc(test, size, GFP_KERNEL, ALLOCATE_RIGHT),
+                       test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+                       test_alloc(test, size, GFP_KERNEL, ALLOCATE_LEFT),
+                       test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+                       test_alloc(test, size, GFP_KERNEL, ALLOCATE_NONE),
+               };
+
+               kmem_cache_free_bulk(test_cache, ARRAY_SIZE(objects), objects);
+               KUNIT_ASSERT_FALSE(test, report_available());
+               test_cache_destroy();
+       }
+}
+
+/* Test init-on-free works. */
+static void test_init_on_free(struct kunit *test)
+{
+       const size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_UAF,
+               .fn = test_init_on_free,
+               .is_write = false,
+       };
+       int i;
+
+       if (!IS_ENABLED(CONFIG_INIT_ON_FREE_DEFAULT_ON))
+               return;
+       /* Assume it hasn't been disabled on the command line. */
+
+       setup_test_cache(test, size, 0, NULL);
+       expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+       for (i = 0; i < size; i++)
+               expect.addr[i] = i + 1;
+       test_free(expect.addr);
+
+       for (i = 0; i < size; i++) {
+               /*
+                * This may fail if the page was recycled by KFENCE and then
+                * written to again -- this, however, is nearly impossible
+                * with a default config.
+                */
+               KUNIT_EXPECT_EQ(test, expect.addr[i], (char)0);
+
+               if (!i) /* Only check the first access so the test doesn't fail if the page is ever re-protected. */
+                       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+       }
+}
+
+/* Ensure that constructors work properly. */
+static void test_memcache_ctor(struct kunit *test)
+{
+       const size_t size = 32;
+       char *buf;
+       int i;
+
+       setup_test_cache(test, size, 0, ctor_set_x);
+       buf = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+
+       for (i = 0; i < 8; i++)
+               KUNIT_EXPECT_EQ(test, buf[i], (char)'x');
+
+       test_free(buf);
+
+       KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+/* Test that memory is zeroed if requested. */
+static void test_gfpzero(struct kunit *test)
+{
+       const size_t size = PAGE_SIZE; /* PAGE_SIZE so we can use ALLOCATE_ANY. */
+       char *buf1, *buf2;
+       int i;
+
+       if (CONFIG_KFENCE_SAMPLE_INTERVAL > 100) {
+               kunit_warn(test, "skipping ... would take too long\n");
+               return;
+       }
+
+       setup_test_cache(test, size, 0, NULL);
+       buf1 = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+       for (i = 0; i < size; i++)
+               buf1[i] = i + 1;
+       test_free(buf1);
+
+       /* Try to get same address again -- this can take a while. */
+       for (i = 0;; i++) {
+               buf2 = test_alloc(test, size, GFP_KERNEL | __GFP_ZERO, ALLOCATE_ANY);
+               if (buf1 == buf2)
+                       break;
+               test_free(buf2);
+
+               if (i == CONFIG_KFENCE_NUM_OBJECTS) {
+                       kunit_warn(test, "giving up ... cannot get same object back\n");
+                       return;
+               }
+       }
+
+       for (i = 0; i < size; i++)
+               KUNIT_EXPECT_EQ(test, buf2[i], (char)0);
+
+       test_free(buf2);
+
+       KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+static void test_invalid_access(struct kunit *test)
+{
+       const struct expect_report expect = {
+               .type = KFENCE_ERROR_INVALID,
+               .fn = test_invalid_access,
+               .addr = &__kfence_pool[10],
+               .is_write = false,
+       };
+
+       READ_ONCE(__kfence_pool[10]);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test SLAB_TYPESAFE_BY_RCU works. */
+static void test_memcache_typesafe_by_rcu(struct kunit *test)
+{
+       const size_t size = 32;
+       struct expect_report expect = {
+               .type = KFENCE_ERROR_UAF,
+               .fn = test_memcache_typesafe_by_rcu,
+               .is_write = false,
+       };
+
+       setup_test_cache(test, size, SLAB_TYPESAFE_BY_RCU, NULL);
+       KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
+
+       expect.addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY);
+       *expect.addr = 42;
+
+       rcu_read_lock();
+       test_free(expect.addr);
+       KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
+       /*
+        * Up to this point, memory should not have been freed yet, and
+        * therefore there should be no KFENCE report from the above access.
+        */
+       rcu_read_unlock();
+
+       /* Above access to @expect.addr should not have generated a report! */
+       KUNIT_EXPECT_FALSE(test, report_available());
+
+       /* Only after rcu_barrier() is the memory guaranteed to be freed. */
+       rcu_barrier();
+
+       /* Expect use-after-free. */
+       KUNIT_EXPECT_EQ(test, *expect.addr, (char)42);
+       KUNIT_EXPECT_TRUE(test, report_matches(&expect));
+}
+
+/* Test krealloc(). */
+static void test_krealloc(struct kunit *test)
+{
+       const size_t size = 32;
+       const struct expect_report expect = {
+               .type = KFENCE_ERROR_UAF,
+               .fn = test_krealloc,
+               .addr = test_alloc(test, size, GFP_KERNEL, ALLOCATE_ANY),
+               .is_write = false,
+       };
+       char *buf = expect.addr;
+       int i;
+
+       KUNIT_EXPECT_FALSE(test, test_cache);
+       KUNIT_EXPECT_EQ(test, ksize(buf), size); /* Precise size match after KFENCE alloc. */
+       for (i = 0; i < size; i++)
+               buf[i] = i + 1;
+
+       /* Check that we successfully change the size. */
+       buf = krealloc(buf, size * 3, GFP_KERNEL); /* Grow. */
+       /* Note: Might no longer be a KFENCE alloc. */
+       KUNIT_EXPECT_GE(test, ksize(buf), size * 3);
+       for (i = 0; i < size; i++)
+               KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
+       for (; i < size * 3; i++) /* Fill the extra bytes. */
+               buf[i] = i + 1;
+
+       buf = krealloc(buf, size * 2, GFP_KERNEL); /* Shrink. */
+       KUNIT_EXPECT_GE(test, ksize(buf), size * 2);
+       for (i = 0; i < size * 2; i++)
+               KUNIT_EXPECT_EQ(test, buf[i], (char)(i + 1));
+
+       buf = krealloc(buf, 0, GFP_KERNEL); /* Free. */
+       KUNIT_EXPECT_EQ(test, (unsigned long)buf, (unsigned long)ZERO_SIZE_PTR);
+       KUNIT_ASSERT_FALSE(test, report_available()); /* No reports yet! */
+
+       READ_ONCE(*expect.addr); /* Ensure krealloc() actually freed earlier KFENCE object. */
+       KUNIT_ASSERT_TRUE(test, report_matches(&expect));
+}
+
+/* Test that some objects from a bulk allocation belong to KFENCE pool. */
+static void test_memcache_alloc_bulk(struct kunit *test)
+{
+       const size_t size = 32;
+       bool pass = false;
+       unsigned long timeout;
+
+       setup_test_cache(test, size, 0, NULL);
+       KUNIT_EXPECT_TRUE(test, test_cache); /* Want memcache. */
+       /*
+        * 100x the sample interval should be more than enough to ensure we get
+        * a KFENCE allocation eventually.
+        */
+       timeout = jiffies + msecs_to_jiffies(100 * CONFIG_KFENCE_SAMPLE_INTERVAL);
+       do {
+               void *objects[100];
+               int i, num = kmem_cache_alloc_bulk(test_cache, GFP_ATOMIC, ARRAY_SIZE(objects),
+                                                  objects);
+               if (!num)
+                       continue;
+               for (i = 0; i < ARRAY_SIZE(objects); i++) {
+                       if (is_kfence_address(objects[i])) {
+                               pass = true;
+                               break;
+                       }
+               }
+               kmem_cache_free_bulk(test_cache, num, objects);
+               /*
+                * kmem_cache_alloc_bulk() disables interrupts, and calling it
+                * in a tight loop may not give KFENCE a chance to switch the
+                * static branch. Call cond_resched() to let KFENCE chime in.
+                */
+               cond_resched();
+       } while (!pass && time_before(jiffies, timeout));
+
+       KUNIT_EXPECT_TRUE(test, pass);
+       KUNIT_EXPECT_FALSE(test, report_available());
+}
+
+/*
+ * KUnit does not support passing arguments to tests, so we encode additional
+ * info in the name. Set up 2 tests per test case: one using the default
+ * allocator, and another using a custom memcache (suffix '-memcache').
+ */
+#define KFENCE_KUNIT_CASE(test_name)                                           \
+       { .run_case = test_name, .name = #test_name },                          \
+       { .run_case = test_name, .name = #test_name "-memcache" }
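+
+/*
+ * For illustration (not part of the test logic): KFENCE_KUNIT_CASE(test_double_free)
+ * expands to the two entries
+ *
+ *   { .run_case = test_double_free, .name = "test_double_free" },
+ *   { .run_case = test_double_free, .name = "test_double_free-memcache" },
+ *
+ * and test_init() below keys off the "-memcache" suffix in the name to decide
+ * whether a test cache should be set up.
+ */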
+
+static struct kunit_case kfence_test_cases[] = {
+       KFENCE_KUNIT_CASE(test_out_of_bounds_read),
+       KFENCE_KUNIT_CASE(test_out_of_bounds_write),
+       KFENCE_KUNIT_CASE(test_use_after_free_read),
+       KFENCE_KUNIT_CASE(test_double_free),
+       KFENCE_KUNIT_CASE(test_invalid_addr_free),
+       KFENCE_KUNIT_CASE(test_corruption),
+       KFENCE_KUNIT_CASE(test_free_bulk),
+       KFENCE_KUNIT_CASE(test_init_on_free),
+       KUNIT_CASE(test_kmalloc_aligned_oob_read),
+       KUNIT_CASE(test_kmalloc_aligned_oob_write),
+       KUNIT_CASE(test_shrink_memcache),
+       KUNIT_CASE(test_memcache_ctor),
+       KUNIT_CASE(test_invalid_access),
+       KUNIT_CASE(test_gfpzero),
+       KUNIT_CASE(test_memcache_typesafe_by_rcu),
+       KUNIT_CASE(test_krealloc),
+       KUNIT_CASE(test_memcache_alloc_bulk),
+       {},
+};
+
+/* ===== End test cases ===== */
+
+static int test_init(struct kunit *test)
+{
+       unsigned long flags;
+       int i;
+
+       spin_lock_irqsave(&observed.lock, flags);
+       for (i = 0; i < ARRAY_SIZE(observed.lines); i++)
+               observed.lines[i][0] = '\0';
+       observed.nlines = 0;
+       spin_unlock_irqrestore(&observed.lock, flags);
+
+       /* Any test with 'memcache' in its name will want a memcache. */
+       if (strstr(test->name, "memcache"))
+               test->priv = TEST_PRIV_WANT_MEMCACHE;
+       else
+               test->priv = NULL;
+
+       return 0;
+}
+
+static void test_exit(struct kunit *test)
+{
+       test_cache_destroy();
+}
+
+static struct kunit_suite kfence_test_suite = {
+       .name = "kfence",
+       .test_cases = kfence_test_cases,
+       .init = test_init,
+       .exit = test_exit,
+};
+static struct kunit_suite *kfence_test_suites[] = { &kfence_test_suite, NULL };
+
+static void register_tracepoints(struct tracepoint *tp, void *ignore)
+{
+       check_trace_callback_type_console(probe_console);
+       if (!strcmp(tp->name, "console"))
+               WARN_ON(tracepoint_probe_register(tp, probe_console, NULL));
+}
+
+static void unregister_tracepoints(struct tracepoint *tp, void *ignore)
+{
+       if (!strcmp(tp->name, "console"))
+               tracepoint_probe_unregister(tp, probe_console, NULL);
+}
+
+/*
+ * We only want to do tracepoint setup and teardown once, therefore we have to
+ * customize the init and exit functions and cannot rely on kunit_test_suite().
+ */
+static int __init kfence_test_init(void)
+{
+       /*
+        * Because we want to be able to build the test as a module, we need to
+        * iterate through all known tracepoints, since the static registration
+        * won't work here.
+        */
+       for_each_kernel_tracepoint(register_tracepoints, NULL);
+       return __kunit_test_suites_init(kfence_test_suites);
+}
+
+static void kfence_test_exit(void)
+{
+       __kunit_test_suites_exit(kfence_test_suites);
+       for_each_kernel_tracepoint(unregister_tracepoints, NULL);
+       tracepoint_synchronize_unregister();
+}
+
+late_initcall(kfence_test_init);
+module_exit(kfence_test_exit);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Alexander Potapenko <glider@google.com>, Marco Elver <elver@google.com>");
diff --git a/mm/kfence/report.c b/mm/kfence/report.c
new file mode 100644 (file)
index 0000000..e3f7145
--- /dev/null
@@ -0,0 +1,268 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * KFENCE reporting.
+ *
+ * Copyright (C) 2020, Google LLC.
+ */
+
+#include <stdarg.h>
+
+#include <linux/kernel.h>
+#include <linux/lockdep.h>
+#include <linux/printk.h>
+#include <linux/sched/debug.h>
+#include <linux/seq_file.h>
+#include <linux/stacktrace.h>
+#include <linux/string.h>
+#include <trace/events/error_report.h>
+
+#include <asm/kfence.h>
+
+#include "kfence.h"
+
+/* May be overridden by <asm/kfence.h>. */
+#ifndef ARCH_FUNC_PREFIX
+#define ARCH_FUNC_PREFIX ""
+#endif
+
+extern bool no_hash_pointers;
+
+/* Helper function to print either to a seq_file or to the console. */
+__printf(2, 3)
+static void seq_con_printf(struct seq_file *seq, const char *fmt, ...)
+{
+       va_list args;
+
+       va_start(args, fmt);
+       if (seq)
+               seq_vprintf(seq, fmt, args);
+       else
+               vprintk(fmt, args);
+       va_end(args);
+}
+
+/*
+ * Get the number of stack entries to skip in order to get past MM internals.
+ * @type is optional, and if set to NULL, assumes an allocation or free stack.
+ */
+static int get_stack_skipnr(const unsigned long stack_entries[], int num_entries,
+                           const enum kfence_error_type *type)
+{
+       char buf[64];
+       int skipnr, fallback = 0;
+
+       if (type) {
+               /* Depending on error type, find different stack entries. */
+               switch (*type) {
+               case KFENCE_ERROR_UAF:
+               case KFENCE_ERROR_OOB:
+               case KFENCE_ERROR_INVALID:
+                       /*
+                        * kfence_handle_page_fault() may be called with pt_regs
+                        * set to NULL; in that case we'll simply show the full
+                        * stack trace.
+                        */
+                       return 0;
+               case KFENCE_ERROR_CORRUPTION:
+               case KFENCE_ERROR_INVALID_FREE:
+                       break;
+               }
+       }
+
+       for (skipnr = 0; skipnr < num_entries; skipnr++) {
+               int len = scnprintf(buf, sizeof(buf), "%ps", (void *)stack_entries[skipnr]);
+
+               if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfence_") ||
+                   str_has_prefix(buf, ARCH_FUNC_PREFIX "__kfence_") ||
+                   !strncmp(buf, ARCH_FUNC_PREFIX "__slab_free", len)) {
+                       /*
+                        * In case of tail calls from any of the below
+                        * to any of the above.
+                        */
+                       fallback = skipnr + 1;
+               }
+
+               /* Also match the *_bulk() variants, by checking prefixes only. */
+               if (str_has_prefix(buf, ARCH_FUNC_PREFIX "kfree") ||
+                   str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_free") ||
+                   str_has_prefix(buf, ARCH_FUNC_PREFIX "__kmalloc") ||
+                   str_has_prefix(buf, ARCH_FUNC_PREFIX "kmem_cache_alloc"))
+                       goto found;
+       }
+       if (fallback < num_entries)
+               return fallback;
+found:
+       skipnr++;
+       return skipnr < num_entries ? skipnr : 0;
+}
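+
+/*
+ * Illustration (hypothetical frames, not from a real report): for a free
+ * stack recorded via kfree(), the saved entries might be
+ *
+ *   kfence_guarded_free+0x...
+ *   __kfence_free+0x...
+ *   kfree+0x...                  <- prefix match, goto found
+ *   test_use_after_free_read+0x...
+ *
+ * so get_stack_skipnr() returns 3 and the printed trace starts at the
+ * kfree() caller rather than at KFENCE/allocator internals.
+ */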
+
+static void kfence_print_stack(struct seq_file *seq, const struct kfence_metadata *meta,
+                              bool show_alloc)
+{
+       const struct kfence_track *track = show_alloc ? &meta->alloc_track : &meta->free_track;
+
+       if (track->num_stack_entries) {
+               /* Skip allocation/free internals stack. */
+               int i = get_stack_skipnr(track->stack_entries, track->num_stack_entries, NULL);
+
+               /* stack_trace_seq_print() does not exist; open code our own. */
+               for (; i < track->num_stack_entries; i++)
+                       seq_con_printf(seq, " %pS\n", (void *)track->stack_entries[i]);
+       } else {
+               seq_con_printf(seq, " no %s stack\n", show_alloc ? "allocation" : "deallocation");
+       }
+}
+
+void kfence_print_object(struct seq_file *seq, const struct kfence_metadata *meta)
+{
+       const int size = abs(meta->size);
+       const unsigned long start = meta->addr;
+       const struct kmem_cache *const cache = meta->cache;
+
+       lockdep_assert_held(&meta->lock);
+
+       if (meta->state == KFENCE_OBJECT_UNUSED) {
+               seq_con_printf(seq, "kfence-#%td unused\n", meta - kfence_metadata);
+               return;
+       }
+
+       seq_con_printf(seq,
+                      "kfence-#%td [0x%p-0x%p"
+                      ", size=%d, cache=%s] allocated by task %d:\n",
+                      meta - kfence_metadata, (void *)start, (void *)(start + size - 1), size,
+                      (cache && cache->name) ? cache->name : "<destroyed>", meta->alloc_track.pid);
+       kfence_print_stack(seq, meta, true);
+
+       if (meta->state == KFENCE_OBJECT_FREED) {
+               seq_con_printf(seq, "\nfreed by task %d:\n", meta->free_track.pid);
+               kfence_print_stack(seq, meta, false);
+       }
+}
+
+/*
+ * Show bytes at @address that differ from the expected canary values, up to
+ * @bytes_to_show.
+ */
+static void print_diff_canary(unsigned long address, size_t bytes_to_show,
+                             const struct kfence_metadata *meta)
+{
+       const unsigned long show_until_addr = address + bytes_to_show;
+       const u8 *cur, *end;
+
+       /* Do not show contents of object nor read into following guard page. */
+       end = (const u8 *)(address < meta->addr ? min(show_until_addr, meta->addr)
+                                               : min(show_until_addr, PAGE_ALIGN(address)));
+
+       pr_cont("[");
+       for (cur = (const u8 *)address; cur < end; cur++) {
+               if (*cur == KFENCE_CANARY_PATTERN(cur))
+                       pr_cont(" .");
+               else if (no_hash_pointers)
+                       pr_cont(" 0x%02x", *cur);
+               else /* Do not leak kernel memory in non-debug builds. */
+                       pr_cont(" !");
+       }
+       pr_cont(" ]");
+}
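+
+/*
+ * Illustration (byte values made up): in a non-debug build, the output for 16
+ * shown bytes with one mismatch looks like
+ *
+ *   [ . . . ! . . . . . . . . . . . . ]
+ *
+ * where '.' is a byte still matching KFENCE_CANARY_PATTERN() and '!' hides
+ * the mismatching value; with no_hash_pointers set, the value is printed as
+ * " 0x%02x" instead.
+ */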
+
+static const char *get_access_type(bool is_write)
+{
+       return is_write ? "write" : "read";
+}
+
+void kfence_report_error(unsigned long address, bool is_write, struct pt_regs *regs,
+                        const struct kfence_metadata *meta, enum kfence_error_type type)
+{
+       unsigned long stack_entries[KFENCE_STACK_DEPTH] = { 0 };
+       const ptrdiff_t object_index = meta ? meta - kfence_metadata : -1;
+       int num_stack_entries;
+       int skipnr = 0;
+
+       if (regs) {
+               num_stack_entries = stack_trace_save_regs(regs, stack_entries, KFENCE_STACK_DEPTH, 0);
+       } else {
+               num_stack_entries = stack_trace_save(stack_entries, KFENCE_STACK_DEPTH, 1);
+               skipnr = get_stack_skipnr(stack_entries, num_stack_entries, &type);
+       }
+
+       /* Require non-NULL meta, except if KFENCE_ERROR_INVALID. */
+       if (WARN_ON(type != KFENCE_ERROR_INVALID && !meta))
+               return;
+
+       if (meta)
+               lockdep_assert_held(&meta->lock);
+       /*
+        * Because we may generate reports in printk-unfriendly parts of the
+        * kernel, such as scheduler code, the use of printk() could deadlock.
+        * Until such time that all printing code here is safe in all parts of
+        * the kernel, accept the risk, and just get our message out (given the
+        * system might already behave unpredictably due to the memory error).
+        * As such, also disable lockdep to hide warnings, and avoid disabling
+        * lockdep for the rest of the kernel.
+        */
+       lockdep_off();
+
+       pr_err("==================================================================\n");
+       /* Print report header. */
+       switch (type) {
+       case KFENCE_ERROR_OOB: {
+               const bool left_of_object = address < meta->addr;
+
+               pr_err("BUG: KFENCE: out-of-bounds %s in %pS\n\n", get_access_type(is_write),
+                      (void *)stack_entries[skipnr]);
+               pr_err("Out-of-bounds %s at 0x%p (%luB %s of kfence-#%td):\n",
+                      get_access_type(is_write), (void *)address,
+                      left_of_object ? meta->addr - address : address - meta->addr,
+                      left_of_object ? "left" : "right", object_index);
+               break;
+       }
+       case KFENCE_ERROR_UAF:
+               pr_err("BUG: KFENCE: use-after-free %s in %pS\n\n", get_access_type(is_write),
+                      (void *)stack_entries[skipnr]);
+               pr_err("Use-after-free %s at 0x%p (in kfence-#%td):\n",
+                      get_access_type(is_write), (void *)address, object_index);
+               break;
+       case KFENCE_ERROR_CORRUPTION:
+               pr_err("BUG: KFENCE: memory corruption in %pS\n\n", (void *)stack_entries[skipnr]);
+               pr_err("Corrupted memory at 0x%p ", (void *)address);
+               print_diff_canary(address, 16, meta);
+               pr_cont(" (in kfence-#%td):\n", object_index);
+               break;
+       case KFENCE_ERROR_INVALID:
+               pr_err("BUG: KFENCE: invalid %s in %pS\n\n", get_access_type(is_write),
+                      (void *)stack_entries[skipnr]);
+               pr_err("Invalid %s at 0x%p:\n", get_access_type(is_write),
+                      (void *)address);
+               break;
+       case KFENCE_ERROR_INVALID_FREE:
+               pr_err("BUG: KFENCE: invalid free in %pS\n\n", (void *)stack_entries[skipnr]);
+               pr_err("Invalid free of 0x%p (in kfence-#%td):\n", (void *)address,
+                      object_index);
+               break;
+       }
+
+       /* Print stack trace and object info. */
+       stack_trace_print(stack_entries + skipnr, num_stack_entries - skipnr, 0);
+
+       if (meta) {
+               pr_err("\n");
+               kfence_print_object(NULL, meta);
+       }
+
+       /* Print report footer. */
+       pr_err("\n");
+       if (no_hash_pointers && regs)
+               show_regs(regs);
+       else
+               dump_stack_print_info(KERN_ERR);
+       trace_error_report_end(ERROR_DETECTOR_KFENCE, address);
+       pr_err("==================================================================\n");
+
+       lockdep_on();
+
+       if (panic_on_warn)
+               panic("panic_on_warn set ...\n");
+
+       /* We encountered a memory unsafety error, taint the kernel! */
+       add_taint(TAINT_BAD_PAGE, LOCKDEP_STILL_OK);
+}
index fb0fdae..a7d6cb9 100644 (file)
@@ -442,18 +442,28 @@ static inline int khugepaged_test_exit(struct mm_struct *mm)
 static bool hugepage_vma_check(struct vm_area_struct *vma,
                               unsigned long vm_flags)
 {
-       if ((!(vm_flags & VM_HUGEPAGE) && !khugepaged_always()) ||
-           (vm_flags & VM_NOHUGEPAGE) ||
+       /* Explicitly disabled through madvise. */
+       if ((vm_flags & VM_NOHUGEPAGE) ||
            test_bit(MMF_DISABLE_THP, &vma->vm_mm->flags))
                return false;
 
-       if (shmem_file(vma->vm_file) ||
-           (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
-            vma->vm_file &&
-            (vm_flags & VM_DENYWRITE))) {
+       /* Enabled via shmem mount options or sysfs settings. */
+       if (shmem_file(vma->vm_file) && shmem_huge_enabled(vma)) {
                return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
                                HPAGE_PMD_NR);
        }
+
+       /* THP settings require madvise. */
+       if (!(vm_flags & VM_HUGEPAGE) && !khugepaged_always())
+               return false;
+
+       /* Read-only file mappings need to be aligned for THP to work. */
+       if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && vma->vm_file &&
+           (vm_flags & VM_DENYWRITE)) {
+               return IS_ALIGNED((vma->vm_start >> PAGE_SHIFT) - vma->vm_pgoff,
+                               HPAGE_PMD_NR);
+       }
+
        if (!vma->anon_vma || vma->vm_ops)
                return false;
        if (vma_is_temporary_stack(vma))
@@ -1643,6 +1653,7 @@ static void collapse_file(struct mm_struct *mm,
        XA_STATE_ORDER(xas, &mapping->i_pages, start, HPAGE_PMD_ORDER);
        int nr_none = 0, result = SCAN_SUCCEED;
        bool is_shmem = shmem_file(file);
+       int nr;
 
        VM_BUG_ON(!IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) && !is_shmem);
        VM_BUG_ON(start & (HPAGE_PMD_NR - 1));
@@ -1854,11 +1865,12 @@ out_unlock:
                put_page(page);
                goto xa_unlocked;
        }
+       nr = thp_nr_pages(new_page);
 
        if (is_shmem)
-               __inc_lruvec_page_state(new_page, NR_SHMEM_THPS);
+               __mod_lruvec_page_state(new_page, NR_SHMEM_THPS, nr);
        else {
-               __inc_lruvec_page_state(new_page, NR_FILE_THPS);
+               __mod_lruvec_page_state(new_page, NR_FILE_THPS, nr);
                filemap_nr_thps_inc(mapping);
        }
 
index c0014d3..fe6e3ae 100644 (file)
@@ -97,6 +97,7 @@
 #include <linux/atomic.h>
 
 #include <linux/kasan.h>
+#include <linux/kfence.h>
 #include <linux/kmemleak.h>
 #include <linux/memory_hotplug.h>
 
@@ -589,7 +590,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size,
        atomic_set(&object->use_count, 1);
        object->flags = OBJECT_ALLOCATED;
        object->pointer = ptr;
-       object->size = size;
+       object->size = kfence_ksize((void *)ptr) ?: size;
        object->excess_ref = 0;
        object->min_count = min_count;
        object->count = 0;                      /* white color initially */
index fe23008..6f067b6 100644 (file)
@@ -373,21 +373,13 @@ static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
        struct list_lru_memcg *memcg_lrus;
        /*
         * This is called when shrinker has already been unregistered,
-        * and nobody can use it. So, there is no need to use kvfree_rcu_local().
+        * and nobody can use it. So, there is no need to use kvfree_rcu().
         */
        memcg_lrus = rcu_dereference_protected(nlru->memcg_lrus, true);
        __memcg_destroy_list_lru_node(memcg_lrus, 0, memcg_nr_cache_ids);
        kvfree(memcg_lrus);
 }
 
-static void kvfree_rcu_local(struct rcu_head *head)
-{
-       struct list_lru_memcg *mlru;
-
-       mlru = container_of(head, struct list_lru_memcg, rcu);
-       kvfree(mlru);
-}
-
 static int memcg_update_list_lru_node(struct list_lru_node *nlru,
                                      int old_size, int new_size)
 {
@@ -419,7 +411,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
        rcu_assign_pointer(nlru->memcg_lrus, new);
        spin_unlock_irq(&nlru->lock);
 
-       call_rcu(&old->rcu, kvfree_rcu_local);
+       kvfree_rcu(old, rcu);
        return 0;
 }
 
index 0938fd3..01fef79 100644 (file)
@@ -539,8 +539,9 @@ static inline bool can_do_pageout(struct vm_area_struct *vma)
         * otherwise we'd be including shared non-exclusive mappings, which
         * opens a side channel.
         */
-       return inode_owner_or_capable(file_inode(vma->vm_file)) ||
-               inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
+       return inode_owner_or_capable(&init_user_ns,
+                                     file_inode(vma->vm_file)) ||
+              file_permission(vma->vm_file, MAY_WRITE) == 0;
 }
 
 static long madvise_pageout(struct vm_area_struct *vma,
@@ -1197,12 +1198,22 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
                goto release_task;
        }
 
-       mm = mm_access(task, PTRACE_MODE_ATTACH_FSCREDS);
+       /* Require PTRACE_MODE_READ to avoid leaking ASLR metadata. */
+       mm = mm_access(task, PTRACE_MODE_READ_FSCREDS);
        if (IS_ERR_OR_NULL(mm)) {
                ret = IS_ERR(mm) ? PTR_ERR(mm) : -ESRCH;
                goto release_task;
        }
 
+       /*
+        * Require CAP_SYS_NICE for influencing process performance. Note that
+        * only non-destructive hints are currently supported.
+        */
+       if (!capable(CAP_SYS_NICE)) {
+               ret = -EPERM;
+               goto release_mm;
+       }
+
        total_len = iov_iter_count(&iter);
 
        while (iov_iter_count(&iter)) {
@@ -1217,6 +1228,7 @@ SYSCALL_DEFINE5(process_madvise, int, pidfd, const struct iovec __user *, vec,
        if (ret == 0)
                ret = total_len - iov_iter_count(&iter);
 
+release_mm:
        mmput(mm);
 release_task:
        put_task_struct(task);
index 913c2b9..e064ac0 100644 (file)
@@ -255,6 +255,11 @@ struct cgroup_subsys_state *vmpressure_to_css(struct vmpressure *vmpr)
 #ifdef CONFIG_MEMCG_KMEM
 extern spinlock_t css_set_lock;
 
+static int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
+                              unsigned int nr_pages);
+static void __memcg_kmem_uncharge(struct mem_cgroup *memcg,
+                                 unsigned int nr_pages);
+
 static void obj_cgroup_release(struct percpu_ref *ref)
 {
        struct obj_cgroup *objcg = container_of(ref, struct obj_cgroup, refcnt);
@@ -447,8 +452,7 @@ static void memcg_free_shrinker_maps(struct mem_cgroup *memcg)
        for_each_node(nid) {
                pn = mem_cgroup_nodeinfo(memcg, nid);
                map = rcu_dereference_protected(pn->shrinker_map, true);
-               if (map)
-                       kvfree(map);
+               kvfree(map);
                rcu_assign_pointer(pn->shrinker_map, NULL);
        }
 }
@@ -1043,29 +1047,6 @@ struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
 }
 EXPORT_SYMBOL(get_mem_cgroup_from_mm);
 
-/**
- * get_mem_cgroup_from_page: Obtain a reference on given page's memcg.
- * @page: page from which memcg should be extracted.
- *
- * Obtain a reference on page->memcg and returns it if successful. Otherwise
- * root_mem_cgroup is returned.
- */
-struct mem_cgroup *get_mem_cgroup_from_page(struct page *page)
-{
-       struct mem_cgroup *memcg = page_memcg(page);
-
-       if (mem_cgroup_disabled())
-               return NULL;
-
-       rcu_read_lock();
-       /* Page should not get uncharged and freed memcg under us. */
-       if (!memcg || WARN_ON_ONCE(!css_tryget(&memcg->css)))
-               memcg = root_mem_cgroup;
-       rcu_read_unlock();
-       return memcg;
-}
-EXPORT_SYMBOL(get_mem_cgroup_from_page);
-
 static __always_inline struct mem_cgroup *active_memcg(void)
 {
        if (in_interrupt())
@@ -1080,13 +1061,9 @@ static __always_inline struct mem_cgroup *get_active_memcg(void)
 
        rcu_read_lock();
        memcg = active_memcg();
-       if (memcg) {
-               /* current->active_memcg must hold a ref. */
-               if (WARN_ON_ONCE(!css_tryget(&memcg->css)))
-                       memcg = root_mem_cgroup;
-               else
-                       memcg = current->active_memcg;
-       }
+       /* remote memcg must hold a ref. */
+       if (memcg && WARN_ON_ONCE(!css_tryget(&memcg->css)))
+               memcg = root_mem_cgroup;
        rcu_read_unlock();
 
        return memcg;
@@ -1346,20 +1323,19 @@ void lruvec_memcg_debug(struct lruvec *lruvec, struct page *page)
  * lock_page_lruvec - lock and return lruvec for a given page.
  * @page: the page
  *
- * This series functions should be used in either conditions:
- * PageLRU is cleared or unset
- * or page->_refcount is zero
- * or page is locked.
+ * These functions are safe to use under any of the following conditions:
+ * - page locked
+ * - PageLRU cleared
+ * - lock_page_memcg()
+ * - page->_refcount is zero
  */
 struct lruvec *lock_page_lruvec(struct page *page)
 {
        struct lruvec *lruvec;
        struct pglist_data *pgdat = page_pgdat(page);
 
-       rcu_read_lock();
        lruvec = mem_cgroup_page_lruvec(page, pgdat);
        spin_lock(&lruvec->lru_lock);
-       rcu_read_unlock();
 
        lruvec_memcg_debug(lruvec, page);
 
@@ -1371,10 +1347,8 @@ struct lruvec *lock_page_lruvec_irq(struct page *page)
        struct lruvec *lruvec;
        struct pglist_data *pgdat = page_pgdat(page);
 
-       rcu_read_lock();
        lruvec = mem_cgroup_page_lruvec(page, pgdat);
        spin_lock_irq(&lruvec->lru_lock);
-       rcu_read_unlock();
 
        lruvec_memcg_debug(lruvec, page);
 
@@ -1386,10 +1360,8 @@ struct lruvec *lock_page_lruvec_irqsave(struct page *page, unsigned long *flags)
        struct lruvec *lruvec;
        struct pglist_data *pgdat = page_pgdat(page);
 
-       rcu_read_lock();
        lruvec = mem_cgroup_page_lruvec(page, pgdat);
        spin_lock_irqsave(&lruvec->lru_lock, *flags);
-       rcu_read_unlock();
 
        lruvec_memcg_debug(lruvec, page);
 
@@ -1512,72 +1484,73 @@ static bool mem_cgroup_wait_acct_move(struct mem_cgroup *memcg)
 
 struct memory_stat {
        const char *name;
-       unsigned int ratio;
        unsigned int idx;
 };
 
-static struct memory_stat memory_stats[] = {
-       { "anon", PAGE_SIZE, NR_ANON_MAPPED },
-       { "file", PAGE_SIZE, NR_FILE_PAGES },
-       { "kernel_stack", 1024, NR_KERNEL_STACK_KB },
-       { "pagetables", PAGE_SIZE, NR_PAGETABLE },
-       { "percpu", 1, MEMCG_PERCPU_B },
-       { "sock", PAGE_SIZE, MEMCG_SOCK },
-       { "shmem", PAGE_SIZE, NR_SHMEM },
-       { "file_mapped", PAGE_SIZE, NR_FILE_MAPPED },
-       { "file_dirty", PAGE_SIZE, NR_FILE_DIRTY },
-       { "file_writeback", PAGE_SIZE, NR_WRITEBACK },
+static const struct memory_stat memory_stats[] = {
+       { "anon",                       NR_ANON_MAPPED                  },
+       { "file",                       NR_FILE_PAGES                   },
+       { "kernel_stack",               NR_KERNEL_STACK_KB              },
+       { "pagetables",                 NR_PAGETABLE                    },
+       { "percpu",                     MEMCG_PERCPU_B                  },
+       { "sock",                       MEMCG_SOCK                      },
+       { "shmem",                      NR_SHMEM                        },
+       { "file_mapped",                NR_FILE_MAPPED                  },
+       { "file_dirty",                 NR_FILE_DIRTY                   },
+       { "file_writeback",             NR_WRITEBACK                    },
+#ifdef CONFIG_SWAP
+       { "swapcached",                 NR_SWAPCACHE                    },
+#endif
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-       /*
-        * The ratio will be initialized in memory_stats_init(). Because
-        * on some architectures, the macro of HPAGE_PMD_SIZE is not
-        * constant(e.g. powerpc).
-        */
-       { "anon_thp", 0, NR_ANON_THPS },
-       { "file_thp", 0, NR_FILE_THPS },
-       { "shmem_thp", 0, NR_SHMEM_THPS },
+       { "anon_thp",                   NR_ANON_THPS                    },
+       { "file_thp",                   NR_FILE_THPS                    },
+       { "shmem_thp",                  NR_SHMEM_THPS                   },
 #endif
-       { "inactive_anon", PAGE_SIZE, NR_INACTIVE_ANON },
-       { "active_anon", PAGE_SIZE, NR_ACTIVE_ANON },
-       { "inactive_file", PAGE_SIZE, NR_INACTIVE_FILE },
-       { "active_file", PAGE_SIZE, NR_ACTIVE_FILE },
-       { "unevictable", PAGE_SIZE, NR_UNEVICTABLE },
-
-       /*
-        * Note: The slab_reclaimable and slab_unreclaimable must be
-        * together and slab_reclaimable must be in front.
-        */
-       { "slab_reclaimable", 1, NR_SLAB_RECLAIMABLE_B },
-       { "slab_unreclaimable", 1, NR_SLAB_UNRECLAIMABLE_B },
+       { "inactive_anon",              NR_INACTIVE_ANON                },
+       { "active_anon",                NR_ACTIVE_ANON                  },
+       { "inactive_file",              NR_INACTIVE_FILE                },
+       { "active_file",                NR_ACTIVE_FILE                  },
+       { "unevictable",                NR_UNEVICTABLE                  },
+       { "slab_reclaimable",           NR_SLAB_RECLAIMABLE_B           },
+       { "slab_unreclaimable",         NR_SLAB_UNRECLAIMABLE_B         },
 
        /* The memory events */
-       { "workingset_refault_anon", 1, WORKINGSET_REFAULT_ANON },
-       { "workingset_refault_file", 1, WORKINGSET_REFAULT_FILE },
-       { "workingset_activate_anon", 1, WORKINGSET_ACTIVATE_ANON },
-       { "workingset_activate_file", 1, WORKINGSET_ACTIVATE_FILE },
-       { "workingset_restore_anon", 1, WORKINGSET_RESTORE_ANON },
-       { "workingset_restore_file", 1, WORKINGSET_RESTORE_FILE },
-       { "workingset_nodereclaim", 1, WORKINGSET_NODERECLAIM },
+       { "workingset_refault_anon",    WORKINGSET_REFAULT_ANON         },
+       { "workingset_refault_file",    WORKINGSET_REFAULT_FILE         },
+       { "workingset_activate_anon",   WORKINGSET_ACTIVATE_ANON        },
+       { "workingset_activate_file",   WORKINGSET_ACTIVATE_FILE        },
+       { "workingset_restore_anon",    WORKINGSET_RESTORE_ANON         },
+       { "workingset_restore_file",    WORKINGSET_RESTORE_FILE         },
+       { "workingset_nodereclaim",     WORKINGSET_NODERECLAIM          },
 };
 
-static int __init memory_stats_init(void)
-{
-       int i;
-
-       for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               if (memory_stats[i].idx == NR_ANON_THPS ||
-                   memory_stats[i].idx == NR_FILE_THPS ||
-                   memory_stats[i].idx == NR_SHMEM_THPS)
-                       memory_stats[i].ratio = HPAGE_PMD_SIZE;
-#endif
-               VM_BUG_ON(!memory_stats[i].ratio);
-               VM_BUG_ON(memory_stats[i].idx >= MEMCG_NR_STAT);
+/* Translate stat items to the correct unit for memory.stat output */
+static int memcg_page_state_unit(int item)
+{
+       switch (item) {
+       case MEMCG_PERCPU_B:
+       case NR_SLAB_RECLAIMABLE_B:
+       case NR_SLAB_UNRECLAIMABLE_B:
+       case WORKINGSET_REFAULT_ANON:
+       case WORKINGSET_REFAULT_FILE:
+       case WORKINGSET_ACTIVATE_ANON:
+       case WORKINGSET_ACTIVATE_FILE:
+       case WORKINGSET_RESTORE_ANON:
+       case WORKINGSET_RESTORE_FILE:
+       case WORKINGSET_NODERECLAIM:
+               return 1;
+       case NR_KERNEL_STACK_KB:
+               return SZ_1K;
+       default:
+               return PAGE_SIZE;
        }
+}
 
-       return 0;
+static inline unsigned long memcg_page_state_output(struct mem_cgroup *memcg,
+                                                   int item)
+{
+       return memcg_page_state(memcg, item) * memcg_page_state_unit(item);
 }
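+
+/*
+ * Example (derived from the switch above): NR_KERNEL_STACK_KB is tracked in
+ * KiB, so memcg_page_state_output() scales it by SZ_1K to report bytes,
+ * while e.g. NR_SHMEM is tracked in pages and is scaled by PAGE_SIZE.
+ */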
-pure_initcall(memory_stats_init);
 
 static char *memory_stat_format(struct mem_cgroup *memcg)
 {
@@ -1602,13 +1575,12 @@ static char *memory_stat_format(struct mem_cgroup *memcg)
        for (i = 0; i < ARRAY_SIZE(memory_stats); i++) {
                u64 size;
 
-               size = memcg_page_state(memcg, memory_stats[i].idx);
-               size *= memory_stats[i].ratio;
+               size = memcg_page_state_output(memcg, memory_stats[i].idx);
                seq_buf_printf(&s, "%s %llu\n", memory_stats[i].name, size);
 
                if (unlikely(memory_stats[i].idx == NR_SLAB_UNRECLAIMABLE_B)) {
-                       size = memcg_page_state(memcg, NR_SLAB_RECLAIMABLE_B) +
-                              memcg_page_state(memcg, NR_SLAB_UNRECLAIMABLE_B);
+                       size += memcg_page_state_output(memcg,
+                                                       NR_SLAB_RECLAIMABLE_B);
                        seq_buf_printf(&s, "slab %llu\n", size);
                }
        }
@@ -2935,9 +2907,10 @@ static void commit_charge(struct page *page, struct mem_cgroup *memcg)
 
 #ifdef CONFIG_MEMCG_KMEM
 int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
-                                gfp_t gfp)
+                                gfp_t gfp, bool new_page)
 {
        unsigned int objects = objs_per_slab_page(s, page);
+       unsigned long memcg_data;
        void *vec;
 
        vec = kcalloc_node(objects, sizeof(struct obj_cgroup *), gfp,
@@ -2945,11 +2918,25 @@ int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
        if (!vec)
                return -ENOMEM;
 
-       if (!set_page_objcgs(page, vec))
+       memcg_data = (unsigned long) vec | MEMCG_DATA_OBJCGS;
+       if (new_page) {
+               /*
+                * If the slab page is brand new and nobody can yet access
+                * its memcg_data, no synchronization is required and
+                * memcg_data can be simply assigned.
+                */
+               page->memcg_data = memcg_data;
+       } else if (cmpxchg(&page->memcg_data, 0, memcg_data)) {
+               /*
+                * If the slab page is already in use, somebody can allocate
+                * and assign obj_cgroups in parallel. In this case the existing
+                * objcg vector should be reused.
+                */
                kfree(vec);
-       else
-               kmemleak_not_leak(vec);
+               return 0;
+       }
 
+       kmemleak_not_leak(vec);
        return 0;
 }
 
@@ -3077,8 +3064,8 @@ static void memcg_free_cache_id(int id)
  *
  * Returns 0 on success, an error code on failure.
  */
-int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
-                       unsigned int nr_pages)
+static int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
+                              unsigned int nr_pages)
 {
        struct page_counter *counter;
        int ret;
@@ -3110,7 +3097,7 @@ int __memcg_kmem_charge(struct mem_cgroup *memcg, gfp_t gfp,
  * @memcg: memcg to uncharge
  * @nr_pages: number of pages to uncharge
  */
-void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages)
+static void __memcg_kmem_uncharge(struct mem_cgroup *memcg, unsigned int nr_pages)
 {
        if (!cgroup_subsys_on_dfl(memory_cgrp_subsys))
                page_counter_uncharge(&memcg->kmem, nr_pages);
@@ -3300,24 +3287,21 @@ void obj_cgroup_uncharge(struct obj_cgroup *objcg, size_t size)
 
 #endif /* CONFIG_MEMCG_KMEM */
 
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
 /*
- * Because page_memcg(head) is not set on compound tails, set it now.
+ * Because page_memcg(head) is not set on tails, set it now.
  */
-void mem_cgroup_split_huge_fixup(struct page *head)
+void split_page_memcg(struct page *head, unsigned int nr)
 {
        struct mem_cgroup *memcg = page_memcg(head);
        int i;
 
-       if (mem_cgroup_disabled())
+       if (mem_cgroup_disabled() || !memcg)
                return;
 
-       for (i = 1; i < HPAGE_PMD_NR; i++) {
-               css_get(&memcg->css);
-               head[i].memcg_data = (unsigned long)memcg;
-       }
+       for (i = 1; i < nr; i++)
+               head[i].memcg_data = head->memcg_data;
+       css_get_many(&memcg->css, nr - 1);
 }
-#endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
 #ifdef CONFIG_MEMCG_SWAP
 /**
@@ -4072,10 +4056,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
                if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
                        continue;
                nr = memcg_page_state_local(memcg, memcg1_stats[i]);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               if (memcg1_stats[i] == NR_ANON_THPS)
-                       nr *= HPAGE_PMD_NR;
-#endif
                seq_printf(m, "%s %lu\n", memcg1_stat_names[i], nr * PAGE_SIZE);
        }
 
@@ -4106,10 +4086,6 @@ static int memcg_stat_show(struct seq_file *m, void *v)
                if (memcg1_stats[i] == MEMCG_SWAP && !do_memsw_account())
                        continue;
                nr = memcg_page_state(memcg, memcg1_stats[i]);
-#ifdef CONFIG_TRANSPARENT_HUGEPAGE
-               if (memcg1_stats[i] == NR_ANON_THPS)
-                       nr *= HPAGE_PMD_NR;
-#endif
                seq_printf(m, "total_%s %llu\n", memcg1_stat_names[i],
                                                (u64)nr * PAGE_SIZE);
        }
@@ -4897,7 +4873,7 @@ static ssize_t memcg_write_event_control(struct kernfs_open_file *of,
 
        /* the process needs read permission on the control file */
        /* AV: shouldn't we check that it's been opened for read instead? */
-       ret = inode_permission(file_inode(cfile.file), MAY_READ);
+       ret = file_permission(cfile.file, MAY_READ);
        if (ret < 0)
                goto out_put_cfile;
 
@@ -5193,7 +5169,7 @@ static int alloc_mem_cgroup_per_node_info(struct mem_cgroup *memcg, int node)
                return 1;
        }
 
-       pn->lruvec_stat_cpu = alloc_percpu_gfp(struct lruvec_stat,
+       pn->lruvec_stat_cpu = alloc_percpu_gfp(struct batched_lruvec_stat,
                                               GFP_KERNEL_ACCOUNT);
        if (!pn->lruvec_stat_cpu) {
                free_percpu(pn->lruvec_stat_local);
@@ -5642,7 +5618,6 @@ static int mem_cgroup_move_account(struct page *page,
                                __mod_lruvec_state(to_vec, NR_ANON_THPS,
                                                   nr_pages);
                        }
-
                }
        } else {
                __mod_lruvec_state(from_vec, NR_FILE_PAGES, -nr_pages);
@@ -6393,6 +6368,12 @@ static int memory_stat_show(struct seq_file *m, void *v)
 }
 
 #ifdef CONFIG_NUMA
+static inline unsigned long lruvec_page_state_output(struct lruvec *lruvec,
+                                                    int item)
+{
+       return lruvec_page_state(lruvec, item) * memcg_page_state_unit(item);
+}
+
 static int memory_numa_stat_show(struct seq_file *m, void *v)
 {
        int i;
@@ -6410,8 +6391,8 @@ static int memory_numa_stat_show(struct seq_file *m, void *v)
                        struct lruvec *lruvec;
 
                        lruvec = mem_cgroup_lruvec(memcg, NODE_DATA(nid));
-                       size = lruvec_page_state(lruvec, memory_stats[i].idx);
-                       size *= memory_stats[i].ratio;
+                       size = lruvec_page_state_output(lruvec,
+                                                       memory_stats[i].idx);
                        seq_printf(m, " N%d=%llu", nid, size);
                }
                seq_putc(m, '\n');
@@ -6760,7 +6741,19 @@ int mem_cgroup_charge(struct page *page, struct mm_struct *mm, gfp_t gfp_mask)
        memcg_check_events(memcg, page);
        local_irq_enable();
 
-       if (PageSwapCache(page)) {
+       /*
+        * Cgroup1's unified memory+swap counter has been charged with the
+        * new swapcache page, finish the transfer by uncharging the swap
+        * slot. The swap slot would also get uncharged when it dies, but
+        * it can stick around indefinitely and we'd count the page twice
+        * the entire time.
+        *
+        * Cgroup2 has separate resource counters for memory and swap,
+        * so this is a non-issue here. Memory and swap charge lifetimes
+        * correspond 1:1 to page and swap slot lifetimes: we charge the
+        * page to memory here, and uncharge swap when the slot is freed.
+        */
+       if (do_memsw_account() && PageSwapCache(page)) {
                swp_entry_t entry = { .val = page_private(page) };
                /*
                 * The swap entry might not get freed for a long time,
@@ -6851,31 +6844,6 @@ static void uncharge_page(struct page *page, struct uncharge_gather *ug)
        css_put(&ug->memcg->css);
 }
 
-static void uncharge_list(struct list_head *page_list)
-{
-       struct uncharge_gather ug;
-       struct list_head *next;
-
-       uncharge_gather_clear(&ug);
-
-       /*
-        * Note that the list can be a single page->lru; hence the
-        * do-while loop instead of a simple list_for_each_entry().
-        */
-       next = page_list->next;
-       do {
-               struct page *page;
-
-               page = list_entry(next, struct page, lru);
-               next = page->lru.next;
-
-               uncharge_page(page, &ug);
-       } while (next != page_list);
-
-       if (ug.memcg)
-               uncharge_batch(&ug);
-}
-
 /**
  * mem_cgroup_uncharge - uncharge a page
  * @page: page to uncharge
@@ -6907,11 +6875,17 @@ void mem_cgroup_uncharge(struct page *page)
  */
 void mem_cgroup_uncharge_list(struct list_head *page_list)
 {
+       struct uncharge_gather ug;
+       struct page *page;
+
        if (mem_cgroup_disabled())
                return;
 
-       if (!list_empty(page_list))
-               uncharge_list(page_list);
+       uncharge_gather_clear(&ug);
+       list_for_each_entry(page, page_list, lru)
+               uncharge_page(page, &ug);
+       if (ug.memcg)
+               uncharge_batch(&ug);
 }
 
 /**
@@ -7078,6 +7052,14 @@ static int __init mem_cgroup_init(void)
 {
        int cpu, node;
 
+       /*
+        * Currently an s32 (see struct batched_lruvec_stat) is used for
+        * per-memcg-per-cpu caching of per-node statistics. For this to work
+        * correctly, the overfill threshold must not exceed
+        * S32_MAX / PAGE_SIZE.
+        */
+       BUILD_BUG_ON(MEMCG_CHARGE_BATCH > S32_MAX / PAGE_SIZE);
+
        cpuhp_setup_state_nocalls(CPUHP_MM_MEMCQ_DEAD, "mm/memctrl:dead", NULL,
                                  memcg_hotplug_cpu_dead);
 
index e948163..24210c9 100644 (file)
@@ -243,9 +243,13 @@ static int kill_proc(struct to_kill *tk, unsigned long pfn, int flags)
                        pfn, t->comm, t->pid);
 
        if (flags & MF_ACTION_REQUIRED) {
-               WARN_ON_ONCE(t != current);
-               ret = force_sig_mceerr(BUS_MCEERR_AR,
+               if (t == current)
+                       ret = force_sig_mceerr(BUS_MCEERR_AR,
                                         (void __user *)tk->addr, addr_lsb);
+               else
+                       /* Signal other processes sharing the page if they have PF_MCE_EARLY set. */
+                       ret = send_sig_mceerr(BUS_MCEERR_AO, (void __user *)tk->addr,
+                               addr_lsb, t);
        } else {
                /*
                 * Don't use force here, it's convenient if the signal
@@ -440,26 +444,26 @@ static struct task_struct *find_early_kill_thread(struct task_struct *tsk)
  * Determine whether a given process is "early kill" process which expects
  * to be signaled when some page under the process is hwpoisoned.
  * Return task_struct of the dedicated thread (main thread unless explicitly
- * specified) if the process is "early kill," and otherwise returns NULL.
+ * specified) if the process is "early kill" and otherwise returns NULL.
  *
- * Note that the above is true for Action Optional case, but not for Action
- * Required case where SIGBUS should sent only to the current thread.
+ * Note that the above is true for the Action Optional case. For the Action
+ * Required case, only the current thread needs to be signaled with SIGBUS;
+ * for other, non-current processes sharing the same error page the error is
+ * Action Optional, and if such a process is "early kill", the task_struct of
+ * its dedicated thread is returned as well.
  */
 static struct task_struct *task_early_kill(struct task_struct *tsk,
                                           int force_early)
 {
        if (!tsk->mm)
                return NULL;
-       if (force_early) {
-               /*
-                * Comparing ->mm here because current task might represent
-                * a subthread, while tsk always points to the main thread.
-                */
-               if (tsk->mm == current->mm)
-                       return current;
-               else
-                       return NULL;
-       }
+       /*
+        * Comparing ->mm here because current task might represent
+        * a subthread, while tsk always points to the main thread.
+        */
+       if (force_early && tsk->mm == current->mm)
+               return current;
+
        return find_early_kill_thread(tsk);
 }
 
@@ -1308,6 +1312,12 @@ static int memory_failure_dev_pagemap(unsigned long pfn, int flags,
                 */
                put_page(page);
 
+       /* device metadata space is not recoverable */
+       if (!pgmap_pfn_valid(pgmap, pfn)) {
+               rc = -ENXIO;
+               goto out;
+       }
+
        /*
         * Prevent the inode from being freed while we are interrogating
         * the address_space, typically this would be handled by
index 5da9640..550405f 100644 (file)
@@ -166,7 +166,7 @@ static int __init init_zero_pfn(void)
        zero_pfn = page_to_pfn(ZERO_PAGE(0));
        return 0;
 }
-core_initcall(init_zero_pfn);
+early_initcall(init_zero_pfn);
 
 void mm_trace_rss_stat(struct mm_struct *mm, int member, long count)
 {
@@ -809,12 +809,8 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
                  pte_t *dst_pte, pte_t *src_pte, unsigned long addr, int *rss,
                  struct page **prealloc, pte_t pte, struct page *page)
 {
-       struct mm_struct *src_mm = src_vma->vm_mm;
        struct page *new_page;
 
-       if (!is_cow_mapping(src_vma->vm_flags))
-               return 1;
-
        /*
         * What we want to do is to check whether this page may
         * have been pinned by the parent process.  If so,
@@ -828,9 +824,7 @@ copy_present_page(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma
          * the page count. That might give false positives for
          * pinning, but it will work correctly.
         */
-       if (likely(!atomic_read(&src_mm->has_pinned)))
-               return 1;
-       if (likely(!page_maybe_dma_pinned(page)))
+       if (likely(!page_needs_cow_for_dma(src_vma, page)))
                return 1;
 
        new_page = *prealloc;
@@ -2177,11 +2171,11 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
                        unsigned long addr, unsigned long end,
                        unsigned long pfn, pgprot_t prot)
 {
-       pte_t *pte;
+       pte_t *pte, *mapped_pte;
        spinlock_t *ptl;
        int err = 0;
 
-       pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
+       mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
        if (!pte)
                return -ENOMEM;
        arch_enter_lazy_mmu_mode();
@@ -2195,7 +2189,7 @@ static int remap_pte_range(struct mm_struct *mm, pmd_t *pmd,
                pfn++;
        } while (pte++, addr += PAGE_SIZE, addr != end);
        arch_leave_lazy_mmu_mode();
-       pte_unmap_unlock(pte - 1, ptl);
+       pte_unmap_unlock(mapped_pte, ptl);
        return err;
 }
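
Note: pte_unmap_unlock() should be handed the address that
pte_alloc_map_lock() returned; recomputing it from the advanced cursor as
"pte - 1" quietly depends on the loop having run at least once and on the
arithmetic staying inside the mapped page. The safe shape, sketched:

    pte_t *pte, *mapped_pte;
    spinlock_t *ptl;

    mapped_pte = pte = pte_alloc_map_lock(mm, pmd, addr, &ptl);
    if (!pte)
            return -ENOMEM;
    do {
            /* ... operate on *pte ... */
    } while (pte++, addr += PAGE_SIZE, addr != end);
    pte_unmap_unlock(mapped_pte, ptl);  /* the saved cookie, not pte - 1 */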
 
@@ -2394,18 +2388,18 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
                                     pte_fn_t fn, void *data, bool create,
                                     pgtbl_mod_mask *mask)
 {
-       pte_t *pte;
+       pte_t *pte, *mapped_pte;
        int err = 0;
        spinlock_t *ptl;
 
        if (create) {
-               pte = (mm == &init_mm) ?
+               mapped_pte = pte = (mm == &init_mm) ?
                        pte_alloc_kernel_track(pmd, addr, mask) :
                        pte_alloc_map_lock(mm, pmd, addr, &ptl);
                if (!pte)
                        return -ENOMEM;
        } else {
-               pte = (mm == &init_mm) ?
+               mapped_pte = pte = (mm == &init_mm) ?
                        pte_offset_kernel(pmd, addr) :
                        pte_offset_map_lock(mm, pmd, addr, &ptl);
        }
@@ -2428,7 +2422,7 @@ static int apply_to_pte_range(struct mm_struct *mm, pmd_t *pmd,
        arch_leave_lazy_mmu_mode();
 
        if (mm != &init_mm)
-               pte_unmap_unlock(pte-1, ptl);
+               pte_unmap_unlock(mapped_pte, ptl);
        return err;
 }
 
@@ -2902,7 +2896,6 @@ static vm_fault_t wp_page_copy(struct vm_fault *vmf)
                }
                flush_cache_page(vma, vmf->address, pte_pfn(vmf->orig_pte));
                entry = mk_pte(new_page, vma->vm_page_prot);
-               entry = pte_sw_mkyoung(entry);
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
 
                /*
@@ -3104,6 +3097,14 @@ static vm_fault_t do_wp_page(struct vm_fault *vmf)
                return handle_userfault(vmf, VM_UFFD_WP);
        }
 
+       /*
+        * Userfaultfd write-protect can defer flushes. Ensure the TLB
+        * is flushed in this case before copying.
+        */
+       if (unlikely(userfaultfd_wp(vmf->vma) &&
+                    mm_tlb_flush_pending(vmf->vma->vm_mm)))
+               flush_tlb_page(vmf->vma, vmf->address);
+
        vmf->page = vm_normal_page(vma, vmf->address, vmf->orig_pte);
        if (!vmf->page) {
                /*
@@ -3560,7 +3561,6 @@ static vm_fault_t do_anonymous_page(struct vm_fault *vmf)
        __SetPageUptodate(page);
 
        entry = mk_pte(page, vma->vm_page_prot);
-       entry = pte_sw_mkyoung(entry);
        if (vma->vm_flags & VM_WRITE)
                entry = pte_mkwrite(pte_mkdirty(entry));
 
@@ -3745,8 +3745,6 @@ void do_set_pte(struct vm_fault *vmf, struct page *page, unsigned long addr)
 
        if (prefault && arch_wants_old_prefaulted_pte())
                entry = pte_mkold(entry);
-       else
-               entry = pte_sw_mkyoung(entry);
 
        if (write)
                entry = maybe_mkwrite(pte_mkdirty(entry), vma);
@@ -5177,17 +5175,19 @@ long copy_huge_page_from_user(struct page *dst_page,
        void *page_kaddr;
        unsigned long i, rc = 0;
        unsigned long ret_val = pages_per_huge_page * PAGE_SIZE;
+       struct page *subpage = dst_page;
 
-       for (i = 0; i < pages_per_huge_page; i++) {
+       for (i = 0; i < pages_per_huge_page;
+            i++, subpage = mem_map_next(subpage, dst_page, i)) {
                if (allow_pagefault)
-                       page_kaddr = kmap(dst_page + i);
+                       page_kaddr = kmap(subpage);
                else
-                       page_kaddr = kmap_atomic(dst_page + i);
+                       page_kaddr = kmap_atomic(subpage);
                rc = copy_from_user(page_kaddr,
                                (const void __user *)(src + i * PAGE_SIZE),
                                PAGE_SIZE);
                if (allow_pagefault)
-                       kunmap(dst_page + i);
+                       kunmap(subpage);
                else
                        kunmap_atomic(page_kaddr);
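
Note: switching from "dst_page + i" to mem_map_next() matters for gigantic
pages: with SPARSEMEM and no VMEMMAP, struct pages are only contiguous within
MAX_ORDER-aligned blocks, so plain pointer arithmetic walks off the memmap
once a subpage crosses such a boundary. Roughly what the helper does (sketch
of mem_map_next() from mm/internal.h):

    static inline struct page *mem_map_next(struct page *iter,
                                            struct page *base, int offset)
    {
            /* re-derive the pointer at each MAX_ORDER boundary */
            if (unlikely((offset & (MAX_ORDER_NR_PAGES - 1)) == 0)) {
                    unsigned long pfn = page_to_pfn(base) + offset;

                    if (!pfn_valid(pfn))
                            return NULL;
                    return pfn_to_page(pfn);
            }
            return iter + 1;
    }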
 
index f9d57b9..0cdbbfb 100644
@@ -67,17 +67,17 @@ void put_online_mems(void)
 bool movable_node_enabled = false;
 
 #ifndef CONFIG_MEMORY_HOTPLUG_DEFAULT_ONLINE
-int memhp_default_online_type = MMOP_OFFLINE;
+int mhp_default_online_type = MMOP_OFFLINE;
 #else
-int memhp_default_online_type = MMOP_ONLINE;
+int mhp_default_online_type = MMOP_ONLINE;
 #endif
 
 static int __init setup_memhp_default_state(char *str)
 {
-       const int online_type = memhp_online_type_from_str(str);
+       const int online_type = mhp_online_type_from_str(str);
 
        if (online_type >= 0)
-               memhp_default_online_type = online_type;
+               mhp_default_online_type = online_type;
 
        return 1;
 }
@@ -107,6 +107,9 @@ static struct resource *register_memory_resource(u64 start, u64 size,
        if (strcmp(resource_name, "System RAM"))
                flags |= IORESOURCE_SYSRAM_DRIVER_MANAGED;
 
+       if (!mhp_range_allowed(start, size, true))
+               return ERR_PTR(-E2BIG);
+
        /*
         * Make sure value parsed from 'mem=' only restricts memory adding
         * while booting, so that memory hotplug won't be impacted. Please
@@ -284,21 +287,53 @@ static int check_pfn_span(unsigned long pfn, unsigned long nr_pages,
        return 0;
 }
 
-static int check_hotplug_memory_addressable(unsigned long pfn,
-                                           unsigned long nr_pages)
+/*
+ * Return the page for a valid pfn only if the page is online. All pfn
+ * walkers that rely on fully initialized page->flags (and the other
+ * struct page fields) should use this rather than pfn_valid &&
+ * pfn_to_page.
+ */
+struct page *pfn_to_online_page(unsigned long pfn)
 {
-       const u64 max_addr = PFN_PHYS(pfn + nr_pages) - 1;
+       unsigned long nr = pfn_to_section_nr(pfn);
+       struct dev_pagemap *pgmap;
+       struct mem_section *ms;
 
-       if (max_addr >> MAX_PHYSMEM_BITS) {
-               const u64 max_allowed = (1ull << (MAX_PHYSMEM_BITS + 1)) - 1;
-               WARN(1,
-                    "Hotplugged memory exceeds maximum addressable address, range=%#llx-%#llx, maximum=%#llx\n",
-                    (u64)PFN_PHYS(pfn), max_addr, max_allowed);
-               return -E2BIG;
-       }
+       if (nr >= NR_MEM_SECTIONS)
+               return NULL;
 
-       return 0;
+       ms = __nr_to_section(nr);
+       if (!online_section(ms))
+               return NULL;
+
+       /*
+        * Save some code text when online_section() +
+        * pfn_section_valid() are sufficient.
+        */
+       if (IS_ENABLED(CONFIG_HAVE_ARCH_PFN_VALID) && !pfn_valid(pfn))
+               return NULL;
+
+       if (!pfn_section_valid(ms, pfn))
+               return NULL;
+
+       if (!online_device_section(ms))
+               return pfn_to_page(pfn);
+
+       /*
+        * Slowpath: when ZONE_DEVICE collides with
+        * ZONE_{NORMAL,MOVABLE} within the same section, some pfns in
+        * the section may be 'offline' but 'valid'. Only
+        * get_dev_pagemap() can determine sub-section online status.
+        */
+       pgmap = get_dev_pagemap(pfn, NULL);
+       put_dev_pagemap(pgmap);
+
+       /* The presence of a pgmap indicates ZONE_DEVICE offline pfn */
+       if (pgmap)
+               return NULL;
+
+       return pfn_to_page(pfn);
 }
+EXPORT_SYMBOL_GPL(pfn_to_online_page);
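
Note: a hedged sketch of the calling convention this export enables; pfn
walkers test the returned pointer instead of combining pfn_valid() with
pfn_to_page():

    unsigned long pfn;

    for (pfn = start_pfn; pfn < end_pfn; pfn++) {
            struct page *page = pfn_to_online_page(pfn);

            if (!page)
                    continue;   /* hole, offline, or ZONE_DEVICE pfn */
            /* safe: page->flags and the zone/node links are initialized */
    }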
 
 /*
  * Reasonably generic function for adding memory.  It is
@@ -317,9 +352,7 @@ int __ref __add_pages(int nid, unsigned long pfn, unsigned long nr_pages,
        if (WARN_ON_ONCE(!params->pgprot.pgprot))
                return -EINVAL;
 
-       err = check_hotplug_memory_addressable(pfn, nr_pages);
-       if (err)
-               return err;
+       VM_BUG_ON(!mhp_range_allowed(PFN_PHYS(pfn), nr_pages * PAGE_SIZE, false));
 
        if (altmap) {
                /*
@@ -445,20 +478,19 @@ static void update_pgdat_span(struct pglist_data *pgdat)
 
        for (zone = pgdat->node_zones;
             zone < pgdat->node_zones + MAX_NR_ZONES; zone++) {
-               unsigned long zone_end_pfn = zone->zone_start_pfn +
-                                            zone->spanned_pages;
+               unsigned long end_pfn = zone_end_pfn(zone);
 
                /* No need to lock the zones, they can't change. */
                if (!zone->spanned_pages)
                        continue;
                if (!node_end_pfn) {
                        node_start_pfn = zone->zone_start_pfn;
-                       node_end_pfn = zone_end_pfn;
+                       node_end_pfn = end_pfn;
                        continue;
                }
 
-               if (zone_end_pfn > node_end_pfn)
-                       node_end_pfn = zone_end_pfn;
+               if (end_pfn > node_end_pfn)
+                       node_end_pfn = end_pfn;
                if (zone->zone_start_pfn < node_start_pfn)
                        node_start_pfn = zone->zone_start_pfn;
        }
@@ -678,6 +710,14 @@ static void __meminit resize_pgdat_range(struct pglist_data *pgdat, unsigned lon
        pgdat->node_spanned_pages = max(start_pfn + nr_pages, old_end_pfn) - pgdat->node_start_pfn;
 
 }
+
+static void section_taint_zone_device(unsigned long pfn)
+{
+       struct mem_section *ms = __pfn_to_section(pfn);
+
+       ms->section_mem_map |= SECTION_TAINT_ZONE_DEVICE;
+}
+
 /*
  * Associate the pfn range with the given zone, initializing the memmaps
  * and resizing the pgdat/zone data to span the added pages. After this
@@ -708,12 +748,25 @@ void __ref move_pfn_range_to_zone(struct zone *zone, unsigned long start_pfn,
        pgdat_resize_unlock(pgdat, &flags);
 
        /*
+        * Subsection population requires care in pfn_to_online_page().
+        * Set the taint to enable the slow path detection of
+        * ZONE_DEVICE pages in an otherwise ZONE_{NORMAL,MOVABLE}
+        * section.
+        */
+       if (zone_is_zone_device(zone)) {
+               if (!IS_ALIGNED(start_pfn, PAGES_PER_SECTION))
+                       section_taint_zone_device(start_pfn);
+               if (!IS_ALIGNED(start_pfn + nr_pages, PAGES_PER_SECTION))
+                       section_taint_zone_device(start_pfn + nr_pages);
+       }
+
+       /*
         * TODO: now we have a visible range of pages which are not associated
         * with their zone properly. Not nice, but set_pfnblock_flags_mask()
         * expects the zone to span the pfn range. All the pages in the range
         * are reserved, so nobody should be touching them; we should be safe.
         */
-       memmap_init_zone(nr_pages, nid, zone_idx(zone), start_pfn, 0,
+       memmap_init_range(nr_pages, nid, zone_idx(zone), start_pfn, 0,
                         MEMINIT_HOTPLUG, altmap, migratetype);
 
        set_zone_contiguous(zone);
@@ -1007,7 +1060,7 @@ static int check_hotplug_memory_range(u64 start, u64 size)
 
 static int online_memory_block(struct memory_block *mem, void *arg)
 {
-       mem->online_type = memhp_default_online_type;
+       mem->online_type = mhp_default_online_type;
        return device_online(&mem->dev);
 }
 
@@ -1019,7 +1072,7 @@ static int online_memory_block(struct memory_block *mem, void *arg)
  */
 int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
 {
-       struct mhp_params params = { .pgprot = PAGE_KERNEL };
+       struct mhp_params params = { .pgprot = pgprot_mhp(PAGE_KERNEL) };
        u64 start, size;
        bool new_node = false;
        int ret;
@@ -1084,11 +1137,11 @@ int __ref add_memory_resource(int nid, struct resource *res, mhp_t mhp_flags)
         * In case we're allowed to merge the resource, flag it and trigger
         * merging now that adding succeeded.
         */
-       if (mhp_flags & MEMHP_MERGE_RESOURCE)
+       if (mhp_flags & MHP_MERGE_RESOURCE)
                merge_system_ram_resource(res);
 
        /* online pages if requested */
-       if (memhp_default_online_type != MMOP_OFFLINE)
+       if (mhp_default_online_type != MMOP_OFFLINE)
                walk_memory_blocks(start, size, NULL, online_memory_block);
 
        return ret;
@@ -1180,6 +1233,61 @@ out_unlock:
 }
 EXPORT_SYMBOL_GPL(add_memory_driver_managed);
 
+/*
+ * Platforms should define arch_get_mappable_range() that provides
+ * maximum possible addressable physical memory range for which the
+ * linear mapping could be created. The platform returned address
+ * range must adhere to the following semantics:
+ *
+ * - range.start <= range.end
+ * - Range includes both end points [range.start..range.end]
+ *
+ * There is also a fallback definition provided here, allowing the
+ * entire possible physical address range in case any platform does
+ * not define arch_get_mappable_range().
+ */
+struct range __weak arch_get_mappable_range(void)
+{
+       struct range mhp_range = {
+               .start = 0UL,
+               .end = -1ULL,
+       };
+       return mhp_range;
+}
+
+struct range mhp_get_pluggable_range(bool need_mapping)
+{
+       const u64 max_phys = (1ULL << MAX_PHYSMEM_BITS) - 1;
+       struct range mhp_range;
+
+       if (need_mapping) {
+               mhp_range = arch_get_mappable_range();
+               if (mhp_range.start > max_phys) {
+                       mhp_range.start = 0;
+                       mhp_range.end = 0;
+               }
+               mhp_range.end = min_t(u64, mhp_range.end, max_phys);
+       } else {
+               mhp_range.start = 0;
+               mhp_range.end = max_phys;
+       }
+       return mhp_range;
+}
+EXPORT_SYMBOL_GPL(mhp_get_pluggable_range);
+
+bool mhp_range_allowed(u64 start, u64 size, bool need_mapping)
+{
+       struct range mhp_range = mhp_get_pluggable_range(need_mapping);
+       u64 end = start + size;
+
+       if (start < end && start >= mhp_range.start && (end - 1) <= mhp_range.end)
+               return true;
+
+       pr_warn("Hotplug memory [%#llx-%#llx] exceeds maximum addressable range [%#llx-%#llx]\n",
+               start, end, mhp_range.start, mhp_range.end);
+       return false;
+}
+
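
Note: a hedged sketch of the per-architecture hook introduced above;
MAX_LINEAR_RANGE is a hypothetical stand-in for whatever bounds the
architecture's linear mapping:

    struct range arch_get_mappable_range(void)
    {
            return (struct range) {
                    .start = 0,
                    .end   = MAX_LINEAR_RANGE - 1,  /* inclusive end point */
            };
    }
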
 #ifdef CONFIG_MEMORY_HOTREMOVE
 /*
  * Confirm all pages in a range [start, end) belong to the same zone (skipping
@@ -1260,7 +1368,14 @@ static int scan_movable_pages(unsigned long start, unsigned long end,
                if (!PageHuge(page))
                        continue;
                head = compound_head(page);
-               if (page_huge_active(head))
+               /*
+                * This test is racy as we hold no reference or lock.  The
+                * hugetlb page could have been freed and head is no longer
+                * a hugetlb page before the following check.  In such unlikely
+                * cases false positives and negatives are possible.  Calling
+                * code must deal with these scenarios.
+                */
+               if (HPageMigratable(head))
                        goto found;
                skip = compound_nr(head) - (page - head);
                pfn += skip - 1;
index 2c3a865..ab51132 100644
@@ -677,7 +677,7 @@ static int queue_pages_test_walk(unsigned long start, unsigned long end,
        unsigned long flags = qp->flags;
 
        /* range check first */
-       VM_BUG_ON_VMA((vma->vm_start > start) || (vma->vm_end < end), vma);
+       VM_BUG_ON_VMA(!range_in_vma(vma, start, end), vma);
 
        if (!qp->first) {
                qp->first = vma;
@@ -875,6 +875,16 @@ static long do_set_mempolicy(unsigned short mode, unsigned short flags,
                goto out;
        }
 
+       if (flags & MPOL_F_NUMA_BALANCING) {
+               if (new && new->mode == MPOL_BIND) {
+                       new->flags |= (MPOL_F_MOF | MPOL_F_MORON);
+               } else {
+                       ret = -EINVAL;
+                       mpol_put(new);
+                       goto out;
+               }
+       }
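
Note: MPOL_F_NUMA_BALANCING is OR'ed into the mode argument of the
set_mempolicy() syscall. A minimal userspace sketch, assuming headers that
already define the new flag (older kernels fail with EINVAL):

    #include <stdio.h>
    #include <numaif.h>

    int main(void)
    {
            unsigned long nodemask = (1UL << 0) | (1UL << 1); /* nodes 0-1 */

            if (set_mempolicy(MPOL_BIND | MPOL_F_NUMA_BALANCING,
                              &nodemask, sizeof(nodemask) * 8))
                    perror("set_mempolicy");
            return 0;
    }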
+
        ret = mpol_set_nodemask(new, nodes, scratch);
        if (ret) {
                mpol_put(new);
@@ -2486,6 +2496,12 @@ int mpol_misplaced(struct page *page, struct vm_area_struct *vma, unsigned long
                break;
 
        case MPOL_BIND:
+               /* Optimize placement among multiple nodes via NUMA balancing */
+               if (pol->flags & MPOL_F_MORON) {
+                       if (node_isset(thisnid, pol->v.nodes))
+                               break;
+                       goto out;
+               }
 
                /*
                 * allows binding to multiple nodes.
index 624ed51..79959fa 100644
@@ -104,7 +104,7 @@ static inline void poison_element(mempool_t *pool, void *element)
 static __always_inline void kasan_poison_element(mempool_t *pool, void *element)
 {
        if (pool->alloc == mempool_alloc_slab || pool->alloc == mempool_kmalloc)
-               kasan_slab_free_mempool(element, _RET_IP_);
+               kasan_slab_free_mempool(element);
        else if (pool->alloc == mempool_alloc_pages)
                kasan_free_pages(element, (unsigned long)pool->pool_data);
 }
index 16b2fb4..7aa7d6e 100644
@@ -80,6 +80,21 @@ static unsigned long pfn_first(struct dev_pagemap *pgmap, int range_id)
        return pfn + vmem_altmap_offset(pgmap_altmap(pgmap));
 }
 
+bool pgmap_pfn_valid(struct dev_pagemap *pgmap, unsigned long pfn)
+{
+       int i;
+
+       for (i = 0; i < pgmap->nr_range; i++) {
+               struct range *range = &pgmap->ranges[i];
+
+               if (pfn >= PHYS_PFN(range->start) &&
+                   pfn <= PHYS_PFN(range->end))
+                       return pfn >= pfn_first(pgmap, i);
+       }
+
+       return false;
+}
+
 static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id)
 {
        const struct range *range = &pgmap->ranges[range_id];
@@ -185,6 +200,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
                int range_id, int nid)
 {
+       const bool is_private = pgmap->type == MEMORY_DEVICE_PRIVATE;
        struct range *range = &pgmap->ranges[range_id];
        struct dev_pagemap *conflict_pgmap;
        int error, is_ram;
@@ -230,6 +246,11 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
        if (error)
                goto err_pfn_remap;
 
+       if (!mhp_range_allowed(range->start, range_len(range), !is_private)) {
+               error = -EINVAL;
+               goto err_pfn_remap;
+       }
+
        mem_hotplug_begin();
 
        /*
@@ -243,7 +264,7 @@ static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
         * the CPU, we do want the linear mapping and thus use
         * arch_add_memory().
         */
-       if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
+       if (is_private) {
                error = add_pages(nid, PHYS_PFN(range->start),
                                PHYS_PFN(range_len(range)), params);
        } else {
index 20ca887..62b81d5 100644
@@ -331,7 +331,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
        if (!get_page_unless_zero(page))
                goto out;
        pte_unmap_unlock(ptep, ptl);
-       put_and_wait_on_page_locked(page);
+       put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
        return;
 out:
        pte_unmap_unlock(ptep, ptl);
@@ -365,7 +365,7 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd)
        if (!get_page_unless_zero(page))
                goto unlock;
        spin_unlock(ptl);
-       put_and_wait_on_page_locked(page);
+       put_and_wait_on_page_locked(page, TASK_UNINTERRUPTIBLE);
        return;
 unlock:
        spin_unlock(ptl);
@@ -500,6 +500,12 @@ int migrate_page_move_mapping(struct address_space *mapping,
                        __mod_lruvec_state(old_lruvec, NR_SHMEM, -nr);
                        __mod_lruvec_state(new_lruvec, NR_SHMEM, nr);
                }
+#ifdef CONFIG_SWAP
+               if (PageSwapCache(page)) {
+                       __mod_lruvec_state(old_lruvec, NR_SWAPCACHE, -nr);
+                       __mod_lruvec_state(new_lruvec, NR_SWAPCACHE, nr);
+               }
+#endif
                if (dirty && mapping_can_writeback(mapping)) {
                        __mod_lruvec_state(old_lruvec, NR_FILE_DIRTY, -nr);
                        __mod_zone_page_state(oldzone, NR_ZONE_WRITE_PENDING, -nr);
index 02db1a8..9122676 100644
@@ -166,8 +166,9 @@ static inline bool can_do_mincore(struct vm_area_struct *vma)
         * for writing; otherwise we'd be including shared non-exclusive
         * mappings, which opens a side channel.
         */
-       return inode_owner_or_capable(file_inode(vma->vm_file)) ||
-               inode_permission(file_inode(vma->vm_file), MAY_WRITE) == 0;
+       return inode_owner_or_capable(&init_user_ns,
+                                     file_inode(vma->vm_file)) ||
+              file_permission(vma->vm_file, MAY_WRITE) == 0;
 }
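
Note: file_permission() used above is a convenience wrapper added by the
idmapped-mounts series; roughly (sketch, as in include/linux/fs.h):

    static inline int file_permission(struct file *file, int mask)
    {
            return inode_permission(file_mnt_user_ns(file),
                                    file_inode(file), mask);
    }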
 
 static const struct mm_walk_ops mincore_walk_ops = {
index 55b3b36..f8f8cc3 100644
@@ -278,8 +278,7 @@ static void __munlock_pagevec(struct pagevec *pvec, struct zone *zone)
                         */
                        if (TestClearPageLRU(page)) {
                                lruvec = relock_page_lruvec_irq(page, lruvec);
-                               del_page_from_lru_list(page, lruvec,
-                                                       page_lru(page));
+                               del_page_from_lru_list(page, lruvec);
                                continue;
                        } else
                                __munlock_isolation_failed(page);
@@ -623,7 +622,7 @@ static unsigned long count_mm_mlocked_page_nr(struct mm_struct *mm,
 
        vma = find_vma(mm, start);
        if (vma == NULL)
-               vma = mm->mmap;
+               return 0;
 
        for (; vma ; vma = vma->vm_next) {
                if (start >= vma->vm_end)
index 90673fe..3f28759 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -189,7 +189,6 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long
                struct list_head *uf);
 SYSCALL_DEFINE1(brk, unsigned long, brk)
 {
-       unsigned long retval;
        unsigned long newbrk, oldbrk, origbrk;
        struct mm_struct *mm = current->mm;
        struct vm_area_struct *next;
@@ -281,9 +280,8 @@ success:
        return brk;
 
 out:
-       retval = origbrk;
        mmap_write_unlock(mm);
-       return retval;
+       return origbrk;
 }
 
 static inline unsigned long vma_compute_gap(struct vm_area_struct *vma)
index 61ee40e..459d195 100644
@@ -501,10 +501,33 @@ static int mn_hlist_invalidate_range_start(
                                                "");
                                WARN_ON(mmu_notifier_range_blockable(range) ||
                                        _ret != -EAGAIN);
+                               /*
+                                * We call all the notifiers on any EAGAIN;
+                                * there is no way for a notifier to know if
+                                * its start method failed, so a start that
+                                * returns EAGAIN can't also provide an end.
+                                */
+                               WARN_ON(ops->invalidate_range_end);
                                ret = _ret;
                        }
                }
        }
+
+       if (ret) {
+               /*
+                * Must be non-blocking to get here.  If there are multiple
+                * notifiers and one or more failed to start, any whose start
+                * succeeded is expecting its end to be called.  Do so now.
+                */
+               hlist_for_each_entry_rcu(subscription, &subscriptions->list,
+                                        hlist, srcu_read_lock_held(&srcu)) {
+                       if (!subscription->ops->invalidate_range_end)
+                               continue;
+
+                       subscription->ops->invalidate_range_end(subscription,
+                                                               range);
+               }
+       }
        srcu_read_unlock(&srcu, id);
 
        return ret;
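
Note: the new rule, restated as a subscriber sketch (my_start is
hypothetical): a start callback that can fail with -EAGAIN must not pair with
an end callback, because end is now invoked for every subscriber that
provides one, with no way to skip those whose start failed:

    static int my_start(struct mmu_notifier *sub,
                        const struct mmu_notifier_range *range);

    static const struct mmu_notifier_ops my_notifier_ops = {
            .invalidate_range_start = my_start, /* may return -EAGAIN */
            /* .invalidate_range_end deliberately omitted */
    };
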
index ab70902..94188df 100644
@@ -617,10 +617,11 @@ static int do_mprotect_pkey(unsigned long start, size_t len,
                if (tmp > end)
                        tmp = end;
 
-               if (vma->vm_ops && vma->vm_ops->mprotect)
+               if (vma->vm_ops && vma->vm_ops->mprotect) {
                        error = vma->vm_ops->mprotect(vma, nstart, tmp, newflags);
-               if (error)
-                       goto out;
+                       if (error)
+                               goto out;
+               }
 
                error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
                if (error)
index 4719269..ec8f840 100644
@@ -593,6 +593,14 @@ static unsigned long move_vma(struct vm_area_struct *vma,
                /* We always clear VM_LOCKED[ONFAULT] on the old vma */
                vma->vm_flags &= VM_LOCKED_CLEAR_MASK;
 
+               /*
+                * anon_vma links of the old vma are no longer needed after its page
+                * table has been moved.
+                */
+               if (new_vma != vma && vma->vm_start == old_addr &&
+                       vma->vm_end == (old_addr + old_len))
+                       unlink_anon_vmas(vma);
+
                /* Because we won't unmap we don't need to touch locked_vm */
                return new_addr;
        }
index c9a33ff..9efaf43 100644
@@ -395,9 +395,8 @@ static int dump_task(struct task_struct *p, void *arg)
        task = find_lock_task_mm(p);
        if (!task) {
                /*
-                * This is a kthread or all of p's threads have already
-                * detached their mm's.  There's no need to report
-                * them; they can't be oom killed anyway.
+                * All of p's threads have already detached their mm's. There's
+                * no need to report them; they can't be oom killed anyway.
                 */
                return 0;
        }
index eb34d20..9e35b63 100644
@@ -2833,6 +2833,22 @@ void wait_on_page_writeback(struct page *page)
 }
 EXPORT_SYMBOL_GPL(wait_on_page_writeback);
 
+/*
+ * Wait for a page to complete writeback.  Returns -EINTR if we get a
+ * fatal signal while waiting.
+ */
+int wait_on_page_writeback_killable(struct page *page)
+{
+       while (PageWriteback(page)) {
+               trace_wait_on_page_writeback(page, page_mapping(page));
+               if (wait_on_page_bit_killable(page, PG_writeback))
+                       return -EINTR;
+       }
+
+       return 0;
+}
+EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable);
+
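
Note: a hedged sketch of the intended caller shape, e.g. a filesystem's
->page_mkwrite() backing out on a fatal signal instead of blocking
uninterruptibly:

    /* hypothetical page_mkwrite() fragment */
    if (wait_on_page_writeback_killable(page) < 0)
            return VM_FAULT_RETRY;  /* fatal signal pending; abort the fault */
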
 /**
  * wait_for_stable_page() - wait for writeback to finish, if necessary.
  * @page:      The page to wait on.
index ef5070f..cfc7287 100644
@@ -1282,6 +1282,12 @@ static __always_inline bool free_pages_prepare(struct page *page,
        kernel_poison_pages(page, 1 << order);
 
        /*
+        * With hardware tag-based KASAN, memory tags must be set before the
+        * page becomes unavailable via debug_pagealloc or arch_free_page.
+        */
+       kasan_free_nondeferred_pages(page, order);
+
+       /*
         * arch_free_page() can make the page's contents inaccessible.  s390
         * does this.  So nothing which can access the page's contents should
         * happen after this.
@@ -1290,8 +1296,6 @@ static __always_inline bool free_pages_prepare(struct page *page,
 
        debug_pagealloc_unmap_pages(page, 1 << order);
 
-       kasan_free_nondeferred_pages(page, order);
-
        return true;
 }
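
Note: the ordering constraint being fixed, as a comment sketch (hardware
tag-based KASAN must write memory tags while the page is still accessible):

    /*
     * Required order in free_pages_prepare(), sketched:
     *   1. kasan_free_nondeferred_pages() - poison/retag while still mapped
     *   2. arch_free_page()               - may make contents inaccessible
     *   3. debug_pagealloc_unmap_pages()  - may unmap from the linear map
     */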
 
@@ -2168,6 +2172,7 @@ void __init init_cma_reserved_pageblock(struct page *page)
        }
 
        adjust_managed_page_count(page, pageblock_nr_pages);
+       page_zone(page)->cma_pages += pageblock_nr_pages;
 }
 #endif
 
@@ -3309,6 +3314,7 @@ void split_page(struct page *page, unsigned int order)
        for (i = 1; i < (1 << order); i++)
                set_page_refcounted(page + i);
        split_page_owner(page, 1 << order);
+       split_page_memcg(page, 1 << order);
 }
 EXPORT_SYMBOL_GPL(split_page);
 
@@ -5584,10 +5590,9 @@ void show_free_areas(unsigned int filter, nodemask_t *nodemask)
                        K(node_page_state(pgdat, NR_WRITEBACK)),
                        K(node_page_state(pgdat, NR_SHMEM)),
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
-                       K(node_page_state(pgdat, NR_SHMEM_THPS) * HPAGE_PMD_NR),
-                       K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)
-                                       * HPAGE_PMD_NR),
-                       K(node_page_state(pgdat, NR_ANON_THPS) * HPAGE_PMD_NR),
+                       K(node_page_state(pgdat, NR_SHMEM_THPS)),
+                       K(node_page_state(pgdat, NR_SHMEM_PMDMAPPED)),
+                       K(node_page_state(pgdat, NR_ANON_THPS)),
 #endif
                        K(node_page_state(pgdat, NR_WRITEBACK_TEMP)),
                        node_page_state(pgdat, NR_KERNEL_STACK_KB),
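
Note: the dropped "* HPAGE_PMD_NR" scaling here pairs with the rmap hunks
later in this diff: NR_ANON_THPS, NR_SHMEM_THPS, NR_SHMEM_PMDMAPPED and
NR_FILE_PMDMAPPED now account in base pages rather than in THP units, so
readers no longer multiply. The writer side after the change, sketched:

    /* account in base pages, not in numbers of huge pages */
    __mod_lruvec_page_state(page, NR_ANON_THPS, thp_nr_pages(page));
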
@@ -6122,7 +6127,7 @@ overlap_memmap_init(unsigned long zone, unsigned long *pfn)
  * (usually MIGRATE_MOVABLE). Besides setting the migratetype, no related
  * zone stats (e.g., nr_isolate_pageblock) are touched.
  */
-void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
+void __meminit memmap_init_range(unsigned long size, int nid, unsigned long zone,
                unsigned long start_pfn, unsigned long zone_end_pfn,
                enum meminit_context context,
                struct vmem_altmap *altmap, int migratetype)
@@ -6259,24 +6264,97 @@ static void __meminit zone_init_free_lists(struct zone *zone)
        }
 }
 
-void __meminit __weak memmap_init(unsigned long size, int nid,
-                                 unsigned long zone,
-                                 unsigned long range_start_pfn)
+#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
+/*
+ * Only struct pages that correspond to ranges defined by memblock.memory
+ * are zeroed and initialized by going through __init_single_page() during
+ * memmap_init_zone().
+ *
+ * But, there could be struct pages that correspond to holes in
+ * memblock.memory. This can happen because of the following reasons:
+ * - physical memory bank size is not necessarily the exact multiple of the
+ *   arbitrary section size
+ * - early reserved memory may not be listed in memblock.memory
+ * - memory layouts defined with memmap= kernel parameter may not align
+ *   nicely with memmap sections
+ *
+ * Explicitly initialize those struct pages so that:
+ * - PG_Reserved is set
+ * - zone and node links point to zone and node that span the page if the
+ *   hole is in the middle of a zone
+ * - zone and node links point to adjacent zone/node if the hole falls on
+ *   the zone boundary; the pages in such holes will be prepended to the
+ *   zone/node above the hole except for the trailing pages in the last
+ *   section that will be appended to the zone/node below.
+ */
+static u64 __meminit init_unavailable_range(unsigned long spfn,
+                                           unsigned long epfn,
+                                           int zone, int node)
 {
+       unsigned long pfn;
+       u64 pgcnt = 0;
+
+       for (pfn = spfn; pfn < epfn; pfn++) {
+               if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
+                       pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
+                               + pageblock_nr_pages - 1;
+                       continue;
+               }
+               __init_single_page(pfn_to_page(pfn), pfn, zone, node);
+               __SetPageReserved(pfn_to_page(pfn));
+               pgcnt++;
+       }
+
+       return pgcnt;
+}
+#else
+static inline u64 init_unavailable_range(unsigned long spfn, unsigned long epfn,
+                                        int zone, int node)
+{
+       return 0;
+}
+#endif
+
+void __meminit __weak memmap_init_zone(struct zone *zone)
+{
+       unsigned long zone_start_pfn = zone->zone_start_pfn;
+       unsigned long zone_end_pfn = zone_start_pfn + zone->spanned_pages;
+       int i, nid = zone_to_nid(zone), zone_id = zone_idx(zone);
+       static unsigned long hole_pfn;
        unsigned long start_pfn, end_pfn;
-       unsigned long range_end_pfn = range_start_pfn + size;
-       int i;
+       u64 pgcnt = 0;
 
        for_each_mem_pfn_range(i, nid, &start_pfn, &end_pfn, NULL) {
-               start_pfn = clamp(start_pfn, range_start_pfn, range_end_pfn);
-               end_pfn = clamp(end_pfn, range_start_pfn, range_end_pfn);
+               start_pfn = clamp(start_pfn, zone_start_pfn, zone_end_pfn);
+               end_pfn = clamp(end_pfn, zone_start_pfn, zone_end_pfn);
 
-               if (end_pfn > start_pfn) {
-                       size = end_pfn - start_pfn;
-                       memmap_init_zone(size, nid, zone, start_pfn, range_end_pfn,
-                                        MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
-               }
+               if (end_pfn > start_pfn)
+                       memmap_init_range(end_pfn - start_pfn, nid,
+                                       zone_id, start_pfn, zone_end_pfn,
+                                       MEMINIT_EARLY, NULL, MIGRATE_MOVABLE);
+
+               if (hole_pfn < start_pfn)
+                       pgcnt += init_unavailable_range(hole_pfn, start_pfn,
+                                                       zone_id, nid);
+               hole_pfn = end_pfn;
        }
+
+#ifdef CONFIG_SPARSEMEM
+       /*
+        * Initialize the hole in the range [zone_end_pfn, section_end].
+        * If zone boundary falls in the middle of a section, this hole
+        * will be re-initialized during the call to this function for the
+        * higher zone.
+        */
+       end_pfn = round_up(zone_end_pfn, PAGES_PER_SECTION);
+       if (hole_pfn < end_pfn)
+               pgcnt += init_unavailable_range(hole_pfn, end_pfn,
+                                               zone_id, nid);
+#endif
+
+       if (pgcnt)
+               pr_info("  %s zone: %llu pages in unavailable ranges\n",
+                       zone->name, pgcnt);
 }
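
Note: the net effect of folding hole initialization into memmap_init_zone()
is that struct pages in memblock holes inherit the zone/node of the span they
fall in, instead of the fake zone 0/node 0 used by the init_unavailable_mem()
pass deleted further down. A hedged illustration of the resulting invariant
(hole_pfn is an assumed pfn inside a hole spanned by "zone"):

    struct page *page = pfn_to_page(hole_pfn);  /* hole_pfn: assumption */

    VM_BUG_ON(!PageReserved(page));             /* holes stay reserved */
    VM_BUG_ON(page_zone(page) != zone);         /* linked to the real zone */
    VM_BUG_ON(page_to_nid(page) != zone_to_nid(zone));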
 
 static int zone_batchsize(struct zone *zone)
@@ -6768,25 +6846,22 @@ static unsigned long __init usemap_size(unsigned long zone_start_pfn, unsigned l
        return usemapsize / 8;
 }
 
-static void __ref setup_usemap(struct pglist_data *pgdat,
-                               struct zone *zone,
-                               unsigned long zone_start_pfn,
-                               unsigned long zonesize)
+static void __ref setup_usemap(struct zone *zone)
 {
-       unsigned long usemapsize = usemap_size(zone_start_pfn, zonesize);
+       unsigned long usemapsize = usemap_size(zone->zone_start_pfn,
+                                              zone->spanned_pages);
        zone->pageblock_flags = NULL;
        if (usemapsize) {
                zone->pageblock_flags =
                        memblock_alloc_node(usemapsize, SMP_CACHE_BYTES,
-                                           pgdat->node_id);
+                                           zone_to_nid(zone));
                if (!zone->pageblock_flags)
                        panic("Failed to allocate %ld bytes for zone %s pageblock flags on node %d\n",
-                             usemapsize, zone->name, pgdat->node_id);
+                             usemapsize, zone->name, zone_to_nid(zone));
        }
 }
 #else
-static inline void setup_usemap(struct pglist_data *pgdat, struct zone *zone,
-                               unsigned long zone_start_pfn, unsigned long zonesize) {}
+static inline void setup_usemap(struct zone *zone) {}
 #endif /* CONFIG_SPARSEMEM */
 
 #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE
@@ -6933,7 +7008,6 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
        for (j = 0; j < MAX_NR_ZONES; j++) {
                struct zone *zone = pgdat->node_zones + j;
                unsigned long size, freesize, memmap_pages;
-               unsigned long zone_start_pfn = zone->zone_start_pfn;
 
                size = zone->spanned_pages;
                freesize = zone->present_pages;
@@ -6981,9 +7055,9 @@ static void __init free_area_init_core(struct pglist_data *pgdat)
                        continue;
 
                set_pageblock_order();
-               setup_usemap(pgdat, zone, zone_start_pfn, size);
-               init_currently_empty_zone(zone, zone_start_pfn, size);
-               memmap_init(size, nid, j, zone_start_pfn);
+               setup_usemap(zone);
+               init_currently_empty_zone(zone, zone->zone_start_pfn, size);
+               memmap_init_zone(zone);
        }
 }
 
@@ -7077,88 +7151,6 @@ void __init free_area_init_memoryless_node(int nid)
        free_area_init_node(nid);
 }
 
-#if !defined(CONFIG_FLAT_NODE_MEM_MAP)
-/*
- * Initialize all valid struct pages in the range [spfn, epfn) and mark them
- * PageReserved(). Return the number of struct pages that were initialized.
- */
-static u64 __init init_unavailable_range(unsigned long spfn, unsigned long epfn)
-{
-       unsigned long pfn;
-       u64 pgcnt = 0;
-
-       for (pfn = spfn; pfn < epfn; pfn++) {
-               if (!pfn_valid(ALIGN_DOWN(pfn, pageblock_nr_pages))) {
-                       pfn = ALIGN_DOWN(pfn, pageblock_nr_pages)
-                               + pageblock_nr_pages - 1;
-                       continue;
-               }
-               /*
-                * Use a fake node/zone (0) for now. Some of these pages
-                * (in memblock.reserved but not in memblock.memory) will
-                * get re-initialized via reserve_bootmem_region() later.
-                */
-               __init_single_page(pfn_to_page(pfn), pfn, 0, 0);
-               __SetPageReserved(pfn_to_page(pfn));
-               pgcnt++;
-       }
-
-       return pgcnt;
-}
-
-/*
- * Only struct pages that are backed by physical memory are zeroed and
- * initialized by going through __init_single_page(). But, there are some
- * struct pages which are reserved in memblock allocator and their fields
- * may be accessed (for example page_to_pfn() on some configuration accesses
- * flags). We must explicitly initialize those struct pages.
- *
- * This function also addresses a similar issue where struct pages are left
- * uninitialized because the physical address range is not covered by
- * memblock.memory or memblock.reserved. That could happen when memblock
- * layout is manually configured via memmap=, or when the highest physical
- * address (max_pfn) does not end on a section boundary.
- */
-static void __init init_unavailable_mem(void)
-{
-       phys_addr_t start, end;
-       u64 i, pgcnt;
-       phys_addr_t next = 0;
-
-       /*
-        * Loop through unavailable ranges not covered by memblock.memory.
-        */
-       pgcnt = 0;
-       for_each_mem_range(i, &start, &end) {
-               if (next < start)
-                       pgcnt += init_unavailable_range(PFN_DOWN(next),
-                                                       PFN_UP(start));
-               next = end;
-       }
-
-       /*
-        * Early sections always have a fully populated memmap for the whole
-        * section - see pfn_valid(). If the last section has holes at the
-        * end and that section is marked "online", the memmap will be
-        * considered initialized. Make sure that memmap has a well defined
-        * state.
-        */
-       pgcnt += init_unavailable_range(PFN_DOWN(next),
-                                       round_up(max_pfn, PAGES_PER_SECTION));
-
-       /*
-        * Struct pages that do not have backing memory. This could be because
-        * firmware is using some of this memory, or for some other reasons.
-        */
-       if (pgcnt)
-               pr_info("Zeroed struct page in unavailable ranges: %lld pages", pgcnt);
-}
-#else
-static inline void __init init_unavailable_mem(void)
-{
-}
-#endif /* !CONFIG_FLAT_NODE_MEM_MAP */
-
 #if MAX_NUMNODES > 1
 /*
  * Figure out the number of possible node ids.
@@ -7582,7 +7574,6 @@ void __init free_area_init(unsigned long *max_zone_pfn)
        /* Initialise every node */
        mminit_verify_pageflags_layout();
        setup_nr_node_ids();
-       init_unavailable_mem();
        for_each_online_node(nid) {
                pg_data_t *pgdat = NODE_DATA(nid);
                free_area_init_node(nid);
@@ -7698,17 +7689,6 @@ unsigned long free_reserved_area(void *start, void *end, int poison, const char
        return pages;
 }
 
-#ifdef CONFIG_HIGHMEM
-void free_highmem_page(struct page *page)
-{
-       __free_reserved_page(page);
-       totalram_pages_inc();
-       atomic_long_inc(&page_zone(page)->managed_pages);
-       totalhigh_pages_inc();
-}
-#endif
-
-
 void __init mem_init_print_info(const char *str)
 {
        unsigned long physpages, codesize, datasize, rosize, bss_size;
index 92f7941..c493ce9 100644
@@ -41,9 +41,9 @@ void end_swap_bio_write(struct bio *bio)
                 * Also clear PG_reclaim to avoid rotate_reclaimable_page()
                 */
                set_page_dirty(page);
-               pr_alert("Write-error on swap-device (%u:%u:%llu)\n",
-                        MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-                        (unsigned long long)bio->bi_iter.bi_sector);
+               pr_alert_ratelimited("Write-error on swap-device (%u:%u:%llu)\n",
+                                    MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
+                                    (unsigned long long)bio->bi_iter.bi_sector);
                ClearPageReclaim(page);
        }
        end_page_writeback(page);
@@ -106,9 +106,9 @@ static void end_swap_bio_read(struct bio *bio)
        if (bio->bi_status) {
                SetPageError(page);
                ClearPageUptodate(page);
-               pr_alert("Read-error on swap-device (%u:%u:%llu)\n",
-                        MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
-                        (unsigned long long)bio->bi_iter.bi_sector);
+               pr_alert_ratelimited("Read-error on swap-device (%u:%u:%llu)\n",
+                                    MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)),
+                                    (unsigned long long)bio->bi_iter.bi_sector);
                goto out;
        }
 
@@ -254,11 +254,6 @@ out:
        return ret;
 }
 
-static sector_t swap_page_sector(struct page *page)
-{
-       return (sector_t)__page_file_index(page) << (PAGE_SHIFT - 9);
-}
-
 static inline void count_swpout_vm_event(struct page *page)
 {
 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
index af464bb..d15c7c4 100644
@@ -263,8 +263,8 @@ void pagetypeinfo_showmixedcount_print(struct seq_file *m,
        struct page *page;
        struct page_ext *page_ext;
        struct page_owner *page_owner;
-       unsigned long pfn = zone->zone_start_pfn, block_end_pfn;
-       unsigned long end_pfn = pfn + zone->spanned_pages;
+       unsigned long pfn, block_end_pfn;
+       unsigned long end_pfn = zone_end_pfn(zone);
        unsigned long count[MIGRATE_TYPES] = { 0, };
        int pageblock_mt, page_mt;
        int i;
index cd8e13d..c50d93f 100644
@@ -211,7 +211,7 @@ page_reporting_cycle(struct page_reporting_dev_info *prdev, struct zone *zone,
        }
 
        /* Rotate any leftover pages to the head of the freelist */
-       if (&next->lru != list && !list_is_first(&next->lru, list))
+       if (!list_entry_is_head(next, list, lru) && !list_is_first(&next->lru, list))
                list_rotate_to_front(&next->lru, list);
 
        spin_unlock_irq(&zone->lock);
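
Note: list_entry_is_head(), adopted above, is the list.h idiom for "the
iterator wrapped back to the list head"; roughly:

    /* sketch, as in include/linux/list.h */
    #define list_entry_is_head(pos, head, member) \
            (&pos->member == (head))
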
index ad7a37e..6596a0a 100644
@@ -69,6 +69,7 @@
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 #include <linux/bitmap.h>
+#include <linux/cpumask.h>
 #include <linux/memblock.h>
 #include <linux/err.h>
 #include <linux/lcm.h>
@@ -2662,13 +2663,14 @@ early_param("percpu_alloc", percpu_alloc_setup);
  * On success, pointer to the new allocation_info is returned.  On
  * failure, ERR_PTR value is returned.
  */
-static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
+static struct pcpu_alloc_info * __init __flatten pcpu_build_alloc_info(
                                size_t reserved_size, size_t dyn_size,
                                size_t atom_size,
                                pcpu_fc_cpu_distance_fn_t cpu_distance_fn)
 {
        static int group_map[NR_CPUS] __initdata;
        static int group_cnt[NR_CPUS] __initdata;
+       static struct cpumask mask __initdata;
        const size_t static_size = __per_cpu_end - __per_cpu_start;
        int nr_groups = 1, nr_units = 0;
        size_t size_sum, min_unit_size, alloc_size;
@@ -2681,6 +2683,7 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
        /* this function may be called multiple times */
        memset(group_map, 0, sizeof(group_map));
        memset(group_cnt, 0, sizeof(group_cnt));
+       cpumask_clear(&mask);
 
        /* calculate size_sum and ensure dyn_size is enough for early alloc */
        size_sum = PFN_ALIGN(static_size + reserved_size +
@@ -2702,24 +2705,27 @@ static struct pcpu_alloc_info * __init pcpu_build_alloc_info(
                upa--;
        max_upa = upa;
 
+       cpumask_copy(&mask, cpu_possible_mask);
+
        /* group cpus according to their proximity */
-       for_each_possible_cpu(cpu) {
-               group = 0;
-       next_group:
-               for_each_possible_cpu(tcpu) {
-                       if (cpu == tcpu)
-                               break;
-                       if (group_map[tcpu] == group && cpu_distance_fn &&
-                           (cpu_distance_fn(cpu, tcpu) > LOCAL_DISTANCE ||
-                            cpu_distance_fn(tcpu, cpu) > LOCAL_DISTANCE)) {
-                               group++;
-                               nr_groups = max(nr_groups, group + 1);
-                               goto next_group;
-                       }
-               }
+       for (group = 0; !cpumask_empty(&mask); group++) {
+               /* pop the group's first cpu */
+               cpu = cpumask_first(&mask);
                group_map[cpu] = group;
                group_cnt[group]++;
+               cpumask_clear_cpu(cpu, &mask);
+
+               for_each_cpu(tcpu, &mask) {
+                       if (!cpu_distance_fn ||
+                           (cpu_distance_fn(cpu, tcpu) == LOCAL_DISTANCE &&
+                            cpu_distance_fn(tcpu, cpu) == LOCAL_DISTANCE)) {
+                               group_map[tcpu] = group;
+                               group_cnt[group]++;
+                               cpumask_clear_cpu(tcpu, &mask);
+                       }
+               }
        }
+       nr_groups = group;
 
        /*
         * Wasted space is caused by a ratio imbalance of upa to group_cnt.
index 9578db8..c2210e1 100644
@@ -135,8 +135,9 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address,
 {
        pmd_t pmd;
        VM_BUG_ON(address & ~HPAGE_PMD_MASK);
-       VM_BUG_ON((pmd_present(*pmdp) && !pmd_trans_huge(*pmdp) &&
-                          !pmd_devmap(*pmdp)) || !pmd_present(*pmdp));
+       VM_BUG_ON(!pmd_present(*pmdp));
+       /* Below assumes pmd_present() is true */
+       VM_BUG_ON(!pmd_trans_huge(*pmdp) && !pmd_devmap(*pmdp));
        pmd = pmdp_huge_get_and_clear(vma->vm_mm, address, pmdp);
        flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE);
        return pmd;
index 08c56aa..b0fc27e 100644
--- a/mm/rmap.c
+++ b/mm/rmap.c
@@ -168,7 +168,7 @@ static void anon_vma_chain_link(struct vm_area_struct *vma,
  *
  * Anon-vma allocations are very subtle, because we may have
  * optimistically looked up an anon_vma in page_lock_anon_vma_read()
- * and that may actually touch the spinlock even in the newly
+ * and that may actually touch the rwsem even in the newly
  * allocated vma (it depends on RCU to make sure that the
  * anon_vma isn't actually destroyed).
  *
@@ -359,7 +359,7 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma)
                goto out_error_free_anon_vma;
 
        /*
-        * The root anon_vma's spinlock is the lock actually used when we
+        * The root anon_vma's rwsem is the lock actually used when we
         * lock any of the anon_vmas in this anon_vma tree.
         */
        anon_vma->root = pvma->anon_vma->root;
@@ -413,8 +413,15 @@ void unlink_anon_vmas(struct vm_area_struct *vma)
                list_del(&avc->same_vma);
                anon_vma_chain_free(avc);
        }
-       if (vma->anon_vma)
+       if (vma->anon_vma) {
                vma->anon_vma->degree--;
+
+               /*
+                * The vma is still needed after unlink, and its anon_vma will
+                * be prepared again when a fault is handled.
+                */
+               vma->anon_vma = NULL;
+       }
        unlock_anon_vma_root(root);
 
        /*
@@ -455,8 +462,8 @@ void __init anon_vma_init(void)
  * Getting a lock on a stable anon_vma from a page off the LRU is tricky!
  *
  * Since there is no serialization whatsoever against page_remove_rmap()
- * the best this function can do is return a locked anon_vma that might
- * have been relevant to this page.
+ * the best this function can do is return a refcount-increased anon_vma
+ * that might have been relevant to this page.
  *
  * The page might have been remapped to a different anon_vma or the anon_vma
  * returned may already be freed (and even reused).
@@ -1079,8 +1086,7 @@ static void __page_check_anon_rmap(struct page *page,
         * be set up correctly at this point.
         *
         * We have exclusion against page_add_anon_rmap because the caller
-        * always holds the page locked, except if called from page_dup_rmap,
-        * in which case the page is already known to be setup.
+        * always holds the page locked.
         *
         * We have exclusion against page_add_new_anon_rmap because those pages
         * are initially only visible via the pagetables, and the pte is locked
@@ -1144,7 +1150,7 @@ void do_page_add_anon_rmap(struct page *page,
                 * disabled.
                 */
                if (compound)
-                       __inc_lruvec_page_state(page, NR_ANON_THPS);
+                       __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
                __mod_lruvec_page_state(page, NR_ANON_MAPPED, nr);
        }
 
@@ -1186,7 +1192,7 @@ void page_add_new_anon_rmap(struct page *page,
                if (hpage_pincount_available(page))
                        atomic_set(compound_pincount_ptr(page), 0);
 
-               __inc_lruvec_page_state(page, NR_ANON_THPS);
+               __mod_lruvec_page_state(page, NR_ANON_THPS, nr);
        } else {
                /* Anon THP always mapped first with PMD */
                VM_BUG_ON_PAGE(PageTransCompound(page), page);
@@ -1211,16 +1217,20 @@ void page_add_file_rmap(struct page *page, bool compound)
        VM_BUG_ON_PAGE(compound && !PageTransHuge(page), page);
        lock_page_memcg(page);
        if (compound && PageTransHuge(page)) {
-               for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+               int nr_pages = thp_nr_pages(page);
+
+               for (i = 0, nr = 0; i < nr_pages; i++) {
                        if (atomic_inc_and_test(&page[i]._mapcount))
                                nr++;
                }
                if (!atomic_inc_and_test(compound_mapcount_ptr(page)))
                        goto out;
                if (PageSwapBacked(page))
-                       __inc_node_page_state(page, NR_SHMEM_PMDMAPPED);
+                       __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
+                                               nr_pages);
                else
-                       __inc_node_page_state(page, NR_FILE_PMDMAPPED);
+                       __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
+                                               nr_pages);
        } else {
                if (PageTransCompound(page) && page_mapping(page)) {
                        VM_WARN_ON_ONCE(!PageLocked(page));
@@ -1252,16 +1262,20 @@ static void page_remove_file_rmap(struct page *page, bool compound)
 
        /* page still mapped by someone else? */
        if (compound && PageTransHuge(page)) {
-               for (i = 0, nr = 0; i < thp_nr_pages(page); i++) {
+               int nr_pages = thp_nr_pages(page);
+
+               for (i = 0, nr = 0; i < nr_pages; i++) {
                        if (atomic_add_negative(-1, &page[i]._mapcount))
                                nr++;
                }
                if (!atomic_add_negative(-1, compound_mapcount_ptr(page)))
                        return;
                if (PageSwapBacked(page))
-                       __dec_node_page_state(page, NR_SHMEM_PMDMAPPED);
+                       __mod_lruvec_page_state(page, NR_SHMEM_PMDMAPPED,
+                                               -nr_pages);
                else
-                       __dec_node_page_state(page, NR_FILE_PMDMAPPED);
+                       __mod_lruvec_page_state(page, NR_FILE_PMDMAPPED,
+                                               -nr_pages);
        } else {
                if (!atomic_add_negative(-1, &page->_mapcount))
                        return;
@@ -1292,7 +1306,7 @@ static void page_remove_anon_compound_rmap(struct page *page)
        if (!IS_ENABLED(CONFIG_TRANSPARENT_HUGEPAGE))
                return;
 
-       __dec_lruvec_page_state(page, NR_ANON_THPS);
+       __mod_lruvec_page_state(page, NR_ANON_THPS, -thp_nr_pages(page));
 
        if (TestClearPageDoubleMap(page)) {
                /*
@@ -1722,9 +1736,9 @@ static bool invalid_migration_vma(struct vm_area_struct *vma, void *arg)
        return vma_is_temporary_stack(vma);
 }
 
-static int page_mapcount_is_zero(struct page *page)
+static int page_not_mapped(struct page *page)
 {
-       return !total_mapcount(page);
+       return !page_mapped(page);
 }
 
 /**
@@ -1742,7 +1756,7 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
        struct rmap_walk_control rwc = {
                .rmap_one = try_to_unmap_one,
                .arg = (void *)flags,
-               .done = page_mapcount_is_zero,
+               .done = page_not_mapped,
                .anon_lock = page_lock_anon_vma_read,
        };
 
@@ -1766,11 +1780,6 @@ bool try_to_unmap(struct page *page, enum ttu_flags flags)
        return !page_mapcount(page) ? true : false;
 }
 
-static int page_not_mapped(struct page *page)
-{
-       return !page_mapped(page);
-};
-
 /**
  * try_to_munlock - try to munlock a page
  * @page: the page to be munlocked
index 1b254fb..b2db4ed 100644
@@ -713,7 +713,7 @@ next:
                }
                if (PageTransHuge(page)) {
                        count_vm_event(THP_FILE_ALLOC);
-                       __inc_lruvec_page_state(page, NR_SHMEM_THPS);
+                       __mod_lruvec_page_state(page, NR_SHMEM_THPS, nr);
                }
                mapping->nrpages += nr;
                __mod_lruvec_page_state(page, NR_FILE_PAGES, nr);
@@ -842,7 +842,6 @@ unsigned long shmem_swap_usage(struct vm_area_struct *vma)
 void shmem_unlock_mapping(struct address_space *mapping)
 {
        struct pagevec pvec;
-       pgoff_t indices[PAGEVEC_SIZE];
        pgoff_t index = 0;
 
        pagevec_init(&pvec);
@@ -850,16 +849,8 @@ void shmem_unlock_mapping(struct address_space *mapping)
         * Minor point, but we might as well stop if someone else SHM_LOCKs it.
         */
        while (!mapping_unevictable(mapping)) {
-               /*
-                * Avoid pagevec_lookup(): find_get_pages() returns 0 as if it
-                * has finished, if it hits a row of PAGEVEC_SIZE swap entries.
-                */
-               pvec.nr = find_get_entries(mapping, index,
-                                          PAGEVEC_SIZE, pvec.pages, indices);
-               if (!pvec.nr)
+               if (!pagevec_lookup(&pvec, mapping, &index))
                        break;
-               index = indices[pvec.nr - 1] + 1;
-               pagevec_remove_exceptionals(&pvec);
                check_move_unevictable_pages(&pvec);
                pagevec_release(&pvec);
                cond_resched();
@@ -916,18 +907,12 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
 
        pagevec_init(&pvec);
        index = start;
-       while (index < end) {
-               pvec.nr = find_get_entries(mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE),
-                       pvec.pages, indices);
-               if (!pvec.nr)
-                       break;
+       while (index < end && find_lock_entries(mapping, index, end - 1,
+                       &pvec, indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        index = indices[i];
-                       if (index >= end)
-                               break;
 
                        if (xa_is_value(page)) {
                                if (unfalloc)
@@ -936,18 +921,10 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                                                                index, page);
                                continue;
                        }
+                       index += thp_nr_pages(page) - 1;
 
-                       VM_BUG_ON_PAGE(page_to_pgoff(page) != index, page);
-
-                       if (!trylock_page(page))
-                               continue;
-
-                       if ((!unfalloc || !PageUptodate(page)) &&
-                           page_mapping(page) == mapping) {
-                               VM_BUG_ON_PAGE(PageWriteback(page), page);
-                               if (shmem_punch_compound(page, start, end))
-                                       truncate_inode_page(mapping, page);
-                       }
+                       if (!unfalloc || !PageUptodate(page))
+                               truncate_inode_page(mapping, page);
                        unlock_page(page);
                }
                pagevec_remove_exceptionals(&pvec);
@@ -988,10 +965,8 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
        while (index < end) {
                cond_resched();
 
-               pvec.nr = find_get_entries(mapping, index,
-                               min(end - index, (pgoff_t)PAGEVEC_SIZE),
-                               pvec.pages, indices);
-               if (!pvec.nr) {
+               if (!find_get_entries(mapping, index, end - 1, &pvec,
+                               indices)) {
                        /* If all gone or hole-punch or unfalloc, we're done */
                        if (index == start || end != -1)
                                break;
@@ -1003,9 +978,6 @@ static void shmem_undo_range(struct inode *inode, loff_t lstart, loff_t lend,
                        struct page *page = pvec.pages[i];
 
                        index = indices[i];
-                       if (index >= end)
-                               break;
-
                        if (xa_is_value(page)) {
                                if (unfalloc)
                                        continue;
@@ -1060,7 +1032,8 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend)
 }
 EXPORT_SYMBOL_GPL(shmem_truncate_range);
 
-static int shmem_getattr(const struct path *path, struct kstat *stat,
+static int shmem_getattr(struct user_namespace *mnt_userns,
+                        const struct path *path, struct kstat *stat,
                         u32 request_mask, unsigned int query_flags)
 {
        struct inode *inode = path->dentry->d_inode;
@@ -1072,7 +1045,7 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
                shmem_recalc_inode(inode);
                spin_unlock_irq(&info->lock);
        }
-       generic_fillattr(inode, stat);
+       generic_fillattr(&init_user_ns, inode, stat);
 
        if (is_huge_enabled(sb_info))
                stat->blksize = HPAGE_PMD_SIZE;
@@ -1080,14 +1053,15 @@ static int shmem_getattr(const struct path *path, struct kstat *stat,
        return 0;
 }
 
-static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
+static int shmem_setattr(struct user_namespace *mnt_userns,
+                        struct dentry *dentry, struct iattr *attr)
 {
        struct inode *inode = d_inode(dentry);
        struct shmem_inode_info *info = SHMEM_I(inode);
        struct shmem_sb_info *sbinfo = SHMEM_SB(inode->i_sb);
        int error;
 
-       error = setattr_prepare(dentry, attr);
+       error = setattr_prepare(&init_user_ns, dentry, attr);
        if (error)
                return error;
 
@@ -1141,9 +1115,9 @@ static int shmem_setattr(struct dentry *dentry, struct iattr *attr)
                }
        }
 
-       setattr_copy(inode, attr);
+       setattr_copy(&init_user_ns, inode, attr);
        if (attr->ia_valid & ATTR_MODE)
-               error = posix_acl_chmod(inode, inode->i_mode);
+               error = posix_acl_chmod(&init_user_ns, inode, inode->i_mode);
        return error;
 }
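
Many hunks in this file follow the same mechanical pattern: each inode operation grows a struct user_namespace argument for idmapped-mount support, and tmpfs, which is not idmapped in this series, threads &init_user_ns through. A sketch of that plumbing with stand-in types (the names below are illustrative, not the VFS API):

#include <stdio.h>

struct user_namespace { const char *name; };    /* stand-in type */

static struct user_namespace init_user_ns = { "init" };

/* New-style operation: the mapping context always comes from the caller. */
static int sample_setattr(struct user_namespace *mnt_userns, unsigned int mode)
{
        printf("setattr via %s namespace, mode %o\n", mnt_userns->name, mode);
        return 0;
}

int main(void)
{
        /* A filesystem that does not support idmapping passes the initial
         * namespace, which the permission helpers treat as "no remapping". */
        return sample_setattr(&init_user_ns, 0644);
}
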
 
@@ -1531,6 +1505,30 @@ static struct page *shmem_swapin(swp_entry_t swap, gfp_t gfp,
        return page;
 }
 
+/*
+ * Make sure huge_gfp is always more limited than limit_gfp.
+ * Some of the flags set permissions, while others set limitations.
+ */
+static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
+{
+       gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
+       gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
+       gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
+       gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);
+
+       /* Allow allocations only from the originally specified zones. */
+       result |= zoneflags;
+
+       /*
+        * Minimize the result gfp by taking the union with the deny flags,
+        * and the intersection of the allow flags.
+        */
+       result |= (limit_gfp & denyflags);
+       result |= (huge_gfp & limit_gfp) & allowflags;
+
+       return result;
+}
+
 static struct page *shmem_alloc_hugepage(gfp_t gfp,
                struct shmem_inode_info *info, pgoff_t index)
 {
@@ -1545,8 +1543,8 @@ static struct page *shmem_alloc_hugepage(gfp_t gfp,
                return NULL;
 
        shmem_pseudo_vma_init(&pvma, info, hindex);
-       page = alloc_pages_vma(gfp | __GFP_COMP | __GFP_NORETRY | __GFP_NOWARN,
-                       HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(), true);
+       page = alloc_pages_vma(gfp, HPAGE_PMD_ORDER, &pvma, 0, numa_node_id(),
+                              true);
        shmem_pseudo_vma_destroy(&pvma);
        if (page)
                prep_transhuge_page(page);
@@ -1802,6 +1800,7 @@ static int shmem_getpage_gfp(struct inode *inode, pgoff_t index,
        struct page *page;
        enum sgp_type sgp_huge = sgp;
        pgoff_t hindex = index;
+       gfp_t huge_gfp;
        int error;
        int once = 0;
        int alloced = 0;
@@ -1819,7 +1818,8 @@ repeat:
        sbinfo = SHMEM_SB(inode->i_sb);
        charge_mm = vma ? vma->vm_mm : current->mm;
 
-       page = find_lock_entry(mapping, index);
+       page = pagecache_get_page(mapping, index,
+                                       FGP_ENTRY | FGP_HEAD | FGP_LOCK, 0);
        if (xa_is_value(page)) {
                error = shmem_swapin_page(inode, index, &page,
                                          sgp, gfp, vma, fault_type);
@@ -1887,7 +1887,9 @@ repeat:
        }
 
 alloc_huge:
-       page = shmem_alloc_and_acct_page(gfp, inode, index, true);
+       huge_gfp = vma_thp_gfp_mask(vma);
+       huge_gfp = limit_gfp_mask(huge_gfp, gfp);
+       page = shmem_alloc_and_acct_page(huge_gfp, inode, index, true);
        if (IS_ERR(page)) {
 alloc_nohuge:
                page = shmem_alloc_and_acct_page(gfp, inode,
@@ -2303,7 +2305,7 @@ static struct inode *shmem_get_inode(struct super_block *sb, const struct inode
        inode = new_inode(sb);
        if (inode) {
                inode->i_ino = ino;
-               inode_init_owner(inode, dir, mode);
+               inode_init_owner(&init_user_ns, inode, dir, mode);
                inode->i_blocks = 0;
                inode->i_atime = inode->i_mtime = inode->i_ctime = current_time(inode);
                inode->i_generation = prandom_u32();
@@ -2674,86 +2676,20 @@ static ssize_t shmem_file_read_iter(struct kiocb *iocb, struct iov_iter *to)
        return retval ? retval : error;
 }
 
-/*
- * llseek SEEK_DATA or SEEK_HOLE through the page cache.
- */
-static pgoff_t shmem_seek_hole_data(struct address_space *mapping,
-                                   pgoff_t index, pgoff_t end, int whence)
-{
-       struct page *page;
-       struct pagevec pvec;
-       pgoff_t indices[PAGEVEC_SIZE];
-       bool done = false;
-       int i;
-
-       pagevec_init(&pvec);
-       pvec.nr = 1;            /* start small: we may be there already */
-       while (!done) {
-               pvec.nr = find_get_entries(mapping, index,
-                                       pvec.nr, pvec.pages, indices);
-               if (!pvec.nr) {
-                       if (whence == SEEK_DATA)
-                               index = end;
-                       break;
-               }
-               for (i = 0; i < pvec.nr; i++, index++) {
-                       if (index < indices[i]) {
-                               if (whence == SEEK_HOLE) {
-                                       done = true;
-                                       break;
-                               }
-                               index = indices[i];
-                       }
-                       page = pvec.pages[i];
-                       if (page && !xa_is_value(page)) {
-                               if (!PageUptodate(page))
-                                       page = NULL;
-                       }
-                       if (index >= end ||
-                           (page && whence == SEEK_DATA) ||
-                           (!page && whence == SEEK_HOLE)) {
-                               done = true;
-                               break;
-                       }
-               }
-               pagevec_remove_exceptionals(&pvec);
-               pagevec_release(&pvec);
-               pvec.nr = PAGEVEC_SIZE;
-               cond_resched();
-       }
-       return index;
-}
-
 static loff_t shmem_file_llseek(struct file *file, loff_t offset, int whence)
 {
        struct address_space *mapping = file->f_mapping;
        struct inode *inode = mapping->host;
-       pgoff_t start, end;
-       loff_t new_offset;
 
        if (whence != SEEK_DATA && whence != SEEK_HOLE)
                return generic_file_llseek_size(file, offset, whence,
                                        MAX_LFS_FILESIZE, i_size_read(inode));
+       if (offset < 0)
+               return -ENXIO;
+
        inode_lock(inode);
        /* We're holding i_mutex so we can access i_size directly */
-
-       if (offset < 0 || offset >= inode->i_size)
-               offset = -ENXIO;
-       else {
-               start = offset >> PAGE_SHIFT;
-               end = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
-               new_offset = shmem_seek_hole_data(mapping, start, end, whence);
-               new_offset <<= PAGE_SHIFT;
-               if (new_offset > offset) {
-                       if (new_offset < inode->i_size)
-                               offset = new_offset;
-                       else if (whence == SEEK_DATA)
-                               offset = -ENXIO;
-                       else
-                               offset = inode->i_size;
-               }
-       }
-
+       offset = mapping_seek_hole_data(mapping, offset, inode->i_size, whence);
        if (offset >= 0)
                offset = vfs_setpos(file, offset, MAX_LFS_FILESIZE);
        inode_unlock(inode);
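
The open-coded pagevec scan is replaced by the generic mapping_seek_hole_data() helper, leaving only the negative-offset check in the caller. The user-visible behaviour is the standard lseek() semantics, which the following userspace demonstration exercises; it assumes SEEK_DATA/SEEK_HOLE support in the underlying filesystem (tmpfs has it), and the exact offsets reported can vary by filesystem:

#define _GNU_SOURCE
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <fcntl.h>

int main(void)
{
        char path[] = "/tmp/seekdemo-XXXXXX";
        int fd = mkstemp(path);

        if (fd < 0)
                return 1;
        ftruncate(fd, 1 << 20);                  /* 1 MiB sparse file */
        pwrite(fd, "x", 1, 512 * 1024);          /* one data byte mid-file */

        off_t data = lseek(fd, 0, SEEK_DATA);    /* first non-hole extent */
        off_t hole = lseek(fd, data, SEEK_HOLE); /* hole after that data */

        printf("first data at %lld, next hole at %lld\n",
               (long long)data, (long long)hole);
        close(fd);
        unlink(path);
        return 0;
}
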
@@ -2917,7 +2853,8 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf)
  * File creation. Allocate an inode, and we're done..
  */
 static int
-shmem_mknod(struct inode *dir, struct dentry *dentry, umode_t mode, dev_t dev)
+shmem_mknod(struct user_namespace *mnt_userns, struct inode *dir,
+           struct dentry *dentry, umode_t mode, dev_t dev)
 {
        struct inode *inode;
        int error = -ENOSPC;
@@ -2946,7 +2883,8 @@ out_iput:
 }
 
 static int
-shmem_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
+shmem_tmpfile(struct user_namespace *mnt_userns, struct inode *dir,
+             struct dentry *dentry, umode_t mode)
 {
        struct inode *inode;
        int error = -ENOSPC;
@@ -2969,20 +2907,22 @@ out_iput:
        return error;
 }
 
-static int shmem_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int shmem_mkdir(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        int error;
 
-       if ((error = shmem_mknod(dir, dentry, mode | S_IFDIR, 0)))
+       if ((error = shmem_mknod(&init_user_ns, dir, dentry,
+                                mode | S_IFDIR, 0)))
                return error;
        inc_nlink(dir);
        return 0;
 }
 
-static int shmem_create(struct inode *dir, struct dentry *dentry, umode_t mode,
-               bool excl)
+static int shmem_create(struct user_namespace *mnt_userns, struct inode *dir,
+                       struct dentry *dentry, umode_t mode, bool excl)
 {
-       return shmem_mknod(dir, dentry, mode | S_IFREG, 0);
+       return shmem_mknod(&init_user_ns, dir, dentry, mode | S_IFREG, 0);
 }
 
 /*
@@ -3062,7 +3002,8 @@ static int shmem_exchange(struct inode *old_dir, struct dentry *old_dentry, stru
        return 0;
 }
 
-static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
+static int shmem_whiteout(struct user_namespace *mnt_userns,
+                         struct inode *old_dir, struct dentry *old_dentry)
 {
        struct dentry *whiteout;
        int error;
@@ -3071,7 +3012,7 @@ static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
        if (!whiteout)
                return -ENOMEM;
 
-       error = shmem_mknod(old_dir, whiteout,
+       error = shmem_mknod(&init_user_ns, old_dir, whiteout,
                            S_IFCHR | WHITEOUT_MODE, WHITEOUT_DEV);
        dput(whiteout);
        if (error)
@@ -3094,7 +3035,10 @@ static int shmem_whiteout(struct inode *old_dir, struct dentry *old_dentry)
  * it exists so that the VFS layer correctly free's it when it
  * gets overwritten.
  */
-static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags)
+static int shmem_rename2(struct user_namespace *mnt_userns,
+                        struct inode *old_dir, struct dentry *old_dentry,
+                        struct inode *new_dir, struct dentry *new_dentry,
+                        unsigned int flags)
 {
        struct inode *inode = d_inode(old_dentry);
        int they_are_dirs = S_ISDIR(inode->i_mode);
@@ -3111,7 +3055,7 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
        if (flags & RENAME_WHITEOUT) {
                int error;
 
-               error = shmem_whiteout(old_dir, old_dentry);
+               error = shmem_whiteout(&init_user_ns, old_dir, old_dentry);
                if (error)
                        return error;
        }
@@ -3135,7 +3079,8 @@ static int shmem_rename2(struct inode *old_dir, struct dentry *old_dentry, struc
        return 0;
 }
 
-static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
+static int shmem_symlink(struct user_namespace *mnt_userns, struct inode *dir,
+                        struct dentry *dentry, const char *symname)
 {
        int error;
        int len;
@@ -3273,6 +3218,7 @@ static int shmem_xattr_handler_get(const struct xattr_handler *handler,
 }
 
 static int shmem_xattr_handler_set(const struct xattr_handler *handler,
+                                  struct user_namespace *mnt_userns,
                                   struct dentry *unused, struct inode *inode,
                                   const char *name, const void *value,
                                   size_t size, int flags)
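
The new limit_gfp_mask() added earlier in this file combines two masks so the THP attempt can never be more permissive than what the caller originally allowed: zone bits come from the limiting mask, deny flags are OR-ed in, and allow flags are AND-ed. A compile-and-run sketch with illustrative flag values (the real bit definitions live in the gfp headers, not here):

#include <stdio.h>

typedef unsigned int gfp_t;

/* Stand-in flag values for illustration only. */
#define __GFP_IO        0x01u
#define __GFP_FS        0x02u
#define __GFP_RECLAIM   0x04u
#define __GFP_NOWARN    0x08u
#define __GFP_NORETRY   0x10u
#define GFP_ZONEMASK    0xE0u

static gfp_t limit_gfp_mask(gfp_t huge_gfp, gfp_t limit_gfp)
{
        gfp_t allowflags = __GFP_IO | __GFP_FS | __GFP_RECLAIM;
        gfp_t denyflags = __GFP_NOWARN | __GFP_NORETRY;
        gfp_t zoneflags = limit_gfp & GFP_ZONEMASK;
        gfp_t result = huge_gfp & ~(allowflags | GFP_ZONEMASK);

        result |= zoneflags;                            /* zones from limit_gfp */
        result |= (limit_gfp & denyflags);              /* union of deny flags */
        result |= (huge_gfp & limit_gfp) & allowflags;  /* intersection of allows */
        return result;
}

int main(void)
{
        gfp_t huge = __GFP_IO | __GFP_FS | __GFP_RECLAIM; /* permissive THP mask */
        gfp_t limit = __GFP_IO | __GFP_NORETRY | 0x20u;   /* caller's stricter mask */

        /* Prints 0x31: only __GFP_IO survives from the allow set, while
         * __GFP_NORETRY and the zone bit are inherited from limit_gfp. */
        printf("combined mask: %#x\n", limit_gfp_mask(huge, limit));
        return 0;
}
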
index dcc55e7..ae651bf 100644 (file)
--- a/mm/slab.c
+++ b/mm/slab.c
 #include       <linux/seq_file.h>
 #include       <linux/notifier.h>
 #include       <linux/kallsyms.h>
+#include       <linux/kfence.h>
 #include       <linux/cpu.h>
 #include       <linux/sysctl.h>
 #include       <linux/module.h>
@@ -272,7 +273,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
 #define        STATS_DEC_ACTIVE(x)     ((x)->num_active--)
 #define        STATS_INC_ALLOCED(x)    ((x)->num_allocations++)
 #define        STATS_INC_GROWN(x)      ((x)->grown++)
-#define        STATS_ADD_REAPED(x,y)   ((x)->reaped += (y))
+#define        STATS_ADD_REAPED(x, y)  ((x)->reaped += (y))
 #define        STATS_SET_HIGH(x)                                               \
        do {                                                            \
                if ((x)->num_active > (x)->high_mark)                   \
@@ -296,7 +297,7 @@ static void kmem_cache_node_init(struct kmem_cache_node *parent)
 #define        STATS_DEC_ACTIVE(x)     do { } while (0)
 #define        STATS_INC_ALLOCED(x)    do { } while (0)
 #define        STATS_INC_GROWN(x)      do { } while (0)
-#define        STATS_ADD_REAPED(x,y)   do { (void)(y); } while (0)
+#define        STATS_ADD_REAPED(x, y)  do { (void)(y); } while (0)
 #define        STATS_SET_HIGH(x)       do { } while (0)
 #define        STATS_INC_ERR(x)        do { } while (0)
 #define        STATS_INC_NODEALLOCS(x) do { } while (0)
@@ -332,7 +333,7 @@ static int obj_offset(struct kmem_cache *cachep)
 static unsigned long long *dbg_redzone1(struct kmem_cache *cachep, void *objp)
 {
        BUG_ON(!(cachep->flags & SLAB_RED_ZONE));
-       return (unsigned long long*) (objp + obj_offset(cachep) -
+       return (unsigned long long *) (objp + obj_offset(cachep) -
                                      sizeof(unsigned long long));
 }
 
@@ -580,7 +581,7 @@ static int transfer_objects(struct array_cache *to,
        if (!nr)
                return 0;
 
-       memcpy(to->entry + to->avail, from->entry + from->avail -nr,
+       memcpy(to->entry + to->avail, from->entry + from->avail - nr,
                        sizeof(void *) *nr);
 
        from->avail -= nr;
@@ -1379,7 +1380,7 @@ static struct page *kmem_getpages(struct kmem_cache *cachep, gfp_t flags,
                return NULL;
        }
 
-       account_slab_page(page, cachep->gfporder, cachep);
+       account_slab_page(page, cachep->gfporder, cachep, flags);
        __SetPageSlab(page);
        /* Record if ALLOC_NO_WATERMARKS was set when allocating the slab */
        if (sk_memalloc_socks() && page_is_pfmemalloc(page))
@@ -1790,8 +1791,7 @@ static int __ref setup_cpu_cache(struct kmem_cache *cachep, gfp_t gfp)
 }
 
 slab_flags_t kmem_cache_flags(unsigned int object_size,
-       slab_flags_t flags, const char *name,
-       void (*ctor)(void *))
+       slab_flags_t flags, const char *name)
 {
        return flags;
 }
@@ -2738,7 +2738,7 @@ static void *cache_free_debugcheck(struct kmem_cache *cachep, void *objp,
 
 #else
 #define kfree_debugcheck(x) do { } while(0)
-#define cache_free_debugcheck(x,objp,z) (objp)
+#define cache_free_debugcheck(x, objp, z) (objp)
 #endif
 
 static inline void fixup_objfreelist_debug(struct kmem_cache *cachep,
@@ -2992,7 +2992,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
                                gfp_t flags, void *objp, unsigned long caller)
 {
        WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
-       if (!objp)
+       if (!objp || is_kfence_address(objp))
                return objp;
        if (cachep->flags & SLAB_POISON) {
                check_poison_obj(cachep, objp);
@@ -3025,7 +3025,7 @@ static void *cache_alloc_debugcheck_after(struct kmem_cache *cachep,
        return objp;
 }
 #else
-#define cache_alloc_debugcheck_after(a,b,objp,d) (objp)
+#define cache_alloc_debugcheck_after(a, b, objp, d) (objp)
 #endif
 
 static inline void *____cache_alloc(struct kmem_cache *cachep, gfp_t flags)
@@ -3209,7 +3209,7 @@ must_grow:
 }
 
 static __always_inline void *
-slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
+slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid, size_t orig_size,
                   unsigned long caller)
 {
        unsigned long save_flags;
@@ -3222,6 +3222,10 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
        if (unlikely(!cachep))
                return NULL;
 
+       ptr = kfence_alloc(cachep, orig_size, flags);
+       if (unlikely(ptr))
+               goto out_hooks;
+
        cache_alloc_debugcheck_before(cachep, flags);
        local_irq_save(save_flags);
 
@@ -3254,6 +3258,7 @@ slab_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid,
        if (unlikely(slab_want_init_on_alloc(flags, cachep)) && ptr)
                memset(ptr, 0, cachep->object_size);
 
+out_hooks:
        slab_post_alloc_hook(cachep, objcg, flags, 1, &ptr);
        return ptr;
 }
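
Both slab_alloc_node() and slab_alloc() gain the same prologue: ask KFENCE for a sampled, guard-paged object first, fall through to the regular fast path when it declines, and run the common post-alloc hooks either way. A userspace sketch of that hook pattern (all names below are stand-ins, not the KFENCE API):

#include <stdio.h>
#include <stdlib.h>

static int sample_counter;

/* Pretend "kfence": hands out a guarded object only every 100th call. */
static void *debug_alloc(size_t size)
{
        if (++sample_counter % 100 != 0)
                return NULL;            /* not sampled: take the fast path */
        return calloc(1, size);
}

static void *cache_alloc(size_t size)
{
        void *obj = debug_alloc(size);

        if (obj)                        /* unlikely(): sampled allocation */
                goto out;
        obj = malloc(size);             /* regular fast path */
out:
        /* post-alloc hooks (tracing, poisoning, ...) run for both paths */
        return obj;
}

int main(void)
{
        for (int i = 0; i < 200; i++)
                free(cache_alloc(64));
        puts("done");
        return 0;
}
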
@@ -3291,7 +3296,7 @@ __do_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 #endif /* CONFIG_NUMA */
 
 static __always_inline void *
-slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
+slab_alloc(struct kmem_cache *cachep, gfp_t flags, size_t orig_size, unsigned long caller)
 {
        unsigned long save_flags;
        void *objp;
@@ -3302,6 +3307,10 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
        if (unlikely(!cachep))
                return NULL;
 
+       objp = kfence_alloc(cachep, orig_size, flags);
+       if (unlikely(objp))
+               goto out;
+
        cache_alloc_debugcheck_before(cachep, flags);
        local_irq_save(save_flags);
        objp = __do_cache_alloc(cachep, flags);
@@ -3312,6 +3321,7 @@ slab_alloc(struct kmem_cache *cachep, gfp_t flags, unsigned long caller)
        if (unlikely(slab_want_init_on_alloc(flags, cachep)) && objp)
                memset(objp, 0, cachep->object_size);
 
+out:
        slab_post_alloc_hook(cachep, objcg, flags, 1, &objp);
        return objp;
 }
@@ -3417,11 +3427,17 @@ free_done:
 static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
                                         unsigned long caller)
 {
+       if (is_kfence_address(objp)) {
+               kmemleak_free_recursive(objp, cachep->flags);
+               __kfence_free(objp);
+               return;
+       }
+
        if (unlikely(slab_want_init_on_free(cachep)))
                memset(objp, 0, cachep->object_size);
 
        /* Put the object into the quarantine, don't touch it for now. */
-       if (kasan_slab_free(cachep, objp, _RET_IP_))
+       if (kasan_slab_free(cachep, objp))
                return;
 
        /* Use KCSAN to help debug racy use-after-free. */
@@ -3483,7 +3499,7 @@ void ___cache_free(struct kmem_cache *cachep, void *objp,
  */
 void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
 {
-       void *ret = slab_alloc(cachep, flags, _RET_IP_);
+       void *ret = slab_alloc(cachep, flags, cachep->object_size, _RET_IP_);
 
        trace_kmem_cache_alloc(_RET_IP_, ret,
                               cachep->object_size, cachep->size, flags);
@@ -3516,7 +3532,7 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
 
        local_irq_disable();
        for (i = 0; i < size; i++) {
-               void *objp = __do_cache_alloc(s, flags);
+               void *objp = kfence_alloc(s, s->object_size, flags) ?: __do_cache_alloc(s, flags);
 
                if (unlikely(!objp))
                        goto error;
@@ -3549,7 +3565,7 @@ kmem_cache_alloc_trace(struct kmem_cache *cachep, gfp_t flags, size_t size)
 {
        void *ret;
 
-       ret = slab_alloc(cachep, flags, _RET_IP_);
+       ret = slab_alloc(cachep, flags, size, _RET_IP_);
 
        ret = kasan_kmalloc(cachep, ret, size, flags);
        trace_kmalloc(_RET_IP_, ret,
@@ -3575,7 +3591,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
  */
 void *kmem_cache_alloc_node(struct kmem_cache *cachep, gfp_t flags, int nodeid)
 {
-       void *ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+       void *ret = slab_alloc_node(cachep, flags, nodeid, cachep->object_size, _RET_IP_);
 
        trace_kmem_cache_alloc_node(_RET_IP_, ret,
                                    cachep->object_size, cachep->size,
@@ -3593,7 +3609,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *cachep,
 {
        void *ret;
 
-       ret = slab_alloc_node(cachep, flags, nodeid, _RET_IP_);
+       ret = slab_alloc_node(cachep, flags, nodeid, size, _RET_IP_);
 
        ret = kasan_kmalloc(cachep, ret, size, flags);
        trace_kmalloc_node(_RET_IP_, ret,
@@ -3674,7 +3690,7 @@ static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
        cachep = kmalloc_slab(size, flags);
        if (unlikely(ZERO_OR_NULL_PTR(cachep)))
                return cachep;
-       ret = slab_alloc(cachep, flags, caller);
+       ret = slab_alloc(cachep, flags, size, caller);
 
        ret = kasan_kmalloc(cachep, ret, size, flags);
        trace_kmalloc(caller, ret,
@@ -3717,7 +3733,7 @@ void kmem_cache_free(struct kmem_cache *cachep, void *objp)
        __cache_free(cachep, objp, _RET_IP_);
        local_irq_restore(flags);
 
-       trace_kmem_cache_free(_RET_IP_, objp);
+       trace_kmem_cache_free(_RET_IP_, objp, cachep->name);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -4173,7 +4189,10 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
        BUG_ON(objnr >= cachep->num);
 
        /* Find offset within object. */
-       offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
+       if (is_kfence_address(ptr))
+               offset = ptr - kfence_object_start(ptr);
+       else
+               offset = ptr - index_to_obj(cachep, page, objnr) - obj_offset(cachep);
 
        /* Allow address range falling entirely within usercopy region. */
        if (offset >= cachep->useroffset &&
index ecad9b5..076582f 100644 (file)
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -110,8 +110,7 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
                   slab_flags_t flags, void (*ctor)(void *));
 
 slab_flags_t kmem_cache_flags(unsigned int object_size,
-       slab_flags_t flags, const char *name,
-       void (*ctor)(void *));
+       slab_flags_t flags, const char *name);
 #else
 static inline struct kmem_cache *
 __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
@@ -119,8 +118,7 @@ __kmem_cache_alias(const char *name, unsigned int size, unsigned int align,
 { return NULL; }
 
 static inline slab_flags_t kmem_cache_flags(unsigned int object_size,
-       slab_flags_t flags, const char *name,
-       void (*ctor)(void *))
+       slab_flags_t flags, const char *name)
 {
        return flags;
 }
@@ -240,7 +238,7 @@ static inline bool kmem_cache_debug_flags(struct kmem_cache *s, slab_flags_t fla
 
 #ifdef CONFIG_MEMCG_KMEM
 int memcg_alloc_page_obj_cgroups(struct page *page, struct kmem_cache *s,
-                                gfp_t gfp);
+                                gfp_t gfp, bool new_page);
 
 static inline void memcg_free_page_obj_cgroups(struct page *page)
 {
@@ -317,7 +315,8 @@ static inline void memcg_slab_post_alloc_hook(struct kmem_cache *s,
                        page = virt_to_head_page(p[i]);
 
                        if (!page_objcgs(page) &&
-                           memcg_alloc_page_obj_cgroups(page, s, flags)) {
+                           memcg_alloc_page_obj_cgroups(page, s, flags,
+                                                        false)) {
                                obj_cgroup_uncharge(objcg, obj_full_size(s));
                                continue;
                        }
@@ -381,7 +380,8 @@ static inline struct mem_cgroup *memcg_from_slab_obj(void *ptr)
 }
 
 static inline int memcg_alloc_page_obj_cgroups(struct page *page,
-                                              struct kmem_cache *s, gfp_t gfp)
+                                              struct kmem_cache *s, gfp_t gfp,
+                                              bool new_page)
 {
        return 0;
 }
@@ -422,8 +422,12 @@ static inline struct kmem_cache *virt_to_cache(const void *obj)
 }
 
 static __always_inline void account_slab_page(struct page *page, int order,
-                                             struct kmem_cache *s)
+                                             struct kmem_cache *s,
+                                             gfp_t gfp)
 {
+       if (memcg_kmem_enabled() && (s->flags & SLAB_ACCOUNT))
+               memcg_alloc_page_obj_cgroups(page, s, gfp, true);
+
        mod_node_page_state(page_pgdat(page), cache_vmstat_idx(s),
                            PAGE_SIZE << order);
 }
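
account_slab_page() now receives the allocation gfp so that, for SLAB_ACCOUNT caches, the objcg vector can be created eagerly when the slab page itself is allocated, instead of lazily on the first accounted object; the new_page flag tells the helper the vector is known to be absent. A sketch of that eager-versus-lazy choice with stand-in names:

#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>

struct slab_page {
        void **obj_cgroups;     /* stand-in for the objcg vector */
        int nr_objects;
};

/* new_page=true: called from page allocation, vector is known absent. */
static int alloc_obj_cgroups(struct slab_page *page, bool new_page)
{
        if (!new_page && page->obj_cgroups)
                return 0;       /* lazy path: already installed by a racer */
        page->obj_cgroups = calloc(page->nr_objects, sizeof(void *));
        return page->obj_cgroups ? 0 : -1;
}

static void account_slab_page(struct slab_page *page, bool cache_accounted)
{
        if (cache_accounted)            /* eager: SLAB_ACCOUNT cache */
                alloc_obj_cgroups(page, true);
}

int main(void)
{
        struct slab_page page = { NULL, 32 };

        account_slab_page(&page, true);
        printf("vector present: %d\n", page.obj_cgroups != NULL);
        free(page.obj_cgroups);
        return 0;
}
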
index adbace4..88e8339 100644 (file)
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -12,6 +12,7 @@
 #include <linux/memory.h>
 #include <linux/cache.h>
 #include <linux/compiler.h>
+#include <linux/kfence.h>
 #include <linux/module.h>
 #include <linux/cpu.h>
 #include <linux/uaccess.h>
@@ -197,7 +198,7 @@ struct kmem_cache *find_mergeable(unsigned int size, unsigned int align,
        size = ALIGN(size, sizeof(void *));
        align = calculate_alignment(flags, align, size);
        size = ALIGN(size, align);
-       flags = kmem_cache_flags(size, flags, name, NULL);
+       flags = kmem_cache_flags(size, flags, name);
 
        if (flags & SLAB_NEVER_MERGE)
                return NULL;
@@ -309,9 +310,6 @@ kmem_cache_create_usercopy(const char *name,
        const char *cache_name;
        int err;
 
-       get_online_cpus();
-       get_online_mems();
-
        mutex_lock(&slab_mutex);
 
        err = kmem_cache_sanity_check(name, size);
@@ -360,9 +358,6 @@ kmem_cache_create_usercopy(const char *name,
 out_unlock:
        mutex_unlock(&slab_mutex);
 
-       put_online_mems();
-       put_online_cpus();
-
        if (err) {
                if (flags & SLAB_PANIC)
                        panic("kmem_cache_create: Failed to create slab '%s'. Error %d\n",
@@ -436,6 +431,7 @@ static void slab_caches_to_rcu_destroy_workfn(struct work_struct *work)
        rcu_barrier();
 
        list_for_each_entry_safe(s, s2, &to_destroy, list) {
+               kfence_shutdown_cache(s);
 #ifdef SLAB_SUPPORTS_SYSFS
                sysfs_slab_release(s);
 #else
@@ -461,6 +457,7 @@ static int shutdown_cache(struct kmem_cache *s)
                list_add_tail(&s->list, &slab_caches_to_rcu_destroy);
                schedule_work(&slab_caches_to_rcu_destroy_work);
        } else {
+               kfence_shutdown_cache(s);
 #ifdef SLAB_SUPPORTS_SYSFS
                sysfs_slab_unlink(s);
                sysfs_slab_release(s);
@@ -486,9 +483,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
        if (unlikely(!s))
                return;
 
-       get_online_cpus();
-       get_online_mems();
-
        mutex_lock(&slab_mutex);
 
        s->refcount--;
@@ -503,9 +497,6 @@ void kmem_cache_destroy(struct kmem_cache *s)
        }
 out_unlock:
        mutex_unlock(&slab_mutex);
-
-       put_online_mems();
-       put_online_cpus();
 }
 EXPORT_SYMBOL(kmem_cache_destroy);
 
@@ -522,12 +513,10 @@ int kmem_cache_shrink(struct kmem_cache *cachep)
 {
        int ret;
 
-       get_online_cpus();
-       get_online_mems();
+
        kasan_cache_shrink(cachep);
        ret = __kmem_cache_shrink(cachep);
-       put_online_mems();
-       put_online_cpus();
+
        return ret;
 }
 EXPORT_SYMBOL(kmem_cache_shrink);
@@ -654,6 +643,7 @@ struct kmem_cache *__init create_kmalloc_cache(const char *name,
                panic("Out of memory when creating slab %s\n", name);
 
        create_boot_cache(s, name, size, flags, useroffset, usersize);
+       kasan_cache_create_kmalloc(s);
        list_add(&s->list, &slab_caches);
        s->refcount = 1;
        return s;
@@ -912,8 +902,8 @@ void *kmalloc_order(size_t size, gfp_t flags, unsigned int order)
        page = alloc_pages(flags, order);
        if (likely(page)) {
                ret = page_address(page);
-               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
-                                   PAGE_SIZE << order);
+               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+                                     PAGE_SIZE << order);
        }
        ret = kasan_kmalloc_large(ret, size, flags);
        /* As ret might get tagged, call kmemleak hook after KASAN. */
@@ -1146,16 +1136,27 @@ static __always_inline void *__do_krealloc(const void *p, size_t new_size,
        void *ret;
        size_t ks;
 
-       ks = ksize(p);
+       /* Don't use instrumented ksize to allow precise KASAN poisoning. */
+       if (likely(!ZERO_OR_NULL_PTR(p))) {
+               if (!kasan_check_byte(p))
+                       return NULL;
+               ks = kfence_ksize(p) ?: __ksize(p);
+       } else
+               ks = 0;
 
+       /* If the object still fits, repoison it precisely. */
        if (ks >= new_size) {
                p = kasan_krealloc((void *)p, new_size, flags);
                return (void *)p;
        }
 
        ret = kmalloc_track_caller(new_size, flags);
-       if (ret && p)
-               memcpy(ret, p, ks);
+       if (ret && p) {
+               /* Disable KASAN checks as the object's redzone is accessed. */
+               kasan_disable_current();
+               memcpy(ret, kasan_reset_tag(p), ks);
+               kasan_enable_current();
+       }
 
        return ret;
 }
@@ -1232,22 +1233,24 @@ size_t ksize(const void *objp)
        size_t size;
 
        /*
-        * We need to check that the pointed to object is valid, and only then
-        * unpoison the shadow memory below. We use __kasan_check_read(), to
-        * generate a more useful report at the time ksize() is called (rather
-        * than later where behaviour is undefined due to potential
-        * use-after-free or double-free).
+        * We need to first check that the pointer to the object is valid, and
+        * only then unpoison the memory. The report printed from ksize() is
+        * more useful than when it's printed later, when the behaviour could
+        * be undefined due to a potential use-after-free or double-free.
+        *
+        * We use kasan_check_byte(), which is supported for the hardware
+        * tag-based KASAN mode, unlike kasan_check_read/write().
         *
-        * If the pointed to memory is invalid we return 0, to avoid users of
+        * If the pointed to memory is invalid, we return 0 to avoid users of
         * ksize() writing to and potentially corrupting the memory region.
         *
         * We want to perform the check before __ksize(), to avoid potentially
         * crashing in __ksize() due to accessing invalid metadata.
         */
-       if (unlikely(ZERO_OR_NULL_PTR(objp)) || !__kasan_check_read(objp, 1))
+       if (unlikely(ZERO_OR_NULL_PTR(objp)) || !kasan_check_byte(objp))
                return 0;
 
-       size = __ksize(objp);
+       size = kfence_ksize(objp) ?: __ksize(objp);
        /*
         * We assume that ksize callers could use whole allocated area,
         * so we need to unpoison this area.
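
Both __do_krealloc() and ksize() above use the GNU C "a ?: b" shorthand to prefer the KFENCE-reported size and fall back to the slab's __ksize() when the pointer is not a KFENCE object (kfence_ksize() returns 0 in that case). A small sketch of that fallback idiom with stub size functions; it builds with GCC or Clang, which support the extension the kernel relies on:

#include <stdio.h>

/* Stand-in for kfence_ksize(): returns 0 for "not a guarded object". */
static unsigned long debug_object_size(const void *p)
{
        (void)p;
        return 0;
}

/* Stand-in for __ksize(): the regular slab bucket size. */
static unsigned long slab_object_size(const void *p)
{
        (void)p;
        return 64;
}

int main(void)
{
        int obj;
        /* GNU extension: "a ?: b" evaluates a once, yields b if a is 0. */
        unsigned long size = debug_object_size(&obj) ?: slab_object_size(&obj);

        printf("size = %lu\n", size);   /* prints 64: fell back to slab */
        return 0;
}
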
index ef87ada..0578429 100644 (file)
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -673,7 +673,7 @@ void kmem_cache_free(struct kmem_cache *c, void *b)
                __kmem_cache_free(b, c->size);
        }
 
-       trace_kmem_cache_free(_RET_IP_, b);
+       trace_kmem_cache_free(_RET_IP_, b, c->name);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
index f5baf42..3021ce9 100644 (file)
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -27,6 +27,7 @@
 #include <linux/ctype.h>
 #include <linux/debugobjects.h>
 #include <linux/kallsyms.h>
+#include <linux/kfence.h>
 #include <linux/memory.h>
 #include <linux/math64.h>
 #include <linux/fault-inject.h>
@@ -235,6 +236,14 @@ static inline void stat(const struct kmem_cache *s, enum stat_item si)
 #endif
 }
 
+/*
+ * Tracks for which NUMA nodes we have kmem_cache_nodes allocated.
+ * Corresponds to node_state[N_NORMAL_MEMORY], but can temporarily
+ * differ during memory hotplug/hotremove operations.
+ * Protected by slab_mutex.
+ */
+static nodemask_t slab_nodes;
+
 /********************************************************************
  *                     Core slab cache functions
  *******************************************************************/
@@ -1400,7 +1409,6 @@ __setup("slub_debug", setup_slub_debug);
  * @object_size:       the size of an object without meta data
  * @flags:             flags to set
  * @name:              name of the cache
- * @ctor:              constructor function
  *
  * Debug option(s) are applied to @flags. In addition to the debug
  * option(s), if a slab name (or multiple) is specified i.e.
@@ -1408,13 +1416,21 @@ __setup("slub_debug", setup_slub_debug);
  * then only the select slabs will receive the debug option(s).
  */
 slab_flags_t kmem_cache_flags(unsigned int object_size,
-       slab_flags_t flags, const char *name,
-       void (*ctor)(void *))
+       slab_flags_t flags, const char *name)
 {
        char *iter;
        size_t len;
        char *next_block;
        slab_flags_t block_flags;
+       slab_flags_t slub_debug_local = slub_debug;
+
+       /*
+        * If the slab cache is for debugging (e.g. kmemleak) then
+        * don't store user (stack trace) information by default,
+        * but let the user enable it via the command line below.
+        */
+       if (flags & SLAB_NOLEAKTRACE)
+               slub_debug_local &= ~SLAB_STORE_USER;
 
        len = strlen(name);
        next_block = slub_debug_string;
@@ -1449,7 +1465,7 @@ slab_flags_t kmem_cache_flags(unsigned int object_size,
                }
        }
 
-       return flags | slub_debug;
+       return flags | slub_debug_local;
 }
 #else /* !CONFIG_SLUB_DEBUG */
 static inline void setup_object_debug(struct kmem_cache *s,
@@ -1474,8 +1490,7 @@ static inline void add_full(struct kmem_cache *s, struct kmem_cache_node *n,
 static inline void remove_full(struct kmem_cache *s, struct kmem_cache_node *n,
                                        struct page *page) {}
 slab_flags_t kmem_cache_flags(unsigned int object_size,
-       slab_flags_t flags, const char *name,
-       void (*ctor)(void *))
+       slab_flags_t flags, const char *name)
 {
        return flags;
 }
@@ -1514,7 +1529,7 @@ static inline void *kmalloc_large_node_hook(void *ptr, size_t size, gfp_t flags)
 static __always_inline void kfree_hook(void *x)
 {
        kmemleak_free(x);
-       kasan_kfree_large(x, _RET_IP_);
+       kasan_kfree_large(x);
 }
 
 static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
@@ -1544,7 +1559,7 @@ static __always_inline bool slab_free_hook(struct kmem_cache *s, void *x)
                                     KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ASSERT);
 
        /* KASAN might put x into memory quarantine, delaying its reuse */
-       return kasan_slab_free(s, x, _RET_IP_);
+       return kasan_slab_free(s, x);
 }
 
 static inline bool slab_free_freelist_hook(struct kmem_cache *s,
@@ -1556,6 +1571,11 @@ static inline bool slab_free_freelist_hook(struct kmem_cache *s,
        void *old_tail = *tail ? *tail : *head;
        int rsize;
 
+       if (is_kfence_address(next)) {
+               slab_free_hook(s, next);
+               return true;
+       }
+
        /* Head and tail of the reconstructed freelist */
        *head = NULL;
        *tail = NULL;
@@ -1771,7 +1791,7 @@ static struct page *allocate_slab(struct kmem_cache *s, gfp_t flags, int node)
 
        page->objects = oo_objects(oo);
 
-       account_slab_page(page, oo_order(oo), s);
+       account_slab_page(page, oo_order(oo), s, flags);
 
        page->slab_cache = s;
        __SetPageSlab(page);
@@ -1973,7 +1993,7 @@ static void *get_partial_node(struct kmem_cache *s, struct kmem_cache_node *n,
 
                t = acquire_slab(s, n, page, object == NULL, &objects);
                if (!t)
-                       continue; /* cmpxchg raced */
+                       break;
 
                available += objects;
                if (!object) {
@@ -2153,9 +2173,9 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
 {
        enum slab_modes { M_NONE, M_PARTIAL, M_FULL, M_FREE };
        struct kmem_cache_node *n = get_node(s, page_to_nid(page));
-       int lock = 0;
+       int lock = 0, free_delta = 0;
        enum slab_modes l = M_NONE, m = M_NONE;
-       void *nextfree;
+       void *nextfree, *freelist_iter, *freelist_tail;
        int tail = DEACTIVATE_TO_HEAD;
        struct page new;
        struct page old;
@@ -2166,45 +2186,34 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
        }
 
        /*
-        * Stage one: Free all available per cpu objects back
-        * to the page freelist while it is still frozen. Leave the
-        * last one.
-        *
-        * There is no need to take the list->lock because the page
-        * is still frozen.
+        * Stage one: Count the objects on cpu's freelist as free_delta and
+        * remember the last object in freelist_tail for later splicing.
         */
-       while (freelist && (nextfree = get_freepointer(s, freelist))) {
-               void *prior;
-               unsigned long counters;
+       freelist_tail = NULL;
+       freelist_iter = freelist;
+       while (freelist_iter) {
+               nextfree = get_freepointer(s, freelist_iter);
 
                /*
                 * If 'nextfree' is invalid, it is possible that the object at
-                * 'freelist' is already corrupted.  So isolate all objects
-                * starting at 'freelist'.
+                * 'freelist_iter' is already corrupted.  So isolate all objects
+                * starting at 'freelist_iter' by skipping them.
                 */
-               if (freelist_corrupted(s, page, &freelist, nextfree))
+               if (freelist_corrupted(s, page, &freelist_iter, nextfree))
                        break;
 
-               do {
-                       prior = page->freelist;
-                       counters = page->counters;
-                       set_freepointer(s, freelist, prior);
-                       new.counters = counters;
-                       new.inuse--;
-                       VM_BUG_ON(!new.frozen);
-
-               } while (!__cmpxchg_double_slab(s, page,
-                       prior, counters,
-                       freelist, new.counters,
-                       "drain percpu freelist"));
+               freelist_tail = freelist_iter;
+               free_delta++;
 
-               freelist = nextfree;
+               freelist_iter = nextfree;
        }
 
        /*
-        * Stage two: Ensure that the page is unfrozen while the
-        * list presence reflects the actual number of objects
-        * during unfreeze.
+        * Stage two: Unfreeze the page while splicing the per-cpu
+        * freelist to the head of page's freelist.
+        *
+        * Ensure that the page is unfrozen while the list presence
+        * reflects the actual number of objects during unfreeze.
         *
         * We setup the list membership and then perform a cmpxchg
         * with the count. If there is a mismatch then the page
@@ -2217,15 +2226,15 @@ static void deactivate_slab(struct kmem_cache *s, struct page *page,
         */
 redo:
 
-       old.freelist = page->freelist;
-       old.counters = page->counters;
+       old.freelist = READ_ONCE(page->freelist);
+       old.counters = READ_ONCE(page->counters);
        VM_BUG_ON(!old.frozen);
 
        /* Determine target state of the slab */
        new.counters = old.counters;
-       if (freelist) {
-               new.inuse--;
-               set_freepointer(s, freelist, old.freelist);
+       if (freelist_tail) {
+               new.inuse -= free_delta;
+               set_freepointer(s, freelist_tail, old.freelist);
                new.freelist = freelist;
        } else
                new.freelist = old.freelist;
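
The rewritten deactivate_slab() replaces a cmpxchg per object with a single pass that counts the per-cpu objects and remembers the tail (stage one), then splices the whole list onto the page freelist in one update (stage two). A userspace sketch of the count-then-splice idea on a plain singly linked list, with stand-in types:

#include <stdio.h>
#include <stddef.h>

struct object { struct object *next; };

static struct object *splice(struct object *cpu_list, struct object *shared,
                             int *delta)
{
        struct object *iter = cpu_list, *tail = NULL;

        *delta = 0;
        while (iter) {                  /* stage one: count, remember tail */
                tail = iter;
                (*delta)++;
                iter = iter->next;
        }
        if (!tail)
                return shared;          /* nothing to splice */
        tail->next = shared;            /* stage two: one splice at the head */
        return cpu_list;
}

int main(void)
{
        struct object a, b, shared_obj = { NULL };
        int delta;

        a.next = &b;
        b.next = NULL;
        struct object *head = splice(&a, &shared_obj, &delta);

        printf("spliced %d objects, head is a: %d\n", delta, head == &a);
        return 0;
}
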
@@ -2672,7 +2681,7 @@ static void *___slab_alloc(struct kmem_cache *s, gfp_t gfpflags, int node,
                 * ignore the node constraint
                 */
                if (unlikely(node != NUMA_NO_NODE &&
-                            !node_state(node, N_NORMAL_MEMORY)))
+                            !node_isset(node, slab_nodes)))
                        node = NUMA_NO_NODE;
                goto new_slab;
        }
@@ -2683,7 +2692,7 @@ redo:
                 * same as above but node_match() being false already
                 * implies node != NUMA_NO_NODE
                 */
-               if (!node_state(node, N_NORMAL_MEMORY)) {
+               if (!node_isset(node, slab_nodes)) {
                        node = NUMA_NO_NODE;
                        goto redo;
                } else {
@@ -2806,7 +2815,7 @@ static __always_inline void maybe_wipe_obj_freeptr(struct kmem_cache *s,
  * Otherwise we can simply pick the next object from the lockless free list.
  */
 static __always_inline void *slab_alloc_node(struct kmem_cache *s,
-               gfp_t gfpflags, int node, unsigned long addr)
+               gfp_t gfpflags, int node, unsigned long addr, size_t orig_size)
 {
        void *object;
        struct kmem_cache_cpu *c;
@@ -2817,6 +2826,11 @@ static __always_inline void *slab_alloc_node(struct kmem_cache *s,
        s = slab_pre_alloc_hook(s, &objcg, 1, gfpflags);
        if (!s)
                return NULL;
+
+       object = kfence_alloc(s, orig_size, gfpflags);
+       if (unlikely(object))
+               goto out;
+
 redo:
        /*
         * Must read kmem_cache cpu data via this cpu ptr. Preemption is
@@ -2889,20 +2903,21 @@ redo:
        if (unlikely(slab_want_init_on_alloc(gfpflags, s)) && object)
                memset(kasan_reset_tag(object), 0, s->object_size);
 
+out:
        slab_post_alloc_hook(s, objcg, gfpflags, 1, &object);
 
        return object;
 }
 
 static __always_inline void *slab_alloc(struct kmem_cache *s,
-               gfp_t gfpflags, unsigned long addr)
+               gfp_t gfpflags, unsigned long addr, size_t orig_size)
 {
-       return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr);
+       return slab_alloc_node(s, gfpflags, NUMA_NO_NODE, addr, orig_size);
 }
 
 void *kmem_cache_alloc(struct kmem_cache *s, gfp_t gfpflags)
 {
-       void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+       void *ret = slab_alloc(s, gfpflags, _RET_IP_, s->object_size);
 
        trace_kmem_cache_alloc(_RET_IP_, ret, s->object_size,
                                s->size, gfpflags);
@@ -2914,7 +2929,7 @@ EXPORT_SYMBOL(kmem_cache_alloc);
 #ifdef CONFIG_TRACING
 void *kmem_cache_alloc_trace(struct kmem_cache *s, gfp_t gfpflags, size_t size)
 {
-       void *ret = slab_alloc(s, gfpflags, _RET_IP_);
+       void *ret = slab_alloc(s, gfpflags, _RET_IP_, size);
        trace_kmalloc(_RET_IP_, ret, size, s->size, gfpflags);
        ret = kasan_kmalloc(s, ret, size, gfpflags);
        return ret;
@@ -2925,7 +2940,7 @@ EXPORT_SYMBOL(kmem_cache_alloc_trace);
 #ifdef CONFIG_NUMA
 void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t gfpflags, int node)
 {
-       void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+       void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, s->object_size);
 
        trace_kmem_cache_alloc_node(_RET_IP_, ret,
                                    s->object_size, s->size, gfpflags, node);
@@ -2939,7 +2954,7 @@ void *kmem_cache_alloc_node_trace(struct kmem_cache *s,
                                    gfp_t gfpflags,
                                    int node, size_t size)
 {
-       void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_);
+       void *ret = slab_alloc_node(s, gfpflags, node, _RET_IP_, size);
 
        trace_kmalloc_node(_RET_IP_, ret,
                           size, s->size, gfpflags, node);
@@ -2973,6 +2988,9 @@ static void __slab_free(struct kmem_cache *s, struct page *page,
 
        stat(s, FREE_SLOWPATH);
 
+       if (kfence_free(head))
+               return;
+
        if (kmem_cache_debug(s) &&
            !free_debug_processing(s, page, head, tail, cnt, addr))
                return;
@@ -3157,7 +3175,7 @@ void kmem_cache_free(struct kmem_cache *s, void *x)
        if (!s)
                return;
        slab_free(s, virt_to_head_page(x), x, NULL, 1, _RET_IP_);
-       trace_kmem_cache_free(_RET_IP_, x);
+       trace_kmem_cache_free(_RET_IP_, x, s->name);
 }
 EXPORT_SYMBOL(kmem_cache_free);
 
@@ -3217,6 +3235,13 @@ int build_detached_freelist(struct kmem_cache *s, size_t size,
                df->s = cache_from_obj(s, object); /* Support for memcg */
        }
 
+       if (is_kfence_address(object)) {
+               slab_free_hook(df->s, object);
+               __kfence_free(object);
+               p[size] = NULL; /* mark object processed */
+               return size;
+       }
+
        /* Start new detached freelist */
        df->page = page;
        set_freepointer(df->s, object, NULL);
@@ -3266,7 +3291,7 @@ void kmem_cache_free_bulk(struct kmem_cache *s, size_t size, void **p)
                if (!df.page)
                        continue;
 
-               slab_free(df.s, df.page, df.freelist, df.tail, df.cnt,_RET_IP_);
+               slab_free(df.s, df.page, df.freelist, df.tail, df.cnt, _RET_IP_);
        } while (likely(size));
 }
 EXPORT_SYMBOL(kmem_cache_free_bulk);
@@ -3292,8 +3317,14 @@ int kmem_cache_alloc_bulk(struct kmem_cache *s, gfp_t flags, size_t size,
        c = this_cpu_ptr(s->cpu_slab);
 
        for (i = 0; i < size; i++) {
-               void *object = c->freelist;
+               void *object = kfence_alloc(s, s->object_size, flags);
 
+               if (unlikely(object)) {
+                       p[i] = object;
+                       continue;
+               }
+
+               object = c->freelist;
                if (unlikely(!object)) {
                        /*
                         * We may have removed an object from c->freelist using
@@ -3548,8 +3579,7 @@ static void early_kmem_cache_node_alloc(int node)
        init_object(kmem_cache_node, n, SLUB_RED_ACTIVE);
        init_tracking(kmem_cache_node, n);
 #endif
-       n = kasan_kmalloc(kmem_cache_node, n, sizeof(struct kmem_cache_node),
-                     GFP_KERNEL);
+       n = kasan_slab_alloc(kmem_cache_node, n, GFP_KERNEL);
        page->freelist = get_freepointer(kmem_cache_node, n);
        page->inuse = 1;
        page->frozen = 0;
@@ -3586,7 +3616,7 @@ static int init_kmem_cache_nodes(struct kmem_cache *s)
 {
        int node;
 
-       for_each_node_state(node, N_NORMAL_MEMORY) {
+       for_each_node_mask(node, slab_nodes) {
                struct kmem_cache_node *n;
 
                if (slab_state == DOWN) {
@@ -3797,7 +3827,7 @@ static int calculate_sizes(struct kmem_cache *s, int forced_order)
 
 static int kmem_cache_open(struct kmem_cache *s, slab_flags_t flags)
 {
-       s->flags = kmem_cache_flags(s->size, flags, s->name, s->ctor);
+       s->flags = kmem_cache_flags(s->size, flags, s->name);
 #ifdef CONFIG_SLAB_FREELIST_HARDENED
        s->random = get_random_long();
 #endif
@@ -4018,7 +4048,7 @@ void *__kmalloc(size_t size, gfp_t flags)
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       ret = slab_alloc(s, flags, _RET_IP_);
+       ret = slab_alloc(s, flags, _RET_IP_, size);
 
        trace_kmalloc(_RET_IP_, ret, size, s->size, flags);
 
@@ -4039,8 +4069,8 @@ static void *kmalloc_large_node(size_t size, gfp_t flags, int node)
        page = alloc_pages_node(node, flags, order);
        if (page) {
                ptr = page_address(page);
-               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
-                                   PAGE_SIZE << order);
+               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+                                     PAGE_SIZE << order);
        }
 
        return kmalloc_large_node_hook(ptr, size, flags);
@@ -4066,7 +4096,7 @@ void *__kmalloc_node(size_t size, gfp_t flags, int node)
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       ret = slab_alloc_node(s, flags, node, _RET_IP_);
+       ret = slab_alloc_node(s, flags, node, _RET_IP_, size);
 
        trace_kmalloc_node(_RET_IP_, ret, size, s->size, flags, node);
 
@@ -4092,6 +4122,7 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
        struct kmem_cache *s;
        unsigned int offset;
        size_t object_size;
+       bool is_kfence = is_kfence_address(ptr);
 
        ptr = kasan_reset_tag(ptr);
 
@@ -4104,10 +4135,13 @@ void __check_heap_object(const void *ptr, unsigned long n, struct page *page,
                               to_user, 0, n);
 
        /* Find offset within object. */
-       offset = (ptr - page_address(page)) % s->size;
+       if (is_kfence)
+               offset = ptr - kfence_object_start(ptr);
+       else
+               offset = (ptr - page_address(page)) % s->size;
 
        /* Adjust for redzone and reject if within the redzone. */
-       if (kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
+       if (!is_kfence && kmem_cache_debug_flags(s, SLAB_RED_ZONE)) {
                if (offset < s->red_left_pad)
                        usercopy_abort("SLUB object in left red zone",
                                       s->name, to_user, offset, n);
@@ -4171,8 +4205,8 @@ void kfree(const void *x)
 
                BUG_ON(!PageCompound(page));
                kfree_hook(object);
-               mod_node_page_state(page_pgdat(page), NR_SLAB_UNRECLAIMABLE_B,
-                                   -(PAGE_SIZE << order));
+               mod_lruvec_page_state(page, NR_SLAB_UNRECLAIMABLE_B,
+                                     -(PAGE_SIZE << order));
                __free_pages(page, order);
                return;
        }
@@ -4267,8 +4301,6 @@ static int slab_mem_going_offline_callback(void *arg)
 
 static void slab_mem_offline_callback(void *arg)
 {
-       struct kmem_cache_node *n;
-       struct kmem_cache *s;
        struct memory_notify *marg = arg;
        int offline_node;
 
@@ -4282,21 +4314,12 @@ static void slab_mem_offline_callback(void *arg)
                return;
 
        mutex_lock(&slab_mutex);
-       list_for_each_entry(s, &slab_caches, list) {
-               n = get_node(s, offline_node);
-               if (n) {
-                       /*
-                        * if n->nr_slabs > 0, slabs still exist on the node
-                        * that is going down. We were unable to free them,
-                        * and offline_pages() function shouldn't call this
-                        * callback. So, we must fail.
-                        */
-                       BUG_ON(slabs_node(s, offline_node));
-
-                       s->node[offline_node] = NULL;
-                       kmem_cache_free(kmem_cache_node, n);
-               }
-       }
+       node_clear(offline_node, slab_nodes);
+       /*
+        * We no longer free kmem_cache_node structures here, as it would be
+        * racy with all get_node() users, and infeasible to protect them with
+        * slab_mutex.
+        */
        mutex_unlock(&slab_mutex);
 }
 
@@ -4323,6 +4346,12 @@ static int slab_mem_going_online_callback(void *arg)
        mutex_lock(&slab_mutex);
        list_for_each_entry(s, &slab_caches, list) {
                /*
+                * The structure may already exist if the node was previously
+                * onlined and offlined.
+                */
+               if (get_node(s, nid))
+                       continue;
+               /*
                 * XXX: kmem_cache_alloc_node will fallback to other nodes
                 *      since memory is not yet available from the node that
                 *      is brought up.
@@ -4335,6 +4364,11 @@ static int slab_mem_going_online_callback(void *arg)
                init_kmem_cache_node(n);
                s->node[nid] = n;
        }
+       /*
+        * Any cache created after this point will also have kmem_cache_node
+        * initialized for the new node.
+        */
+       node_set(nid, slab_nodes);
 out:
        mutex_unlock(&slab_mutex);
        return ret;
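
The hotplug callbacks above stop freeing kmem_cache_node structures on offline: the slab_nodes mask alone tracks which nodes may be used, the bit is set only after the per-node structure exists, and a re-onlined node simply reuses the structure left behind. A sketch of that ordering with a plain 64-bit mask standing in for nodemask_t:

#include <stdio.h>
#include <stdbool.h>

#define MAX_NODES 64

static unsigned long long slab_nodes;           /* stand-in nodemask */
static void *node_struct[MAX_NODES];            /* stand-in kmem_cache_node */

static void node_online(int nid)
{
        if (!node_struct[nid])
                node_struct[nid] = &slab_nodes; /* pretend allocation */
        slab_nodes |= 1ULL << nid;              /* publish only afterwards */
}

static void node_offline(int nid)
{
        slab_nodes &= ~(1ULL << nid);           /* stop new allocations... */
        /* ...but keep node_struct[nid]: freeing would race with readers */
}

static bool node_usable(int nid)
{
        return slab_nodes & (1ULL << nid);
}

int main(void)
{
        node_online(1);
        node_offline(1);
        node_online(1);                         /* structure already exists */
        printf("node 1 usable: %d\n", node_usable(1));
        return 0;
}
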
@@ -4415,6 +4449,7 @@ void __init kmem_cache_init(void)
 {
        static __initdata struct kmem_cache boot_kmem_cache,
                boot_kmem_cache_node;
+       int node;
 
        if (debug_guardpage_minorder())
                slub_max_order = 0;
@@ -4422,6 +4457,13 @@ void __init kmem_cache_init(void)
        kmem_cache_node = &boot_kmem_cache_node;
        kmem_cache = &boot_kmem_cache;
 
+       /*
+        * Initialize the nodemask for which we will allocate per node
+        * structures. Here we don't need taking slab_mutex yet.
+        */
+       for_each_node_state(node, N_NORMAL_MEMORY)
+               node_set(node, slab_nodes);
+
        create_boot_cache(kmem_cache_node, "kmem_cache_node",
                sizeof(struct kmem_cache_node), SLAB_HWCACHE_ALIGN, 0, 0);
 
@@ -4516,7 +4558,7 @@ void *__kmalloc_track_caller(size_t size, gfp_t gfpflags, unsigned long caller)
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       ret = slab_alloc(s, gfpflags, caller);
+       ret = slab_alloc(s, gfpflags, caller, size);
 
        /* Honor the call site pointer we received. */
        trace_kmalloc(caller, ret, size, s->size, gfpflags);
@@ -4547,7 +4589,7 @@ void *__kmalloc_node_track_caller(size_t size, gfp_t gfpflags,
        if (unlikely(ZERO_OR_NULL_PTR(s)))
                return s;
 
-       ret = slab_alloc_node(s, gfpflags, node, caller);
+       ret = slab_alloc_node(s, gfpflags, node, caller, size);
 
        /* Honor the call site pointer we received. */
        trace_kmalloc_node(caller, ret, size, s->size, gfpflags, node);
@@ -4932,22 +4974,6 @@ enum slab_stat_type {
 #define SO_OBJECTS     (1 << SL_OBJECTS)
 #define SO_TOTAL       (1 << SL_TOTAL)
 
-#ifdef CONFIG_MEMCG
-static bool memcg_sysfs_enabled = IS_ENABLED(CONFIG_SLUB_MEMCG_SYSFS_ON);
-
-static int __init setup_slub_memcg_sysfs(char *str)
-{
-       int v;
-
-       if (get_option(&str, &v) > 0)
-               memcg_sysfs_enabled = v;
-
-       return 1;
-}
-
-__setup("slub_memcg_sysfs=", setup_slub_memcg_sysfs);
-#endif
-
 static ssize_t show_slab_objects(struct kmem_cache *s,
                                 char *buf, unsigned long flags)
 {
index 2cca714..31b844d 100644 (file)
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -83,9 +83,8 @@ static void __page_cache_release(struct page *page)
                unsigned long flags;
 
                lruvec = lock_page_lruvec_irqsave(page, &flags);
-               VM_BUG_ON_PAGE(!PageLRU(page), page);
-               __ClearPageLRU(page);
-               del_page_from_lru_list(page, lruvec, page_off_lru(page));
+               del_page_from_lru_list(page, lruvec);
+               __clear_page_lru_flags(page);
                unlock_page_lruvec_irqrestore(lruvec, flags);
        }
        __ClearPageWaiters(page);
@@ -229,9 +228,9 @@ static void pagevec_lru_move_fn(struct pagevec *pvec,
 static void pagevec_move_tail_fn(struct page *page, struct lruvec *lruvec)
 {
        if (!PageUnevictable(page)) {
-               del_page_from_lru_list(page, lruvec, page_lru(page));
+               del_page_from_lru_list(page, lruvec);
                ClearPageActive(page);
-               add_page_to_lru_list_tail(page, lruvec, page_lru(page));
+               add_page_to_lru_list_tail(page, lruvec);
                __count_vm_events(PGROTATED, thp_nr_pages(page));
        }
 }
@@ -308,13 +307,11 @@ void lru_note_cost_page(struct page *page)
 static void __activate_page(struct page *page, struct lruvec *lruvec)
 {
        if (!PageActive(page) && !PageUnevictable(page)) {
-               int lru = page_lru_base_type(page);
                int nr_pages = thp_nr_pages(page);
 
-               del_page_from_lru_list(page, lruvec, lru);
+               del_page_from_lru_list(page, lruvec);
                SetPageActive(page);
-               lru += LRU_ACTIVE;
-               add_page_to_lru_list(page, lruvec, lru);
+               add_page_to_lru_list(page, lruvec);
                trace_mm_lru_activate(page);
 
                __count_vm_events(PGACTIVATE, nr_pages);
@@ -519,8 +516,7 @@ void lru_cache_add_inactive_or_unevictable(struct page *page,
  */
 static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 {
-       int lru;
-       bool active;
+       bool active = PageActive(page);
        int nr_pages = thp_nr_pages(page);
 
        if (PageUnevictable(page))
@@ -530,10 +526,7 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
        if (page_mapped(page))
                return;
 
-       active = PageActive(page);
-       lru = page_lru_base_type(page);
-
-       del_page_from_lru_list(page, lruvec, lru + active);
+       del_page_from_lru_list(page, lruvec);
        ClearPageActive(page);
        ClearPageReferenced(page);
 
@@ -543,14 +536,14 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
                 * It can make readahead confusing.  But the race window
                 * is _really_ small and it's a non-critical problem.
                 */
-               add_page_to_lru_list(page, lruvec, lru);
+               add_page_to_lru_list(page, lruvec);
                SetPageReclaim(page);
        } else {
                /*
                 * The page's writeback ended while it was in the pagevec.
                 * We move the page to the tail of the inactive list.
                 */
-               add_page_to_lru_list_tail(page, lruvec, lru);
+               add_page_to_lru_list_tail(page, lruvec);
                __count_vm_events(PGROTATED, nr_pages);
        }
 
@@ -564,13 +557,12 @@ static void lru_deactivate_file_fn(struct page *page, struct lruvec *lruvec)
 static void lru_deactivate_fn(struct page *page, struct lruvec *lruvec)
 {
        if (PageActive(page) && !PageUnevictable(page)) {
-               int lru = page_lru_base_type(page);
                int nr_pages = thp_nr_pages(page);
 
-               del_page_from_lru_list(page, lruvec, lru + LRU_ACTIVE);
+               del_page_from_lru_list(page, lruvec);
                ClearPageActive(page);
                ClearPageReferenced(page);
-               add_page_to_lru_list(page, lruvec, lru);
+               add_page_to_lru_list(page, lruvec);
 
                __count_vm_events(PGDEACTIVATE, nr_pages);
                __count_memcg_events(lruvec_memcg(lruvec), PGDEACTIVATE,
@@ -582,11 +574,9 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec)
 {
        if (PageAnon(page) && PageSwapBacked(page) &&
            !PageSwapCache(page) && !PageUnevictable(page)) {
-               bool active = PageActive(page);
                int nr_pages = thp_nr_pages(page);
 
-               del_page_from_lru_list(page, lruvec,
-                                      LRU_INACTIVE_ANON + active);
+               del_page_from_lru_list(page, lruvec);
                ClearPageActive(page);
                ClearPageReferenced(page);
                /*
@@ -595,7 +585,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec)
                 * anonymous pages
                 */
                ClearPageSwapBacked(page);
-               add_page_to_lru_list(page, lruvec, LRU_INACTIVE_FILE);
+               add_page_to_lru_list(page, lruvec);
 
                __count_vm_events(PGLAZYFREE, nr_pages);
                __count_memcg_events(lruvec_memcg(lruvec), PGLAZYFREE,
@@ -918,9 +908,8 @@ void release_pages(struct page **pages, int nr)
                        if (prev_lruvec != lruvec)
                                lock_batch = 0;
 
-                       VM_BUG_ON_PAGE(!PageLRU(page), page);
-                       __ClearPageLRU(page);
-                       del_page_from_lru_list(page, lruvec, page_off_lru(page));
+                       del_page_from_lru_list(page, lruvec);
+                       __clear_page_lru_flags(page);
                }
 
                __ClearPageWaiters(page);
@@ -958,7 +947,6 @@ EXPORT_SYMBOL(__pagevec_release);
 
 static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
 {
-       enum lru_list lru;
        int was_unevictable = TestClearPageUnevictable(page);
        int nr_pages = thp_nr_pages(page);
 
@@ -994,19 +982,17 @@ static void __pagevec_lru_add_fn(struct page *page, struct lruvec *lruvec)
        smp_mb__after_atomic();
 
        if (page_evictable(page)) {
-               lru = page_lru(page);
                if (was_unevictable)
                        __count_vm_events(UNEVICTABLE_PGRESCUED, nr_pages);
        } else {
-               lru = LRU_UNEVICTABLE;
                ClearPageActive(page);
                SetPageUnevictable(page);
                if (!was_unevictable)
                        __count_vm_events(UNEVICTABLE_PGCULLED, nr_pages);
        }
 
-       add_page_to_lru_list(page, lruvec, lru);
-       trace_mm_lru_insertion(page, lru);
+       add_page_to_lru_list(page, lruvec);
+       trace_mm_lru_insertion(page);
 }
 
 /*
@@ -1032,45 +1018,11 @@ void __pagevec_lru_add(struct pagevec *pvec)
 }
 
 /**
- * pagevec_lookup_entries - gang pagecache lookup
- * @pvec:      Where the resulting entries are placed
- * @mapping:   The address_space to search
- * @start:     The starting entry index
- * @nr_entries:        The maximum number of pages
- * @indices:   The cache indices corresponding to the entries in @pvec
- *
- * pagevec_lookup_entries() will search for and return a group of up
- * to @nr_pages pages and shadow entries in the mapping.  All
- * entries are placed in @pvec.  pagevec_lookup_entries() takes a
- * reference against actual pages in @pvec.
- *
- * The search returns a group of mapping-contiguous entries with
- * ascending indexes.  There may be holes in the indices due to
- * not-present entries.
- *
- * Only one subpage of a Transparent Huge Page is returned in one call:
- * allowing truncate_inode_pages_range() to evict the whole THP without
- * cycling through a pagevec of extra references.
- *
- * pagevec_lookup_entries() returns the number of entries which were
- * found.
- */
-unsigned pagevec_lookup_entries(struct pagevec *pvec,
-                               struct address_space *mapping,
-                               pgoff_t start, unsigned nr_entries,
-                               pgoff_t *indices)
-{
-       pvec->nr = find_get_entries(mapping, start, nr_entries,
-                                   pvec->pages, indices);
-       return pagevec_count(pvec);
-}
-
-/**
  * pagevec_remove_exceptionals - pagevec exceptionals pruning
  * @pvec:      The pagevec to prune
  *
- * pagevec_lookup_entries() fills both pages and exceptional radix
- * tree entries into the pagevec.  This function prunes all
+ * find_get_entries() fills both pages and XArray value entries (aka
+ * exceptional entries) into the pagevec.  This function prunes all
  * exceptionals from @pvec without leaving holes, so that it can be
  * passed on to page-only pagevec operations.
  */
index 0357fbe..be9de6d 100644 (file)
@@ -193,8 +193,7 @@ static void drain_slots_cache_cpu(unsigned int cpu, unsigned int type,
                        cache->slots_ret = NULL;
                }
                spin_unlock_irq(&cache->free_lock);
-               if (slots)
-                       kvfree(slots);
+               kvfree(slots);
        }
 }
 
index 751c1ef..3cdee7b 100644 (file)
@@ -68,32 +68,6 @@ static struct {
        unsigned long find_total;
 } swap_cache_info;
 
-unsigned long total_swapcache_pages(void)
-{
-       unsigned int i, j, nr;
-       unsigned long ret = 0;
-       struct address_space *spaces;
-       struct swap_info_struct *si;
-
-       for (i = 0; i < MAX_SWAPFILES; i++) {
-               swp_entry_t entry = swp_entry(i, 1);
-
-               /* Avoid get_swap_device() to warn for bad swap entry */
-               if (!swp_swap_info(entry))
-                       continue;
-               /* Prevent swapoff to free swapper_spaces */
-               si = get_swap_device(entry);
-               if (!si)
-                       continue;
-               nr = nr_swapper_spaces[i];
-               spaces = swapper_spaces[i];
-               for (j = 0; j < nr; j++)
-                       ret += spaces[j].nrpages;
-               put_swap_device(si);
-       }
-       return ret;
-}
-
 static atomic_t swapin_readahead_hits = ATOMIC_INIT(4);
 
 void show_swap_cache_info(void)
@@ -113,11 +87,9 @@ void *get_shadow_from_swap_cache(swp_entry_t entry)
        pgoff_t idx = swp_offset(entry);
        struct page *page;
 
-       page = find_get_entry(address_space, idx);
+       page = xa_load(&address_space->i_pages, idx);
        if (xa_is_value(page))
                return page;
-       if (page)
-               put_page(page);
        return NULL;
 }
 
@@ -163,6 +135,7 @@ int add_to_swap_cache(struct page *page, swp_entry_t entry,
                address_space->nrexceptional -= nr_shadows;
                address_space->nrpages += nr;
                __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, nr);
+               __mod_lruvec_page_state(page, NR_SWAPCACHE, nr);
                ADD_CACHE_INFO(add_total, nr);
 unlock:
                xas_unlock_irq(&xas);
@@ -203,6 +176,7 @@ void __delete_from_swap_cache(struct page *page,
                address_space->nrexceptional += nr;
        address_space->nrpages -= nr;
        __mod_node_page_state(page_pgdat(page), NR_FILE_PAGES, -nr);
+       __mod_lruvec_page_state(page, NR_SWAPCACHE, -nr);
        ADD_CACHE_INFO(del_total, nr);
 }
 
@@ -429,7 +403,8 @@ struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index)
 {
        swp_entry_t swp;
        struct swap_info_struct *si;
-       struct page *page = find_get_entry(mapping, index);
+       struct page *page = pagecache_get_page(mapping, index,
+                                               FGP_ENTRY | FGP_HEAD, 0);
 
        if (!page)
                return page;
@@ -537,7 +512,6 @@ struct page *__read_swap_cache_async(swp_entry_t entry, gfp_t gfp_mask,
                workingset_refault(page, shadow);
 
        /* Caller will initiate read into locked page */
-       SetPageWorkingset(page);
        lru_cache_add(page);
        *new_page_allocated = true;
        return page;
@@ -927,7 +901,7 @@ static struct attribute *swap_attrs[] = {
        NULL,
 };
 
-static struct attribute_group swap_attr_group = {
+static const struct attribute_group swap_attr_group = {
        .attrs = swap_attrs,
 };
 
index 96799a2..084a5b9 100644 (file)
@@ -219,6 +219,19 @@ offset_to_swap_extent(struct swap_info_struct *sis, unsigned long offset)
        BUG();
 }
 
+sector_t swap_page_sector(struct page *page)
+{
+       struct swap_info_struct *sis = page_swap_info(page);
+       struct swap_extent *se;
+       sector_t sector;
+       pgoff_t offset;
+
+       offset = __page_file_index(page);
+       se = offset_to_swap_extent(sis, offset);
+       sector = se->start_block + (offset - se->start_page);
+       return sector << (PAGE_SHIFT - 9);
+}
+
 /*
  * swap allocation tells the device that a cluster of swap can now be discarded,
  * to allow the swap device to optimize its wear-levelling.
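
In swap_page_sector() above, the final shift converts page-granular sector
indexes into the block layer's 512-byte sectors: with 4 KiB pages,
PAGE_SHIFT - 9 == 3, so each page spans eight sectors. A standalone sketch of
the same arithmetic (all constants assumed for illustration):

	#include <stdio.h>
	#include <stdint.h>

	#define PAGE_SHIFT 12	/* assuming 4 KiB pages */

	int main(void)
	{
		/* hypothetical extent starting at block 100, page 64 */
		uint64_t start_block = 100, start_page = 64, offset = 67;
		uint64_t sector = start_block + (offset - start_page);	/* 103 */

		/* 103 page-sized units -> 824 512-byte sectors */
		printf("%llu\n", (unsigned long long)(sector << (PAGE_SHIFT - 9)));
		return 0;
	}
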
@@ -1157,13 +1170,13 @@ static struct swap_info_struct *__swap_info_get(swp_entry_t entry)
        return p;
 
 bad_offset:
-       pr_err("swap_info_get: %s%08lx\n", Bad_offset, entry.val);
+       pr_err("%s: %s%08lx\n", __func__, Bad_offset, entry.val);
        goto out;
 bad_device:
-       pr_err("swap_info_get: %s%08lx\n", Unused_file, entry.val);
+       pr_err("%s: %s%08lx\n", __func__, Unused_file, entry.val);
        goto out;
 bad_nofile:
-       pr_err("swap_info_get: %s%08lx\n", Bad_file, entry.val);
+       pr_err("%s: %s%08lx\n", __func__, Bad_file, entry.val);
 out:
        return NULL;
 }
@@ -1180,7 +1193,7 @@ static struct swap_info_struct *_swap_info_get(swp_entry_t entry)
        return p;
 
 bad_free:
-       pr_err("swap_info_get: %s%08lx\n", Unused_offset, entry.val);
+       pr_err("%s: %s%08lx\n", __func__, Unused_offset, entry.val);
 out:
        return NULL;
 }
index 8aa4907..4559442 100644 (file)
@@ -57,11 +57,10 @@ static void clear_shadow_entry(struct address_space *mapping, pgoff_t index,
  * exceptional entries similar to what pagevec_remove_exceptionals does.
  */
 static void truncate_exceptional_pvec_entries(struct address_space *mapping,
-                               struct pagevec *pvec, pgoff_t *indices,
-                               pgoff_t end)
+                               struct pagevec *pvec, pgoff_t *indices)
 {
        int i, j;
-       bool dax, lock;
+       bool dax;
 
        /* Handled by shmem itself */
        if (shmem_mapping(mapping))
@@ -75,8 +74,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
                return;
 
        dax = dax_mapping(mapping);
-       lock = !dax && indices[j] < end;
-       if (lock)
+       if (!dax)
                xa_lock_irq(&mapping->i_pages);
 
        for (i = j; i < pagevec_count(pvec); i++) {
@@ -88,9 +86,6 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
                        continue;
                }
 
-               if (index >= end)
-                       continue;
-
                if (unlikely(dax)) {
                        dax_delete_mapping_entry(mapping, index);
                        continue;
@@ -99,7 +94,7 @@ static void truncate_exceptional_pvec_entries(struct address_space *mapping,
                __clear_shadow_entry(mapping, index, page);
        }
 
-       if (lock)
+       if (!dax)
                xa_unlock_irq(&mapping->i_pages);
        pvec->nr = j;
 }
@@ -326,51 +321,19 @@ void truncate_inode_pages_range(struct address_space *mapping,
 
        pagevec_init(&pvec);
        index = start;
-       while (index < end && pagevec_lookup_entries(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE),
-                       indices)) {
-               /*
-                * Pagevec array has exceptional entries and we may also fail
-                * to lock some pages. So we store pages that can be deleted
-                * in a new pagevec.
-                */
-               struct pagevec locked_pvec;
-
-               pagevec_init(&locked_pvec);
-               for (i = 0; i < pagevec_count(&pvec); i++) {
-                       struct page *page = pvec.pages[i];
-
-                       /* We rely upon deletion not changing page->index */
-                       index = indices[i];
-                       if (index >= end)
-                               break;
-
-                       if (xa_is_value(page))
-                               continue;
-
-                       if (!trylock_page(page))
-                               continue;
-                       WARN_ON(page_to_index(page) != index);
-                       if (PageWriteback(page)) {
-                               unlock_page(page);
-                               continue;
-                       }
-                       if (page->mapping != mapping) {
-                               unlock_page(page);
-                               continue;
-                       }
-                       pagevec_add(&locked_pvec, page);
-               }
-               for (i = 0; i < pagevec_count(&locked_pvec); i++)
-                       truncate_cleanup_page(mapping, locked_pvec.pages[i]);
-               delete_from_page_cache_batch(mapping, &locked_pvec);
-               for (i = 0; i < pagevec_count(&locked_pvec); i++)
-                       unlock_page(locked_pvec.pages[i]);
-               truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
+       while (index < end && find_lock_entries(mapping, index, end - 1,
+                       &pvec, indices)) {
+               index = indices[pagevec_count(&pvec) - 1] + 1;
+               truncate_exceptional_pvec_entries(mapping, &pvec, indices);
+               for (i = 0; i < pagevec_count(&pvec); i++)
+                       truncate_cleanup_page(mapping, pvec.pages[i]);
+               delete_from_page_cache_batch(mapping, &pvec);
+               for (i = 0; i < pagevec_count(&pvec); i++)
+                       unlock_page(pvec.pages[i]);
                pagevec_release(&pvec);
                cond_resched();
-               index++;
        }
+
        if (partial_start) {
                struct page *page = find_lock_page(mapping, start - 1);
                if (page) {
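
The trylock/recheck boilerplate above moves into find_lock_entries() (added
earlier in this series), which only returns pages it already holds locked,
interleaved with bare XArray value entries. A hedged sketch of the contract
the new loop relies on, inferred from its callers in this file:

	/* Assumed contract: every non-value entry in @pvec comes back locked,
	 * belongs to @mapping and is not under writeback; value entries carry
	 * no page to unlock. */
	for (i = 0; i < pagevec_count(&pvec); i++) {
		struct page *page = pvec.pages[i];

		if (xa_is_value(page))
			continue;
		/* page is locked here; process it, then unlock_page(page) */
	}
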
@@ -413,8 +376,8 @@ void truncate_inode_pages_range(struct address_space *mapping,
        index = start;
        for ( ; ; ) {
                cond_resched();
-               if (!pagevec_lookup_entries(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE), indices)) {
+               if (!find_get_entries(mapping, index, end - 1, &pvec,
+                               indices)) {
                        /* If all gone from start onwards, we're done */
                        if (index == start)
                                break;
@@ -422,23 +385,12 @@ void truncate_inode_pages_range(struct address_space *mapping,
                        index = start;
                        continue;
                }
-               if (index == start && indices[0] >= end) {
-                       /* All gone out of hole to be punched, we're done */
-                       pagevec_remove_exceptionals(&pvec);
-                       pagevec_release(&pvec);
-                       break;
-               }
 
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        /* We rely upon deletion not changing page->index */
                        index = indices[i];
-                       if (index >= end) {
-                               /* Restart punch to make sure all gone */
-                               index = start - 1;
-                               break;
-                       }
 
                        if (xa_is_value(page))
                                continue;
@@ -449,7 +401,7 @@ void truncate_inode_pages_range(struct address_space *mapping,
                        truncate_inode_page(mapping, page);
                        unlock_page(page);
                }
-               truncate_exceptional_pvec_entries(mapping, &pvec, indices, end);
+               truncate_exceptional_pvec_entries(mapping, &pvec, indices);
                pagevec_release(&pvec);
                index++;
        }
@@ -539,55 +491,19 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
        int i;
 
        pagevec_init(&pvec);
-       while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
-                       indices)) {
+       while (find_lock_entries(mapping, index, end, &pvec, indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        /* We rely upon deletion not changing page->index */
                        index = indices[i];
-                       if (index > end)
-                               break;
 
                        if (xa_is_value(page)) {
                                invalidate_exceptional_entry(mapping, index,
                                                             page);
                                continue;
                        }
-
-                       if (!trylock_page(page))
-                               continue;
-
-                       WARN_ON(page_to_index(page) != index);
-
-                       /* Middle of THP: skip */
-                       if (PageTransTail(page)) {
-                               unlock_page(page);
-                               continue;
-                       } else if (PageTransHuge(page)) {
-                               index += HPAGE_PMD_NR - 1;
-                               i += HPAGE_PMD_NR - 1;
-                               /*
-                                * 'end' is in the middle of THP. Don't
-                                * invalidate the page as the part outside of
-                                * 'end' could be still useful.
-                                */
-                               if (index > end) {
-                                       unlock_page(page);
-                                       continue;
-                               }
-
-                               /* Take a pin outside pagevec */
-                               get_page(page);
-
-                               /*
-                                * Drop extra pins before trying to invalidate
-                                * the huge page.
-                                */
-                               pagevec_remove_exceptionals(&pvec);
-                               pagevec_release(&pvec);
-                       }
+                       index += thp_nr_pages(page) - 1;
 
                        ret = invalidate_inode_page(page);
                        unlock_page(page);
@@ -601,9 +517,6 @@ static unsigned long __invalidate_mapping_pages(struct address_space *mapping,
                                if (nr_pagevec)
                                        (*nr_pagevec)++;
                        }
-
-                       if (PageTransHuge(page))
-                               put_page(page);
                        count += ret;
                }
                pagevec_remove_exceptionals(&pvec);
@@ -725,16 +638,12 @@ int invalidate_inode_pages2_range(struct address_space *mapping,
 
        pagevec_init(&pvec);
        index = start;
-       while (index <= end && pagevec_lookup_entries(&pvec, mapping, index,
-                       min(end - index, (pgoff_t)PAGEVEC_SIZE - 1) + 1,
-                       indices)) {
+       while (find_get_entries(mapping, index, end, &pvec, indices)) {
                for (i = 0; i < pagevec_count(&pvec); i++) {
                        struct page *page = pvec.pages[i];
 
                        /* We rely upon deletion not changing page->index */
                        index = indices[i];
-                       if (index > end)
-                               break;
 
                        if (xa_is_value(page)) {
                                if (!invalidate_exceptional_entry2(mapping,
index b1b574a..562e87c 100644 (file)
@@ -310,7 +310,8 @@ unsigned long zone_reclaimable_pages(struct zone *zone)
  * @lru: lru to use
  * @zone_idx: zones to consider (use MAX_NR_ZONES for the whole LRU list)
  */
-unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru, int zone_idx)
+static unsigned long lruvec_lru_size(struct lruvec *lruvec, enum lru_list lru,
+                                    int zone_idx)
 {
        unsigned long size = 0;
        int zid;
@@ -1539,19 +1540,17 @@ unsigned int reclaim_clean_pages_from_list(struct zone *zone,
  * page:       page to consider
  * mode:       one of the LRU isolation modes defined above
  *
- * returns 0 on success, -ve errno on failure.
+ * returns true on success, false on failure.
  */
-int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
+bool __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
 {
-       int ret = -EBUSY;
-
        /* Only take pages on the LRU. */
        if (!PageLRU(page))
-               return ret;
+               return false;
 
        /* Compaction should not handle unevictable pages but CMA can do so */
        if (PageUnevictable(page) && !(mode & ISOLATE_UNEVICTABLE))
-               return ret;
+               return false;
 
        /*
         * To minimise LRU disruption, the caller can indicate that it only
@@ -1564,7 +1563,7 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
        if (mode & ISOLATE_ASYNC_MIGRATE) {
                /* All the caller can do on PageWriteback is block */
                if (PageWriteback(page))
-                       return ret;
+                       return false;
 
                if (PageDirty(page)) {
                        struct address_space *mapping;
@@ -1580,20 +1579,20 @@ int __isolate_lru_page_prepare(struct page *page, isolate_mode_t mode)
                         * from the page cache.
                         */
                        if (!trylock_page(page))
-                               return ret;
+                               return false;
 
                        mapping = page_mapping(page);
                        migrate_dirty = !mapping || mapping->a_ops->migratepage;
                        unlock_page(page);
                        if (!migrate_dirty)
-                               return ret;
+                               return false;
                }
        }
 
        if ((mode & ISOLATE_UNMAPPED) && page_mapped(page))
-               return ret;
+               return false;
 
-       return 0;
+       return true;
 }
 
 /*
@@ -1677,35 +1676,31 @@ static unsigned long isolate_lru_pages(unsigned long nr_to_scan,
                 * only when the page is being freed somewhere else.
                 */
                scan += nr_pages;
-               switch (__isolate_lru_page_prepare(page, mode)) {
-               case 0:
-                       /*
-                        * Be careful not to clear PageLRU until after we're
-                        * sure the page is not being freed elsewhere -- the
-                        * page release code relies on it.
-                        */
-                       if (unlikely(!get_page_unless_zero(page)))
-                               goto busy;
-
-                       if (!TestClearPageLRU(page)) {
-                               /*
-                                * This page may in other isolation path,
-                                * but we still hold lru_lock.
-                                */
-                               put_page(page);
-                               goto busy;
-                       }
-
-                       nr_taken += nr_pages;
-                       nr_zone_taken[page_zonenum(page)] += nr_pages;
-                       list_move(&page->lru, dst);
-                       break;
+               if (!__isolate_lru_page_prepare(page, mode)) {
+                       /* It is being freed elsewhere */
+                       list_move(&page->lru, src);
+                       continue;
+               }
+               /*
+                * Be careful not to clear PageLRU until after we're
+                * sure the page is not being freed elsewhere -- the
+                * page release code relies on it.
+                */
+               if (unlikely(!get_page_unless_zero(page))) {
+                       list_move(&page->lru, src);
+                       continue;
+               }
 
-               default:
-busy:
-                       /* else it is being freed elsewhere */
+               if (!TestClearPageLRU(page)) {
+                       /* Another thread is already isolating this page */
+                       put_page(page);
                        list_move(&page->lru, src);
+                       continue;
                }
+
+               nr_taken += nr_pages;
+               nr_zone_taken[page_zonenum(page)] += nr_pages;
+               list_move(&page->lru, dst);
        }
 
        /*
@@ -1772,7 +1767,7 @@ int isolate_lru_page(struct page *page)
 
                get_page(page);
                lruvec = lock_page_lruvec_irq(page);
-               del_page_from_lru_list(page, lruvec, page_lru(page));
+               del_page_from_lru_list(page, lruvec);
                unlock_page_lruvec_irq(lruvec);
                ret = 0;
        }
@@ -1829,7 +1824,6 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
        int nr_pages, nr_moved = 0;
        LIST_HEAD(pages_to_free);
        struct page *page;
-       enum lru_list lru;
 
        while (!list_empty(list)) {
                page = lru_to_page(list);
@@ -1856,8 +1850,7 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
                SetPageLRU(page);
 
                if (unlikely(put_page_testzero(page))) {
-                       __ClearPageLRU(page);
-                       __ClearPageActive(page);
+                       __clear_page_lru_flags(page);
 
                        if (unlikely(PageCompound(page))) {
                                spin_unlock_irq(&lruvec->lru_lock);
@@ -1874,11 +1867,8 @@ static unsigned noinline_for_stack move_pages_to_lru(struct lruvec *lruvec,
                 * inhibits memcg migration).
                 */
                VM_BUG_ON_PAGE(!lruvec_holds_page_lru_lock(page, lruvec), page);
-               lru = page_lru(page);
+               add_page_to_lru_list(page, lruvec);
                nr_pages = thp_nr_pages(page);
-
-               update_lru_size(lruvec, lru, page_zonenum(page), nr_pages);
-               list_add(&page->lru, &lruvec->lists[lru]);
                nr_moved += nr_pages;
                if (PageActive(page))
                        workingset_age_nonresident(lruvec, nr_pages);
@@ -4095,8 +4085,13 @@ module_init(kswapd_init)
  */
 int node_reclaim_mode __read_mostly;
 
-#define RECLAIM_WRITE (1<<0)   /* Writeout pages during reclaim */
-#define RECLAIM_UNMAP (1<<1)   /* Unmap pages during reclaim */
+/*
+ * These bit locations are exposed in the vm.zone_reclaim_mode sysctl
+ * ABI.  New bits are OK, but existing bits can never change.
+ */
+#define RECLAIM_ZONE  (1<<0)   /* Run shrink_inactive_list on the zone */
+#define RECLAIM_WRITE (1<<1)   /* Writeout pages during reclaim */
+#define RECLAIM_UNMAP (1<<2)   /* Unmap pages during reclaim */
 
 /*
  * Priority for NODE_RECLAIM. This determines the fraction of pages
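
Because the RECLAIM_* bits above form a userspace ABI, here is a quick
illustration of how an administration tool might combine them (sketch only;
bit meanings as defined above, written via the usual procfs path):

	#include <stdio.h>

	int main(void)
	{
		FILE *f = fopen("/proc/sys/vm/zone_reclaim_mode", "w");

		if (!f)
			return 1;
		/* RECLAIM_ZONE | RECLAIM_UNMAP */
		fprintf(f, "%d\n", (1 << 0) | (1 << 2));
		return fclose(f) ? 1 : 0;
	}
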
@@ -4292,12 +4287,9 @@ void check_move_unevictable_pages(struct pagevec *pvec)
 
                lruvec = relock_page_lruvec_irq(page, lruvec);
                if (page_evictable(page) && PageUnevictable(page)) {
-                       enum lru_list lru = page_lru_base_type(page);
-
-                       VM_BUG_ON_PAGE(PageActive(page), page);
+                       del_page_from_lru_list(page, lruvec);
                        ClearPageUnevictable(page);
-                       del_page_from_lru_list(page, lruvec, LRU_UNEVICTABLE);
-                       add_page_to_lru_list(page, lruvec, lru);
+                       add_page_to_lru_list(page, lruvec);
                        pgrescued += nr_pages;
                }
                SetPageLRU(page);
index f894216..74b2c37 100644 (file)
@@ -342,6 +342,12 @@ void __mod_node_page_state(struct pglist_data *pgdat, enum node_stat_item item,
        long t;
 
        if (vmstat_item_in_bytes(item)) {
+               /*
+                * Only cgroups use subpage accounting right now; at
+                * the global level, these items still change in
+                * multiples of whole pages. Store them as pages
+                * internally to keep the per-cpu counters compact.
+                */
                VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
                delta >>= PAGE_SHIFT;
        }
@@ -551,6 +557,12 @@ static inline void mod_node_state(struct pglist_data *pgdat,
        long o, n, t, z;
 
        if (vmstat_item_in_bytes(item)) {
+               /*
+                * Only cgroups use subpage accounting right now; at
+                * the global level, these items still change in
+                * multiples of whole pages. Store them as pages
+                * internally to keep the per-cpu counters compact.
+                */
                VM_WARN_ON_ONCE(delta & (PAGE_SIZE - 1));
                delta >>= PAGE_SHIFT;
        }
@@ -1215,6 +1227,9 @@ const char * const vmstat_text[] = {
        "nr_shadow_call_stack",
 #endif
        "nr_page_table_pages",
+#ifdef CONFIG_SWAP
+       "nr_swapcached",
+#endif
 
        /* enum writeback_stat_item counters */
        "nr_dirty_threshold",
@@ -1619,8 +1634,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
        if (is_zone_first_populated(pgdat, zone)) {
                seq_printf(m, "\n  per-node stats");
                for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
+                       unsigned long pages = node_page_state_pages(pgdat, i);
+
+                       if (vmstat_item_print_in_thp(i))
+                               pages /= HPAGE_PMD_NR;
                        seq_printf(m, "\n      %-12s %lu", node_stat_name(i),
-                                  node_page_state_pages(pgdat, i));
+                                  pages);
                }
        }
        seq_printf(m,
@@ -1630,14 +1649,16 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat,
                   "\n        high     %lu"
                   "\n        spanned  %lu"
                   "\n        present  %lu"
-                  "\n        managed  %lu",
+                  "\n        managed  %lu"
+                  "\n        cma      %lu",
                   zone_page_state(zone, NR_FREE_PAGES),
                   min_wmark_pages(zone),
                   low_wmark_pages(zone),
                   high_wmark_pages(zone),
                   zone->spanned_pages,
                   zone->present_pages,
-                  zone_managed_pages(zone));
+                  zone_managed_pages(zone),
+                  zone_cma_pages(zone));
 
        seq_printf(m,
                   "\n        protection: (%ld",
@@ -1740,8 +1761,11 @@ static void *vmstat_start(struct seq_file *m, loff_t *pos)
        v += NR_VM_NUMA_STAT_ITEMS;
 #endif
 
-       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++)
+       for (i = 0; i < NR_VM_NODE_STAT_ITEMS; i++) {
                v[i] = global_node_page_state_pages(i);
+               if (vmstat_item_print_in_thp(i))
+                       v[i] /= HPAGE_PMD_NR;
+       }
        v += NR_VM_NODE_STAT_ITEMS;
 
        global_dirty_limits(v + NR_DIRTY_BG_THRESHOLD,
@@ -1882,16 +1906,12 @@ static void vmstat_update(struct work_struct *w)
  */
 static bool need_update(int cpu)
 {
+       pg_data_t *last_pgdat = NULL;
        struct zone *zone;
 
        for_each_populated_zone(zone) {
                struct per_cpu_pageset *p = per_cpu_ptr(zone->pageset, cpu);
-
-               BUILD_BUG_ON(sizeof(p->vm_stat_diff[0]) != 1);
-#ifdef CONFIG_NUMA
-               BUILD_BUG_ON(sizeof(p->vm_numa_stat_diff[0]) != 2);
-#endif
-
+               struct per_cpu_nodestat *n;
                /*
                 * The fast way of checking if there are any vmstat diffs.
                 */
@@ -1903,6 +1923,13 @@ static bool need_update(int cpu)
                               sizeof(p->vm_numa_stat_diff[0])))
                        return true;
 #endif
+               if (last_pgdat == zone->zone_pgdat)
+                       continue;
+               last_pgdat = zone->zone_pgdat;
+               n = per_cpu_ptr(zone->zone_pgdat->per_cpu_nodestats, cpu);
+               if (memchr_inv(n->vm_node_stat_diff, 0, NR_VM_NODE_STAT_ITEMS *
+                              sizeof(n->vm_node_stat_diff[0])))
+                       return true;
        }
        return false;
 }
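
memchr_inv(buf, c, n) returns NULL only when all n bytes equal c, which makes
it a compact way to test a whole per-cpu diff array at once. A userspace
sketch of its semantics (the kernel version in lib/string.c is optimized to
scan a word at a time):

	#include <stddef.h>

	/* Sketch of memchr_inv(): first byte differing from c, else NULL. */
	static const void *memchr_inv_sketch(const void *s, int c, size_t n)
	{
		const unsigned char *p = s;

		while (n--) {
			if (*p != (unsigned char)c)
				return p;
			p++;
		}
		return NULL;
	}
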
@@ -1953,6 +1980,8 @@ static void vmstat_shepherd(struct work_struct *w)
 
                if (!delayed_work_pending(dw) && need_update(cpu))
                        queue_delayed_work_on(cpu, mm_percpu_wq, dw, 0);
+
+               cond_resched();
        }
        put_online_cpus();
 
index 10e96de..cd39902 100644 (file)
@@ -263,10 +263,10 @@ void *workingset_eviction(struct page *page, struct mem_cgroup *target_memcg)
        VM_BUG_ON_PAGE(!PageLocked(page), page);
 
        lruvec = mem_cgroup_lruvec(target_memcg, pgdat);
-       workingset_age_nonresident(lruvec, thp_nr_pages(page));
        /* XXX: target_memcg can be NULL, go through lruvec */
        memcgid = mem_cgroup_id(lruvec_memcg(lruvec));
        eviction = atomic_long_read(&lruvec->nonresident_age);
+       workingset_age_nonresident(lruvec, thp_nr_pages(page));
        return pack_shadow(memcgid, pgdat, eviction, PageWorkingset(page));
 }
 
@@ -461,6 +461,8 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
        unsigned long pages;
 
        nodes = list_lru_shrink_count(&shadow_nodes, sc);
+       if (!nodes)
+               return SHRINK_EMPTY;
 
        /*
         * Approximate a reasonable limit for the nodes
@@ -503,9 +505,6 @@ static unsigned long count_shadow_nodes(struct shrinker *shrinker,
 
        max_nodes = pages >> (XA_CHUNK_SHIFT - 3);
 
-       if (!nodes)
-               return SHRINK_EMPTY;
-
        if (nodes <= max_nodes)
                return 0;
        return nodes - max_nodes;
index dacb0d7..9d889ad 100644 (file)
@@ -413,16 +413,10 @@ static struct z3fold_header *init_z3fold_page(struct page *page, bool headless,
        if (!slots)
                return NULL;
 
+       memset(zhdr, 0, sizeof(*zhdr));
        spin_lock_init(&zhdr->page_lock);
        kref_init(&zhdr->refcount);
-       zhdr->first_chunks = 0;
-       zhdr->middle_chunks = 0;
-       zhdr->last_chunks = 0;
-       zhdr->first_num = 0;
-       zhdr->start_middle = 0;
        zhdr->cpu = -1;
-       zhdr->foreign_handles = 0;
-       zhdr->mapped_count = 0;
        zhdr->slots = slots;
        zhdr->pool = pool;
        INIT_LIST_HEAD(&zhdr->buddy);
@@ -541,8 +535,7 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
        spin_unlock(&pool->stale_lock);
 }
 
-static void __attribute__((__unused__))
-                       release_z3fold_page(struct kref *ref)
+static void release_z3fold_page(struct kref *ref)
 {
        struct z3fold_header *zhdr = container_of(ref, struct z3fold_header,
                                                refcount);
@@ -1353,8 +1346,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
                        page = list_entry(pos, struct page, lru);
 
                        zhdr = page_address(page);
-                       if (test_bit(PAGE_HEADLESS, &page->private))
+                       if (test_bit(PAGE_HEADLESS, &page->private)) {
+                               /*
+                                * For non-headless pages, we wait to do this
+                                * until we have the page lock to avoid racing
+                                * with __z3fold_alloc(). Headless pages don't
+                                * have a lock (and __z3fold_alloc() will never
+                                * see them), but we still need to test and set
+                                * PAGE_CLAIMED to avoid racing with
+                                * z3fold_free(), so just do it now before
+                                * leaving the loop.
+                                */
+                               if (test_and_set_bit(PAGE_CLAIMED, &page->private))
+                                       continue;
+
                                break;
+                       }
 
                        if (kref_get_unless_zero(&zhdr->refcount) == 0) {
                                zhdr = NULL;
@@ -1778,6 +1785,7 @@ static u64 z3fold_zpool_total_size(void *pool)
 
 static struct zpool_driver z3fold_zpool_driver = {
        .type =         "z3fold",
+       .sleep_mapped = true,
        .owner =        THIS_MODULE,
        .create =       z3fold_zpool_create,
        .destroy =      z3fold_zpool_destroy,
index c49966e..7ec5f27 100644 (file)
--- a/mm/zbud.c
+++ b/mm/zbud.c
@@ -203,6 +203,7 @@ static u64 zbud_zpool_total_size(void *pool)
 
 static struct zpool_driver zbud_zpool_driver = {
        .type =         "zbud",
+       .sleep_mapped = true,
        .owner =        THIS_MODULE,
        .create =       zbud_zpool_create,
        .destroy =      zbud_zpool_destroy,
index 3744a2d..5ed7120 100644 (file)
@@ -23,6 +23,7 @@ struct zpool {
        void *pool;
        const struct zpool_ops *ops;
        bool evictable;
+       bool can_sleep_mapped;
 
        struct list_head list;
 };
@@ -183,6 +184,7 @@ struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp,
        zpool->pool = driver->create(name, gfp, ops, zpool);
        zpool->ops = ops;
        zpool->evictable = driver->shrink && ops && ops->evict;
+       zpool->can_sleep_mapped = driver->sleep_mapped;
 
        if (!zpool->pool) {
                pr_err("couldn't create %s pool\n", type);
@@ -393,6 +395,17 @@ bool zpool_evictable(struct zpool *zpool)
        return zpool->evictable;
 }
 
+/**
+ * zpool_can_sleep_mapped - Test if zpool can sleep while an object is mapped.
+ * @zpool:     The zpool to test
+ *
+ * Returns: true if zpool can sleep; false otherwise.
+ */
+bool zpool_can_sleep_mapped(struct zpool *zpool)
+{
+       return zpool->can_sleep_mapped;
+}
+
 MODULE_LICENSE("GPL");
 MODULE_AUTHOR("Dan Streetman <ddstreet@ieee.org>");
 MODULE_DESCRIPTION("Common API for compressed memory storage");
index 7289f50..30c358b 100644 (file)
@@ -357,7 +357,7 @@ static void cache_free_handle(struct zs_pool *pool, unsigned long handle)
 
 static struct zspage *cache_alloc_zspage(struct zs_pool *pool, gfp_t flags)
 {
-       return kmem_cache_alloc(pool->zspage_cachep,
+       return kmem_cache_zalloc(pool->zspage_cachep,
                        flags & ~(__GFP_HIGHMEM|__GFP_MOVABLE));
 }
 
@@ -816,7 +816,7 @@ static int get_pages_per_zspage(int class_size)
 
 static struct zspage *get_zspage(struct page *page)
 {
-       struct zspage *zspage = (struct zspage *)page->private;
+       struct zspage *zspage = (struct zspage *)page_private(page);
 
        BUG_ON(zspage->magic != ZSPAGE_MAGIC);
        return zspage;
@@ -1064,7 +1064,6 @@ static struct zspage *alloc_zspage(struct zs_pool *pool,
        if (!zspage)
                return NULL;
 
-       memset(zspage, 0, sizeof(struct zspage));
        zspage->magic = ZSPAGE_MAGIC;
        migrate_lock_init(zspage);
 
@@ -2213,11 +2212,13 @@ static unsigned long zs_can_compact(struct size_class *class)
        return obj_wasted * class->pages_per_zspage;
 }
 
-static void __zs_compact(struct zs_pool *pool, struct size_class *class)
+static unsigned long __zs_compact(struct zs_pool *pool,
+                                 struct size_class *class)
 {
        struct zs_compact_control cc;
        struct zspage *src_zspage;
        struct zspage *dst_zspage = NULL;
+       unsigned long pages_freed = 0;
 
        spin_lock(&class->lock);
        while ((src_zspage = isolate_zspage(class, true))) {
@@ -2247,7 +2248,7 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
                putback_zspage(class, dst_zspage);
                if (putback_zspage(class, src_zspage) == ZS_EMPTY) {
                        free_zspage(pool, class, src_zspage);
-                       pool->stats.pages_compacted += class->pages_per_zspage;
+                       pages_freed += class->pages_per_zspage;
                }
                spin_unlock(&class->lock);
                cond_resched();
@@ -2258,12 +2259,15 @@ static void __zs_compact(struct zs_pool *pool, struct size_class *class)
                putback_zspage(class, src_zspage);
 
        spin_unlock(&class->lock);
+
+       return pages_freed;
 }
 
 unsigned long zs_compact(struct zs_pool *pool)
 {
        int i;
        struct size_class *class;
+       unsigned long pages_freed = 0;
 
        for (i = ZS_SIZE_CLASSES - 1; i >= 0; i--) {
                class = pool->size_class[i];
@@ -2271,10 +2275,11 @@ unsigned long zs_compact(struct zs_pool *pool)
                        continue;
                if (class->index != i)
                        continue;
-               __zs_compact(pool, class);
+               pages_freed += __zs_compact(pool, class);
        }
+       atomic_long_add(pages_freed, &pool->stats.pages_compacted);
 
-       return pool->stats.pages_compacted;
+       return pages_freed;
 }
 EXPORT_SYMBOL_GPL(zs_compact);
 
@@ -2291,13 +2296,12 @@ static unsigned long zs_shrinker_scan(struct shrinker *shrinker,
        struct zs_pool *pool = container_of(shrinker, struct zs_pool,
                        shrinker);
 
-       pages_freed = pool->stats.pages_compacted;
        /*
         * Compact classes and calculate compaction delta.
         * Can run concurrently with a manually triggered
         * (by user) compaction.
         */
-       pages_freed = zs_compact(pool) - pages_freed;
+       pages_freed = zs_compact(pool);
 
        return pages_freed ? pages_freed : SHRINK_STOP;
 }
index 182f6ad..578d9f2 100644 (file)
@@ -935,13 +935,19 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
        struct scatterlist input, output;
        struct crypto_acomp_ctx *acomp_ctx;
 
-       u8 *src;
+       u8 *src, *tmp = NULL;
        unsigned int dlen;
        int ret;
        struct writeback_control wbc = {
                .sync_mode = WB_SYNC_NONE,
        };
 
+       if (!zpool_can_sleep_mapped(pool)) {
+               tmp = kmalloc(PAGE_SIZE, GFP_ATOMIC);
+               if (!tmp)
+                       return -ENOMEM;
+       }
+
        /* extract swpentry from data */
        zhdr = zpool_map_handle(pool, handle, ZPOOL_MM_RO);
        swpentry = zhdr->swpentry; /* here */
@@ -955,6 +961,7 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
                /* entry was invalidated */
                spin_unlock(&tree->lock);
                zpool_unmap_handle(pool, handle);
+               kfree(tmp);
                return 0;
        }
        spin_unlock(&tree->lock);
@@ -979,6 +986,14 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
                dlen = PAGE_SIZE;
                src = (u8 *)zhdr + sizeof(struct zswap_header);
 
+               if (!zpool_can_sleep_mapped(pool)) {
+
+                       memcpy(tmp, src, entry->length);
+                       src = tmp;
+
+                       zpool_unmap_handle(pool, handle);
+               }
+
                mutex_lock(acomp_ctx->mutex);
                sg_init_one(&input, src, entry->length);
                sg_init_table(&output, 1);
@@ -1022,10 +1037,10 @@ static int zswap_writeback_entry(struct zpool *pool, unsigned long handle)
 
        /*
        * if we get here due to ZSWAP_SWAPCACHE_EXIST
-       * a load may happening concurrently
-       * it is safe and okay to not free the entry
+       * a load may be happening concurrently.
+       * it is safe and okay to not free the entry.
        * if we free the entry in the following put
-       * it it either okay to return !0
+       * it is also okay to return !0
        */
 fail:
        spin_lock(&tree->lock);
@@ -1033,7 +1048,11 @@ fail:
        spin_unlock(&tree->lock);
 
 end:
-       zpool_unmap_handle(pool, handle);
+       if (zpool_can_sleep_mapped(pool))
+               zpool_unmap_handle(pool, handle);
+       else
+               kfree(tmp);
+
        return ret;
 }
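
The writeback path above and the load path below share one shape for pools
that cannot stay mapped across a sleeping operation: snapshot the compressed
data into a GFP_ATOMIC buffer, unmap immediately, then decompress from the
copy. Condensed from the surrounding hunks (not a drop-in function):

	u8 *src = zpool_map_handle(pool, handle, ZPOOL_MM_RO);

	if (!zpool_can_sleep_mapped(pool)) {
		memcpy(tmp, src, entry->length);	/* tmp: GFP_ATOMIC buffer */
		src = tmp;
		zpool_unmap_handle(pool, handle);	/* unmap before sleeping */
	}

	/* ... sleeping crypto_acomp decompression reads from src ... */

	if (zpool_can_sleep_mapped(pool))
		zpool_unmap_handle(pool, handle);
	else
		kfree(tmp);
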
 
@@ -1235,7 +1254,7 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
        struct zswap_entry *entry;
        struct scatterlist input, output;
        struct crypto_acomp_ctx *acomp_ctx;
-       u8 *src, *dst;
+       u8 *src, *dst, *tmp;
        unsigned int dlen;
        int ret;
 
@@ -1253,15 +1272,33 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
                dst = kmap_atomic(page);
                zswap_fill_page(dst, entry->value);
                kunmap_atomic(dst);
+               ret = 0;
                goto freeentry;
        }
 
+       if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
+
+               tmp = kmalloc(entry->length, GFP_ATOMIC);
+               if (!tmp) {
+                       ret = -ENOMEM;
+                       goto freeentry;
+               }
+       }
+
        /* decompress */
        dlen = PAGE_SIZE;
        src = zpool_map_handle(entry->pool->zpool, entry->handle, ZPOOL_MM_RO);
        if (zpool_evictable(entry->pool->zpool))
                src += sizeof(struct zswap_header);
 
+       if (!zpool_can_sleep_mapped(entry->pool->zpool)) {
+
+               memcpy(tmp, src, entry->length);
+               src = tmp;
+
+               zpool_unmap_handle(entry->pool->zpool, entry->handle);
+       }
+
        acomp_ctx = raw_cpu_ptr(entry->pool->acomp_ctx);
        mutex_lock(acomp_ctx->mutex);
        sg_init_one(&input, src, entry->length);
@@ -1271,7 +1308,11 @@ static int zswap_frontswap_load(unsigned type, pgoff_t offset,
        ret = crypto_wait_req(crypto_acomp_decompress(acomp_ctx->req), &acomp_ctx->wait);
        mutex_unlock(acomp_ctx->mutex);
 
-       zpool_unmap_handle(entry->pool->zpool, entry->handle);
+       if (zpool_can_sleep_mapped(entry->pool->zpool))
+               zpool_unmap_handle(entry->pool->zpool, entry->handle);
+       else
+               kfree(tmp);
+
        BUG_ON(ret);
 
 freeentry:
@@ -1279,7 +1320,7 @@ freeentry:
        zswap_entry_put(tree, entry);
        spin_unlock(&tree->lock);
 
-       return 0;
+       return ret;
 }
 
 /* frees an entry in zswap */
index 4f62f29..0a9019d 100644 (file)
@@ -1623,10 +1623,6 @@ p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
        }
 
        p9_debug(P9_DEBUG_9P, "<<< RREAD count %d\n", count);
-       if (!count) {
-               p9_tag_remove(clnt, req);
-               return 0;
-       }
 
        if (non_zc) {
                int n = copy_to_iter(dataptr, count, to);
index e48f7ac..3ddd66e 100644 (file)
@@ -702,7 +702,6 @@ MODULE_LICENSE("GPL");
 
 MODULE_AUTHOR(BATADV_DRIVER_AUTHOR);
 MODULE_DESCRIPTION(BATADV_DRIVER_DESC);
-MODULE_SUPPORTED_DEVICE(BATADV_DRIVER_DEVICE);
 MODULE_VERSION(BATADV_SOURCE_VERSION);
 MODULE_ALIAS_RTNL_LINK("batadv");
 MODULE_ALIAS_GENL_FAMILY(BATADV_NL_NAME);
index b895038..1e24d9a 100644 (file)
@@ -128,6 +128,8 @@ br_switchdev_fdb_notify(const struct net_bridge_fdb_entry *fdb, int type)
 {
        if (!fdb->dst)
                return;
+       if (test_bit(BR_FDB_LOCAL, &fdb->flags))
+               return;
 
        switch (type) {
        case RTM_DELNEIGH:
index 837bb8a..cce2af1 100644 (file)
@@ -304,8 +304,8 @@ static struct can_dev_rcv_lists *can_dev_rcv_lists_find(struct net *net,
                                                        struct net_device *dev)
 {
        if (dev) {
-               struct can_ml_priv *ml_priv = dev->ml_priv;
-               return &ml_priv->dev_rcv_lists;
+               struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+               return &can_ml->dev_rcv_lists;
        } else {
                return net->can.rx_alldev_list;
        }
@@ -790,25 +790,6 @@ void can_proto_unregister(const struct can_proto *cp)
 }
 EXPORT_SYMBOL(can_proto_unregister);
 
-/* af_can notifier to create/remove CAN netdevice specific structs */
-static int can_notifier(struct notifier_block *nb, unsigned long msg,
-                       void *ptr)
-{
-       struct net_device *dev = netdev_notifier_info_to_dev(ptr);
-
-       if (dev->type != ARPHRD_CAN)
-               return NOTIFY_DONE;
-
-       switch (msg) {
-       case NETDEV_REGISTER:
-               WARN(!dev->ml_priv,
-                    "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
-               break;
-       }
-
-       return NOTIFY_DONE;
-}
-
 static int can_pernet_init(struct net *net)
 {
        spin_lock_init(&net->can.rcvlists_lock);
@@ -876,11 +857,6 @@ static const struct net_proto_family can_family_ops = {
        .owner  = THIS_MODULE,
 };
 
-/* notifier block for netdevice event */
-static struct notifier_block can_netdev_notifier __read_mostly = {
-       .notifier_call = can_notifier,
-};
-
 static struct pernet_operations can_pernet_ops __read_mostly = {
        .init = can_pernet_init,
        .exit = can_pernet_exit,
@@ -911,17 +887,12 @@ static __init int can_init(void)
        err = sock_register(&can_family_ops);
        if (err)
                goto out_sock;
-       err = register_netdevice_notifier(&can_netdev_notifier);
-       if (err)
-               goto out_notifier;
 
        dev_add_pack(&can_packet);
        dev_add_pack(&canfd_packet);
 
        return 0;
 
-out_notifier:
-       sock_unregister(PF_CAN);
 out_sock:
        unregister_pernet_subsys(&can_pernet_ops);
 out_pernet:
@@ -935,7 +906,6 @@ static __exit void can_exit(void)
        /* protocol unregister */
        dev_remove_pack(&canfd_packet);
        dev_remove_pack(&can_packet);
-       unregister_netdevice_notifier(&can_netdev_notifier);
        sock_unregister(PF_CAN);
 
        unregister_pernet_subsys(&can_pernet_ops);
index 3ef7f78..15ea123 100644 (file)
@@ -196,7 +196,7 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
        nskb->dev = dev;
        can_skb_set_owner(nskb, sk);
        ncf = (struct canfd_frame *)nskb->data;
-       skb_put(nskb, so->ll.mtu);
+       skb_put_zero(nskb, so->ll.mtu);
 
        /* create & send flow control reply */
        ncf->can_id = so->txid;
@@ -215,8 +215,7 @@ static int isotp_send_fc(struct sock *sk, int ae, u8 flowstatus)
        if (ae)
                ncf->data[0] = so->opt.ext_address;
 
-       if (so->ll.mtu == CANFD_MTU)
-               ncf->flags = so->ll.tx_flags;
+       ncf->flags = so->ll.tx_flags;
 
        can_send_ret = can_send(nskb, 1);
        if (can_send_ret)
@@ -780,7 +779,7 @@ isotp_tx_burst:
                can_skb_prv(skb)->skbcnt = 0;
 
                cf = (struct canfd_frame *)skb->data;
-               skb_put(skb, so->ll.mtu);
+               skb_put_zero(skb, so->ll.mtu);
 
                /* create consecutive frame */
                isotp_fill_dataframe(cf, so, ae, 0);
@@ -790,8 +789,7 @@ isotp_tx_burst:
                so->tx.sn %= 16;
                so->tx.bs++;
 
-               if (so->ll.mtu == CANFD_MTU)
-                       cf->flags = so->ll.tx_flags;
+               cf->flags = so->ll.tx_flags;
 
                skb->dev = dev;
                can_skb_set_owner(skb, sk);
@@ -897,7 +895,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        so->tx.idx = 0;
 
        cf = (struct canfd_frame *)skb->data;
-       skb_put(skb, so->ll.mtu);
+       skb_put_zero(skb, so->ll.mtu);
 
        /* check for single frame transmission depending on TX_DL */
        if (size <= so->tx.ll_dl - SF_PCI_SZ4 - ae - off) {
@@ -939,8 +937,7 @@ static int isotp_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
        }
 
        /* send the first or only CAN frame */
-       if (so->ll.mtu == CANFD_MTU)
-               cf->flags = so->ll.tx_flags;
+       cf->flags = so->ll.tx_flags;
 
        skb->dev = dev;
        skb->sk = sk;
@@ -1228,7 +1225,8 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname,
                        if (ll.mtu != CAN_MTU && ll.mtu != CANFD_MTU)
                                return -EINVAL;
 
-                       if (ll.mtu == CAN_MTU && ll.tx_dl > CAN_MAX_DLEN)
+                       if (ll.mtu == CAN_MTU &&
+                           (ll.tx_dl > CAN_MAX_DLEN || ll.tx_flags != 0))
                                return -EINVAL;
 
                        memcpy(&so->ll, &ll, sizeof(ll));
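
Two hardenings travel together in these ISO-TP hunks: skb_put_zero() zeroes the unused tail of each fixed-size (so->ll.mtu) frame so padding no longer leaks uninitialized skb memory onto the bus, and setsockopt() now rejects a nonzero tx_flags for classic CAN_MTU, which makes the per-frame "if (so->ll.mtu == CANFD_MTU)" guards redundant. The skb change in isolation:

    /* Before: skb_put() only advances the tail pointer, so the bytes
     * after the payload kept whatever the allocator left in them.
     */
    skb_put(nskb, so->ll.mtu);

    /* After: the whole ll.mtu area starts out zeroed. */
    skb_put_zero(nskb, so->ll.mtu);
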
index bb914d8..da3a7a7 100644 (file)
@@ -140,9 +140,9 @@ static struct j1939_priv *j1939_priv_create(struct net_device *ndev)
 static inline void j1939_priv_set(struct net_device *ndev,
                                  struct j1939_priv *priv)
 {
-       struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+       struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
 
-       can_ml_priv->j1939_priv = priv;
+       can_ml->j1939_priv = priv;
 }
 
 static void __j1939_priv_release(struct kref *kref)
@@ -211,12 +211,9 @@ static void __j1939_rx_release(struct kref *kref)
 /* get pointer to priv without increasing ref counter */
 static inline struct j1939_priv *j1939_ndev_to_priv(struct net_device *ndev)
 {
-       struct can_ml_priv *can_ml_priv = ndev->ml_priv;
+       struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
 
-       if (!can_ml_priv)
-               return NULL;
-
-       return can_ml_priv->j1939_priv;
+       return can_ml->j1939_priv;
 }
 
 static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
@@ -225,9 +222,6 @@ static struct j1939_priv *j1939_priv_get_by_ndev_locked(struct net_device *ndev)
 
        lockdep_assert_held(&j1939_netdev_lock);
 
-       if (ndev->type != ARPHRD_CAN)
-               return NULL;
-
        priv = j1939_ndev_to_priv(ndev);
        if (priv)
                j1939_priv_get(priv);
@@ -348,15 +342,16 @@ static int j1939_netdev_notify(struct notifier_block *nb,
                               unsigned long msg, void *data)
 {
        struct net_device *ndev = netdev_notifier_info_to_dev(data);
+       struct can_ml_priv *can_ml = can_get_ml_priv(ndev);
        struct j1939_priv *priv;
 
+       if (!can_ml)
+               goto notify_done;
+
        priv = j1939_priv_get_by_ndev(ndev);
        if (!priv)
                goto notify_done;
 
-       if (ndev->type != ARPHRD_CAN)
-               goto notify_put;
-
        switch (msg) {
        case NETDEV_DOWN:
                j1939_cancel_active_session(priv, NULL);
@@ -365,7 +360,6 @@ static int j1939_netdev_notify(struct notifier_block *nb,
                break;
        }
 
-notify_put:
        j1939_priv_put(priv);
 
 notify_done:
index f239665..56aa661 100644 (file)
@@ -12,6 +12,7 @@
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
+#include <linux/can/can-ml.h>
 #include <linux/can/core.h>
 #include <linux/can/skb.h>
 #include <linux/errqueue.h>
@@ -453,6 +454,7 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
                j1939_jsk_del(priv, jsk);
                j1939_local_ecu_put(priv, jsk->addr.src_name, jsk->addr.sa);
        } else {
+               struct can_ml_priv *can_ml;
                struct net_device *ndev;
 
                ndev = dev_get_by_index(net, addr->can_ifindex);
@@ -461,15 +463,8 @@ static int j1939_sk_bind(struct socket *sock, struct sockaddr *uaddr, int len)
                        goto out_release_sock;
                }
 
-               if (ndev->type != ARPHRD_CAN) {
-                       dev_put(ndev);
-                       ret = -ENODEV;
-                       goto out_release_sock;
-               }
-
-               if (!ndev->ml_priv) {
-                       netdev_warn_once(ndev,
-                                        "No CAN mid layer private allocated, please fix your driver and use alloc_candev()!\n");
+               can_ml = can_get_ml_priv(ndev);
+               if (!can_ml) {
                        dev_put(ndev);
                        ret = -ENODEV;
                        goto out_release_sock;
index 5ea8695..b15760b 100644 (file)
@@ -322,8 +322,11 @@ static int can_rcvlist_proc_show(struct seq_file *m, void *v)
 
        /* receive list for registered CAN devices */
        for_each_netdev_rcu(net, dev) {
-               if (dev->type == ARPHRD_CAN && dev->ml_priv)
-                       can_rcvlist_proc_show_one(m, idx, dev, dev->ml_priv);
+               struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+               if (can_ml)
+                       can_rcvlist_proc_show_one(m, idx, dev,
+                                                 &can_ml->dev_rcv_lists);
        }
 
        rcu_read_unlock();
@@ -375,8 +378,10 @@ static int can_rcvlist_sff_proc_show(struct seq_file *m, void *v)
 
        /* sff receive list for registered CAN devices */
        for_each_netdev_rcu(net, dev) {
-               if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-                       dev_rcv_lists = dev->ml_priv;
+               struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+               if (can_ml) {
+                       dev_rcv_lists = &can_ml->dev_rcv_lists;
                        can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_sff,
                                                    ARRAY_SIZE(dev_rcv_lists->rx_sff));
                }
@@ -406,8 +411,10 @@ static int can_rcvlist_eff_proc_show(struct seq_file *m, void *v)
 
        /* eff receive list for registered CAN devices */
        for_each_netdev_rcu(net, dev) {
-               if (dev->type == ARPHRD_CAN && dev->ml_priv) {
-                       dev_rcv_lists = dev->ml_priv;
+               struct can_ml_priv *can_ml = can_get_ml_priv(dev);
+
+               if (can_ml) {
+                       dev_rcv_lists = &can_ml->dev_rcv_lists;
                        can_rcvlist_proc_show_array(m, dev, dev_rcv_lists->rx_eff,
                                                    ARRAY_SIZE(dev_rcv_lists->rx_eff));
                }
index 6c5967e..0f72ff5 100644 (file)
@@ -1184,6 +1184,18 @@ static int __dev_alloc_name(struct net *net, const char *name, char *buf)
                        return -ENOMEM;
 
                for_each_netdev(net, d) {
+                       struct netdev_name_node *name_node;
+                       list_for_each_entry(name_node, &d->name_node->list, list) {
+                               if (!sscanf(name_node->name, name, &i))
+                                       continue;
+                               if (i < 0 || i >= max_netdevices)
+                                       continue;
+
+                               /* avoid cases where sscanf is not an exact inverse of printf */
+                               snprintf(buf, IFNAMSIZ, name, i);
+                               if (!strncmp(buf, name_node->name, IFNAMSIZ))
+                                       set_bit(i, inuse);
+                       }
                        if (!sscanf(d->name, name, &i))
                                continue;
                        if (i < 0 || i >= max_netdevices)
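
Previously __dev_alloc_name() matched the "%d" pattern only against each device's primary name. Devices can also carry alternative names (e.g. added with `ip link property add dev eth0 altname eth3`), and the added loop over the netdev_name_node list marks those indexes as in use as well, so pattern allocation can no longer hand out an index already claimed by an altname.
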
@@ -4294,6 +4306,13 @@ static inline void ____napi_schedule(struct softnet_data *sd,
                 */
                thread = READ_ONCE(napi->thread);
                if (thread) {
+                       /* Avoid doing set_bit() if the thread is in
+                        * INTERRUPTIBLE state, because napi_thread_wait()
+                        * makes sure to proceed with napi polling
+                        * if the thread is explicitly woken from here.
+                        */
+                       if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE)
+                               set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
                        wake_up_process(thread);
                        return;
                }
@@ -6486,6 +6505,7 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
                WARN_ON_ONCE(!(val & NAPIF_STATE_SCHED));
 
                new = val & ~(NAPIF_STATE_MISSED | NAPIF_STATE_SCHED |
+                             NAPIF_STATE_SCHED_THREADED |
                              NAPIF_STATE_PREFER_BUSY_POLL);
 
                /* If STATE_MISSED was set, leave STATE_SCHED set,
@@ -6968,16 +6988,25 @@ static int napi_poll(struct napi_struct *n, struct list_head *repoll)
 
 static int napi_thread_wait(struct napi_struct *napi)
 {
+       bool woken = false;
+
        set_current_state(TASK_INTERRUPTIBLE);
 
        while (!kthread_should_stop() && !napi_disable_pending(napi)) {
-               if (test_bit(NAPI_STATE_SCHED, &napi->state)) {
+               /* Testing SCHED_THREADED bit here to make sure the current
+                * kthread owns this napi and could poll on this napi.
+                * Testing SCHED bit is not enough because SCHED bit might be
+                * set by some other busy poll thread or by napi_disable().
+                */
+               if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken) {
                        WARN_ON(!list_empty(&napi->poll_list));
                        __set_current_state(TASK_RUNNING);
                        return 0;
                }
 
                schedule();
+               /* woken being true indicates this thread owns this napi. */
+               woken = true;
                set_current_state(TASK_INTERRUPTIBLE);
        }
        __set_current_state(TASK_RUNNING);
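
Taken together, the three NAPI hunks above form an ownership handshake for threaded polling: the scheduler marks the napi with NAPI_STATE_SCHED_THREADED before waking the kthread (unless the thread is already in TASK_INTERRUPTIBLE, in which case the wakeup itself transfers ownership, tracked by `woken`), and napi_complete_done() clears the flag. Testing NAPI_STATE_SCHED alone was insufficient because busy-poll threads and napi_disable() set it too, so a kthread could poll a napi it did not own. Condensed view (not a drop-in function):

    /* scheduler side */
    if (READ_ONCE(thread->state) != TASK_INTERRUPTIBLE)
            set_bit(NAPI_STATE_SCHED_THREADED, &napi->state);
    wake_up_process(thread);        /* the bit OR the wakeup marks ownership */

    /* kthread side: poll only once ownership is established */
    if (test_bit(NAPI_STATE_SCHED_THREADED, &napi->state) || woken)
            return 0;
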
@@ -11346,7 +11375,7 @@ static void __net_exit default_device_exit(struct net *net)
                        continue;
 
                /* Leave virtual devices for the generic cleanup */
-               if (dev->rtnl_link_ops)
+               if (dev->rtnl_link_ops && !dev->rtnl_link_ops->netns_refund)
                        continue;
 
                /* Push remaining network devices to init_net */
index 571f191..db65ce6 100644 (file)
@@ -1053,6 +1053,20 @@ static int net_dm_hw_monitor_start(struct netlink_ext_ack *extack)
        return 0;
 
 err_module_put:
+       for_each_possible_cpu(cpu) {
+               struct per_cpu_dm_data *hw_data = &per_cpu(dm_hw_cpu_data, cpu);
+               struct sk_buff *skb;
+
+               del_timer_sync(&hw_data->send_timer);
+               cancel_work_sync(&hw_data->dm_alert_work);
+               while ((skb = __skb_dequeue(&hw_data->drop_queue))) {
+                       struct devlink_trap_metadata *hw_metadata;
+
+                       hw_metadata = NET_DM_SKB_CB(skb)->hw_metadata;
+                       net_dm_hw_metadata_free(hw_metadata);
+                       consume_skb(skb);
+               }
+       }
        module_put(THIS_MODULE);
        return rc;
 }
@@ -1134,6 +1148,15 @@ static int net_dm_trace_on_set(struct netlink_ext_ack *extack)
 err_unregister_trace:
        unregister_trace_kfree_skb(ops->kfree_skb_probe, NULL);
 err_module_put:
+       for_each_possible_cpu(cpu) {
+               struct per_cpu_dm_data *data = &per_cpu(dm_cpu_data, cpu);
+               struct sk_buff *skb;
+
+               del_timer_sync(&data->send_timer);
+               cancel_work_sync(&data->dm_alert_work);
+               while ((skb = __skb_dequeue(&data->drop_queue)))
+                       consume_skb(skb);
+       }
        module_put(THIS_MODULE);
        return rc;
 }
index 0c01bd8..fb3bcba 100644 (file)
@@ -237,37 +237,62 @@ void __dst_destroy_metrics_generic(struct dst_entry *dst, unsigned long old)
 }
 EXPORT_SYMBOL(__dst_destroy_metrics_generic);
 
-static struct dst_ops md_dst_ops = {
-       .family =               AF_UNSPEC,
-};
+struct dst_entry *dst_blackhole_check(struct dst_entry *dst, u32 cookie)
+{
+       return NULL;
+}
 
-static int dst_md_discard_out(struct net *net, struct sock *sk, struct sk_buff *skb)
+u32 *dst_blackhole_cow_metrics(struct dst_entry *dst, unsigned long old)
 {
-       WARN_ONCE(1, "Attempting to call output on metadata dst\n");
-       kfree_skb(skb);
-       return 0;
+       return NULL;
 }
 
-static int dst_md_discard(struct sk_buff *skb)
+struct neighbour *dst_blackhole_neigh_lookup(const struct dst_entry *dst,
+                                            struct sk_buff *skb,
+                                            const void *daddr)
 {
-       WARN_ONCE(1, "Attempting to call input on metadata dst\n");
-       kfree_skb(skb);
-       return 0;
+       return NULL;
+}
+
+void dst_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
+                              struct sk_buff *skb, u32 mtu,
+                              bool confirm_neigh)
+{
+}
+EXPORT_SYMBOL_GPL(dst_blackhole_update_pmtu);
+
+void dst_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
+                           struct sk_buff *skb)
+{
+}
+EXPORT_SYMBOL_GPL(dst_blackhole_redirect);
+
+unsigned int dst_blackhole_mtu(const struct dst_entry *dst)
+{
+       unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
+
+       return mtu ? : dst->dev->mtu;
 }
+EXPORT_SYMBOL_GPL(dst_blackhole_mtu);
+
+static struct dst_ops dst_blackhole_ops = {
+       .family         = AF_UNSPEC,
+       .neigh_lookup   = dst_blackhole_neigh_lookup,
+       .check          = dst_blackhole_check,
+       .cow_metrics    = dst_blackhole_cow_metrics,
+       .update_pmtu    = dst_blackhole_update_pmtu,
+       .redirect       = dst_blackhole_redirect,
+       .mtu            = dst_blackhole_mtu,
+};
 
 static void __metadata_dst_init(struct metadata_dst *md_dst,
                                enum metadata_type type, u8 optslen)
-
 {
        struct dst_entry *dst;
 
        dst = &md_dst->dst;
-       dst_init(dst, &md_dst_ops, NULL, 1, DST_OBSOLETE_NONE,
+       dst_init(dst, &dst_blackhole_ops, NULL, 1, DST_OBSOLETE_NONE,
                 DST_METADATA | DST_NOCOUNT);
-
-       dst->input = dst_md_discard;
-       dst->output = dst_md_discard_out;
-
        memset(dst + 1, 0, sizeof(*md_dst) + optslen - sizeof(*dst));
        md_dst->type = type;
 }
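
These hunks hoist the metadata-dst stubs into exported, generic dst_blackhole_*() helpers; the WARN_ONCE() input/output handlers are dropped in favour of the dst_discard defaults that dst_init() installs. The ipv4_dst_blackhole_ops hunk later in this patch converts to the shared helpers, and other blackhole dst_ops can follow the same pattern, roughly (an assumed ipv6 example for illustration, keeping the protocol-specific callbacks):

    static struct dst_ops ip6_dst_blackhole_ops = {
            .family         = AF_INET6,
            .default_advmss = ip6_default_advmss,   /* protocol-specific ops stay */
            .neigh_lookup   = ip6_dst_neigh_lookup,
            .check          = dst_blackhole_check,  /* shared stubs */
            .cow_metrics    = dst_blackhole_cow_metrics,
            .update_pmtu    = dst_blackhole_update_pmtu,
            .redirect       = dst_blackhole_redirect,
            .mtu            = dst_blackhole_mtu,
    };
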
index adfdad2..9323d34 100644 (file)
@@ -5658,7 +5658,7 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
        if (unlikely(flags & ~(BPF_MTU_CHK_SEGS)))
                return -EINVAL;
 
-       if (unlikely(flags & BPF_MTU_CHK_SEGS && len_diff))
+       if (unlikely(flags & BPF_MTU_CHK_SEGS && (len_diff || *mtu_len)))
                return -EINVAL;
 
        dev = __dev_via_ifindex(dev, ifindex);
@@ -5668,7 +5668,11 @@ BPF_CALL_5(bpf_skb_check_mtu, struct sk_buff *, skb,
        mtu = READ_ONCE(dev->mtu);
 
        dev_len = mtu + dev->hard_header_len;
-       skb_len = skb->len + len_diff; /* minus result pass check */
+
+       /* If set, use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
+       skb_len = *mtu_len ? *mtu_len + dev->hard_header_len : skb->len;
+
+       skb_len += len_diff; /* minus result pass check */
        if (skb_len <= dev_len) {
                ret = BPF_MTU_CHK_RET_SUCCESS;
                goto out;
@@ -5713,6 +5717,10 @@ BPF_CALL_5(bpf_xdp_check_mtu, struct xdp_buff *, xdp,
        /* Add L2-header as dev MTU is L3 size */
        dev_len = mtu + dev->hard_header_len;
 
+       /* Use *mtu_len as input, L3 as iph->tot_len (like fib_lookup) */
+       if (*mtu_len)
+               xdp_len = *mtu_len + dev->hard_header_len;
+
        xdp_len += len_diff; /* minus result pass check */
        if (xdp_len > dev_len)
                ret = BPF_MTU_CHK_RET_FRAG_NEEDED;
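
With this change a BPF program can pass a nonzero *mtu_len to request an L3-based check (the value plus hard_header_len, plus len_diff, is compared against the device MTU, as fib_lookup does with iph->tot_len); zero keeps the old skb->len behaviour, and BPF_MTU_CHK_SEGS remains incompatible with a nonzero *mtu_len. A hedged caller-side sketch (return codes as in the UAPI; iph and ifindex are illustrative locals):

    __u32 mtu_len = bpf_ntohs(iph->tot_len);  /* L3 length as the input */
    long ret;

    /* The helper writes the device MTU back into mtu_len; positive
     * return codes signal an MTU violation.
     */
    ret = bpf_check_mtu(skb, ifindex, &mtu_len, 0, 0);
    if (ret == BPF_MTU_CHK_RET_FRAG_NEEDED)
            return TC_ACT_SHOT;               /* would exceed the MTU */
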
index 2ef2224..a96a4f5 100644 (file)
@@ -176,7 +176,7 @@ void skb_flow_get_icmp_tci(const struct sk_buff *skb,
         * avoid confusion with packets without such field
         */
        if (icmp_has_id(ih->type))
-               key_icmp->id = ih->un.echo.id ? : 1;
+               key_icmp->id = ih->un.echo.id ? ntohs(ih->un.echo.id) : 1;
        else
                key_icmp->id = 0;
 }
index 0edc0b2..1bdcb33 100644 (file)
@@ -2147,7 +2147,7 @@ out:
 out_err:
        cb->args[1] = idx;
        cb->args[0] = h;
-       cb->seq = net->dev_base_seq;
+       cb->seq = tgt_net->dev_base_seq;
        nl_dump_check_consistent(cb, nlmsg_hdr(skb));
        if (netnsid >= 0)
                put_net(tgt_net);
index 545a472..c421c8f 100644 (file)
@@ -3659,6 +3659,8 @@ unsigned int skb_find_text(struct sk_buff *skb, unsigned int from,
        struct ts_state state;
        unsigned int ret;
 
+       BUILD_BUG_ON(sizeof(struct skb_seq_state) > sizeof(state.cb));
+
        config->get_next_block = skb_ts_get_next_block;
        config->finish = skb_ts_finish;
 
index 0ed98f2..cc31b60 100644 (file)
@@ -3440,6 +3440,32 @@ static void tw_prot_cleanup(struct timewait_sock_ops *twsk_prot)
        twsk_prot->twsk_slab = NULL;
 }
 
+static int tw_prot_init(const struct proto *prot)
+{
+       struct timewait_sock_ops *twsk_prot = prot->twsk_prot;
+
+       if (!twsk_prot)
+               return 0;
+
+       twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s",
+                                             prot->name);
+       if (!twsk_prot->twsk_slab_name)
+               return -ENOMEM;
+
+       twsk_prot->twsk_slab =
+               kmem_cache_create(twsk_prot->twsk_slab_name,
+                                 twsk_prot->twsk_obj_size, 0,
+                                 SLAB_ACCOUNT | prot->slab_flags,
+                                 NULL);
+       if (!twsk_prot->twsk_slab) {
+               pr_crit("%s: Can't create timewait sock SLAB cache!\n",
+                       prot->name);
+               return -ENOMEM;
+       }
+
+       return 0;
+}
+
 static void req_prot_cleanup(struct request_sock_ops *rsk_prot)
 {
        if (!rsk_prot)
@@ -3496,22 +3522,8 @@ int proto_register(struct proto *prot, int alloc_slab)
                if (req_prot_init(prot))
                        goto out_free_request_sock_slab;
 
-               if (prot->twsk_prot != NULL) {
-                       prot->twsk_prot->twsk_slab_name = kasprintf(GFP_KERNEL, "tw_sock_%s", prot->name);
-
-                       if (prot->twsk_prot->twsk_slab_name == NULL)
-                               goto out_free_request_sock_slab;
-
-                       prot->twsk_prot->twsk_slab =
-                               kmem_cache_create(prot->twsk_prot->twsk_slab_name,
-                                                 prot->twsk_prot->twsk_obj_size,
-                                                 0,
-                                                 SLAB_ACCOUNT |
-                                                 prot->slab_flags,
-                                                 NULL);
-                       if (prot->twsk_prot->twsk_slab == NULL)
-                               goto out_free_timewait_sock_slab;
-               }
+               if (tw_prot_init(prot))
+                       goto out_free_timewait_sock_slab;
        }
 
        mutex_lock(&proto_list_mutex);
index 1f73603..2be5c69 100644 (file)
@@ -319,6 +319,11 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
        if (!ipv6_unicast_destination(skb))
                return 0;       /* discard, don't send a reset here */
 
+       if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+               __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+               return 0;
+       }
+
        if (dccp_bad_service_code(sk, service)) {
                dcb->dccpd_reset_code = DCCP_RESET_CODE_BAD_SERVICE_CODE;
                goto drop;
index a45572c..58b8fc8 100644 (file)
@@ -9,6 +9,7 @@ menuconfig NET_DSA
        tristate "Distributed Switch Architecture"
        depends on HAVE_NET_DSA
        depends on BRIDGE || BRIDGE=n
+       depends on HSR || HSR=n
        select GRO_CELLS
        select NET_SWITCHDEV
        select PHYLINK
@@ -117,6 +118,8 @@ config NET_DSA_TAG_OCELOT
 
 config NET_DSA_TAG_OCELOT_8021Q
        tristate "Tag driver for Ocelot family of switches, using VLAN"
+       depends on MSCC_OCELOT_SWITCH_LIB || \
+                 (MSCC_OCELOT_SWITCH_LIB=n && COMPILE_TEST)
        select NET_DSA_TAG_8021Q
        help
          Say Y or M if you want to enable support for tagging frames with a
index 4d4956e..d142eb2 100644 (file)
@@ -1066,6 +1066,7 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
 {
        struct dsa_switch *ds = dp->ds;
        struct dsa_switch_tree *dst = ds->dst;
+       const struct dsa_device_ops *tag_ops;
        enum dsa_tag_protocol tag_protocol;
 
        tag_protocol = dsa_get_tag_protocol(dp, master);
@@ -1080,14 +1081,16 @@ static int dsa_port_parse_cpu(struct dsa_port *dp, struct net_device *master)
                 * nothing to do here.
                 */
        } else {
-               dst->tag_ops = dsa_tag_driver_get(tag_protocol);
-               if (IS_ERR(dst->tag_ops)) {
-                       if (PTR_ERR(dst->tag_ops) == -ENOPROTOOPT)
+               tag_ops = dsa_tag_driver_get(tag_protocol);
+               if (IS_ERR(tag_ops)) {
+                       if (PTR_ERR(tag_ops) == -ENOPROTOOPT)
                                return -EPROBE_DEFER;
                        dev_warn(ds->dev, "No tagger for this switch\n");
                        dp->master = NULL;
-                       return PTR_ERR(dst->tag_ops);
+                       return PTR_ERR(tag_ops);
                }
+
+               dst->tag_ops = tag_ops;
        }
 
        dp->master = master;
index 2eeaa42..9d4b0e9 100644 (file)
@@ -230,8 +230,8 @@ int dsa_port_hsr_join(struct dsa_port *dp, struct net_device *hsr);
 void dsa_port_hsr_leave(struct dsa_port *dp, struct net_device *hsr);
 extern const struct phylink_mac_ops dsa_port_phylink_mac_ops;
 
-static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
-                                           struct net_device *dev)
+static inline bool dsa_port_offloads_bridge_port(struct dsa_port *dp,
+                                                struct net_device *dev)
 {
        /* Switchdev offloading can be configured on: */
 
@@ -241,12 +241,6 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
                 */
                return true;
 
-       if (dp->bridge_dev == dev)
-               /* DSA ports connected to a bridge, and event was emitted
-                * for the bridge.
-                */
-               return true;
-
        if (dp->lag_dev == dev)
                /* DSA ports connected to a bridge via a LAG */
                return true;
@@ -254,14 +248,23 @@ static inline bool dsa_port_offloads_netdev(struct dsa_port *dp,
        return false;
 }
 
+static inline bool dsa_port_offloads_bridge(struct dsa_port *dp,
+                                           struct net_device *bridge_dev)
+{
+       /* DSA ports connected to a bridge, and event was emitted
+        * for the bridge.
+        */
+       return dp->bridge_dev == bridge_dev;
+}
+
 /* Returns true if any port of this tree offloads the given net_device */
-static inline bool dsa_tree_offloads_netdev(struct dsa_switch_tree *dst,
-                                           struct net_device *dev)
+static inline bool dsa_tree_offloads_bridge_port(struct dsa_switch_tree *dst,
+                                                struct net_device *dev)
 {
        struct dsa_port *dp;
 
        list_for_each_entry(dp, &dst->ports, list)
-               if (dsa_port_offloads_netdev(dp, dev))
+               if (dsa_port_offloads_bridge_port(dp, dev))
                        return true;
 
        return false;
index 491e376..992fcab 100644 (file)
@@ -278,28 +278,43 @@ static int dsa_slave_port_attr_set(struct net_device *dev,
        struct dsa_port *dp = dsa_slave_to_port(dev);
        int ret;
 
-       if (!dsa_port_offloads_netdev(dp, attr->orig_dev))
-               return -EOPNOTSUPP;
-
        switch (attr->id) {
        case SWITCHDEV_ATTR_ID_PORT_STP_STATE:
+               if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+                       return -EOPNOTSUPP;
+
                ret = dsa_port_set_state(dp, attr->u.stp_state);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING:
+               if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+                       return -EOPNOTSUPP;
+
                ret = dsa_port_vlan_filtering(dp, attr->u.vlan_filtering,
                                              extack);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_AGEING_TIME:
+               if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+                       return -EOPNOTSUPP;
+
                ret = dsa_port_ageing_time(dp, attr->u.ageing_time);
                break;
        case SWITCHDEV_ATTR_ID_PORT_PRE_BRIDGE_FLAGS:
+               if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+                       return -EOPNOTSUPP;
+
                ret = dsa_port_pre_bridge_flags(dp, attr->u.brport_flags,
                                                extack);
                break;
        case SWITCHDEV_ATTR_ID_PORT_BRIDGE_FLAGS:
+               if (!dsa_port_offloads_bridge_port(dp, attr->orig_dev))
+                       return -EOPNOTSUPP;
+
                ret = dsa_port_bridge_flags(dp, attr->u.brport_flags, extack);
                break;
        case SWITCHDEV_ATTR_ID_BRIDGE_MROUTER:
+               if (!dsa_port_offloads_bridge(dp, attr->orig_dev))
+                       return -EOPNOTSUPP;
+
                ret = dsa_port_mrouter(dp->cpu_dp, attr->u.mrouter, extack);
                break;
        default:
@@ -341,9 +356,6 @@ static int dsa_slave_vlan_add(struct net_device *dev,
        struct switchdev_obj_port_vlan vlan;
        int err;
 
-       if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
-               return -EOPNOTSUPP;
-
        if (dsa_port_skip_vlan_configuration(dp)) {
                NL_SET_ERR_MSG_MOD(extack, "skipping configuration of VLAN");
                return 0;
@@ -391,27 +403,36 @@ static int dsa_slave_port_obj_add(struct net_device *dev,
 
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_MDB:
-               if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+               if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
                        return -EOPNOTSUPP;
+
                err = dsa_port_mdb_add(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
                break;
        case SWITCHDEV_OBJ_ID_HOST_MDB:
+               if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
+                       return -EOPNOTSUPP;
+
                /* DSA can directly translate this to a normal MDB add,
                 * but on the CPU port.
                 */
                err = dsa_port_mdb_add(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
                break;
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
+               if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+                       return -EOPNOTSUPP;
+
                err = dsa_slave_vlan_add(dev, obj, extack);
                break;
        case SWITCHDEV_OBJ_ID_MRP:
-               if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+               if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
                        return -EOPNOTSUPP;
+
                err = dsa_port_mrp_add(dp, SWITCHDEV_OBJ_MRP(obj));
                break;
        case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
-               if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+               if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
                        return -EOPNOTSUPP;
+
                err = dsa_port_mrp_add_ring_role(dp,
                                                 SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
                break;
@@ -431,9 +452,6 @@ static int dsa_slave_vlan_del(struct net_device *dev,
        struct switchdev_obj_port_vlan *vlan;
        int err;
 
-       if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
-               return -EOPNOTSUPP;
-
        if (dsa_port_skip_vlan_configuration(dp))
                return 0;
 
@@ -459,27 +477,36 @@ static int dsa_slave_port_obj_del(struct net_device *dev,
 
        switch (obj->id) {
        case SWITCHDEV_OBJ_ID_PORT_MDB:
-               if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+               if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
                        return -EOPNOTSUPP;
+
                err = dsa_port_mdb_del(dp, SWITCHDEV_OBJ_PORT_MDB(obj));
                break;
        case SWITCHDEV_OBJ_ID_HOST_MDB:
+               if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
+                       return -EOPNOTSUPP;
+
                /* DSA can directly translate this to a normal MDB del,
                 * but on the CPU port.
                 */
                err = dsa_port_mdb_del(dp->cpu_dp, SWITCHDEV_OBJ_PORT_MDB(obj));
                break;
        case SWITCHDEV_OBJ_ID_PORT_VLAN:
+               if (!dsa_port_offloads_bridge_port(dp, obj->orig_dev))
+                       return -EOPNOTSUPP;
+
                err = dsa_slave_vlan_del(dev, obj);
                break;
        case SWITCHDEV_OBJ_ID_MRP:
-               if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+               if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
                        return -EOPNOTSUPP;
+
                err = dsa_port_mrp_del(dp, SWITCHDEV_OBJ_MRP(obj));
                break;
        case SWITCHDEV_OBJ_ID_RING_ROLE_MRP:
-               if (!dsa_port_offloads_netdev(dp, obj->orig_dev))
+               if (!dsa_port_offloads_bridge(dp, obj->orig_dev))
                        return -EOPNOTSUPP;
+
                err = dsa_port_mrp_del_ring_role(dp,
                                                 SWITCHDEV_OBJ_RING_ROLE_MRP(obj));
                break;
@@ -2298,7 +2325,7 @@ static int dsa_slave_switchdev_event(struct notifier_block *unused,
                         * other ports bridged with the LAG should be able to
                         * autonomously forward towards it.
                         */
-                       if (dsa_tree_offloads_netdev(dp->ds->dst, dev))
+                       if (dsa_tree_offloads_bridge_port(dp->ds->dst, dev))
                                return NOTIFY_DONE;
                }
 
index 38dcdde..5974848 100644 (file)
@@ -13,6 +13,7 @@
 #define MTK_HDR_LEN            4
 #define MTK_HDR_XMIT_UNTAGGED          0
 #define MTK_HDR_XMIT_TAGGED_TPID_8100  1
+#define MTK_HDR_XMIT_TAGGED_TPID_88A8  2
 #define MTK_HDR_RECV_SOURCE_PORT_MASK  GENMASK(2, 0)
 #define MTK_HDR_XMIT_DP_BIT_MASK       GENMASK(5, 0)
 #define MTK_HDR_XMIT_SA_DIS            BIT(6)
@@ -21,8 +22,8 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
                                    struct net_device *dev)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
+       u8 xmit_tpid;
        u8 *mtk_tag;
-       bool is_vlan_skb = true;
        unsigned char *dest = eth_hdr(skb)->h_dest;
        bool is_multicast_skb = is_multicast_ether_addr(dest) &&
                                !is_broadcast_ether_addr(dest);
@@ -33,10 +34,17 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
         * both the special and VLAN tags at the same time and then look up the VLAN
         * table with VID.
         */
-       if (!skb_vlan_tagged(skb)) {
+       switch (skb->protocol) {
+       case htons(ETH_P_8021Q):
+               xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_8100;
+               break;
+       case htons(ETH_P_8021AD):
+               xmit_tpid = MTK_HDR_XMIT_TAGGED_TPID_88A8;
+               break;
+       default:
+               xmit_tpid = MTK_HDR_XMIT_UNTAGGED;
                skb_push(skb, MTK_HDR_LEN);
                memmove(skb->data, skb->data + MTK_HDR_LEN, 2 * ETH_ALEN);
-               is_vlan_skb = false;
        }
 
        mtk_tag = skb->data + 2 * ETH_ALEN;
@@ -44,8 +52,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
        /* Mark tag attribute on special tag insertion to notify hardware
         * whether that's a combined special tag with 802.1Q header.
         */
-       mtk_tag[0] = is_vlan_skb ? MTK_HDR_XMIT_TAGGED_TPID_8100 :
-                    MTK_HDR_XMIT_UNTAGGED;
+       mtk_tag[0] = xmit_tpid;
        mtk_tag[1] = (1 << dp->index) & MTK_HDR_XMIT_DP_BIT_MASK;
 
        /* Disable SA learning for multicast frames */
@@ -53,7 +60,7 @@ static struct sk_buff *mtk_tag_xmit(struct sk_buff *skb,
                mtk_tag[1] |= MTK_HDR_XMIT_SA_DIS;
 
        /* Tag control information is kept for 802.1Q */
-       if (!is_vlan_skb) {
+       if (xmit_tpid == MTK_HDR_XMIT_UNTAGGED) {
                mtk_tag[2] = 0;
                mtk_tag[3] = 0;
        }
index c17d39b..e917647 100644 (file)
@@ -35,14 +35,12 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
                                      struct net_device *dev)
 {
        struct dsa_port *dp = dsa_slave_to_port(dev);
+       __be16 *p;
        u8 *tag;
-       u16 *p;
        u16 out;
 
        /* Pad out to at least 60 bytes */
-       if (unlikely(eth_skb_pad(skb)))
-               return NULL;
-       if (skb_cow_head(skb, RTL4_A_HDR_LEN) < 0)
+       if (unlikely(__skb_put_padto(skb, ETH_ZLEN, false)))
                return NULL;
 
        netdev_dbg(dev, "add realtek tag to packet to port %d\n",
@@ -53,13 +51,13 @@ static struct sk_buff *rtl4a_tag_xmit(struct sk_buff *skb,
        tag = skb->data + 2 * ETH_ALEN;
 
        /* Set Ethertype */
-       p = (u16 *)tag;
+       p = (__be16 *)tag;
        *p = htons(RTL4_A_ETHERTYPE);
 
        out = (RTL4_A_PROTOCOL_RTL8366RB << 12) | (2 << 8);
-       /* The lower bits is the port numer */
+       /* The lower bits are the port number */
        out |= (u8)dp->index;
-       p = (u16 *)(tag + 2);
+       p = (__be16 *)(tag + 2);
        *p = htons(out);
 
        return skb;
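
Two independent fixes in this tagger: the u16 -> __be16 change gives the htons() stores their correct endianness annotation for sparse, and __skb_put_padto(skb, ETH_ZLEN, false) replaces eth_skb_pad() because the latter frees the skb on failure while the DSA transmit path frees it again once the tagger returns NULL, a potential double free; with free_on_error set to false, ownership stays with the caller.
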
index 25a9e56..6a070dc 100644 (file)
@@ -116,10 +116,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
        struct ethtool_channels channels = {};
        struct ethnl_req_info req_info = {};
        struct nlattr **tb = info->attrs;
-       const struct nlattr *err_attr;
+       u32 err_attr, max_rx_in_use = 0;
        const struct ethtool_ops *ops;
        struct net_device *dev;
-       u32 max_rx_in_use = 0;
        int ret;
 
        ret = ethnl_parse_header_dev_get(&req_info,
@@ -157,34 +156,35 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
 
        /* ensure new channel counts are within limits */
        if (channels.rx_count > channels.max_rx)
-               err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT];
+               err_attr = ETHTOOL_A_CHANNELS_RX_COUNT;
        else if (channels.tx_count > channels.max_tx)
-               err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT];
+               err_attr = ETHTOOL_A_CHANNELS_TX_COUNT;
        else if (channels.other_count > channels.max_other)
-               err_attr = tb[ETHTOOL_A_CHANNELS_OTHER_COUNT];
+               err_attr = ETHTOOL_A_CHANNELS_OTHER_COUNT;
        else if (channels.combined_count > channels.max_combined)
-               err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT];
+               err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
        else
-               err_attr = NULL;
+               err_attr = 0;
        if (err_attr) {
                ret = -EINVAL;
-               NL_SET_ERR_MSG_ATTR(info->extack, err_attr,
+               NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
                                    "requested channel count exceeds maximum");
                goto out_ops;
        }
 
        /* ensure there is at least one RX and one TX channel */
        if (!channels.combined_count && !channels.rx_count)
-               err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT];
+               err_attr = ETHTOOL_A_CHANNELS_RX_COUNT;
        else if (!channels.combined_count && !channels.tx_count)
-               err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT];
+               err_attr = ETHTOOL_A_CHANNELS_TX_COUNT;
        else
-               err_attr = NULL;
+               err_attr = 0;
        if (err_attr) {
                if (mod_combined)
-                       err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT];
+                       err_attr = ETHTOOL_A_CHANNELS_COMBINED_COUNT;
                ret = -EINVAL;
-               NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured");
+               NL_SET_ERR_MSG_ATTR(info->extack, tb[err_attr],
+                                   "requested channel counts would result in no RX or TX channel being configured");
                goto out_ops;
        }
 
index f9a8cc8..bb1351c 100644 (file)
@@ -164,8 +164,10 @@ static struct hsr_node *hsr_add_node(struct hsr_priv *hsr,
         * as initialization. (0 could trigger a spurious ring error warning).
         */
        now = jiffies;
-       for (i = 0; i < HSR_PT_PORTS; i++)
+       for (i = 0; i < HSR_PT_PORTS; i++) {
                new_node->time_in[i] = now;
+               new_node->time_out[i] = now;
+       }
        for (i = 0; i < HSR_PT_PORTS; i++)
                new_node->seq_out[i] = seq_out;
 
@@ -413,9 +415,12 @@ void hsr_register_frame_in(struct hsr_node *node, struct hsr_port *port,
 int hsr_register_frame_out(struct hsr_port *port, struct hsr_node *node,
                           u16 sequence_nr)
 {
-       if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]))
+       if (seq_nr_before_or_eq(sequence_nr, node->seq_out[port->type]) &&
+           time_is_after_jiffies(node->time_out[port->type] +
+           msecs_to_jiffies(HSR_ENTRY_FORGET_TIME)))
                return 1;
 
+       node->time_out[port->type] = jiffies;
        node->seq_out[port->type] = sequence_nr;
        return 0;
 }
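
The new time_out[] stamp relaxes duplicate rejection: a frame whose sequence number is not newer is dropped only if something was accepted on that port within the last HSR_ENTRY_FORGET_TIME (400 ms, added in the define hunk below). A node that reboots and restarts its sequence numbers is therefore blackholed for at most 400 ms instead of until the 60 s HSR_NODE_FORGET_TIME expires.
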
index 86b43f5..d9628e7 100644 (file)
@@ -75,6 +75,7 @@ struct hsr_node {
        enum hsr_port_type      addr_B_port;
        unsigned long           time_in[HSR_PT_PORTS];
        bool                    time_in_stale[HSR_PT_PORTS];
+       unsigned long           time_out[HSR_PT_PORTS];
        /* if the node is a SAN */
        bool                    san_a;
        bool                    san_b;
index a169808..8f26467 100644 (file)
@@ -22,6 +22,7 @@
 #define HSR_LIFE_CHECK_INTERVAL                 2000 /* ms */
 #define HSR_NODE_FORGET_TIME           60000 /* ms */
 #define HSR_ANNOUNCE_INTERVAL            100 /* ms */
+#define HSR_ENTRY_FORGET_TIME            400 /* ms */
 
 /* By how much may slave1 and slave2 timestamps of latest received frame from
  * each node differ before we notify of communication problem?
index a02ce89..1355e6c 100644 (file)
@@ -1021,7 +1021,6 @@ static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned lon
 
 const struct proto_ops inet_stream_ops = {
        .family            = PF_INET,
-       .flags             = PROTO_CMSG_DATA_ONLY,
        .owner             = THIS_MODULE,
        .release           = inet_release,
        .bind              = inet_bind,
index 471d33a..bfaf327 100644 (file)
@@ -519,16 +519,10 @@ int cipso_v4_doi_remove(u32 doi, struct netlbl_audit *audit_info)
                ret_val = -ENOENT;
                goto doi_remove_return;
        }
-       if (!refcount_dec_and_test(&doi_def->refcount)) {
-               spin_unlock(&cipso_v4_doi_list_lock);
-               ret_val = -EBUSY;
-               goto doi_remove_return;
-       }
        list_del_rcu(&doi_def->list);
        spin_unlock(&cipso_v4_doi_list_lock);
 
-       cipso_v4_cache_invalidate();
-       call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
+       cipso_v4_doi_putdef(doi_def);
        ret_val = 0;
 
 doi_remove_return:
@@ -585,9 +579,6 @@ void cipso_v4_doi_putdef(struct cipso_v4_doi *doi_def)
 
        if (!refcount_dec_and_test(&doi_def->refcount))
                return;
-       spin_lock(&cipso_v4_doi_list_lock);
-       list_del_rcu(&doi_def->list);
-       spin_unlock(&cipso_v4_doi_list_lock);
 
        cipso_v4_cache_invalidate();
        call_rcu(&doi_def->rcu, cipso_v4_doi_free_rcu);
@@ -1162,7 +1153,7 @@ static void cipso_v4_gentag_hdr(const struct cipso_v4_doi *doi_def,
 {
        buf[0] = IPOPT_CIPSO;
        buf[1] = CIPSO_V4_HDR_LEN + len;
-       *(__be32 *)&buf[2] = htonl(doi_def->doi);
+       put_unaligned_be32(doi_def->doi, &buf[2]);
 }
 
 /**
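
put_unaligned_be32() matters here because the DOI is stored at offset 2 of the CIPSO option buffer, which is not 4-byte aligned, so the old *(__be32 *)&buf[2] store faults on strict-alignment architectures. The refcount hunks above let cipso_v4_doi_remove() unlink a DOI that still has users: it drops the list reference through cipso_v4_doi_putdef() instead of failing with -EBUSY, and the final put frees the structure.
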
index 396b492..616e2dc 100644 (file)
@@ -775,13 +775,14 @@ EXPORT_SYMBOL(__icmp_send);
 void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 {
        struct sk_buff *cloned_skb = NULL;
+       struct ip_options opts = { 0 };
        enum ip_conntrack_info ctinfo;
        struct nf_conn *ct;
        __be32 orig_ip;
 
        ct = nf_ct_get(skb_in, &ctinfo);
        if (!ct || !(ct->status & IPS_SRC_NAT)) {
-               icmp_send(skb_in, type, code, info);
+               __icmp_send(skb_in, type, code, info, &opts);
                return;
        }
 
@@ -796,7 +797,7 @@ void icmp_ndo_send(struct sk_buff *skb_in, int type, int code, __be32 info)
 
        orig_ip = ip_hdr(skb_in)->saddr;
        ip_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.ip;
-       icmp_send(skb_in, type, code, info);
+       __icmp_send(skb_in, type, code, info, &opts);
        ip_hdr(skb_in)->saddr = orig_ip;
 out:
        consume_skb(cloned_skb);
index 6bd7ca0..fd472ea 100644 (file)
@@ -705,12 +705,15 @@ static bool reqsk_queue_unlink(struct request_sock *req)
        return found;
 }
 
-void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
+bool inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req)
 {
-       if (reqsk_queue_unlink(req)) {
+       bool unlinked = reqsk_queue_unlink(req);
+
+       if (unlinked) {
                reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
                reqsk_put(req);
        }
+       return unlinked;
 }
 EXPORT_SYMBOL(inet_csk_reqsk_queue_drop);
 
index ff327a6..da21dfc 100644 (file)
@@ -65,7 +65,7 @@ EXPORT_SYMBOL_GPL(inet_peer_base_init);
 #define PEER_MAX_GC 32
 
 /* Exported for sysctl_net_ipv4.  */
-int inet_peer_threshold __read_mostly = 65536 + 128;   /* start to throw entries more
+int inet_peer_threshold __read_mostly; /* start to throw entries more
                                         * aggressively at this stage */
 int inet_peer_minttl __read_mostly = 120 * HZ; /* TTL under high load: 120 sec */
 int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;     /* usual time to live: 10 min */
@@ -73,20 +73,13 @@ int inet_peer_maxttl __read_mostly = 10 * 60 * HZ;  /* usual time to live: 10 min
 /* Called from ip_output.c:ip_init  */
 void __init inet_initpeers(void)
 {
-       struct sysinfo si;
+       u64 nr_entries;
 
-       /* Use the straight interface to information about memory. */
-       si_meminfo(&si);
-       /* The values below were suggested by Alexey Kuznetsov
-        * <kuznet@ms2.inr.ac.ru>.  I don't have any opinion about the values
-        * myself.  --SAW
-        */
-       if (si.totalram <= (32768*1024)/PAGE_SIZE)
-               inet_peer_threshold >>= 1; /* max pool size about 1MB on IA32 */
-       if (si.totalram <= (16384*1024)/PAGE_SIZE)
-               inet_peer_threshold >>= 1; /* about 512KB */
-       if (si.totalram <= (8192*1024)/PAGE_SIZE)
-               inet_peer_threshold >>= 2; /* about 128KB */
+        /* 1% of physical memory */
+       nr_entries = div64_ul((u64)totalram_pages() << PAGE_SHIFT,
+                             100 * L1_CACHE_ALIGN(sizeof(struct inet_peer)));
+
+       inet_peer_threshold = clamp_val(nr_entries, 4096, 65536 + 128);
 
        peer_cachep = kmem_cache_create("inet_peer_cache",
                        sizeof(struct inet_peer),
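
The threshold is now sized at roughly 1% of RAM, counted in cache-aligned inet_peer entries and clamped to [4096, 65536 + 128], where the old si_meminfo() ladder only ever shrank a fixed default. Worked numbers, assuming L1_CACHE_ALIGN(sizeof(struct inet_peer)) comes to 192 bytes on a 64-bit build (an assumption; the exact size is config-dependent):

      64 MiB:   67108864 / (100 * 192) ~=  3495  -> clamped up to 4096
       1 GiB: 1073741824 / (100 * 192) ~= 55924  -> used as-is
      16 GiB:                          ~= 894784 -> clamped down to 65664
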
index 76a420c..f6cc26d 100644 (file)
@@ -502,8 +502,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
                if (!skb_is_gso(skb) &&
                    (inner_iph->frag_off & htons(IP_DF)) &&
                    mtu < pkt_size) {
-                       memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
+                       icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu));
                        return -E2BIG;
                }
        }
@@ -527,7 +526,7 @@ static int tnl_update_pmtu(struct net_device *dev, struct sk_buff *skb,
 
                if (!skb_is_gso(skb) && mtu >= IPV6_MIN_MTU &&
                                        mtu < pkt_size) {
-                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+                       icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        return -E2BIG;
                }
        }
index abc171e..eb20708 100644 (file)
@@ -238,13 +238,13 @@ static netdev_tx_t vti_xmit(struct sk_buff *skb, struct net_device *dev,
        if (skb->len > mtu) {
                skb_dst_update_pmtu_no_confirm(skb, mtu);
                if (skb->protocol == htons(ETH_P_IP)) {
-                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-                                 htonl(mtu));
+                       icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+                                     htonl(mtu));
                } else {
                        if (mtu < IPV6_MIN_MTU)
                                mtu = IPV6_MIN_MTU;
 
-                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+                       icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                }
 
                dst_release(dst);
index 47db1bf..bc2f6ca 100644 (file)
@@ -309,7 +309,7 @@ have_carrier:
  */
 static void __init ic_close_devs(void)
 {
-       struct net_device *selected_dev = ic_dev->dev;
+       struct net_device *selected_dev = ic_dev ? ic_dev->dev : NULL;
        struct ic_device *d, *next;
        struct net_device *dev;
 
@@ -317,16 +317,18 @@ static void __init ic_close_devs(void)
        next = ic_first_dev;
        while ((d = next)) {
                bool bring_down = (d != ic_dev);
-               struct net_device *lower_dev;
+               struct net_device *lower;
                struct list_head *iter;
 
                next = d->next;
                dev = d->dev;
 
-               netdev_for_each_lower_dev(selected_dev, lower_dev, iter) {
-                       if (dev == lower_dev) {
-                               bring_down = false;
-                               break;
+               if (selected_dev) {
+                       netdev_for_each_lower_dev(selected_dev, lower, iter) {
+                               if (dev == lower) {
+                                       bring_down = false;
+                                       break;
+                               }
                        }
                }
                if (bring_down) {
index c576a63..d1e04d2 100644 (file)
@@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb,
 
        local_bh_disable();
        addend = xt_write_recseq_begin();
-       private = rcu_access_pointer(table->private);
+       private = READ_ONCE(table->private); /* Address dependency. */
        cpu     = smp_processor_id();
        table_base = private->entries;
        jumpstack  = (struct arpt_entry **)private->jumpstack[cpu];
@@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
 
        /* We need atomic snapshot of counters: rest doesn't change
         * (other than comefrom, which userspace doesn't care
@@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size,
        unsigned int off, num;
        const struct arpt_entry *e;
        struct xt_counters *counters;
-       struct xt_table_info *private = xt_table_get_private_protected(table);
+       struct xt_table_info *private = table->private;
        int ret = 0;
        void *loc_cpu_entry;
 
@@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
        t = xt_request_find_table_lock(net, NFPROTO_ARP, name);
        if (!IS_ERR(t)) {
                struct arpt_getinfo info;
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
                struct xt_table_info tmp;
 
@@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr,
 
        t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
        if (!IS_ERR(t)) {
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
 
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
@@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
        }
 
        local_bh_disable();
-       private = xt_table_get_private_protected(t);
+       private = t->private;
        if (private->number != tmp.num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
@@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size,
                                       void __user *userptr)
 {
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
        void __user *pos;
        unsigned int size;
        int ret = 0;
@@ -1379,7 +1379,7 @@ static int compat_get_entries(struct net *net,
        xt_compat_lock(NFPROTO_ARP);
        t = xt_find_table_lock(net, NFPROTO_ARP, get.name);
        if (!IS_ERR(t)) {
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
                struct xt_table_info info;
 
                ret = compat_table_info(private, &info);
index e8f6f9d..f15bc21 100644 (file)
@@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb,
        WARN_ON(!(table->valid_hooks & (1 << hook)));
        local_bh_disable();
        addend = xt_write_recseq_begin();
-       private = rcu_access_pointer(table->private);
+       private = READ_ONCE(table->private); /* Address dependency. */
        cpu        = smp_processor_id();
        table_base = private->entries;
        jumpstack  = (struct ipt_entry **)private->jumpstack[cpu];
@@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
 
        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
@@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size,
        unsigned int off, num;
        const struct ipt_entry *e;
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
        int ret = 0;
        const void *loc_cpu_entry;
 
@@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
        t = xt_request_find_table_lock(net, AF_INET, name);
        if (!IS_ERR(t)) {
                struct ipt_getinfo info;
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
                struct xt_table_info tmp;
 
@@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr,
 
        t = xt_find_table_lock(net, AF_INET, get.name);
        if (!IS_ERR(t)) {
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
                                                   t, uptr->entrytable);
@@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
        }
 
        local_bh_disable();
-       private = xt_table_get_private_protected(t);
+       private = t->private;
        if (private->number != tmp.num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
@@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
                            void __user *userptr)
 {
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
        void __user *pos;
        unsigned int size;
        int ret = 0;
@@ -1589,7 +1589,7 @@ compat_get_entries(struct net *net, struct compat_ipt_get_entries __user *uptr,
        xt_compat_lock(AF_INET);
        t = xt_find_table_lock(net, AF_INET, get.name);
        if (!IS_ERR(t)) {
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
                struct xt_table_info info;
                ret = compat_table_info(private, &info);
                if (!ret && get.size == info.size)
index f1c6cbd..743777b 100644 (file)
@@ -1399,7 +1399,7 @@ out:
 
 /* rtnl */
 /* remove all nexthops tied to a device being deleted */
-static void nexthop_flush_dev(struct net_device *dev)
+static void nexthop_flush_dev(struct net_device *dev, unsigned long event)
 {
        unsigned int hash = nh_dev_hashfn(dev->ifindex);
        struct net *net = dev_net(dev);
@@ -1411,6 +1411,10 @@ static void nexthop_flush_dev(struct net_device *dev)
                if (nhi->fib_nhc.nhc_dev != dev)
                        continue;
 
+               if (nhi->reject_nh &&
+                   (event == NETDEV_DOWN || event == NETDEV_CHANGE))
+                       continue;
+
                remove_nexthop(net, nhi->nh_parent, NULL);
        }
 }
@@ -2189,11 +2193,11 @@ static int nh_netdev_event(struct notifier_block *this,
        switch (event) {
        case NETDEV_DOWN:
        case NETDEV_UNREGISTER:
-               nexthop_flush_dev(dev);
+               nexthop_flush_dev(dev, event);
                break;
        case NETDEV_CHANGE:
                if (!(dev_get_flags(dev) & (IFF_RUNNING | IFF_LOWER_UP)))
-                       nexthop_flush_dev(dev);
+                       nexthop_flush_dev(dev, event);
                break;
        case NETDEV_CHANGEMTU:
                info_ext = ptr;
index 02d81d7..bba150f 100644 (file)
@@ -2687,44 +2687,15 @@ out:
        return rth;
 }
 
-static struct dst_entry *ipv4_blackhole_dst_check(struct dst_entry *dst, u32 cookie)
-{
-       return NULL;
-}
-
-static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst)
-{
-       unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
-
-       return mtu ? : dst->dev->mtu;
-}
-
-static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
-                                         struct sk_buff *skb, u32 mtu,
-                                         bool confirm_neigh)
-{
-}
-
-static void ipv4_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
-                                      struct sk_buff *skb)
-{
-}
-
-static u32 *ipv4_rt_blackhole_cow_metrics(struct dst_entry *dst,
-                                         unsigned long old)
-{
-       return NULL;
-}
-
 static struct dst_ops ipv4_dst_blackhole_ops = {
-       .family                 =       AF_INET,
-       .check                  =       ipv4_blackhole_dst_check,
-       .mtu                    =       ipv4_blackhole_mtu,
-       .default_advmss         =       ipv4_default_advmss,
-       .update_pmtu            =       ipv4_rt_blackhole_update_pmtu,
-       .redirect               =       ipv4_rt_blackhole_redirect,
-       .cow_metrics            =       ipv4_rt_blackhole_cow_metrics,
-       .neigh_lookup           =       ipv4_neigh_lookup,
+       .family                 = AF_INET,
+       .default_advmss         = ipv4_default_advmss,
+       .neigh_lookup           = ipv4_neigh_lookup,
+       .check                  = dst_blackhole_check,
+       .cow_metrics            = dst_blackhole_cow_metrics,
+       .update_pmtu            = dst_blackhole_update_pmtu,
+       .redirect               = dst_blackhole_redirect,
+       .mtu                    = dst_blackhole_mtu,
 };
 
 struct dst_entry *ipv4_blackhole_route(struct net *net, struct dst_entry *dst_orig)
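
Both blackhole ops tables (this one and the IPv6 one further down) now point at shared dst_blackhole_* helpers instead of per-family copies of the same no-op stubs. The consolidation is the classic shared-default-vtable-entries pattern; a hedged sketch in plain C:

#include <stdio.h>

struct ops {
        unsigned int (*mtu)(unsigned int dev_mtu, unsigned int metric);
        void (*redirect)(void);
};

/* shared no-op/default implementations, one copy for every family */
static unsigned int blackhole_mtu(unsigned int dev_mtu, unsigned int metric)
{
        return metric ? metric : dev_mtu;       /* mirrors "mtu ? : dst->dev->mtu" */
}

static void blackhole_redirect(void) { /* deliberately does nothing */ }

static const struct ops v4_blackhole_ops = { blackhole_mtu, blackhole_redirect };
static const struct ops v6_blackhole_ops = { blackhole_mtu, blackhole_redirect };

int main(void)
{
        printf("%u %u\n", v4_blackhole_ops.mtu(1500, 0),
               v6_blackhole_ops.mtu(1500, 1280));
        return 0;
}
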
index a3422e4..de7cc84 100644 (file)
@@ -3469,16 +3469,23 @@ static int do_tcp_setsockopt(struct sock *sk, int level, int optname,
                break;
 
        case TCP_QUEUE_SEQ:
-               if (sk->sk_state != TCP_CLOSE)
+               if (sk->sk_state != TCP_CLOSE) {
                        err = -EPERM;
-               else if (tp->repair_queue == TCP_SEND_QUEUE)
-                       WRITE_ONCE(tp->write_seq, val);
-               else if (tp->repair_queue == TCP_RECV_QUEUE) {
-                       WRITE_ONCE(tp->rcv_nxt, val);
-                       WRITE_ONCE(tp->copied_seq, val);
-               }
-               else
+               } else if (tp->repair_queue == TCP_SEND_QUEUE) {
+                       if (!tcp_rtx_queue_empty(sk))
+                               err = -EPERM;
+                       else
+                               WRITE_ONCE(tp->write_seq, val);
+               } else if (tp->repair_queue == TCP_RECV_QUEUE) {
+                       if (tp->rcv_nxt != tp->copied_seq) {
+                               err = -EPERM;
+                       } else {
+                               WRITE_ONCE(tp->rcv_nxt, val);
+                               WRITE_ONCE(tp->copied_seq, val);
+                       }
+               } else {
                        err = -EINVAL;
+               }
                break;
 
        case TCP_REPAIR_OPTIONS:
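
With this change TCP_QUEUE_SEQ only rewrites write_seq while the retransmit queue is empty, and only rewrites rcv_nxt while no received data is left unread; either rewrite would otherwise desynchronise sequence accounting mid-flight. A hedged sketch of the repair-mode call sequence from userspace; the fallback constants are the linux/tcp.h values, and the socket is assumed closed with CAP_NET_ADMIN held:

#include <netinet/in.h>
#include <netinet/tcp.h>
#include <stdint.h>
#include <stdio.h>
#include <sys/socket.h>

#ifndef TCP_REPAIR
#define TCP_REPAIR       19     /* values from linux/tcp.h */
#define TCP_REPAIR_QUEUE 20
#define TCP_QUEUE_SEQ    21
#endif
#ifndef TCP_SEND_QUEUE
#define TCP_SEND_QUEUE   2
#endif

int main(void)
{
        int fd = socket(AF_INET, SOCK_STREAM, 0);
        int on = 1, q = TCP_SEND_QUEUE;
        uint32_t seq = 0x1000;

        setsockopt(fd, IPPROTO_TCP, TCP_REPAIR, &on, sizeof(on));
        setsockopt(fd, IPPROTO_TCP, TCP_REPAIR_QUEUE, &q, sizeof(q));
        /* now fails with EPERM if the retransmit queue is not empty */
        if (setsockopt(fd, IPPROTO_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq)) < 0)
                perror("TCP_QUEUE_SEQ");
        return 0;
}
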
@@ -4143,7 +4150,8 @@ static int do_tcp_getsockopt(struct sock *sk, int level,
 
                if (get_user(len, optlen))
                        return -EFAULT;
-               if (len < offsetofend(struct tcp_zerocopy_receive, length))
+               if (len < 0 ||
+                   len < offsetofend(struct tcp_zerocopy_receive, length))
                        return -EINVAL;
                if (unlikely(len > sizeof(zc))) {
                        err = check_zeroed_user(optval + sizeof(zc),
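
The extra len < 0 test guards a signed/unsigned trap: offsetofend() yields a size_t, so comparing a negative int against it converts len to a huge unsigned value and the old bounds check silently passes. Two lines demonstrate the conversion rule:

#include <stdio.h>

int main(void)
{
        int len = -1;

        /* len is converted to size_t here: (size_t)-1 is huge, so this prints 0 */
        printf("%d\n", len < sizeof(int));
        /* checking the sign explicitly first restores the intended semantics */
        printf("%d\n", len < 0 || (size_t)len < sizeof(int));
        return 0;
}
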
index 0055ae0..7513ba4 100644 (file)
@@ -804,8 +804,11 @@ embryonic_reset:
                tcp_reset(sk, skb);
        }
        if (!fastopen) {
-               inet_csk_reqsk_queue_drop(sk, req);
-               __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+               bool unlinked = inet_csk_reqsk_queue_drop(sk, req);
+
+               if (unlinked)
+                       __NET_INC_STATS(sock_net(sk), LINUX_MIB_EMBRYONICRSTS);
+               *req_stolen = !unlinked;
        }
        return NULL;
 }
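
inet_csk_reqsk_queue_drop() now tells the caller whether this path actually unlinked the request, so LINUX_MIB_EMBRYONICRSTS is only bumped by the real owner and *req_stolen records that a concurrent lookup won the race. The single-owner handoff reduces to one compare-and-swap; a sketch with hypothetical names:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static atomic_int req_linked = 1;       /* 1 while the reqsk hangs off the listener */
static long embryonic_rsts;

/* returns true only for the caller that actually unlinked the request */
static bool reqsk_queue_drop(void)
{
        int expected = 1;

        return atomic_compare_exchange_strong(&req_linked, &expected, 0);
}

int main(void)
{
        bool unlinked = reqsk_queue_drop();

        if (unlinked)
                embryonic_rsts++;       /* count the drop once, never twice */
        printf("unlinked=%d stolen=%d count=%ld\n",
               unlinked, !unlinked, embryonic_rsts);
        return 0;
}
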
index b76c48e..c5b4b58 100644 (file)
@@ -526,7 +526,7 @@ struct sk_buff *udp_gro_receive(struct list_head *head, struct sk_buff *skb,
        }
 
        if (!sk || NAPI_GRO_CB(skb)->encap_mark ||
-           (skb->ip_summed != CHECKSUM_PARTIAL &&
+           (uh->check && skb->ip_summed != CHECKSUM_PARTIAL &&
             NAPI_GRO_CB(skb)->csum_cnt == 0 &&
             !NAPI_GRO_CB(skb)->csum_valid) ||
            !udp_sk(sk)->gro_receive)
index 1fb75f0..802f511 100644 (file)
@@ -665,7 +665,6 @@ int inet6_recvmsg(struct socket *sock, struct msghdr *msg, size_t size,
 
 const struct proto_ops inet6_stream_ops = {
        .family            = PF_INET6,
-       .flags             = PROTO_CMSG_DATA_ONLY,
        .owner             = THIS_MODULE,
        .release           = inet6_release,
        .bind              = inet6_bind,
index 51184a7..1578ed9 100644 (file)
@@ -83,6 +83,9 @@ struct calipso_map_cache_entry {
 
 static struct calipso_map_cache_bkt *calipso_cache;
 
+static void calipso_cache_invalidate(void);
+static void calipso_doi_putdef(struct calipso_doi *doi_def);
+
 /* Label Mapping Cache Functions
  */
 
@@ -444,15 +447,10 @@ static int calipso_doi_remove(u32 doi, struct netlbl_audit *audit_info)
                ret_val = -ENOENT;
                goto doi_remove_return;
        }
-       if (!refcount_dec_and_test(&doi_def->refcount)) {
-               spin_unlock(&calipso_doi_list_lock);
-               ret_val = -EBUSY;
-               goto doi_remove_return;
-       }
        list_del_rcu(&doi_def->list);
        spin_unlock(&calipso_doi_list_lock);
 
-       call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
+       calipso_doi_putdef(doi_def);
        ret_val = 0;
 
 doi_remove_return:
@@ -508,10 +506,8 @@ static void calipso_doi_putdef(struct calipso_doi *doi_def)
 
        if (!refcount_dec_and_test(&doi_def->refcount))
                return;
-       spin_lock(&calipso_doi_list_lock);
-       list_del_rcu(&doi_def->list);
-       spin_unlock(&calipso_doi_list_lock);
 
+       calipso_cache_invalidate();
        call_rcu(&doi_def->rcu, calipso_doi_free_rcu);
 }
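
Both calipso hunks funnel teardown through calipso_doi_putdef(): calipso_doi_remove() unlinks the DOI from the list and drops its reference, and only the final put invalidates the mapping cache and schedules the free, rather than open-coding list_del_rcu() in the put path. A hedged userspace sketch of that release ordering (free_rcu() is a stand-in for call_rcu()):

#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct doi_def { atomic_int refcount; unsigned int doi; };

static void cache_invalidate(void) { puts("cache invalidated"); }
static void free_rcu(struct doi_def *d) { free(d); }   /* stand-in for call_rcu() */

static void doi_putdef(struct doi_def *d)
{
        if (atomic_fetch_sub(&d->refcount, 1) != 1)
                return;                 /* other holders remain */
        cache_invalidate();             /* no stale mapping may outlive the DOI */
        free_rcu(d);
}

int main(void)
{
        struct doi_def *d = malloc(sizeof(*d));

        atomic_init(&d->refcount, 2);   /* list reference + one user */
        doi_putdef(d);                  /* drops the user: nothing freed yet */
        doi_putdef(d);                  /* final put: invalidate, then free */
        return 0;
}
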
 
index f3d0586..fd1f896 100644 (file)
@@ -331,10 +331,9 @@ static int icmpv6_getfrag(void *from, char *to, int offset, int len, int odd, st
 }
 
 #if IS_ENABLED(CONFIG_IPV6_MIP6)
-static void mip6_addr_swap(struct sk_buff *skb)
+static void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt)
 {
        struct ipv6hdr *iph = ipv6_hdr(skb);
-       struct inet6_skb_parm *opt = IP6CB(skb);
        struct ipv6_destopt_hao *hao;
        struct in6_addr tmp;
        int off;
@@ -351,7 +350,7 @@ static void mip6_addr_swap(struct sk_buff *skb)
        }
 }
 #else
-static inline void mip6_addr_swap(struct sk_buff *skb) {}
+static inline void mip6_addr_swap(struct sk_buff *skb, const struct inet6_skb_parm *opt) {}
 #endif
 
 static struct dst_entry *icmpv6_route_lookup(struct net *net,
@@ -446,7 +445,8 @@ static int icmp6_iif(const struct sk_buff *skb)
  *     Send an ICMP message in response to a packet in error
  */
 void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
-               const struct in6_addr *force_saddr)
+               const struct in6_addr *force_saddr,
+               const struct inet6_skb_parm *parm)
 {
        struct inet6_dev *idev = NULL;
        struct ipv6hdr *hdr = ipv6_hdr(skb);
@@ -542,7 +542,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
        if (!(skb->dev->flags & IFF_LOOPBACK) && !icmpv6_global_allow(net, type))
                goto out_bh_enable;
 
-       mip6_addr_swap(skb);
+       mip6_addr_swap(skb, parm);
 
        sk = icmpv6_xmit_lock(net);
        if (!sk)
@@ -559,7 +559,7 @@ void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
                /* select a more meaningful saddr from input if */
                struct net_device *in_netdev;
 
-               in_netdev = dev_get_by_index(net, IP6CB(skb)->iif);
+               in_netdev = dev_get_by_index(net, parm->iif);
                if (in_netdev) {
                        ipv6_dev_get_saddr(net, in_netdev, &fl6.daddr,
                                           inet6_sk(sk)->srcprefs,
@@ -640,7 +640,7 @@ EXPORT_SYMBOL(icmp6_send);
  */
 void icmpv6_param_prob(struct sk_buff *skb, u8 code, int pos)
 {
-       icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL);
+       icmp6_send(skb, ICMPV6_PARAMPROB, code, pos, NULL, IP6CB(skb));
        kfree_skb(skb);
 }
 
@@ -697,10 +697,10 @@ int ip6_err_gen_icmpv6_unreach(struct sk_buff *skb, int nhs, int type,
        }
        if (type == ICMP_TIME_EXCEEDED)
                icmp6_send(skb2, ICMPV6_TIME_EXCEED, ICMPV6_EXC_HOPLIMIT,
-                          info, &temp_saddr);
+                          info, &temp_saddr, IP6CB(skb2));
        else
                icmp6_send(skb2, ICMPV6_DEST_UNREACH, ICMPV6_ADDR_UNREACH,
-                          info, &temp_saddr);
+                          info, &temp_saddr, IP6CB(skb2));
        if (rt)
                ip6_rt_put(rt);
 
index ef9d022..679699e 100644 (file)
@@ -2486,7 +2486,7 @@ static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
        const struct net_device *dev;
 
        if (rt->nh)
-               fib6_nh = nexthop_fib6_nh(rt->nh);
+               fib6_nh = nexthop_fib6_nh_bh(rt->nh);
 
        seq_printf(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
 
index c3bc89b..1baf43a 100644 (file)
@@ -678,8 +678,8 @@ static int prepare_ip6gre_xmit_ipv6(struct sk_buff *skb,
 
                tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset];
                if (tel->encap_limit == 0) {
-                       icmpv6_send(skb, ICMPV6_PARAMPROB,
-                                   ICMPV6_HDR_FIELD, offset + 2);
+                       icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
+                                       ICMPV6_HDR_FIELD, offset + 2);
                        return -1;
                }
                *encap_limit = tel->encap_limit - 1;
@@ -805,8 +805,8 @@ static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev)
        if (err != 0) {
                /* XXX: send ICMP error even if DF is not set. */
                if (err == -EMSGSIZE)
-                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-                                 htonl(mtu));
+                       icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+                                     htonl(mtu));
                return -1;
        }
 
@@ -837,7 +837,7 @@ static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev)
                          &mtu, skb->protocol);
        if (err != 0) {
                if (err == -EMSGSIZE)
-                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+                       icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                return -1;
        }
 
@@ -1063,10 +1063,10 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb,
                /* XXX: send ICMP error even if DF is not set. */
                if (err == -EMSGSIZE) {
                        if (skb->protocol == htons(ETH_P_IP))
-                               icmp_send(skb, ICMP_DEST_UNREACH,
-                                         ICMP_FRAG_NEEDED, htonl(mtu));
+                               icmp_ndo_send(skb, ICMP_DEST_UNREACH,
+                                             ICMP_FRAG_NEEDED, htonl(mtu));
                        else
-                               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+                               icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                }
 
                goto tx_err;
index 70c8c2f..9e35748 100644 (file)
@@ -33,23 +33,25 @@ int inet6_unregister_icmp_sender(ip6_icmp_send_t *fn)
 }
 EXPORT_SYMBOL(inet6_unregister_icmp_sender);
 
-void icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info)
+void __icmpv6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info,
+                  const struct inet6_skb_parm *parm)
 {
        ip6_icmp_send_t *send;
 
        rcu_read_lock();
        send = rcu_dereference(ip6_icmp_send);
        if (send)
-               send(skb, type, code, info, NULL);
+               send(skb, type, code, info, NULL, parm);
        rcu_read_unlock();
 }
-EXPORT_SYMBOL(icmpv6_send);
+EXPORT_SYMBOL(__icmpv6_send);
 #endif
 
 #if IS_ENABLED(CONFIG_NF_NAT)
 #include <net/netfilter/nf_conntrack.h>
 void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
 {
+       struct inet6_skb_parm parm = { 0 };
        struct sk_buff *cloned_skb = NULL;
        enum ip_conntrack_info ctinfo;
        struct in6_addr orig_ip;
@@ -57,7 +59,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
 
        ct = nf_ct_get(skb_in, &ctinfo);
        if (!ct || !(ct->status & IPS_SRC_NAT)) {
-               icmpv6_send(skb_in, type, code, info);
+               __icmpv6_send(skb_in, type, code, info, &parm);
                return;
        }
 
@@ -72,7 +74,7 @@ void icmpv6_ndo_send(struct sk_buff *skb_in, u8 type, u8 code, __u32 info)
 
        orig_ip = ipv6_hdr(skb_in)->saddr;
        ipv6_hdr(skb_in)->saddr = ct->tuplehash[0].tuple.src.u3.in6;
-       icmpv6_send(skb_in, type, code, info);
+       __icmpv6_send(skb_in, type, code, info, &parm);
        ipv6_hdr(skb_in)->saddr = orig_ip;
 out:
        consume_skb(cloned_skb);
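
The plumbing above exists because icmpv6_ndo_send() is reached from driver (ndo) context, where skb->cb is no longer owned by the IPv6 layer; reading IP6CB(skb) there would reinterpret whatever the current owner stored, so callers now pass a known-good inet6_skb_parm explicitly. A toy illustration of the shared-control-block hazard, with made-up layer structs:

#include <stdio.h>
#include <string.h>

struct pkt { char cb[16]; };                 /* shared scratch area, like skb->cb */
struct ipv6_layer_cb { int iif; };           /* what IPv6 keeps there */
struct driver_cb { char tag[16]; };          /* what a driver might keep there */

int main(void)
{
        struct pkt p;
        struct driver_cb *d = (struct driver_cb *)p.cb;

        strcpy(d->tag, "drv-owned");         /* the driver now owns the area */

        /* misreading it as the IPv6 control block yields garbage, not an ifindex */
        struct ipv6_layer_cb *i6 = (struct ipv6_layer_cb *)p.cb;
        printf("bogus iif: %d\n", i6->iif);

        struct ipv6_layer_cb parm = { 0 };   /* the fix: pass known state explicitly */
        printf("explicit iif: %d\n", parm.iif);
        return 0;
}
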
index e9d2a4a..8025671 100644 (file)
@@ -245,16 +245,6 @@ static struct sk_buff *ip6_rcv_core(struct sk_buff *skb, struct net_device *dev,
        if (ipv6_addr_is_multicast(&hdr->saddr))
                goto err;
 
-       /* While RFC4291 is not explicit about v4mapped addresses
-        * in IPv6 headers, it seems clear linux dual-stack
-        * model can not deal properly with these.
-        * Security models could be fooled by ::ffff:127.0.0.1 for example.
-        *
-        * https://tools.ietf.org/html/draft-itojun-v6ops-v4mapped-harmful-02
-        */
-       if (ipv6_addr_v4mapped(&hdr->saddr))
-               goto err;
-
        skb->transport_header = skb->network_header + sizeof(*hdr);
        IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr);
 
index a7950ba..3fa0eca 100644 (file)
@@ -1332,8 +1332,8 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
 
                                tel = (void *)&skb_network_header(skb)[offset];
                                if (tel->encap_limit == 0) {
-                                       icmpv6_send(skb, ICMPV6_PARAMPROB,
-                                               ICMPV6_HDR_FIELD, offset + 2);
+                                       icmpv6_ndo_send(skb, ICMPV6_PARAMPROB,
+                                                       ICMPV6_HDR_FIELD, offset + 2);
                                        return -1;
                                }
                                encap_limit = tel->encap_limit - 1;
@@ -1385,11 +1385,11 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
                if (err == -EMSGSIZE)
                        switch (protocol) {
                        case IPPROTO_IPIP:
-                               icmp_send(skb, ICMP_DEST_UNREACH,
-                                         ICMP_FRAG_NEEDED, htonl(mtu));
+                               icmp_ndo_send(skb, ICMP_DEST_UNREACH,
+                                             ICMP_FRAG_NEEDED, htonl(mtu));
                                break;
                        case IPPROTO_IPV6:
-                               icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+                               icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                                break;
                        default:
                                break;
index 0225fd6..f10e7a7 100644 (file)
@@ -521,10 +521,10 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl)
                        if (mtu < IPV6_MIN_MTU)
                                mtu = IPV6_MIN_MTU;
 
-                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+                       icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                } else {
-                       icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-                                 htonl(mtu));
+                       icmp_ndo_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+                                     htonl(mtu));
                }
 
                err = -EMSGSIZE;
index 0d453fa..2e2119b 100644 (file)
@@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb,
 
        local_bh_disable();
        addend = xt_write_recseq_begin();
-       private = rcu_access_pointer(table->private);
+       private = READ_ONCE(table->private); /* Address dependency. */
        cpu        = smp_processor_id();
        table_base = private->entries;
        jumpstack  = (struct ip6t_entry **)private->jumpstack[cpu];
@@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table)
 {
        unsigned int countersize;
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
 
        /* We need atomic snapshot of counters: rest doesn't change
           (other than comefrom, which userspace doesn't care
@@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size,
        unsigned int off, num;
        const struct ip6t_entry *e;
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
        int ret = 0;
        const void *loc_cpu_entry;
 
@@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len)
        t = xt_request_find_table_lock(net, AF_INET6, name);
        if (!IS_ERR(t)) {
                struct ip6t_getinfo info;
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
 #ifdef CONFIG_COMPAT
                struct xt_table_info tmp;
 
@@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr,
 
        t = xt_find_table_lock(net, AF_INET6, get.name);
        if (!IS_ERR(t)) {
-               struct xt_table_info *private = xt_table_get_private_protected(t);
+               struct xt_table_info *private = t->private;
                if (get.size == private->size)
                        ret = copy_entries_to_user(private->size,
                                                   t, uptr->entrytable);
@@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len)
        }
 
        local_bh_disable();
-       private = xt_table_get_private_protected(t);
+       private = t->private;
        if (private->number != tmp.num_counters) {
                ret = -EINVAL;
                goto unlock_up_free;
@@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table,
                            void __user *userptr)
 {
        struct xt_counters *counters;
-       const struct xt_table_info *private = xt_table_get_private_protected(table);
+       const struct xt_table_info *private = table->private;
        void __user *pos;
        unsigned int size;
        int ret = 0;
@@ -1598,7 +1598,7 @@ compat_get_entries(struct net *net, struct compat_ip6t_get_entries __user *uptr,
        xt_compat_lock(AF_INET6);
        t = xt_find_table_lock(net, AF_INET6, get.name);
        if (!IS_ERR(t)) {
-               const struct xt_table_info *private = xt_table_get_private_protected(t);
+               const struct xt_table_info *private = t->private;
                struct xt_table_info info;
                ret = compat_table_info(private, &info);
                if (!ret && get.size == info.size)
index 1536f49..1056b02 100644 (file)
@@ -260,34 +260,16 @@ static struct dst_ops ip6_dst_ops_template = {
        .confirm_neigh          =       ip6_confirm_neigh,
 };
 
-static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst)
-{
-       unsigned int mtu = dst_metric_raw(dst, RTAX_MTU);
-
-       return mtu ? : dst->dev->mtu;
-}
-
-static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk,
-                                        struct sk_buff *skb, u32 mtu,
-                                        bool confirm_neigh)
-{
-}
-
-static void ip6_rt_blackhole_redirect(struct dst_entry *dst, struct sock *sk,
-                                     struct sk_buff *skb)
-{
-}
-
 static struct dst_ops ip6_dst_blackhole_ops = {
-       .family                 =       AF_INET6,
-       .destroy                =       ip6_dst_destroy,
-       .check                  =       ip6_dst_check,
-       .mtu                    =       ip6_blackhole_mtu,
-       .default_advmss         =       ip6_default_advmss,
-       .update_pmtu            =       ip6_rt_blackhole_update_pmtu,
-       .redirect               =       ip6_rt_blackhole_redirect,
-       .cow_metrics            =       dst_cow_metrics_generic,
-       .neigh_lookup           =       ip6_dst_neigh_lookup,
+       .family                 = AF_INET6,
+       .default_advmss         = ip6_default_advmss,
+       .neigh_lookup           = ip6_dst_neigh_lookup,
+       .check                  = ip6_dst_check,
+       .destroy                = ip6_dst_destroy,
+       .cow_metrics            = dst_cow_metrics_generic,
+       .update_pmtu            = dst_blackhole_update_pmtu,
+       .redirect               = dst_blackhole_redirect,
+       .mtu                    = dst_blackhole_mtu,
 };
 
 static const u32 ip6_template_metrics[RTAX_MAX] = {
index 9363686..63ccd9f 100644 (file)
@@ -987,7 +987,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb,
                        skb_dst_update_pmtu_no_confirm(skb, mtu);
 
                if (skb->len > mtu && !skb_is_gso(skb)) {
-                       icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+                       icmpv6_ndo_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
                        ip_rt_put(rt);
                        goto tx_error;
                }
index bd44ded..d0f0077 100644 (file)
@@ -1175,6 +1175,11 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb)
        if (!ipv6_unicast_destination(skb))
                goto drop;
 
+       if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+               __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+               return 0;
+       }
+
        return tcp_conn_request(&tcp6_request_sock_ops,
                                &tcp_request_sock_ipv6_ops, sk, skb);
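
This restores the v4-mapped source-address check that the ip6_input hunk above removed from the hot path, applying it only where it matters: TCP connection requests (the MPTCP subflow code below gains the same test). A standalone sketch of the ::ffff:0:0/96 membership test that ipv6_addr_v4mapped() performs:

#include <arpa/inet.h>
#include <netinet/in.h>
#include <stdio.h>
#include <string.h>

static int addr_v4mapped(const struct in6_addr *a)
{
        /* ::ffff:a.b.c.d : ten zero bytes, then 0xff 0xff, then the IPv4 address */
        static const unsigned char prefix[12] = {
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff
        };

        return memcmp(a->s6_addr, prefix, sizeof(prefix)) == 0;
}

int main(void)
{
        struct in6_addr a;

        inet_pton(AF_INET6, "::ffff:127.0.0.1", &a);
        printf("%d\n", addr_v4mapped(&a));      /* 1 */
        inet_pton(AF_INET6, "2001:db8::1", &a);
        printf("%d\n", addr_v4mapped(&a));      /* 0 */
        return 0;
}
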
 
index 7be5103..203890e 100644 (file)
@@ -649,9 +649,9 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
        /* Parse and check optional cookie */
        if (session->peer_cookie_len > 0) {
                if (memcmp(ptr, &session->peer_cookie[0], session->peer_cookie_len)) {
-                       pr_warn_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
-                                           tunnel->name, tunnel->tunnel_id,
-                                           session->session_id);
+                       pr_debug_ratelimited("%s: cookie mismatch (%u/%u). Discarding.\n",
+                                            tunnel->name, tunnel->tunnel_id,
+                                            session->session_id);
                        atomic_long_inc(&session->stats.rx_cookie_discards);
                        goto discard;
                }
@@ -702,8 +702,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
                 * If user has configured mandatory sequence numbers, discard.
                 */
                if (session->recv_seq) {
-                       pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
-                                           session->name);
+                       pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+                                            session->name);
                        atomic_long_inc(&session->stats.rx_seq_discards);
                        goto discard;
                }
@@ -718,8 +718,8 @@ void l2tp_recv_common(struct l2tp_session *session, struct sk_buff *skb,
                        session->send_seq = 0;
                        l2tp_session_set_header_len(session, tunnel->version);
                } else if (session->send_seq) {
-                       pr_warn_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
-                                           session->name);
+                       pr_debug_ratelimited("%s: recv data has no seq numbers when required. Discarding.\n",
+                                            session->name);
                        atomic_long_inc(&session->stats.rx_seq_discards);
                        goto discard;
                }
@@ -809,9 +809,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 
        /* Short packet? */
        if (!pskb_may_pull(skb, L2TP_HDR_SIZE_MAX)) {
-               pr_warn_ratelimited("%s: recv short packet (len=%d)\n",
-                                   tunnel->name, skb->len);
-               goto error;
+               pr_debug_ratelimited("%s: recv short packet (len=%d)\n",
+                                    tunnel->name, skb->len);
+               goto invalid;
        }
 
        /* Point to L2TP header */
@@ -824,9 +824,9 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
        /* Check protocol version */
        version = hdrflags & L2TP_HDR_VER_MASK;
        if (version != tunnel->version) {
-               pr_warn_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
-                                   tunnel->name, version, tunnel->version);
-               goto error;
+               pr_debug_ratelimited("%s: recv protocol version mismatch: got %d expected %d\n",
+                                    tunnel->name, version, tunnel->version);
+               goto invalid;
        }
 
        /* Get length of L2TP packet */
@@ -834,7 +834,7 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
 
        /* If type is control packet, it is handled by userspace. */
        if (hdrflags & L2TP_HDRFLAG_T)
-               goto error;
+               goto pass;
 
        /* Skip flags */
        ptr += 2;
@@ -863,21 +863,24 @@ static int l2tp_udp_recv_core(struct l2tp_tunnel *tunnel, struct sk_buff *skb)
                        l2tp_session_dec_refcount(session);
 
                /* Not found? Pass to userspace to deal with */
-               pr_warn_ratelimited("%s: no session found (%u/%u). Passing up.\n",
-                                   tunnel->name, tunnel_id, session_id);
-               goto error;
+               pr_debug_ratelimited("%s: no session found (%u/%u). Passing up.\n",
+                                    tunnel->name, tunnel_id, session_id);
+               goto pass;
        }
 
        if (tunnel->version == L2TP_HDR_VER_3 &&
            l2tp_v3_ensure_opt_in_linear(session, skb, &ptr, &optr))
-               goto error;
+               goto invalid;
 
        l2tp_recv_common(session, skb, ptr, optr, hdrflags, length);
        l2tp_session_dec_refcount(session);
 
        return 0;
 
-error:
+invalid:
+       atomic_long_inc(&tunnel->stats.rx_invalid);
+
+pass:
        /* Put UDP header back */
        __skb_push(skb, sizeof(struct udphdr));
 
index cb21d90..98ea98e 100644 (file)
@@ -39,6 +39,7 @@ struct l2tp_stats {
        atomic_long_t           rx_oos_packets;
        atomic_long_t           rx_errors;
        atomic_long_t           rx_cookie_discards;
+       atomic_long_t           rx_invalid;
 };
 
 struct l2tp_tunnel;
index 83956c9..96eb91b 100644 (file)
@@ -428,6 +428,9 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int fla
                              L2TP_ATTR_STATS_PAD) ||
            nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS,
                              atomic_long_read(&tunnel->stats.rx_errors),
+                             L2TP_ATTR_STATS_PAD) ||
+           nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID,
+                             atomic_long_read(&tunnel->stats.rx_invalid),
                              L2TP_ATTR_STATS_PAD))
                goto nla_put_failure;
        nla_nest_end(skb, nest);
@@ -771,6 +774,9 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int fl
                              L2TP_ATTR_STATS_PAD) ||
            nla_put_u64_64bit(skb, L2TP_ATTR_RX_ERRORS,
                              atomic_long_read(&session->stats.rx_errors),
+                             L2TP_ATTR_STATS_PAD) ||
+           nla_put_u64_64bit(skb, L2TP_ATTR_RX_INVALID,
+                             atomic_long_read(&session->stats.rx_invalid),
                              L2TP_ATTR_STATS_PAD))
                goto nla_put_failure;
        nla_nest_end(skb, nest);
index d7b3d90..b00d6f5 100644 (file)
@@ -23,6 +23,7 @@ int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
        struct aead_request *aead_req;
        int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
        u8 *__aad;
+       int ret;
 
        aead_req = kzalloc(reqsize + aad_len, GFP_ATOMIC);
        if (!aead_req)
@@ -40,10 +41,10 @@ int aead_encrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
        aead_request_set_crypt(aead_req, sg, sg, data_len, b_0);
        aead_request_set_ad(aead_req, sg[0].length);
 
-       crypto_aead_encrypt(aead_req);
+       ret = crypto_aead_encrypt(aead_req);
        kfree_sensitive(aead_req);
 
-       return 0;
+       return ret;
 }
 
 int aead_decrypt(struct crypto_aead *tfm, u8 *b_0, u8 *aad, size_t aad_len,
index 6f3b3a0..512cab0 100644 (file)
@@ -22,6 +22,7 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
        struct aead_request *aead_req;
        int reqsize = sizeof(*aead_req) + crypto_aead_reqsize(tfm);
        const __le16 *fc;
+       int ret;
 
        if (data_len < GMAC_MIC_LEN)
                return -EINVAL;
@@ -59,10 +60,10 @@ int ieee80211_aes_gmac(struct crypto_aead *tfm, const u8 *aad, u8 *nonce,
        aead_request_set_crypt(aead_req, sg, sg, 0, iv);
        aead_request_set_ad(aead_req, GMAC_AAD_LEN + data_len);
 
-       crypto_aead_encrypt(aead_req);
+       ret = crypto_aead_encrypt(aead_req);
        kfree_sensitive(aead_req);
 
-       return 0;
+       return ret;
 }
 
 struct crypto_aead *ieee80211_aes_gmac_key_setup(const u8 key[],
index c4c70e3..68a0de0 100644 (file)
@@ -2950,14 +2950,14 @@ static int ieee80211_set_bitrate_mask(struct wiphy *wiphy,
                        continue;
 
                for (j = 0; j < IEEE80211_HT_MCS_MASK_LEN; j++) {
-                       if (~sdata->rc_rateidx_mcs_mask[i][j]) {
+                       if (sdata->rc_rateidx_mcs_mask[i][j] != 0xff) {
                                sdata->rc_has_mcs_mask[i] = true;
                                break;
                        }
                }
 
                for (j = 0; j < NL80211_VHT_NSS_MAX; j++) {
-                       if (~sdata->rc_rateidx_vht_mcs_mask[i][j]) {
+                       if (sdata->rc_rateidx_vht_mcs_mask[i][j] != 0xffff) {
                                sdata->rc_has_vht_mcs_mask[i] = true;
                                break;
                        }
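
The mac80211 fix above is a textbook integer-promotion bug: the per-stream masks are u8/u16, so ~mask promotes to int and is nonzero even for an all-ones mask, making the "user configured a mask" test always true. Two lines reproduce it:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
        uint8_t mcs_mask = 0xff;        /* "all rates enabled" */

        /* ~mcs_mask promotes to int: ~0x000000ff == 0xffffff00, nonzero */
        printf("~mask truthy:  %d\n", ~mcs_mask != 0);          /* 1: the bug */
        printf("mask != 0xff: %d\n", mcs_mask != 0xff);         /* 0: the fix */
        return 0;
}
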
index 1f552f3..a7ac53a 100644 (file)
@@ -1874,6 +1874,8 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata)
 
        /* remove beacon */
        kfree(sdata->u.ibss.ie);
+       sdata->u.ibss.ie = NULL;
+       sdata->u.ibss.ie_len = 0;
 
        /* on the next join, re-program HT parameters */
        memset(&ifibss->ht_capa, 0, sizeof(ifibss->ht_capa));
index 4f3f8bb..1b9c826 100644 (file)
@@ -973,8 +973,19 @@ int ieee80211_register_hw(struct ieee80211_hw *hw)
                        continue;
 
                if (!dflt_chandef.chan) {
+                       /*
+                        * Assign the first enabled channel to dflt_chandef
+                        * from the list of channels
+                        */
+                       for (i = 0; i < sband->n_channels; i++)
+                               if (!(sband->channels[i].flags &
+                                               IEEE80211_CHAN_DISABLED))
+                                       break;
+                       /* if none found then use the first anyway */
+                       if (i == sband->n_channels)
+                               i = 0;
                        cfg80211_chandef_create(&dflt_chandef,
-                                               &sband->channels[0],
+                                               &sband->channels[i],
                                                NL80211_CHAN_NO_HT);
                        /* init channel we're on */
                        if (!local->use_chanctx && !local->_oper_chandef.chan) {
index 2e33a12..ce4e385 100644 (file)
@@ -5071,7 +5071,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
                he_oper_ie = cfg80211_find_ext_ie(WLAN_EID_EXT_HE_OPERATION,
                                                  ies->data, ies->len);
                if (he_oper_ie &&
-                   he_oper_ie[1] == ieee80211_he_oper_size(&he_oper_ie[3]))
+                   he_oper_ie[1] >= ieee80211_he_oper_size(&he_oper_ie[3]))
                        he_oper = (void *)(he_oper_ie + 3);
                else
                        he_oper = NULL;
index 2f44f49..ecad9b1 100644 (file)
@@ -805,7 +805,6 @@ minstrel_ht_group_min_rate_offset(struct minstrel_ht_sta *mi, int group,
 static u16
 minstrel_ht_next_inc_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur)
 {
-       struct minstrel_mcs_group_data *mg;
        u8 type = MINSTREL_SAMPLE_TYPE_INC;
        int i, index = 0;
        u8 group;
@@ -813,7 +812,6 @@ minstrel_ht_next_inc_rate(struct minstrel_ht_sta *mi, u32 fast_rate_dur)
        group = mi->sample[type].sample_group;
        for (i = 0; i < ARRAY_SIZE(minstrel_mcs_groups); i++) {
                group = (group + 1) % ARRAY_SIZE(minstrel_mcs_groups);
-               mg = &mi->groups[group];
 
                index = minstrel_ht_group_min_rate_offset(mi, group,
                                                          fast_rate_dur);
index f080fcf..c0fa526 100644 (file)
@@ -968,7 +968,7 @@ static void ieee80211_parse_extension_element(u32 *crc,
                break;
        case WLAN_EID_EXT_HE_OPERATION:
                if (len >= sizeof(*elems->he_operation) &&
-                   len == ieee80211_he_oper_size(data) - 1) {
+                   len >= ieee80211_he_oper_size(data) - 1) {
                        if (crc)
                                *crc = crc32_be(*crc, (void *)elem,
                                                elem->datalen + 2);
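
Both HE-operation fixes (here and in mlme.c above) relax an exact-size test to "at least as long as this parser understands": the element is spec-versioned and may legitimately grow beyond what the running kernel knows about. The usual TLV forward-compatibility rule, sketched with a simplified fixed-size view of the element:

#include <stdio.h>

/* the fields this parser understands; newer specs may append more */
struct he_oper_known {
        unsigned char params[3];
        unsigned char bss_color;
};

static int ie_usable(unsigned int ie_len)
{
        /* was "==": rejected longer, still-valid elements from newer APs */
        return ie_len >= sizeof(struct he_oper_known);
}

int main(void)
{
        printf("%d %d %d\n", ie_usable(3), ie_usable(4), ie_usable(6)); /* 0 1 1 */
        return 0;
}
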
index b169014..1482259 100644 (file)
@@ -14,6 +14,7 @@
 #include <linux/netdev_features.h>
 #include <linux/netdevice.h>
 #include <linux/skbuff.h>
+#include <net/mpls.h>
 
 static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
                                       netdev_features_t features)
@@ -27,6 +28,8 @@ static struct sk_buff *mpls_gso_segment(struct sk_buff *skb,
 
        skb_reset_network_header(skb);
        mpls_hlen = skb_inner_network_header(skb) - skb_network_header(skb);
+       if (unlikely(!mpls_hlen || mpls_hlen % MPLS_HLEN))
+               goto out;
        if (unlikely(!pskb_may_pull(skb, mpls_hlen)))
                goto out;
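
The new mpls_gso check rejects segmentation requests whose header offsets imply an empty or misaligned label stack before anything is pulled; a label stack entry is always 4 bytes (MPLS_HLEN). The validation in isolation:

#include <stdio.h>

#define MPLS_HLEN 4     /* one label stack entry */

static int mpls_stack_len_ok(unsigned int hlen)
{
        return hlen != 0 && hlen % MPLS_HLEN == 0;
}

int main(void)
{
        printf("%d %d %d %d\n", mpls_stack_len_ok(0), mpls_stack_len_ok(3),
               mpls_stack_len_ok(4), mpls_stack_len_ok(12));    /* 0 0 1 1 */
        return 0;
}
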
 
index b63574d..89a4225 100644 (file)
@@ -411,6 +411,7 @@ static void clear_3rdack_retransmission(struct sock *sk)
 }
 
 static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
+                                        bool snd_data_fin_enable,
                                         unsigned int *size,
                                         unsigned int remaining,
                                         struct mptcp_out_options *opts)
@@ -428,9 +429,10 @@ static bool mptcp_established_options_mp(struct sock *sk, struct sk_buff *skb,
        if (!skb)
                return false;
 
-       /* MPC/MPJ needed only on 3rd ack packet */
-       if (subflow->fully_established ||
-           subflow->snd_isn != TCP_SKB_CB(skb)->seq)
+       /* MPC/MPJ needed only on 3rd ack packet, DATA_FIN and TCP shutdown take precedence */
+       if (subflow->fully_established || snd_data_fin_enable ||
+           subflow->snd_isn != TCP_SKB_CB(skb)->seq ||
+           sk->sk_state != TCP_ESTABLISHED)
                return false;
 
        if (subflow->mp_capable) {
@@ -502,20 +504,20 @@ static void mptcp_write_data_fin(struct mptcp_subflow_context *subflow,
 }
 
 static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
+                                         bool snd_data_fin_enable,
                                          unsigned int *size,
                                          unsigned int remaining,
                                          struct mptcp_out_options *opts)
 {
        struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
        struct mptcp_sock *msk = mptcp_sk(subflow->conn);
-       u64 snd_data_fin_enable, ack_seq;
        unsigned int dss_size = 0;
        struct mptcp_ext *mpext;
        unsigned int ack_size;
        bool ret = false;
+       u64 ack_seq;
 
        mpext = skb ? mptcp_get_ext(skb) : NULL;
-       snd_data_fin_enable = mptcp_data_fin_enabled(msk);
 
        if (!skb || (mpext && mpext->use_map) || snd_data_fin_enable) {
                unsigned int map_size;
@@ -565,15 +567,15 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 }
 
 static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
-                                 struct in_addr *addr)
+                                 struct in_addr *addr, u16 port)
 {
        u8 hmac[SHA256_DIGEST_SIZE];
        u8 msg[7];
 
        msg[0] = addr_id;
        memcpy(&msg[1], &addr->s_addr, 4);
-       msg[5] = 0;
-       msg[6] = 0;
+       msg[5] = port >> 8;
+       msg[6] = port & 0xFF;
 
        mptcp_crypto_hmac_sha(key1, key2, msg, 7, hmac);
 
@@ -582,15 +584,15 @@ static u64 add_addr_generate_hmac(u64 key1, u64 key2, u8 addr_id,
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 static u64 add_addr6_generate_hmac(u64 key1, u64 key2, u8 addr_id,
-                                  struct in6_addr *addr)
+                                  struct in6_addr *addr, u16 port)
 {
        u8 hmac[SHA256_DIGEST_SIZE];
        u8 msg[19];
 
        msg[0] = addr_id;
        memcpy(&msg[1], &addr->s6_addr, 16);
-       msg[17] = 0;
-       msg[18] = 0;
+       msg[17] = port >> 8;
+       msg[18] = port & 0xFF;
 
        mptcp_crypto_hmac_sha(key1, key2, msg, 19, hmac);
 
@@ -644,7 +646,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
                        opts->ahmac = add_addr_generate_hmac(msk->local_key,
                                                             msk->remote_key,
                                                             opts->addr_id,
-                                                            &opts->addr);
+                                                            &opts->addr,
+                                                            opts->port);
                }
        }
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
@@ -655,7 +658,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff *
                        opts->ahmac = add_addr6_generate_hmac(msk->local_key,
                                                              msk->remote_key,
                                                              opts->addr_id,
-                                                             &opts->addr6);
+                                                             &opts->addr6,
+                                                             opts->port);
                }
        }
 #endif
@@ -717,12 +721,15 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
                               unsigned int *size, unsigned int remaining,
                               struct mptcp_out_options *opts)
 {
+       struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
+       struct mptcp_sock *msk = mptcp_sk(subflow->conn);
        unsigned int opt_size = 0;
+       bool snd_data_fin;
        bool ret = false;
 
        opts->suboptions = 0;
 
-       if (unlikely(mptcp_check_fallback(sk)))
+       if (unlikely(__mptcp_check_fallback(msk)))
                return false;
 
        /* prevent adding of any MPTCP related options on reset packet
@@ -731,10 +738,10 @@ bool mptcp_established_options(struct sock *sk, struct sk_buff *skb,
        if (unlikely(skb && TCP_SKB_CB(skb)->tcp_flags & TCPHDR_RST))
                return false;
 
-       if (mptcp_established_options_mp(sk, skb, &opt_size, remaining, opts))
+       snd_data_fin = mptcp_data_fin_enabled(msk);
+       if (mptcp_established_options_mp(sk, skb, snd_data_fin, &opt_size, remaining, opts))
                ret = true;
-       else if (mptcp_established_options_dss(sk, skb, &opt_size, remaining,
-                                              opts))
+       else if (mptcp_established_options_dss(sk, skb, snd_data_fin, &opt_size, remaining, opts))
                ret = true;
 
        /* we reserved enough space for the above options, and exceeding the
@@ -957,12 +964,14 @@ static bool add_addr_hmac_valid(struct mptcp_sock *msk,
        if (mp_opt->family == MPTCP_ADDR_IPVERSION_4)
                hmac = add_addr_generate_hmac(msk->remote_key,
                                              msk->local_key,
-                                             mp_opt->addr_id, &mp_opt->addr);
+                                             mp_opt->addr_id, &mp_opt->addr,
+                                             mp_opt->port);
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
        else
                hmac = add_addr6_generate_hmac(msk->remote_key,
                                               msk->local_key,
-                                              mp_opt->addr_id, &mp_opt->addr6);
+                                              mp_opt->addr_id, &mp_opt->addr6,
+                                              mp_opt->port);
 #endif
 
        pr_debug("msk=%p, ahmac=%llu, mp_opt->ahmac=%llu\n",
index a57f3ea..1590b9d 100644 (file)
@@ -11,6 +11,7 @@
 #include <linux/netdevice.h>
 #include <linux/sched/signal.h>
 #include <linux/atomic.h>
+#include <linux/igmp.h>
 #include <net/sock.h>
 #include <net/inet_common.h>
 #include <net/inet_hashtables.h>
@@ -19,6 +20,7 @@
 #include <net/tcp_states.h>
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
 #include <net/transp_v6.h>
+#include <net/addrconf.h>
 #endif
 #include <net/mptcp.h>
 #include <net/xfrm.h>
@@ -1059,6 +1061,12 @@ out:
        }
 }
 
+static void __mptcp_clean_una_wakeup(struct sock *sk)
+{
+       __mptcp_clean_una(sk);
+       mptcp_write_space(sk);
+}
+
 static void mptcp_enter_memory_pressure(struct sock *sk)
 {
        struct mptcp_subflow_context *subflow;
@@ -1187,6 +1195,7 @@ static bool mptcp_tx_cache_refill(struct sock *sk, int size,
                         */
                        while (skbs->qlen > 1) {
                                skb = __skb_dequeue_tail(skbs);
+                               *total_ts -= skb->truesize;
                                __kfree_skb(skb);
                        }
                        return skbs->qlen > 0;
@@ -1442,7 +1451,7 @@ static void mptcp_push_release(struct sock *sk, struct sock *ssk,
        release_sock(ssk);
 }
 
-static void mptcp_push_pending(struct sock *sk, unsigned int flags)
+static void __mptcp_push_pending(struct sock *sk, unsigned int flags)
 {
        struct sock *prev_ssk = NULL, *ssk = NULL;
        struct mptcp_sock *msk = mptcp_sk(sk);
@@ -1694,14 +1703,14 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 
 wait_for_memory:
                mptcp_set_nospace(sk);
-               mptcp_push_pending(sk, msg->msg_flags);
+               __mptcp_push_pending(sk, msg->msg_flags);
                ret = sk_stream_wait_memory(sk, &timeo);
                if (ret)
                        goto out;
        }
 
        if (copied)
-               mptcp_push_pending(sk, msg->msg_flags);
+               __mptcp_push_pending(sk, msg->msg_flags);
 
 out:
        release_sock(sk);
@@ -2113,6 +2122,14 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
        return backup;
 }
 
+static void mptcp_dispose_initial_subflow(struct mptcp_sock *msk)
+{
+       if (msk->subflow) {
+               iput(SOCK_INODE(msk->subflow));
+               msk->subflow = NULL;
+       }
+}
+
 /* subflow sockets can be either outgoing (connect) or incoming
  * (accept).
  *
@@ -2124,6 +2141,8 @@ static struct sock *mptcp_subflow_get_retrans(const struct mptcp_sock *msk)
 static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
                              struct mptcp_subflow_context *subflow)
 {
+       struct mptcp_sock *msk = mptcp_sk(sk);
+
        list_del(&subflow->node);
 
        lock_sock_nested(ssk, SINGLE_DEPTH_NESTING);
@@ -2152,6 +2171,18 @@ static void __mptcp_close_ssk(struct sock *sk, struct sock *ssk,
        release_sock(ssk);
 
        sock_put(ssk);
+
+       if (ssk == msk->last_snd)
+               msk->last_snd = NULL;
+
+       if (ssk == msk->ack_hint)
+               msk->ack_hint = NULL;
+
+       if (ssk == msk->first)
+               msk->first = NULL;
+
+       if (msk->subflow && ssk == msk->subflow->sk)
+               mptcp_dispose_initial_subflow(msk);
 }
 
 void mptcp_close_ssk(struct sock *sk, struct sock *ssk,
@@ -2236,60 +2267,23 @@ static void mptcp_check_fastclose(struct mptcp_sock *msk)
        mptcp_close_wake_up(sk);
 }
 
-static void mptcp_worker(struct work_struct *work)
+static void __mptcp_retrans(struct sock *sk)
 {
-       struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
-       struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
+       struct mptcp_sock *msk = mptcp_sk(sk);
        struct mptcp_sendmsg_info info = {};
        struct mptcp_data_frag *dfrag;
        size_t copied = 0;
-       int state, ret;
-
-       lock_sock(sk);
-       state = sk->sk_state;
-       if (unlikely(state == TCP_CLOSE))
-               goto unlock;
-
-       mptcp_check_data_fin_ack(sk);
-       __mptcp_flush_join_list(msk);
-
-       mptcp_check_fastclose(msk);
-
-       if (msk->pm.status)
-               mptcp_pm_nl_work(msk);
-
-       if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
-               mptcp_check_for_eof(msk);
-
-       __mptcp_check_send_data_fin(sk);
-       mptcp_check_data_fin(sk);
-
-       /* if the msk data is completely acked, or the socket timedout,
-        * there is no point in keeping around an orphaned sk
-        */
-       if (sock_flag(sk, SOCK_DEAD) &&
-           (mptcp_check_close_timeout(sk) ||
-           (state != sk->sk_state &&
-           ((1 << inet_sk_state_load(sk)) & (TCPF_CLOSE | TCPF_FIN_WAIT2))))) {
-               inet_sk_state_store(sk, TCP_CLOSE);
-               __mptcp_destroy_sock(sk);
-               goto unlock;
-       }
-
-       if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
-               __mptcp_close_subflow(msk);
-
-       if (!test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
-               goto unlock;
+       struct sock *ssk;
+       int ret;
 
-       __mptcp_clean_una(sk);
+       __mptcp_clean_una_wakeup(sk);
        dfrag = mptcp_rtx_head(sk);
        if (!dfrag)
-               goto unlock;
+               return;
 
        ssk = mptcp_subflow_get_retrans(msk);
        if (!ssk)
-               goto reset_unlock;
+               goto reset_timer;
 
        lock_sock(ssk);
 
@@ -2315,9 +2309,52 @@ static void mptcp_worker(struct work_struct *work)
        mptcp_set_timeout(sk, ssk);
        release_sock(ssk);
 
-reset_unlock:
+reset_timer:
        if (!mptcp_timer_pending(sk))
                mptcp_reset_timer(sk);
+}
+
+static void mptcp_worker(struct work_struct *work)
+{
+       struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
+       struct sock *sk = &msk->sk.icsk_inet.sk;
+       int state;
+
+       lock_sock(sk);
+       state = sk->sk_state;
+       if (unlikely(state == TCP_CLOSE))
+               goto unlock;
+
+       mptcp_check_data_fin_ack(sk);
+       __mptcp_flush_join_list(msk);
+
+       mptcp_check_fastclose(msk);
+
+       if (msk->pm.status)
+               mptcp_pm_nl_work(msk);
+
+       if (test_and_clear_bit(MPTCP_WORK_EOF, &msk->flags))
+               mptcp_check_for_eof(msk);
+
+       __mptcp_check_send_data_fin(sk);
+       mptcp_check_data_fin(sk);
+
+       /* There is no point in keeping around an orphaned sk timedout or
+        * closed, but we need the msk around to reply to incoming DATA_FIN,
+        * even if it is orphaned and in FIN_WAIT2 state
+        */
+       if (sock_flag(sk, SOCK_DEAD) &&
+           (mptcp_check_close_timeout(sk) || sk->sk_state == TCP_CLOSE)) {
+               inet_sk_state_store(sk, TCP_CLOSE);
+               __mptcp_destroy_sock(sk);
+               goto unlock;
+       }
+
+       if (test_and_clear_bit(MPTCP_WORK_CLOSE_SUBFLOW, &msk->flags))
+               __mptcp_close_subflow(msk);
+
+       if (test_and_clear_bit(MPTCP_WORK_RTX, &msk->flags))
+               __mptcp_retrans(sk);
 
 unlock:
        release_sock(sk);
@@ -2522,12 +2559,6 @@ static void __mptcp_destroy_sock(struct sock *sk)
 
        might_sleep();
 
-       /* dispose the ancillatory tcp socket, if any */
-       if (msk->subflow) {
-               iput(SOCK_INODE(msk->subflow));
-               msk->subflow = NULL;
-       }
-
        /* be sure to always acquire the join list lock, to sync vs
         * mptcp_finish_join().
         */
@@ -2552,6 +2583,7 @@ static void __mptcp_destroy_sock(struct sock *sk)
        sk_stream_kill_queues(sk);
        xfrm_sk_free_policy(sk);
        sk_refcnt_debug_release(sk);
+       mptcp_dispose_initial_subflow(msk);
        sock_put(sk);
 }
 
@@ -2933,13 +2965,14 @@ static void mptcp_release_cb(struct sock *sk)
 {
        unsigned long flags, nflags;
 
-       /* push_pending may touch wmem_reserved, do it before the later
-        * cleanup
-        */
-       if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
-               __mptcp_clean_una(sk);
-       if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags)) {
-               /* mptcp_push_pending() acquires the subflow socket lock
+       for (;;) {
+               flags = 0;
+               if (test_and_clear_bit(MPTCP_PUSH_PENDING, &mptcp_sk(sk)->flags))
+                       flags |= BIT(MPTCP_PUSH_PENDING);
+               if (!flags)
+                       break;
+
+               /* the following actions acquire the subflow socket lock
                 *
                 * 1) can't be invoked in atomic scope
                 * 2) must avoid ABBA deadlock with msk socket spinlock: the RX
@@ -2948,13 +2981,21 @@ static void mptcp_release_cb(struct sock *sk)
                 */
 
                spin_unlock_bh(&sk->sk_lock.slock);
-               mptcp_push_pending(sk, 0);
+               if (flags & BIT(MPTCP_PUSH_PENDING))
+                       __mptcp_push_pending(sk, 0);
+
+               cond_resched();
                spin_lock_bh(&sk->sk_lock.slock);
        }
+
+       if (test_and_clear_bit(MPTCP_CLEAN_UNA, &mptcp_sk(sk)->flags))
+               __mptcp_clean_una_wakeup(sk);
        if (test_and_clear_bit(MPTCP_ERROR_REPORT, &mptcp_sk(sk)->flags))
                __mptcp_error_report(sk);
 
-       /* clear any wmem reservation and errors */
+       /* push_pending may touch wmem_reserved, ensure we do the cleanup
+        * later
+        */
        __mptcp_update_wmem(sk);
        __mptcp_update_rmem(sk);
 
@@ -3284,6 +3325,9 @@ static int mptcp_stream_accept(struct socket *sock, struct socket *newsock,
                /* PM/worker can now acquire the first subflow socket
                 * lock without racing with listener queue cleanup,
                 * we can notify it, if needed.
+                *
+                * Even if remote has reset the initial subflow by now
+                * the refcnt is still at least one.
                 */
                subflow = mptcp_subflow_ctx(msk->first);
                list_add(&subflow->node, &msk->conn_list);
@@ -3375,10 +3419,34 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock,
        return mask;
 }
 
+static int mptcp_release(struct socket *sock)
+{
+       struct mptcp_subflow_context *subflow;
+       struct sock *sk = sock->sk;
+       struct mptcp_sock *msk;
+
+       if (!sk)
+               return 0;
+
+       lock_sock(sk);
+
+       msk = mptcp_sk(sk);
+
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+               ip_mc_drop_socket(ssk);
+       }
+
+       release_sock(sk);
+
+       return inet_release(sock);
+}
+
 static const struct proto_ops mptcp_stream_ops = {
        .family            = PF_INET,
        .owner             = THIS_MODULE,
-       .release           = inet_release,
+       .release           = mptcp_release,
        .bind              = mptcp_bind,
        .connect           = mptcp_stream_connect,
        .socketpair        = sock_no_socketpair,
@@ -3470,10 +3538,35 @@ void __init mptcp_proto_init(void)
 }
 
 #if IS_ENABLED(CONFIG_MPTCP_IPV6)
+static int mptcp6_release(struct socket *sock)
+{
+       struct mptcp_subflow_context *subflow;
+       struct mptcp_sock *msk;
+       struct sock *sk = sock->sk;
+
+       if (!sk)
+               return 0;
+
+       lock_sock(sk);
+
+       msk = mptcp_sk(sk);
+
+       mptcp_for_each_subflow(msk, subflow) {
+               struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
+
+               ip_mc_drop_socket(ssk);
+               ipv6_sock_mc_close(ssk);
+               ipv6_sock_ac_close(ssk);
+       }
+
+       release_sock(sk);
+       return inet6_release(sock);
+}
+
 static const struct proto_ops mptcp_v6_stream_ops = {
        .family            = PF_INET6,
        .owner             = THIS_MODULE,
-       .release           = inet6_release,
+       .release           = mptcp6_release,
        .bind              = mptcp_bind,
        .connect           = mptcp_stream_connect,
        .socketpair        = sock_no_socketpair,
index 91827d9..e21a5bc 100644 (file)
 #define TCPOLEN_MPTCP_DSS_MAP64                14
 #define TCPOLEN_MPTCP_DSS_CHECKSUM     2
 #define TCPOLEN_MPTCP_ADD_ADDR         16
-#define TCPOLEN_MPTCP_ADD_ADDR_PORT    20
+#define TCPOLEN_MPTCP_ADD_ADDR_PORT    18
 #define TCPOLEN_MPTCP_ADD_ADDR_BASE    8
-#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT       12
+#define TCPOLEN_MPTCP_ADD_ADDR_BASE_PORT       10
 #define TCPOLEN_MPTCP_ADD_ADDR6                28
-#define TCPOLEN_MPTCP_ADD_ADDR6_PORT   32
+#define TCPOLEN_MPTCP_ADD_ADDR6_PORT   30
 #define TCPOLEN_MPTCP_ADD_ADDR6_BASE   20
-#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT      24
-#define TCPOLEN_MPTCP_PORT_LEN         4
+#define TCPOLEN_MPTCP_ADD_ADDR6_BASE_PORT      22
+#define TCPOLEN_MPTCP_PORT_LEN         2
+#define TCPOLEN_MPTCP_PORT_ALIGN       2
 #define TCPOLEN_MPTCP_RM_ADDR_BASE     4
 #define TCPOLEN_MPTCP_PRIO             3
 #define TCPOLEN_MPTCP_PRIO_ALIGN       4
@@ -701,8 +702,9 @@ static inline unsigned int mptcp_add_addr_len(int family, bool echo, bool port)
                len = TCPOLEN_MPTCP_ADD_ADDR6_BASE;
        if (!echo)
                len += MPTCPOPT_THMAC_LEN;
+       /* account for 2 trailing 'nop' options */
        if (port)
-               len += TCPOLEN_MPTCP_PORT_LEN;
+               len += TCPOLEN_MPTCP_PORT_LEN + TCPOLEN_MPTCP_PORT_ALIGN;
 
        return len;
 }
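
The corrected constants reflect that the advertised port is 2 bytes, not 4: an IPv4 ADD_ADDR with HMAC and port is 18 bytes on the wire, and the two trailing NOPs that keep the TCP option list 32-bit aligned are accounted separately via TCPOLEN_MPTCP_PORT_ALIGN. A hedged re-derivation of mptcp_add_addr_len() with the values inlined:

#include <stdio.h>

/* space consumed in the TCP option area (values from the constants above) */
static unsigned int add_addr_len(int ipv6, int echo, int port)
{
        unsigned int len = ipv6 ? 20 : 8;       /* kind+len+subtype+id + address */

        if (!echo)
                len += 8;                       /* truncated HMAC */
        if (port)
                len += 2 + 2;                   /* 16-bit port + two NOPs */
        return len;
}

int main(void)
{
        /* IPv4, HMAC, port: option length 18 on the wire, 20 in the option area */
        printf("%u\n", add_addr_len(0, 0, 1));  /* 20 */
        printf("%u\n", add_addr_len(1, 0, 1));  /* 32: IPv6 variant, 30 + 2 NOPs */
        return 0;
}
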
index 06e2334..d17d39c 100644 (file)
@@ -477,6 +477,11 @@ static int subflow_v6_conn_request(struct sock *sk, struct sk_buff *skb)
        if (!ipv6_unicast_destination(skb))
                goto drop;
 
+       if (ipv6_addr_v4mapped(&ipv6_hdr(skb)->saddr)) {
+               __IP6_INC_STATS(sock_net(sk), NULL, IPSTATS_MIB_INHDRERRORS);
+               return 0;
+       }
+
        return tcp_conn_request(&mptcp_subflow_request_sock_ops,
                                &subflow_request_sock_ipv6_ops, sk, skb);
 
@@ -687,11 +692,6 @@ create_child:
                        /* move the msk reference ownership to the subflow */
                        subflow_req->msk = NULL;
                        ctx->conn = (struct sock *)owner;
-                       if (!mptcp_finish_join(child))
-                               goto dispose_child;
-
-                       SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
-                       tcp_rsk(req)->drop_req = true;
 
                        if (subflow_use_different_sport(owner, sk)) {
                                pr_debug("ack inet_sport=%d %d",
@@ -699,10 +699,16 @@ create_child:
                                         ntohs(inet_sk((struct sock *)owner)->inet_sport));
                                if (!mptcp_pm_sport_in_anno_list(owner, sk)) {
                                        SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_MISMATCHPORTACKRX);
-                                       goto out;
+                                       goto dispose_child;
                                }
                                SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINPORTACKRX);
                        }
+
+                       if (!mptcp_finish_join(child))
+                               goto dispose_child;
+
+                       SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
+                       tcp_rsk(req)->drop_req = true;
                }
        }
 
@@ -1096,6 +1102,12 @@ static void subflow_data_ready(struct sock *sk)
 
        msk = mptcp_sk(parent);
        if (state & TCPF_LISTEN) {
+               /* MPJ subflows are removed from the accept queue before reaching here;
+                * avoid stray wakeups
+                */
+               if (reqsk_queue_empty(&inet_csk(sk)->icsk_accept_queue))
+                       return;
+
                set_bit(MPTCP_DATA_READY, &msk->flags);
                parent->sk_data_ready(parent);
                return;
@@ -1291,6 +1303,7 @@ failed_unlink:
        spin_lock_bh(&msk->join_list_lock);
        list_del(&subflow->node);
        spin_unlock_bh(&msk->join_list_lock);
+       sock_put(mptcp_subflow_tcp_sock(subflow));
 
 failed:
        subflow->disposable = 1;
index 118f415..b055187 100644 (file)
@@ -219,7 +219,7 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
                        return NULL;
                pr_info("nf_conntrack: default automatic helper assignment "
                        "has been turned off for security reasons and CT-based "
-                       " firewall rule not found. Use the iptables CT target "
+                       "firewall rule not found. Use the iptables CT target "
                        "to attach helpers instead.\n");
                net->ct.auto_assign_helper_warned = 1;
                return NULL;
@@ -228,7 +228,6 @@ nf_ct_lookup_helper(struct nf_conn *ct, struct net *net)
        return __nf_ct_helper_find(&ct->tuplehash[IP_CT_DIR_REPLY].tuple);
 }
 
-
 int __nf_ct_try_assign_helper(struct nf_conn *ct, struct nf_conn *tmpl,
                              gfp_t flags)
 {
index 1469365..1d519b0 100644 (file)
@@ -2962,6 +2962,7 @@ static int ctnetlink_exp_dump_mask(struct sk_buff *skb,
        memset(&m, 0xFF, sizeof(m));
        memcpy(&m.src.u3, &mask->src.u3, sizeof(m.src.u3));
        m.src.u.all = mask->src.u.all;
+       m.src.l3num = tuple->src.l3num;
        m.dst.protonum = tuple->dst.protonum;
 
        nest_parms = nla_nest_start(skb, CTA_EXPECT_MASK);
index 5b05487..db11e40 100644 (file)
@@ -218,9 +218,6 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
                            enum ip_conntrack_info ctinfo,
                            const struct nf_hook_state *state)
 {
-       if (state->pf != NFPROTO_IPV4)
-               return -NF_ACCEPT;
-
        if (!nf_ct_is_confirmed(ct)) {
                unsigned int *timeouts = nf_ct_timeout_lookup(ct);
 
index 1d7e1c5..ec23330 100644 (file)
@@ -982,8 +982,10 @@ int nf_conntrack_tcp_packet(struct nf_conn *ct,
                                        IP_CT_EXP_CHALLENGE_ACK;
                }
                spin_unlock_bh(&ct->lock);
-               nf_ct_l4proto_log_invalid(skb, ct, "invalid packet ignored in "
-                                         "state %s ", tcp_conntrack_names[old_state]);
+               nf_ct_l4proto_log_invalid(skb, ct,
+                                         "packet (index %d) in dir %d ignored, state %s",
+                                         index, dir,
+                                         tcp_conntrack_names[old_state]);
                return NF_ACCEPT;
        case TCP_CONNTRACK_MAX:
                /* Special case for SYN proxy: when the SYN to the server or
index 5fa657b..c77ba86 100644 (file)
@@ -506,7 +506,7 @@ int nf_flow_table_init(struct nf_flowtable *flowtable)
 {
        int err;
 
-       INIT_DEFERRABLE_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
+       INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
        flow_block_init(&flowtable->flow_block);
        init_rwsem(&flowtable->flow_block_lock);
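
Why INIT_DEFERRABLE_WORK was wrong here: a deferrable timer is allowed to wait until its CPU wakes up for some other reason, so on an idle system the flowtable garbage collector could be postponed indefinitely and expired flows would linger. INIT_DELAYED_WORK uses a normal timer that fires on schedule. A sketch of the pairing, with an illustrative period and invented names for the deferrable case:

    /* normal timer: fires at the requested time even on an idle CPU */
    INIT_DELAYED_WORK(&flowtable->gc_work, nf_flow_offload_work_gc);
    queue_delayed_work(system_power_efficient_wq, &flowtable->gc_work, HZ);

    /* deferrable timer: may be batched with the next wakeup - fine for
     * housekeeping, wrong for GC that must run to expire flows
     */
    INIT_DEFERRABLE_WORK(&housekeeping_work, housekeeping_fn);
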
 
index e87b6bd..4731d21 100644 (file)
@@ -646,8 +646,8 @@ nf_nat_ipv4_fn(void *priv, struct sk_buff *skb,
 }
 
 static unsigned int
-nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
-              const struct nf_hook_state *state)
+nf_nat_ipv4_pre_routing(void *priv, struct sk_buff *skb,
+                       const struct nf_hook_state *state)
 {
        unsigned int ret;
        __be32 daddr = ip_hdr(skb)->daddr;
@@ -660,6 +660,23 @@ nf_nat_ipv4_in(void *priv, struct sk_buff *skb,
 }
 
 static unsigned int
+nf_nat_ipv4_local_in(void *priv, struct sk_buff *skb,
+                    const struct nf_hook_state *state)
+{
+       __be32 saddr = ip_hdr(skb)->saddr;
+       struct sock *sk = skb->sk;
+       unsigned int ret;
+
+       ret = nf_nat_ipv4_fn(priv, skb, state);
+
+       if (ret == NF_ACCEPT && sk && saddr != ip_hdr(skb)->saddr &&
+           !inet_sk_transparent(sk))
+               skb_orphan(skb); /* TCP early demux picked the wrong socket */
+
+       return ret;
+}
+
+static unsigned int
 nf_nat_ipv4_out(void *priv, struct sk_buff *skb,
                const struct nf_hook_state *state)
 {
@@ -736,7 +753,7 @@ nf_nat_ipv4_local_fn(void *priv, struct sk_buff *skb,
 static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
        /* Before packet filtering, change destination */
        {
-               .hook           = nf_nat_ipv4_in,
+               .hook           = nf_nat_ipv4_pre_routing,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_PRE_ROUTING,
                .priority       = NF_IP_PRI_NAT_DST,
@@ -757,7 +774,7 @@ static const struct nf_hook_ops nf_nat_ipv4_ops[] = {
        },
        /* After packet filtering, change source */
        {
-               .hook           = nf_nat_ipv4_fn,
+               .hook           = nf_nat_ipv4_local_in,
                .pf             = NFPROTO_IPV4,
                .hooknum        = NF_INET_LOCAL_IN,
                .priority       = NF_IP_PRI_NAT_SRC,
index c1eb5cd..f57f1a6 100644 (file)
@@ -916,6 +916,12 @@ static int nf_tables_updtable(struct nft_ctx *ctx)
        if (flags == ctx->table->flags)
                return 0;
 
+       if ((nft_table_has_owner(ctx->table) &&
+            !(flags & NFT_TABLE_F_OWNER)) ||
+           (!nft_table_has_owner(ctx->table) &&
+            flags & NFT_TABLE_F_OWNER))
+               return -EOPNOTSUPP;
+
        trans = nft_trans_alloc(ctx, NFT_MSG_NEWTABLE,
                                sizeof(struct nft_trans_table));
        if (trans == NULL)
@@ -6777,6 +6783,9 @@ static int nft_register_flowtable_net_hooks(struct net *net,
 
        list_for_each_entry(hook, hook_list, list) {
                list_for_each_entry(ft, &table->flowtables, list) {
+                       if (!nft_is_active_next(net, ft))
+                               continue;
+
                        list_for_each_entry(hook2, &ft->hook_list, list) {
                                if (hook->ops.dev == hook2->ops.dev &&
                                    hook->ops.pf == hook2->ops.pf) {
@@ -6836,6 +6845,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
        struct nft_hook *hook, *next;
        struct nft_trans *trans;
        bool unregister = false;
+       u32 flags;
        int err;
 
        err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
@@ -6850,6 +6860,17 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
                }
        }
 
+       if (nla[NFTA_FLOWTABLE_FLAGS]) {
+               flags = ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
+               if (flags & ~NFT_FLOWTABLE_MASK)
+                       return -EOPNOTSUPP;
+               if ((flowtable->data.flags & NFT_FLOWTABLE_HW_OFFLOAD) ^
+                   (flags & NFT_FLOWTABLE_HW_OFFLOAD))
+                       return -EOPNOTSUPP;
+       } else {
+               flags = flowtable->data.flags;
+       }
+
        err = nft_register_flowtable_net_hooks(ctx->net, ctx->table,
                                               &flowtable_hook.list, flowtable);
        if (err < 0)
@@ -6863,6 +6884,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
                goto err_flowtable_update_hook;
        }
 
+       nft_trans_flowtable_flags(trans) = flags;
        nft_trans_flowtable(trans) = flowtable;
        nft_trans_flowtable_update(trans) = true;
        INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
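
Note the transactional shape of the update path: the parsed flags are only stashed on the transaction object here, and the live flowtable->data.flags is rewritten later, in nf_tables_commit() (see the NFT_MSG_NEWFLOWTABLE case below), so an aborted batch leaves the flowtable untouched. The same prepare/commit split in miniature, with invented names:

    struct ft_trans { unsigned int staged_flags; int update; };

    static void ft_prepare(struct ft_trans *t, unsigned int flags)
    {
            t->staged_flags = flags;   /* nft_trans_flowtable_flags(trans) = flags */
            t->update = 1;
    }

    static void ft_commit(const struct ft_trans *t, unsigned int *live_flags)
    {
            if (t->update)             /* only now does the change go live */
                    *live_flags = t->staged_flags;
    }
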
@@ -6957,8 +6979,10 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
        if (nla[NFTA_FLOWTABLE_FLAGS]) {
                flowtable->data.flags =
                        ntohl(nla_get_be32(nla[NFTA_FLOWTABLE_FLAGS]));
-               if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK)
+               if (flowtable->data.flags & ~NFT_FLOWTABLE_MASK) {
+                       err = -EOPNOTSUPP;
                        goto err3;
+               }
        }
 
        write_pnet(&flowtable->data.net, net);
@@ -8170,6 +8194,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
                        break;
                case NFT_MSG_NEWFLOWTABLE:
                        if (nft_trans_flowtable_update(trans)) {
+                               nft_trans_flowtable(trans)->data.flags =
+                                       nft_trans_flowtable_flags(trans);
                                nf_tables_flowtable_notify(&trans->ctx,
                                                           nft_trans_flowtable(trans),
                                                           &nft_trans_flowtable_hooks(trans),
@@ -9022,8 +9048,12 @@ static void __nft_release_hooks(struct net *net)
 {
        struct nft_table *table;
 
-       list_for_each_entry(table, &net->nft.tables, list)
+       list_for_each_entry(table, &net->nft.tables, list) {
+               if (nft_table_has_owner(table))
+                       continue;
+
                __nft_release_hook(net, table);
+       }
 }
 
 static void __nft_release_table(struct net *net, struct nft_table *table)
@@ -9073,13 +9103,12 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
        nf_tables_table_destroy(&ctx);
 }
 
-static void __nft_release_tables(struct net *net, u32 nlpid)
+static void __nft_release_tables(struct net *net)
 {
        struct nft_table *table, *nt;
 
        list_for_each_entry_safe(table, nt, &net->nft.tables, list) {
-               if (nft_table_has_owner(table) &&
-                   nlpid != table->nlpid)
+               if (nft_table_has_owner(table))
                        continue;
 
                __nft_release_table(net, table);
@@ -9145,7 +9174,7 @@ static void __net_exit nf_tables_exit_net(struct net *net)
        mutex_lock(&net->nft.commit_mutex);
        if (!list_empty(&net->nft.commit_list))
                __nf_tables_abort(net, NFNL_ABORT_NONE);
-       __nft_release_tables(net, 0);
+       __nft_release_tables(net);
        mutex_unlock(&net->nft.commit_mutex);
        WARN_ON_ONCE(!list_empty(&net->nft.tables));
        WARN_ON_ONCE(!list_empty(&net->nft.module_list));
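
Taken together, the nf_tables hunks above tighten the semantics of owned tables: NFT_TABLE_F_OWNER can no longer be flipped on or off after table creation, and tables with an owner are skipped by the generic teardown passes because the owning process's netlink socket is responsible for releasing them. The toggle check boils down to a boolean mismatch; a runnable illustration (the flag value is assumed from the uapi header):

    #include <stdbool.h>
    #include <stdio.h>

    #define NFT_TABLE_F_OWNER 0x2   /* assumed from linux/netfilter/nf_tables.h */

    static bool owner_toggle_rejected(unsigned int old_flags, unsigned int new_flags)
    {
            bool had  = old_flags & NFT_TABLE_F_OWNER;
            bool want = new_flags & NFT_TABLE_F_OWNER;

            return had != want;     /* -EOPNOTSUPP in nf_tables_updtable() */
    }

    int main(void)
    {
            printf("%d\n", owner_toggle_rejected(0x2, 0x0));   /* 1: rejected */
            printf("%d\n", owner_toggle_rejected(0x2, 0x2));   /* 0: allowed */
            return 0;
    }
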
index acce622..6bd31a7 100644 (file)
@@ -330,6 +330,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
        const struct xt_match *m;
        int have_rev = 0;
 
+       mutex_lock(&xt[af].mutex);
        list_for_each_entry(m, &xt[af].match, list) {
                if (strcmp(m->name, name) == 0) {
                        if (m->revision > *bestp)
@@ -338,6 +339,7 @@ static int match_revfn(u8 af, const char *name, u8 revision, int *bestp)
                                have_rev = 1;
                }
        }
+       mutex_unlock(&xt[af].mutex);
 
        if (af != NFPROTO_UNSPEC && !have_rev)
                return match_revfn(NFPROTO_UNSPEC, name, revision, bestp);
@@ -350,6 +352,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
        const struct xt_target *t;
        int have_rev = 0;
 
+       mutex_lock(&xt[af].mutex);
        list_for_each_entry(t, &xt[af].target, list) {
                if (strcmp(t->name, name) == 0) {
                        if (t->revision > *bestp)
@@ -358,6 +361,7 @@ static int target_revfn(u8 af, const char *name, u8 revision, int *bestp)
                                have_rev = 1;
                }
        }
+       mutex_unlock(&xt[af].mutex);
 
        if (af != NFPROTO_UNSPEC && !have_rev)
                return target_revfn(NFPROTO_UNSPEC, name, revision, bestp);
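
The mutex moves from xt_find_revision() into the two revfn helpers because both tail into the NFPROTO_UNSPEC family when the requested family has no match: previously the caller held only xt[af].mutex, so that second pass walked the xt[NFPROTO_UNSPEC] lists without the mutex protecting them. Taking the lock per traversal means every list is walked under its own mutex. The shape of the fix, with arguments elided:

    static int revfn(u8 af /* , name, revision, bestp */)
    {
            int have_rev = 0;

            mutex_lock(&xt[af].mutex);
            /* walk xt[af]'s match (or target) list, updating have_rev */
            mutex_unlock(&xt[af].mutex);

            if (af != NFPROTO_UNSPEC && !have_rev)
                    return revfn(NFPROTO_UNSPEC /* , ... */); /* locks the right family */

            return have_rev;
    }
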
@@ -371,12 +375,10 @@ int xt_find_revision(u8 af, const char *name, u8 revision, int target,
 {
        int have_rev, best = -1;
 
-       mutex_lock(&xt[af].mutex);
        if (target == 1)
                have_rev = target_revfn(af, name, revision, &best);
        else
                have_rev = match_revfn(af, name, revision, &best);
-       mutex_unlock(&xt[af].mutex);
 
        /* Nothing at all?  Return 0 to try loading module. */
        if (best == -1) {
@@ -1349,14 +1351,6 @@ struct xt_counters *xt_counters_alloc(unsigned int counters)
 }
 EXPORT_SYMBOL(xt_counters_alloc);
 
-struct xt_table_info
-*xt_table_get_private_protected(const struct xt_table *table)
-{
-       return rcu_dereference_protected(table->private,
-                                        mutex_is_locked(&xt[table->af].mutex));
-}
-EXPORT_SYMBOL(xt_table_get_private_protected);
-
 struct xt_table_info *
 xt_replace_table(struct xt_table *table,
              unsigned int num_counters,
@@ -1364,6 +1358,7 @@ xt_replace_table(struct xt_table *table,
              int *error)
 {
        struct xt_table_info *private;
+       unsigned int cpu;
        int ret;
 
        ret = xt_jumpstack_alloc(newinfo);
@@ -1373,20 +1368,47 @@ xt_replace_table(struct xt_table *table,
        }
 
        /* Do the substitution. */
-       private = xt_table_get_private_protected(table);
+       local_bh_disable();
+       private = table->private;
 
        /* Check inside lock: is the old number correct? */
        if (num_counters != private->number) {
                pr_debug("num_counters != table->private->number (%u/%u)\n",
                         num_counters, private->number);
+               local_bh_enable();
                *error = -EAGAIN;
                return NULL;
        }
 
        newinfo->initial_entries = private->initial_entries;
+       /*
+        * Ensure contents of newinfo are visible before assigning to
+        * private.
+        */
+       smp_wmb();
+       table->private = newinfo;
+
+       /* make sure all CPUs see the new ->private value */
+       smp_mb();
+
+       /*
+        * Even though table entries have now been swapped, other CPUs
+        * may still be using the old entries...
+        */
+       local_bh_enable();
 
-       rcu_assign_pointer(table->private, newinfo);
-       synchronize_rcu();
+       /* ... so wait until xt_recseq is even on all CPUs */
+       for_each_possible_cpu(cpu) {
+               seqcount_t *s = &per_cpu(xt_recseq, cpu);
+               u32 seq = raw_read_seqcount(s);
+
+               if (seq & 1) {
+                       do {
+                               cond_resched();
+                               cpu_relax();
+                       } while (seq == raw_read_seqcount(s));
+               }
+       }
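
This hunk reverts table replacement from RCU back to the historical seqcount scheme: packet-processing readers bump their per-CPU xt_recseq to odd on entry and back to even on exit, so once the new ->private is published the writer only has to wait, per CPU, until the counter is even or has advanced. A runnable userspace analog of that wait loop, using C11 atomics:

    #include <stdatomic.h>
    #include <sched.h>
    #include <stdio.h>

    #define NR_CPUS 4

    static _Atomic unsigned int recseq[NR_CPUS];   /* per-CPU xt_recseq analog */

    /* Readers increment their slot to odd on entry and even on exit; after
     * the new table pointer is published, the writer waits per CPU until any
     * in-flight reader of the old table has finished.
     */
    static void wait_for_old_readers(void)
    {
            for (int cpu = 0; cpu < NR_CPUS; cpu++) {
                    unsigned int seq = atomic_load(&recseq[cpu]);

                    if (seq & 1)                          /* odd: reader inside */
                            while (atomic_load(&recseq[cpu]) == seq)
                                    sched_yield();        /* cond_resched() analog */
            }
    }

    int main(void)
    {
            wait_for_old_readers();   /* no readers: returns immediately */
            puts("quiesced");
            return 0;
    }
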
 
        audit_log_nfcfg(table->name, table->af, private->number,
                        !private->number ? AUDIT_XT_OP_REGISTER :
@@ -1422,12 +1444,12 @@ struct xt_table *xt_register_table(struct net *net,
        }
 
        /* Simplifies replace_table code. */
-       rcu_assign_pointer(table->private, bootstrap);
+       table->private = bootstrap;
 
        if (!xt_replace_table(table, 0, newinfo, &ret))
                goto unlock;
 
-       private = xt_table_get_private_protected(table);
+       private = table->private;
        pr_debug("table->private->number = %u\n", private->number);
 
        /* save number of initial entries */
@@ -1450,8 +1472,7 @@ void *xt_unregister_table(struct xt_table *table)
        struct xt_table_info *private;
 
        mutex_lock(&xt[table->af].mutex);
-       private = xt_table_get_private_protected(table);
-       RCU_INIT_POINTER(table->private, NULL);
+       private = table->private;
        list_del(&table->list);
        mutex_unlock(&xt[table->af].mutex);
        audit_log_nfcfg(table->name, table->af, private->number,
index 726dda9..4f50a64 100644 (file)
@@ -575,6 +575,7 @@ list_start:
 
                break;
        }
+       cipso_v4_doi_putdef(doi_def);
        rcu_read_unlock();
 
        genlmsg_end(ans_skb, data);
@@ -583,12 +584,14 @@ list_start:
 list_retry:
        /* XXX - this limit is a guesstimate */
        if (nlsze_mult < 4) {
+               cipso_v4_doi_putdef(doi_def);
                rcu_read_unlock();
                kfree_skb(ans_skb);
                nlsze_mult *= 2;
                goto list_start;
        }
 list_failure_lock:
+       cipso_v4_doi_putdef(doi_def);
        rcu_read_unlock();
 list_failure:
        kfree_skb(ans_skb);
index 5eddfe7..71cec03 100644 (file)
@@ -271,9 +271,11 @@ static void ovs_ct_update_key(const struct sk_buff *skb,
 /* This is called to initialize CT key fields possibly coming in from the local
  * stack.
  */
-void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key)
+void ovs_ct_fill_key(const struct sk_buff *skb,
+                    struct sw_flow_key *key,
+                    bool post_ct)
 {
-       ovs_ct_update_key(skb, NULL, key, false, false);
+       ovs_ct_update_key(skb, NULL, key, post_ct, false);
 }
 
 int ovs_ct_put_key(const struct sw_flow_key *swkey,
@@ -1332,7 +1334,7 @@ int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key)
        if (skb_nfct(skb)) {
                nf_conntrack_put(skb_nfct(skb));
                nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
-               ovs_ct_fill_key(skb, key);
+               ovs_ct_fill_key(skb, key, false);
        }
 
        return 0;
index 59dc327..317e525 100644 (file)
@@ -25,7 +25,8 @@ int ovs_ct_execute(struct net *, struct sk_buff *, struct sw_flow_key *,
                   const struct ovs_conntrack_info *);
 int ovs_ct_clear(struct sk_buff *skb, struct sw_flow_key *key);
 
-void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key);
+void ovs_ct_fill_key(const struct sk_buff *skb, struct sw_flow_key *key,
+                    bool post_ct);
 int ovs_ct_put_key(const struct sw_flow_key *swkey,
                   const struct sw_flow_key *output, struct sk_buff *skb);
 void ovs_ct_free_action(const struct nlattr *a);
@@ -74,7 +75,8 @@ static inline int ovs_ct_clear(struct sk_buff *skb,
 }
 
 static inline void ovs_ct_fill_key(const struct sk_buff *skb,
-                                  struct sw_flow_key *key)
+                                  struct sw_flow_key *key,
+                                  bool post_ct)
 {
        key->ct_state = 0;
        key->ct_zone = 0;
index c7f34d6..e586424 100644 (file)
@@ -857,6 +857,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
        struct tc_skb_ext *tc_ext;
 #endif
+       bool post_ct = false;
        int res, err;
 
        /* Extract metadata from packet. */
@@ -895,6 +896,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
                tc_ext = skb_ext_find(skb, TC_SKB_EXT);
                key->recirc_id = tc_ext ? tc_ext->chain : 0;
                OVS_CB(skb)->mru = tc_ext ? tc_ext->mru : 0;
+               post_ct = tc_ext ? tc_ext->post_ct : false;
        } else {
                key->recirc_id = 0;
        }
@@ -904,7 +906,7 @@ int ovs_flow_key_extract(const struct ip_tunnel_info *tun_info,
 
        err = key_extract(skb, key);
        if (!err)
-               ovs_ct_fill_key(skb, key);   /* Must be after key_extract(). */
+               ovs_ct_fill_key(skb, key, post_ct);   /* Must be after key_extract(). */
        return err;
 }
 
index 33e238c..482c07f 100644 (file)
@@ -309,10 +309,10 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
        unsigned short tun_proto = ip_tunnel_info_af(tun_info);
        const struct ip_tunnel_key *tun_key = &tun_info->key;
        int tun_opts_len = tun_info->options_len;
-       int sum = 0;
+       int sum = nla_total_size(0);    /* PSAMPLE_ATTR_TUNNEL */
 
        if (tun_key->tun_flags & TUNNEL_KEY)
-               sum += nla_total_size(sizeof(u64));
+               sum += nla_total_size_64bit(sizeof(u64));
 
        if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
                sum += nla_total_size(0);
index b343582..dfc820e 100644 (file)
@@ -439,7 +439,7 @@ int qrtr_endpoint_post(struct qrtr_endpoint *ep, const void *data, size_t len)
        if (len == 0 || len & 3)
                return -EINVAL;
 
-       skb = netdev_alloc_skb(NULL, len);
+       skb = __netdev_alloc_skb(NULL, len, GFP_ATOMIC | __GFP_NOWARN);
        if (!skb)
                return -ENOMEM;
 
@@ -958,8 +958,10 @@ static int qrtr_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
        plen = (len + 3) & ~3;
        skb = sock_alloc_send_skb(sk, plen + QRTR_HDR_MAX_SIZE,
                                  msg->msg_flags & MSG_DONTWAIT, &rc);
-       if (!skb)
+       if (!skb) {
+               rc = -ENOMEM;
                goto out_node;
+       }
 
        skb_reserve(skb, QRTR_HDR_MAX_SIZE);
 
@@ -1056,6 +1058,11 @@ static int qrtr_recvmsg(struct socket *sock, struct msghdr *msg,
        rc = copied;
 
        if (addr) {
+               /* There is an anonymous 2-byte hole after sq_family,
+                * make sure to clear it.
+                */
+               memset(addr, 0, sizeof(*addr));
+
                addr->sq_family = AF_QIPCRTR;
                addr->sq_node = cb->src_node;
                addr->sq_port = cb->src_port;
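
The memset is needed because struct sockaddr_qrtr opens with a 2-byte family field followed by a u32, so the compiler inserts two bytes of padding that would otherwise carry stale kernel memory to userspace through recvmsg(). A runnable demonstration of the hole, mirroring the layout from <linux/qrtr.h> with plain C types:

    #include <stdio.h>
    #include <stddef.h>

    /* mirrors struct sockaddr_qrtr from <linux/qrtr.h> */
    struct sockaddr_qrtr {
            unsigned short sq_family;
            unsigned int   sq_node;
            unsigned int   sq_port;
    };

    int main(void)
    {
            size_t hole = offsetof(struct sockaddr_qrtr, sq_node)
                          - sizeof(unsigned short);

            printf("sizeof=%zu, padding after sq_family=%zu bytes\n",
                   sizeof(struct sockaddr_qrtr), hole);  /* 12, 2 on common ABIs */
            return 0;
    }
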
index b238c40..304b41f 100644 (file)
@@ -31,6 +31,7 @@ static int qrtr_tun_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
 static int qrtr_tun_open(struct inode *inode, struct file *filp)
 {
        struct qrtr_tun *tun;
+       int ret;
 
        tun = kzalloc(sizeof(*tun), GFP_KERNEL);
        if (!tun)
@@ -43,7 +44,16 @@ static int qrtr_tun_open(struct inode *inode, struct file *filp)
 
        filp->private_data = tun;
 
-       return qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+       ret = qrtr_endpoint_register(&tun->ep, QRTR_EP_NID_AUTO);
+       if (ret)
+               goto out;
+
+       return 0;
+
+out:
+       filp->private_data = NULL;
+       kfree(tun);
+       return ret;
 }
 
 static ssize_t qrtr_tun_read_iter(struct kiocb *iocb, struct iov_iter *to)
index f0a0aa1..16e888a 100644 (file)
@@ -945,13 +945,14 @@ static int tcf_ct_act(struct sk_buff *skb, const struct tc_action *a,
        tcf_lastuse_update(&c->tcf_tm);
 
        if (clear) {
+               qdisc_skb_cb(skb)->post_ct = false;
                ct = nf_ct_get(skb, &ctinfo);
                if (ct) {
                        nf_conntrack_put(&ct->ct_general);
                        nf_ct_set(skb, NULL, IP_CT_UNTRACKED);
                }
 
-               goto out;
+               goto out_clear;
        }
 
        family = tcf_ct_skb_nf_family(skb);
@@ -1030,8 +1031,9 @@ out_push:
        skb_push_rcsum(skb, nh_ofs);
 
 out:
-       tcf_action_update_bstats(&c->common, skb);
        qdisc_skb_cb(skb)->post_ct = true;
+out_clear:
+       tcf_action_update_bstats(&c->common, skb);
        if (defrag)
                qdisc_skb_cb(skb)->pkt_len = skb->len;
        return retval;
index e37556c..13341e7 100644 (file)
@@ -1629,6 +1629,7 @@ int tcf_classify_ingress(struct sk_buff *skb,
                        return TC_ACT_SHOT;
                ext->chain = last_executed_chain;
                ext->mru = qdisc_skb_cb(skb)->mru;
+               ext->post_ct = qdisc_skb_cb(skb)->post_ct;
        }
 
        return ret;
index 2409e52..c69a4ba 100644 (file)
@@ -1417,6 +1417,21 @@ static int fl_validate_ct_state(u16 state, struct nlattr *tb,
                return -EINVAL;
        }
 
+       if (state & TCA_FLOWER_KEY_CT_FLAGS_INVALID &&
+           state & ~(TCA_FLOWER_KEY_CT_FLAGS_TRACKED |
+                     TCA_FLOWER_KEY_CT_FLAGS_INVALID)) {
+               NL_SET_ERR_MSG_ATTR(extack, tb,
+                                   "when inv is set, only trk may be set");
+               return -EINVAL;
+       }
+
+       if (state & TCA_FLOWER_KEY_CT_FLAGS_NEW &&
+           state & TCA_FLOWER_KEY_CT_FLAGS_REPLY) {
+               NL_SET_ERR_MSG_ATTR(extack, tb,
+                                   "new and rpl are mutually exclusive");
+               return -EINVAL;
+       }
+
        return 0;
 }
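
Two consistency rules are added for ct_state matches, and the hunk below additionally masks the key before validating, so a flag that userspace sets in the key but excludes from the mask can no longer trigger a spurious -EINVAL. A runnable sketch of the combined behaviour (flag values assumed from include/uapi/linux/pkt_cls.h):

    #include <stdio.h>

    /* values assumed from include/uapi/linux/pkt_cls.h */
    #define CT_NEW  (1 << 0)
    #define CT_TRK  (1 << 3)
    #define CT_INV  (1 << 4)
    #define CT_RPL  (1 << 5)

    static int validate_ct_state(unsigned short state)
    {
            if ((state & CT_INV) && (state & ~(CT_TRK | CT_INV)))
                    return -1;   /* "when inv is set, only trk may be set" */
            if ((state & CT_NEW) && (state & CT_RPL))
                    return -1;   /* "new and rpl are mutually exclusive" */
            return 0;
    }

    int main(void)
    {
            unsigned short key = CT_TRK | CT_NEW | CT_RPL, mask = CT_TRK | CT_NEW;

            /* validating the raw key would reject; key & mask passes */
            printf("%d\n", validate_ct_state(key & mask));   /* 0 */
            return 0;
    }
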
 
@@ -1436,7 +1451,7 @@ static int fl_set_key_ct(struct nlattr **tb,
                               &mask->ct_state, TCA_FLOWER_KEY_CT_STATE_MASK,
                               sizeof(key->ct_state));
 
-               err = fl_validate_ct_state(mask->ct_state,
+               err = fl_validate_ct_state(key->ct_state & mask->ct_state,
                                           tb[TCA_FLOWER_KEY_CT_STATE_MASK],
                                           extack);
                if (err)
index e2e4353..f87d077 100644 (file)
@@ -2168,7 +2168,7 @@ static int tc_dump_tclass_qdisc(struct Qdisc *q, struct sk_buff *skb,
 
 static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
                               struct tcmsg *tcm, struct netlink_callback *cb,
-                              int *t_p, int s_t)
+                              int *t_p, int s_t, bool recur)
 {
        struct Qdisc *q;
        int b;
@@ -2179,7 +2179,7 @@ static int tc_dump_tclass_root(struct Qdisc *root, struct sk_buff *skb,
        if (tc_dump_tclass_qdisc(root, skb, tcm, cb, t_p, s_t) < 0)
                return -1;
 
-       if (!qdisc_dev(root))
+       if (!qdisc_dev(root) || !recur)
                return 0;
 
        if (tcm->tcm_parent) {
@@ -2214,13 +2214,13 @@ static int tc_dump_tclass(struct sk_buff *skb, struct netlink_callback *cb)
        s_t = cb->args[0];
        t = 0;
 
-       if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t) < 0)
+       if (tc_dump_tclass_root(dev->qdisc, skb, tcm, cb, &t, s_t, true) < 0)
                goto done;
 
        dev_queue = dev_ingress_queue(dev);
        if (dev_queue &&
            tc_dump_tclass_root(dev_queue->qdisc_sleeping, skb, tcm, cb,
-                               &t, s_t) < 0)
+                               &t, s_t, false) < 0)
                goto done;
 
 done:
index 50f680f..2adbd94 100644 (file)
@@ -345,6 +345,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
        struct sk_buff **old = NULL;
        unsigned int mask;
        u32 max_P;
+       u8 *stab;
 
        if (opt == NULL)
                return -EINVAL;
@@ -361,8 +362,8 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
        max_P = tb[TCA_CHOKE_MAX_P] ? nla_get_u32(tb[TCA_CHOKE_MAX_P]) : 0;
 
        ctl = nla_data(tb[TCA_CHOKE_PARMS]);
-
-       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log))
+       stab = nla_data(tb[TCA_CHOKE_STAB]);
+       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab))
                return -EINVAL;
 
        if (ctl->limit > CHOKE_MAX_QUEUE)
@@ -412,7 +413,7 @@ static int choke_change(struct Qdisc *sch, struct nlattr *opt,
 
        red_set_parms(&q->parms, ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
-                     nla_data(tb[TCA_CHOKE_STAB]),
+                     stab,
                      max_P);
        red_set_vars(&q->vars);
 
index e0bc775..f4132dc 100644 (file)
@@ -480,7 +480,7 @@ static inline int gred_change_vq(struct Qdisc *sch, int dp,
        struct gred_sched *table = qdisc_priv(sch);
        struct gred_sched_data *q = table->tab[dp];
 
-       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log)) {
+       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log, stab)) {
                NL_SET_ERR_MSG_MOD(extack, "invalid RED parameters");
                return -EINVAL;
        }
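
choke, gred, red and sfq all pass user-supplied RED parameters through the shared red_check_params() helper, which now also receives the stab size table so it can be vetted before red_set_parms() stores it; out-of-range entries could otherwise end up used as oversized shift amounts in the average-queue-length math. A sketch of the kind of check involved (the authoritative rules live in include/net/red.h; RED_STAB_SIZE is 256 there, and the bound of 32 mirrors the word width):

    #include <stddef.h>

    #define RED_STAB_SIZE 256

    static int stab_ok(const unsigned char *stab, unsigned char scell_log)
    {
            if (scell_log >= 32)       /* shift must stay within a 32-bit word */
                    return 0;
            if (!stab)                 /* sfq's v1 path passes no table */
                    return 1;
            for (size_t i = 0; i < RED_STAB_SIZE; i++)
                    if (stab[i] >= 32) /* entries are consumed as shifts too */
                            return 0;
            return 1;
    }
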
index dff3adf..62e12cb 100644 (file)
@@ -1020,6 +1020,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
        struct nlattr *tb[TCA_HTB_MAX + 1];
        struct tc_htb_glob *gopt;
        unsigned int ntx;
+       bool offload;
        int err;
 
        qdisc_watchdog_init(&q->watchdog, sch);
@@ -1044,9 +1045,9 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
        if (gopt->version != HTB_VER >> 16)
                return -EINVAL;
 
-       q->offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
+       offload = nla_get_flag(tb[TCA_HTB_OFFLOAD]);
 
-       if (q->offload) {
+       if (offload) {
                if (sch->parent != TC_H_ROOT)
                        return -EOPNOTSUPP;
 
@@ -1076,7 +1077,7 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
                q->rate2quantum = 1;
        q->defcls = gopt->defcls;
 
-       if (!q->offload)
+       if (!offload)
                return 0;
 
        for (ntx = 0; ntx < q->num_direct_qdiscs; ntx++) {
@@ -1107,12 +1108,14 @@ static int htb_init(struct Qdisc *sch, struct nlattr *opt,
        if (err)
                goto err_free_qdiscs;
 
+       /* Defer this assignment so that htb_destroy skips offload-related
+        * parts (especially calling ndo_setup_tc) on errors.
+        */
+       q->offload = true;
+
        return 0;
 
 err_free_qdiscs:
-       /* TC_HTB_CREATE call failed, avoid any further calls to the driver. */
-       q->offload = false;
-
        for (ntx = 0; ntx < q->num_direct_qdiscs && q->direct_qdiscs[ntx];
             ntx++)
                qdisc_put(q->direct_qdiscs[ntx]);
@@ -1340,8 +1343,12 @@ htb_select_queue(struct Qdisc *sch, struct tcmsg *tcm)
 {
        struct net_device *dev = qdisc_dev(sch);
        struct tc_htb_qopt_offload offload_opt;
+       struct htb_sched *q = qdisc_priv(sch);
        int err;
 
+       if (!q->offload)
+               return sch->dev_queue;
+
        offload_opt = (struct tc_htb_qopt_offload) {
                .command = TC_HTB_LEAF_QUERY_QUEUE,
                .classid = TC_H_MIN(tcm->tcm_parent),
index b4ae34d..40adf1f 100644 (file)
@@ -242,6 +242,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
        unsigned char flags;
        int err;
        u32 max_P;
+       u8 *stab;
 
        if (tb[TCA_RED_PARMS] == NULL ||
            tb[TCA_RED_STAB] == NULL)
@@ -250,7 +251,9 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
        max_P = tb[TCA_RED_MAX_P] ? nla_get_u32(tb[TCA_RED_MAX_P]) : 0;
 
        ctl = nla_data(tb[TCA_RED_PARMS]);
-       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog, ctl->Scell_log))
+       stab = nla_data(tb[TCA_RED_STAB]);
+       if (!red_check_params(ctl->qth_min, ctl->qth_max, ctl->Wlog,
+                             ctl->Scell_log, stab))
                return -EINVAL;
 
        err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
@@ -288,7 +291,7 @@ static int __red_change(struct Qdisc *sch, struct nlattr **tb,
        red_set_parms(&q->parms,
                      ctl->qth_min, ctl->qth_max, ctl->Wlog,
                      ctl->Plog, ctl->Scell_log,
-                     nla_data(tb[TCA_RED_STAB]),
+                     stab,
                      max_P);
        red_set_vars(&q->vars);
 
index b25e514..066754a 100644 (file)
@@ -647,7 +647,7 @@ static int sfq_change(struct Qdisc *sch, struct nlattr *opt)
        }
 
        if (ctl_v1 && !red_check_params(ctl_v1->qth_min, ctl_v1->qth_max,
-                                       ctl_v1->Wlog, ctl_v1->Scell_log))
+                                       ctl_v1->Wlog, ctl_v1->Scell_log, NULL))
                return -EINVAL;
        if (ctl_v1 && ctl_v1->qth_min) {
                p = kmalloc(sizeof(*p), GFP_KERNEL);
index 6614c9f..a6aa17d 100644 (file)
@@ -584,13 +584,6 @@ int sctp_packet_transmit(struct sctp_packet *packet, gfp_t gfp)
                goto out;
        }
 
-       rcu_read_lock();
-       if (__sk_dst_get(sk) != tp->dst) {
-               dst_hold(tp->dst);
-               sk_setup_caps(sk, tp->dst);
-       }
-       rcu_read_unlock();
-
        /* pack up chunks */
        pkt_count = sctp_packet_pack(packet, head, gso, gfp);
        if (!pkt_count) {
index 3fd06a2..5cb1aa5 100644 (file)
@@ -1135,6 +1135,7 @@ static void sctp_outq_flush_data(struct sctp_flush_ctx *ctx,
 
 static void sctp_outq_flush_transports(struct sctp_flush_ctx *ctx)
 {
+       struct sock *sk = ctx->asoc->base.sk;
        struct list_head *ltransport;
        struct sctp_packet *packet;
        struct sctp_transport *t;
@@ -1144,6 +1145,12 @@ static void sctp_outq_flush_transports(struct sctp_flush_ctx *ctx)
                t = list_entry(ltransport, struct sctp_transport, send_ready);
                packet = &t->packet;
                if (!sctp_packet_empty(packet)) {
+                       rcu_read_lock();
+                       if (t->dst && __sk_dst_get(sk) != t->dst) {
+                               dst_hold(t->dst);
+                               sk_setup_caps(sk, t->dst);
+                       }
+                       rcu_read_unlock();
                        error = sctp_packet_transmit(packet, ctx->gfp);
                        if (error < 0)
                                ctx->q->asoc->base.sk->sk_err = -error;
index a9c6af5..5ba4567 100644 (file)
@@ -75,7 +75,7 @@ int sctp_tsnmap_check(const struct sctp_tsnmap *map, __u32 tsn)
                return 1;
 
        /* Verify that we can hold this TSN and that it will not
-        * overlfow our map
+        * overflow our map
         */
        if (!TSN_lt(tsn, map->base_tsn + SCTP_TSN_MAP_SIZE))
                return -1;
index 7f0617a..84a8049 100644 (file)
@@ -334,6 +334,7 @@ static const struct xattr_handler sockfs_xattr_handler = {
 };
 
 static int sockfs_security_xattr_set(const struct xattr_handler *handler,
+                                    struct user_namespace *mnt_userns,
                                     struct dentry *dentry, struct inode *inode,
                                     const char *suffix, const void *value,
                                     size_t size, int flags)
@@ -537,9 +538,10 @@ static ssize_t sockfs_listxattr(struct dentry *dentry, char *buffer,
        return used;
 }
 
-static int sockfs_setattr(struct dentry *dentry, struct iattr *iattr)
+static int sockfs_setattr(struct user_namespace *mnt_userns,
+                         struct dentry *dentry, struct iattr *iattr)
 {
-       int err = simple_setattr(dentry, iattr);
+       int err = simple_setattr(&init_user_ns, dentry, iattr);
 
        if (!err && (iattr->ia_valid & ATTR_UID)) {
                struct socket *sock = SOCKET_I(d_inode(dentry));
@@ -2411,10 +2413,6 @@ static int ___sys_sendmsg(struct socket *sock, struct user_msghdr __user *msg,
 long __sys_sendmsg_sock(struct socket *sock, struct msghdr *msg,
                        unsigned int flags)
 {
-       /* disallow ancillary data requests from this path */
-       if (msg->msg_control || msg->msg_controllen)
-               return -EINVAL;
-
        return ____sys_sendmsg(sock, msg, flags, NULL, 0);
 }
 
@@ -2623,12 +2621,6 @@ long __sys_recvmsg_sock(struct socket *sock, struct msghdr *msg,
                        struct user_msghdr __user *umsg,
                        struct sockaddr __user *uaddr, unsigned int flags)
 {
-       if (msg->msg_control || msg->msg_controllen) {
-               /* disallow ancillary data reqs unless cmsg is plain data */
-               if (!(sock->ops->flags & PROTO_CMSG_DATA_ONLY))
-                       return -EINVAL;
-       }
-
        return ____sys_recvmsg(sock, msg, umsg, uaddr, flags, 0);
 }
 
index bd4678d..6dff643 100644 (file)
@@ -1825,11 +1825,14 @@ static int
 svcauth_gss_release(struct svc_rqst *rqstp)
 {
        struct gss_svc_data *gsd = (struct gss_svc_data *)rqstp->rq_auth_data;
-       struct rpc_gss_wire_cred *gc = &gsd->clcred;
+       struct rpc_gss_wire_cred *gc;
        struct xdr_buf *resbuf = &rqstp->rq_res;
        int stat = -EINVAL;
        struct sunrpc_net *sn = net_generic(SVC_NET(rqstp), sunrpc_net_id);
 
+       if (!gsd)
+               goto out;
+       gc = &gsd->clcred;
        if (gc->gc_proc != RPC_GSS_PROC_DATA)
                goto out;
        /* Release can be called twice, but we only wrap once. */
@@ -1870,10 +1873,10 @@ out_err:
        if (rqstp->rq_cred.cr_group_info)
                put_group_info(rqstp->rq_cred.cr_group_info);
        rqstp->rq_cred.cr_group_info = NULL;
-       if (gsd->rsci)
+       if (gsd && gsd->rsci) {
                cache_put(&gsd->rsci->h, sn->rsc_cache);
-       gsd->rsci = NULL;
-
+               gsd->rsci = NULL;
+       }
        return stat;
 }
 
index 8241f5a..09c000d 100644 (file)
@@ -478,6 +478,7 @@ rpc_get_inode(struct super_block *sb, umode_t mode)
                inode->i_fop = &simple_dir_operations;
                inode->i_op = &simple_dir_inode_operations;
                inc_nlink(inode);
+               break;
        default:
                break;
        }
index cf702a5..39ed0e0 100644 (file)
@@ -963,8 +963,11 @@ void rpc_execute(struct rpc_task *task)
 
        rpc_set_active(task);
        rpc_make_runnable(rpciod_workqueue, task);
-       if (!is_async)
+       if (!is_async) {
+               unsigned int pflags = memalloc_nofs_save();
                __rpc_execute(task);
+               memalloc_nofs_restore(pflags);
+       }
 }
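
memalloc_nofs_save()/memalloc_nofs_restore() (from linux/sched/mm.h) flag the current task so every allocation inside the bracketed region implicitly behaves as GFP_NOFS, preventing a synchronous RPC issued from filesystem writeback from recursing back into the filesystem via direct reclaim. The async path is expected to apply the same marking inside rpc_async_schedule() (not visible in this hunk). The pattern in isolation:

    unsigned int pflags = memalloc_nofs_save(); /* task-wide: no FS reclaim below */
    /* ... any kmalloc()/page allocation here acts as if GFP_NOFS ... */
    memalloc_nofs_restore(pflags);              /* restore the caller's context */
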
 
 static void rpc_async_schedule(struct work_struct *work)
index 61fb8a1..d76dc9d 100644 (file)
@@ -1413,7 +1413,7 @@ svc_process_common(struct svc_rqst *rqstp, struct kvec *argv, struct kvec *resv)
 
  sendit:
        if (svc_authorise(rqstp))
-               goto close;
+               goto close_xprt;
        return 1;               /* Caller can now send it */
 
 release_dropit:
@@ -1425,6 +1425,8 @@ release_dropit:
        return 0;
 
  close:
+       svc_authorise(rqstp);
+close_xprt:
        if (rqstp->rq_xprt && test_bit(XPT_TEMP, &rqstp->rq_xprt->xpt_flags))
                svc_close_xprt(rqstp->rq_xprt);
        dprintk("svc: svc_process close\n");
@@ -1433,7 +1435,7 @@ release_dropit:
 err_short_len:
        svc_printk(rqstp, "short len %zd, dropping request\n",
                        argv->iov_len);
-       goto close;
+       goto close_xprt;
 
 err_bad_rpc:
        serv->sv_stats->rpcbadfmt++;
index dcc50ae..3cdd71a 100644 (file)
@@ -1060,7 +1060,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
        struct svc_xprt *xprt;
        int ret = 0;
 
-       spin_lock(&serv->sv_lock);
+       spin_lock_bh(&serv->sv_lock);
        list_for_each_entry(xprt, xprt_list, xpt_list) {
                if (xprt->xpt_net != net)
                        continue;
@@ -1068,7 +1068,7 @@ static int svc_close_list(struct svc_serv *serv, struct list_head *xprt_list, st
                set_bit(XPT_CLOSE, &xprt->xpt_flags);
                svc_xprt_enqueue(xprt);
        }
-       spin_unlock(&serv->sv_lock);
+       spin_unlock_bh(&serv->sv_lock);
        return ret;
 }
 
index 946edf2..a249837 100644 (file)
@@ -2,7 +2,7 @@
 /*
  * Copyright (c) 2015-2020, Oracle and/or its affiliates.
  *
- * Support for backward direction RPCs on RPC/RDMA.
+ * Support for reverse-direction RPCs on RPC/RDMA.
  */
 
 #include <linux/sunrpc/xprt.h>
@@ -208,7 +208,7 @@ create_req:
 }
 
 /**
- * rpcrdma_bc_receive_call - Handle a backward direction call
+ * rpcrdma_bc_receive_call - Handle a reverse-direction Call
  * @r_xprt: transport receiving the call
  * @rep: receive buffer containing the call
  *
index baca49f..766a104 100644 (file)
@@ -306,20 +306,14 @@ struct rpcrdma_mr_seg *frwr_map(struct rpcrdma_xprt *r_xprt,
        if (nsegs > ep->re_max_fr_depth)
                nsegs = ep->re_max_fr_depth;
        for (i = 0; i < nsegs;) {
-               if (seg->mr_page)
-                       sg_set_page(&mr->mr_sg[i],
-                                   seg->mr_page,
-                                   seg->mr_len,
-                                   offset_in_page(seg->mr_offset));
-               else
-                       sg_set_buf(&mr->mr_sg[i], seg->mr_offset,
-                                  seg->mr_len);
+               sg_set_page(&mr->mr_sg[i], seg->mr_page,
+                           seg->mr_len, seg->mr_offset);
 
                ++seg;
                ++i;
                if (ep->re_mrtype == IB_MR_TYPE_SG_GAPS)
                        continue;
-               if ((i < nsegs && offset_in_page(seg->mr_offset)) ||
+               if ((i < nsegs && seg->mr_offset) ||
                    offset_in_page((seg-1)->mr_offset + (seg-1)->mr_len))
                        break;
        }
index 8f5d0cb..292f066 100644 (file)
@@ -204,9 +204,7 @@ rpcrdma_alloc_sparse_pages(struct xdr_buf *buf)
        return 0;
 }
 
-/* Split @vec on page boundaries into SGEs. FMR registers pages, not
- * a byte range. Other modes coalesce these SGEs into a single MR
- * when they can.
+/* Convert @vec to a single SGL element.
  *
  * Returns pointer to next available SGE, and bumps the total number
  * of SGEs consumed.
@@ -215,22 +213,11 @@ static struct rpcrdma_mr_seg *
 rpcrdma_convert_kvec(struct kvec *vec, struct rpcrdma_mr_seg *seg,
                     unsigned int *n)
 {
-       u32 remaining, page_offset;
-       char *base;
-
-       base = vec->iov_base;
-       page_offset = offset_in_page(base);
-       remaining = vec->iov_len;
-       while (remaining) {
-               seg->mr_page = NULL;
-               seg->mr_offset = base;
-               seg->mr_len = min_t(u32, PAGE_SIZE - page_offset, remaining);
-               remaining -= seg->mr_len;
-               base += seg->mr_len;
-               ++seg;
-               ++(*n);
-               page_offset = 0;
-       }
+       seg->mr_page = virt_to_page(vec->iov_base);
+       seg->mr_offset = offset_in_page(vec->iov_base);
+       seg->mr_len = vec->iov_len;
+       ++seg;
+       ++(*n);
        return seg;
 }
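
Since mr_offset is now always a page offset (see the rpcrdma_mr_seg change near the end of these hunks), a kvec no longer has to be split on page boundaries: virt_to_page()/offset_in_page() decompose the virtual address once and frwr_map() can call sg_set_page() unconditionally. A runnable userspace analog of that decomposition:

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096UL
    #define PAGE_MASK (~(PAGE_SIZE - 1))

    int main(void)
    {
            static char buf[64];
            uintptr_t kva = (uintptr_t)buf;

            uintptr_t page_base = kva & PAGE_MASK;    /* virt_to_page() analog */
            uintptr_t offset    = kva & ~PAGE_MASK;   /* offset_in_page() analog */

            printf("page=%#lx offset=%lu len=%zu\n",
                   (unsigned long)page_base, (unsigned long)offset, sizeof(buf));
            return 0;
    }
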
 
@@ -259,7 +246,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
        page_base = offset_in_page(xdrbuf->page_base);
        while (len) {
                seg->mr_page = *ppages;
-               seg->mr_offset = (char *)page_base;
+               seg->mr_offset = page_base;
                seg->mr_len = min_t(u32, PAGE_SIZE - page_base, len);
                len -= seg->mr_len;
                ++ppages;
@@ -268,10 +255,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
                page_base = 0;
        }
 
-       /* When encoding a Read chunk, the tail iovec contains an
-        * XDR pad and may be omitted.
-        */
-       if (type == rpcrdma_readch && r_xprt->rx_ep->re_implicit_roundup)
+       if (type == rpcrdma_readch)
                goto out;
 
        /* When encoding a Write chunk, some servers need to see an
@@ -283,7 +267,7 @@ rpcrdma_convert_iovs(struct rpcrdma_xprt *r_xprt, struct xdr_buf *xdrbuf,
                goto out;
 
        if (xdrbuf->tail[0].iov_len)
-               seg = rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
+               rpcrdma_convert_kvec(&xdrbuf->tail[0], seg, &n);
 
 out:
        if (unlikely(n > RPCRDMA_MAX_SEGS))
@@ -644,9 +628,8 @@ out_mapping_err:
        return false;
 }
 
-/* The tail iovec may include an XDR pad for the page list,
- * as well as additional content, and may not reside in the
- * same page as the head iovec.
+/* The tail iovec might not reside in the same page as the
+ * head iovec.
  */
 static bool rpcrdma_prepare_tail_iov(struct rpcrdma_req *req,
                                     struct xdr_buf *xdr,
@@ -764,27 +747,19 @@ static bool rpcrdma_prepare_readch(struct rpcrdma_xprt *r_xprt,
                                   struct rpcrdma_req *req,
                                   struct xdr_buf *xdr)
 {
+       struct kvec *tail = &xdr->tail[0];
+
        if (!rpcrdma_prepare_head_iov(r_xprt, req, xdr->head[0].iov_len))
                return false;
 
-       /* If there is a Read chunk, the page list is being handled
+       /* If there is a Read chunk, the page list is handled
         * via explicit RDMA, and thus is skipped here.
         */
 
-       /* Do not include the tail if it is only an XDR pad */
-       if (xdr->tail[0].iov_len > 3) {
-               unsigned int page_base, len;
-
-               /* If the content in the page list is an odd length,
-                * xdr_write_pages() adds a pad at the beginning of
-                * the tail iovec. Force the tail's non-pad content to
-                * land at the next XDR position in the Send message.
-                */
-               page_base = offset_in_page(xdr->tail[0].iov_base);
-               len = xdr->tail[0].iov_len;
-               page_base += len & 3;
-               len -= len & 3;
-               if (!rpcrdma_prepare_tail_iov(req, xdr, page_base, len))
+       if (tail->iov_len) {
+               if (!rpcrdma_prepare_tail_iov(req, xdr,
+                                             offset_in_page(tail->iov_base),
+                                             tail->iov_len))
                        return false;
                kref_get(&req->rl_kref);
        }
@@ -1164,14 +1139,10 @@ rpcrdma_is_bcall(struct rpcrdma_xprt *r_xprt, struct rpcrdma_rep *rep)
         */
        p = xdr_inline_decode(xdr, 3 * sizeof(*p));
        if (unlikely(!p))
-               goto out_short;
+               return true;
 
        rpcrdma_bc_receive_call(r_xprt, rep);
        return true;
-
-out_short:
-       pr_warn("RPC/RDMA short backward direction call\n");
-       return true;
 }
 #else  /* CONFIG_SUNRPC_BACKCHANNEL */
 {
index 63f8be9..9150df3 100644 (file)
@@ -2,7 +2,7 @@
 /*
  * Copyright (c) 2015-2018 Oracle.  All rights reserved.
  *
- * Support for backward direction RPCs on RPC/RDMA (server-side).
+ * Support for reverse-direction RPCs on RPC/RDMA (server-side).
  */
 
 #include <linux/sunrpc/svc_rdma.h>
@@ -59,7 +59,7 @@ out_unlock:
        spin_unlock(&xprt->queue_lock);
 }
 
-/* Send a backwards direction RPC call.
+/* Send a reverse-direction RPC Call.
  *
  * Caller holds the connection's mutex and has already marshaled
  * the RPC/RDMA request.
@@ -252,9 +252,9 @@ xprt_setup_rdma_bc(struct xprt_create *args)
        xprt->timeout = &xprt_rdma_bc_timeout;
        xprt_set_bound(xprt);
        xprt_set_connected(xprt);
-       xprt->bind_timeout = RPCRDMA_BIND_TO;
-       xprt->reestablish_timeout = RPCRDMA_INIT_REEST_TO;
-       xprt->idle_timeout = RPCRDMA_IDLE_DISC_TO;
+       xprt->bind_timeout = 0;
+       xprt->reestablish_timeout = 0;
+       xprt->idle_timeout = 0;
 
        xprt->prot = XPRT_TRANSPORT_BC_RDMA;
        xprt->ops = &xprt_rdma_bc_procs;
index 6d28f23..7d34290 100644 (file)
@@ -266,46 +266,33 @@ void svc_rdma_release_rqst(struct svc_rqst *rqstp)
                svc_rdma_recv_ctxt_put(rdma, ctxt);
 }
 
-static bool svc_rdma_refresh_recvs(struct svcxprt_rdma *rdma,
-                                  unsigned int wanted, bool temp)
+static int __svc_rdma_post_recv(struct svcxprt_rdma *rdma,
+                               struct svc_rdma_recv_ctxt *ctxt)
 {
-       const struct ib_recv_wr *bad_wr = NULL;
-       struct svc_rdma_recv_ctxt *ctxt;
-       struct ib_recv_wr *recv_chain;
        int ret;
 
-       recv_chain = NULL;
-       while (wanted--) {
-               ctxt = svc_rdma_recv_ctxt_get(rdma);
-               if (!ctxt)
-                       break;
-
-               trace_svcrdma_post_recv(ctxt);
-               ctxt->rc_temp = temp;
-               ctxt->rc_recv_wr.next = recv_chain;
-               recv_chain = &ctxt->rc_recv_wr;
-               rdma->sc_pending_recvs++;
-       }
-       if (!recv_chain)
-               return false;
-
-       ret = ib_post_recv(rdma->sc_qp, recv_chain, &bad_wr);
+       trace_svcrdma_post_recv(ctxt);
+       ret = ib_post_recv(rdma->sc_qp, &ctxt->rc_recv_wr, NULL);
        if (ret)
                goto err_post;
-       return true;
+       return 0;
 
 err_post:
-       while (bad_wr) {
-               ctxt = container_of(bad_wr, struct svc_rdma_recv_ctxt,
-                                   rc_recv_wr);
-               bad_wr = bad_wr->next;
-               svc_rdma_recv_ctxt_put(rdma, ctxt);
-       }
-
        trace_svcrdma_rq_post_err(rdma, ret);
-       /* Since we're destroying the xprt, no need to reset
-        * sc_pending_recvs. */
-       return false;
+       svc_rdma_recv_ctxt_put(rdma, ctxt);
+       return ret;
+}
+
+static int svc_rdma_post_recv(struct svcxprt_rdma *rdma)
+{
+       struct svc_rdma_recv_ctxt *ctxt;
+
+       if (test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags))
+               return 0;
+       ctxt = svc_rdma_recv_ctxt_get(rdma);
+       if (!ctxt)
+               return -ENOMEM;
+       return __svc_rdma_post_recv(rdma, ctxt);
 }
 
 /**
@@ -316,7 +303,20 @@ err_post:
  */
 bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
 {
-       return svc_rdma_refresh_recvs(rdma, rdma->sc_max_requests, true);
+       struct svc_rdma_recv_ctxt *ctxt;
+       unsigned int i;
+       int ret;
+
+       for (i = 0; i < rdma->sc_max_requests; i++) {
+               ctxt = svc_rdma_recv_ctxt_get(rdma);
+               if (!ctxt)
+                       return false;
+               ctxt->rc_temp = true;
+               ret = __svc_rdma_post_recv(rdma, ctxt);
+               if (ret)
+                       return false;
+       }
+       return true;
 }
 
 /**
@@ -324,6 +324,8 @@ bool svc_rdma_post_recvs(struct svcxprt_rdma *rdma)
  * @cq: Completion Queue context
  * @wc: Work Completion object
  *
+ * NB: The svc_xprt/svcxprt_rdma is pinned whenever it's possible that
+ * the Receive completion handler could be running.
  */
 static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
 {
@@ -331,8 +333,6 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
        struct ib_cqe *cqe = wc->wr_cqe;
        struct svc_rdma_recv_ctxt *ctxt;
 
-       rdma->sc_pending_recvs--;
-
        /* WARNING: Only wc->wr_cqe and wc->status are reliable */
        ctxt = container_of(cqe, struct svc_rdma_recv_ctxt, rc_cqe);
 
@@ -340,6 +340,9 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
        if (wc->status != IB_WC_SUCCESS)
                goto flushed;
 
+       if (svc_rdma_post_recv(rdma))
+               goto post_err;
+
        /* All wc fields are now known to be valid */
        ctxt->rc_byte_len = wc->byte_len;
 
@@ -350,18 +353,11 @@ static void svc_rdma_wc_receive(struct ib_cq *cq, struct ib_wc *wc)
        spin_unlock(&rdma->sc_rq_dto_lock);
        if (!test_bit(RDMAXPRT_CONN_PENDING, &rdma->sc_flags))
                svc_xprt_enqueue(&rdma->sc_xprt);
-
-       if (!test_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags) &&
-           rdma->sc_pending_recvs < rdma->sc_max_requests)
-               if (!svc_rdma_refresh_recvs(rdma, RPCRDMA_MAX_RECV_BATCH,
-                                           false))
-                       goto post_err;
-
        return;
 
 flushed:
-       svc_rdma_recv_ctxt_put(rdma, ctxt);
 post_err:
+       svc_rdma_recv_ctxt_put(rdma, ctxt);
        set_bit(XPT_CLOSE, &rdma->sc_xprt.xpt_flags);
        svc_xprt_enqueue(&rdma->sc_xprt);
 }
index 94b2865..fe3be98 100644 (file)
@@ -98,9 +98,9 @@ struct rpcrdma_ep {
        atomic_t                re_completion_ids;
 };
 
-/* Pre-allocate extra Work Requests for handling backward receives
- * and sends. This is a fixed value because the Work Queues are
- * allocated when the forward channel is set up, long before the
+/* Pre-allocate extra Work Requests for handling reverse-direction
+ * Receives and Sends. This is a fixed value because the Work Queues
+ * are allocated when the forward channel is set up, long before the
  * backchannel is provisioned. This value is two times
  * NFS4_DEF_CB_SLOT_TABLE_SIZE.
  */
@@ -283,10 +283,11 @@ enum {
                                  RPCRDMA_MAX_IOV_SEGS,
 };
 
-struct rpcrdma_mr_seg {                /* chunk descriptors */
-       u32             mr_len;         /* length of chunk or segment */
-       struct page     *mr_page;       /* owning page, if any */
-       char            *mr_offset;     /* kva if no page, else offset */
+/* Arguments for DMA mapping and registration */
+struct rpcrdma_mr_seg {
+       u32             mr_len;         /* length of segment */
+       struct page     *mr_page;       /* underlying struct page */
+       u64             mr_offset;      /* IN: page offset, OUT: iova */
 };
 
 /* The Send SGE array is provisioned to send a maximum size
index c56a66c..e35760f 100644 (file)
@@ -829,7 +829,7 @@ xs_stream_record_marker(struct xdr_buf *xdr)
  *   EAGAIN:   The socket was blocked, please call again later to
  *             complete the request
  * ENOTCONN:   Caller needs to invoke connect logic then call again
- *    other:   Some other error occured, the request was not sent
+ *    other:   Some other error occurred, the request was not sent
  */
 static int xs_local_send_request(struct rpc_rqst *req)
 {
@@ -1665,7 +1665,7 @@ static int xs_bind(struct sock_xprt *transport, struct socket *sock)
         * This ensures that we can continue to establish TCP
         * connections even when all local ephemeral ports are already
         * a part of some TCP connection.  This makes no difference
-        * for UDP sockets, but also doens't harm them.
+        * for UDP sockets, but also doesn't harm them.
         *
         * If we're asking for any reserved port (i.e. port == 0 &&
         * transport->xprt.resvport == 1) xs_get_srcport above will
@@ -1875,6 +1875,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
                xprt->stat.connect_time += (long)jiffies -
                                           xprt->stat.connect_start;
                xprt_set_connected(xprt);
+               break;
        case -ENOBUFS:
                break;
        case -ENOENT:
@@ -2276,10 +2277,8 @@ static void xs_tcp_setup_socket(struct work_struct *work)
        case -EHOSTUNREACH:
        case -EADDRINUSE:
        case -ENOBUFS:
-               /*
-                * xs_tcp_force_close() wakes tasks with -EIO.
-                * We need to wake them first to ensure the
-                * correct error code.
+               /* xs_tcp_force_close() wakes tasks with a fixed error code.
+                * We need to wake them first to ensure the correct error code.
                 */
                xprt_wake_pending_tasks(xprt, status);
                xs_tcp_force_close(xprt);
@@ -2380,7 +2379,7 @@ static void xs_error_handle(struct work_struct *work)
 }
 
 /**
- * xs_local_print_stats - display AF_LOCAL socket-specifc stats
+ * xs_local_print_stats - display AF_LOCAL socket-specific stats
  * @xprt: rpc_xprt struct containing statistics
  * @seq: output file
  *
@@ -2409,7 +2408,7 @@ static void xs_local_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 }
 
 /**
- * xs_udp_print_stats - display UDP socket-specifc stats
+ * xs_udp_print_stats - display UDP socket-specific stats
  * @xprt: rpc_xprt struct containing statistics
  * @seq: output file
  *
@@ -2433,7 +2432,7 @@ static void xs_udp_print_stats(struct rpc_xprt *xprt, struct seq_file *seq)
 }
 
 /**
- * xs_tcp_print_stats - display TCP socket-specifc stats
+ * xs_tcp_print_stats - display TCP socket-specific stats
  * @xprt: rpc_xprt struct containing statistics
  * @seq: output file
  *
index 008670d..136338b 100644 (file)
@@ -2895,17 +2895,22 @@ int tipc_nl_node_dump_monitor_peer(struct sk_buff *skb,
 
 #ifdef CONFIG_TIPC_CRYPTO
 static int tipc_nl_retrieve_key(struct nlattr **attrs,
-                               struct tipc_aead_key **key)
+                               struct tipc_aead_key **pkey)
 {
        struct nlattr *attr = attrs[TIPC_NLA_NODE_KEY];
+       struct tipc_aead_key *key;
 
        if (!attr)
                return -ENODATA;
 
-       *key = (struct tipc_aead_key *)nla_data(attr);
-       if (nla_len(attr) < tipc_aead_key_size(*key))
+       if (nla_len(attr) < sizeof(*key))
+               return -EINVAL;
+       key = (struct tipc_aead_key *)nla_data(attr);
+       if (key->keylen > TIPC_AEAD_KEYLEN_MAX ||
+           nla_len(attr) < tipc_aead_key_size(key))
                return -EINVAL;
 
+       *pkey = key;
        return 0;
 }
 
index 41c3303..5a31307 100644 (file)
@@ -936,7 +936,7 @@ static struct sock *unix_find_other(struct net *net,
                if (err)
                        goto fail;
                inode = d_backing_inode(path.dentry);
-               err = inode_permission(inode, MAY_WRITE);
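+               /* path_permission() also honours the mount's idmapping */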
+               err = path_permission(&path, MAY_WRITE);
                if (err)
                        goto put_fail;
 
@@ -996,7 +996,8 @@ static int unix_mknod(const char *sun_path, umode_t mode, struct path *res)
         */
        err = security_path_mknod(&path, dentry, mode, 0);
        if (!err) {
-               err = vfs_mknod(d_inode(path.dentry), dentry, mode, 0);
+               err = vfs_mknod(mnt_user_ns(path.mnt), d_inode(path.dentry),
+                               dentry, mode, 0);
                if (!err) {
                        res->mnt = mntget(path.mnt);
                        res->dentry = dget(dentry);
index 5546710..bc7fb9b 100644 (file)
@@ -755,6 +755,7 @@ static struct sock *__vsock_create(struct net *net,
                vsk->buffer_size = psk->buffer_size;
                vsk->buffer_min_size = psk->buffer_min_size;
                vsk->buffer_max_size = psk->buffer_max_size;
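+               /* let the LSM copy security state from the parent socket */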
+               security_sk_clone(parent, sk);
        } else {
                vsk->trusted = ns_capable_noaudit(&init_user_ns, CAP_NET_ADMIN);
                vsk->owner = get_current_cred();
index 521d36b..034af85 100644 (file)
@@ -70,7 +70,7 @@ __cfg80211_wdev_from_attrs(struct cfg80211_registered_device *rdev,
        struct wireless_dev *result = NULL;
        bool have_ifidx = attrs[NL80211_ATTR_IFINDEX];
        bool have_wdev_id = attrs[NL80211_ATTR_WDEV];
-       u64 wdev_id;
+       u64 wdev_id = 0;
        int wiphy_idx = -1;
        int ifidx = -1;
 
@@ -14789,6 +14789,7 @@ bad_tid_conf:
 #define NL80211_FLAG_NEED_WDEV_UP      (NL80211_FLAG_NEED_WDEV |\
                                         NL80211_FLAG_CHECK_NETDEV_UP)
 #define NL80211_FLAG_CLEAR_SKB         0x20
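+/* skip taking the wiphy mutex in pre_doit()/post_doit() for this op */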
+#define NL80211_FLAG_NO_WIPHY_MTX      0x40
 
 static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
                            struct genl_info *info)
@@ -14840,7 +14841,7 @@ static int nl80211_pre_doit(const struct genl_ops *ops, struct sk_buff *skb,
                info->user_ptr[0] = rdev;
        }
 
-       if (rdev) {
+       if (rdev && !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
                wiphy_lock(&rdev->wiphy);
                /* we keep the mutex locked until post_doit */
                __release(&rdev->wiphy.mtx);
@@ -14865,7 +14866,8 @@ static void nl80211_post_doit(const struct genl_ops *ops, struct sk_buff *skb,
                }
        }
 
-       if (info->user_ptr[0]) {
+       if (info->user_ptr[0] &&
+           !(ops->internal_flags & NL80211_FLAG_NO_WIPHY_MTX)) {
                struct cfg80211_registered_device *rdev = info->user_ptr[0];
 
                /* we kept the mutex locked since pre_doit */
@@ -15329,7 +15331,9 @@ static const struct genl_small_ops nl80211_small_ops[] = {
                .validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
                .doit = nl80211_wiphy_netns,
                .flags = GENL_UNS_ADMIN_PERM,
-               .internal_flags = NL80211_FLAG_NEED_WIPHY,
+               .internal_flags = NL80211_FLAG_NEED_WIPHY |
+                                 NL80211_FLAG_NEED_RTNL |
+                                 NL80211_FLAG_NO_WIPHY_MTX,
        },
        {
                .cmd = NL80211_CMD_GET_SURVEY,
index 0ed6e4d..e76cdfc 100644 (file)
@@ -210,7 +210,7 @@ config SAMPLE_WATCHDOG
        depends on CC_CAN_LINK
 
 config SAMPLE_WATCH_QUEUE
-       bool "Build example /dev/watch_queue notification consumer"
+       bool "Build example watch_queue notification API consumer"
        depends on CC_CAN_LINK && HEADERS_INSTALL
        help
          Build example userspace program to use the new mount_notify(),
diff --git a/samples/acrn/Makefile b/samples/acrn/Makefile
new file mode 100644 (file)
index 0000000..c8e3ed9
--- /dev/null
@@ -0,0 +1,12 @@
+# SPDX-License-Identifier: GPL-2.0
+
+.PHONY: vm-sample
+
+vm-sample: vm-sample.o payload.o
+       $(CC) $^ -o $@
+
+payload.o: payload.ld guest16.o
+       $(LD) -T $< -o $@
+
+clean:
+       rm *.o vm-sample
diff --git a/samples/acrn/guest.ld b/samples/acrn/guest.ld
new file mode 100644 (file)
index 0000000..5127c68
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+OUTPUT_FORMAT(binary)
+SECTIONS
+{
+        .start : { *(.start) }
+        .text : { *(.text*) }
+        .rodata : { *(.rodata) }
+        .data : { *(.data) }
+}
diff --git a/samples/acrn/payload.ld b/samples/acrn/payload.ld
new file mode 100644 (file)
index 0000000..e8d9a49
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+SECTIONS
+{
+        .payload16 0 : {
+                guest16 = .;
+                guest16.o(.text)
+                guest16_end = .;
+        }
+}
diff --git a/samples/acrn/vm-sample.c b/samples/acrn/vm-sample.c
new file mode 100644 (file)
index 0000000..b2dad47
--- /dev/null
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * A sample program to run a User VM on the ACRN hypervisor
+ *
+ * This sample runs in a Service VM, which is a privileged VM of ACRN.
+ * CONFIG_ACRN_HSM needs to be enabled in the Service VM.
+ *
+ * The guest VM code in guest16.s is executed after the VM is launched.
+ *
+ * Copyright (C) 2020 Intel Corporation. All rights reserved.
+ */
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <malloc.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <linux/acrn.h>
+
+#define GUEST_MEMORY_SIZE      (1024*1024)
+void *guest_memory;
+
+extern const unsigned char guest16[], guest16_end[];
+static char io_request_page[4096] __attribute__((aligned(4096)));
+static struct acrn_io_request *io_req_buf = (struct acrn_io_request *)io_request_page;
+
+__u16 vcpu_num;
+__u16 vmid;
+/* POST_STANDARD_VM_UUID1, refer to https://github.com/projectacrn/acrn-hypervisor/blob/master/hypervisor/include/common/vm_uuids.h */
+guid_t vm_uuid = GUID_INIT(0x385479d2, 0xd625, 0xe811, 0x86, 0x4e, 0xcb, 0x7a, 0x18, 0xb3, 0x46, 0x43);
+
+int hsm_fd;
+int is_running = 1;
+
+void vm_exit(int sig)
+{
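+       /* self-assignment silences the unused-parameter warning */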
+       sig = sig;
+
+       is_running = 0;
+       ioctl(hsm_fd, ACRN_IOCTL_PAUSE_VM, vmid);
+       ioctl(hsm_fd, ACRN_IOCTL_DESTROY_IOREQ_CLIENT, 0);
+}
+
+int main(int argc, char **argv)
+{
+       int vcpu_id, ret;
+       struct acrn_vm_creation create_vm = {0};
+       struct acrn_vm_memmap ram_map = {0};
+       struct acrn_vcpu_regs regs;
+       struct acrn_io_request *io_req;
+       struct acrn_ioreq_notify __attribute__((aligned(8))) notify;
+
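+       /* self-assignments silence unused-parameter warnings */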
+       argc = argc;
+       argv = argv;
+
+       guest_memory = memalign(4096, GUEST_MEMORY_SIZE);
+       if (!guest_memory) {
+               printf("No enough memory!\n");
+               return -1;
+       }
+       hsm_fd = open("/dev/acrn_hsm", O_RDWR|O_CLOEXEC);
+
+       memcpy(&create_vm.uuid, &vm_uuid, 16);
+       create_vm.ioreq_buf = (__u64)io_req_buf;
+       ret = ioctl(hsm_fd, ACRN_IOCTL_CREATE_VM, &create_vm);
+       printf("Created VM! [%d]\n", ret);
+       vcpu_num = create_vm.vcpu_num;
+       vmid = create_vm.vmid;
+
+       /* setup guest memory */
+       ram_map.type = ACRN_MEMMAP_RAM;
+       ram_map.vma_base = (__u64)guest_memory;
+       ram_map.len = GUEST_MEMORY_SIZE;
+       ram_map.user_vm_pa = 0;
+       ram_map.attr = ACRN_MEM_ACCESS_RWX;
+       ret = ioctl(hsm_fd, ACRN_IOCTL_SET_MEMSEG, &ram_map);
+       printf("Set up VM memory! [%d]\n", ret);
+
+       memcpy(guest_memory, guest16, guest16_end-guest16);
+
+       /* setup vcpu registers */
+       memset(&regs, 0, sizeof(regs));
+       regs.vcpu_id = 0;
+       regs.vcpu_regs.rip = 0;
+
+       /* CR0_ET | CR0_NE */
+       regs.vcpu_regs.cr0 = 0x30U;
+       regs.vcpu_regs.cs_ar = 0x009FU;
+       regs.vcpu_regs.cs_sel = 0xF000U;
+       regs.vcpu_regs.cs_limit = 0xFFFFU;
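+       /* split the (zero) entry point into a real-mode cs_base:rip pair */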
+       regs.vcpu_regs.cs_base = 0 & 0xFFFF0000UL;
+       regs.vcpu_regs.rip = 0 & 0xFFFFUL;
+
+       ret = ioctl(hsm_fd, ACRN_IOCTL_SET_VCPU_REGS, &regs);
+       printf("Set up VM BSP registers! [%d]\n", ret);
+
+       /* create an ioreq client for this VM */
+       ret = ioctl(hsm_fd, ACRN_IOCTL_CREATE_IOREQ_CLIENT, 0);
+       printf("Created IO request client! [%d]\n", ret);
+
+       /* run vm */
+       ret = ioctl(hsm_fd, ACRN_IOCTL_START_VM, vmid);
+       printf("Start VM! [%d]\n", ret);
+
+       signal(SIGINT, vm_exit);
+       while (is_running) {
+               ret = ioctl(hsm_fd, ACRN_IOCTL_ATTACH_IOREQ_CLIENT, 0);
+
+               for (vcpu_id = 0; vcpu_id < vcpu_num; vcpu_id++) {
+                       io_req = &io_req_buf[vcpu_id];
+                       if ((__sync_add_and_fetch(&io_req->processed, 0) == ACRN_IOREQ_STATE_PROCESSING)
+                                       && (!io_req->kernel_handled))
+                               if (io_req->type == ACRN_IOREQ_TYPE_PORTIO) {
+                                       int bytes, port, in;
+
+                                       port = io_req->reqs.pio_request.address;
+                                       bytes = io_req->reqs.pio_request.size;
+                                       in = (io_req->reqs.pio_request.direction == ACRN_IOREQ_DIR_READ);
+                                       printf("Guest VM %s PIO[%x] with size[%x]\n", in ? "read" : "write", port, bytes);
+
+                                       notify.vmid = vmid;
+                                       notify.vcpu = vcpu_id;
+                                       ioctl(hsm_fd, ACRN_IOCTL_NOTIFY_REQUEST_FINISH, &notify);
+                               }
+               }
+       }
+
+       ret = ioctl(hsm_fd, ACRN_IOCTL_DESTROY_VM, NULL);
+       printf("Destroy VM! [%d]\n", ret);
+       close(hsm_fd);
+       free(guest_memory);
+       return 0;
+}
index bfeab44..2e3bb73 100644 (file)
@@ -4,7 +4,7 @@
  *     Version: 0.1.0
  * Description: cfag12864b LCD userspace example program
  *
- *      Author: Copyright (C) Miguel Ojeda Sandonis
+ *      Author: Copyright (C) Miguel Ojeda <ojeda@kernel.org>
  *        Date: 2006-10-31
  */
 
index db0cb73..1e2a110 100644 (file)
@@ -1699,5 +1699,7 @@ int main(int argc, char **argv)
 
        xdpsock_cleanup();
 
+       munmap(bufs, NUM_FRAMES * opt_xsk_frame_size);
+
        return 0;
 }
index 46e618a..8c6cb57 100644 (file)
@@ -1,5 +1,5 @@
 // SPDX-License-Identifier: GPL-2.0
-/* Use /dev/watch_queue to watch for notifications.
+/* Use watch_queue API to watch for notifications.
  *
  * Copyright (C) 2020 Red Hat, Inc. All Rights Reserved.
  * Written by David Howells (dhowells@redhat.com)
index 08e0111..509e085 100644 (file)
@@ -141,13 +141,9 @@ cc-ifversion = $(shell [ $(CONFIG_GCC_VERSION)0 $(1) $(2)000 ] && echo $(3) || e
 # Usage: KBUILD_LDFLAGS += $(call ld-option, -X, -Y)
 ld-option = $(call try-run, $(LD) $(KBUILD_LDFLAGS) $(1) -v,$(1),$(2),$(3))
 
-# ld-version
-# Note this is mainly for HJ Lu's 3 number binutil versions
-ld-version = $(shell $(LD) --version | $(srctree)/scripts/ld-version.sh)
-
 # ld-ifversion
 # Usage:  $(call ld-ifversion, -ge, 22252, y)
-ld-ifversion = $(shell [ $(ld-version) $(1) $(2) ] && echo $(3) || echo $(4))
+ld-ifversion = $(shell [ $(CONFIG_LD_VERSION)0 $(1) $(2)0 ] && echo $(3) || echo $(4))
 
 ######
 
index a5fe72c..58fdb53 100644 (file)
@@ -39,8 +39,17 @@ as-instr = $(success,printf "%b\n" "$(1)" | $(CC) $(CLANG_FLAGS) -c -x assembler
 $(error-if,$(failure,command -v $(CC)),compiler '$(CC)' not found)
 $(error-if,$(failure,command -v $(LD)),linker '$(LD)' not found)
 
-# Fail if the linker is gold as it's not capable of linking the kernel proper
-$(error-if,$(success, $(LD) -v | grep -q gold), gold linker '$(LD)' not supported)
+# Get the compiler name, version, and error out if it is not supported.
+cc-info := $(shell,$(srctree)/scripts/cc-version.sh $(CC))
+$(error-if,$(success,test -z "$(cc-info)"),Sorry$(comma) this compiler is not supported.)
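+# cc-info is '<name> <canonical version>'; 'set --' splits it into words.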
+cc-name := $(shell,set -- $(cc-info) && echo $1)
+cc-version := $(shell,set -- $(cc-info) && echo $2)
+
+# Get the linker name, version, and error out if it is not supported.
+ld-info := $(shell,$(srctree)/scripts/ld-version.sh $(LD))
+$(error-if,$(success,test -z "$(ld-info)"),Sorry$(comma) this linker is not supported.)
+ld-name := $(shell,set -- $(ld-info) && echo $1)
+ld-version := $(shell,set -- $(ld-info) && echo $2)
 
 # machine bit flags
 #  $(m32-flag): -m32 if the compiler supports it, or an empty string otherwise.
index 4c058f1..1b6094a 100644 (file)
@@ -15,7 +15,6 @@ obj-y :=
 obj-m :=
 lib-y :=
 lib-m :=
-always :=
 always-y :=
 always-m :=
 targets :=
@@ -111,7 +110,7 @@ endif
 # ---------------------------------------------------------------------------
 
 quiet_cmd_cc_s_c = CC $(quiet_modtag)  $@
-      cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS), $(c_flags)) -fverbose-asm -S -o $@ $<
+      cmd_cc_s_c = $(CC) $(filter-out $(DEBUG_CFLAGS) $(CC_FLAGS_LTO), $(c_flags)) -fverbose-asm -S -o $@ $<
 
 $(obj)/%.s: $(src)/%.c FORCE
        $(call if_changed_dep,cc_s_c)
@@ -166,6 +165,15 @@ ifdef CONFIG_MODVERSIONS
 #   the actual value of the checksum generated by genksyms
 # o remove .tmp_<file>.o to <file>.o
 
+ifdef CONFIG_LTO_CLANG
+# Generate .o.symversions files for each .o with exported symbols, and link these
+# to the kernel and/or modules at the end.
+cmd_modversions_c =                                                            \
+       if $(NM) $@ 2>/dev/null | grep -q __ksymtab; then                       \
+               $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))  \
+                   > $@.symversions;                                           \
+       fi;
+else
 cmd_modversions_c =                                                            \
        if $(OBJDUMP) -h $@ | grep -q __ksymtab; then                           \
                $(call cmd_gensymtypes_c,$(KBUILD_SYMTYPES),$(@:.o=.symtypes))  \
@@ -177,9 +185,9 @@ cmd_modversions_c =                                                         \
                rm -f $(@D)/.tmp_$(@F:.o=.ver);                                 \
        fi
 endif
+endif
 
-ifdef CONFIG_FTRACE_MCOUNT_RECORD
-ifndef CC_USING_RECORD_MCOUNT
+ifdef CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
 # compiler will not generate __mcount_loc use recordmcount or recordmcount.pl
 ifdef BUILD_C_RECORDMCOUNT
 ifeq ("$(origin RECORDMCOUNT_WARN)", "command line")
@@ -206,31 +214,14 @@ recordmcount_source := $(srctree)/scripts/recordmcount.pl
 endif # BUILD_C_RECORDMCOUNT
 cmd_record_mcount = $(if $(findstring $(strip $(CC_FLAGS_FTRACE)),$(_c_flags)),        \
        $(sub_cmd_record_mcount))
-endif # CC_USING_RECORD_MCOUNT
-endif # CONFIG_FTRACE_MCOUNT_RECORD
+endif # CONFIG_FTRACE_MCOUNT_USE_RECORDMCOUNT
 
 ifdef CONFIG_STACK_VALIDATION
+ifndef CONFIG_LTO_CLANG
 ifneq ($(SKIP_STACK_VALIDATION),1)
 
 __objtool_obj := $(objtree)/tools/objtool/objtool
 
-objtool_args = $(if $(CONFIG_UNWINDER_ORC),orc generate,check)
-
-objtool_args += $(if $(part-of-module), --module,)
-
-ifndef CONFIG_FRAME_POINTER
-objtool_args += --no-fp
-endif
-ifdef CONFIG_GCOV_KERNEL
-objtool_args += --no-unreachable
-endif
-ifdef CONFIG_RETPOLINE
-  objtool_args += --retpoline
-endif
-ifdef CONFIG_X86_SMAP
-  objtool_args += --uaccess
-endif
-
 # 'OBJECT_FILES_NON_STANDARD := y': skip objtool checking for a directory
 # 'OBJECT_FILES_NON_STANDARD_foo.o := 'y': skip objtool checking for a file
 # 'OBJECT_FILES_NON_STANDARD_foo.o := 'n': override directory skip for a file
@@ -242,6 +233,7 @@ objtool_obj = $(if $(patsubst y%,, \
        $(__objtool_obj))
 
 endif # SKIP_STACK_VALIDATION
+endif # CONFIG_LTO_CLANG
 endif # CONFIG_STACK_VALIDATION
 
 # Rebuild all objects when objtool changes, or is enabled/disabled.
@@ -388,6 +380,18 @@ $(obj)/%.asn1.c $(obj)/%.asn1.h: $(src)/%.asn1 $(objtree)/scripts/asn1_compiler
 $(subdir-builtin): $(obj)/%/built-in.a: $(obj)/% ;
 $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ;
 
+# combine symversions for later processing
+quiet_cmd_update_lto_symversions = SYMVER  $@
+ifeq ($(CONFIG_LTO_CLANG) $(CONFIG_MODVERSIONS),y y)
+      cmd_update_lto_symversions =                                     \
+       rm -f $@.symversions                                            \
+       $(foreach n, $(filter-out FORCE,$^),                            \
+               $(if $(wildcard $(n).symversions),                      \
+                       ; cat $(n).symversions >> $@.symversions))
+else
+      cmd_update_lto_symversions = echo >/dev/null
+endif
+
 #
 # Rule to compile a set of .o files into one .a file (without symbol table)
 #
@@ -395,8 +399,11 @@ $(subdir-modorder): $(obj)/%/modules.order: $(obj)/% ;
 quiet_cmd_ar_builtin = AR      $@
       cmd_ar_builtin = rm -f $@; $(AR) cDPrST $@ $(real-prereqs)
 
+quiet_cmd_ar_and_symver = AR      $@
+      cmd_ar_and_symver = $(cmd_update_lto_symversions); $(cmd_ar_builtin)
+
 $(obj)/built-in.a: $(real-obj-y) FORCE
-       $(call if_changed,ar_builtin)
+       $(call if_changed,ar_and_symver)
 
 #
 # Rule to create modules.order file
@@ -416,15 +423,26 @@ $(obj)/modules.order: $(obj-m) FORCE
 #
 # Rule to compile a set of .o files into one .a file (with symbol table)
 #
+quiet_cmd_ar_lib = AR      $@
+      cmd_ar_lib = $(cmd_update_lto_symversions); $(cmd_ar)
+
 $(obj)/lib.a: $(lib-y) FORCE
-       $(call if_changed,ar)
+       $(call if_changed,ar_lib)
 
 # NOTE:
 # Do not replace $(filter %.o,^) with $(real-prereqs). When a single object
 # module is turned into a multi object module, $^ will contain header file
 # dependencies recorded in the .*.cmd file.
+ifdef CONFIG_LTO_CLANG
+quiet_cmd_link_multi-m = AR [M]  $@
+cmd_link_multi-m =                                             \
+       $(cmd_update_lto_symversions);                          \
+       rm -f $@;                                               \
+       $(AR) cDPrsT $@ $(filter %.o,$^)
+else
 quiet_cmd_link_multi-m = LD [M]  $@
       cmd_link_multi-m = $(LD) $(ld_flags) -r -o $@ $(filter %.o,$^)
+endif
 
 $(multi-used-m): FORCE
        $(call if_changed,link_multi-m)
index d9e0cea..22a8172 100644 (file)
@@ -34,9 +34,6 @@ __clean-files := \
        $(hostprogs-always-y) $(hostprogs-always-m) $(hostprogs-always-) \
        $(userprogs-always-y) $(userprogs-always-m) $(userprogs-always-)
 
-# deprecated
-__clean-files  += $(always) $(hostprogs-y) $(hostprogs-m) $(hostprogs-)
-
 __clean-files   := $(filter-out $(no-clean-files), $(__clean-files))
 
 # clean-files is given relative to the current directory, unless it
index b00855b..8cd67b1 100644 (file)
@@ -4,18 +4,6 @@ asflags-y  += $(EXTRA_AFLAGS)
 ccflags-y  += $(EXTRA_CFLAGS)
 cppflags-y += $(EXTRA_CPPFLAGS)
 ldflags-y  += $(EXTRA_LDFLAGS)
-ifneq ($(always),)
-$(warning 'always' is deprecated. Please use 'always-y' instead)
-always-y   += $(always)
-endif
-ifneq ($(hostprogs-y),)
-$(warning 'hostprogs-y' is deprecated. Please use 'hostprogs' instead)
-hostprogs  += $(hostprogs-y)
-endif
-ifneq ($(hostprogs-m),)
-$(warning 'hostprogs-m' is deprecated. Please use 'hostprogs' instead)
-hostprogs  += $(hostprogs-m)
-endif
 
 # flags that take effect in current and sub directories
 KBUILD_AFLAGS += $(subdir-asflags-y)
@@ -56,15 +44,19 @@ else
 obj-y          := $(filter-out %/, $(obj-y))
 endif
 
+# Expand $(foo-objs) $(foo-y) by calling $(call suffix-search,foo.o,-objs -y)
+suffix-search = $(foreach s,$(2),$($(1:.o=$s)))
 # If $(foo-objs), $(foo-y), $(foo-m), or $(foo-) exists, foo.o is a composite object
-multi-used-y := $(sort $(foreach m,$(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-))), $(m))))
-multi-used-m := $(sort $(foreach m,$(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)) $($(m:.o=-))), $(m))))
+multi-search = $(sort $(foreach m,$(1), $(if $(strip $(call suffix-search,$(m),$(2) -)), $(m))))
+multi-used-y := $(call multi-search,$(obj-y),-objs -y)
+multi-used-m := $(call multi-search,$(obj-m),-objs -y -m)
 multi-used   := $(multi-used-y) $(multi-used-m)
 
 # Replace multi-part objects by their individual parts,
 # including built-in.a from subdirectories
-real-obj-y := $(foreach m, $(obj-y), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-))),$($(m:.o=-objs)) $($(m:.o=-y)),$(m)))
-real-obj-m := $(foreach m, $(obj-m), $(if $(strip $($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)) $($(m:.o=-))),$($(m:.o=-objs)) $($(m:.o=-y)) $($(m:.o=-m)),$(m)))
+real-search = $(foreach m,$(1), $(if $(strip $(call suffix-search,$(m),$(2) -)),$(call suffix-search,$(m),$(2)),$(m)))
+real-obj-y := $(call real-search, $(obj-y),-objs -y)
+real-obj-m := $(call real-search, $(obj-m),-objs -y -m)
 
 always-y += $(always-m)
 
@@ -81,14 +73,14 @@ always-y += $(userprogs-always-y) $(userprogs-always-m)
 
 # DTB
 # If CONFIG_OF_ALL_DTBS is enabled, all DT blobs are built
-extra-y                                += $(dtb-y)
-extra-$(CONFIG_OF_ALL_DTBS)    += $(dtb-)
+always-y                       += $(dtb-y)
+always-$(CONFIG_OF_ALL_DTBS)   += $(dtb-)
 
 ifneq ($(CHECK_DTBS),)
-extra-y += $(patsubst %.dtb,%.dt.yaml, $(dtb-y))
-extra-y += $(patsubst %.dtbo,%.dt.yaml, $(dtb-y))
-extra-$(CONFIG_OF_ALL_DTBS) += $(patsubst %.dtb,%.dt.yaml, $(dtb-))
-extra-$(CONFIG_OF_ALL_DTBS) += $(patsubst %.dtbo,%.dt.yaml, $(dtb-))
+always-y += $(patsubst %.dtb,%.dt.yaml, $(dtb-y))
+always-y += $(patsubst %.dtbo,%.dt.yaml, $(dtb-y))
+always-$(CONFIG_OF_ALL_DTBS) += $(patsubst %.dtb,%.dt.yaml, $(dtb-))
+always-$(CONFIG_OF_ALL_DTBS) += $(patsubst %.dtbo,%.dt.yaml, $(dtb-))
 endif
 
 # Add subdir path
@@ -119,9 +111,11 @@ target-stem = $(basename $(patsubst $(obj)/%,%,$@))
 # These flags are needed for modversions and compiling, so we define them here
 # $(modname_flags) defines KBUILD_MODNAME as the name of the module it will
 # end up in (or would, if it gets compiled in)
-name-fix = $(call stringify,$(subst $(comma),_,$(subst -,_,$1)))
+name-fix-token = $(subst $(comma),_,$(subst -,_,$1))
+name-fix = $(call stringify,$(call name-fix-token,$1))
 basename_flags = -DKBUILD_BASENAME=$(call name-fix,$(basetarget))
-modname_flags  = -DKBUILD_MODNAME=$(call name-fix,$(modname))
+modname_flags  = -DKBUILD_MODNAME=$(call name-fix,$(modname)) \
+                -D__KBUILD_MODNAME=kmod_$(call name-fix-token,$(modname))
 modfile_flags  = -DKBUILD_MODFILE=$(call stringify,$(modfile))
 
 _c_flags       = $(filter-out $(CFLAGS_REMOVE_$(target-stem).o), \
@@ -220,6 +214,18 @@ dtc_cpp_flags  = -Wp,-MMD,$(depfile).pre.tmp -nostdinc                    \
                 $(addprefix -I,$(DTC_INCLUDE))                          \
                 -undef -D__DTS__
 
+# Objtool arguments are also needed for modfinal with LTO, so we define
+# them here to avoid duplication.
+objtool_args =                                                         \
+       $(if $(CONFIG_UNWINDER_ORC),orc generate,check)                 \
+       $(if $(part-of-module), --module,)                              \
+       $(if $(CONFIG_FRAME_POINTER),, --no-fp)                         \
+       $(if $(or $(CONFIG_GCOV_KERNEL),$(CONFIG_LTO_CLANG)),           \
+               --no-unreachable,)                                      \
+       $(if $(CONFIG_RETPOLINE), --retpoline,)                         \
+       $(if $(CONFIG_X86_SMAP), --uaccess,)                            \
+       $(if $(CONFIG_FTRACE_MCOUNT_USE_OBJTOOL), --mcount,)
+
 # Useful for describing the dependency of composite objects
 # Usage:
 #   $(call multi_depend, multi_used_targets, suffix_to_remove, suffix_to_add)
@@ -249,7 +255,7 @@ $(obj)/%: $(src)/%_shipped
 #      target: source(s) FORCE
 #              $(if_changed,ld/objcopy/gzip)
 #
-#      and add target to extra-y so that we know we have to
+#      and add target to 'targets' so that we know we have to
 #      read in the saved command line
 
 # Linking
@@ -321,7 +327,7 @@ $(obj)/%.dtb.S: $(obj)/%.dtb FORCE
 
 quiet_cmd_dtc = DTC     $@
 cmd_dtc = $(HOSTCC) -E $(dtc_cpp_flags) -x assembler-with-cpp -o $(dtc-tmp) $< ; \
-       $(DTC) -O $(patsubst .%,%,$(suffix $@)) -o $@ -b 0 \
+       $(DTC) -o $@ -b 0 \
                $(addprefix -i,$(dir $<) $(DTC_INCLUDE)) $(DTC_FLAGS) \
                -d $(depfile).dtc.tmp $(dtc-tmp) ; \
        cat $(depfile).pre.tmp $(depfile).dtc.tmp > $(depfile)
@@ -346,7 +352,7 @@ define rule_dtc
 endef
 
 $(obj)/%.dt.yaml: $(src)/%.dts $(DTC) $(DT_TMP_SCHEMA) FORCE
-       $(call if_changed_rule,dtc,yaml)
+       $(call if_changed_rule,dtc)
 
 dtc-tmp = $(subst $(comma),_,$(dot-target).dts.tmp)
 
index d49ec00..735e11e 100644 (file)
@@ -9,7 +9,7 @@ __modfinal:
 include include/config/auto.conf
 include $(srctree)/scripts/Kbuild.include
 
-# for c_flags
+# for c_flags and objtool_args
 include $(srctree)/scripts/Makefile.lib
 
 # find all modules listed in modules.order
@@ -30,8 +30,27 @@ quiet_cmd_cc_o_c = CC [M]  $@
 
 ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink)
 
+ifdef CONFIG_LTO_CLANG
+# With CONFIG_LTO_CLANG, reuse the object file we compiled for modpost to
+# avoid a second slow LTO link
+prelink-ext := .lto
+
+# ELF processing was skipped earlier because we didn't have native code,
+# so let's now process the prelinked binary before we link the module.
+
+ifdef CONFIG_STACK_VALIDATION
+ifneq ($(SKIP_STACK_VALIDATION),1)
+cmd_ld_ko_o +=                                                         \
+       $(objtree)/tools/objtool/objtool $(objtool_args)                \
+               $(@:.ko=$(prelink-ext).o);
+
+endif # SKIP_STACK_VALIDATION
+endif # CONFIG_STACK_VALIDATION
+
+endif # CONFIG_LTO_CLANG
+
 quiet_cmd_ld_ko_o = LD [M]  $@
-      cmd_ld_ko_o =                                                     \
+      cmd_ld_ko_o +=                                                   \
        $(LD) -r $(KBUILD_LDFLAGS)                                      \
                $(KBUILD_LDFLAGS_MODULE) $(LDFLAGS_MODULE)              \
                -T scripts/module.lds -o $@ $(filter %.o, $^);          \
@@ -53,8 +72,9 @@ if_changed_except = $(if $(call newer_prereqs_except,$(2))$(cmd-check),      \
        $(cmd);                                                              \
        printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd, @:)
 
+
 # Re-generate module BTFs if either module's .ko or vmlinux changed
-$(modules): %.ko: %.o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
+$(modules): %.ko: %$(prelink-ext).o %.mod.o scripts/module.lds $(if $(KBUILD_BUILTIN),vmlinux) FORCE
        +$(call if_changed_except,ld_ko_o,vmlinux)
 ifdef CONFIG_DEBUG_INFO_BTF_MODULES
        +$(if $(newer-prereqs),$(call cmd,btf_ko))
index f54b6ac..066beff 100644 (file)
@@ -43,6 +43,9 @@ __modpost:
 include include/config/auto.conf
 include scripts/Kbuild.include
 
+# for ld_flags
+include scripts/Makefile.lib
+
 MODPOST = scripts/mod/modpost                                                          \
        $(if $(CONFIG_MODVERSIONS),-m)                                                  \
        $(if $(CONFIG_MODULE_SRCVERSION_ALL),-a)                                        \
@@ -102,12 +105,30 @@ $(input-symdump):
        @echo >&2 'WARNING: Symbol version dump "$@" is missing.'
        @echo >&2 '         Modules may not have dependencies or modversions.'
 
+ifdef CONFIG_LTO_CLANG
+# With CONFIG_LTO_CLANG, .o files might be LLVM bitcode, so we need to run
+# LTO to compile them into native code before running modpost
+prelink-ext := .lto
+
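+# The combined .symversions file is passed to the linker (-T) only when non-empty.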
+quiet_cmd_cc_lto_link_modules = LTO [M] $@
+cmd_cc_lto_link_modules =                                              \
+       $(LD) $(ld_flags) -r -o $@                                      \
+               $(shell [ -s $(@:.lto.o=.o.symversions) ] &&            \
+                       echo -T $(@:.lto.o=.o.symversions))             \
+               --whole-archive $^
+
+%.lto.o: %.o
+       $(call if_changed,cc_lto_link_modules)
+endif
+
+modules := $(sort $(shell cat $(MODORDER)))
+
 # Read out modules.order to pass in modpost.
 # Otherwise, allmodconfig would fail with "Argument list too long".
 quiet_cmd_modpost = MODPOST $@
-      cmd_modpost = sed 's/ko$$/o/' $< | $(MODPOST) -T -
+      cmd_modpost = sed 's/\.ko$$/$(prelink-ext)\.o/' $< | $(MODPOST) -T -
 
-$(output-symdump): $(MODORDER) $(input-symdump) FORCE
+$(output-symdump): $(MODORDER) $(input-symdump) $(modules:.ko=$(prelink-ext).o) FORCE
        $(call if_changed,modpost)
 
 targets += $(output-symdump)
index 0e53a93..9e2092f 100644 (file)
@@ -8,8 +8,6 @@ ubsan-cflags-$(CONFIG_UBSAN_LOCAL_BOUNDS)       += -fsanitize=local-bounds
 ubsan-cflags-$(CONFIG_UBSAN_SHIFT)             += -fsanitize=shift
 ubsan-cflags-$(CONFIG_UBSAN_DIV_ZERO)          += -fsanitize=integer-divide-by-zero
 ubsan-cflags-$(CONFIG_UBSAN_UNREACHABLE)       += -fsanitize=unreachable
-ubsan-cflags-$(CONFIG_UBSAN_SIGNED_OVERFLOW)   += -fsanitize=signed-integer-overflow
-ubsan-cflags-$(CONFIG_UBSAN_UNSIGNED_OVERFLOW) += -fsanitize=unsigned-integer-overflow
 ubsan-cflags-$(CONFIG_UBSAN_OBJECT_SIZE)       += -fsanitize=object-size
 ubsan-cflags-$(CONFIG_UBSAN_BOOL)              += -fsanitize=bool
 ubsan-cflags-$(CONFIG_UBSAN_ENUM)              += -fsanitize=enum
index 2b366d9..d8f6f9c 100755 (executable)
@@ -34,9 +34,6 @@ case "$KBUILD_VERBOSE" in
        ;;
 esac
 
-# We need access to CONFIG_ symbols
-. include/config/auto.conf
-
 # Generate a new symbol list file
 $CONFIG_SHELL $srctree/scripts/gen_autoksyms.sh "$new_ksyms_file"
 
diff --git a/scripts/cc-version.sh b/scripts/cc-version.sh
new file mode 100755 (executable)
index 0000000..3f2ee88
--- /dev/null
@@ -0,0 +1,82 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Print the compiler name and its version in a canonical 5- or 6-digit form.
+# Also, perform the minimum version check.
+
+set -e
+
+# When you raise the minimum compiler version, please update
+# Documentation/process/changes.rst as well.
+gcc_min_version=4.9.0
+clang_min_version=10.0.1
+icc_min_version=16.0.3 # temporary
+
+# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=63293
+# https://lore.kernel.org/r/20210107111841.GN1551@shell.armlinux.org.uk
+if [ "$SRCARCH" = arm64 ]; then
+       gcc_min_version=5.1.0
+fi
+
+# Print the compiler name and some version components.
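+# For gcc 10.2.0 this prints 'GCC 10 2 0' (whitespace may vary).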
+get_compiler_info()
+{
+       cat <<- EOF | "$@" -E -P -x c - 2>/dev/null
+       #if defined(__clang__)
+       Clang   __clang_major__  __clang_minor__  __clang_patchlevel__
+       #elif defined(__INTEL_COMPILER)
+       ICC     __INTEL_COMPILER  __INTEL_COMPILER_UPDATE
+       #elif defined(__GNUC__)
+       GCC     __GNUC__  __GNUC_MINOR__  __GNUC_PATCHLEVEL__
+       #else
+       unknown
+       #endif
+       EOF
+}
+
+# Convert the version string x.y.z to a canonical 5 or 6-digit form.
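+# e.g. 4.9.0 becomes 40900 and 10.0.1 becomes 100001.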
+get_canonical_version()
+{
+       IFS=.
+       set -- $1
+       echo $((10000 * $1 + 100 * $2 + $3))
+}
+
+# $@ instead of $1 because multiple words might be given, e.g. CC="ccache gcc".
+orig_args="$@"
+set -- $(get_compiler_info "$@")
+
+name=$1
+
+case "$name" in
+GCC)
+       version=$2.$3.$4
+       min_version=$gcc_min_version
+       ;;
+Clang)
+       version=$2.$3.$4
+       min_version=$clang_min_version
+       ;;
+ICC)
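+       # e.g. __INTEL_COMPILER=1600 with __INTEL_COMPILER_UPDATE=3 gives 16.0.3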
+       version=$(($2 / 100)).$(($2 % 100)).$3
+       min_version=$icc_min_version
+       ;;
+*)
+       echo "$orig_args: unknown compiler" >&2
+       exit 1
+       ;;
+esac
+
+cversion=$(get_canonical_version $version)
+min_cversion=$(get_canonical_version $min_version)
+
+if [ "$cversion" -lt "$min_cversion" ]; then
+       echo >&2 "***"
+       echo >&2 "*** Compiler is too old."
+       echo >&2 "***   Your $name version:    $version"
+       echo >&2 "***   Minimum $name version: $min_version"
+       echo >&2 "***"
+       exit 1
+fi
+
+echo $name $cversion
index 4b2775f..df8b23d 100755 (executable)
@@ -382,6 +382,7 @@ our $InitAttribute = qr{$InitAttributeData|$InitAttributeConst|$InitAttributeIni
 # We need \b after 'init' otherwise 'initconst' will cause a false positive in a check
 our $Attribute = qr{
                        const|
+                       volatile|
                        __percpu|
                        __nocast|
                        __safe|
@@ -486,7 +487,7 @@ our $logFunctions = qr{(?x:
 
 our $allocFunctions = qr{(?x:
        (?:(?:devm_)?
-               (?:kv|k|v)[czm]alloc(?:_node|_array)? |
+               (?:kv|k|v)[czm]alloc(?:_array)?(?:_node)? |
                kstrdup(?:_const)? |
                kmemdup(?:_nul)?) |
        (?:\w+)?alloc_skb(?:_ip_align)? |
@@ -506,6 +507,30 @@ our $signature_tags = qr{(?xi:
        Cc:
 )};
 
+our $tracing_logging_tags = qr{(?xi:
+       [=-]*> |
+       <[=-]* |
+       \[ |
+       \] |
+       start |
+       called |
+       entered |
+       entry |
+       enter |
+       in |
+       inside |
+       here |
+       begin |
+       exit |
+       end |
+       done |
+       leave |
+       completed |
+       out |
+       return |
+       [\.\!:\s]*
+)};
+
 sub edit_distance_min {
        my (@arr) = @_;
        my $len = scalar @arr;
@@ -2428,6 +2453,15 @@ sub get_raw_comment {
        return $comment;
 }
 
+sub exclude_global_initialisers {
+       my ($realfile) = @_;
+
+       # Do not check for BPF programs (tools/testing/selftests/bpf/progs/*.c, samples/bpf/*_kern.c, *.bpf.c).
+       return $realfile =~ m@^tools/testing/selftests/bpf/progs/.*\.c$@ ||
+               $realfile =~ m@^samples/bpf/.*_kern\.c$@ ||
+               $realfile =~ m@/bpf/.*\.bpf\.c$@;
+}
+
 sub process {
        my $filename = shift;
 
@@ -2973,7 +3007,7 @@ sub process {
                                }
                                if (!defined $lines[$linenr]) {
                                        WARN("BAD_SIGN_OFF",
-                                             "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline);
+                                            "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline);
                                } elsif ($rawlines[$linenr] !~ /^\s*signed-off-by:\s*(.*)/i) {
                                        WARN("BAD_SIGN_OFF",
                                             "Co-developed-by: must be immediately followed by Signed-off-by:\n" . "$here\n" . $rawline . "\n" .$rawlines[$linenr]);
@@ -2996,8 +3030,8 @@ sub process {
                        if (ERROR("GERRIT_CHANGE_ID",
                                  "Remove Gerrit Change-Id's before submitting upstream\n" . $herecurr) &&
                            $fix) {
-                                fix_delete_line($fixlinenr, $rawline);
-                        }
+                               fix_delete_line($fixlinenr, $rawline);
+                       }
                }
 
 # Check if the commit log is in a possible stack dump
@@ -3239,10 +3273,10 @@ sub process {
                                next if ($start_char =~ /^\S$/);
                                next if (index(" \t.,;?!", $end_char) == -1);
 
-                                # avoid repeating hex occurrences like 'ff ff fe 09 ...'
-                                if ($first =~ /\b[0-9a-f]{2,}\b/i) {
-                                        next if (!exists($allow_repeated_words{lc($first)}));
-                                }
+                               # avoid repeating hex occurrences like 'ff ff fe 09 ...'
+                               if ($first =~ /\b[0-9a-f]{2,}\b/i) {
+                                       next if (!exists($allow_repeated_words{lc($first)}));
+                               }
 
                                if (WARN("REPEATED_WORD",
                                         "Possible repeated word: '$first'\n" . $herecurr) &&
@@ -3574,6 +3608,13 @@ sub process {
                        }
                }
 
+# check for .L prefix local symbols in .S files
+               if ($realfile =~ /\.S$/ &&
+                   $line =~ /^\+\s*(?:[A-Z]+_)?SYM_[A-Z]+_(?:START|END)(?:_[A-Z_]+)?\s*\(\s*\.L/) {
+                       WARN("AVOID_L_PREFIX",
+                            "Avoid using '.L' prefixed local symbol names for denoting a range of code via 'SYM_*_START/END' annotations; see Documentation/asm-annotations.rst\n" . $herecurr);
+               }
+
 # check we are in a valid source file C or perl if not then ignore this hunk
                next if ($realfile !~ /\.(h|c|pl|dtsi|dts)$/);
 
@@ -3776,43 +3817,48 @@ sub process {
                }
 
 # check for missing blank lines after declarations
-               if ($sline =~ /^\+\s+\S/ &&                     #Not at char 1
-                       # actual declarations
-                   ($prevline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+# (declarations must have the same indentation and not be at the start of line)
+               if (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/) {
+                       # use temporaries
+                       my $sl = $sline;
+                       my $pl = $prevline;
+                       # remove $Attribute/$Sparse uses to simplify comparisons
+                       $sl =~ s/\b(?:$Attribute|$Sparse)\b//g;
+                       $pl =~ s/\b(?:$Attribute|$Sparse)\b//g;
+                       if (($pl =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
                        # function pointer declarations
-                    $prevline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
+                            $pl =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
                        # foo bar; where foo is some local typedef or #define
-                    $prevline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+                            $pl =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
                        # known declaration macros
-                    $prevline =~ /^\+\s+$declaration_macros/) &&
+                            $pl =~ /^\+\s+$declaration_macros/) &&
                        # for "else if" which can look like "$Ident $Ident"
-                   !($prevline =~ /^\+\s+$c90_Keywords\b/ ||
+                           !($pl =~ /^\+\s+$c90_Keywords\b/ ||
                        # other possible extensions of declaration lines
-                     $prevline =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
+                             $pl =~ /(?:$Compare|$Assignment|$Operators)\s*$/ ||
                        # not starting a section or a macro "\" extended line
-                     $prevline =~ /(?:\{\s*|\\)$/) &&
+                             $pl =~ /(?:\{\s*|\\)$/) &&
                        # looks like a declaration
-                   !($sline =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
+                           !($sl =~ /^\+\s+$Declare\s*$Ident\s*[=,;:\[]/ ||
                        # function pointer declarations
-                     $sline =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
+                             $sl =~ /^\+\s+$Declare\s*\(\s*\*\s*$Ident\s*\)\s*[=,;:\[\(]/ ||
                        # foo bar; where foo is some local typedef or #define
-                     $sline =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
+                             $sl =~ /^\+\s+$Ident(?:\s+|\s*\*\s*)$Ident\s*[=,;\[]/ ||
                        # known declaration macros
-                     $sline =~ /^\+\s+$declaration_macros/ ||
+                             $sl =~ /^\+\s+$declaration_macros/ ||
                        # start of struct or union or enum
-                     $sline =~ /^\+\s+(?:static\s+)?(?:const\s+)?(?:union|struct|enum|typedef)\b/ ||
+                             $sl =~ /^\+\s+(?:static\s+)?(?:const\s+)?(?:union|struct|enum|typedef)\b/ ||
                        # start or end of block or continuation of declaration
-                     $sline =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
+                             $sl =~ /^\+\s+(?:$|[\{\}\.\#\"\?\:\(\[])/ ||
                        # bitfield continuation
-                     $sline =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ ||
+                             $sl =~ /^\+\s+$Ident\s*:\s*\d+\s*[,;]/ ||
                        # other possible extensions of declaration lines
-                     $sline =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/) &&
-                       # indentation of previous and current line are the same
-                   (($prevline =~ /\+(\s+)\S/) && $sline =~ /^\+$1\S/)) {
-                       if (WARN("LINE_SPACING",
-                                "Missing a blank line after declarations\n" . $hereprev) &&
-                           $fix) {
-                               fix_insert_line($fixlinenr, "\+");
+                             $sl =~ /^\+\s+\(?\s*(?:$Compare|$Assignment|$Operators)/)) {
+                               if (WARN("LINE_SPACING",
+                                        "Missing a blank line after declarations\n" . $hereprev) &&
+                                   $fix) {
+                                       fix_insert_line($fixlinenr, "\+");
+                               }
                        }
                }
 
@@ -4283,8 +4329,7 @@ sub process {
                if (defined $realline_next &&
                    exists $lines[$realline_next - 1] &&
                    !defined $suppress_export{$realline_next} &&
-                   ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
-                    $lines[$realline_next - 1] =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
+                   ($lines[$realline_next - 1] =~ /EXPORT_SYMBOL.*\((.*)\)/)) {
                        # Handle definitions which produce identifiers with
                        # a prefix:
                        #   XXX(foo);
@@ -4311,8 +4356,7 @@ sub process {
                }
                if (!defined $suppress_export{$linenr} &&
                    $prevline =~ /^.\s*$/ &&
-                   ($line =~ /EXPORT_SYMBOL.*\((.*)\)/ ||
-                    $line =~ /EXPORT_UNUSED_SYMBOL.*\((.*)\)/)) {
+                   ($line =~ /EXPORT_SYMBOL.*\((.*)\)/)) {
 #print "FOO B <$lines[$linenr - 1]>\n";
                        $suppress_export{$linenr} = 2;
                }
@@ -4323,7 +4367,8 @@ sub process {
                }
 
 # check for global initialisers.
-               if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*($zero_initializer)\s*;/) {
+               if ($line =~ /^\+$Type\s*$Ident(?:\s+$Modifier)*\s*=\s*($zero_initializer)\s*;/ &&
+                   !exclude_global_initialisers($realfile)) {
                        if (ERROR("GLOBAL_INITIALISERS",
                                  "do not initialise globals to $1\n" . $herecurr) &&
                            $fix) {
@@ -4419,7 +4464,7 @@ sub process {
                        WARN("STATIC_CONST_CHAR_ARRAY",
                             "char * array declaration might be better as static const\n" .
                                $herecurr);
-               }
+               }
 
 # check for sizeof(foo)/sizeof(foo[0]) that could be ARRAY_SIZE(foo)
                if ($line =~ m@\bsizeof\s*\(\s*($Lval)\s*\)@) {
@@ -5009,7 +5054,7 @@ sub process {
                                # A colon needs no spaces before when it is
                                # terminating a case value or a label.
                                } elsif ($opv eq ':C' || $opv eq ':L') {
-                                       if ($ctx =~ /Wx./) {
+                                       if ($ctx =~ /Wx./ and $realfile !~ m@.*\.lds\.h$@) {
                                                if (ERROR("SPACING",
                                                          "space prohibited before that '$op' $at\n" . $hereptr)) {
                                                        $good = rtrim($fix_elements[$n]) . trim($fix_elements[$n + 1]);
@@ -5272,7 +5317,7 @@ sub process {
                    $lines[$linenr - 3] !~ /^[ +]\s*$Ident\s*:/) {
                        WARN("RETURN_VOID",
                             "void function return statements are not generally useful\n" . $hereprev);
-               }
+               }
 
 # if statements using unnecessary parentheses - ie: if ((foo == bar))
                if ($perl_version_ok &&
@@ -5968,6 +6013,17 @@ sub process {
                             "Prefer using '\"%s...\", __func__' to using '$context_function', this function's name, in a string\n" . $herecurr);
                }
 
+# check for unnecessary function tracing like uses
+# This does not use $logFunctions because there are many instances like
+# 'dprintk(FOO, "%s()\n", __func__);' which do not match $logFunctions
+               if ($rawline =~ /^\+.*\([^"]*"$tracing_logging_tags{0,3}%s(?:\s*\(\s*\)\s*)?$tracing_logging_tags{0,3}(?:\\n)?"\s*,\s*__func__\s*\)\s*;/) {
+                       if (WARN("TRACING_LOGGING",
+                                "Unnecessary ftrace-like logging - prefer using ftrace\n" . $herecurr) &&
+                           $fix) {
+                                fix_delete_line($fixlinenr, $rawline);
+                       }
+               }
+
 # check for spaces before a quoted newline
                if ($rawline =~ /^.*\".*\s\\n/) {
                        if (WARN("QUOTED_WHITESPACE_BEFORE_NEWLINE",
@@ -6479,18 +6535,18 @@ sub process {
                if ($line =~ /(\(\s*$C90_int_types\s*\)\s*)($Constant)\b/) {
                        my $cast = $1;
                        my $const = $2;
+                       my $suffix = "";
+                       my $newconst = $const;
+                       $newconst =~ s/${Int_type}$//;
+                       $suffix .= 'U' if ($cast =~ /\bunsigned\b/);
+                       if ($cast =~ /\blong\s+long\b/) {
+                           $suffix .= 'LL';
+                       } elsif ($cast =~ /\blong\b/) {
+                           $suffix .= 'L';
+                       }
                        if (WARN("TYPECAST_INT_CONSTANT",
-                                "Unnecessary typecast of c90 int constant\n" . $herecurr) &&
+                                "Unnecessary typecast of c90 int constant - '$cast$const' could be '$const$suffix'\n" . $herecurr) &&
                            $fix) {
-                               my $suffix = "";
-                               my $newconst = $const;
-                               $newconst =~ s/${Int_type}$//;
-                               $suffix .= 'U' if ($cast =~ /\bunsigned\b/);
-                               if ($cast =~ /\blong\s+long\b/) {
-                                       $suffix .= 'LL';
-                               } elsif ($cast =~ /\blong\b/) {
-                                       $suffix .= 'L';
-                               }
                                $fixed[$fixlinenr] =~ s/\Q$cast\E$const\b/$newconst$suffix/;
                        }
                }
@@ -7021,12 +7077,14 @@ sub process {
 
 # use of NR_CPUS is usually wrong
 # ignore definitions of NR_CPUS and usage to define arrays as likely right
+# ignore designated initializers using NR_CPUS
                if ($line =~ /\bNR_CPUS\b/ &&
                    $line !~ /^.\s*\s*#\s*if\b.*\bNR_CPUS\b/ &&
                    $line !~ /^.\s*\s*#\s*define\b.*\bNR_CPUS\b/ &&
                    $line !~ /^.\s*$Declare\s.*\[[^\]]*NR_CPUS[^\]]*\]/ &&
                    $line !~ /\[[^\]]*\.\.\.[^\]]*NR_CPUS[^\]]*\]/ &&
-                   $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/)
+                   $line !~ /\[[^\]]*NR_CPUS[^\]]*\.\.\.[^\]]*\]/ &&
+                   $line !~ /^.\s*\.\w+\s*=\s*.*\bNR_CPUS\b/)
                {
                        WARN("NR_CPUS",
                             "usage of NR_CPUS is often wrong - consider using cpu_possible(), num_possible_cpus(), for_each_possible_cpu(), etc\n" . $herecurr);
index 8ddb5d0..b7e9ecf 100755 (executable)
@@ -20,7 +20,9 @@ _DEFAULT_LOG_LEVEL = 'WARNING'
 _FILENAME_PATTERN = r'^\..*\.cmd$'
 _LINE_PATTERN = r'^cmd_[^ ]*\.o := (.* )([^ ]*\.c)$'
 _VALID_LOG_LEVELS = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
-
+# The tools/ directory uses a different build system and produces .cmd
+# files in a different format, so it is not supported here.
+_EXCLUDE_DIRS = ['.git', 'Documentation', 'include', 'tools']
 
 def parse_arguments():
     """Sets up and parses command-line arguments.
@@ -80,8 +82,14 @@ def cmdfiles_in_dir(directory):
     """
 
     filename_matcher = re.compile(_FILENAME_PATTERN)
+    exclude_dirs = [ os.path.join(directory, d) for d in _EXCLUDE_DIRS ]
+
+    for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
+        # Prune unwanted directories.
+        if dirpath in exclude_dirs:
+            dirnames[:] = []
+            continue
 
-    for dirpath, _, filenames in os.walk(directory):
         for filename in filenames:
             if filename_matcher.match(filename):
                 yield os.path.join(dirpath, filename)
diff --git a/scripts/clang-version.sh b/scripts/clang-version.sh
deleted file mode 100755 (executable)
index 6fabf06..0000000
+++ /dev/null
@@ -1,19 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# clang-version clang-command
-#
-# Print the compiler version of `clang-command' in a 5 or 6-digit form
-# such as `50001' for clang-5.0.1 etc.
-
-compiler="$*"
-
-if ! ( $compiler --version | grep -q clang) ; then
-       echo 0
-       exit 1
-fi
-
-MAJOR=$(echo __clang_major__ | $compiler -E -x c - | tail -n 1)
-MINOR=$(echo __clang_minor__ | $compiler -E -x c - | tail -n 1)
-PATCHLEVEL=$(echo __clang_patchlevel__ | $compiler -E -x c - | tail -n 1)
-printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
index b814e60..8a8b62b 100644 (file)
@@ -1,2 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
 dtc
+fdtoverlay
index c8c21e0..95aaf74 100644 (file)
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 # scripts/dtc makefile
 
+# *** Also keep .gitignore in sync when changing ***
 hostprogs-always-$(CONFIG_DTC)         += dtc fdtoverlay
 hostprogs-always-$(CHECK_DT_BINDING)   += dtc
 
index 5c113ca..39e65fe 100755 (executable)
@@ -57,9 +57,9 @@ if arg_contain --version "$@"; then
 fi
 
 if arg_contain -E "$@"; then
-       # For scripts/gcc-version.sh; This emulates GCC 20.0.0
+       # For scripts/cc-version.sh; This emulates GCC 20.0.0
        if arg_contain - "$@"; then
-               sed 's/^__GNUC__$/20/; s/^__GNUC_MINOR__$/0/; s/^__GNUC_PATCHLEVEL__$/0/'
+               sed -n '/^GCC/{s/__GNUC__/20/; s/__GNUC_MINOR__/0/; s/__GNUC_PATCHLEVEL__/0/; p;}'
                exit 0
        else
                echo "no input files" >&2
@@ -73,6 +73,15 @@ if arg_contain -S "$@"; then
                echo "%gs"
                exit 0
        fi
+
+       # For arch/powerpc/tools/gcc-check-mprofile-kernel.sh
+       if arg_contain -m64 "$@" && arg_contain -mlittle-endian "$@" &&
+               arg_contain -mprofile-kernel "$@"; then
+               if ! test -t 0 && ! grep -q notrace; then
+                       echo "_mcount"
+               fi
+               exit 0
+       fi
 fi
 
 # To set GCC_PLUGINS
@@ -85,3 +94,8 @@ if arg_contain -print-file-name=plugin "$@"; then
        echo $plugin_dir
        exit 0
 fi
+
+# inverted return value
+if arg_contain -D__SIZEOF_INT128__=0 "$@"; then
+       exit 1
+fi
index b5487cc..1952d3b 100644 (file)
@@ -22,6 +22,7 @@ always-y += $(GCC_PLUGIN)
 GCC_PLUGINS_DIR = $(shell $(CC) -print-file-name=plugin)
 
 plugin_cxxflags        = -Wp,-MMD,$(depfile) $(KBUILD_HOSTCXXFLAGS) -fPIC \
+                 -include $(srctree)/include/linux/compiler-version.h \
                   -I $(GCC_PLUGINS_DIR)/include -I $(obj) -std=gnu++11 \
                   -fno-rtti -fno-exceptions -fasynchronous-unwind-tables \
                   -ggdb -Wno-narrowing -Wno-unused-variable \
index 9dced66..589454b 100644 (file)
@@ -524,7 +524,7 @@ static unsigned int latent_entropy_execute(void)
        while (bb != EXIT_BLOCK_PTR_FOR_FN(cfun)) {
                perturb_local_entropy(bb, local_entropy);
                bb = bb->next_bb;
-       };
+       }
 
        /* 4. mix local entropy into the global entropy variable */
        perturb_latent_entropy(local_entropy);
index 29b480c..d7190e4 100644 (file)
@@ -170,7 +170,6 @@ static void initialize(tree var)
 static unsigned int structleak_execute(void)
 {
        basic_block bb;
-       unsigned int ret = 0;
        tree var;
        unsigned int i;
 
@@ -200,7 +199,7 @@ static unsigned int structleak_execute(void)
                        initialize(var);
        }
 
-       return ret;
+       return 0;
 }
 
 #define PASS_NAME structleak
diff --git a/scripts/gcc-version.sh b/scripts/gcc-version.sh
deleted file mode 100755 (executable)
index ae35343..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# gcc-version gcc-command
-#
-# Print the gcc version of `gcc-command' in a 5 or 6-digit form
-# such as `29503' for gcc-2.95.3, `30301' for gcc-3.3.1, etc.
-
-compiler="$*"
-
-if [ ${#compiler} -eq 0 ]; then
-       echo "Error: No compiler specified." >&2
-       printf "Usage:\n\t$0 <gcc-command>\n" >&2
-       exit 1
-fi
-
-MAJOR=$(echo __GNUC__ | $compiler -E -x c - | tail -n 1)
-MINOR=$(echo __GNUC_MINOR__ | $compiler -E -x c - | tail -n 1)
-PATCHLEVEL=$(echo __GNUC_PATCHLEVEL__ | $compiler -E -x c - | tail -n 1)
-printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
index 1247550..48941fa 100644 (file)
@@ -7,7 +7,7 @@ symlinks := $(patsubst $(srctree)/$(src)/%,%,$(wildcard $(srctree)/$(src)/*.py))
 quiet_cmd_symlink = SYMLINK $@
       cmd_symlink = ln -fsn $(patsubst $(obj)/%,$(abspath $(srctree))/$(src)/%,$@) $@
 
-extra-y += $(symlinks)
+always-y += $(symlinks)
 $(addprefix $(obj)/, $(symlinks)): FORCE
        $(call if_changed,symlink)
 
@@ -18,7 +18,7 @@ quiet_cmd_gen_constants_py = GEN     $@
        $(CPP) -E -x c -P $(c_flags) $< > $@ ;\
        sed -i '1,/<!-- end-c-headers -->/d;' $@
 
-extra-y += constants.py
+always-y += constants.py
 $(obj)/constants.py: $(src)/constants.py.in FORCE
        $(call if_changed_dep,gen_constants_py)
 
index c487ddf..bae4d70 100644 (file)
@@ -27,6 +27,11 @@ def list_for_each(head):
         raise TypeError("Must be struct list_head not {}"
                            .format(head.type))
 
+    if head['next'] == 0:
+        gdb.write("list_for_each: Uninitialized list '{}' treated as empty\n"
+                     .format(head.address))
+        return
+
     node = head['next'].dereference()
     while node.address != head.address:
         yield node.address
index d54dfba..da32015 100755 (executable)
@@ -19,7 +19,26 @@ esac
 # We need access to CONFIG_ symbols
 . include/config/auto.conf
 
-ksym_wl=/dev/null
+needed_symbols=
+
+# Special case for modversions (see modpost.c)
+if [ -n "$CONFIG_MODVERSIONS" ]; then
+       needed_symbols="$needed_symbols module_layout"
+fi
+
+# With CONFIG_LTO_CLANG, LLVM bitcode has not yet been compiled into a binary
+# when the .mod files are generated, which means they don't yet contain
+# references to certain symbols that will be present in the final binaries.
+if [ -n "$CONFIG_LTO_CLANG" ]; then
+       # intrinsic functions
+       needed_symbols="$needed_symbols memcpy memmove memset"
+       # ftrace
+       needed_symbols="$needed_symbols _mcount"
+       # stack protector symbols
+       needed_symbols="$needed_symbols __stack_chk_fail __stack_chk_guard"
+fi
+
+ksym_wl=
 if [ -n "$CONFIG_UNUSED_KSYMS_WHITELIST" ]; then
        # Use 'eval' to expand the whitelist path and check if it is relative
        eval ksym_wl="$CONFIG_UNUSED_KSYMS_WHITELIST"
@@ -40,16 +59,14 @@ cat > "$output_file" << EOT
 EOT
 
 [ -f modules.order ] && modlist=modules.order || modlist=/dev/null
-sed 's/ko$/mod/' $modlist |
-xargs -n1 sed -n -e '2{s/ /\n/g;/^$/!p;}' -- |
-cat - "$ksym_wl" |
+
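+# Emit one symbol per line and drop blanks; duplicates are removed below.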
+{
+       sed 's/ko$/mod/' $modlist | xargs -n1 sed -n -e '2p'
+       echo "$needed_symbols"
+       [ -n "$ksym_wl" ] && cat "$ksym_wl"
+} | sed -e 's/ /\n/g' | sed -n -e '/^$/!p' |
 # Remove the dot prefix for ppc64; symbol names with a dot (.) hold entry
 # point addresses.
 sed -e 's/^\.//' |
 sort -u |
 sed -e 's/\(.*\)/#define __KSYM_\1 1/' >> "$output_file"
-
-# Special case for modversions (see modpost.c)
-if [ -n "$CONFIG_MODVERSIONS" ]; then
-       echo "#define __KSYM_module_layout 1" >> "$output_file"
-fi
diff --git a/scripts/generate_initcall_order.pl b/scripts/generate_initcall_order.pl
new file mode 100755 (executable)
index 0000000..1a88d3f
--- /dev/null
@@ -0,0 +1,270 @@
+#!/usr/bin/env perl
+# SPDX-License-Identifier: GPL-2.0
+#
+# Generates a linker script that specifies the correct initcall order.
+#
+# Copyright (C) 2019 Google LLC
+
+use strict;
+use warnings;
+use IO::Handle;
+use IO::Select;
+use POSIX ":sys_wait_h";
+
+my $nm = $ENV{'NM'} || die "$0: ERROR: NM not set?";
+my $objtree = $ENV{'objtree'} || '.';
+
+## currently active child processes
+my $jobs = {};         # child process pid -> file handle
+## results from child processes
+my $results = {};      # object index -> [ { level, secname }, ... ]
+
+## reads _NPROCESSORS_ONLN to determine the maximum number of processes to
+## start
+sub get_online_processors {
+       open(my $fh, "getconf _NPROCESSORS_ONLN 2>/dev/null |")
+               or die "$0: ERROR: failed to execute getconf: $!";
+       my $procs = <$fh>;
+       close($fh);
+
+       if (!($procs =~ /^\d+$/)) {
+               return 1;
+       }
+
+       return int($procs);
+}
+
+## writes results to the parent process
+## format: <file index> <initcall level> <base initcall section name>
+sub write_results {
+       my ($index, $initcalls) = @_;
+
+       # sort by the counter value to ensure the order of initcalls within
+       # each object file is correct
+       foreach my $counter (sort { $a <=> $b } keys(%{$initcalls})) {
+               my $level = $initcalls->{$counter}->{'level'};
+
+               # section name for the initcall function
+               my $secname = $initcalls->{$counter}->{'module'} . '__' .
+                             $counter . '_' .
+                             $initcalls->{$counter}->{'line'} . '_' .
+                             $initcalls->{$counter}->{'function'};
+
+               print "$index $level $secname\n";
+       }
+}
+
+## reads a result line from a child process and adds it to the $results array
+sub read_results {
+       my ($fh) = @_;
+
+       # each child prints out a full line w/ autoflush and exits after the
+       # last line, so even if buffered I/O blocks here, it shouldn't block
+       # very long
+       my $data = <$fh>;
+
+       if (!defined($data)) {
+               return 0;
+       }
+
+       chomp($data);
+
+       my ($index, $level, $secname) = $data =~
+               /^(\d+)\ ([^\ ]+)\ (.*)$/;
+
+       if (!defined($index) ||
+               !defined($level) ||
+               !defined($secname)) {
+               die "$0: ERROR: child process returned invalid data: $data\n";
+       }
+
+       $index = int($index);
+
+       if (!exists($results->{$index})) {
+               $results->{$index} = [];
+       }
+
+       push (@{$results->{$index}}, {
+               'level'   => $level,
+               'secname' => $secname
+       });
+
+       return 1;
+}
+
+## finds initcalls from an object file or all object files in an archive, and
+## writes results back to the parent process
+sub find_initcalls {
+       my ($index, $file) = @_;
+
+       die "$0: ERROR: file $file doesn't exist?" if (! -f $file);
+
+       open(my $fh, "\"$nm\" --defined-only \"$file\" 2>/dev/null |")
+               or die "$0: ERROR: failed to execute \"$nm\": $!";
+
+       my $initcalls = {};
+
+       while (<$fh>) {
+               chomp;
+
+               # check for the start of a new object file (if processing an
+               # archive)
+               my ($path) = $_ =~ /^(.+)\:$/;
+
+               if (defined($path)) {
+                       write_results($index, $initcalls);
+                       $initcalls = {};
+                       next;
+               }
+
+               # look for an initcall
+               my ($module, $counter, $line, $symbol) = $_ =~
+                       /[a-z]\s+__initcall__(\S*)__(\d+)_(\d+)_(.*)$/;
+
+               if (!defined($module)) {
+                       $module = '';
+               }
+
+               if (!defined($counter) ||
+                       !defined($line) ||
+                       !defined($symbol)) {
+                       next;
+               }
+
+               # parse initcall level
+               my ($function, $level) = $symbol =~
+                       /^(.*)((early|rootfs|con|[0-9])s?)$/;
+
+               die "$0: ERROR: invalid initcall name $symbol in $file($path)"
+                       if (!defined($function) || !defined($level));
+
+               $initcalls->{$counter} = {
+                       'module'   => $module,
+                       'line'     => $line,
+                       'function' => $function,
+                       'level'    => $level,
+               };
+       }
+
+       close($fh);
+       write_results($index, $initcalls);
+}
+
+## waits for any child process to complete, reads the results, and adds them to
+## the $results array for later processing
+sub wait_for_results {
+       my ($select) = @_;
+
+       my $pid = 0;
+       do {
+               # unblock children that may have a full write buffer
+               foreach my $fh ($select->can_read(0)) {
+                       read_results($fh);
+               }
+
+               # check for children that have exited, read the remaining data
+               # from them, and clean up
+               $pid = waitpid(-1, WNOHANG);
+               if ($pid > 0) {
+                       if (!exists($jobs->{$pid})) {
+                               next;
+                       }
+
+                       my $fh = $jobs->{$pid};
+                       $select->remove($fh);
+
+                       while (read_results($fh)) {
+                               # until eof
+                       }
+
+                       close($fh);
+                       delete($jobs->{$pid});
+               }
+       } while ($pid > 0);
+}
+
+## forks a child to process each file passed in the command line and collects
+## the results
+sub process_files {
+       my $index = 0;
+       my $njobs = $ENV{'PARALLELISM'} || get_online_processors();
+       my $select = IO::Select->new();
+
+       while (my $file = shift(@ARGV)) {
+               # fork a child process and read its stdout
+               my $pid = open(my $fh, '-|');
+
+               if (!defined($pid)) {
+                       die "$0: ERROR: failed to fork: $!";
+               } elsif ($pid) {
+                       # save the child process pid and the file handle
+                       $select->add($fh);
+                       $jobs->{$pid} = $fh;
+               } else {
+                       # in the child process
+                       STDOUT->autoflush(1);
+                       find_initcalls($index, "$objtree/$file");
+                       exit;
+               }
+
+               $index++;
+
+               # limit the number of children to $njobs
+               if (scalar(keys(%{$jobs})) >= $njobs) {
+                       wait_for_results($select);
+               }
+       }
+
+       # wait for the remaining children to complete
+       while (scalar(keys(%{$jobs})) > 0) {
+               wait_for_results($select);
+       }
+}
+
+sub generate_initcall_lds() {
+       process_files();
+
+       my $sections = {};      # level -> [ secname, ...]
+
+       # sort results to retain link order and split to sections per
+       # initcall level
+       foreach my $index (sort { $a <=> $b } keys(%{$results})) {
+               foreach my $result (@{$results->{$index}}) {
+                       my $level = $result->{'level'};
+
+                       if (!exists($sections->{$level})) {
+                               $sections->{$level} = [];
+                       }
+
+                       push(@{$sections->{$level}}, $result->{'secname'});
+               }
+       }
+
+       die "$0: ERROR: no initcalls?" if (!keys(%{$sections}));
+
+       # print out a linker script that defines the order of initcalls for
+       # each level
+       print "SECTIONS {\n";
+
+       foreach my $level (sort(keys(%{$sections}))) {
+               my $section;
+
+               if ($level eq 'con') {
+                       $section = '.con_initcall.init';
+               } else {
+                       $section = ".initcall${level}.init";
+               }
+
+               print "\t${section} : {\n";
+
+               foreach my $secname (@{$sections->{$level}}) {
+                       print "\t\t*(${section}..${secname}) ;\n";
+               }
+
+               print "\t}\n";
+       }
+
+       print "}\n";
+}
+
+generate_initcall_lds();
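
A rough usage sketch (object names are illustrative; NM and PARALLELISM are read from the environment exactly as the script expects, mirroring how link-vmlinux.sh drives it later in this series):

        # run against a built tree to produce the initcall ordering script
        NM=nm PARALLELISM="$(nproc)" \
                perl scripts/generate_initcall_order.pl vmlinux.o > .tmp_initcalls.lds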
index 23eff23..4827c5a 100644 (file)
@@ -29,7 +29,7 @@ static struct symbol *symtab[HASH_BUCKETS];
 static FILE *debugfile;
 
 int cur_line = 1;
-char *cur_filename, *source_file;
+char *cur_filename;
 int in_source_file;
 
 static int flag_debug, flag_dump_defs, flag_reference, flag_dump_types,
index 2bcdb9b..21ed2ec 100644 (file)
@@ -47,7 +47,7 @@ typedef struct string_list **yystype;
 #define YYSTYPE yystype
 
 extern int cur_line;
-extern char *cur_filename, *source_file;
+extern char *cur_filename;
 extern int in_source_file;
 
 struct symbol *find_symbol(const char *name, enum symbol_type ns, int exact);
index ae76472..a4d7495 100644 (file)
@@ -119,12 +119,11 @@ yylex(void)
   static enum {
     ST_NOTSTARTED, ST_NORMAL, ST_ATTRIBUTE, ST_ASM, ST_TYPEOF, ST_TYPEOF_1,
     ST_BRACKET, ST_BRACE, ST_EXPRESSION, ST_STATIC_ASSERT,
-    ST_TABLE_1, ST_TABLE_2, ST_TABLE_3, ST_TABLE_4,
-    ST_TABLE_5, ST_TABLE_6
   } lexstate = ST_NOTSTARTED;
 
   static int suppress_type_lookup, dont_want_brace_phrase;
   static struct string_list *next_node;
+  static char *source_file;
 
   int token, count = 0;
   struct string_list *cur_node;
@@ -235,7 +234,6 @@ repeat:
          lexstate = ST_EXPRESSION;
          break;
 
-       case DOTS:
        default:
          APP;
          break;
@@ -426,58 +424,6 @@ repeat:
        }
       break;
 
-    case ST_TABLE_1:
-      goto repeat;
-
-    case ST_TABLE_2:
-      if (token == IDENT && yyleng == 1 && yytext[0] == 'X')
-       {
-         token = EXPORT_SYMBOL_KEYW;
-         lexstate = ST_TABLE_5;
-         APP;
-         break;
-       }
-      lexstate = ST_TABLE_6;
-      /* FALLTHRU */
-
-    case ST_TABLE_6:
-      switch (token)
-       {
-       case '{': case '[': case '(':
-         ++count;
-         break;
-       case '}': case ']': case ')':
-         --count;
-         break;
-       case ',':
-         if (count == 0)
-           lexstate = ST_TABLE_2;
-         break;
-       };
-      goto repeat;
-
-    case ST_TABLE_3:
-      goto repeat;
-
-    case ST_TABLE_4:
-      if (token == ';')
-       lexstate = ST_NORMAL;
-      goto repeat;
-
-    case ST_TABLE_5:
-      switch (token)
-       {
-       case ',':
-         token = ';';
-         lexstate = ST_TABLE_2;
-         APP;
-         break;
-       default:
-         APP;
-         break;
-       }
-      break;
-
     default:
       exit(1);
     }
index 2c40e68..8c19b82 100644 (file)
@@ -3,9 +3,6 @@
 # Kernel configuration targets
 # These targets are used from top-level makefile
 
-PHONY += xconfig gconfig menuconfig config localmodconfig localyesconfig \
-       build_menuconfig build_nconfig build_gconfig build_xconfig
-
 ifdef KBUILD_KCONFIG
 Kconfig := $(KBUILD_KCONFIG)
 else
@@ -19,29 +16,24 @@ endif
 # We need this, in case the user has it in its environment
 unexport CONFIG_
 
-xconfig: $(obj)/qconf
-       $(Q)$< $(silent) $(Kconfig)
-
-gconfig: $(obj)/gconf
-       $(Q)$< $(silent) $(Kconfig)
-
-menuconfig: $(obj)/mconf
-       $(Q)$< $(silent) $(Kconfig)
-
-config: $(obj)/conf
-       $(Q)$< $(silent) --oldaskconfig $(Kconfig)
-
-nconfig: $(obj)/nconf
-       $(Q)$< $(silent) $(Kconfig)
-
-build_menuconfig: $(obj)/mconf
+config-prog    := conf
+menuconfig-prog        := mconf
+nconfig-prog   := nconf
+gconfig-prog   := gconf
+xconfig-prog   := qconf
 
-build_nconfig: $(obj)/nconf
+define config_rule
+PHONY += $(1)
+$(1): $(obj)/$($(1)-prog)
+       $(Q)$$< $(silent) $(Kconfig)
 
-build_gconfig: $(obj)/gconf
+PHONY += build_$(1)
+build_$(1): $(obj)/$($(1)-prog)
+endef
 
-build_xconfig: $(obj)/qconf
+$(foreach c, config menuconfig nconfig gconfig xconfig, $(eval $(call config_rule,$(c))))
 
+PHONY += localmodconfig localyesconfig
 localyesconfig localmodconfig: $(obj)/conf
        $(Q)$(PERL) $(srctree)/$(src)/streamline_config.pl --$@ $(srctree) $(Kconfig) > .tmp.config
        $(Q)if [ -f .config ]; then                             \
index db03e2f..957d2a0 100644 (file)
@@ -84,8 +84,6 @@ static void xfgets(char *str, int size, FILE *in)
 
 static int conf_askvalue(struct symbol *sym, const char *def)
 {
-       enum symbol_type type = sym_get_type(sym);
-
        if (!sym_has_value(sym))
                printf("(NEW) ");
 
@@ -107,24 +105,12 @@ static int conf_askvalue(struct symbol *sym, const char *def)
                        return 0;
                }
                /* fall through */
-       case oldaskconfig:
+       default:
                fflush(stdout);
                xfgets(line, sizeof(line), stdin);
-               return 1;
-       default:
                break;
        }
 
-       switch (type) {
-       case S_INT:
-       case S_HEX:
-       case S_STRING:
-               printf("%s\n", def);
-               return 1;
-       default:
-               ;
-       }
-       printf("%s", line);
        return 1;
 }
 
@@ -137,7 +123,7 @@ static int conf_string(struct menu *menu)
                printf("%*s%s ", indent - 1, "", menu->prompt->text);
                printf("(%s) ", sym->name);
                def = sym_get_string_value(sym);
-               if (sym_get_string_value(sym))
+               if (def)
                        printf("[%s] ", def);
                if (!conf_askvalue(sym, def))
                        return 0;
@@ -419,34 +405,37 @@ static void check_conf(struct menu *menu)
                return;
 
        sym = menu->sym;
-       if (sym && !sym_has_value(sym)) {
-               if (sym_is_changeable(sym) ||
-                   (sym_is_choice(sym) && sym_get_tristate_value(sym) == yes)) {
-                       if (input_mode == listnewconfig) {
-                               if (sym->name) {
-                                       const char *str;
-
-                                       if (sym->type == S_STRING) {
-                                               str = sym_get_string_value(sym);
-                                               str = sym_escape_string_value(str);
-                                               printf("%s%s=%s\n", CONFIG_, sym->name, str);
-                                               free((void *)str);
-                                       } else {
-                                               str = sym_get_string_value(sym);
-                                               printf("%s%s=%s\n", CONFIG_, sym->name, str);
-                                       }
-                               }
-                       } else if (input_mode == helpnewconfig) {
-                               printf("-----\n");
-                               print_help(menu);
-                               printf("-----\n");
+       if (sym && !sym_has_value(sym) &&
+           (sym_is_changeable(sym) ||
+            (sym_is_choice(sym) && sym_get_tristate_value(sym) == yes))) {
 
-                       } else {
-                               if (!conf_cnt++)
-                                       printf("*\n* Restart config...\n*\n");
-                               rootEntry = menu_get_parent_menu(menu);
-                               conf(rootEntry);
+               switch (input_mode) {
+               case listnewconfig:
+                       if (sym->name) {
+                               const char *str;
+
+                               if (sym->type == S_STRING) {
+                                       str = sym_get_string_value(sym);
+                                       str = sym_escape_string_value(str);
+                                       printf("%s%s=%s\n", CONFIG_, sym->name, str);
+                                       free((void *)str);
+                               } else {
+                                       str = sym_get_string_value(sym);
+                                       printf("%s%s=%s\n", CONFIG_, sym->name, str);
+                               }
                        }
+                       break;
+               case helpnewconfig:
+                       printf("-----\n");
+                       print_help(menu);
+                       printf("-----\n");
+                       break;
+               default:
+                       if (!conf_cnt++)
+                               printf("*\n* Restart config...\n*\n");
+                       rootEntry = menu_get_parent_menu(menu);
+                       conf(rootEntry);
+                       break;
                }
        }
 
@@ -494,6 +483,7 @@ static void conf_usage(const char *progname)
        printf("  --randconfig            New config with random answer to all options\n");
        printf("  --yes2modconfig         Change answers from yes to mod if possible\n");
        printf("  --mod2yesconfig         Change answers from mod to yes if possible\n");
+       printf("  (If none of the above is given, --oldaskconfig is the default)\n");
 }
 
 int main(int ac, char **av)
@@ -505,7 +495,7 @@ int main(int ac, char **av)
 
        tty_stdio = isatty(0) && isatty(1);
 
-       while ((opt = getopt_long(ac, av, "s", long_opts, NULL)) != -1) {
+       while ((opt = getopt_long(ac, av, "hs", long_opts, NULL)) != -1) {
                if (opt == 's') {
                        conf_set_message_callback(NULL);
                        continue;
@@ -561,7 +551,7 @@ int main(int ac, char **av)
                case yes2modconfig:
                case mod2yesconfig:
                        break;
-               case '?':
+               case 'h':
                        conf_usage(progname);
                        exit(1);
                        break;
index e046e16..8b5bc7b 100755 (executable)
@@ -1553,7 +1553,7 @@ sub create_parameterlist($$$$) {
        } elsif ($arg =~ m/\(.+\)\s*\(/) {
            # pointer-to-function
            $arg =~ tr/#/,/;
-           $arg =~ m/[^\(]+\(\*?\s*([\w\.]*)\s*\)/;
+           $arg =~ m/[^\(]+\(\*?\s*([\w\[\]\.]*)\s*\)/;
            $param = $1;
            $type = $arg;
            $type =~ s/([^\(]+\(\*?)\s*$param/$1/;
index f2be0ff..1bf3aad 100755 (executable)
@@ -1,11 +1,79 @@
-#!/usr/bin/awk -f
+#!/bin/sh
 # SPDX-License-Identifier: GPL-2.0
-# extract linker version number from stdin and turn into single number
-       {
-       gsub(".*\\)", "");
-       gsub(".*version ", "");
-       gsub("-.*", "");
-       split($1,a, ".");
-       print a[1]*100000000 + a[2]*1000000 + a[3]*10000;
-       exit
-       }
+#
+# Print the linker name and its version in a 5 or 6-digit form.
+# Also, perform the minimum version check.
+
+set -e
+
+# When you raise the minimum linker version, please update
+# Documentation/process/changes.rst as well.
+bfd_min_version=2.23.0
+lld_min_version=10.0.1
+
+# Convert the version string x.y.z to a canonical 5 or 6-digit form.
+get_canonical_version()
+{
+       IFS=.
+       set -- $1
+
+       # If the 2nd or 3rd field is missing, fill it with a zero.
+       #
+       # The 4th field, if present, is ignored.
+       # This occurs in development snapshots as in 2.35.1.20201116
+       echo $((10000 * $1 + 100 * ${2:-0} + ${3:-0}))
+}
+
+orig_args="$@"
+
+# Get the first line of the --version output.
+IFS='
+'
+set -- $(LC_ALL=C "$@" --version)
+
+# Split the line on spaces.
+IFS=' '
+set -- $1
+
+if [ "$1" = GNU -a "$2" = ld ]; then
+       shift $(($# - 1))
+       version=$1
+       min_version=$bfd_min_version
+       name=BFD
+       disp_name="GNU ld"
+elif [ "$1" = GNU -a "$2" = gold ]; then
+       echo "gold linker is not supported as it is not capable of linking the kernel proper." >&2
+       exit 1
+else
+       while [ $# -gt 1 -a "$1" != "LLD" ]; do
+               shift
+       done
+
+       if [ "$1" = LLD ]; then
+               version=$2
+               min_version=$lld_min_version
+               name=LLD
+               disp_name=LLD
+       else
+               echo "$orig_args: unknown linker" >&2
+               exit 1
+       fi
+fi
+
+# Some distributions append a package release number, as in 2.34-4.fc32
+# Trim the hyphen and any characters that follow.
+version=${version%-*}
+
+cversion=$(get_canonical_version $version)
+min_cversion=$(get_canonical_version $min_version)
+
+if [ "$cversion" -lt "$min_cversion" ]; then
+       echo >&2 "***"
+       echo >&2 "*** Linker is too old."
+       echo >&2 "***   Your $disp_name version:    $version"
+       echo >&2 "***   Minimum $disp_name version: $min_version"
+       echo >&2 "***"
+       exit 1
+fi
+
+echo $name $cversion
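
A quick stand-alone sanity check of the canonicalisation, restating the helper above (expected values in comments):

        get_canonical_version()
        {
                IFS=.
                set -- $1
                echo $((10000 * $1 + 100 * ${2:-0} + ${3:-0}))
        }
        get_canonical_version 2.35.1.20201116   # 23501 (snapshot field ignored)
        get_canonical_version 10.0.1            # 100001
        get_canonical_version 2.23              # 22300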
index 6eded32..3b261b0 100755 (executable)
@@ -43,11 +43,37 @@ info()
        fi
 }
 
+# Generate a linker script to ensure correct ordering of initcalls.
+gen_initcalls()
+{
+       info GEN .tmp_initcalls.lds
+
+       ${PYTHON} ${srctree}/scripts/jobserver-exec             \
+       ${PERL} ${srctree}/scripts/generate_initcall_order.pl   \
+               ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}   \
+               > .tmp_initcalls.lds
+}
+
+# If CONFIG_LTO_CLANG is selected, collect generated symbol versions into
+# .tmp_symversions.lds
+gen_symversions()
+{
+       info GEN .tmp_symversions.lds
+       rm -f .tmp_symversions.lds
+
+       for o in ${KBUILD_VMLINUX_OBJS} ${KBUILD_VMLINUX_LIBS}; do
+               if [ -f ${o}.symversions ]; then
+                       cat ${o}.symversions >> .tmp_symversions.lds
+               fi
+       done
+}
+
 # Link of vmlinux.o used for section mismatch analysis
 # ${1} output file
 modpost_link()
 {
        local objects
+       local lds=""
 
        objects="--whole-archive                                \
                ${KBUILD_VMLINUX_OBJS}                          \
@@ -56,19 +82,57 @@ modpost_link()
                ${KBUILD_VMLINUX_LIBS}                          \
                --end-group"
 
-       ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${objects}
+       if [ -n "${CONFIG_LTO_CLANG}" ]; then
+               gen_initcalls
+               lds="-T .tmp_initcalls.lds"
+
+               if [ -n "${CONFIG_MODVERSIONS}" ]; then
+                       gen_symversions
+                       lds="${lds} -T .tmp_symversions.lds"
+               fi
+
+               # This might take a while, so indicate that we're doing
+               # an LTO link
+               info LTO ${1}
+       else
+               info LD ${1}
+       fi
+
+       ${LD} ${KBUILD_LDFLAGS} -r -o ${1} ${lds} ${objects}
 }
 
 objtool_link()
 {
+       local objtoolcmd;
        local objtoolopt;
 
+       if [ "${CONFIG_LTO_CLANG} ${CONFIG_STACK_VALIDATION}" = "y y" ]; then
+               # Don't perform vmlinux validation unless explicitly requested,
+               # but run objtool on vmlinux.o now that we have an object file.
+               if [ -n "${CONFIG_UNWINDER_ORC}" ]; then
+                       objtoolcmd="orc generate"
+               fi
+
+               objtoolopt="${objtoolopt} --duplicate"
+
+               if [ -n "${CONFIG_FTRACE_MCOUNT_USE_OBJTOOL}" ]; then
+                       objtoolopt="${objtoolopt} --mcount"
+               fi
+       fi
+
        if [ -n "${CONFIG_VMLINUX_VALIDATION}" ]; then
-               objtoolopt="check"
+               objtoolopt="${objtoolopt} --noinstr"
+       fi
+
+       if [ -n "${objtoolopt}" ]; then
+               if [ -z "${objtoolcmd}" ]; then
+                       objtoolcmd="check"
+               fi
+               objtoolopt="${objtoolopt} --vmlinux"
                if [ -z "${CONFIG_FRAME_POINTER}" ]; then
                        objtoolopt="${objtoolopt} --no-fp"
                fi
-               if [ -n "${CONFIG_GCOV_KERNEL}" ]; then
+               if [ -n "${CONFIG_GCOV_KERNEL}" ] || [ -n "${CONFIG_LTO_CLANG}" ]; then
                        objtoolopt="${objtoolopt} --no-unreachable"
                fi
                if [ -n "${CONFIG_RETPOLINE}" ]; then
@@ -78,7 +142,7 @@ objtool_link()
                        objtoolopt="${objtoolopt} --uaccess"
                fi
                info OBJTOOL ${1}
-               tools/objtool/objtool ${objtoolopt} ${1}
+               tools/objtool/objtool ${objtoolcmd} ${objtoolopt} ${1}
        fi
 }
 
@@ -103,13 +167,22 @@ vmlinux_link()
        fi
 
        if [ "${SRCARCH}" != "um" ]; then
-               objects="--whole-archive                        \
-                       ${KBUILD_VMLINUX_OBJS}                  \
-                       --no-whole-archive                      \
-                       --start-group                           \
-                       ${KBUILD_VMLINUX_LIBS}                  \
-                       --end-group                             \
-                       ${@}"
+               if [ -n "${CONFIG_LTO_CLANG}" ]; then
+                       # Use vmlinux.o instead of performing the slow LTO
+                       # link again.
+                       objects="--whole-archive                \
+                               vmlinux.o                       \
+                               --no-whole-archive              \
+                               ${@}"
+               else
+                       objects="--whole-archive                \
+                               ${KBUILD_VMLINUX_OBJS}          \
+                               --no-whole-archive              \
+                               --start-group                   \
+                               ${KBUILD_VMLINUX_LIBS}          \
+                               --end-group                     \
+                               ${@}"
+               fi
 
                ${LD} ${KBUILD_LDFLAGS} ${LDFLAGS_vmlinux}      \
                        ${strip_debug#-Wl,}                     \
@@ -225,6 +298,8 @@ cleanup()
 {
        rm -f .btf.*
        rm -f .tmp_System.map
+       rm -f .tmp_initcalls.lds
+       rm -f .tmp_symversions.lds
        rm -f .tmp_vmlinux*
        rm -f System.map
        rm -f vmlinux
@@ -274,7 +349,6 @@ fi;
 ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init need-builtin=1
 
 #link vmlinux.o
-info LD vmlinux.o
 modpost_link vmlinux.o
 objtool_link vmlinux.o
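
Tracing the objtool_link() branches above: with CONFIG_LTO_CLANG=y, CONFIG_STACK_VALIDATION=y, CONFIG_UNWINDER_ORC=y and CONFIG_FTRACE_MCOUNT_USE_OBJTOOL=y, with frame pointers off and the noinstr/retpoline/uaccess conditions not taken, the accumulated command reduces to roughly this one invocation (a sketch, not a verbatim build log):

        tools/objtool/objtool orc generate --duplicate --mcount --vmlinux \
                --no-fp --no-unreachable vmlinux.o

(--no-unreachable follows from CONFIG_LTO_CLANG alone, per the amended GCOV/LTO test.)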
 
diff --git a/scripts/lld-version.sh b/scripts/lld-version.sh
deleted file mode 100755 (executable)
index d70edb4..0000000
+++ /dev/null
@@ -1,20 +0,0 @@
-#!/bin/sh
-# SPDX-License-Identifier: GPL-2.0
-#
-# Usage: $ ./scripts/lld-version.sh ld.lld
-#
-# Print the linker version of `ld.lld' in a 5 or 6-digit form
-# such as `100001' for ld.lld 10.0.1 etc.
-
-linker_string="$($* --version)"
-
-if ! ( echo $linker_string | grep -q LLD ); then
-       echo 0
-       exit 1
-fi
-
-VERSION=$(echo $linker_string | cut -d ' ' -f 2)
-MAJOR=$(echo $VERSION | cut -d . -f 1)
-MINOR=$(echo $VERSION | cut -d . -f 2)
-PATCHLEVEL=$(echo $VERSION | cut -d . -f 3)
-printf "%d%02d%02d\\n" $MAJOR $MINOR $PATCHLEVEL
index 7807168..c9e38ad 100644 (file)
@@ -1,5 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0
 OBJECT_FILES_NON_STANDARD := y
+CFLAGS_REMOVE_empty.o += $(CC_FLAGS_LTO)
 
 hostprogs-always-y     += modpost mk_elfconfig
 always-y               += empty.o
index f078eeb..9bb6c7e 100644 (file)
@@ -254,5 +254,9 @@ int main(void)
        DEVID_FIELD(ssam_device_id, instance);
        DEVID_FIELD(ssam_device_id, function);
 
+       DEVID(dfl_device_id);
+       DEVID_FIELD(dfl_device_id, type);
+       DEVID_FIELD(dfl_device_id, feature_id);
+
        return 0;
 }
index d21d287..7c97fa8 100644 (file)
@@ -1397,6 +1397,18 @@ static int do_ssam_entry(const char *filename, void *symval, char *alias)
        return 1;
 }
 
+/* Looks like: dfl:tNfN */
+static int do_dfl_entry(const char *filename, void *symval, char *alias)
+{
+       DEF_FIELD(symval, dfl_device_id, type);
+       DEF_FIELD(symval, dfl_device_id, feature_id);
+
+       sprintf(alias, "dfl:t%04Xf%04X", type, feature_id);
+
+       add_wildcard(alias);
+       return 1;
+}
+
 /* Does namelen bytes of name exactly match the symbol? */
 static bool sym_is(const char *name, unsigned namelen, const char *symbol)
 {
@@ -1473,6 +1485,7 @@ static const struct devtable devtable[] = {
        {"mhi", SIZE_mhi_device_id, do_mhi_entry},
        {"auxiliary", SIZE_auxiliary_device_id, do_auxiliary_entry},
        {"ssam", SIZE_ssam_device_id, do_ssam_entry},
+       {"dfl", SIZE_dfl_device_id, do_dfl_entry},
 };
 
 /* Create MODULE_ALIAS() statements.
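
The alias format produced by do_dfl_entry() can be previewed from the shell; for an invented DFL device with type 0 and feature_id 7:

        printf 'dfl:t%04Xf%04X*\n' 0 7  # -> dfl:t0000f0007*  (trailing * from add_wildcard)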
index d6c8165..24725e5 100644 (file)
@@ -17,7 +17,6 @@
 #include <ctype.h>
 #include <string.h>
 #include <limits.h>
-#include <stdbool.h>
 #include <errno.h>
 #include "modpost.h"
 #include "../../include/linux/license.h"
@@ -43,8 +42,9 @@ static int allow_missing_ns_imports;
 static bool error_occurred;
 
 enum export {
-       export_plain,      export_unused,     export_gpl,
-       export_unused_gpl, export_gpl_future, export_unknown
+       export_plain,
+       export_gpl,
+       export_unknown
 };
 
 /* In kernel, this size is defined in linux/module.h;
@@ -84,14 +84,6 @@ modpost_log(enum loglevel loglevel, const char *fmt, ...)
                error_occurred = true;
 }
 
-static inline bool strends(const char *str, const char *postfix)
-{
-       if (strlen(str) < strlen(postfix))
-               return false;
-
-       return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
-}
-
 void *do_nofail(void *ptr, const char *expr)
 {
        if (!ptr)
@@ -301,10 +293,7 @@ static const struct {
        enum export export;
 } export_list[] = {
        { .str = "EXPORT_SYMBOL",            .export = export_plain },
-       { .str = "EXPORT_UNUSED_SYMBOL",     .export = export_unused },
        { .str = "EXPORT_SYMBOL_GPL",        .export = export_gpl },
-       { .str = "EXPORT_UNUSED_SYMBOL_GPL", .export = export_unused_gpl },
-       { .str = "EXPORT_SYMBOL_GPL_FUTURE", .export = export_gpl_future },
        { .str = "(unknown)",                .export = export_unknown },
 };
 
@@ -363,14 +352,8 @@ static enum export export_from_secname(struct elf_info *elf, unsigned int sec)
 
        if (strstarts(secname, "___ksymtab+"))
                return export_plain;
-       else if (strstarts(secname, "___ksymtab_unused+"))
-               return export_unused;
        else if (strstarts(secname, "___ksymtab_gpl+"))
                return export_gpl;
-       else if (strstarts(secname, "___ksymtab_unused_gpl+"))
-               return export_unused_gpl;
-       else if (strstarts(secname, "___ksymtab_gpl_future+"))
-               return export_gpl_future;
        else
                return export_unknown;
 }
@@ -379,14 +362,8 @@ static enum export export_from_sec(struct elf_info *elf, unsigned int sec)
 {
        if (sec == elf->export_sec)
                return export_plain;
-       else if (sec == elf->export_unused_sec)
-               return export_unused;
        else if (sec == elf->export_gpl_sec)
                return export_gpl;
-       else if (sec == elf->export_unused_gpl_sec)
-               return export_unused_gpl;
-       else if (sec == elf->export_gpl_future_sec)
-               return export_gpl_future;
        else
                return export_unknown;
 }
@@ -590,14 +567,8 @@ static int parse_elf(struct elf_info *info, const char *filename)
                        info->modinfo_len = sechdrs[i].sh_size;
                } else if (strcmp(secname, "__ksymtab") == 0)
                        info->export_sec = i;
-               else if (strcmp(secname, "__ksymtab_unused") == 0)
-                       info->export_unused_sec = i;
                else if (strcmp(secname, "__ksymtab_gpl") == 0)
                        info->export_gpl_sec = i;
-               else if (strcmp(secname, "__ksymtab_unused_gpl") == 0)
-                       info->export_unused_gpl_sec = i;
-               else if (strcmp(secname, "__ksymtab_gpl_future") == 0)
-                       info->export_gpl_future_sec = i;
 
                if (sechdrs[i].sh_type == SHT_SYMTAB) {
                        unsigned int sh_link_idx;
@@ -1988,6 +1959,10 @@ static char *remove_dot(char *s)
                size_t m = strspn(s + n + 1, "0123456789");
                if (m && (s[n + m] == '.' || s[n + m] == 0))
                        s[n] = 0;
+
+               /* strip trailing .lto */
+               if (strends(s, ".lto"))
+                       s[strlen(s) - 4] = '\0';
        }
        return s;
 }
@@ -2011,6 +1986,9 @@ static void read_symbols(const char *modname)
                /* strip trailing .o */
                tmp = NOFAIL(strdup(modname));
                tmp[strlen(tmp) - 2] = '\0';
+               /* strip trailing .lto */
+               if (strends(tmp, ".lto"))
+                       tmp[strlen(tmp) - 4] = '\0';
                mod = new_module(tmp);
                free(tmp);
        }
@@ -2148,36 +2126,13 @@ static void check_for_gpl_usage(enum export exp, const char *m, const char *s)
                error("GPL-incompatible module %s.ko uses GPL-only symbol '%s'\n",
                      m, s);
                break;
-       case export_unused_gpl:
-               error("GPL-incompatible module %s.ko uses GPL-only symbol marked UNUSED '%s'\n",
-                     m, s);
-               break;
-       case export_gpl_future:
-               warn("GPL-incompatible module %s.ko uses future GPL-only symbol '%s'\n",
-                    m, s);
-               break;
        case export_plain:
-       case export_unused:
        case export_unknown:
                /* ignore */
                break;
        }
 }
 
-static void check_for_unused(enum export exp, const char *m, const char *s)
-{
-       switch (exp) {
-       case export_unused:
-       case export_unused_gpl:
-               warn("module %s.ko uses symbol '%s' marked UNUSED\n",
-                    m, s);
-               break;
-       default:
-               /* ignore */
-               break;
-       }
-}
-
 static void check_exports(struct module *mod)
 {
        struct symbol *s, *exp;
@@ -2208,7 +2163,6 @@ static void check_exports(struct module *mod)
 
                if (!mod->gpl_compatible)
                        check_for_gpl_usage(exp->export, basename, exp->name);
-               check_for_unused(exp->export, basename, exp->name);
        }
 }
 
index e6f46ee..c1a895c 100644 (file)
@@ -2,6 +2,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <stdarg.h>
+#include <stdbool.h>
 #include <string.h>
 #include <sys/types.h>
 #include <sys/stat.h>
@@ -139,10 +140,7 @@ struct elf_info {
        Elf_Sym      *symtab_start;
        Elf_Sym      *symtab_stop;
        Elf_Section  export_sec;
-       Elf_Section  export_unused_sec;
        Elf_Section  export_gpl_sec;
-       Elf_Section  export_unused_gpl_sec;
-       Elf_Section  export_gpl_future_sec;
        char         *strtab;
        char         *modinfo;
        unsigned int modinfo_len;
@@ -180,6 +178,14 @@ static inline unsigned int get_secindex(const struct elf_info *info,
        return info->symtab_shndx_start[sym - info->symtab_start];
 }
 
+static inline bool strends(const char *str, const char *postfix)
+{
+       if (strlen(str) < strlen(postfix))
+               return false;
+
+       return strcmp(str + strlen(str) - strlen(postfix), postfix) == 0;
+}
+
 /* file2alias.c */
 extern unsigned int cross_build;
 void handle_moddevtable(struct module *mod, struct elf_info *info,
index d587f40..760e6ba 100644 (file)
@@ -391,10 +391,14 @@ void get_src_version(const char *modname, char sum[], unsigned sumlen)
        struct md4_ctx md;
        char *fname;
        char filelist[PATH_MAX + 1];
+       int postfix_len = 1;
+
+       if (strends(modname, ".lto.o"))
+               postfix_len = 5;
 
        /* objects for a module are listed in the first line of *.mod file. */
        snprintf(filelist, sizeof(filelist), "%.*smod",
-                (int)strlen(modname) - 1, modname);
+                (int)strlen(modname) - postfix_len, modname);
 
        buf = read_text_file(filelist);
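
The filename arithmetic above maps both module object flavours onto the same .mod file; a shell restatement of the two cases (module names invented):

        for m in drivers/foo/bar.o drivers/foo/bar.lto.o; do
                case $m in
                *.lto.o) echo "${m%lto.o}mod" ;;        # bar.lto.o -> bar.mod (5 chars dropped)
                *)       echo "${m%o}mod" ;;            # bar.o     -> bar.mod (1 char dropped)
                esac
        done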
 
index 69b9b71..2c52535 100644 (file)
@@ -11,18 +11,38 @@ SECTIONS {
 
        __ksymtab               0 : { *(SORT(___ksymtab+*)) }
        __ksymtab_gpl           0 : { *(SORT(___ksymtab_gpl+*)) }
-       __ksymtab_unused        0 : { *(SORT(___ksymtab_unused+*)) }
-       __ksymtab_unused_gpl    0 : { *(SORT(___ksymtab_unused_gpl+*)) }
-       __ksymtab_gpl_future    0 : { *(SORT(___ksymtab_gpl_future+*)) }
        __kcrctab               0 : { *(SORT(___kcrctab+*)) }
        __kcrctab_gpl           0 : { *(SORT(___kcrctab_gpl+*)) }
-       __kcrctab_unused        0 : { *(SORT(___kcrctab_unused+*)) }
-       __kcrctab_unused_gpl    0 : { *(SORT(___kcrctab_unused_gpl+*)) }
-       __kcrctab_gpl_future    0 : { *(SORT(___kcrctab_gpl_future+*)) }
 
        .init_array             0 : ALIGN(8) { *(SORT(.init_array.*)) *(.init_array) }
 
        __jump_table            0 : ALIGN(8) { KEEP(*(__jump_table)) }
+
+       __patchable_function_entries : { *(__patchable_function_entries) }
+
+#ifdef CONFIG_LTO_CLANG
+       /*
+        * With CONFIG_LTO_CLANG, LLD always enables -fdata-sections and
+        * -ffunction-sections, which increases the size of the final module.
+        * Merge the split sections in the final binary.
+        */
+       .bss : {
+               *(.bss .bss.[0-9a-zA-Z_]*)
+               *(.bss..L*)
+       }
+
+       .data : {
+               *(.data .data.[0-9a-zA-Z_]*)
+               *(.data..L*)
+       }
+
+       .rodata : {
+               *(.rodata .rodata.[0-9a-zA-Z_]*)
+               *(.rodata..L*)
+       }
+
+       .text : { *(.text .text.[0-9a-zA-Z_]*) }
+#endif
 }
 
 /* bring in arch-specific sections */
index b9c2ee7..cce12e1 100644 (file)
@@ -438,7 +438,7 @@ static int arm_is_fake_mcount(Elf32_Rel const *rp)
 
 static int arm64_is_fake_mcount(Elf64_Rel const *rp)
 {
-       return ELF64_R_TYPE(w(rp->r_info)) != R_AARCH64_CALL26;
+       return ELF64_R_TYPE(w8(rp->r_info)) != R_AARCH64_CALL26;
 }
 
 /* 64-bit EM_MIPS has weird ELF64_Rela.r_info.
index bc87200..cbdb5c8 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+#!/usr/bin/env python3
 # SPDX-License-Identifier: GPL-2.0
 # Copyright Thomas Gleixner <tglx@linutronix.de>
 
index 953f4a2..2e3ba91 100644 (file)
@@ -103,6 +103,7 @@ alloated||allocated
 allocatote||allocate
 allocatrd||allocated
 allocte||allocate
+allocted||allocated
 allpication||application
 alocate||allocate
 alogirhtms||algorithms
@@ -339,6 +340,7 @@ comppatible||compatible
 compres||compress
 compresion||compression
 comression||compression
+comunicate||communicate
 comunication||communication
 conbination||combination
 conditionaly||conditionally
@@ -466,6 +468,7 @@ developpment||development
 deveolpment||development
 devided||divided
 deviece||device
+devision||division
 diable||disable
 dicline||decline
 dictionnary||dictionary
@@ -479,6 +482,7 @@ difinition||definition
 digial||digital
 dimention||dimension
 dimesions||dimensions
+diconnected||disconnected
 disgest||digest
 dispalying||displaying
 diplay||display
@@ -518,6 +522,7 @@ downlads||downloads
 droped||dropped
 droput||dropout
 druing||during
+dyanmic||dynamic
 dynmaic||dynamic
 eanable||enable
 eanble||enable
@@ -542,6 +547,7 @@ encrupted||encrypted
 encrypiton||encryption
 encryptio||encryption
 endianess||endianness
+enpoint||endpoint
 enhaced||enhanced
 enlightnment||enlightenment
 enqueing||enqueuing
@@ -566,6 +572,7 @@ estbalishment||establishment
 etsablishment||establishment
 etsbalishment||establishment
 evalution||evaluation
 excecutable||executable
 exceded||exceeded
 exceds||exceeds
@@ -574,6 +581,7 @@ excellant||excellent
 execeeded||exceeded
 execeeds||exceeds
 exeed||exceed
+exeeds||exceeds
 exeuction||execution
 existance||existence
 existant||existent
@@ -641,6 +649,7 @@ forwardig||forwarding
 frambuffer||framebuffer
 framming||framing
 framwork||framework
+frequence||frequency
 frequncy||frequency
 frequancy||frequency
 frome||from
@@ -683,10 +692,12 @@ handfull||handful
 hanlde||handle
 hanled||handled
 happend||happened
+hardare||hardware
 harware||hardware
 havind||having
 heirarchically||hierarchically
 helpfull||helpful
+heterogenous||heterogeneous
 hexdecimal||hexadecimal
 hybernate||hibernate
 hierachy||hierarchy
@@ -731,6 +742,7 @@ inconsistant||inconsistent
 increas||increase
 incremeted||incremented
 incrment||increment
+incuding||including
 inculde||include
 indendation||indentation
 indended||intended
@@ -741,6 +753,7 @@ indiate||indicate
 indicat||indicate
 inexpect||inexpected
 inferface||interface
+infinit||infinite
 infomation||information
 informatiom||information
 informations||information
@@ -771,6 +784,7 @@ instace||instance
 instal||install
 instanciate||instantiate
 instanciated||instantiated
+instuments||instruments
 insufficent||insufficient
 inteface||interface
 integreated||integrated
@@ -869,12 +883,14 @@ mailformed||malformed
 malplaced||misplaced
 malplace||misplace
 managable||manageable
+managament||management
 managment||management
 mangement||management
 manger||manager
 manoeuvering||maneuvering
 manufaucturing||manufacturing
 mappping||mapping
+maping||mapping
 matchs||matches
 mathimatical||mathematical
 mathimatic||mathematic
@@ -886,6 +902,7 @@ meetign||meeting
 memeory||memory
 memmber||member
 memoery||memory
+memroy||memory
 ment||meant
 mergable||mergeable
 mesage||message
@@ -999,6 +1016,7 @@ overlaping||overlapping
 overide||override
 overrided||overridden
 overriden||overridden
+overrrun||overrun
 overun||overrun
 overwritting||overwriting
 overwriten||overwritten
@@ -1035,6 +1053,7 @@ peforming||performing
 peice||piece
 pendantic||pedantic
 peprocessor||preprocessor
+perfomance||performance
 perfoming||performing
 perfomring||performing
 periperal||peripheral
@@ -1100,6 +1119,7 @@ prodecure||procedure
 progamming||programming
 progams||programs
 progess||progress
+programable||programmable
 programers||programmers
 programm||program
 programms||programs
@@ -1144,6 +1164,7 @@ recieved||received
 recieve||receive
 reciever||receiver
 recieves||receives
+recieving||receiving
 recogniced||recognised
 recognizeable||recognizable
 recommanded||recommended
@@ -1247,6 +1268,7 @@ searchs||searches
 secquence||sequence
 secund||second
 segement||segment
+seleted||selected
 semaphone||semaphore
 senario||scenario
 senarios||scenarios
@@ -1263,6 +1285,7 @@ seqeunce||sequence
 seqeuncer||sequencer
 seqeuencer||sequencer
 sequece||sequence
+sequemce||sequence
 sequencial||sequential
 serivce||service
 serveral||several
@@ -1333,6 +1356,7 @@ suble||subtle
 substract||subtract
 submited||submitted
 submition||submission
+succeded||succeeded
 suceed||succeed
 succesfully||successfully
 succesful||successful
@@ -1353,6 +1377,7 @@ supportin||supporting
 suppoted||supported
 suppported||supported
 suppport||support
+supprot||support
 supress||suppress
 surpressed||suppressed
 surpresses||suppresses
@@ -1401,6 +1426,7 @@ thresold||threshold
 throught||through
 trackling||tracking
 troughput||throughput
+trys||tries
 thses||these
 tiggers||triggers
 tiggered||triggered
@@ -1414,7 +1440,9 @@ traking||tracking
 tramsmitted||transmitted
 tramsmit||transmit
 tranasction||transaction
+tranceiver||transceiver
 tranfer||transfer
+tranmission||transmission
 transcevier||transceiver
 transciever||transceiver
 transferd||transferred
@@ -1468,6 +1496,7 @@ unnecesary||unnecessary
 unneedingly||unnecessarily
 unnsupported||unsupported
 unmached||unmatched
+unprecise||imprecise
 unregester||unregister
 unresgister||unregister
 unrgesiter||unregister
@@ -1503,6 +1532,7 @@ varient||variant
 vaule||value
 verbse||verbose
 veify||verify
+veriosn||version
 verisons||versions
 verison||version
 verson||version
diff --git a/scripts/syscallhdr.sh b/scripts/syscallhdr.sh
new file mode 100755 (executable)
index 0000000..848ac27
--- /dev/null
@@ -0,0 +1,98 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Generate a syscall number header.
+#
+# Each line of the syscall table should have the following format:
+#
+# NR ABI NAME [NATIVE] [COMPAT]
+#
+# NR       syscall number
+# ABI      ABI name
+# NAME     syscall name
+# NATIVE   native entry point (optional)
+# COMPAT   compat entry point (optional)
+
+set -e
+
+usage() {
+       echo >&2 "usage: $0 [--abis ABIS] [--emit-nr] [--offset OFFSET] [--prefix PREFIX] INFILE OUTFILE" >&2
+       echo >&2
+       echo >&2 "  INFILE    input syscall table"
+       echo >&2 "  OUTFILE   output header file"
+       echo >&2
+       echo >&2 "options:"
+       echo >&2 "  --abis ABIS        ABI(s) to handle (By default, all lines are handled)"
+       echo >&2 "  --emit-nr          Emit the macro of the number of syscalls (__NR_syscalls)"
+       echo >&2 "  --offset OFFSET    The offset of syscall numbers"
+       echo >&2 "  --prefix PREFIX    The prefix to the macro like __NR_<PREFIX><NAME>"
+       exit 1
+}
+
+# default unless specified by options
+abis=
+emit_nr=
+offset=
+prefix=
+
+while [ $# -gt 0 ]
+do
+       case $1 in
+       --abis)
+               abis=$(echo "($2)" | tr ',' '|')
+               shift 2;;
+       --emit-nr)
+               emit_nr=1
+               shift 1;;
+       --offset)
+               offset=$2
+               shift 2;;
+       --prefix)
+               prefix=$2
+               shift 2;;
+       -*)
+               echo "$1: unknown option" >&2
+               usage;;
+       *)
+               break;;
+       esac
+done
+
+if [ $# -ne 2 ]; then
+       usage
+fi
+
+infile="$1"
+outfile="$2"
+
+guard=_UAPI_ASM_$(basename "$outfile" |
+       sed -e 'y/abcdefghijklmnopqrstuvwxyz/ABCDEFGHIJKLMNOPQRSTUVWXYZ/' \
+       -e 's/[^A-Z0-9_]/_/g' -e 's/__/_/g')
+
+grep -E "^[0-9A-Fa-fXx]+[[:space:]]+$abis" "$infile" | sort -n | {
+       echo "#ifndef $guard"
+       echo "#define $guard"
+       echo
+
+       max=0
+       while read nr abi name native compat ; do
+
+               max=$nr
+
+               if [ -n "$offset" ]; then
+                       nr="($offset + $nr)"
+               fi
+
+               echo "#define __NR_$prefix$name $nr"
+       done
+
+       if [ -n "$emit_nr" ]; then
+               echo
+               echo "#ifdef __KERNEL__"
+               echo "#define __NR_${prefix}syscalls $(($max + 1))"
+               echo "#endif"
+       fi
+
+       echo
+       echo "#endif /* $guard */"
+} > "$outfile"
diff --git a/scripts/syscalltbl.sh b/scripts/syscalltbl.sh
new file mode 100755 (executable)
index 0000000..aa6ab15
--- /dev/null
@@ -0,0 +1,73 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# Generate a syscall table header.
+#
+# Each line of the syscall table should have the following format:
+#
+# NR ABI NAME [NATIVE] [COMPAT]
+#
+# NR       syscall number
+# ABI      ABI name
+# NAME     syscall name
+# NATIVE   native entry point (optional)
+# COMPAT   compat entry point (optional)
+
+set -e
+
+usage() {
+       echo >&2 "usage: $0 [--abis ABIS] INFILE OUTFILE" >&2
+       echo >&2
+       echo >&2 "  INFILE    input syscall table"
+       echo >&2 "  OUTFILE   output header file"
+       echo >&2
+       echo >&2 "options:"
+       echo >&2 "  --abis ABIS        ABI(s) to handle (By default, all lines are handled)"
+       exit 1
+}
+
+# default unless specified by options
+abis=
+
+while [ $# -gt 0 ]
+do
+       case $1 in
+       --abis)
+               abis=$(echo "($2)" | tr ',' '|')
+               shift 2;;
+       -*)
+               echo "$1: unknown option" >&2
+               usage;;
+       *)
+               break;;
+       esac
+done
+
+if [ $# -ne 2 ]; then
+       usage
+fi
+
+infile="$1"
+outfile="$2"
+
+nxt=0
+
+grep -E "^[0-9]+[[:space:]]+$abis" "$infile" | sort -n | {
+
+       while read nr abi name native compat ; do
+
+               while [ $nxt -lt $nr ]; do
+                       echo "__SYSCALL($nxt, sys_ni_syscall)"
+                       nxt=$((nxt + 1))
+               done
+
+               if [ -n "$compat" ]; then
+                       echo "__SYSCALL_WITH_COMPAT($nr, $native, $compat)"
+               elif [ -n "$native" ]; then
+                       echo "__SYSCALL($nr, $native)"
+               else
+                       echo "__SYSCALL($nr, sys_ni_syscall)"
+               fi
+               nxt=$((nr + 1))
+       done
+} > "$outfile"
diff --git a/scripts/test_dwarf5_support.sh b/scripts/test_dwarf5_support.sh
new file mode 100755 (executable)
index 0000000..c46e245
--- /dev/null
@@ -0,0 +1,8 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+# Test that the assembler doesn't need -Wa,-gdwarf-5 when presented with DWARF
+# v5 input, such as `.file 0` and `md5 0x00`. Should be fixed in GNU binutils
+# 2.35.2. https://sourceware.org/bugzilla/show_bug.cgi?id=25611
+echo '.file 0 "filename" md5 0x7a0b65214090b6693bd1dc24dd248245' | \
+  $* -gdwarf-5 -Wno-unused-command-line-argument -c -x assembler -o /dev/null -
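
Intended usage is a plain exit-status probe, e.g. (the compiler command is whatever $(CC) resolves to; gcc shown as an example):

        if sh scripts/test_dwarf5_support.sh gcc; then
                echo "assembler accepts DWARF v5 directives"
        fi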
index 0968a30..a92acc7 100755 (executable)
@@ -15,7 +15,7 @@ BEGIN {
 
        vernum = "[0-9]+([.]?[0-9]+)+"
        libc = "libc[.]so[.][0-9]+$"
-       libcpp = "(libg|stdc)[+]+[.]so[.][0-9]+$"
+       libcpp = "(libg|stdc)[+]+[.]so([.][0-9]+)+$"
 
        printversion("GNU C", version("gcc -dumpversion"))
        printversion("GNU Make", version("make --version"))
@@ -37,12 +37,10 @@ BEGIN {
        printversion("Bison", version("bison --version"))
        printversion("Flex", version("flex --version"))
 
-       while ("ldconfig -p 2>/dev/null" | getline > 0) {
-               if ($NF ~ libc && !seen[ver = version("readlink " $NF)]++)
-                       printversion("Linux C Library", ver)
-               else if ($NF ~ libcpp && !seen[ver = version("readlink " $NF)]++)
-                       printversion("Linux C++ Library", ver)
-       }
+       while ("ldconfig -p 2>/dev/null" | getline > 0)
+               if ($NF ~ libc || $NF ~ libcpp)
+                       if (!seen[ver = version("readlink " $NF)]++)
+                               printversion("Linux C" ($NF ~ libcpp? "++" : "") " Library", ver)
 
        printversion("Dynamic linker (ldd)", version("ldd --version"))
        printversion("Procps", version("ps --version"))
index f95c6bf..2ee3b3d 100644 (file)
@@ -1773,7 +1773,8 @@ fail2:
        return error;
 }
 
-static int ns_mkdir_op(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int ns_mkdir_op(struct user_namespace *mnt_userns, struct inode *dir,
+                      struct dentry *dentry, umode_t mode)
 {
        struct aa_ns *ns, *parent;
        /* TODO: improve permission check */
index f919ebd..583680f 100644 (file)
 
 #include <linux/errno.h>
 #include <linux/fdtable.h>
+#include <linux/fs.h>
 #include <linux/file.h>
 #include <linux/mount.h>
 #include <linux/syscalls.h>
 #include <linux/tracehook.h>
 #include <linux/personality.h>
 #include <linux/xattr.h>
+#include <linux/user_namespace.h>
 
 #include "include/audit.h"
 #include "include/apparmorfs.h"
@@ -324,8 +326,8 @@ static int aa_xattrs_match(const struct linux_binprm *bprm,
        d = bprm->file->f_path.dentry;
 
        for (i = 0; i < profile->xattr_count; i++) {
-               size = vfs_getxattr_alloc(d, profile->xattrs[i], &value,
-                                         value_size, GFP_KERNEL);
+               size = vfs_getxattr_alloc(&init_user_ns, d, profile->xattrs[i],
+                                         &value, value_size, GFP_KERNEL);
                if (size >= 0) {
                        u32 perm;
 
@@ -858,8 +860,10 @@ int apparmor_bprm_creds_for_exec(struct linux_binprm *bprm)
        const char *info = NULL;
        int error = 0;
        bool unsafe = false;
+       kuid_t i_uid = i_uid_into_mnt(file_mnt_user_ns(bprm->file),
+                                     file_inode(bprm->file));
        struct path_cond cond = {
-               file_inode(bprm->file)->i_uid,
+               i_uid,
                file_inode(bprm->file)->i_mode
        };
 
@@ -967,8 +971,7 @@ audit:
        error = fn_for_each(label, profile,
                        aa_audit_file(profile, &nullperms, OP_EXEC, MAY_EXEC,
                                      bprm->filename, NULL, new,
-                                     file_inode(bprm->file)->i_uid, info,
-                                     error));
+                                     i_uid, info, error));
        aa_put_label(new);
        goto done;
 }
index 92acf9a..e1b7e93 100644 (file)
@@ -11,6 +11,8 @@
 #include <linux/tty.h>
 #include <linux/fdtable.h>
 #include <linux/file.h>
+#include <linux/fs.h>
+#include <linux/mount.h>
 
 #include "include/apparmor.h"
 #include "include/audit.h"
@@ -509,7 +511,7 @@ static int __file_path_perm(const char *op, struct aa_label *label,
        struct aa_profile *profile;
        struct aa_perms perms = {};
        struct path_cond cond = {
-               .uid = file_inode(file)->i_uid,
+               .uid = i_uid_into_mnt(file_mnt_user_ns(file), file_inode(file)),
                .mode = file_inode(file)->i_mode
        };
        char *buffer;
index 1b0aba8..240a533 100644 (file)
@@ -224,8 +224,10 @@ static int common_perm(const char *op, const struct path *path, u32 mask,
  */
 static int common_perm_cond(const char *op, const struct path *path, u32 mask)
 {
-       struct path_cond cond = { d_backing_inode(path->dentry)->i_uid,
-                                 d_backing_inode(path->dentry)->i_mode
+       struct user_namespace *mnt_userns = mnt_user_ns(path->mnt);
+       struct path_cond cond = {
+               i_uid_into_mnt(mnt_userns, d_backing_inode(path->dentry)),
+               d_backing_inode(path->dentry)->i_mode
        };
 
        if (!path_mediated_fs(path->dentry))
@@ -266,12 +268,13 @@ static int common_perm_rm(const char *op, const struct path *dir,
                          struct dentry *dentry, u32 mask)
 {
        struct inode *inode = d_backing_inode(dentry);
+       struct user_namespace *mnt_userns = mnt_user_ns(dir->mnt);
        struct path_cond cond = { };
 
        if (!inode || !path_mediated_fs(dentry))
                return 0;
 
-       cond.uid = inode->i_uid;
+       cond.uid = i_uid_into_mnt(mnt_userns, inode);
        cond.mode = inode->i_mode;
 
        return common_perm_dir_dentry(op, dir, dentry, mask, &cond);
@@ -361,12 +364,14 @@ static int apparmor_path_rename(const struct path *old_dir, struct dentry *old_d
 
        label = begin_current_label_crit_section();
        if (!unconfined(label)) {
+               struct user_namespace *mnt_userns = mnt_user_ns(old_dir->mnt);
                struct path old_path = { .mnt = old_dir->mnt,
                                         .dentry = old_dentry };
                struct path new_path = { .mnt = new_dir->mnt,
                                         .dentry = new_dentry };
-               struct path_cond cond = { d_backing_inode(old_dentry)->i_uid,
-                                         d_backing_inode(old_dentry)->i_mode
+               struct path_cond cond = {
+                       i_uid_into_mnt(mnt_userns, d_backing_inode(old_dentry)),
+                       d_backing_inode(old_dentry)->i_mode
                };
 
                error = aa_path_perm(OP_RENAME_SRC, label, &old_path, 0,
@@ -420,8 +425,12 @@ static int apparmor_file_open(struct file *file)
 
        label = aa_get_newest_cred_label(file->f_cred);
        if (!unconfined(label)) {
+               struct user_namespace *mnt_userns = file_mnt_user_ns(file);
                struct inode *inode = file_inode(file);
-               struct path_cond cond = { inode->i_uid, inode->i_mode };
+               struct path_cond cond = {
+                       i_uid_into_mnt(mnt_userns, inode),
+                       inode->i_mode
+               };
 
                error = aa_path_perm(OP_OPEN, label, &file->f_path, 0,
                                     aa_map_file_to_perms(file), &cond);
index 78598be..1c519c8 100644 (file)
@@ -303,17 +303,25 @@ int cap_inode_need_killpriv(struct dentry *dentry)
 
 /**
  * cap_inode_killpriv - Erase the security markings on an inode
- * @dentry: The inode/dentry to alter
+ *
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dentry:    The inode/dentry to alter
  *
  * Erase the privilege-enhancing security markings on an inode.
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply pass init_user_ns.
+ *
  * Returns 0 if successful, -ve on error.
  */
-int cap_inode_killpriv(struct dentry *dentry)
+int cap_inode_killpriv(struct user_namespace *mnt_userns, struct dentry *dentry)
 {
        int error;
 
-       error = __vfs_removexattr(dentry, XATTR_NAME_CAPS);
+       error = __vfs_removexattr(mnt_userns, dentry, XATTR_NAME_CAPS);
        if (error == -EOPNOTSUPP)
                error = 0;
        return error;
@@ -366,7 +374,8 @@ static bool is_v3header(size_t size, const struct vfs_cap_data *cap)
  * by the integrity subsystem, which really wants the unconverted values -
  * so that's good.
  */
-int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
+int cap_inode_getsecurity(struct user_namespace *mnt_userns,
+                         struct inode *inode, const char *name, void **buffer,
                          bool alloc)
 {
        int size, ret;
@@ -387,8 +396,8 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
                return -EINVAL;
 
        size = sizeof(struct vfs_ns_cap_data);
-       ret = (int) vfs_getxattr_alloc(dentry, XATTR_NAME_CAPS,
-                                &tmpbuf, size, GFP_NOFS);
+       ret = (int)vfs_getxattr_alloc(mnt_userns, dentry, XATTR_NAME_CAPS,
+                                     &tmpbuf, size, GFP_NOFS);
        dput(dentry);
 
        if (ret < 0)
@@ -408,6 +417,9 @@ int cap_inode_getsecurity(struct inode *inode, const char *name, void **buffer,
 
        kroot = make_kuid(fs_ns, root);
 
+       /* If this is an idmapped mount shift the kuid. */
+       kroot = kuid_into_mnt(mnt_userns, kroot);
+
        /* If the root kuid maps to a valid uid in current ns, then return
         * this as a nscap. */
        mappedroot = from_kuid(current_user_ns(), kroot);
@@ -469,16 +481,33 @@ out_free:
        return size;
 }
 
+/**
+ * rootid_from_xattr - translate root uid of vfs caps
+ *
+ * @value:     vfs caps value the root uid is read from
+ * @size:      size of @value
+ * @task_ns:   user namespace of the caller
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the root uid according to @mnt_userns before it is used.
+ * On non-idmapped mounts or if the raw value is wanted simply pass
+ * init_user_ns.
+ */
 static kuid_t rootid_from_xattr(const void *value, size_t size,
-                               struct user_namespace *task_ns)
+                               struct user_namespace *task_ns,
+                               struct user_namespace *mnt_userns)
 {
        const struct vfs_ns_cap_data *nscap = value;
+       kuid_t rootkid;
        uid_t rootid = 0;
 
        if (size == XATTR_CAPS_SZ_3)
                rootid = le32_to_cpu(nscap->rootid);
 
-       return make_kuid(task_ns, rootid);
+       rootkid = make_kuid(task_ns, rootid);
+       return kuid_from_mnt(mnt_userns, rootkid);
 }
 
 static bool validheader(size_t size, const struct vfs_cap_data *cap)
@@ -486,13 +515,27 @@ static bool validheader(size_t size, const struct vfs_cap_data *cap)
        return is_v2header(size, cap) || is_v3header(size, cap);
 }
 
-/*
+/**
+ * cap_convert_nscap - check vfs caps
+ *
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dentry:    used to retrieve inode to check permissions on
+ * @ivalue:    vfs caps value which may be modified by this function
+ * @size:      size of @ivalue
+ *
  * User requested a write of security.capability.  If needed, update the
  * xattr to change from v2 to v3, or to fixup the v3 rootid.
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply pass init_user_ns.
+ *
  * If all is ok, we return the new size, on error return < 0.
  */
-int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size)
+int cap_convert_nscap(struct user_namespace *mnt_userns, struct dentry *dentry,
+                     const void **ivalue, size_t size)
 {
        struct vfs_ns_cap_data *nscap;
        uid_t nsrootid;
@@ -500,8 +543,7 @@ int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size)
        __u32 magic, nsmagic;
        struct inode *inode = d_backing_inode(dentry);
        struct user_namespace *task_ns = current_user_ns(),
-               *fs_ns = inode->i_sb->s_user_ns,
-               *ancestor;
+               *fs_ns = inode->i_sb->s_user_ns;
        kuid_t rootid;
        size_t newsize;
 
@@ -509,14 +551,14 @@ int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size)
                return -EINVAL;
        if (!validheader(size, cap))
                return -EINVAL;
-       if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
+       if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
                return -EPERM;
-       if (size == XATTR_CAPS_SZ_2)
+       if (size == XATTR_CAPS_SZ_2 && (mnt_userns == &init_user_ns))
                if (ns_capable(inode->i_sb->s_user_ns, CAP_SETFCAP))
                        /* user is privileged, just write the v2 */
                        return size;
 
-       rootid = rootid_from_xattr(*ivalue, size, task_ns);
+       rootid = rootid_from_xattr(*ivalue, size, task_ns, mnt_userns);
        if (!uid_valid(rootid))
                return -EINVAL;
 
@@ -524,15 +566,6 @@ int cap_convert_nscap(struct dentry *dentry, const void **ivalue, size_t size)
        if (nsrootid == -1)
                return -EINVAL;
 
-       /*
-        * Do not allow allow adding a v3 filesystem capability xattr
-        * if the rootid field is ambiguous.
-        */
-       for (ancestor = task_ns->parent; ancestor; ancestor = ancestor->parent) {
-               if (from_kuid(ancestor, rootid) == 0)
-                       return -EINVAL;
-       }
-
        newsize = sizeof(struct vfs_ns_cap_data);
        nscap = kmalloc(newsize, GFP_ATOMIC);
        if (!nscap)
@@ -593,10 +626,24 @@ static inline int bprm_caps_from_vfs_caps(struct cpu_vfs_cap_data *caps,
        return *effective ? ret : 0;
 }
 
-/*
+/**
+ * get_vfs_caps_from_disk - retrieve vfs caps from disk
+ *
+ * @mnt_userns:        user namespace of the mount the inode was found from
+ * @dentry:    dentry from which @inode is retrieved
+ * @cpu_caps:  vfs capabilities
+ *
  * Extract the on-exec-apply capability sets for an executable file.
+ *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply pass init_user_ns.
  */
-int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps)
+int get_vfs_caps_from_disk(struct user_namespace *mnt_userns,
+                          const struct dentry *dentry,
+                          struct cpu_vfs_cap_data *cpu_caps)
 {
        struct inode *inode = d_backing_inode(dentry);
        __u32 magic_etc;
@@ -652,6 +699,7 @@ int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data
        /* Limit the caps to the mounter of the filesystem
         * or the more limited uid specified in the xattr.
         */
+       rootkuid = kuid_into_mnt(mnt_userns, rootkuid);
        if (!rootid_owns_currentns(rootkuid))
                return -ENODATA;
 
@@ -697,7 +745,8 @@ static int get_file_caps(struct linux_binprm *bprm, struct file *file,
        if (!current_in_userns(file->f_path.mnt->mnt_sb->s_user_ns))
                return 0;
 
-       rc = get_vfs_caps_from_disk(file->f_path.dentry, &vcaps);
+       rc = get_vfs_caps_from_disk(file_mnt_user_ns(file),
+                                   file->f_path.dentry, &vcaps);
        if (rc < 0) {
                if (rc == -EINVAL)
                        printk(KERN_NOTICE "Invalid argument reading file caps for %s\n",
@@ -962,16 +1011,25 @@ int cap_inode_setxattr(struct dentry *dentry, const char *name,
 
 /**
  * cap_inode_removexattr - Determine whether an xattr may be removed
- * @dentry: The inode/dentry being altered
- * @name: The name of the xattr to be changed
+ *
+ * @mnt_userns:        User namespace of the mount the inode was found from
+ * @dentry:    The inode/dentry being altered
+ * @name:      The name of the xattr to be changed
  *
  * Determine whether an xattr may be removed from an inode, returning 0 if
  * permission is granted, -ve if denied.
  *
+ * If the inode has been found through an idmapped mount the user namespace of
+ * the vfsmount must be passed through @mnt_userns. This function will then
+ * take care to map the inode according to @mnt_userns before checking
+ * permissions. On non-idmapped mounts or if permission checking is to be
+ * performed on the raw inode simply pass init_user_ns.
+ *
  * This is used to make sure security xattrs don't get removed by those who
  * aren't privileged to remove them.
  */
-int cap_inode_removexattr(struct dentry *dentry, const char *name)
+int cap_inode_removexattr(struct user_namespace *mnt_userns,
+                         struct dentry *dentry, const char *name)
 {
        struct user_namespace *user_ns = dentry->d_sb->s_user_ns;
 
@@ -985,7 +1043,7 @@ int cap_inode_removexattr(struct dentry *dentry, const char *name)
                struct inode *inode = d_backing_inode(dentry);
                if (!inode)
                        return -EINVAL;
-               if (!capable_wrt_inode_uidgid(inode, CAP_SETFCAP))
+               if (!capable_wrt_inode_uidgid(mnt_userns, inode, CAP_SETFCAP))
                        return -EPERM;
                return 0;
        }
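
For orientation, the v2-to-v3 rewrite that cap_convert_nscap() performs can be sketched in isolation: the capability sets are carried over unchanged, the revision bits in magic_etc flip from 2 to 3, and the mount-mapped root uid is appended. The sketch below uses the real UAPI revision constants but hypothetical struct and function names, and it omits the little-endian conversion the kernel applies when storing the fields:

    #include <stdint.h>
    #include <stdio.h>
    #include <string.h>

    /* Revision constants as in include/uapi/linux/capability.h. */
    #define VFS_CAP_REVISION_MASK   0xFF000000
    #define VFS_CAP_REVISION_2      0x02000000
    #define VFS_CAP_REVISION_3      0x03000000

    struct model_cap_data { uint32_t permitted, inheritable; };

    struct model_vfs_cap_data {             /* v2 layout */
            uint32_t magic_etc;
            struct model_cap_data data[2];
    };

    struct model_vfs_ns_cap_data {          /* v3 layout: v2 plus rootid */
            uint32_t magic_etc;
            struct model_cap_data data[2];
            uint32_t rootid;
    };

    /* Sketch of the conversion: preserve sets and flags, bump the
     * revision, record the root uid already mapped through @mnt_userns. */
    static void model_convert_nscap(const struct model_vfs_cap_data *v2,
                                    uint32_t mapped_rootid,
                                    struct model_vfs_ns_cap_data *v3)
    {
            v3->magic_etc = (v2->magic_etc & ~VFS_CAP_REVISION_MASK) |
                            VFS_CAP_REVISION_3;
            memcpy(v3->data, v2->data, sizeof(v3->data));
            v3->rootid = mapped_rootid;
    }

    int main(void)
    {
            struct model_vfs_cap_data v2 = {
                    VFS_CAP_REVISION_2, { { 0x1, 0 }, { 0, 0 } }
            };
            struct model_vfs_ns_cap_data v3;

            model_convert_nscap(&v2, 100000, &v3);
            printf("magic 0x%08x rootid %u\n", v3.magic_etc, v3.rootid);
            return 0;
    }
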
index a6dd47e..d76b006 100644 (file)
@@ -225,7 +225,7 @@ static int evm_calc_hmac_or_hash(struct dentry *dentry,
                                ima_present = true;
                        continue;
                }
-               size = vfs_getxattr_alloc(dentry, xattr->name,
+               size = vfs_getxattr_alloc(&init_user_ns, dentry, xattr->name,
                                          &xattr_value, xattr_size, GFP_NOFS);
                if (size == -ENOMEM) {
                        error = -ENOMEM;
@@ -278,8 +278,8 @@ static int evm_is_immutable(struct dentry *dentry, struct inode *inode)
                return 1;
 
        /* Do this the hard way */
-       rc = vfs_getxattr_alloc(dentry, XATTR_NAME_EVM, (char **)&xattr_data, 0,
-                               GFP_NOFS);
+       rc = vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_EVM,
+                               (char **)&xattr_data, 0, GFP_NOFS);
        if (rc <= 0) {
                if (rc == -ENODATA)
                        return 0;
@@ -322,11 +322,12 @@ int evm_update_evmxattr(struct dentry *dentry, const char *xattr_name,
                           xattr_value_len, &data);
        if (rc == 0) {
                data.hdr.xattr.sha1.type = EVM_XATTR_HMAC;
-               rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_EVM,
+               rc = __vfs_setxattr_noperm(&init_user_ns, dentry,
+                                          XATTR_NAME_EVM,
                                           &data.hdr.xattr.data[1],
                                           SHA1_DIGEST_SIZE + 1, 0);
        } else if (rc == -ENODATA && (inode->i_opflags & IOP_XATTR)) {
-               rc = __vfs_removexattr(dentry, XATTR_NAME_EVM);
+               rc = __vfs_removexattr(&init_user_ns, dentry, XATTR_NAME_EVM);
        }
        return rc;
 }
index 76d1914..0de367a 100644 (file)
@@ -146,8 +146,8 @@ static enum integrity_status evm_verify_hmac(struct dentry *dentry,
        /* if status is not PASS, try to check again - against -ENOMEM */
 
        /* first need to know the sig type */
-       rc = vfs_getxattr_alloc(dentry, XATTR_NAME_EVM, (char **)&xattr_data, 0,
-                               GFP_NOFS);
+       rc = vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_EVM,
+                               (char **)&xattr_data, 0, GFP_NOFS);
        if (rc <= 0) {
                evm_status = INTEGRITY_FAIL;
                if (rc == -ENODATA) {
index cfc3075..bbc8563 100644 (file)
@@ -219,7 +219,7 @@ static ssize_t evm_write_xattrs(struct file *file, const char __user *buf,
                newattrs.ia_valid = ATTR_MODE;
                inode = evm_xattrs->d_inode;
                inode_lock(inode);
-               err = simple_setattr(evm_xattrs, &newattrs);
+               err = simple_setattr(&init_user_ns, evm_xattrs, &newattrs);
                inode_unlock(inode);
                if (!err)
                        err = count;
index 1d20003..0ba0184 100644 (file)
@@ -98,6 +98,14 @@ struct integrity_iint_cache *integrity_inode_get(struct inode *inode)
        struct rb_node *node, *parent = NULL;
        struct integrity_iint_cache *iint, *test_iint;
 
+       /*
+        * The integrity "iint_cache" is initialized at security_init() only
+        * when "integrity" is included in the ordered list of LSMs enabled
+        * on the boot command line.
+        */
+       if (!iint_cache)
+               panic("%s: lsm=integrity required.\n", __func__);
+
        iint = integrity_iint_find(inode);
        if (iint)
                return iint;
index aa31247..8e8b525 100644 (file)
@@ -254,8 +254,9 @@ static inline void ima_process_queued_keys(void) {}
 #endif /* CONFIG_IMA_QUEUE_EARLY_BOOT_KEYS */
 
 /* LIM API function definitions */
-int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
-                  int mask, enum ima_hooks func, int *pcr,
+int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
+                  const struct cred *cred, u32 secid, int mask,
+                  enum ima_hooks func, int *pcr,
                   struct ima_template_desc **template_desc,
                   const char *func_data);
 int ima_must_measure(struct inode *inode, int mask, enum ima_hooks func);
@@ -267,7 +268,8 @@ void ima_store_measurement(struct integrity_iint_cache *iint, struct file *file,
                           struct evm_ima_xattr_data *xattr_value,
                           int xattr_len, const struct modsig *modsig, int pcr,
                           struct ima_template_desc *template_desc);
-void process_buffer_measurement(struct inode *inode, const void *buf, int size,
+void process_buffer_measurement(struct user_namespace *mnt_userns,
+                               struct inode *inode, const void *buf, int size,
                                const char *eventname, enum ima_hooks func,
                                int pcr, const char *func_data,
                                bool buf_hash);
@@ -283,8 +285,9 @@ void ima_free_template_entry(struct ima_template_entry *entry);
 const char *ima_d_path(const struct path *path, char **pathbuf, char *filename);
 
 /* IMA policy related functions */
-int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
-                    enum ima_hooks func, int mask, int flags, int *pcr,
+int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
+                    const struct cred *cred, u32 secid, enum ima_hooks func,
+                    int mask, int flags, int *pcr,
                     struct ima_template_desc **template_desc,
                     const char *func_data);
 void ima_init_policy(void);
@@ -315,7 +318,8 @@ int ima_appraise_measurement(enum ima_hooks func,
                             struct file *file, const unsigned char *filename,
                             struct evm_ima_xattr_data *xattr_value,
                             int xattr_len, const struct modsig *modsig);
-int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func);
+int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode,
+                     int mask, enum ima_hooks func);
 void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file);
 enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint,
                                           enum ima_hooks func);
@@ -342,7 +346,8 @@ static inline int ima_appraise_measurement(enum ima_hooks func,
        return INTEGRITY_UNKNOWN;
 }
 
-static inline int ima_must_appraise(struct inode *inode, int mask,
+static inline int ima_must_appraise(struct user_namespace *mnt_userns,
+                                   struct inode *inode, int mask,
                                    enum ima_hooks func)
 {
        return 0;
index 1dd70dc..d8e321c 100644 (file)
@@ -162,6 +162,7 @@ err_out:
 
 /**
  * ima_get_action - appraise & measure decision based on policy.
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode: pointer to the inode associated with the object being validated
  * @cred: pointer to credentials structure to validate
  * @secid: secid of the task being validated
@@ -183,8 +184,9 @@ err_out:
  * Returns IMA_MEASURE, IMA_APPRAISE mask.
  *
  */
-int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
-                  int mask, enum ima_hooks func, int *pcr,
+int ima_get_action(struct user_namespace *mnt_userns, struct inode *inode,
+                  const struct cred *cred, u32 secid, int mask,
+                  enum ima_hooks func, int *pcr,
                   struct ima_template_desc **template_desc,
                   const char *func_data)
 {
@@ -192,8 +194,8 @@ int ima_get_action(struct inode *inode, const struct cred *cred, u32 secid,
 
        flags &= ima_policy_flag;
 
-       return ima_match_policy(inode, cred, secid, func, mask, flags, pcr,
-                               template_desc, func_data);
+       return ima_match_policy(mnt_userns, inode, cred, secid, func, mask,
+                               flags, pcr, template_desc, func_data);
 }
 
 /*
index 46ffa38..565e33f 100644 (file)
@@ -68,7 +68,8 @@ bool is_ima_appraise_enabled(void)
  *
  * Return 1 to appraise or hash
  */
-int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func)
+int ima_must_appraise(struct user_namespace *mnt_userns, struct inode *inode,
+                     int mask, enum ima_hooks func)
 {
        u32 secid;
 
@@ -76,8 +77,8 @@ int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func)
                return 0;
 
        security_task_getsecid(current, &secid);
-       return ima_match_policy(inode, current_cred(), secid, func, mask,
-                               IMA_APPRAISE | IMA_HASH, NULL, NULL, NULL);
+       return ima_match_policy(mnt_userns, inode, current_cred(), secid, func,
+                               mask, IMA_APPRAISE | IMA_HASH, NULL, NULL, NULL);
 }
 
 static int ima_fix_xattr(struct dentry *dentry,
@@ -94,7 +95,7 @@ static int ima_fix_xattr(struct dentry *dentry,
                iint->ima_hash->xattr.ng.type = IMA_XATTR_DIGEST_NG;
                iint->ima_hash->xattr.ng.algo = algo;
        }
-       rc = __vfs_setxattr_noperm(dentry, XATTR_NAME_IMA,
+       rc = __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_IMA,
                                   &iint->ima_hash->xattr.data[offset],
                                   (sizeof(iint->ima_hash->xattr) - offset) +
                                   iint->ima_hash->length, 0);
@@ -215,8 +216,8 @@ int ima_read_xattr(struct dentry *dentry,
 {
        ssize_t ret;
 
-       ret = vfs_getxattr_alloc(dentry, XATTR_NAME_IMA, (char **)xattr_value,
-                                0, GFP_NOFS);
+       ret = vfs_getxattr_alloc(&init_user_ns, dentry, XATTR_NAME_IMA,
+                                (char **)xattr_value, 0, GFP_NOFS);
        if (ret == -EOPNOTSUPP)
                ret = 0;
        return ret;
@@ -350,7 +351,7 @@ int ima_check_blacklist(struct integrity_iint_cache *iint,
 
                rc = is_binary_blacklisted(digest, digestsize);
                if ((rc == -EPERM) && (iint->flags & IMA_MEASURE))
-                       process_buffer_measurement(NULL, digest, digestsize,
+                       process_buffer_measurement(&init_user_ns, NULL, digest, digestsize,
                                                   "blacklisted-hash", NONE,
                                                   pcr, NULL, false);
        }
@@ -501,6 +502,7 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
 
 /**
  * ima_inode_post_setattr - reflect file metadata changes
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dentry: pointer to the affected dentry
  *
  * Changes to a dentry's metadata might result in needing to appraise.
@@ -508,7 +510,8 @@ void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file)
  * This function is called from notify_change(), which expects the caller
  * to lock the inode's i_mutex.
  */
-void ima_inode_post_setattr(struct dentry *dentry)
+void ima_inode_post_setattr(struct user_namespace *mnt_userns,
+                           struct dentry *dentry)
 {
        struct inode *inode = d_backing_inode(dentry);
        struct integrity_iint_cache *iint;
@@ -518,9 +521,9 @@ void ima_inode_post_setattr(struct dentry *dentry)
            || !(inode->i_opflags & IOP_XATTR))
                return;
 
-       action = ima_must_appraise(inode, MAY_ACCESS, POST_SETATTR);
+       action = ima_must_appraise(mnt_userns, inode, MAY_ACCESS, POST_SETATTR);
        if (!action)
-               __vfs_removexattr(dentry, XATTR_NAME_IMA);
+               __vfs_removexattr(&init_user_ns, dentry, XATTR_NAME_IMA);
        iint = integrity_iint_find(inode);
        if (iint) {
                set_bit(IMA_CHANGE_ATTR, &iint->atomic_flags);
index a740957..1fb0b0e 100644 (file)
@@ -10,6 +10,7 @@
  */
 
 #include <keys/asymmetric-type.h>
+#include <linux/user_namespace.h>
 #include "ima.h"
 
 /**
@@ -58,7 +59,7 @@ void ima_post_key_create_or_update(struct key *keyring, struct key *key,
         * if the IMA policy is configured to measure a key linked
         * to the given keyring.
         */
-       process_buffer_measurement(NULL, payload, payload_len,
+       process_buffer_measurement(&init_user_ns, NULL, payload, payload_len,
                                   keyring->description, KEY_CHECK, 0,
                                   keyring->description, false);
 }
index 6a42984..9ef748e 100644 (file)
@@ -218,8 +218,8 @@ static int process_measurement(struct file *file, const struct cred *cred,
         * bitmask based on the appraise/audit/measurement policy.
         * Included is the appraise submask.
         */
-       action = ima_get_action(inode, cred, secid, mask, func, &pcr,
-                               &template_desc, NULL);
+       action = ima_get_action(file_mnt_user_ns(file), inode, cred, secid,
+                               mask, func, &pcr, &template_desc, NULL);
        violation_check = ((func == FILE_CHECK || func == MMAP_CHECK) &&
                           (ima_policy_flag & IMA_MEASURE));
        if (!action && !violation_check)
@@ -431,8 +431,9 @@ int ima_file_mprotect(struct vm_area_struct *vma, unsigned long prot)
 
        security_task_getsecid(current, &secid);
        inode = file_inode(vma->vm_file);
-       action = ima_get_action(inode, current_cred(), secid, MAY_EXEC,
-                               MMAP_CHECK, &pcr, &template, 0);
+       action = ima_get_action(file_mnt_user_ns(vma->vm_file), inode,
+                               current_cred(), secid, MAY_EXEC, MMAP_CHECK,
+                               &pcr, &template, NULL);
 
        /* Is the mmap'ed file in policy? */
        if (!(action & (IMA_MEASURE | IMA_APPRAISE_SUBMASK)))
@@ -592,18 +593,21 @@ EXPORT_SYMBOL_GPL(ima_inode_hash);
 
 /**
  * ima_post_create_tmpfile - mark newly created tmpfile as new
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @file : newly created tmpfile
  *
  * No measuring, appraising or auditing of newly created tmpfiles is needed.
  * Skip calling process_measurement(), but indicate which newly, created
  * tmpfiles are in policy.
  */
-void ima_post_create_tmpfile(struct inode *inode)
+void ima_post_create_tmpfile(struct user_namespace *mnt_userns,
+                            struct inode *inode)
 {
        struct integrity_iint_cache *iint;
        int must_appraise;
 
-       must_appraise = ima_must_appraise(inode, MAY_ACCESS, FILE_CHECK);
+       must_appraise = ima_must_appraise(mnt_userns, inode, MAY_ACCESS,
+                                         FILE_CHECK);
        if (!must_appraise)
                return;
 
@@ -619,18 +623,21 @@ void ima_post_create_tmpfile(struct inode *inode)
 
 /**
  * ima_post_path_mknod - mark as a new inode
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @dentry: newly created dentry
  *
  * Mark files created via the mknodat syscall as new, so that the
  * file data can be written later.
  */
-void ima_post_path_mknod(struct dentry *dentry)
+void ima_post_path_mknod(struct user_namespace *mnt_userns,
+                        struct dentry *dentry)
 {
        struct integrity_iint_cache *iint;
        struct inode *inode = dentry->d_inode;
        int must_appraise;
 
-       must_appraise = ima_must_appraise(inode, MAY_ACCESS, FILE_CHECK);
+       must_appraise = ima_must_appraise(mnt_userns, inode, MAY_ACCESS,
+                                         FILE_CHECK);
        if (!must_appraise)
                return;
 
@@ -810,6 +817,7 @@ int ima_post_load_data(char *buf, loff_t size,
 
 /*
  * process_buffer_measurement - Measure the buffer or the buffer data hash
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode: inode associated with the object being measured (NULL for KEY_CHECK)
  * @buf: pointer to the buffer that needs to be added to the log.
  * @size: size of buffer(in bytes).
@@ -821,7 +829,8 @@ int ima_post_load_data(char *buf, loff_t size,
  *
  * Based on policy, either the buffer data or buffer data hash is measured
  */
-void process_buffer_measurement(struct inode *inode, const void *buf, int size,
+void process_buffer_measurement(struct user_namespace *mnt_userns,
+                               struct inode *inode, const void *buf, int size,
                                const char *eventname, enum ima_hooks func,
                                int pcr, const char *func_data,
                                bool buf_hash)
@@ -864,8 +873,9 @@ void process_buffer_measurement(struct inode *inode, const void *buf, int size,
         */
        if (func) {
                security_task_getsecid(current, &secid);
-               action = ima_get_action(inode, current_cred(), secid, 0, func,
-                                       &pcr, &template, func_data);
+               action = ima_get_action(mnt_userns, inode, current_cred(),
+                                       secid, 0, func, &pcr, &template,
+                                       func_data);
                if (!(action & IMA_MEASURE))
                        return;
        }
@@ -937,9 +947,9 @@ void ima_kexec_cmdline(int kernel_fd, const void *buf, int size)
        if (!f.file)
                return;
 
-       process_buffer_measurement(file_inode(f.file), buf, size,
-                                  "kexec-cmdline", KEXEC_CMDLINE, 0, NULL,
-                                  false);
+       process_buffer_measurement(file_mnt_user_ns(f.file), file_inode(f.file),
+                                  buf, size, "kexec-cmdline", KEXEC_CMDLINE, 0,
+                                  NULL, false);
        fdput(f);
 }
 
@@ -964,7 +974,7 @@ void ima_measure_critical_data(const char *event_label,
        if (!event_name || !event_label || !buf || !buf_len)
                return;
 
-       process_buffer_measurement(NULL, buf, buf_len, event_name,
+       process_buffer_measurement(&init_user_ns, NULL, buf, buf_len, event_name,
                                   CRITICAL_DATA, 0, event_label,
                                   hash);
 }
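
The calling convention this IMA rework establishes: hooks that hold a file derive the namespace from its mount, while measurements with no backing mount (keys, critical data, blacklist hashes) pass &init_user_ns. The two accessors involved are trivial; as of this series they read roughly as follows (from include/linux/mount.h and include/linux/fs.h):

    static inline struct user_namespace *mnt_user_ns(const struct vfsmount *mnt)
    {
            return mnt->mnt_userns;
    }

    static inline struct user_namespace *file_mnt_user_ns(struct file *file)
    {
            return mnt_user_ns(file->f_path.mnt);
    }
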
index 36cadad..1e5c019 100644 (file)
@@ -38,13 +38,12 @@ __init int ima_mok_init(void)
                                (KEY_POS_ALL & ~KEY_POS_SETATTR) |
                                KEY_USR_VIEW | KEY_USR_READ |
                                KEY_USR_WRITE | KEY_USR_SEARCH,
-                               KEY_ALLOC_NOT_IN_QUOTA,
+                               KEY_ALLOC_NOT_IN_QUOTA |
+                               KEY_ALLOC_SET_KEEP,
                                restriction, NULL);
 
        if (IS_ERR(ima_blacklist_keyring))
                panic("Can't allocate IMA blacklist keyring.");
-
-       set_bit(KEY_FLAG_KEEP, &ima_blacklist_keyring->flags);
        return 0;
 }
 device_initcall(ima_mok_init);
index 9b45d06..4f8cb15 100644 (file)
@@ -513,6 +513,7 @@ static bool ima_match_rule_data(struct ima_rule_entry *rule,
 /**
  * ima_match_rules - determine whether an inode matches the policy rule.
  * @rule: a pointer to a rule
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode: a pointer to an inode
  * @cred: a pointer to a credentials structure for user validation
  * @secid: the secid of the task to be validated
@@ -522,9 +523,10 @@ static bool ima_match_rule_data(struct ima_rule_entry *rule,
  *
  * Returns true on rule match, false on failure.
  */
-static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
-                           const struct cred *cred, u32 secid,
-                           enum ima_hooks func, int mask,
+static bool ima_match_rules(struct ima_rule_entry *rule,
+                           struct user_namespace *mnt_userns,
+                           struct inode *inode, const struct cred *cred,
+                           u32 secid, enum ima_hooks func, int mask,
                            const char *func_data)
 {
        int i;
@@ -570,7 +572,7 @@ static bool ima_match_rules(struct ima_rule_entry *rule, struct inode *inode,
        }
 
        if ((rule->flags & IMA_FOWNER) &&
-           !rule->fowner_op(inode->i_uid, rule->fowner))
+           !rule->fowner_op(i_uid_into_mnt(mnt_userns, inode), rule->fowner))
                return false;
        for (i = 0; i < MAX_LSM_RULES; i++) {
                int rc = 0;
@@ -633,6 +635,7 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
 
 /**
  * ima_match_policy - decision based on LSM and other conditions
+ * @mnt_userns:        user namespace of the mount the inode was found from
  * @inode: pointer to an inode for which the policy decision is being made
  * @cred: pointer to a credentials structure for which the policy decision is
  *        being made
@@ -650,8 +653,9 @@ static int get_subaction(struct ima_rule_entry *rule, enum ima_hooks func)
  * list when walking it.  Reads are many orders of magnitude more numerous
  * than writes so ima_match_policy() is classical RCU candidate.
  */
-int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
-                    enum ima_hooks func, int mask, int flags, int *pcr,
+int ima_match_policy(struct user_namespace *mnt_userns, struct inode *inode,
+                    const struct cred *cred, u32 secid, enum ima_hooks func,
+                    int mask, int flags, int *pcr,
                     struct ima_template_desc **template_desc,
                     const char *func_data)
 {
@@ -667,8 +671,8 @@ int ima_match_policy(struct inode *inode, const struct cred *cred, u32 secid,
                if (!(entry->action & actmask))
                        continue;
 
-               if (!ima_match_rules(entry, inode, cred, secid, func, mask,
-                                    func_data))
+               if (!ima_match_rules(entry, mnt_userns, inode, cred, secid,
+                                    func, mask, func_data))
                        continue;
 
                action |= entry->flags & IMA_ACTION_FLAGS;
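
The practical effect of threading mnt_userns into ima_match_rules() shows up in fowner matching: a rule such as fowner=0 can now match a file on an idmapped mount whose on-disk owner maps to uid 0. A self-contained model of the comparison, assuming a single idmap extent and a hypothetical map_uid() helper in place of i_uid_into_mnt():

    #include <stdbool.h>
    #include <stdio.h>

    /* Minimal model of one idmap extent: kernel id -> mount-mapped id. */
    static bool map_uid(unsigned int outside, unsigned int inside,
                        unsigned int count, unsigned int kuid,
                        unsigned int *uid)
    {
            if (kuid < outside || kuid >= outside + count)
                    return false;
            *uid = inside + (kuid - outside);
            return true;
    }

    int main(void)
    {
            unsigned int rule_fowner = 0;   /* fowner=0 in the policy rule */
            unsigned int i_uid = 100000;    /* raw owner on disk */
            unsigned int mapped;

            /* idmapped mount: kuids 100000..100999 appear as 0..999 */
            bool ok = map_uid(100000, 0, 1000, i_uid, &mapped);

            /* raw comparison misses, mapped comparison matches */
            printf("raw match: %d, mapped match: %d\n",
                   i_uid == rule_fowner, ok && mapped == rule_fowner);
            return 0;
    }
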
index c2f2ad3..979ef6c 100644 (file)
@@ -8,6 +8,7 @@
  *       Enables deferred processing of keys
  */
 
+#include <linux/user_namespace.h>
 #include <linux/workqueue.h>
 #include <keys/asymmetric-type.h>
 #include "ima.h"
@@ -158,7 +159,8 @@ void ima_process_queued_keys(void)
 
        list_for_each_entry_safe(entry, tmp, &ima_keys, list) {
                if (!timer_expired)
-                       process_buffer_measurement(NULL, entry->payload,
+                       process_buffer_measurement(&init_user_ns, NULL,
+                                                  entry->payload,
                                                   entry->payload_len,
                                                   entry->keyring_name,
                                                   KEY_CHECK, 0,
index 83bc234..c161642 100644 (file)
@@ -119,7 +119,7 @@ config KEY_NOTIFICATIONS
        bool "Provide key/keyring change notifications"
        depends on KEYS && WATCH_QUEUE
        help
-         This option provides support for getting change notifications on keys
-         and keyrings on which the caller has View permission.  This makes use
-         of the /dev/watch_queue misc device to handle the notification
-         buffer and provides KEYCTL_WATCH_KEY to enable/disable watches.
+         This option provides support for getting change notifications
+         on keys and keyrings on which the caller has View permission.
+         This makes use of pipes to handle the notification buffer and
+         provides KEYCTL_WATCH_KEY to enable/disable watches.
index 691347d..d17e5f0 100644 (file)
@@ -121,8 +121,7 @@ int big_key_preparse(struct key_preparsed_payload *prep)
                *path = file->f_path;
                path_get(path);
                fput(file);
-               memzero_explicit(buf, enclen);
-               kvfree(buf);
+               kvfree_sensitive(buf, enclen);
        } else {
                /* Just store the data in a buffer */
                void *data = kmalloc(datalen, GFP_KERNEL);
@@ -140,8 +139,7 @@ err_fput:
 err_enckey:
        kfree_sensitive(enckey);
 error:
-       memzero_explicit(buf, enclen);
-       kvfree(buf);
+       kvfree_sensitive(buf, enclen);
        return ret;
 }
 
@@ -273,8 +271,7 @@ long big_key_read(const struct key *key, char *buffer, size_t buflen)
 err_fput:
                fput(file);
 error:
-               memzero_explicit(buf, enclen);
-               kvfree(buf);
+               kvfree_sensitive(buf, enclen);
        } else {
                ret = datalen;
                memcpy(buffer, key->payload.data[big_key_data], datalen);
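
The big_key changes are straight substitutions: kvfree_sensitive() has folded the zero-then-free pair into one call since it was added to mm/util.c, so the open-coded sequence was redundant. For reference, the helper amounts to:

    void kvfree_sensitive(const void *addr, size_t len)
    {
            if (likely(!ZERO_OR_NULL_PTR(addr))) {
                    memzero_explicit((void *)addr, len);
                    kvfree(addr);
            }
    }
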
index ebe752b..c45afdd 100644 (file)
@@ -303,6 +303,8 @@ struct key *key_alloc(struct key_type *type, const char *desc,
                key->flags |= 1 << KEY_FLAG_BUILTIN;
        if (flags & KEY_ALLOC_UID_KEYRING)
                key->flags |= 1 << KEY_FLAG_UID_KEYRING;
+       if (flags & KEY_ALLOC_SET_KEEP)
+               key->flags |= 1 << KEY_FLAG_KEEP;
 
 #ifdef KEY_DEBUGGING
        key->magic = KEY_DEBUG_MAGIC;
index 61a614c..96a92a6 100644 (file)
@@ -506,7 +506,7 @@ error:
  * keyring, otherwise replace the link to the matching key with a link to the
  * new key.
  *
- * The key must grant the caller Link permission and the the keyring must grant
+ * The key must grant the caller Link permission and the keyring must grant
  * the caller Write permission.  Furthermore, if an additional link is created,
  * the keyring's quota will be extended.
  *
index 931d8df..5de0d59 100644 (file)
@@ -166,8 +166,6 @@ long keyctl_pkey_query(key_serial_t id,
        struct kernel_pkey_query res;
        long ret;
 
-       memset(&params, 0, sizeof(params));
-
        ret = keyctl_pkey_params_get(id, _info, &params);
        if (ret < 0)
                goto error;
index 14abfe7..5e6a907 100644 (file)
@@ -452,7 +452,7 @@ static void keyring_describe(const struct key *keyring, struct seq_file *m)
 struct keyring_read_iterator_context {
        size_t                  buflen;
        size_t                  count;
-       key_serial_t __user     *buffer;
+       key_serial_t            *buffer;
 };
 
 static int keyring_read_iterator(const void *object, void *data)
@@ -479,7 +479,7 @@ static int keyring_read_iterator(const void *object, void *data)
  * times.
  */
 static long keyring_read(const struct key *keyring,
-                        char __user *buffer, size_t buflen)
+                        char *buffer, size_t buflen)
 {
        struct keyring_read_iterator_context ctx;
        long ret;
@@ -491,7 +491,7 @@ static long keyring_read(const struct key *keyring,
 
        /* Copy as many key IDs as fit into the buffer */
        if (buffer && buflen) {
-               ctx.buffer = (key_serial_t __user *)buffer;
+               ctx.buffer = (key_serial_t *)buffer;
                ctx.buflen = buflen;
                ctx.count = 0;
                ret = assoc_array_iterate(&keyring->keys,
@@ -881,7 +881,7 @@ found:
  *
  * Keys are matched to the type provided and are then filtered by the match
  * function, which is given the description to use in any way it sees fit.  The
- * match function may use any attributes of a key that it wishes to to
+ * match function may use any attributes of a key that it wishes to
  * determine the match.  Normally the match function from the key type would be
  * used.
  *
@@ -1204,7 +1204,7 @@ static int keyring_detect_cycle_iterator(const void *object,
 }
 
 /*
- * See if a cycle will will be created by inserting acyclic tree B in acyclic
+ * See if a cycle will be created by inserting acyclic tree B in acyclic
  * tree A at the topmost level (ie: as a direct child of A).
  *
  * Since we are adding B to A at the top level, checking for cycles should just
index 1fe8b93..e3d79a7 100644 (file)
@@ -783,6 +783,7 @@ try_again:
                                if (need_perm != KEY_AUTHTOKEN_OVERRIDE &&
                                    need_perm != KEY_DEFER_PERM_CHECK)
                                        goto invalid_key;
+                               break;
                        case 0:
                                break;
                        }
index 401663b..5ac96b1 100644 (file)
@@ -1288,7 +1288,8 @@ int security_inode_getattr(const struct path *path)
        return call_int_hook(inode_getattr, 0, path);
 }
 
-int security_inode_setxattr(struct dentry *dentry, const char *name,
+int security_inode_setxattr(struct user_namespace *mnt_userns,
+                           struct dentry *dentry, const char *name,
                            const void *value, size_t size, int flags)
 {
        int ret;
@@ -1299,8 +1300,8 @@ int security_inode_setxattr(struct dentry *dentry, const char *name,
         * SELinux and Smack integrate the cap call,
         * so assume that all LSMs supplying this call do so.
         */
-       ret = call_int_hook(inode_setxattr, 1, dentry, name, value, size,
-                               flags);
+       ret = call_int_hook(inode_setxattr, 1, mnt_userns, dentry, name, value,
+                           size, flags);
 
        if (ret == 1)
                ret = cap_inode_setxattr(dentry, name, value, size, flags);
@@ -1335,7 +1336,8 @@ int security_inode_listxattr(struct dentry *dentry)
        return call_int_hook(inode_listxattr, 0, dentry);
 }
 
-int security_inode_removexattr(struct dentry *dentry, const char *name)
+int security_inode_removexattr(struct user_namespace *mnt_userns,
+                              struct dentry *dentry, const char *name)
 {
        int ret;
 
@@ -1345,9 +1347,9 @@ int security_inode_removexattr(struct dentry *dentry, const char *name)
         * SELinux and Smack integrate the cap call,
         * so assume that all LSMs supplying this call do so.
         */
-       ret = call_int_hook(inode_removexattr, 1, dentry, name);
+       ret = call_int_hook(inode_removexattr, 1, mnt_userns, dentry, name);
        if (ret == 1)
-               ret = cap_inode_removexattr(dentry, name);
+               ret = cap_inode_removexattr(mnt_userns, dentry, name);
        if (ret)
                return ret;
        ret = ima_inode_removexattr(dentry, name);
@@ -1361,12 +1363,15 @@ int security_inode_need_killpriv(struct dentry *dentry)
        return call_int_hook(inode_need_killpriv, 0, dentry);
 }
 
-int security_inode_killpriv(struct dentry *dentry)
+int security_inode_killpriv(struct user_namespace *mnt_userns,
+                           struct dentry *dentry)
 {
-       return call_int_hook(inode_killpriv, 0, dentry);
+       return call_int_hook(inode_killpriv, 0, mnt_userns, dentry);
 }
 
-int security_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc)
+int security_inode_getsecurity(struct user_namespace *mnt_userns,
+                              struct inode *inode, const char *name,
+                              void **buffer, bool alloc)
 {
        struct security_hook_list *hp;
        int rc;
@@ -1377,7 +1382,7 @@ int security_inode_getsecurity(struct inode *inode, const char *name, void **buf
         * Only one module will provide an attribute with a given name.
         */
        hlist_for_each_entry(hp, &security_hook_heads.inode_getsecurity, list) {
-               rc = hp->hook.inode_getsecurity(inode, name, buffer, alloc);
+               rc = hp->hook.inode_getsecurity(mnt_userns, inode, name, buffer, alloc);
                if (rc != LSM_RET_DEFAULT(inode_getsecurity))
                        return rc;
        }
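
security_inode_getsecurity() keeps its "first module that answers wins" shape; only the hook signature grows the namespace argument. The dispatch can be modeled as below, with hypothetical names; the kernel uses hlist iteration and the LSM_RET_DEFAULT() macro, whose value for this hook is -EOPNOTSUPP:

    #include <stddef.h>

    #define MODEL_RET_DEFAULT (-95)  /* -EOPNOTSUPP */

    struct hook_model {
            int (*inode_getsecurity)(const char *name, void **buffer, int alloc);
    };

    static int model_dispatch(const struct hook_model *hooks, size_t n,
                              const char *name, void **buffer, int alloc)
    {
            for (size_t i = 0; i < n; i++) {
                    int rc = hooks[i].inode_getsecurity(name, buffer, alloc);

                    /* Only one module provides a given attribute name;
                     * the first non-default answer ends the walk. */
                    if (rc != MODEL_RET_DEFAULT)
                            return rc;
            }
            return MODEL_RET_DEFAULT;
    }
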
index af2994a..ddd0977 100644 (file)
@@ -3203,7 +3203,8 @@ static bool has_cap_mac_admin(bool audit)
        return true;
 }
 
-static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
+static int selinux_inode_setxattr(struct user_namespace *mnt_userns,
+                                 struct dentry *dentry, const char *name,
                                  const void *value, size_t size, int flags)
 {
        struct inode *inode = d_backing_inode(dentry);
@@ -3224,13 +3225,13 @@ static int selinux_inode_setxattr(struct dentry *dentry, const char *name,
        }
 
        if (!selinux_initialized(&selinux_state))
-               return (inode_owner_or_capable(inode) ? 0 : -EPERM);
+               return (inode_owner_or_capable(mnt_userns, inode) ? 0 : -EPERM);
 
        sbsec = inode->i_sb->s_security;
        if (!(sbsec->flags & SBLABEL_MNT))
                return -EOPNOTSUPP;
 
-       if (!inode_owner_or_capable(inode))
+       if (!inode_owner_or_capable(mnt_userns, inode))
                return -EPERM;
 
        ad.type = LSM_AUDIT_DATA_DENTRY;
@@ -3351,10 +3352,11 @@ static int selinux_inode_listxattr(struct dentry *dentry)
        return dentry_has_perm(cred, dentry, FILE__GETATTR);
 }
 
-static int selinux_inode_removexattr(struct dentry *dentry, const char *name)
+static int selinux_inode_removexattr(struct user_namespace *mnt_userns,
+                                    struct dentry *dentry, const char *name)
 {
        if (strcmp(name, XATTR_NAME_SELINUX)) {
-               int rc = cap_inode_removexattr(dentry, name);
+               int rc = cap_inode_removexattr(mnt_userns, dentry, name);
                if (rc)
                        return rc;
 
@@ -3420,7 +3422,9 @@ static int selinux_path_notify(const struct path *path, u64 mask,
  *
  * Permission check is handled by selinux_inode_getxattr hook.
  */
-static int selinux_inode_getsecurity(struct inode *inode, const char *name, void **buffer, bool alloc)
+static int selinux_inode_getsecurity(struct user_namespace *mnt_userns,
+                                    struct inode *inode, const char *name,
+                                    void **buffer, bool alloc)
 {
        u32 size;
        int error;
@@ -6614,14 +6618,15 @@ static int selinux_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen
  */
 static int selinux_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
 {
-       return __vfs_setxattr_noperm(dentry, XATTR_NAME_SELINUX, ctx, ctxlen, 0);
+       return __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_SELINUX,
+                                    ctx, ctxlen, 0);
 }
 
 static int selinux_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
 {
        int len = 0;
-       len = selinux_inode_getsecurity(inode, XATTR_SELINUX_SUFFIX,
-                                               ctx, true);
+       len = selinux_inode_getsecurity(&init_user_ns, inode,
+                                       XATTR_SELINUX_SUFFIX, ctx, true);
        if (len < 0)
                return len;
        *ctxlen = len;
index 6fe2530..7650de0 100644 (file)
@@ -219,14 +219,21 @@ static inline bool selinux_policycap_genfs_seclabel_symlinks(void)
        return READ_ONCE(state->policycap[POLICYDB_CAPABILITY_GENFS_SECLABEL_SYMLINKS]);
 }
 
+struct selinux_policy_convert_data;
+
+struct selinux_load_state {
+       struct selinux_policy *policy;
+       struct selinux_policy_convert_data *convert_data;
+};
+
 int security_mls_enabled(struct selinux_state *state);
 int security_load_policy(struct selinux_state *state,
-                       void *data, size_t len,
-                       struct selinux_policy **newpolicyp);
+                        void *data, size_t len,
+                        struct selinux_load_state *load_state);
 void selinux_policy_commit(struct selinux_state *state,
-                       struct selinux_policy *newpolicy);
+                          struct selinux_load_state *load_state);
 void selinux_policy_cancel(struct selinux_state *state,
-                       struct selinux_policy *policy);
+                          struct selinux_load_state *load_state);
 int security_read_policy(struct selinux_state *state,
                         void **data, size_t *len);
 int security_read_state_kernel(struct selinux_state *state,
index 01a7d50..fff6bab 100644 (file)
@@ -563,17 +563,13 @@ static int sel_make_policy_nodes(struct selinux_fs_info *fsi,
 
        ret = sel_make_bools(newpolicy, tmp_bool_dir, &tmp_bool_num,
                             &tmp_bool_names, &tmp_bool_values);
-       if (ret) {
-               pr_err("SELinux: failed to load policy booleans\n");
+       if (ret)
                goto out;
-       }
 
        ret = sel_make_classes(newpolicy, tmp_class_dir,
                               &fsi->last_class_ino);
-       if (ret) {
-               pr_err("SELinux: failed to load policy classes\n");
+       if (ret)
                goto out;
-       }
 
        /* booleans */
        old_dentry = fsi->bool_dir;
@@ -616,7 +612,7 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf,
 
 {
        struct selinux_fs_info *fsi = file_inode(file)->i_sb->s_fs_info;
-       struct selinux_policy *newpolicy;
+       struct selinux_load_state load_state;
        ssize_t length;
        void *data = NULL;
 
@@ -642,23 +638,23 @@ static ssize_t sel_write_load(struct file *file, const char __user *buf,
        if (copy_from_user(data, buf, count) != 0)
                goto out;
 
-       length = security_load_policy(fsi->state, data, count, &newpolicy);
+       length = security_load_policy(fsi->state, data, count, &load_state);
        if (length) {
                pr_warn_ratelimited("SELinux: failed to load policy\n");
                goto out;
        }
 
-       length = sel_make_policy_nodes(fsi, newpolicy);
+       length = sel_make_policy_nodes(fsi, load_state.policy);
        if (length) {
-               selinux_policy_cancel(fsi->state, newpolicy);
-               goto out1;
+               pr_warn_ratelimited("SELinux: failed to initialize selinuxfs\n");
+               selinux_policy_cancel(fsi->state, &load_state);
+               goto out;
        }
 
-       selinux_policy_commit(fsi->state, newpolicy);
+       selinux_policy_commit(fsi->state, &load_state);
 
        length = count;
 
-out1:
        audit_log(audit_context(), GFP_KERNEL, AUDIT_MAC_POLICY_LOAD,
                "auid=%u ses=%u lsm=selinux res=1",
                from_kuid(&init_user_ns, audit_get_loginuid(current)),
index 6dcb6aa..75df329 100644 (file)
@@ -109,7 +109,7 @@ static int avtab_insert(struct avtab *h, struct avtab_key *key, struct avtab_dat
        struct avtab_node *prev, *cur, *newnode;
        u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD);
 
-       if (!h)
+       if (!h || !h->nslot)
                return -EINVAL;
 
        hvalue = avtab_hash(key, h->mask);
@@ -154,7 +154,7 @@ avtab_insert_nonunique(struct avtab *h, struct avtab_key *key, struct avtab_datu
        struct avtab_node *prev, *cur;
        u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD);
 
-       if (!h)
+       if (!h || !h->nslot)
                return NULL;
        hvalue = avtab_hash(key, h->mask);
        for (prev = NULL, cur = h->htable[hvalue];
@@ -184,7 +184,7 @@ struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *key)
        struct avtab_node *cur;
        u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD);
 
-       if (!h)
+       if (!h || !h->nslot)
                return NULL;
 
        hvalue = avtab_hash(key, h->mask);
@@ -220,7 +220,7 @@ avtab_search_node(struct avtab *h, struct avtab_key *key)
        struct avtab_node *cur;
        u16 specified = key->specified & ~(AVTAB_ENABLED|AVTAB_ENABLED_OLD);
 
-       if (!h)
+       if (!h || !h->nslot)
                return NULL;
 
        hvalue = avtab_hash(key, h->mask);
@@ -295,6 +295,7 @@ void avtab_destroy(struct avtab *h)
        }
        kvfree(h->htable);
        h->htable = NULL;
+       h->nel = 0;
        h->nslot = 0;
        h->mask = 0;
 }
@@ -303,88 +304,52 @@ void avtab_init(struct avtab *h)
 {
        h->htable = NULL;
        h->nel = 0;
+       h->nslot = 0;
+       h->mask = 0;
 }
 
-int avtab_alloc(struct avtab *h, u32 nrules)
+static int avtab_alloc_common(struct avtab *h, u32 nslot)
 {
-       u32 mask = 0;
-       u32 shift = 0;
-       u32 work = nrules;
-       u32 nslot = 0;
-
-       if (nrules == 0)
-               goto avtab_alloc_out;
-
-       while (work) {
-               work  = work >> 1;
-               shift++;
-       }
-       if (shift > 2)
-               shift = shift - 2;
-       nslot = 1 << shift;
-       if (nslot > MAX_AVTAB_HASH_BUCKETS)
-               nslot = MAX_AVTAB_HASH_BUCKETS;
-       mask = nslot - 1;
+       if (!nslot)
+               return 0;
 
        h->htable = kvcalloc(nslot, sizeof(void *), GFP_KERNEL);
        if (!h->htable)
                return -ENOMEM;
 
- avtab_alloc_out:
-       h->nel = 0;
        h->nslot = nslot;
-       h->mask = mask;
-       pr_debug("SELinux: %d avtab hash slots, %d rules.\n",
-              h->nslot, nrules);
+       h->mask = nslot - 1;
        return 0;
 }
 
-int avtab_duplicate(struct avtab *new, struct avtab *orig)
+int avtab_alloc(struct avtab *h, u32 nrules)
 {
-       int i;
-       struct avtab_node *node, *tmp, *tail;
-
-       memset(new, 0, sizeof(*new));
+       int rc;
+       u32 nslot = 0;
 
-       new->htable = kvcalloc(orig->nslot, sizeof(void *), GFP_KERNEL);
-       if (!new->htable)
-               return -ENOMEM;
-       new->nslot = orig->nslot;
-       new->mask = orig->mask;
-
-       for (i = 0; i < orig->nslot; i++) {
-               tail = NULL;
-               for (node = orig->htable[i]; node; node = node->next) {
-                       tmp = kmem_cache_zalloc(avtab_node_cachep, GFP_KERNEL);
-                       if (!tmp)
-                               goto error;
-                       tmp->key = node->key;
-                       if (tmp->key.specified & AVTAB_XPERMS) {
-                               tmp->datum.u.xperms =
-                                       kmem_cache_zalloc(avtab_xperms_cachep,
-                                                       GFP_KERNEL);
-                               if (!tmp->datum.u.xperms) {
-                                       kmem_cache_free(avtab_node_cachep, tmp);
-                                       goto error;
-                               }
-                               tmp->datum.u.xperms = node->datum.u.xperms;
-                       } else
-                               tmp->datum.u.data = node->datum.u.data;
-
-                       if (tail)
-                               tail->next = tmp;
-                       else
-                               new->htable[i] = tmp;
-
-                       tail = tmp;
-                       new->nel++;
+       if (nrules != 0) {
+               u32 shift = 1;
+               u32 work = nrules >> 3;
+               while (work) {
+                       work >>= 1;
+                       shift++;
                }
+               nslot = 1 << shift;
+               if (nslot > MAX_AVTAB_HASH_BUCKETS)
+                       nslot = MAX_AVTAB_HASH_BUCKETS;
+
+               rc = avtab_alloc_common(h, nslot);
+               if (rc)
+                       return rc;
        }
 
+       pr_debug("SELinux: %d avtab hash slots, %d rules.\n", nslot, nrules);
        return 0;
-error:
-       avtab_destroy(new);
-       return -ENOMEM;
+}
+
+int avtab_alloc_dup(struct avtab *new, const struct avtab *orig)
+{
+       return avtab_alloc_common(new, orig->nslot);
 }
 
 void avtab_hash_eval(struct avtab *h, char *tag)
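
The new sizing rule is easy to check by hand: nslot lands on a power of two between nrules/4 and nrules/2 (shift starts at 1 and gains one per bit of nrules >> 3), so average hash chains run two to four entries, capped at MAX_AVTAB_HASH_BUCKETS, which is 8192 in security/selinux/ss/avtab.h. A standalone sketch of the arithmetic, mirroring the loop above:

    #include <stdio.h>
    #include <stdint.h>

    #define MAX_AVTAB_HASH_BUCKETS 8192  /* from security/selinux/ss/avtab.h */

    /* Mirrors the sizing logic in the new avtab_alloc(). */
    static uint32_t avtab_nslot(uint32_t nrules)
    {
            uint32_t shift = 1, work = nrules >> 3, nslot;

            if (nrules == 0)
                    return 0;
            while (work) {
                    work >>= 1;
                    shift++;
            }
            nslot = 1u << shift;
            if (nslot > MAX_AVTAB_HASH_BUCKETS)
                    nslot = MAX_AVTAB_HASH_BUCKETS;
            return nslot;
    }

    int main(void)
    {
            uint32_t sizes[] = { 0, 1, 100, 5000, 100000 };

            for (unsigned int i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++)
                    printf("nrules=%6u -> nslot=%u\n", sizes[i],
                           avtab_nslot(sizes[i]));
            return 0;
    }
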
index 4c4445c..f2eeb36 100644 (file)
@@ -89,7 +89,7 @@ struct avtab {
 
 void avtab_init(struct avtab *h);
 int avtab_alloc(struct avtab *, u32);
-int avtab_duplicate(struct avtab *new, struct avtab *orig);
+int avtab_alloc_dup(struct avtab *new, const struct avtab *orig);
 struct avtab_datum *avtab_search(struct avtab *h, struct avtab_key *k);
 void avtab_destroy(struct avtab *h);
 void avtab_hash_eval(struct avtab *h, char *tag);
index 0b32f3a..1ef74c0 100644 (file)
@@ -605,7 +605,6 @@ static int cond_dup_av_list(struct cond_av_list *new,
                        struct cond_av_list *orig,
                        struct avtab *avtab)
 {
-       struct avtab_node *avnode;
        u32 i;
 
        memset(new, 0, sizeof(*new));
@@ -615,10 +614,11 @@ static int cond_dup_av_list(struct cond_av_list *new,
                return -ENOMEM;
 
        for (i = 0; i < orig->len; i++) {
-               avnode = avtab_search_node(avtab, &orig->nodes[i]->key);
-               if (WARN_ON(!avnode))
-                       return -EINVAL;
-               new->nodes[i] = avnode;
+               new->nodes[i] = avtab_insert_nonunique(avtab,
+                                                      &orig->nodes[i]->key,
+                                                      &orig->nodes[i]->datum);
+               if (!new->nodes[i])
+                       return -ENOMEM;
                new->len++;
        }
 
@@ -630,7 +630,7 @@ static int duplicate_policydb_cond_list(struct policydb *newp,
 {
        int rc, i, j;
 
-       rc = avtab_duplicate(&newp->te_cond_avtab, &origp->te_cond_avtab);
+       rc = avtab_alloc_dup(&newp->te_cond_avtab, &origp->te_cond_avtab);
        if (rc)
                return rc;
 
index 3438d01..3016331 100644 (file)
 #include "policycap_names.h"
 #include "ima.h"
 
+struct convert_context_args {
+       struct selinux_state *state;
+       struct policydb *oldp;
+       struct policydb *newp;
+};
+
+struct selinux_policy_convert_data {
+       struct convert_context_args args;
+       struct sidtab_convert_params sidtab_params;
+};
+
 /* Forward declaration. */
 static int context_struct_to_string(struct policydb *policydb,
                                    struct context *context,
@@ -1541,6 +1552,7 @@ static int security_context_to_sid_core(struct selinux_state *state,
                if (!str)
                        goto out;
        }
+retry:
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -1554,6 +1566,15 @@ static int security_context_to_sid_core(struct selinux_state *state,
        } else if (rc)
                goto out_unlock;
        rc = sidtab_context_to_sid(sidtab, &context, sid);
+       if (rc == -ESTALE) {
+               rcu_read_unlock();
+               if (context.str) {
+                       str = context.str;
+                       context.str = NULL;
+               }
+               context_destroy(&context);
+               goto retry;
+       }
        context_destroy(&context);
 out_unlock:
        rcu_read_unlock();
@@ -1703,7 +1724,7 @@ static int security_compute_sid(struct selinux_state *state,
        struct selinux_policy *policy;
        struct policydb *policydb;
        struct sidtab *sidtab;
-       struct class_datum *cladatum = NULL;
+       struct class_datum *cladatum;
        struct context *scontext, *tcontext, newcontext;
        struct sidtab_entry *sentry, *tentry;
        struct avtab_key avkey;
@@ -1725,6 +1746,8 @@ static int security_compute_sid(struct selinux_state *state,
                goto out;
        }
 
+retry:
+       cladatum = NULL;
        context_init(&newcontext);
 
        rcu_read_lock();
@@ -1869,6 +1892,11 @@ static int security_compute_sid(struct selinux_state *state,
        }
        /* Obtain the sid for the context. */
        rc = sidtab_context_to_sid(sidtab, &newcontext, out_sid);
+       if (rc == -ESTALE) {
+               rcu_read_unlock();
+               context_destroy(&newcontext);
+               goto retry;
+       }
 out_unlock:
        rcu_read_unlock();
        context_destroy(&newcontext);
@@ -1974,12 +2002,6 @@ static inline int convert_context_handle_invalid_context(
        return 0;
 }
 
-struct convert_context_args {
-       struct selinux_state *state;
-       struct policydb *oldp;
-       struct policydb *newp;
-};
-
 /*
  * Convert the values in the security context
  * structure `oldc' from the values specified
@@ -2159,7 +2181,7 @@ static void selinux_policy_cond_free(struct selinux_policy *policy)
 }
 
 void selinux_policy_cancel(struct selinux_state *state,
-                       struct selinux_policy *policy)
+                          struct selinux_load_state *load_state)
 {
        struct selinux_policy *oldpolicy;
 
@@ -2167,7 +2189,8 @@ void selinux_policy_cancel(struct selinux_state *state,
                                        lockdep_is_held(&state->policy_mutex));
 
        sidtab_cancel_convert(oldpolicy->sidtab);
-       selinux_policy_free(policy);
+       selinux_policy_free(load_state->policy);
+       kfree(load_state->convert_data);
 }
 
 static void selinux_notify_policy_change(struct selinux_state *state,
@@ -2183,9 +2206,10 @@ static void selinux_notify_policy_change(struct selinux_state *state,
 }
 
 void selinux_policy_commit(struct selinux_state *state,
-                       struct selinux_policy *newpolicy)
+                          struct selinux_load_state *load_state)
 {
-       struct selinux_policy *oldpolicy;
+       struct selinux_policy *oldpolicy, *newpolicy = load_state->policy;
+       unsigned long flags;
        u32 seqno;
 
        oldpolicy = rcu_dereference_protected(state->policy,
@@ -2207,7 +2231,13 @@ void selinux_policy_commit(struct selinux_state *state,
        seqno = newpolicy->latest_granting;
 
        /* Install the new policy. */
-       rcu_assign_pointer(state->policy, newpolicy);
+       if (oldpolicy) {
+               sidtab_freeze_begin(oldpolicy->sidtab, &flags);
+               rcu_assign_pointer(state->policy, newpolicy);
+               sidtab_freeze_end(oldpolicy->sidtab, &flags);
+       } else {
+               rcu_assign_pointer(state->policy, newpolicy);
+       }
 
        /* Load the policycaps from the new policy */
        security_load_policycaps(state, newpolicy);
@@ -2225,6 +2255,7 @@ void selinux_policy_commit(struct selinux_state *state,
        /* Free the old policy */
        synchronize_rcu();
        selinux_policy_free(oldpolicy);
+       kfree(load_state->convert_data);
 
        /* Notify others of the policy change */
        selinux_notify_policy_change(state, seqno);
@@ -2241,11 +2272,10 @@ void selinux_policy_commit(struct selinux_state *state,
  * loading the new policy.
  */
 int security_load_policy(struct selinux_state *state, void *data, size_t len,
-                       struct selinux_policy **newpolicyp)
+                        struct selinux_load_state *load_state)
 {
        struct selinux_policy *newpolicy, *oldpolicy;
-       struct sidtab_convert_params convert_params;
-       struct convert_context_args args;
+       struct selinux_policy_convert_data *convert_data;
        int rc = 0;
        struct policy_file file = { data, len }, *fp = &file;
 
@@ -2275,10 +2305,10 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len,
                goto err_mapping;
        }
 
-
        if (!selinux_initialized(state)) {
                /* First policy load, so no need to preserve state from old policy */
-               *newpolicyp = newpolicy;
+               load_state->policy = newpolicy;
+               load_state->convert_data = NULL;
                return 0;
        }
 
@@ -2292,29 +2322,38 @@ int security_load_policy(struct selinux_state *state, void *data, size_t len,
                goto err_free_isids;
        }
 
+       convert_data = kmalloc(sizeof(*convert_data), GFP_KERNEL);
+       if (!convert_data) {
+               rc = -ENOMEM;
+               goto err_free_isids;
+       }
+
        /*
         * Convert the internal representations of contexts
         * in the new SID table.
         */
-       args.state = state;
-       args.oldp = &oldpolicy->policydb;
-       args.newp = &newpolicy->policydb;
+       convert_data->args.state = state;
+       convert_data->args.oldp = &oldpolicy->policydb;
+       convert_data->args.newp = &newpolicy->policydb;
 
-       convert_params.func = convert_context;
-       convert_params.args = &args;
-       convert_params.target = newpolicy->sidtab;
+       convert_data->sidtab_params.func = convert_context;
+       convert_data->sidtab_params.args = &convert_data->args;
+       convert_data->sidtab_params.target = newpolicy->sidtab;
 
-       rc = sidtab_convert(oldpolicy->sidtab, &convert_params);
+       rc = sidtab_convert(oldpolicy->sidtab, &convert_data->sidtab_params);
        if (rc) {
                pr_err("SELinux:  unable to convert the internal"
                        " representation of contexts in the new SID"
                        " table\n");
-               goto err_free_isids;
+               goto err_free_convert_data;
        }
 
-       *newpolicyp = newpolicy;
+       load_state->policy = newpolicy;
+       load_state->convert_data = convert_data;
        return 0;
 
+err_free_convert_data:
+       kfree(convert_data);
 err_free_isids:
        sidtab_destroy(newpolicy->sidtab);
 err_mapping:
@@ -2342,13 +2381,15 @@ int security_port_sid(struct selinux_state *state,
        struct policydb *policydb;
        struct sidtab *sidtab;
        struct ocontext *c;
-       int rc = 0;
+       int rc;
 
        if (!selinux_initialized(state)) {
                *out_sid = SECINITSID_PORT;
                return 0;
        }
 
+retry:
+       rc = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -2367,6 +2408,10 @@ int security_port_sid(struct selinux_state *state,
                if (!c->sid[0]) {
                        rc = sidtab_context_to_sid(sidtab, &c->context[0],
                                                   &c->sid[0]);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out;
                }
@@ -2393,13 +2438,15 @@ int security_ib_pkey_sid(struct selinux_state *state,
        struct policydb *policydb;
        struct sidtab *sidtab;
        struct ocontext *c;
-       int rc = 0;
+       int rc;
 
        if (!selinux_initialized(state)) {
                *out_sid = SECINITSID_UNLABELED;
                return 0;
        }
 
+retry:
+       rc = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -2420,6 +2467,10 @@ int security_ib_pkey_sid(struct selinux_state *state,
                        rc = sidtab_context_to_sid(sidtab,
                                                   &c->context[0],
                                                   &c->sid[0]);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out;
                }
@@ -2445,13 +2496,15 @@ int security_ib_endport_sid(struct selinux_state *state,
        struct policydb *policydb;
        struct sidtab *sidtab;
        struct ocontext *c;
-       int rc = 0;
+       int rc;
 
        if (!selinux_initialized(state)) {
                *out_sid = SECINITSID_UNLABELED;
                return 0;
        }
 
+retry:
+       rc = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -2472,6 +2525,10 @@ int security_ib_endport_sid(struct selinux_state *state,
                if (!c->sid[0]) {
                        rc = sidtab_context_to_sid(sidtab, &c->context[0],
                                                   &c->sid[0]);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out;
                }
@@ -2495,7 +2552,7 @@ int security_netif_sid(struct selinux_state *state,
        struct selinux_policy *policy;
        struct policydb *policydb;
        struct sidtab *sidtab;
-       int rc = 0;
+       int rc;
        struct ocontext *c;
 
        if (!selinux_initialized(state)) {
@@ -2503,6 +2560,8 @@ int security_netif_sid(struct selinux_state *state,
                return 0;
        }
 
+retry:
+       rc = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -2519,10 +2578,18 @@ int security_netif_sid(struct selinux_state *state,
                if (!c->sid[0] || !c->sid[1]) {
                        rc = sidtab_context_to_sid(sidtab, &c->context[0],
                                                   &c->sid[0]);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out;
                        rc = sidtab_context_to_sid(sidtab, &c->context[1],
                                                   &c->sid[1]);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out;
                }
@@ -2572,6 +2639,7 @@ int security_node_sid(struct selinux_state *state,
                return 0;
        }
 
+retry:
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -2620,6 +2688,10 @@ int security_node_sid(struct selinux_state *state,
                        rc = sidtab_context_to_sid(sidtab,
                                                   &c->context[0],
                                                   &c->sid[0]);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out;
                }
@@ -2661,18 +2733,24 @@ int security_get_user_sids(struct selinux_state *state,
        struct sidtab *sidtab;
        struct context *fromcon, usercon;
        u32 *mysids = NULL, *mysids2, sid;
-       u32 mynel = 0, maxnel = SIDS_NEL;
+       u32 i, j, mynel, maxnel = SIDS_NEL;
        struct user_datum *user;
        struct role_datum *role;
        struct ebitmap_node *rnode, *tnode;
-       int rc = 0, i, j;
+       int rc;
 
        *sids = NULL;
        *nel = 0;
 
        if (!selinux_initialized(state))
-               goto out;
+               return 0;
+
+       mysids = kcalloc(maxnel, sizeof(*mysids), GFP_KERNEL);
+       if (!mysids)
+               return -ENOMEM;
 
+retry:
+       mynel = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -2692,11 +2770,6 @@ int security_get_user_sids(struct selinux_state *state,
 
        usercon.user = user->value;
 
-       rc = -ENOMEM;
-       mysids = kcalloc(maxnel, sizeof(*mysids), GFP_ATOMIC);
-       if (!mysids)
-               goto out_unlock;
-
        ebitmap_for_each_positive_bit(&user->roles, rnode, i) {
                role = policydb->role_val_to_struct[i];
                usercon.role = i + 1;
@@ -2708,6 +2781,10 @@ int security_get_user_sids(struct selinux_state *state,
                                continue;
 
                        rc = sidtab_context_to_sid(sidtab, &usercon, &sid);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out_unlock;
                        if (mynel < maxnel) {
@@ -2730,14 +2807,14 @@ out_unlock:
        rcu_read_unlock();
        if (rc || !mynel) {
                kfree(mysids);
-               goto out;
+               return rc;
        }
 
        rc = -ENOMEM;
        mysids2 = kcalloc(mynel, sizeof(*mysids2), GFP_KERNEL);
        if (!mysids2) {
                kfree(mysids);
-               goto out;
+               return rc;
        }
        for (i = 0, j = 0; i < mynel; i++) {
                struct av_decision dummy_avd;
@@ -2750,12 +2827,10 @@ out_unlock:
                        mysids2[j++] = mysids[i];
                cond_resched();
        }
-       rc = 0;
        kfree(mysids);
        *sids = mysids2;
        *nel = j;
-out:
-       return rc;
+       return 0;
 }
 
 /**
@@ -2768,6 +2843,9 @@ out:
  * Obtain a SID to use for a file in a filesystem that
  * cannot support xattr or use a fixed labeling behavior like
  * transition SIDs or task SIDs.
+ *
+ * WARNING: This function may return -ESTALE, indicating that the caller
+ * must retry the operation after re-acquiring the policy pointer!
  */
 static inline int __security_genfs_sid(struct selinux_policy *policy,
                                       const char *fstype,
@@ -2846,11 +2924,13 @@ int security_genfs_sid(struct selinux_state *state,
                return 0;
        }
 
-       rcu_read_lock();
-       policy = rcu_dereference(state->policy);
-       retval = __security_genfs_sid(policy,
-                               fstype, path, orig_sclass, sid);
-       rcu_read_unlock();
+       do {
+               rcu_read_lock();
+               policy = rcu_dereference(state->policy);
+               retval = __security_genfs_sid(policy, fstype, path,
+                                             orig_sclass, sid);
+               rcu_read_unlock();
+       } while (retval == -ESTALE);
        return retval;
 }
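
Taken together, the hunks above converge on a single idiom: every caller of sidtab_context_to_sid() must treat -ESTALE as "the policy was swapped out underneath us", drop the RCU read lock plus any per-iteration state, and restart from rcu_dereference(). A condensed sketch of that pattern, using only names that appear in this patch (not a drop-in implementation):

	/* Sketch of the retry idiom this series introduces. */
	retry:
		rc = 0;
		rcu_read_lock();
		policy = rcu_dereference(state->policy);
		sidtab = policy->sidtab;
		/* ... build or look up the context ... */
		rc = sidtab_context_to_sid(sidtab, &context, sid);
		if (rc == -ESTALE) {
			rcu_read_unlock();
			/* free per-iteration state, then retry against
			 * the newly installed policy */
			goto retry;
		}
		rcu_read_unlock();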
 
@@ -2873,7 +2953,7 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb)
        struct selinux_policy *policy;
        struct policydb *policydb;
        struct sidtab *sidtab;
-       int rc = 0;
+       int rc;
        struct ocontext *c;
        struct superblock_security_struct *sbsec = sb->s_security;
        const char *fstype = sb->s_type->name;
@@ -2884,6 +2964,8 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb)
                return 0;
        }
 
+retry:
+       rc = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -2901,6 +2983,10 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb)
                if (!c->sid[0]) {
                        rc = sidtab_context_to_sid(sidtab, &c->context[0],
                                                   &c->sid[0]);
+                       if (rc == -ESTALE) {
+                               rcu_read_unlock();
+                               goto retry;
+                       }
                        if (rc)
                                goto out;
                }
@@ -2908,6 +2994,10 @@ int security_fs_use(struct selinux_state *state, struct super_block *sb)
        } else {
                rc = __security_genfs_sid(policy, fstype, "/",
                                        SECCLASS_DIR, &sbsec->sid);
+               if (rc == -ESTALE) {
+                       rcu_read_unlock();
+                       goto retry;
+               }
                if (rc) {
                        sbsec->behavior = SECURITY_FS_USE_NONE;
                        rc = 0;
@@ -3117,12 +3207,13 @@ int security_sid_mls_copy(struct selinux_state *state,
        u32 len;
        int rc;
 
-       rc = 0;
        if (!selinux_initialized(state)) {
                *new_sid = sid;
-               goto out;
+               return 0;
        }
 
+retry:
+       rc = 0;
        context_init(&newcon);
 
        rcu_read_lock();
@@ -3181,10 +3272,14 @@ int security_sid_mls_copy(struct selinux_state *state,
                }
        }
        rc = sidtab_context_to_sid(sidtab, &newcon, new_sid);
+       if (rc == -ESTALE) {
+               rcu_read_unlock();
+               context_destroy(&newcon);
+               goto retry;
+       }
 out_unlock:
        rcu_read_unlock();
        context_destroy(&newcon);
-out:
        return rc;
 }
 
@@ -3777,6 +3872,8 @@ int security_netlbl_secattr_to_sid(struct selinux_state *state,
                return 0;
        }
 
+retry:
+       rc = 0;
        rcu_read_lock();
        policy = rcu_dereference(state->policy);
        policydb = &policy->policydb;
@@ -3803,23 +3900,24 @@ int security_netlbl_secattr_to_sid(struct selinux_state *state,
                                goto out;
                }
                rc = -EIDRM;
-               if (!mls_context_isvalid(policydb, &ctx_new))
-                       goto out_free;
+               if (!mls_context_isvalid(policydb, &ctx_new)) {
+                       ebitmap_destroy(&ctx_new.range.level[0].cat);
+                       goto out;
+               }
 
                rc = sidtab_context_to_sid(sidtab, &ctx_new, sid);
+               ebitmap_destroy(&ctx_new.range.level[0].cat);
+               if (rc == -ESTALE) {
+                       rcu_read_unlock();
+                       goto retry;
+               }
                if (rc)
-                       goto out_free;
+                       goto out;
 
                security_netlbl_cache_add(secattr, *sid);
-
-               ebitmap_destroy(&ctx_new.range.level[0].cat);
        } else
                *sid = SECSID_NULL;
 
-       rcu_read_unlock();
-       return 0;
-out_free:
-       ebitmap_destroy(&ctx_new.range.level[0].cat);
 out:
        rcu_read_unlock();
        return rc;
index 5ee190b..656d50b 100644
@@ -39,6 +39,7 @@ int sidtab_init(struct sidtab *s)
        for (i = 0; i < SECINITSID_NUM; i++)
                s->isids[i].set = 0;
 
+       s->frozen = false;
        s->count = 0;
        s->convert = NULL;
        hash_init(s->context_to_sid);
@@ -281,6 +282,15 @@ int sidtab_context_to_sid(struct sidtab *s, struct context *context,
        if (*sid)
                goto out_unlock;
 
+       if (unlikely(s->frozen)) {
+               /*
+                * This sidtab is now frozen - tell the caller to abort and
+                * get the new one.
+                */
+               rc = -ESTALE;
+               goto out_unlock;
+       }
+
        count = s->count;
        convert = s->convert;
 
@@ -474,6 +484,17 @@ void sidtab_cancel_convert(struct sidtab *s)
        spin_unlock_irqrestore(&s->lock, flags);
 }
 
+void sidtab_freeze_begin(struct sidtab *s, unsigned long *flags) __acquires(&s->lock)
+{
+       spin_lock_irqsave(&s->lock, *flags);
+       s->frozen = true;
+       s->convert = NULL;
+}
+void sidtab_freeze_end(struct sidtab *s, unsigned long *flags) __releases(&s->lock)
+{
+       spin_unlock_irqrestore(&s->lock, *flags);
+}
+
 static void sidtab_destroy_entry(struct sidtab_entry *entry)
 {
        context_destroy(&entry->context);
index 80c744d..4eff0e4 100644
@@ -86,6 +86,7 @@ struct sidtab {
        u32 count;
        /* access only under spinlock */
        struct sidtab_convert_params *convert;
+       bool frozen;
        spinlock_t lock;
 
 #if CONFIG_SECURITY_SELINUX_SID2STR_CACHE_SIZE > 0
@@ -125,6 +126,9 @@ int sidtab_convert(struct sidtab *s, struct sidtab_convert_params *params);
 
 void sidtab_cancel_convert(struct sidtab *s);
 
+void sidtab_freeze_begin(struct sidtab *s, unsigned long *flags) __acquires(&s->lock);
+void sidtab_freeze_end(struct sidtab *s, unsigned long *flags) __releases(&s->lock);
+
 int sidtab_context_to_sid(struct sidtab *s, struct context *context, u32 *sid);
 
 void sidtab_destroy(struct sidtab *s);
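
The freeze helpers are split into begin/end halves because the freeze must bracket an RCU pointer swap that happens in the caller, and the __acquires/__releases annotations tell sparse that taking the lock in one function and releasing it in another is intentional. A minimal sketch of the intended pairing, mirroring the selinux_policy_commit() hunk earlier in this series:

	/* Sketch: freezing the old sidtab around the policy swap. */
	unsigned long flags;

	sidtab_freeze_begin(oldpolicy->sidtab, &flags); /* lock held; s->frozen = true */
	rcu_assign_pointer(state->policy, newpolicy);   /* readers pick up the new policy */
	sidtab_freeze_end(oldpolicy->sidtab, &flags);   /* unlock; late SID adds now get -ESTALE */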
index f69c3dd..12a45e6 100644
@@ -1240,7 +1240,8 @@ static int smack_inode_getattr(const struct path *path)
  *
  * Returns 0 if access is permitted, an error code otherwise
  */
-static int smack_inode_setxattr(struct dentry *dentry, const char *name,
+static int smack_inode_setxattr(struct user_namespace *mnt_userns,
+                               struct dentry *dentry, const char *name,
                                const void *value, size_t size, int flags)
 {
        struct smk_audit_info ad;
@@ -1362,7 +1363,8 @@ static int smack_inode_getxattr(struct dentry *dentry, const char *name)
  *
  * Returns 0 if access is permitted, an error code otherwise
  */
-static int smack_inode_removexattr(struct dentry *dentry, const char *name)
+static int smack_inode_removexattr(struct user_namespace *mnt_userns,
+                                  struct dentry *dentry, const char *name)
 {
        struct inode_smack *isp;
        struct smk_audit_info ad;
@@ -1377,7 +1379,7 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
                if (!smack_privileged(CAP_MAC_ADMIN))
                        rc = -EPERM;
        } else
-               rc = cap_inode_removexattr(dentry, name);
+               rc = cap_inode_removexattr(mnt_userns, dentry, name);
 
        if (rc != 0)
                return rc;
@@ -1420,9 +1422,9 @@ static int smack_inode_removexattr(struct dentry *dentry, const char *name)
  *
  * Returns the size of the attribute or an error code
  */
-static int smack_inode_getsecurity(struct inode *inode,
-                                  const char *name, void **buffer,
-                                  bool alloc)
+static int smack_inode_getsecurity(struct user_namespace *mnt_userns,
+                                  struct inode *inode, const char *name,
+                                  void **buffer, bool alloc)
 {
        struct socket_smack *ssp;
        struct socket *sock;
@@ -3425,7 +3427,7 @@ static void smack_d_instantiate(struct dentry *opt_dentry, struct inode *inode)
                         */
                        if (isp->smk_flags & SMK_INODE_CHANGED) {
                                isp->smk_flags &= ~SMK_INODE_CHANGED;
-                               rc = __vfs_setxattr(dp, inode,
+                               rc = __vfs_setxattr(&init_user_ns, dp, inode,
                                        XATTR_NAME_SMACKTRANSMUTE,
                                        TRANS_TRUE, TRANS_TRUE_SIZE,
                                        0);
@@ -4597,12 +4599,14 @@ static int smack_secctx_to_secid(const char *secdata, u32 seclen, u32 *secid)
 
 static int smack_inode_notifysecctx(struct inode *inode, void *ctx, u32 ctxlen)
 {
-       return smack_inode_setsecurity(inode, XATTR_SMACK_SUFFIX, ctx, ctxlen, 0);
+       return smack_inode_setsecurity(inode, XATTR_SMACK_SUFFIX, ctx,
+                                      ctxlen, 0);
 }
 
 static int smack_inode_setsecctx(struct dentry *dentry, void *ctx, u32 ctxlen)
 {
-       return __vfs_setxattr_noperm(dentry, XATTR_NAME_SMACK, ctx, ctxlen, 0);
+       return __vfs_setxattr_noperm(&init_user_ns, dentry, XATTR_NAME_SMACK,
+                                    ctx, ctxlen, 0);
 }
 
 static int smack_inode_getsecctx(struct inode *inode, void **ctx, u32 *ctxlen)
index 478f757..8dc6133 100644
@@ -613,7 +613,7 @@ static int tomoyo_check_unix_address(struct sockaddr *addr,
 static bool tomoyo_kernel_service(void)
 {
        /* Nothing to do if I am a kernel service. */
-       return (current->flags & (PF_KTHREAD | PF_IO_WORKER)) == PF_KTHREAD;
+       return current->flags & PF_KTHREAD;
 }
 
 /**
index 8a24e5a..80b814b 100644
@@ -33,7 +33,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("A loopback soundcard");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ALSA,Loopback soundcard}}");
 
 #define MAX_PCM_SUBSTREAMS     8
 
@@ -1572,6 +1571,14 @@ static int loopback_mixer_new(struct loopback *loopback, int notify)
                                        return -ENOMEM;
                                kctl->id.device = dev;
                                kctl->id.subdevice = substr;
+
+                               /* Add the control before copying the id so that
+                                * the numid field of the id is set in the copy.
+                                */
+                               err = snd_ctl_add(card, kctl);
+                               if (err < 0)
+                                       return err;
+
                                switch (idx) {
                                case ACTIVE_IDX:
                                        setup->active_id = kctl->id;
@@ -1588,9 +1595,6 @@ static int loopback_mixer_new(struct loopback *loopback, int notify)
                                default:
                                        break;
                                }
-                               err = snd_ctl_add(card, kctl);
-                               if (err < 0)
-                                       return err;
                        }
                }
        }
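
The reorder matters because snd_ctl_add() is where a control's numid is assigned; a struct snd_ctl_elem_id copied before that point would carry numid == 0, and later lookups by numid would miss. In sketch form, with the names from the hunk above:

	err = snd_ctl_add(card, kctl);  /* numid is allocated here */
	if (err < 0)
		return err;
	setup->active_id = kctl->id;    /* the copy now includes a valid numid */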
index 316c9af..01a3eab 100644
@@ -25,7 +25,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Dummy soundcard (/dev/null)");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ALSA,Dummy soundcard}}");
 
 #define MAX_PCM_DEVICES                4
 #define MAX_PCM_SUBSTREAMS     128
index ce5fd17..df4b7f9 100644
@@ -53,7 +53,6 @@
 MODULE_AUTHOR("Michael T. Mayers");
 MODULE_DESCRIPTION("MOTU MidiTimePiece AV multiport MIDI");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{MOTU,MidiTimePiece AV multiport MIDI}}");
 
 // io resources
 #define MTPAV_IOBASE           0x378
index 9c708b6..322d530 100644
@@ -37,7 +37,6 @@ MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard.");
 MODULE_AUTHOR("Matthias Koenig <mk@phasorlab.de>");
 MODULE_DESCRIPTION("ESI Miditerminal 4140");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ESI,Miditerminal 4140}}");
 
 /*********************************************************************
  * Chip specific
index fd79e57..7689fa2 100644
@@ -22,7 +22,6 @@
 MODULE_AUTHOR("Stas Sergeev <stsp@users.sourceforge.net>");
 MODULE_DESCRIPTION("PC-Speaker driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{PC-Speaker, pcsp}}");
 MODULE_ALIAS("platform:pcspkr");
 
 static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */
index c876cf9..2f4514e 100644
@@ -57,7 +57,6 @@ MODULE_PARM_DESC(enable, "Enable " CARD_NAME " soundcard.");
 MODULE_AUTHOR("Levent Guendogdu, Tobias Gehrig, Matthias Koenig");
 MODULE_DESCRIPTION("Midiman Portman2x4");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Midiman,Portman2x4}}");
 
 /*********************************************************************
  * Chip specific
index 3947f08..6d5d1ca 100644
@@ -34,7 +34,6 @@
 
 MODULE_DESCRIPTION("MIDI serial u16550");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ALSA, MIDI serial u16550}}");
 
 #define SNDRV_SERIAL_SOUNDCANVAS 0 /* Roland Soundcanvas; F5 NN selects part */
 #define SNDRV_SERIAL_MS124T 1      /* Midiator MS-124T */
index f1fb68b..4206d93 100644
@@ -43,7 +43,6 @@
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("Dummy soundcard for virtual rawmidi devices");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ALSA,Virtual rawmidi device}}");
 
 #define MAX_MIDI_DEVICES       4
 
index 8e0c038..1a14c08 100644
@@ -493,11 +493,10 @@ void snd_dice_stream_stop_duplex(struct snd_dice *dice)
        struct reg_params tx_params, rx_params;
 
        if (dice->substreams_counter == 0) {
-               if (get_register_params(dice, &tx_params, &rx_params) >= 0) {
-                       amdtp_domain_stop(&dice->domain);
+               if (get_register_params(dice, &tx_params, &rx_params) >= 0)
                        finish_session(dice, &tx_params, &rx_params);
-               }
 
+               amdtp_domain_stop(&dice->domain);
                release_resources(dice);
        }
 }
index 9ed5cfa..57595f1 100644
@@ -44,9 +44,13 @@ config SND_INTEL_NHLT
 config SND_INTEL_DSP_CONFIG
        tristate
        select SND_INTEL_NHLT if ACPI
+       select SND_INTEL_SOUNDWIRE_ACPI if ACPI
        # this config should be selected only for Intel DSP platforms.
        # A fallback is provided so that the code compiles in all cases.
 
+config SND_INTEL_SOUNDWIRE_ACPI
+       tristate
+
 config SND_INTEL_BYT_PREFER_SOF
        bool "Prefer SOF driver over SST on BY/CHT platforms"
        depends on SND_SST_ATOM_HIFI2_PLATFORM_ACPI && SND_SOC_SOF_BAYTRAIL
index 601e617..78f487a 100644
@@ -17,3 +17,6 @@ obj-$(CONFIG_SND_HDA_EXT_CORE) += ext/
 snd-intel-dspcfg-objs := intel-dsp-config.o
 snd-intel-dspcfg-$(CONFIG_SND_INTEL_NHLT) += intel-nhlt.o
 obj-$(CONFIG_SND_INTEL_DSP_CONFIG) += snd-intel-dspcfg.o
+
+snd-intel-sdw-acpi-objs := intel-sdw-acpi.o
+obj-$(CONFIG_SND_INTEL_SOUNDWIRE_ACPI) += snd-intel-sdw-acpi.o
index a9bd39b..b2df7b4 100644
@@ -133,7 +133,7 @@ void snd_hdac_link_free_all(struct hdac_bus *bus)
 EXPORT_SYMBOL_GPL(snd_hdac_link_free_all);
 
 /**
- * snd_hdac_ext_bus_get_link_index - get link based on codec name
+ * snd_hdac_ext_bus_get_link - get link based on codec name
  * @bus: the pointer to HDAC bus object
  * @codec_name: codec name
  */
index c4d54a8..0c005d6 100644
@@ -133,7 +133,7 @@ void snd_hdac_ext_stream_decouple(struct hdac_bus *bus,
 EXPORT_SYMBOL_GPL(snd_hdac_ext_stream_decouple);
 
 /**
- * snd_hdac_ext_linkstream_start - start a stream
+ * snd_hdac_ext_link_stream_start - start a stream
  * @stream: HD-audio ext core stream to start
  */
 void snd_hdac_ext_link_stream_start(struct hdac_ext_stream *stream)
index d75f31e..fe35875 100644
@@ -386,7 +386,7 @@ int snd_hdac_regmap_init(struct hdac_device *codec)
 EXPORT_SYMBOL_GPL(snd_hdac_regmap_init);
 
 /**
- * snd_hdac_regmap_init - Release the regmap from HDA codec
+ * snd_hdac_regmap_exit - Release the regmap from HDA codec
  * @codec: the codec object
  */
 void snd_hdac_regmap_exit(struct hdac_device *codec)
index d1eb9d3..ab5ff78 100644
@@ -557,4 +557,4 @@ EXPORT_SYMBOL_GPL(snd_intel_acpi_dsp_driver_probe);
 
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Intel DSP config driver");
-MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT);
+MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
index 059aaf0..e223723 100644
@@ -31,18 +31,49 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
        struct nhlt_endpoint *epnt;
        struct nhlt_dmic_array_config *cfg;
        struct nhlt_vendor_dmic_array_config *cfg_vendor;
+       struct nhlt_fmt *fmt_configs;
        unsigned int dmic_geo = 0;
-       u8 j;
+       u16 max_ch = 0;
+       u8 i, j;
 
        if (!nhlt)
                return 0;
 
-       epnt = (struct nhlt_endpoint *)nhlt->desc;
+       if (nhlt->header.length <= sizeof(struct acpi_table_header)) {
+               dev_warn(dev, "Invalid DMIC description table\n");
+               return 0;
+       }
+
+       for (j = 0, epnt = nhlt->desc; j < nhlt->endpoint_count; j++,
+            epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length)) {
+
+               if (epnt->linktype != NHLT_LINK_DMIC)
+                       continue;
+
+               cfg = (struct nhlt_dmic_array_config  *)(epnt->config.caps);
+               fmt_configs = (struct nhlt_fmt *)(epnt->config.caps + epnt->config.size);
+
+               /* find max number of channels based on format_configuration */
+               if (fmt_configs->fmt_count) {
+                       dev_dbg(dev, "%s: found %d format definitions\n",
+                               __func__, fmt_configs->fmt_count);
+
+                       for (i = 0; i < fmt_configs->fmt_count; i++) {
+                               struct wav_fmt_ext *fmt_ext;
+
+                               fmt_ext = &fmt_configs->fmt_config[i].fmt_ext;
 
-       for (j = 0; j < nhlt->endpoint_count; j++) {
-               if (epnt->linktype == NHLT_LINK_DMIC) {
-                       cfg = (struct nhlt_dmic_array_config  *)
-                                       (epnt->config.caps);
+                               if (fmt_ext->fmt.channels > max_ch)
+                                       max_ch = fmt_ext->fmt.channels;
+                       }
+                       dev_dbg(dev, "%s: max channels found %d\n", __func__, max_ch);
+               } else {
+                       dev_dbg(dev, "%s: No format information found\n", __func__);
+               }
+
+               if (cfg->device_config.config_type != NHLT_CONFIG_TYPE_MIC_ARRAY) {
+                       dmic_geo = max_ch;
+               } else {
                        switch (cfg->array_type) {
                        case NHLT_MIC_ARRAY_2CH_SMALL:
                        case NHLT_MIC_ARRAY_2CH_BIG:
@@ -59,13 +90,23 @@ int intel_nhlt_get_dmic_geo(struct device *dev, struct nhlt_acpi_table *nhlt)
                                dmic_geo = cfg_vendor->nb_mics;
                                break;
                        default:
-                               dev_warn(dev, "undefined DMIC array_type 0x%0x\n",
-                                        cfg->array_type);
+                               dev_warn(dev, "%s: undefined DMIC array_type 0x%0x\n",
+                                        __func__, cfg->array_type);
+                       }
+
+                       if (dmic_geo > 0) {
+                               dev_dbg(dev, "%s: Array with %d dmics\n", __func__, dmic_geo);
+                       }
+                       if (max_ch > dmic_geo) {
+                               dev_dbg(dev, "%s: max channels %d exceed dmic number %d\n",
+                                       __func__, max_ch, dmic_geo);
                        }
                }
-               epnt = (struct nhlt_endpoint *)((u8 *)epnt + epnt->length);
        }
 
+       dev_dbg(dev, "%s: dmic number %d max_ch %d\n",
+               __func__, dmic_geo, max_ch);
+
        return dmic_geo;
 }
 EXPORT_SYMBOL_GPL(intel_nhlt_get_dmic_geo);
diff --git a/sound/hda/intel-sdw-acpi.c b/sound/hda/intel-sdw-acpi.c
new file mode 100644
index 0000000..c0123bc
--- /dev/null
+++ b/sound/hda/intel-sdw-acpi.c
@@ -0,0 +1,179 @@
+// SPDX-License-Identifier: (GPL-2.0 OR BSD-3-Clause)
+// Copyright(c) 2015-2021 Intel Corporation.
+
+/*
+ * SDW Intel ACPI scan helpers
+ */
+
+#include <linux/acpi.h>
+#include <linux/bits.h>
+#include <linux/bitfield.h>
+#include <linux/device.h>
+#include <linux/errno.h>
+#include <linux/export.h>
+#include <linux/fwnode.h>
+#include <linux/module.h>
+#include <linux/soundwire/sdw_intel.h>
+#include <linux/string.h>
+
+#define SDW_LINK_TYPE          4 /* from Intel ACPI documentation */
+#define SDW_MAX_LINKS          4
+
+static int ctrl_link_mask;
+module_param_named(sdw_link_mask, ctrl_link_mask, int, 0444);
+MODULE_PARM_DESC(sdw_link_mask, "Intel link mask (one bit per link)");
+
+static bool is_link_enabled(struct fwnode_handle *fw_node, int i)
+{
+       struct fwnode_handle *link;
+       char name[32];
+       u32 quirk_mask = 0;
+
+       /* Find master handle */
+       snprintf(name, sizeof(name),
+                "mipi-sdw-link-%d-subproperties", i);
+
+       link = fwnode_get_named_child_node(fw_node, name);
+       if (!link)
+               return false;
+
+       fwnode_property_read_u32(link,
+                                "intel-quirk-mask",
+                                &quirk_mask);
+
+       if (quirk_mask & SDW_INTEL_QUIRK_MASK_BUS_DISABLE)
+               return false;
+
+       return true;
+}
+
+static int
+sdw_intel_scan_controller(struct sdw_intel_acpi_info *info)
+{
+       struct acpi_device *adev;
+       int ret, i;
+       u8 count;
+
+       if (acpi_bus_get_device(info->handle, &adev))
+               return -EINVAL;
+
+       /* Found controller, find links supported */
+       count = 0;
+       ret = fwnode_property_read_u8_array(acpi_fwnode_handle(adev),
+                                           "mipi-sdw-master-count", &count, 1);
+
+       /*
+        * In theory we could check the number of links supported in
+        * hardware, but in that step we cannot assume SoundWire IP is
+        * powered.
+        *
+        * In addition, if the BIOS doesn't even provide this
+        * 'master-count' property then all the inits based on link
+        * masks will fail as well.
+        *
+        * We will check the hardware capabilities in the startup() step
+        */
+
+       if (ret) {
+               dev_err(&adev->dev,
+                       "Failed to read mipi-sdw-master-count: %d\n", ret);
+               return -EINVAL;
+       }
+
+       /* Check count is within bounds */
+       if (count > SDW_MAX_LINKS) {
+               dev_err(&adev->dev, "Link count %d exceeds max %d\n",
+                       count, SDW_MAX_LINKS);
+               return -EINVAL;
+       }
+
+       if (!count) {
+               dev_warn(&adev->dev, "No SoundWire links detected\n");
+               return -EINVAL;
+       }
+       dev_dbg(&adev->dev, "ACPI reports %d SDW Link devices\n", count);
+
+       info->count = count;
+       info->link_mask = 0;
+
+       for (i = 0; i < count; i++) {
+               if (ctrl_link_mask && !(ctrl_link_mask & BIT(i))) {
+                       dev_dbg(&adev->dev,
+                               "Link %d masked, will not be enabled\n", i);
+                       continue;
+               }
+
+               if (!is_link_enabled(acpi_fwnode_handle(adev), i)) {
+                       dev_dbg(&adev->dev,
+                               "Link %d not selected in firmware\n", i);
+                       continue;
+               }
+
+               info->link_mask |= BIT(i);
+       }
+
+       return 0;
+}
+
+static acpi_status sdw_intel_acpi_cb(acpi_handle handle, u32 level,
+                                    void *cdata, void **return_value)
+{
+       struct sdw_intel_acpi_info *info = cdata;
+       struct acpi_device *adev;
+       acpi_status status;
+       u64 adr;
+
+       status = acpi_evaluate_integer(handle, METHOD_NAME__ADR, NULL, &adr);
+       if (ACPI_FAILURE(status))
+               return AE_OK; /* keep going */
+
+       if (acpi_bus_get_device(handle, &adev)) {
+               pr_err("%s: Couldn't find ACPI handle\n", __func__);
+               return AE_NOT_FOUND;
+       }
+
+       info->handle = handle;
+
+       /*
+        * On some Intel platforms, multiple children of the HDAS
+        * device can be found, but only one of them is the SoundWire
+        * controller. The SNDW device is always exposed with
+        * Name(_ADR, 0x40000000), with bits 31..28 representing the
+        * SoundWire link so filter accordingly
+        */
+       if (FIELD_GET(GENMASK(31, 28), adr) != SDW_LINK_TYPE)
+               return AE_OK; /* keep going */
+
+       /* device found, stop namespace walk */
+       return AE_CTRL_TERMINATE;
+}
+
+/**
+ * sdw_intel_acpi_scan() - SoundWire Intel init routine
+ * @parent_handle: ACPI parent handle
+ * @info: description of what firmware/DSDT tables expose
+ *
+ * This scans the namespace and queries firmware to figure out which
+ * links to enable. A follow-up use of sdw_intel_probe() and
+ * sdw_intel_startup() is required for creation of devices and bus
+ * startup
+ */
+int sdw_intel_acpi_scan(acpi_handle *parent_handle,
+                       struct sdw_intel_acpi_info *info)
+{
+       acpi_status status;
+
+       info->handle = NULL;
+       status = acpi_walk_namespace(ACPI_TYPE_DEVICE,
+                                    parent_handle, 1,
+                                    sdw_intel_acpi_cb,
+                                    NULL, info, NULL);
+       if (ACPI_FAILURE(status) || info->handle == NULL)
+               return -ENODEV;
+
+       return sdw_intel_scan_controller(info);
+}
+EXPORT_SYMBOL_NS(sdw_intel_acpi_scan, SND_INTEL_SOUNDWIRE_ACPI);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_DESCRIPTION("Intel Soundwire ACPI helpers");
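
As the kernel-doc above says, this helper only discovers links; device creation and bus startup happen later via sdw_intel_probe() and sdw_intel_startup(). A hedged sketch of a caller, where `dev` is a hypothetical struct device with an ACPI companion and error handling is trimmed:

	/* Sketch only: discover which SoundWire links the BIOS enabled. */
	struct sdw_intel_acpi_info info;
	int ret;

	ret = sdw_intel_acpi_scan(ACPI_HANDLE(dev), &info);
	if (ret < 0)
		return ret;             /* no SNDW device or no usable links */

	/* info.link_mask now holds one bit per enabled link, already
	 * filtered by the sdw_link_mask module parameter and the
	 * per-link BIOS quirk masks. */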
index ca18fe3..f11af98 100644
 MODULE_AUTHOR("Massimo Piccioni <dafastidio@libero.it>");
 MODULE_DESCRIPTION("AD1816A, AD1815");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Highscreen,Sound-Boostar 16 3D},"
-               "{Analog Devices,AD1815},"
-               "{Analog Devices,AD1816A},"
-               "{TerraTec,Base 64},"
-               "{TerraTec,AudioSystem EWS64S},"
-               "{Aztech/Newcom SC-16 3D},"
-               "{Shark Predator ISA}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 1-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 6f221ee..edafb49 100644
@@ -22,9 +22,6 @@
 MODULE_DESCRIPTION(CRD_NAME);
 MODULE_AUTHOR("Tugrul Galatali <galatalt@stuy.edu>, Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Analog Devices,AD1848},"
-               "{Analog Devices,AD1847},"
-               "{Crystal Semiconductors,CS4248}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 1085f5b..bacb7a1 100644
 #define PFX "als100: "
 
 MODULE_DESCRIPTION("Avance Logic ALS007/ALS1X0");
-MODULE_SUPPORTED_DEVICE("{{Diamond Technologies DT-019X},"
-               "{Avance Logic ALS-007}}"
-               "{{Avance Logic,ALS100 - PRO16PNP},"
-               "{Avance Logic,ALS110},"
-               "{Avance Logic,ALS120},"
-               "{Avance Logic,ALS200},"
-               "{3D Melody,MF1000},"
-               "{Digimate,3D Sound},"
-               "{Avance Logic,ALS120},"
-               "{RTL,RTL3000}}");
-
 MODULE_AUTHOR("Massimo Piccioni <dafastidio@libero.it>");
 MODULE_LICENSE("GPL");
 
index 4ed5209..867e9ae 100644
 MODULE_AUTHOR("Massimo Piccioni <dafastidio@libero.it>");
 MODULE_DESCRIPTION("Aztech Systems AZT2320");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Aztech Systems,PRO16V},"
-               "{Aztech Systems,AZT2320},"
-               "{Aztech Systems,AZT3300},"
-               "{Aztech Systems,AZT2320},"
-               "{Aztech Systems,AZT3000}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 19e2585..bc112df 100644
@@ -51,7 +51,6 @@
 MODULE_AUTHOR("George Talusan <gstalusan@uwaterloo.ca>");
 MODULE_DESCRIPTION("C-Media CMI8330/CMI8329");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{C-Media,CMI8330,isapnp:{CMI0001,@@@0001,@X@0001}}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
index c56cbc0..ec054b9 100644
@@ -23,7 +23,6 @@
 MODULE_DESCRIPTION(CRD_NAME);
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Crystal Semiconductors,CS4231}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 63fb0cb..186d7d4 100644
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Cirrus Logic CS4232-9");
-MODULE_SUPPORTED_DEVICE("{{Turtle Beach,TBS-2000},"
-               "{Turtle Beach,Tropez Plus},"
-               "{SIC CrystalWave 32},"
-               "{Hewlett Packard,Omnibook 5500},"
-               "{TerraTec,Maestro 32/96},"
-               "{Philips,PCA70PS}},"
-               "{{Crystal Semiconductors,CS4235},"
-               "{Crystal Semiconductors,CS4236},"
-               "{Crystal Semiconductors,CS4237},"
-               "{Crystal Semiconductors,CS4238},"
-               "{Crystal Semiconductors,CS4239},"
-               "{Acer,AW37},"
-               "{Acer,AW35/Pro},"
-               "{Crystal,3D},"
-               "{Crystal Computer,TidalWave128},"
-               "{Dell,Optiplex GX1},"
-               "{Dell,Workstation 400 sound},"
-               "{EliteGroup,P5TX-LA sound},"
-               "{Gallant,SC-70P},"
-               "{Gateway,E1000 Onboard CS4236B},"
-               "{Genius,Sound Maker 3DJ},"
-               "{Hewlett Packard,HP6330 sound},"
-               "{IBM,PC 300PL sound},"
-               "{IBM,Aptiva 2137 E24},"
-               "{IBM,IntelliStation M Pro},"
-               "{Intel,Marlin Spike Mobo CS4235},"
-               "{Intel PR440FX Onboard},"
-               "{Guillemot,MaxiSound 16 PnP},"
-               "{NewClear,3D},"
-               "{TerraTec,AudioSystem EWS64L/XL},"
-               "{Typhoon Soundsystem,CS4236B},"
-               "{Turtle Beach,Malibu},"
-               "{Unknown,Digital PC 5000 Onboard}}");
-
 MODULE_ALIAS("snd_cs4232");
 
 #define IDENT "CS4232+"
index 4a1f61f..750d499 100644
 MODULE_DESCRIPTION(CRD_NAME);
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ESS,ES688 PnP AudioDrive,pnp:ESS0100},"
-               "{ESS,ES1688 PnP AudioDrive,pnp:ESS0102},"
-               "{ESS,ES688 AudioDrive,pnp:ESS6881},"
-               "{ESS,ES1688 AudioDrive,pnp:ESS1681}}");
-
 MODULE_ALIAS("snd_es968");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
index 9beef80..375a4a6 100644
@@ -1929,17 +1929,9 @@ static int snd_es18xx_mixer(struct snd_card *card)
 
 /* Card level */
 
-MODULE_AUTHOR("Christian Fischbach <fishbach@pool.informatik.rwth-aachen.de>, Abramo Bagnara <abramo@alsa-project.org>");  
+MODULE_AUTHOR("Christian Fischbach <fishbach@pool.informatik.rwth-aachen.de>, Abramo Bagnara <abramo@alsa-project.org>");
 MODULE_DESCRIPTION("ESS ES18xx AudioDrive");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ESS,ES1868 PnP AudioDrive},"
-               "{ESS,ES1869 PnP AudioDrive},"
-               "{ESS,ES1878 PnP AudioDrive},"
-               "{ESS,ES1879 PnP AudioDrive},"
-               "{ESS,ES1887 PnP AudioDrive},"
-               "{ESS,ES1888 PnP AudioDrive},"
-               "{ESS,ES1887 AudioDrive},"
-               "{ESS,ES1888 AudioDrive}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 015f88a..0fba5d8 100644
@@ -23,7 +23,6 @@
 MODULE_DESCRIPTION(CRD_NAME);
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound Classic}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index c9f31b4..da2b2ca 100644
@@ -27,7 +27,6 @@
 MODULE_DESCRIPTION(CRD_NAME);
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound Extreme}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index dc09fbd..24b945f 100644
@@ -21,7 +21,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Gravis UltraSound MAX");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound MAX}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index e4d412e..99581fb 100644
@@ -28,14 +28,8 @@ MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
 #ifndef SNDRV_STB
 MODULE_DESCRIPTION("AMD InterWave");
-MODULE_SUPPORTED_DEVICE("{{Gravis,UltraSound Plug & Play},"
-               "{STB,SoundRage32},"
-               "{MED,MED3210},"
-               "{Dynasonix,Dynasonix Pro},"
-               "{Panasonic,PCA761AW}}");
 #else
 MODULE_DESCRIPTION("AMD InterWave STB with TEA6330T");
-MODULE_SUPPORTED_DEVICE("{{AMD,InterWave STB with TEA6330T}}");
 #endif
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
index 7649a8a..9bde11d 100644
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Yamaha OPL3SA2+");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Yamaha,YMF719E-S},"
-               "{Genius,Sound Maker 3DX},"
-               "{Yamaha,OPL3SA3},"
-               "{Intel,AL440LX sound},"
-               "{NeoMagic,MagicWave 3DX}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 2093334..a510b20 100644
@@ -33,9 +33,6 @@
 MODULE_AUTHOR("Martin Langer <martin-langer@gmx.de>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("Miro miroSOUND PCM1 pro, PCM12, PCM20 Radio");
-MODULE_SUPPORTED_DEVICE("{{Miro,miroSOUND PCM1 pro}, "
-                       "{Miro,miroSOUND PCM12}, "
-                       "{Miro,miroSOUND PCM20 Radio}}");
 
 static int index = SNDRV_DEFAULT_IDX1;         /* Index 0-MAX */
 static char *id = SNDRV_DEFAULT_STR1;          /* ID for this card */
index 758f5b5..08e61d9 100644
@@ -36,17 +36,11 @@ MODULE_AUTHOR("Massimo Piccioni <dafastidio@libero.it>");
 MODULE_LICENSE("GPL");
 #ifdef OPTi93X
 MODULE_DESCRIPTION("OPTi93X");
-MODULE_SUPPORTED_DEVICE("{{OPTi,82C931/3}}");
 #else  /* OPTi93X */
 #ifdef CS4231
 MODULE_DESCRIPTION("OPTi92X - CS4231");
-MODULE_SUPPORTED_DEVICE("{{OPTi,82C924 (CS4231)},"
-               "{OPTi,82C925 (CS4231)}}");
 #else  /* CS4231 */
 MODULE_DESCRIPTION("OPTi92X - AD1848");
-MODULE_SUPPORTED_DEVICE("{{OPTi,82C924 (AD1848)},"
-               "{OPTi,82C925 (AD1848)},"
-               "{OAK,Mozart}}");
 #endif /* CS4231 */
 #endif /* OPTi93X */
 
index 0e2e0ab..7ba5dd1 100644
@@ -28,9 +28,6 @@
 #define PFX "jazz16: "
 
 MODULE_DESCRIPTION("Media Vision Jazz16");
-MODULE_SUPPORTED_DEVICE("{{Media Vision ??? },"
-               "{RTL,RTL3000}}");
-
 MODULE_AUTHOR("Krzysztof Helt <krzysztof.h1@wp.pl>");
 MODULE_LICENSE("GPL");
 
index db284b7..63ef960 100644
@@ -31,16 +31,8 @@ MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_LICENSE("GPL");
 #ifndef SNDRV_SBAWE
 MODULE_DESCRIPTION("Sound Blaster 16");
-MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB 16},"
-               "{Creative Labs,SB Vibra16S},"
-               "{Creative Labs,SB Vibra16C},"
-               "{Creative Labs,SB Vibra16CL},"
-               "{Creative Labs,SB Vibra16X}}");
 #else
 MODULE_DESCRIPTION("Sound Blaster AWE");
-MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB AWE 32},"
-               "{Creative Labs,SB AWE 64},"
-               "{Creative Labs,SB AWE 64 Gold}}");
 #endif
 
 #if 0
index 8e3e67b..6c9d534 100644
@@ -17,7 +17,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Sound Blaster 1.0/2.0/Pro");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB 1.0/SB 2.0/SB Pro}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index def1375..3462663 100644
@@ -29,9 +29,6 @@
 MODULE_AUTHOR("Krzysztof Helt");
 MODULE_DESCRIPTION("Gallant SC-6000");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Gallant, SC-6000},"
-                       "{AudioExcel, Audio Excel DSP 16},"
-                       "{Zoltrix, AV302}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index b750a4f..a443797 100644
@@ -21,7 +21,6 @@
 MODULE_AUTHOR("Paul Barton-Davis <pbd@op.net>");
 MODULE_DESCRIPTION("Turtle Beach Wavefront");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Turtle Beach,Maui/Tropez/Tropez+}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;         /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;          /* ID for this card */
index 5bf1ea1..989f656 100644
@@ -32,7 +32,6 @@
 MODULE_AUTHOR("Vivien Chappelier <vivien.chappelier@linux-mips.org>");
 MODULE_DESCRIPTION("SGI O2 Audio");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Silicon Graphics, O2 Audio}}");
 
 static int index = SNDRV_DEFAULT_IDX1;  /* Index 0-MAX */
 static char *id = SNDRV_DEFAULT_STR1;   /* ID for this card */
index ca6b4b9..e35e931 100644
@@ -312,14 +312,14 @@ static int __init n64audio_probe(struct platform_device *pdev)
        }
 
        priv->mi_reg_base = devm_platform_ioremap_resource(pdev, 0);
-       if (!priv->mi_reg_base) {
-               err = -EINVAL;
+       if (IS_ERR(priv->mi_reg_base)) {
+               err = PTR_ERR(priv->mi_reg_base);
                goto fail_dma_alloc;
        }
 
        priv->ai_reg_base = devm_platform_ioremap_resource(pdev, 1);
-       if (!priv->ai_reg_base) {
-               err = -EINVAL;
+       if (IS_ERR(priv->ai_reg_base)) {
+               err = PTR_ERR(priv->ai_reg_base);
                goto fail_dma_alloc;
        }
 
index 5d835d2..4520022 100644
@@ -43,7 +43,6 @@
 MODULE_AUTHOR("Kyle McMartin <kyle@parisc-linux.org>, Thibaut Varene <t-bone@parisc-linux.org>");
 MODULE_DESCRIPTION("Analog Devices AD1889 ALSA sound driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Analog Devices,AD1889}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 module_param_array(index, int, NULL, 0444);
index 51f2479..0d66b92 100644
@@ -29,7 +29,6 @@
 MODULE_AUTHOR("Matt Wu <Matt_Wu@acersoftech.com.cn>");
 MODULE_DESCRIPTION("ALI M5451");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ALI,M5451,pci},{ALI,M5451}}");
 
 static int index = SNDRV_DEFAULT_IDX1; /* Index */
 static char *id = SNDRV_DEFAULT_STR1;  /* ID for this card */
index 1dc8c4e..bd4fd09 100644
@@ -86,7 +86,6 @@ enum {DEVICE_ALS300, DEVICE_ALS300_PLUS};
 MODULE_AUTHOR("Ash Willis <ashwillis@programmer.net>");
 MODULE_DESCRIPTION("Avance Logic ALS300");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Avance Logic,ALS300},{Avance Logic,ALS300+}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
index 2edc745..139ac2a 100644
@@ -68,7 +68,6 @@
 MODULE_AUTHOR("Bart Hartgers <bart@etpmod.phys.tue.nl>, Andreas Mohr");
 MODULE_DESCRIPTION("Avance Logic ALS4000");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Avance Logic,ALS4000}}");
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
 #define SUPPORT_JOYSTICK 1
index a25d754..579425c 100644
@@ -23,7 +23,6 @@
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("ATI IXP AC97 controller");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ATI,IXP150/200/250/300/400/600}}");
 
 static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */
 static char *id = SNDRV_DEFAULT_STR1;  /* ID for this card */
index ae88217..45e75af 100644
@@ -23,7 +23,6 @@
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("ATI IXP MC97 controller");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ATI,IXP150/200/250}}");
 
 static int index = -2; /* Exclude the first card */
 static char *id = SNDRV_DEFAULT_STR1;  /* ID for this card */
index 5dd98e6..1b37b72 100644
@@ -41,8 +41,6 @@ MODULE_PARM_DESC(pcifix, "Enable VIA-workaround for " CARD_NAME " soundcard.");
 
 MODULE_DESCRIPTION("Aureal vortex");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Aureal Semiconductor Inc., Aureal Vortex Sound Processor}}");
-
 MODULE_DEVICE_TABLE(pci, snd_vortex_ids);
 
 static void vortex_fix_latency(struct pci_dev *vortex)
index 2ac594d..51dcf1b 100644
 MODULE_AUTHOR("Andreas Mohr <andi AT lisas.de>");
 MODULE_DESCRIPTION("Aztech AZF3328 (PCI168)");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Aztech,AZF3328}}");
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
 #define SUPPORT_GAMEPORT 1
index cf9f8d8..91512b3 100644 (file)
@@ -23,8 +23,6 @@
 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
 MODULE_DESCRIPTION("Brooktree Bt87x audio driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Brooktree,Bt878},"
-               "{Brooktree,Bt879}}");
 
 static int index[SNDRV_CARDS] = {[0 ... (SNDRV_CARDS - 1)] = -2}; /* Exclude the first card */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index ee20f9a..bee4710 100644 (file)
 MODULE_AUTHOR("James Courtier-Dutton <James@superbug.demon.co.uk>");
 MODULE_DESCRIPTION("CA0106");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Creative,SB CA0106 chip}}");
 
 // module parameters (see "Module Parameters")
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
index 7363d61..5984463 100644 (file)
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("C-Media CMI8x38 PCI");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{C-Media,CMI8738},"
-               "{C-Media,CMI8738B},"
-               "{C-Media,CMI8338A},"
-               "{C-Media,CMI8338B}}");
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
 #define SUPPORT_JOYSTICK 1
index 94d2a6a..bf3bb70 100644 (file)
@@ -25,7 +25,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Cirrus Logic CS4281");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Cirrus Logic,CS4281}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index a6e0a44..1db7b41 100644 (file)
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Cirrus Logic Sound Fusion CS46XX");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Cirrus Logic,Sound Fusion (CS4280)},"
-               "{Cirrus Logic,Sound Fusion (CS4610)},"
-               "{Cirrus Logic,Sound Fusion (CS4612)},"
-               "{Cirrus Logic,Sound Fusion (CS4615)},"
-               "{Cirrus Logic,Sound Fusion (CS4622)},"
-               "{Cirrus Logic,Sound Fusion (CS4624)},"
-               "{Cirrus Logic,Sound Fusion (CS4630)}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 359bc6a..9b716b5 100644 (file)
@@ -393,4 +393,3 @@ module_pci_driver(cs5535audio_driver);
 MODULE_AUTHOR("Jaya Kumar");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("CS5535 Audio");
-MODULE_SUPPORTED_DEVICE("CS5535 Audio");
index a855fb8..55af8ef 100644 (file)
@@ -991,7 +991,7 @@ static int daio_mgr_dao_init(void *blk, unsigned int idx, unsigned int conf)
 
        if (idx < 4) {
                /* S/PDIF output */
-               switch ((conf & 0x7)) {
+               switch ((conf & 0xf)) {
                case 1:
                        set_field(&ctl->txctl[idx], ATXCTL_NUC, 0);
                        break;
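
The widened mask is the whole fix: the S/PDIF output configuration occupies four bits, so `conf & 0x7` silently folded settings 8-15 onto cases 0-7. A tiny stand-alone demonstration of the truncation:

    #include <stdio.h>

    int main(void)
    {
            unsigned int conf = 0x9;        /* hypothetical 4-bit setting */

            /* prints "masked 0x1, full 0x9" */
            printf("masked 0x%x, full 0x%x\n", conf & 0x7, conf & 0xf);
            return 0;
    }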
index 8c07c64..713d36e 100644 (file)
@@ -18,7 +18,6 @@
 MODULE_AUTHOR("Creative Technology Ltd");
 MODULE_DESCRIPTION("X-Fi driver version 1.03");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{Creative Labs, Sound Blaster X-Fi}");
 
 static unsigned int reference_rate = 48000;
 static unsigned int multiple = 2;
index a20b2bb..9bd67ac 100644 (file)
@@ -10,7 +10,6 @@
 MODULE_AUTHOR("Giuliano Pochini <pochini@shiny.it>");
 MODULE_LICENSE("GPL v2");
 MODULE_DESCRIPTION("Echoaudio " ECHOCARD_NAME " soundcards driver");
-MODULE_SUPPORTED_DEVICE("{{Echoaudio," ECHOCARD_NAME "}}");
 MODULE_DEVICE_TABLE(pci, snd_echo_ids);
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
index 353934c..45833bc 100644 (file)
@@ -18,8 +18,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("EMU10K1");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Creative Labs,SB Live!/PCI512/E-mu APS},"
-              "{Creative Labs,SB Audigy}}");
 
 #if IS_ENABLED(CONFIG_SND_SEQUENCER)
 #define ENABLE_SYNTH
index 785ec0c..d9a12cd 100644 (file)
@@ -31,7 +31,6 @@
 MODULE_AUTHOR("Francisco Moraes <fmoraes@nc.rr.com>");
 MODULE_DESCRIPTION("EMU10K1X");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Dell Creative Labs,SB Live!}");
 
 // module parameters (see "Module Parameters")
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
index 93c4fd3..3ccccdb 100644 (file)
@@ -52,17 +52,9 @@ MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, Thomas Sailer <sailer@ife.ee.et
 MODULE_LICENSE("GPL");
 #ifdef CHIP1370
 MODULE_DESCRIPTION("Ensoniq AudioPCI ES1370");
-MODULE_SUPPORTED_DEVICE("{{Ensoniq,AudioPCI-97 ES1370},"
-               "{Creative Labs,SB PCI64/128 (ES1370)}}");
 #endif
 #ifdef CHIP1371
 MODULE_DESCRIPTION("Ensoniq/Creative AudioPCI ES1371+");
-MODULE_SUPPORTED_DEVICE("{{Ensoniq,AudioPCI ES1371/73},"
-               "{Ensoniq,AudioPCI ES1373},"
-               "{Creative Labs,Ectiva EV1938},"
-               "{Creative Labs,SB PCI64/128 (ES1371/73)},"
-               "{Creative Labs,Vibra PCI128},"
-               "{Ectiva,EV1938}}");
 #endif
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
index 3b5d68c..afc6634 100644 (file)
 MODULE_AUTHOR("Jaromir Koutek <miri@punknet.cz>");
 MODULE_DESCRIPTION("ESS Solo-1");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ESS,ES1938},"
-                "{ESS,ES1946},"
-                "{ESS,ES1969},"
-               "{TerraTec,128i PCI}}");
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
 #define SUPPORT_JOYSTICK 1
index 747fa69..5fa1861 100644 (file)
 
 MODULE_DESCRIPTION("ESS Maestro");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ESS,Maestro 2e},"
-               "{ESS,Maestro 2},"
-               "{ESS,Maestro 1},"
-               "{TerraTec,DMX}}");
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
 #define SUPPORT_JOYSTICK 1
index c6ad623..6279eb1 100644 (file)
@@ -26,8 +26,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("ForteMedia FM801");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ForteMedia,FM801},"
-               "{Genius,SoundMaker Live 5.1}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 6a85645..17a25e4 100644 (file)
@@ -47,6 +47,10 @@ static void hda_codec_unsol_event(struct hdac_device *dev, unsigned int ev)
        if (codec->bus->shutdown)
                return;
 
+       /* ignore unsol events during system suspend/resume */
+       if (codec->core.dev.power.power_state.event != PM_EVENT_ON)
+               return;
+
        if (codec->patch_ops.unsol_event)
                codec->patch_ops.unsol_event(codec, ev);
 }
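
The guard works because the HDA codec PM callbacks record the last system-PM message in dev->power.power_state: it reads PM_EVENT_ON only while the device is fully active, so unsolicited jack events that race with suspend/resume are dropped rather than handled against half-powered hardware. The same test can gate any asynchronous handler (illustrative helper):

    #include <linux/device.h>
    #include <linux/pm.h>

    /* True only when no system sleep transition is in flight. */
    static bool demo_device_is_awake(struct device *dev)
    {
            return dev->power.power_state.event == PM_EVENT_ON;
    }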
index 9b75506..2026f1c 100644 (file)
@@ -3483,7 +3483,7 @@ EXPORT_SYMBOL_GPL(snd_hda_check_amp_list_power);
  */
 
 /**
- * snd_hda_input_mux_info_info - Info callback helper for the input-mux enum
+ * snd_hda_input_mux_info - Info callback helper for the input-mux enum
  * @imux: imux helper object
  * @uinfo: pointer to get/store the data
  */
@@ -3506,7 +3506,7 @@ int snd_hda_input_mux_info(const struct hda_input_mux *imux,
 EXPORT_SYMBOL_GPL(snd_hda_input_mux_info);
 
 /**
- * snd_hda_input_mux_info_put - Put callback helper for the input-mux enum
+ * snd_hda_input_mux_put - Put callback helper for the input-mux enum
  * @codec: the HDA codec
  * @imux: imux helper object
  * @ucontrol: pointer to get/store the data
@@ -3941,7 +3941,7 @@ unsigned int snd_hda_correct_pin_ctl(struct hda_codec *codec,
 EXPORT_SYMBOL_GPL(snd_hda_correct_pin_ctl);
 
 /**
- * _snd_hda_pin_ctl - Helper to set pin ctl value
+ * _snd_hda_set_pin_ctl - Helper to set pin ctl value
  * @codec: the HDA codec
  * @pin: referred pin NID
  * @val: pin control value to set
index 9087981..ca2f2ec 100644 (file)
@@ -609,13 +609,6 @@ static int azx_pcm_open(struct snd_pcm_substream *substream)
                                     20,
                                     178000000);
 
-       /* by some reason, the playback stream stalls on PulseAudio with
-        * tsched=1 when a capture stream triggers.  Until we figure out the
-        * real cause, disable tsched mode by telling the PCM info flag.
-        */
-       if (chip->driver_caps & AZX_DCAPS_AMD_WORKAROUND)
-               runtime->hw.info |= SNDRV_PCM_INFO_BATCH;
-
        if (chip->align_buffer_size)
                /* constrain buffer sizes to be multiple of 128
                   bytes. This is more efficient in terms of memory
index 5e40944..f5cba7a 100644 (file)
@@ -3923,7 +3923,7 @@ static void vmaster_update_mute_led(void *private_data, int enabled)
 }
 
 /**
- * snd_dha_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED
+ * snd_hda_gen_add_mute_led_cdev - Create a LED classdev and enable as vmaster mute LED
  * @codec: the HDA codec
  * @callback: the callback for LED classdev brightness_set_blocking
  */
@@ -4065,7 +4065,7 @@ static int add_micmute_led_hook(struct hda_codec *codec)
 
        spec->micmute_led.led_mode = MICMUTE_LED_FOLLOW_MUTE;
        spec->micmute_led.capture = 0;
-       spec->micmute_led.led_value = 0;
+       spec->micmute_led.led_value = -1;
        spec->micmute_led.old_hook = spec->cap_sync_hook;
        spec->cap_sync_hook = update_micmute_led;
        if (!snd_hda_gen_add_kctl(spec, NULL, &micmute_led_mode_ctl))
@@ -4074,7 +4074,7 @@ static int add_micmute_led_hook(struct hda_codec *codec)
 }
 
 /**
- * snd_dha_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED
+ * snd_hda_gen_add_micmute_led_cdev - Create a LED classdev and enable as mic-mute LED
  * @codec: the HDA codec
  * @callback: the callback for LED classdev brightness_set_blocking
  *
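
Besides the kerneldoc renames, note the led_value initializer: seeding the cached LED state with -1 (a value the update callback can never produce) guarantees the first update is not skipped by the change-detection compare. The idiom in isolation (all names hypothetical):

    static void demo_write_hw(int on) { /* hypothetical LED write */ }

    static int demo_cached = -1;    /* impossible value: force first write */

    static void demo_set_led(int on)
    {
            if (on == demo_cached)
                    return;         /* unchanged: skip the bus access */
            demo_cached = on;
            demo_write_hw(on);
    }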
index 5b492c3..79ade33 100644 (file)
@@ -208,40 +208,6 @@ MODULE_PARM_DESC(snoop, "Enable/disable snooping");
 
 
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Intel, ICH6},"
-                        "{Intel, ICH6M},"
-                        "{Intel, ICH7},"
-                        "{Intel, ESB2},"
-                        "{Intel, ICH8},"
-                        "{Intel, ICH9},"
-                        "{Intel, ICH10},"
-                        "{Intel, PCH},"
-                        "{Intel, CPT},"
-                        "{Intel, PPT},"
-                        "{Intel, LPT},"
-                        "{Intel, LPT_LP},"
-                        "{Intel, WPT_LP},"
-                        "{Intel, SPT},"
-                        "{Intel, SPT_LP},"
-                        "{Intel, HPT},"
-                        "{Intel, PBG},"
-                        "{Intel, SCH},"
-                        "{ATI, SB450},"
-                        "{ATI, SB600},"
-                        "{ATI, RS600},"
-                        "{ATI, RS690},"
-                        "{ATI, RS780},"
-                        "{ATI, R600},"
-                        "{ATI, RV630},"
-                        "{ATI, RV610},"
-                        "{ATI, RV670},"
-                        "{ATI, RV635},"
-                        "{ATI, RV620},"
-                        "{ATI, RV770},"
-                        "{VIA, VT8251},"
-                        "{VIA, VT8237A},"
-                        "{SiS, SIS966},"
-                        "{ULI, M5461}}");
 MODULE_DESCRIPTION("Intel HDA driver");
 
 #if defined(CONFIG_PM) && defined(CONFIG_VGA_SWITCHEROO)
@@ -1023,8 +989,14 @@ static int azx_prepare(struct device *dev)
        struct snd_card *card = dev_get_drvdata(dev);
        struct azx *chip;
 
+       if (!azx_is_pm_ready(card))
+               return 0;
+
        chip = card->private_data;
        chip->pm_prepared = 1;
+       snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
+
+       flush_work(&azx_bus(chip)->unsol_work);
 
        /* HDA controller always requires different WAKEEN for runtime suspend
         * and system suspend, so don't use direct-complete here.
@@ -1037,7 +1009,11 @@ static void azx_complete(struct device *dev)
        struct snd_card *card = dev_get_drvdata(dev);
        struct azx *chip;
 
+       if (!azx_is_pm_ready(card))
+               return;
+
        chip = card->private_data;
+       snd_power_change_state(card, SNDRV_CTL_POWER_D0);
        chip->pm_prepared = 0;
 }
 
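Taken together, ->prepare and ->complete now bracket the whole system-sleep window for the card: prepare flips the ALSA power state to D3hot (so user-space control access blocks in snd_power_wait()) and drains the unsolicited-event work before the controller goes down, and complete restores D0 once resume has finished. The shape, condensed into a hypothetical driver:

    #include <linux/device.h>
    #include <linux/pm.h>
    #include <linux/workqueue.h>
    #include <sound/core.h>

    static struct work_struct demo_unsol_work;      /* assumed initialized */

    static int demo_prepare(struct device *dev)
    {
            struct snd_card *card = dev_get_drvdata(dev);

            snd_power_change_state(card, SNDRV_CTL_POWER_D3hot);
            flush_work(&demo_unsol_work);   /* drain queued jack events */
            return 0;       /* 0: suspend normally, no direct-complete */
    }

    static void demo_complete(struct device *dev)
    {
            struct snd_card *card = dev_get_drvdata(dev);

            snd_power_change_state(card, SNDRV_CTL_POWER_D0);
    }

    static const struct dev_pm_ops demo_pm_ops = {
            .prepare  = demo_prepare,
            .complete = demo_complete,
    };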
index b8b5680..ac00866 100644 (file)
@@ -213,7 +213,7 @@ static void jack_detect_update(struct hda_codec *codec,
 }
 
 /**
- * snd_hda_set_dirty_all - Mark all the cached as dirty
+ * snd_hda_jack_set_dirty_all - Mark all the cached as dirty
  * @codec: the HDA codec
  *
  * This function sets the dirty flag to all entries of jack table.
@@ -293,7 +293,7 @@ find_callback_from_list(struct hda_jack_tbl *jack,
 }
 
 /**
- * snd_hda_jack_detect_enable_mst - enable the jack-detection
+ * snd_hda_jack_detect_enable_callback_mst - enable the jack-detection
  * @codec: the HDA codec
  * @nid: pin NID to enable
  * @func: callback function to register
index 7e62aed..b2b620f 100644 (file)
@@ -1309,6 +1309,7 @@ static const struct snd_pci_quirk ca0132_quirks[] = {
        SND_PCI_QUIRK(0x1102, 0x0013, "Recon3D", QUIRK_R3D),
        SND_PCI_QUIRK(0x1102, 0x0018, "Recon3D", QUIRK_R3D),
        SND_PCI_QUIRK(0x1102, 0x0051, "Sound Blaster AE-5", QUIRK_AE5),
+       SND_PCI_QUIRK(0x1102, 0x0191, "Sound Blaster AE-5 Plus", QUIRK_AE5),
        SND_PCI_QUIRK(0x1102, 0x0081, "Sound Blaster AE-7", QUIRK_AE7),
        {}
 };
@@ -2338,7 +2339,7 @@ static int dspio_send_scp_message(struct hda_codec *codec,
 }
 
 /**
- * Prepare and send the SCP message to DSP
+ * dspio_scp - Prepare and send the SCP message to DSP
  * @codec: the HDA codec
  * @mod_id: ID of the DSP module to send the command
  * @src_id: ID of the source
@@ -2865,7 +2866,7 @@ static int dsp_dma_stop(struct hda_codec *codec,
 }
 
 /**
- * Allocate router ports
+ * dsp_allocate_router_ports - Allocate router ports
  *
  * @codec: the HDA codec
  * @num_chans: number of channels in the stream
@@ -3178,8 +3179,7 @@ static int dspxfr_hci_write(struct hda_codec *codec,
 }
 
 /**
- * Write a block of data into DSP code or data RAM using pre-allocated
- * DMA engine.
+ * dspxfr_one_seg - Write a block of data into DSP code or data RAM using pre-allocated DMA engine.
  *
  * @codec: the HDA codec
  * @fls: pointer to a fast load image
@@ -3376,7 +3376,7 @@ static int dspxfr_one_seg(struct hda_codec *codec,
 }
 
 /**
- * Write the entire DSP image of a DSP code/data overlay to DSP memories
+ * dspxfr_image - Write the entire DSP image of a DSP code/data overlay to DSP memories
  *
  * @codec: the HDA codec
  * @fls_data: pointer to a fast load image
index f2aa226..dfef9c1 100644 (file)
@@ -149,6 +149,21 @@ static int cx_auto_vmaster_mute_led(struct led_classdev *led_cdev,
        return 0;
 }
 
+static void cxt_init_gpio_led(struct hda_codec *codec)
+{
+       struct conexant_spec *spec = codec->spec;
+       unsigned int mask = spec->gpio_mute_led_mask | spec->gpio_mic_led_mask;
+
+       if (mask) {
+               snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK,
+                                   mask);
+               snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION,
+                                   mask);
+               snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA,
+                                   spec->gpio_led);
+       }
+}
+
 static int cx_auto_init(struct hda_codec *codec)
 {
        struct conexant_spec *spec = codec->spec;
@@ -156,6 +171,7 @@ static int cx_auto_init(struct hda_codec *codec)
        if (!spec->dynamic_eapd)
                cx_auto_turn_eapd(codec, spec->num_eapds, spec->eapds, true);
 
+       cxt_init_gpio_led(codec);
        snd_hda_apply_fixup(codec, HDA_FIXUP_ACT_INIT);
 
        return 0;
@@ -215,6 +231,7 @@ enum {
        CXT_FIXUP_HP_SPECTRE,
        CXT_FIXUP_HP_GATE_MIC,
        CXT_FIXUP_MUTE_LED_GPIO,
+       CXT_FIXUP_HP_ZBOOK_MUTE_LED,
        CXT_FIXUP_HEADSET_MIC,
        CXT_FIXUP_HP_MIC_NO_PRESENCE,
 };
@@ -654,31 +671,36 @@ static int cxt_gpio_micmute_update(struct led_classdev *led_cdev,
        return 0;
 }
 
-
-static void cxt_fixup_mute_led_gpio(struct hda_codec *codec,
-                               const struct hda_fixup *fix, int action)
+static void cxt_setup_mute_led(struct hda_codec *codec,
+                              unsigned int mute, unsigned int mic_mute)
 {
        struct conexant_spec *spec = codec->spec;
-       static const struct hda_verb gpio_init[] = {
-               { 0x01, AC_VERB_SET_GPIO_MASK, 0x03 },
-               { 0x01, AC_VERB_SET_GPIO_DIRECTION, 0x03 },
-               {}
-       };
 
-       if (action == HDA_FIXUP_ACT_PRE_PROBE) {
+       spec->gpio_led = 0;
+       spec->mute_led_polarity = 0;
+       if (mute) {
                snd_hda_gen_add_mute_led_cdev(codec, cxt_gpio_mute_update);
-               spec->gpio_led = 0;
-               spec->mute_led_polarity = 0;
-               spec->gpio_mute_led_mask = 0x01;
-               spec->gpio_mic_led_mask = 0x02;
+               spec->gpio_mute_led_mask = mute;
+       }
+       if (mic_mute) {
                snd_hda_gen_add_micmute_led_cdev(codec, cxt_gpio_micmute_update);
+               spec->gpio_mic_led_mask = mic_mute;
        }
-       snd_hda_add_verbs(codec, gpio_init);
-       if (spec->gpio_led)
-               snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA,
-                                   spec->gpio_led);
 }
 
+static void cxt_fixup_mute_led_gpio(struct hda_codec *codec,
+                               const struct hda_fixup *fix, int action)
+{
+       if (action == HDA_FIXUP_ACT_PRE_PROBE)
+               cxt_setup_mute_led(codec, 0x01, 0x02);
+}
+
+static void cxt_fixup_hp_zbook_mute_led(struct hda_codec *codec,
+                                       const struct hda_fixup *fix, int action)
+{
+       if (action == HDA_FIXUP_ACT_PRE_PROBE)
+               cxt_setup_mute_led(codec, 0x10, 0x20);
+}
 
 /* ThinkPad X200 & co with cxt5051 */
 static const struct hda_pintbl cxt_pincfg_lenovo_x200[] = {
@@ -839,6 +861,10 @@ static const struct hda_fixup cxt_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_mute_led_gpio,
        },
+       [CXT_FIXUP_HP_ZBOOK_MUTE_LED] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = cxt_fixup_hp_zbook_mute_led,
+       },
        [CXT_FIXUP_HEADSET_MIC] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = cxt_fixup_headset_mic,
@@ -917,6 +943,8 @@ static const struct snd_pci_quirk cxt5066_fixups[] = {
        SND_PCI_QUIRK(0x103c, 0x8299, "HP 800 G3 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x829a, "HP 800 G3 DM", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x8402, "HP ProBook 645 G4", CXT_FIXUP_MUTE_LED_GPIO),
+       SND_PCI_QUIRK(0x103c, 0x8427, "HP ZBook Studio G5", CXT_FIXUP_HP_ZBOOK_MUTE_LED),
+       SND_PCI_QUIRK(0x103c, 0x844f, "HP ZBook Studio G5", CXT_FIXUP_HP_ZBOOK_MUTE_LED),
        SND_PCI_QUIRK(0x103c, 0x8455, "HP Z2 G4", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x8456, "HP Z2 G4 SFF", CXT_FIXUP_HP_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x103c, 0x8457, "HP Z2 G4 mini", CXT_FIXUP_HP_MIC_NO_PRESENCE),
@@ -956,6 +984,7 @@ static const struct hda_model_fixup cxt5066_fixup_models[] = {
        { .id = CXT_FIXUP_MUTE_LED_EAPD, .name = "mute-led-eapd" },
        { .id = CXT_FIXUP_HP_DOCK, .name = "hp-dock" },
        { .id = CXT_FIXUP_MUTE_LED_GPIO, .name = "mute-led-gpio" },
+       { .id = CXT_FIXUP_HP_ZBOOK_MUTE_LED, .name = "hp-zbook-mute-led" },
        { .id = CXT_FIXUP_HP_MIC_NO_PRESENCE, .name = "hp-mic-fix" },
        {}
 };
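
The refactor separates policy from mechanism: cxt_setup_mute_led() only records which GPIO bits drive each LED and registers the classdev hooks at PRE_PROBE time, while cxt_init_gpio_led() reprograms mask, direction and data on every init so the LED state survives resume. The three-verb bring-up it performs is the stock HDA GPIO sequence (sketch; needs <sound/hda_codec.h>):

    static void demo_init_gpio(struct hda_codec *codec,
                               unsigned int mask, unsigned int value)
    {
            /* enable the pins, make them outputs, latch the value */
            snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_MASK, mask);
            snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DIRECTION, mask);
            snd_hda_codec_write(codec, 0x01, 0, AC_VERB_SET_GPIO_DATA, value);
    }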
index e405be7..45ae845 100644 (file)
@@ -157,6 +157,7 @@ struct hdmi_spec {
 
        bool dyn_pin_out;
        bool dyn_pcm_assign;
+       bool dyn_pcm_no_legacy;
        bool intel_hsw_fixup;   /* apply Intel platform-specific fixups */
        /*
         * Non-generic VIA/NVIDIA specific
@@ -1345,6 +1346,12 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
 {
        int i;
 
+       /* On new machines, try to assign the PCM slot dynamically rather
+        * than using the preferred fixed map (legacy way).
+        */
+       if (spec->dyn_pcm_no_legacy)
+               goto last_try;
+
        /*
         * generic_hdmi_build_pcms() may allocate extra PCMs on some
         * platforms (with maximum of 'num_nids + dev_num - 1')
@@ -1374,6 +1381,7 @@ static int hdmi_find_pcm_slot(struct hdmi_spec *spec,
                        return i;
        }
 
+ last_try:
        /* the last try; check the empty slots in pins */
        for (i = 0; i < spec->num_nids; i++) {
                if (!test_bit(i, &spec->pcm_bitmap))
@@ -2472,6 +2480,18 @@ static void generic_hdmi_free(struct hda_codec *codec)
 }
 
 #ifdef CONFIG_PM
+static int generic_hdmi_suspend(struct hda_codec *codec)
+{
+       struct hdmi_spec *spec = codec->spec;
+       int pin_idx;
+
+       for (pin_idx = 0; pin_idx < spec->num_pins; pin_idx++) {
+               struct hdmi_spec_per_pin *per_pin = get_pin(spec, pin_idx);
+               cancel_delayed_work_sync(&per_pin->work);
+       }
+       return 0;
+}
+
 static int generic_hdmi_resume(struct hda_codec *codec)
 {
        struct hdmi_spec *spec = codec->spec;
@@ -2495,6 +2515,7 @@ static const struct hda_codec_ops generic_hdmi_patch_ops = {
        .build_controls         = generic_hdmi_build_controls,
        .unsol_event            = hdmi_unsol_event,
 #ifdef CONFIG_PM
+       .suspend                = generic_hdmi_suspend,
        .resume                 = generic_hdmi_resume,
 #endif
 };
@@ -2987,8 +3008,16 @@ static int patch_i915_tgl_hdmi(struct hda_codec *codec)
         * the index indicate the port number.
         */
        static const int map[] = {0x4, 0x6, 0x8, 0xa, 0xb, 0xc, 0xd, 0xe, 0xf};
+       int ret;
 
-       return intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
+       ret = intel_hsw_common_init(codec, 0x02, map, ARRAY_SIZE(map));
+       if (!ret) {
+               struct hdmi_spec *spec = codec->spec;
+
+               spec->dyn_pcm_no_legacy = true;
+       }
+
+       return ret;
 }
 
 /* Intel Baytrail and Braswell; with eld notifier */
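
Two independent changes land in this file. For suspend, cancel_delayed_work_sync() on each per-pin work stops a pending ELD/jack poll from firing while the controller is mid-suspend. For PCM assignment, the new dyn_pcm_no_legacy flag (set by patch_i915_tgl_hdmi() above) skips the historical fixed pin-to-PCM map and jumps straight to first-free-slot allocation, which is just a bitmap scan (sketch):

    #include <linux/bitops.h>
    #include <linux/errno.h>

    static int demo_find_pcm_slot(const unsigned long *bitmap, int nslots)
    {
            int i;

            for (i = 0; i < nslots; i++)
                    if (!test_bit(i, bitmap))
                            return i;       /* first empty slot */
            return -EBUSY;
    }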
index 1927605..a7544b7 100644 (file)
@@ -2532,6 +2532,7 @@ static const struct snd_pci_quirk alc882_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1462, 0x1276, "MSI-GL73", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x1293, "MSI-GP65", ALC1220_FIXUP_CLEVO_P950),
        SND_PCI_QUIRK(0x1462, 0x7350, "MSI-7350", ALC889_FIXUP_CD),
+       SND_PCI_QUIRK(0x1462, 0xcc34, "MSI Godlike X570", ALC1220_FIXUP_GB_DUAL_CODECS),
        SND_PCI_QUIRK(0x1462, 0xda57, "MSI Z270-Gaming", ALC1220_FIXUP_GB_DUAL_CODECS),
        SND_PCI_QUIRK_VENDOR(0x1462, "MSI", ALC882_FIXUP_GPIO3),
        SND_PCI_QUIRK(0x147b, 0x107a, "Abit AW9D-MAX", ALC882_FIXUP_ABIT_AW9D_MAX),
@@ -3926,6 +3927,15 @@ static void alc271_fixup_dmic(struct hda_codec *codec,
                snd_hda_sequence_write(codec, verbs);
 }
 
+/* Fix the speaker amp after resume, etc */
+static void alc269vb_fixup_aspire_e1_coef(struct hda_codec *codec,
+                                         const struct hda_fixup *fix,
+                                         int action)
+{
+       if (action == HDA_FIXUP_ACT_INIT)
+               alc_update_coef_idx(codec, 0x0d, 0x6000, 0x6000);
+}
+
 static void alc269_fixup_pcm_44k(struct hda_codec *codec,
                                 const struct hda_fixup *fix, int action)
 {
@@ -4224,6 +4234,12 @@ static void alc_fixup_hp_gpio_led(struct hda_codec *codec,
        }
 }
 
+static void alc236_fixup_hp_gpio_led(struct hda_codec *codec,
+                               const struct hda_fixup *fix, int action)
+{
+       alc_fixup_hp_gpio_led(codec, action, 0x02, 0x01);
+}
+
 static void alc269_fixup_hp_gpio_led(struct hda_codec *codec,
                                const struct hda_fixup *fix, int action)
 {
@@ -5249,7 +5265,7 @@ static void alc_determine_headset_type(struct hda_codec *codec)
        case 0x10ec0274:
        case 0x10ec0294:
                alc_process_coef_fw(codec, coef0274);
-               msleep(80);
+               msleep(850);
                val = alc_read_coef_idx(codec, 0x46);
                is_ctia = (val & 0x00f0) == 0x00f0;
                break;
@@ -5433,6 +5449,7 @@ static void alc_update_headset_jack_cb(struct hda_codec *codec,
                                       struct hda_jack_callback *jack)
 {
        snd_hda_gen_hp_automute(codec, jack);
+       alc_update_headset_mode(codec);
 }
 
 static void alc_probe_headset_mode(struct hda_codec *codec)
@@ -6293,6 +6310,7 @@ enum {
        ALC283_FIXUP_HEADSET_MIC,
        ALC255_FIXUP_MIC_MUTE_LED,
        ALC282_FIXUP_ASPIRE_V5_PINS,
+       ALC269VB_FIXUP_ASPIRE_E1_COEF,
        ALC280_FIXUP_HP_GPIO4,
        ALC286_FIXUP_HP_GPIO_LED,
        ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY,
@@ -6380,6 +6398,7 @@ enum {
        ALC294_FIXUP_ASUS_GX502_VERBS,
        ALC285_FIXUP_HP_GPIO_LED,
        ALC285_FIXUP_HP_MUTE_LED,
+       ALC236_FIXUP_HP_GPIO_LED,
        ALC236_FIXUP_HP_MUTE_LED,
        ALC298_FIXUP_SAMSUNG_HEADPHONE_VERY_QUIET,
        ALC295_FIXUP_ASUS_MIC_NO_PRESENCE,
@@ -6396,6 +6415,7 @@ enum {
        ALC269_FIXUP_LEMOTE_A1802,
        ALC269_FIXUP_LEMOTE_A190X,
        ALC256_FIXUP_INTEL_NUC8_RUGGED,
+       ALC256_FIXUP_INTEL_NUC10,
        ALC255_FIXUP_XIAOMI_HEADSET_MIC,
        ALC274_FIXUP_HP_MIC,
        ALC274_FIXUP_HP_HEADSET_MIC,
@@ -6406,6 +6426,7 @@ enum {
        ALC236_FIXUP_DELL_AIO_HEADSET_MIC,
        ALC282_FIXUP_ACER_DISABLE_LINEOUT,
        ALC255_FIXUP_ACER_LIMIT_INT_MIC_BOOST,
+       ALC256_FIXUP_ACER_HEADSET_MIC,
 };
 
 static const struct hda_fixup alc269_fixups[] = {
@@ -6968,6 +6989,10 @@ static const struct hda_fixup alc269_fixups[] = {
                        { },
                },
        },
+       [ALC269VB_FIXUP_ASPIRE_E1_COEF] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc269vb_fixup_aspire_e1_coef,
+       },
        [ALC280_FIXUP_HP_GPIO4] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc280_fixup_hp_gpio4,
@@ -7613,6 +7638,10 @@ static const struct hda_fixup alc269_fixups[] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc285_fixup_hp_mute_led,
        },
+       [ALC236_FIXUP_HP_GPIO_LED] = {
+               .type = HDA_FIXUP_FUNC,
+               .v.func = alc236_fixup_hp_gpio_led,
+       },
        [ALC236_FIXUP_HP_MUTE_LED] = {
                .type = HDA_FIXUP_FUNC,
                .v.func = alc236_fixup_hp_mute_led,
@@ -7782,6 +7811,15 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC269_FIXUP_HEADSET_MODE
        },
+       [ALC256_FIXUP_INTEL_NUC10] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x01a1913c }, /* use as headset mic, without its own jack detect */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE
+       },
        [ALC255_FIXUP_XIAOMI_HEADSET_MIC] = {
                .type = HDA_FIXUP_VERBS,
                .v.verbs = (const struct hda_verb[]) {
@@ -7853,6 +7891,16 @@ static const struct hda_fixup alc269_fixups[] = {
                .chained = true,
                .chain_id = ALC255_FIXUP_ACER_MIC_NO_PRESENCE,
        },
+       [ALC256_FIXUP_ACER_HEADSET_MIC] = {
+               .type = HDA_FIXUP_PINS,
+               .v.pins = (const struct hda_pintbl[]) {
+                       { 0x19, 0x02a1113c }, /* use as headset mic, without its own jack detect */
+                       { 0x1a, 0x90a1092f }, /* use as internal mic */
+                       { }
+               },
+               .chained = true,
+               .chain_id = ALC269_FIXUP_HEADSET_MODE_NO_HP_MIC
+       },
 };
 
 static const struct snd_pci_quirk alc269_fixup_tbl[] = {
@@ -7867,6 +7915,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x0762, "Acer Aspire E1-472", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
        SND_PCI_QUIRK(0x1025, 0x0775, "Acer Aspire E1-572", ALC271_FIXUP_HP_GATE_MIC_JACK_E1_572),
        SND_PCI_QUIRK(0x1025, 0x079b, "Acer Aspire V5-573G", ALC282_FIXUP_ASPIRE_V5_PINS),
+       SND_PCI_QUIRK(0x1025, 0x0840, "Acer Aspire E1", ALC269VB_FIXUP_ASPIRE_E1_COEF),
        SND_PCI_QUIRK(0x1025, 0x101c, "Acer Veriton N2510G", ALC269_FIXUP_LIFEBOOK),
        SND_PCI_QUIRK(0x1025, 0x102b, "Acer Aspire C24-860", ALC286_FIXUP_ACER_AIO_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1025, 0x1065, "Acer Aspire C20-820", ALC269VC_FIXUP_ACER_HEADSET_MIC),
@@ -7879,9 +7928,11 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1025, 0x1246, "Acer Predator Helios 500", ALC299_FIXUP_PREDATOR_SPK),
        SND_PCI_QUIRK(0x1025, 0x1247, "Acer vCopperbox", ALC269VC_FIXUP_ACER_VCOPPERBOX_PINS),
        SND_PCI_QUIRK(0x1025, 0x1248, "Acer Veriton N4660G", ALC269VC_FIXUP_ACER_MIC_NO_PRESENCE),
+       SND_PCI_QUIRK(0x1025, 0x1269, "Acer SWIFT SF314-54", ALC256_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x128f, "Acer Veriton Z6860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1290, "Acer Veriton Z4860G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1291, "Acer Veriton Z4660G", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1025, 0x129c, "Acer SWIFT SF314-55", ALC256_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1308, "Acer Aspire Z24-890", ALC286_FIXUP_ACER_AIO_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x132a, "Acer TravelMate B114-21", ALC233_FIXUP_ACER_HEADSET_MIC),
        SND_PCI_QUIRK(0x1025, 0x1330, "Acer TravelMate X514-51T", ALC255_FIXUP_ACER_HEADSET_MIC),
@@ -8021,9 +8072,13 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x103c, 0x8783, "HP ZBook Fury 15 G7 Mobile Workstation",
                      ALC285_FIXUP_HP_GPIO_AMP_INIT),
        SND_PCI_QUIRK(0x103c, 0x87c8, "HP", ALC287_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x87e5, "HP ProBook 440 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x87f2, "HP ProBook 640 G8 Notebook PC", ALC236_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x87f4, "HP", ALC287_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x87f5, "HP", ALC287_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x103c, 0x87f7, "HP Spectre x360 14", ALC245_FIXUP_HP_X360_AMP),
+       SND_PCI_QUIRK(0x103c, 0x8846, "HP EliteBook 850 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
+       SND_PCI_QUIRK(0x103c, 0x884c, "HP EliteBook 840 G8 Notebook PC", ALC285_FIXUP_HP_GPIO_LED),
        SND_PCI_QUIRK(0x1043, 0x103e, "ASUS X540SA", ALC256_FIXUP_ASUS_MIC),
        SND_PCI_QUIRK(0x1043, 0x103f, "ASUS TX300", ALC282_FIXUP_ASUS_TX300),
        SND_PCI_QUIRK(0x1043, 0x106d, "Asus K53BE", ALC269_FIXUP_LIMIT_INT_MIC_BOOST),
@@ -8128,6 +8183,7 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1558, 0x8551, "System76 Gazelle (gaze14)", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8560, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
        SND_PCI_QUIRK(0x1558, 0x8561, "System76 Gazelle (gaze14)", ALC269_FIXUP_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1558, 0x8562, "Clevo NH[5|7][0-9]RZ[Q]", ALC269_FIXUP_DMIC),
        SND_PCI_QUIRK(0x1558, 0x8668, "Clevo NP50B[BE]", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8680, "Clevo NJ50LU", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1558, 0x8686, "Clevo NH50[CZ]U", ALC293_FIXUP_SYSTEM76_MIC_NO_PRESENCE),
@@ -8217,11 +8273,14 @@ static const struct snd_pci_quirk alc269_fixup_tbl[] = {
        SND_PCI_QUIRK(0x1b35, 0x1237, "CZC L101", ALC269_FIXUP_CZC_L101),
        SND_PCI_QUIRK(0x1b7d, 0xa831, "Ordissimo EVE2 ", ALC269VB_FIXUP_ORDISSIMO_EVE2), /* Also known as Malata PC-B1303 */
        SND_PCI_QUIRK(0x1d72, 0x1602, "RedmiBook", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1d72, 0x1701, "XiaomiNotebook Pro", ALC298_FIXUP_DELL1_MIC_NO_PRESENCE),
        SND_PCI_QUIRK(0x1d72, 0x1901, "RedmiBook 14", ALC256_FIXUP_ASUS_HEADSET_MIC),
+       SND_PCI_QUIRK(0x1d72, 0x1947, "RedmiBook Air", ALC255_FIXUP_XIAOMI_HEADSET_MIC),
        SND_PCI_QUIRK(0x10ec, 0x118c, "Medion EE4254 MD62100", ALC256_FIXUP_MEDION_HEADSET_NO_PRESENCE),
        SND_PCI_QUIRK(0x1c06, 0x2013, "Lemote A1802", ALC269_FIXUP_LEMOTE_A1802),
        SND_PCI_QUIRK(0x1c06, 0x2015, "Lemote A190X", ALC269_FIXUP_LEMOTE_A190X),
        SND_PCI_QUIRK(0x8086, 0x2080, "Intel NUC 8 Rugged", ALC256_FIXUP_INTEL_NUC8_RUGGED),
+       SND_PCI_QUIRK(0x8086, 0x2081, "Intel NUC 10", ALC256_FIXUP_INTEL_NUC10),
 
 #if 0
        /* Below is a quirk table taken from the old code.
@@ -8351,6 +8410,7 @@ static const struct hda_model_fixup alc269_fixup_models[] = {
        {.id = ALC283_FIXUP_HEADSET_MIC, .name = "alc283-headset"},
        {.id = ALC255_FIXUP_MIC_MUTE_LED, .name = "alc255-dell-mute"},
        {.id = ALC282_FIXUP_ASPIRE_V5_PINS, .name = "aspire-v5"},
+       {.id = ALC269VB_FIXUP_ASPIRE_E1_COEF, .name = "aspire-e1-coef"},
        {.id = ALC280_FIXUP_HP_GPIO4, .name = "hp-gpio4"},
        {.id = ALC286_FIXUP_HP_GPIO_LED, .name = "hp-gpio-led"},
        {.id = ALC280_FIXUP_HP_GPIO2_MIC_HOTKEY, .name = "hp-gpio2-hotkey"},
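
All of the quirk-table one-liners above ride the same mechanism: SND_PCI_QUIRK(vendor, device, name, fixup_id) matches a machine's PCI subsystem IDs and selects an entry in the fixups array, whose type decides whether pin defaults are overridden (HDA_FIXUP_PINS), verbs are sent (HDA_FIXUP_VERBS) or a function runs (HDA_FIXUP_FUNC). A minimal illustrative pairing (the IDs are made up):

    static const struct hda_fixup demo_fixups[] = {
            [0] = {
                    .type = HDA_FIXUP_PINS,
                    .v.pins = (const struct hda_pintbl[]) {
                            { 0x19, 0x01a1913c }, /* pin 0x19: headset mic */
                            { }
                    },
            },
    };

    static const struct snd_pci_quirk demo_quirk_tbl[] = {
            SND_PCI_QUIRK(0x1234, 0x5678, "Demo Laptop", 0),
            { }
    };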
index f814dbb..d54cd51 100644 (file)
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("ICEnsemble ICE1712 (Envy24)");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{"
-              HOONTECH_DEVICE_DESC
-              DELTA_DEVICE_DESC
-              EWS_DEVICE_DESC
-              "{ICEnsemble,Generic ICE1712},"
-              "{ICEnsemble,Generic Envy24}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index c0fca94..ef2367d 100644 (file)
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("VIA ICEnsemble ICE1724/1720 (Envy24HT/PT)");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{"
-              REVO_DEVICE_DESC
-              AMP_AUDIO2000_DEVICE_DESC
-              AUREON_DEVICE_DESC
-              VT1720_MOBO_DEVICE_DESC
-              PONTIS_DEVICE_DESC
-              PRODIGY192_DEVICE_DESC
-              PRODIGY_HIFI_DEVICE_DESC
-              JULI_DEVICE_DESC
-              MAYA44_DEVICE_DESC
-              PHASE_DEVICE_DESC
-              WTM_DEVICE_DESC
-              SE_DEVICE_DESC
-              QTET_DEVICE_DESC
-               "{VIA,VT1720},"
-               "{VIA,VT1724},"
-               "{ICEnsemble,Generic ICE1724},"
-               "{ICEnsemble,Generic Envy24HT}"
-               "{ICEnsemble,Generic Envy24PT}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 3349e45..35903d1 100644 (file)
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Intel 82801AA,82901AB,i810,i820,i830,i840,i845,MX440; SiS 7012; Ali 5455");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Intel,82801AA-ICH},"
-               "{Intel,82901AB-ICH0},"
-               "{Intel,82801BA-ICH2},"
-               "{Intel,82801CA-ICH3},"
-               "{Intel,82801DB-ICH4},"
-               "{Intel,ICH5},"
-               "{Intel,ICH6},"
-               "{Intel,ICH7},"
-               "{Intel,6300ESB},"
-               "{Intel,ESB2},"
-               "{Intel,MX440},"
-               "{SiS,SI7012},"
-               "{NVidia,nForce Audio},"
-               "{NVidia,nForce2 Audio},"
-               "{NVidia,nForce3 Audio},"
-               "{NVidia,MCP04},"
-               "{NVidia,MCP501},"
-               "{NVidia,CK804},"
-               "{NVidia,CK8},"
-               "{NVidia,CK8S},"
-               "{AMD,AMD768},"
-               "{AMD,AMD8111},"
-               "{ALI,M5455}}");
 
 static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */
 static char *id = SNDRV_DEFAULT_STR1;  /* ID for this card */
index 19872ce..13ef838 100644 (file)
@@ -25,21 +25,6 @@ MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Intel 82801AA,82901AB,i810,i820,i830,i840,i845,MX440; "
                   "SiS 7013; NVidia MCP/2/2S/3 modems");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Intel,82801AA-ICH},"
-               "{Intel,82901AB-ICH0},"
-               "{Intel,82801BA-ICH2},"
-               "{Intel,82801CA-ICH3},"
-               "{Intel,82801DB-ICH4},"
-               "{Intel,ICH5},"
-               "{Intel,ICH6},"
-               "{Intel,ICH7},"
-               "{Intel,MX440},"
-               "{SiS,7013},"
-               "{NVidia,NForce Modem},"
-               "{NVidia,NForce2 Modem},"
-               "{NVidia,NForce2s Modem},"
-               "{NVidia,NForce3 Modem},"
-               "{AMD,AMD768}}");
 
 static int index = -2; /* Exclude the first card */
 static char *id = SNDRV_DEFAULT_STR1;  /* ID for this card */
index 2eddd9d..80ac3c6 100644 (file)
@@ -388,7 +388,6 @@ struct snd_korg1212 {
 
 MODULE_DESCRIPTION("korg1212");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{KORG,korg1212}}");
 MODULE_FIRMWARE("korg/k1212.dsp");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
index 491c90f..03b4be4 100644 (file)
@@ -54,7 +54,6 @@ MODULE_PARM_DESC(sample_rate_min, "Minimal sample rate");
  */
 
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Digigram, Lola}}");
 MODULE_DESCRIPTION("Digigram Lola driver");
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 
index b92ea07..1be97c3 100644 (file)
@@ -21,8 +21,6 @@
 MODULE_AUTHOR("Tim Blechmann");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("digigram lx6464es");
-MODULE_SUPPORTED_DEVICE("{digigram lx6464es{}}");
-
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
index d2c2cd6..cdc4b61 100644 (file)
 MODULE_AUTHOR("Zach Brown <zab@zabbo.net>, Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("ESS Maestro3 PCI");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{ESS,Maestro3 PCI},"
-               "{ESS,ES1988},"
-               "{ESS,Allegro PCI},"
-               "{ESS,Allegro-1 PCI},"
-               "{ESS,Canyon3D-2/LE PCI}}");
 MODULE_FIRMWARE("ess/maestro3_assp_kernel.fw");
 MODULE_FIRMWARE("ess/maestro3_assp_minisrc.fw");
 
index efff220..a0bbb38 100644 (file)
@@ -32,7 +32,6 @@
 MODULE_AUTHOR("Digigram <alsa@digigram.com>");
 MODULE_DESCRIPTION("Digigram " CARD_NAME);
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Digigram," CARD_NAME "}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;             /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;              /* ID for this card */
index 9759946..6cb689a 100644 (file)
@@ -32,8 +32,6 @@
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("NeoMagic NM256AV/ZX");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{NeoMagic,NM256AV},"
-               "{NeoMagic,NM256ZX}}");
 
 /*
  * some compile conditions.
index a751fcc..e335c4b 100644 (file)
@@ -56,9 +56,6 @@
 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
 MODULE_DESCRIPTION("C-Media CMI8788 driver");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{C-Media,CMI8786}"
-                       ",{C-Media,CMI8787}"
-                       ",{C-Media,CMI8788}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
index 78c35a0..434f885 100644 (file)
@@ -29,7 +29,6 @@
 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
 MODULE_DESCRIPTION("Studio Evolution SE6X driver");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{Studio Evolution,SE6X}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
index 98ab163..baa3244 100644 (file)
@@ -16,7 +16,6 @@
 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
 MODULE_DESCRIPTION("Asus Virtuoso driver");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{Asus,AV66},{Asus,AV100},{Asus,AV200}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;
index c2e4831..751f974 100644 (file)
@@ -35,7 +35,6 @@ MODULE_AUTHOR("Markus Bollinger <bollinger@digigram.com>, "
              "Marc Titinger <titinger@digigram.com>");
 MODULE_DESCRIPTION("Digigram " DRIVER_NAME " " PCXHR_DRIVER_VERSION_STRING);
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Digigram," DRIVER_NAME "}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index fcc2073..56827db 100644 (file)
 MODULE_AUTHOR("Peter Gruber <nokos@gmx.net>");
 MODULE_DESCRIPTION("riptide");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Conexant,Riptide}}");
 MODULE_FIRMWARE("riptide.hex");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;
index 4eabece..54f3e39 100644 (file)
@@ -88,7 +88,6 @@ MODULE_PARM_DESC(fullduplex, "Support full-duplex mode.");
 MODULE_AUTHOR("Martin Langer <martin-langer@gmx.de>, Pilo Chambert <pilo.c@wanadoo.fr>");
 MODULE_DESCRIPTION("RME Digi32, Digi32/8, Digi32 PRO");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{RME,Digi32}," "{RME,Digi32/8}," "{RME,Digi32 PRO}}");
 
 /* Defines for RME Digi32 series */
 #define RME32_SPDIF_NCHANNELS 2
index 84eef6a..66082e9 100644 (file)
@@ -31,11 +31,6 @@ MODULE_AUTHOR("Anders Torger <torger@ludd.luth.se>");
 MODULE_DESCRIPTION("RME Digi96, Digi96/8, Digi96/8 PRO, Digi96/8 PST, "
                   "Digi96/8 PAD");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{RME,Digi96},"
-               "{RME,Digi96/8},"
-               "{RME,Digi96/8 PRO},"
-               "{RME,Digi96/8 PST},"
-               "{RME,Digi96/8 PAD}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 6d90293..4cf879c 100644 (file)
@@ -44,9 +44,6 @@ MODULE_PARM_DESC(enable, "Enable/disable specific Hammerfall DSP soundcards.");
 MODULE_AUTHOR("Paul Davis <paul@linuxaudiosystems.com>, Marcus Andersson, Thomas Charbonnel <thomas@undata.org>");
 MODULE_DESCRIPTION("RME Hammerfall DSP");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{RME Hammerfall-DSP},"
-               "{RME HDSP-9652},"
-               "{RME HDSP-9632}}");
 MODULE_FIRMWARE("rpm_firmware.bin");
 MODULE_FIRMWARE("multiface_firmware.bin");
 MODULE_FIRMWARE("multiface_firmware_rev11.bin");
index b667115..8d900c1 100644 (file)
@@ -165,7 +165,6 @@ MODULE_AUTHOR
 );
 MODULE_DESCRIPTION("RME HDSPM");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{RME HDSPM-MADI}}");
 
 /* --- Write registers. ---
   These are defined as byte-offsets from the iobase value.  */
index 012fbec..4df992e 100644 (file)
@@ -39,8 +39,6 @@ MODULE_PARM_DESC(precise_ptr, "Enable precise pointer (doesn't work reliably).")
 MODULE_AUTHOR("Paul Davis <pbd@op.net>, Winfried Ritsch");
 MODULE_DESCRIPTION("RME Digi9652/Digi9636");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{RME,Hammerfall},"
-               "{RME,Hammerfall-Light}}");
 
 /* The Hammerfall has two sets of 24 ADAT + 2 S/PDIF channels, one for
    capture, one for playback. Both the ADAT and S/PDIF channels appear
index 8ffa2f5..00ab51c 100644 (file)
@@ -24,7 +24,6 @@
 MODULE_AUTHOR("David Dillow <dave@thedillows.org>");
 MODULE_DESCRIPTION("SiS7019");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{SiS,SiS7019 Audio Accelerator}}");
 
 static int index = SNDRV_DEFAULT_IDX1; /* Index 0-MAX */
 static char *id = SNDRV_DEFAULT_STR1;  /* ID for this card */
index 26fd1d0..7de1099 100644 (file)
@@ -29,7 +29,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("S3 SonicVibes PCI");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{S3,SonicVibes PCI}}");
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
 #define SUPPORT_JOYSTICK 1
index 5bc79da..a510412 100644 (file)
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>, <audio@tridentmicro.com>");
 MODULE_DESCRIPTION("Trident 4D-WaveDX/NX & SiS SI7018");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Trident,4DWave DX},"
-               "{Trident,4DWave NX},"
-               "{SiS,SI7018 PCI Audio},"
-               "{Best Union,Miss Melody 4DWave PCI},"
-               "{HIS,4DWave PCI},"
-               "{Warpspeed,ONSpeed 4DWave PCI},"
-               "{Aztech Systems,PCI 64-Q3D},"
-               "{Addonics,SV 750},"
-               "{CHIC,True Sound 4Dwave},"
-               "{Shark,Predator4D-PCI},"
-               "{Jaton,SonicWave 4D},"
-               "{Hoontech,SoundTrack Digital 4DWave NX}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 154d88c..fd1f2f9 100644 (file)
@@ -56,7 +56,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("VIA VT82xx audio");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{VIA,VT82C686A/B/C,pci},{VIA,VT8233A/C,8235}}");
 
 #if IS_REACHABLE(CONFIG_GAMEPORT)
 #define SUPPORT_JOYSTICK 1
index addfa19..3025330 100644 (file)
@@ -38,7 +38,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("VIA VT82xx modem");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{VIA,VT82C686A/B/C modem,pci}}");
 
 static int index = -2; /* Exclude the first card */
 static char *id = SNDRV_DEFAULT_STR1;  /* ID for this card */
index f7800ed..2a9e1a7 100644 (file)
@@ -20,7 +20,6 @@
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("Digigram VX222 V2/Mic");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Digigram," CARD_NAME "}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 9b0d18a..99be149 100644 (file)
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Yamaha DS-1 PCI");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Yamaha,YMF724},"
-               "{Yamaha,YMF724F},"
-               "{Yamaha,YMF740},"
-               "{Yamaha,YMF740C},"
-               "{Yamaha,YMF744},"
-               "{Yamaha,YMF754}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 27d9da6..1445823 100644 (file)
@@ -22,7 +22,6 @@
 MODULE_AUTHOR("Jaroslav Kysela <perex@perex.cz>");
 MODULE_DESCRIPTION("Sound Core " CARD_NAME);
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Sound Core," CARD_NAME "}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index afd30a9..6363204 100644 (file)
 #include <sound/initval.h>
 #include <sound/tlv.h>
 
-/*
- */
-
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("Digigram VXPocket");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Digigram,VXPocket},{Digigram,VXPocket440}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 96ef550..9fb51eb 100644 (file)
@@ -18,7 +18,6 @@
 #define CHIP_NAME "PMac"
 
 MODULE_DESCRIPTION("PowerMac");
-MODULE_SUPPORTED_DEVICE("{{Apple,PowerMac}}");
 MODULE_LICENSE("GPL");
 
 static int index = SNDRV_DEFAULT_IDX1;         /* Index 0-MAX */
index 8fa6843..6e9d6bd 100644 (file)
@@ -32,7 +32,6 @@
 MODULE_AUTHOR("Adrian McMenamin <adrian@mcmen.demon.co.uk>");
 MODULE_DESCRIPTION("Dreamcast AICA sound (pcm) driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Yamaha/SEGA, AICA}}");
 MODULE_FIRMWARE("aica_firmware.bin");
 
 /* module parameters */
index feb2850..8ebd972 100644 (file)
@@ -25,7 +25,6 @@
 MODULE_AUTHOR("Rafael Ignacio Zurita <rizurita@yahoo.com>");
 MODULE_DESCRIPTION("SuperH DAC audio driver");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{SuperH DAC audio support}}");
 
 /* Module Parameters */
 static int index = SNDRV_DEFAULT_IDX1;
index 6e634b4..aa16a23 100644 (file)
@@ -1348,8 +1348,10 @@ static int cygnus_ssp_probe(struct platform_device *pdev)
                                        &cygnus_ssp_dai[active_port_count]);
 
                /* negative is err, 0 is active and good, 1 is disabled */
-               if (err < 0)
+               if (err < 0) {
+                       of_node_put(child_node);
                        return err;
+               }
                else if (!err) {
                        dev_dbg(dev, "Activating DAI: %s\n",
                                cygnus_ssp_dai[active_port_count].name);
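
The added of_node_put() is needed because for_each_available_child_of_node() takes a reference on each child it yields and drops it only when advancing to the next one; returning out of the loop early leaks the current node's refcount unless the caller puts it. The general pattern (demo_handle() is a hypothetical helper):

    #include <linux/of.h>

    static int demo_handle(struct device_node *child) { return 0; }

    static int demo_walk_children(struct device_node *np)
    {
            struct device_node *child;
            int err;

            for_each_available_child_of_node(np, child) {
                    err = demo_handle(child);
                    if (err < 0) {
                            of_node_put(child);     /* drop iterator's ref */
                            return err;
                    }
            }
            return 0;
    }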
index e4cf14e..1c87b42 100644 (file)
@@ -186,7 +186,6 @@ config SND_SOC_ALL_CODECS
        imply SND_SOC_SI476X
        imply SND_SOC_SIMPLE_AMPLIFIER
        imply SND_SOC_SIMPLE_MUX
-       imply SND_SOC_SIRF_AUDIO_CODEC
        imply SND_SOC_SPDIF
        imply SND_SOC_SSM2305
        imply SND_SOC_SSM2518
@@ -1279,10 +1278,6 @@ config SND_SOC_SIMPLE_MUX
        tristate "Simple Audio Mux"
        select GPIOLIB
 
-config SND_SOC_SIRF_AUDIO_CODEC
-       tristate "SiRF SoC internal audio codec"
-       select REGMAP_MMIO
-
 config SND_SOC_SPDIF
        tristate "S/PDIF CODEC"
 
index 472caad..85a1d00 100644 (file)
@@ -812,6 +812,7 @@ static const struct of_device_id ak4458_of_match[] = {
        { .compatible = "asahi-kasei,ak4497", .data = &ak4497_drvdata},
        { },
 };
+MODULE_DEVICE_TABLE(of, ak4458_of_match);
 
 static struct i2c_driver ak4458_i2c_driver = {
        .driver = {
index 8a32b01..85bdd05 100644 (file)
@@ -419,6 +419,7 @@ static const struct of_device_id ak5558_i2c_dt_ids[] __maybe_unused = {
        { .compatible = "asahi-kasei,ak5558"},
        { }
 };
+MODULE_DEVICE_TABLE(of, ak5558_i2c_dt_ids);
 
 static struct i2c_driver ak5558_i2c_driver = {
        .driver = {
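
Both AKM codec fixes are the same one-liner: the of_device_id table was used for matching but never exported, so the module carried no devicetree aliases and udev could not autoload it from a compatible string. MODULE_DEVICE_TABLE(of, ...) emits those aliases (sketch with an invented compatible):

    #include <linux/mod_devicetable.h>
    #include <linux/module.h>

    static const struct of_device_id demo_of_match[] = {
            { .compatible = "vendor,demo-codec" },
            { }
    };
    /* emits "of:N*T*Cvendor,demo-codec" into the module alias section */
    MODULE_DEVICE_TABLE(of, demo_of_match);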
index 210fcbe..811b7b1 100644 (file)
@@ -401,7 +401,7 @@ static const struct regmap_config cs42l42_regmap = {
 };
 
 static DECLARE_TLV_DB_SCALE(adc_tlv, -9600, 100, false);
-static DECLARE_TLV_DB_SCALE(mixer_tlv, -6200, 100, false);
+static DECLARE_TLV_DB_SCALE(mixer_tlv, -6300, 100, true);
 
 static const char * const cs42l42_hpf_freq_text[] = {
        "1.86Hz", "120Hz", "235Hz", "466Hz"
@@ -458,7 +458,7 @@ static const struct snd_kcontrol_new cs42l42_snd_controls[] = {
                                CS42L42_DAC_HPF_EN_SHIFT, true, false),
        SOC_DOUBLE_R_TLV("Mixer Volume", CS42L42_MIXER_CHA_VOL,
                         CS42L42_MIXER_CHB_VOL, CS42L42_MIXER_CH_VOL_SHIFT,
-                               0x3e, 1, mixer_tlv)
+                               0x3f, 1, mixer_tlv)
 };
 
 static int cs42l42_hpdrv_evt(struct snd_soc_dapm_widget *w,
@@ -511,43 +511,6 @@ static const struct snd_soc_dapm_route cs42l42_audio_map[] = {
        {"HP", NULL, "HPDRV"}
 };
 
-static int cs42l42_set_bias_level(struct snd_soc_component *component,
-                                       enum snd_soc_bias_level level)
-{
-       struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component);
-       int ret;
-
-       switch (level) {
-       case SND_SOC_BIAS_ON:
-               break;
-       case SND_SOC_BIAS_PREPARE:
-               break;
-       case SND_SOC_BIAS_STANDBY:
-               if (snd_soc_component_get_bias_level(component) == SND_SOC_BIAS_OFF) {
-                       regcache_cache_only(cs42l42->regmap, false);
-                       regcache_sync(cs42l42->regmap);
-                       ret = regulator_bulk_enable(
-                                               ARRAY_SIZE(cs42l42->supplies),
-                                               cs42l42->supplies);
-                       if (ret != 0) {
-                               dev_err(component->dev,
-                                       "Failed to enable regulators: %d\n",
-                                       ret);
-                               return ret;
-                       }
-               }
-               break;
-       case SND_SOC_BIAS_OFF:
-
-               regcache_cache_only(cs42l42->regmap, true);
-               regulator_bulk_disable(ARRAY_SIZE(cs42l42->supplies),
-                                                   cs42l42->supplies);
-               break;
-       }
-
-       return 0;
-}
-
 static int cs42l42_component_probe(struct snd_soc_component *component)
 {
        struct cs42l42_private *cs42l42 =
@@ -560,7 +523,6 @@ static int cs42l42_component_probe(struct snd_soc_component *component)
 
 static const struct snd_soc_component_driver soc_component_dev_cs42l42 = {
        .probe                  = cs42l42_component_probe,
-       .set_bias_level         = cs42l42_set_bias_level,
        .dapm_widgets           = cs42l42_dapm_widgets,
        .num_dapm_widgets       = ARRAY_SIZE(cs42l42_dapm_widgets),
        .dapm_routes            = cs42l42_audio_map,
@@ -691,24 +653,6 @@ static int cs42l42_pll_config(struct snd_soc_component *component)
                                        CS42L42_CLK_OASRC_SEL_MASK,
                                        CS42L42_CLK_OASRC_SEL_12 <<
                                        CS42L42_CLK_OASRC_SEL_SHIFT);
-                       /* channel 1 on low LRCLK, 32 bit */
-                       snd_soc_component_update_bits(component,
-                                       CS42L42_ASP_RX_DAI0_CH1_AP_RES,
-                                       CS42L42_ASP_RX_CH_AP_MASK |
-                                       CS42L42_ASP_RX_CH_RES_MASK,
-                                       (CS42L42_ASP_RX_CH_AP_LOW <<
-                                       CS42L42_ASP_RX_CH_AP_SHIFT) |
-                                       (CS42L42_ASP_RX_CH_RES_32 <<
-                                       CS42L42_ASP_RX_CH_RES_SHIFT));
-                       /* Channel 2 on high LRCLK, 32 bit */
-                       snd_soc_component_update_bits(component,
-                                       CS42L42_ASP_RX_DAI0_CH2_AP_RES,
-                                       CS42L42_ASP_RX_CH_AP_MASK |
-                                       CS42L42_ASP_RX_CH_RES_MASK,
-                                       (CS42L42_ASP_RX_CH_AP_HI <<
-                                       CS42L42_ASP_RX_CH_AP_SHIFT) |
-                                       (CS42L42_ASP_RX_CH_RES_32 <<
-                                       CS42L42_ASP_RX_CH_RES_SHIFT));
                        if (pll_ratio_table[i].mclk_src_sel == 0) {
                                /* Pass the clock straight through */
                                snd_soc_component_update_bits(component,
@@ -797,27 +741,23 @@ static int cs42l42_set_dai_fmt(struct snd_soc_dai *codec_dai, unsigned int fmt)
        /* Bitclock/frame inversion */
        switch (fmt & SND_SOC_DAIFMT_INV_MASK) {
        case SND_SOC_DAIFMT_NB_NF:
+               asp_cfg_val |= CS42L42_ASP_SCPOL_NOR << CS42L42_ASP_SCPOL_SHIFT;
                break;
        case SND_SOC_DAIFMT_NB_IF:
-               asp_cfg_val |= CS42L42_ASP_POL_INV <<
-                               CS42L42_ASP_LCPOL_IN_SHIFT;
+               asp_cfg_val |= CS42L42_ASP_SCPOL_NOR << CS42L42_ASP_SCPOL_SHIFT;
+               asp_cfg_val |= CS42L42_ASP_LCPOL_INV << CS42L42_ASP_LCPOL_SHIFT;
                break;
        case SND_SOC_DAIFMT_IB_NF:
-               asp_cfg_val |= CS42L42_ASP_POL_INV <<
-                               CS42L42_ASP_SCPOL_IN_DAC_SHIFT;
                break;
        case SND_SOC_DAIFMT_IB_IF:
-               asp_cfg_val |= CS42L42_ASP_POL_INV <<
-                               CS42L42_ASP_LCPOL_IN_SHIFT;
-               asp_cfg_val |= CS42L42_ASP_POL_INV <<
-                               CS42L42_ASP_SCPOL_IN_DAC_SHIFT;
+               asp_cfg_val |= CS42L42_ASP_LCPOL_INV << CS42L42_ASP_LCPOL_SHIFT;
                break;
        }
 
-       snd_soc_component_update_bits(component, CS42L42_ASP_CLK_CFG,
-                               CS42L42_ASP_MODE_MASK |
-                               CS42L42_ASP_SCPOL_IN_DAC_MASK |
-                               CS42L42_ASP_LCPOL_IN_MASK, asp_cfg_val);
+       snd_soc_component_update_bits(component, CS42L42_ASP_CLK_CFG, CS42L42_ASP_MODE_MASK |
+                                                                     CS42L42_ASP_SCPOL_MASK |
+                                                                     CS42L42_ASP_LCPOL_MASK,
+                                                                     asp_cfg_val);
 
        return 0;
 }
@@ -828,14 +768,29 @@ static int cs42l42_pcm_hw_params(struct snd_pcm_substream *substream,
 {
        struct snd_soc_component *component = dai->component;
        struct cs42l42_private *cs42l42 = snd_soc_component_get_drvdata(component);
-       int retval;
+       unsigned int width = (params_width(params) / 8) - 1;
+       unsigned int val = 0;
 
        cs42l42->srate = params_rate(params);
-       cs42l42->swidth = params_width(params);
 
-       retval = cs42l42_pll_config(component);
+       switch (substream->stream) {
+       case SNDRV_PCM_STREAM_PLAYBACK:
+               val |= width << CS42L42_ASP_RX_CH_RES_SHIFT;
+               /* channel 1 on low LRCLK */
+               snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_CH1_AP_RES,
+                                                        CS42L42_ASP_RX_CH_AP_MASK |
+                                                        CS42L42_ASP_RX_CH_RES_MASK, val);
+               /* Channel 2 on high LRCLK */
+               val |= CS42L42_ASP_RX_CH_AP_HI << CS42L42_ASP_RX_CH_AP_SHIFT;
+               snd_soc_component_update_bits(component, CS42L42_ASP_RX_DAI0_CH2_AP_RES,
+                                                        CS42L42_ASP_RX_CH_AP_MASK |
+                                                        CS42L42_ASP_RX_CH_RES_MASK, val);
+               break;
+       default:
+               break;
+       }
 
-       return retval;
+       return cs42l42_pll_config(component);
 }
 
 static int cs42l42_set_sysclk(struct snd_soc_dai *dai,
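
For reference, a minimal sketch of the sample-width arithmetic introduced above, assuming (as the new hw_params code implies) that the CH_RES field encodes bytes-per-sample minus one; the helper name is hypothetical, the driver computes this inline:

        static unsigned int cs42l42_width_code(unsigned int sample_bits)
        {
                /* 16-bit -> 1, 24-bit -> 2, 32-bit -> 3 */
                return (sample_bits / 8) - 1;
        }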
@@ -900,9 +855,9 @@ static int cs42l42_mute(struct snd_soc_dai *dai, int mute, int direction)
        return 0;
 }
 
-#define CS42L42_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S18_3LE | \
-                       SNDRV_PCM_FMTBIT_S20_3LE | SNDRV_PCM_FMTBIT_S24_LE | \
-                       SNDRV_PCM_FMTBIT_S32_LE)
+#define CS42L42_FORMATS (SNDRV_PCM_FMTBIT_S16_LE | \
+                        SNDRV_PCM_FMTBIT_S24_LE | \
+                        SNDRV_PCM_FMTBIT_S32_LE)
 
 
 static const struct snd_soc_dai_ops cs42l42_ops = {
@@ -1801,7 +1756,7 @@ static int cs42l42_i2c_probe(struct i2c_client *i2c_client,
                dev_dbg(&i2c_client->dev, "Found reset GPIO\n");
                gpiod_set_value_cansleep(cs42l42->reset_gpio, 1);
        }
-       mdelay(3);
+       usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2);
 
        /* Request IRQ */
        ret = devm_request_threaded_irq(&i2c_client->dev,
@@ -1926,6 +1881,7 @@ static int cs42l42_runtime_resume(struct device *dev)
        }
 
        gpiod_set_value_cansleep(cs42l42->reset_gpio, 1);
+       usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2);
 
        regcache_cache_only(cs42l42->regmap, false);
        regcache_sync(cs42l42->regmap);
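
Both the probe and runtime-resume paths now wait the same CS42L42_BOOT_TIME_US (3 ms) after deasserting reset, and they use usleep_range() rather than mdelay() since both run in sleepable context where busy-waiting only burns CPU. A sketch of the shared sequence (the helper is hypothetical; the driver open-codes it in each path):

        static void cs42l42_release_reset(struct cs42l42_private *cs42l42)
        {
                gpiod_set_value_cansleep(cs42l42->reset_gpio, 1);
                /* give the codec its datasheet boot time before any register I/O */
                usleep_range(CS42L42_BOOT_TIME_US, CS42L42_BOOT_TIME_US * 2);
        }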
index 9e3cc52..866d7c8 100644 (file)
 #define CS42L42_ASP_SLAVE_MODE         0x00
 #define CS42L42_ASP_MODE_SHIFT         4
 #define CS42L42_ASP_MODE_MASK          (1 << CS42L42_ASP_MODE_SHIFT)
-#define CS42L42_ASP_SCPOL_IN_DAC_SHIFT 2
-#define CS42L42_ASP_SCPOL_IN_DAC_MASK  (1 << CS42L42_ASP_SCPOL_IN_DAC_SHIFT)
-#define CS42L42_ASP_LCPOL_IN_SHIFT     0
-#define CS42L42_ASP_LCPOL_IN_MASK      (1 << CS42L42_ASP_LCPOL_IN_SHIFT)
-#define CS42L42_ASP_POL_INV            1
+#define CS42L42_ASP_SCPOL_SHIFT                2
+#define CS42L42_ASP_SCPOL_MASK         (3 << CS42L42_ASP_SCPOL_SHIFT)
+#define CS42L42_ASP_SCPOL_NOR          3
+#define CS42L42_ASP_LCPOL_SHIFT                0
+#define CS42L42_ASP_LCPOL_MASK         (3 << CS42L42_ASP_LCPOL_SHIFT)
+#define CS42L42_ASP_LCPOL_INV          3
 
 #define CS42L42_ASP_FRM_CFG            (CS42L42_PAGE_12 + 0x08)
 #define CS42L42_ASP_STP_SHIFT          4
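
In the header, the single-bit SCPOL/LCPOL definitions give way to two-bit fields (mask 3 << shift) whose "normal"/"inverted" codes set both bits, presumably covering the related polarity bits of the field in one go; a caller then programs a whole field with one read-modify-write, e.g.:

        snd_soc_component_update_bits(component, CS42L42_ASP_CLK_CFG,
                                      CS42L42_ASP_SCPOL_MASK,
                                      CS42L42_ASP_SCPOL_NOR << CS42L42_ASP_SCPOL_SHIFT);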
 #define CS42L42_FRAC2_VAL(val) (((val) & 0xff0000) >> 16)
 
 #define CS42L42_NUM_SUPPLIES   5
+#define CS42L42_BOOT_TIME_US   3000
 
 static const char *const cs42l42_supply_names[CS42L42_NUM_SUPPLIES] = {
        "VA",
@@ -756,7 +758,6 @@ struct  cs42l42_private {
        struct completion pdn_done;
        u32 sclk;
        u32 srate;
-       u32 swidth;
        u8 plug_state;
        u8 hs_type;
        u8 ts_inv;
index d632055..067757d 100644 (file)
@@ -63,13 +63,8 @@ static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(adc_pga_gain_tlv,
        1, 1, TLV_DB_SCALE_ITEM(0, 0, 0),
        2, 2, TLV_DB_SCALE_ITEM(250, 0, 0),
        3, 3, TLV_DB_SCALE_ITEM(450, 0, 0),
-       4, 4, TLV_DB_SCALE_ITEM(700, 0, 0),
-       5, 5, TLV_DB_SCALE_ITEM(1000, 0, 0),
-       6, 6, TLV_DB_SCALE_ITEM(1300, 0, 0),
-       7, 7, TLV_DB_SCALE_ITEM(1600, 0, 0),
-       8, 8, TLV_DB_SCALE_ITEM(1800, 0, 0),
-       9, 9, TLV_DB_SCALE_ITEM(2100, 0, 0),
-       10, 10, TLV_DB_SCALE_ITEM(2400, 0, 0),
+       4, 7, TLV_DB_SCALE_ITEM(700, 300, 0),
+       8, 10, TLV_DB_SCALE_ITEM(1800, 300, 0),
 );
 
 static const SNDRV_CTL_TLVD_DECLARE_DB_RANGE(hpout_vol_tlv,
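
The consolidation is possible because the deleted rows already sat on a uniform 3 dB grid; TLV_DB_SCALE_ITEM(min, step, mute) over items [lo, hi] maps item n to min + (n - lo) * step, in 0.01 dB units:

        /* Spot checks against the removed rows:
         *   [4, 7],  min 700,  step 300:  n=7  -> 700  + 3 * 300 = 1600
         *   [8, 10], min 1800, step 300:  n=10 -> 1800 + 2 * 300 = 2400
         */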
index c9c21d2..7878da8 100644 (file)
@@ -2895,7 +2895,7 @@ static int rx_macro_enable_echo(struct snd_soc_dapm_widget *w,
 {
        struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
        u16 val, ec_hq_reg;
-       int ec_tx;
+       int ec_tx = -1;
 
        val = snd_soc_component_read(component,
                        CDC_RX_INP_MUX_RX_MIX_CFG4);
@@ -3551,7 +3551,7 @@ static int rx_macro_probe(struct platform_device *pdev)
 
        /* set MCLK and NPL rates */
        clk_set_rate(rx->clks[2].clk, MCLK_FREQ);
-       clk_set_rate(rx->clks[3].clk, MCLK_FREQ);
+       clk_set_rate(rx->clks[3].clk, 2 * MCLK_FREQ);
 
        ret = clk_bulk_prepare_enable(RX_NUM_CLKS_MAX, rx->clks);
        if (ret)
index 36d7a64..e8c6c73 100644 (file)
@@ -1811,7 +1811,7 @@ static int tx_macro_probe(struct platform_device *pdev)
 
        /* set MCLK and NPL rates */
        clk_set_rate(tx->clks[2].clk, MCLK_FREQ);
-       clk_set_rate(tx->clks[3].clk, MCLK_FREQ);
+       clk_set_rate(tx->clks[3].clk, 2 * MCLK_FREQ);
 
        ret = clk_bulk_prepare_enable(TX_NUM_CLKS_MAX, tx->clks);
        if (ret)
index 91e6890..3d6976a 100644 (file)
@@ -189,7 +189,6 @@ struct va_macro {
        struct device *dev;
        unsigned long active_ch_mask[VA_MACRO_MAX_DAIS];
        unsigned long active_ch_cnt[VA_MACRO_MAX_DAIS];
-       unsigned long active_decimator[VA_MACRO_MAX_DAIS];
        u16 dmic_clk_div;
 
        int dec_mode[VA_MACRO_NUM_DECIMATORS];
@@ -549,11 +548,9 @@ static int va_macro_tx_mixer_put(struct snd_kcontrol *kcontrol,
        if (enable) {
                set_bit(dec_id, &va->active_ch_mask[dai_id]);
                va->active_ch_cnt[dai_id]++;
-               va->active_decimator[dai_id] = dec_id;
        } else {
                clear_bit(dec_id, &va->active_ch_mask[dai_id]);
                va->active_ch_cnt[dai_id]--;
-               va->active_decimator[dai_id] = -1;
        }
 
        snd_soc_dapm_mixer_update_power(widget->dapm, kcontrol, enable, update);
@@ -880,18 +877,19 @@ static int va_macro_digital_mute(struct snd_soc_dai *dai, int mute, int stream)
        struct va_macro *va = snd_soc_component_get_drvdata(component);
        u16 tx_vol_ctl_reg, decimator;
 
-       decimator = va->active_decimator[dai->id];
-
-       tx_vol_ctl_reg = CDC_VA_TX0_TX_PATH_CTL +
-                               VA_MACRO_TX_PATH_OFFSET * decimator;
-       if (mute)
-               snd_soc_component_update_bits(component, tx_vol_ctl_reg,
-                                             CDC_VA_TX_PATH_PGA_MUTE_EN_MASK,
-                                             CDC_VA_TX_PATH_PGA_MUTE_EN);
-       else
-               snd_soc_component_update_bits(component, tx_vol_ctl_reg,
-                                             CDC_VA_TX_PATH_PGA_MUTE_EN_MASK,
-                                             CDC_VA_TX_PATH_PGA_MUTE_DISABLE);
+       for_each_set_bit(decimator, &va->active_ch_mask[dai->id],
+                        VA_MACRO_DEC_MAX) {
+               tx_vol_ctl_reg = CDC_VA_TX0_TX_PATH_CTL +
+                                       VA_MACRO_TX_PATH_OFFSET * decimator;
+               if (mute)
+                       snd_soc_component_update_bits(component, tx_vol_ctl_reg,
+                                       CDC_VA_TX_PATH_PGA_MUTE_EN_MASK,
+                                       CDC_VA_TX_PATH_PGA_MUTE_EN);
+               else
+                       snd_soc_component_update_bits(component, tx_vol_ctl_reg,
+                                       CDC_VA_TX_PATH_PGA_MUTE_EN_MASK,
+                                       CDC_VA_TX_PATH_PGA_MUTE_DISABLE);
+       }
 
        return 0;
 }
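
Muting now iterates every decimator whose bit is set in active_ch_mask, rather than the single index the removed active_decimator[] bookkeeping remembered, so concurrently active paths all get muted and unmuted. A minimal sketch of the iteration pattern:

        #include <linux/bitops.h>
        #include <linux/printk.h>

        static void show_active(const unsigned long *mask, unsigned int max)
        {
                unsigned int bit;

                for_each_set_bit(bit, mask, max)
                        pr_debug("decimator %u active\n", bit);
        }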
index 5ebcd93..9ca49a1 100644 (file)
@@ -1211,14 +1211,16 @@ static int wsa_macro_enable_mix_path(struct snd_soc_dapm_widget *w,
                                     struct snd_kcontrol *kcontrol, int event)
 {
        struct snd_soc_component *component = snd_soc_dapm_to_component(w->dapm);
-       u16 gain_reg;
+       u16 path_reg, gain_reg;
        int val;
 
-       switch (w->reg) {
-       case CDC_WSA_RX0_RX_PATH_MIX_CTL:
+       switch (w->shift) {
+       case WSA_MACRO_RX_MIX0:
+               path_reg = CDC_WSA_RX0_RX_PATH_MIX_CTL;
                gain_reg = CDC_WSA_RX0_RX_VOL_MIX_CTL;
                break;
-       case CDC_WSA_RX1_RX_PATH_MIX_CTL:
+       case WSA_MACRO_RX_MIX1:
+               path_reg = CDC_WSA_RX1_RX_PATH_MIX_CTL;
                gain_reg = CDC_WSA_RX1_RX_VOL_MIX_CTL;
                break;
        default:
@@ -1231,7 +1233,7 @@ static int wsa_macro_enable_mix_path(struct snd_soc_dapm_widget *w,
                snd_soc_component_write(component, gain_reg, val);
                break;
        case SND_SOC_DAPM_POST_PMD:
-               snd_soc_component_update_bits(component, w->reg,
+               snd_soc_component_update_bits(component, path_reg,
                                              CDC_WSA_RX_PATH_MIX_CLK_EN_MASK,
                                              CDC_WSA_RX_PATH_MIX_CLK_DISABLE);
                break;
@@ -2068,14 +2070,14 @@ static const struct snd_soc_dapm_widget wsa_macro_dapm_widgets[] = {
        SND_SOC_DAPM_MUX("WSA_RX0 INP0", SND_SOC_NOPM, 0, 0, &rx0_prim_inp0_mux),
        SND_SOC_DAPM_MUX("WSA_RX0 INP1", SND_SOC_NOPM, 0, 0, &rx0_prim_inp1_mux),
        SND_SOC_DAPM_MUX("WSA_RX0 INP2", SND_SOC_NOPM, 0, 0, &rx0_prim_inp2_mux),
-       SND_SOC_DAPM_MUX_E("WSA_RX0 MIX INP", CDC_WSA_RX0_RX_PATH_MIX_CTL,
-                          0, 0, &rx0_mix_mux, wsa_macro_enable_mix_path,
+       SND_SOC_DAPM_MUX_E("WSA_RX0 MIX INP", SND_SOC_NOPM, WSA_MACRO_RX_MIX0,
+                          0, &rx0_mix_mux, wsa_macro_enable_mix_path,
                           SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
        SND_SOC_DAPM_MUX("WSA_RX1 INP0", SND_SOC_NOPM, 0, 0, &rx1_prim_inp0_mux),
        SND_SOC_DAPM_MUX("WSA_RX1 INP1", SND_SOC_NOPM, 0, 0, &rx1_prim_inp1_mux),
        SND_SOC_DAPM_MUX("WSA_RX1 INP2", SND_SOC_NOPM, 0, 0, &rx1_prim_inp2_mux),
-       SND_SOC_DAPM_MUX_E("WSA_RX1 MIX INP", CDC_WSA_RX1_RX_PATH_MIX_CTL,
-                          0, 0, &rx1_mix_mux, wsa_macro_enable_mix_path,
+       SND_SOC_DAPM_MUX_E("WSA_RX1 MIX INP", SND_SOC_NOPM, WSA_MACRO_RX_MIX1,
+                          0, &rx1_mix_mux, wsa_macro_enable_mix_path,
                           SND_SOC_DAPM_PRE_PMU | SND_SOC_DAPM_POST_PMD),
 
        SND_SOC_DAPM_MIXER_E("WSA_RX INT0 MIX", SND_SOC_NOPM, 0, 0, NULL, 0,
index 85f6865..ddb6436 100644 (file)
@@ -446,6 +446,7 @@ static bool max98373_volatile_reg(struct device *dev, unsigned int reg)
        case MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK:
        case MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK:
        case MAX98373_R20B6_BDE_CUR_STATE_READBACK:
+       case MAX98373_R20FF_GLOBAL_SHDN:
        case MAX98373_R21FF_REV_ID:
                return true;
        default:
index d8c4766..f3a1220 100644 (file)
@@ -220,6 +220,7 @@ static bool max98373_volatile_reg(struct device *dev, unsigned int reg)
        case MAX98373_R2054_MEAS_ADC_PVDD_CH_READBACK:
        case MAX98373_R2055_MEAS_ADC_THERM_CH_READBACK:
        case MAX98373_R20B6_BDE_CUR_STATE_READBACK:
+       case MAX98373_R20FF_GLOBAL_SHDN:
        case MAX98373_R21FF_REV_ID:
        /* SoundWire Control Port Registers */
        case MAX98373_R0040_SCP_INIT_STAT_1 ... MAX98373_R0070_SCP_FRAME_CTLR:
index 746c829..1346a98 100644 (file)
@@ -28,11 +28,13 @@ static int max98373_dac_event(struct snd_soc_dapm_widget *w,
                regmap_update_bits(max98373->regmap,
                        MAX98373_R20FF_GLOBAL_SHDN,
                        MAX98373_GLOBAL_EN_MASK, 1);
+               usleep_range(30000, 31000);
                break;
        case SND_SOC_DAPM_POST_PMD:
                regmap_update_bits(max98373->regmap,
                        MAX98373_R20FF_GLOBAL_SHDN,
                        MAX98373_GLOBAL_EN_MASK, 0);
+               usleep_range(30000, 31000);
                max98373->tdm_mode = false;
                break;
        default:
index 37b5795..844e407 100644 (file)
@@ -209,6 +209,7 @@ static bool rt1015_volatile_register(struct device *dev, unsigned int reg)
        case RT1015_VENDOR_ID:
        case RT1015_DEVICE_ID:
        case RT1015_PRO_ALT:
+       case RT1015_MAN_I2C:
        case RT1015_DAC3:
        case RT1015_VBAT_TEST_OUT1:
        case RT1015_VBAT_TEST_OUT2:
@@ -513,6 +514,7 @@ static void rt1015_calibrate(struct rt1015_priv *rt1015)
        msleep(300);
        regmap_write(regmap, RT1015_PWR_STATE_CTRL, 0x0008);
        regmap_write(regmap, RT1015_SYS_RST1, 0x05F5);
+       regmap_write(regmap, RT1015_CLK_DET, 0x8000);
 
        regcache_cache_bypass(regmap, false);
        regcache_mark_dirty(regmap);
index 1414ad1..a5674c2 100644 (file)
@@ -339,9 +339,9 @@ static bool rt5640_readable_register(struct device *dev, unsigned int reg)
 }
 
 static const DECLARE_TLV_DB_SCALE(out_vol_tlv, -4650, 150, 0);
-static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -65625, 375, 0);
+static const DECLARE_TLV_DB_MINMAX(dac_vol_tlv, -6562, 0);
 static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -3450, 150, 0);
-static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -17625, 375, 0);
+static const DECLARE_TLV_DB_MINMAX(adc_vol_tlv, -1762, 3000);
 static const DECLARE_TLV_DB_SCALE(adc_bst_tlv, 0, 1200, 0);
 
 /* {0, +20, +24, +30, +35, +40, +44, +50, +52} dB */
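
The old DB_SCALE values were an order of magnitude too large for TLV's 0.01 dB units (-65625 would mean -656.25 dB). DECLARE_TLV_DB_MINMAX instead spreads its (min, max) endpoints linearly across the control range:

        /* Assuming the usual 0..175 range of these DAC volume controls:
         *   (0 - (-6562)) / 175 ~= 37.5 (0.01 dB) = the intended 0.375 dB/step,
         * i.e. the step the old entries encoded as 375 by mistake.
         */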
index d198e19..e59fdc8 100644 (file)
@@ -285,9 +285,9 @@ static bool rt5651_readable_register(struct device *dev, unsigned int reg)
 }
 
 static const DECLARE_TLV_DB_SCALE(out_vol_tlv, -4650, 150, 0);
-static const DECLARE_TLV_DB_SCALE(dac_vol_tlv, -65625, 375, 0);
+static const DECLARE_TLV_DB_MINMAX(dac_vol_tlv, -6562, 0);
 static const DECLARE_TLV_DB_SCALE(in_vol_tlv, -3450, 150, 0);
-static const DECLARE_TLV_DB_SCALE(adc_vol_tlv, -17625, 375, 0);
+static const DECLARE_TLV_DB_MINMAX(adc_vol_tlv, -1762, 3000);
 static const DECLARE_TLV_DB_SCALE(adc_bst_tlv, 0, 1200, 0);
 
 /* {0, +20, +24, +30, +35, +40, +44, +50, +52} dB */
index 41e5917..91a4ef7 100644 (file)
@@ -3426,12 +3426,17 @@ static int rt5659_set_component_sysclk(struct snd_soc_component *component, int
 {
        struct rt5659_priv *rt5659 = snd_soc_component_get_drvdata(component);
        unsigned int reg_val = 0;
+       int ret;
 
        if (freq == rt5659->sysclk && clk_id == rt5659->sysclk_src)
                return 0;
 
        switch (clk_id) {
        case RT5659_SCLK_S_MCLK:
+               ret = clk_set_rate(rt5659->mclk, freq);
+               if (ret)
+                       return ret;
+
                reg_val |= RT5659_SCLK_SRC_MCLK;
                break;
        case RT5659_SCLK_S_PLL1:
index c29317e..4063aac 100644 (file)
@@ -629,21 +629,69 @@ static SOC_ENUM_SINGLE_DECL(rt5670_if2_dac_enum, RT5670_DIG_INF1_DATA,
 static SOC_ENUM_SINGLE_DECL(rt5670_if2_adc_enum, RT5670_DIG_INF1_DATA,
                                RT5670_IF2_ADC_SEL_SFT, rt5670_data_select);
 
+/*
+ * For reliable output-mute LED control we need a "DAC1 Playback Switch" control.
+ * We emulate this by only clearing the RT5670_M_DAC1_L/_R AD_DA_MIXER register
+ * bits when both our emulated DAC1 Playback Switch control and the DAC1 MIXL/R
+ * DAPM-mixer DAC1 input are enabled.
+ */
+static void rt5670_update_ad_da_mixer_dac1_m_bits(struct rt5670_priv *rt5670)
+{
+       int val = RT5670_M_DAC1_L | RT5670_M_DAC1_R;
+
+       if (rt5670->dac1_mixl_dac1_switch && rt5670->dac1_playback_switch_l)
+               val &= ~RT5670_M_DAC1_L;
+
+       if (rt5670->dac1_mixr_dac1_switch && rt5670->dac1_playback_switch_r)
+               val &= ~RT5670_M_DAC1_R;
+
+       regmap_update_bits(rt5670->regmap, RT5670_AD_DA_MIXER,
+                          RT5670_M_DAC1_L | RT5670_M_DAC1_R, val);
+}
+
+static int rt5670_dac1_playback_switch_get(struct snd_kcontrol *kcontrol,
+                                          struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+       struct rt5670_priv *rt5670 = snd_soc_component_get_drvdata(component);
+
+       ucontrol->value.integer.value[0] = rt5670->dac1_playback_switch_l;
+       ucontrol->value.integer.value[1] = rt5670->dac1_playback_switch_r;
+
+       return 0;
+}
+
+static int rt5670_dac1_playback_switch_put(struct snd_kcontrol *kcontrol,
+                                          struct snd_ctl_elem_value *ucontrol)
+{
+       struct snd_soc_component *component = snd_soc_kcontrol_component(kcontrol);
+       struct rt5670_priv *rt5670 = snd_soc_component_get_drvdata(component);
+
+       if (rt5670->dac1_playback_switch_l == ucontrol->value.integer.value[0] &&
+           rt5670->dac1_playback_switch_r == ucontrol->value.integer.value[1])
+               return 0;
+
+       rt5670->dac1_playback_switch_l = ucontrol->value.integer.value[0];
+       rt5670->dac1_playback_switch_r = ucontrol->value.integer.value[1];
+
+       rt5670_update_ad_da_mixer_dac1_m_bits(rt5670);
+
+       return 1;
+}
+
 static const struct snd_kcontrol_new rt5670_snd_controls[] = {
        /* Headphone Output Volume */
-       SOC_DOUBLE("HP Playback Switch", RT5670_HP_VOL,
-               RT5670_L_MUTE_SFT, RT5670_R_MUTE_SFT, 1, 1),
        SOC_DOUBLE_TLV("HP Playback Volume", RT5670_HP_VOL,
                RT5670_L_VOL_SFT, RT5670_R_VOL_SFT,
                39, 1, out_vol_tlv),
        /* OUTPUT Control */
-       SOC_DOUBLE("OUT Channel Switch", RT5670_LOUT1,
-               RT5670_VOL_L_SFT, RT5670_VOL_R_SFT, 1, 1),
        SOC_DOUBLE_TLV("OUT Playback Volume", RT5670_LOUT1,
                RT5670_L_VOL_SFT, RT5670_R_VOL_SFT, 39, 1, out_vol_tlv),
        /* DAC Digital Volume */
        SOC_DOUBLE("DAC2 Playback Switch", RT5670_DAC_CTRL,
                RT5670_M_DAC_L2_VOL_SFT, RT5670_M_DAC_R2_VOL_SFT, 1, 1),
+       SOC_DOUBLE_EXT("DAC1 Playback Switch", SND_SOC_NOPM, 0, 1, 1, 0,
+                       rt5670_dac1_playback_switch_get, rt5670_dac1_playback_switch_put),
        SOC_DOUBLE_TLV("DAC1 Playback Volume", RT5670_DAC1_DIG_VOL,
                        RT5670_L_VOL_SFT, RT5670_R_VOL_SFT,
                        175, 0, dac_vol_tlv),
@@ -913,18 +961,44 @@ static const struct snd_kcontrol_new rt5670_mono_adc_r_mix[] = {
                        RT5670_M_MONO_ADC_R2_SFT, 1, 1),
 };
 
+/* See comment above rt5670_update_ad_da_mixer_dac1_m_bits() */
+static int rt5670_put_dac1_mix_dac1_switch(struct snd_kcontrol *kcontrol,
+                                          struct snd_ctl_elem_value *ucontrol)
+{
+       struct soc_mixer_control *mc = (struct soc_mixer_control *)kcontrol->private_value;
+       struct snd_soc_component *component = snd_soc_dapm_kcontrol_component(kcontrol);
+       struct rt5670_priv *rt5670 = snd_soc_component_get_drvdata(component);
+       int ret;
+
+       if (mc->shift == 0)
+               rt5670->dac1_mixl_dac1_switch = ucontrol->value.integer.value[0];
+       else
+               rt5670->dac1_mixr_dac1_switch = ucontrol->value.integer.value[0];
+
+       /* Apply the update (if any) */
+       ret = snd_soc_dapm_put_volsw(kcontrol, ucontrol);
+       if (ret == 0)
+               return 0;
+
+       rt5670_update_ad_da_mixer_dac1_m_bits(rt5670);
+
+       return 1;
+}
+
+#define SOC_DAPM_SINGLE_RT5670_DAC1_SW(name, shift) \
+       SOC_SINGLE_EXT(name, SND_SOC_NOPM, shift, 1, 0, \
+                      snd_soc_dapm_get_volsw, rt5670_put_dac1_mix_dac1_switch)
+
 static const struct snd_kcontrol_new rt5670_dac_l_mix[] = {
        SOC_DAPM_SINGLE("Stereo ADC Switch", RT5670_AD_DA_MIXER,
                        RT5670_M_ADCMIX_L_SFT, 1, 1),
-       SOC_DAPM_SINGLE("DAC1 Switch", RT5670_AD_DA_MIXER,
-                       RT5670_M_DAC1_L_SFT, 1, 1),
+       SOC_DAPM_SINGLE_RT5670_DAC1_SW("DAC1 Switch", 0),
 };
 
 static const struct snd_kcontrol_new rt5670_dac_r_mix[] = {
        SOC_DAPM_SINGLE("Stereo ADC Switch", RT5670_AD_DA_MIXER,
                        RT5670_M_ADCMIX_R_SFT, 1, 1),
-       SOC_DAPM_SINGLE("DAC1 Switch", RT5670_AD_DA_MIXER,
-                       RT5670_M_DAC1_R_SFT, 1, 1),
+       SOC_DAPM_SINGLE_RT5670_DAC1_SW("DAC1 Switch", 1),
 };
 
 static const struct snd_kcontrol_new rt5670_sto_dac_l_mix[] = {
@@ -1656,12 +1730,10 @@ static const struct snd_soc_dapm_widget rt5670_dapm_widgets[] = {
                            RT5670_PWR_ADC_S1F_BIT, 0, NULL, 0),
        SND_SOC_DAPM_SUPPLY("ADC Stereo2 Filter", RT5670_PWR_DIG2,
                            RT5670_PWR_ADC_S2F_BIT, 0, NULL, 0),
-       SND_SOC_DAPM_MIXER("Sto1 ADC MIXL", RT5670_STO1_ADC_DIG_VOL,
-                          RT5670_L_MUTE_SFT, 1, rt5670_sto1_adc_l_mix,
-                          ARRAY_SIZE(rt5670_sto1_adc_l_mix)),
-       SND_SOC_DAPM_MIXER("Sto1 ADC MIXR", RT5670_STO1_ADC_DIG_VOL,
-                          RT5670_R_MUTE_SFT, 1, rt5670_sto1_adc_r_mix,
-                          ARRAY_SIZE(rt5670_sto1_adc_r_mix)),
+       SND_SOC_DAPM_MIXER("Sto1 ADC MIXL", SND_SOC_NOPM, 0, 0,
+                          rt5670_sto1_adc_l_mix, ARRAY_SIZE(rt5670_sto1_adc_l_mix)),
+       SND_SOC_DAPM_MIXER("Sto1 ADC MIXR", SND_SOC_NOPM, 0, 0,
+                          rt5670_sto1_adc_r_mix, ARRAY_SIZE(rt5670_sto1_adc_r_mix)),
        SND_SOC_DAPM_MIXER("Sto2 ADC MIXL", SND_SOC_NOPM, 0, 0,
                           rt5670_sto2_adc_l_mix,
                           ARRAY_SIZE(rt5670_sto2_adc_l_mix)),
@@ -2999,6 +3071,16 @@ static int rt5670_i2c_probe(struct i2c_client *i2c,
                dev_info(&i2c->dev, "quirk JD mode 3\n");
        }
 
+       /*
+        * Enable the emulated "DAC1 Playback Switch" by default to avoid
+        * muting the output with older UCM profiles.
+        */
+       rt5670->dac1_playback_switch_l = true;
+       rt5670->dac1_playback_switch_r = true;
+       /* The Power-On-Reset values for the DAC1 mixer have the DAC1 input enabled. */
+       rt5670->dac1_mixl_dac1_switch = true;
+       rt5670->dac1_mixr_dac1_switch = true;
+
        rt5670->regmap = devm_regmap_init_i2c(i2c, &rt5670_regmap);
        if (IS_ERR(rt5670->regmap)) {
                ret = PTR_ERR(rt5670->regmap);
index 56b13fe..6fb3c36 100644 (file)
 /* global definition */
 #define RT5670_L_MUTE                          (0x1 << 15)
 #define RT5670_L_MUTE_SFT                      15
-#define RT5670_VOL_L_MUTE                      (0x1 << 14)
-#define RT5670_VOL_L_SFT                       14
 #define RT5670_R_MUTE                          (0x1 << 7)
 #define RT5670_R_MUTE_SFT                      7
-#define RT5670_VOL_R_MUTE                      (0x1 << 6)
-#define RT5670_VOL_R_SFT                       6
 #define RT5670_L_VOL_MASK                      (0x3f << 8)
 #define RT5670_L_VOL_SFT                       8
 #define RT5670_R_VOL_MASK                      (0x3f)
@@ -2017,6 +2013,11 @@ struct rt5670_priv {
        int dsp_rate;
        int jack_type;
        int jack_type_saved;
+
+       bool dac1_mixl_dac1_switch;
+       bool dac1_mixr_dac1_switch;
+       bool dac1_playback_switch_l;
+       bool dac1_playback_switch_r;
 };
 
 void rt5670_jack_suspend(struct snd_soc_component *component);
index 85f7441..047f4e6 100644 (file)
@@ -895,6 +895,13 @@ static int rt711_probe(struct snd_soc_component *component)
        return 0;
 }
 
+static void rt711_remove(struct snd_soc_component *component)
+{
+       struct rt711_priv *rt711 = snd_soc_component_get_drvdata(component);
+
+       regcache_cache_only(rt711->regmap, true);
+}
+
 static const struct snd_soc_component_driver soc_codec_dev_rt711 = {
        .probe = rt711_probe,
        .set_bias_level = rt711_set_bias_level,
@@ -905,6 +912,7 @@ static const struct snd_soc_component_driver soc_codec_dev_rt711 = {
        .dapm_routes = rt711_audio_map,
        .num_dapm_routes = ARRAY_SIZE(rt711_audio_map),
        .set_jack = rt711_set_jack_detect,
+       .remove = rt711_remove,
 };
 
 static int rt711_set_sdw_stream(struct snd_soc_dai *dai, void *sdw_stream,
index 73551e3..6d9bb25 100644 (file)
@@ -71,7 +71,7 @@ static const struct reg_default sgtl5000_reg_defaults[] = {
        { SGTL5000_DAP_EQ_BASS_BAND4,           0x002f },
        { SGTL5000_DAP_MAIN_CHAN,               0x8000 },
        { SGTL5000_DAP_MIX_CHAN,                0x0000 },
-       { SGTL5000_DAP_AVC_CTRL,                0x0510 },
+       { SGTL5000_DAP_AVC_CTRL,                0x5100 },
        { SGTL5000_DAP_AVC_THRESHOLD,           0x1473 },
        { SGTL5000_DAP_AVC_ATTACK,              0x0028 },
        { SGTL5000_DAP_AVC_DECAY,               0x0050 },
diff --git a/sound/soc/codecs/sirf-audio-codec.h b/sound/soc/codecs/sirf-audio-codec.h
deleted file mode 100644 (file)
index a7fe268..0000000
+++ /dev/null
@@ -1,124 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * SiRF inner codec controllers define
- *
- * Copyright (c) 2011 Cambridge Silicon Radio Limited, a CSR plc group company.
- */
-
-#ifndef _SIRF_AUDIO_CODEC_H
-#define _SIRF_AUDIO_CODEC_H
-
-
-#define AUDIO_IC_CODEC_PWR                     (0x00E0)
-#define AUDIO_IC_CODEC_CTRL0                   (0x00E4)
-#define AUDIO_IC_CODEC_CTRL1                   (0x00E8)
-#define AUDIO_IC_CODEC_CTRL2                   (0x00EC)
-#define AUDIO_IC_CODEC_CTRL3                   (0x00F0)
-
-#define MICBIASEN              (1 << 3)
-
-#define IC_RDACEN              (1 << 0)
-#define IC_LDACEN              (1 << 1)
-#define IC_HSREN               (1 << 2)
-#define IC_HSLEN               (1 << 3)
-#define IC_SPEN                        (1 << 4)
-#define IC_CPEN                        (1 << 5)
-
-#define IC_HPRSELR             (1 << 6)
-#define IC_HPLSELR             (1 << 7)
-#define IC_HPRSELL             (1 << 8)
-#define IC_HPLSELL             (1 << 9)
-#define IC_SPSELR              (1 << 10)
-#define IC_SPSELL              (1 << 11)
-
-#define IC_MONOR               (1 << 12)
-#define IC_MONOL               (1 << 13)
-
-#define IC_RXOSRSEL            (1 << 28)
-#define IC_CPFREQ              (1 << 29)
-#define IC_HSINVEN             (1 << 30)
-
-#define IC_MICINREN            (1 << 0)
-#define IC_MICINLEN            (1 << 1)
-#define IC_MICIN1SEL           (1 << 2)
-#define IC_MICIN2SEL           (1 << 3)
-#define IC_MICDIFSEL           (1 << 4)
-#define        IC_LINEIN1SEL           (1 << 5)
-#define        IC_LINEIN2SEL           (1 << 6)
-#define        IC_RADCEN               (1 << 7)
-#define        IC_LADCEN               (1 << 8)
-#define        IC_ALM                  (1 << 9)
-
-#define IC_DIGMICEN             (1 << 22)
-#define IC_DIGMICFREQ           (1 << 23)
-#define IC_ADC14B_12            (1 << 24)
-#define IC_FIRDAC_HSL_EN        (1 << 25)
-#define IC_FIRDAC_HSR_EN        (1 << 26)
-#define IC_FIRDAC_LOUT_EN       (1 << 27)
-#define IC_POR                  (1 << 28)
-#define IC_CODEC_CLK_EN         (1 << 29)
-#define IC_HP_3DB_BOOST         (1 << 30)
-
-#define IC_ADC_LEFT_GAIN_SHIFT 16
-#define IC_ADC_RIGHT_GAIN_SHIFT 10
-#define IC_ADC_GAIN_MASK       0x3F
-#define IC_MIC_MAX_GAIN                0x39
-
-#define IC_RXPGAR_MASK         0x3F
-#define IC_RXPGAR_SHIFT                14
-#define IC_RXPGAL_MASK         0x3F
-#define IC_RXPGAL_SHIFT                21
-#define IC_RXPGAR              0x7B
-#define IC_RXPGAL              0x7B
-
-#define AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK     0x3F
-#define AUDIO_PORT_TX_FIFO_SC_OFFSET    0
-#define AUDIO_PORT_TX_FIFO_LC_OFFSET    10
-#define AUDIO_PORT_TX_FIFO_HC_OFFSET    20
-
-#define TX_FIFO_SC(x)           (((x) & AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK) \
-                               << AUDIO_PORT_TX_FIFO_SC_OFFSET)
-#define TX_FIFO_LC(x)           (((x) & AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK) \
-                               << AUDIO_PORT_TX_FIFO_LC_OFFSET)
-#define TX_FIFO_HC(x)           (((x) & AUDIO_PORT_TX_FIFO_LEVEL_CHECK_MASK) \
-                               << AUDIO_PORT_TX_FIFO_HC_OFFSET)
-
-#define AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK     0x0F
-#define AUDIO_PORT_RX_FIFO_SC_OFFSET    0
-#define AUDIO_PORT_RX_FIFO_LC_OFFSET    10
-#define AUDIO_PORT_RX_FIFO_HC_OFFSET    20
-
-#define RX_FIFO_SC(x)           (((x) & AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK) \
-                               << AUDIO_PORT_RX_FIFO_SC_OFFSET)
-#define RX_FIFO_LC(x)           (((x) & AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK) \
-                               << AUDIO_PORT_RX_FIFO_LC_OFFSET)
-#define RX_FIFO_HC(x)           (((x) & AUDIO_PORT_RX_FIFO_LEVEL_CHECK_MASK) \
-                               << AUDIO_PORT_RX_FIFO_HC_OFFSET)
-#define AUDIO_PORT_IC_CODEC_TX_CTRL            (0x00F4)
-#define AUDIO_PORT_IC_CODEC_RX_CTRL            (0x00F8)
-
-#define AUDIO_PORT_IC_TXFIFO_OP                        (0x00FC)
-#define AUDIO_PORT_IC_TXFIFO_LEV_CHK           (0x0100)
-#define AUDIO_PORT_IC_TXFIFO_STS               (0x0104)
-#define AUDIO_PORT_IC_TXFIFO_INT               (0x0108)
-#define AUDIO_PORT_IC_TXFIFO_INT_MSK           (0x010C)
-
-#define AUDIO_PORT_IC_RXFIFO_OP                        (0x0110)
-#define AUDIO_PORT_IC_RXFIFO_LEV_CHK           (0x0114)
-#define AUDIO_PORT_IC_RXFIFO_STS               (0x0118)
-#define AUDIO_PORT_IC_RXFIFO_INT               (0x011C)
-#define AUDIO_PORT_IC_RXFIFO_INT_MSK           (0x0120)
-
-#define AUDIO_FIFO_START               (1 << 0)
-#define AUDIO_FIFO_RESET               (1 << 1)
-
-#define AUDIO_FIFO_FULL                        (1 << 0)
-#define AUDIO_FIFO_EMPTY               (1 << 1)
-#define AUDIO_FIFO_OFLOW               (1 << 2)
-#define AUDIO_FIFO_UFLOW               (1 << 3)
-
-#define IC_TX_ENABLE           (0x03)
-#define IC_RX_ENABLE_MONO      (0x01)
-#define IC_RX_ENABLE_STEREO    (0x03)
-
-#endif /*__SIRF_AUDIO_CODEC_H*/
index 40f682f..d18ae5e 100644 (file)
@@ -1873,6 +1873,12 @@ static int wcd934x_set_channel_map(struct snd_soc_dai *dai,
 
        wcd = snd_soc_component_get_drvdata(dai->component);
 
+       if (tx_num > WCD934X_TX_MAX || rx_num > WCD934X_RX_MAX) {
+               dev_err(wcd->dev, "Invalid tx %d or rx %d channel count\n",
+                       tx_num, rx_num);
+               return -EINVAL;
+       }
+
        if (!tx_slot || !rx_slot) {
                dev_err(wcd->dev, "Invalid tx_slot=%p, rx_slot=%p\n",
                        tx_slot, rx_slot);
index df35151..cda9cd9 100644 (file)
@@ -707,7 +707,13 @@ int wm8960_configure_pll(struct snd_soc_component *component, int freq_in,
        best_freq_out = -EINVAL;
        *sysclk_idx = *dac_idx = *bclk_idx = -1;
 
-       for (i = 0; i < ARRAY_SIZE(sysclk_divs); ++i) {
+       /*
+        * From the datasheet, the PLL performs best when f2 is between
+        * 90MHz and 100MHz; with a desired sysclk output of 11.2896MHz
+        * or 12.288MHz, sysclkdiv = 2 is then the best choice.
+        * So search sysclk_divs from 2 down to 1 rather than from 1 to 2.
+        */
+       for (i = ARRAY_SIZE(sysclk_divs) - 1; i >= 0; --i) {
                if (sysclk_divs[i] == -1)
                        continue;
                for (j = 0; j < ARRAY_SIZE(dac_divs); ++j) {
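
A worked check of that comment, assuming the WM8960's fixed /4 PLL post-divider so that f2 = sysclk * 4 * sysclkdiv:

        /* desired sysclk = 12.288 MHz:
         *   sysclkdiv = 2: f2 = 12288000 * 4 * 2 = 98304000 Hz  (inside 90-100 MHz)
         *   sysclkdiv = 1: f2 = 12288000 * 4 * 1 = 49152000 Hz  (below the range)
         * so scanning sysclk_divs downward finds the best f2 first.
         */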
index 08056fa..a857a62 100644 (file)
@@ -519,11 +519,13 @@ static int fsl_esai_startup(struct snd_pcm_substream *substream,
                                   ESAI_SAICR_SYNC, esai_priv->synchronous ?
                                   ESAI_SAICR_SYNC : 0);
 
-               /* Set a default slot number -- 2 */
+               /* Set the slot count */
                regmap_update_bits(esai_priv->regmap, REG_ESAI_TCCR,
-                                  ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(2));
+                                  ESAI_xCCR_xDC_MASK,
+                                  ESAI_xCCR_xDC(esai_priv->slots));
                regmap_update_bits(esai_priv->regmap, REG_ESAI_RCCR,
-                                  ESAI_xCCR_xDC_MASK, ESAI_xCCR_xDC(2));
+                                  ESAI_xCCR_xDC_MASK,
+                                  ESAI_xCCR_xDC(esai_priv->slots));
        }
 
        return 0;
index 5781174..ad8af3f 100644 (file)
@@ -878,6 +878,7 @@ static int fsl_ssi_hw_free(struct snd_pcm_substream *substream,
 static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt)
 {
        u32 strcr = 0, scr = 0, stcr, srcr, mask;
+       unsigned int slots;
 
        ssi->dai_fmt = fmt;
 
@@ -909,10 +910,11 @@ static int _fsl_ssi_set_dai_fmt(struct fsl_ssi *ssi, unsigned int fmt)
                        return -EINVAL;
                }
 
+               slots = ssi->slots ? : 2;
                regmap_update_bits(ssi->regs, REG_SSI_STCCR,
-                                  SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2));
+                                  SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots));
                regmap_update_bits(ssi->regs, REG_SSI_SRCCR,
-                                  SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(2));
+                                  SSI_SxCCR_DC_MASK, SSI_SxCCR_DC(slots));
 
                /* Data on rising edge of bclk, frame low, 1clk before data */
                strcr |= SSI_STCR_TFSI | SSI_STCR_TSCKP | SSI_STCR_TEFS;
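
The `slots = ssi->slots ? : 2;` line above uses the GNU "Elvis" extension: `a ?: b` yields `a` when `a` is nonzero (evaluating it only once) and `b` otherwise, so an unconfigured slot count falls back to the I2S stereo default of 2:

        static unsigned int ssi_slot_count(unsigned int configured)
        {
                return configured ?: 2; /* 0 -> 2, anything else passes through */
        }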
index ab31045..6cada4c 100644 (file)
@@ -172,15 +172,16 @@ int asoc_simple_parse_clk(struct device *dev,
         *  or device's module clock.
         */
        clk = devm_get_clk_from_child(dev, node, NULL);
-       if (IS_ERR(clk))
-               clk = devm_get_clk_from_child(dev, dlc->of_node, NULL);
-
        if (!IS_ERR(clk)) {
-               simple_dai->clk = clk;
                simple_dai->sysclk = clk_get_rate(clk);
-       } else if (!of_property_read_u32(node, "system-clock-frequency",
-                                        &val)) {
+
+               simple_dai->clk = clk;
+       } else if (!of_property_read_u32(node, "system-clock-frequency", &val)) {
                simple_dai->sysclk = val;
+       } else {
+               clk = devm_get_clk_from_child(dev, dlc->of_node, NULL);
+               if (!IS_ERR(clk))
+                       simple_dai->sysclk = clk_get_rate(clk);
        }
 
        if (of_property_read_bool(node, "system-clock-direction-out"))
index 9e9b058..4124aa2 100644 (file)
@@ -487,15 +487,15 @@ static struct snd_soc_dai_driver sst_platform_dai[] = {
                .stream_name = "Headset Playback",
                .channels_min = SST_STEREO,
                .channels_max = SST_STEREO,
-               .rates = SNDRV_PCM_RATE_44100|SNDRV_PCM_RATE_48000,
-               .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE,
+               .rates = SNDRV_PCM_RATE_48000,
+               .formats = SNDRV_PCM_FMTBIT_S16_LE,
        },
        .capture = {
                .stream_name = "Headset Capture",
                .channels_min = 1,
                .channels_max = 2,
-               .rates = SNDRV_PCM_RATE_44100|SNDRV_PCM_RATE_48000,
-               .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE,
+               .rates = SNDRV_PCM_RATE_48000,
+               .formats = SNDRV_PCM_FMTBIT_S16_LE,
        },
 },
 {
@@ -505,8 +505,8 @@ static struct snd_soc_dai_driver sst_platform_dai[] = {
                .stream_name = "Deepbuffer Playback",
                .channels_min = SST_STEREO,
                .channels_max = SST_STEREO,
-               .rates = SNDRV_PCM_RATE_44100|SNDRV_PCM_RATE_48000,
-               .formats = SNDRV_PCM_FMTBIT_S16_LE | SNDRV_PCM_FMTBIT_S24_LE,
+               .rates = SNDRV_PCM_RATE_48000,
+               .formats = SNDRV_PCM_FMTBIT_S16_LE,
        },
 },
 {
index 782f2b4..5d48cc3 100644 (file)
@@ -581,7 +581,7 @@ static const struct dmi_system_id byt_rt5640_quirk_table[] = {
                },
                .driver_data = (void *)(BYT_RT5640_DMIC1_MAP |
                                        BYT_RT5640_JD_SRC_JD1_IN4P |
-                                       BYT_RT5640_OVCD_TH_1500UA |
+                                       BYT_RT5640_OVCD_TH_2000UA |
                                        BYT_RT5640_OVCD_SF_0P75 |
                                        BYT_RT5640_MCLK_EN),
        },
index f5de1d7..f3bebed 100644 (file)
@@ -555,7 +555,9 @@ static int mtk_dai_tdm_hw_params(struct snd_pcm_substream *substream,
 
        /* set tdm */
        if (tdm_priv->bck_invert)
-               tdm_con |= 1 << BCK_INVERSE_SFT;
+               regmap_update_bits(afe->regmap, AUDIO_TOP_CON3,
+                                  BCK_INVERSE_MASK_SFT,
+                                  0x1 << BCK_INVERSE_SFT);
 
        if (tdm_priv->lck_invert)
                tdm_con |= 1 << LRCK_INVERSE_SFT;
index 562f25c..b9fb80d 100644 (file)
@@ -21,6 +21,11 @@ enum {
 /*****************************************************************************
  *                  R E G I S T E R       D E F I N I T I O N
  *****************************************************************************/
+/* AUDIO_TOP_CON3 */
+#define BCK_INVERSE_SFT                              3
+#define BCK_INVERSE_MASK                             0x1
+#define BCK_INVERSE_MASK_SFT                         (0x1 << 3)
+
 /* AFE_DAC_CON0 */
 #define VUL12_ON_SFT                                   31
 #define VUL12_ON_MASK                                  0x1
@@ -2079,9 +2084,6 @@ enum {
 #define TDM_EN_SFT                                     0
 #define TDM_EN_MASK                                    0x1
 #define TDM_EN_MASK_SFT                                (0x1 << 0)
-#define BCK_INVERSE_SFT                                1
-#define BCK_INVERSE_MASK                               0x1
-#define BCK_INVERSE_MASK_SFT                           (0x1 << 1)
 #define LRCK_INVERSE_SFT                               2
 #define LRCK_INVERSE_MASK                              0x1
 #define LRCK_INVERSE_MASK_SFT                          (0x1 << 2)
index c642e5f..be360a4 100644 (file)
@@ -739,7 +739,7 @@ static void of_lpass_cpu_parse_dai_data(struct device *dev,
 
        for_each_child_of_node(dev->of_node, node) {
                ret = of_property_read_u32(node, "reg", &id);
-               if (ret || id < 0 || id >= data->variant->num_dai) {
+               if (ret || id < 0) {
                        dev_err(dev, "valid dai id not found: %d\n", ret);
                        continue;
                }
index 6c2760e..153e9b2 100644 (file)
 #define SPK_TDM_RX_MASK         0x03
 #define NUM_TDM_SLOTS           8
 #define SLIM_MAX_TX_PORTS 16
-#define SLIM_MAX_RX_PORTS 16
+#define SLIM_MAX_RX_PORTS 13
 #define WCD934X_DEFAULT_MCLK_RATE      9600000
 
 struct sdm845_snd_data {
        struct snd_soc_jack jack;
        bool jack_setup;
-       bool stream_prepared[SLIM_MAX_RX_PORTS];
+       bool stream_prepared[AFE_PORT_MAX];
        struct snd_soc_card *card;
        uint32_t pri_mi2s_clk_count;
        uint32_t sec_mi2s_clk_count;
        uint32_t quat_tdm_clk_count;
-       struct sdw_stream_runtime *sruntime[SLIM_MAX_RX_PORTS];
+       struct sdw_stream_runtime *sruntime[AFE_PORT_MAX];
 };
 
 static unsigned int tdm_slot_offset[8] = {0, 4, 8, 12, 16, 20, 24, 28};
index f6d4e99..0cffc95 100644 (file)
@@ -31,6 +31,7 @@
 #include <linux/of.h>
 #include <linux/of_graph.h>
 #include <linux/dmi.h>
+#include <linux/acpi.h>
 #include <sound/core.h>
 #include <sound/pcm.h>
 #include <sound/pcm_params.h>
@@ -1573,6 +1574,9 @@ int snd_soc_set_dmi_name(struct snd_soc_card *card, const char *flavour)
        if (card->long_name)
                return 0; /* long name already set by driver or from DMI */
 
+       if (!is_acpi_device_node(card->dev->fwnode))
+               return 0;
+
        /* make up dmi long name as: vendor-product-version-board */
        vendor = dmi_get_system_info(DMI_BOARD_VENDOR);
        if (!vendor || !is_dmi_valid(vendor)) {
index 3e8b6c0..8dfc165 100644 (file)
@@ -9,29 +9,34 @@ config SND_SOC_SOF_TOPLEVEL
 
 if SND_SOC_SOF_TOPLEVEL
 
+config SND_SOC_SOF_PCI_DEV
+       tristate
+
 config SND_SOC_SOF_PCI
        tristate "SOF PCI enumeration support"
        depends on PCI
-       select SND_SOC_SOF
-       select SND_SOC_ACPI if ACPI
        help
          This adds support for PCI enumeration. This option is
          required to enable Intel Skylake+ devices.
+         For backwards-compatibility with previous configurations, the selection
+         will be used as the default for platform-specific drivers.
          Say Y if you need this option.
          If unsure select "N".
 
 config SND_SOC_SOF_ACPI
        tristate "SOF ACPI enumeration support"
        depends on ACPI || COMPILE_TEST
-       select SND_SOC_SOF
-       select SND_SOC_ACPI if ACPI
-       select IOSF_MBI if X86 && PCI
        help
          This adds support for ACPI enumeration. This option is required
          to enable Intel Broadwell/Baytrail/Cherrytrail devices.
+         For backwards-compatibility with previous configurations, the selection
+         will be used as the default for platform-specific drivers.
          Say Y if you need this option.
          If unsure select "N".
 
+config SND_SOC_SOF_ACPI_DEV
+       tristate
+
 config SND_SOC_SOF_OF
        tristate "SOF OF enumeration support"
        depends on OF || COMPILE_TEST
index 05718df..606d813 100644 (file)
@@ -14,9 +14,9 @@ obj-$(CONFIG_SND_SOC_SOF) += snd-sof.o
 obj-$(CONFIG_SND_SOC_SOF_NOCODEC) += snd-sof-nocodec.o
 
 
-obj-$(CONFIG_SND_SOC_SOF_ACPI) += snd-sof-acpi.o
+obj-$(CONFIG_SND_SOC_SOF_ACPI_DEV) += snd-sof-acpi.o
 obj-$(CONFIG_SND_SOC_SOF_OF) += snd-sof-of.o
-obj-$(CONFIG_SND_SOC_SOF_PCI) += snd-sof-pci.o
+obj-$(CONFIG_SND_SOC_SOF_PCI_DEV) += snd-sof-pci.o
 
 obj-$(CONFIG_SND_SOC_SOF_INTEL_TOPLEVEL) += intel/
 obj-$(CONFIG_SND_SOC_SOF_IMX_TOPLEVEL) += imx/
index 6d8f7d9..4a3d522 100644 (file)
@@ -399,7 +399,13 @@ int snd_sof_device_shutdown(struct device *dev)
 {
        struct snd_sof_dev *sdev = dev_get_drvdata(dev);
 
-       return snd_sof_shutdown(sdev);
+       if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
+               cancel_work_sync(&sdev->probe_work);
+
+       if (sdev->fw_state == SOF_FW_BOOT_COMPLETE)
+               return snd_sof_shutdown(sdev);
+
+       return 0;
 }
 EXPORT_SYMBOL(snd_sof_device_shutdown);
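
The shutdown path now flushes any still-pending asynchronous probe work first and only calls the DSP shutdown op when the firmware actually reached SOF_FW_BOOT_COMPLETE, leaving a device that never finished booting untouched. The guard, in isolation (logic equivalent to the hunk above):

        if (IS_ENABLED(CONFIG_SND_SOC_SOF_PROBE_WORK_QUEUE))
                cancel_work_sync(&sdev->probe_work);    /* wait out async probe */

        if (sdev->fw_state != SOF_FW_BOOT_COMPLETE)
                return 0;       /* nothing booted, nothing to shut down */

        return snd_sof_shutdown(sdev);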
 
index 4797a1c..da1c396 100644 (file)
@@ -9,31 +9,6 @@ config SND_SOC_SOF_INTEL_TOPLEVEL
 
 if SND_SOC_SOF_INTEL_TOPLEVEL
 
-config SND_SOC_SOF_INTEL_ACPI
-       def_tristate SND_SOC_SOF_ACPI
-       select SND_SOC_SOF_BAYTRAIL  if SND_SOC_SOF_BAYTRAIL_SUPPORT
-       select SND_SOC_SOF_BROADWELL if SND_SOC_SOF_BROADWELL_SUPPORT
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_INTEL_PCI
-       def_tristate SND_SOC_SOF_PCI
-       select SND_SOC_SOF_MERRIFIELD  if SND_SOC_SOF_MERRIFIELD_SUPPORT
-       select SND_SOC_SOF_APOLLOLAKE  if SND_SOC_SOF_APOLLOLAKE_SUPPORT
-       select SND_SOC_SOF_GEMINILAKE  if SND_SOC_SOF_GEMINILAKE_SUPPORT
-       select SND_SOC_SOF_CANNONLAKE  if SND_SOC_SOF_CANNONLAKE_SUPPORT
-       select SND_SOC_SOF_COFFEELAKE  if SND_SOC_SOF_COFFEELAKE_SUPPORT
-       select SND_SOC_SOF_ICELAKE     if SND_SOC_SOF_ICELAKE_SUPPORT
-       select SND_SOC_SOF_COMETLAKE   if SND_SOC_SOF_COMETLAKE_SUPPORT
-       select SND_SOC_SOF_TIGERLAKE   if SND_SOC_SOF_TIGERLAKE_SUPPORT
-       select SND_SOC_SOF_ELKHARTLAKE if SND_SOC_SOF_ELKHARTLAKE_SUPPORT
-       select SND_SOC_SOF_JASPERLAKE  if SND_SOC_SOF_JASPERLAKE_SUPPORT
-       select SND_SOC_SOF_ALDERLAKE   if SND_SOC_SOF_ALDERLAKE_SUPPORT
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
 config SND_SOC_SOF_INTEL_HIFI_EP_IPC
        tristate
        help
@@ -50,18 +25,25 @@ config SND_SOC_SOF_INTEL_ATOM_HIFI_EP
 
 config SND_SOC_SOF_INTEL_COMMON
        tristate
+       select SND_SOC_SOF
        select SND_SOC_ACPI_INTEL_MATCH
        select SND_SOC_SOF_XTENSA
        select SND_SOC_INTEL_MACH
        select SND_SOC_ACPI if ACPI
+       select SND_INTEL_DSP_CONFIG
        help
          This option is not user-selectable but automagically handled by
          'select' statements at a higher level.
 
-if SND_SOC_SOF_INTEL_ACPI
+if SND_SOC_SOF_ACPI
 
-config SND_SOC_SOF_BAYTRAIL_SUPPORT
-       bool "SOF support for Baytrail, Braswell and Cherrytrail"
+config SND_SOC_SOF_BAYTRAIL
+       tristate "SOF support for Baytrail, Braswell and Cherrytrail"
+       default SND_SOC_SOF_ACPI
+       select SND_SOC_SOF_INTEL_COMMON
+       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
+       select SND_SOC_SOF_ACPI_DEV
+       select IOSF_MBI if X86 && PCI
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Baytrail, Braswell or Cherrytrail processors.
@@ -75,17 +57,12 @@ config SND_SOC_SOF_BAYTRAIL_SUPPORT
          Say Y if you want to enable SOF on Baytrail/Cherrytrail.
          If unsure select "N".
 
-config SND_SOC_SOF_BAYTRAIL
-       tristate
-       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
-       select SND_INTEL_DSP_CONFIG
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_BROADWELL_SUPPORT
-       bool "SOF support for Broadwell"
-       select SND_INTEL_DSP_CONFIG
+config SND_SOC_SOF_BROADWELL
+       tristate "SOF support for Broadwell"
+       default SND_SOC_SOF_ACPI
+       select SND_SOC_SOF_INTEL_COMMON
+       select SND_SOC_SOF_INTEL_HIFI_EP_IPC
+       select SND_SOC_SOF_ACPI_DEV
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Broadwell processors.
@@ -100,197 +77,143 @@ config SND_SOC_SOF_BROADWELL_SUPPORT
          Say Y if you want to enable SOF on Broadwell.
          If unsure select "N".
 
-config SND_SOC_SOF_BROADWELL
-       tristate
-       select SND_SOC_SOF_INTEL_COMMON
-       select SND_SOC_SOF_INTEL_HIFI_EP_IPC
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-endif ## SND_SOC_SOF_INTEL_ACPI
+endif ## SND_SOC_SOF_ACPI
 
-if SND_SOC_SOF_INTEL_PCI
+if SND_SOC_SOF_PCI
 
-config SND_SOC_SOF_MERRIFIELD_SUPPORT
-       bool "SOF support for Tangier/Merrifield"
+config SND_SOC_SOF_MERRIFIELD
+       tristate "SOF support for Tangier/Merrifield"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Tangier/Merrifield processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_MERRIFIELD
+config SND_SOC_SOF_INTEL_APL
        tristate
-       select SND_SOC_SOF_INTEL_ATOM_HIFI_EP
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
+       select SND_SOC_SOF_HDA_COMMON
 
-config SND_SOC_SOF_APOLLOLAKE_SUPPORT
-       bool "SOF support for Apollolake"
+config SND_SOC_SOF_APOLLOLAKE
+       tristate "SOF support for Apollolake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_APL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Apollolake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_APOLLOLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_GEMINILAKE_SUPPORT
-       bool "SOF support for GeminiLake"
+config SND_SOC_SOF_GEMINILAKE
+       tristate "SOF support for GeminiLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_APL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Geminilake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_GEMINILAKE
+config SND_SOC_SOF_INTEL_CNL
        tristate
        select SND_SOC_SOF_HDA_COMMON
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
+       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
 
-config SND_SOC_SOF_CANNONLAKE_SUPPORT
-       bool "SOF support for Cannonlake"
+config SND_SOC_SOF_CANNONLAKE
+       tristate "SOF support for Cannonlake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_CNL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Cannonlake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_CANNONLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_COFFEELAKE_SUPPORT
-       bool "SOF support for CoffeeLake"
+config SND_SOC_SOF_COFFEELAKE
+       tristate "SOF support for CoffeeLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_CNL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Coffeelake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_COFFEELAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_ICELAKE_SUPPORT
-       bool "SOF support for Icelake"
+config SND_SOC_SOF_COMETLAKE
+       tristate "SOF support for CometLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_CNL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the Icelake processors.
-         Say Y if you have such a device.
+         using the Cometlake processors.
          If unsure select "N".
 
-config SND_SOC_SOF_ICELAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_COMETLAKE
+config SND_SOC_SOF_INTEL_ICL
        tristate
        select SND_SOC_SOF_HDA_COMMON
        select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_COMETLAKE_SUPPORT
-       bool
 
-config SND_SOC_SOF_COMETLAKE_LP_SUPPORT
-       bool "SOF support for CometLake"
-       select SND_SOC_SOF_COMETLAKE_SUPPORT
+config SND_SOC_SOF_ICELAKE
+       tristate "SOF support for Icelake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_ICL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the Cometlake processors.
+         using the Icelake processors.
+         Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_TIGERLAKE_SUPPORT
-       bool "SOF support for Tigerlake"
+config SND_SOC_SOF_JASPERLAKE
+       tristate "SOF support for JasperLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_ICL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the Tigerlake processors.
+         using the JasperLake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_TIGERLAKE
+config SND_SOC_SOF_INTEL_TGL
        tristate
        select SND_SOC_SOF_HDA_COMMON
        select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
 
-config SND_SOC_SOF_ELKHARTLAKE_SUPPORT
-       bool "SOF support for ElkhartLake"
+config SND_SOC_SOF_TIGERLAKE
+       tristate "SOF support for Tigerlake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_TGL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the ElkhartLake processors.
+         using the Tigerlake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
 config SND_SOC_SOF_ELKHARTLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_JASPERLAKE_SUPPORT
-       bool "SOF support for JasperLake"
+       tristate "SOF support for ElkhartLake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_TGL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
-         using the JasperLake processors.
+         using the ElkhartLake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_JASPERLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
-
-config SND_SOC_SOF_ALDERLAKE_SUPPORT
-       bool "SOF support for Alderlake"
+config SND_SOC_SOF_ALDERLAKE
+       tristate "SOF support for Alderlake"
+       default SND_SOC_SOF_PCI
+       select SND_SOC_SOF_INTEL_TGL
        help
          This adds support for Sound Open Firmware for Intel(R) platforms
          using the Alderlake processors.
          Say Y if you have such a device.
          If unsure select "N".
 
-config SND_SOC_SOF_ALDERLAKE
-       tristate
-       select SND_SOC_SOF_HDA_COMMON
-       select SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level
-
 config SND_SOC_SOF_HDA_COMMON
        tristate
-       select SND_INTEL_DSP_CONFIG
        select SND_SOC_SOF_INTEL_COMMON
+       select SND_SOC_SOF_PCI_DEV
+       select SND_INTEL_DSP_CONFIG
        select SND_SOC_SOF_HDA_LINK_BASELINE
        help
          This option is not user-selectable but automagically handled by
@@ -353,29 +276,22 @@ config SND_SOC_SOF_HDA
          This option is not user-selectable but automagically handled by
          'select' statements at a higher level.
 
-config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK
-       bool "SOF support for SoundWire"
-       depends on ACPI
-       help
-         This adds support for SoundWire with Sound Open Firmware
-         for Intel(R) platforms.
-         Say Y if you want to enable SoundWire links with SOF.
-         If unsure select "N".
-
 config SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
        tristate
-       select SND_SOC_SOF_INTEL_SOUNDWIRE if SND_SOC_SOF_INTEL_SOUNDWIRE_LINK
-       help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
 
 config SND_SOC_SOF_INTEL_SOUNDWIRE
-       tristate
-       select SOUNDWIRE
+       tristate "SOF support for SoundWire"
+       default SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
+       depends on SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE
+       depends on ACPI && SOUNDWIRE
+       depends on !(SOUNDWIRE=m && SND_SOC_SOF_INTEL_SOUNDWIRE_LINK_BASELINE=y)
        select SOUNDWIRE_INTEL
+       select SND_INTEL_SOUNDWIRE_ACPI
        help
-         This option is not user-selectable but automagically handled by
-         'select' statements at a higher level.
+         This adds support for SoundWire with Sound Open Firmware
+         for Intel(R) platforms.
+         Say Y if you want to enable SoundWire links with SOF.
+         If unsure select "N".
 
 endif ## SND_SOC_SOF_INTEL_PCI
 
index 2589111..f3d6f70 100644 (file)
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
 
-snd-sof-intel-byt-objs := byt.o
-snd-sof-intel-bdw-objs := bdw.o
+snd-sof-acpi-intel-byt-objs := byt.o
+snd-sof-acpi-intel-bdw-objs := bdw.o
 
 snd-sof-intel-ipc-objs := intel-ipc.o
 
@@ -13,8 +13,20 @@ snd-sof-intel-hda-common-$(CONFIG_SND_SOC_SOF_HDA_PROBES) += hda-compress.o
 
 snd-sof-intel-hda-objs := hda-codec.o
 
-obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-intel-byt.o
-obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-intel-bdw.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_ATOM_HIFI_EP) += snd-sof-acpi-intel-byt.o
+obj-$(CONFIG_SND_SOC_SOF_BROADWELL) += snd-sof-acpi-intel-bdw.o
 obj-$(CONFIG_SND_SOC_SOF_INTEL_HIFI_EP_IPC) += snd-sof-intel-ipc.o
 obj-$(CONFIG_SND_SOC_SOF_HDA_COMMON) += snd-sof-intel-hda-common.o
 obj-$(CONFIG_SND_SOC_SOF_HDA) += snd-sof-intel-hda.o
+
+snd-sof-pci-intel-tng-objs := pci-tng.o
+snd-sof-pci-intel-apl-objs := pci-apl.o
+snd-sof-pci-intel-cnl-objs := pci-cnl.o
+snd-sof-pci-intel-icl-objs := pci-icl.o
+snd-sof-pci-intel-tgl-objs := pci-tgl.o
+
+obj-$(CONFIG_SND_SOC_SOF_MERRIFIELD) += snd-sof-pci-intel-tng.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_APL) += snd-sof-pci-intel-apl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_CNL) += snd-sof-pci-intel-cnl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_ICL) += snd-sof-pci-intel-icl.o
+obj-$(CONFIG_SND_SOC_SOF_INTEL_TGL) += snd-sof-pci-intel-tgl.o
index fc29b91..c7ed2b3 100644 (file)
@@ -27,9 +27,10 @@ static const struct snd_sof_debugfs_map apl_dsp_debugfs[] = {
 
 /* apollolake ops */
 const struct snd_sof_dsp_ops sof_apl_ops = {
-       /* probe and remove */
+       /* probe/remove/shutdown */
        .probe          = hda_dsp_probe,
        .remove         = hda_dsp_remove,
+       .shutdown       = hda_dsp_shutdown,
 
        /* Register IO */
        .write          = sof_io_write,
index 50a4a73..fd5ae62 100644 (file)
 #include <linux/module.h>
 #include <sound/sof.h>
 #include <sound/sof/xtensa.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/intel-dsp-config.h>
 #include "../ops.h"
 #include "shim.h"
+#include "../sof-acpi-dev.h"
 #include "../sof-audio.h"
 
 /* BARs */
@@ -590,7 +594,7 @@ static struct snd_soc_dai_driver bdw_dai[] = {
 };
 
 /* broadwell ops */
-const struct snd_sof_dsp_ops sof_bdw_ops = {
+static const struct snd_sof_dsp_ops sof_bdw_ops = {
        /*Device init */
        .probe          = bdw_probe,
 
@@ -651,14 +655,69 @@ const struct snd_sof_dsp_ops sof_bdw_ops = {
 
        .arch_ops = &sof_xtensa_arch_ops,
 };
-EXPORT_SYMBOL_NS(sof_bdw_ops, SND_SOC_SOF_BROADWELL);
 
-const struct sof_intel_dsp_desc bdw_chip_info = {
+static const struct sof_intel_dsp_desc bdw_chip_info = {
        .cores_num = 1,
        .host_managed_cores_mask = 1,
 };
-EXPORT_SYMBOL_NS(bdw_chip_info, SND_SOC_SOF_BROADWELL);
+
+static const struct sof_dev_desc sof_acpi_broadwell_desc = {
+       .machines = snd_soc_acpi_intel_broadwell_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = -1,
+       .irqindex_host_ipc = 0,
+       .chip_info = &bdw_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-bdw.ri",
+       .nocodec_tplg_filename = "sof-bdw-nocodec.tplg",
+       .ops = &sof_bdw_ops,
+};
+
+static const struct acpi_device_id sof_broadwell_match[] = {
+       { "INT3438", (unsigned long)&sof_acpi_broadwell_desc },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, sof_broadwell_match);
+
+static int sof_broadwell_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       const struct acpi_device_id *id;
+       const struct sof_dev_desc *desc;
+       int ret;
+
+       id = acpi_match_device(dev->driver->acpi_match_table, dev);
+       if (!id)
+               return -ENODEV;
+
+       ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
+       if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+               dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
+               return -ENODEV;
+       }
+
+       desc = device_get_match_data(dev);
+       if (!desc)
+               return -ENODEV;
+
+       return sof_acpi_probe(pdev, desc);
+}
+
+/* acpi_driver definition */
+static struct platform_driver snd_sof_acpi_intel_bdw_driver = {
+       .probe = sof_broadwell_probe,
+       .remove = sof_acpi_remove,
+       .driver = {
+               .name = "sof-audio-acpi-intel-bdw",
+               .pm = &sof_acpi_pm,
+               .acpi_match_table = sof_broadwell_match,
+       },
+};
+module_platform_driver(snd_sof_acpi_intel_bdw_driver);
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
index 19260db..2846fde 100644 (file)
 #include <linux/module.h>
 #include <sound/sof.h>
 #include <sound/sof/xtensa.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/intel-dsp-config.h>
 #include "../ops.h"
 #include "shim.h"
+#include "../sof-acpi-dev.h"
 #include "../sof-audio.h"
 #include "../../intel/common/soc-intel-quirks.h"
 
@@ -822,7 +826,7 @@ irq:
 }
 
 /* baytrail ops */
-const struct snd_sof_dsp_ops sof_byt_ops = {
+static const struct snd_sof_dsp_ops sof_byt_ops = {
        /* device init */
        .probe          = byt_acpi_probe,
        .remove         = byt_remove,
@@ -892,16 +896,14 @@ const struct snd_sof_dsp_ops sof_byt_ops = {
 
        .arch_ops = &sof_xtensa_arch_ops,
 };
-EXPORT_SYMBOL_NS(sof_byt_ops, SND_SOC_SOF_BAYTRAIL);
 
-const struct sof_intel_dsp_desc byt_chip_info = {
+static const struct sof_intel_dsp_desc byt_chip_info = {
        .cores_num = 1,
        .host_managed_cores_mask = 1,
 };
-EXPORT_SYMBOL_NS(byt_chip_info, SND_SOC_SOF_BAYTRAIL);
 
 /* cherrytrail and braswell ops */
-const struct snd_sof_dsp_ops sof_cht_ops = {
+static const struct snd_sof_dsp_ops sof_cht_ops = {
        /* device init */
        .probe          = byt_acpi_probe,
        .remove         = byt_remove,
@@ -972,16 +974,104 @@ const struct snd_sof_dsp_ops sof_cht_ops = {
 
        .arch_ops = &sof_xtensa_arch_ops,
 };
-EXPORT_SYMBOL_NS(sof_cht_ops, SND_SOC_SOF_BAYTRAIL);
 
-const struct sof_intel_dsp_desc cht_chip_info = {
+static const struct sof_intel_dsp_desc cht_chip_info = {
        .cores_num = 1,
        .host_managed_cores_mask = 1,
 };
-EXPORT_SYMBOL_NS(cht_chip_info, SND_SOC_SOF_BAYTRAIL);
+
+/* BYTCR uses different IRQ index */
+static const struct sof_dev_desc sof_acpi_baytrailcr_desc = {
+       .machines = snd_soc_acpi_intel_baytrail_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = 2,
+       .irqindex_host_ipc = 0,
+       .chip_info = &byt_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-byt.ri",
+       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
+       .ops = &sof_byt_ops,
+};
+
+static const struct sof_dev_desc sof_acpi_baytrail_desc = {
+       .machines = snd_soc_acpi_intel_baytrail_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = 2,
+       .irqindex_host_ipc = 5,
+       .chip_info = &byt_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-byt.ri",
+       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
+       .ops = &sof_byt_ops,
+};
+
+static const struct sof_dev_desc sof_acpi_cherrytrail_desc = {
+       .machines = snd_soc_acpi_intel_cherrytrail_machines,
+       .resindex_lpe_base = 0,
+       .resindex_pcicfg_base = 1,
+       .resindex_imr_base = 2,
+       .irqindex_host_ipc = 5,
+       .chip_info = &cht_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cht.ri",
+       .nocodec_tplg_filename = "sof-cht-nocodec.tplg",
+       .ops = &sof_cht_ops,
+};
+
+static const struct acpi_device_id sof_baytrail_match[] = {
+       { "80860F28", (unsigned long)&sof_acpi_baytrail_desc },
+       { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc },
+       { }
+};
+MODULE_DEVICE_TABLE(acpi, sof_baytrail_match);
+
+static int sof_baytrail_probe(struct platform_device *pdev)
+{
+       struct device *dev = &pdev->dev;
+       const struct sof_dev_desc *desc;
+       const struct acpi_device_id *id;
+       int ret;
+
+       id = acpi_match_device(dev->driver->acpi_match_table, dev);
+       if (!id)
+               return -ENODEV;
+
+       ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
+       if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+               dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
+               return -ENODEV;
+       }
+
+       desc = device_get_match_data(dev);
+       if (!desc)
+               return -ENODEV;
+
+       if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev))
+               desc = &sof_acpi_baytrailcr_desc;
+
+       return sof_acpi_probe(pdev, desc);
+}
+
+/* acpi_driver definition */
+static struct platform_driver snd_sof_acpi_intel_byt_driver = {
+       .probe = sof_baytrail_probe,
+       .remove = sof_acpi_remove,
+       .driver = {
+               .name = "sof-audio-acpi-intel-byt",
+               .pm = &sof_acpi_pm,
+               .acpi_match_table = sof_baytrail_match,
+       },
+};
+module_platform_driver(snd_sof_acpi_intel_byt_driver);
 
 #endif /* CONFIG_SND_SOC_SOF_BAYTRAIL */
 
 MODULE_LICENSE("Dual BSD/GPL");
 MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HIFI_EP_IPC);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_SOC_SOF_ACPI_DEV);
index e38db51..821f25f 100644 (file)
@@ -232,9 +232,10 @@ void cnl_ipc_dump(struct snd_sof_dev *sdev)
 
 /* cannonlake ops */
 const struct snd_sof_dsp_ops sof_cnl_ops = {
-       /* probe and remove */
+       /* probe/remove/shutdown */
        .probe          = hda_dsp_probe,
        .remove         = hda_dsp_remove,
+       .shutdown       = hda_dsp_shutdown,
 
        /* Register IO */
        .write          = sof_io_write,
@@ -349,22 +350,6 @@ const struct sof_intel_dsp_desc cnl_chip_info = {
 };
 EXPORT_SYMBOL_NS(cnl_chip_info, SND_SOC_SOF_INTEL_HDA_COMMON);
 
-const struct sof_intel_dsp_desc ehl_chip_info = {
-       /* Elkhartlake */
-       .cores_num = 4,
-       .init_core_mask = 1,
-       .host_managed_cores_mask = BIT(0),
-       .ipc_req = CNL_DSP_REG_HIPCIDR,
-       .ipc_req_mask = CNL_DSP_REG_HIPCIDR_BUSY,
-       .ipc_ack = CNL_DSP_REG_HIPCIDA,
-       .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
-       .ipc_ctl = CNL_DSP_REG_HIPCCTL,
-       .rom_init_timeout       = 300,
-       .ssp_count = ICL_SSP_COUNT,
-       .ssp_base_offset = CNL_SSP_BASE_OFFSET,
-};
-EXPORT_SYMBOL_NS(ehl_chip_info, SND_SOC_SOF_INTEL_HDA_COMMON);
-
 const struct sof_intel_dsp_desc jsl_chip_info = {
        /* Jasperlake */
        .cores_num = 2,
index 5788fe3..736a54b 100644 (file)
@@ -207,7 +207,7 @@ int hda_dsp_core_power_down(struct snd_sof_dev *sdev, unsigned int core_mask)
 
        ret = snd_sof_dsp_read_poll_timeout(sdev, HDA_DSP_BAR,
                                HDA_DSP_REG_ADSPCS, adspcs,
-                               !(adspcs & HDA_DSP_ADSPCS_SPA_MASK(core_mask)),
+                               !(adspcs & HDA_DSP_ADSPCS_CPA_MASK(core_mask)),
                                HDA_DSP_REG_POLL_INTERVAL_US,
                                HDA_DSP_PD_TIMEOUT * USEC_PER_MSEC);
        if (ret < 0)
@@ -226,10 +226,17 @@ bool hda_dsp_core_is_enabled(struct snd_sof_dev *sdev,
 
        val = snd_sof_dsp_read(sdev, HDA_DSP_BAR, HDA_DSP_REG_ADSPCS);
 
-       is_enable = (val & HDA_DSP_ADSPCS_CPA_MASK(core_mask)) &&
-                   (val & HDA_DSP_ADSPCS_SPA_MASK(core_mask)) &&
-                   !(val & HDA_DSP_ADSPCS_CRST_MASK(core_mask)) &&
-                   !(val & HDA_DSP_ADSPCS_CSTALL_MASK(core_mask));
+#define MASK_IS_EQUAL(v, m, field) ({  \
+       u32 _m = field(m);              \
+       ((v) & _m) == _m;               \
+})
+
+       is_enable = MASK_IS_EQUAL(val, core_mask, HDA_DSP_ADSPCS_CPA_MASK) &&
+               MASK_IS_EQUAL(val, core_mask, HDA_DSP_ADSPCS_SPA_MASK) &&
+               !(val & HDA_DSP_ADSPCS_CRST_MASK(core_mask)) &&
+               !(val & HDA_DSP_ADSPCS_CSTALL_MASK(core_mask));
+
+#undef MASK_IS_EQUAL
 
        dev_dbg(sdev->dev, "DSP core(s) enabled? %d : core_mask %x\n",
                is_enable, core_mask);
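Two fixes land in the pair of hunks above. First, hda_dsp_core_power_down() now polls HDA_DSP_ADSPCS_CPA_MASK, the hardware-reported current-power status, instead of HDA_DSP_ADSPCS_SPA_MASK, the set-power request bits the driver itself just cleared, so the wait actually tracks the cores powering off. Second, hda_dsp_core_is_enabled() must see every bit of the requested core mask set, not merely a nonzero overlap. A self-contained illustration of the any-vs-all distinction, with made-up register values:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t val  = 0x1;	/* hardware: only core 0 is powered */
	uint32_t mask = 0x3;	/* caller asks about cores 0 and 1 */

	bool old_check = val & mask;		/* true: any common bit */
	bool new_check = (val & mask) == mask;	/* false: not all bits */

	printf("old=%d new=%d\n", old_check, new_check);	/* old=1 new=0 */
	return 0;
}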
@@ -885,6 +892,12 @@ int hda_dsp_suspend(struct snd_sof_dev *sdev, u32 target_state)
        return snd_sof_dsp_set_power_state(sdev, &target_dsp_state);
 }
 
+int hda_dsp_shutdown(struct snd_sof_dev *sdev)
+{
+       sdev->system_suspend_target = SOF_SUSPEND_S3;
+       return snd_sof_suspend(sdev->dev);
+}
+
 int hda_dsp_set_hw_params_upon_resume(struct snd_sof_dev *sdev)
 {
 #if IS_ENABLED(CONFIG_SND_SOC_SOF_HDA)
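The new hda_dsp_shutdown() deliberately reuses the suspend machinery with a forced S3 target, so the DSP is quiesced before reboot or kexec instead of being left running. A rough sketch of how the hook is reached on HDA platforms (the SOF-core dispatch step is inferred from context, not shown in this diff):

/*
 * pci_device_shutdown()                      PCI core reboot path
 *   -> sof_pci_shutdown(pci)                 common SOF PCI helper
 *     -> snd_sof_device_shutdown(&pci->dev)  SOF core
 *       -> ops->shutdown(sdev)               hda_dsp_shutdown() here
 *         -> snd_sof_suspend(dev)            with SOF_SUSPEND_S3 forced
 */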
index 0dc3a8c..0c096db 100644 (file)
 #include <linux/module.h>
 #include <linux/soundwire/sdw.h>
 #include <linux/soundwire/sdw_intel.h>
+#include <sound/intel-dsp-config.h>
 #include <sound/intel-nhlt.h>
 #include <sound/sof.h>
 #include <sound/sof/xtensa.h>
 #include "../sof-audio.h"
+#include "../sof-pci-dev.h"
 #include "../ops.h"
 #include "hda.h"
 
@@ -895,6 +897,7 @@ free_streams:
 /* dsp_unmap: not currently used */
        iounmap(sdev->bar[HDA_DSP_BAR]);
 hdac_bus_unmap:
+       platform_device_unregister(hdev->dmic_dev);
        iounmap(bus->remap_addr);
        hda_codec_i915_exit(sdev);
 err:
@@ -1258,8 +1261,24 @@ void hda_machine_select(struct snd_sof_dev *sdev)
                dev_warn(sdev->dev, "warning: No matching ASoC machine driver found\n");
 }
 
+int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
+{
+       int ret;
+
+       ret = snd_intel_dsp_driver_probe(pci);
+       if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
+               dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n");
+               return -ENODEV;
+       }
+
+       return sof_pci_probe(pci, pci_id);
+}
+EXPORT_SYMBOL_NS(hda_pci_intel_probe, SND_SOC_SOF_INTEL_HDA_COMMON);
+
 MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC);
 MODULE_IMPORT_NS(SND_SOC_SOF_HDA_AUDIO_CODEC_I915);
 MODULE_IMPORT_NS(SND_SOC_SOF_XTENSA);
+MODULE_IMPORT_NS(SND_INTEL_SOUNDWIRE_ACPI);
 MODULE_IMPORT_NS(SOUNDWIRE_INTEL_INIT);
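hda_pci_intel_probe() is what lets the driver arbitration move out of the generic sof-pci-dev.c (removed further down): HDA platforms first ask snd_intel_dsp_driver_probe() whether SOF should own the device, while the one non-HDA PCI platform, Merrifield in pci-tng.c below, does not go through the HDA wrapper and binds straight to sof_pci_probe(). The two wiring patterns, excerpted from the new per-platform drivers:

/* HDA platforms (apl/cnl/icl/tgl): arbitration wrapper first */
	.probe = hda_pci_intel_probe,

/* Merrifield (pci-tng.c): direct to the common probe */
	.probe = sof_pci_probe,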
index d1c38c3..ae80725 100644 (file)
@@ -517,6 +517,7 @@ int hda_dsp_resume(struct snd_sof_dev *sdev);
 int hda_dsp_runtime_suspend(struct snd_sof_dev *sdev);
 int hda_dsp_runtime_resume(struct snd_sof_dev *sdev);
 int hda_dsp_runtime_idle(struct snd_sof_dev *sdev);
+int hda_dsp_shutdown(struct snd_sof_dev *sdev);
 int hda_dsp_set_hw_params_upon_resume(struct snd_sof_dev *sdev);
 void hda_dsp_dump(struct snd_sof_dev *sdev, u32 flags);
 void hda_ipc_dump(struct snd_sof_dev *sdev);
@@ -764,4 +765,7 @@ void hda_machine_select(struct snd_sof_dev *sdev);
 void hda_set_mach_params(const struct snd_soc_acpi_mach *mach,
                         struct device *dev);
 
+/* PCI driver selection and probe */
+int hda_pci_intel_probe(struct pci_dev *pci, const struct pci_device_id *pci_id);
+
 #endif
index e9d5a0a..88a74be 100644 (file)
@@ -26,9 +26,10 @@ static const struct snd_sof_debugfs_map icl_dsp_debugfs[] = {
 
 /* Icelake ops */
 const struct snd_sof_dsp_ops sof_icl_ops = {
-       /* probe and remove */
+       /* probe/remove/shutdown */
        .probe          = hda_dsp_probe,
        .remove         = hda_dsp_remove,
+       .shutdown       = hda_dsp_shutdown,
 
        /* Register IO */
        .write          = sof_io_write,
diff --git a/sound/soc/sof/intel/pci-apl.c b/sound/soc/sof/intel/pci-apl.c
new file mode 100644 (file)
index 0000000..f89e746
--- /dev/null
@@ -0,0 +1,81 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc bxt_desc = {
+       .machines               = snd_soc_acpi_intel_bxt_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &apl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-apl.ri",
+       .nocodec_tplg_filename = "sof-apl-nocodec.tplg",
+       .ops = &sof_apl_ops,
+};
+
+static const struct sof_dev_desc glk_desc = {
+       .machines               = snd_soc_acpi_intel_glk_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &apl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-glk.ri",
+       .nocodec_tplg_filename = "sof-glk-nocodec.tplg",
+       .ops = &sof_apl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x5a98), /* BXT-P (ApolloLake) */
+               .driver_data = (unsigned long)&bxt_desc},
+       { PCI_DEVICE(0x8086, 0x1a98), /* BXT-T */
+               .driver_data = (unsigned long)&bxt_desc},
+       { PCI_DEVICE(0x8086, 0x3198), /* GeminiLake */
+               .driver_data = (unsigned long)&glk_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_apl_driver = {
+       .name = "sof-audio-pci-intel-apl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_apl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-cnl.c b/sound/soc/sof/intel/pci-cnl.c
new file mode 100644 (file)
index 0000000..f23257a
--- /dev/null
@@ -0,0 +1,104 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc cnl_desc = {
+       .machines               = snd_soc_acpi_intel_cnl_machines,
+       .alt_machines           = snd_soc_acpi_intel_cnl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &cnl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cnl.ri",
+       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc cfl_desc = {
+       .machines               = snd_soc_acpi_intel_cfl_machines,
+       .alt_machines           = snd_soc_acpi_intel_cfl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &cnl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cfl.ri",
+       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+static const struct sof_dev_desc cml_desc = {
+       .machines               = snd_soc_acpi_intel_cml_machines,
+       .alt_machines           = snd_soc_acpi_intel_cml_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &cnl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-cml.ri",
+       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x9dc8), /* CNL-LP */
+               .driver_data = (unsigned long)&cnl_desc},
+       { PCI_DEVICE(0x8086, 0xa348), /* CNL-H */
+               .driver_data = (unsigned long)&cfl_desc},
+       { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */
+               .driver_data = (unsigned long)&cml_desc},
+       { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */
+               .driver_data = (unsigned long)&cml_desc},
+       { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */
+               .driver_data = (unsigned long)&cml_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_cnl_driver = {
+       .name = "sof-audio-pci-intel-cnl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_cnl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-icl.c b/sound/soc/sof/intel/pci-icl.c
new file mode 100644 (file)
index 0000000..2f60c28
--- /dev/null
@@ -0,0 +1,84 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc icl_desc = {
+       .machines               = snd_soc_acpi_intel_icl_machines,
+       .alt_machines           = snd_soc_acpi_intel_icl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &icl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-icl.ri",
+       .nocodec_tplg_filename = "sof-icl-nocodec.tplg",
+       .ops = &sof_icl_ops,
+};
+
+static const struct sof_dev_desc jsl_desc = {
+       .machines               = snd_soc_acpi_intel_jsl_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &jsl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-jsl.ri",
+       .nocodec_tplg_filename = "sof-jsl-nocodec.tplg",
+       .ops = &sof_cnl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */
+               .driver_data = (unsigned long)&icl_desc},
+       { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */
+               .driver_data = (unsigned long)&icl_desc},
+       { PCI_DEVICE(0x8086, 0x38c8), /* ICL-N */
+               .driver_data = (unsigned long)&jsl_desc},
+       { PCI_DEVICE(0x8086, 0x4dc8), /* JSL-N */
+               .driver_data = (unsigned long)&jsl_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_icl_driver = {
+       .name = "sof-audio-pci-intel-icl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_icl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
diff --git a/sound/soc/sof/intel/pci-tgl.c b/sound/soc/sof/intel/pci-tgl.c
new file mode 100644 (file)
index 0000000..38bc353
--- /dev/null
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "hda.h"
+
+static const struct sof_dev_desc tgl_desc = {
+       .machines               = snd_soc_acpi_intel_tgl_machines,
+       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &tgl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-tgl.ri",
+       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
+       .ops = &sof_tgl_ops,
+};
+
+static const struct sof_dev_desc tglh_desc = {
+       .machines               = snd_soc_acpi_intel_tgl_machines,
+       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &tglh_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-tgl-h.ri",
+       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
+       .ops = &sof_tgl_ops,
+};
+
+static const struct sof_dev_desc ehl_desc = {
+       .machines               = snd_soc_acpi_intel_ehl_machines,
+       .use_acpi_target_states = true,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &ehl_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-ehl.ri",
+       .nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
+       .ops = &sof_tgl_ops,
+};
+
+static const struct sof_dev_desc adls_desc = {
+       .machines               = snd_soc_acpi_intel_adl_machines,
+       .alt_machines           = snd_soc_acpi_intel_adl_sdw_machines,
+       .resindex_lpe_base      = 0,
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = -1,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &adls_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-adl-s.ri",
+       .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
+       .ops = &sof_tgl_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */
+               .driver_data = (unsigned long)&tgl_desc},
+       { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */
+               .driver_data = (unsigned long)&tglh_desc},
+       { PCI_DEVICE(0x8086, 0x4b55), /* EHL */
+               .driver_data = (unsigned long)&ehl_desc},
+       { PCI_DEVICE(0x8086, 0x4b58), /* EHL */
+               .driver_data = (unsigned long)&ehl_desc},
+       { PCI_DEVICE(0x8086, 0x7ad0), /* ADL-S */
+               .driver_data = (unsigned long)&adls_desc},
+       { PCI_DEVICE(0x8086, 0x51c8), /* ADL-P */
+               .driver_data = (unsigned long)&tgl_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_tgl_driver = {
+       .name = "sof-audio-pci-intel-tgl",
+       .id_table = sof_pci_ids,
+       .probe = hda_pci_intel_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_tgl_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
+
diff --git a/sound/soc/sof/intel/pci-tng.c b/sound/soc/sof/intel/pci-tng.c
new file mode 100644 (file)
index 0000000..94b9704
--- /dev/null
@@ -0,0 +1,70 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause)
+//
+// This file is provided under a dual BSD/GPLv2 license.  When using or
+// redistributing this file, you may do so under either license.
+//
+// Copyright(c) 2018-2021 Intel Corporation. All rights reserved.
+//
+// Author: Liam Girdwood <liam.r.girdwood@linux.intel.com>
+//
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include <sound/soc-acpi.h>
+#include <sound/soc-acpi-intel-match.h>
+#include <sound/sof.h>
+#include "../ops.h"
+#include "../sof-pci-dev.h"
+
+/* platform specific devices */
+#include "shim.h"
+
+static struct snd_soc_acpi_mach sof_tng_machines[] = {
+       {
+               .id = "INT343A",
+               .drv_name = "edison",
+               .sof_fw_filename = "sof-byt.ri",
+               .sof_tplg_filename = "sof-byt.tplg",
+       },
+       {}
+};
+
+static const struct sof_dev_desc tng_desc = {
+       .machines               = sof_tng_machines,
+       .resindex_lpe_base      = 3,    /* IRAM, but subtract IRAM offset */
+       .resindex_pcicfg_base   = -1,
+       .resindex_imr_base      = 0,
+       .irqindex_host_ipc      = -1,
+       .resindex_dma_base      = -1,
+       .chip_info = &tng_chip_info,
+       .default_fw_path = "intel/sof",
+       .default_tplg_path = "intel/sof-tplg",
+       .default_fw_filename = "sof-byt.ri",
+       .nocodec_tplg_filename = "sof-byt.tplg",
+       .ops = &sof_tng_ops,
+};
+
+/* PCI IDs */
+static const struct pci_device_id sof_pci_ids[] = {
+       { PCI_DEVICE(0x8086, 0x119a),
+               .driver_data = (unsigned long)&tng_desc},
+       { 0, }
+};
+MODULE_DEVICE_TABLE(pci, sof_pci_ids);
+
+/* pci_driver definition */
+static struct pci_driver snd_sof_pci_intel_tng_driver = {
+       .name = "sof-audio-pci-intel-tng",
+       .id_table = sof_pci_ids,
+       .probe = sof_pci_probe,
+       .remove = sof_pci_remove,
+       .shutdown = sof_pci_shutdown,
+       .driver = {
+               .pm = &sof_pci_pm,
+       },
+};
+module_pci_driver(snd_sof_pci_intel_tng_driver);
+
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD);
+MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);
index 1e0afb5..529f68d 100644 (file)
@@ -167,13 +167,7 @@ struct sof_intel_dsp_desc {
 };
 
 extern const struct snd_sof_dsp_ops sof_tng_ops;
-extern const struct snd_sof_dsp_ops sof_byt_ops;
-extern const struct snd_sof_dsp_ops sof_cht_ops;
-extern const struct snd_sof_dsp_ops sof_bdw_ops;
 
-extern const struct sof_intel_dsp_desc byt_chip_info;
-extern const struct sof_intel_dsp_desc cht_chip_info;
-extern const struct sof_intel_dsp_desc bdw_chip_info;
 extern const struct sof_intel_dsp_desc tng_chip_info;
 
 struct sof_intel_stream {
index 419f05b..54ba1b8 100644 (file)
@@ -25,7 +25,7 @@ const struct snd_sof_dsp_ops sof_tgl_ops = {
        /* probe/remove/shutdown */
        .probe          = hda_dsp_probe,
        .remove         = hda_dsp_remove,
-       .shutdown       = hda_dsp_remove,
+       .shutdown       = hda_dsp_shutdown,
 
        /* Register IO */
        .write          = sof_io_write,
@@ -156,6 +156,22 @@ const struct sof_intel_dsp_desc tglh_chip_info = {
 };
 EXPORT_SYMBOL_NS(tglh_chip_info, SND_SOC_SOF_INTEL_HDA_COMMON);
 
+const struct sof_intel_dsp_desc ehl_chip_info = {
+       /* Elkhartlake */
+       .cores_num = 4,
+       .init_core_mask = 1,
+       .host_managed_cores_mask = BIT(0),
+       .ipc_req = CNL_DSP_REG_HIPCIDR,
+       .ipc_req_mask = CNL_DSP_REG_HIPCIDR_BUSY,
+       .ipc_ack = CNL_DSP_REG_HIPCIDA,
+       .ipc_ack_mask = CNL_DSP_REG_HIPCIDA_DONE,
+       .ipc_ctl = CNL_DSP_REG_HIPCCTL,
+       .rom_init_timeout       = 300,
+       .ssp_count = ICL_SSP_COUNT,
+       .ssp_base_offset = CNL_SSP_BASE_OFFSET,
+};
+EXPORT_SYMBOL_NS(ehl_chip_info, SND_SOC_SOF_INTEL_HDA_COMMON);
+
 const struct sof_intel_dsp_desc adls_chip_info = {
        /* Alderlake-S */
        .cores_num = 2,
index cc2e257..1fec042 100644 (file)
 #include <linux/firmware.h>
 #include <linux/module.h>
 #include <linux/pm_runtime.h>
-#include <sound/intel-dsp-config.h>
 #include <sound/soc-acpi.h>
 #include <sound/soc-acpi-intel-match.h>
 #include <sound/sof.h>
 #include "../intel/common/soc-intel-quirks.h"
 #include "ops.h"
+#include "sof-acpi-dev.h"
 
 /* platform specific devices */
 #include "intel/shim.h"
@@ -36,74 +36,12 @@ MODULE_PARM_DESC(sof_acpi_debug, "SOF ACPI debug options (0x0 all off)");
 
 #define SOF_ACPI_DISABLE_PM_RUNTIME BIT(0)
 
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL)
-static const struct sof_dev_desc sof_acpi_broadwell_desc = {
-       .machines = snd_soc_acpi_intel_broadwell_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = -1,
-       .irqindex_host_ipc = 0,
-       .chip_info = &bdw_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-bdw.ri",
-       .nocodec_tplg_filename = "sof-bdw-nocodec.tplg",
-       .ops = &sof_bdw_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
-
-/* BYTCR uses different IRQ index */
-static const struct sof_dev_desc sof_acpi_baytrailcr_desc = {
-       .machines = snd_soc_acpi_intel_baytrail_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = 2,
-       .irqindex_host_ipc = 0,
-       .chip_info = &byt_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-byt.ri",
-       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
-       .ops = &sof_byt_ops,
-};
-
-static const struct sof_dev_desc sof_acpi_baytrail_desc = {
-       .machines = snd_soc_acpi_intel_baytrail_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = 2,
-       .irqindex_host_ipc = 5,
-       .chip_info = &byt_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-byt.ri",
-       .nocodec_tplg_filename = "sof-byt-nocodec.tplg",
-       .ops = &sof_byt_ops,
-};
-
-static const struct sof_dev_desc sof_acpi_cherrytrail_desc = {
-       .machines = snd_soc_acpi_intel_cherrytrail_machines,
-       .resindex_lpe_base = 0,
-       .resindex_pcicfg_base = 1,
-       .resindex_imr_base = 2,
-       .irqindex_host_ipc = 5,
-       .chip_info = &cht_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cht.ri",
-       .nocodec_tplg_filename = "sof-cht-nocodec.tplg",
-       .ops = &sof_cht_ops,
-};
-
-#endif
-
-static const struct dev_pm_ops sof_acpi_pm = {
+const struct dev_pm_ops sof_acpi_pm = {
        SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume)
        SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume,
                           snd_sof_runtime_idle)
 };
+EXPORT_SYMBOL_NS(sof_acpi_pm, SND_SOC_SOF_ACPI_DEV);
 
 static void sof_acpi_probe_complete(struct device *dev)
 {
@@ -118,41 +56,19 @@ static void sof_acpi_probe_complete(struct device *dev)
        pm_runtime_enable(dev);
 }
 
-static int sof_acpi_probe(struct platform_device *pdev)
+int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc)
 {
        struct device *dev = &pdev->dev;
-       const struct acpi_device_id *id;
-       const struct sof_dev_desc *desc;
        struct snd_sof_pdata *sof_pdata;
        const struct snd_sof_dsp_ops *ops;
        int ret;
 
-       id = acpi_match_device(dev->driver->acpi_match_table, dev);
-       if (!id)
-               return -ENODEV;
-
-       if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) {
-               ret = snd_intel_acpi_dsp_driver_probe(dev, id->id);
-               if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
-                       dev_dbg(dev, "SOF ACPI driver not selected, aborting probe\n");
-                       return -ENODEV;
-               }
-       }
        dev_dbg(dev, "ACPI DSP detected");
 
        sof_pdata = devm_kzalloc(dev, sizeof(*sof_pdata), GFP_KERNEL);
        if (!sof_pdata)
                return -ENOMEM;
 
-       desc = device_get_match_data(dev);
-       if (!desc)
-               return -ENODEV;
-
-#if IS_ENABLED(CONFIG_ACPI) && IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
-       if (desc == &sof_acpi_baytrail_desc && soc_intel_is_byt_cr(pdev))
-               desc = &sof_acpi_baytrailcr_desc;
-#endif
-
        /* get ops for platform */
        ops = desc->ops;
        if (!ops) {
@@ -194,44 +110,20 @@ static int sof_acpi_probe(struct platform_device *pdev)
 
        return ret;
 }
+EXPORT_SYMBOL_NS(sof_acpi_probe, SND_SOC_SOF_ACPI_DEV);
 
-static int sof_acpi_remove(struct platform_device *pdev)
+int sof_acpi_remove(struct platform_device *pdev)
 {
+       struct device *dev = &pdev->dev;
+
        if (!(sof_acpi_debug & SOF_ACPI_DISABLE_PM_RUNTIME))
-               pm_runtime_disable(&pdev->dev);
+               pm_runtime_disable(dev);
 
        /* call sof helper for DSP hardware remove */
-       snd_sof_device_remove(&pdev->dev);
+       snd_sof_device_remove(dev);
 
        return 0;
 }
-
-#ifdef CONFIG_ACPI
-static const struct acpi_device_id sof_acpi_match[] = {
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_BROADWELL)
-       { "INT3438", (unsigned long)&sof_acpi_broadwell_desc },
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_BAYTRAIL)
-       { "80860F28", (unsigned long)&sof_acpi_baytrail_desc },
-       { "808622A8", (unsigned long)&sof_acpi_cherrytrail_desc },
-#endif
-       { }
-};
-MODULE_DEVICE_TABLE(acpi, sof_acpi_match);
-#endif
-
-/* acpi_driver definition */
-static struct platform_driver snd_sof_acpi_driver = {
-       .probe = sof_acpi_probe,
-       .remove = sof_acpi_remove,
-       .driver = {
-               .name = "sof-audio-acpi",
-               .pm = &sof_acpi_pm,
-               .acpi_match_table = ACPI_PTR(sof_acpi_match),
-       },
-};
-module_platform_driver(snd_sof_acpi_driver);
+EXPORT_SYMBOL_NS(sof_acpi_remove, SND_SOC_SOF_ACPI_DEV);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_IMPORT_NS(SND_SOC_SOF_BAYTRAIL);
-MODULE_IMPORT_NS(SND_SOC_SOF_BROADWELL);
diff --git a/sound/soc/sof/sof-acpi-dev.h b/sound/soc/sof/sof-acpi-dev.h
new file mode 100644 (file)
index 0000000..5c2b558
--- /dev/null
@@ -0,0 +1,16 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __SOUND_SOC_SOF_ACPI_H
+#define __SOUND_SOC_SOF_ACPI_H
+
+extern const struct dev_pm_ops sof_acpi_pm;
+int sof_acpi_probe(struct platform_device *pdev, const struct sof_dev_desc *desc);
+int sof_acpi_remove(struct platform_device *pdev);
+
+#endif
index fd1f0d8..b842a41 100644 (file)
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/pm_runtime.h>
-#include <sound/intel-dsp-config.h>
 #include <sound/soc-acpi.h>
 #include <sound/soc-acpi-intel-match.h>
 #include <sound/sof.h>
 #include "ops.h"
-
-/* platform specific devices */
-#include "intel/shim.h"
-#include "intel/hda.h"
+#include "sof-pci-dev.h"
 
 static char *fw_path;
 module_param(fw_path, charp, 0444);
@@ -81,243 +77,14 @@ static const struct dmi_system_id community_key_platforms[] = {
        {},
 };
 
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
-static const struct sof_dev_desc bxt_desc = {
-       .machines               = snd_soc_acpi_intel_bxt_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &apl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-apl.ri",
-       .nocodec_tplg_filename = "sof-apl-nocodec.tplg",
-       .ops = &sof_apl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE)
-static const struct sof_dev_desc glk_desc = {
-       .machines               = snd_soc_acpi_intel_glk_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &apl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-glk.ri",
-       .nocodec_tplg_filename = "sof-glk-nocodec.tplg",
-       .ops = &sof_apl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD)
-static struct snd_soc_acpi_mach sof_tng_machines[] = {
-       {
-               .id = "INT343A",
-               .drv_name = "edison",
-               .sof_fw_filename = "sof-byt.ri",
-               .sof_tplg_filename = "sof-byt.tplg",
-       },
-       {}
-};
-
-static const struct sof_dev_desc tng_desc = {
-       .machines               = sof_tng_machines,
-       .resindex_lpe_base      = 3,    /* IRAM, but subtract IRAM offset */
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = 0,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &tng_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-byt.ri",
-       .nocodec_tplg_filename = "sof-byt.tplg",
-       .ops = &sof_tng_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE)
-static const struct sof_dev_desc cnl_desc = {
-       .machines               = snd_soc_acpi_intel_cnl_machines,
-       .alt_machines           = snd_soc_acpi_intel_cnl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &cnl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cnl.ri",
-       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE)
-static const struct sof_dev_desc cfl_desc = {
-       .machines               = snd_soc_acpi_intel_cfl_machines,
-       .alt_machines           = snd_soc_acpi_intel_cfl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &cnl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cfl.ri",
-       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE)
-static const struct sof_dev_desc cml_desc = {
-       .machines               = snd_soc_acpi_intel_cml_machines,
-       .alt_machines           = snd_soc_acpi_intel_cml_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &cnl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-cml.ri",
-       .nocodec_tplg_filename = "sof-cnl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE)
-static const struct sof_dev_desc icl_desc = {
-       .machines               = snd_soc_acpi_intel_icl_machines,
-       .alt_machines           = snd_soc_acpi_intel_icl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &icl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-icl.ri",
-       .nocodec_tplg_filename = "sof-icl-nocodec.tplg",
-       .ops = &sof_icl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE) || IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-static const struct sof_dev_desc tgl_desc = {
-       .machines               = snd_soc_acpi_intel_tgl_machines,
-       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &tgl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-tgl.ri",
-       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
-       .ops = &sof_tgl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
-static const struct sof_dev_desc tglh_desc = {
-       .machines               = snd_soc_acpi_intel_tgl_machines,
-       .alt_machines           = snd_soc_acpi_intel_tgl_sdw_machines,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &tglh_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-tgl-h.ri",
-       .nocodec_tplg_filename = "sof-tgl-nocodec.tplg",
-       .ops = &sof_tgl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE)
-static const struct sof_dev_desc ehl_desc = {
-       .machines               = snd_soc_acpi_intel_ehl_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &ehl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-ehl.ri",
-       .nocodec_tplg_filename = "sof-ehl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
-static const struct sof_dev_desc jsl_desc = {
-       .machines               = snd_soc_acpi_intel_jsl_machines,
-       .use_acpi_target_states = true,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &jsl_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-jsl.ri",
-       .nocodec_tplg_filename = "sof-jsl-nocodec.tplg",
-       .ops = &sof_cnl_ops,
-};
-#endif
-
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-static const struct sof_dev_desc adls_desc = {
-       .machines               = snd_soc_acpi_intel_adl_machines,
-       .alt_machines           = snd_soc_acpi_intel_adl_sdw_machines,
-       .resindex_lpe_base      = 0,
-       .resindex_pcicfg_base   = -1,
-       .resindex_imr_base      = -1,
-       .irqindex_host_ipc      = -1,
-       .resindex_dma_base      = -1,
-       .chip_info = &adls_chip_info,
-       .default_fw_path = "intel/sof",
-       .default_tplg_path = "intel/sof-tplg",
-       .default_fw_filename = "sof-adl-s.ri",
-       .nocodec_tplg_filename = "sof-adl-nocodec.tplg",
-       .ops = &sof_tgl_ops,
-};
-#endif
-
-static const struct dev_pm_ops sof_pci_pm = {
+const struct dev_pm_ops sof_pci_pm = {
        .prepare = snd_sof_prepare,
        .complete = snd_sof_complete,
        SET_SYSTEM_SLEEP_PM_OPS(snd_sof_suspend, snd_sof_resume)
        SET_RUNTIME_PM_OPS(snd_sof_runtime_suspend, snd_sof_runtime_resume,
                           snd_sof_runtime_idle)
 };
+EXPORT_SYMBOL_NS(sof_pci_pm, SND_SOC_SOF_PCI_DEV);
 
 static void sof_pci_probe_complete(struct device *dev)
 {
@@ -343,8 +110,7 @@ static void sof_pci_probe_complete(struct device *dev)
        pm_runtime_put_noidle(dev);
 }
 
-static int sof_pci_probe(struct pci_dev *pci,
-                        const struct pci_device_id *pci_id)
+int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id)
 {
        struct device *dev = &pci->dev;
        const struct sof_dev_desc *desc =
@@ -353,13 +119,6 @@ static int sof_pci_probe(struct pci_dev *pci,
        const struct snd_sof_dsp_ops *ops;
        int ret;
 
-       if (IS_REACHABLE(CONFIG_SND_INTEL_DSP_CONFIG)) {
-               ret = snd_intel_dsp_driver_probe(pci);
-               if (ret != SND_INTEL_DSP_DRIVER_ANY && ret != SND_INTEL_DSP_DRIVER_SOF) {
-                       dev_dbg(&pci->dev, "SOF PCI driver not selected, aborting probe\n");
-                       return -ENODEV;
-               }
-       }
        dev_dbg(&pci->dev, "PCI DSP detected");
 
        /* get ops for platform */
@@ -447,8 +206,9 @@ release_regions:
 
        return ret;
 }
+EXPORT_SYMBOL_NS(sof_pci_probe, SND_SOC_SOF_PCI_DEV);
 
-static void sof_pci_remove(struct pci_dev *pci)
+void sof_pci_remove(struct pci_dev *pci)
 {
        /* call sof helper for DSP hardware remove */
        snd_sof_device_remove(&pci->dev);
@@ -461,94 +221,12 @@ static void sof_pci_remove(struct pci_dev *pci)
        /* release pci regions and disable device */
        pci_release_regions(pci);
 }
+EXPORT_SYMBOL_NS(sof_pci_remove, SND_SOC_SOF_PCI_DEV);
 
-static void sof_pci_shutdown(struct pci_dev *pci)
+void sof_pci_shutdown(struct pci_dev *pci)
 {
        snd_sof_device_shutdown(&pci->dev);
 }
-
-/* PCI IDs */
-static const struct pci_device_id sof_pci_ids[] = {
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_MERRIFIELD)
-       { PCI_DEVICE(0x8086, 0x119a),
-               .driver_data = (unsigned long)&tng_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_APOLLOLAKE)
-       /* BXT-P & Apollolake */
-       { PCI_DEVICE(0x8086, 0x5a98),
-               .driver_data = (unsigned long)&bxt_desc},
-       { PCI_DEVICE(0x8086, 0x1a98),
-               .driver_data = (unsigned long)&bxt_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_GEMINILAKE)
-       { PCI_DEVICE(0x8086, 0x3198),
-               .driver_data = (unsigned long)&glk_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_CANNONLAKE)
-       { PCI_DEVICE(0x8086, 0x9dc8),
-               .driver_data = (unsigned long)&cnl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COFFEELAKE)
-       { PCI_DEVICE(0x8086, 0xa348),
-               .driver_data = (unsigned long)&cfl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ICELAKE)
-       { PCI_DEVICE(0x8086, 0x34C8), /* ICL-LP */
-               .driver_data = (unsigned long)&icl_desc},
-       { PCI_DEVICE(0x8086, 0x3dc8), /* ICL-H */
-               .driver_data = (unsigned long)&icl_desc},
-
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_JASPERLAKE)
-       { PCI_DEVICE(0x8086, 0x38c8),
-               .driver_data = (unsigned long)&jsl_desc},
-       { PCI_DEVICE(0x8086, 0x4dc8),
-               .driver_data = (unsigned long)&jsl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_COMETLAKE)
-       { PCI_DEVICE(0x8086, 0x02c8), /* CML-LP */
-               .driver_data = (unsigned long)&cml_desc},
-       { PCI_DEVICE(0x8086, 0x06c8), /* CML-H */
-               .driver_data = (unsigned long)&cml_desc},
-       { PCI_DEVICE(0x8086, 0xa3f0), /* CML-S */
-               .driver_data = (unsigned long)&cml_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_TIGERLAKE)
-       { PCI_DEVICE(0x8086, 0xa0c8), /* TGL-LP */
-               .driver_data = (unsigned long)&tgl_desc},
-       { PCI_DEVICE(0x8086, 0x43c8), /* TGL-H */
-               .driver_data = (unsigned long)&tglh_desc},
-
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ELKHARTLAKE)
-       { PCI_DEVICE(0x8086, 0x4b55),
-               .driver_data = (unsigned long)&ehl_desc},
-       { PCI_DEVICE(0x8086, 0x4b58),
-               .driver_data = (unsigned long)&ehl_desc},
-#endif
-#if IS_ENABLED(CONFIG_SND_SOC_SOF_ALDERLAKE)
-       { PCI_DEVICE(0x8086, 0x7ad0),
-               .driver_data = (unsigned long)&adls_desc},
-       { PCI_DEVICE(0x8086, 0x51c8),
-               .driver_data = (unsigned long)&tgl_desc},
-#endif
-       { 0, }
-};
-MODULE_DEVICE_TABLE(pci, sof_pci_ids);
-
-/* pci_driver definition */
-static struct pci_driver snd_sof_pci_driver = {
-       .name = "sof-audio-pci",
-       .id_table = sof_pci_ids,
-       .probe = sof_pci_probe,
-       .remove = sof_pci_remove,
-       .shutdown = sof_pci_shutdown,
-       .driver = {
-               .pm = &sof_pci_pm,
-       },
-};
-module_pci_driver(snd_sof_pci_driver);
+EXPORT_SYMBOL_NS(sof_pci_shutdown, SND_SOC_SOF_PCI_DEV);
 
 MODULE_LICENSE("Dual BSD/GPL");
-MODULE_IMPORT_NS(SND_SOC_SOF_MERRIFIELD);
-MODULE_IMPORT_NS(SND_SOC_SOF_INTEL_HDA_COMMON);
diff --git a/sound/soc/sof/sof-pci-dev.h b/sound/soc/sof/sof-pci-dev.h
new file mode 100644 (file)
index 0000000..81155a5
--- /dev/null
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-3-Clause) */
+/*
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * Copyright(c) 2021 Intel Corporation. All rights reserved.
+ */
+
+#ifndef __SOUND_SOC_SOF_PCI_H
+#define __SOUND_SOC_SOF_PCI_H
+
+extern const struct dev_pm_ops sof_pci_pm;
+int sof_pci_probe(struct pci_dev *pci, const struct pci_device_id *pci_id);
+void sof_pci_remove(struct pci_dev *pci);
+void sof_pci_shutdown(struct pci_dev *pci);
+
+#endif
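Everything a per-platform PCI driver needs is now exported into the SND_SOC_SOF_PCI_DEV symbol namespace, so each consumer must declare MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV) or modpost will reject the unimported references at build time. A skeletal consumer built from this header, as a sketch only (device ID and driver name are hypothetical; a real driver also supplies a sof_dev_desc via driver_data, as the new pci-*.c files do):

#include <linux/module.h>
#include <linux/pci.h>
#include "sof-pci-dev.h"

static const struct pci_device_id example_ids[] = {
	{ PCI_DEVICE(0x8086, 0xffff) },	/* hypothetical device ID */
	{ 0, }
};
MODULE_DEVICE_TABLE(pci, example_ids);

static struct pci_driver sof_example_driver = {
	.name = "sof-audio-pci-example",	/* hypothetical name */
	.id_table = example_ids,
	.probe = sof_pci_probe,
	.remove = sof_pci_remove,
	.shutdown = sof_pci_shutdown,
	.driver = {
		.pm = &sof_pci_pm,
	},
};
module_pci_driver(sof_example_driver);

MODULE_LICENSE("Dual BSD/GPL");
MODULE_IMPORT_NS(SND_SOC_SOF_PCI_DEV);	/* mandatory for the exports above */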
index 6c13cc8..2173991 100644 (file)
@@ -1364,6 +1364,7 @@ static struct snd_soc_card *sun4i_codec_create_card(struct device *dev)
                return ERR_PTR(-ENOMEM);
 
        card->dev               = dev;
+       card->owner             = THIS_MODULE;
        card->name              = "sun4i-codec";
        card->dapm_widgets      = sun4i_codec_card_dapm_widgets;
        card->num_dapm_widgets  = ARRAY_SIZE(sun4i_codec_card_dapm_widgets);
@@ -1396,6 +1397,7 @@ static struct snd_soc_card *sun6i_codec_create_card(struct device *dev)
                return ERR_PTR(-ENOMEM);
 
        card->dev               = dev;
+       card->owner             = THIS_MODULE;
        card->name              = "A31 Audio Codec";
        card->dapm_widgets      = sun6i_codec_card_dapm_widgets;
        card->num_dapm_widgets  = ARRAY_SIZE(sun6i_codec_card_dapm_widgets);
@@ -1449,6 +1451,7 @@ static struct snd_soc_card *sun8i_a23_codec_create_card(struct device *dev)
                return ERR_PTR(-ENOMEM);
 
        card->dev               = dev;
+       card->owner             = THIS_MODULE;
        card->name              = "A23 Audio Codec";
        card->dapm_widgets      = sun6i_codec_card_dapm_widgets;
        card->num_dapm_widgets  = ARRAY_SIZE(sun6i_codec_card_dapm_widgets);
@@ -1487,6 +1490,7 @@ static struct snd_soc_card *sun8i_h3_codec_create_card(struct device *dev)
                return ERR_PTR(-ENOMEM);
 
        card->dev               = dev;
+       card->owner             = THIS_MODULE;
        card->name              = "H3 Audio Codec";
        card->dapm_widgets      = sun6i_codec_card_dapm_widgets;
        card->num_dapm_widgets  = ARRAY_SIZE(sun6i_codec_card_dapm_widgets);
@@ -1525,6 +1529,7 @@ static struct snd_soc_card *sun8i_v3s_codec_create_card(struct device *dev)
                return ERR_PTR(-ENOMEM);
 
        card->dev               = dev;
+       card->owner             = THIS_MODULE;
        card->name              = "V3s Audio Codec";
        card->dapm_widgets      = sun6i_codec_card_dapm_widgets;
        card->num_dapm_widgets  = ARRAY_SIZE(sun6i_codec_card_dapm_widgets);
index 9d0da5f..d24ae00 100644
@@ -62,7 +62,6 @@ MODULE_PARM_DESC(enable, "Enable Sun AMD7930 soundcard.");
 MODULE_AUTHOR("Thomas K. Dyas and David S. Miller");
 MODULE_DESCRIPTION("Sun AMD7930");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Sun,AMD7930}}");
 
 /* Device register layout.  */
 
index 0eed5f7..35c1780 100644
@@ -52,7 +52,6 @@ MODULE_PARM_DESC(enable, "Enable Sun CS4231 soundcard.");
 MODULE_AUTHOR("Jaroslav Kysela, Derrick J. Brashear and David S. Miller");
 MODULE_DESCRIPTION("Sun CS4231");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Sun,CS4231}}");
 
 #ifdef SBUS_SUPPORT
 struct sbus_dma_info {
index 5a6fb66..b055f58 100644
@@ -76,7 +76,6 @@
 MODULE_AUTHOR("Rudolf Koenig, Brent Baccala and Martin Habets");
 MODULE_DESCRIPTION("Sun DBRI");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Sun,DBRI}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
index 08c6e6a..33e9621 100644
@@ -26,7 +26,6 @@
 MODULE_AUTHOR("Torsten Schenk <torsten.schenk@zoho.com>");
 MODULE_DESCRIPTION("TerraTec DMX 6Fire USB audio driver");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{TerraTec,DMX 6Fire USB}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for card */
index e03481c..49f63f8 100644
 MODULE_AUTHOR("Daniel Mack <daniel@caiaq.de>");
 MODULE_DESCRIPTION("caiaq USB audio");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Native Instruments,RigKontrol2},"
-                        "{Native Instruments,RigKontrol3},"
-                        "{Native Instruments,Kore Controller},"
-                        "{Native Instruments,Kore Controller 2},"
-                        "{Native Instruments,Audio Kontrol 1},"
-                        "{Native Instruments,Audio 2 DJ},"
-                        "{Native Instruments,Audio 4 DJ},"
-                        "{Native Instruments,Audio 8 DJ},"
-                        "{Native Instruments,Traktor Audio 2},"
-                        "{Native Instruments,Session I/O},"
-                        "{Native Instruments,GuitarRig mobile},"
-                        "{Native Instruments,Traktor Kontrol X1},"
-                        "{Native Instruments,Traktor Kontrol S4},"
-                        "{Native Instruments,Maschine Controller}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */
 static char* id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */
index 85ed850..0826a43 100644
@@ -58,8 +58,6 @@
 MODULE_AUTHOR("Takashi Iwai <tiwai@suse.de>");
 MODULE_DESCRIPTION("USB Audio");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{Generic,USB Audio}}");
-
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX;     /* Index 0-MAX */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR;      /* ID for this card */
@@ -830,6 +828,9 @@ static int usb_audio_probe(struct usb_interface *intf,
                snd_media_device_create(chip, intf);
        }
 
+       if (quirk)
+               chip->quirk_type = quirk->type;
+
        usb_chip[chip->index] = chip;
        chip->intf[chip->num_interfaces] = intf;
        chip->num_interfaces++;
@@ -904,6 +905,9 @@ static void usb_audio_disconnect(struct usb_interface *intf)
                }
        }
 
+       if (chip->quirk_type & QUIRK_SETUP_DISABLE_AUTOSUSPEND)
+               usb_enable_autosuspend(interface_to_usbdev(intf));
+
        chip->num_interfaces--;
        if (chip->num_interfaces <= 0) {
                usb_chip[chip->index] = NULL;
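A minimal sketch of the symmetry this hunk restores, assuming the device was flagged with QUIRK_SETUP_DISABLE_AUTOSUSPEND at probe time (see the setup_disable_autosuspend() change in quirks.c further below): the probe path now pins the device with usb_disable_autosuspend(), so the disconnect path must drop that reference again to keep the PM usage count balanced across rebinds.

    struct usb_device *udev = interface_to_usbdev(intf);

    usb_disable_autosuspend(udev);  /* probe: bumps the PM usage count */
    /* ... streams and mixer in use ... */
    usb_enable_autosuspend(udev);   /* disconnect: drops the count again */
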
index 8243652..a746802 100644
@@ -652,10 +652,10 @@ static int set_sample_rate_v2v3(struct snd_usb_audio *chip,
                cur_rate = prev_rate;
 
        if (cur_rate != rate) {
-               usb_audio_warn(chip,
-                              "%d:%d: freq mismatch (RO clock): req %d, clock runs @%d\n",
-                              fmt->iface, fmt->altsetting, rate, cur_rate);
-               return -ENXIO;
+               usb_audio_dbg(chip,
+                             "%d:%d: freq mismatch: req %d, clock runs @%d\n",
+                             fmt->iface, fmt->altsetting, rate, cur_rate);
+               /* continue processing */
        }
 
 validation:
index c282418..95385e9 100644
@@ -21,23 +21,6 @@ MODULE_AUTHOR("Michael Trimarchi <michael@amarulasolutions.com>");
 MODULE_AUTHOR("Antonio Ospite <ao2@amarulasolutions.com>");
 MODULE_DESCRIPTION("M2Tech hiFace USB-SPDIF audio driver");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{M2Tech,Young},"
-                        "{M2Tech,hiFace},"
-                        "{M2Tech,North Star},"
-                        "{M2Tech,W4S Young},"
-                        "{M2Tech,Corrson},"
-                        "{M2Tech,AUDIA},"
-                        "{M2Tech,SL Audio},"
-                        "{M2Tech,Empirical},"
-                        "{M2Tech,Rockna},"
-                        "{M2Tech,Pathos},"
-                        "{M2Tech,Metronome},"
-                        "{M2Tech,CAD},"
-                        "{M2Tech,Audio Esclusive},"
-                        "{M2Tech,Rotel},"
-                        "{M2Tech,Eeaudio},"
-                        "{The Chord Company,CHORD},"
-                        "{AVA Group A/S,Vitus}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */
 static char *id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for card */
index 6b30155..5834d1d 100644
@@ -19,7 +19,6 @@
 MODULE_DESCRIPTION("Edirol UA-101/1000 driver");
 MODULE_AUTHOR("Clemens Ladisch <clemens@ladisch.de>");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{{Edirol,UA-101},{Edirol,UA-1000}}");
 
 /*
  * Should not be lower than the minimum scheduling delay of the host
index b1c78db..b004b2e 100644
@@ -1307,6 +1307,17 @@ no_res_check:
                        /* totally crap, return an error */
                        return -EINVAL;
                }
+       } else {
+               /* if the max volume is too low, it's likely a bogus range;
+                * here we use -96dB as the threshold
+                */
+               if (cval->dBmax <= -9600) {
+                       usb_audio_info(cval->head.mixer->chip,
+                                      "%d:%d: bogus dB values (%d/%d), disabling dB reporting\n",
+                                      cval->head.id, mixer_ctrl_intf(cval->head.mixer),
+                                      cval->dBmin, cval->dBmax);
+                       cval->dBmin = cval->dBmax = 0;
+               }
        }
 
        return 0;
index a7212f1..646deb6 100644
@@ -537,6 +537,16 @@ static const struct usbmix_ctl_map usbmix_ctl_maps[] = {
                .map = bose_companion5_map,
        },
        {
+               /* Corsair Virtuoso SE (wired mode) */
+               .id = USB_ID(0x1b1c, 0x0a3d),
+               .map = corsair_virtuoso_map,
+       },
+       {
+               /* Corsair Virtuoso SE (wireless mode) */
+               .id = USB_ID(0x1b1c, 0x0a3e),
+               .map = corsair_virtuoso_map,
+       },
+       {
                /* Corsair Virtuoso (wired mode) */
                .id = USB_ID(0x1b1c, 0x0a41),
                .map = corsair_virtuoso_map,
index 08873d2..ffd9223 100644
@@ -2883,7 +2883,7 @@ static int snd_djm_controls_put(struct snd_kcontrol *kctl, struct snd_ctl_elem_v
        u8 group = (private_value & SND_DJM_GROUP_MASK) >> SND_DJM_GROUP_SHIFT;
        u16 value = elem->value.enumerated.item[0];
 
-       kctl->private_value = ((device << SND_DJM_DEVICE_SHIFT) |
+       kctl->private_value = (((unsigned long)device << SND_DJM_DEVICE_SHIFT) |
                              (group << SND_DJM_GROUP_SHIFT) |
                              value);
 
@@ -2921,7 +2921,7 @@ static int snd_djm_controls_create(struct usb_mixer_interface *mixer,
                value = device->controls[i].default_value;
                knew.name = device->controls[i].name;
                knew.private_value = (
-                       (device_idx << SND_DJM_DEVICE_SHIFT) |
+                       ((unsigned long)device_idx << SND_DJM_DEVICE_SHIFT) |
                        (i << SND_DJM_GROUP_SHIFT) |
                        value);
                err = snd_djm_controls_update(mixer, device_idx, i, value);
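Why the (unsigned long) casts matter, as a minimal sketch assuming SND_DJM_DEVICE_SHIFT is large enough (say 24) for the shifted byte to reach the sign bit of int: the u8 operand is promoted to 32-bit int before the shift, and a negative int result is then sign-extended when stored into the wider private_value.

    unsigned char device = 0x80;
    unsigned long bad  = device << 24;                /* int result 0x80000000 sign-extends */
    unsigned long good = (unsigned long)device << 24; /* widened first, stays 0x80000000 */
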
index bf5a0f3..e5311b6 100644
@@ -845,13 +845,19 @@ get_sync_ep_from_substream(struct snd_usb_substream *subs)
 
        list_for_each_entry(fp, &subs->fmt_list, list) {
                ep = snd_usb_get_endpoint(chip, fp->endpoint);
-               if (ep && ep->cur_rate)
-                       return ep;
+               if (ep && ep->cur_audiofmt) {
+                       /* if the EP is already opened solely for this substream,
+                        * we may still change the parameter; otherwise this
+                        * substream has to follow the existing parameter
+                        */
+                       if (ep->cur_audiofmt != subs->cur_audiofmt || ep->opened > 1)
+                               return ep;
+               }
                if (!fp->implicit_fb)
                        continue;
                /* for the implicit fb, check the sync ep as well */
                ep = snd_usb_get_endpoint(chip, fp->sync_ep);
-               if (ep && ep->cur_rate)
+               if (ep && ep->cur_audiofmt)
                        return ep;
        }
        return NULL;
index 9ba4682..176437a 100644
@@ -547,7 +547,7 @@ static int setup_disable_autosuspend(struct snd_usb_audio *chip,
                                       struct usb_driver *driver,
                                       const struct snd_usb_audio_quirk *quirk)
 {
-       driver->supports_autosuspend = 0;
+       usb_disable_autosuspend(interface_to_usbdev(iface));
        return 1;       /* Continue with creating streams and mixer */
 }
 
@@ -1482,7 +1482,7 @@ static int pioneer_djm_set_format_quirk(struct snd_usb_substream *subs,
        usb_set_interface(subs->dev, 0, 1);
        // we should derive windex from fmt->sync_ep, but it's not set
        snd_usb_ctl_msg(subs->stream->chip->dev,
-               usb_rcvctrlpipe(subs->stream->chip->dev, 0),
+               usb_sndctrlpipe(subs->stream->chip->dev, 0),
                0x01, 0x22, 0x0100, windex, &sr, 0x0003);
        return 0;
 }
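Context for the pipe fix above: a control transfer's pipe direction must match bit 7 of bmRequestType, and 0x22 here (USB_TYPE_CLASS | USB_RECIP_ENDPOINT) has the direction bit clear, i.e. host-to-device, so the send pipe is the correct one; a minimal sketch:

    unsigned int out = usb_sndctrlpipe(dev, 0);  /* bmRequestType bit 7 = 0 (OUT) */
    unsigned int in  = usb_rcvctrlpipe(dev, 0);  /* bmRequestType bit 7 = 1 (IN)  */
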
@@ -1520,6 +1520,8 @@ bool snd_usb_get_sample_rate_quirk(struct snd_usb_audio *chip)
        case USB_ID(0x1901, 0x0191): /* GE B850V3 CP2114 audio interface */
        case USB_ID(0x21b4, 0x0081): /* AudioQuest DragonFly */
        case USB_ID(0x2912, 0x30c8): /* Audioengine D1 */
+       case USB_ID(0x413c, 0xa506): /* Dell AE515 sound bar */
+       case USB_ID(0x046d, 0x084c): /* Logitech ConferenceCam Connect */
                return true;
        }
 
@@ -1670,6 +1672,14 @@ void snd_usb_ctl_msg_quirk(struct usb_device *dev, unsigned int pipe,
            && (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
                msleep(20);
 
+       /*
+        * Plantronics headsets (C320, C320-M, etc.) need a delay to avoid
+        * random microphone failures.
+        */
+       if (USB_ID_VENDOR(chip->usb_id) == 0x047f &&
+           (requesttype & USB_TYPE_MASK) == USB_TYPE_CLASS)
+               msleep(20);
+
        /* Zoom R16/24, many Logitech (at least H650e/H570e/BCC950),
         * Jabra 550a, and Kingston HyperX need a tiny delay here,
         * otherwise requests like get/set frequency return
index 215c177..60b9dd7 100644
@@ -27,6 +27,7 @@ struct snd_usb_audio {
        struct snd_card *card;
        struct usb_interface *intf[MAX_CARD_INTERFACES];
        u32 usb_id;
+       uint16_t quirk_type;
        struct mutex mutex;
        unsigned int system_suspend;
        atomic_t active;
index c541581..3cd28d2 100644
 MODULE_AUTHOR("Karsten Wiese <annabellesgarden@yahoo.de>");
 MODULE_DESCRIPTION("TASCAM "NAME_ALLCAPS" Version 0.8.7.2");
 MODULE_LICENSE("GPL");
-MODULE_SUPPORTED_DEVICE("{{TASCAM(0x1604),"NAME_ALLCAPS"(0x8001)(0x8005)(0x8007)}}");
 
 static int index[SNDRV_CARDS] = SNDRV_DEFAULT_IDX; /* Index 0-max */
 static char* id[SNDRV_CARDS] = SNDRV_DEFAULT_STR; /* Id for this card */
index 1d66c3a..33b12aa 100644
@@ -1887,4 +1887,3 @@ MODULE_AUTHOR("Vaibhav Agarwal <vaibhav.agarwal@intel.com>");
 MODULE_AUTHOR("Jerome Anand <jerome.anand@intel.com>");
 MODULE_DESCRIPTION("Intel HDMI Audio driver");
 MODULE_LICENSE("GPL v2");
-MODULE_SUPPORTED_DEVICE("{Intel,Intel_HAD}");
index 228d820..2cb0a19 100644
@@ -391,4 +391,3 @@ module_exit(xen_drv_fini);
 MODULE_DESCRIPTION("Xen virtual sound device frontend");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("xen:" XENSND_DRIVER_NAME);
-MODULE_SUPPORTED_DEVICE("{{ALSA,Virtual soundcard}}");
index 543dd70..ad64d67 100644
 #define ACR_SIZE       4
 
 
-#define PTRACE_OLDSETOPTIONS        21
-
+#define PTRACE_OLDSETOPTIONS           21
+#define PTRACE_SYSEMU                  31
+#define PTRACE_SYSEMU_SINGLESTEP       32
 #ifndef __ASSEMBLY__
 #include <linux/stddef.h>
 #include <linux/types.h>
index 84b8878..cc96e26 100644
@@ -13,7 +13,7 @@
 /*
  * Defines x86 CPU feature bits
  */
-#define NCAPINTS                       19         /* N 32-bit words worth of info */
+#define NCAPINTS                       20         /* N 32-bit words worth of info */
 #define NBUGINTS                       1          /* N 32-bit bug flags */
 
 /*
@@ -96,7 +96,7 @@
 #define X86_FEATURE_SYSCALL32          ( 3*32+14) /* "" syscall in IA32 userspace */
 #define X86_FEATURE_SYSENTER32         ( 3*32+15) /* "" sysenter in IA32 userspace */
 #define X86_FEATURE_REP_GOOD           ( 3*32+16) /* REP microcode works well */
-#define X86_FEATURE_SME_COHERENT       ( 3*32+17) /* "" AMD hardware-enforced cache coherency */
+/* FREE!                                ( 3*32+17) */
 #define X86_FEATURE_LFENCE_RDTSC       ( 3*32+18) /* "" LFENCE synchronizes RDTSC */
 #define X86_FEATURE_ACC_POWER          ( 3*32+19) /* AMD Accumulated Power Mechanism */
 #define X86_FEATURE_NOPL               ( 3*32+20) /* The NOPL (0F 1F) instructions */
 #define X86_FEATURE_INVPCID_SINGLE     ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */
 #define X86_FEATURE_HW_PSTATE          ( 7*32+ 8) /* AMD HW-PState */
 #define X86_FEATURE_PROC_FEEDBACK      ( 7*32+ 9) /* AMD ProcFeedbackInterface */
-#define X86_FEATURE_SME                        ( 7*32+10) /* AMD Secure Memory Encryption */
+/* FREE!                                ( 7*32+10) */
 #define X86_FEATURE_PTI                        ( 7*32+11) /* Kernel Page Table Isolation enabled */
 #define X86_FEATURE_RETPOLINE          ( 7*32+12) /* "" Generic Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_RETPOLINE_AMD      ( 7*32+13) /* "" AMD Retpoline mitigation for Spectre variant 2 */
 #define X86_FEATURE_SSBD               ( 7*32+17) /* Speculative Store Bypass Disable */
 #define X86_FEATURE_MBA                        ( 7*32+18) /* Memory Bandwidth Allocation */
 #define X86_FEATURE_RSB_CTXSW          ( 7*32+19) /* "" Fill RSB on context switches */
-#define X86_FEATURE_SEV                        ( 7*32+20) /* AMD Secure Encrypted Virtualization */
+/* FREE!                                ( 7*32+20) */
 #define X86_FEATURE_USE_IBPB           ( 7*32+21) /* "" Indirect Branch Prediction Barrier enabled */
 #define X86_FEATURE_USE_IBRS_FW                ( 7*32+22) /* "" Use IBRS during runtime firmware calls */
 #define X86_FEATURE_SPEC_STORE_BYPASS_DISABLE  ( 7*32+23) /* "" Disable Speculative Store Bypass. */
 #define X86_FEATURE_EPT_AD             ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 #define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
-#define X86_FEATURE_SEV_ES             ( 8*32+20) /* AMD Secure Encrypted Virtualization - Encrypted State */
-#define X86_FEATURE_VM_PAGE_FLUSH      ( 8*32+21) /* "" VM Page Flush MSR is supported */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_PER_THREAD_MBA     (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
+#define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVX512_BF16                (12*32+ 5) /* AVX512 BFLOAT16 instructions */
 
 /* AMD-defined CPU features, CPUID level 0x80000008 (EBX), word 13 */
 #define X86_FEATURE_AVIC               (15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD    (15*32+15) /* Virtual VMSAVE VMLOAD */
 #define X86_FEATURE_VGIF               (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_SVME_ADDR_CHK      (15*32+28) /* "" SVME addr check */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
 #define X86_FEATURE_AVX512VBMI         (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/
 #define X86_FEATURE_CORE_CAPABILITIES  (18*32+30) /* "" IA32_CORE_CAPABILITIES MSR */
 #define X86_FEATURE_SPEC_CTRL_SSBD     (18*32+31) /* "" Speculative Store Bypass Disable */
 
+/* AMD-defined memory encryption features, CPUID level 0x8000001f (EAX), word 19 */
+#define X86_FEATURE_SME                        (19*32+ 0) /* AMD Secure Memory Encryption */
+#define X86_FEATURE_SEV                        (19*32+ 1) /* AMD Secure Encrypted Virtualization */
+#define X86_FEATURE_VM_PAGE_FLUSH      (19*32+ 2) /* "" VM Page Flush MSR is supported */
+#define X86_FEATURE_SEV_ES             (19*32+ 3) /* AMD Secure Encrypted Virtualization - Encrypted State */
+#define X86_FEATURE_SME_COHERENT       (19*32+10) /* "" AMD hardware-enforced cache coherency */
+
 /*
  * BUG word(s)
  */
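The new word 19 mirrors CPUID leaf 0x8000001f EAX bit for bit, so the same bits can be probed from userspace; a hedged sketch using GCC's <cpuid.h> helper:

    #include <cpuid.h>
    #include <stdbool.h>

    static bool has_sev(void)
    {
            unsigned int eax, ebx, ecx, edx;

            if (!__get_cpuid(0x8000001f, &eax, &ebx, &ecx, &edx))
                    return false;
            return eax & (1u << 1);    /* bit 1 = SEV, matching (19*32+ 1) */
    }
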
index 52c6262..cc777c1 100644
@@ -7,9 +7,12 @@
  * Copyright (C) IBM Corporation, 2009
  */
 
+#include <asm/byteorder.h>
 /* insn_attr_t is defined in inat.h */
 #include "inat.h"
 
+#if defined(__BYTE_ORDER) ? __BYTE_ORDER == __LITTLE_ENDIAN : defined(__LITTLE_ENDIAN)
+
 struct insn_field {
        union {
                insn_value_t value;
@@ -20,6 +23,48 @@ struct insn_field {
        unsigned char nbytes;
 };
 
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+                                 unsigned char n)
+{
+       p->value = v;
+       p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+                                insn_byte_t v)
+{
+       p->bytes[n] = v;
+}
+
+#else
+
+struct insn_field {
+       insn_value_t value;
+       union {
+               insn_value_t little;
+               insn_byte_t bytes[4];
+       };
+       /* !0 if we've run insn_get_xxx() for this field */
+       unsigned char got;
+       unsigned char nbytes;
+};
+
+static inline void insn_field_set(struct insn_field *p, insn_value_t v,
+                                 unsigned char n)
+{
+       p->value = v;
+       p->little = __cpu_to_le32(v);
+       p->nbytes = n;
+}
+
+static inline void insn_set_byte(struct insn_field *p, unsigned char n,
+                                insn_byte_t v)
+{
+       p->bytes[n] = v;
+       p->value = __le32_to_cpu(p->little);
+}
+#endif
+
 struct insn {
        struct insn_field prefixes;     /*
                                         * Prefixes
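A minimal sketch of the invariant the big-endian accessors above maintain: bytes[] always holds instruction-stream (little-endian) order while value holds CPU order, and insn_set_byte() keeps both views coherent.

    struct insn_field f = { };

    insn_set_byte(&f, 0, 0x78);
    insn_set_byte(&f, 1, 0x56);
    insn_set_byte(&f, 2, 0x34);
    insn_set_byte(&f, 3, 0x12);
    /* f.value is 0x12345678 on little- and big-endian hosts alike */
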
index fdbffec..5a2baf2 100644
@@ -40,6 +40,8 @@
 #define ORC_REG_MAX                    15
 
 #ifndef __ASSEMBLY__
+#include <asm/byteorder.h>
+
 /*
  * This struct is more or less a vastly simplified version of the DWARF Call
  * Frame Information standard.  It contains only the necessary parts of DWARF
 struct orc_entry {
        s16             sp_offset;
        s16             bp_offset;
+#if defined(__LITTLE_ENDIAN_BITFIELD)
        unsigned        sp_reg:4;
        unsigned        bp_reg:4;
        unsigned        type:2;
        unsigned        end:1;
+#elif defined(__BIG_ENDIAN_BITFIELD)
+       unsigned        bp_reg:4;
+       unsigned        sp_reg:4;
+       unsigned        unused:5;
+       unsigned        end:1;
+       unsigned        type:2;
+#endif
 } __packed;
 
 #endif /* __ASSEMBLY__ */
index 8e76d37..5a3022c 100644
@@ -112,6 +112,7 @@ struct kvm_ioapic_state {
 #define KVM_NR_IRQCHIPS          3
 
 #define KVM_RUN_X86_SMM                 (1 << 0)
+#define KVM_RUN_X86_BUS_LOCK     (1 << 1)
 
 /* for KVM_GET_REGS and KVM_SET_REGS */
 struct kvm_regs {
index ada955c..b8e650a 100644
@@ -89,6 +89,7 @@
 #define EXIT_REASON_XRSTORS             64
 #define EXIT_REASON_UMWAIT              67
 #define EXIT_REASON_TPAUSE              68
+#define EXIT_REASON_BUS_LOCK            74
 
 #define VMX_EXIT_REASONS \
        { EXIT_REASON_EXCEPTION_NMI,         "EXCEPTION_NMI" }, \
        { EXIT_REASON_XSAVES,                "XSAVES" }, \
        { EXIT_REASON_XRSTORS,               "XRSTORS" }, \
        { EXIT_REASON_UMWAIT,                "UMWAIT" }, \
-       { EXIT_REASON_TPAUSE,                "TPAUSE" }
+       { EXIT_REASON_TPAUSE,                "TPAUSE" }, \
+       { EXIT_REASON_BUS_LOCK,              "BUS_LOCK" }
 
 #define VMX_EXIT_REASON_FLAGS \
        { VMX_EXIT_REASONS_FAILED_VMENTRY,      "FAILED_VMENTRY" }
index 0151dfc..3d9355e 100644
@@ -5,6 +5,7 @@
  * Copyright (C) IBM Corporation, 2002, 2004, 2009
  */
 
+#include <linux/kernel.h>
 #ifdef __KERNEL__
 #include <linux/string.h>
 #else
 
 #include "../include/asm/emulate_prefix.h"
 
+#define leXX_to_cpu(t, r)                                              \
+({                                                                     \
+       __typeof__(t) v;                                                \
+       switch (sizeof(t)) {                                            \
+       case 4: v = le32_to_cpu(r); break;                              \
+       case 2: v = le16_to_cpu(r); break;                              \
+       case 1: v = r; break;                                           \
+       default:                                                        \
+               BUILD_BUG(); break;                                     \
+       }                                                               \
+       v;                                                              \
+})
+
 /* Verify next sizeof(t) bytes can be on the same instruction */
 #define validate_next(t, insn, n)      \
        ((insn)->next_byte + sizeof(t) + n <= (insn)->end_kaddr)
 
 #define __get_next(t, insn)    \
-       ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); r; })
+       ({ t r = *(t*)insn->next_byte; insn->next_byte += sizeof(t); leXX_to_cpu(t, r); })
 
 #define __peek_nbyte_next(t, insn, n)  \
-       ({ t r = *(t*)((insn)->next_byte + n); r; })
+       ({ t r = *(t*)((insn)->next_byte + n); leXX_to_cpu(t, r); })
 
 #define get_next(t, insn)      \
        ({ if (unlikely(!validate_next(t, insn, 0))) goto err_out; __get_next(t, insn); })
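What the width dispatch expands to, as a hedged sketch given a struct insn *insn mid-decode: multi-byte fields fetched from the little-endian instruction stream get swapped on big-endian hosts, while single bytes pass through untouched.

    __u32 raw = *(__u32 *)insn->next_byte;  /* stream (little-endian) order */
    __u32 val = leXX_to_cpu(__u32, raw);    /* le32_to_cpu() on big-endian */

    __u8 b = *(__u8 *)insn->next_byte;
    __u8 v = leXX_to_cpu(__u8, b);          /* case 1: returned as-is */
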
@@ -147,9 +161,9 @@ found:
                        b = insn->prefixes.bytes[3];
                        for (i = 0; i < nb; i++)
                                if (prefixes->bytes[i] == lb)
-                                       prefixes->bytes[i] = b;
+                                       insn_set_byte(prefixes, i, b);
                }
-               insn->prefixes.bytes[3] = lb;
+               insn_set_byte(&insn->prefixes, 3, lb);
        }
 
        /* Decode REX prefix */
@@ -157,8 +171,7 @@ found:
                b = peek_next(insn_byte_t, insn);
                attr = inat_get_opcode_attribute(b);
                if (inat_is_rex_prefix(attr)) {
-                       insn->rex_prefix.value = b;
-                       insn->rex_prefix.nbytes = 1;
+                       insn_field_set(&insn->rex_prefix, b, 1);
                        insn->next_byte++;
                        if (X86_REX_W(b))
                                /* REX.W overrides opnd_size */
@@ -181,13 +194,13 @@ found:
                        if (X86_MODRM_MOD(b2) != 3)
                                goto vex_end;
                }
-               insn->vex_prefix.bytes[0] = b;
-               insn->vex_prefix.bytes[1] = b2;
+               insn_set_byte(&insn->vex_prefix, 0, b);
+               insn_set_byte(&insn->vex_prefix, 1, b2);
                if (inat_is_evex_prefix(attr)) {
                        b2 = peek_nbyte_next(insn_byte_t, insn, 2);
-                       insn->vex_prefix.bytes[2] = b2;
+                       insn_set_byte(&insn->vex_prefix, 2, b2);
                        b2 = peek_nbyte_next(insn_byte_t, insn, 3);
-                       insn->vex_prefix.bytes[3] = b2;
+                       insn_set_byte(&insn->vex_prefix, 3, b2);
                        insn->vex_prefix.nbytes = 4;
                        insn->next_byte += 4;
                        if (insn->x86_64 && X86_VEX_W(b2))
@@ -195,7 +208,7 @@ found:
                                insn->opnd_bytes = 8;
                } else if (inat_is_vex3_prefix(attr)) {
                        b2 = peek_nbyte_next(insn_byte_t, insn, 2);
-                       insn->vex_prefix.bytes[2] = b2;
+                       insn_set_byte(&insn->vex_prefix, 2, b2);
                        insn->vex_prefix.nbytes = 3;
                        insn->next_byte += 3;
                        if (insn->x86_64 && X86_VEX_W(b2))
@@ -207,7 +220,7 @@ found:
                         * Makes it easier to decode vex.W, vex.vvvv,
                         * vex.L and vex.pp. Masking with 0x7f sets vex.W == 0.
                         */
-                       insn->vex_prefix.bytes[2] = b2 & 0x7f;
+                       insn_set_byte(&insn->vex_prefix, 2, b2 & 0x7f);
                        insn->vex_prefix.nbytes = 2;
                        insn->next_byte += 2;
                }
@@ -243,7 +256,7 @@ void insn_get_opcode(struct insn *insn)
 
        /* Get first opcode */
        op = get_next(insn_byte_t, insn);
-       opcode->bytes[0] = op;
+       insn_set_byte(opcode, 0, op);
        opcode->nbytes = 1;
 
        /* Check if there is VEX prefix or not */
@@ -295,8 +308,7 @@ void insn_get_modrm(struct insn *insn)
 
        if (inat_has_modrm(insn->attr)) {
                mod = get_next(insn_byte_t, insn);
-               modrm->value = mod;
-               modrm->nbytes = 1;
+               insn_field_set(modrm, mod, 1);
                if (inat_is_group(insn->attr)) {
                        pfx_id = insn_last_prefix_id(insn);
                        insn->attr = inat_get_group_attribute(mod, pfx_id,
@@ -334,7 +346,7 @@ int insn_rip_relative(struct insn *insn)
         * For rip-relative instructions, the mod field (top 2 bits)
         * is zero and the r/m field (bottom 3 bits) is 0x5.
         */
-       return (modrm->nbytes && (modrm->value & 0xc7) == 0x5);
+       return (modrm->nbytes && (modrm->bytes[0] & 0xc7) == 0x5);
 }
 
 /**
@@ -353,11 +365,11 @@ void insn_get_sib(struct insn *insn)
        if (!insn->modrm.got)
                insn_get_modrm(insn);
        if (insn->modrm.nbytes) {
-               modrm = (insn_byte_t)insn->modrm.value;
+               modrm = insn->modrm.bytes[0];
                if (insn->addr_bytes != 2 &&
                    X86_MODRM_MOD(modrm) != 3 && X86_MODRM_RM(modrm) == 4) {
-                       insn->sib.value = get_next(insn_byte_t, insn);
-                       insn->sib.nbytes = 1;
+                       insn_field_set(&insn->sib,
+                                      get_next(insn_byte_t, insn), 1);
                }
        }
        insn->sib.got = 1;
@@ -407,19 +419,18 @@ void insn_get_displacement(struct insn *insn)
                if (mod == 3)
                        goto out;
                if (mod == 1) {
-                       insn->displacement.value = get_next(signed char, insn);
-                       insn->displacement.nbytes = 1;
+                       insn_field_set(&insn->displacement,
+                                      get_next(signed char, insn), 1);
                } else if (insn->addr_bytes == 2) {
                        if ((mod == 0 && rm == 6) || mod == 2) {
-                               insn->displacement.value =
-                                        get_next(short, insn);
-                               insn->displacement.nbytes = 2;
+                               insn_field_set(&insn->displacement,
+                                              get_next(short, insn), 2);
                        }
                } else {
                        if ((mod == 0 && rm == 5) || mod == 2 ||
                            (mod == 0 && base == 5)) {
-                               insn->displacement.value = get_next(int, insn);
-                               insn->displacement.nbytes = 4;
+                               insn_field_set(&insn->displacement,
+                                              get_next(int, insn), 4);
                        }
                }
        }
@@ -435,18 +446,14 @@ static int __get_moffset(struct insn *insn)
 {
        switch (insn->addr_bytes) {
        case 2:
-               insn->moffset1.value = get_next(short, insn);
-               insn->moffset1.nbytes = 2;
+               insn_field_set(&insn->moffset1, get_next(short, insn), 2);
                break;
        case 4:
-               insn->moffset1.value = get_next(int, insn);
-               insn->moffset1.nbytes = 4;
+               insn_field_set(&insn->moffset1, get_next(int, insn), 4);
                break;
        case 8:
-               insn->moffset1.value = get_next(int, insn);
-               insn->moffset1.nbytes = 4;
-               insn->moffset2.value = get_next(int, insn);
-               insn->moffset2.nbytes = 4;
+               insn_field_set(&insn->moffset1, get_next(int, insn), 4);
+               insn_field_set(&insn->moffset2, get_next(int, insn), 4);
                break;
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
@@ -464,13 +471,11 @@ static int __get_immv32(struct insn *insn)
 {
        switch (insn->opnd_bytes) {
        case 2:
-               insn->immediate.value = get_next(short, insn);
-               insn->immediate.nbytes = 2;
+               insn_field_set(&insn->immediate, get_next(short, insn), 2);
                break;
        case 4:
        case 8:
-               insn->immediate.value = get_next(int, insn);
-               insn->immediate.nbytes = 4;
+               insn_field_set(&insn->immediate, get_next(int, insn), 4);
                break;
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
@@ -487,18 +492,15 @@ static int __get_immv(struct insn *insn)
 {
        switch (insn->opnd_bytes) {
        case 2:
-               insn->immediate1.value = get_next(short, insn);
-               insn->immediate1.nbytes = 2;
+               insn_field_set(&insn->immediate1, get_next(short, insn), 2);
                break;
        case 4:
-               insn->immediate1.value = get_next(int, insn);
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
                insn->immediate1.nbytes = 4;
                break;
        case 8:
-               insn->immediate1.value = get_next(int, insn);
-               insn->immediate1.nbytes = 4;
-               insn->immediate2.value = get_next(int, insn);
-               insn->immediate2.nbytes = 4;
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+               insn_field_set(&insn->immediate2, get_next(int, insn), 4);
                break;
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
@@ -515,12 +517,10 @@ static int __get_immptr(struct insn *insn)
 {
        switch (insn->opnd_bytes) {
        case 2:
-               insn->immediate1.value = get_next(short, insn);
-               insn->immediate1.nbytes = 2;
+               insn_field_set(&insn->immediate1, get_next(short, insn), 2);
                break;
        case 4:
-               insn->immediate1.value = get_next(int, insn);
-               insn->immediate1.nbytes = 4;
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
                break;
        case 8:
                /* ptr16:64 does not exist (no segment) */
@@ -528,8 +528,7 @@ static int __get_immptr(struct insn *insn)
        default:        /* opnd_bytes must be modified manually */
                goto err_out;
        }
-       insn->immediate2.value = get_next(unsigned short, insn);
-       insn->immediate2.nbytes = 2;
+       insn_field_set(&insn->immediate2, get_next(unsigned short, insn), 2);
        insn->immediate1.got = insn->immediate2.got = 1;
 
        return 1;
@@ -565,22 +564,17 @@ void insn_get_immediate(struct insn *insn)
 
        switch (inat_immediate_size(insn->attr)) {
        case INAT_IMM_BYTE:
-               insn->immediate.value = get_next(signed char, insn);
-               insn->immediate.nbytes = 1;
+               insn_field_set(&insn->immediate, get_next(signed char, insn), 1);
                break;
        case INAT_IMM_WORD:
-               insn->immediate.value = get_next(short, insn);
-               insn->immediate.nbytes = 2;
+               insn_field_set(&insn->immediate, get_next(short, insn), 2);
                break;
        case INAT_IMM_DWORD:
-               insn->immediate.value = get_next(int, insn);
-               insn->immediate.nbytes = 4;
+               insn_field_set(&insn->immediate, get_next(int, insn), 4);
                break;
        case INAT_IMM_QWORD:
-               insn->immediate1.value = get_next(int, insn);
-               insn->immediate1.nbytes = 4;
-               insn->immediate2.value = get_next(int, insn);
-               insn->immediate2.nbytes = 4;
+               insn_field_set(&insn->immediate1, get_next(int, insn), 4);
+               insn_field_set(&insn->immediate2, get_next(int, insn), 4);
                break;
        case INAT_IMM_PTR:
                if (!__get_immptr(insn))
@@ -599,8 +593,7 @@ void insn_get_immediate(struct insn *insn)
                goto err_out;
        }
        if (inat_has_second_immediate(insn->attr)) {
-               insn->immediate2.value = get_next(signed char, insn);
-               insn->immediate2.nbytes = 1;
+               insn_field_set(&insn->immediate2, get_next(signed char, insn), 1);
        }
 done:
        insn->immediate.got = 1;
index 7409d78..80d966c 100644
@@ -260,6 +260,11 @@ static struct btf_id *add_symbol(struct rb_root *root, char *name, size_t size)
        return btf_id__add(root, id, false);
 }
 
+/* Older libelf.h and glibc elf.h might not yet define the ELF compression types. */
+#ifndef SHF_COMPRESSED
+#define SHF_COMPRESSED (1 << 11) /* Section with compressed data. */
+#endif
+
 /*
  * The data of compressed section should be aligned to 4
  * (for 32bit) or 8 (for 64 bit) bytes. The binutils ld
index bae48e6..5ed41b9 100644
@@ -30,12 +30,18 @@ build     := -f $(srctree)/tools/build/Makefile.build dir=. obj
 
 all: $(OUTPUT)fixdep
 
+# Make sure there is something to clean;
+# the feature directory contains a check for an existing OUTPUT
+TMP_O := $(if $(OUTPUT),$(OUTPUT)/feature,./)
+
 clean:
        $(call QUIET_CLEAN, fixdep)
        $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
        $(Q)rm -f $(OUTPUT)fixdep
        $(call QUIET_CLEAN, feature-detect)
-       $(Q)$(MAKE) -C feature/ clean >/dev/null
+ifneq ($(wildcard $(TMP_O)),)
+       $(Q)$(MAKE) -C feature OUTPUT=$(TMP_O) clean >/dev/null
+endif
 
 $(OUTPUT)fixdep-in.o: FORCE
        $(Q)$(MAKE) $(build)=fixdep
index b0e35ee..4ac5c08 100644
 #define CORESIGHT_ETM_PMU_NAME "cs_etm"
 #define CORESIGHT_ETM_PMU_SEED  0x10
 
-/* ETMv3.5/PTM's ETMCR config bit */
-#define ETM_OPT_CYCACC  12
-#define ETM_OPT_CTXTID 14
-#define ETM_OPT_TS      28
-#define ETM_OPT_RETSTK 29
+/*
+ * Below are the bit-offset definitions for the perf options; they work as
+ * arbitrary values common to all ETM versions.
+ *
+ * Most of them originally come from ETMv3.5/PTM's ETMCR config; therefore,
+ * ETMv3.5/PTM doesn't define its ETMCR config bits with an "ETM3_" prefix
+ * and uses the macros below directly as config bits.
+ */
+#define ETM_OPT_CYCACC         12
+#define ETM_OPT_CTXTID         14
+#define ETM_OPT_CTXTID2                15
+#define ETM_OPT_TS             28
+#define ETM_OPT_RETSTK         29
 
 /* ETMv4 CONFIGR programming bits for the ETM OPTs */
 #define ETM4_CFG_BIT_CYCACC    4
 #define ETM4_CFG_BIT_CTXTID    6
+#define ETM4_CFG_BIT_VMID      7
 #define ETM4_CFG_BIT_TS                11
 #define ETM4_CFG_BIT_RETSTK    12
+#define ETM4_CFG_BIT_VMID_OPT  15
 
 static inline int coresight_get_trace_id(int cpu)
 {
index d07e586..acb6f4d 100644
@@ -3,8 +3,5 @@
 
 #define EXPORT_SYMBOL(sym)
 #define EXPORT_SYMBOL_GPL(sym)
-#define EXPORT_SYMBOL_GPL_FUTURE(sym)
-#define EXPORT_UNUSED_SYMBOL(sym)
-#define EXPORT_UNUSED_SYMBOL_GPL(sym)
 
 #endif
index 577f514..7e72d97 100644
@@ -29,11 +29,14 @@ struct unwind_hint {
  *
  * UNWIND_HINT_TYPE_REGS_PARTIAL: Used in entry code to indicate that
  * sp_reg+sp_offset points to the iret return frame.
+ *
+ * UNWIND_HINT_FUNC: Generate the unwind metadata of a callable function.
+ * Useful for code which doesn't have an ELF function annotation.
  */
 #define UNWIND_HINT_TYPE_CALL          0
 #define UNWIND_HINT_TYPE_REGS          1
 #define UNWIND_HINT_TYPE_REGS_PARTIAL  2
-#define UNWIND_HINT_TYPE_RET_OFFSET    3
+#define UNWIND_HINT_TYPE_FUNC          3
 
 #ifdef CONFIG_STACK_VALIDATION
 
@@ -109,6 +112,12 @@ struct unwind_hint {
        .popsection
 .endm
 
+.macro STACK_FRAME_NON_STANDARD func:req
+       .pushsection .discard.func_stack_frame_non_standard, "aw"
+               .long \func - .
+       .popsection
+.endm
+
 #endif /* __ASSEMBLY__ */
 
 #else /* !CONFIG_STACK_VALIDATION */
@@ -122,6 +131,8 @@ struct unwind_hint {
 #define ANNOTATE_INTRA_FUNCTION_CALL
 .macro UNWIND_HINT sp_reg:req sp_offset=0 type:req end=0
 .endm
+.macro STACK_FRAME_NON_STANDARD func:req
+.endm
 #endif
 
 #endif /* CONFIG_STACK_VALIDATION */
index 7287529..ce58cff 100644
@@ -861,9 +861,11 @@ __SYSCALL(__NR_faccessat2, sys_faccessat2)
 __SYSCALL(__NR_process_madvise, sys_process_madvise)
 #define __NR_epoll_pwait2 441
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
+#define __NR_mount_setattr 442
+__SYSCALL(__NR_mount_setattr, sys_mount_setattr)
 
 #undef __NR_syscalls
-#define __NR_syscalls 442
+#define __NR_syscalls 443
 
 /*
  * 32 bit systems traditionally used different
index 808b48a..0827037 100644
@@ -1,11 +1,10 @@
-/**
- * \file drm.h
+/*
  * Header for the Direct Rendering Manager
  *
- * \author Rickard E. (Rik) Faith <faith@valinux.com>
+ * Author: Rickard E. (Rik) Faith <faith@valinux.com>
  *
- * \par Acknowledgments:
- * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic \c cmpxchg.
+ * Acknowledgments:
+ * Dec 1999, Richard Henderson <rth@twiddle.net>, move to generic cmpxchg.
  */
 
 /*
@@ -85,7 +84,7 @@ typedef unsigned int drm_context_t;
 typedef unsigned int drm_drawable_t;
 typedef unsigned int drm_magic_t;
 
-/**
+/*
  * Cliprect.
  *
  * \warning: If you change this structure, make sure you change
@@ -101,7 +100,7 @@ struct drm_clip_rect {
        unsigned short y2;
 };
 
-/**
+/*
  * Drawable information.
  */
 struct drm_drawable_info {
@@ -109,7 +108,7 @@ struct drm_drawable_info {
        struct drm_clip_rect *rects;
 };
 
-/**
+/*
  * Texture region,
  */
 struct drm_tex_region {
@@ -120,7 +119,7 @@ struct drm_tex_region {
        unsigned int age;
 };
 
-/**
+/*
  * Hardware lock.
  *
  * The lock structure is a simple cache-line aligned integer.  To avoid
@@ -132,7 +131,7 @@ struct drm_hw_lock {
        char padding[60];                       /**< Pad to cache line */
 };
 
-/**
+/*
  * DRM_IOCTL_VERSION ioctl argument type.
  *
  * \sa drmGetVersion().
@@ -149,7 +148,7 @@ struct drm_version {
        char __user *desc;        /**< User-space buffer to hold desc */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_UNIQUE ioctl argument type.
  *
  * \sa drmGetBusid() and drmSetBusId().
@@ -168,7 +167,7 @@ struct drm_block {
        int unused;
 };
 
-/**
+/*
  * DRM_IOCTL_CONTROL ioctl argument type.
  *
  * \sa drmCtlInstHandler() and drmCtlUninstHandler().
@@ -183,7 +182,7 @@ struct drm_control {
        int irq;
 };
 
-/**
+/*
  * Type of memory to map.
  */
 enum drm_map_type {
@@ -195,7 +194,7 @@ enum drm_map_type {
        _DRM_CONSISTENT = 5       /**< Consistent memory for PCI DMA */
 };
 
-/**
+/*
  * Memory mapping flags.
  */
 enum drm_map_flags {
@@ -214,7 +213,7 @@ struct drm_ctx_priv_map {
        void *handle;            /**< Handle of map */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_MAP, DRM_IOCTL_ADD_MAP and DRM_IOCTL_RM_MAP ioctls
  * argument type.
  *
@@ -231,7 +230,7 @@ struct drm_map {
        /*   Private data */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_CLIENT ioctl argument type.
  */
 struct drm_client {
@@ -263,7 +262,7 @@ enum drm_stat_type {
            /* Add to the *END* of the list */
 };
 
-/**
+/*
  * DRM_IOCTL_GET_STATS ioctl argument type.
  */
 struct drm_stats {
@@ -274,7 +273,7 @@ struct drm_stats {
        } data[15];
 };
 
-/**
+/*
  * Hardware locking flags.
  */
 enum drm_lock_flags {
@@ -289,7 +288,7 @@ enum drm_lock_flags {
        _DRM_HALT_CUR_QUEUES = 0x20  /**< Halt all current queues */
 };
 
-/**
+/*
  * DRM_IOCTL_LOCK, DRM_IOCTL_UNLOCK and DRM_IOCTL_FINISH ioctl argument type.
  *
  * \sa drmGetLock() and drmUnlock().
@@ -299,7 +298,7 @@ struct drm_lock {
        enum drm_lock_flags flags;
 };
 
-/**
+/*
  * DMA flags
  *
  * \warning
@@ -328,7 +327,7 @@ enum drm_dma_flags {
        _DRM_DMA_LARGER_OK = 0x40     /**< Larger-than-requested buffers OK */
 };
 
-/**
+/*
  * DRM_IOCTL_ADD_BUFS and DRM_IOCTL_MARK_BUFS ioctl argument type.
  *
  * \sa drmAddBufs().
@@ -351,7 +350,7 @@ struct drm_buf_desc {
                                  */
 };
 
-/**
+/*
  * DRM_IOCTL_INFO_BUFS ioctl argument type.
  */
 struct drm_buf_info {
@@ -359,7 +358,7 @@ struct drm_buf_info {
        struct drm_buf_desc __user *list;
 };
 
-/**
+/*
  * DRM_IOCTL_FREE_BUFS ioctl argument type.
  */
 struct drm_buf_free {
@@ -367,7 +366,7 @@ struct drm_buf_free {
        int __user *list;
 };
 
-/**
+/*
  * Buffer information
  *
  * \sa drm_buf_map.
@@ -379,7 +378,7 @@ struct drm_buf_pub {
        void __user *address;          /**< Address of buffer */
 };
 
-/**
+/*
  * DRM_IOCTL_MAP_BUFS ioctl argument type.
  */
 struct drm_buf_map {
@@ -392,7 +391,7 @@ struct drm_buf_map {
        struct drm_buf_pub __user *list;        /**< Buffer information */
 };
 
-/**
+/*
  * DRM_IOCTL_DMA ioctl argument type.
  *
  * Indices here refer to the offset into the buffer list in drm_buf_get.
@@ -417,7 +416,7 @@ enum drm_ctx_flags {
        _DRM_CONTEXT_2DONLY = 0x02
 };
 
-/**
+/*
  * DRM_IOCTL_ADD_CTX ioctl argument type.
  *
  * \sa drmCreateContext() and drmDestroyContext().
@@ -427,7 +426,7 @@ struct drm_ctx {
        enum drm_ctx_flags flags;
 };
 
-/**
+/*
  * DRM_IOCTL_RES_CTX ioctl argument type.
  */
 struct drm_ctx_res {
@@ -435,14 +434,14 @@ struct drm_ctx_res {
        struct drm_ctx __user *contexts;
 };
 
-/**
+/*
  * DRM_IOCTL_ADD_DRAW and DRM_IOCTL_RM_DRAW ioctl argument type.
  */
 struct drm_draw {
        drm_drawable_t handle;
 };
 
-/**
+/*
  * DRM_IOCTL_UPDATE_DRAW ioctl argument type.
  */
 typedef enum {
@@ -456,14 +455,14 @@ struct drm_update_draw {
        unsigned long long data;
 };
 
-/**
+/*
  * DRM_IOCTL_GET_MAGIC and DRM_IOCTL_AUTH_MAGIC ioctl argument type.
  */
 struct drm_auth {
        drm_magic_t magic;
 };
 
-/**
+/*
  * DRM_IOCTL_IRQ_BUSID ioctl argument type.
  *
  * \sa drmGetInterruptFromBusID().
@@ -505,7 +504,7 @@ struct drm_wait_vblank_reply {
        long tval_usec;
 };
 
-/**
+/*
  * DRM_IOCTL_WAIT_VBLANK ioctl argument type.
  *
  * \sa drmWaitVBlank().
@@ -518,7 +517,7 @@ union drm_wait_vblank {
 #define _DRM_PRE_MODESET 1
 #define _DRM_POST_MODESET 2
 
-/**
+/*
  * DRM_IOCTL_MODESET_CTL ioctl argument type
  *
  * \sa drmModesetCtl().
@@ -528,7 +527,7 @@ struct drm_modeset_ctl {
        __u32 cmd;
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_ENABLE ioctl argument type.
  *
  * \sa drmAgpEnable().
@@ -537,7 +536,7 @@ struct drm_agp_mode {
        unsigned long mode;     /**< AGP mode */
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_ALLOC and DRM_IOCTL_AGP_FREE ioctls argument type.
  *
  * \sa drmAgpAlloc() and drmAgpFree().
@@ -549,7 +548,7 @@ struct drm_agp_buffer {
        unsigned long physical; /**< Physical used by i810 */
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_BIND and DRM_IOCTL_AGP_UNBIND ioctls argument type.
  *
  * \sa drmAgpBind() and drmAgpUnbind().
@@ -559,7 +558,7 @@ struct drm_agp_binding {
        unsigned long offset;   /**< In bytes -- will round to page boundary */
 };
 
-/**
+/*
  * DRM_IOCTL_AGP_INFO ioctl argument type.
  *
  * \sa drmAgpVersionMajor(), drmAgpVersionMinor(), drmAgpGetMode(),
@@ -580,7 +579,7 @@ struct drm_agp_info {
        unsigned short id_device;
 };
 
-/**
+/*
  * DRM_IOCTL_SG_ALLOC ioctl argument type.
  */
 struct drm_scatter_gather {
@@ -588,7 +587,7 @@ struct drm_scatter_gather {
        unsigned long handle;   /**< Used for mapping / unmapping */
 };
 
-/**
+/*
  * DRM_IOCTL_SET_VERSION ioctl argument type.
  */
 struct drm_set_version {
@@ -598,14 +597,14 @@ struct drm_set_version {
        int drm_dd_minor;
 };
 
-/** DRM_IOCTL_GEM_CLOSE ioctl argument type */
+/* DRM_IOCTL_GEM_CLOSE ioctl argument type */
 struct drm_gem_close {
        /** Handle of the object to be closed. */
        __u32 handle;
        __u32 pad;
 };
 
-/** DRM_IOCTL_GEM_FLINK ioctl argument type */
+/* DRM_IOCTL_GEM_FLINK ioctl argument type */
 struct drm_gem_flink {
        /** Handle for the object being named */
        __u32 handle;
@@ -614,7 +613,7 @@ struct drm_gem_flink {
        __u32 name;
 };
 
-/** DRM_IOCTL_GEM_OPEN ioctl argument type */
+/* DRM_IOCTL_GEM_OPEN ioctl argument type */
 struct drm_gem_open {
        /** Name of object being opened */
        __u32 name;
@@ -652,7 +651,7 @@ struct drm_gem_open {
 #define DRM_CAP_SYNCOBJ                0x13
 #define DRM_CAP_SYNCOBJ_TIMELINE       0x14
 
-/** DRM_IOCTL_GET_CAP ioctl argument type */
+/* DRM_IOCTL_GET_CAP ioctl argument type */
 struct drm_get_cap {
        __u64 capability;
        __u64 value;
@@ -678,7 +677,9 @@ struct drm_get_cap {
 /**
  * DRM_CLIENT_CAP_ATOMIC
  *
- * If set to 1, the DRM core will expose atomic properties to userspace
+ * If set to 1, the DRM core will expose atomic properties to userspace. This
+ * implicitly enables &DRM_CLIENT_CAP_UNIVERSAL_PLANES and
+ * &DRM_CLIENT_CAP_ASPECT_RATIO.
  */
 #define DRM_CLIENT_CAP_ATOMIC  3
 
@@ -698,7 +699,7 @@ struct drm_get_cap {
  */
 #define DRM_CLIENT_CAP_WRITEBACK_CONNECTORS    5
 
-/** DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
+/* DRM_IOCTL_SET_CLIENT_CAP ioctl argument type */
 struct drm_set_client_cap {
        __u64 capability;
        __u64 value;
@@ -950,7 +951,7 @@ extern "C" {
 
 #define DRM_IOCTL_MODE_GETFB2          DRM_IOWR(0xCE, struct drm_mode_fb_cmd2)
 
-/**
+/*
  * Device specific ioctls should only be in their respective headers
  * The device specific ioctl range is from 0x40 to 0x9f.
  * Generic IOCTLS restart at 0xA0.
@@ -961,7 +962,7 @@ extern "C" {
 #define DRM_COMMAND_BASE                0x40
 #define DRM_COMMAND_END                        0xA0
 
-/**
+/*
  * Header for events written back to userspace on the drm fd.  The
  * type defines the type of event, the length specifies the total
  * length of the event (including the header), and user_data is
index fa1f3d6..1987e2e 100644
@@ -177,8 +177,9 @@ enum drm_i915_pmu_engine_sample {
 #define I915_PMU_REQUESTED_FREQUENCY   __I915_PMU_OTHER(1)
 #define I915_PMU_INTERRUPTS            __I915_PMU_OTHER(2)
 #define I915_PMU_RC6_RESIDENCY         __I915_PMU_OTHER(3)
+#define I915_PMU_SOFTWARE_GT_AWAKE_TIME        __I915_PMU_OTHER(4)
 
-#define I915_PMU_LAST I915_PMU_RC6_RESIDENCY
+#define I915_PMU_LAST /* Deprecated - do not use */ I915_PMU_RC6_RESIDENCY
 
 /* Each region is a minimum of 16k, and there are at most 255 of them.
  */
index 4c24daa..79c8933 100644
@@ -3850,7 +3850,6 @@ union bpf_attr {
  *
  * long bpf_check_mtu(void *ctx, u32 ifindex, u32 *mtu_len, s32 len_diff, u64 flags)
  *     Description
-
  *             Check ctx packet size against exceeding MTU of net device (based
  *             on *ifindex*).  This helper will likely be used in combination
  *             with helpers that adjust/change the packet size.
index abb89bb..f6afee2 100644
@@ -216,6 +216,20 @@ struct kvm_hyperv_exit {
        } u;
 };
 
+struct kvm_xen_exit {
+#define KVM_EXIT_XEN_HCALL          1
+       __u32 type;
+       union {
+               struct {
+                       __u32 longmode;
+                       __u32 cpl;
+                       __u64 input;
+                       __u64 result;
+                       __u64 params[6];
+               } hcall;
+       } u;
+};
+
 #define KVM_S390_GET_SKEYS_NONE   1
 #define KVM_S390_SKEYS_MAX        1048576
 
@@ -252,6 +266,8 @@ struct kvm_hyperv_exit {
 #define KVM_EXIT_X86_WRMSR        30
 #define KVM_EXIT_DIRTY_RING_FULL  31
 #define KVM_EXIT_AP_RESET_HOLD    32
+#define KVM_EXIT_X86_BUS_LOCK     33
+#define KVM_EXIT_XEN              34
 
 /* For KVM_EXIT_INTERNAL_ERROR */
 /* Emulate instruction failed. */
@@ -428,6 +444,8 @@ struct kvm_run {
                        __u32 index; /* kernel -> user */
                        __u64 data; /* kernel <-> user */
                } msr;
+               /* KVM_EXIT_XEN */
+               struct kvm_xen_exit xen;
                /* Fix the size of the union. */
                char padding[256];
        };
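A hedged sketch of a VMM consuming the new exit reason; handle_xen_hypercall() is a hypothetical handler and run is the vCPU's mmap'ed struct kvm_run:

    switch (run->exit_reason) {
    case KVM_EXIT_XEN:
            if (run->xen.type == KVM_EXIT_XEN_HCALL)
                    run->xen.u.hcall.result =
                            handle_xen_hypercall(run->xen.u.hcall.input,
                                                 run->xen.u.hcall.params);
            break;
    }
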
@@ -1058,6 +1076,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ENFORCE_PV_FEATURE_CPUID 190
 #define KVM_CAP_SYS_HYPERV_CPUID 191
 #define KVM_CAP_DIRTY_LOG_RING 192
+#define KVM_CAP_X86_BUS_LOCK_EXIT 193
 #define KVM_CAP_PPC_DAWR1 194
 
 #ifdef KVM_CAP_IRQ_ROUTING
@@ -1132,6 +1151,11 @@ struct kvm_x86_mce {
 #endif
 
 #ifdef KVM_CAP_XEN_HVM
+#define KVM_XEN_HVM_CONFIG_HYPERCALL_MSR       (1 << 0)
+#define KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL     (1 << 1)
+#define KVM_XEN_HVM_CONFIG_SHARED_INFO         (1 << 2)
+#define KVM_XEN_HVM_CONFIG_RUNSTATE            (1 << 3)
+
 struct kvm_xen_hvm_config {
        __u32 flags;
        __u32 msr;
@@ -1566,6 +1590,57 @@ struct kvm_pv_cmd {
 /* Available with KVM_CAP_DIRTY_LOG_RING */
 #define KVM_RESET_DIRTY_RINGS          _IO(KVMIO, 0xc7)
 
+/* Per-VM Xen attributes */
+#define KVM_XEN_HVM_GET_ATTR   _IOWR(KVMIO, 0xc8, struct kvm_xen_hvm_attr)
+#define KVM_XEN_HVM_SET_ATTR   _IOW(KVMIO,  0xc9, struct kvm_xen_hvm_attr)
+
+struct kvm_xen_hvm_attr {
+       __u16 type;
+       __u16 pad[3];
+       union {
+               __u8 long_mode;
+               __u8 vector;
+               struct {
+                       __u64 gfn;
+               } shared_info;
+               __u64 pad[8];
+       } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_ATTR_TYPE_LONG_MODE            0x0
+#define KVM_XEN_ATTR_TYPE_SHARED_INFO          0x1
+#define KVM_XEN_ATTR_TYPE_UPCALL_VECTOR                0x2
+
+/* Per-vCPU Xen attributes */
+#define KVM_XEN_VCPU_GET_ATTR  _IOWR(KVMIO, 0xca, struct kvm_xen_vcpu_attr)
+#define KVM_XEN_VCPU_SET_ATTR  _IOW(KVMIO,  0xcb, struct kvm_xen_vcpu_attr)
+
+struct kvm_xen_vcpu_attr {
+       __u16 type;
+       __u16 pad[3];
+       union {
+               __u64 gpa;
+               __u64 pad[8];
+               struct {
+                       __u64 state;
+                       __u64 state_entry_time;
+                       __u64 time_running;
+                       __u64 time_runnable;
+                       __u64 time_blocked;
+                       __u64 time_offline;
+               } runstate;
+       } u;
+};
+
+/* Available with KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO */
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_INFO       0x0
+#define KVM_XEN_VCPU_ATTR_TYPE_VCPU_TIME_INFO  0x1
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR   0x2
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT        0x3
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA   0x4
+#define KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST 0x5
+
 /* Secure Encrypted Virtualization command */
 enum sev_cmd_id {
        /* Guest initialization commands */
@@ -1594,6 +1669,8 @@ enum sev_cmd_id {
        KVM_SEV_DBG_ENCRYPT,
        /* Guest certificates commands */
        KVM_SEV_CERT_EXPORT,
+       /* Attestation report */
+       KVM_SEV_GET_ATTESTATION_REPORT,
 
        KVM_SEV_NR_MAX,
 };
@@ -1646,6 +1723,12 @@ struct kvm_sev_dbg {
        __u32 len;
 };
 
+struct kvm_sev_attestation_report {
+       __u8 mnonce[16];
+       __u64 uaddr;
+       __u32 len;
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX       (1 << 2)
@@ -1767,4 +1850,7 @@ struct kvm_dirty_gfn {
        __u64 offset;
 };
 
+#define KVM_BUS_LOCK_DETECTION_OFF             (1 << 0)
+#define KVM_BUS_LOCK_DETECTION_EXIT            (1 << 1)
+
 #endif /* __LINUX_KVM_H */
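Opting a VM into the new behaviour from userspace — a minimal sketch, assuming vm_fd is an open KVM VM descriptor and the host reports KVM_CAP_X86_BUS_LOCK_EXIT:

    struct kvm_enable_cap cap = {
            .cap  = KVM_CAP_X86_BUS_LOCK_EXIT,
            .args = { KVM_BUS_LOCK_DETECTION_EXIT },
    };

    if (ioctl(vm_fd, KVM_ENABLE_CAP, &cap) < 0)
            perror("KVM_ENABLE_CAP");
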
index dd8306e..e6524ea 100644
@@ -1,6 +1,8 @@
 #ifndef _UAPI_LINUX_MOUNT_H
 #define _UAPI_LINUX_MOUNT_H
 
+#include <linux/types.h>
+
 /*
  * These are the fs-independent mount-flags: up to 32 flags are supported
  *
@@ -117,5 +119,19 @@ enum fsconfig_command {
 #define MOUNT_ATTR_NOATIME     0x00000010 /* - Do not update access times. */
 #define MOUNT_ATTR_STRICTATIME 0x00000020 /* - Always perform atime updates */
 #define MOUNT_ATTR_NODIRATIME  0x00000080 /* Do not update directory access times */
+#define MOUNT_ATTR_IDMAP       0x00100000 /* Idmap mount to @userns_fd in struct mount_attr. */
+
+/*
+ * mount_setattr()
+ */
+struct mount_attr {
+       __u64 attr_set;
+       __u64 attr_clr;
+       __u64 propagation;
+       __u64 userns_fd;
+};
+
+/* List of all mount_attr versions. */
+#define MOUNT_ATTR_SIZE_VER0   32 /* sizeof first published struct */
 
 #endif /* _UAPI_LINUX_MOUNT_H */
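A minimal userspace sketch of the new syscall, invoked raw since libc wrappers may not exist yet; the number 442 comes from the asm-generic table earlier in this patch, and the headers are assumed to be this kernel's:

    #include <fcntl.h>           /* AT_FDCWD */
    #include <linux/mount.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    static int make_nodiratime(const char *path)
    {
            struct mount_attr attr = {
                    .attr_set = MOUNT_ATTR_NODIRATIME,
            };

            return syscall(442 /* __NR_mount_setattr */, AT_FDCWD, path,
                           0 /* flags */, &attr, MOUNT_ATTR_SIZE_VER0);
    }
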
index 58b1eb7..a5feb76 100644
@@ -35,5 +35,9 @@ struct open_how {
 #define RESOLVE_IN_ROOT                0x10 /* Make all jumps to "/" and ".."
                                        be scoped inside the dirfd
                                        (similar to chroot(2)). */
+#define RESOLVE_CACHED         0x20 /* Only complete if resolution can be
+                                       completed through cached lookup. May
+                                       return -EAGAIN if that's not
+                                       possible. */
 
 #endif /* _UAPI_LINUX_OPENAT2_H */
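RESOLVE_CACHED is aimed at callers that want a non-blocking fast path
(io_uring being the motivating user). A sketch of the expected pattern, with
the raw syscall invocation as an assumption:

    #include <errno.h>
    #include <fcntl.h>
    #include <linux/openat2.h>
    #include <sys/syscall.h>
    #include <unistd.h>

    struct open_how how = {
        .flags   = O_RDONLY,
        .resolve = RESOLVE_CACHED,
    };
    int fd = syscall(__NR_openat2, AT_FDCWD, "/etc/hostname",
                     &how, sizeof(how));
    if (fd < 0 && errno == EAGAIN) {
        /* dcache miss: retry as a normal, possibly blocking lookup */
        how.resolve &= ~RESOLVE_CACHED;
        fd = syscall(__NR_openat2, AT_FDCWD, "/etc/hostname",
                     &how, sizeof(how));
    }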
index 71aabaf..8f13b84 100644 (file)
@@ -9,6 +9,7 @@ Type=simple
 ExecStart=/usr/bin/kvm_stat -dtcz -s 10 -L /var/log/kvm_stat.csv
 ExecReload=/bin/kill -HUP $MAINPID
 Restart=always
+RestartSec=60s
 SyslogIdentifier=kvm_stat
 SyslogLevel=debug
 
index 887a494..e9eb6a6 100644 (file)
@@ -215,7 +215,7 @@ define do_install
        if [ ! -d '$(DESTDIR_SQ)$2' ]; then             \
                $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \
        fi;                                             \
-       $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2'
+       $(INSTALL) $(if $3,-m $3,) $1 '$(DESTDIR_SQ)$2'
 endef
 
 install_lib: all_cmd
index 2f9d685..0911aea 100644 (file)
@@ -462,7 +462,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)
                return err;
 
        case BTF_KIND_ARRAY:
-               return btf_dump_order_type(d, btf_array(t)->type, through_ptr);
+               return btf_dump_order_type(d, btf_array(t)->type, false);
 
        case BTF_KIND_STRUCT:
        case BTF_KIND_UNION: {
index d43cc3f..4181d17 100644 (file)
@@ -1181,7 +1181,8 @@ static int bpf_object__elf_init(struct bpf_object *obj)
        if (!elf_rawdata(elf_getscn(obj->efile.elf, obj->efile.shstrndx), NULL)) {
                pr_warn("elf: failed to get section names strings from %s: %s\n",
                        obj->path, elf_errmsg(-1));
-               return -LIBBPF_ERRNO__FORMAT;
+               err = -LIBBPF_ERRNO__FORMAT;
+               goto errout;
        }
 
        /* Old LLVM set e_machine to EM_NONE */
index 4dd73de..d2cb28e 100644 (file)
@@ -40,7 +40,7 @@ static int libbpf_netlink_open(__u32 *nl_pid)
        memset(&sa, 0, sizeof(sa));
        sa.nl_family = AF_NETLINK;
 
-       sock = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+       sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, NETLINK_ROUTE);
        if (sock < 0)
                return -errno;
 
index ffbb588..526fc35 100644 (file)
@@ -610,15 +610,16 @@ static int xsk_lookup_bpf_maps(struct xsk_socket *xsk)
                if (fd < 0)
                        continue;
 
+               memset(&map_info, 0, map_len);
                err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len);
                if (err) {
                        close(fd);
                        continue;
                }
 
-               if (!strcmp(map_info.name, "xsks_map")) {
+               if (!strncmp(map_info.name, "xsks_map", sizeof(map_info.name))) {
                        ctx->xsks_map_fd = fd;
-                       continue;
+                       break;
                }
 
                close(fd);
index 17465d4..a0aaf38 100644 (file)
 
 void perf_evlist__init(struct perf_evlist *evlist)
 {
-       int i;
-
-       for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
-               INIT_HLIST_HEAD(&evlist->heads[i]);
        INIT_LIST_HEAD(&evlist->entries);
        evlist->nr_entries = 0;
        fdarray__init(&evlist->pollfd, 64);
+       perf_evlist__reset_id_hash(evlist);
 }
 
 static void __perf_evlist__propagate_maps(struct perf_evlist *evlist,
@@ -237,6 +234,14 @@ static void perf_evlist__id_hash(struct perf_evlist *evlist,
        hlist_add_head(&sid->node, &evlist->heads[hash]);
 }
 
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist)
+{
+       int i;
+
+       for (i = 0; i < PERF_EVLIST__HLIST_SIZE; ++i)
+               INIT_HLIST_HEAD(&evlist->heads[i]);
+}
+
 void perf_evlist__id_add(struct perf_evlist *evlist,
                         struct perf_evsel *evsel,
                         int cpu, int thread, u64 id)
index 2d0fa02..212c290 100644 (file)
@@ -124,4 +124,6 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,
                           struct perf_evsel *evsel,
                           int cpu, int thread, int fd);
 
+void perf_evlist__reset_id_hash(struct perf_evlist *evlist);
+
 #endif /* __LIBPERF_INTERNAL_EVLIST_H */
index 45cefda..14236db 100644 (file)
@@ -1,4 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 arch/x86/lib/inat-tables.c
-objtool
+/objtool
 fixdep
index 0542e46..30f38fd 100644 (file)
@@ -315,13 +315,15 @@ they mean, and suggestions for how to fix them.
       function tracing inserts additional calls, which is not obvious from the
       sources).
 
-10. file.o: warning: func()+0x5c: alternative modifies stack
-
-    This means that an alternative includes instructions that modify the
-    stack. The problem is that there is only one ORC unwind table, this means
-    that the ORC unwind entries must be valid for each of the alternatives.
-    The easiest way to enforce this is to ensure alternatives do not contain
-    any ORC entries, which in turn implies the above constraint.
+10. file.o: warning: func()+0x5c: stack layout conflict in alternatives
+
+    This means that in the use of the alternative() or ALTERNATIVE()
+    macro, the code paths have conflicting modifications to the stack.
+    The problem is that there is only one ORC unwind table, which means
+    that the ORC unwind entries must be consistent for all possible
+    instruction boundaries regardless of which code has been patched.
+    This limitation can be overcome by massaging the alternatives with
+    NOPs to shift the stack changes around so they no longer conflict
+    (see the sketch after this excerpt).
 
 11. file.o: warning: unannotated intra-function call
 
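A schematic illustration of warning 10 (hypothetical feature bit; not code
from the tree): the two streams below disagree about the stack at the
instruction boundary right after the first byte, so a single shared ORC table
cannot describe both.

    asm volatile(ALTERNATIVE(
            "push %%rdi; call foo; pop %%rdi", /* stack grows, then shrinks */
            "nop; nop; nop",                   /* stack never changes */
            X86_FEATURE_SOMETHING)             /* assumed feature bit */
            ::: "memory");

Padding both streams with NOPs so the push/pop pair lands at matching
offsets, or hoisting the stack change out of the alternative, silences the
warning.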
index 5cdb190..92ce4fc 100644 (file)
@@ -27,6 +27,7 @@ all: $(OBJTOOL)
 INCLUDES := -I$(srctree)/tools/include \
            -I$(srctree)/tools/arch/$(HOSTARCH)/include/uapi \
            -I$(srctree)/tools/arch/$(SRCARCH)/include  \
+           -I$(srctree)/tools/objtool/include \
            -I$(srctree)/tools/objtool/arch/$(SRCARCH)/include
 WARNINGS := $(EXTRA_WARNINGS) -Wno-switch-default -Wno-switch-enum -Wno-packed -Wno-nested-externs
 CFLAGS   := -Werror $(WARNINGS) $(KBUILD_HOSTCFLAGS) -g $(INCLUDES) $(LIBELF_FLAGS)
@@ -46,10 +47,6 @@ ifeq ($(SRCARCH),x86)
        SUBCMD_ORC := y
 endif
 
-ifeq ($(SUBCMD_ORC),y)
-       CFLAGS += -DINSN_USE_ORC
-endif
-
 export SUBCMD_CHECK SUBCMD_ORC
 export srctree OUTPUT CFLAGS SRCARCH AWK
 include $(srctree)/tools/build/Makefile.include
index cde9c36..549813c 100644 (file)
 #include "../../../arch/x86/lib/inat.c"
 #include "../../../arch/x86/lib/insn.c"
 
-#include "../../check.h"
-#include "../../elf.h"
-#include "../../arch.h"
-#include "../../warn.h"
 #include <asm/orc_types.h>
+#include <objtool/check.h>
+#include <objtool/elf.h>
+#include <objtool/arch.h>
+#include <objtool/warn.h>
 
 static unsigned char op_to_cfi_reg[][2] = {
        {CFI_AX, CFI_R8},
@@ -222,15 +222,38 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
                break;
 
        case 0x89:
-               if (rex_w && !rex_r && modrm_mod == 3 && modrm_reg == 4) {
+               if (rex_w && !rex_r && modrm_reg == 4) {
 
-                       /* mov %rsp, reg */
-                       ADD_OP(op) {
-                               op->src.type = OP_SRC_REG;
-                               op->src.reg = CFI_SP;
-                               op->dest.type = OP_DEST_REG;
-                               op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+                       if (modrm_mod == 3) {
+                               /* mov %rsp, reg */
+                               ADD_OP(op) {
+                                       op->src.type = OP_SRC_REG;
+                                       op->src.reg = CFI_SP;
+                                       op->dest.type = OP_DEST_REG;
+                                       op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+                               }
+                               break;
+
+                       } else {
+                               /* skip nontrivial SIB */
+                               if (modrm_rm == 4 && !(sib == 0x24 && rex_b == rex_x))
+                                       break;
+
+                               /* skip RIP relative displacement */
+                               if (modrm_rm == 5 && modrm_mod == 0)
+                                       break;
+
+                               /* mov %rsp, disp(%reg) */
+                               ADD_OP(op) {
+                                       op->src.type = OP_SRC_REG;
+                                       op->src.reg = CFI_SP;
+                                       op->dest.type = OP_DEST_REG_INDIRECT;
+                                       op->dest.reg = op_to_cfi_reg[modrm_rm][rex_b];
+                                       op->dest.offset = insn.displacement.value;
+                               }
+                               break;
                        }
+
                        break;
                }
 
@@ -259,8 +282,10 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
                                op->dest.reg = CFI_BP;
                                op->dest.offset = insn.displacement.value;
                        }
+                       break;
+               }
 
-               } else if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
+               if (rex_w && !rex_b && modrm_rm == 4 && sib == 0x24) {
 
                        /* mov reg, disp(%rsp) */
                        ADD_OP(op) {
@@ -270,6 +295,7 @@ int arch_decode_instruction(const struct elf *elf, const struct section *sec,
                                op->dest.reg = CFI_SP;
                                op->dest.offset = insn.displacement.value;
                        }
+                       break;
                }
 
                break;
@@ -563,8 +589,8 @@ void arch_initial_func_cfi_state(struct cfi_init_state *state)
        state->cfa.offset = 8;
 
        /* initial RA (return address) */
-       state->regs[16].base = CFI_CFA;
-       state->regs[16].offset = -8;
+       state->regs[CFI_RA].base = CFI_CFA;
+       state->regs[CFI_RA].offset = -8;
 }
 
 const char *arch_nop_insn(int len)
diff --git a/tools/objtool/arch/x86/include/arch/endianness.h b/tools/objtool/arch/x86/include/arch/endianness.h
new file mode 100644 (file)
index 0000000..7c36252
--- /dev/null
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _ARCH_ENDIANNESS_H
+#define _ARCH_ENDIANNESS_H
+
+#include <endian.h>
+
+#define __TARGET_BYTE_ORDER __LITTLE_ENDIAN
+
+#endif /* _ARCH_ENDIANNESS_H */
index fd4af88..e707d9b 100644 (file)
@@ -1,8 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-or-later
 #include <string.h>
 
-#include "../../special.h"
-#include "../../builtin.h"
+#include <objtool/special.h>
+#include <objtool/builtin.h>
 
 #define X86_FEATURE_POPCNT (4 * 32 + 23)
 #define X86_FEATURE_SMAP   (9 * 32 + 20)
@@ -48,7 +48,7 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
         * replacement group.
         */
        return insn->offset == special_alt->new_off &&
-              (insn->type == INSN_CALL || is_static_jump(insn));
+              (insn->type == INSN_CALL || is_jump(insn));
 }
 
 /*
index c6d199b..c3a85d8 100644 (file)
 
 #include <subcmd/parse-options.h>
 #include <string.h>
-#include "builtin.h"
-#include "objtool.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
 
-bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
 
 static const char * const check_usage[] = {
        "objtool check [<options>] file.o",
@@ -34,13 +34,15 @@ const struct option check_options[] = {
        OPT_BOOLEAN('a', "uaccess", &uaccess, "enable uaccess checking"),
        OPT_BOOLEAN('s', "stats", &stats, "print statistics"),
        OPT_BOOLEAN('d', "duplicate", &validate_dup, "duplicate validation for vmlinux.o"),
+       OPT_BOOLEAN('n', "noinstr", &noinstr, "noinstr validation for vmlinux.o"),
        OPT_BOOLEAN('l', "vmlinux", &vmlinux, "vmlinux.o validation"),
+       OPT_BOOLEAN('M', "mcount", &mcount, "generate __mcount_loc"),
        OPT_END(),
 };
 
 int cmd_check(int argc, const char **argv)
 {
-       const char *objname, *s;
+       const char *objname;
        struct objtool_file *file;
        int ret;
 
@@ -51,10 +53,6 @@ int cmd_check(int argc, const char **argv)
 
        objname = argv[0];
 
-       s = strstr(objname, "vmlinux.o");
-       if (s && !s[9])
-               vmlinux = true;
-
        file = objtool_open_read(objname);
        if (!file)
                return 1;
index 7b31121..8273bbf 100644 (file)
@@ -13,8 +13,8 @@
  */
 
 #include <string.h>
-#include "builtin.h"
-#include "objtool.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
 
 static const char *orc_usage[] = {
        "objtool orc generate [<options>] file.o",
@@ -51,11 +51,7 @@ int cmd_orc(int argc, const char **argv)
                if (list_empty(&file->insn_list))
                        return 0;
 
-               ret = create_orc(file);
-               if (ret)
-                       return ret;
-
-               ret = create_orc_sections(file);
+               ret = orc_create(file);
                if (ret)
                        return ret;
 
index f2e5e5c..5e5388a 100644 (file)
@@ -6,21 +6,20 @@
 #include <string.h>
 #include <stdlib.h>
 
-#include "builtin.h"
-#include "cfi.h"
-#include "arch.h"
-#include "check.h"
-#include "special.h"
-#include "warn.h"
-#include "arch_elf.h"
+#include <arch/elf.h>
+#include <objtool/builtin.h>
+#include <objtool/cfi.h>
+#include <objtool/arch.h>
+#include <objtool/check.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
 
 #include <linux/objtool.h>
 #include <linux/hashtable.h>
 #include <linux/kernel.h>
 #include <linux/static_call_types.h>
 
-#define FAKE_JUMP_OFFSET -1
-
 struct alternative {
        struct list_head list;
        struct instruction *insn;
@@ -111,15 +110,20 @@ static struct instruction *prev_insn_same_sym(struct objtool_file *file,
 
 static bool is_sibling_call(struct instruction *insn)
 {
+       /*
+        * Assume only ELF functions can make sibling calls.  This ensures
+        * sibling call detection consistency between vmlinux.o and individual
+        * objects.
+        */
+       if (!insn->func)
+               return false;
+
        /* An indirect jump is either a sibling call or a jump to a table. */
        if (insn->type == INSN_JUMP_DYNAMIC)
                return list_empty(&insn->alts);
 
-       if (!is_static_jump(insn))
-               return false;
-
        /* add_jump_destinations() sets insn->call_dest for sibling calls. */
-       return !!insn->call_dest;
+       return (is_static_jump(insn) && insn->call_dest);
 }
 
 /*
@@ -156,6 +160,7 @@ static bool __dead_end_function(struct objtool_file *file, struct symbol *func,
                "machine_real_restart",
                "rewind_stack_do_exit",
                "kunit_try_catch_throw",
+               "xen_start_kernel",
        };
 
        if (!func)
@@ -244,7 +249,7 @@ static void init_insn_state(struct insn_state *state, struct section *sec)
         * not correctly determine insn->call_dest->sec (external symbols do
         * not have a section).
         */
-       if (vmlinux && sec)
+       if (vmlinux && noinstr && sec)
                state->noinstr = sec->noinstr;
 }
 
@@ -543,6 +548,78 @@ static int create_static_call_sections(struct objtool_file *file)
        return 0;
 }
 
+static int create_mcount_loc_sections(struct objtool_file *file)
+{
+       struct section *sec, *reloc_sec;
+       struct reloc *reloc;
+       unsigned long *loc;
+       struct instruction *insn;
+       int idx;
+
+       sec = find_section_by_name(file->elf, "__mcount_loc");
+       if (sec) {
+               INIT_LIST_HEAD(&file->mcount_loc_list);
+               WARN("file already has __mcount_loc section, skipping");
+               return 0;
+       }
+
+       if (list_empty(&file->mcount_loc_list))
+               return 0;
+
+       idx = 0;
+       list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node)
+               idx++;
+
+       sec = elf_create_section(file->elf, "__mcount_loc", 0, sizeof(unsigned long), idx);
+       if (!sec)
+               return -1;
+
+       reloc_sec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+       if (!reloc_sec)
+               return -1;
+
+       idx = 0;
+       list_for_each_entry(insn, &file->mcount_loc_list, mcount_loc_node) {
+
+               loc = (unsigned long *)sec->data->d_buf + idx;
+               memset(loc, 0, sizeof(unsigned long));
+
+               reloc = malloc(sizeof(*reloc));
+               if (!reloc) {
+                       perror("malloc");
+                       return -1;
+               }
+               memset(reloc, 0, sizeof(*reloc));
+
+               if (insn->sec->sym) {
+                       reloc->sym = insn->sec->sym;
+                       reloc->addend = insn->offset;
+               } else {
+                       reloc->sym = find_symbol_containing(insn->sec, insn->offset);
+
+                       if (!reloc->sym) {
+                               WARN("missing symbol for insn at offset 0x%lx\n",
+                                    insn->offset);
+                               return -1;
+                       }
+
+                       reloc->addend = insn->offset - reloc->sym->offset;
+               }
+
+               reloc->type = R_X86_64_64;
+               reloc->offset = idx * sizeof(unsigned long);
+               reloc->sec = reloc_sec;
+               elf_add_reloc(file->elf, reloc);
+
+               idx++;
+       }
+
+       if (elf_rebuild_reloc_section(file->elf, reloc_sec))
+               return -1;
+
+       return 0;
+}
+
 /*
  * Warnings shouldn't be reported for ignored functions.
  */
@@ -589,7 +666,7 @@ static void add_ignores(struct objtool_file *file)
 static const char *uaccess_safe_builtin[] = {
        /* KASAN */
        "kasan_report",
-       "check_memory_region",
+       "kasan_check_range",
        /* KASAN out-of-line */
        "__asan_loadN_noabort",
        "__asan_load1_noabort",
@@ -787,22 +864,16 @@ static int add_jump_destinations(struct objtool_file *file)
                if (!is_static_jump(insn))
                        continue;
 
-               if (insn->offset == FAKE_JUMP_OFFSET)
-                       continue;
-
                reloc = find_reloc_by_dest_range(file->elf, insn->sec,
-                                              insn->offset, insn->len);
+                                                insn->offset, insn->len);
                if (!reloc) {
                        dest_sec = insn->sec;
                        dest_off = arch_jump_destination(insn);
                } else if (reloc->sym->type == STT_SECTION) {
                        dest_sec = reloc->sym->sec;
                        dest_off = arch_dest_reloc_offset(reloc->addend);
-               } else if (reloc->sym->sec->idx) {
-                       dest_sec = reloc->sym->sec;
-                       dest_off = reloc->sym->sym.st_value +
-                                  arch_dest_reloc_offset(reloc->addend);
-               } else if (strstr(reloc->sym->name, "_indirect_thunk_")) {
+               } else if (!strncmp(reloc->sym->name, "__x86_indirect_thunk_", 21) ||
+                          !strncmp(reloc->sym->name, "__x86_retpoline_", 16)) {
                        /*
                         * Retpoline jumps are really dynamic jumps in
                         * disguise, so convert them accordingly.
@@ -814,14 +885,21 @@ static int add_jump_destinations(struct objtool_file *file)
 
                        insn->retpoline_safe = true;
                        continue;
-               } else {
-                       /* external sibling call */
+               } else if (insn->func) {
+                       /* internal or external sibling call (with reloc) */
                        insn->call_dest = reloc->sym;
                        if (insn->call_dest->static_call_tramp) {
                                list_add_tail(&insn->static_call_node,
                                              &file->static_call_list);
                        }
                        continue;
+               } else if (reloc->sym->sec->idx) {
+                       dest_sec = reloc->sym->sec;
+                       dest_off = reloc->sym->sym.st_value +
+                                  arch_dest_reloc_offset(reloc->addend);
+               } else {
+                       /* non-func asm code jumping to another file */
+                       continue;
                }
 
                insn->jump_dest = find_insn(file, dest_sec, dest_off);
@@ -862,15 +940,15 @@ static int add_jump_destinations(struct objtool_file *file)
                         * case where the parent function's only reference to a
                         * subfunction is through a jump table.
                         */
-                       if (!strstr(insn->func->name, ".cold.") &&
-                           strstr(insn->jump_dest->func->name, ".cold.")) {
+                       if (!strstr(insn->func->name, ".cold") &&
+                           strstr(insn->jump_dest->func->name, ".cold")) {
                                insn->func->cfunc = insn->jump_dest->func;
                                insn->jump_dest->func->pfunc = insn->func;
 
                        } else if (insn->jump_dest->func->pfunc != insn->func->pfunc &&
                                   insn->jump_dest->offset == insn->jump_dest->func->offset) {
 
-                               /* internal sibling call */
+                               /* internal sibling call (without reloc) */
                                insn->call_dest = insn->jump_dest->func;
                                if (insn->call_dest->static_call_tramp) {
                                        list_add_tail(&insn->static_call_node,
@@ -969,6 +1047,22 @@ static int add_call_destinations(struct objtool_file *file)
                        insn->type = INSN_NOP;
                }
 
+               if (mcount && !strcmp(insn->call_dest->name, "__fentry__")) {
+                       if (reloc) {
+                               reloc->type = R_NONE;
+                               elf_write_reloc(file->elf, reloc);
+                       }
+
+                       elf_write_insn(file->elf, insn->sec,
+                                      insn->offset, insn->len,
+                                      arch_nop_insn(insn->len));
+
+                       insn->type = INSN_NOP;
+
+                       list_add_tail(&insn->mcount_loc_node,
+                                     &file->mcount_loc_list);
+               }
+
                /*
                 * Whatever stack impact regular CALLs have, should be undone
                 * by the RETURN of the called function.
@@ -983,73 +1077,83 @@ static int add_call_destinations(struct objtool_file *file)
 }
 
 /*
- * The .alternatives section requires some extra special care, over and above
- * what other special sections require:
- *
- * 1. Because alternatives are patched in-place, we need to insert a fake jump
- *    instruction at the end so that validate_branch() skips all the original
- *    replaced instructions when validating the new instruction path.
- *
- * 2. An added wrinkle is that the new instruction length might be zero.  In
- *    that case the old instructions are replaced with noops.  We simulate that
- *    by creating a fake jump as the only new instruction.
- *
- * 3. In some cases, the alternative section includes an instruction which
- *    conditionally jumps to the _end_ of the entry.  We have to modify these
- *    jumps' destinations to point back to .text rather than the end of the
- *    entry in .altinstr_replacement.
+ * The .alternatives section requires some extra special care over and above
+ * other special sections because alternatives are patched in place.
  */
 static int handle_group_alt(struct objtool_file *file,
                            struct special_alt *special_alt,
                            struct instruction *orig_insn,
                            struct instruction **new_insn)
 {
-       static unsigned int alt_group_next_index = 1;
-       struct instruction *last_orig_insn, *last_new_insn, *insn, *fake_jump = NULL;
-       unsigned int alt_group = alt_group_next_index++;
+       struct instruction *last_orig_insn, *last_new_insn = NULL, *insn, *nop = NULL;
+       struct alt_group *orig_alt_group, *new_alt_group;
        unsigned long dest_off;
 
+
+       orig_alt_group = malloc(sizeof(*orig_alt_group));
+       if (!orig_alt_group) {
+               WARN("malloc failed");
+               return -1;
+       }
+       orig_alt_group->cfi = calloc(special_alt->orig_len,
+                                    sizeof(struct cfi_state *));
+       if (!orig_alt_group->cfi) {
+               WARN("calloc failed");
+               return -1;
+       }
+
        last_orig_insn = NULL;
        insn = orig_insn;
        sec_for_each_insn_from(file, insn) {
                if (insn->offset >= special_alt->orig_off + special_alt->orig_len)
                        break;
 
-               insn->alt_group = alt_group;
+               insn->alt_group = orig_alt_group;
                last_orig_insn = insn;
        }
+       orig_alt_group->orig_group = NULL;
+       orig_alt_group->first_insn = orig_insn;
+       orig_alt_group->last_insn = last_orig_insn;
+
+
+       new_alt_group = malloc(sizeof(*new_alt_group));
+       if (!new_alt_group) {
+               WARN("malloc failed");
+               return -1;
+       }
 
-       if (next_insn_same_sec(file, last_orig_insn)) {
-               fake_jump = malloc(sizeof(*fake_jump));
-               if (!fake_jump) {
+       if (special_alt->new_len < special_alt->orig_len) {
+               /*
+                * Insert a fake nop at the end to make the replacement
+                * alt_group the same size as the original.  This is needed to
+                * allow propagate_alt_cfi() to do its magic.  When the last
+                * instruction affects the stack, the instruction after it (the
+                * nop) will propagate the new state to the shared CFI array.
+                */
+               nop = malloc(sizeof(*nop));
+               if (!nop) {
                        WARN("malloc failed");
                        return -1;
                }
-               memset(fake_jump, 0, sizeof(*fake_jump));
-               INIT_LIST_HEAD(&fake_jump->alts);
-               INIT_LIST_HEAD(&fake_jump->stack_ops);
-               init_cfi_state(&fake_jump->cfi);
+               memset(nop, 0, sizeof(*nop));
+               INIT_LIST_HEAD(&nop->alts);
+               INIT_LIST_HEAD(&nop->stack_ops);
+               init_cfi_state(&nop->cfi);
 
-               fake_jump->sec = special_alt->new_sec;
-               fake_jump->offset = FAKE_JUMP_OFFSET;
-               fake_jump->type = INSN_JUMP_UNCONDITIONAL;
-               fake_jump->jump_dest = list_next_entry(last_orig_insn, list);
-               fake_jump->func = orig_insn->func;
+               nop->sec = special_alt->new_sec;
+               nop->offset = special_alt->new_off + special_alt->new_len;
+               nop->len = special_alt->orig_len - special_alt->new_len;
+               nop->type = INSN_NOP;
+               nop->func = orig_insn->func;
+               nop->alt_group = new_alt_group;
+               nop->ignore = orig_insn->ignore_alts;
        }
 
        if (!special_alt->new_len) {
-               if (!fake_jump) {
-                       WARN("%s: empty alternative at end of section",
-                            special_alt->orig_sec->name);
-                       return -1;
-               }
-
-               *new_insn = fake_jump;
-               return 0;
+               *new_insn = nop;
+               goto end;
        }
 
-       last_new_insn = NULL;
-       alt_group = alt_group_next_index++;
        insn = *new_insn;
        sec_for_each_insn_from(file, insn) {
                struct reloc *alt_reloc;
@@ -1061,7 +1165,7 @@ static int handle_group_alt(struct objtool_file *file,
 
                insn->ignore = orig_insn->ignore_alts;
                insn->func = orig_insn->func;
-               insn->alt_group = alt_group;
+               insn->alt_group = new_alt_group;
 
                /*
                 * Since alternative replacement code is copy/pasted by the
@@ -1088,14 +1192,8 @@ static int handle_group_alt(struct objtool_file *file,
                        continue;
 
                dest_off = arch_jump_destination(insn);
-               if (dest_off == special_alt->new_off + special_alt->new_len) {
-                       if (!fake_jump) {
-                               WARN("%s: alternative jump to end of section",
-                                    special_alt->orig_sec->name);
-                               return -1;
-                       }
-                       insn->jump_dest = fake_jump;
-               }
+               if (dest_off == special_alt->new_off + special_alt->new_len)
+                       insn->jump_dest = next_insn_same_sec(file, last_orig_insn);
 
                if (!insn->jump_dest) {
                        WARN_FUNC("can't find alternative jump destination",
@@ -1110,9 +1208,13 @@ static int handle_group_alt(struct objtool_file *file,
                return -1;
        }
 
-       if (fake_jump)
-               list_add(&fake_jump->list, &last_new_insn->list);
-
+       if (nop)
+               list_add(&nop->list, &last_new_insn->list);
+end:
+       new_alt_group->orig_group = orig_alt_group;
+       new_alt_group->first_insn = *new_insn;
+       new_alt_group->last_insn = nop ? : last_new_insn;
+       new_alt_group->cfi = orig_alt_group->cfi;
        return 0;
 }
 
@@ -1404,13 +1506,20 @@ static int add_jump_table_alts(struct objtool_file *file)
        return 0;
 }
 
+static void set_func_state(struct cfi_state *state)
+{
+       state->cfa = initial_func_cfi.cfa;
+       memcpy(&state->regs, &initial_func_cfi.regs,
+              CFI_NUM_REGS * sizeof(struct cfi_reg));
+       state->stack_size = initial_func_cfi.cfa.offset;
+}
+
 static int read_unwind_hints(struct objtool_file *file)
 {
        struct section *sec, *relocsec;
        struct reloc *reloc;
        struct unwind_hint *hint;
        struct instruction *insn;
-       struct cfi_reg *cfa;
        int i;
 
        sec = find_section_by_name(file->elf, ".discard.unwind_hints");
@@ -1445,22 +1554,20 @@ static int read_unwind_hints(struct objtool_file *file)
                        return -1;
                }
 
-               cfa = &insn->cfi.cfa;
+               insn->hint = true;
 
-               if (hint->type == UNWIND_HINT_TYPE_RET_OFFSET) {
-                       insn->ret_offset = hint->sp_offset;
+               if (hint->type == UNWIND_HINT_TYPE_FUNC) {
+                       set_func_state(&insn->cfi);
                        continue;
                }
 
-               insn->hint = true;
-
                if (arch_decode_hint_reg(insn, hint->sp_reg)) {
                        WARN_FUNC("unsupported unwind_hint sp base reg %d",
                                  insn->sec, insn->offset, hint->sp_reg);
                        return -1;
                }
 
-               cfa->offset = hint->sp_offset;
+               insn->cfi.cfa.offset = bswap_if_needed(hint->sp_offset);
                insn->cfi.type = hint->type;
                insn->cfi.end = hint->end;
        }
@@ -1716,27 +1823,18 @@ static bool is_fentry_call(struct instruction *insn)
 
 static bool has_modified_stack_frame(struct instruction *insn, struct insn_state *state)
 {
-       u8 ret_offset = insn->ret_offset;
        struct cfi_state *cfi = &state->cfi;
        int i;
 
        if (cfi->cfa.base != initial_func_cfi.cfa.base || cfi->drap)
                return true;
 
-       if (cfi->cfa.offset != initial_func_cfi.cfa.offset + ret_offset)
+       if (cfi->cfa.offset != initial_func_cfi.cfa.offset)
                return true;
 
-       if (cfi->stack_size != initial_func_cfi.cfa.offset + ret_offset)
+       if (cfi->stack_size != initial_func_cfi.cfa.offset)
                return true;
 
-       /*
-        * If there is a ret offset hint then don't check registers
-        * because a callee-saved register might have been pushed on
-        * the stack.
-        */
-       if (ret_offset)
-               return false;
-
        for (i = 0; i < CFI_NUM_REGS; i++) {
                if (cfi->regs[i].base != initial_func_cfi.regs[i].base ||
                    cfi->regs[i].offset != initial_func_cfi.regs[i].offset)
@@ -1746,12 +1844,20 @@ static bool has_modified_stack_frame(struct instruction *insn, struct insn_state
        return false;
 }
 
+static bool check_reg_frame_pos(const struct cfi_reg *reg,
+                               int expected_offset)
+{
+       return reg->base == CFI_CFA &&
+              reg->offset == expected_offset;
+}
+
 static bool has_valid_stack_frame(struct insn_state *state)
 {
        struct cfi_state *cfi = &state->cfi;
 
-       if (cfi->cfa.base == CFI_BP && cfi->regs[CFI_BP].base == CFI_CFA &&
-           cfi->regs[CFI_BP].offset == -16)
+       if (cfi->cfa.base == CFI_BP &&
+           check_reg_frame_pos(&cfi->regs[CFI_BP], -cfi->cfa.offset) &&
+           check_reg_frame_pos(&cfi->regs[CFI_RA], -cfi->cfa.offset + 8))
                return true;
 
        if (cfi->drap && cfi->regs[CFI_BP].base == CFI_BP)
@@ -1880,8 +1986,7 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
                case OP_SRC_REG:
                        if (op->src.reg == CFI_SP && op->dest.reg == CFI_BP &&
                            cfa->base == CFI_SP &&
-                           regs[CFI_BP].base == CFI_CFA &&
-                           regs[CFI_BP].offset == -cfa->offset) {
+                           check_reg_frame_pos(&regs[CFI_BP], -cfa->offset)) {
 
                                /* mov %rsp, %rbp */
                                cfa->base = op->dest.reg;
@@ -1941,12 +2046,58 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
                                        cfa->offset = -cfi->vals[op->src.reg].offset;
                                        cfi->stack_size = cfa->offset;
 
+                               } else if (cfa->base == CFI_SP &&
+                                          cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+                                          cfi->vals[op->src.reg].offset == cfa->offset) {
+
+                                       /*
+                                        * Stack swizzle:
+                                        *
+                                        * 1: mov %rsp, (%[tos])
+                                        * 2: mov %[tos], %rsp
+                                        *    ...
+                                        * 3: pop %rsp
+                                        *
+                                        * Where:
+                                        *
+                                        * 1 - places a pointer to the previous
+                                        *     stack at the Top-of-Stack of the
+                                        *     new stack.
+                                        *
+                                        * 2 - switches to the new stack.
+                                        *
+                                        * 3 - pops the Top-of-Stack to restore
+                                        *     the original stack.
+                                        *
+                                        * Note: we set base to SP_INDIRECT
+                                        * here and preserve offset. Therefore
+                                        * when the unwinder reaches ToS it
+                                        * will dereference SP and then add the
+                                        * offset to find the next frame, IOW:
+                                        * (%rsp) + offset.
+                                        */
+                                       cfa->base = CFI_SP_INDIRECT;
+
                                } else {
                                        cfa->base = CFI_UNDEFINED;
                                        cfa->offset = 0;
                                }
                        }
 
+                       else if (op->dest.reg == CFI_SP &&
+                                cfi->vals[op->src.reg].base == CFI_SP_INDIRECT &&
+                                cfi->vals[op->src.reg].offset == cfa->offset) {
+
+                               /*
+                                * The same stack swizzle case 2) as above. But
+                                * because we can't change cfa->base, case 3)
+                                * will become a regular POP. Pretend we're a
+                                * PUSH so things don't go unbalanced.
+                                */
+                               cfi->stack_size += 8;
+                       }
+
+
                        break;
 
                case OP_SRC_ADD:
@@ -1966,6 +2117,17 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
                                break;
                        }
 
+                       if (!cfi->drap && op->src.reg == CFI_SP &&
+                           op->dest.reg == CFI_BP && cfa->base == CFI_SP &&
+                           check_reg_frame_pos(&regs[CFI_BP], -cfa->offset + op->src.offset)) {
+
+                               /* lea disp(%rsp), %rbp */
+                               cfa->base = CFI_BP;
+                               cfa->offset -= op->src.offset;
+                               cfi->bp_scratch = false;
+                               break;
+                       }
+
                        if (op->src.reg == CFI_SP && cfa->base == CFI_SP) {
 
                                /* drap: lea disp(%rsp), %drap */
@@ -2032,6 +2194,13 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
 
                case OP_SRC_POP:
                case OP_SRC_POPF:
+                       if (op->dest.reg == CFI_SP && cfa->base == CFI_SP_INDIRECT) {
+
+                               /* pop %rsp; # restore from a stack swizzle */
+                               cfa->base = CFI_SP;
+                               break;
+                       }
+
                        if (!cfi->drap && op->dest.reg == cfa->base) {
 
                                /* pop %rbp */
@@ -2060,6 +2229,14 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
                        break;
 
                case OP_SRC_REG_INDIRECT:
+                       if (!cfi->drap && op->dest.reg == cfa->base &&
+                           op->dest.reg == CFI_BP) {
+
+                               /* mov disp(%rsp), %rbp */
+                               cfa->base = CFI_SP;
+                               cfa->offset = cfi->stack_size;
+                       }
+
                        if (cfi->drap && op->src.reg == CFI_BP &&
                            op->src.offset == cfi->drap_offset) {
 
@@ -2081,6 +2258,12 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
                                /* mov disp(%rbp), %reg */
                                /* mov disp(%rsp), %reg */
                                restore_reg(cfi, op->dest.reg);
+
+                       } else if (op->src.reg == CFI_SP &&
+                                  op->src.offset == regs[op->dest.reg].offset + cfi->stack_size) {
+
+                               /* mov disp(%rsp), %reg */
+                               restore_reg(cfi, op->dest.reg);
                        }
 
                        break;
@@ -2158,6 +2341,18 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
                        /* mov reg, disp(%rsp) */
                        save_reg(cfi, op->src.reg, CFI_CFA,
                                 op->dest.offset - cfi->cfa.offset);
+
+               } else if (op->dest.reg == CFI_SP) {
+
+                       /* mov reg, disp(%rsp) */
+                       save_reg(cfi, op->src.reg, CFI_CFA,
+                                op->dest.offset - cfi->stack_size);
+
+               } else if (op->src.reg == CFI_SP && op->dest.offset == 0) {
+
+                       /* mov %rsp, (%reg); # setup a stack swizzle. */
+                       cfi->vals[op->dest.reg].base = CFI_SP_INDIRECT;
+                       cfi->vals[op->dest.reg].offset = cfa->offset;
                }
 
                break;
@@ -2205,22 +2400,50 @@ static int update_cfi_state(struct instruction *insn, struct cfi_state *cfi,
        return 0;
 }
 
+/*
+ * The stack layouts of alternatives instructions can sometimes diverge when
+ * they have stack modifications.  That's fine as long as the potential stack
+ * layouts don't conflict at any given potential instruction boundary.
+ *
+ * Flatten the CFIs of the different alternative code streams (both original
+ * and replacement) into a single shared CFI array which can be used to detect
+ * conflicts and nicely feed a linear array of ORC entries to the unwinder.
+ */
+static int propagate_alt_cfi(struct objtool_file *file, struct instruction *insn)
+{
+       struct cfi_state **alt_cfi;
+       int group_off;
+
+       if (!insn->alt_group)
+               return 0;
+
+       alt_cfi = insn->alt_group->cfi;
+       group_off = insn->offset - insn->alt_group->first_insn->offset;
+
+       if (!alt_cfi[group_off]) {
+               alt_cfi[group_off] = &insn->cfi;
+       } else {
+               if (memcmp(alt_cfi[group_off], &insn->cfi, sizeof(struct cfi_state))) {
+                       WARN_FUNC("stack layout conflict in alternatives",
+                                 insn->sec, insn->offset);
+                       return -1;
+               }
+       }
+
+       return 0;
+}
+
 static int handle_insn_ops(struct instruction *insn, struct insn_state *state)
 {
        struct stack_op *op;
 
        list_for_each_entry(op, &insn->stack_ops, list) {
-               struct cfi_state old_cfi = state->cfi;
-               int res;
 
-               res = update_cfi_state(insn, &state->cfi, op);
-               if (res)
-                       return res;
+               if (update_cfi_state(insn, &state->cfi, op))
+                       return 1;
 
-               if (insn->alt_group && memcmp(&state->cfi, &old_cfi, sizeof(struct cfi_state))) {
-                       WARN_FUNC("alternative modifies stack", insn->sec, insn->offset);
-                       return -1;
-               }
+               if (!insn->alt_group)
+                       continue;
 
                if (op->dest.type == OP_DEST_PUSHF) {
                        if (!state->uaccess_stack) {
@@ -2410,28 +2633,20 @@ static int validate_return(struct symbol *func, struct instruction *insn, struct
        return 0;
 }
 
-/*
- * Alternatives should not contain any ORC entries, this in turn means they
- * should not contain any CFI ops, which implies all instructions should have
- * the same same CFI state.
- *
- * It is possible to constuct alternatives that have unreachable holes that go
- * unreported (because they're NOPs), such holes would result in CFI_UNDEFINED
- * states which then results in ORC entries, which we just said we didn't want.
- *
- * Avoid them by copying the CFI entry of the first instruction into the whole
- * alternative.
- */
-static void fill_alternative_cfi(struct objtool_file *file, struct instruction *insn)
+static struct instruction *next_insn_to_validate(struct objtool_file *file,
+                                                struct instruction *insn)
 {
-       struct instruction *first_insn = insn;
-       int alt_group = insn->alt_group;
+       struct alt_group *alt_group = insn->alt_group;
 
-       sec_for_each_insn_continue(file, insn) {
-               if (insn->alt_group != alt_group)
-                       break;
-               insn->cfi = first_insn->cfi;
-       }
+       /*
+        * Simulate the fact that alternatives are patched in-place.  When the
+        * end of a replacement alt_group is reached, redirect objtool flow to
+        * the end of the original alt_group.
+        */
+       if (alt_group && insn == alt_group->last_insn && alt_group->orig_group)
+               return next_insn_same_sec(file, alt_group->orig_group->last_insn);
+
+       return next_insn_same_sec(file, insn);
 }
 
 /*
@@ -2452,7 +2667,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
        sec = insn->sec;
 
        while (1) {
-               next_insn = next_insn_same_sec(file, insn);
+               next_insn = next_insn_to_validate(file, insn);
 
                if (file->c_file && func && insn->func && func != insn->func->pfunc) {
                        WARN("%s() falls through to next function %s()",
@@ -2485,6 +2700,9 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
                insn->visited |= visited;
 
+               if (propagate_alt_cfi(file, insn))
+                       return 1;
+
                if (!insn->ignore_alts && !list_empty(&insn->alts)) {
                        bool skip_orig = false;
 
@@ -2500,9 +2718,6 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                                }
                        }
 
-                       if (insn->alt_group)
-                               fill_alternative_cfi(file, insn);
-
                        if (skip_orig)
                                return 0;
                }
@@ -2540,7 +2755,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
                case INSN_JUMP_CONDITIONAL:
                case INSN_JUMP_UNCONDITIONAL:
-                       if (func && is_sibling_call(insn)) {
+                       if (is_sibling_call(insn)) {
                                ret = validate_sibling_call(insn, &state);
                                if (ret)
                                        return ret;
@@ -2562,7 +2777,7 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
 
                case INSN_JUMP_DYNAMIC:
                case INSN_JUMP_DYNAMIC_CONDITIONAL:
-                       if (func && is_sibling_call(insn)) {
+                       if (is_sibling_call(insn)) {
                                ret = validate_sibling_call(insn, &state);
                                if (ret)
                                        return ret;
@@ -2605,15 +2820,19 @@ static int validate_branch(struct objtool_file *file, struct symbol *func,
                        break;
 
                case INSN_STD:
-                       if (state.df)
+                       if (state.df) {
                                WARN_FUNC("recursive STD", sec, insn->offset);
+                               return 1;
+                       }
 
                        state.df = true;
                        break;
 
                case INSN_CLD:
-                       if (!state.df && func)
+                       if (!state.df && func) {
                                WARN_FUNC("redundant CLD", sec, insn->offset);
+                               return 1;
+                       }
 
                        state.df = false;
                        break;
@@ -2736,9 +2955,6 @@ static bool ignore_unreachable_insn(struct objtool_file *file, struct instructio
            !strcmp(insn->sec->name, ".altinstr_aux"))
                return true;
 
-       if (insn->type == INSN_JUMP_UNCONDITIONAL && insn->offset == FAKE_JUMP_OFFSET)
-               return true;
-
        if (!insn->func)
                return false;
 
@@ -2824,10 +3040,7 @@ static int validate_section(struct objtool_file *file, struct section *sec)
                        continue;
 
                init_insn_state(&state, sec);
-               state.cfi.cfa = initial_func_cfi.cfa;
-               memcpy(&state.cfi.regs, &initial_func_cfi.regs,
-                      CFI_NUM_REGS * sizeof(struct cfi_reg));
-               state.cfi.stack_size = initial_func_cfi.cfa.offset;
+               set_func_state(&state.cfi);
 
                warnings += validate_symbol(file, sec, func, &state);
        }
@@ -2940,6 +3153,13 @@ int check(struct objtool_file *file)
                goto out;
        warnings += ret;
 
+       if (mcount) {
+               ret = create_mcount_loc_sections(file);
+               if (ret < 0)
+                       goto out;
+               warnings += ret;
+       }
+
 out:
        /*
         *  For now, don't fail the kernel build on fatal warnings.  These
index e85988c..93fa833 100644 (file)
 #include <string.h>
 #include <unistd.h>
 #include <errno.h>
-#include "builtin.h"
+#include <objtool/builtin.h>
 
-#include "elf.h"
-#include "warn.h"
+#include <objtool/elf.h>
+#include <objtool/warn.h>
 
 #define MAX_NAME_LEN 128
 
@@ -814,25 +814,27 @@ static int elf_rebuild_rel_reloc_section(struct section *sec, int nr)
 {
        struct reloc *reloc;
        int idx = 0, size;
-       GElf_Rel *relocs;
+       void *buf;
 
        /* Allocate a buffer for relocations */
-       size = nr * sizeof(*relocs);
-       relocs = malloc(size);
-       if (!relocs) {
+       size = nr * sizeof(GElf_Rel);
+       buf = malloc(size);
+       if (!buf) {
                perror("malloc");
                return -1;
        }
 
-       sec->data->d_buf = relocs;
+       sec->data->d_buf = buf;
        sec->data->d_size = size;
+       sec->data->d_type = ELF_T_REL;
 
        sec->sh.sh_size = size;
 
        idx = 0;
        list_for_each_entry(reloc, &sec->reloc_list, list) {
-               relocs[idx].r_offset = reloc->offset;
-               relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+               reloc->rel.r_offset = reloc->offset;
+               reloc->rel.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+               gelf_update_rel(sec->data, idx, &reloc->rel);
                idx++;
        }
 
@@ -843,26 +845,28 @@ static int elf_rebuild_rela_reloc_section(struct section *sec, int nr)
 {
        struct reloc *reloc;
        int idx = 0, size;
-       GElf_Rela *relocs;
+       void *buf;
 
        /* Allocate a buffer for relocations with addends */
-       size = nr * sizeof(*relocs);
-       relocs = malloc(size);
-       if (!relocs) {
+       size = nr * sizeof(GElf_Rela);
+       buf = malloc(size);
+       if (!buf) {
                perror("malloc");
                return -1;
        }
 
-       sec->data->d_buf = relocs;
+       sec->data->d_buf = buf;
        sec->data->d_size = size;
+       sec->data->d_type = ELF_T_RELA;
 
        sec->sh.sh_size = size;
 
        idx = 0;
        list_for_each_entry(reloc, &sec->reloc_list, list) {
-               relocs[idx].r_offset = reloc->offset;
-               relocs[idx].r_addend = reloc->addend;
-               relocs[idx].r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+               reloc->rela.r_offset = reloc->offset;
+               reloc->rela.r_addend = reloc->addend;
+               reloc->rela.r_info = GELF_R_INFO(reloc->sym->idx, reloc->type);
+               gelf_update_rela(sec->data, idx, &reloc->rela);
                idx++;
        }
 
similarity index 94%
rename from tools/objtool/arch.h
rename to tools/objtool/include/objtool/arch.h
index 4a84c30..6ff0685 100644 (file)
@@ -8,12 +8,8 @@
 
 #include <stdbool.h>
 #include <linux/list.h>
-#include "objtool.h"
-#include "cfi.h"
-
-#ifdef INSN_USE_ORC
-#include <asm/orc_types.h>
-#endif
+#include <objtool/objtool.h>
+#include <objtool/cfi.h>
 
 enum insn_type {
        INSN_JUMP_CONDITIONAL,
similarity index 88%
rename from tools/objtool/builtin.h
rename to tools/objtool/include/objtool/builtin.h
index 85c979c..2502bb2 100644 (file)
@@ -8,7 +8,7 @@
 #include <subcmd/parse-options.h>
 
 extern const struct option check_options[];
-extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux;
+extern bool no_fp, no_unreachable, retpoline, module, backtrace, uaccess, stats, validate_dup, vmlinux, mcount, noinstr;
 
 extern int cmd_check(int argc, const char **argv);
 extern int cmd_orc(int argc, const char **argv);
similarity index 96%
rename from tools/objtool/cfi.h
rename to tools/objtool/include/objtool/cfi.h
index c7c59c6..fd5cb0b 100644 (file)
@@ -6,7 +6,7 @@
 #ifndef _OBJTOOL_CFI_H
 #define _OBJTOOL_CFI_H
 
-#include "cfi_regs.h"
+#include <arch/cfi_regs.h>
 
 #define CFI_UNDEFINED          -1
 #define CFI_CFA                        -2
similarity index 63%
rename from tools/objtool/check.h
rename to tools/objtool/include/objtool/check.h
index 5ec00a4..f5be798 100644 (file)
@@ -7,8 +7,8 @@
 #define _CHECK_H
 
 #include <stdbool.h>
-#include "cfi.h"
-#include "arch.h"
+#include <objtool/cfi.h>
+#include <objtool/arch.h>
 
 struct insn_state {
        struct cfi_state cfi;
@@ -19,10 +19,28 @@ struct insn_state {
        s8 instr;
 };
 
+struct alt_group {
+       /*
+        * Pointer from a replacement group to the original group.  NULL if it
+        * *is* the original group.
+        */
+       struct alt_group *orig_group;
+
+       /* First and last instructions in the group */
+       struct instruction *first_insn, *last_insn;
+
+       /*
+        * Byte-offset-addressed len-sized array of pointers to CFI structs.
+        * This is shared with the other alt_groups in the same alternative.
+        */
+       struct cfi_state **cfi;
+};
+
 struct instruction {
        struct list_head list;
        struct hlist_node hash;
        struct list_head static_call_node;
+       struct list_head mcount_loc_node;
        struct section *sec;
        unsigned long offset;
        unsigned int len;
@@ -33,8 +51,7 @@ struct instruction {
        bool retpoline_safe;
        s8 instr;
        u8 visited;
-       u8 ret_offset;
-       int alt_group;
+       struct alt_group *alt_group;
        struct symbol *call_dest;
        struct instruction *jump_dest;
        struct instruction *first_jump_src;
@@ -43,9 +60,6 @@ struct instruction {
        struct symbol *func;
        struct list_head stack_ops;
        struct cfi_state cfi;
-#ifdef INSN_USE_ORC
-       struct orc_entry orc;
-#endif
 };
 
 static inline bool is_static_jump(struct instruction *insn)
@@ -54,6 +68,17 @@ static inline bool is_static_jump(struct instruction *insn)
               insn->type == INSN_JUMP_UNCONDITIONAL;
 }
 
+static inline bool is_dynamic_jump(struct instruction *insn)
+{
+       return insn->type == INSN_JUMP_DYNAMIC ||
+              insn->type == INSN_JUMP_DYNAMIC_CONDITIONAL;
+}
+
+static inline bool is_jump(struct instruction *insn)
+{
+       return is_static_jump(insn) || is_dynamic_jump(insn);
+}
+
 struct instruction *find_insn(struct objtool_file *file,
                              struct section *sec, unsigned long offset);
 
diff --git a/tools/objtool/include/objtool/endianness.h b/tools/objtool/include/objtool/endianness.h
new file mode 100644 (file)
index 0000000..1024134
--- /dev/null
@@ -0,0 +1,38 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#ifndef _OBJTOOL_ENDIANNESS_H
+#define _OBJTOOL_ENDIANNESS_H
+
+#include <arch/endianness.h>
+#include <linux/kernel.h>
+#include <endian.h>
+
+#ifndef __TARGET_BYTE_ORDER
+#error undefined arch __TARGET_BYTE_ORDER
+#endif
+
+#if __BYTE_ORDER != __TARGET_BYTE_ORDER
+#define __NEED_BSWAP 1
+#else
+#define __NEED_BSWAP 0
+#endif
+
+/*
+ * Does a byte swap if the target endianness doesn't match the host, i.e.
+ * cross compilation for little endian on big endian and vice versa.
+ * To be used for converting multi-byte values which are read from, or are
+ * about to be written to, a target-native-endianness ELF file.
+ */
+#define bswap_if_needed(val)                                           \
+({                                                                     \
+       __typeof__(val) __ret;                                          \
+       switch (sizeof(val)) {                                          \
+       case 8: __ret = __NEED_BSWAP ? bswap_64(val) : (val); break;    \
+       case 4: __ret = __NEED_BSWAP ? bswap_32(val) : (val); break;    \
+       case 2: __ret = __NEED_BSWAP ? bswap_16(val) : (val); break;    \
+       default:                                                        \
+               BUILD_BUG(); break;                                     \
+       }                                                               \
+       __ret;                                                          \
+})
+
+#endif /* _OBJTOOL_ENDIANNESS_H */
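
A sketch of what the macro reduces to in practice; illustration only, read_target_u32() is not part of the patch and it assumes the same __NEED_BSWAP setup as the header above:

    #include <stdint.h>
    #include <string.h>
    #include <byteswap.h>

    static uint32_t read_target_u32(const void *buf)
    {
            uint32_t v;

            memcpy(&v, buf, sizeof(v));     /* raw bytes from the target ELF */
    #if __NEED_BSWAP
            v = bswap_32(v);                /* host and target orders differ */
    #endif
            return v;                       /* now in host byte order */
    }

The BUILD_BUG() default turns an unsupported operand size into a compile-time failure rather than a silent truncation.
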
similarity index 84%
rename from tools/objtool/objtool.h
rename to tools/objtool/include/objtool/objtool.h
index 4125d45..e68e374 100644 (file)
@@ -10,7 +10,7 @@
 #include <linux/list.h>
 #include <linux/hashtable.h>
 
-#include "elf.h"
+#include <objtool/elf.h>
 
 #define __weak __attribute__((weak))
 
@@ -19,6 +19,7 @@ struct objtool_file {
        struct list_head insn_list;
        DECLARE_HASHTABLE(insn_hash, 20);
        struct list_head static_call_list;
+       struct list_head mcount_loc_list;
        bool ignore_unreachables, c_file, hints, rodata;
 };
 
@@ -26,7 +27,6 @@ struct objtool_file *objtool_open_read(const char *_objname);
 
 int check(struct objtool_file *file);
 int orc_dump(const char *objname);
-int create_orc(struct objtool_file *file);
-int create_orc_sections(struct objtool_file *file);
+int orc_create(struct objtool_file *file);
 
 #endif /* _OBJTOOL_H */
similarity index 94%
rename from tools/objtool/special.h
rename to tools/objtool/include/objtool/special.h
index abddf38..8a09f4e 100644 (file)
@@ -7,8 +7,8 @@
 #define _SPECIAL_H
 
 #include <stdbool.h>
-#include "check.h"
-#include "elf.h"
+#include <objtool/check.h>
+#include <objtool/elf.h>
 
 #define C_JUMP_TABLE_SECTION ".rodata..c_jump_table"
 
similarity index 98%
rename from tools/objtool/warn.h
rename to tools/objtool/include/objtool/warn.h
index 7799f60..d99c467 100644 (file)
@@ -11,7 +11,7 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <fcntl.h>
-#include "elf.h"
+#include <objtool/elf.h>
 
 extern const char *objname;
 
index 9df0cd8..7b97ce4 100644 (file)
@@ -21,9 +21,9 @@
 #include <subcmd/pager.h>
 #include <linux/kernel.h>
 
-#include "builtin.h"
-#include "objtool.h"
-#include "warn.h"
+#include <objtool/builtin.h>
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
 
 struct cmd_struct {
        const char *name;
@@ -62,6 +62,7 @@ struct objtool_file *objtool_open_read(const char *_objname)
        INIT_LIST_HEAD(&file.insn_list);
        hash_init(file.insn_hash);
        INIT_LIST_HEAD(&file.static_call_list);
+       INIT_LIST_HEAD(&file.mcount_loc_list);
        file.c_file = !vmlinux && find_section_by_name(file.elf, ".comment");
        file.ignore_unreachables = no_unreachable;
        file.hints = false;
index 5e6a953..f5a8508 100644 (file)
@@ -6,8 +6,9 @@
 #include <unistd.h>
 #include <linux/objtool.h>
 #include <asm/orc_types.h>
-#include "objtool.h"
-#include "warn.h"
+#include <objtool/objtool.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
 
 static const char *reg_name(unsigned int reg)
 {
@@ -54,7 +55,7 @@ static void print_reg(unsigned int reg, int offset)
        if (reg == ORC_REG_BP_INDIRECT)
                printf("(bp%+d)", offset);
        else if (reg == ORC_REG_SP_INDIRECT)
-               printf("(sp%+d)", offset);
+               printf("(sp)%+d", offset);
        else if (reg == ORC_REG_UNDEFINED)
                printf("(und)");
        else
@@ -197,11 +198,11 @@ int orc_dump(const char *_objname)
 
                printf(" sp:");
 
-               print_reg(orc[i].sp_reg, orc[i].sp_offset);
+               print_reg(orc[i].sp_reg, bswap_if_needed(orc[i].sp_offset));
 
                printf(" bp:");
 
-               print_reg(orc[i].bp_reg, orc[i].bp_offset);
+               print_reg(orc[i].bp_reg, bswap_if_needed(orc[i].bp_offset));
 
                printf(" type:%s end:%d\n",
                       orc_type_name(orc[i].type), orc[i].end);
index 9ce68b3..738aa50 100644 (file)
@@ -9,93 +9,91 @@
 #include <linux/objtool.h>
 #include <asm/orc_types.h>
 
-#include "check.h"
-#include "warn.h"
+#include <objtool/check.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
 
-int create_orc(struct objtool_file *file)
+static int init_orc_entry(struct orc_entry *orc, struct cfi_state *cfi)
 {
-       struct instruction *insn;
+       struct instruction *insn = container_of(cfi, struct instruction, cfi);
+       struct cfi_reg *bp = &cfi->regs[CFI_BP];
 
-       for_each_insn(file, insn) {
-               struct orc_entry *orc = &insn->orc;
-               struct cfi_reg *cfa = &insn->cfi.cfa;
-               struct cfi_reg *bp = &insn->cfi.regs[CFI_BP];
+       memset(orc, 0, sizeof(*orc));
 
-               if (!insn->sec->text)
-                       continue;
-
-               orc->end = insn->cfi.end;
-
-               if (cfa->base == CFI_UNDEFINED) {
-                       orc->sp_reg = ORC_REG_UNDEFINED;
-                       continue;
-               }
+       orc->end = cfi->end;
 
-               switch (cfa->base) {
-               case CFI_SP:
-                       orc->sp_reg = ORC_REG_SP;
-                       break;
-               case CFI_SP_INDIRECT:
-                       orc->sp_reg = ORC_REG_SP_INDIRECT;
-                       break;
-               case CFI_BP:
-                       orc->sp_reg = ORC_REG_BP;
-                       break;
-               case CFI_BP_INDIRECT:
-                       orc->sp_reg = ORC_REG_BP_INDIRECT;
-                       break;
-               case CFI_R10:
-                       orc->sp_reg = ORC_REG_R10;
-                       break;
-               case CFI_R13:
-                       orc->sp_reg = ORC_REG_R13;
-                       break;
-               case CFI_DI:
-                       orc->sp_reg = ORC_REG_DI;
-                       break;
-               case CFI_DX:
-                       orc->sp_reg = ORC_REG_DX;
-                       break;
-               default:
-                       WARN_FUNC("unknown CFA base reg %d",
-                                 insn->sec, insn->offset, cfa->base);
-                       return -1;
-               }
+       if (cfi->cfa.base == CFI_UNDEFINED) {
+               orc->sp_reg = ORC_REG_UNDEFINED;
+               return 0;
+       }
 
-               switch(bp->base) {
-               case CFI_UNDEFINED:
-                       orc->bp_reg = ORC_REG_UNDEFINED;
-                       break;
-               case CFI_CFA:
-                       orc->bp_reg = ORC_REG_PREV_SP;
-                       break;
-               case CFI_BP:
-                       orc->bp_reg = ORC_REG_BP;
-                       break;
-               default:
-                       WARN_FUNC("unknown BP base reg %d",
-                                 insn->sec, insn->offset, bp->base);
-                       return -1;
-               }
+       switch (cfi->cfa.base) {
+       case CFI_SP:
+               orc->sp_reg = ORC_REG_SP;
+               break;
+       case CFI_SP_INDIRECT:
+               orc->sp_reg = ORC_REG_SP_INDIRECT;
+               break;
+       case CFI_BP:
+               orc->sp_reg = ORC_REG_BP;
+               break;
+       case CFI_BP_INDIRECT:
+               orc->sp_reg = ORC_REG_BP_INDIRECT;
+               break;
+       case CFI_R10:
+               orc->sp_reg = ORC_REG_R10;
+               break;
+       case CFI_R13:
+               orc->sp_reg = ORC_REG_R13;
+               break;
+       case CFI_DI:
+               orc->sp_reg = ORC_REG_DI;
+               break;
+       case CFI_DX:
+               orc->sp_reg = ORC_REG_DX;
+               break;
+       default:
+               WARN_FUNC("unknown CFA base reg %d",
+                         insn->sec, insn->offset, cfi->cfa.base);
+               return -1;
+       }
 
-               orc->sp_offset = cfa->offset;
-               orc->bp_offset = bp->offset;
-               orc->type = insn->cfi.type;
+       switch (bp->base) {
+       case CFI_UNDEFINED:
+               orc->bp_reg = ORC_REG_UNDEFINED;
+               break;
+       case CFI_CFA:
+               orc->bp_reg = ORC_REG_PREV_SP;
+               break;
+       case CFI_BP:
+               orc->bp_reg = ORC_REG_BP;
+               break;
+       default:
+               WARN_FUNC("unknown BP base reg %d",
+                         insn->sec, insn->offset, bp->base);
+               return -1;
        }
 
+       orc->sp_offset = cfi->cfa.offset;
+       orc->bp_offset = bp->offset;
+       orc->type = cfi->type;
+
        return 0;
 }
 
-static int create_orc_entry(struct elf *elf, struct section *u_sec, struct section *ip_relocsec,
-                               unsigned int idx, struct section *insn_sec,
-                               unsigned long insn_off, struct orc_entry *o)
+static int write_orc_entry(struct elf *elf, struct section *orc_sec,
+                          struct section *ip_rsec, unsigned int idx,
+                          struct section *insn_sec, unsigned long insn_off,
+                          struct orc_entry *o)
 {
        struct orc_entry *orc;
        struct reloc *reloc;
 
        /* populate ORC data */
-       orc = (struct orc_entry *)u_sec->data->d_buf + idx;
+       orc = (struct orc_entry *)orc_sec->data->d_buf + idx;
        memcpy(orc, o, sizeof(*orc));
+       orc->sp_offset = bswap_if_needed(orc->sp_offset);
+       orc->bp_offset = bswap_if_needed(orc->bp_offset);
 
        /* populate reloc for ip */
        reloc = malloc(sizeof(*reloc));
@@ -114,102 +112,149 @@ static int create_orc_entry(struct elf *elf, struct section *u_sec, struct secti
 
        reloc->type = R_X86_64_PC32;
        reloc->offset = idx * sizeof(int);
-       reloc->sec = ip_relocsec;
+       reloc->sec = ip_rsec;
 
        elf_add_reloc(elf, reloc);
 
        return 0;
 }
 
-int create_orc_sections(struct objtool_file *file)
-{
-       struct instruction *insn, *prev_insn;
-       struct section *sec, *u_sec, *ip_relocsec;
-       unsigned int idx;
+struct orc_list_entry {
+       struct list_head list;
+       struct orc_entry orc;
+       struct section *insn_sec;
+       unsigned long insn_off;
+};
 
-       struct orc_entry empty = {
-               .sp_reg = ORC_REG_UNDEFINED,
-               .bp_reg  = ORC_REG_UNDEFINED,
-               .type    = UNWIND_HINT_TYPE_CALL,
-       };
+static int orc_list_add(struct list_head *orc_list, struct orc_entry *orc,
+                       struct section *sec, unsigned long offset)
+{
+       struct orc_list_entry *entry = malloc(sizeof(*entry));
 
-       sec = find_section_by_name(file->elf, ".orc_unwind");
-       if (sec) {
-               WARN("file already has .orc_unwind section, skipping");
+       if (!entry) {
+               WARN("malloc failed");
                return -1;
        }
 
-       /* count the number of needed orcs */
-       idx = 0;
-       for_each_sec(file, sec) {
-               if (!sec->text)
-                       continue;
-
-               prev_insn = NULL;
-               sec_for_each_insn(file, sec, insn) {
-                       if (!prev_insn ||
-                           memcmp(&insn->orc, &prev_insn->orc,
-                                  sizeof(struct orc_entry))) {
-                               idx++;
-                       }
-                       prev_insn = insn;
-               }
-
-               /* section terminator */
-               if (prev_insn)
-                       idx++;
-       }
-       if (!idx)
-               return -1;
+       entry->orc      = *orc;
+       entry->insn_sec = sec;
+       entry->insn_off = offset;
 
+       list_add_tail(&entry->list, orc_list);
+       return 0;
+}
 
-       /* create .orc_unwind_ip and .rela.orc_unwind_ip sections */
-       sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), idx);
-       if (!sec)
-               return -1;
+static unsigned long alt_group_len(struct alt_group *alt_group)
+{
+       return alt_group->last_insn->offset +
+              alt_group->last_insn->len -
+              alt_group->first_insn->offset;
+}
 
-       ip_relocsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
-       if (!ip_relocsec)
-               return -1;
+int orc_create(struct objtool_file *file)
+{
+       struct section *sec, *ip_rsec, *orc_sec;
+       unsigned int nr = 0, idx = 0;
+       struct orc_list_entry *entry;
+       struct list_head orc_list;
 
-       /* create .orc_unwind section */
-       u_sec = elf_create_section(file->elf, ".orc_unwind", 0,
-                                  sizeof(struct orc_entry), idx);
+       struct orc_entry null = {
+               .sp_reg  = ORC_REG_UNDEFINED,
+               .bp_reg  = ORC_REG_UNDEFINED,
+               .type    = UNWIND_HINT_TYPE_CALL,
+       };
 
-       /* populate sections */
-       idx = 0;
+       /* Build a deduplicated list of ORC entries: */
+       INIT_LIST_HEAD(&orc_list);
        for_each_sec(file, sec) {
+               struct orc_entry orc, prev_orc = {0};
+               struct instruction *insn;
+               bool empty = true;
+
                if (!sec->text)
                        continue;
 
-               prev_insn = NULL;
                sec_for_each_insn(file, sec, insn) {
-                       if (!prev_insn || memcmp(&insn->orc, &prev_insn->orc,
-                                                sizeof(struct orc_entry))) {
+                       struct alt_group *alt_group = insn->alt_group;
+                       int i;
 
-                               if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
-                                                    insn->sec, insn->offset,
-                                                    &insn->orc))
+                       if (!alt_group) {
+                               if (init_orc_entry(&orc, &insn->cfi))
                                        return -1;
+                               if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+                                       continue;
+                               if (orc_list_add(&orc_list, &orc, sec,
+                                                insn->offset))
+                                       return -1;
+                               nr++;
+                               prev_orc = orc;
+                               empty = false;
+                               continue;
+                       }
 
-                               idx++;
+                       /*
+                        * Alternatives can have different stack layout
+                        * possibilities (but they shouldn't conflict).
+                        * Instead of traversing the instructions, use the
+                        * alt_group's flattened byte-offset-addressed CFI
+                        * array.
+                        */
+                       for (i = 0; i < alt_group_len(alt_group); i++) {
+                               struct cfi_state *cfi = alt_group->cfi[i];
+                               if (!cfi)
+                                       continue;
+                               if (init_orc_entry(&orc, cfi))
+                                       return -1;
+                               if (!memcmp(&prev_orc, &orc, sizeof(orc)))
+                                       continue;
+                               if (orc_list_add(&orc_list, &orc, insn->sec,
+                                                insn->offset + i))
+                                       return -1;
+                               nr++;
+                               prev_orc = orc;
+                               empty = false;
                        }
-                       prev_insn = insn;
-               }
 
-               /* section terminator */
-               if (prev_insn) {
-                       if (create_orc_entry(file->elf, u_sec, ip_relocsec, idx,
-                                            prev_insn->sec,
-                                            prev_insn->offset + prev_insn->len,
-                                            &empty))
-                               return -1;
+                       /* Skip to the end of the alt_group */
+                       insn = alt_group->last_insn;
+               }
 
-                       idx++;
+               /* Add a section terminator */
+               if (!empty) {
+                       orc_list_add(&orc_list, &null, sec, sec->len);
+                       nr++;
                }
        }
+       if (!nr)
+               return 0;
+
+       /* Create .orc_unwind, .orc_unwind_ip and .rela.orc_unwind_ip sections: */
+       sec = find_section_by_name(file->elf, ".orc_unwind");
+       if (sec) {
+               WARN("file already has .orc_unwind section, skipping");
+               return -1;
+       }
+       orc_sec = elf_create_section(file->elf, ".orc_unwind", 0,
+                                    sizeof(struct orc_entry), nr);
+       if (!orc_sec)
+               return -1;
+
+       sec = elf_create_section(file->elf, ".orc_unwind_ip", 0, sizeof(int), nr);
+       if (!sec)
+               return -1;
+       ip_rsec = elf_create_reloc_section(file->elf, sec, SHT_RELA);
+       if (!ip_rsec)
+               return -1;
+
+       /* Write ORC entries to sections: */
+       list_for_each_entry(entry, &orc_list, list) {
+               if (write_orc_entry(file->elf, orc_sec, ip_rsec, idx++,
+                                   entry->insn_sec, entry->insn_off,
+                                   &entry->orc))
+                       return -1;
+       }
 
-       if (elf_rebuild_reloc_section(file->elf, ip_relocsec))
+       if (elf_rebuild_reloc_section(file->elf, ip_rsec))
                return -1;
 
        return 0;
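
The rewrite above splits table generation into two passes: first build a deduplicated in-memory list (one entry per run of identical unwind states, plus a terminator per text section), then write the entries and their ip relocations out. A compressed sketch of the run-length idea, with for_each_state() and emit() as placeholders for the real iteration and output:

    struct orc_entry prev = {0}, cur;

    for_each_state(&cur) {                  /* placeholder iterator */
            if (!memcmp(&prev, &cur, sizeof(cur)))
                    continue;               /* same state: extend the run */
            emit(&cur);                     /* placeholder: record entry + ip */
            prev = cur;
    }

Counting entries while building the list also lets the section sizes be known before elf_create_section() is called.
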
index 1a2420f..2c7fbda 100644 (file)
 #include <stdlib.h>
 #include <string.h>
 
-#include "builtin.h"
-#include "special.h"
-#include "warn.h"
-#include "arch_special.h"
+#include <arch/special.h>
+#include <objtool/builtin.h>
+#include <objtool/special.h>
+#include <objtool/warn.h>
+#include <objtool/endianness.h>
 
 struct special_entry {
        const char *sec;
@@ -77,8 +78,9 @@ static int get_alt_entry(struct elf *elf, struct special_entry *entry,
        if (entry->feature) {
                unsigned short feature;
 
-               feature = *(unsigned short *)(sec->data->d_buf + offset +
-                                             entry->feature);
+               feature = bswap_if_needed(*(unsigned short *)(sec->data->d_buf +
+                                                             offset +
+                                                             entry->feature));
                arch_handle_alternative(feature, alt);
        }
 
index 7843e9a..8314e82 100644 (file)
@@ -7,7 +7,7 @@
 
 #include <stdbool.h>
 #include <errno.h>
-#include "objtool.h"
+#include <objtool/objtool.h>
 
 #define UNSUPPORTED(name)                                              \
 ({                                                                     \
@@ -25,12 +25,7 @@ int __weak orc_dump(const char *_objname)
        UNSUPPORTED("orc");
 }
 
-int __weak create_orc(struct objtool_file *file)
-{
-       UNSUPPORTED("orc");
-}
-
-int __weak create_orc_sections(struct objtool_file *file)
+int __weak orc_create(struct objtool_file *file)
 {
        UNSUPPORTED("orc");
 }
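
The weak stub keeps objtool linking on architectures that don't implement ORC generation; a reduced illustration of the link-time override, not the literal build setup:

    #include <stdio.h>

    /* weak.c: fallback used when no arch provides the symbol. */
    int __attribute__((weak)) orc_create(struct objtool_file *file)
    {
            fprintf(stderr, "error: objtool: orc unsupported on this arch\n");
            return -1;
    }

    /* orc_gen.c (x86 only): this strong definition wins at link time. */
    int orc_create(struct objtool_file *file)
    {
            /* ... build .orc_unwind / .orc_unwind_ip ... */
            return 0;
    }
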
index c0a6640..9af8b8d 100644 (file)
@@ -29,7 +29,7 @@ OPTIONS
        Show just the sample frequency used for each event.
 
 -v::
---verbose=::
+--verbose::
        Show all fields.
 
 -g::
index 1e91121..6e82b7c 100644 (file)
@@ -28,8 +28,8 @@ OPTIONS
        specified: function_graph or function.
 
 -v::
---verbose=::
-        Verbosity level.
+--verbose::
+        Increase the verbosity level.
 
 -F::
 --funcs::
index f3c6209..c97527d 100644 (file)
@@ -20,5 +20,5 @@ modules).
 OPTIONS
 -------
 -v::
---verbose=::
+--verbose::
        Increase verbosity level, showing details about symbol table loading, etc.
index abc9b5d..f0da8cf 100644 (file)
@@ -97,8 +97,8 @@ filter out the startup phase of the program, which is often very different.
        Filter out events for these pids and for 'trace' itself (comma separated list).
 
 -v::
---verbose=::
-        Verbosity level.
+--verbose::
+        Increase the verbosity level.
 
 --no-inherit::
        Child tasks do not inherit counters.
index 5345ac7..f6e6096 100644 (file)
@@ -607,7 +607,7 @@ arch_errno_hdr_dir := $(srctree)/tools
 arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh
 
 $(arch_errno_name_array): $(arch_errno_tbl)
-       $(Q)$(SHELL) '$(arch_errno_tbl)' $(firstword $(CC)) $(arch_errno_hdr_dir) > $@
+       $(Q)$(SHELL) '$(arch_errno_tbl)' '$(patsubst -%,,$(CC))' $(arch_errno_hdr_dir) > $@
 
 sync_file_range_arrays := $(beauty_outdir)/sync_file_range_arrays.c
 sync_file_range_tbls := $(srctree)/tools/perf/trace/beauty/sync_file_range.sh
@@ -1001,14 +1001,6 @@ $(INSTALL_DOC_TARGETS):
 
 ### Cleaning rules
 
-#
-# This is here, not in Makefile.config, because Makefile.config does
-# not get included for the clean target:
-#
-config-clean:
-       $(call QUIET_CLEAN, config)
-       $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ $(if $(OUTPUT),OUTPUT=$(OUTPUT)feature/,) clean >/dev/null
-
 python-clean:
        $(python-clean)
 
@@ -1048,7 +1040,7 @@ endif # BUILD_BPF_SKEL
 bpf-skel-clean:
        $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS)
 
-clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean config-clean fixdep-clean python-clean bpf-skel-clean
+clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean
        $(call QUIET_CLEAN, core-objs)  $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS)
        $(Q)find $(if $(OUTPUT),$(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete
        $(Q)$(RM) $(OUTPUT).config-detected
index bd446ab..c25c878 100644 (file)
@@ -156,6 +156,10 @@ out:
        return err;
 }
 
+#define ETM_SET_OPT_CTXTID     (1 << 0)
+#define ETM_SET_OPT_TS         (1 << 1)
+#define ETM_SET_OPT_MASK       (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)
+
 static int cs_etm_set_option(struct auxtrace_record *itr,
                             struct evsel *evsel, u32 option)
 {
@@ -169,17 +173,17 @@ static int cs_etm_set_option(struct auxtrace_record *itr,
                    !cpu_map__has(online_cpus, i))
                        continue;
 
-               if (option & ETM_OPT_CTXTID) {
+               if (option & ETM_SET_OPT_CTXTID) {
                        err = cs_etm_set_context_id(itr, evsel, i);
                        if (err)
                                goto out;
                }
-               if (option & ETM_OPT_TS) {
+               if (option & ETM_SET_OPT_TS) {
                        err = cs_etm_set_timestamp(itr, evsel, i);
                        if (err)
                                goto out;
                }
-               if (option & ~(ETM_OPT_CTXTID | ETM_OPT_TS))
+               if (option & ~(ETM_SET_OPT_MASK))
                        /* Nothing else is currently supported */
                        goto out;
        }
@@ -406,7 +410,7 @@ static int cs_etm_recording_options(struct auxtrace_record *itr,
                evsel__set_sample_bit(cs_etm_evsel, CPU);
 
                err = cs_etm_set_option(itr, cs_etm_evsel,
-                                       ETM_OPT_CTXTID | ETM_OPT_TS);
+                                       ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS);
                if (err)
                        goto out;
        }
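
The new ETM_SET_OPT_* defines decouple perf's internal request flags from the ETMv4 hardware config bits they previously aliased. A compressed sketch of the resulting pattern; enable_context_id() and enable_timestamps() are hypothetical stand-ins for the per-option helpers:

    #define ETM_SET_OPT_CTXTID      (1 << 0)
    #define ETM_SET_OPT_TS          (1 << 1)
    #define ETM_SET_OPT_MASK        (ETM_SET_OPT_CTXTID | ETM_SET_OPT_TS)

    static int set_options(unsigned int option)
    {
            if (option & ~ETM_SET_OPT_MASK)
                    return -EINVAL;         /* nothing else is supported */
            if (option & ETM_SET_OPT_CTXTID)
                    enable_context_id();    /* hypothetical helper */
            if (option & ETM_SET_OPT_TS)
                    enable_timestamps();    /* hypothetical helper */
            return 0;
    }
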
index f744eb5..0b2480c 100644 (file)
@@ -9,9 +9,7 @@
 #
 0      nospu   restart_syscall                 sys_restart_syscall
 1      nospu   exit                            sys_exit
-2      32      fork                            ppc_fork                        sys_fork
-2      64      fork                            sys_fork
-2      spu     fork                            sys_ni_syscall
+2      nospu   fork                            sys_fork
 3      common  read                            sys_read
 4      common  write                           sys_write
 5      common  open                            sys_open                        compat_sys_open
 119    32      sigreturn                       sys_sigreturn                   compat_sys_sigreturn
 119    64      sigreturn                       sys_ni_syscall
 119    spu     sigreturn                       sys_ni_syscall
-120    32      clone                           ppc_clone                       sys_clone
-120    64      clone                           sys_clone
-120    spu     clone                           sys_ni_syscall
+120    nospu   clone                           sys_clone
 121    common  setdomainname                   sys_setdomainname
 122    common  uname                           sys_newuname
 123    common  modify_ldt                      sys_ni_syscall
 186    spu     sendfile                        sys_sendfile64
 187    common  getpmsg                         sys_ni_syscall
 188    common  putpmsg                         sys_ni_syscall
-189    32      vfork                           ppc_vfork                       sys_vfork
-189    64      vfork                           sys_vfork
-189    spu     vfork                           sys_ni_syscall
+189    nospu   vfork                           sys_vfork
 190    common  ugetrlimit                      sys_getrlimit                   compat_sys_getrlimit
 191    common  readahead                       sys_readahead                   compat_sys_readahead
 192    32      mmap2                           sys_mmap2                       compat_sys_mmap2
 248    32      clock_nanosleep                 sys_clock_nanosleep_time32
 248    64      clock_nanosleep                 sys_clock_nanosleep
 248    spu     clock_nanosleep                 sys_clock_nanosleep
-249    32      swapcontext                     ppc_swapcontext                 compat_sys_swapcontext
-249    64      swapcontext                     sys_swapcontext
-249    spu     swapcontext                     sys_ni_syscall
+249    nospu   swapcontext                     sys_swapcontext                 compat_sys_swapcontext
 250    common  tgkill                          sys_tgkill
 251    32      utimes                          sys_utimes_time32
 251    64      utimes                          sys_utimes
 432    common  fsmount                         sys_fsmount
 433    common  fspick                          sys_fspick
 434    common  pidfd_open                      sys_pidfd_open
-435    32      clone3                          ppc_clone3                      sys_clone3
-435    64      clone3                          sys_clone3
-435    spu     clone3                          sys_ni_syscall
+435    nospu   clone3                          sys_clone3
 436    common  close_range                     sys_close_range
 437    common  openat2                         sys_openat2
 438    common  pidfd_getfd                     sys_pidfd_getfd
 439    common  faccessat2                      sys_faccessat2
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
+442    common  mount_setattr                   sys_mount_setattr
index d443423..3abef21 100644 (file)
 439  common    faccessat2              sys_faccessat2                  sys_faccessat2
 440  common    process_madvise         sys_process_madvise             sys_process_madvise
 441  common    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
+442  common    mount_setattr           sys_mount_setattr               sys_mount_setattr
index 8cc6642..5a9f9a7 100644 (file)
@@ -10,10 +10,11 @@ PERF_HAVE_JITDUMP := 1
 # Syscall table generation
 #
 
-out    := $(OUTPUT)arch/x86/include/generated/asm
-header := $(out)/syscalls_64.c
-sys    := $(srctree)/tools/perf/arch/x86/entry/syscalls
-systbl := $(sys)/syscalltbl.sh
+generated := $(OUTPUT)arch/x86/include/generated
+out       := $(generated)/asm
+header    := $(out)/syscalls_64.c
+sys       := $(srctree)/tools/perf/arch/x86/entry/syscalls
+systbl    := $(sys)/syscalltbl.sh
 
 # Create output directory if not already present
 _dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)')
@@ -22,6 +23,6 @@ $(header): $(sys)/syscall_64.tbl $(systbl)
        $(Q)$(SHELL) '$(systbl)' $(sys)/syscall_64.tbl 'x86_64' > $@
 
 clean::
-       $(call QUIET_CLEAN, x86) $(RM) $(header)
+       $(call QUIET_CLEAN, x86) $(RM) -r $(header) $(generated)
 
 archheaders: $(header)
index 7867212..7bf01cb 100644 (file)
 439    common  faccessat2              sys_faccessat2
 440    common  process_madvise         sys_process_madvise
 441    common  epoll_pwait2            sys_epoll_pwait2
+442    common  mount_setattr           sys_mount_setattr
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index 6a54b94..0e20f3d 100644 (file)
@@ -10,6 +10,7 @@ int test__rdpmc(struct test *test __maybe_unused, int subtest);
 int test__insn_x86(struct test *test __maybe_unused, int subtest);
 int test__intel_pt_pkt_decoder(struct test *test, int subtest);
 int test__bp_modify(struct test *test, int subtest);
+int test__x86_sample_parsing(struct test *test, int subtest);
 
 #ifdef HAVE_DWARF_UNWIND_SUPPORT
 struct thread;
index 36d4f24..28d7933 100644 (file)
@@ -3,5 +3,6 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o
 
 perf-y += arch-tests.o
 perf-y += rdpmc.o
+perf-y += sample-parsing.o
 perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o
 perf-$(CONFIG_X86_64) += bp-modify.o
index bc25d72..71aa673 100644 (file)
@@ -31,6 +31,10 @@ struct test arch_tests[] = {
        },
 #endif
        {
+               .desc = "x86 Sample parsing",
+               .func = test__x86_sample_parsing,
+       },
+       {
                .func = NULL,
        },
 
index f782ef8..4f75ae9 100644 (file)
@@ -1,11 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/types.h>
-#include "../../../../arch/x86/include/asm/insn.h"
 #include <string.h>
 
 #include "debug.h"
 #include "tests/tests.h"
 #include "arch-tests.h"
+#include "../../../../arch/x86/include/asm/insn.h"
 
 #include "intel-pt-decoder/intel-pt-insn-decoder.h"
 
diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c
new file mode 100644 (file)
index 0000000..c92db87
--- /dev/null
@@ -0,0 +1,121 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <stdbool.h>
+#include <inttypes.h>
+#include <stdlib.h>
+#include <string.h>
+#include <linux/bitops.h>
+#include <linux/kernel.h>
+#include <linux/types.h>
+
+#include "event.h"
+#include "evsel.h"
+#include "debug.h"
+#include "util/synthetic-events.h"
+
+#include "tests/tests.h"
+#include "arch-tests.h"
+
+#define COMP(m) do {                                   \
+       if (s1->m != s2->m) {                           \
+               pr_debug("Samples differ at '"#m"'\n"); \
+               return false;                           \
+       }                                               \
+} while (0)
+
+static bool samples_same(const struct perf_sample *s1,
+                        const struct perf_sample *s2,
+                        u64 type)
+{
+       if (type & PERF_SAMPLE_WEIGHT_STRUCT)
+               COMP(ins_lat);
+
+       return true;
+}
+
+static int do_test(u64 sample_type)
+{
+       struct evsel evsel = {
+               .needs_swap = false,
+               .core = {
+                       .attr = {
+                               .sample_type = sample_type,
+                               .read_format = 0,
+                       },
+               },
+       };
+       union perf_event *event;
+       struct perf_sample sample = {
+               .weight         = 101,
+               .ins_lat        = 102,
+       };
+       struct perf_sample sample_out;
+       size_t i, sz, bufsz;
+       int err, ret = -1;
+
+       sz = perf_event__sample_event_size(&sample, sample_type, 0);
+       bufsz = sz + 4096; /* Add a bit for overrun checking */
+       event = malloc(bufsz);
+       if (!event) {
+               pr_debug("malloc failed\n");
+               return -1;
+       }
+
+       memset(event, 0xff, bufsz);
+       event->header.type = PERF_RECORD_SAMPLE;
+       event->header.misc = 0;
+       event->header.size = sz;
+
+       err = perf_event__synthesize_sample(event, sample_type, 0, &sample);
+       if (err) {
+               pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+                        "perf_event__synthesize_sample", sample_type, err);
+               goto out_free;
+       }
+
+       /* The data does not contain 0xff so we use that to check the size */
+       for (i = bufsz; i > 0; i--) {
+               if (*(i - 1 + (u8 *)event) != 0xff)
+                       break;
+       }
+       if (i != sz) {
+               pr_debug("Event size mismatch: actual %zu vs expected %zu\n",
+                        i, sz);
+               goto out_free;
+       }
+
+       evsel.sample_size = __evsel__sample_size(sample_type);
+
+       err = evsel__parse_sample(&evsel, event, &sample_out);
+       if (err) {
+               pr_debug("%s failed for sample_type %#"PRIx64", error %d\n",
+                        "evsel__parse_sample", sample_type, err);
+               goto out_free;
+       }
+
+       if (!samples_same(&sample, &sample_out, sample_type)) {
+               pr_debug("parsing failed for sample_type %#"PRIx64"\n",
+                        sample_type);
+               goto out_free;
+       }
+
+       ret = 0;
+out_free:
+       free(event);
+
+       return ret;
+}
+
+/**
+ * test__x86_sample_parsing - test X86 specific sample parsing
+ *
+ * This function implements a test that synthesizes a sample event, parses it
+ * and then checks that the parsed sample matches the original sample. If the
+ * test passes %0 is returned, otherwise %-1 is returned.
+ *
+ * For now, PERF_SAMPLE_WEIGHT_STRUCT is the only x86-specific sample type,
+ * so it is the only type the test checks.
+ */
+int test__x86_sample_parsing(struct test *test __maybe_unused, int subtest __maybe_unused)
+{
+       return do_test(PERF_SAMPLE_WEIGHT_STRUCT);
+}
index 3e67915..34d600c 100644 (file)
@@ -1,10 +1,10 @@
 // SPDX-License-Identifier: GPL-2.0
-#include "../../../../arch/x86/include/asm/insn.h"
 #include "archinsn.h"
 #include "event.h"
 #include "machine.h"
 #include "thread.h"
 #include "symbol.h"
+#include "../../../../arch/x86/include/asm/insn.h"
 
 void arch_fetch_insn(struct perf_sample *sample,
                     struct thread *thread,
index 11726ec..20b87e2 100644 (file)
@@ -344,18 +344,22 @@ static void mempol_restore(void)
 
 static void bind_to_memnode(int node)
 {
-       unsigned long nodemask;
+       struct bitmask *node_mask;
        int ret;
 
        if (node == NUMA_NO_NODE)
                return;
 
-       BUG_ON(g->p.nr_nodes > (int)sizeof(nodemask)*8);
-       nodemask = 1L << node;
+       node_mask = numa_allocate_nodemask();
+       BUG_ON(!node_mask);
 
-       ret = set_mempolicy(MPOL_BIND, &nodemask, sizeof(nodemask)*8);
-       dprintf("binding to node %d, mask: %016lx => %d\n", node, nodemask, ret);
+       numa_bitmask_clearall(node_mask);
+       numa_bitmask_setbit(node_mask, node);
 
+       ret = set_mempolicy(MPOL_BIND, node_mask->maskp, node_mask->size + 1);
+       dprintf("binding to node %d, mask: %016lx => %d\n", node, *node_mask->maskp, ret);
+
+       numa_bitmask_free(node_mask);
        BUG_ON(ret);
 }
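
The fix replaces a fixed 64-bit nodemask, which overflows on machines with more than 64 NUMA nodes, with a kernel-sized libnuma bitmask. A standalone sketch of the pattern (link with -lnuma; error handling condensed):

    #include <numa.h>
    #include <numaif.h>

    static int bind_to_node(int node)
    {
            /* Allocated at the kernel's node-mask size and zero-filled. */
            struct bitmask *mask = numa_allocate_nodemask();
            int ret;

            if (!mask)
                    return -1;

            numa_bitmask_setbit(mask, node);
            /* mask->size is in bits; set_mempolicy() wants maxnode bits + 1. */
            ret = set_mempolicy(MPOL_BIND, mask->maskp, mask->size + 1);
            numa_bitmask_free(mask);
            return ret;
    }
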
 
@@ -876,8 +880,6 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
        prctl(0, bytes_worked);
 }
 
-#define MAX_NR_NODES   64
-
 /*
  * Count the number of nodes a process's threads
  * are spread out on.
@@ -888,10 +890,15 @@ static void update_curr_cpu(int task_nr, unsigned long bytes_worked)
  */
 static int count_process_nodes(int process_nr)
 {
-       char node_present[MAX_NR_NODES] = { 0, };
+       char *node_present;
        int nodes;
        int n, t;
 
+       node_present = (char *)malloc(g->p.nr_nodes * sizeof(char));
+       BUG_ON(!node_present);
+       for (nodes = 0; nodes < g->p.nr_nodes; nodes++)
+               node_present[nodes] = 0;
+
        for (t = 0; t < g->p.nr_threads; t++) {
                struct thread_data *td;
                int task_nr;
@@ -901,17 +908,20 @@ static int count_process_nodes(int process_nr)
                td = g->threads + task_nr;
 
                node = numa_node_of_cpu(td->curr_cpu);
-               if (node < 0) /* curr_cpu was likely still -1 */
+               if (node < 0) /* curr_cpu was likely still -1 */ {
+                       free(node_present);
                        return 0;
+               }
 
                node_present[node] = 1;
        }
 
        nodes = 0;
 
-       for (n = 0; n < MAX_NR_NODES; n++)
+       for (n = 0; n < g->p.nr_nodes; n++)
                nodes += node_present[n];
 
+       free(node_present);
        return nodes;
 }
 
@@ -980,7 +990,7 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 {
        unsigned int loops_done_min, loops_done_max;
        int process_groups;
-       int nodes[MAX_NR_NODES];
+       int *nodes;
        int distance;
        int nr_min;
        int nr_max;
@@ -994,6 +1004,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
        if (!g->p.show_convergence && !g->p.measure_convergence)
                return;
 
+       nodes = (int *)malloc(g->p.nr_nodes * sizeof(int));
+       BUG_ON(!nodes);
        for (node = 0; node < g->p.nr_nodes; node++)
                nodes[node] = 0;
 
@@ -1035,8 +1047,10 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
 
        BUG_ON(sum > g->p.nr_tasks);
 
-       if (0 && (sum < g->p.nr_tasks))
+       if (0 && (sum < g->p.nr_tasks)) {
+               free(nodes);
                return;
+       }
 
        /*
         * Count the number of distinct process groups present
@@ -1088,6 +1102,8 @@ static void calc_convergence(double runtime_ns_max, double *convergence)
                }
                tprintf("\n");
        }
+
+       free(nodes);
 }
 
 static void show_summary(double runtime_ns_max, int l, double *convergence)
@@ -1413,7 +1429,7 @@ static int init(void)
        g->p.nr_nodes = numa_max_node() + 1;
 
        /* char array in count_process_nodes(): */
-       BUG_ON(g->p.nr_nodes > MAX_NR_NODES || g->p.nr_nodes < 0);
+       BUG_ON(g->p.nr_nodes < 0);
 
        if (g->p.show_quiet && !g->p.show_details)
                g->p.show_details = -1;
index cecce93..488f6e6 100644 (file)
@@ -309,11 +309,11 @@ int bench_sched_messaging(int argc, const char **argv)
                       num_groups, num_groups * 2 * num_fds,
                       thread_mode ? "threads" : "processes");
                printf(" %14s: %lu.%03lu [sec]\n", "Total time",
-                      diff.tv_sec,
+                      (unsigned long) diff.tv_sec,
                       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
                break;
        case BENCH_FORMAT_SIMPLE:
-               printf("%lu.%03lu\n", diff.tv_sec,
+               printf("%lu.%03lu\n", (unsigned long) diff.tv_sec,
                       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
                break;
        default:
index 3c88d1f..a960e7a 100644 (file)
@@ -156,7 +156,7 @@ int bench_sched_pipe(int argc, const char **argv)
                result_usec += diff.tv_usec;
 
                printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
-                      diff.tv_sec,
+                      (unsigned long) diff.tv_sec,
                       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
 
                printf(" %14lf usecs/op\n",
@@ -168,7 +168,7 @@ int bench_sched_pipe(int argc, const char **argv)
 
        case BENCH_FORMAT_SIMPLE:
                printf("%lu.%03lu\n",
-                      diff.tv_sec,
+                      (unsigned long) diff.tv_sec,
                       (unsigned long) (diff.tv_usec / USEC_PER_MSEC));
                break;
 
index 5fe621c..9b75101 100644 (file)
@@ -54,7 +54,7 @@ int bench_syscall_basic(int argc, const char **argv)
                result_usec += diff.tv_usec;
 
                printf(" %14s: %lu.%03lu [sec]\n\n", "Total time",
-                      diff.tv_sec,
+                      (unsigned long) diff.tv_sec,
                       (unsigned long) (diff.tv_usec/1000));
 
                printf(" %14lf usecs/op\n",
@@ -66,7 +66,7 @@ int bench_syscall_basic(int argc, const char **argv)
 
        case BENCH_FORMAT_SIMPLE:
                printf("%lu.%03lu\n",
-                      diff.tv_sec,
+                      (unsigned long) diff.tv_sec,
                       (unsigned long) (diff.tv_usec / 1000));
                break;
 
index 617feaf..7c4a9d4 100644 (file)
@@ -161,7 +161,7 @@ static int session_config(struct daemon *daemon, const char *var, const char *va
        struct daemon_session *session;
        char name[100];
 
-       if (get_session_name(var, name, sizeof(name)))
+       if (get_session_name(var, name, sizeof(name) - 1))
                return -EINVAL;
 
        var = strchr(var, '.');
@@ -373,12 +373,12 @@ static int daemon_session__run(struct daemon_session *session,
        dup2(fd, 2);
        close(fd);
 
-       if (mkfifo(SESSION_CONTROL, O_RDWR) && errno != EEXIST) {
+       if (mkfifo(SESSION_CONTROL, 0600) && errno != EEXIST) {
                perror("failed: create control fifo");
                return -1;
        }
 
-       if (mkfifo(SESSION_ACK, O_RDWR) && errno != EEXIST) {
+       if (mkfifo(SESSION_ACK, 0600) && errno != EEXIST) {
                perror("failed: create ack fifo");
                return -1;
        }
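
Worth spelling out why this is a fix: mkfifo(3) takes a permission mode, not open(2) flags, so passing O_RDWR produced nonsensical permission bits. A minimal sketch of the corrected idiom (create_fifo() and the path are illustrative):

    #include <sys/stat.h>
    #include <errno.h>
    #include <stdio.h>

    static int create_fifo(const char *path)
    {
            /* rw------- permissions; a FIFO left from a prior run is fine. */
            if (mkfifo(path, 0600) && errno != EEXIST) {
                    perror("mkfifo");
                    return -1;
            }
            return 0;
    }
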
@@ -402,35 +402,42 @@ static pid_t handle_signalfd(struct daemon *daemon)
        int status;
        pid_t pid;
 
+       /*
+        * Treat the signal fd data as a pure notification and check the
+        * state of all sessions: multiple signals can get coalesced in
+        * the kernel, so we may receive only a single signal even if
+        * several SIGCHLDs were generated.
+        */
        err = read(daemon->signal_fd, &si, sizeof(struct signalfd_siginfo));
-       if (err != sizeof(struct signalfd_siginfo))
+       if (err != sizeof(struct signalfd_siginfo)) {
+               pr_err("failed to read signal fd\n");
                return -1;
+       }
 
        list_for_each_entry(session, &daemon->sessions, list) {
+               if (session->pid == -1)
+                       continue;
 
-               if (session->pid != (int) si.ssi_pid)
+               pid = waitpid(session->pid, &status, WNOHANG);
+               if (pid <= 0)
                        continue;
 
-               pid = waitpid(session->pid, &status, 0);
-               if (pid == session->pid) {
-                       if (WIFEXITED(status)) {
-                               pr_info("session '%s' exited, status=%d\n",
-                                       session->name, WEXITSTATUS(status));
-                       } else if (WIFSIGNALED(status)) {
-                               pr_info("session '%s' killed (signal %d)\n",
-                                       session->name, WTERMSIG(status));
-                       } else if (WIFSTOPPED(status)) {
-                               pr_info("session '%s' stopped (signal %d)\n",
-                                       session->name, WSTOPSIG(status));
-                       } else {
-                               pr_info("session '%s' Unexpected status (0x%x)\n",
-                                       session->name, status);
-                       }
+               if (WIFEXITED(status)) {
+                       pr_info("session '%s' exited, status=%d\n",
+                               session->name, WEXITSTATUS(status));
+               } else if (WIFSIGNALED(status)) {
+                       pr_info("session '%s' killed (signal %d)\n",
+                               session->name, WTERMSIG(status));
+               } else if (WIFSTOPPED(status)) {
+                       pr_info("session '%s' stopped (signal %d)\n",
+                               session->name, WSTOPSIG(status));
+               } else {
+                       pr_info("session '%s' Unexpected status (0x%x)\n",
+                       pr_info("session '%s' unexpected status (0x%x)\n",
                }
 
                session->state = KILL;
                session->pid = -1;
-               return pid;
        }
 
        return 0;
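
A self-contained sketch of the reaping pattern this hunk adopts; the pids bookkeeping is illustrative, and it assumes SIGCHLD is blocked and routed to signal_fd:

    #include <sys/signalfd.h>
    #include <sys/wait.h>
    #include <unistd.h>

    /* 'pids' and 'nr' are hypothetical bookkeeping, not from the patch. */
    static void reap_children(int signal_fd, pid_t *pids, int nr)
    {
            struct signalfd_siginfo si;
            int i, status;

            /* One read may stand for several coalesced SIGCHLDs. */
            if (read(signal_fd, &si, sizeof(si)) != sizeof(si))
                    return;

            for (i = 0; i < nr; i++) {
                    if (pids[i] == -1)
                            continue;
                    if (waitpid(pids[i], &status, WNOHANG) > 0)
                            pids[i] = -1;   /* this child is gone */
            }
    }
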
@@ -443,7 +450,6 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
                .fd     = daemon->signal_fd,
                .events = POLLIN,
        };
-       pid_t wpid = 0, pid = session->pid;
        time_t start;
 
        start = time(NULL);
@@ -452,7 +458,7 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
                int err = poll(&pollfd, 1, 1000);
 
                if (err > 0) {
-                       wpid = handle_signalfd(daemon);
+                       handle_signalfd(daemon);
                } else if (err < 0) {
                        perror("failed: poll\n");
                        return -1;
@@ -460,7 +466,7 @@ static int daemon_session__wait(struct daemon_session *session, struct daemon *d
 
                if (start + secs < time(NULL))
                        return -1;
-       } while (wpid != pid);
+       } while (session->pid != -1);
 
        return 0;
 }
@@ -902,7 +908,9 @@ static void daemon_session__kill(struct daemon_session *session,
                        daemon_session__signal(session, SIGKILL);
                        break;
                default:
-                       break;
+                       pr_err("failed to wait for session %s\n",
+                              session->name);
+                       return;
                }
                how++;
 
@@ -955,7 +963,8 @@ static void daemon__kill(struct daemon *daemon)
                        daemon__signal(daemon, SIGKILL);
                        break;
                default:
-                       break;
+                       pr_err("failed to wait for sessions\n");
+                       return;
                }
                how++;
 
@@ -1344,7 +1353,7 @@ out:
                close(sock_fd);
        if (conf_fd != -1)
                close(conf_fd);
-       if (conf_fd != -1)
+       if (signal_fd != -1)
                close(signal_fd);
 
        pr_info("daemon exited\n");
index 8f6c784..878e04b 100644 (file)
@@ -1236,7 +1236,8 @@ static int __cmd_diff(void)
 
  out_delete:
        data__for_each_file(i, d) {
-               perf_session__delete(d->session);
+               if (!IS_ERR(d->session))
+                       perf_session__delete(d->session);
                data__free(d);
        }
 
index 85b6a46..7ec18ff 100644 (file)
@@ -3964,9 +3964,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv)
 
        evlist__config(evlist, &trace->opts, &callchain_param);
 
-       signal(SIGCHLD, sig_handler);
-       signal(SIGINT, sig_handler);
-
        if (forks) {
                err = evlist__prepare_workload(evlist, &trace->opts.target, argv, false, NULL);
                if (err < 0) {
@@ -4827,6 +4824,8 @@ int cmd_trace(int argc, const char **argv)
 
        signal(SIGSEGV, sighandler_dump_stack);
        signal(SIGFPE, sighandler_dump_stack);
+       signal(SIGCHLD, sig_handler);
+       signal(SIGINT, sig_handler);
 
        trace.evlist = evlist__new();
        trace.sctbl = syscalltbl__new();
index 0cfb3e2..133f0ed 100644 (file)
@@ -20,9 +20,8 @@ else
 fi
 
 BUILDIDS=$(mktemp /tmp/perf-archive-buildids.XXXXXX)
-NOBUILDID=0000000000000000000000000000000000000000
 
-perf buildid-list -i $PERF_DATA --with-hits | grep -v "^$NOBUILDID " > $BUILDIDS
+perf buildid-list -i $PERF_DATA --with-hits | grep -v "^ " > $BUILDIDS
 if [ ! -s $BUILDIDS ] ; then
        echo "perf archive: no build-ids found"
        rm $BUILDIDS || true
index ec972e0..dd39ce9 100644 (file)
@@ -182,14 +182,20 @@ int test__attr(struct test *test __maybe_unused, int subtest __maybe_unused)
        struct stat st;
        char path_perf[PATH_MAX];
        char path_dir[PATH_MAX];
+       char *exec_path;
 
        /* First try development tree tests. */
        if (!lstat("./tests", &st))
                return run_dir("./tests", "./perf");
 
+       exec_path = get_argv_exec_path();
+       if (exec_path == NULL)
+               return -1;
+
        /* Then installed path. */
-       snprintf(path_dir,  PATH_MAX, "%s/tests", get_argv_exec_path());
+       snprintf(path_dir,  PATH_MAX, "%s/tests", exec_path);
        snprintf(path_perf, PATH_MAX, "%s/perf", BINDIR);
+       free(exec_path);
 
        if (!lstat(path_dir, &st) &&
            !lstat(path_perf, &st))
index f57e075..c72adbd 100644 (file)
@@ -86,7 +86,7 @@ static struct {
                .msg_load_fail    = "check your vmlinux setting?",
                .target_func      = &epoll_pwait_loop,
                .expect_result    = (NR_ITERS + 1) / 2,
-               .pin              = true,
+               .pin              = true,
        },
 #ifdef HAVE_BPF_PROLOGUE
        {
@@ -99,13 +99,6 @@ static struct {
                .expect_result    = (NR_ITERS + 1) / 4,
        },
 #endif
-       {
-               .prog_id          = LLVM_TESTCASE_BPF_RELOCATION,
-               .desc             = "BPF relocation checker",
-               .name             = "[bpf_relocation_test]",
-               .msg_compile_fail = "fix 'perf test LLVM' first",
-               .msg_load_fail    = "libbpf error when dealing with relocation",
-       },
 };
 
 static int do_test(struct bpf_object *obj, int (*func)(void),
index 280f034..2fdc7b2 100644 (file)
@@ -706,13 +706,9 @@ static int do_test_code_reading(bool try_kcore)
 out_put:
        thread__put(thread);
 out_err:
-
-       if (evlist) {
-               evlist__delete(evlist);
-       } else {
-               perf_cpu_map__put(cpus);
-               perf_thread_map__put(threads);
-       }
+       evlist__delete(evlist);
+       perf_cpu_map__put(cpus);
+       perf_thread_map__put(threads);
        machine__delete_threads(machine);
        machine__delete(machine);
 
index 29c793a..0472b11 100644 (file)
@@ -106,6 +106,8 @@ static int cpu_map_print(const char *str)
                return -1;
 
        cpu_map__snprint(map, buf, sizeof(buf));
+       perf_cpu_map__put(map);
+
        return !strcmp(buf, str);
 }
 
index e6f1b2a..a0438b0 100644 (file)
@@ -154,10 +154,9 @@ out_err:
        if (evlist) {
                evlist__disable(evlist);
                evlist__delete(evlist);
-       } else {
-               perf_cpu_map__put(cpus);
-               perf_thread_map__put(threads);
        }
+       perf_cpu_map__put(cpus);
+       perf_thread_map__put(threads);
 
        return err;
 }
index 57093ae..73ae8f7 100644 (file)
@@ -158,8 +158,6 @@ out_init:
 
 out_delete_evlist:
        evlist__delete(evlist);
-       cpus    = NULL;
-       threads = NULL;
 out_free_cpus:
        perf_cpu_map__put(cpus);
 out_free_threads:
index 7cff026..680c3cf 100644 (file)
@@ -167,6 +167,8 @@ next_event:
 
 out_err:
        evlist__delete(evlist);
+       perf_cpu_map__put(cpus);
+       perf_thread_map__put(threads);
        return err;
 }
 
index 0dbe3aa..8fd8a4e 100644 (file)
@@ -129,9 +129,6 @@ static bool samples_same(const struct perf_sample *s1,
        if (type & PERF_SAMPLE_WEIGHT)
                COMP(weight);
 
-       if (type & PERF_SAMPLE_WEIGHT_STRUCT)
-               COMP(ins_lat);
-
        if (type & PERF_SAMPLE_DATA_SRC)
                COMP(data_src);
 
@@ -245,7 +242,6 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format)
                .cgroup         = 114,
                .data_page_size = 115,
                .code_page_size = 116,
-               .ins_lat        = 117,
                .aux_sample     = {
                        .size   = sizeof(aux_data),
                        .data   = (void *)aux_data,
index e5b824d..5898438 100755 (executable)
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
 # daemon operations
 # SPDX-License-Identifier: GPL-2.0
 
@@ -140,10 +140,10 @@ test_list()
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
        sed -i -e "s|BASE|${base}|" ${config}
@@ -159,14 +159,14 @@ EOF
        # check 1st session
        # pid:size:-e cpu-clock:base/size:base/size/output:base/size/control:base/size/ack:0
        local line=`perf daemon --config ${config} -x: | head -2 | tail -1`
-       check_line_other "${line}" size "-e cpu-clock" ${base}/session-size \
+       check_line_other "${line}" size "-e cpu-clock -m 1 sleep 10" ${base}/session-size \
                         ${base}/session-size/output ${base}/session-size/control \
                         ${base}/session-size/ack "0"
 
        # check 2nd session
        # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
        local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
-       check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+       check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
                         ${base}/session-time/output ${base}/session-time/control \
                         ${base}/session-time/ack "0"
 
@@ -190,10 +190,10 @@ test_reconfig()
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
        sed -i -e "s|BASE|${base}|" ${config}
@@ -204,7 +204,7 @@ EOF
        # check 2nd session
        # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
        local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
-       check_line_other "${line}" time "-e task-clock" ${base}/session-time \
+       check_line_other "${line}" time "-e task-clock -m 1 sleep 10" ${base}/session-time \
                         ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
        local pid=`echo "${line}" | awk 'BEGIN { FS = ":" } ; { print $1 }'`
 
@@ -215,10 +215,10 @@ EOF
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 EOF
 
        # TEST 1 - change config
@@ -238,7 +238,7 @@ EOF
        # check reconfigured 2nd session
        # pid:time:-e task-clock:base/time:base/time/output:base/time/control:base/time/ack:0
        local line=`perf daemon --config ${config} -x: | head -3 | tail -1`
-       check_line_other "${line}" time "-e cpu-clock" ${base}/session-time \
+       check_line_other "${line}" time "-e cpu-clock -m 1 sleep 10" ${base}/session-time \
                         ${base}/session-time/output ${base}/session-time/control ${base}/session-time/ack "0"
 
        # TEST 2 - empty config
@@ -309,10 +309,10 @@ test_stop()
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
        sed -i -e "s|BASE|${base}|" ${config}
@@ -361,7 +361,7 @@ test_signal()
 base=BASE
 
 [session-test]
-run = -e cpu-clock --switch-output
+run = -e cpu-clock --switch-output -m 1 sleep 10
 EOF
 
        sed -i -e "s|BASE|${base}|" ${config}
@@ -400,10 +400,10 @@ test_ping()
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 
 [session-time]
-run = -e task-clock
+run = -e task-clock -m 1 sleep 10
 EOF
 
        sed -i -e "s|BASE|${base}|" ${config}
@@ -439,7 +439,7 @@ test_lock()
 base=BASE
 
 [session-size]
-run = -e cpu-clock
+run = -e cpu-clock -m 1 sleep 10
 EOF
 
        sed -i -e "s|BASE|${base}|" ${config}
index a49c9e2..7498884 100644 (file)
@@ -42,8 +42,8 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
                .disabled = 1,
                .freq = 1,
        };
-       struct perf_cpu_map *cpus;
-       struct perf_thread_map *threads;
+       struct perf_cpu_map *cpus = NULL;
+       struct perf_thread_map *threads = NULL;
        struct mmap *md;
 
        attr.sample_freq = 500;
@@ -66,14 +66,11 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id)
        if (!cpus || !threads) {
                err = -ENOMEM;
                pr_debug("Not enough memory to create thread/cpu maps\n");
-               goto out_free_maps;
+               goto out_delete_evlist;
        }
 
        perf_evlist__set_maps(&evlist->core, cpus, threads);
 
-       cpus    = NULL;
-       threads = NULL;
-
        if (evlist__open(evlist)) {
                const char *knob = "/proc/sys/kernel/perf_event_max_sample_rate";
 
@@ -129,10 +126,9 @@ out_init:
                err = -1;
        }
 
-out_free_maps:
+out_delete_evlist:
        perf_cpu_map__put(cpus);
        perf_thread_map__put(threads);
-out_delete_evlist:
        evlist__delete(evlist);
        return err;
 }
index 15a2ab7..3ebaa75 100644 (file)
@@ -574,10 +574,9 @@ out:
        if (evlist) {
                evlist__disable(evlist);
                evlist__delete(evlist);
-       } else {
-               perf_cpu_map__put(cpus);
-               perf_thread_map__put(threads);
        }
+       perf_cpu_map__put(cpus);
+       perf_thread_map__put(threads);
 
        return err;
 
index bbf94e4..4c2969d 100644 (file)
@@ -75,14 +75,11 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused
        if (!cpus || !threads) {
                err = -ENOMEM;
                pr_debug("Not enough memory to create thread/cpu maps\n");
-               goto out_free_maps;
+               goto out_delete_evlist;
        }
 
        perf_evlist__set_maps(&evlist->core, cpus, threads);
 
-       cpus    = NULL;
-       threads = NULL;
-
        err = evlist__prepare_workload(evlist, &target, argv, false, workload_exec_failed_signal);
        if (err < 0) {
                pr_debug("Couldn't run the workload!\n");
@@ -137,7 +134,7 @@ out_init:
                if (retry_count++ > 1000) {
                        pr_debug("Failed after retrying 1000 times\n");
                        err = -1;
-                       goto out_free_maps;
+                       goto out_delete_evlist;
                }
 
                goto retry;
@@ -148,10 +145,9 @@ out_init:
                err = -1;
        }
 
-out_free_maps:
+out_delete_evlist:
        perf_cpu_map__put(cpus);
        perf_thread_map__put(threads);
-out_delete_evlist:
        evlist__delete(evlist);
        return err;
 }
index 28f51c4..d1e208b 100644 (file)
@@ -102,6 +102,7 @@ int test__thread_map_synthesize(struct test *test __maybe_unused, int subtest __
        TEST_ASSERT_VAL("failed to synthesize map",
                !perf_event__synthesize_thread_map2(NULL, threads, process_event, NULL));
 
+       perf_thread_map__put(threads);
        return 0;
 }
 
@@ -109,12 +110,12 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
 {
        struct perf_thread_map *threads;
        char *str;
-       int i;
 
        TEST_ASSERT_VAL("failed to allocate map string",
                        asprintf(&str, "%d,%d", getpid(), getppid()) >= 0);
 
        threads = thread_map__new_str(str, NULL, 0, false);
+       free(str);
 
        TEST_ASSERT_VAL("failed to allocate thread_map",
                        threads);
@@ -141,9 +142,6 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb
        TEST_ASSERT_VAL("failed to not remove thread",
                        thread_map__remove(threads, 0));
 
-       for (i = 0; i < threads->nr; i++)
-               zfree(&threads->map[i].comm);
-
-       free(threads);
+       perf_thread_map__put(threads);
        return 0;
 }
index 953f4af..5b6ccb9 100644 (file)
@@ -298,10 +298,6 @@ static int auxtrace_queues__queue_buffer(struct auxtrace_queues *queues,
                queue->set = true;
                queue->tid = buffer->tid;
                queue->cpu = buffer->cpu;
-       } else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) {
-               pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n",
-                      queue->cpu, queue->tid, buffer->cpu, buffer->tid);
-               return -EINVAL;
        }
 
        buffer->buffer_nr = queues->next_buffer_nr++;
index 57d58c8..cdecda1 100644 (file)
@@ -196,25 +196,32 @@ static int perf_event__synthesize_one_bpf_prog(struct perf_session *session,
        }
 
        if (info_linear->info_len < offsetof(struct bpf_prog_info, prog_tags)) {
+               free(info_linear);
                pr_debug("%s: the kernel is too old, aborting\n", __func__);
                return -2;
        }
 
        info = &info_linear->info;
+       if (!info->jited_ksyms) {
+               free(info_linear);
+               return -1;
+       }
 
        /* number of ksyms, func_lengths, and tags should match */
        sub_prog_cnt = info->nr_jited_ksyms;
        if (sub_prog_cnt != info->nr_prog_tags ||
-           sub_prog_cnt != info->nr_jited_func_lens)
+           sub_prog_cnt != info->nr_jited_func_lens) {
+               free(info_linear);
                return -1;
+       }
 
        /* check BTF func info support */
        if (info->btf_id && info->nr_func_info && info->func_info_rec_size) {
                /* btf func info number should be same as sub_prog_cnt */
                if (sub_prog_cnt != info->nr_func_info) {
                        pr_debug("%s: mismatch in BPF sub program count and BTF function info count, aborting\n", __func__);
-                       err = -1;
-                       goto out;
+                       free(info_linear);
+                       return -1;
                }
                if (btf__get_from_id(info->btf_id, &btf)) {
                        pr_debug("%s: failed to get BTF of id %u, aborting\n", __func__, info->btf_id);
index 5121b4d..882cd1f 100644 (file)
@@ -1306,6 +1306,7 @@ void evlist__close(struct evlist *evlist)
                perf_evsel__free_fd(&evsel->core);
                perf_evsel__free_id(&evsel->core);
        }
+       perf_evlist__reset_id_hash(&evlist->core);
 }
 
 static int evlist__create_syswide_maps(struct evlist *evlist)
index 1bf7686..7ecbc8e 100644 (file)
@@ -46,6 +46,7 @@
 #include "string2.h"
 #include "memswap.h"
 #include "util.h"
+#include "hashmap.h"
 #include "../perf-sys.h"
 #include "util/parse-branch-options.h"
 #include <internal/xyarray.h>
@@ -1390,7 +1391,9 @@ void evsel__exit(struct evsel *evsel)
        zfree(&evsel->group_name);
        zfree(&evsel->name);
        zfree(&evsel->pmu_name);
-       zfree(&evsel->per_pkg_mask);
+       evsel__zero_per_pkg(evsel);
+       hashmap__free(evsel->per_pkg_mask);
+       evsel->per_pkg_mask = NULL;
        zfree(&evsel->metric_events);
        perf_evsel__object.fini(evsel);
 }
@@ -2781,3 +2784,16 @@ int evsel__store_ids(struct evsel *evsel, struct evlist *evlist)
 
        return store_evsel_ids(evsel, evlist);
 }
+
+void evsel__zero_per_pkg(struct evsel *evsel)
+{
+       struct hashmap_entry *cur;
+       size_t bkt;
+
+       if (evsel->per_pkg_mask) {
+               hashmap__for_each_entry(evsel->per_pkg_mask, cur, bkt)
+                       free((char *)cur->key);
+
+               hashmap__clear(evsel->per_pkg_mask);
+       }
+}
index 4e8e49f..6026487 100644 (file)
@@ -19,6 +19,7 @@ struct perf_stat_evsel;
 union perf_event;
 struct bpf_counter_ops;
 struct target;
+struct hashmap;
 
 typedef int (evsel__sb_cb_t)(union perf_event *event, void *data);
 
@@ -112,7 +113,7 @@ struct evsel {
        bool                    merged_stat;
        bool                    reset_group;
        bool                    errored;
-       unsigned long           *per_pkg_mask;
+       struct hashmap          *per_pkg_mask;
        struct evsel            *leader;
        struct list_head        config_terms;
        int                     err;
@@ -433,4 +434,5 @@ struct perf_env *evsel__env(struct evsel *evsel);
 
 int evsel__store_ids(struct evsel *evsel, struct evlist *evlist);
 
+void evsel__zero_per_pkg(struct evsel *evsel);
 #endif /* __PERF_EVSEL_H */
index 4fe9e2a..20effdf 100644 (file)
@@ -1618,8 +1618,8 @@ static void print_clock_data(struct feat_fd *ff, FILE *fp)
 
        fprintf(fp, "# clockid: %s (%u)\n", clockid_name(clockid), clockid);
        fprintf(fp, "# reference time: %s = %ld.%06d (TOD) = %ld.%09ld (%s)\n",
-                   tstr, tod_ns.tv_sec, (int) tod_ns.tv_usec,
-                   clockid_ns.tv_sec, clockid_ns.tv_nsec,
+                   tstr, (long) tod_ns.tv_sec, (int) tod_ns.tv_usec,
+                   (long) clockid_ns.tv_sec, clockid_ns.tv_nsec,
                    clockid_name(clockid));
 }
 
index 692e56d..fbc40a2 100644 (file)
@@ -77,8 +77,7 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
        if (strstarts(filename, "/system/lib/")) {
                char *ndk, *app;
                const char *arch;
-               size_t ndk_length;
-               size_t app_length;
+               int ndk_length, app_length;
 
                ndk = getenv("NDK_ROOT");
                app = getenv("APP_PLATFORM");
@@ -106,8 +105,8 @@ static inline bool replace_android_lib(const char *filename, char *newfilename)
                if (new_length > PATH_MAX)
                        return false;
                snprintf(newfilename, new_length,
-                       "%s/platforms/%s/arch-%s/usr/lib/%s",
-                       ndk, app, arch, libname);
+                       "%.*s/platforms/%.*s/arch-%s/usr/lib/%s",
+                       ndk_length, ndk, app_length, app, arch, libname);
 
                return true;
        }
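
The fix above prints bounded prefixes of strings whose usable length was computed separately. A standalone sketch of the "%.*s" idiom (the path and length below are invented for illustration):

    /* Standalone sketch of the "%.*s" idiom used above; the path and
     * length are invented for illustration. */
    #include <stdio.h>

    int main(void)
    {
            const char *ndk = "/opt/android-ndk/unused-suffix";
            int ndk_length = 16;    /* print only the first 16 bytes */

            /* "%.*s" takes its maximum byte count as an int argument,
             * which is why ndk_length/app_length changed from size_t
             * to int in the hunk above. */
            printf("%.*s/platforms\n", ndk_length, ndk);
            return 0;
    }
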
index 42c84ad..c0c0fab 100644 (file)
@@ -356,6 +356,9 @@ __add_event(struct list_head *list, int *idx,
        struct perf_cpu_map *cpus = pmu ? perf_cpu_map__get(pmu->cpus) :
                               cpu_list ? perf_cpu_map__new(cpu_list) : NULL;
 
+       if (pmu && attr->type == PERF_TYPE_RAW)
+               perf_pmu__warn_invalid_config(pmu, attr->config, name);
+
        if (init_attr)
                event_attr_init(attr);
 
index d5b6aff..d57ac86 100644 (file)
@@ -89,6 +89,7 @@ static void inc_group_count(struct list_head *list,
 %type <str> PE_EVENT_NAME
 %type <str> PE_PMU_EVENT_PRE PE_PMU_EVENT_SUF PE_KERNEL_PMU_EVENT PE_PMU_EVENT_FAKE
 %type <str> PE_DRV_CFG_TERM
+%type <str> event_pmu_name
 %destructor { free ($$); } <str>
 %type <term> event_term
 %destructor { parse_events_term__delete ($$); } <term>
@@ -272,8 +273,11 @@ event_def: event_pmu |
           event_legacy_raw sep_dc |
           event_bpf_file
 
+event_pmu_name:
+PE_NAME | PE_PMU_EVENT_PRE
+
 event_pmu:
-PE_NAME opt_pmu_config
+event_pmu_name opt_pmu_config
 {
        struct parse_events_state *parse_state = _parse_state;
        struct parse_events_error *error = parse_state->error;
index 44ef283..46fd0f9 100644 (file)
@@ -1812,3 +1812,36 @@ int perf_pmu__caps_parse(struct perf_pmu *pmu)
 
        return nr_caps;
 }
+
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+                                  char *name)
+{
+       struct perf_pmu_format *format;
+       __u64 masks = 0, bits;
+       char buf[100];
+       unsigned int i;
+
+       list_for_each_entry(format, &pmu->format, list) {
+               if (format->value != PERF_PMU_FORMAT_VALUE_CONFIG)
+                       continue;
+
+               for_each_set_bit(i, format->bits, PERF_PMU_FORMAT_BITS)
+                       masks |= 1ULL << i;
+       }
+
+       /*
+        * Kernel doesn't export any valid format bits.
+        */
+       if (masks == 0)
+               return;
+
+       bits = config & ~masks;
+       if (bits == 0)
+               return;
+
+       bitmap_scnprintf((unsigned long *)&bits, sizeof(bits) * 8, buf, sizeof(buf));
+
+       pr_warning("WARNING: event '%s' not valid (bits %s of config "
+                  "'%llx' not supported by kernel)!\n",
+                  name ?: "N/A", buf, config);
+}
index 8164388..160b0f5 100644 (file)
@@ -123,4 +123,7 @@ int perf_pmu__convert_scale(const char *scale, char **end, double *sval);
 
 int perf_pmu__caps_parse(struct perf_pmu *pmu);
 
+void perf_pmu__warn_invalid_config(struct perf_pmu *pmu, __u64 config,
+                                  char *name);
+
 #endif /* __PMU_H */
index 71b7535..845dd46 100644 (file)
@@ -36,3 +36,4 @@ util/symbol_fprintf.c
 util/units.c
 util/affinity.c
 util/rwsem.c
+util/hashmap.c
index 0d5ad42..552b590 100644 (file)
@@ -3140,7 +3140,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               if (sort__mode != SORT_MODE__MEMORY)
+               if (sort__mode != SORT_MODE__BRANCH)
                        return -EINVAL;
 
                return __sort_dimension__add_output(list, sd);
@@ -3152,7 +3152,7 @@ int output_field_add(struct perf_hpp_list *list, char *tok)
                if (strncasecmp(tok, sd->name, strlen(tok)))
                        continue;
 
-               if (sort__mode != SORT_MODE__BRANCH)
+               if (sort__mode != SORT_MODE__MEMORY)
                        return -EINVAL;
 
                return __sort_dimension__add_output(list, sd);
index cce7a76..7f09cda 100644 (file)
@@ -983,7 +983,7 @@ static void print_interval(struct perf_stat_config *config,
        if (config->interval_clear)
                puts(CONSOLE_CLEAR);
 
-       sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, config->csv_sep);
+       sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep);
 
        if ((num_print_interval == 0 && !config->csv_output) || config->interval_clear) {
                switch (config->aggr_mode) {
index 5d8af29..c400f8d 100644 (file)
@@ -13,6 +13,7 @@
 #include "evlist.h"
 #include "evsel.h"
 #include "thread_map.h"
+#include "hashmap.h"
 #include <linux/zalloc.h>
 
 void update_stats(struct stats *stats, u64 val)
@@ -277,18 +278,29 @@ void evlist__save_aggr_prev_raw_counts(struct evlist *evlist)
        }
 }
 
-static void zero_per_pkg(struct evsel *counter)
+static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused)
 {
-       if (counter->per_pkg_mask)
-               memset(counter->per_pkg_mask, 0, cpu__max_cpu());
+       uint64_t *key = (uint64_t *) __key;
+
+       return *key & 0xffffffff;
+}
+
+static bool pkg_id_equal(const void *__key1, const void *__key2,
+                        void *ctx __maybe_unused)
+{
+       uint64_t *key1 = (uint64_t *) __key1;
+       uint64_t *key2 = (uint64_t *) __key2;
+
+       return *key1 == *key2;
 }
 
 static int check_per_pkg(struct evsel *counter,
                         struct perf_counts_values *vals, int cpu, bool *skip)
 {
-       unsigned long *mask = counter->per_pkg_mask;
+       struct hashmap *mask = counter->per_pkg_mask;
        struct perf_cpu_map *cpus = evsel__cpus(counter);
-       int s;
+       int s, d, ret = 0;
+       uint64_t *key;
 
        *skip = false;
 
@@ -299,7 +311,7 @@ static int check_per_pkg(struct evsel *counter,
                return 0;
 
        if (!mask) {
-               mask = zalloc(cpu__max_cpu());
+               mask = hashmap__new(pkg_id_hash, pkg_id_equal, NULL);
                if (!mask)
                        return -ENOMEM;
 
@@ -321,8 +333,25 @@ static int check_per_pkg(struct evsel *counter,
        if (s < 0)
                return -1;
 
-       *skip = test_and_set_bit(s, mask) == 1;
-       return 0;
+       /*
+        * On a multi-die system, die_id > 0; on a system without dies,
+        * die_id = 0. We use a hashmap keyed on (socket, die) to track
+        * which socket+die pairs have already been counted.
+        */
+       d = cpu_map__get_die(cpus, cpu, NULL).die;
+       if (d < 0)
+               return -1;
+
+       key = malloc(sizeof(*key));
+       if (!key)
+               return -ENOMEM;
+
+       *key = (uint64_t)d << 32 | s;
+       if (hashmap__find(mask, (void *)key, NULL))
+               *skip = true;
+       else
+               ret = hashmap__add(mask, (void *)key, (void *)1);
+
+       return ret;
 }
 
 static int
@@ -422,7 +451,7 @@ int perf_stat_process_counter(struct perf_stat_config *config,
        }
 
        if (counter->per_pkg)
-               zero_per_pkg(counter);
+               evsel__zero_per_pkg(counter);
 
        ret = process_counter_maps(config, counter);
        if (ret)
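
For reference, a standalone sketch of the (socket, die) key packing that check_per_pkg() adopts above; the sample ids are arbitrary:

    /* Sketch of the (socket, die) key packing used by check_per_pkg();
     * the sample ids are arbitrary. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            int s = 1;              /* socket id */
            int d = 2;              /* die id; 0 on systems without dies */
            uint64_t key = (uint64_t)d << 32 | s;

            /* pkg_id_hash() keeps only the low 32 bits (the socket), so
             * dies of one socket share a bucket and pkg_id_equal()
             * disambiguates them. */
            printf("key=%#llx hash=%#llx\n", (unsigned long long)key,
                   (unsigned long long)(key & 0xffffffff));
            return 0;
    }
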
index b698046..dff1781 100644 (file)
@@ -424,7 +424,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool,
 
        while (!io.eof) {
                static const char anonstr[] = "//anon";
-               size_t size;
+               size_t size, aligned_size;
 
                /* ensure null termination since stack will be reused. */
                event->mmap2.filename[0] = '\0';
@@ -484,11 +484,12 @@ out:
                }
 
                size = strlen(event->mmap2.filename) + 1;
-               size = PERF_ALIGN(size, sizeof(u64));
+               aligned_size = PERF_ALIGN(size, sizeof(u64));
                event->mmap2.len -= event->mmap.start;
                event->mmap2.header.size = (sizeof(event->mmap2) -
-                                       (sizeof(event->mmap2.filename) - size));
-               memset(event->mmap2.filename + size, 0, machine->id_hdr_size);
+                                       (sizeof(event->mmap2.filename) - aligned_size));
+               memset(event->mmap2.filename + size, 0, machine->id_hdr_size +
+                       (aligned_size - size));
                event->mmap2.header.size += machine->id_hdr_size;
                event->mmap2.pid = tgid;
                event->mmap2.tid = pid;
@@ -758,7 +759,7 @@ static int __event__synthesize_thread(union perf_event *comm_event,
        for (i = 0; i < n; i++) {
                char *end;
                pid_t _pid;
-               bool kernel_thread;
+               bool kernel_thread = false;
 
                _pid = strtol(dirent[i]->d_name, &end, 10);
                if (*end)
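
A minimal sketch of the size vs. aligned-size bookkeeping fixed above; ALIGN_UP stands in for PERF_ALIGN under the usual power-of-two rule, and the path is invented:

    /* Sketch of the size vs. aligned_size bookkeeping from the hunk
     * above; ALIGN_UP stands in for PERF_ALIGN. */
    #include <stdio.h>
    #include <string.h>

    #define ALIGN_UP(x, a)  (((x) + (a) - 1) & ~(size_t)((a) - 1))

    int main(void)
    {
            char filename[32] = "/lib/foo.so";      /* example path */
            size_t size = strlen(filename) + 1;     /* 12, includes the NUL */
            size_t aligned_size = ALIGN_UP(size, 8);        /* 16 */

            /* The old code zeroed id_hdr_size bytes starting at the
             * aligned offset, leaving the (aligned_size - size) padding
             * bytes after the NUL uninitialized; the fix starts the
             * memset at +size so the padding is cleared as well. */
            memset(filename + size, 0, aligned_size - size);
            printf("size=%zu aligned=%zu pad=%zu\n",
                   size, aligned_size, aligned_size - size);
            return 0;
    }
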
index f507dff..8a01af7 100644 (file)
@@ -361,6 +361,7 @@ static int read_saved_cmdline(struct tep_handle *pevent)
                pr_debug("error reading saved cmdlines\n");
                goto out;
        }
+       buf[ret] = '\0';
 
        parse_saved_cmdline(pevent, buf, size);
        ret = 0;
index 3cc91ad..43beb16 100644 (file)
@@ -133,6 +133,8 @@ static struct dso *__machine__addnew_vdso(struct machine *machine, const char *s
        if (dso != NULL) {
                __dsos__add(&machine->dsos, dso);
                dso__set_long_name(dso, long_name, false);
+               /* Put the dso here because __dsos__add() already grabbed a reference */
+               dso__put(dso);
        }
 
        return dso;
index a7f0603..6908700 100644 (file)
@@ -40,3 +40,5 @@
 # CONFIG_RESET_BRCMSTB_RESCAL is not set
 # CONFIG_RESET_INTEL_GW is not set
 # CONFIG_ADI_AXI_ADC is not set
+# CONFIG_DEBUG_PAGEALLOC is not set
+# CONFIG_PAGE_POISONING is not set
index 0b550cb..1e2683d 100644 (file)
@@ -13,7 +13,7 @@ from typing import List, Set
 CONFIG_IS_NOT_SET_PATTERN = r'^# CONFIG_(\w+) is not set$'
 CONFIG_PATTERN = r'^CONFIG_(\w+)=(\S+|".*")$'
 
-KconfigEntryBase = collections.namedtuple('KconfigEntry', ['name', 'value'])
+KconfigEntryBase = collections.namedtuple('KconfigEntryBase', ['name', 'value'])
 
 class KconfigEntry(KconfigEntryBase):
 
index 3b796dd..ca24f68 100644 (file)
@@ -296,21 +296,34 @@ static void *idr_throbber(void *arg)
        return NULL;
 }
 
+/*
+ * There are always either 1 or 2 objects in the IDR.  If we find nothing,
+ * or we find something at an ID we didn't expect, that's a bug.
+ */
 void idr_find_test_1(int anchor_id, int throbber_id)
 {
        pthread_t throbber;
        time_t start = time(NULL);
 
-       pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
-
        BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id,
                                anchor_id + 1, GFP_KERNEL) != anchor_id);
 
+       pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+       rcu_read_lock();
        do {
                int id = 0;
                void *entry = idr_get_next(&find_idr, &id);
-               BUG_ON(entry != xa_mk_value(id));
+               rcu_read_unlock();
+               if ((id != anchor_id && id != throbber_id) ||
+                   entry != xa_mk_value(id)) {
+                       printf("%s(%d, %d): %p at %d\n", __func__, anchor_id,
+                               throbber_id, entry, id);
+                       abort();
+               }
+               rcu_read_lock();
        } while (time(NULL) < start + 11);
+       rcu_read_unlock();
 
        pthread_join(throbber, NULL);
 
@@ -577,6 +590,7 @@ void ida_tests(void)
 
 int __weak main(void)
 {
+       rcu_register_thread();
        radix_tree_init();
        idr_checks();
        ida_tests();
@@ -584,5 +598,6 @@ int __weak main(void)
        rcu_barrier();
        if (nr_allocated)
                printf("nr_allocated = %d\n", nr_allocated);
+       rcu_unregister_thread();
        return 0;
 }
diff --git a/tools/testing/radix-tree/linux/compiler_types.h b/tools/testing/radix-tree/linux/compiler_types.h
deleted file mode 100644 (file)
index e69de29..0000000
index 9eae0fb..e00520c 100644 (file)
@@ -224,7 +224,9 @@ void multiorder_checks(void)
 
 int __weak main(void)
 {
+       rcu_register_thread();
        radix_tree_init();
        multiorder_checks();
+       rcu_unregister_thread();
        return 0;
 }
index e61e43e..f20e12c 100644 (file)
@@ -25,11 +25,13 @@ void xarray_tests(void)
 
 int __weak main(void)
 {
+       rcu_register_thread();
        radix_tree_init();
        xarray_tests();
        radix_tree_cpu_dead(1);
        rcu_barrier();
        if (nr_allocated)
                printf("nr_allocated = %d\n", nr_allocated);
+       rcu_unregister_thread();
        return 0;
 }
index 41f0a0a..6c575cf 100644 (file)
@@ -33,6 +33,7 @@ TARGETS += memfd
 TARGETS += memory-hotplug
 TARGETS += mincore
 TARGETS += mount
+TARGETS += mount_setattr
 TARGETS += mqueue
 TARGETS += nci
 TARGETS += net
index b2282be..612d389 100644 (file)
@@ -332,5 +332,5 @@ int main(void)
 
        ksft_print_cnts();
 
-       return 0;
+       return ret;
 }
index 9210691..e3e08d9 100644 (file)
@@ -284,16 +284,28 @@ endfunction
 // Set up test pattern in the FFR
 // x0: pid
 // x2: generation
+//
+// We need to generate a canonical FFR value, which consists of a number of
+// low "1" bits, followed by a number of zeros. This gives us 17 unique values
+// per 16 bits of FFR, so we create a 4 bit signature out of the PID and
+// generation, and use that as the initial number of ones in the pattern.
+// We fill the upper lanes of FFR with zeros.
 // Beware: corrupts P0.
 function setup_ffr
        mov     x4, x30
 
-       bl      pattern
+       and     w0, w0, #0x3
+       bfi     w0, w2, #2, #2
+       mov     w1, #1
+       lsl     w1, w1, w0
+       sub     w1, w1, #1
+
        ldr     x0, =ffrref
-       ldr     x1, =scratch
-       rdvl    x2, #1
-       lsr     x2, x2, #3
-       bl      memcpy
+       strh    w1, [x0], 2
+       rdvl    x1, #1
+       lsr     x1, x1, #3
+       sub     x1, x1, #2
+       bl      memclr
 
        mov     x0, #0
        ldr     x1, =ffrref
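
A C rendering of the canonical FFR pattern the rewritten assembly builds; the pid and generation values are arbitrary examples:

    /* C rendering of the FFR pattern the assembly above constructs. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t pid = 1234, gen = 3;

            /* 4-bit signature: low 2 bits of the pid, then 2 bits of
             * the generation (the AND + BFI sequence above). */
            uint32_t sig = (pid & 3) | ((gen & 3) << 2);

            /* sig low "1" bits; the upper FFR lanes are cleared
             * separately via memclr. */
            uint16_t ffr = (uint16_t)((1u << sig) - 1);

            printf("sig=%u ffr=%#x\n", sig, ffr);   /* sig=14 ffr=0x3fff */
            return 0;
    }
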
index 36af1c1..b62a393 100644 (file)
@@ -128,6 +128,8 @@ static void test_check_mtu_xdp(__u32 mtu, __u32 ifindex)
        test_check_mtu_run_xdp(skel, skel->progs.xdp_use_helper, mtu);
        test_check_mtu_run_xdp(skel, skel->progs.xdp_exceed_mtu, mtu);
        test_check_mtu_run_xdp(skel, skel->progs.xdp_minus_delta, mtu);
+       test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len, mtu);
+       test_check_mtu_run_xdp(skel, skel->progs.xdp_input_len_exceed, mtu);
 
 cleanup:
        test_check_mtu__destroy(skel);
@@ -187,6 +189,8 @@ static void test_check_mtu_tc(__u32 mtu, __u32 ifindex)
        test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu, mtu);
        test_check_mtu_run_tc(skel, skel->progs.tc_exceed_mtu_da, mtu);
        test_check_mtu_run_tc(skel, skel->progs.tc_minus_delta, mtu);
+       test_check_mtu_run_tc(skel, skel->progs.tc_input_len, mtu);
+       test_check_mtu_run_tc(skel, skel->progs.tc_input_len_exceed, mtu);
 cleanup:
        test_check_mtu__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c b/tools/testing/selftests/bpf/prog_tests/fexit_sleep.c
new file mode 100644 (file)
index 0000000..6c4d42a
--- /dev/null
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#define _GNU_SOURCE
+#include <sched.h>
+#include <test_progs.h>
+#include <time.h>
+#include <sys/mman.h>
+#include <sys/syscall.h>
+#include "fexit_sleep.skel.h"
+
+static int do_sleep(void *skel)
+{
+       struct fexit_sleep *fexit_skel = skel;
+       struct timespec ts1 = { .tv_nsec = 1 };
+       struct timespec ts2 = { .tv_sec = 10 };
+
+       fexit_skel->bss->pid = getpid();
+       (void)syscall(__NR_nanosleep, &ts1, NULL);
+       (void)syscall(__NR_nanosleep, &ts2, NULL);
+       return 0;
+}
+
+#define STACK_SIZE (1024 * 1024)
+static char child_stack[STACK_SIZE];
+
+void test_fexit_sleep(void)
+{
+       struct fexit_sleep *fexit_skel = NULL;
+       int wstatus, duration = 0;
+       pid_t cpid;
+       int err, fexit_cnt;
+
+       fexit_skel = fexit_sleep__open_and_load();
+       if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n"))
+               goto cleanup;
+
+       err = fexit_sleep__attach(fexit_skel);
+       if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err))
+               goto cleanup;
+
+       cpid = clone(do_sleep, child_stack + STACK_SIZE, CLONE_FILES | SIGCHLD, fexit_skel);
+       if (CHECK(cpid == -1, "clone", strerror(errno)))
+               goto cleanup;
+
+       /* wait until first sys_nanosleep ends and second sys_nanosleep starts */
+       while (READ_ONCE(fexit_skel->bss->fentry_cnt) != 2);
+       fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+       if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+               goto cleanup;
+
+       /* close progs and detach them. That will trigger two nop5->jmp5 rewrites
+        * in the trampolines to skip nanosleep_fexit prog.
+        * The nanosleep_fentry prog will get detached first.
+        * The nanosleep_fexit prog will get detached second.
+        * Detaching will trigger freeing of both progs JITed images.
+        * There will be two dying bpf_tramp_image-s, but only the initial
+        * bpf_tramp_image (the one with both _fentry and _fexit progs) will
+        * be stuck waiting for percpu_ref_kill to confirm. The other one
+        * will be freed quickly.
+        */
+       close(bpf_program__fd(fexit_skel->progs.nanosleep_fentry));
+       close(bpf_program__fd(fexit_skel->progs.nanosleep_fexit));
+       fexit_sleep__detach(fexit_skel);
+
+       /* kill the thread to unwind sys_nanosleep stack through the trampoline */
+       kill(cpid, 9);
+
+       if (CHECK(waitpid(cpid, &wstatus, 0) == -1, "waitpid", strerror(errno)))
+               goto cleanup;
+       if (CHECK(WEXITSTATUS(wstatus) != 0, "exitstatus", "failed"))
+               goto cleanup;
+
+       /* The bypassed nanosleep_fexit prog shouldn't have executed.
+        * Unlike the progs, the maps were not freed and are directly accessible.
+        */
+       fexit_cnt = READ_ONCE(fexit_skel->bss->fexit_cnt);
+       if (CHECK(fexit_cnt != 1, "fexit_cnt", "%d", fexit_cnt))
+               goto cleanup;
+
+cleanup:
+       fexit_sleep__destroy(fexit_skel);
+}
index 31975c9..3ac0c9a 100644 (file)
@@ -174,6 +174,12 @@ struct struct_in_struct {
        };
 };
 
+struct struct_in_array {};
+
+struct struct_in_array_typed {};
+
+typedef struct struct_in_array_typed struct_in_array_t[2];
+
 struct struct_with_embedded_stuff {
        int a;
        struct {
@@ -203,6 +209,8 @@ struct struct_with_embedded_stuff {
        } r[5];
        struct struct_in_struct s[10];
        int t[11];
+       struct struct_in_array (*u)[2];
+       struct_in_array_t *v;
 };
 
 struct root_struct {
diff --git a/tools/testing/selftests/bpf/progs/fexit_sleep.c b/tools/testing/selftests/bpf/progs/fexit_sleep.c
new file mode 100644 (file)
index 0000000..03a672d
--- /dev/null
@@ -0,0 +1,31 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2021 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char LICENSE[] SEC("license") = "GPL";
+
+int pid = 0;
+int fentry_cnt = 0;
+int fexit_cnt = 0;
+
+SEC("fentry/__x64_sys_nanosleep")
+int BPF_PROG(nanosleep_fentry, const struct pt_regs *regs)
+{
+       if ((int)bpf_get_current_pid_tgid() != pid)
+               return 0;
+
+       fentry_cnt++;
+       return 0;
+}
+
+SEC("fexit/__x64_sys_nanosleep")
+int BPF_PROG(nanosleep_fexit, const struct pt_regs *regs, int ret)
+{
+       if ((int)bpf_get_current_pid_tgid() != pid)
+               return 0;
+
+       fexit_cnt++;
+       return 0;
+}
index 6b67003..1d8918d 100644 (file)
@@ -16,6 +16,13 @@ bool skip = false;
 #define STRSIZE                        2048
 #define EXPECTED_STRSIZE       256
 
+#if defined(bpf_target_s390)
+/* NULL points to a readable struct lowcore on s390, so take the last page */
+#define BADPTR                 ((void *)0xFFFFFFFFFFFFF000ULL)
+#else
+#define BADPTR                 0
+#endif
+
 #ifndef ARRAY_SIZE
 #define ARRAY_SIZE(x)  (sizeof(x) / sizeof((x)[0]))
 #endif
@@ -113,11 +120,11 @@ int BPF_PROG(trace_netif_receive_skb, struct sk_buff *skb)
        }
 
        /* Check invalid ptr value */
-       p.ptr = 0;
+       p.ptr = BADPTR;
        __ret = bpf_snprintf_btf(str, STRSIZE, &p, sizeof(p), 0);
        if (__ret >= 0) {
-               bpf_printk("printing NULL should generate error, got (%d)",
-                          __ret);
+               bpf_printk("printing %llx should generate error, got (%d)",
+                          (unsigned long long)BADPTR, __ret);
                ret = -ERANGE;
        }
 
index b7787b4..c4a9bae 100644 (file)
@@ -105,6 +105,54 @@ int xdp_minus_delta(struct xdp_md *ctx)
        return retval;
 }
 
+SEC("xdp")
+int xdp_input_len(struct xdp_md *ctx)
+{
+       int retval = XDP_PASS; /* Expected retval on successful test */
+       void *data_end = (void *)(long)ctx->data_end;
+       void *data = (void *)(long)ctx->data;
+       __u32 ifindex = GLOBAL_USER_IFINDEX;
+       __u32 data_len = data_end - data;
+
+       /* The API lets the user pass a length to check as input via the
+        * mtu_len param; the resulting MTU value is still output in mtu_len
+        * after the call.
+        *
+        * The input len is L3, like MTU and iph->tot_len.
+        * Remember that XDP data_len is L2.
+        */
+       __u32 mtu_len = data_len - ETH_HLEN;
+
+       if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+               retval = XDP_ABORTED;
+
+       global_bpf_mtu_xdp = mtu_len;
+       return retval;
+}
+
+SEC("xdp")
+int xdp_input_len_exceed(struct xdp_md *ctx)
+{
+       int retval = XDP_ABORTED; /* Fail */
+       __u32 ifindex = GLOBAL_USER_IFINDEX;
+       int err;
+
+       /* The API lets the user pass a length to check as input via the
+        * mtu_len param; the resulting MTU value is still output in mtu_len
+        * after the call.
+        *
+        * The input length value is an L3 size, like the MTU.
+        */
+       __u32 mtu_len = GLOBAL_USER_MTU;
+
+       mtu_len += 1; /* Exceed by 1 */
+
+       err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+       if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+               retval = XDP_PASS; /* Success: exceeding the MTU was detected */
+
+       global_bpf_mtu_xdp = mtu_len;
+       return retval;
+}
+
 SEC("classifier")
 int tc_use_helper(struct __sk_buff *ctx)
 {
@@ -196,3 +244,47 @@ int tc_minus_delta(struct __sk_buff *ctx)
        global_bpf_mtu_xdp = mtu_len;
        return retval;
 }
+
+SEC("classifier")
+int tc_input_len(struct __sk_buff *ctx)
+{
+       int retval = BPF_OK; /* Expected retval on successful test */
+       __u32 ifindex = GLOBAL_USER_IFINDEX;
+
+       /* The API lets the user pass a length to check as input via the
+        * mtu_len param; the resulting MTU value is still output in mtu_len
+        * after the call.
+        *
+        * The input length value is an L3 size.
+        */
+       __u32 mtu_len = GLOBAL_USER_MTU;
+
+       if (bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0))
+               retval = BPF_DROP;
+
+       global_bpf_mtu_xdp = mtu_len;
+       return retval;
+}
+
+SEC("classifier")
+int tc_input_len_exceed(struct __sk_buff *ctx)
+{
+       int retval = BPF_DROP; /* Fail */
+       __u32 ifindex = GLOBAL_USER_IFINDEX;
+       int err;
+
+       /* The API lets the user pass a length to check as input via the
+        * mtu_len param; the resulting MTU value is still output in mtu_len
+        * after the call.
+        *
+        * The input length value is an L3 size, like the MTU.
+        */
+       __u32 mtu_len = GLOBAL_USER_MTU;
+
+       mtu_len += 1; /* Exceed by 1 */
+
+       err = bpf_check_mtu(ctx, ifindex, &mtu_len, 0, 0);
+       if (err == BPF_MTU_CHK_RET_FRAG_NEEDED)
+               retval = BPF_OK; /* Success: exceeding the MTU was detected */
+
+       global_bpf_mtu_xdp = mtu_len;
+       return retval;
+}
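
A hedged C sketch of the bpf_check_mtu() contract these new programs exercise, approximated from the comments above rather than taken from the kernel implementation:

    /* Hedged sketch of the bpf_check_mtu() contract the tests rely on;
     * an approximation, not the kernel's implementation. */
    #include <stdio.h>

    enum {
            CHK_SUCCESS = 0,        /* stand-in for BPF_MTU_CHK_RET_SUCCESS */
            CHK_FRAG_NEEDED = 1,    /* stand-in for BPF_MTU_CHK_RET_FRAG_NEEDED */
    };

    static int check_mtu_sketch(unsigned int mtu, unsigned int *mtu_len)
    {
            unsigned int len = *mtu_len;    /* caller-supplied L3 length, if any */

            *mtu_len = mtu;                 /* the resulting MTU is always output */
            return len > mtu ? CHK_FRAG_NEEDED : CHK_SUCCESS;
    }

    int main(void)
    {
            unsigned int mtu_len = 1501;    /* exceed a 1500-byte MTU by one */

            printf("ret=%d mtu=%u\n", check_mtu_sketch(1500, &mtu_len), mtu_len);
            return 0;
    }
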
index 2848804..ef5277d 100644 (file)
@@ -15,5 +15,5 @@ __noinline int foo(const struct S *s)
 SEC("cgroup_skb/ingress")
 int test_cls(struct __sk_buff *skb)
 {
-       return foo(skb);
+       return foo((const void *)skb);
 }
index a621b58..ba6eadf 100644 (file)
@@ -446,10 +446,8 @@ int _geneve_get_tunnel(struct __sk_buff *skb)
        }
 
        ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
-       if (ret < 0) {
-               ERROR(ret);
-               return TC_ACT_SHOT;
-       }
+       if (ret < 0)
+               gopt.opt_class = 0;
 
        bpf_trace_printk(fmt, sizeof(fmt),
                        key.tunnel_id, key.remote_ipv4, gopt.opt_class);
@@ -510,10 +508,8 @@ int _ip6geneve_get_tunnel(struct __sk_buff *skb)
        }
 
        ret = bpf_skb_get_tunnel_opt(skb, &gopt, sizeof(gopt));
-       if (ret < 0) {
-               ERROR(ret);
-               return TC_ACT_SHOT;
-       }
+       if (ret < 0)
+               gopt.opt_class = 0;
 
        bpf_trace_printk(fmt, sizeof(fmt),
                        key.tunnel_id, key.remote_ipv4, gopt.opt_class);
index bed53b5..1b138cd 100644 (file)
        BPF_MOV64_IMM(BPF_REG_5, 0),
        BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0,
                     BPF_FUNC_csum_diff),
+       BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xffff),
        BPF_EXIT_INSN(),
        },
        .prog_type = BPF_PROG_TYPE_SCHED_CLS,
        .fixup_map_array_ro = { 3 },
        .result = ACCEPT,
-       .retval = -29,
+       .retval = 65507,
 },
 {
        "invalid write map access into a read-only array 1",
index 1bdc8e6..fe4bb70 100644 (file)
        },
        .result = ACCEPT,
 },
+{
+       "BPF_ATOMIC_AND with fetch - r0 as source reg",
+       .insns = {
+               /* val = 0x110; */
+               BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0x110),
+               /* old = atomic_fetch_and(&val, 0x011); */
+               BPF_MOV64_IMM(BPF_REG_0, 0x011),
+               BPF_ATOMIC_OP(BPF_DW, BPF_AND | BPF_FETCH, BPF_REG_10, BPF_REG_0, -8),
+               /* if (old != 0x110) exit(3); */
+               BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0x110, 2),
+               BPF_MOV64_IMM(BPF_REG_0, 3),
+               BPF_EXIT_INSN(),
+               /* if (val != 0x010) exit(2); */
+               BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -8),
+               BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0x010, 2),
+               BPF_MOV64_IMM(BPF_REG_1, 2),
+               BPF_EXIT_INSN(),
+               /* exit(0); */
+               BPF_MOV64_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .result = ACCEPT,
+},
index 2efd8bc..6e52dfc 100644 (file)
        .result = REJECT,
        .errstr = "invalid read from stack",
 },
+{
+       "BPF_W cmpxchg should zero top 32 bits",
+       .insns = {
+               /* r0 = U64_MAX; */
+               BPF_MOV64_IMM(BPF_REG_0, 0),
+               BPF_ALU64_IMM(BPF_SUB, BPF_REG_0, 1),
+               /* u64 val = r0; */
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_0, -8),
+               /* r0 = (u32)atomic_cmpxchg((u32 *)&val, r0, 1); */
+               BPF_MOV32_IMM(BPF_REG_1, 1),
+               BPF_ATOMIC_OP(BPF_W, BPF_CMPXCHG, BPF_REG_10, BPF_REG_1, -8),
+               /* r1 = 0x00000000FFFFFFFFull; */
+               BPF_MOV64_IMM(BPF_REG_1, 1),
+               BPF_ALU64_IMM(BPF_LSH, BPF_REG_1, 32),
+               BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+               /* if (r0 != r1) exit(1); */
+               BPF_JMP_REG(BPF_JEQ, BPF_REG_0, BPF_REG_1, 2),
+               BPF_MOV32_IMM(BPF_REG_0, 1),
+               BPF_EXIT_INSN(),
+               /* exit(0); */
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .result = ACCEPT,
+},
index 70f982e..9d0716a 100644 (file)
        },
        .result = ACCEPT,
 },
+{
+       "BPF_W atomic_fetch_or should zero top 32 bits",
+       .insns = {
+               /* r1 = U64_MAX; */
+               BPF_MOV64_IMM(BPF_REG_1, 0),
+               BPF_ALU64_IMM(BPF_SUB, BPF_REG_1, 1),
+               /* u64 val = r1; */
+               BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+               /* r1 = (u32)atomic_fetch_or((u32 *)&val, 2); */
+               BPF_MOV32_IMM(BPF_REG_1, 2),
+               BPF_ATOMIC_OP(BPF_W, BPF_OR | BPF_FETCH, BPF_REG_10, BPF_REG_1, -8),
+               /* r2 = 0x00000000FFFFFFFF; */
+               BPF_MOV64_IMM(BPF_REG_2, 1),
+               BPF_ALU64_IMM(BPF_LSH, BPF_REG_2, 32),
+               BPF_ALU64_IMM(BPF_SUB, BPF_REG_2, 1),
+               /* if (r2 != r1) exit(1); */
+               BPF_JMP_REG(BPF_JEQ, BPF_REG_2, BPF_REG_1, 2),
+               BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
+               BPF_EXIT_INSN(),
+               /* exit(0); */
+               BPF_MOV32_IMM(BPF_REG_0, 0),
+               BPF_EXIT_INSN(),
+       },
+       .result = ACCEPT,
+},
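
Both new cases pin down the same invariant; a C analogue of the zero-extension they expect:

    /* C analogue of the zero-extension both tests above pin down: a
     * 32-bit atomic result written back to a 64-bit register must
     * clear the top 32 bits. */
    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint64_t r0 = UINT64_MAX;       /* stale upper bits */
            uint32_t fetched = 0xffffffffu; /* 32-bit atomic result */

            r0 = fetched;   /* zero-extends: the top 32 bits become 0 */
            printf("r0=%#llx\n", (unsigned long long)r0);   /* 0xffffffff */
            return 0;
    }
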
index 1fd07a4..c162498 100644 (file)
@@ -6,8 +6,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 2",
@@ -20,6 +21,8 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
                BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
        .retval = 1,
 },
@@ -31,8 +34,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 4",
@@ -45,6 +49,8 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_1, BPF_REG_0),
                BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
+       .result_unpriv = REJECT,
        .result = ACCEPT,
 },
 {
@@ -55,8 +61,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 6",
@@ -67,8 +74,9 @@
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 7",
@@ -80,8 +88,9 @@
                            offsetof(struct __sk_buff, mark)),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R1 tried to sub from different maps, paths, or prohibited types",
        .errstr = "dereference of modified ctx ptr",
+       .result = REJECT,
        .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
                            offsetof(struct __sk_buff, mark)),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
        .errstr = "dereference of modified ctx ptr",
+       .result = REJECT,
        .flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
 {
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
+       .errstr_unpriv = "R0 tried to sub from different maps, paths, or prohibited types",
        .errstr = "R0 tried to subtract pointer from scalar",
+       .result = REJECT,
 },
 {
        "check deducing bounds from const, 10",
                BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
                BPF_EXIT_INSN(),
        },
-       .result = REJECT,
        .errstr = "math between ctx pointer and register with unbounded min value is not allowed",
+       .result = REJECT,
 },
index b117bdd..6f610cf 100644 (file)
@@ -75,6 +75,8 @@
        BPF_EXIT_INSN(),
        },
        .fixup_map_hash_16b = { 4 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
        .result = ACCEPT,
 },
 {
@@ -91,5 +93,7 @@
        BPF_EXIT_INSN(),
        },
        .fixup_map_hash_16b = { 4 },
+       .result_unpriv = REJECT,
+       .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
        .result = ACCEPT,
 },
index b018ad7..3e32400 100644 (file)
        .result = ACCEPT,
 },
 {
-       "unpriv: adding of fp",
+       "unpriv: adding of fp, reg",
        .insns = {
        BPF_MOV64_IMM(BPF_REG_0, 0),
        BPF_MOV64_IMM(BPF_REG_1, 0),
        BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
        BPF_EXIT_INSN(),
        },
+       .errstr_unpriv = "R1 tried to add from different maps, paths, or prohibited types",
+       .result_unpriv = REJECT,
+       .result = ACCEPT,
+},
+{
+       "unpriv: adding of fp, imm",
+       .insns = {
+       BPF_MOV64_IMM(BPF_REG_0, 0),
+       BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, 0),
+       BPF_STX_MEM(BPF_DW, BPF_REG_1, BPF_REG_0, -8),
+       BPF_EXIT_INSN(),
+       },
        .errstr_unpriv = "R1 stack pointer arithmetic goes out of range",
        .result_unpriv = REJECT,
        .result = ACCEPT,
index ed4e76b..feb9126 100644 (file)
        .fixup_map_array_48b = { 1 },
        .result = ACCEPT,
        .result_unpriv = REJECT,
-       .errstr_unpriv = "R2 tried to add from different maps or paths",
+       .errstr_unpriv = "R2 tried to add from different maps, paths, or prohibited types",
        .retval = 0,
 },
 {
        .retval = 0xabcdef12,
 },
 {
+       "map access: value_ptr += N, value_ptr -= N known scalar",
+       .insns = {
+       BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+       BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+       BPF_LD_MAP_FD(BPF_REG_1, 0),
+       BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+       BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 6),
+       BPF_MOV32_IMM(BPF_REG_1, 0x12345678),
+       BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_1, 0),
+       BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 2),
+       BPF_MOV64_IMM(BPF_REG_1, 2),
+       BPF_ALU64_REG(BPF_SUB, BPF_REG_0, BPF_REG_1),
+       BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, 0),
+       BPF_EXIT_INSN(),
+       },
+       .fixup_map_array_48b = { 3 },
+       .result = ACCEPT,
+       .retval = 0x12345678,
+},
+{
        "map access: unknown scalar += value_ptr, 1",
        .insns = {
        BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
index 537d659..fb23ce9 100644 (file)
 #include <sys/mman.h>
 #include <linux/types.h>
 
+#define NSEC_PER_MSEC  1000000L
+
 #define DMA_MAP_BENCHMARK      _IOWR('d', 1, struct map_benchmark)
 #define DMA_MAP_MAX_THREADS    1024
 #define DMA_MAP_MAX_SECONDS     300
+#define DMA_MAP_MAX_TRANS_DELAY        (10 * NSEC_PER_MSEC)
 
 #define DMA_MAP_BIDIRECTIONAL  0
 #define DMA_MAP_TO_DEVICE      1
@@ -36,7 +39,8 @@ struct map_benchmark {
        __s32 node; /* which numa node this benchmark will run on */
        __u32 dma_bits; /* DMA addressing capability */
        __u32 dma_dir; /* DMA data direction */
-       __u8 expansion[84];     /* For future use */
+       __u32 dma_trans_ns; /* time for DMA transmission in ns */
+       __u8 expansion[80];     /* For future use */
 };
 
 int main(int argc, char **argv)
@@ -46,12 +50,12 @@ int main(int argc, char **argv)
        /* default single thread, run 20 seconds on NUMA_NO_NODE */
        int threads = 1, seconds = 20, node = -1;
        /* default dma mask 32bit, bidirectional DMA */
-       int bits = 32, dir = DMA_MAP_BIDIRECTIONAL;
+       int bits = 32, xdelay = 0, dir = DMA_MAP_BIDIRECTIONAL;
 
        int cmd = DMA_MAP_BENCHMARK;
        char *p;
 
-       while ((opt = getopt(argc, argv, "t:s:n:b:d:")) != -1) {
+       while ((opt = getopt(argc, argv, "t:s:n:b:d:x:")) != -1) {
                switch (opt) {
                case 't':
                        threads = atoi(optarg);
@@ -68,6 +72,9 @@ int main(int argc, char **argv)
                case 'd':
                        dir = atoi(optarg);
                        break;
+               case 'x':
+                       xdelay = atoi(optarg);
+                       break;
                default:
                        return -1;
                }
@@ -85,6 +92,12 @@ int main(int argc, char **argv)
                exit(1);
        }
 
+       if (xdelay < 0 || xdelay > DMA_MAP_MAX_TRANS_DELAY) {
+               fprintf(stderr, "invalid transmit delay, must be in 0-%ld\n",
+                       DMA_MAP_MAX_TRANS_DELAY);
+               exit(1);
+       }
+
        /* assume the minimum DMA zone anywhere in the world is 1MB */
        if (bits < 20 || bits > 64) {
                fprintf(stderr, "invalid dma mask bit, must be in 20-64\n");
@@ -109,6 +122,8 @@ int main(int argc, char **argv)
        map.node = node;
        map.dma_bits = bits;
        map.dma_dir = dir;
+       map.dma_trans_ns = xdelay;
+
        if (ioctl(fd, cmd, &map)) {
                perror("ioctl");
                exit(1);
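
A sketch of the ABI-size invariant behind shrinking expansion[] when dma_trans_ns was added; the structs below are cut-down stand-ins, not the real uapi layout:

    /* Sketch of the ABI-size invariant behind shrinking expansion[];
     * the structs are cut-down stand-ins for struct map_benchmark. */
    #include <assert.h>
    #include <stdint.h>

    struct before { uint8_t expansion[84]; };
    struct after  { uint32_t dma_trans_ns; uint8_t expansion[80]; };

    static_assert(sizeof(struct before) == sizeof(struct after),
                  "carving a field from the reserved area keeps the "
                  "DMA_MAP_BENCHMARK ioctl payload size unchanged");

    int main(void)
    {
            return 0;
    }
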
index 4c69408..a4969f7 100644 (file)
@@ -1,2 +1,2 @@
 # SPDX-License-Identifier: GPL-2.0-only
-gpio-mockup-chardev
+gpio-mockup-cdev
index 3a84394..7bd7e77 100644 (file)
@@ -8,10 +8,13 @@
 /x86_64/debug_regs
 /x86_64/evmcs_test
 /x86_64/get_cpuid_test
+/x86_64/get_msr_index_features
 /x86_64/kvm_pv_test
+/x86_64/hyperv_clock
 /x86_64/hyperv_cpuid
 /x86_64/mmio_warning_test
 /x86_64/platform_info_test
+/x86_64/set_boot_cpu_id
 /x86_64/set_sregs_test
 /x86_64/smm_test
 /x86_64/state_test
@@ -33,6 +36,7 @@
 /demand_paging_test
 /dirty_log_test
 /dirty_log_perf_test
+/hardware_disable_test
 /kvm_create_max_vcpus
 /memslot_modification_stress_test
 /set_memory_region_test
index 8c8eda4..67eebb5 100644 (file)
@@ -39,12 +39,15 @@ LIBKVM_aarch64 = lib/aarch64/processor.c lib/aarch64/ucall.c
 LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
 
 TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
 TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
 TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
+TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
 TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
 TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
 TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
 TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
 TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
 TEST_GEN_PROGS_x86_64 += x86_64/smm_test
 TEST_GEN_PROGS_x86_64 += x86_64/state_test
@@ -67,6 +70,7 @@ TEST_GEN_PROGS_x86_64 += x86_64/xen_vmcall_test
 TEST_GEN_PROGS_x86_64 += demand_paging_test
 TEST_GEN_PROGS_x86_64 += dirty_log_test
 TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
+TEST_GEN_PROGS_x86_64 += hardware_disable_test
 TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
 TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
 TEST_GEN_PROGS_x86_64 += set_memory_region_test
diff --git a/tools/testing/selftests/kvm/hardware_disable_test.c b/tools/testing/selftests/kvm/hardware_disable_test.c
new file mode 100644 (file)
index 0000000..5aadf84
--- /dev/null
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * This test is intended to reproduce a crash that happens when
+ * kvm_arch_hardware_disable is called and it attempts to unregister the user
+ * return notifiers.
+ */
+
+#define _GNU_SOURCE
+
+#include <fcntl.h>
+#include <pthread.h>
+#include <semaphore.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <sys/wait.h>
+
+#include <test_util.h>
+
+#include "kvm_util.h"
+
+#define VCPU_NUM 4
+#define SLEEPING_THREAD_NUM (1 << 4)
+#define FORK_NUM (1ULL << 9)
+#define DELAY_US_MAX 2000
+#define GUEST_CODE_PIO_PORT 4
+
+sem_t *sem;
+
+/* Arguments for the pthreads */
+struct payload {
+       struct kvm_vm *vm;
+       uint32_t index;
+};
+
+static void guest_code(void)
+{
+       for (;;)
+               ;  /* Some busy work */
+       printf("Should not be reached.\n");
+}
+
+static void *run_vcpu(void *arg)
+{
+       struct payload *payload = (struct payload *)arg;
+       struct kvm_run *state = vcpu_state(payload->vm, payload->index);
+
+       vcpu_run(payload->vm, payload->index);
+
+       TEST_ASSERT(false, "%s: exited with reason %d: %s\n",
+                   __func__, state->exit_reason,
+                   exit_reason_str(state->exit_reason));
+       pthread_exit(NULL);
+}
+
+static void *sleeping_thread(void *arg)
+{
+       int fd;
+
+       while (true) {
+               fd = open("/dev/null", O_RDWR);
+               close(fd);
+       }
+       TEST_ASSERT(false, "%s: exited\n", __func__);
+       pthread_exit(NULL);
+}
+
+static inline void check_create_thread(pthread_t *thread, pthread_attr_t *attr,
+                                      void *(*f)(void *), void *arg)
+{
+       int r;
+
+       r = pthread_create(thread, attr, f, arg);
+       TEST_ASSERT(r == 0, "%s: failed to create thread", __func__);
+}
+
+static inline void check_set_affinity(pthread_t thread, cpu_set_t *cpu_set)
+{
+       int r;
+
+       r = pthread_setaffinity_np(thread, sizeof(cpu_set_t), cpu_set);
+       TEST_ASSERT(r == 0, "%s: failed to set affinity", __func__);
+}
+
+static inline void check_join(pthread_t thread, void **retval)
+{
+       int r;
+
+       r = pthread_join(thread, retval);
+       TEST_ASSERT(r == 0, "%s: failed to join thread", __func__);
+}
+
+static void run_test(uint32_t run)
+{
+       struct kvm_vm *vm;
+       cpu_set_t cpu_set;
+       pthread_t threads[VCPU_NUM];
+       pthread_t throw_away;
+       struct payload payloads[VCPU_NUM];
+       void *b;
+       uint32_t i, j;
+
+       CPU_ZERO(&cpu_set);
+       for (i = 0; i < VCPU_NUM; i++)
+               CPU_SET(i, &cpu_set);
+
+       vm = vm_create(VM_MODE_DEFAULT, DEFAULT_GUEST_PHY_PAGES, O_RDWR);
+       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+       vm_create_irqchip(vm);
+
+       pr_debug("%s: [%d] start vcpus\n", __func__, run);
+       for (i = 0; i < VCPU_NUM; ++i) {
+               vm_vcpu_add_default(vm, i, guest_code);
+               payloads[i].vm = vm;
+               payloads[i].index = i;
+
+               check_create_thread(&threads[i], NULL, run_vcpu,
+                                   (void *)&payloads[i]);
+               check_set_affinity(threads[i], &cpu_set);
+
+               for (j = 0; j < SLEEPING_THREAD_NUM; ++j) {
+                       check_create_thread(&throw_away, NULL, sleeping_thread,
+                                           (void *)NULL);
+                       check_set_affinity(throw_away, &cpu_set);
+               }
+       }
+       pr_debug("%s: [%d] all threads launched\n", __func__, run);
+       sem_post(sem);
+       for (i = 0; i < VCPU_NUM; ++i)
+               check_join(threads[i], &b);
+       /* Should not be reached */
+       TEST_ASSERT(false, "%s: [%d] child escaped the ninja\n", __func__, run);
+}
+
+int main(int argc, char **argv)
+{
+       uint32_t i;
+       int s, r;
+       pid_t pid;
+
+       sem = sem_open("vm_sem", O_CREAT | O_EXCL, 0644, 0);
+       sem_unlink("vm_sem");
+
+       for (i = 0; i < FORK_NUM; ++i) {
+               pid = fork();
+               TEST_ASSERT(pid >= 0, "%s: unable to fork", __func__);
+               if (pid == 0)
+                       run_test(i); /* This function always exits */
+
+               pr_debug("%s: [%d] waiting on semaphore\n", __func__, i);
+               sem_wait(sem);
+               r = (rand() % DELAY_US_MAX) + 1;
+               pr_debug("%s: [%d] waiting %dus\n", __func__, i, r);
+               usleep(r);
+               r = waitpid(pid, &s, WNOHANG);
+               TEST_ASSERT(r != pid,
+                           "%s: [%d] child exited unexpectedly status: [%d]",
+                           __func__, i, s);
+               pr_debug("%s: [%d] killing child\n", __func__, i);
+               kill(pid, SIGKILL);
+       }
+
+       sem_destroy(sem);
+       exit(0);
+}
index 2d7eb69..0f4258e 100644 (file)
@@ -16,6 +16,7 @@
 
 #include "sparsebit.h"
 
+#define KVM_DEV_PATH "/dev/kvm"
 #define KVM_MAX_VCPUS 512
 
 /*
@@ -133,6 +134,7 @@ void vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
 int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
                void *arg);
 void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
 void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
 void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
index d787cb8..b8849a1 100644 (file)
@@ -21,6 +21,8 @@
 #define KVM_UTIL_PGS_PER_HUGEPG 512
 #define KVM_UTIL_MIN_PFN       2
 
+static int vcpu_mmap_sz(void);
+
 /* Aligns x up to the next multiple of size. Size must be a power of 2. */
 static void *align(void *x, size_t size)
 {
@@ -509,7 +511,7 @@ static void vm_vcpu_rm(struct kvm_vm *vm, struct vcpu *vcpu)
                vcpu->dirty_gfns = NULL;
        }
 
-       ret = munmap(vcpu->state, sizeof(*vcpu->state));
+       ret = munmap(vcpu->state, vcpu_mmap_sz());
        TEST_ASSERT(ret == 0, "munmap of VCPU fd failed, rc: %i "
                "errno: %i", ret, errno);
        close(vcpu->fd);
@@ -978,7 +980,7 @@ void vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpuid)
        TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->state), "vcpu mmap size "
                "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi",
                vcpu_mmap_sz(), sizeof(*vcpu->state));
-       vcpu->state = (struct kvm_run *) mmap(NULL, sizeof(*vcpu->state),
+       vcpu->state = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(),
                PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0);
        TEST_ASSERT(vcpu->state != MAP_FAILED, "mmap vcpu_state failed, "
                "vcpu id: %u errno: %i", vcpuid, errno);
@@ -1695,11 +1697,16 @@ void vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
 {
        int ret;
 
-       ret = ioctl(vm->fd, cmd, arg);
+       ret = _vm_ioctl(vm, cmd, arg);
        TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
                cmd, ret, errno, strerror(errno));
 }
 
+int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+{
+       return ioctl(vm->fd, cmd, arg);
+}
+
 /*
  * KVM system ioctl
  *
index 34465dc..91ce1b5 100644 (file)
@@ -10,8 +10,6 @@
 
 #include "sparsebit.h"
 
-#define KVM_DEV_PATH           "/dev/kvm"
-
 struct userspace_mem_region {
        struct kvm_userspace_memory_region region;
        struct sparsebit *unused_phy_pages;
index de0c761..a8906e6 100644 (file)
@@ -720,7 +720,8 @@ struct kvm_cpuid2 *vcpu_get_cpuid(struct kvm_vm *vm, uint32_t vcpuid)
 {
        struct vcpu *vcpu = vcpu_find(vm, vcpuid);
        struct kvm_cpuid2 *cpuid;
-       int rc, max_ent;
+       int max_ent;
+       int rc = -1;
 
        TEST_ASSERT(vcpu != NULL, "vcpu not found, vcpuid: %u", vcpuid);
 
diff --git a/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c b/tools/testing/selftests/kvm/x86_64/get_msr_index_features.c
new file mode 100644 (file)
index 0000000..cb953df
--- /dev/null
@@ -0,0 +1,134 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_GET_MSR_INDEX_LIST and
+ * KVM_GET_MSR_FEATURE_INDEX_LIST work as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
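+/*
+ * Probe the required list size: with a too-small nmsrs, KVM fails the
+ * ioctl with E2BIG and writes the real count back into list->nmsrs.
+ */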
+static int kvm_num_index_msrs(int kvm_fd, int nmsrs)
+{
+       struct kvm_msr_list *list;
+       int r;
+
+       list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+       list->nmsrs = nmsrs;
+       r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+       TEST_ASSERT(r == -1 && errno == E2BIG,
+                               "Unexpected result from KVM_GET_MSR_INDEX_LIST probe, r: %i",
+                               r);
+
+       r = list->nmsrs;
+       free(list);
+       return r;
+}
+
+static void test_get_msr_index(void)
+{
+       int old_res, res, kvm_fd, r;
+       struct kvm_msr_list *list;
+
+       kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
+
+       old_res = kvm_num_index_msrs(kvm_fd, 0);
+       TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
+
+       if (old_res != 1) {
+               res = kvm_num_index_msrs(kvm_fd, 1);
+               TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
+               TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
+       }
+
+       list = malloc(sizeof(*list) + old_res * sizeof(list->indices[0]));
+       list->nmsrs = old_res;
+       r = ioctl(kvm_fd, KVM_GET_MSR_INDEX_LIST, list);
+
+       TEST_ASSERT(r == 0,
+                   "Unexpected result from KVM_GET_MSR_INDEX_LIST, r: %i",
+                   r);
+       TEST_ASSERT(list->nmsrs == old_res, "Expecting nmsrs to be identical");
+       free(list);
+
+       close(kvm_fd);
+}
+
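+/* Same size-probing trick as kvm_num_index_msrs(), for the feature MSR list. */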
+static int kvm_num_feature_msrs(int kvm_fd, int nmsrs)
+{
+       struct kvm_msr_list *list;
+       int r;
+
+       list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+       list->nmsrs = nmsrs;
+       r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+       TEST_ASSERT(r == -1 && errno == E2BIG,
+               "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST probe, r: %i",
+               r);
+
+       r = list->nmsrs;
+       free(list);
+       return r;
+}
+
+struct kvm_msr_list *kvm_get_msr_feature_list(int kvm_fd, int nmsrs)
+{
+       struct kvm_msr_list *list;
+       int r;
+
+       list = malloc(sizeof(*list) + nmsrs * sizeof(list->indices[0]));
+       list->nmsrs = nmsrs;
+       r = ioctl(kvm_fd, KVM_GET_MSR_FEATURE_INDEX_LIST, list);
+
+       TEST_ASSERT(r == 0,
+               "Unexpected result from KVM_GET_MSR_FEATURE_INDEX_LIST, r: %i",
+               r);
+
+       return list;
+}
+
+static void test_get_msr_feature(void)
+{
+       int res, old_res, i, kvm_fd;
+       struct kvm_msr_list *feature_list;
+
+       kvm_fd = open(KVM_DEV_PATH, O_RDONLY);
+       if (kvm_fd < 0)
+               exit(KSFT_SKIP);
+
+       old_res = kvm_num_feature_msrs(kvm_fd, 0);
+       TEST_ASSERT(old_res != 0, "Expecting nmsrs to be > 0");
+
+       if (old_res != 1) {
+               res = kvm_num_feature_msrs(kvm_fd, 1);
+               TEST_ASSERT(res > 1, "Expecting nmsrs to be > 1");
+               TEST_ASSERT(res == old_res, "Expecting nmsrs to be identical");
+       }
+
+       feature_list = kvm_get_msr_feature_list(kvm_fd, old_res);
+       TEST_ASSERT(old_res == feature_list->nmsrs,
+                               "Mismatched number of MSR indexes");
+
+       for (i = 0; i < feature_list->nmsrs; i++)
+               kvm_get_feature_msr(feature_list->indices[i]);
+
+       free(feature_list);
+       close(kvm_fd);
+}
+
+int main(int argc, char *argv[])
+{
+       if (kvm_check_cap(KVM_CAP_GET_MSR_FEATURES))
+               test_get_msr_feature();
+
+       test_get_msr_index();
+}
diff --git a/tools/testing/selftests/kvm/x86_64/hyperv_clock.c b/tools/testing/selftests/kvm/x86_64/hyperv_clock.c
new file mode 100644 (file)
index 0000000..7f1d276
--- /dev/null
@@ -0,0 +1,269 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2021, Red Hat, Inc.
+ *
+ * Tests for Hyper-V clocksources
+ */
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+struct ms_hyperv_tsc_page {
+       volatile u32 tsc_sequence;
+       u32 reserved1;
+       volatile u64 tsc_scale;
+       volatile s64 tsc_offset;
+} __packed;
+
+#define HV_X64_MSR_GUEST_OS_ID                 0x40000000
+#define HV_X64_MSR_TIME_REF_COUNT              0x40000020
+#define HV_X64_MSR_REFERENCE_TSC               0x40000021
+#define HV_X64_MSR_TSC_FREQUENCY               0x40000022
+#define HV_X64_MSR_REENLIGHTENMENT_CONTROL     0x40000106
+#define HV_X64_MSR_TSC_EMULATION_CONTROL       0x40000107
+
+/* Simplified mul_u64_u64_shr() */
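+/* Returns the high 64 bits of the 128-bit product a * b, ignoring low-order carries. */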
+static inline u64 mul_u64_u64_shr64(u64 a, u64 b)
+{
+       union {
+               u64 ll;
+               struct {
+                       u32 low, high;
+               } l;
+       } rm, rn, rh, a0, b0;
+       u64 c;
+
+       a0.ll = a;
+       b0.ll = b;
+
+       rm.ll = (u64)a0.l.low * b0.l.high;
+       rn.ll = (u64)a0.l.high * b0.l.low;
+       rh.ll = (u64)a0.l.high * b0.l.high;
+
+       rh.l.low = c = rm.l.high + rn.l.high + rh.l.low;
+       rh.l.high = (c >> 32) + rh.l.high;
+
+       return rh.ll;
+}
+
+static inline void nop_loop(void)
+{
+       int i;
+
+       for (i = 0; i < 1000000; i++)
+               asm volatile("nop");
+}
+
+static inline void check_tsc_msr_rdtsc(void)
+{
+       u64 tsc_freq, r1, r2, t1, t2;
+       s64 delta_ns;
+
+       tsc_freq = rdmsr(HV_X64_MSR_TSC_FREQUENCY);
+       GUEST_ASSERT(tsc_freq > 0);
+
+       /* First, check MSR-based clocksource */
+       r1 = rdtsc();
+       t1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       nop_loop();
+       r2 = rdtsc();
+       t2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+       GUEST_ASSERT(r2 > r1 && t2 > t1);
+
+       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
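+       /* delta_ns = |elapsed per reference MSR - elapsed per raw TSC|, in ns */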
+       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+       if (delta_ns < 0)
+               delta_ns = -delta_ns;
+
+       /* 1% tolerance */
+       GUEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100);
+}
+
+static inline u64 get_tscpage_ts(struct ms_hyperv_tsc_page *tsc_page)
+{
+       return mul_u64_u64_shr64(rdtsc(), tsc_page->tsc_scale) + tsc_page->tsc_offset;
+}
+
+static inline void check_tsc_msr_tsc_page(struct ms_hyperv_tsc_page *tsc_page)
+{
+       u64 r1, r2, t1, t2;
+
+       /* Compare TSC page clocksource with HV_X64_MSR_TIME_REF_COUNT */
+       t1 = get_tscpage_ts(tsc_page);
+       r1 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+
+       /* 10 ms tolerance */
+       GUEST_ASSERT(r1 >= t1 && r1 - t1 < 100000);
+       nop_loop();
+
+       t2 = get_tscpage_ts(tsc_page);
+       r2 = rdmsr(HV_X64_MSR_TIME_REF_COUNT);
+       GUEST_ASSERT(r2 >= t2 && r2 - t2 < 100000);
+}
+
+static void guest_main(struct ms_hyperv_tsc_page *tsc_page, vm_paddr_t tsc_page_gpa)
+{
+       u64 tsc_scale, tsc_offset;
+
+       /* Set Guest OS id to enable Hyper-V emulation */
+       GUEST_SYNC(1);
+       wrmsr(HV_X64_MSR_GUEST_OS_ID, (u64)0x8100 << 48);
+       GUEST_SYNC(2);
+
+       check_tsc_msr_rdtsc();
+
+       GUEST_SYNC(3);
+
+       /* Set up the TSC page in disabled state and check that it's clean */
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa);
+       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+       GUEST_ASSERT(tsc_page->tsc_scale == 0);
+       GUEST_ASSERT(tsc_page->tsc_offset == 0);
+
+       GUEST_SYNC(4);
+
+       /* Set up the TSC page in enabled state */
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, tsc_page_gpa | 0x1);
+       GUEST_ASSERT(tsc_page->tsc_sequence != 0);
+
+       GUEST_SYNC(5);
+
+       check_tsc_msr_tsc_page(tsc_page);
+
+       GUEST_SYNC(6);
+
+       tsc_offset = tsc_page->tsc_offset;
+       /* Userspace now calls KVM_SET_CLOCK; check that the TSC page was updated */
+
+       GUEST_SYNC(7);
+       /* Sanity check the TSC page timestamp; it should be close to 0 */
+       GUEST_ASSERT(get_tscpage_ts(tsc_page) < 100000);
+
+       GUEST_ASSERT(tsc_page->tsc_offset != tsc_offset);
+
+       nop_loop();
+
+       /*
+        * Enable Re-enlightenment and check that TSC page stays constant across
+        * KVM_SET_CLOCK.
+        */
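+       /* Vector 0xff in the low byte; bit 16 is the 'enabled' bit */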
+       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0x1 << 16 | 0xff);
+       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0x1);
+       tsc_offset = tsc_page->tsc_offset;
+       tsc_scale = tsc_page->tsc_scale;
+       GUEST_SYNC(8);
+       GUEST_ASSERT(tsc_page->tsc_offset == tsc_offset);
+       GUEST_ASSERT(tsc_page->tsc_scale == tsc_scale);
+
+       GUEST_SYNC(9);
+
+       check_tsc_msr_tsc_page(tsc_page);
+
+       /*
+        * Disable re-enlightenment and TSC page, check that KVM doesn't update
+        * it anymore.
+        */
+       wrmsr(HV_X64_MSR_REENLIGHTENMENT_CONTROL, 0);
+       wrmsr(HV_X64_MSR_TSC_EMULATION_CONTROL, 0);
+       wrmsr(HV_X64_MSR_REFERENCE_TSC, 0);
+       memset(tsc_page, 0, sizeof(*tsc_page));
+
+       GUEST_SYNC(10);
+       GUEST_ASSERT(tsc_page->tsc_sequence == 0);
+       GUEST_ASSERT(tsc_page->tsc_offset == 0);
+       GUEST_ASSERT(tsc_page->tsc_scale == 0);
+
+       GUEST_DONE();
+}
+
+#define VCPU_ID 0
+
+static void host_check_tsc_msr_rdtsc(struct kvm_vm *vm)
+{
+       u64 tsc_freq, r1, r2, t1, t2;
+       s64 delta_ns;
+
+       tsc_freq = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TSC_FREQUENCY);
+       TEST_ASSERT(tsc_freq > 0, "TSC frequency must be nonzero");
+
+       /* First, check MSR-based clocksource */
+       r1 = rdtsc();
+       t1 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+       nop_loop();
+       r2 = rdtsc();
+       t2 = vcpu_get_msr(vm, VCPU_ID, HV_X64_MSR_TIME_REF_COUNT);
+
+       TEST_ASSERT(t2 > t1, "Time reference MSR is not monotonic (%ld <= %ld)", t2, t1);
+
+       /* HV_X64_MSR_TIME_REF_COUNT is in 100ns */
+       delta_ns = ((t2 - t1) * 100) - ((r2 - r1) * 1000000000 / tsc_freq);
+       if (delta_ns < 0)
+               delta_ns = -delta_ns;
+
+       /* 1% tolerance */
+       TEST_ASSERT(delta_ns * 100 < (t2 - t1) * 100,
+                   "Elapsed time does not match (MSR=%ld, TSC=%ld)",
+                   (t2 - t1) * 100, (r2 - r1) * 1000000000 / tsc_freq);
+}
+
+int main(void)
+{
+       struct kvm_vm *vm;
+       struct kvm_run *run;
+       struct ucall uc;
+       vm_vaddr_t tsc_page_gva;
+       int stage;
+
+       vm = vm_create_default(VCPU_ID, 0, guest_main);
+       run = vcpu_state(vm, VCPU_ID);
+
+       vcpu_set_hv_cpuid(vm, VCPU_ID);
+
+       tsc_page_gva = vm_vaddr_alloc(vm, getpagesize(), 0x10000, 0, 0);
+       memset(addr_gva2hva(vm, tsc_page_gva), 0x0, getpagesize());
+       TEST_ASSERT((addr_gva2gpa(vm, tsc_page_gva) & (getpagesize() - 1)) == 0,
+               "TSC page has to be page aligned");
+       vcpu_args_set(vm, VCPU_ID, 2, tsc_page_gva, addr_gva2gpa(vm, tsc_page_gva));
+
+       host_check_tsc_msr_rdtsc(vm);
+
+       for (stage = 1;; stage++) {
+               _vcpu_run(vm, VCPU_ID);
+               TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
+                           "Stage %d: unexpected exit reason: %u (%s),\n",
+                           stage, run->exit_reason,
+                           exit_reason_str(run->exit_reason));
+
+               switch (get_ucall(vm, VCPU_ID, &uc)) {
+               case UCALL_ABORT:
+                       TEST_FAIL("%s at %s:%ld", (const char *)uc.args[0],
+                                 __FILE__, uc.args[1]);
+                       /* NOT REACHED */
+               case UCALL_SYNC:
+                       break;
+               case UCALL_DONE:
+                       /* Keep in sync with guest_main() */
+                       TEST_ASSERT(stage == 11, "Testing ended prematurely, stage %d\n",
+                                   stage);
+                       goto out;
+               default:
+                       TEST_FAIL("Unknown ucall %lu", uc.cmd);
+               }
+
+               TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                           uc.args[1] == stage,
+                           "Stage %d: Unexpected register values vmexit, got %lx",
+                           stage, (ulong)uc.args[1]);
+
+               /* Reset kvmclock triggering TSC page update */
+               if (stage == 7 || stage == 8 || stage == 10) {
+                       struct kvm_clock_data clock = {0};
+
+                       vm_ioctl(vm, KVM_SET_CLOCK, &clock);
+               }
+       }
+
+out:
+       kvm_vm_free(vm);
+}
diff --git a/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c b/tools/testing/selftests/kvm/x86_64/set_boot_cpu_id.c
new file mode 100644 (file)
index 0000000..12c558f
--- /dev/null
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Test that KVM_SET_BOOT_CPU_ID works as intended
+ *
+ * Copyright (C) 2020, Red Hat, Inc.
+ */
+#define _GNU_SOURCE /* for program_invocation_name */
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+
+#include "test_util.h"
+#include "kvm_util.h"
+#include "processor.h"
+
+#define N_VCPU 2
+#define VCPU_ID0 0
+#define VCPU_ID1 1
+
+static uint32_t get_bsp_flag(void)
+{
+       return rdmsr(MSR_IA32_APICBASE) & MSR_IA32_APICBASE_BSP;
+}
+
+static void guest_bsp_vcpu(void *arg)
+{
+       GUEST_SYNC(1);
+
+       GUEST_ASSERT(get_bsp_flag() != 0);
+
+       GUEST_DONE();
+}
+
+static void guest_not_bsp_vcpu(void *arg)
+{
+       GUEST_SYNC(1);
+
+       GUEST_ASSERT(get_bsp_flag() == 0);
+
+       GUEST_DONE();
+}
+
+static void test_set_boot_busy(struct kvm_vm *vm)
+{
+       int res;
+
+       res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID0);
+       TEST_ASSERT(res == -1 && errno == EBUSY,
+                       "KVM_SET_BOOT_CPU_ID set while running vm");
+}
+
+static void run_vcpu(struct kvm_vm *vm, uint32_t vcpuid)
+{
+       struct ucall uc;
+       int stage;
+
+       for (stage = 0; stage < 2; stage++) {
+
+               vcpu_run(vm, vcpuid);
+
+               switch (get_ucall(vm, vcpuid, &uc)) {
+               case UCALL_SYNC:
+                       TEST_ASSERT(!strcmp((const char *)uc.args[0], "hello") &&
+                                       uc.args[1] == stage + 1,
+                                       "Stage %d: Unexpected register values vmexit, got %lx",
+                                       stage + 1, (ulong)uc.args[1]);
+                       test_set_boot_busy(vm);
+                       break;
+               case UCALL_DONE:
+                       TEST_ASSERT(stage == 1,
+                                       "Expected GUEST_DONE in stage 2, got stage %d",
+                                       stage);
+                       break;
+               case UCALL_ABORT:
+                       TEST_ASSERT(false, "%s at %s:%ld\n\tvalues: %#lx, %#lx",
+                                               (const char *)uc.args[0], __FILE__,
+                                               uc.args[1], uc.args[2], uc.args[3]);
+               default:
+                       TEST_ASSERT(false, "Unexpected exit: %s",
+                                       exit_reason_str(vcpu_state(vm, vcpuid)->exit_reason));
+               }
+       }
+}
+
+static struct kvm_vm *create_vm(void)
+{
+       struct kvm_vm *vm;
+       uint64_t vcpu_pages = (DEFAULT_STACK_PGS) * 2;
+       uint64_t extra_pg_pages = vcpu_pages / PTES_PER_MIN_PAGE * N_VCPU;
+       uint64_t pages = DEFAULT_GUEST_PHY_PAGES + vcpu_pages + extra_pg_pages;
+
+       pages = vm_adjust_num_guest_pages(VM_MODE_DEFAULT, pages);
+       vm = vm_create(VM_MODE_DEFAULT, pages, O_RDWR);
+
+       kvm_vm_elf_load(vm, program_invocation_name, 0, 0);
+       vm_create_irqchip(vm);
+
+       return vm;
+}
+
+static void add_x86_vcpu(struct kvm_vm *vm, uint32_t vcpuid, bool bsp_code)
+{
+       if (bsp_code)
+               vm_vcpu_add_default(vm, vcpuid, guest_bsp_vcpu);
+       else
+               vm_vcpu_add_default(vm, vcpuid, guest_not_bsp_vcpu);
+
+       vcpu_set_cpuid(vm, vcpuid, kvm_get_supported_cpuid());
+}
+
+static void run_vm_bsp(uint32_t bsp_vcpu)
+{
+       struct kvm_vm *vm;
+       bool is_bsp_vcpu1 = bsp_vcpu == VCPU_ID1;
+
+       vm = create_vm();
+
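+       /* KVM_SET_BOOT_CPU_ID must be issued before any vCPU is created. */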
+       if (is_bsp_vcpu1)
+               vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+
+       add_x86_vcpu(vm, VCPU_ID0, !is_bsp_vcpu1);
+       add_x86_vcpu(vm, VCPU_ID1, is_bsp_vcpu1);
+
+       run_vcpu(vm, VCPU_ID0);
+       run_vcpu(vm, VCPU_ID1);
+
+       kvm_vm_free(vm);
+}
+
+static void check_set_bsp_busy(void)
+{
+       struct kvm_vm *vm;
+       int res;
+
+       vm = create_vm();
+
+       add_x86_vcpu(vm, VCPU_ID0, true);
+       add_x86_vcpu(vm, VCPU_ID1, false);
+
+       res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+       TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set after adding vcpu");
+
+       run_vcpu(vm, VCPU_ID0);
+       run_vcpu(vm, VCPU_ID1);
+
+       res = _vm_ioctl(vm, KVM_SET_BOOT_CPU_ID, (void *) VCPU_ID1);
+       TEST_ASSERT(res == -1 && errno == EBUSY, "KVM_SET_BOOT_CPU_ID set to a terminated vcpu");
+
+       kvm_vm_free(vm);
+}
+
+int main(int argc, char *argv[])
+{
+       if (!kvm_check_cap(KVM_CAP_SET_BOOT_CPU_ID)) {
+               print_skip("set_boot_cpu_id not available");
+               return 0;
+       }
+
+       run_vm_bsp(VCPU_ID0);
+       run_vm_bsp(VCPU_ID1);
+       run_vm_bsp(VCPU_ID0);
+
+       check_set_bsp_busy();
+}
index 9246ea3..804ff5f 100644 (file)
 
 #include <stdint.h>
 #include <time.h>
+#include <sched.h>
+#include <sys/syscall.h>
 
 #define VCPU_ID                5
 
+#define SHINFO_REGION_GVA      0xc0000000ULL
 #define SHINFO_REGION_GPA      0xc0000000ULL
 #define SHINFO_REGION_SLOT     10
 #define PAGE_SIZE              4096
 
 #define PVTIME_ADDR    (SHINFO_REGION_GPA + PAGE_SIZE)
+#define RUNSTATE_ADDR  (SHINFO_REGION_GPA + PAGE_SIZE + 0x20)
+
+#define RUNSTATE_VADDR (SHINFO_REGION_GVA + PAGE_SIZE + 0x20)
 
 static struct kvm_vm *vm;
 
 #define XEN_HYPERCALL_MSR      0x40000000
 
+#define MIN_STEAL_TIME         50000
+
 struct pvclock_vcpu_time_info {
         u32   version;
         u32   pad0;
@@ -43,11 +51,67 @@ struct pvclock_wall_clock {
         u32   nsec;
 } __attribute__((__packed__));
 
+struct vcpu_runstate_info {
+    uint32_t state;
+    uint64_t state_entry_time;
+    uint64_t time[4];
+};
+
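+/* VCPU run states, mirroring Xen's struct vcpu_runstate_info ABI */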
+#define RUNSTATE_running  0
+#define RUNSTATE_runnable 1
+#define RUNSTATE_blocked  2
+#define RUNSTATE_offline  3
+
 static void guest_code(void)
 {
+       struct vcpu_runstate_info *rs = (void *)RUNSTATE_VADDR;
+
+       /* Test having the host set runstates manually */
+       GUEST_SYNC(RUNSTATE_runnable);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(RUNSTATE_blocked);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       GUEST_SYNC(RUNSTATE_offline);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] != 0);
+       GUEST_ASSERT(rs->state == 0);
+
+       /* Test runstate time adjust */
+       GUEST_SYNC(4);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x5a);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x6b6b);
+
+       /* Test runstate time set */
+       GUEST_SYNC(5);
+       GUEST_ASSERT(rs->state_entry_time >= 0x8000);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] == 0);
+       GUEST_ASSERT(rs->time[RUNSTATE_blocked] == 0x6b6b);
+       GUEST_ASSERT(rs->time[RUNSTATE_offline] == 0x5a);
+
+       /* sched_yield() should result in some 'runnable' time */
+       GUEST_SYNC(6);
+       GUEST_ASSERT(rs->time[RUNSTATE_runnable] >= MIN_STEAL_TIME);
+
        GUEST_DONE();
 }
 
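+/*
+ * Read this thread's run-queue wait time ("run delay", the second field
+ * of /proc/<tid>/schedstat), in nanoseconds.
+ */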
+static long get_run_delay(void)
+{
+        char path[64];
+        long val[2];
+        FILE *fp;
+
+        sprintf(path, "/proc/%ld/schedstat", syscall(SYS_gettid));
+        fp = fopen(path, "r");
+        TEST_ASSERT(fp, "Failed to open %s", path);
+        fscanf(fp, "%ld %ld ", &val[0], &val[1]);
+        fclose(fp);
+
+        return val[1];
+}
+
 static int cmp_timespec(struct timespec *a, struct timespec *b)
 {
        if (a->tv_sec > b->tv_sec)
@@ -66,12 +130,14 @@ int main(int argc, char *argv[])
 {
        struct timespec min_ts, max_ts, vm_ts;
 
-       if (!(kvm_check_cap(KVM_CAP_XEN_HVM) &
-             KVM_XEN_HVM_CONFIG_SHARED_INFO) ) {
+       int xen_caps = kvm_check_cap(KVM_CAP_XEN_HVM);
+       if (!(xen_caps & KVM_XEN_HVM_CONFIG_SHARED_INFO)) {
                print_skip("KVM_XEN_HVM_CONFIG_SHARED_INFO not available");
                exit(KSFT_SKIP);
        }
 
+       bool do_runstate_tests = !!(xen_caps & KVM_XEN_HVM_CONFIG_RUNSTATE);
+
        clock_gettime(CLOCK_REALTIME, &min_ts);
 
        vm = vm_create_default(VCPU_ID, 0, (void *) guest_code);
@@ -80,6 +146,7 @@ int main(int argc, char *argv[])
        /* Map a region for the shared_info page */
        vm_userspace_mem_region_add(vm, VM_MEM_SRC_ANONYMOUS,
                                    SHINFO_REGION_GPA, SHINFO_REGION_SLOT, 2, 0);
+       virt_map(vm, SHINFO_REGION_GVA, SHINFO_REGION_GPA, 2, 0);
 
        struct kvm_xen_hvm_config hvmc = {
                .flags = KVM_XEN_HVM_CONFIG_INTERCEPT_HCALL,
@@ -111,6 +178,17 @@ int main(int argc, char *argv[])
        };
        vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &pvclock);
 
+       if (do_runstate_tests) {
+               struct kvm_xen_vcpu_attr st = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR,
+                       .u.gpa = RUNSTATE_ADDR,
+               };
+               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &st);
+       }
+
+       struct vcpu_runstate_info *rs = addr_gpa2hva(vm, RUNSTATE_ADDR);
+       rs->state = 0x5a;
+
        for (;;) {
                volatile struct kvm_run *run = vcpu_state(vm, VCPU_ID);
                struct ucall uc;
@@ -126,8 +204,56 @@ int main(int argc, char *argv[])
                case UCALL_ABORT:
                        TEST_FAIL("%s", (const char *)uc.args[0]);
                        /* NOT REACHED */
-               case UCALL_SYNC:
+               case UCALL_SYNC: {
+                       struct kvm_xen_vcpu_attr rst;
+                       long rundelay;
+
+                       /* If no runstate support, bail out early */
+                       if (!do_runstate_tests)
+                               goto done;
+
+                       TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                                   rs->time[1] + rs->time[2] + rs->time[3],
+                                   "runstate times don't add up");
+
+                       switch (uc.args[1]) {
+                       case RUNSTATE_running...RUNSTATE_offline:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_CURRENT;
+                               rst.u.runstate.state = uc.args[1];
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+                       case 4:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUST;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = (uint64_t)-1;
+                               rst.u.runstate.time_blocked =
+                                       0x5a - rs->time[RUNSTATE_blocked];
+                               rst.u.runstate.time_offline =
+                                       0x6b6b - rs->time[RUNSTATE_offline];
+                               rst.u.runstate.time_runnable = -rst.u.runstate.time_blocked -
+                                       rst.u.runstate.time_offline;
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+
+                       case 5:
+                               rst.type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA;
+                               memset(&rst.u, 0, sizeof(rst.u));
+                               rst.u.runstate.state = RUNSTATE_running;
+                               rst.u.runstate.state_entry_time = 0x6b6b + 0x5a;
+                               rst.u.runstate.time_blocked = 0x6b6b;
+                               rst.u.runstate.time_offline = 0x5a;
+                               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_SET_ATTR, &rst);
+                               break;
+                       case 6:
+                               /* Yield until scheduler delay exceeds target */
+                               rundelay = get_run_delay() + MIN_STEAL_TIME;
+                               do {
+                                       sched_yield();
+                               } while (get_run_delay() < rundelay);
+                               break;
+                       }
                        break;
+               }
                case UCALL_DONE:
                        goto done;
                default:
@@ -162,6 +288,33 @@ int main(int argc, char *argv[])
        TEST_ASSERT(ti2->version && !(ti2->version & 1),
                    "Bad time_info version %x", ti->version);
 
+       if (do_runstate_tests) {
+               /*
+                * Fetch runstate and check sanity. Strictly speaking, in the
+                * general case we might not expect the numbers to be identical,
+                * but in this case we know we aren't running the vCPU any more.
+                */
+               struct kvm_xen_vcpu_attr rst = {
+                       .type = KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_DATA,
+               };
+               vcpu_ioctl(vm, VCPU_ID, KVM_XEN_VCPU_GET_ATTR, &rst);
+
+               TEST_ASSERT(rs->state == rst.u.runstate.state, "Runstate mismatch");
+               TEST_ASSERT(rs->state_entry_time == rst.u.runstate.state_entry_time,
+                           "State entry time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_running] == rst.u.runstate.time_running,
+                           "Running time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_runnable] == rst.u.runstate.time_runnable,
+                           "Runnable time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_blocked] == rst.u.runstate.time_blocked,
+                           "Blocked time mismatch");
+               TEST_ASSERT(rs->time[RUNSTATE_offline] == rst.u.runstate.time_offline,
+                           "Offline time mismatch");
+
+               TEST_ASSERT(rs->state_entry_time == rs->time[0] +
+                           rs->time[1] + rs->time[2] + rs->time[3],
+                           "runstate times don't add up");
+       }
        kvm_vm_free(vm);
        return 0;
 }
diff --git a/tools/testing/selftests/mount_setattr/.gitignore b/tools/testing/selftests/mount_setattr/.gitignore
new file mode 100644 (file)
index 0000000..5f74d84
--- /dev/null
@@ -0,0 +1 @@
+mount_setattr_test
diff --git a/tools/testing/selftests/mount_setattr/Makefile b/tools/testing/selftests/mount_setattr/Makefile
new file mode 100644 (file)
index 0000000..2250f7d
--- /dev/null
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0
+# Makefile for mount selftests.
+CFLAGS = -g -I../../../../usr/include/ -Wall -O2 -pthread
+
+TEST_GEN_FILES += mount_setattr_test
+
+include ../lib.mk
diff --git a/tools/testing/selftests/mount_setattr/config b/tools/testing/selftests/mount_setattr/config
new file mode 100644 (file)
index 0000000..416bd53
--- /dev/null
@@ -0,0 +1 @@
+CONFIG_USER_NS=y
diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c
new file mode 100644 (file)
index 0000000..4e94e56
--- /dev/null
@@ -0,0 +1,1424 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <sched.h>
+#include <stdio.h>
+#include <errno.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <sys/mount.h>
+#include <sys/wait.h>
+#include <sys/vfs.h>
+#include <sys/statvfs.h>
+#include <sys/sysinfo.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <fcntl.h>
+#include <grp.h>
+#include <stdbool.h>
+#include <stdarg.h>
+
+#include "../kselftest_harness.h"
+
+#ifndef CLONE_NEWNS
+#define CLONE_NEWNS 0x00020000
+#endif
+
+#ifndef CLONE_NEWUSER
+#define CLONE_NEWUSER 0x10000000
+#endif
+
+#ifndef MS_REC
+#define MS_REC 16384
+#endif
+
+#ifndef MS_RELATIME
+#define MS_RELATIME (1 << 21)
+#endif
+
+#ifndef MS_STRICTATIME
+#define MS_STRICTATIME (1 << 24)
+#endif
+
+#ifndef MOUNT_ATTR_RDONLY
+#define MOUNT_ATTR_RDONLY 0x00000001
+#endif
+
+#ifndef MOUNT_ATTR_NOSUID
+#define MOUNT_ATTR_NOSUID 0x00000002
+#endif
+
+#ifndef MOUNT_ATTR_NOEXEC
+#define MOUNT_ATTR_NOEXEC 0x00000008
+#endif
+
+#ifndef MOUNT_ATTR_NODIRATIME
+#define MOUNT_ATTR_NODIRATIME 0x00000080
+#endif
+
+#ifndef MOUNT_ATTR__ATIME
+#define MOUNT_ATTR__ATIME 0x00000070
+#endif
+
+#ifndef MOUNT_ATTR_RELATIME
+#define MOUNT_ATTR_RELATIME 0x00000000
+#endif
+
+#ifndef MOUNT_ATTR_NOATIME
+#define MOUNT_ATTR_NOATIME 0x00000010
+#endif
+
+#ifndef MOUNT_ATTR_STRICTATIME
+#define MOUNT_ATTR_STRICTATIME 0x00000020
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000
+#endif
+
+#ifndef MS_SHARED
+#define MS_SHARED (1 << 20)
+#endif
+
+#define DEFAULT_THREADS 4
+#define ptr_to_int(p) ((int)((intptr_t)(p)))
+#define int_to_ptr(u) ((void *)((intptr_t)(u)))
+
+#ifndef __NR_mount_setattr
+       #if defined __alpha__
+               #define __NR_mount_setattr 552
+       #elif defined _MIPS_SIM
+               #if _MIPS_SIM == _MIPS_SIM_ABI32        /* o32 */
+                       #define __NR_mount_setattr (442 + 4000)
+               #endif
+               #if _MIPS_SIM == _MIPS_SIM_NABI32       /* n32 */
+                       #define __NR_mount_setattr (442 + 6000)
+               #endif
+               #if _MIPS_SIM == _MIPS_SIM_ABI64        /* n64 */
+                       #define __NR_mount_setattr (442 + 5000)
+               #endif
+       #elif defined __ia64__
+               #define __NR_mount_setattr (442 + 1024)
+       #else
+               #define __NR_mount_setattr 442
+       #endif
+
+struct mount_attr {
+       __u64 attr_set;
+       __u64 attr_clr;
+       __u64 propagation;
+       __u64 userns_fd;
+};
+#endif
+
+#ifndef __NR_open_tree
+       #if defined __alpha__
+               #define __NR_open_tree 538
+       #elif defined _MIPS_SIM
+               #if _MIPS_SIM == _MIPS_SIM_ABI32        /* o32 */
+                       #define __NR_open_tree 4428
+               #endif
+               #if _MIPS_SIM == _MIPS_SIM_NABI32       /* n32 */
+                       #define __NR_open_tree 6428
+               #endif
+               #if _MIPS_SIM == _MIPS_SIM_ABI64        /* n64 */
+                       #define __NR_open_tree 5428
+               #endif
+       #elif defined __ia64__
+               #define __NR_open_tree (428 + 1024)
+       #else
+               #define __NR_open_tree 428
+       #endif
+#endif
+
+#ifndef MOUNT_ATTR_IDMAP
+#define MOUNT_ATTR_IDMAP 0x00100000
+#endif
+
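+/* glibc provides no wrapper for mount_setattr() yet, so invoke it directly. */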
+static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flags,
+                                   struct mount_attr *attr, size_t size)
+{
+       return syscall(__NR_mount_setattr, dfd, path, flags, attr, size);
+}
+
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+       return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+static ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+       ssize_t ret;
+
+       do {
+               ret = write(fd, buf, count);
+       } while (ret < 0 && errno == EINTR);
+
+       return ret;
+}
+
+static int write_file(const char *path, const void *buf, size_t count)
+{
+       int fd;
+       ssize_t ret;
+
+       fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW);
+       if (fd < 0)
+               return -1;
+
+       ret = write_nointr(fd, buf, count);
+       close(fd);
+       if (ret < 0 || (size_t)ret != count)
+               return -1;
+
+       return 0;
+}
+
+static int create_and_enter_userns(void)
+{
+       uid_t uid;
+       gid_t gid;
+       char map[100];
+
+       uid = getuid();
+       gid = getgid();
+
+       if (unshare(CLONE_NEWUSER))
+               return -1;
+
+       if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) &&
+           errno != ENOENT)
+               return -1;
+
+       snprintf(map, sizeof(map), "0 %d 1", uid);
+       if (write_file("/proc/self/uid_map", map, strlen(map)))
+               return -1;
+
+
+       snprintf(map, sizeof(map), "0 %d 1", gid);
+       if (write_file("/proc/self/gid_map", map, strlen(map)))
+               return -1;
+
+       if (setgid(0))
+               return -1;
+
+       if (setuid(0))
+               return -1;
+
+       return 0;
+}
+
+static int prepare_unpriv_mountns(void)
+{
+       if (create_and_enter_userns())
+               return -1;
+
+       if (unshare(CLONE_NEWNS))
+               return -1;
+
+       if (mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0))
+               return -1;
+
+       return 0;
+}
+
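+/* Translate statvfs() ST_* flag bits back into their MS_* mount flags. */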
+static int read_mnt_flags(const char *path)
+{
+       int ret;
+       struct statvfs stat;
+       unsigned int mnt_flags;
+
+       ret = statvfs(path, &stat);
+       if (ret != 0)
+               return -EINVAL;
+
+       if (stat.f_flag &
+           ~(ST_RDONLY | ST_NOSUID | ST_NODEV | ST_NOEXEC | ST_NOATIME |
+             ST_NODIRATIME | ST_RELATIME | ST_SYNCHRONOUS | ST_MANDLOCK))
+               return -EINVAL;
+
+       mnt_flags = 0;
+       if (stat.f_flag & ST_RDONLY)
+               mnt_flags |= MS_RDONLY;
+       if (stat.f_flag & ST_NOSUID)
+               mnt_flags |= MS_NOSUID;
+       if (stat.f_flag & ST_NODEV)
+               mnt_flags |= MS_NODEV;
+       if (stat.f_flag & ST_NOEXEC)
+               mnt_flags |= MS_NOEXEC;
+       if (stat.f_flag & ST_NOATIME)
+               mnt_flags |= MS_NOATIME;
+       if (stat.f_flag & ST_NODIRATIME)
+               mnt_flags |= MS_NODIRATIME;
+       if (stat.f_flag & ST_RELATIME)
+               mnt_flags |= MS_RELATIME;
+       if (stat.f_flag & ST_SYNCHRONOUS)
+               mnt_flags |= MS_SYNCHRONOUS;
+       if (stat.f_flag & ST_MANDLOCK)
+               mnt_flags |= ST_MANDLOCK;
+
+       return mnt_flags;
+}
+
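+/* Advance past @nfields whitespace-separated fields in @src. */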
+static char *get_field(char *src, int nfields)
+{
+       int i;
+       char *p = src;
+
+       for (i = 0; i < nfields; i++) {
+               while (*p && *p != ' ' && *p != '\t')
+                       p++;
+
+               if (!*p)
+                       break;
+
+               p++;
+       }
+
+       return p;
+}
+
+static void null_endofword(char *word)
+{
+       while (*word && *word != ' ' && *word != '\t')
+               word++;
+       *word = '\0';
+}
+
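+/*
+ * Scan /proc/self/mountinfo for @path and report whether its optional
+ * fields mark the mount as part of a "shared:" peer group.
+ */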
+static bool is_shared_mount(const char *path)
+{
+       size_t len = 0;
+       char *line = NULL;
+       FILE *f = NULL;
+
+       f = fopen("/proc/self/mountinfo", "re");
+       if (!f)
+               return false;
+
+       while (getline(&line, &len, f) != -1) {
+               char *opts, *target;
+
+               target = get_field(line, 4);
+               if (!target)
+                       continue;
+
+               opts = get_field(target, 2);
+               if (!opts)
+                       continue;
+
+               null_endofword(target);
+
+               if (strcmp(target, path) != 0)
+                       continue;
+
+               null_endofword(opts);
+               if (strstr(opts, "shared:"))
+                       return true;
+       }
+
+       free(line);
+       fclose(f);
+
+       return false;
+}
+
+static void *mount_setattr_thread(void *data)
+{
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID,
+               .attr_clr       = 0,
+               .propagation    = MS_SHARED,
+       };
+
+       if (sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)))
+               pthread_exit(int_to_ptr(-1));
+
+       pthread_exit(int_to_ptr(0));
+}
+
+/* Attempt to de-conflict with the selftests tree. */
+#ifndef SKIP
+#define SKIP(s, ...)   XFAIL(s, ##__VA_ARGS__)
+#endif
+
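+/* Probe for the syscall itself; only ENOSYS means it is unavailable. */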
+static bool mount_setattr_supported(void)
+{
+       int ret;
+
+       ret = sys_mount_setattr(-EBADF, "", AT_EMPTY_PATH, NULL, 0);
+       if (ret < 0 && errno == ENOSYS)
+               return false;
+
+       return true;
+}
+
+FIXTURE(mount_setattr) {
+};
+
+FIXTURE_SETUP(mount_setattr)
+{
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       ASSERT_EQ(prepare_unpriv_mountns(), 0);
+
+       (void)umount2("/mnt", MNT_DETACH);
+       (void)umount2("/tmp", MNT_DETACH);
+
+       ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+       ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+       ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+                       MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+       ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+                       MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr)
+{
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       (void)umount2("/mnt/A", MNT_DETACH);
+       (void)umount2("/tmp", MNT_DETACH);
+}
+
+TEST_F(mount_setattr, invalid_attributes)
+{
+       struct mount_attr invalid_attr = {
+               .attr_set = (1U << 31),
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+                                   sizeof(invalid_attr)), 0);
+
+       invalid_attr.attr_set   = 0;
+       invalid_attr.attr_clr   = (1U << 31);
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+                                   sizeof(invalid_attr)), 0);
+
+       invalid_attr.attr_clr           = 0;
+       invalid_attr.propagation        = (1U << 31);
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+                                   sizeof(invalid_attr)), 0);
+
+       invalid_attr.attr_set           = (1U << 31);
+       invalid_attr.attr_clr           = (1U << 31);
+       invalid_attr.propagation        = (1U << 31);
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+                                   sizeof(invalid_attr)), 0);
+
+       ASSERT_NE(sys_mount_setattr(-1, "mnt/A", AT_RECURSIVE, &invalid_attr,
+                                   sizeof(invalid_attr)), 0);
+}
+
+TEST_F(mount_setattr, extensibility)
+{
+       unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+       char *s = "dummy";
+       struct mount_attr invalid_attr = {};
+       struct mount_attr_large {
+               struct mount_attr attr1;
+               struct mount_attr attr2;
+               struct mount_attr attr3;
+       } large_attr = {};
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       old_flags = read_mnt_flags("/mnt/A");
+       ASSERT_GT(old_flags, 0);
+
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, NULL,
+                                   sizeof(invalid_attr)), 0);
+       ASSERT_EQ(errno, EFAULT);
+
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, (void *)s,
+                                   sizeof(invalid_attr)), 0);
+       ASSERT_EQ(errno, EINVAL);
+
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr, 0), 0);
+       ASSERT_EQ(errno, EINVAL);
+
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &invalid_attr,
+                                   sizeof(invalid_attr) / 2), 0);
+       ASSERT_EQ(errno, EINVAL);
+
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+                                   (void *)&large_attr, sizeof(large_attr)), 0);
+
+       large_attr.attr3.attr_set = MOUNT_ATTR_RDONLY;
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+                                   (void *)&large_attr, sizeof(large_attr)), 0);
+
+       large_attr.attr3.attr_set = 0;
+       large_attr.attr1.attr_set = MOUNT_ATTR_RDONLY;
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE,
+                                   (void *)&large_attr, sizeof(large_attr)), 0);
+
+       expected_flags = old_flags;
+       expected_flags |= MS_RDONLY;
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, basic)
+{
+       unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+               .attr_clr       = MOUNT_ATTR__ATIME,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       old_flags = read_mnt_flags("/mnt/A");
+       ASSERT_GT(old_flags, 0);
+
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", 0, &attr, sizeof(attr)), 0);
+
+       expected_flags = old_flags;
+       expected_flags |= MS_RDONLY;
+       expected_flags |= MS_NOEXEC;
+       expected_flags &= ~MS_NOATIME;
+       expected_flags |= MS_RELATIME;
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, old_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, old_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, old_flags);
+}
+
+TEST_F(mount_setattr, basic_recursive)
+{
+       int fd;
+       unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+               .attr_clr       = MOUNT_ATTR__ATIME,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       old_flags = read_mnt_flags("/mnt/A");
+       ASSERT_GT(old_flags, 0);
+
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags = old_flags;
+       expected_flags |= MS_RDONLY;
+       expected_flags |= MS_NOEXEC;
+       expected_flags &= ~MS_NOATIME;
+       expected_flags |= MS_RELATIME;
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       memset(&attr, 0, sizeof(attr));
+       attr.attr_clr = MOUNT_ATTR_RDONLY;
+       attr.propagation = MS_SHARED;
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags &= ~MS_RDONLY;
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+       fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+       ASSERT_GE(fd, 0);
+
+       /*
+        * We're holding a fd open for writing, so this needs to fail somewhere
+        * in the middle and leave the mount options unchanged.
+        */
+       attr.attr_set = MOUNT_ATTR_RDONLY;
+       ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+
+       EXPECT_EQ(close(fd), 0);
+}
+
+TEST_F(mount_setattr, mount_has_writers)
+{
+       int fd, dfd;
+       unsigned int old_flags = 0, new_flags = 0;
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOEXEC | MOUNT_ATTR_RELATIME,
+               .attr_clr       = MOUNT_ATTR__ATIME,
+               .propagation    = MS_SHARED,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       old_flags = read_mnt_flags("/mnt/A");
+       ASSERT_GT(old_flags, 0);
+
+       fd = open("/mnt/A/AA/B/b", O_RDWR | O_CLOEXEC | O_CREAT | O_EXCL, 0777);
+       ASSERT_GE(fd, 0);
+
+       /*
+        * We're holding a fd open to a mount somewhere in the middle of the
+        * tree, so this needs to fail partway through. After this the mount
+        * options need to be unchanged.
+        */
+       ASSERT_LT(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, old_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A"), false);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, old_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA"), false);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, old_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), false);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, old_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), false);
+
+       dfd = open("/mnt/A/AA/B", O_DIRECTORY | O_CLOEXEC);
+       ASSERT_GE(dfd, 0);
+       EXPECT_EQ(fsync(dfd), 0);
+       EXPECT_EQ(close(dfd), 0);
+
+       EXPECT_EQ(fsync(fd), 0);
+       EXPECT_EQ(close(fd), 0);
+
+       /* All writers are gone so this should succeed. */
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+}
+
+TEST_F(mount_setattr, mixed_mount_options)
+{
+       unsigned int old_flags1 = 0, old_flags2 = 0, new_flags = 0, expected_flags = 0;
+       struct mount_attr attr = {
+               .attr_clr = MOUNT_ATTR_RDONLY | MOUNT_ATTR_NOSUID | MOUNT_ATTR_NOEXEC | MOUNT_ATTR__ATIME,
+               .attr_set = MOUNT_ATTR_RELATIME,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       old_flags1 = read_mnt_flags("/mnt/B");
+       ASSERT_GT(old_flags1, 0);
+
+       old_flags2 = read_mnt_flags("/mnt/B/BB");
+       ASSERT_GT(old_flags2, 0);
+
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/B", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags = old_flags2;
+       expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+       expected_flags |= MS_RELATIME;
+
+       new_flags = read_mnt_flags("/mnt/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       expected_flags = old_flags2;
+       expected_flags &= ~(MS_RDONLY | MS_NOEXEC | MS_NOATIME | MS_NOSUID);
+       expected_flags |= MS_RELATIME;
+
+       new_flags = read_mnt_flags("/mnt/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, time_changes)
+{
+       unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       attr.attr_set = MOUNT_ATTR_STRICTATIME;
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       attr.attr_set = MOUNT_ATTR_STRICTATIME | MOUNT_ATTR_NOATIME;
+       attr.attr_clr = MOUNT_ATTR__ATIME;
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       attr.attr_set = 0;
+       attr.attr_clr = MOUNT_ATTR_STRICTATIME;
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       attr.attr_clr = MOUNT_ATTR_NOATIME;
+       ASSERT_NE(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       old_flags = read_mnt_flags("/mnt/A");
+       ASSERT_GT(old_flags, 0);
+
+       attr.attr_set = MOUNT_ATTR_NODIRATIME | MOUNT_ATTR_NOATIME;
+       attr.attr_clr = MOUNT_ATTR__ATIME;
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags = old_flags;
+       expected_flags |= MS_NOATIME;
+       expected_flags |= MS_NODIRATIME;
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       memset(&attr, 0, sizeof(attr));
+       attr.attr_set &= ~MOUNT_ATTR_NOATIME;
+       attr.attr_set |= MOUNT_ATTR_RELATIME;
+       attr.attr_clr |= MOUNT_ATTR__ATIME;
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags &= ~MS_NOATIME;
+       expected_flags |= MS_RELATIME;
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       memset(&attr, 0, sizeof(attr));
+       attr.attr_set &= ~MOUNT_ATTR_RELATIME;
+       attr.attr_set |= MOUNT_ATTR_STRICTATIME;
+       attr.attr_clr |= MOUNT_ATTR__ATIME;
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags &= ~MS_RELATIME;
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       memset(&attr, 0, sizeof(attr));
+       attr.attr_set &= ~MOUNT_ATTR_STRICTATIME;
+       attr.attr_set |= MOUNT_ATTR_NOATIME;
+       attr.attr_clr |= MOUNT_ATTR__ATIME;
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags |= MS_NOATIME;
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       memset(&attr, 0, sizeof(attr));
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       memset(&attr, 0, sizeof(attr));
+       attr.attr_clr = MOUNT_ATTR_NODIRATIME;
+       ASSERT_EQ(sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr)), 0);
+
+       expected_flags &= ~MS_NODIRATIME;
+
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+}
+
+TEST_F(mount_setattr, multi_threaded)
+{
+       int i, j, nthreads, ret = 0;
+       unsigned int old_flags = 0, new_flags = 0, expected_flags = 0;
+       pthread_attr_t pattr;
+       pthread_t threads[DEFAULT_THREADS];
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       old_flags = read_mnt_flags("/mnt/A");
+       ASSERT_GT(old_flags, 0);
+
+       /* Try to change mount options from multiple threads. */
+       nthreads = get_nprocs_conf();
+       if (nthreads > DEFAULT_THREADS)
+               nthreads = DEFAULT_THREADS;
+
+       pthread_attr_init(&pattr);
+       for (i = 0; i < nthreads; i++)
+               ASSERT_EQ(pthread_create(&threads[i], &pattr, mount_setattr_thread, NULL), 0);
+
+       for (j = 0; j < i; j++) {
+               void *retptr = NULL;
+
+               EXPECT_EQ(pthread_join(threads[j], &retptr), 0);
+
+               ret += ptr_to_int(retptr);
+               EXPECT_EQ(ret, 0);
+       }
+       pthread_attr_destroy(&pattr);
+
+       ASSERT_EQ(ret, 0);
+
+       expected_flags = old_flags;
+       expected_flags |= MS_RDONLY;
+       expected_flags |= MS_NOSUID;
+       new_flags = read_mnt_flags("/mnt/A");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B"), true);
+
+       new_flags = read_mnt_flags("/mnt/A/AA/B/BB");
+       ASSERT_EQ(new_flags, expected_flags);
+
+       ASSERT_EQ(is_shared_mount("/mnt/A/AA/B/BB"), true);
+}
+
+TEST_F(mount_setattr, wrong_user_namespace)
+{
+       int ret;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_RDONLY,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       EXPECT_EQ(create_and_enter_userns(), 0);
+       ret = sys_mount_setattr(-1, "/mnt/A", AT_RECURSIVE, &attr, sizeof(attr));
+       ASSERT_LT(ret, 0);
+       ASSERT_EQ(errno, EPERM);
+}
+
+TEST_F(mount_setattr, wrong_mount_namespace)
+{
+       int fd, ret;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_RDONLY,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       fd = open("/mnt/A", O_DIRECTORY | O_CLOEXEC);
+       ASSERT_GE(fd, 0);
+
+       ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
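+       /* The fd now refers to a mount in the old mount namespace, so this must fail. */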
+       ret = sys_mount_setattr(fd, "", AT_EMPTY_PATH | AT_RECURSIVE, &attr, sizeof(attr));
+       ASSERT_LT(ret, 0);
+       ASSERT_EQ(errno, EINVAL);
+}
+
+FIXTURE(mount_setattr_idmapped) {
+};
+
+FIXTURE_SETUP(mount_setattr_idmapped)
+{
+       int img_fd = -EBADF;
+
+       ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+       ASSERT_EQ(mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0), 0);
+
+       (void)umount2("/mnt", MNT_DETACH);
+       (void)umount2("/tmp", MNT_DETACH);
+
+       ASSERT_EQ(mount("testing", "/tmp", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/tmp/B", 0777), 0);
+       ASSERT_EQ(mknodat(-EBADF, "/tmp/B/b", S_IFREG | 0644, 0), 0);
+       ASSERT_EQ(chown("/tmp/B/b", 0, 0), 0);
+
+       ASSERT_EQ(mount("testing", "/tmp/B", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/tmp/B/BB", 0777), 0);
+       ASSERT_EQ(mknodat(-EBADF, "/tmp/B/BB/b", S_IFREG | 0644, 0), 0);
+       ASSERT_EQ(chown("/tmp/B/BB/b", 0, 0), 0);
+
+       ASSERT_EQ(mount("testing", "/tmp/B/BB", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mount("testing", "/mnt", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/mnt/A", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/mnt/A", "tmpfs", MS_NOATIME | MS_NODEV,
+                       "size=100000,mode=700"), 0);
+
+       ASSERT_EQ(mkdir("/mnt/A/AA", 0777), 0);
+
+       ASSERT_EQ(mount("/tmp", "/mnt/A/AA", NULL, MS_BIND | MS_REC, NULL), 0);
+
+       ASSERT_EQ(mkdir("/mnt/B", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/mnt/B", "ramfs",
+                       MS_NOATIME | MS_NODEV | MS_NOSUID, 0), 0);
+
+       ASSERT_EQ(mkdir("/mnt/B/BB", 0777), 0);
+
+       ASSERT_EQ(mount("testing", "/tmp/B/BB", "devpts",
+                       MS_RELATIME | MS_NOEXEC | MS_RDONLY, 0), 0);
+
+       ASSERT_EQ(mkdir("/mnt/C", 0777), 0);
+       ASSERT_EQ(mkdir("/mnt/D", 0777), 0);
+       img_fd = openat(-EBADF, "/mnt/C/ext4.img", O_CREAT | O_WRONLY, 0600);
+       ASSERT_GE(img_fd, 0);
+       ASSERT_EQ(ftruncate(img_fd, 1024 * 2048), 0);
+       ASSERT_EQ(system("mkfs.ext4 -q /mnt/C/ext4.img"), 0);
+       ASSERT_EQ(system("mount -o loop -t ext4 /mnt/C/ext4.img /mnt/D/"), 0);
+       ASSERT_EQ(close(img_fd), 0);
+}
+
+FIXTURE_TEARDOWN(mount_setattr_idmapped)
+{
+       (void)umount2("/mnt/A", MNT_DETACH);
+       (void)umount2("/tmp", MNT_DETACH);
+}
+
+/**
+ * Validate that negative fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_negative)
+{
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_IDMAP,
+               .userns_fd      = -EBADF,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+               TH_LOG("failure: created idmapped mount with negative fd");
+       }
+}
+
+/**
+ * Validate that excessively large fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_large)
+{
+       struct mount_attr attr = {
+               .attr_set       = MOUNT_ATTR_IDMAP,
+               .userns_fd      = INT64_MAX,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+               TH_LOG("failure: created idmapped mount with too large fd value");
+       }
+}
+
+/**
+ * Validate that closed fd values are rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_closed)
+{
+       int fd;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       fd = open("/dev/null", O_RDONLY | O_CLOEXEC);
+       ASSERT_GE(fd, 0);
+       ASSERT_GE(close(fd), 0);
+
+       attr.userns_fd = fd;
+       ASSERT_NE(sys_mount_setattr(-1, "/", 0, &attr, sizeof(attr)), 0) {
+               TH_LOG("failure: created idmapped mount with closed fd");
+       }
+}
+
+/**
+ * Validate that the initial user namespace is rejected.
+ */
+TEST_F(mount_setattr_idmapped, invalid_fd_initial_userns)
+{
+       int open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
+       ASSERT_GE(open_tree_fd, 0);
+
+       attr.userns_fd = open("/proc/1/ns/user", O_RDONLY | O_CLOEXEC);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+       ASSERT_EQ(errno, EPERM);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+       ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static int map_ids(pid_t pid, unsigned long nsid, unsigned long hostid,
+                  unsigned long range)
+{
+       char map[100], procfile[256];
+
+       snprintf(procfile, sizeof(procfile), "/proc/%d/uid_map", pid);
+       snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+       if (write_file(procfile, map, strlen(map)))
+               return -1;
+
+       snprintf(procfile, sizeof(procfile), "/proc/%d/gid_map", pid);
+       snprintf(map, sizeof(map), "%lu %lu %lu", nsid, hostid, range);
+       if (write_file(procfile, map, strlen(map)))
+               return -1;
+
+       return 0;
+}
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+       void *stack;
+
+       stack = malloc(__STACK_SIZE);
+       if (!stack)
+               return -ENOMEM;
+
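+       /*
+        * ia64's __clone2() takes the stack base and size; everywhere else
+        * clone() takes the top of the downward-growing stack.
+        */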
+#ifdef __ia64__
+       return __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#else
+       return clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg, NULL);
+#endif
+}
+
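+/*
+ * The cloned child merely stops itself; get_userns_fd() writes the id
+ * mappings and opens /proc/<pid>/ns/user before reaping it.
+ */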
+static int get_userns_fd_cb(void *data)
+{
+       return kill(getpid(), SIGSTOP);
+}
+
+static int wait_for_pid(pid_t pid)
+{
+       int status, ret;
+
+again:
+       ret = waitpid(pid, &status, 0);
+       if (ret == -1) {
+               if (errno == EINTR)
+                       goto again;
+
+               return -1;
+       }
+
+       if (!WIFEXITED(status))
+               return -1;
+
+       return WEXITSTATUS(status);
+}
+
+static int get_userns_fd(unsigned long nsid, unsigned long hostid, unsigned long range)
+{
+       int ret;
+       pid_t pid;
+       char path[256];
+
+       pid = do_clone(get_userns_fd_cb, NULL, CLONE_NEWUSER);
+       if (pid < 0)
+               return -errno;
+
+       ret = map_ids(pid, nsid, hostid, range);
+       if (ret < 0)
+               return ret;
+
+       snprintf(path, sizeof(path), "/proc/%d/ns/user", pid);
+       ret = open(path, O_RDONLY | O_CLOEXEC);
+       kill(pid, SIGKILL);
+       wait_for_pid(pid);
+       return ret;
+}
+
+/**
+ * Validate that an attached mount in our mount namespace can be idmapped.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ *  namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace)
+{
+       int open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC);
+       ASSERT_GE(open_tree_fd, 0);
+
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_EQ(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+       ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that idmapping a mount is rejected if the mount's mount namespace
+ * and our mount namespace don't match.
+ * (The kernel enforces that the mount's mount namespace and the caller's mount
+ *  namespace match.)
+ */
+TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace)
+{
+       int open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC);
+       ASSERT_GE(open_tree_fd, 0);
+
+       ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr,
+                                   sizeof(attr)), 0);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+       ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that a detached mount in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_inside_current_mount_namespace)
+{
+       int open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       ASSERT_GE(open_tree_fd, 0);
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+                                   AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+       ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that a detached mount not in our mount namespace can be idmapped.
+ */
+TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace)
+{
+       int open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       ASSERT_GE(open_tree_fd, 0);
+
+       ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+
+       /* Changing mount properties on a detached mount. */
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+                                   AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+       ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+/**
+ * Validate that changing the idmapping of an already idmapped mount currently fails.
+ */
+TEST_F(mount_setattr_idmapped, change_idmapping)
+{
+       int open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       open_tree_fd = sys_open_tree(-EBADF, "/mnt/D",
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       ASSERT_GE(open_tree_fd, 0);
+
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_EQ(sys_mount_setattr(open_tree_fd, "",
+                                   AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+
+       /* Change idmapping on a detached mount that is already idmapped. */
+       attr.userns_fd  = get_userns_fd(0, 20000, 10000);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+       ASSERT_EQ(close(open_tree_fd), 0);
+}
+
+static bool expected_uid_gid(int dfd, const char *path, int flags,
+                            uid_t expected_uid, gid_t expected_gid)
+{
+       int ret;
+       struct stat st;
+
+       ret = fstatat(dfd, path, &st, flags);
+       if (ret < 0)
+               return false;
+
+       return st.st_uid == expected_uid && st.st_gid == expected_gid;
+}
+
+TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid)
+{
+       int open_tree_fd = -EBADF;
+       struct mount_attr attr = {
+               .attr_set = MOUNT_ATTR_IDMAP,
+       };
+
+       if (!mount_setattr_supported())
+               SKIP(return, "mount_setattr syscall not supported");
+
+       ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+       ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+
+       open_tree_fd = sys_open_tree(-EBADF, "/mnt/A",
+                                    AT_RECURSIVE |
+                                    AT_EMPTY_PATH |
+                                    AT_NO_AUTOMOUNT |
+                                    AT_SYMLINK_NOFOLLOW |
+                                    OPEN_TREE_CLOEXEC |
+                                    OPEN_TREE_CLONE);
+       ASSERT_GE(open_tree_fd, 0);
+
+       attr.userns_fd  = get_userns_fd(0, 10000, 10000);
+       ASSERT_GE(attr.userns_fd, 0);
+       ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0);
+       ASSERT_EQ(close(attr.userns_fd), 0);
+       ASSERT_EQ(close(open_tree_fd), 0);
+
+       ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/b", 0, 0, 0), 0);
+       ASSERT_EQ(expected_uid_gid(-EBADF, "/tmp/B/BB/b", 0, 0, 0), 0);
+       ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/b", 0, 0, 0), 0);
+       ASSERT_EQ(expected_uid_gid(open_tree_fd, "B/BB/b", 0, 0, 0), 0);
+}
+
+TEST_HARNESS_MAIN
index 4c7d336..d98fb85 100755 (executable)
@@ -1524,6 +1524,14 @@ basic()
        run_cmd "$IP nexthop replace id 2 blackhole dev veth1"
        log_test $? 2 "Blackhole nexthop with other attributes"
 
+       # blackhole nexthop should not be affected by the state of the loopback
+       # device
+       run_cmd "$IP link set dev lo down"
+       check_nexthop "id 2" "id 2 blackhole"
+       log_test $? 0 "Blackhole nexthop with loopback device down"
+
+       run_cmd "$IP link set dev lo up"
+
        #
        # groups
        #
index 197e769..f8cda82 100755 (executable)
@@ -86,11 +86,20 @@ test_ip6gretap()
 
 test_gretap_stp()
 {
+       # Sometimes after mirror installation, the neighbor's state is not valid.
+       # The reason is that there is no SW datapath activity related to the
+       # neighbor for the remote GRE address. Whether the corresponding neighbor
+       # ends up valid is therefore a matter of luck, which makes the test racy.
+       # Set the neighbor's state to permanent so that it is always valid.
+       ip neigh replace 192.0.2.130 lladdr $(mac_get $h3) \
+               nud permanent dev br2
        full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
 }
 
 test_ip6gretap_stp()
 {
+       ip neigh replace 2001:db8:2::2 lladdr $(mac_get $h3) \
+               nud permanent dev br2
        full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
 }
 
index ce6bea9..0ccb1dd 100755 (executable)
@@ -658,7 +658,7 @@ test_ecn_decap()
        # In accordance with INET_ECN_decapsulate()
        __test_ecn_decap 00 00 0x00
        __test_ecn_decap 01 01 0x01
-       __test_ecn_decap 02 01 0x02
+       __test_ecn_decap 02 01 0x01
        __test_ecn_decap 01 03 0x03
        __test_ecn_decap 02 03 0x03
        test_ecn_decap_error
index 17ced7d..f23438d 100644 (file)
@@ -1785,7 +1785,7 @@ static void grand_child_serv(unsigned int nr, int cmd_fd, void *buf,
                break;
        default:
                printk("got unknown msg type %d", msg->type);
-       };
+       }
 }
 
 static int grand_child_f(unsigned int nr, int cmd_fd, void *buf)
index 964db9e..ad32240 100755 (executable)
@@ -11,6 +11,7 @@ ksft_skip=4
 timeout=30
 mptcp_connect=""
 capture=0
+do_all_tests=1
 
 TEST_COUNT=0
 
@@ -121,12 +122,6 @@ reset_with_add_addr_timeout()
                -j DROP
 }
 
-for arg in "$@"; do
-       if [ "$arg" = "-c" ]; then
-               capture=1
-       fi
-done
-
 ip -Version > /dev/null 2>&1
 if [ $? -ne 0 ];then
        echo "SKIP: Could not run test without ip tool"
@@ -1221,7 +1216,8 @@ usage()
        echo "  -4 v4mapped_tests"
        echo "  -b backup_tests"
        echo "  -p add_addr_ports_tests"
-       echo "  -c syncookies_tests"
+       echo "  -k syncookies_tests"
+       echo "  -c capture pcap files"
        echo "  -h help"
 }
 
@@ -1235,12 +1231,24 @@ make_file "$cin" "client" 1
 make_file "$sin" "server" 1
 trap cleanup EXIT
 
-if [ -z $1 ]; then
+for arg in "$@"; do
+       # check for "capture" arg before launching tests
+       if [[ "${arg}" =~ ^"-"[0-9a-zA-Z]*"c"[0-9a-zA-Z]*$ ]]; then
+               capture=1
+       fi
+
+       # apart from the capture option, any other argument selects a subset of the tests
+       if [ "${arg}" != "-c" ]; then
+               do_all_tests=0
+       fi
+done
+
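+# no test-selecting argument was given, so run the full suite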
+if [ $do_all_tests -eq 1 ]; then
        all_tests
        exit $ret
 fi
 
-while getopts 'fsltra64bpch' opt; do
+while getopts 'fsltra64bpkch' opt; do
        case $opt in
                f)
                        subflows_tests
@@ -1272,9 +1280,11 @@ while getopts 'fsltra64bpch' opt; do
                p)
                        add_addr_ports_tests
                        ;;
-               c)
+               k)
                        syncookies_tests
                        ;;
+               c)
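+                       # capture was already enabled above, before any tests ran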
+                       ;;
                h | *)
                        usage
                        ;;
index 7b01b7c..066efd3 100644 (file)
@@ -30,25 +30,25 @@ struct reuse_opts {
 };
 
 struct reuse_opts unreusable_opts[12] = {
-       {0, 0, 0, 0},
-       {0, 0, 0, 1},
-       {0, 0, 1, 0},
-       {0, 0, 1, 1},
-       {0, 1, 0, 0},
-       {0, 1, 0, 1},
-       {0, 1, 1, 0},
-       {0, 1, 1, 1},
-       {1, 0, 0, 0},
-       {1, 0, 0, 1},
-       {1, 0, 1, 0},
-       {1, 0, 1, 1},
+       {{0, 0}, {0, 0}},
+       {{0, 0}, {0, 1}},
+       {{0, 0}, {1, 0}},
+       {{0, 0}, {1, 1}},
+       {{0, 1}, {0, 0}},
+       {{0, 1}, {0, 1}},
+       {{0, 1}, {1, 0}},
+       {{0, 1}, {1, 1}},
+       {{1, 0}, {0, 0}},
+       {{1, 0}, {0, 1}},
+       {{1, 0}, {1, 0}},
+       {{1, 0}, {1, 1}},
 };
 
 struct reuse_opts reusable_opts[4] = {
-       {1, 1, 0, 0},
-       {1, 1, 0, 1},
-       {1, 1, 1, 0},
-       {1, 1, 1, 1},
+       {{1, 1}, {0, 0}},
+       {{1, 1}, {0, 1}},
+       {{1, 1}, {1, 0}},
+       {{1, 1}, {1, 1}},
 };
 
 int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport)
index 3006a8e..3171069 100644 (file)
@@ -4,7 +4,7 @@
 TEST_PROGS := nft_trans_stress.sh nft_nat.sh bridge_brouter.sh \
        conntrack_icmp_related.sh nft_flowtable.sh ipvs.sh \
        nft_concat_range.sh nft_conntrack_helper.sh \
-       nft_queue.sh nft_meta.sh \
+       nft_queue.sh nft_meta.sh nf_nat_edemux.sh \
        ipip-conntrack-mtu.sh
 
 LDLIBS = -lmnl
diff --git a/tools/testing/selftests/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/netfilter/nf_nat_edemux.sh
new file mode 100755 (executable)
index 0000000..cfee3b6
--- /dev/null
@@ -0,0 +1,99 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+ret=0
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+
+cleanup()
+{
+       ip netns del $ns1
+       ip netns del $ns2
+}
+
+iperf3 -v > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without iperf3"
+       exit $ksft_skip
+fi
+
+iptables --version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without iptables"
+       exit $ksft_skip
+fi
+
+ip -Version > /dev/null 2>&1
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not run test without ip tool"
+       exit $ksft_skip
+fi
+
+ip netns add "$ns1"
+if [ $? -ne 0 ];then
+       echo "SKIP: Could not create net namespace $ns1"
+       exit $ksft_skip
+fi
+
+trap cleanup EXIT
+
+ip netns add $ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns $ns1 dev veth1
+ip link set netns $ns2 dev veth2
+
+ip netns exec $ns1 ip link set up dev lo
+ip netns exec $ns1 ip link set up dev veth1
+ip netns exec $ns1 ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec $ns2 ip link set up dev lo
+ip netns exec $ns2 ip link set up dev veth2
+ip netns exec $ns2 ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec $ns1 iperf3 -s > /dev/null 2>&1 &
+iperfs=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec $ns2 sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec $ns2 iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec $ns2 ip route add 10.96.0.1 via 192.168.1.1
+
+sleep 1
+
+# add a persistent connection from the other namespace
+ip netns exec $ns2 nc -q 10 -w 10 192.168.1.1 5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec $ns2 nc -w 3 -q 3 10.96.0.1 443 >/dev/null
+ret=$?
+
+kill $iperfs
+
+# Check nc can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+       echo "PASS: nc can connect via NAT'd address"
+else
+       echo "FAIL: nc cannot connect via NAT'd address"
+       exit 1
+fi
+
+exit 0
index d42115e..8b0cd42 100644 (file)
@@ -101,7 +101,7 @@ endef
 ifeq ($(CAN_BUILD_I386),1)
 $(BINARIES_32): CFLAGS += -m32
 $(BINARIES_32): LDLIBS += -lrt -ldl -lm
-$(BINARIES_32): %_32: %.c
+$(BINARIES_32): $(OUTPUT)/%_32: %.c
        $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
 $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-32,$(t))))
 endif
@@ -109,7 +109,7 @@ endif
 ifeq ($(CAN_BUILD_X86_64),1)
 $(BINARIES_64): CFLAGS += -m64
 $(BINARIES_64): LDLIBS += -lrt -ldl
-$(BINARIES_64): %_64: %.c
+$(BINARIES_64): $(OUTPUT)/%_64: %.c
        $(CC) $(CFLAGS) $(EXTRA_CFLAGS) $(notdir $^) $(LDLIBS) -o $@
 $(foreach t,$(TARGETS),$(eval $(call gen-target-rule-64,$(t))))
 endif
index 74c69b7..7ed7cd9 100755 (executable)
@@ -39,7 +39,7 @@ ip0() { pretty 0 "ip $*"; ip -n $netns0 "$@"; }
 ip1() { pretty 1 "ip $*"; ip -n $netns1 "$@"; }
 ip2() { pretty 2 "ip $*"; ip -n $netns2 "$@"; }
 sleep() { read -t "$1" -N 1 || true; }
-waitiperf() { pretty "${1//*-}" "wait for iperf:5201 pid $2"; while [[ $(ss -N "$1" -tlpH 'sport = 5201') != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
+waitiperf() { pretty "${1//*-}" "wait for iperf:${3:-5201} pid $2"; while [[ $(ss -N "$1" -tlpH "sport = ${3:-5201}") != *\"iperf3\",pid=$2,fd=* ]]; do sleep 0.1; done; }
 waitncatudp() { pretty "${1//*-}" "wait for udp:1111 pid $2"; while [[ $(ss -N "$1" -ulpH 'sport = 1111') != *\"ncat\",pid=$2,fd=* ]]; do sleep 0.1; done; }
 waitiface() { pretty "${1//*-}" "wait for $2 to come up"; ip netns exec "$1" bash -c "while [[ \$(< \"/sys/class/net/$2/operstate\") != up ]]; do read -t .1 -N 0 || true; done;"; }
 
@@ -141,6 +141,19 @@ tests() {
        n2 iperf3 -s -1 -B fd00::2 &
        waitiperf $netns2 $!
        n1 iperf3 -Z -t 3 -b 0 -u -c fd00::2
+
+       # TCP over IPv4, in parallel
+       for max in 4 5 50; do
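+               # start $max servers on consecutive ports, then drive them with parallel clients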
+               local pids=( )
+               for ((i=0; i < max; ++i)) do
+                       n2 iperf3 -p $(( 5200 + i )) -s -1 -B 192.168.241.2 &
+                       pids+=( $! ); waitiperf $netns2 $! $(( 5200 + i ))
+               done
+               for ((i=0; i < max; ++i)) do
+                       n1 iperf3 -Z -t 3 -p $(( 5200 + i )) -c 192.168.241.2 &
+               done
+               wait "${pids[@]}"
+       done
 }
 
 [[ $(ip1 link show dev wg0) =~ mtu\ ([0-9]+) ]] && orig_mtu="${BASH_REMATCH[1]}"
index 57b2080..b69de92 100644 (file)
@@ -1650,7 +1650,7 @@ static void start_printthread(void)
                if (ufd <  0 ||
                    read(ufd, seed, sizeof(*seed)) != sizeof(*seed)) {
                        printf(
-"Warning! Using trivial random nummer seed, since %s not available\n",
+"Warning! Using trivial random number seed, since %s not available\n",
                        DEV_URANDOM);
                        fflush(stdout);
                        *seed = i;
@@ -1711,8 +1711,8 @@ static void show_usage(void)
 "\t\t\tbeginning, end, and backtrace.\n\n"
 
 "-g, --graph\t\tEnable the display-graph option in trace_option. This\n"
-"\t\t\toption causes ftrace to show the functionph of how\n"
-"\t\t\tfunctions are calling other functions.\n\n"
+"\t\t\toption causes ftrace to show the graph of how functions\n"
+"\t\t\tare calling other functions.\n\n"
 
 "-c, --policy POL\tRun the program with scheduling policy POL. POL can be\n"
 "\t\t\tother, batch, idle, rr or fifo. The default is rr. When\n"
index 2599bc2..8bbcf69 100644 (file)
@@ -103,7 +103,6 @@ config RD_LZ4
 config RD_ZSTD
        bool "Support initial ramdisk/ramfs compressed using ZSTD"
        default y
-       depends on BLK_DEV_INITRD
        select DECOMPRESS_ZSTD
        help
          Support loading of a ZSTD encoded initial ramdisk or cpio buffer.
index 001b9de..383df23 100644 (file)
@@ -486,6 +486,24 @@ static int kvm_mmu_notifier_invalidate_range_start(struct mmu_notifier *mn,
         * count is also read inside the mmu_lock critical section.
         */
        kvm->mmu_notifier_count++;
+       if (likely(kvm->mmu_notifier_count == 1)) {
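+               /* This is the only active invalidation: record its exact range. */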
+               kvm->mmu_notifier_range_start = range->start;
+               kvm->mmu_notifier_range_end = range->end;
+       } else {
+               /*
+                * Fully tracking multiple concurrent ranges has diminishing
+                * returns. Keep things simple and just find the minimal range
+                * which includes the current and new ranges. As there won't be
+                * enough information to subtract a range after its invalidate
+                * completes, any ranges invalidated concurrently will
+                * accumulate and persist until all outstanding invalidates
+                * complete.
+                */
+               kvm->mmu_notifier_range_start =
+                       min(kvm->mmu_notifier_range_start, range->start);
+               kvm->mmu_notifier_range_end =
+                       max(kvm->mmu_notifier_range_end, range->end);
+       }
        need_tlb_flush = kvm_unmap_hva_range(kvm, range->start, range->end,
                                             range->flags);
        /* we've to flush the tlb before the pages can be freed */
@@ -2023,10 +2041,13 @@ exit:
 
 kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn,
                               bool atomic, bool *async, bool write_fault,
-                              bool *writable)
+                              bool *writable, hva_t *hva)
 {
        unsigned long addr = __gfn_to_hva_many(slot, gfn, NULL, write_fault);
 
+       /* If requested, report the hva; it may encode an error checked below. */
+       if (hva)
+               *hva = addr;
+
        if (addr == KVM_HVA_ERR_RO_BAD) {
                if (writable)
                        *writable = false;
@@ -2054,19 +2075,19 @@ kvm_pfn_t gfn_to_pfn_prot(struct kvm *kvm, gfn_t gfn, bool write_fault,
                      bool *writable)
 {
        return __gfn_to_pfn_memslot(gfn_to_memslot(kvm, gfn), gfn, false, NULL,
-                                   write_fault, writable);
+                                   write_fault, writable, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_prot);
 
 kvm_pfn_t gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-       return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL);
+       return __gfn_to_pfn_memslot(slot, gfn, false, NULL, true, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot);
 
 kvm_pfn_t gfn_to_pfn_memslot_atomic(struct kvm_memory_slot *slot, gfn_t gfn)
 {
-       return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL);
+       return __gfn_to_pfn_memslot(slot, gfn, true, NULL, true, NULL, NULL);
 }
 EXPORT_SYMBOL_GPL(gfn_to_pfn_memslot_atomic);